{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.966722129783694, "eval_steps": 250, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0033277870216306157, "grad_norm": 25.586576461791992, "learning_rate": 5e-06, "loss": 2.0511, "num_input_tokens_seen": 62508, "step": 1 }, { "epoch": 0.0033277870216306157, "loss": 2.104052782058716, "loss_ce": 0.19975604116916656, "loss_iou": 0.5546875, "loss_num": 0.158203125, "loss_xval": 1.90625, "num_input_tokens_seen": 62508, "step": 1 }, { "epoch": 0.0066555740432612314, "grad_norm": 59.460784912109375, "learning_rate": 5e-06, "loss": 2.088, "num_input_tokens_seen": 126248, "step": 2 }, { "epoch": 0.0066555740432612314, "loss": 2.4017975330352783, "loss_ce": 0.21722719073295593, "loss_iou": 0.7109375, "loss_num": 0.15234375, "loss_xval": 2.1875, "num_input_tokens_seen": 126248, "step": 2 }, { "epoch": 0.009983361064891847, "grad_norm": 35.959197998046875, "learning_rate": 5e-06, "loss": 1.8259, "num_input_tokens_seen": 188076, "step": 3 }, { "epoch": 0.009983361064891847, "loss": 2.0907115936279297, "loss_ce": 0.11024291813373566, "loss_iou": 0.490234375, "loss_num": 0.19921875, "loss_xval": 1.984375, "num_input_tokens_seen": 188076, "step": 3 }, { "epoch": 0.013311148086522463, "grad_norm": 20.976266860961914, "learning_rate": 5e-06, "loss": 1.8763, "num_input_tokens_seen": 249288, "step": 4 }, { "epoch": 0.013311148086522463, "loss": 1.6172221899032593, "loss_ce": 0.2739604413509369, "loss_iou": 0.392578125, "loss_num": 0.11181640625, "loss_xval": 1.34375, "num_input_tokens_seen": 249288, "step": 4 }, { "epoch": 0.016638935108153077, "grad_norm": 173.79701232910156, "learning_rate": 5e-06, "loss": 1.645, "num_input_tokens_seen": 311976, "step": 5 }, { "epoch": 0.016638935108153077, "loss": 1.46919846534729, "loss_ce": 0.16573163866996765, "loss_iou": 0.322265625, "loss_num": 0.1318359375, "loss_xval": 1.3046875, "num_input_tokens_seen": 311976, "step": 5 }, { "epoch": 0.019966722129783693, "grad_norm": 26.787860870361328, "learning_rate": 5e-06, "loss": 2.1085, "num_input_tokens_seen": 373328, "step": 6 }, { "epoch": 0.019966722129783693, "loss": 2.1008501052856445, "loss_ce": 0.18092799186706543, "loss_iou": 0.5859375, "loss_num": 0.1494140625, "loss_xval": 1.921875, "num_input_tokens_seen": 373328, "step": 6 }, { "epoch": 0.02329450915141431, "grad_norm": 34.086795806884766, "learning_rate": 5e-06, "loss": 2.0316, "num_input_tokens_seen": 436496, "step": 7 }, { "epoch": 0.02329450915141431, "loss": 1.848116159439087, "loss_ce": 0.24923935532569885, "loss_iou": 0.46875, "loss_num": 0.1318359375, "loss_xval": 1.6015625, "num_input_tokens_seen": 436496, "step": 7 }, { "epoch": 0.026622296173044926, "grad_norm": 41.447261810302734, "learning_rate": 5e-06, "loss": 2.1408, "num_input_tokens_seen": 501760, "step": 8 }, { "epoch": 0.026622296173044926, "loss": 2.2013022899627686, "loss_ce": 0.25013038516044617, "loss_iou": 0.609375, "loss_num": 0.1455078125, "loss_xval": 1.953125, "num_input_tokens_seen": 501760, "step": 8 }, { "epoch": 0.029950083194675542, "grad_norm": 51.00299072265625, "learning_rate": 5e-06, "loss": 2.0381, "num_input_tokens_seen": 564076, "step": 9 }, { "epoch": 0.029950083194675542, "loss": 1.9200944900512695, "loss_ce": 0.21013353765010834, "loss_iou": 0.52734375, "loss_num": 0.1318359375, "loss_xval": 1.7109375, "num_input_tokens_seen": 564076, "step": 9 }, { "epoch": 0.033277870216306155, "grad_norm": 30.66320037841797, "learning_rate": 5e-06, "loss": 1.8953, "num_input_tokens_seen": 627120, "step": 10 }, { "epoch": 0.033277870216306155, "loss": 1.6392368078231812, "loss_ce": 0.07258643209934235, "loss_iou": 0.44921875, "loss_num": 0.1337890625, "loss_xval": 1.5703125, "num_input_tokens_seen": 627120, "step": 10 }, { "epoch": 0.036605657237936774, "grad_norm": 28.829208374023438, "learning_rate": 5e-06, "loss": 2.0803, "num_input_tokens_seen": 690868, "step": 11 }, { "epoch": 0.036605657237936774, "loss": 1.963969111442566, "loss_ce": 0.2418011873960495, "loss_iou": 0.455078125, "loss_num": 0.162109375, "loss_xval": 1.71875, "num_input_tokens_seen": 690868, "step": 11 }, { "epoch": 0.03993344425956739, "grad_norm": 22.68766212463379, "learning_rate": 5e-06, "loss": 1.3998, "num_input_tokens_seen": 752940, "step": 12 }, { "epoch": 0.03993344425956739, "loss": 0.8375154733657837, "loss_ce": 0.07140220701694489, "loss_iou": 0.0, "loss_num": 0.1533203125, "loss_xval": 0.765625, "num_input_tokens_seen": 752940, "step": 12 }, { "epoch": 0.04326123128119801, "grad_norm": 18.59205436706543, "learning_rate": 5e-06, "loss": 2.0087, "num_input_tokens_seen": 813640, "step": 13 }, { "epoch": 0.04326123128119801, "loss": 1.8005166053771973, "loss_ce": 0.14670801162719727, "loss_iou": 0.5, "loss_num": 0.130859375, "loss_xval": 1.65625, "num_input_tokens_seen": 813640, "step": 13 }, { "epoch": 0.04658901830282862, "grad_norm": 42.463226318359375, "learning_rate": 5e-06, "loss": 1.4693, "num_input_tokens_seen": 875444, "step": 14 }, { "epoch": 0.04658901830282862, "loss": 1.4814634323120117, "loss_ce": 0.16749869287014008, "loss_iou": 0.453125, "loss_num": 0.0810546875, "loss_xval": 1.3125, "num_input_tokens_seen": 875444, "step": 14 }, { "epoch": 0.04991680532445923, "grad_norm": 26.741979598999023, "learning_rate": 5e-06, "loss": 1.5847, "num_input_tokens_seen": 936456, "step": 15 }, { "epoch": 0.04991680532445923, "loss": 1.5655254125595093, "loss_ce": 0.13925588130950928, "loss_iou": 0.32421875, "loss_num": 0.1552734375, "loss_xval": 1.4296875, "num_input_tokens_seen": 936456, "step": 15 }, { "epoch": 0.05324459234608985, "grad_norm": 20.150821685791016, "learning_rate": 5e-06, "loss": 1.7527, "num_input_tokens_seen": 998624, "step": 16 }, { "epoch": 0.05324459234608985, "loss": 1.4237003326416016, "loss_ce": 0.1258487105369568, "loss_iou": 0.291015625, "loss_num": 0.1435546875, "loss_xval": 1.296875, "num_input_tokens_seen": 998624, "step": 16 }, { "epoch": 0.056572379367720464, "grad_norm": 82.50653076171875, "learning_rate": 5e-06, "loss": 1.6956, "num_input_tokens_seen": 1059820, "step": 17 }, { "epoch": 0.056572379367720464, "loss": 1.7093596458435059, "loss_ce": 0.316293329000473, "loss_iou": 0.34765625, "loss_num": 0.1396484375, "loss_xval": 1.390625, "num_input_tokens_seen": 1059820, "step": 17 }, { "epoch": 0.059900166389351084, "grad_norm": 22.57611083984375, "learning_rate": 5e-06, "loss": 1.8825, "num_input_tokens_seen": 1120056, "step": 18 }, { "epoch": 0.059900166389351084, "loss": 2.1664962768554688, "loss_ce": 0.18260937929153442, "loss_iou": 0.6484375, "loss_num": 0.138671875, "loss_xval": 1.984375, "num_input_tokens_seen": 1120056, "step": 18 }, { "epoch": 0.0632279534109817, "grad_norm": 38.09444046020508, "learning_rate": 5e-06, "loss": 2.0262, "num_input_tokens_seen": 1183548, "step": 19 }, { "epoch": 0.0632279534109817, "loss": 1.8747063875198364, "loss_ce": 0.10322199016809464, "loss_iou": 0.5234375, "loss_num": 0.14453125, "loss_xval": 1.7734375, "num_input_tokens_seen": 1183548, "step": 19 }, { "epoch": 0.06655574043261231, "grad_norm": 31.034862518310547, "learning_rate": 5e-06, "loss": 1.8203, "num_input_tokens_seen": 1243032, "step": 20 }, { "epoch": 0.06655574043261231, "loss": 1.6941111087799072, "loss_ce": 0.4692575931549072, "loss_iou": 0.1328125, "loss_num": 0.19140625, "loss_xval": 1.2265625, "num_input_tokens_seen": 1243032, "step": 20 }, { "epoch": 0.06988352745424292, "grad_norm": 31.796939849853516, "learning_rate": 5e-06, "loss": 1.7008, "num_input_tokens_seen": 1304364, "step": 21 }, { "epoch": 0.06988352745424292, "loss": 1.6697863340377808, "loss_ce": 0.3897570073604584, "loss_iou": 0.322265625, "loss_num": 0.1279296875, "loss_xval": 1.28125, "num_input_tokens_seen": 1304364, "step": 21 }, { "epoch": 0.07321131447587355, "grad_norm": 32.93001174926758, "learning_rate": 5e-06, "loss": 2.0114, "num_input_tokens_seen": 1367292, "step": 22 }, { "epoch": 0.07321131447587355, "loss": 1.7533671855926514, "loss_ce": 0.0761210098862648, "loss_iou": 0.48046875, "loss_num": 0.1435546875, "loss_xval": 1.6796875, "num_input_tokens_seen": 1367292, "step": 22 }, { "epoch": 0.07653910149750416, "grad_norm": 31.716394424438477, "learning_rate": 5e-06, "loss": 2.0535, "num_input_tokens_seen": 1430208, "step": 23 }, { "epoch": 0.07653910149750416, "loss": 2.0512099266052246, "loss_ce": 0.11370983719825745, "loss_iou": 0.59765625, "loss_num": 0.1484375, "loss_xval": 1.9375, "num_input_tokens_seen": 1430208, "step": 23 }, { "epoch": 0.07986688851913477, "grad_norm": 21.040109634399414, "learning_rate": 5e-06, "loss": 1.6829, "num_input_tokens_seen": 1490916, "step": 24 }, { "epoch": 0.07986688851913477, "loss": 1.4797439575195312, "loss_ce": 0.11035922914743423, "loss_iou": 0.26953125, "loss_num": 0.166015625, "loss_xval": 1.3671875, "num_input_tokens_seen": 1490916, "step": 24 }, { "epoch": 0.08319467554076539, "grad_norm": 20.70445442199707, "learning_rate": 5e-06, "loss": 1.9616, "num_input_tokens_seen": 1551840, "step": 25 }, { "epoch": 0.08319467554076539, "loss": 1.7711155414581299, "loss_ce": 0.4356662333011627, "loss_iou": 0.380859375, "loss_num": 0.115234375, "loss_xval": 1.3359375, "num_input_tokens_seen": 1551840, "step": 25 }, { "epoch": 0.08652246256239601, "grad_norm": 29.884246826171875, "learning_rate": 5e-06, "loss": 1.8749, "num_input_tokens_seen": 1614312, "step": 26 }, { "epoch": 0.08652246256239601, "loss": 1.7932677268981934, "loss_ce": 0.17217400670051575, "loss_iou": 0.43359375, "loss_num": 0.150390625, "loss_xval": 1.625, "num_input_tokens_seen": 1614312, "step": 26 }, { "epoch": 0.08985024958402663, "grad_norm": 19.663179397583008, "learning_rate": 5e-06, "loss": 1.8969, "num_input_tokens_seen": 1676412, "step": 27 }, { "epoch": 0.08985024958402663, "loss": 1.6153647899627686, "loss_ce": 0.27820655703544617, "loss_iou": 0.302734375, "loss_num": 0.1455078125, "loss_xval": 1.3359375, "num_input_tokens_seen": 1676412, "step": 27 }, { "epoch": 0.09317803660565724, "grad_norm": 23.70810890197754, "learning_rate": 5e-06, "loss": 1.7352, "num_input_tokens_seen": 1738576, "step": 28 }, { "epoch": 0.09317803660565724, "loss": 1.543161153793335, "loss_ce": 0.12323927879333496, "loss_iou": 0.35546875, "loss_num": 0.1416015625, "loss_xval": 1.421875, "num_input_tokens_seen": 1738576, "step": 28 }, { "epoch": 0.09650582362728785, "grad_norm": 16.41214370727539, "learning_rate": 5e-06, "loss": 1.6304, "num_input_tokens_seen": 1801036, "step": 29 }, { "epoch": 0.09650582362728785, "loss": 1.806901454925537, "loss_ce": 0.42115920782089233, "loss_iou": 0.380859375, "loss_num": 0.125, "loss_xval": 1.3828125, "num_input_tokens_seen": 1801036, "step": 29 }, { "epoch": 0.09983361064891846, "grad_norm": 25.195419311523438, "learning_rate": 5e-06, "loss": 1.8938, "num_input_tokens_seen": 1863536, "step": 30 }, { "epoch": 0.09983361064891846, "loss": 1.9765114784240723, "loss_ce": 0.2587380111217499, "loss_iou": 0.546875, "loss_num": 0.12451171875, "loss_xval": 1.71875, "num_input_tokens_seen": 1863536, "step": 30 }, { "epoch": 0.10316139767054909, "grad_norm": 17.6975154876709, "learning_rate": 5e-06, "loss": 1.9441, "num_input_tokens_seen": 1927648, "step": 31 }, { "epoch": 0.10316139767054909, "loss": 1.7742146253585815, "loss_ce": 0.21269112825393677, "loss_iou": 0.50390625, "loss_num": 0.1103515625, "loss_xval": 1.5625, "num_input_tokens_seen": 1927648, "step": 31 }, { "epoch": 0.1064891846921797, "grad_norm": 43.03065490722656, "learning_rate": 5e-06, "loss": 1.4075, "num_input_tokens_seen": 1988372, "step": 32 }, { "epoch": 0.1064891846921797, "loss": 1.496689796447754, "loss_ce": 0.11094758659601212, "loss_iou": 0.30078125, "loss_num": 0.1572265625, "loss_xval": 1.3828125, "num_input_tokens_seen": 1988372, "step": 32 }, { "epoch": 0.10981697171381032, "grad_norm": 18.254587173461914, "learning_rate": 5e-06, "loss": 1.7655, "num_input_tokens_seen": 2050944, "step": 33 }, { "epoch": 0.10981697171381032, "loss": 1.7537667751312256, "loss_ce": 0.309430867433548, "loss_iou": 0.33203125, "loss_num": 0.15625, "loss_xval": 1.4453125, "num_input_tokens_seen": 2050944, "step": 33 }, { "epoch": 0.11314475873544093, "grad_norm": 27.89544105529785, "learning_rate": 5e-06, "loss": 1.9816, "num_input_tokens_seen": 2113792, "step": 34 }, { "epoch": 0.11314475873544093, "loss": 1.868133783340454, "loss_ce": 0.4003603458404541, "loss_iou": 0.4140625, "loss_num": 0.1279296875, "loss_xval": 1.46875, "num_input_tokens_seen": 2113792, "step": 34 }, { "epoch": 0.11647254575707154, "grad_norm": 21.694988250732422, "learning_rate": 5e-06, "loss": 1.6937, "num_input_tokens_seen": 2176912, "step": 35 }, { "epoch": 0.11647254575707154, "loss": 1.7499048709869385, "loss_ce": 0.24184811115264893, "loss_iou": 0.435546875, "loss_num": 0.1279296875, "loss_xval": 1.5078125, "num_input_tokens_seen": 2176912, "step": 35 }, { "epoch": 0.11980033277870217, "grad_norm": 11.779193878173828, "learning_rate": 5e-06, "loss": 1.6187, "num_input_tokens_seen": 2240424, "step": 36 }, { "epoch": 0.11980033277870217, "loss": 1.5325156450271606, "loss_ce": 0.10624612867832184, "loss_iou": 0.41796875, "loss_num": 0.11865234375, "loss_xval": 1.4296875, "num_input_tokens_seen": 2240424, "step": 36 }, { "epoch": 0.12312811980033278, "grad_norm": 18.902915954589844, "learning_rate": 5e-06, "loss": 1.6352, "num_input_tokens_seen": 2302000, "step": 37 }, { "epoch": 0.12312811980033278, "loss": 2.090439796447754, "loss_ce": 0.35899433493614197, "loss_iou": 0.52734375, "loss_num": 0.1357421875, "loss_xval": 1.734375, "num_input_tokens_seen": 2302000, "step": 37 }, { "epoch": 0.1264559068219634, "grad_norm": 23.633113861083984, "learning_rate": 5e-06, "loss": 2.0591, "num_input_tokens_seen": 2364176, "step": 38 }, { "epoch": 0.1264559068219634, "loss": 2.167440891265869, "loss_ce": 0.3012297749519348, "loss_iou": 0.6171875, "loss_num": 0.1259765625, "loss_xval": 1.8671875, "num_input_tokens_seen": 2364176, "step": 38 }, { "epoch": 0.129783693843594, "grad_norm": 73.32101440429688, "learning_rate": 5e-06, "loss": 1.5635, "num_input_tokens_seen": 2427048, "step": 39 }, { "epoch": 0.129783693843594, "loss": 1.5494978427886963, "loss_ce": 0.23699793219566345, "loss_iou": 0.35546875, "loss_num": 0.1201171875, "loss_xval": 1.3125, "num_input_tokens_seen": 2427048, "step": 39 }, { "epoch": 0.13311148086522462, "grad_norm": 35.4217643737793, "learning_rate": 5e-06, "loss": 1.9861, "num_input_tokens_seen": 2489792, "step": 40 }, { "epoch": 0.13311148086522462, "loss": 1.7619882822036743, "loss_ce": 0.11745704710483551, "loss_iou": 0.4296875, "loss_num": 0.1572265625, "loss_xval": 1.640625, "num_input_tokens_seen": 2489792, "step": 40 }, { "epoch": 0.13643926788685523, "grad_norm": 21.786352157592773, "learning_rate": 5e-06, "loss": 1.5348, "num_input_tokens_seen": 2553004, "step": 41 }, { "epoch": 0.13643926788685523, "loss": 1.1435575485229492, "loss_ce": 0.09131141006946564, "loss_iou": 0.208984375, "loss_num": 0.126953125, "loss_xval": 1.0546875, "num_input_tokens_seen": 2553004, "step": 41 }, { "epoch": 0.13976705490848584, "grad_norm": 79.95733642578125, "learning_rate": 5e-06, "loss": 1.6947, "num_input_tokens_seen": 2615620, "step": 42 }, { "epoch": 0.13976705490848584, "loss": 1.4937045574188232, "loss_ce": 0.24394866824150085, "loss_iou": 0.36328125, "loss_num": 0.10400390625, "loss_xval": 1.25, "num_input_tokens_seen": 2615620, "step": 42 }, { "epoch": 0.14309484193011648, "grad_norm": 17.062623977661133, "learning_rate": 5e-06, "loss": 1.8647, "num_input_tokens_seen": 2676768, "step": 43 }, { "epoch": 0.14309484193011648, "loss": 1.7216743230819702, "loss_ce": 0.1909126341342926, "loss_iou": 0.490234375, "loss_num": 0.11083984375, "loss_xval": 1.53125, "num_input_tokens_seen": 2676768, "step": 43 }, { "epoch": 0.1464226289517471, "grad_norm": 30.43678855895996, "learning_rate": 5e-06, "loss": 1.9432, "num_input_tokens_seen": 2741332, "step": 44 }, { "epoch": 0.1464226289517471, "loss": 1.9834593534469604, "loss_ce": 0.21783432364463806, "loss_iou": 0.58203125, "loss_num": 0.12109375, "loss_xval": 1.765625, "num_input_tokens_seen": 2741332, "step": 44 }, { "epoch": 0.1497504159733777, "grad_norm": 21.58626365661621, "learning_rate": 5e-06, "loss": 1.5248, "num_input_tokens_seen": 2804636, "step": 45 }, { "epoch": 0.1497504159733777, "loss": 1.8653510808944702, "loss_ce": 0.11437452584505081, "loss_iou": 0.59765625, "loss_num": 0.11083984375, "loss_xval": 1.75, "num_input_tokens_seen": 2804636, "step": 45 }, { "epoch": 0.15307820299500832, "grad_norm": 13.903108596801758, "learning_rate": 5e-06, "loss": 1.6177, "num_input_tokens_seen": 2867664, "step": 46 }, { "epoch": 0.15307820299500832, "loss": 1.4962878227233887, "loss_ce": 0.05829951912164688, "loss_iou": 0.439453125, "loss_num": 0.11181640625, "loss_xval": 1.4375, "num_input_tokens_seen": 2867664, "step": 46 }, { "epoch": 0.15640599001663893, "grad_norm": 16.125890731811523, "learning_rate": 5e-06, "loss": 1.6285, "num_input_tokens_seen": 2931104, "step": 47 }, { "epoch": 0.15640599001663893, "loss": 1.7633947134017944, "loss_ce": 0.07003532350063324, "loss_iou": 0.5546875, "loss_num": 0.1171875, "loss_xval": 1.6953125, "num_input_tokens_seen": 2931104, "step": 47 }, { "epoch": 0.15973377703826955, "grad_norm": 27.122386932373047, "learning_rate": 5e-06, "loss": 1.6283, "num_input_tokens_seen": 2994172, "step": 48 }, { "epoch": 0.15973377703826955, "loss": 1.601656198501587, "loss_ce": 0.1192343533039093, "loss_iou": 0.42578125, "loss_num": 0.1259765625, "loss_xval": 1.484375, "num_input_tokens_seen": 2994172, "step": 48 }, { "epoch": 0.16306156405990016, "grad_norm": 22.370195388793945, "learning_rate": 5e-06, "loss": 1.9868, "num_input_tokens_seen": 3056852, "step": 49 }, { "epoch": 0.16306156405990016, "loss": 1.8162040710449219, "loss_ce": 0.21854785084724426, "loss_iou": 0.494140625, "loss_num": 0.12158203125, "loss_xval": 1.59375, "num_input_tokens_seen": 3056852, "step": 49 }, { "epoch": 0.16638935108153077, "grad_norm": 17.878007888793945, "learning_rate": 5e-06, "loss": 1.9372, "num_input_tokens_seen": 3119844, "step": 50 }, { "epoch": 0.16638935108153077, "loss": 1.9082149267196655, "loss_ce": 0.3510860204696655, "loss_iou": 0.388671875, "loss_num": 0.15625, "loss_xval": 1.5546875, "num_input_tokens_seen": 3119844, "step": 50 }, { "epoch": 0.16971713810316139, "grad_norm": 28.544042587280273, "learning_rate": 5e-06, "loss": 1.8052, "num_input_tokens_seen": 3183156, "step": 51 }, { "epoch": 0.16971713810316139, "loss": 1.7122398614883423, "loss_ce": 0.22737649083137512, "loss_iou": 0.478515625, "loss_num": 0.10595703125, "loss_xval": 1.484375, "num_input_tokens_seen": 3183156, "step": 51 }, { "epoch": 0.17304492512479203, "grad_norm": 18.788650512695312, "learning_rate": 5e-06, "loss": 1.6417, "num_input_tokens_seen": 3245336, "step": 52 }, { "epoch": 0.17304492512479203, "loss": 1.6718628406524658, "loss_ce": 0.06834729015827179, "loss_iou": 0.44921875, "loss_num": 0.140625, "loss_xval": 1.6015625, "num_input_tokens_seen": 3245336, "step": 52 }, { "epoch": 0.17637271214642264, "grad_norm": 24.63119888305664, "learning_rate": 5e-06, "loss": 1.6207, "num_input_tokens_seen": 3307840, "step": 53 }, { "epoch": 0.17637271214642264, "loss": 1.8642797470092773, "loss_ce": 0.1006079912185669, "loss_iou": 0.6171875, "loss_num": 0.10595703125, "loss_xval": 1.765625, "num_input_tokens_seen": 3307840, "step": 53 }, { "epoch": 0.17970049916805325, "grad_norm": 30.866594314575195, "learning_rate": 5e-06, "loss": 1.8085, "num_input_tokens_seen": 3370720, "step": 54 }, { "epoch": 0.17970049916805325, "loss": 2.13612699508667, "loss_ce": 0.2894473075866699, "loss_iou": 0.5703125, "loss_num": 0.140625, "loss_xval": 1.84375, "num_input_tokens_seen": 3370720, "step": 54 }, { "epoch": 0.18302828618968386, "grad_norm": 42.315670013427734, "learning_rate": 5e-06, "loss": 1.7211, "num_input_tokens_seen": 3434336, "step": 55 }, { "epoch": 0.18302828618968386, "loss": 1.6814637184143066, "loss_ce": 0.05548717826604843, "loss_iou": 0.51953125, "loss_num": 0.1181640625, "loss_xval": 1.625, "num_input_tokens_seen": 3434336, "step": 55 }, { "epoch": 0.18635607321131448, "grad_norm": 18.631044387817383, "learning_rate": 5e-06, "loss": 1.6828, "num_input_tokens_seen": 3495772, "step": 56 }, { "epoch": 0.18635607321131448, "loss": 1.5654587745666504, "loss_ce": 0.09231419861316681, "loss_iou": 0.427734375, "loss_num": 0.12353515625, "loss_xval": 1.4765625, "num_input_tokens_seen": 3495772, "step": 56 }, { "epoch": 0.1896838602329451, "grad_norm": 15.498687744140625, "learning_rate": 5e-06, "loss": 1.8859, "num_input_tokens_seen": 3560284, "step": 57 }, { "epoch": 0.1896838602329451, "loss": 1.7654845714569092, "loss_ce": 0.035015806555747986, "loss_iou": 0.55859375, "loss_num": 0.123046875, "loss_xval": 1.734375, "num_input_tokens_seen": 3560284, "step": 57 }, { "epoch": 0.1930116472545757, "grad_norm": 28.59018325805664, "learning_rate": 5e-06, "loss": 1.8022, "num_input_tokens_seen": 3622924, "step": 58 }, { "epoch": 0.1930116472545757, "loss": 1.824717402458191, "loss_ce": 0.2290142923593521, "loss_iou": 0.484375, "loss_num": 0.125, "loss_xval": 1.59375, "num_input_tokens_seen": 3622924, "step": 58 }, { "epoch": 0.19633943427620631, "grad_norm": 22.03955841064453, "learning_rate": 5e-06, "loss": 1.6031, "num_input_tokens_seen": 3685472, "step": 59 }, { "epoch": 0.19633943427620631, "loss": 1.6390851736068726, "loss_ce": 0.011155525222420692, "loss_iou": 0.515625, "loss_num": 0.11865234375, "loss_xval": 1.625, "num_input_tokens_seen": 3685472, "step": 59 }, { "epoch": 0.19966722129783693, "grad_norm": 38.88141632080078, "learning_rate": 5e-06, "loss": 1.7982, "num_input_tokens_seen": 3749084, "step": 60 }, { "epoch": 0.19966722129783693, "loss": 1.7619941234588623, "loss_ce": 0.18997271358966827, "loss_iou": 0.4609375, "loss_num": 0.1298828125, "loss_xval": 1.5703125, "num_input_tokens_seen": 3749084, "step": 60 }, { "epoch": 0.20299500831946754, "grad_norm": 125.27637481689453, "learning_rate": 5e-06, "loss": 1.5196, "num_input_tokens_seen": 3811252, "step": 61 }, { "epoch": 0.20299500831946754, "loss": 1.583364486694336, "loss_ce": 0.13707543909549713, "loss_iou": 0.314453125, "loss_num": 0.1640625, "loss_xval": 1.4453125, "num_input_tokens_seen": 3811252, "step": 61 }, { "epoch": 0.20632279534109818, "grad_norm": 20.979581832885742, "learning_rate": 5e-06, "loss": 1.3883, "num_input_tokens_seen": 3873272, "step": 62 }, { "epoch": 0.20632279534109818, "loss": 1.4390028715133667, "loss_ce": 0.0327528677880764, "loss_iou": 0.318359375, "loss_num": 0.154296875, "loss_xval": 1.40625, "num_input_tokens_seen": 3873272, "step": 62 }, { "epoch": 0.2096505823627288, "grad_norm": 37.39727783203125, "learning_rate": 5e-06, "loss": 1.6738, "num_input_tokens_seen": 3937536, "step": 63 }, { "epoch": 0.2096505823627288, "loss": 1.474726915359497, "loss_ce": 0.020625418052077293, "loss_iou": 0.4765625, "loss_num": 0.099609375, "loss_xval": 1.453125, "num_input_tokens_seen": 3937536, "step": 63 }, { "epoch": 0.2129783693843594, "grad_norm": 16.83596420288086, "learning_rate": 5e-06, "loss": 1.7559, "num_input_tokens_seen": 4001016, "step": 64 }, { "epoch": 0.2129783693843594, "loss": 1.8051061630249023, "loss_ce": 0.17717644572257996, "loss_iou": 0.5859375, "loss_num": 0.09130859375, "loss_xval": 1.625, "num_input_tokens_seen": 4001016, "step": 64 }, { "epoch": 0.21630615640599002, "grad_norm": 13.039124488830566, "learning_rate": 5e-06, "loss": 1.5523, "num_input_tokens_seen": 4064248, "step": 65 }, { "epoch": 0.21630615640599002, "loss": 1.560785174369812, "loss_ce": 0.07152736186981201, "loss_iou": 0.359375, "loss_num": 0.154296875, "loss_xval": 1.4921875, "num_input_tokens_seen": 4064248, "step": 65 }, { "epoch": 0.21963394342762063, "grad_norm": 30.31218910217285, "learning_rate": 5e-06, "loss": 1.4474, "num_input_tokens_seen": 4125668, "step": 66 }, { "epoch": 0.21963394342762063, "loss": 1.202639102935791, "loss_ce": 0.021486753597855568, "loss_iou": 0.296875, "loss_num": 0.1171875, "loss_xval": 1.1796875, "num_input_tokens_seen": 4125668, "step": 66 }, { "epoch": 0.22296173044925124, "grad_norm": 20.06421661376953, "learning_rate": 5e-06, "loss": 1.8647, "num_input_tokens_seen": 4189148, "step": 67 }, { "epoch": 0.22296173044925124, "loss": 1.854466438293457, "loss_ce": 0.1474352777004242, "loss_iou": 0.5703125, "loss_num": 0.11279296875, "loss_xval": 1.703125, "num_input_tokens_seen": 4189148, "step": 67 }, { "epoch": 0.22628951747088186, "grad_norm": 14.856746673583984, "learning_rate": 5e-06, "loss": 1.4717, "num_input_tokens_seen": 4250480, "step": 68 }, { "epoch": 0.22628951747088186, "loss": 1.278847575187683, "loss_ce": 0.031533174216747284, "loss_iou": 0.375, "loss_num": 0.099609375, "loss_xval": 1.25, "num_input_tokens_seen": 4250480, "step": 68 }, { "epoch": 0.22961730449251247, "grad_norm": 20.701168060302734, "learning_rate": 5e-06, "loss": 1.6959, "num_input_tokens_seen": 4312848, "step": 69 }, { "epoch": 0.22961730449251247, "loss": 1.5334434509277344, "loss_ce": 0.13451766967773438, "loss_iou": 0.458984375, "loss_num": 0.09619140625, "loss_xval": 1.3984375, "num_input_tokens_seen": 4312848, "step": 69 }, { "epoch": 0.23294509151414308, "grad_norm": 18.825393676757812, "learning_rate": 5e-06, "loss": 1.5928, "num_input_tokens_seen": 4375124, "step": 70 }, { "epoch": 0.23294509151414308, "loss": 1.6959267854690552, "loss_ce": 0.06995029747486115, "loss_iou": 0.52734375, "loss_num": 0.11376953125, "loss_xval": 1.625, "num_input_tokens_seen": 4375124, "step": 70 }, { "epoch": 0.23627287853577372, "grad_norm": 25.3826904296875, "learning_rate": 5e-06, "loss": 1.7646, "num_input_tokens_seen": 4437892, "step": 71 }, { "epoch": 0.23627287853577372, "loss": 1.745131492614746, "loss_ce": 0.22755324840545654, "loss_iou": 0.498046875, "loss_num": 0.1044921875, "loss_xval": 1.515625, "num_input_tokens_seen": 4437892, "step": 71 }, { "epoch": 0.23960066555740434, "grad_norm": 19.752620697021484, "learning_rate": 5e-06, "loss": 1.9255, "num_input_tokens_seen": 4501652, "step": 72 }, { "epoch": 0.23960066555740434, "loss": 1.622131586074829, "loss_ce": 0.035217635333538055, "loss_iou": 0.486328125, "loss_num": 0.12255859375, "loss_xval": 1.5859375, "num_input_tokens_seen": 4501652, "step": 72 }, { "epoch": 0.24292845257903495, "grad_norm": 12.059938430786133, "learning_rate": 5e-06, "loss": 1.6852, "num_input_tokens_seen": 4565024, "step": 73 }, { "epoch": 0.24292845257903495, "loss": 1.8856362104415894, "loss_ce": 0.12489404529333115, "loss_iou": 0.5390625, "loss_num": 0.13671875, "loss_xval": 1.7578125, "num_input_tokens_seen": 4565024, "step": 73 }, { "epoch": 0.24625623960066556, "grad_norm": 75.91551971435547, "learning_rate": 5e-06, "loss": 1.7711, "num_input_tokens_seen": 4628860, "step": 74 }, { "epoch": 0.24625623960066556, "loss": 1.8572239875793457, "loss_ce": 0.04570060968399048, "loss_iou": 0.59375, "loss_num": 0.12451171875, "loss_xval": 1.8125, "num_input_tokens_seen": 4628860, "step": 74 }, { "epoch": 0.24958402662229617, "grad_norm": 219.3521270751953, "learning_rate": 5e-06, "loss": 1.662, "num_input_tokens_seen": 4691484, "step": 75 }, { "epoch": 0.24958402662229617, "loss": 2.0041086673736572, "loss_ce": 0.11446025222539902, "loss_iou": 0.59375, "loss_num": 0.140625, "loss_xval": 1.890625, "num_input_tokens_seen": 4691484, "step": 75 }, { "epoch": 0.2529118136439268, "grad_norm": 31.69588851928711, "learning_rate": 5e-06, "loss": 1.6476, "num_input_tokens_seen": 4753936, "step": 76 }, { "epoch": 0.2529118136439268, "loss": 1.4542449712753296, "loss_ce": 0.02528989687561989, "loss_iou": 0.361328125, "loss_num": 0.140625, "loss_xval": 1.4296875, "num_input_tokens_seen": 4753936, "step": 76 }, { "epoch": 0.2562396006655574, "grad_norm": 23.477642059326172, "learning_rate": 5e-06, "loss": 1.8872, "num_input_tokens_seen": 4818588, "step": 77 }, { "epoch": 0.2562396006655574, "loss": 1.7592456340789795, "loss_ce": 0.030729947611689568, "loss_iou": 0.56640625, "loss_num": 0.11962890625, "loss_xval": 1.7265625, "num_input_tokens_seen": 4818588, "step": 77 }, { "epoch": 0.259567387687188, "grad_norm": 20.34519386291504, "learning_rate": 5e-06, "loss": 1.5801, "num_input_tokens_seen": 4881588, "step": 78 }, { "epoch": 0.259567387687188, "loss": 1.5691020488739014, "loss_ce": 0.06324272602796555, "loss_iou": 0.435546875, "loss_num": 0.126953125, "loss_xval": 1.5078125, "num_input_tokens_seen": 4881588, "step": 78 }, { "epoch": 0.2628951747088186, "grad_norm": 18.597923278808594, "learning_rate": 5e-06, "loss": 1.523, "num_input_tokens_seen": 4945572, "step": 79 }, { "epoch": 0.2628951747088186, "loss": 1.7528076171875, "loss_ce": 0.0848388820886612, "loss_iou": 0.52734375, "loss_num": 0.123046875, "loss_xval": 1.671875, "num_input_tokens_seen": 4945572, "step": 79 }, { "epoch": 0.26622296173044924, "grad_norm": 11.035025596618652, "learning_rate": 5e-06, "loss": 1.3792, "num_input_tokens_seen": 5007360, "step": 80 }, { "epoch": 0.26622296173044924, "loss": 1.5286271572113037, "loss_ce": 0.020814765244722366, "loss_iou": 0.453125, "loss_num": 0.1201171875, "loss_xval": 1.5078125, "num_input_tokens_seen": 5007360, "step": 80 }, { "epoch": 0.26955074875207985, "grad_norm": 12.091245651245117, "learning_rate": 5e-06, "loss": 1.6577, "num_input_tokens_seen": 5069436, "step": 81 }, { "epoch": 0.26955074875207985, "loss": 1.6581270694732666, "loss_ce": 0.07572957128286362, "loss_iou": 0.375, "loss_num": 0.1669921875, "loss_xval": 1.5859375, "num_input_tokens_seen": 5069436, "step": 81 }, { "epoch": 0.27287853577371046, "grad_norm": 22.181730270385742, "learning_rate": 5e-06, "loss": 1.6494, "num_input_tokens_seen": 5133184, "step": 82 }, { "epoch": 0.27287853577371046, "loss": 1.5007237195968628, "loss_ce": 0.08226672559976578, "loss_iou": 0.439453125, "loss_num": 0.10791015625, "loss_xval": 1.421875, "num_input_tokens_seen": 5133184, "step": 82 }, { "epoch": 0.2762063227953411, "grad_norm": 24.726543426513672, "learning_rate": 5e-06, "loss": 1.4726, "num_input_tokens_seen": 5195708, "step": 83 }, { "epoch": 0.2762063227953411, "loss": 1.643707513809204, "loss_ce": 0.03140285611152649, "loss_iou": 0.53515625, "loss_num": 0.10888671875, "loss_xval": 1.609375, "num_input_tokens_seen": 5195708, "step": 83 }, { "epoch": 0.2795341098169717, "grad_norm": 16.063886642456055, "learning_rate": 5e-06, "loss": 1.6358, "num_input_tokens_seen": 5258576, "step": 84 }, { "epoch": 0.2795341098169717, "loss": 1.6346626281738281, "loss_ce": 0.022113768383860588, "loss_iou": 0.5234375, "loss_num": 0.11279296875, "loss_xval": 1.609375, "num_input_tokens_seen": 5258576, "step": 84 }, { "epoch": 0.28286189683860236, "grad_norm": 18.069644927978516, "learning_rate": 5e-06, "loss": 1.6014, "num_input_tokens_seen": 5321584, "step": 85 }, { "epoch": 0.28286189683860236, "loss": 1.5491950511932373, "loss_ce": 0.01892169378697872, "loss_iou": 0.53125, "loss_num": 0.09375, "loss_xval": 1.53125, "num_input_tokens_seen": 5321584, "step": 85 }, { "epoch": 0.28618968386023297, "grad_norm": 21.28662109375, "learning_rate": 5e-06, "loss": 1.519, "num_input_tokens_seen": 5383692, "step": 86 }, { "epoch": 0.28618968386023297, "loss": 1.6978256702423096, "loss_ce": 0.08698593080043793, "loss_iou": 0.482421875, "loss_num": 0.12890625, "loss_xval": 1.609375, "num_input_tokens_seen": 5383692, "step": 86 }, { "epoch": 0.2895174708818636, "grad_norm": 29.965181350708008, "learning_rate": 5e-06, "loss": 1.5944, "num_input_tokens_seen": 5446696, "step": 87 }, { "epoch": 0.2895174708818636, "loss": 1.47395658493042, "loss_ce": 0.02669098973274231, "loss_iou": 0.390625, "loss_num": 0.1337890625, "loss_xval": 1.4453125, "num_input_tokens_seen": 5446696, "step": 87 }, { "epoch": 0.2928452579034942, "grad_norm": 17.947330474853516, "learning_rate": 5e-06, "loss": 1.6335, "num_input_tokens_seen": 5509100, "step": 88 }, { "epoch": 0.2928452579034942, "loss": 1.8120665550231934, "loss_ce": 0.06548449397087097, "loss_iou": 0.546875, "loss_num": 0.130859375, "loss_xval": 1.75, "num_input_tokens_seen": 5509100, "step": 88 }, { "epoch": 0.2961730449251248, "grad_norm": 29.202789306640625, "learning_rate": 5e-06, "loss": 1.5637, "num_input_tokens_seen": 5572948, "step": 89 }, { "epoch": 0.2961730449251248, "loss": 1.7134369611740112, "loss_ce": 0.05865185335278511, "loss_iou": 0.51171875, "loss_num": 0.125, "loss_xval": 1.65625, "num_input_tokens_seen": 5572948, "step": 89 }, { "epoch": 0.2995008319467554, "grad_norm": 24.063215255737305, "learning_rate": 5e-06, "loss": 1.7171, "num_input_tokens_seen": 5634984, "step": 90 }, { "epoch": 0.2995008319467554, "loss": 1.7892823219299316, "loss_ce": 0.03586425632238388, "loss_iou": 0.52734375, "loss_num": 0.1396484375, "loss_xval": 1.75, "num_input_tokens_seen": 5634984, "step": 90 }, { "epoch": 0.30282861896838603, "grad_norm": 10.826105117797852, "learning_rate": 5e-06, "loss": 1.3908, "num_input_tokens_seen": 5697024, "step": 91 }, { "epoch": 0.30282861896838603, "loss": 1.6418991088867188, "loss_ce": 0.012504665181040764, "loss_iou": 0.51171875, "loss_num": 0.12109375, "loss_xval": 1.6328125, "num_input_tokens_seen": 5697024, "step": 91 }, { "epoch": 0.30615640599001664, "grad_norm": 15.976494789123535, "learning_rate": 5e-06, "loss": 1.4575, "num_input_tokens_seen": 5760236, "step": 92 }, { "epoch": 0.30615640599001664, "loss": 1.4351558685302734, "loss_ce": 0.03427693247795105, "loss_iou": 0.419921875, "loss_num": 0.11181640625, "loss_xval": 1.3984375, "num_input_tokens_seen": 5760236, "step": 92 }, { "epoch": 0.30948419301164726, "grad_norm": 14.37692928314209, "learning_rate": 5e-06, "loss": 1.5497, "num_input_tokens_seen": 5824056, "step": 93 }, { "epoch": 0.30948419301164726, "loss": 1.5191597938537598, "loss_ce": 0.005732087418437004, "loss_iou": 0.4609375, "loss_num": 0.11865234375, "loss_xval": 1.515625, "num_input_tokens_seen": 5824056, "step": 93 }, { "epoch": 0.31281198003327787, "grad_norm": 14.017496109008789, "learning_rate": 5e-06, "loss": 1.4437, "num_input_tokens_seen": 5887136, "step": 94 }, { "epoch": 0.31281198003327787, "loss": 1.6226869821548462, "loss_ce": 0.006476097274571657, "loss_iou": 0.54296875, "loss_num": 0.1064453125, "loss_xval": 1.6171875, "num_input_tokens_seen": 5887136, "step": 94 }, { "epoch": 0.3161397670549085, "grad_norm": 32.76481246948242, "learning_rate": 5e-06, "loss": 1.5682, "num_input_tokens_seen": 5949648, "step": 95 }, { "epoch": 0.3161397670549085, "loss": 1.4490597248077393, "loss_ce": 0.04866912215948105, "loss_iou": 0.373046875, "loss_num": 0.130859375, "loss_xval": 1.3984375, "num_input_tokens_seen": 5949648, "step": 95 }, { "epoch": 0.3194675540765391, "grad_norm": 11.287385940551758, "learning_rate": 5e-06, "loss": 1.348, "num_input_tokens_seen": 6011196, "step": 96 }, { "epoch": 0.3194675540765391, "loss": 1.4357106685638428, "loss_ce": 0.03239036723971367, "loss_iou": 0.36328125, "loss_num": 0.1357421875, "loss_xval": 1.40625, "num_input_tokens_seen": 6011196, "step": 96 }, { "epoch": 0.3227953410981697, "grad_norm": 18.13044548034668, "learning_rate": 5e-06, "loss": 1.3112, "num_input_tokens_seen": 6073652, "step": 97 }, { "epoch": 0.3227953410981697, "loss": 1.3087111711502075, "loss_ce": 0.03966822475194931, "loss_iou": 0.3203125, "loss_num": 0.125, "loss_xval": 1.265625, "num_input_tokens_seen": 6073652, "step": 97 }, { "epoch": 0.3261231281198003, "grad_norm": 27.89438247680664, "learning_rate": 5e-06, "loss": 1.8075, "num_input_tokens_seen": 6138100, "step": 98 }, { "epoch": 0.3261231281198003, "loss": 1.6191132068634033, "loss_ce": 0.025851568207144737, "loss_iou": 0.578125, "loss_num": 0.0869140625, "loss_xval": 1.59375, "num_input_tokens_seen": 6138100, "step": 98 }, { "epoch": 0.32945091514143093, "grad_norm": 20.874568939208984, "learning_rate": 5e-06, "loss": 1.1631, "num_input_tokens_seen": 6199520, "step": 99 }, { "epoch": 0.32945091514143093, "loss": 1.3589457273483276, "loss_ce": 0.03375040739774704, "loss_iou": 0.400390625, "loss_num": 0.10498046875, "loss_xval": 1.328125, "num_input_tokens_seen": 6199520, "step": 99 }, { "epoch": 0.33277870216306155, "grad_norm": 17.16883087158203, "learning_rate": 5e-06, "loss": 1.6227, "num_input_tokens_seen": 6261088, "step": 100 }, { "epoch": 0.33277870216306155, "loss": 1.3614544868469238, "loss_ce": 0.037723928689956665, "loss_iou": 0.3671875, "loss_num": 0.1181640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 6261088, "step": 100 }, { "epoch": 0.33610648918469216, "grad_norm": 22.42743492126465, "learning_rate": 5e-06, "loss": 1.4833, "num_input_tokens_seen": 6325120, "step": 101 }, { "epoch": 0.33610648918469216, "loss": 1.3737859725952148, "loss_ce": 0.019782012328505516, "loss_iou": 0.498046875, "loss_num": 0.0712890625, "loss_xval": 1.3515625, "num_input_tokens_seen": 6325120, "step": 101 }, { "epoch": 0.33943427620632277, "grad_norm": 17.12123680114746, "learning_rate": 5e-06, "loss": 1.4998, "num_input_tokens_seen": 6387768, "step": 102 }, { "epoch": 0.33943427620632277, "loss": 1.389526605606079, "loss_ce": 0.03259296715259552, "loss_iou": 0.388671875, "loss_num": 0.11669921875, "loss_xval": 1.359375, "num_input_tokens_seen": 6387768, "step": 102 }, { "epoch": 0.3427620632279534, "grad_norm": 32.11830139160156, "learning_rate": 5e-06, "loss": 1.6505, "num_input_tokens_seen": 6451012, "step": 103 }, { "epoch": 0.3427620632279534, "loss": 1.517958164215088, "loss_ce": 0.03309479355812073, "loss_iou": 0.486328125, "loss_num": 0.10205078125, "loss_xval": 1.484375, "num_input_tokens_seen": 6451012, "step": 103 }, { "epoch": 0.34608985024958405, "grad_norm": 57.26496124267578, "learning_rate": 5e-06, "loss": 1.3503, "num_input_tokens_seen": 6513496, "step": 104 }, { "epoch": 0.34608985024958405, "loss": 1.2844140529632568, "loss_ce": 0.07713857293128967, "loss_iou": 0.3046875, "loss_num": 0.11962890625, "loss_xval": 1.2109375, "num_input_tokens_seen": 6513496, "step": 104 }, { "epoch": 0.34941763727121466, "grad_norm": 25.62670135498047, "learning_rate": 5e-06, "loss": 1.3516, "num_input_tokens_seen": 6574316, "step": 105 }, { "epoch": 0.34941763727121466, "loss": 1.4192204475402832, "loss_ce": 0.029571905732154846, "loss_iou": 0.44140625, "loss_num": 0.10107421875, "loss_xval": 1.390625, "num_input_tokens_seen": 6574316, "step": 105 }, { "epoch": 0.3527454242928453, "grad_norm": 12.265321731567383, "learning_rate": 5e-06, "loss": 1.5544, "num_input_tokens_seen": 6637000, "step": 106 }, { "epoch": 0.3527454242928453, "loss": 1.544149398803711, "loss_ce": 0.033163003623485565, "loss_iou": 0.4921875, "loss_num": 0.10498046875, "loss_xval": 1.5078125, "num_input_tokens_seen": 6637000, "step": 106 }, { "epoch": 0.3560732113144759, "grad_norm": 41.6523551940918, "learning_rate": 5e-06, "loss": 1.6072, "num_input_tokens_seen": 6699196, "step": 107 }, { "epoch": 0.3560732113144759, "loss": 1.506690502166748, "loss_ce": 0.029151447117328644, "loss_iou": 0.47265625, "loss_num": 0.1064453125, "loss_xval": 1.4765625, "num_input_tokens_seen": 6699196, "step": 107 }, { "epoch": 0.3594009983361065, "grad_norm": 28.850421905517578, "learning_rate": 5e-06, "loss": 1.6697, "num_input_tokens_seen": 6762944, "step": 108 }, { "epoch": 0.3594009983361065, "loss": 1.4808459281921387, "loss_ce": 0.04041615128517151, "loss_iou": 0.47265625, "loss_num": 0.09912109375, "loss_xval": 1.4375, "num_input_tokens_seen": 6762944, "step": 108 }, { "epoch": 0.3627287853577371, "grad_norm": 22.583267211914062, "learning_rate": 5e-06, "loss": 1.2374, "num_input_tokens_seen": 6825144, "step": 109 }, { "epoch": 0.3627287853577371, "loss": 1.1380754709243774, "loss_ce": 0.02332938462495804, "loss_iou": 0.28515625, "loss_num": 0.109375, "loss_xval": 1.1171875, "num_input_tokens_seen": 6825144, "step": 109 }, { "epoch": 0.36605657237936773, "grad_norm": 11.474699020385742, "learning_rate": 5e-06, "loss": 1.7402, "num_input_tokens_seen": 6888208, "step": 110 }, { "epoch": 0.36605657237936773, "loss": 1.7693653106689453, "loss_ce": 0.01692386157810688, "loss_iou": 0.53515625, "loss_num": 0.13671875, "loss_xval": 1.75, "num_input_tokens_seen": 6888208, "step": 110 }, { "epoch": 0.36938435940099834, "grad_norm": 22.013742446899414, "learning_rate": 5e-06, "loss": 1.6804, "num_input_tokens_seen": 6951724, "step": 111 }, { "epoch": 0.36938435940099834, "loss": 1.523423671722412, "loss_ce": 0.01829671300947666, "loss_iou": 0.44140625, "loss_num": 0.125, "loss_xval": 1.5078125, "num_input_tokens_seen": 6951724, "step": 111 }, { "epoch": 0.37271214642262895, "grad_norm": 14.441619873046875, "learning_rate": 5e-06, "loss": 1.3199, "num_input_tokens_seen": 7013888, "step": 112 }, { "epoch": 0.37271214642262895, "loss": 1.32588529586792, "loss_ce": 0.02510397508740425, "loss_iou": 0.33984375, "loss_num": 0.1240234375, "loss_xval": 1.296875, "num_input_tokens_seen": 7013888, "step": 112 }, { "epoch": 0.37603993344425957, "grad_norm": 26.6738338470459, "learning_rate": 5e-06, "loss": 1.5318, "num_input_tokens_seen": 7077040, "step": 113 }, { "epoch": 0.37603993344425957, "loss": 1.5805257558822632, "loss_ce": 0.027791425585746765, "loss_iou": 0.474609375, "loss_num": 0.12109375, "loss_xval": 1.5546875, "num_input_tokens_seen": 7077040, "step": 113 }, { "epoch": 0.3793677204658902, "grad_norm": 21.01567840576172, "learning_rate": 5e-06, "loss": 1.2308, "num_input_tokens_seen": 7140124, "step": 114 }, { "epoch": 0.3793677204658902, "loss": 1.2644329071044922, "loss_ce": 0.04128829389810562, "loss_iou": 0.3515625, "loss_num": 0.10400390625, "loss_xval": 1.2265625, "num_input_tokens_seen": 7140124, "step": 114 }, { "epoch": 0.3826955074875208, "grad_norm": 8.94921875, "learning_rate": 5e-06, "loss": 1.4211, "num_input_tokens_seen": 7202388, "step": 115 }, { "epoch": 0.3826955074875208, "loss": 1.7065632343292236, "loss_ce": 0.016377653926610947, "loss_iou": 0.50390625, "loss_num": 0.13671875, "loss_xval": 1.6875, "num_input_tokens_seen": 7202388, "step": 115 }, { "epoch": 0.3860232945091514, "grad_norm": 18.632831573486328, "learning_rate": 5e-06, "loss": 1.3789, "num_input_tokens_seen": 7264836, "step": 116 }, { "epoch": 0.3860232945091514, "loss": 1.6013567447662354, "loss_ce": 0.017860619351267815, "loss_iou": 0.5, "loss_num": 0.1162109375, "loss_xval": 1.5859375, "num_input_tokens_seen": 7264836, "step": 116 }, { "epoch": 0.389351081530782, "grad_norm": 17.55350685119629, "learning_rate": 5e-06, "loss": 1.4843, "num_input_tokens_seen": 7327432, "step": 117 }, { "epoch": 0.389351081530782, "loss": 1.4033966064453125, "loss_ce": 0.05183415114879608, "loss_iou": 0.35546875, "loss_num": 0.1279296875, "loss_xval": 1.3515625, "num_input_tokens_seen": 7327432, "step": 117 }, { "epoch": 0.39267886855241263, "grad_norm": 32.239723205566406, "learning_rate": 5e-06, "loss": 1.4762, "num_input_tokens_seen": 7390696, "step": 118 }, { "epoch": 0.39267886855241263, "loss": 1.5093517303466797, "loss_ce": 0.04011353850364685, "loss_iou": 0.474609375, "loss_num": 0.10400390625, "loss_xval": 1.46875, "num_input_tokens_seen": 7390696, "step": 118 }, { "epoch": 0.39600665557404324, "grad_norm": 24.94428062438965, "learning_rate": 5e-06, "loss": 1.5043, "num_input_tokens_seen": 7454232, "step": 119 }, { "epoch": 0.39600665557404324, "loss": 1.3459110260009766, "loss_ce": 0.05269814282655716, "loss_iou": 0.359375, "loss_num": 0.1142578125, "loss_xval": 1.296875, "num_input_tokens_seen": 7454232, "step": 119 }, { "epoch": 0.39933444259567386, "grad_norm": 35.95510482788086, "learning_rate": 5e-06, "loss": 1.9394, "num_input_tokens_seen": 7516160, "step": 120 }, { "epoch": 0.39933444259567386, "loss": 2.001081943511963, "loss_ce": 0.02403094619512558, "loss_iou": 0.578125, "loss_num": 0.1640625, "loss_xval": 1.9765625, "num_input_tokens_seen": 7516160, "step": 120 }, { "epoch": 0.40266222961730447, "grad_norm": 10.742379188537598, "learning_rate": 5e-06, "loss": 1.2452, "num_input_tokens_seen": 7578020, "step": 121 }, { "epoch": 0.40266222961730447, "loss": 1.4017831087112427, "loss_ce": 0.01897064968943596, "loss_iou": 0.48046875, "loss_num": 0.08447265625, "loss_xval": 1.3828125, "num_input_tokens_seen": 7578020, "step": 121 }, { "epoch": 0.4059900166389351, "grad_norm": 15.053220748901367, "learning_rate": 5e-06, "loss": 1.2541, "num_input_tokens_seen": 7641404, "step": 122 }, { "epoch": 0.4059900166389351, "loss": 1.3544683456420898, "loss_ce": 0.025366686284542084, "loss_iou": 0.47265625, "loss_num": 0.07666015625, "loss_xval": 1.328125, "num_input_tokens_seen": 7641404, "step": 122 }, { "epoch": 0.40931780366056575, "grad_norm": 25.307113647460938, "learning_rate": 5e-06, "loss": 1.3746, "num_input_tokens_seen": 7704388, "step": 123 }, { "epoch": 0.40931780366056575, "loss": 1.20981764793396, "loss_ce": 0.008645843714475632, "loss_iou": 0.390625, "loss_num": 0.08349609375, "loss_xval": 1.203125, "num_input_tokens_seen": 7704388, "step": 123 }, { "epoch": 0.41264559068219636, "grad_norm": 19.531993865966797, "learning_rate": 5e-06, "loss": 1.4827, "num_input_tokens_seen": 7766968, "step": 124 }, { "epoch": 0.41264559068219636, "loss": 1.3800911903381348, "loss_ce": 0.029016952961683273, "loss_iou": 0.408203125, "loss_num": 0.10693359375, "loss_xval": 1.3515625, "num_input_tokens_seen": 7766968, "step": 124 }, { "epoch": 0.415973377703827, "grad_norm": 41.06065368652344, "learning_rate": 5e-06, "loss": 1.4337, "num_input_tokens_seen": 7827856, "step": 125 }, { "epoch": 0.415973377703827, "loss": 1.6557650566101074, "loss_ce": 0.009280672296881676, "loss_iou": 0.5703125, "loss_num": 0.10205078125, "loss_xval": 1.6484375, "num_input_tokens_seen": 7827856, "step": 125 }, { "epoch": 0.4193011647254576, "grad_norm": 23.957124710083008, "learning_rate": 5e-06, "loss": 1.226, "num_input_tokens_seen": 7889780, "step": 126 }, { "epoch": 0.4193011647254576, "loss": 1.1219735145568848, "loss_ce": 0.012842569500207901, "loss_iou": 0.318359375, "loss_num": 0.0947265625, "loss_xval": 1.109375, "num_input_tokens_seen": 7889780, "step": 126 }, { "epoch": 0.4226289517470882, "grad_norm": 14.951676368713379, "learning_rate": 5e-06, "loss": 1.6745, "num_input_tokens_seen": 7954280, "step": 127 }, { "epoch": 0.4226289517470882, "loss": 1.5898183584213257, "loss_ce": 0.021947259083390236, "loss_iou": 0.47265625, "loss_num": 0.12451171875, "loss_xval": 1.5703125, "num_input_tokens_seen": 7954280, "step": 127 }, { "epoch": 0.4259567387687188, "grad_norm": 27.87751579284668, "learning_rate": 5e-06, "loss": 1.5928, "num_input_tokens_seen": 8017812, "step": 128 }, { "epoch": 0.4259567387687188, "loss": 1.4125254154205322, "loss_ce": 0.03435150533914566, "loss_iou": 0.40625, "loss_num": 0.11328125, "loss_xval": 1.375, "num_input_tokens_seen": 8017812, "step": 128 }, { "epoch": 0.4292845257903494, "grad_norm": 20.96454620361328, "learning_rate": 5e-06, "loss": 1.4556, "num_input_tokens_seen": 8081064, "step": 129 }, { "epoch": 0.4292845257903494, "loss": 1.6540634632110596, "loss_ce": 0.02418062835931778, "loss_iou": 0.50390625, "loss_num": 0.125, "loss_xval": 1.6328125, "num_input_tokens_seen": 8081064, "step": 129 }, { "epoch": 0.43261231281198004, "grad_norm": 18.692684173583984, "learning_rate": 5e-06, "loss": 1.4309, "num_input_tokens_seen": 8144064, "step": 130 }, { "epoch": 0.43261231281198004, "loss": 1.4267277717590332, "loss_ce": 0.011200500652194023, "loss_iou": 0.419921875, "loss_num": 0.11572265625, "loss_xval": 1.4140625, "num_input_tokens_seen": 8144064, "step": 130 }, { "epoch": 0.43594009983361065, "grad_norm": 79.07939910888672, "learning_rate": 5e-06, "loss": 1.3293, "num_input_tokens_seen": 8207368, "step": 131 }, { "epoch": 0.43594009983361065, "loss": 1.390924096107483, "loss_ce": 0.01690058596432209, "loss_iou": 0.44921875, "loss_num": 0.0947265625, "loss_xval": 1.375, "num_input_tokens_seen": 8207368, "step": 131 }, { "epoch": 0.43926788685524126, "grad_norm": 19.479278564453125, "learning_rate": 5e-06, "loss": 1.2977, "num_input_tokens_seen": 8268408, "step": 132 }, { "epoch": 0.43926788685524126, "loss": 1.1297025680541992, "loss_ce": 0.01678752899169922, "loss_iou": 0.2353515625, "loss_num": 0.12890625, "loss_xval": 1.109375, "num_input_tokens_seen": 8268408, "step": 132 }, { "epoch": 0.4425956738768719, "grad_norm": 29.10662269592285, "learning_rate": 5e-06, "loss": 1.527, "num_input_tokens_seen": 8331360, "step": 133 }, { "epoch": 0.4425956738768719, "loss": 1.4170689582824707, "loss_ce": 0.0020298801828175783, "loss_iou": 0.375, "loss_num": 0.1328125, "loss_xval": 1.4140625, "num_input_tokens_seen": 8331360, "step": 133 }, { "epoch": 0.4459234608985025, "grad_norm": 24.087181091308594, "learning_rate": 5e-06, "loss": 1.5908, "num_input_tokens_seen": 8393216, "step": 134 }, { "epoch": 0.4459234608985025, "loss": 1.628963828086853, "loss_ce": 0.010311482474207878, "loss_iou": 0.451171875, "loss_num": 0.14453125, "loss_xval": 1.6171875, "num_input_tokens_seen": 8393216, "step": 134 }, { "epoch": 0.4492512479201331, "grad_norm": 22.316802978515625, "learning_rate": 5e-06, "loss": 1.7012, "num_input_tokens_seen": 8455612, "step": 135 }, { "epoch": 0.4492512479201331, "loss": 1.6430039405822754, "loss_ce": 0.0004256896791048348, "loss_iou": 0.49609375, "loss_num": 0.1298828125, "loss_xval": 1.640625, "num_input_tokens_seen": 8455612, "step": 135 }, { "epoch": 0.4525790349417637, "grad_norm": 15.70297622680664, "learning_rate": 5e-06, "loss": 1.443, "num_input_tokens_seen": 8519128, "step": 136 }, { "epoch": 0.4525790349417637, "loss": 1.7527647018432617, "loss_ce": 0.023272547870874405, "loss_iou": 0.5546875, "loss_num": 0.12451171875, "loss_xval": 1.7265625, "num_input_tokens_seen": 8519128, "step": 136 }, { "epoch": 0.4559068219633943, "grad_norm": 12.477302551269531, "learning_rate": 5e-06, "loss": 1.5193, "num_input_tokens_seen": 8581072, "step": 137 }, { "epoch": 0.4559068219633943, "loss": 1.439691424369812, "loss_ce": 0.010492183268070221, "loss_iou": 0.466796875, "loss_num": 0.099609375, "loss_xval": 1.4296875, "num_input_tokens_seen": 8581072, "step": 137 }, { "epoch": 0.45923460898502494, "grad_norm": 10.863971710205078, "learning_rate": 5e-06, "loss": 1.3895, "num_input_tokens_seen": 8642220, "step": 138 }, { "epoch": 0.45923460898502494, "loss": 1.0898735523223877, "loss_ce": 0.022063426673412323, "loss_iou": 0.2734375, "loss_num": 0.1044921875, "loss_xval": 1.0703125, "num_input_tokens_seen": 8642220, "step": 138 }, { "epoch": 0.46256239600665555, "grad_norm": 20.75811767578125, "learning_rate": 5e-06, "loss": 1.3959, "num_input_tokens_seen": 8704240, "step": 139 }, { "epoch": 0.46256239600665555, "loss": 1.3267550468444824, "loss_ce": 0.029880166053771973, "loss_iou": 0.373046875, "loss_num": 0.10986328125, "loss_xval": 1.296875, "num_input_tokens_seen": 8704240, "step": 139 }, { "epoch": 0.46589018302828616, "grad_norm": 62.800384521484375, "learning_rate": 5e-06, "loss": 1.4946, "num_input_tokens_seen": 8766820, "step": 140 }, { "epoch": 0.46589018302828616, "loss": 1.761798620223999, "loss_ce": 0.012775173410773277, "loss_iou": 0.5234375, "loss_num": 0.1396484375, "loss_xval": 1.75, "num_input_tokens_seen": 8766820, "step": 140 }, { "epoch": 0.46921797004991683, "grad_norm": 9.668963432312012, "learning_rate": 5e-06, "loss": 1.3491, "num_input_tokens_seen": 8825728, "step": 141 }, { "epoch": 0.46921797004991683, "loss": 1.0192482471466064, "loss_ce": 0.03633805364370346, "loss_iou": 0.1298828125, "loss_num": 0.14453125, "loss_xval": 0.984375, "num_input_tokens_seen": 8825728, "step": 141 }, { "epoch": 0.47254575707154745, "grad_norm": 17.259815216064453, "learning_rate": 5e-06, "loss": 1.3784, "num_input_tokens_seen": 8888884, "step": 142 }, { "epoch": 0.47254575707154745, "loss": 1.3853219747543335, "loss_ce": 0.007392271421849728, "loss_iou": 0.412109375, "loss_num": 0.1103515625, "loss_xval": 1.375, "num_input_tokens_seen": 8888884, "step": 142 }, { "epoch": 0.47587354409317806, "grad_norm": 24.6596736907959, "learning_rate": 5e-06, "loss": 1.623, "num_input_tokens_seen": 8952204, "step": 143 }, { "epoch": 0.47587354409317806, "loss": 1.5127699375152588, "loss_ce": 0.011793285608291626, "loss_iou": 0.4765625, "loss_num": 0.10986328125, "loss_xval": 1.5, "num_input_tokens_seen": 8952204, "step": 143 }, { "epoch": 0.47920133111480867, "grad_norm": 60.817298889160156, "learning_rate": 5e-06, "loss": 1.507, "num_input_tokens_seen": 9015980, "step": 144 }, { "epoch": 0.47920133111480867, "loss": 1.701507329940796, "loss_ce": 0.03549168258905411, "loss_iou": 0.5390625, "loss_num": 0.1181640625, "loss_xval": 1.6640625, "num_input_tokens_seen": 9015980, "step": 144 }, { "epoch": 0.4825291181364393, "grad_norm": 10.030848503112793, "learning_rate": 5e-06, "loss": 1.4497, "num_input_tokens_seen": 9076244, "step": 145 }, { "epoch": 0.4825291181364393, "loss": 1.3854782581329346, "loss_ce": 0.05686497688293457, "loss_iou": 0.345703125, "loss_num": 0.1279296875, "loss_xval": 1.328125, "num_input_tokens_seen": 9076244, "step": 145 }, { "epoch": 0.4858569051580699, "grad_norm": 17.436437606811523, "learning_rate": 5e-06, "loss": 1.6119, "num_input_tokens_seen": 9139832, "step": 146 }, { "epoch": 0.4858569051580699, "loss": 1.5991151332855225, "loss_ce": 0.008294829167425632, "loss_iou": 0.484375, "loss_num": 0.12451171875, "loss_xval": 1.59375, "num_input_tokens_seen": 9139832, "step": 146 }, { "epoch": 0.4891846921797005, "grad_norm": 29.329193115234375, "learning_rate": 5e-06, "loss": 1.3203, "num_input_tokens_seen": 9202668, "step": 147 }, { "epoch": 0.4891846921797005, "loss": 1.415173053741455, "loss_ce": 0.030651573091745377, "loss_iou": 0.37890625, "loss_num": 0.1259765625, "loss_xval": 1.3828125, "num_input_tokens_seen": 9202668, "step": 147 }, { "epoch": 0.4925124792013311, "grad_norm": 23.036569595336914, "learning_rate": 5e-06, "loss": 1.4391, "num_input_tokens_seen": 9265360, "step": 148 }, { "epoch": 0.4925124792013311, "loss": 1.467625379562378, "loss_ce": 0.022068778052926064, "loss_iou": 0.484375, "loss_num": 0.095703125, "loss_xval": 1.4453125, "num_input_tokens_seen": 9265360, "step": 148 }, { "epoch": 0.49584026622296173, "grad_norm": 10.736440658569336, "learning_rate": 5e-06, "loss": 1.4826, "num_input_tokens_seen": 9328552, "step": 149 }, { "epoch": 0.49584026622296173, "loss": 1.422354817390442, "loss_ce": 0.0029212520457804203, "loss_iou": 0.443359375, "loss_num": 0.1064453125, "loss_xval": 1.421875, "num_input_tokens_seen": 9328552, "step": 149 }, { "epoch": 0.49916805324459235, "grad_norm": 16.756797790527344, "learning_rate": 5e-06, "loss": 1.5275, "num_input_tokens_seen": 9392064, "step": 150 }, { "epoch": 0.49916805324459235, "loss": 1.650395154953003, "loss_ce": 0.021488958969712257, "loss_iou": 0.52734375, "loss_num": 0.11474609375, "loss_xval": 1.625, "num_input_tokens_seen": 9392064, "step": 150 }, { "epoch": 0.502495840266223, "grad_norm": 96.55653381347656, "learning_rate": 5e-06, "loss": 1.1381, "num_input_tokens_seen": 9454616, "step": 151 }, { "epoch": 0.502495840266223, "loss": 1.029345154762268, "loss_ce": 0.006395971402525902, "loss_iou": 0.26171875, "loss_num": 0.099609375, "loss_xval": 1.0234375, "num_input_tokens_seen": 9454616, "step": 151 }, { "epoch": 0.5058236272878536, "grad_norm": 21.376344680786133, "learning_rate": 5e-06, "loss": 1.5269, "num_input_tokens_seen": 9515824, "step": 152 }, { "epoch": 0.5058236272878536, "loss": 1.810450792312622, "loss_ce": 0.0008806025725789368, "loss_iou": 0.6015625, "loss_num": 0.1220703125, "loss_xval": 1.8125, "num_input_tokens_seen": 9515824, "step": 152 }, { "epoch": 0.5091514143094842, "grad_norm": 27.921884536743164, "learning_rate": 5e-06, "loss": 1.6502, "num_input_tokens_seen": 9579596, "step": 153 }, { "epoch": 0.5091514143094842, "loss": 1.56037175655365, "loss_ce": 0.004219432361423969, "loss_iou": 0.48046875, "loss_num": 0.11865234375, "loss_xval": 1.5546875, "num_input_tokens_seen": 9579596, "step": 153 }, { "epoch": 0.5124792013311148, "grad_norm": 23.917827606201172, "learning_rate": 5e-06, "loss": 1.5475, "num_input_tokens_seen": 9643136, "step": 154 }, { "epoch": 0.5124792013311148, "loss": 1.607767105102539, "loss_ce": 0.004251522943377495, "loss_iou": 0.51953125, "loss_num": 0.11328125, "loss_xval": 1.6015625, "num_input_tokens_seen": 9643136, "step": 154 }, { "epoch": 0.5158069883527454, "grad_norm": 27.250625610351562, "learning_rate": 5e-06, "loss": 1.513, "num_input_tokens_seen": 9705980, "step": 155 }, { "epoch": 0.5158069883527454, "loss": 1.3487316370010376, "loss_ce": 0.01621215045452118, "loss_iou": 0.38671875, "loss_num": 0.11279296875, "loss_xval": 1.3359375, "num_input_tokens_seen": 9705980, "step": 155 }, { "epoch": 0.519134775374376, "grad_norm": 179.79345703125, "learning_rate": 5e-06, "loss": 1.3535, "num_input_tokens_seen": 9769204, "step": 156 }, { "epoch": 0.519134775374376, "loss": 1.1300077438354492, "loss_ce": 0.01623823679983616, "loss_iou": 0.3046875, "loss_num": 0.1005859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 9769204, "step": 156 }, { "epoch": 0.5224625623960066, "grad_norm": 11.58056354522705, "learning_rate": 5e-06, "loss": 1.4311, "num_input_tokens_seen": 9832704, "step": 157 }, { "epoch": 0.5224625623960066, "loss": 1.3317975997924805, "loss_ce": 0.003184268716722727, "loss_iou": 0.3828125, "loss_num": 0.11279296875, "loss_xval": 1.328125, "num_input_tokens_seen": 9832704, "step": 157 }, { "epoch": 0.5257903494176372, "grad_norm": 21.892366409301758, "learning_rate": 5e-06, "loss": 1.5207, "num_input_tokens_seen": 9894424, "step": 158 }, { "epoch": 0.5257903494176372, "loss": 1.5312166213989258, "loss_ce": 0.010220491327345371, "loss_iou": 0.4375, "loss_num": 0.1298828125, "loss_xval": 1.5234375, "num_input_tokens_seen": 9894424, "step": 158 }, { "epoch": 0.5291181364392679, "grad_norm": 25.840219497680664, "learning_rate": 5e-06, "loss": 1.4383, "num_input_tokens_seen": 9958680, "step": 159 }, { "epoch": 0.5291181364392679, "loss": 1.3586366176605225, "loss_ce": 0.015130765736103058, "loss_iou": 0.384765625, "loss_num": 0.11474609375, "loss_xval": 1.34375, "num_input_tokens_seen": 9958680, "step": 159 }, { "epoch": 0.5324459234608985, "grad_norm": 14.081452369689941, "learning_rate": 5e-06, "loss": 1.4915, "num_input_tokens_seen": 10023284, "step": 160 }, { "epoch": 0.5324459234608985, "loss": 1.5290977954864502, "loss_ce": 0.024214986711740494, "loss_iou": 0.47265625, "loss_num": 0.1123046875, "loss_xval": 1.5078125, "num_input_tokens_seen": 10023284, "step": 160 }, { "epoch": 0.5357737104825291, "grad_norm": 18.082963943481445, "learning_rate": 5e-06, "loss": 1.2444, "num_input_tokens_seen": 10085896, "step": 161 }, { "epoch": 0.5357737104825291, "loss": 1.108798623085022, "loss_ce": 0.0035740805324167013, "loss_iou": 0.375, "loss_num": 0.0712890625, "loss_xval": 1.1015625, "num_input_tokens_seen": 10085896, "step": 161 }, { "epoch": 0.5391014975041597, "grad_norm": 16.789939880371094, "learning_rate": 5e-06, "loss": 1.4746, "num_input_tokens_seen": 10150772, "step": 162 }, { "epoch": 0.5391014975041597, "loss": 1.669636845588684, "loss_ce": 0.010457233525812626, "loss_iou": 0.5703125, "loss_num": 0.10302734375, "loss_xval": 1.65625, "num_input_tokens_seen": 10150772, "step": 162 }, { "epoch": 0.5424292845257903, "grad_norm": 33.57561111450195, "learning_rate": 5e-06, "loss": 1.3151, "num_input_tokens_seen": 10214456, "step": 163 }, { "epoch": 0.5424292845257903, "loss": 1.375217080116272, "loss_ce": 0.007053041830658913, "loss_iou": 0.4765625, "loss_num": 0.0830078125, "loss_xval": 1.3671875, "num_input_tokens_seen": 10214456, "step": 163 }, { "epoch": 0.5457570715474209, "grad_norm": 15.636104583740234, "learning_rate": 5e-06, "loss": 1.4209, "num_input_tokens_seen": 10277340, "step": 164 }, { "epoch": 0.5457570715474209, "loss": 1.39033043384552, "loss_ce": 0.002696146722882986, "loss_iou": 0.404296875, "loss_num": 0.1162109375, "loss_xval": 1.390625, "num_input_tokens_seen": 10277340, "step": 164 }, { "epoch": 0.5490848585690515, "grad_norm": 13.9757661819458, "learning_rate": 5e-06, "loss": 1.275, "num_input_tokens_seen": 10340440, "step": 165 }, { "epoch": 0.5490848585690515, "loss": 1.290142297744751, "loss_ce": 0.003521149745211005, "loss_iou": 0.4609375, "loss_num": 0.0732421875, "loss_xval": 1.2890625, "num_input_tokens_seen": 10340440, "step": 165 }, { "epoch": 0.5524126455906821, "grad_norm": 28.90441131591797, "learning_rate": 5e-06, "loss": 1.2476, "num_input_tokens_seen": 10404276, "step": 166 }, { "epoch": 0.5524126455906821, "loss": 0.9718602895736694, "loss_ce": 0.003720683278515935, "loss_iou": 0.2578125, "loss_num": 0.0908203125, "loss_xval": 0.96875, "num_input_tokens_seen": 10404276, "step": 166 }, { "epoch": 0.5557404326123128, "grad_norm": 16.72624397277832, "learning_rate": 5e-06, "loss": 1.2823, "num_input_tokens_seen": 10466192, "step": 167 }, { "epoch": 0.5557404326123128, "loss": 1.4123389720916748, "loss_ce": 0.005112458020448685, "loss_iou": 0.470703125, "loss_num": 0.09375, "loss_xval": 1.40625, "num_input_tokens_seen": 10466192, "step": 167 }, { "epoch": 0.5590682196339434, "grad_norm": 15.205171585083008, "learning_rate": 5e-06, "loss": 1.3604, "num_input_tokens_seen": 10527564, "step": 168 }, { "epoch": 0.5590682196339434, "loss": 1.1021215915679932, "loss_ce": 0.0025123017840087414, "loss_iou": 0.232421875, "loss_num": 0.126953125, "loss_xval": 1.1015625, "num_input_tokens_seen": 10527564, "step": 168 }, { "epoch": 0.562396006655574, "grad_norm": 79.04884338378906, "learning_rate": 5e-06, "loss": 1.5569, "num_input_tokens_seen": 10590812, "step": 169 }, { "epoch": 0.562396006655574, "loss": 1.5157134532928467, "loss_ce": 0.0132721196860075, "loss_iou": 0.50390625, "loss_num": 0.09912109375, "loss_xval": 1.5, "num_input_tokens_seen": 10590812, "step": 169 }, { "epoch": 0.5657237936772047, "grad_norm": 81.16964721679688, "learning_rate": 5e-06, "loss": 1.3635, "num_input_tokens_seen": 10653136, "step": 170 }, { "epoch": 0.5657237936772047, "loss": 1.3099322319030762, "loss_ce": 0.008662772364914417, "loss_iou": 0.42578125, "loss_num": 0.08935546875, "loss_xval": 1.3046875, "num_input_tokens_seen": 10653136, "step": 170 }, { "epoch": 0.5690515806988353, "grad_norm": 12.858562469482422, "learning_rate": 5e-06, "loss": 1.2784, "num_input_tokens_seen": 10717036, "step": 171 }, { "epoch": 0.5690515806988353, "loss": 1.3266587257385254, "loss_ce": 0.009275856427848339, "loss_iou": 0.4609375, "loss_num": 0.0791015625, "loss_xval": 1.3203125, "num_input_tokens_seen": 10717036, "step": 171 }, { "epoch": 0.5723793677204659, "grad_norm": 20.32819366455078, "learning_rate": 5e-06, "loss": 1.4586, "num_input_tokens_seen": 10780108, "step": 172 }, { "epoch": 0.5723793677204659, "loss": 1.5075498819351196, "loss_ce": 0.012920939363539219, "loss_iou": 0.435546875, "loss_num": 0.125, "loss_xval": 1.4921875, "num_input_tokens_seen": 10780108, "step": 172 }, { "epoch": 0.5757071547420965, "grad_norm": 14.381611824035645, "learning_rate": 5e-06, "loss": 1.5289, "num_input_tokens_seen": 10842868, "step": 173 }, { "epoch": 0.5757071547420965, "loss": 1.6210050582885742, "loss_ce": 0.01309479121118784, "loss_iou": 0.5390625, "loss_num": 0.10498046875, "loss_xval": 1.609375, "num_input_tokens_seen": 10842868, "step": 173 }, { "epoch": 0.5790349417637272, "grad_norm": 34.5955696105957, "learning_rate": 5e-06, "loss": 1.5723, "num_input_tokens_seen": 10907284, "step": 174 }, { "epoch": 0.5790349417637272, "loss": 1.7221667766571045, "loss_ce": 0.005125775933265686, "loss_iou": 0.5234375, "loss_num": 0.134765625, "loss_xval": 1.71875, "num_input_tokens_seen": 10907284, "step": 174 }, { "epoch": 0.5823627287853578, "grad_norm": 91.00212860107422, "learning_rate": 5e-06, "loss": 1.3798, "num_input_tokens_seen": 10969812, "step": 175 }, { "epoch": 0.5823627287853578, "loss": 1.3366895914077759, "loss_ce": 0.0041700671426951885, "loss_iou": 0.349609375, "loss_num": 0.126953125, "loss_xval": 1.3359375, "num_input_tokens_seen": 10969812, "step": 175 }, { "epoch": 0.5856905158069884, "grad_norm": 13.480135917663574, "learning_rate": 5e-06, "loss": 1.28, "num_input_tokens_seen": 11030552, "step": 176 }, { "epoch": 0.5856905158069884, "loss": 1.2505651712417603, "loss_ce": 0.012039746157824993, "loss_iou": 0.341796875, "loss_num": 0.1103515625, "loss_xval": 1.2421875, "num_input_tokens_seen": 11030552, "step": 176 }, { "epoch": 0.589018302828619, "grad_norm": 19.45563316345215, "learning_rate": 5e-06, "loss": 1.6962, "num_input_tokens_seen": 11095156, "step": 177 }, { "epoch": 0.589018302828619, "loss": 1.8971412181854248, "loss_ce": 0.012375564314424992, "loss_iou": 0.6328125, "loss_num": 0.12353515625, "loss_xval": 1.8828125, "num_input_tokens_seen": 11095156, "step": 177 }, { "epoch": 0.5923460898502496, "grad_norm": 24.35399627685547, "learning_rate": 5e-06, "loss": 1.4179, "num_input_tokens_seen": 11158924, "step": 178 }, { "epoch": 0.5923460898502496, "loss": 1.2149405479431152, "loss_ce": 0.00986242014914751, "loss_iou": 0.41015625, "loss_num": 0.0771484375, "loss_xval": 1.203125, "num_input_tokens_seen": 11158924, "step": 178 }, { "epoch": 0.5956738768718802, "grad_norm": 24.791669845581055, "learning_rate": 5e-06, "loss": 1.4486, "num_input_tokens_seen": 11222224, "step": 179 }, { "epoch": 0.5956738768718802, "loss": 1.3925793170928955, "loss_ce": 0.0029308954253792763, "loss_iou": 0.486328125, "loss_num": 0.08349609375, "loss_xval": 1.390625, "num_input_tokens_seen": 11222224, "step": 179 }, { "epoch": 0.5990016638935108, "grad_norm": 8.845673561096191, "learning_rate": 5e-06, "loss": 1.233, "num_input_tokens_seen": 11283256, "step": 180 }, { "epoch": 0.5990016638935108, "loss": 1.2640702724456787, "loss_ce": 0.0003983813803642988, "loss_iou": 0.3671875, "loss_num": 0.10546875, "loss_xval": 1.265625, "num_input_tokens_seen": 11283256, "step": 180 }, { "epoch": 0.6023294509151415, "grad_norm": 18.830984115600586, "learning_rate": 5e-06, "loss": 1.3521, "num_input_tokens_seen": 11344564, "step": 181 }, { "epoch": 0.6023294509151415, "loss": 1.4359557628631592, "loss_ce": 0.0023620363790541887, "loss_iou": 0.439453125, "loss_num": 0.11083984375, "loss_xval": 1.4375, "num_input_tokens_seen": 11344564, "step": 181 }, { "epoch": 0.6056572379367721, "grad_norm": 21.3240966796875, "learning_rate": 5e-06, "loss": 1.1265, "num_input_tokens_seen": 11407232, "step": 182 }, { "epoch": 0.6056572379367721, "loss": 0.8678808212280273, "loss_ce": 0.005820285528898239, "loss_iou": 0.224609375, "loss_num": 0.0830078125, "loss_xval": 0.86328125, "num_input_tokens_seen": 11407232, "step": 182 }, { "epoch": 0.6089850249584027, "grad_norm": 19.2507266998291, "learning_rate": 5e-06, "loss": 1.2369, "num_input_tokens_seen": 11469156, "step": 183 }, { "epoch": 0.6089850249584027, "loss": 1.3422547578811646, "loss_ce": 0.0033875710796564817, "loss_iou": 0.4296875, "loss_num": 0.095703125, "loss_xval": 1.3359375, "num_input_tokens_seen": 11469156, "step": 183 }, { "epoch": 0.6123128119800333, "grad_norm": 8.983832359313965, "learning_rate": 5e-06, "loss": 1.3986, "num_input_tokens_seen": 11531644, "step": 184 }, { "epoch": 0.6123128119800333, "loss": 1.4675750732421875, "loss_ce": 0.014205848798155785, "loss_iou": 0.494140625, "loss_num": 0.0927734375, "loss_xval": 1.453125, "num_input_tokens_seen": 11531644, "step": 184 }, { "epoch": 0.6156405990016639, "grad_norm": 18.907955169677734, "learning_rate": 5e-06, "loss": 1.1885, "num_input_tokens_seen": 11594468, "step": 185 }, { "epoch": 0.6156405990016639, "loss": 1.3133213520050049, "loss_ce": 0.009122060611844063, "loss_iou": 0.37109375, "loss_num": 0.1123046875, "loss_xval": 1.3046875, "num_input_tokens_seen": 11594468, "step": 185 }, { "epoch": 0.6189683860232945, "grad_norm": 17.61789321899414, "learning_rate": 5e-06, "loss": 1.0064, "num_input_tokens_seen": 11653668, "step": 186 }, { "epoch": 0.6189683860232945, "loss": 0.8614457845687866, "loss_ce": 0.0006059531588107347, "loss_iou": 0.1240234375, "loss_num": 0.12255859375, "loss_xval": 0.859375, "num_input_tokens_seen": 11653668, "step": 186 }, { "epoch": 0.6222961730449251, "grad_norm": 24.141847610473633, "learning_rate": 5e-06, "loss": 1.1249, "num_input_tokens_seen": 11716696, "step": 187 }, { "epoch": 0.6222961730449251, "loss": 1.3346961736679077, "loss_ce": 0.005594606511294842, "loss_iou": 0.443359375, "loss_num": 0.08837890625, "loss_xval": 1.328125, "num_input_tokens_seen": 11716696, "step": 187 }, { "epoch": 0.6256239600665557, "grad_norm": 20.612201690673828, "learning_rate": 5e-06, "loss": 1.2531, "num_input_tokens_seen": 11776936, "step": 188 }, { "epoch": 0.6256239600665557, "loss": 1.2287157773971558, "loss_ce": 0.03755362331867218, "loss_iou": 0.3359375, "loss_num": 0.10400390625, "loss_xval": 1.1875, "num_input_tokens_seen": 11776936, "step": 188 }, { "epoch": 0.6289517470881864, "grad_norm": 12.621916770935059, "learning_rate": 5e-06, "loss": 1.3368, "num_input_tokens_seen": 11838900, "step": 189 }, { "epoch": 0.6289517470881864, "loss": 1.7955483198165894, "loss_ce": 0.01527489349246025, "loss_iou": 0.58984375, "loss_num": 0.11962890625, "loss_xval": 1.78125, "num_input_tokens_seen": 11838900, "step": 189 }, { "epoch": 0.632279534109817, "grad_norm": 78.11238861083984, "learning_rate": 5e-06, "loss": 1.451, "num_input_tokens_seen": 11902172, "step": 190 }, { "epoch": 0.632279534109817, "loss": 1.4000221490859985, "loss_ce": 0.03283461928367615, "loss_iou": 0.44140625, "loss_num": 0.09716796875, "loss_xval": 1.3671875, "num_input_tokens_seen": 11902172, "step": 190 }, { "epoch": 0.6356073211314476, "grad_norm": 6.907383918762207, "learning_rate": 5e-06, "loss": 1.1959, "num_input_tokens_seen": 11964228, "step": 191 }, { "epoch": 0.6356073211314476, "loss": 1.2943251132965088, "loss_ce": 0.005262609571218491, "loss_iou": 0.427734375, "loss_num": 0.0869140625, "loss_xval": 1.2890625, "num_input_tokens_seen": 11964228, "step": 191 }, { "epoch": 0.6389351081530782, "grad_norm": 22.353961944580078, "learning_rate": 5e-06, "loss": 1.159, "num_input_tokens_seen": 12026860, "step": 192 }, { "epoch": 0.6389351081530782, "loss": 0.8828039169311523, "loss_ce": 0.0012121323961764574, "loss_iou": 0.203125, "loss_num": 0.09521484375, "loss_xval": 0.8828125, "num_input_tokens_seen": 12026860, "step": 192 }, { "epoch": 0.6422628951747088, "grad_norm": 21.18896484375, "learning_rate": 5e-06, "loss": 1.1548, "num_input_tokens_seen": 12088616, "step": 193 }, { "epoch": 0.6422628951747088, "loss": 1.258060336112976, "loss_ce": 0.006595509592443705, "loss_iou": 0.27734375, "loss_num": 0.138671875, "loss_xval": 1.25, "num_input_tokens_seen": 12088616, "step": 193 }, { "epoch": 0.6455906821963394, "grad_norm": 14.17736530303955, "learning_rate": 5e-06, "loss": 1.3459, "num_input_tokens_seen": 12150744, "step": 194 }, { "epoch": 0.6455906821963394, "loss": 1.3711755275726318, "loss_ce": 0.0010583469411358237, "loss_iou": 0.3671875, "loss_num": 0.126953125, "loss_xval": 1.3671875, "num_input_tokens_seen": 12150744, "step": 194 }, { "epoch": 0.64891846921797, "grad_norm": 22.946456909179688, "learning_rate": 5e-06, "loss": 1.3367, "num_input_tokens_seen": 12214016, "step": 195 }, { "epoch": 0.64891846921797, "loss": 1.4311788082122803, "loss_ce": 0.012233471497893333, "loss_iou": 0.48828125, "loss_num": 0.0888671875, "loss_xval": 1.421875, "num_input_tokens_seen": 12214016, "step": 195 }, { "epoch": 0.6522462562396006, "grad_norm": 16.520225524902344, "learning_rate": 5e-06, "loss": 1.3171, "num_input_tokens_seen": 12276364, "step": 196 }, { "epoch": 0.6522462562396006, "loss": 0.8542745113372803, "loss_ce": 0.008327257819473743, "loss_iou": 0.21484375, "loss_num": 0.08349609375, "loss_xval": 0.84765625, "num_input_tokens_seen": 12276364, "step": 196 }, { "epoch": 0.6555740432612313, "grad_norm": 15.667872428894043, "learning_rate": 5e-06, "loss": 1.1668, "num_input_tokens_seen": 12337828, "step": 197 }, { "epoch": 0.6555740432612313, "loss": 1.2531006336212158, "loss_ce": 0.0013917124597355723, "loss_iou": 0.384765625, "loss_num": 0.09619140625, "loss_xval": 1.25, "num_input_tokens_seen": 12337828, "step": 197 }, { "epoch": 0.6589018302828619, "grad_norm": 26.073301315307617, "learning_rate": 5e-06, "loss": 1.4182, "num_input_tokens_seen": 12402048, "step": 198 }, { "epoch": 0.6589018302828619, "loss": 1.407191514968872, "loss_ce": 0.003871295368298888, "loss_iou": 0.404296875, "loss_num": 0.119140625, "loss_xval": 1.40625, "num_input_tokens_seen": 12402048, "step": 198 }, { "epoch": 0.6622296173044925, "grad_norm": 11.998749732971191, "learning_rate": 5e-06, "loss": 1.1549, "num_input_tokens_seen": 12464836, "step": 199 }, { "epoch": 0.6622296173044925, "loss": 1.060854196548462, "loss_ce": 0.0007956493645906448, "loss_iou": 0.3046875, "loss_num": 0.08935546875, "loss_xval": 1.0625, "num_input_tokens_seen": 12464836, "step": 199 }, { "epoch": 0.6655574043261231, "grad_norm": 18.047168731689453, "learning_rate": 5e-06, "loss": 1.1962, "num_input_tokens_seen": 12528168, "step": 200 }, { "epoch": 0.6655574043261231, "loss": 0.9978852272033691, "loss_ce": 0.0015473493840545416, "loss_iou": 0.31640625, "loss_num": 0.07275390625, "loss_xval": 0.99609375, "num_input_tokens_seen": 12528168, "step": 200 }, { "epoch": 0.6688851913477537, "grad_norm": 18.45530128479004, "learning_rate": 5e-06, "loss": 0.9877, "num_input_tokens_seen": 12589800, "step": 201 }, { "epoch": 0.6688851913477537, "loss": 1.1087231636047363, "loss_ce": 0.006672344170510769, "loss_iou": 0.326171875, "loss_num": 0.08984375, "loss_xval": 1.1015625, "num_input_tokens_seen": 12589800, "step": 201 }, { "epoch": 0.6722129783693843, "grad_norm": 10.576942443847656, "learning_rate": 5e-06, "loss": 1.3918, "num_input_tokens_seen": 12652800, "step": 202 }, { "epoch": 0.6722129783693843, "loss": 1.7812892198562622, "loss_ce": 0.00541029404848814, "loss_iou": 0.578125, "loss_num": 0.123046875, "loss_xval": 1.7734375, "num_input_tokens_seen": 12652800, "step": 202 }, { "epoch": 0.6755407653910149, "grad_norm": 43.13065719604492, "learning_rate": 5e-06, "loss": 0.9786, "num_input_tokens_seen": 12711628, "step": 203 }, { "epoch": 0.6755407653910149, "loss": 0.8935337662696838, "loss_ce": 0.006815013010054827, "loss_iou": 0.1953125, "loss_num": 0.09912109375, "loss_xval": 0.88671875, "num_input_tokens_seen": 12711628, "step": 203 }, { "epoch": 0.6788685524126455, "grad_norm": 12.505699157714844, "learning_rate": 5e-06, "loss": 1.2375, "num_input_tokens_seen": 12774344, "step": 204 }, { "epoch": 0.6788685524126455, "loss": 1.6092363595962524, "loss_ce": 0.004744212608784437, "loss_iou": 0.5078125, "loss_num": 0.11767578125, "loss_xval": 1.6015625, "num_input_tokens_seen": 12774344, "step": 204 }, { "epoch": 0.6821963394342762, "grad_norm": 11.248764991760254, "learning_rate": 5e-06, "loss": 1.5125, "num_input_tokens_seen": 12837452, "step": 205 }, { "epoch": 0.6821963394342762, "loss": 1.5051581859588623, "loss_ce": 0.006622936576604843, "loss_iou": 0.431640625, "loss_num": 0.126953125, "loss_xval": 1.5, "num_input_tokens_seen": 12837452, "step": 205 }, { "epoch": 0.6855241264559068, "grad_norm": 21.031944274902344, "learning_rate": 5e-06, "loss": 1.3594, "num_input_tokens_seen": 12900544, "step": 206 }, { "epoch": 0.6855241264559068, "loss": 1.1120269298553467, "loss_ce": 0.005093401297926903, "loss_iou": 0.333984375, "loss_num": 0.087890625, "loss_xval": 1.109375, "num_input_tokens_seen": 12900544, "step": 206 }, { "epoch": 0.6888519134775375, "grad_norm": 11.994505882263184, "learning_rate": 5e-06, "loss": 1.1096, "num_input_tokens_seen": 12963480, "step": 207 }, { "epoch": 0.6888519134775375, "loss": 1.0814727544784546, "loss_ce": 0.004080175422132015, "loss_iou": 0.3671875, "loss_num": 0.068359375, "loss_xval": 1.078125, "num_input_tokens_seen": 12963480, "step": 207 }, { "epoch": 0.6921797004991681, "grad_norm": 33.848323822021484, "learning_rate": 5e-06, "loss": 1.2748, "num_input_tokens_seen": 13025468, "step": 208 }, { "epoch": 0.6921797004991681, "loss": 1.216538906097412, "loss_ce": 0.0031599453650414944, "loss_iou": 0.365234375, "loss_num": 0.0966796875, "loss_xval": 1.2109375, "num_input_tokens_seen": 13025468, "step": 208 }, { "epoch": 0.6955074875207987, "grad_norm": 19.078296661376953, "learning_rate": 5e-06, "loss": 1.1865, "num_input_tokens_seen": 13088660, "step": 209 }, { "epoch": 0.6955074875207987, "loss": 1.0454603433609009, "loss_ce": 0.0010267498437315226, "loss_iou": 0.3046875, "loss_num": 0.0869140625, "loss_xval": 1.046875, "num_input_tokens_seen": 13088660, "step": 209 }, { "epoch": 0.6988352745424293, "grad_norm": 20.048908233642578, "learning_rate": 5e-06, "loss": 1.3775, "num_input_tokens_seen": 13152080, "step": 210 }, { "epoch": 0.6988352745424293, "loss": 1.3916330337524414, "loss_ce": 0.0073556555435061455, "loss_iou": 0.453125, "loss_num": 0.095703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 13152080, "step": 210 }, { "epoch": 0.7021630615640599, "grad_norm": 11.95067024230957, "learning_rate": 5e-06, "loss": 1.2277, "num_input_tokens_seen": 13215476, "step": 211 }, { "epoch": 0.7021630615640599, "loss": 1.3213778734207153, "loss_ce": 0.002530196448788047, "loss_iou": 0.44140625, "loss_num": 0.0869140625, "loss_xval": 1.3203125, "num_input_tokens_seen": 13215476, "step": 211 }, { "epoch": 0.7054908485856906, "grad_norm": 18.306243896484375, "learning_rate": 5e-06, "loss": 0.9338, "num_input_tokens_seen": 13276292, "step": 212 }, { "epoch": 0.7054908485856906, "loss": 1.046345591545105, "loss_ce": 0.01021280512213707, "loss_iou": 0.30078125, "loss_num": 0.0869140625, "loss_xval": 1.0390625, "num_input_tokens_seen": 13276292, "step": 212 }, { "epoch": 0.7088186356073212, "grad_norm": 12.163400650024414, "learning_rate": 5e-06, "loss": 1.5527, "num_input_tokens_seen": 13340984, "step": 213 }, { "epoch": 0.7088186356073212, "loss": 1.7176798582077026, "loss_ce": 0.002836148487403989, "loss_iou": 0.5625, "loss_num": 0.11767578125, "loss_xval": 1.71875, "num_input_tokens_seen": 13340984, "step": 213 }, { "epoch": 0.7121464226289518, "grad_norm": 15.896777153015137, "learning_rate": 5e-06, "loss": 1.1224, "num_input_tokens_seen": 13404176, "step": 214 }, { "epoch": 0.7121464226289518, "loss": 1.0619571208953857, "loss_ce": 0.0033634251449257135, "loss_iou": 0.32421875, "loss_num": 0.08251953125, "loss_xval": 1.0625, "num_input_tokens_seen": 13404176, "step": 214 }, { "epoch": 0.7154742096505824, "grad_norm": 14.622868537902832, "learning_rate": 5e-06, "loss": 1.0171, "num_input_tokens_seen": 13466404, "step": 215 }, { "epoch": 0.7154742096505824, "loss": 1.2054071426391602, "loss_ce": 0.004235264845192432, "loss_iou": 0.412109375, "loss_num": 0.0751953125, "loss_xval": 1.203125, "num_input_tokens_seen": 13466404, "step": 215 }, { "epoch": 0.718801996672213, "grad_norm": 15.324631690979004, "learning_rate": 5e-06, "loss": 1.3024, "num_input_tokens_seen": 13530012, "step": 216 }, { "epoch": 0.718801996672213, "loss": 1.4247881174087524, "loss_ce": 0.03733690083026886, "loss_iou": 0.435546875, "loss_num": 0.103515625, "loss_xval": 1.390625, "num_input_tokens_seen": 13530012, "step": 216 }, { "epoch": 0.7221297836938436, "grad_norm": 7.520216464996338, "learning_rate": 5e-06, "loss": 1.247, "num_input_tokens_seen": 13592972, "step": 217 }, { "epoch": 0.7221297836938436, "loss": 1.5225374698638916, "loss_ce": 0.002029675990343094, "loss_iou": 0.494140625, "loss_num": 0.10595703125, "loss_xval": 1.5234375, "num_input_tokens_seen": 13592972, "step": 217 }, { "epoch": 0.7254575707154742, "grad_norm": 16.533926010131836, "learning_rate": 5e-06, "loss": 1.406, "num_input_tokens_seen": 13655928, "step": 218 }, { "epoch": 0.7254575707154742, "loss": 1.5076565742492676, "loss_ce": 0.010097987949848175, "loss_iou": 0.4921875, "loss_num": 0.1025390625, "loss_xval": 1.5, "num_input_tokens_seen": 13655928, "step": 218 }, { "epoch": 0.7287853577371048, "grad_norm": 14.04409122467041, "learning_rate": 5e-06, "loss": 1.1806, "num_input_tokens_seen": 13718240, "step": 219 }, { "epoch": 0.7287853577371048, "loss": 1.1807819604873657, "loss_ce": 0.0030475566163659096, "loss_iou": 0.341796875, "loss_num": 0.0986328125, "loss_xval": 1.1796875, "num_input_tokens_seen": 13718240, "step": 219 }, { "epoch": 0.7321131447587355, "grad_norm": 13.706603050231934, "learning_rate": 5e-06, "loss": 1.0003, "num_input_tokens_seen": 13780100, "step": 220 }, { "epoch": 0.7321131447587355, "loss": 1.0435665845870972, "loss_ce": 0.0040158554911613464, "loss_iou": 0.306640625, "loss_num": 0.0849609375, "loss_xval": 1.0390625, "num_input_tokens_seen": 13780100, "step": 220 }, { "epoch": 0.7354409317803661, "grad_norm": 15.417816162109375, "learning_rate": 5e-06, "loss": 1.282, "num_input_tokens_seen": 13844236, "step": 221 }, { "epoch": 0.7354409317803661, "loss": 1.389647364616394, "loss_ce": 0.0017078784294426441, "loss_iou": 0.44921875, "loss_num": 0.09765625, "loss_xval": 1.390625, "num_input_tokens_seen": 13844236, "step": 221 }, { "epoch": 0.7387687188019967, "grad_norm": 13.024497032165527, "learning_rate": 5e-06, "loss": 1.3279, "num_input_tokens_seen": 13908608, "step": 222 }, { "epoch": 0.7387687188019967, "loss": 1.0724756717681885, "loss_ce": 0.00704598193988204, "loss_iou": 0.330078125, "loss_num": 0.0810546875, "loss_xval": 1.0625, "num_input_tokens_seen": 13908608, "step": 222 }, { "epoch": 0.7420965058236273, "grad_norm": 14.204023361206055, "learning_rate": 5e-06, "loss": 1.2, "num_input_tokens_seen": 13970688, "step": 223 }, { "epoch": 0.7420965058236273, "loss": 1.312567949295044, "loss_ce": 0.005438992287963629, "loss_iou": 0.380859375, "loss_num": 0.10888671875, "loss_xval": 1.3046875, "num_input_tokens_seen": 13970688, "step": 223 }, { "epoch": 0.7454242928452579, "grad_norm": 17.40608787536621, "learning_rate": 5e-06, "loss": 1.0386, "num_input_tokens_seen": 14032508, "step": 224 }, { "epoch": 0.7454242928452579, "loss": 1.05086350440979, "loss_ce": 0.013998263515532017, "loss_iou": 0.28515625, "loss_num": 0.09375, "loss_xval": 1.0390625, "num_input_tokens_seen": 14032508, "step": 224 }, { "epoch": 0.7487520798668885, "grad_norm": 22.041357040405273, "learning_rate": 5e-06, "loss": 1.1354, "num_input_tokens_seen": 14095624, "step": 225 }, { "epoch": 0.7487520798668885, "loss": 1.32331383228302, "loss_ce": 0.003001346020027995, "loss_iou": 0.46875, "loss_num": 0.07666015625, "loss_xval": 1.3203125, "num_input_tokens_seen": 14095624, "step": 225 }, { "epoch": 0.7520798668885191, "grad_norm": 24.9224910736084, "learning_rate": 5e-06, "loss": 1.3588, "num_input_tokens_seen": 14158408, "step": 226 }, { "epoch": 0.7520798668885191, "loss": 1.5566089153289795, "loss_ce": 0.009978032670915127, "loss_iou": 0.4609375, "loss_num": 0.125, "loss_xval": 1.546875, "num_input_tokens_seen": 14158408, "step": 226 }, { "epoch": 0.7554076539101497, "grad_norm": 18.726009368896484, "learning_rate": 5e-06, "loss": 1.1006, "num_input_tokens_seen": 14219792, "step": 227 }, { "epoch": 0.7554076539101497, "loss": 1.0980417728424072, "loss_ce": 0.0020945887081325054, "loss_iou": 0.310546875, "loss_num": 0.0947265625, "loss_xval": 1.09375, "num_input_tokens_seen": 14219792, "step": 227 }, { "epoch": 0.7587354409317804, "grad_norm": 38.807525634765625, "learning_rate": 5e-06, "loss": 1.2295, "num_input_tokens_seen": 14283232, "step": 228 }, { "epoch": 0.7587354409317804, "loss": 1.1831730604171753, "loss_ce": 0.0003117123560514301, "loss_iou": 0.38671875, "loss_num": 0.08203125, "loss_xval": 1.1796875, "num_input_tokens_seen": 14283232, "step": 228 }, { "epoch": 0.762063227953411, "grad_norm": 30.236251831054688, "learning_rate": 5e-06, "loss": 1.2075, "num_input_tokens_seen": 14344760, "step": 229 }, { "epoch": 0.762063227953411, "loss": 1.403503656387329, "loss_ce": 0.01190214417874813, "loss_iou": 0.490234375, "loss_num": 0.08203125, "loss_xval": 1.390625, "num_input_tokens_seen": 14344760, "step": 229 }, { "epoch": 0.7653910149750416, "grad_norm": 16.025583267211914, "learning_rate": 5e-06, "loss": 1.2572, "num_input_tokens_seen": 14408156, "step": 230 }, { "epoch": 0.7653910149750416, "loss": 1.0829434394836426, "loss_ce": 0.0038419230841100216, "loss_iou": 0.29296875, "loss_num": 0.09912109375, "loss_xval": 1.078125, "num_input_tokens_seen": 14408156, "step": 230 }, { "epoch": 0.7687188019966722, "grad_norm": 22.021560668945312, "learning_rate": 5e-06, "loss": 1.2755, "num_input_tokens_seen": 14471584, "step": 231 }, { "epoch": 0.7687188019966722, "loss": 1.2591898441314697, "loss_ce": 0.0008891146862879395, "loss_iou": 0.4609375, "loss_num": 0.0673828125, "loss_xval": 1.2578125, "num_input_tokens_seen": 14471584, "step": 231 }, { "epoch": 0.7720465890183028, "grad_norm": 14.309478759765625, "learning_rate": 5e-06, "loss": 1.0887, "num_input_tokens_seen": 14533848, "step": 232 }, { "epoch": 0.7720465890183028, "loss": 0.9098241329193115, "loss_ce": 0.006015566643327475, "loss_iou": 0.267578125, "loss_num": 0.07373046875, "loss_xval": 0.90234375, "num_input_tokens_seen": 14533848, "step": 232 }, { "epoch": 0.7753743760399334, "grad_norm": 13.442574501037598, "learning_rate": 5e-06, "loss": 1.0359, "num_input_tokens_seen": 14595476, "step": 233 }, { "epoch": 0.7753743760399334, "loss": 0.9281876087188721, "loss_ce": 0.0036270441487431526, "loss_iou": 0.263671875, "loss_num": 0.0791015625, "loss_xval": 0.92578125, "num_input_tokens_seen": 14595476, "step": 233 }, { "epoch": 0.778702163061564, "grad_norm": 14.79719352722168, "learning_rate": 5e-06, "loss": 1.289, "num_input_tokens_seen": 14658144, "step": 234 }, { "epoch": 0.778702163061564, "loss": 1.5278695821762085, "loss_ce": 0.001990762073546648, "loss_iou": 0.462890625, "loss_num": 0.1201171875, "loss_xval": 1.5234375, "num_input_tokens_seen": 14658144, "step": 234 }, { "epoch": 0.7820299500831946, "grad_norm": 17.862680435180664, "learning_rate": 5e-06, "loss": 1.1643, "num_input_tokens_seen": 14720436, "step": 235 }, { "epoch": 0.7820299500831946, "loss": 1.2263193130493164, "loss_ce": 0.028077127411961555, "loss_iou": 0.376953125, "loss_num": 0.08837890625, "loss_xval": 1.1953125, "num_input_tokens_seen": 14720436, "step": 235 }, { "epoch": 0.7853577371048253, "grad_norm": 12.221160888671875, "learning_rate": 5e-06, "loss": 1.188, "num_input_tokens_seen": 14782692, "step": 236 }, { "epoch": 0.7853577371048253, "loss": 1.4104785919189453, "loss_ce": 0.0027637691237032413, "loss_iou": 0.47265625, "loss_num": 0.09228515625, "loss_xval": 1.40625, "num_input_tokens_seen": 14782692, "step": 236 }, { "epoch": 0.7886855241264559, "grad_norm": 21.184703826904297, "learning_rate": 5e-06, "loss": 1.0888, "num_input_tokens_seen": 14846708, "step": 237 }, { "epoch": 0.7886855241264559, "loss": 1.150669813156128, "loss_ce": 0.00565031124278903, "loss_iou": 0.3359375, "loss_num": 0.0947265625, "loss_xval": 1.1484375, "num_input_tokens_seen": 14846708, "step": 237 }, { "epoch": 0.7920133111480865, "grad_norm": 13.75352954864502, "learning_rate": 5e-06, "loss": 1.1357, "num_input_tokens_seen": 14910792, "step": 238 }, { "epoch": 0.7920133111480865, "loss": 1.0350043773651123, "loss_ce": 0.0015571790281683207, "loss_iou": 0.322265625, "loss_num": 0.078125, "loss_xval": 1.03125, "num_input_tokens_seen": 14910792, "step": 238 }, { "epoch": 0.7953410981697171, "grad_norm": 20.69968605041504, "learning_rate": 5e-06, "loss": 1.3736, "num_input_tokens_seen": 14973548, "step": 239 }, { "epoch": 0.7953410981697171, "loss": 1.4029014110565186, "loss_ce": 0.0122764203697443, "loss_iou": 0.51171875, "loss_num": 0.0732421875, "loss_xval": 1.390625, "num_input_tokens_seen": 14973548, "step": 239 }, { "epoch": 0.7986688851913477, "grad_norm": 14.282718658447266, "learning_rate": 5e-06, "loss": 1.1958, "num_input_tokens_seen": 15036312, "step": 240 }, { "epoch": 0.7986688851913477, "loss": 1.2044358253479004, "loss_ce": 0.008512871339917183, "loss_iou": 0.296875, "loss_num": 0.1201171875, "loss_xval": 1.1953125, "num_input_tokens_seen": 15036312, "step": 240 }, { "epoch": 0.8019966722129783, "grad_norm": 16.37322425842285, "learning_rate": 5e-06, "loss": 0.847, "num_input_tokens_seen": 15096548, "step": 241 }, { "epoch": 0.8019966722129783, "loss": 1.0973565578460693, "loss_ce": 0.004339014645665884, "loss_iou": 0.31640625, "loss_num": 0.09130859375, "loss_xval": 1.09375, "num_input_tokens_seen": 15096548, "step": 241 }, { "epoch": 0.8053244592346089, "grad_norm": 13.17944049835205, "learning_rate": 5e-06, "loss": 1.1795, "num_input_tokens_seen": 15159548, "step": 242 }, { "epoch": 0.8053244592346089, "loss": 1.1043052673339844, "loss_ce": 0.0012779628159478307, "loss_iou": 0.302734375, "loss_num": 0.09912109375, "loss_xval": 1.1015625, "num_input_tokens_seen": 15159548, "step": 242 }, { "epoch": 0.8086522462562395, "grad_norm": 13.032987594604492, "learning_rate": 5e-06, "loss": 1.3063, "num_input_tokens_seen": 15219756, "step": 243 }, { "epoch": 0.8086522462562395, "loss": 1.132462978363037, "loss_ce": 0.0020918657537549734, "loss_iou": 0.3828125, "loss_num": 0.07275390625, "loss_xval": 1.1328125, "num_input_tokens_seen": 15219756, "step": 243 }, { "epoch": 0.8119800332778702, "grad_norm": 22.91352081298828, "learning_rate": 5e-06, "loss": 1.1669, "num_input_tokens_seen": 15281888, "step": 244 }, { "epoch": 0.8119800332778702, "loss": 1.121889352798462, "loss_ce": 0.000795688945800066, "loss_iou": 0.34765625, "loss_num": 0.08544921875, "loss_xval": 1.125, "num_input_tokens_seen": 15281888, "step": 244 }, { "epoch": 0.8153078202995009, "grad_norm": 29.322757720947266, "learning_rate": 5e-06, "loss": 1.484, "num_input_tokens_seen": 15346236, "step": 245 }, { "epoch": 0.8153078202995009, "loss": 1.7189769744873047, "loss_ce": 0.0031566298566758633, "loss_iou": 0.609375, "loss_num": 0.09912109375, "loss_xval": 1.71875, "num_input_tokens_seen": 15346236, "step": 245 }, { "epoch": 0.8186356073211315, "grad_norm": 43.90084457397461, "learning_rate": 5e-06, "loss": 1.2322, "num_input_tokens_seen": 15408532, "step": 246 }, { "epoch": 0.8186356073211315, "loss": 1.0418851375579834, "loss_ce": 0.0003811809583567083, "loss_iou": 0.251953125, "loss_num": 0.107421875, "loss_xval": 1.0390625, "num_input_tokens_seen": 15408532, "step": 246 }, { "epoch": 0.8219633943427621, "grad_norm": 16.404064178466797, "learning_rate": 5e-06, "loss": 1.4442, "num_input_tokens_seen": 15471184, "step": 247 }, { "epoch": 0.8219633943427621, "loss": 1.5725810527801514, "loss_ce": 0.003245145082473755, "loss_iou": 0.54296875, "loss_num": 0.0966796875, "loss_xval": 1.5703125, "num_input_tokens_seen": 15471184, "step": 247 }, { "epoch": 0.8252911813643927, "grad_norm": 15.90378475189209, "learning_rate": 5e-06, "loss": 1.3978, "num_input_tokens_seen": 15533424, "step": 248 }, { "epoch": 0.8252911813643927, "loss": 1.4360511302947998, "loss_ce": 0.004288535099476576, "loss_iou": 0.439453125, "loss_num": 0.1103515625, "loss_xval": 1.4296875, "num_input_tokens_seen": 15533424, "step": 248 }, { "epoch": 0.8286189683860233, "grad_norm": 20.97991180419922, "learning_rate": 5e-06, "loss": 1.1423, "num_input_tokens_seen": 15596204, "step": 249 }, { "epoch": 0.8286189683860233, "loss": 1.2675387859344482, "loss_ce": 0.0028903260827064514, "loss_iou": 0.373046875, "loss_num": 0.103515625, "loss_xval": 1.265625, "num_input_tokens_seen": 15596204, "step": 249 }, { "epoch": 0.831946755407654, "grad_norm": 10.632160186767578, "learning_rate": 5e-06, "loss": 1.29, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "eval_seeclick_CIoU": 0.1637101173400879, "eval_seeclick_GIoU": 0.19321135431528091, "eval_seeclick_IoU": 0.27142514288425446, "eval_seeclick_MAE_all": 0.17712896317243576, "eval_seeclick_MAE_h": 0.11952269077301025, "eval_seeclick_MAE_w": 0.1053374633193016, "eval_seeclick_MAE_x_boxes": 0.21721439808607101, "eval_seeclick_MAE_y_boxes": 0.1404191330075264, "eval_seeclick_NUM_probability": 0.999526172876358, "eval_seeclick_inside_bbox": 0.34062500298023224, "eval_seeclick_loss": 2.5791685581207275, "eval_seeclick_loss_ce": 0.05610966309905052, "eval_seeclick_loss_iou": 0.8470458984375, "eval_seeclick_loss_num": 0.1708526611328125, "eval_seeclick_loss_xval": 2.549560546875, "eval_seeclick_runtime": 61.2794, "eval_seeclick_samples_per_second": 0.767, "eval_seeclick_steps_per_second": 0.033, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "eval_icons_CIoU": -0.14389386028051376, "eval_icons_GIoU": -0.0267653064802289, "eval_icons_IoU": 0.061875129118561745, "eval_icons_MAE_all": 0.22531522810459137, "eval_icons_MAE_h": 0.22444305568933487, "eval_icons_MAE_w": 0.18070007860660553, "eval_icons_MAE_x_boxes": 0.14902877807617188, "eval_icons_MAE_y_boxes": 0.14632483571767807, "eval_icons_NUM_probability": 0.9998290240764618, "eval_icons_inside_bbox": 0.09027777798473835, "eval_icons_loss": 3.1337761878967285, "eval_icons_loss_ce": 2.441077504045097e-05, "eval_icons_loss_iou": 1.01123046875, "eval_icons_loss_num": 0.225830078125, "eval_icons_loss_xval": 3.15234375, "eval_icons_runtime": 64.2404, "eval_icons_samples_per_second": 0.778, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "eval_screenspot_CIoU": 0.012024542937676111, "eval_screenspot_GIoU": 0.0480033370355765, "eval_screenspot_IoU": 0.1673232465982437, "eval_screenspot_MAE_all": 0.1892156501611074, "eval_screenspot_MAE_h": 0.12125971913337708, "eval_screenspot_MAE_w": 0.1696781317392985, "eval_screenspot_MAE_x_boxes": 0.25612760583559674, "eval_screenspot_MAE_y_boxes": 0.1249464675784111, "eval_screenspot_NUM_probability": 0.9998675386110941, "eval_screenspot_inside_bbox": 0.3312500019868215, "eval_screenspot_loss": 2.870220184326172, "eval_screenspot_loss_ce": 0.002282697862635056, "eval_screenspot_loss_iou": 0.9602864583333334, "eval_screenspot_loss_num": 0.19477335611979166, "eval_screenspot_loss_xval": 2.892578125, "eval_screenspot_runtime": 125.3181, "eval_screenspot_samples_per_second": 0.71, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "eval_compot_CIoU": -0.052139995619654655, "eval_compot_GIoU": 0.03255108371376991, "eval_compot_IoU": 0.11597498506307602, "eval_compot_MAE_all": 0.21709506213665009, "eval_compot_MAE_h": 0.12527992576360703, "eval_compot_MAE_w": 0.2657003700733185, "eval_compot_MAE_x_boxes": 0.16439608111977577, "eval_compot_MAE_y_boxes": 0.114028200507164, "eval_compot_NUM_probability": 0.9998819231987, "eval_compot_inside_bbox": 0.1927083358168602, "eval_compot_loss": 3.0493226051330566, "eval_compot_loss_ce": 0.0014912343467585742, "eval_compot_loss_iou": 0.980712890625, "eval_compot_loss_num": 0.227752685546875, "eval_compot_loss_xval": 3.1005859375, "eval_compot_runtime": 66.8909, "eval_compot_samples_per_second": 0.747, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "eval_custom_ui_MAE_all": 0.13057106733322144, "eval_custom_ui_MAE_x": 0.1494990736246109, "eval_custom_ui_MAE_y": 0.11164304614067078, "eval_custom_ui_NUM_probability": 0.9999012649059296, "eval_custom_ui_loss": 0.6442122459411621, "eval_custom_ui_loss_ce": 0.00029968630406074226, "eval_custom_ui_loss_num": 0.127471923828125, "eval_custom_ui_loss_xval": 0.6373291015625, "eval_custom_ui_runtime": 56.7883, "eval_custom_ui_samples_per_second": 0.88, "eval_custom_ui_steps_per_second": 0.035, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.831946755407654, "loss": 0.6348468065261841, "loss_ce": 0.0003253491304349154, "loss_iou": 0.0, "loss_num": 0.126953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 15657564, "step": 250 }, { "epoch": 0.8352745424292846, "grad_norm": 21.572481155395508, "learning_rate": 5e-06, "loss": 1.0439, "num_input_tokens_seen": 15721140, "step": 251 }, { "epoch": 0.8352745424292846, "loss": 0.9871144890785217, "loss_ce": 0.001274666516110301, "loss_iou": 0.294921875, "loss_num": 0.07861328125, "loss_xval": 0.984375, "num_input_tokens_seen": 15721140, "step": 251 }, { "epoch": 0.8386023294509152, "grad_norm": 11.919601440429688, "learning_rate": 5e-06, "loss": 0.8642, "num_input_tokens_seen": 15782592, "step": 252 }, { "epoch": 0.8386023294509152, "loss": 0.7025572061538696, "loss_ce": 0.0001645814481889829, "loss_iou": 0.185546875, "loss_num": 0.06640625, "loss_xval": 0.703125, "num_input_tokens_seen": 15782592, "step": 252 }, { "epoch": 0.8419301164725458, "grad_norm": 13.507158279418945, "learning_rate": 5e-06, "loss": 1.3026, "num_input_tokens_seen": 15845496, "step": 253 }, { "epoch": 0.8419301164725458, "loss": 1.4116151332855225, "loss_ce": 0.0034119777847081423, "loss_iou": 0.466796875, "loss_num": 0.09521484375, "loss_xval": 1.40625, "num_input_tokens_seen": 15845496, "step": 253 }, { "epoch": 0.8452579034941764, "grad_norm": 12.370970726013184, "learning_rate": 5e-06, "loss": 1.0557, "num_input_tokens_seen": 15907580, "step": 254 }, { "epoch": 0.8452579034941764, "loss": 1.0885083675384521, "loss_ce": 0.0024488139897584915, "loss_iou": 0.32421875, "loss_num": 0.08740234375, "loss_xval": 1.0859375, "num_input_tokens_seen": 15907580, "step": 254 }, { "epoch": 0.848585690515807, "grad_norm": 13.69038200378418, "learning_rate": 5e-06, "loss": 1.1214, "num_input_tokens_seen": 15969616, "step": 255 }, { "epoch": 0.848585690515807, "loss": 1.225003719329834, "loss_ce": 0.010892418213188648, "loss_iou": 0.3828125, "loss_num": 0.08935546875, "loss_xval": 1.2109375, "num_input_tokens_seen": 15969616, "step": 255 }, { "epoch": 0.8519134775374376, "grad_norm": 17.91082763671875, "learning_rate": 5e-06, "loss": 1.0166, "num_input_tokens_seen": 16031068, "step": 256 }, { "epoch": 0.8519134775374376, "loss": 0.9940263032913208, "loss_ce": 0.00818653590977192, "loss_iou": 0.267578125, "loss_num": 0.08984375, "loss_xval": 0.984375, "num_input_tokens_seen": 16031068, "step": 256 }, { "epoch": 0.8552412645590682, "grad_norm": 18.975540161132812, "learning_rate": 5e-06, "loss": 1.415, "num_input_tokens_seen": 16094500, "step": 257 }, { "epoch": 0.8552412645590682, "loss": 1.3225477933883667, "loss_ce": 0.00858297199010849, "loss_iou": 0.3671875, "loss_num": 0.11572265625, "loss_xval": 1.3125, "num_input_tokens_seen": 16094500, "step": 257 }, { "epoch": 0.8585690515806988, "grad_norm": 10.849245071411133, "learning_rate": 5e-06, "loss": 1.0762, "num_input_tokens_seen": 16157080, "step": 258 }, { "epoch": 0.8585690515806988, "loss": 1.124495267868042, "loss_ce": 0.0009601035853847861, "loss_iou": 0.390625, "loss_num": 0.068359375, "loss_xval": 1.125, "num_input_tokens_seen": 16157080, "step": 258 }, { "epoch": 0.8618968386023295, "grad_norm": 12.286276817321777, "learning_rate": 5e-06, "loss": 1.1625, "num_input_tokens_seen": 16219816, "step": 259 }, { "epoch": 0.8618968386023295, "loss": 1.0627708435058594, "loss_ce": 0.0005150538054294884, "loss_iou": 0.33203125, "loss_num": 0.07958984375, "loss_xval": 1.0625, "num_input_tokens_seen": 16219816, "step": 259 }, { "epoch": 0.8652246256239601, "grad_norm": 18.6634521484375, "learning_rate": 5e-06, "loss": 1.097, "num_input_tokens_seen": 16282332, "step": 260 }, { "epoch": 0.8652246256239601, "loss": 1.189394474029541, "loss_ce": 0.05999988317489624, "loss_iou": 0.35546875, "loss_num": 0.083984375, "loss_xval": 1.1328125, "num_input_tokens_seen": 16282332, "step": 260 }, { "epoch": 0.8685524126455907, "grad_norm": 16.149057388305664, "learning_rate": 5e-06, "loss": 1.253, "num_input_tokens_seen": 16345872, "step": 261 }, { "epoch": 0.8685524126455907, "loss": 1.2571015357971191, "loss_ce": 0.005636734887957573, "loss_iou": 0.390625, "loss_num": 0.09375, "loss_xval": 1.25, "num_input_tokens_seen": 16345872, "step": 261 }, { "epoch": 0.8718801996672213, "grad_norm": 17.51819610595703, "learning_rate": 5e-06, "loss": 1.1602, "num_input_tokens_seen": 16409180, "step": 262 }, { "epoch": 0.8718801996672213, "loss": 1.1017225980758667, "loss_ce": 0.001136659411713481, "loss_iou": 0.322265625, "loss_num": 0.0908203125, "loss_xval": 1.1015625, "num_input_tokens_seen": 16409180, "step": 262 }, { "epoch": 0.8752079866888519, "grad_norm": 24.646146774291992, "learning_rate": 5e-06, "loss": 1.1859, "num_input_tokens_seen": 16472024, "step": 263 }, { "epoch": 0.8752079866888519, "loss": 1.1218383312225342, "loss_ce": 0.0007445079972967505, "loss_iou": 0.34765625, "loss_num": 0.08544921875, "loss_xval": 1.125, "num_input_tokens_seen": 16472024, "step": 263 }, { "epoch": 0.8785357737104825, "grad_norm": 13.253851890563965, "learning_rate": 5e-06, "loss": 1.2264, "num_input_tokens_seen": 16536084, "step": 264 }, { "epoch": 0.8785357737104825, "loss": 1.412590742111206, "loss_ce": 0.004387641325592995, "loss_iou": 0.474609375, "loss_num": 0.091796875, "loss_xval": 1.40625, "num_input_tokens_seen": 16536084, "step": 264 }, { "epoch": 0.8818635607321131, "grad_norm": 8.509352684020996, "learning_rate": 5e-06, "loss": 1.1404, "num_input_tokens_seen": 16598972, "step": 265 }, { "epoch": 0.8818635607321131, "loss": 1.3269490003585815, "loss_ce": 0.009077904745936394, "loss_iou": 0.42578125, "loss_num": 0.0927734375, "loss_xval": 1.3203125, "num_input_tokens_seen": 16598972, "step": 265 }, { "epoch": 0.8851913477537438, "grad_norm": 47.873111724853516, "learning_rate": 5e-06, "loss": 1.454, "num_input_tokens_seen": 16663840, "step": 266 }, { "epoch": 0.8851913477537438, "loss": 1.363907814025879, "loss_ce": 0.0040445649065077305, "loss_iou": 0.4375, "loss_num": 0.09619140625, "loss_xval": 1.359375, "num_input_tokens_seen": 16663840, "step": 266 }, { "epoch": 0.8885191347753744, "grad_norm": 19.438655853271484, "learning_rate": 5e-06, "loss": 0.9902, "num_input_tokens_seen": 16727384, "step": 267 }, { "epoch": 0.8885191347753744, "loss": 1.145904541015625, "loss_ce": 0.000884984212461859, "loss_iou": 0.353515625, "loss_num": 0.087890625, "loss_xval": 1.1484375, "num_input_tokens_seen": 16727384, "step": 267 }, { "epoch": 0.891846921797005, "grad_norm": 22.878734588623047, "learning_rate": 5e-06, "loss": 1.3198, "num_input_tokens_seen": 16790784, "step": 268 }, { "epoch": 0.891846921797005, "loss": 1.2260500192642212, "loss_ce": 0.0009523681947030127, "loss_iou": 0.408203125, "loss_num": 0.08154296875, "loss_xval": 1.2265625, "num_input_tokens_seen": 16790784, "step": 268 }, { "epoch": 0.8951747088186356, "grad_norm": 18.390544891357422, "learning_rate": 5e-06, "loss": 1.361, "num_input_tokens_seen": 16853672, "step": 269 }, { "epoch": 0.8951747088186356, "loss": 1.1613576412200928, "loss_ce": 0.007549000903964043, "loss_iou": 0.326171875, "loss_num": 0.10009765625, "loss_xval": 1.15625, "num_input_tokens_seen": 16853672, "step": 269 }, { "epoch": 0.8985024958402662, "grad_norm": 24.370708465576172, "learning_rate": 5e-06, "loss": 1.4294, "num_input_tokens_seen": 16916512, "step": 270 }, { "epoch": 0.8985024958402662, "loss": 1.3145873546600342, "loss_ce": 0.00111083232332021, "loss_iou": 0.423828125, "loss_num": 0.09326171875, "loss_xval": 1.3125, "num_input_tokens_seen": 16916512, "step": 270 }, { "epoch": 0.9018302828618968, "grad_norm": 20.53895378112793, "learning_rate": 5e-06, "loss": 1.0479, "num_input_tokens_seen": 16978928, "step": 271 }, { "epoch": 0.9018302828618968, "loss": 1.2197823524475098, "loss_ce": 0.00176473637111485, "loss_iou": 0.384765625, "loss_num": 0.09033203125, "loss_xval": 1.21875, "num_input_tokens_seen": 16978928, "step": 271 }, { "epoch": 0.9051580698835274, "grad_norm": 23.882923126220703, "learning_rate": 5e-06, "loss": 1.3089, "num_input_tokens_seen": 17042232, "step": 272 }, { "epoch": 0.9051580698835274, "loss": 1.3640576601028442, "loss_ce": 0.0002881159307435155, "loss_iou": 0.48046875, "loss_num": 0.080078125, "loss_xval": 1.3671875, "num_input_tokens_seen": 17042232, "step": 272 }, { "epoch": 0.908485856905158, "grad_norm": 25.272172927856445, "learning_rate": 5e-06, "loss": 1.1401, "num_input_tokens_seen": 17104348, "step": 273 }, { "epoch": 0.908485856905158, "loss": 1.1901004314422607, "loss_ce": 0.006506608799099922, "loss_iou": 0.3046875, "loss_num": 0.11474609375, "loss_xval": 1.1875, "num_input_tokens_seen": 17104348, "step": 273 }, { "epoch": 0.9118136439267887, "grad_norm": 23.974464416503906, "learning_rate": 5e-06, "loss": 1.1068, "num_input_tokens_seen": 17168028, "step": 274 }, { "epoch": 0.9118136439267887, "loss": 1.049910068511963, "loss_ce": 0.0274491049349308, "loss_iou": 0.275390625, "loss_num": 0.09423828125, "loss_xval": 1.0234375, "num_input_tokens_seen": 17168028, "step": 274 }, { "epoch": 0.9151414309484193, "grad_norm": 22.957887649536133, "learning_rate": 5e-06, "loss": 1.0775, "num_input_tokens_seen": 17231708, "step": 275 }, { "epoch": 0.9151414309484193, "loss": 1.0542798042297363, "loss_ce": 0.0054517509415745735, "loss_iou": 0.33203125, "loss_num": 0.07666015625, "loss_xval": 1.046875, "num_input_tokens_seen": 17231708, "step": 275 }, { "epoch": 0.9184692179700499, "grad_norm": 18.080631256103516, "learning_rate": 5e-06, "loss": 1.2168, "num_input_tokens_seen": 17294268, "step": 276 }, { "epoch": 0.9184692179700499, "loss": 1.314110279083252, "loss_ce": 0.008934599347412586, "loss_iou": 0.40234375, "loss_num": 0.10009765625, "loss_xval": 1.3046875, "num_input_tokens_seen": 17294268, "step": 276 }, { "epoch": 0.9217970049916805, "grad_norm": 8.779784202575684, "learning_rate": 5e-06, "loss": 0.749, "num_input_tokens_seen": 17357296, "step": 277 }, { "epoch": 0.9217970049916805, "loss": 0.7734503746032715, "loss_ce": 0.0009894431568682194, "loss_iou": 0.224609375, "loss_num": 0.06494140625, "loss_xval": 0.7734375, "num_input_tokens_seen": 17357296, "step": 277 }, { "epoch": 0.9251247920133111, "grad_norm": 15.270282745361328, "learning_rate": 5e-06, "loss": 0.8717, "num_input_tokens_seen": 17419472, "step": 278 }, { "epoch": 0.9251247920133111, "loss": 0.6954588890075684, "loss_ce": 0.0001463492662878707, "loss_iou": 0.197265625, "loss_num": 0.060302734375, "loss_xval": 0.6953125, "num_input_tokens_seen": 17419472, "step": 278 }, { "epoch": 0.9284525790349417, "grad_norm": 23.46548843383789, "learning_rate": 5e-06, "loss": 1.1007, "num_input_tokens_seen": 17482492, "step": 279 }, { "epoch": 0.9284525790349417, "loss": 1.3420525789260864, "loss_ce": 0.010997871868312359, "loss_iou": 0.4296875, "loss_num": 0.09375, "loss_xval": 1.328125, "num_input_tokens_seen": 17482492, "step": 279 }, { "epoch": 0.9317803660565723, "grad_norm": 27.430753707885742, "learning_rate": 5e-06, "loss": 1.2192, "num_input_tokens_seen": 17546180, "step": 280 }, { "epoch": 0.9317803660565723, "loss": 1.0658645629882812, "loss_ce": 0.0018997644074261189, "loss_iou": 0.3359375, "loss_num": 0.078125, "loss_xval": 1.0625, "num_input_tokens_seen": 17546180, "step": 280 }, { "epoch": 0.9351081530782029, "grad_norm": 13.674880027770996, "learning_rate": 5e-06, "loss": 1.1342, "num_input_tokens_seen": 17610472, "step": 281 }, { "epoch": 0.9351081530782029, "loss": 1.3510611057281494, "loss_ce": 0.0009633672889322042, "loss_iou": 0.443359375, "loss_num": 0.09228515625, "loss_xval": 1.3515625, "num_input_tokens_seen": 17610472, "step": 281 }, { "epoch": 0.9384359400998337, "grad_norm": 8.77342700958252, "learning_rate": 5e-06, "loss": 0.8201, "num_input_tokens_seen": 17672128, "step": 282 }, { "epoch": 0.9384359400998337, "loss": 0.7342939376831055, "loss_ce": 0.004557626321911812, "loss_iou": 0.150390625, "loss_num": 0.08544921875, "loss_xval": 0.73046875, "num_input_tokens_seen": 17672128, "step": 282 }, { "epoch": 0.9417637271214643, "grad_norm": 11.732505798339844, "learning_rate": 5e-06, "loss": 0.604, "num_input_tokens_seen": 17733204, "step": 283 }, { "epoch": 0.9417637271214643, "loss": 0.5884619951248169, "loss_ce": 0.002524492098018527, "loss_iou": 0.095703125, "loss_num": 0.0791015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 17733204, "step": 283 }, { "epoch": 0.9450915141430949, "grad_norm": 16.048566818237305, "learning_rate": 5e-06, "loss": 1.0758, "num_input_tokens_seen": 17795816, "step": 284 }, { "epoch": 0.9450915141430949, "loss": 0.9209912419319153, "loss_ce": 0.001069328049197793, "loss_iou": 0.23828125, "loss_num": 0.08837890625, "loss_xval": 0.921875, "num_input_tokens_seen": 17795816, "step": 284 }, { "epoch": 0.9484193011647255, "grad_norm": 16.50386619567871, "learning_rate": 5e-06, "loss": 1.0822, "num_input_tokens_seen": 17858324, "step": 285 }, { "epoch": 0.9484193011647255, "loss": 1.0507948398590088, "loss_ce": 0.0024550450034439564, "loss_iou": 0.28125, "loss_num": 0.09716796875, "loss_xval": 1.046875, "num_input_tokens_seen": 17858324, "step": 285 }, { "epoch": 0.9517470881863561, "grad_norm": 26.581377029418945, "learning_rate": 5e-06, "loss": 1.2578, "num_input_tokens_seen": 17922904, "step": 286 }, { "epoch": 0.9517470881863561, "loss": 1.372084140777588, "loss_ce": 0.00465251412242651, "loss_iou": 0.447265625, "loss_num": 0.09423828125, "loss_xval": 1.3671875, "num_input_tokens_seen": 17922904, "step": 286 }, { "epoch": 0.9550748752079867, "grad_norm": 15.423108100891113, "learning_rate": 5e-06, "loss": 1.2041, "num_input_tokens_seen": 17984848, "step": 287 }, { "epoch": 0.9550748752079867, "loss": 0.9750069379806519, "loss_ce": 0.0035714467521756887, "loss_iou": 0.294921875, "loss_num": 0.076171875, "loss_xval": 0.97265625, "num_input_tokens_seen": 17984848, "step": 287 }, { "epoch": 0.9584026622296173, "grad_norm": 18.705720901489258, "learning_rate": 5e-06, "loss": 1.2847, "num_input_tokens_seen": 18047960, "step": 288 }, { "epoch": 0.9584026622296173, "loss": 1.0339102745056152, "loss_ce": 0.00021886028116568923, "loss_iou": 0.314453125, "loss_num": 0.08056640625, "loss_xval": 1.03125, "num_input_tokens_seen": 18047960, "step": 288 }, { "epoch": 0.961730449251248, "grad_norm": 21.023420333862305, "learning_rate": 5e-06, "loss": 1.318, "num_input_tokens_seen": 18111360, "step": 289 }, { "epoch": 0.961730449251248, "loss": 1.4019041061401367, "loss_ce": 0.02543933130800724, "loss_iou": 0.466796875, "loss_num": 0.08837890625, "loss_xval": 1.375, "num_input_tokens_seen": 18111360, "step": 289 }, { "epoch": 0.9650582362728786, "grad_norm": 27.743906021118164, "learning_rate": 5e-06, "loss": 1.1665, "num_input_tokens_seen": 18173860, "step": 290 }, { "epoch": 0.9650582362728786, "loss": 1.1183178424835205, "loss_ce": 0.0006421446450985968, "loss_iou": 0.3203125, "loss_num": 0.09521484375, "loss_xval": 1.1171875, "num_input_tokens_seen": 18173860, "step": 290 }, { "epoch": 0.9683860232945092, "grad_norm": 27.982433319091797, "learning_rate": 5e-06, "loss": 1.0317, "num_input_tokens_seen": 18235116, "step": 291 }, { "epoch": 0.9683860232945092, "loss": 1.2463921308517456, "loss_ce": 0.0027397728990763426, "loss_iou": 0.412109375, "loss_num": 0.083984375, "loss_xval": 1.2421875, "num_input_tokens_seen": 18235116, "step": 291 }, { "epoch": 0.9717138103161398, "grad_norm": 37.3447265625, "learning_rate": 5e-06, "loss": 1.0937, "num_input_tokens_seen": 18298408, "step": 292 }, { "epoch": 0.9717138103161398, "loss": 0.9288716912269592, "loss_ce": 0.000649062218144536, "loss_iou": 0.34375, "loss_num": 0.048095703125, "loss_xval": 0.9296875, "num_input_tokens_seen": 18298408, "step": 292 }, { "epoch": 0.9750415973377704, "grad_norm": 35.098751068115234, "learning_rate": 5e-06, "loss": 1.0408, "num_input_tokens_seen": 18361840, "step": 293 }, { "epoch": 0.9750415973377704, "loss": 0.8903936743736267, "loss_ce": 0.00025693600764498115, "loss_iou": 0.3125, "loss_num": 0.05322265625, "loss_xval": 0.890625, "num_input_tokens_seen": 18361840, "step": 293 }, { "epoch": 0.978369384359401, "grad_norm": 19.666545867919922, "learning_rate": 5e-06, "loss": 0.9926, "num_input_tokens_seen": 18423412, "step": 294 }, { "epoch": 0.978369384359401, "loss": 0.8818584680557251, "loss_ce": 0.0014873913023620844, "loss_iou": 0.25, "loss_num": 0.076171875, "loss_xval": 0.87890625, "num_input_tokens_seen": 18423412, "step": 294 }, { "epoch": 0.9816971713810316, "grad_norm": 25.35841178894043, "learning_rate": 5e-06, "loss": 1.1415, "num_input_tokens_seen": 18486260, "step": 295 }, { "epoch": 0.9816971713810316, "loss": 1.2383277416229248, "loss_ce": 0.016892246901988983, "loss_iou": 0.40625, "loss_num": 0.08154296875, "loss_xval": 1.21875, "num_input_tokens_seen": 18486260, "step": 295 }, { "epoch": 0.9850249584026622, "grad_norm": 16.71759033203125, "learning_rate": 5e-06, "loss": 1.0593, "num_input_tokens_seen": 18549592, "step": 296 }, { "epoch": 0.9850249584026622, "loss": 0.9016702175140381, "loss_ce": 0.0007912812288850546, "loss_iou": 0.3359375, "loss_num": 0.0458984375, "loss_xval": 0.90234375, "num_input_tokens_seen": 18549592, "step": 296 }, { "epoch": 0.9883527454242929, "grad_norm": 41.6589241027832, "learning_rate": 5e-06, "loss": 1.2865, "num_input_tokens_seen": 18611944, "step": 297 }, { "epoch": 0.9883527454242929, "loss": 1.150113821029663, "loss_ce": 0.0024086525663733482, "loss_iou": 0.380859375, "loss_num": 0.0771484375, "loss_xval": 1.1484375, "num_input_tokens_seen": 18611944, "step": 297 }, { "epoch": 0.9916805324459235, "grad_norm": 15.88283920288086, "learning_rate": 5e-06, "loss": 0.7537, "num_input_tokens_seen": 18674424, "step": 298 }, { "epoch": 0.9916805324459235, "loss": 0.6804075241088867, "loss_ce": 0.0005979957641102374, "loss_iou": 0.2275390625, "loss_num": 0.045166015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 18674424, "step": 298 }, { "epoch": 0.9950083194675541, "grad_norm": 13.658927917480469, "learning_rate": 5e-06, "loss": 1.1422, "num_input_tokens_seen": 18737624, "step": 299 }, { "epoch": 0.9950083194675541, "loss": 1.2116801738739014, "loss_ce": 0.010752532631158829, "loss_iou": 0.40625, "loss_num": 0.0771484375, "loss_xval": 1.203125, "num_input_tokens_seen": 18737624, "step": 299 }, { "epoch": 0.9983361064891847, "grad_norm": 39.05524826049805, "learning_rate": 5e-06, "loss": 1.132, "num_input_tokens_seen": 18800356, "step": 300 }, { "epoch": 0.9983361064891847, "loss": 0.9712319374084473, "loss_ce": 0.0002846252464223653, "loss_iou": 0.29296875, "loss_num": 0.0771484375, "loss_xval": 0.97265625, "num_input_tokens_seen": 18800356, "step": 300 }, { "epoch": 0.9983361064891847, "loss": 1.005664348602295, "loss_ce": 0.000293174380203709, "loss_iou": 0.306640625, "loss_num": 0.07861328125, "loss_xval": 1.0078125, "num_input_tokens_seen": 18831744, "step": 300 }, { "epoch": 1.0016638935108153, "grad_norm": 20.688446044921875, "learning_rate": 5e-06, "loss": 0.9706, "num_input_tokens_seen": 18862988, "step": 301 }, { "epoch": 1.0016638935108153, "loss": 0.9355121850967407, "loss_ce": 0.004359826445579529, "loss_iou": 0.330078125, "loss_num": 0.05419921875, "loss_xval": 0.9296875, "num_input_tokens_seen": 18862988, "step": 301 }, { "epoch": 1.004991680532446, "grad_norm": 22.663740158081055, "learning_rate": 5e-06, "loss": 1.1254, "num_input_tokens_seen": 18925872, "step": 302 }, { "epoch": 1.004991680532446, "loss": 1.1522040367126465, "loss_ce": 0.0018133968114852905, "loss_iou": 0.353515625, "loss_num": 0.0888671875, "loss_xval": 1.1484375, "num_input_tokens_seen": 18925872, "step": 302 }, { "epoch": 1.0083194675540765, "grad_norm": 8.964369773864746, "learning_rate": 5e-06, "loss": 0.8262, "num_input_tokens_seen": 18987936, "step": 303 }, { "epoch": 1.0083194675540765, "loss": 0.8592677116394043, "loss_ce": 0.0023341416381299496, "loss_iou": 0.1865234375, "loss_num": 0.09716796875, "loss_xval": 0.85546875, "num_input_tokens_seen": 18987936, "step": 303 }, { "epoch": 1.0116472545757071, "grad_norm": 10.31577205657959, "learning_rate": 5e-06, "loss": 1.0093, "num_input_tokens_seen": 19051564, "step": 304 }, { "epoch": 1.0116472545757071, "loss": 0.9309121370315552, "loss_ce": 0.0007363811018876731, "loss_iou": 0.310546875, "loss_num": 0.0615234375, "loss_xval": 0.9296875, "num_input_tokens_seen": 19051564, "step": 304 }, { "epoch": 1.0149750415973378, "grad_norm": 13.649383544921875, "learning_rate": 5e-06, "loss": 0.8415, "num_input_tokens_seen": 19112960, "step": 305 }, { "epoch": 1.0149750415973378, "loss": 0.7516453266143799, "loss_ce": 0.00018046580953523517, "loss_iou": 0.1943359375, "loss_num": 0.07275390625, "loss_xval": 0.75, "num_input_tokens_seen": 19112960, "step": 305 }, { "epoch": 1.0183028286189684, "grad_norm": 19.828454971313477, "learning_rate": 5e-06, "loss": 1.2358, "num_input_tokens_seen": 19175956, "step": 306 }, { "epoch": 1.0183028286189684, "loss": 1.1316444873809814, "loss_ce": 0.000296859274385497, "loss_iou": 0.421875, "loss_num": 0.05712890625, "loss_xval": 1.1328125, "num_input_tokens_seen": 19175956, "step": 306 }, { "epoch": 1.021630615640599, "grad_norm": 19.776601791381836, "learning_rate": 5e-06, "loss": 0.9784, "num_input_tokens_seen": 19238364, "step": 307 }, { "epoch": 1.021630615640599, "loss": 0.9802345633506775, "loss_ce": 0.0017189187929034233, "loss_iou": 0.345703125, "loss_num": 0.057373046875, "loss_xval": 0.9765625, "num_input_tokens_seen": 19238364, "step": 307 }, { "epoch": 1.0249584026622296, "grad_norm": 10.466225624084473, "learning_rate": 5e-06, "loss": 1.1121, "num_input_tokens_seen": 19301488, "step": 308 }, { "epoch": 1.0249584026622296, "loss": 1.1866347789764404, "loss_ce": 0.0005995276151224971, "loss_iou": 0.42578125, "loss_num": 0.06640625, "loss_xval": 1.1875, "num_input_tokens_seen": 19301488, "step": 308 }, { "epoch": 1.0282861896838602, "grad_norm": 8.818867683410645, "learning_rate": 5e-06, "loss": 0.7348, "num_input_tokens_seen": 19363376, "step": 309 }, { "epoch": 1.0282861896838602, "loss": 0.5317307710647583, "loss_ce": 0.0009080054587684572, "loss_iou": 0.0927734375, "loss_num": 0.06884765625, "loss_xval": 0.53125, "num_input_tokens_seen": 19363376, "step": 309 }, { "epoch": 1.0316139767054908, "grad_norm": 12.430651664733887, "learning_rate": 5e-06, "loss": 1.0676, "num_input_tokens_seen": 19427280, "step": 310 }, { "epoch": 1.0316139767054908, "loss": 1.1095433235168457, "loss_ce": 0.002609734423458576, "loss_iou": 0.37890625, "loss_num": 0.0693359375, "loss_xval": 1.109375, "num_input_tokens_seen": 19427280, "step": 310 }, { "epoch": 1.0349417637271214, "grad_norm": 21.428102493286133, "learning_rate": 5e-06, "loss": 1.0806, "num_input_tokens_seen": 19492148, "step": 311 }, { "epoch": 1.0349417637271214, "loss": 1.2216033935546875, "loss_ce": 0.0023650832008570433, "loss_iou": 0.4453125, "loss_num": 0.06591796875, "loss_xval": 1.21875, "num_input_tokens_seen": 19492148, "step": 311 }, { "epoch": 1.038269550748752, "grad_norm": 13.665613174438477, "learning_rate": 5e-06, "loss": 1.0555, "num_input_tokens_seen": 19554652, "step": 312 }, { "epoch": 1.038269550748752, "loss": 0.9334628582000732, "loss_ce": 0.0020663391333073378, "loss_iou": 0.30859375, "loss_num": 0.0634765625, "loss_xval": 0.9296875, "num_input_tokens_seen": 19554652, "step": 312 }, { "epoch": 1.0415973377703827, "grad_norm": 13.296834945678711, "learning_rate": 5e-06, "loss": 1.1962, "num_input_tokens_seen": 19617640, "step": 313 }, { "epoch": 1.0415973377703827, "loss": 1.3586363792419434, "loss_ce": 0.001702840905636549, "loss_iou": 0.453125, "loss_num": 0.09033203125, "loss_xval": 1.359375, "num_input_tokens_seen": 19617640, "step": 313 }, { "epoch": 1.0449251247920133, "grad_norm": 13.294105529785156, "learning_rate": 5e-06, "loss": 0.9318, "num_input_tokens_seen": 19680612, "step": 314 }, { "epoch": 1.0449251247920133, "loss": 0.8829240202903748, "loss_ce": 0.00011155771790072322, "loss_iou": 0.28125, "loss_num": 0.064453125, "loss_xval": 0.8828125, "num_input_tokens_seen": 19680612, "step": 314 }, { "epoch": 1.0482529118136439, "grad_norm": 29.415206909179688, "learning_rate": 5e-06, "loss": 1.0025, "num_input_tokens_seen": 19742452, "step": 315 }, { "epoch": 1.0482529118136439, "loss": 1.2018414735794067, "loss_ce": 0.008482063189148903, "loss_iou": 0.3671875, "loss_num": 0.09130859375, "loss_xval": 1.1953125, "num_input_tokens_seen": 19742452, "step": 315 }, { "epoch": 1.0515806988352745, "grad_norm": 11.475259780883789, "learning_rate": 5e-06, "loss": 0.6227, "num_input_tokens_seen": 19802316, "step": 316 }, { "epoch": 1.0515806988352745, "loss": 0.5464380383491516, "loss_ce": 0.000783749797847122, "loss_iou": 0.0, "loss_num": 0.10888671875, "loss_xval": 0.546875, "num_input_tokens_seen": 19802316, "step": 316 }, { "epoch": 1.054908485856905, "grad_norm": 13.911805152893066, "learning_rate": 5e-06, "loss": 1.0074, "num_input_tokens_seen": 19864128, "step": 317 }, { "epoch": 1.054908485856905, "loss": 0.9430869817733765, "loss_ce": 0.0007042001816444099, "loss_iou": 0.27734375, "loss_num": 0.078125, "loss_xval": 0.94140625, "num_input_tokens_seen": 19864128, "step": 317 }, { "epoch": 1.0582362728785357, "grad_norm": 20.12273597717285, "learning_rate": 5e-06, "loss": 1.3843, "num_input_tokens_seen": 19927420, "step": 318 }, { "epoch": 1.0582362728785357, "loss": 1.4466303586959839, "loss_ce": 0.00034135475289076567, "loss_iou": 0.51953125, "loss_num": 0.08154296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 19927420, "step": 318 }, { "epoch": 1.0615640599001663, "grad_norm": 18.511112213134766, "learning_rate": 5e-06, "loss": 0.9767, "num_input_tokens_seen": 19989560, "step": 319 }, { "epoch": 1.0615640599001663, "loss": 0.6565845012664795, "loss_ce": 0.0011890050955116749, "loss_iou": 0.1767578125, "loss_num": 0.06005859375, "loss_xval": 0.65625, "num_input_tokens_seen": 19989560, "step": 319 }, { "epoch": 1.064891846921797, "grad_norm": 10.491249084472656, "learning_rate": 5e-06, "loss": 0.8198, "num_input_tokens_seen": 20051352, "step": 320 }, { "epoch": 1.064891846921797, "loss": 0.8516696691513062, "loss_ce": 0.00010720050340751186, "loss_iou": 0.279296875, "loss_num": 0.05859375, "loss_xval": 0.8515625, "num_input_tokens_seen": 20051352, "step": 320 }, { "epoch": 1.0682196339434276, "grad_norm": 13.944743156433105, "learning_rate": 5e-06, "loss": 1.0268, "num_input_tokens_seen": 20112940, "step": 321 }, { "epoch": 1.0682196339434276, "loss": 1.2168986797332764, "loss_ce": 0.00010173316695727408, "loss_iou": 0.33203125, "loss_num": 0.1103515625, "loss_xval": 1.21875, "num_input_tokens_seen": 20112940, "step": 321 }, { "epoch": 1.0715474209650582, "grad_norm": 13.251590728759766, "learning_rate": 5e-06, "loss": 1.0174, "num_input_tokens_seen": 20175028, "step": 322 }, { "epoch": 1.0715474209650582, "loss": 1.0649445056915283, "loss_ce": 0.000552457757294178, "loss_iou": 0.330078125, "loss_num": 0.08056640625, "loss_xval": 1.0625, "num_input_tokens_seen": 20175028, "step": 322 }, { "epoch": 1.0748752079866888, "grad_norm": 23.606361389160156, "learning_rate": 5e-06, "loss": 0.8734, "num_input_tokens_seen": 20236452, "step": 323 }, { "epoch": 1.0748752079866888, "loss": 0.9539188146591187, "loss_ce": 0.002746941987425089, "loss_iou": 0.306640625, "loss_num": 0.06787109375, "loss_xval": 0.953125, "num_input_tokens_seen": 20236452, "step": 323 }, { "epoch": 1.0782029950083194, "grad_norm": 37.33720779418945, "learning_rate": 5e-06, "loss": 1.1052, "num_input_tokens_seen": 20299376, "step": 324 }, { "epoch": 1.0782029950083194, "loss": 1.0713891983032227, "loss_ce": 0.00107666221447289, "loss_iou": 0.326171875, "loss_num": 0.083984375, "loss_xval": 1.0703125, "num_input_tokens_seen": 20299376, "step": 324 }, { "epoch": 1.08153078202995, "grad_norm": 11.043768882751465, "learning_rate": 5e-06, "loss": 1.0411, "num_input_tokens_seen": 20362640, "step": 325 }, { "epoch": 1.08153078202995, "loss": 1.1990910768508911, "loss_ce": 0.0013371980749070644, "loss_iou": 0.42578125, "loss_num": 0.0693359375, "loss_xval": 1.1953125, "num_input_tokens_seen": 20362640, "step": 325 }, { "epoch": 1.0848585690515806, "grad_norm": 19.86610221862793, "learning_rate": 5e-06, "loss": 1.4274, "num_input_tokens_seen": 20425576, "step": 326 }, { "epoch": 1.0848585690515806, "loss": 1.4769482612609863, "loss_ce": 0.0013623626437038183, "loss_iou": 0.482421875, "loss_num": 0.1025390625, "loss_xval": 1.4765625, "num_input_tokens_seen": 20425576, "step": 326 }, { "epoch": 1.0881863560732112, "grad_norm": 12.096166610717773, "learning_rate": 5e-06, "loss": 1.2323, "num_input_tokens_seen": 20488272, "step": 327 }, { "epoch": 1.0881863560732112, "loss": 1.545140266418457, "loss_ce": 0.0002183896431233734, "loss_iou": 0.546875, "loss_num": 0.09033203125, "loss_xval": 1.546875, "num_input_tokens_seen": 20488272, "step": 327 }, { "epoch": 1.0915141430948418, "grad_norm": 13.61131477355957, "learning_rate": 5e-06, "loss": 0.8997, "num_input_tokens_seen": 20550160, "step": 328 }, { "epoch": 1.0915141430948418, "loss": 0.8179830312728882, "loss_ce": 0.0010885087540373206, "loss_iou": 0.2265625, "loss_num": 0.07275390625, "loss_xval": 0.81640625, "num_input_tokens_seen": 20550160, "step": 328 }, { "epoch": 1.0948419301164725, "grad_norm": 11.715150833129883, "learning_rate": 5e-06, "loss": 0.9644, "num_input_tokens_seen": 20613804, "step": 329 }, { "epoch": 1.0948419301164725, "loss": 0.964046835899353, "loss_ce": 0.0001796190917957574, "loss_iou": 0.349609375, "loss_num": 0.052978515625, "loss_xval": 0.96484375, "num_input_tokens_seen": 20613804, "step": 329 }, { "epoch": 1.098169717138103, "grad_norm": 9.858138084411621, "learning_rate": 5e-06, "loss": 1.2036, "num_input_tokens_seen": 20676252, "step": 330 }, { "epoch": 1.098169717138103, "loss": 1.2880395650863647, "loss_ce": 0.0019067421089857817, "loss_iou": 0.375, "loss_num": 0.10693359375, "loss_xval": 1.2890625, "num_input_tokens_seen": 20676252, "step": 330 }, { "epoch": 1.1014975041597337, "grad_norm": 13.484721183776855, "learning_rate": 5e-06, "loss": 1.0963, "num_input_tokens_seen": 20740020, "step": 331 }, { "epoch": 1.1014975041597337, "loss": 1.1542832851409912, "loss_ce": 0.003404431976377964, "loss_iou": 0.41796875, "loss_num": 0.0634765625, "loss_xval": 1.1484375, "num_input_tokens_seen": 20740020, "step": 331 }, { "epoch": 1.1048252911813643, "grad_norm": 9.757645606994629, "learning_rate": 5e-06, "loss": 0.9452, "num_input_tokens_seen": 20802796, "step": 332 }, { "epoch": 1.1048252911813643, "loss": 0.7349272966384888, "loss_ce": 0.003481978317722678, "loss_iou": 0.2490234375, "loss_num": 0.046875, "loss_xval": 0.73046875, "num_input_tokens_seen": 20802796, "step": 332 }, { "epoch": 1.108153078202995, "grad_norm": 15.33654499053955, "learning_rate": 5e-06, "loss": 0.7653, "num_input_tokens_seen": 20865212, "step": 333 }, { "epoch": 1.108153078202995, "loss": 0.9734396934509277, "loss_ce": 0.0017599575221538544, "loss_iou": 0.32421875, "loss_num": 0.06494140625, "loss_xval": 0.97265625, "num_input_tokens_seen": 20865212, "step": 333 }, { "epoch": 1.1114808652246255, "grad_norm": 13.662713050842285, "learning_rate": 5e-06, "loss": 1.3972, "num_input_tokens_seen": 20929480, "step": 334 }, { "epoch": 1.1114808652246255, "loss": 1.6420445442199707, "loss_ce": 0.0009312508627772331, "loss_iou": 0.55859375, "loss_num": 0.10546875, "loss_xval": 1.640625, "num_input_tokens_seen": 20929480, "step": 334 }, { "epoch": 1.1148086522462561, "grad_norm": 22.680776596069336, "learning_rate": 5e-06, "loss": 1.163, "num_input_tokens_seen": 20993044, "step": 335 }, { "epoch": 1.1148086522462561, "loss": 0.9729619026184082, "loss_ce": 0.002502923831343651, "loss_iou": 0.328125, "loss_num": 0.062255859375, "loss_xval": 0.96875, "num_input_tokens_seen": 20993044, "step": 335 }, { "epoch": 1.1181364392678868, "grad_norm": 42.96357345581055, "learning_rate": 5e-06, "loss": 1.3777, "num_input_tokens_seen": 21057020, "step": 336 }, { "epoch": 1.1181364392678868, "loss": 1.2434619665145874, "loss_ce": 0.0002978653647005558, "loss_iou": 0.4453125, "loss_num": 0.07080078125, "loss_xval": 1.2421875, "num_input_tokens_seen": 21057020, "step": 336 }, { "epoch": 1.1214642262895174, "grad_norm": 19.403898239135742, "learning_rate": 5e-06, "loss": 1.0263, "num_input_tokens_seen": 21120588, "step": 337 }, { "epoch": 1.1214642262895174, "loss": 1.0244314670562744, "loss_ce": 0.000993911293335259, "loss_iou": 0.34375, "loss_num": 0.06689453125, "loss_xval": 1.0234375, "num_input_tokens_seen": 21120588, "step": 337 }, { "epoch": 1.124792013311148, "grad_norm": 16.915956497192383, "learning_rate": 5e-06, "loss": 1.2745, "num_input_tokens_seen": 21184452, "step": 338 }, { "epoch": 1.124792013311148, "loss": 1.182100534439087, "loss_ce": 0.0014365393435582519, "loss_iou": 0.390625, "loss_num": 0.07958984375, "loss_xval": 1.1796875, "num_input_tokens_seen": 21184452, "step": 338 }, { "epoch": 1.1281198003327786, "grad_norm": 18.669437408447266, "learning_rate": 5e-06, "loss": 0.9773, "num_input_tokens_seen": 21247968, "step": 339 }, { "epoch": 1.1281198003327786, "loss": 1.0474261045455933, "loss_ce": 0.002260025357827544, "loss_iou": 0.330078125, "loss_num": 0.0771484375, "loss_xval": 1.046875, "num_input_tokens_seen": 21247968, "step": 339 }, { "epoch": 1.1314475873544092, "grad_norm": 16.28910255432129, "learning_rate": 5e-06, "loss": 1.2087, "num_input_tokens_seen": 21311952, "step": 340 }, { "epoch": 1.1314475873544092, "loss": 1.189194917678833, "loss_ce": 0.021470246836543083, "loss_iou": 0.38671875, "loss_num": 0.0791015625, "loss_xval": 1.1640625, "num_input_tokens_seen": 21311952, "step": 340 }, { "epoch": 1.1347753743760398, "grad_norm": 13.94228744506836, "learning_rate": 5e-06, "loss": 1.1315, "num_input_tokens_seen": 21375720, "step": 341 }, { "epoch": 1.1347753743760398, "loss": 0.9889962077140808, "loss_ce": 0.00047085504047572613, "loss_iou": 0.333984375, "loss_num": 0.06396484375, "loss_xval": 0.98828125, "num_input_tokens_seen": 21375720, "step": 341 }, { "epoch": 1.1381031613976704, "grad_norm": 14.070566177368164, "learning_rate": 5e-06, "loss": 1.029, "num_input_tokens_seen": 21437704, "step": 342 }, { "epoch": 1.1381031613976704, "loss": 1.0164613723754883, "loss_ce": 0.0015688447747379541, "loss_iou": 0.298828125, "loss_num": 0.083984375, "loss_xval": 1.015625, "num_input_tokens_seen": 21437704, "step": 342 }, { "epoch": 1.1414309484193013, "grad_norm": 17.267200469970703, "learning_rate": 5e-06, "loss": 1.2278, "num_input_tokens_seen": 21500984, "step": 343 }, { "epoch": 1.1414309484193013, "loss": 1.2168850898742676, "loss_ce": 0.002529619261622429, "loss_iou": 0.3828125, "loss_num": 0.08984375, "loss_xval": 1.2109375, "num_input_tokens_seen": 21500984, "step": 343 }, { "epoch": 1.1447587354409319, "grad_norm": 9.168577194213867, "learning_rate": 5e-06, "loss": 0.9593, "num_input_tokens_seen": 21564716, "step": 344 }, { "epoch": 1.1447587354409319, "loss": 1.0238981246948242, "loss_ce": 0.00046052533434703946, "loss_iou": 0.345703125, "loss_num": 0.06591796875, "loss_xval": 1.0234375, "num_input_tokens_seen": 21564716, "step": 344 }, { "epoch": 1.1480865224625625, "grad_norm": 14.753097534179688, "learning_rate": 5e-06, "loss": 1.1317, "num_input_tokens_seen": 21628680, "step": 345 }, { "epoch": 1.1480865224625625, "loss": 1.1598883867263794, "loss_ce": 0.0016852561384439468, "loss_iou": 0.38671875, "loss_num": 0.0771484375, "loss_xval": 1.15625, "num_input_tokens_seen": 21628680, "step": 345 }, { "epoch": 1.151414309484193, "grad_norm": 10.278120040893555, "learning_rate": 5e-06, "loss": 0.935, "num_input_tokens_seen": 21691120, "step": 346 }, { "epoch": 1.151414309484193, "loss": 0.975243330001831, "loss_ce": 0.001732556615024805, "loss_iou": 0.244140625, "loss_num": 0.09716796875, "loss_xval": 0.97265625, "num_input_tokens_seen": 21691120, "step": 346 }, { "epoch": 1.1547420965058237, "grad_norm": 12.571501731872559, "learning_rate": 5e-06, "loss": 0.9055, "num_input_tokens_seen": 21753216, "step": 347 }, { "epoch": 1.1547420965058237, "loss": 0.8693772554397583, "loss_ce": 0.0012132221600040793, "loss_iou": 0.28515625, "loss_num": 0.059326171875, "loss_xval": 0.8671875, "num_input_tokens_seen": 21753216, "step": 347 }, { "epoch": 1.1580698835274543, "grad_norm": 15.132104873657227, "learning_rate": 5e-06, "loss": 1.0404, "num_input_tokens_seen": 21814576, "step": 348 }, { "epoch": 1.1580698835274543, "loss": 0.9811496734619141, "loss_ce": 0.0009250296279788017, "loss_iou": 0.267578125, "loss_num": 0.0888671875, "loss_xval": 0.98046875, "num_input_tokens_seen": 21814576, "step": 348 }, { "epoch": 1.161397670549085, "grad_norm": 10.732748031616211, "learning_rate": 5e-06, "loss": 0.8791, "num_input_tokens_seen": 21876236, "step": 349 }, { "epoch": 1.161397670549085, "loss": 0.6810543537139893, "loss_ce": 0.0008785828249529004, "loss_iou": 0.1591796875, "loss_num": 0.072265625, "loss_xval": 0.6796875, "num_input_tokens_seen": 21876236, "step": 349 }, { "epoch": 1.1647254575707155, "grad_norm": 32.55437088012695, "learning_rate": 5e-06, "loss": 0.9927, "num_input_tokens_seen": 21939644, "step": 350 }, { "epoch": 1.1647254575707155, "loss": 0.8949718475341797, "loss_ce": 0.0011729662073776126, "loss_iou": 0.326171875, "loss_num": 0.048095703125, "loss_xval": 0.89453125, "num_input_tokens_seen": 21939644, "step": 350 }, { "epoch": 1.1680532445923462, "grad_norm": 15.197272300720215, "learning_rate": 5e-06, "loss": 0.8672, "num_input_tokens_seen": 22002236, "step": 351 }, { "epoch": 1.1680532445923462, "loss": 0.790031373500824, "loss_ce": 0.0007247051689773798, "loss_iou": 0.2578125, "loss_num": 0.05517578125, "loss_xval": 0.7890625, "num_input_tokens_seen": 22002236, "step": 351 }, { "epoch": 1.1713810316139768, "grad_norm": 18.31049156188965, "learning_rate": 5e-06, "loss": 0.9361, "num_input_tokens_seen": 22065312, "step": 352 }, { "epoch": 1.1713810316139768, "loss": 0.8961750268936157, "loss_ce": 0.00017895898781716824, "loss_iou": 0.337890625, "loss_num": 0.0439453125, "loss_xval": 0.89453125, "num_input_tokens_seen": 22065312, "step": 352 }, { "epoch": 1.1747088186356074, "grad_norm": 10.00696849822998, "learning_rate": 5e-06, "loss": 1.0501, "num_input_tokens_seen": 22129372, "step": 353 }, { "epoch": 1.1747088186356074, "loss": 0.8982384204864502, "loss_ce": 0.027632977813482285, "loss_iou": 0.2890625, "loss_num": 0.05859375, "loss_xval": 0.87109375, "num_input_tokens_seen": 22129372, "step": 353 }, { "epoch": 1.178036605657238, "grad_norm": 13.814727783203125, "learning_rate": 5e-06, "loss": 0.9941, "num_input_tokens_seen": 22191100, "step": 354 }, { "epoch": 1.178036605657238, "loss": 0.9056563973426819, "loss_ce": 0.00026089177117682993, "loss_iou": 0.279296875, "loss_num": 0.0693359375, "loss_xval": 0.90625, "num_input_tokens_seen": 22191100, "step": 354 }, { "epoch": 1.1813643926788686, "grad_norm": 13.330516815185547, "learning_rate": 5e-06, "loss": 1.1677, "num_input_tokens_seen": 22255328, "step": 355 }, { "epoch": 1.1813643926788686, "loss": 1.4402509927749634, "loss_ce": 0.0017744700890034437, "loss_iou": 0.498046875, "loss_num": 0.08837890625, "loss_xval": 1.4375, "num_input_tokens_seen": 22255328, "step": 355 }, { "epoch": 1.1846921797004992, "grad_norm": 20.281681060791016, "learning_rate": 5e-06, "loss": 0.7745, "num_input_tokens_seen": 22318376, "step": 356 }, { "epoch": 1.1846921797004992, "loss": 0.64408940076828, "loss_ce": 0.0024878759868443012, "loss_iou": 0.134765625, "loss_num": 0.07421875, "loss_xval": 0.640625, "num_input_tokens_seen": 22318376, "step": 356 }, { "epoch": 1.1880199667221298, "grad_norm": 13.091225624084473, "learning_rate": 5e-06, "loss": 0.7692, "num_input_tokens_seen": 22380440, "step": 357 }, { "epoch": 1.1880199667221298, "loss": 0.8383287787437439, "loss_ce": 0.0010484822560101748, "loss_iou": 0.275390625, "loss_num": 0.05712890625, "loss_xval": 0.8359375, "num_input_tokens_seen": 22380440, "step": 357 }, { "epoch": 1.1913477537437605, "grad_norm": 23.891265869140625, "learning_rate": 5e-06, "loss": 0.9614, "num_input_tokens_seen": 22442636, "step": 358 }, { "epoch": 1.1913477537437605, "loss": 0.7865191698074341, "loss_ce": 0.0003863187157548964, "loss_iou": 0.255859375, "loss_num": 0.054931640625, "loss_xval": 0.78515625, "num_input_tokens_seen": 22442636, "step": 358 }, { "epoch": 1.194675540765391, "grad_norm": 15.345987319946289, "learning_rate": 5e-06, "loss": 0.9528, "num_input_tokens_seen": 22505236, "step": 359 }, { "epoch": 1.194675540765391, "loss": 0.9262025356292725, "loss_ce": 0.001642039860598743, "loss_iou": 0.296875, "loss_num": 0.06591796875, "loss_xval": 0.92578125, "num_input_tokens_seen": 22505236, "step": 359 }, { "epoch": 1.1980033277870217, "grad_norm": 14.718965530395508, "learning_rate": 5e-06, "loss": 0.7841, "num_input_tokens_seen": 22567380, "step": 360 }, { "epoch": 1.1980033277870217, "loss": 0.6921905875205994, "loss_ce": 5.192415846977383e-05, "loss_iou": 0.1806640625, "loss_num": 0.06640625, "loss_xval": 0.69140625, "num_input_tokens_seen": 22567380, "step": 360 }, { "epoch": 1.2013311148086523, "grad_norm": 13.652656555175781, "learning_rate": 5e-06, "loss": 0.8746, "num_input_tokens_seen": 22629572, "step": 361 }, { "epoch": 1.2013311148086523, "loss": 1.1247365474700928, "loss_ce": 0.0002248799428343773, "loss_iou": 0.359375, "loss_num": 0.0810546875, "loss_xval": 1.125, "num_input_tokens_seen": 22629572, "step": 361 }, { "epoch": 1.204658901830283, "grad_norm": 15.861446380615234, "learning_rate": 5e-06, "loss": 1.3159, "num_input_tokens_seen": 22693300, "step": 362 }, { "epoch": 1.204658901830283, "loss": 1.3501554727554321, "loss_ce": 0.002499245572835207, "loss_iou": 0.474609375, "loss_num": 0.07958984375, "loss_xval": 1.34375, "num_input_tokens_seen": 22693300, "step": 362 }, { "epoch": 1.2079866888519135, "grad_norm": 20.82423210144043, "learning_rate": 5e-06, "loss": 1.1648, "num_input_tokens_seen": 22755872, "step": 363 }, { "epoch": 1.2079866888519135, "loss": 1.4157440662384033, "loss_ce": 0.00021676908363588154, "loss_iou": 0.462890625, "loss_num": 0.09765625, "loss_xval": 1.4140625, "num_input_tokens_seen": 22755872, "step": 363 }, { "epoch": 1.2113144758735441, "grad_norm": 20.18094253540039, "learning_rate": 5e-06, "loss": 0.8797, "num_input_tokens_seen": 22818080, "step": 364 }, { "epoch": 1.2113144758735441, "loss": 0.9456205368041992, "loss_ce": 0.0012846407480537891, "loss_iou": 0.2734375, "loss_num": 0.07958984375, "loss_xval": 0.9453125, "num_input_tokens_seen": 22818080, "step": 364 }, { "epoch": 1.2146422628951747, "grad_norm": 7.951483249664307, "learning_rate": 5e-06, "loss": 0.9243, "num_input_tokens_seen": 22880152, "step": 365 }, { "epoch": 1.2146422628951747, "loss": 0.8751887679100037, "loss_ce": 6.672355812042952e-05, "loss_iou": 0.23828125, "loss_num": 0.0791015625, "loss_xval": 0.875, "num_input_tokens_seen": 22880152, "step": 365 }, { "epoch": 1.2179700499168054, "grad_norm": 19.344919204711914, "learning_rate": 5e-06, "loss": 1.329, "num_input_tokens_seen": 22944624, "step": 366 }, { "epoch": 1.2179700499168054, "loss": 1.4669451713562012, "loss_ce": 0.0011247888905927539, "loss_iou": 0.478515625, "loss_num": 0.10205078125, "loss_xval": 1.46875, "num_input_tokens_seen": 22944624, "step": 366 }, { "epoch": 1.221297836938436, "grad_norm": 21.321701049804688, "learning_rate": 5e-06, "loss": 1.0875, "num_input_tokens_seen": 23007596, "step": 367 }, { "epoch": 1.221297836938436, "loss": 0.9833582639694214, "loss_ce": 0.0009364050347357988, "loss_iou": 0.2578125, "loss_num": 0.09326171875, "loss_xval": 0.984375, "num_input_tokens_seen": 23007596, "step": 367 }, { "epoch": 1.2246256239600666, "grad_norm": 8.80086898803711, "learning_rate": 5e-06, "loss": 0.8868, "num_input_tokens_seen": 23069368, "step": 368 }, { "epoch": 1.2246256239600666, "loss": 1.0895016193389893, "loss_ce": 0.003808253910392523, "loss_iou": 0.349609375, "loss_num": 0.0771484375, "loss_xval": 1.0859375, "num_input_tokens_seen": 23069368, "step": 368 }, { "epoch": 1.2279534109816972, "grad_norm": 21.068599700927734, "learning_rate": 5e-06, "loss": 1.0821, "num_input_tokens_seen": 23132124, "step": 369 }, { "epoch": 1.2279534109816972, "loss": 1.1068928241729736, "loss_ce": 0.0021564981434494257, "loss_iou": 0.3828125, "loss_num": 0.06787109375, "loss_xval": 1.1015625, "num_input_tokens_seen": 23132124, "step": 369 }, { "epoch": 1.2312811980033278, "grad_norm": 21.534465789794922, "learning_rate": 5e-06, "loss": 1.2146, "num_input_tokens_seen": 23193332, "step": 370 }, { "epoch": 1.2312811980033278, "loss": 1.0006368160247803, "loss_ce": 0.0006367888418026268, "loss_iou": 0.302734375, "loss_num": 0.07861328125, "loss_xval": 1.0, "num_input_tokens_seen": 23193332, "step": 370 }, { "epoch": 1.2346089850249584, "grad_norm": 14.221662521362305, "learning_rate": 5e-06, "loss": 0.8722, "num_input_tokens_seen": 23256600, "step": 371 }, { "epoch": 1.2346089850249584, "loss": 0.6839093565940857, "loss_ce": 0.0020245739724487066, "loss_iou": 0.171875, "loss_num": 0.0673828125, "loss_xval": 0.68359375, "num_input_tokens_seen": 23256600, "step": 371 }, { "epoch": 1.237936772046589, "grad_norm": 19.116409301757812, "learning_rate": 5e-06, "loss": 1.1716, "num_input_tokens_seen": 23320588, "step": 372 }, { "epoch": 1.237936772046589, "loss": 1.195875883102417, "loss_ce": 0.0015399318654090166, "loss_iou": 0.41796875, "loss_num": 0.07177734375, "loss_xval": 1.1953125, "num_input_tokens_seen": 23320588, "step": 372 }, { "epoch": 1.2412645590682196, "grad_norm": 20.856689453125, "learning_rate": 5e-06, "loss": 1.1053, "num_input_tokens_seen": 23383736, "step": 373 }, { "epoch": 1.2412645590682196, "loss": 1.319981336593628, "loss_ce": 0.000889583898242563, "loss_iou": 0.486328125, "loss_num": 0.0693359375, "loss_xval": 1.3203125, "num_input_tokens_seen": 23383736, "step": 373 }, { "epoch": 1.2445923460898503, "grad_norm": 23.128137588500977, "learning_rate": 5e-06, "loss": 1.0939, "num_input_tokens_seen": 23447404, "step": 374 }, { "epoch": 1.2445923460898503, "loss": 1.1355268955230713, "loss_ce": 0.0002729090047068894, "loss_iou": 0.36328125, "loss_num": 0.08154296875, "loss_xval": 1.1328125, "num_input_tokens_seen": 23447404, "step": 374 }, { "epoch": 1.2479201331114809, "grad_norm": 21.000869750976562, "learning_rate": 5e-06, "loss": 0.9722, "num_input_tokens_seen": 23510332, "step": 375 }, { "epoch": 1.2479201331114809, "loss": 1.0928606986999512, "loss_ce": 0.0008197306888177991, "loss_iou": 0.3828125, "loss_num": 0.0654296875, "loss_xval": 1.09375, "num_input_tokens_seen": 23510332, "step": 375 }, { "epoch": 1.2512479201331115, "grad_norm": 13.809325218200684, "learning_rate": 5e-06, "loss": 1.0725, "num_input_tokens_seen": 23573012, "step": 376 }, { "epoch": 1.2512479201331115, "loss": 1.34040367603302, "loss_ce": 0.000559894135221839, "loss_iou": 0.453125, "loss_num": 0.0869140625, "loss_xval": 1.34375, "num_input_tokens_seen": 23573012, "step": 376 }, { "epoch": 1.254575707154742, "grad_norm": 16.060985565185547, "learning_rate": 5e-06, "loss": 1.3448, "num_input_tokens_seen": 23634628, "step": 377 }, { "epoch": 1.254575707154742, "loss": 1.31657075881958, "loss_ce": 0.003094116458669305, "loss_iou": 0.44921875, "loss_num": 0.0830078125, "loss_xval": 1.3125, "num_input_tokens_seen": 23634628, "step": 377 }, { "epoch": 1.2579034941763727, "grad_norm": 436.0984802246094, "learning_rate": 5e-06, "loss": 0.8717, "num_input_tokens_seen": 23697236, "step": 378 }, { "epoch": 1.2579034941763727, "loss": 1.0005334615707397, "loss_ce": 0.0005334580200724304, "loss_iou": 0.328125, "loss_num": 0.06884765625, "loss_xval": 1.0, "num_input_tokens_seen": 23697236, "step": 378 }, { "epoch": 1.2612312811980033, "grad_norm": 10.832720756530762, "learning_rate": 5e-06, "loss": 0.9671, "num_input_tokens_seen": 23760184, "step": 379 }, { "epoch": 1.2612312811980033, "loss": 1.089341163635254, "loss_ce": 0.0030375197529792786, "loss_iou": 0.326171875, "loss_num": 0.08642578125, "loss_xval": 1.0859375, "num_input_tokens_seen": 23760184, "step": 379 }, { "epoch": 1.264559068219634, "grad_norm": 9.947023391723633, "learning_rate": 5e-06, "loss": 1.0962, "num_input_tokens_seen": 23823200, "step": 380 }, { "epoch": 1.264559068219634, "loss": 1.3489611148834229, "loss_ce": 0.001304990379139781, "loss_iou": 0.4765625, "loss_num": 0.0791015625, "loss_xval": 1.34375, "num_input_tokens_seen": 23823200, "step": 380 }, { "epoch": 1.2678868552412645, "grad_norm": 7.415253162384033, "learning_rate": 5e-06, "loss": 1.1254, "num_input_tokens_seen": 23885312, "step": 381 }, { "epoch": 1.2678868552412645, "loss": 0.9771711826324463, "loss_ce": 0.00012035526742693037, "loss_iou": 0.2890625, "loss_num": 0.07958984375, "loss_xval": 0.9765625, "num_input_tokens_seen": 23885312, "step": 381 }, { "epoch": 1.2712146422628952, "grad_norm": 36.99331283569336, "learning_rate": 5e-06, "loss": 1.0931, "num_input_tokens_seen": 23947212, "step": 382 }, { "epoch": 1.2712146422628952, "loss": 0.9794172048568726, "loss_ce": 0.0023664243053644896, "loss_iou": 0.26171875, "loss_num": 0.09033203125, "loss_xval": 0.9765625, "num_input_tokens_seen": 23947212, "step": 382 }, { "epoch": 1.2745424292845258, "grad_norm": 33.45127487182617, "learning_rate": 5e-06, "loss": 1.0743, "num_input_tokens_seen": 24008928, "step": 383 }, { "epoch": 1.2745424292845258, "loss": 1.1936092376708984, "loss_ce": 0.0026912454050034285, "loss_iou": 0.349609375, "loss_num": 0.0986328125, "loss_xval": 1.1875, "num_input_tokens_seen": 24008928, "step": 383 }, { "epoch": 1.2778702163061564, "grad_norm": 17.22743034362793, "learning_rate": 5e-06, "loss": 0.957, "num_input_tokens_seen": 24070464, "step": 384 }, { "epoch": 1.2778702163061564, "loss": 1.098173975944519, "loss_ce": 0.0010060181375592947, "loss_iou": 0.361328125, "loss_num": 0.07470703125, "loss_xval": 1.09375, "num_input_tokens_seen": 24070464, "step": 384 }, { "epoch": 1.281198003327787, "grad_norm": 13.999910354614258, "learning_rate": 5e-06, "loss": 0.7722, "num_input_tokens_seen": 24134044, "step": 385 }, { "epoch": 1.281198003327787, "loss": 0.8826847672462463, "loss_ce": 0.0006046402850188315, "loss_iou": 0.310546875, "loss_num": 0.052490234375, "loss_xval": 0.8828125, "num_input_tokens_seen": 24134044, "step": 385 }, { "epoch": 1.2845257903494176, "grad_norm": 14.314371109008789, "learning_rate": 5e-06, "loss": 0.9921, "num_input_tokens_seen": 24196356, "step": 386 }, { "epoch": 1.2845257903494176, "loss": 0.8601129055023193, "loss_ce": 0.006353084463626146, "loss_iou": 0.2373046875, "loss_num": 0.07568359375, "loss_xval": 0.85546875, "num_input_tokens_seen": 24196356, "step": 386 }, { "epoch": 1.2878535773710482, "grad_norm": 21.428884506225586, "learning_rate": 5e-06, "loss": 1.2038, "num_input_tokens_seen": 24260864, "step": 387 }, { "epoch": 1.2878535773710482, "loss": 1.1476117372512817, "loss_ce": 0.0030804486013948917, "loss_iou": 0.384765625, "loss_num": 0.07470703125, "loss_xval": 1.140625, "num_input_tokens_seen": 24260864, "step": 387 }, { "epoch": 1.2911813643926788, "grad_norm": 34.2386474609375, "learning_rate": 5e-06, "loss": 1.0796, "num_input_tokens_seen": 24324640, "step": 388 }, { "epoch": 1.2911813643926788, "loss": 0.9835814833641052, "loss_ce": 0.00018302012176718563, "loss_iou": 0.33984375, "loss_num": 0.061279296875, "loss_xval": 0.984375, "num_input_tokens_seen": 24324640, "step": 388 }, { "epoch": 1.2945091514143094, "grad_norm": 19.89606285095215, "learning_rate": 5e-06, "loss": 0.9774, "num_input_tokens_seen": 24385828, "step": 389 }, { "epoch": 1.2945091514143094, "loss": 1.0042197704315186, "loss_ce": 0.0015341450925916433, "loss_iou": 0.3125, "loss_num": 0.0751953125, "loss_xval": 1.0, "num_input_tokens_seen": 24385828, "step": 389 }, { "epoch": 1.29783693843594, "grad_norm": 8.920870780944824, "learning_rate": 5e-06, "loss": 0.801, "num_input_tokens_seen": 24448564, "step": 390 }, { "epoch": 1.29783693843594, "loss": 0.9892621040344238, "loss_ce": 0.0007366967620328069, "loss_iou": 0.376953125, "loss_num": 0.046875, "loss_xval": 0.98828125, "num_input_tokens_seen": 24448564, "step": 390 }, { "epoch": 1.3011647254575707, "grad_norm": 32.356964111328125, "learning_rate": 5e-06, "loss": 0.981, "num_input_tokens_seen": 24511484, "step": 391 }, { "epoch": 1.3011647254575707, "loss": 0.9405779838562012, "loss_ce": 0.002101422054693103, "loss_iou": 0.236328125, "loss_num": 0.09326171875, "loss_xval": 0.9375, "num_input_tokens_seen": 24511484, "step": 391 }, { "epoch": 1.3044925124792013, "grad_norm": 15.254705429077148, "learning_rate": 5e-06, "loss": 0.8256, "num_input_tokens_seen": 24574080, "step": 392 }, { "epoch": 1.3044925124792013, "loss": 0.8065765500068665, "loss_ce": 0.00018007968901656568, "loss_iou": 0.2734375, "loss_num": 0.052001953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 24574080, "step": 392 }, { "epoch": 1.307820299500832, "grad_norm": 14.248711585998535, "learning_rate": 5e-06, "loss": 0.9474, "num_input_tokens_seen": 24637900, "step": 393 }, { "epoch": 1.307820299500832, "loss": 0.9951915144920349, "loss_ce": 7.434465078404173e-05, "loss_iou": 0.369140625, "loss_num": 0.0517578125, "loss_xval": 0.99609375, "num_input_tokens_seen": 24637900, "step": 393 }, { "epoch": 1.3111480865224625, "grad_norm": 11.03234577178955, "learning_rate": 5e-06, "loss": 1.1003, "num_input_tokens_seen": 24701596, "step": 394 }, { "epoch": 1.3111480865224625, "loss": 1.0410985946655273, "loss_ce": 0.008383785374462605, "loss_iou": 0.34765625, "loss_num": 0.0673828125, "loss_xval": 1.03125, "num_input_tokens_seen": 24701596, "step": 394 }, { "epoch": 1.3144758735440931, "grad_norm": 12.493695259094238, "learning_rate": 5e-06, "loss": 0.9812, "num_input_tokens_seen": 24764656, "step": 395 }, { "epoch": 1.3144758735440931, "loss": 0.9218541383743286, "loss_ce": 0.0004674248048104346, "loss_iou": 0.318359375, "loss_num": 0.056884765625, "loss_xval": 0.921875, "num_input_tokens_seen": 24764656, "step": 395 }, { "epoch": 1.3178036605657237, "grad_norm": 12.735949516296387, "learning_rate": 5e-06, "loss": 0.6976, "num_input_tokens_seen": 24825960, "step": 396 }, { "epoch": 1.3178036605657237, "loss": 0.6075152158737183, "loss_ce": 0.00039857986848801374, "loss_iou": 0.13671875, "loss_num": 0.06689453125, "loss_xval": 0.60546875, "num_input_tokens_seen": 24825960, "step": 396 }, { "epoch": 1.3211314475873543, "grad_norm": 12.757513999938965, "learning_rate": 5e-06, "loss": 1.103, "num_input_tokens_seen": 24888016, "step": 397 }, { "epoch": 1.3211314475873543, "loss": 1.0529699325561523, "loss_ce": 0.0004796354041900486, "loss_iou": 0.345703125, "loss_num": 0.072265625, "loss_xval": 1.0546875, "num_input_tokens_seen": 24888016, "step": 397 }, { "epoch": 1.324459234608985, "grad_norm": 18.36353302001953, "learning_rate": 5e-06, "loss": 1.0599, "num_input_tokens_seen": 24951520, "step": 398 }, { "epoch": 1.324459234608985, "loss": 0.9554644823074341, "loss_ce": 0.0013629624154418707, "loss_iou": 0.333984375, "loss_num": 0.0576171875, "loss_xval": 0.953125, "num_input_tokens_seen": 24951520, "step": 398 }, { "epoch": 1.3277870216306156, "grad_norm": 10.953916549682617, "learning_rate": 5e-06, "loss": 1.0773, "num_input_tokens_seen": 25013892, "step": 399 }, { "epoch": 1.3277870216306156, "loss": 0.9306296110153198, "loss_ce": 0.0004538163193501532, "loss_iou": 0.302734375, "loss_num": 0.064453125, "loss_xval": 0.9296875, "num_input_tokens_seen": 25013892, "step": 399 }, { "epoch": 1.3311148086522462, "grad_norm": 55.806068420410156, "learning_rate": 5e-06, "loss": 1.1304, "num_input_tokens_seen": 25077924, "step": 400 }, { "epoch": 1.3311148086522462, "loss": 1.146446943283081, "loss_ce": 0.001427447539754212, "loss_iou": 0.412109375, "loss_num": 0.06396484375, "loss_xval": 1.1484375, "num_input_tokens_seen": 25077924, "step": 400 }, { "epoch": 1.3344425956738768, "grad_norm": 15.109946250915527, "learning_rate": 5e-06, "loss": 0.8462, "num_input_tokens_seen": 25141284, "step": 401 }, { "epoch": 1.3344425956738768, "loss": 0.7409437894821167, "loss_ce": 0.0007094530155882239, "loss_iou": 0.203125, "loss_num": 0.06640625, "loss_xval": 0.7421875, "num_input_tokens_seen": 25141284, "step": 401 }, { "epoch": 1.3377703826955074, "grad_norm": 22.096900939941406, "learning_rate": 5e-06, "loss": 0.8861, "num_input_tokens_seen": 25204892, "step": 402 }, { "epoch": 1.3377703826955074, "loss": 0.6792917847633362, "loss_ce": 0.0011301651829853654, "loss_iou": 0.189453125, "loss_num": 0.06005859375, "loss_xval": 0.6796875, "num_input_tokens_seen": 25204892, "step": 402 }, { "epoch": 1.341098169717138, "grad_norm": 19.937292098999023, "learning_rate": 5e-06, "loss": 0.9353, "num_input_tokens_seen": 25267472, "step": 403 }, { "epoch": 1.341098169717138, "loss": 0.7488300800323486, "loss_ce": 5.078014510218054e-05, "loss_iou": 0.248046875, "loss_num": 0.05078125, "loss_xval": 0.75, "num_input_tokens_seen": 25267472, "step": 403 }, { "epoch": 1.3444259567387689, "grad_norm": 13.143715858459473, "learning_rate": 5e-06, "loss": 0.9148, "num_input_tokens_seen": 25329780, "step": 404 }, { "epoch": 1.3444259567387689, "loss": 0.7400535941123962, "loss_ce": 0.0003075096756219864, "loss_iou": 0.283203125, "loss_num": 0.03466796875, "loss_xval": 0.73828125, "num_input_tokens_seen": 25329780, "step": 404 }, { "epoch": 1.3477537437603995, "grad_norm": 11.737482070922852, "learning_rate": 5e-06, "loss": 0.9066, "num_input_tokens_seen": 25392832, "step": 405 }, { "epoch": 1.3477537437603995, "loss": 0.8952862620353699, "loss_ce": 0.0019756986293941736, "loss_iou": 0.2890625, "loss_num": 0.06298828125, "loss_xval": 0.89453125, "num_input_tokens_seen": 25392832, "step": 405 }, { "epoch": 1.35108153078203, "grad_norm": 9.287886619567871, "learning_rate": 5e-06, "loss": 0.9538, "num_input_tokens_seen": 25456848, "step": 406 }, { "epoch": 1.35108153078203, "loss": 0.9912151098251343, "loss_ce": 0.0024456141982227564, "loss_iou": 0.326171875, "loss_num": 0.06689453125, "loss_xval": 0.98828125, "num_input_tokens_seen": 25456848, "step": 406 }, { "epoch": 1.3544093178036607, "grad_norm": 17.6854190826416, "learning_rate": 5e-06, "loss": 0.9268, "num_input_tokens_seen": 25517152, "step": 407 }, { "epoch": 1.3544093178036607, "loss": 1.2912185192108154, "loss_ce": 0.00020294796559028327, "loss_iou": 0.392578125, "loss_num": 0.10107421875, "loss_xval": 1.2890625, "num_input_tokens_seen": 25517152, "step": 407 }, { "epoch": 1.3577371048252913, "grad_norm": 22.94211196899414, "learning_rate": 5e-06, "loss": 1.0581, "num_input_tokens_seen": 25579684, "step": 408 }, { "epoch": 1.3577371048252913, "loss": 0.8320871591567993, "loss_ce": 0.010798115283250809, "loss_iou": 0.296875, "loss_num": 0.045654296875, "loss_xval": 0.8203125, "num_input_tokens_seen": 25579684, "step": 408 }, { "epoch": 1.361064891846922, "grad_norm": 30.505630493164062, "learning_rate": 5e-06, "loss": 1.0947, "num_input_tokens_seen": 25642760, "step": 409 }, { "epoch": 1.361064891846922, "loss": 1.221017837524414, "loss_ce": 0.002145861741155386, "loss_iou": 0.396484375, "loss_num": 0.0849609375, "loss_xval": 1.21875, "num_input_tokens_seen": 25642760, "step": 409 }, { "epoch": 1.3643926788685525, "grad_norm": 19.50095558166504, "learning_rate": 5e-06, "loss": 0.8722, "num_input_tokens_seen": 25704444, "step": 410 }, { "epoch": 1.3643926788685525, "loss": 0.7587941884994507, "loss_ce": 0.0013478758046403527, "loss_iou": 0.1953125, "loss_num": 0.0732421875, "loss_xval": 0.7578125, "num_input_tokens_seen": 25704444, "step": 410 }, { "epoch": 1.3677204658901831, "grad_norm": 10.939312934875488, "learning_rate": 5e-06, "loss": 1.02, "num_input_tokens_seen": 25768724, "step": 411 }, { "epoch": 1.3677204658901831, "loss": 1.0943515300750732, "loss_ce": 0.00035737972939386964, "loss_iou": 0.396484375, "loss_num": 0.06005859375, "loss_xval": 1.09375, "num_input_tokens_seen": 25768724, "step": 411 }, { "epoch": 1.3710482529118138, "grad_norm": 10.127262115478516, "learning_rate": 5e-06, "loss": 0.8937, "num_input_tokens_seen": 25831612, "step": 412 }, { "epoch": 1.3710482529118138, "loss": 0.7164933085441589, "loss_ce": 6.263401155592874e-05, "loss_iou": 0.22265625, "loss_num": 0.05419921875, "loss_xval": 0.71484375, "num_input_tokens_seen": 25831612, "step": 412 }, { "epoch": 1.3743760399334444, "grad_norm": 17.616641998291016, "learning_rate": 5e-06, "loss": 0.946, "num_input_tokens_seen": 25895512, "step": 413 }, { "epoch": 1.3743760399334444, "loss": 0.8410751223564148, "loss_ce": 0.0011092738714069128, "loss_iou": 0.26171875, "loss_num": 0.06298828125, "loss_xval": 0.83984375, "num_input_tokens_seen": 25895512, "step": 413 }, { "epoch": 1.377703826955075, "grad_norm": 15.273797035217285, "learning_rate": 5e-06, "loss": 1.1047, "num_input_tokens_seen": 25958812, "step": 414 }, { "epoch": 1.377703826955075, "loss": 1.2649059295654297, "loss_ce": 0.0007457173778675497, "loss_iou": 0.4296875, "loss_num": 0.08154296875, "loss_xval": 1.265625, "num_input_tokens_seen": 25958812, "step": 414 }, { "epoch": 1.3810316139767056, "grad_norm": 12.507852554321289, "learning_rate": 5e-06, "loss": 0.7459, "num_input_tokens_seen": 26021692, "step": 415 }, { "epoch": 1.3810316139767056, "loss": 0.6690515875816345, "loss_ce": 0.0011438779765740037, "loss_iou": 0.21484375, "loss_num": 0.047607421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 26021692, "step": 415 }, { "epoch": 1.3843594009983362, "grad_norm": 13.674393653869629, "learning_rate": 5e-06, "loss": 1.1989, "num_input_tokens_seen": 26085644, "step": 416 }, { "epoch": 1.3843594009983362, "loss": 1.2354817390441895, "loss_ce": 0.006966104730963707, "loss_iou": 0.44921875, "loss_num": 0.0654296875, "loss_xval": 1.2265625, "num_input_tokens_seen": 26085644, "step": 416 }, { "epoch": 1.3876871880199668, "grad_norm": 9.015851020812988, "learning_rate": 5e-06, "loss": 1.109, "num_input_tokens_seen": 26146280, "step": 417 }, { "epoch": 1.3876871880199668, "loss": 0.9011063575744629, "loss_ce": 0.00022745339083485305, "loss_iou": 0.23828125, "loss_num": 0.0849609375, "loss_xval": 0.90234375, "num_input_tokens_seen": 26146280, "step": 417 }, { "epoch": 1.3910149750415974, "grad_norm": 8.712250709533691, "learning_rate": 5e-06, "loss": 0.99, "num_input_tokens_seen": 26210000, "step": 418 }, { "epoch": 1.3910149750415974, "loss": 0.98512864112854, "loss_ce": 0.000509544915985316, "loss_iou": 0.35546875, "loss_num": 0.054443359375, "loss_xval": 0.984375, "num_input_tokens_seen": 26210000, "step": 418 }, { "epoch": 1.394342762063228, "grad_norm": 27.899654388427734, "learning_rate": 5e-06, "loss": 1.2184, "num_input_tokens_seen": 26273148, "step": 419 }, { "epoch": 1.394342762063228, "loss": 1.219191551208496, "loss_ce": 0.0004415863659232855, "loss_iou": 0.435546875, "loss_num": 0.0693359375, "loss_xval": 1.21875, "num_input_tokens_seen": 26273148, "step": 419 }, { "epoch": 1.3976705490848587, "grad_norm": 25.741943359375, "learning_rate": 5e-06, "loss": 1.0308, "num_input_tokens_seen": 26335908, "step": 420 }, { "epoch": 1.3976705490848587, "loss": 1.1688976287841797, "loss_ce": 0.0014171210350468755, "loss_iou": 0.33984375, "loss_num": 0.09814453125, "loss_xval": 1.1640625, "num_input_tokens_seen": 26335908, "step": 420 }, { "epoch": 1.4009983361064893, "grad_norm": 22.545154571533203, "learning_rate": 5e-06, "loss": 1.0516, "num_input_tokens_seen": 26400416, "step": 421 }, { "epoch": 1.4009983361064893, "loss": 1.0198687314987183, "loss_ce": 9.335303911939263e-05, "loss_iou": 0.365234375, "loss_num": 0.05810546875, "loss_xval": 1.0234375, "num_input_tokens_seen": 26400416, "step": 421 }, { "epoch": 1.4043261231281199, "grad_norm": 13.353900909423828, "learning_rate": 5e-06, "loss": 1.134, "num_input_tokens_seen": 26463048, "step": 422 }, { "epoch": 1.4043261231281199, "loss": 0.8695308566093445, "loss_ce": 0.00039019936230033636, "loss_iou": 0.2060546875, "loss_num": 0.091796875, "loss_xval": 0.8671875, "num_input_tokens_seen": 26463048, "step": 422 }, { "epoch": 1.4076539101497505, "grad_norm": 21.251747131347656, "learning_rate": 5e-06, "loss": 1.1205, "num_input_tokens_seen": 26525460, "step": 423 }, { "epoch": 1.4076539101497505, "loss": 1.0747185945510864, "loss_ce": 0.0024529588408768177, "loss_iou": 0.373046875, "loss_num": 0.06494140625, "loss_xval": 1.0703125, "num_input_tokens_seen": 26525460, "step": 423 }, { "epoch": 1.410981697171381, "grad_norm": 23.78187370300293, "learning_rate": 5e-06, "loss": 1.0137, "num_input_tokens_seen": 26590220, "step": 424 }, { "epoch": 1.410981697171381, "loss": 1.0101699829101562, "loss_ce": 0.0006484949844889343, "loss_iou": 0.353515625, "loss_num": 0.060302734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 26590220, "step": 424 }, { "epoch": 1.4143094841930117, "grad_norm": 9.894415855407715, "learning_rate": 5e-06, "loss": 0.9023, "num_input_tokens_seen": 26653812, "step": 425 }, { "epoch": 1.4143094841930117, "loss": 0.7670003175735474, "loss_ce": 0.0008870094898156822, "loss_iou": 0.2353515625, "loss_num": 0.058837890625, "loss_xval": 0.765625, "num_input_tokens_seen": 26653812, "step": 425 }, { "epoch": 1.4176372712146423, "grad_norm": 16.12277603149414, "learning_rate": 5e-06, "loss": 1.0714, "num_input_tokens_seen": 26716944, "step": 426 }, { "epoch": 1.4176372712146423, "loss": 0.9022120237350464, "loss_ce": 0.0003565462538972497, "loss_iou": 0.294921875, "loss_num": 0.061767578125, "loss_xval": 0.90234375, "num_input_tokens_seen": 26716944, "step": 426 }, { "epoch": 1.420965058236273, "grad_norm": 17.212976455688477, "learning_rate": 5e-06, "loss": 1.0821, "num_input_tokens_seen": 26780096, "step": 427 }, { "epoch": 1.420965058236273, "loss": 1.079080581665039, "loss_ce": 0.00022315280511975288, "loss_iou": 0.38671875, "loss_num": 0.061279296875, "loss_xval": 1.078125, "num_input_tokens_seen": 26780096, "step": 427 }, { "epoch": 1.4242928452579036, "grad_norm": 24.057313919067383, "learning_rate": 5e-06, "loss": 0.7135, "num_input_tokens_seen": 26842224, "step": 428 }, { "epoch": 1.4242928452579036, "loss": 0.7545437812805176, "loss_ce": 0.006008637137711048, "loss_iou": 0.21484375, "loss_num": 0.06396484375, "loss_xval": 0.75, "num_input_tokens_seen": 26842224, "step": 428 }, { "epoch": 1.4276206322795342, "grad_norm": 25.677473068237305, "learning_rate": 5e-06, "loss": 1.1253, "num_input_tokens_seen": 26904704, "step": 429 }, { "epoch": 1.4276206322795342, "loss": 1.0171592235565186, "loss_ce": 6.944774941075593e-05, "loss_iou": 0.326171875, "loss_num": 0.0732421875, "loss_xval": 1.015625, "num_input_tokens_seen": 26904704, "step": 429 }, { "epoch": 1.4309484193011648, "grad_norm": 21.80251121520996, "learning_rate": 5e-06, "loss": 1.0098, "num_input_tokens_seen": 26967160, "step": 430 }, { "epoch": 1.4309484193011648, "loss": 1.3470512628555298, "loss_ce": 0.001348139368928969, "loss_iou": 0.435546875, "loss_num": 0.09423828125, "loss_xval": 1.34375, "num_input_tokens_seen": 26967160, "step": 430 }, { "epoch": 1.4342762063227954, "grad_norm": 13.05634880065918, "learning_rate": 5e-06, "loss": 0.9742, "num_input_tokens_seen": 27029688, "step": 431 }, { "epoch": 1.4342762063227954, "loss": 0.8077201843261719, "loss_ce": 0.0001029723061947152, "loss_iou": 0.275390625, "loss_num": 0.051513671875, "loss_xval": 0.80859375, "num_input_tokens_seen": 27029688, "step": 431 }, { "epoch": 1.437603993344426, "grad_norm": 18.26580238342285, "learning_rate": 5e-06, "loss": 1.2361, "num_input_tokens_seen": 27092720, "step": 432 }, { "epoch": 1.437603993344426, "loss": 1.205154299736023, "loss_ce": 0.0005644945194944739, "loss_iou": 0.396484375, "loss_num": 0.08203125, "loss_xval": 1.203125, "num_input_tokens_seen": 27092720, "step": 432 }, { "epoch": 1.4409317803660566, "grad_norm": 18.50592613220215, "learning_rate": 5e-06, "loss": 0.8447, "num_input_tokens_seen": 27155576, "step": 433 }, { "epoch": 1.4409317803660566, "loss": 0.755477249622345, "loss_ce": 0.0012047875206917524, "loss_iou": 0.201171875, "loss_num": 0.0703125, "loss_xval": 0.75390625, "num_input_tokens_seen": 27155576, "step": 433 }, { "epoch": 1.4442595673876872, "grad_norm": 36.2466926574707, "learning_rate": 5e-06, "loss": 0.9561, "num_input_tokens_seen": 27218704, "step": 434 }, { "epoch": 1.4442595673876872, "loss": 0.901737630367279, "loss_ce": 0.0006145976367406547, "loss_iou": 0.28125, "loss_num": 0.06787109375, "loss_xval": 0.90234375, "num_input_tokens_seen": 27218704, "step": 434 }, { "epoch": 1.4475873544093179, "grad_norm": 22.053478240966797, "learning_rate": 5e-06, "loss": 0.9297, "num_input_tokens_seen": 27282456, "step": 435 }, { "epoch": 1.4475873544093179, "loss": 0.7608993053436279, "loss_ce": 0.00040124962106347084, "loss_iou": 0.26171875, "loss_num": 0.04736328125, "loss_xval": 0.76171875, "num_input_tokens_seen": 27282456, "step": 435 }, { "epoch": 1.4509151414309485, "grad_norm": 9.609686851501465, "learning_rate": 5e-06, "loss": 1.0519, "num_input_tokens_seen": 27345896, "step": 436 }, { "epoch": 1.4509151414309485, "loss": 1.031361699104309, "loss_ce": 0.00011173041275469586, "loss_iou": 0.361328125, "loss_num": 0.06103515625, "loss_xval": 1.03125, "num_input_tokens_seen": 27345896, "step": 436 }, { "epoch": 1.454242928452579, "grad_norm": 33.34504318237305, "learning_rate": 5e-06, "loss": 1.1586, "num_input_tokens_seen": 27408568, "step": 437 }, { "epoch": 1.454242928452579, "loss": 1.1554999351501465, "loss_ce": 0.00022656703367829323, "loss_iou": 0.36328125, "loss_num": 0.08544921875, "loss_xval": 1.15625, "num_input_tokens_seen": 27408568, "step": 437 }, { "epoch": 1.4575707154742097, "grad_norm": 13.927755355834961, "learning_rate": 5e-06, "loss": 0.9148, "num_input_tokens_seen": 27470936, "step": 438 }, { "epoch": 1.4575707154742097, "loss": 0.6128742694854736, "loss_ce": 0.0006916266866028309, "loss_iou": 0.171875, "loss_num": 0.053466796875, "loss_xval": 0.61328125, "num_input_tokens_seen": 27470936, "step": 438 }, { "epoch": 1.4608985024958403, "grad_norm": 17.78131103515625, "learning_rate": 5e-06, "loss": 0.7538, "num_input_tokens_seen": 27531828, "step": 439 }, { "epoch": 1.4608985024958403, "loss": 0.9299443960189819, "loss_ce": 0.006360380910336971, "loss_iou": 0.3046875, "loss_num": 0.0625, "loss_xval": 0.921875, "num_input_tokens_seen": 27531828, "step": 439 }, { "epoch": 1.464226289517471, "grad_norm": 18.81191062927246, "learning_rate": 5e-06, "loss": 1.0091, "num_input_tokens_seen": 27595300, "step": 440 }, { "epoch": 1.464226289517471, "loss": 1.0845956802368164, "loss_ce": 0.002076168777421117, "loss_iou": 0.3671875, "loss_num": 0.06982421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 27595300, "step": 440 }, { "epoch": 1.4675540765391015, "grad_norm": 21.179147720336914, "learning_rate": 5e-06, "loss": 1.321, "num_input_tokens_seen": 27658152, "step": 441 }, { "epoch": 1.4675540765391015, "loss": 1.1221023797988892, "loss_ce": 0.0010695864912122488, "loss_iou": 0.328125, "loss_num": 0.0927734375, "loss_xval": 1.1171875, "num_input_tokens_seen": 27658152, "step": 441 }, { "epoch": 1.4708818635607321, "grad_norm": 22.759368896484375, "learning_rate": 5e-06, "loss": 0.9895, "num_input_tokens_seen": 27718968, "step": 442 }, { "epoch": 1.4708818635607321, "loss": 1.2847583293914795, "loss_ce": 0.0005785563844256103, "loss_iou": 0.4453125, "loss_num": 0.07861328125, "loss_xval": 1.28125, "num_input_tokens_seen": 27718968, "step": 442 }, { "epoch": 1.4742096505823628, "grad_norm": 16.21900749206543, "learning_rate": 5e-06, "loss": 0.7664, "num_input_tokens_seen": 27781400, "step": 443 }, { "epoch": 1.4742096505823628, "loss": 0.9049963355064392, "loss_ce": 8.908379095373675e-05, "loss_iou": 0.34375, "loss_num": 0.043212890625, "loss_xval": 0.90625, "num_input_tokens_seen": 27781400, "step": 443 }, { "epoch": 1.4775374376039934, "grad_norm": 33.45806121826172, "learning_rate": 5e-06, "loss": 0.9512, "num_input_tokens_seen": 27843292, "step": 444 }, { "epoch": 1.4775374376039934, "loss": 0.6326927542686462, "loss_ce": 0.0003685495466925204, "loss_iou": 0.1826171875, "loss_num": 0.053466796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 27843292, "step": 444 }, { "epoch": 1.480865224625624, "grad_norm": 22.78878402709961, "learning_rate": 5e-06, "loss": 0.8393, "num_input_tokens_seen": 27905576, "step": 445 }, { "epoch": 1.480865224625624, "loss": 1.0241775512695312, "loss_ce": 0.0009841093560680747, "loss_iou": 0.359375, "loss_num": 0.06103515625, "loss_xval": 1.0234375, "num_input_tokens_seen": 27905576, "step": 445 }, { "epoch": 1.4841930116472546, "grad_norm": 27.616863250732422, "learning_rate": 5e-06, "loss": 1.2214, "num_input_tokens_seen": 27967020, "step": 446 }, { "epoch": 1.4841930116472546, "loss": 1.3724949359893799, "loss_ce": 0.0009129252866841853, "loss_iou": 0.486328125, "loss_num": 0.07958984375, "loss_xval": 1.375, "num_input_tokens_seen": 27967020, "step": 446 }, { "epoch": 1.4875207986688852, "grad_norm": 9.08810043334961, "learning_rate": 5e-06, "loss": 0.7675, "num_input_tokens_seen": 28028872, "step": 447 }, { "epoch": 1.4875207986688852, "loss": 0.728617787361145, "loss_ce": 0.0010786643251776695, "loss_iou": 0.22265625, "loss_num": 0.05615234375, "loss_xval": 0.7265625, "num_input_tokens_seen": 28028872, "step": 447 }, { "epoch": 1.4908485856905158, "grad_norm": 30.69407081604004, "learning_rate": 5e-06, "loss": 1.145, "num_input_tokens_seen": 28090452, "step": 448 }, { "epoch": 1.4908485856905158, "loss": 1.0100305080413818, "loss_ce": 0.0014855489134788513, "loss_iou": 0.3046875, "loss_num": 0.07958984375, "loss_xval": 1.0078125, "num_input_tokens_seen": 28090452, "step": 448 }, { "epoch": 1.4941763727121464, "grad_norm": 31.316987991333008, "learning_rate": 5e-06, "loss": 1.292, "num_input_tokens_seen": 28153428, "step": 449 }, { "epoch": 1.4941763727121464, "loss": 1.4016584157943726, "loss_ce": 0.0002912741038016975, "loss_iou": 0.43359375, "loss_num": 0.1064453125, "loss_xval": 1.3984375, "num_input_tokens_seen": 28153428, "step": 449 }, { "epoch": 1.497504159733777, "grad_norm": 23.239837646484375, "learning_rate": 5e-06, "loss": 0.9771, "num_input_tokens_seen": 28214760, "step": 450 }, { "epoch": 1.497504159733777, "loss": 0.9000834226608276, "loss_ce": 0.003110786434262991, "loss_iou": 0.25390625, "loss_num": 0.078125, "loss_xval": 0.8984375, "num_input_tokens_seen": 28214760, "step": 450 }, { "epoch": 1.5008319467554077, "grad_norm": 21.81422996520996, "learning_rate": 5e-06, "loss": 0.5704, "num_input_tokens_seen": 28277060, "step": 451 }, { "epoch": 1.5008319467554077, "loss": 0.7159514427185059, "loss_ce": 0.0011076531372964382, "loss_iou": 0.2490234375, "loss_num": 0.04345703125, "loss_xval": 0.71484375, "num_input_tokens_seen": 28277060, "step": 451 }, { "epoch": 1.5041597337770383, "grad_norm": 9.91671371459961, "learning_rate": 5e-06, "loss": 0.8221, "num_input_tokens_seen": 28339296, "step": 452 }, { "epoch": 1.5041597337770383, "loss": 0.70356684923172, "loss_ce": 0.000197705885511823, "loss_iou": 0.2080078125, "loss_num": 0.0576171875, "loss_xval": 0.703125, "num_input_tokens_seen": 28339296, "step": 452 }, { "epoch": 1.5074875207986689, "grad_norm": 23.50018882751465, "learning_rate": 5e-06, "loss": 1.3437, "num_input_tokens_seen": 28402868, "step": 453 }, { "epoch": 1.5074875207986689, "loss": 1.3873112201690674, "loss_ce": 0.002667609602212906, "loss_iou": 0.490234375, "loss_num": 0.0810546875, "loss_xval": 1.3828125, "num_input_tokens_seen": 28402868, "step": 453 }, { "epoch": 1.5108153078202995, "grad_norm": 15.448333740234375, "learning_rate": 5e-06, "loss": 0.7128, "num_input_tokens_seen": 28465204, "step": 454 }, { "epoch": 1.5108153078202995, "loss": 0.6436554789543152, "loss_ce": 0.003884966252371669, "loss_iou": 0.1875, "loss_num": 0.052734375, "loss_xval": 0.640625, "num_input_tokens_seen": 28465204, "step": 454 }, { "epoch": 1.51414309484193, "grad_norm": 24.49448585510254, "learning_rate": 5e-06, "loss": 0.909, "num_input_tokens_seen": 28525340, "step": 455 }, { "epoch": 1.51414309484193, "loss": 1.1397053003311157, "loss_ce": 0.000789280456956476, "loss_iou": 0.373046875, "loss_num": 0.0791015625, "loss_xval": 1.140625, "num_input_tokens_seen": 28525340, "step": 455 }, { "epoch": 1.5174708818635607, "grad_norm": 21.09617042541504, "learning_rate": 5e-06, "loss": 0.9572, "num_input_tokens_seen": 28588404, "step": 456 }, { "epoch": 1.5174708818635607, "loss": 0.9665936231613159, "loss_ce": 0.0002849780721589923, "loss_iou": 0.3046875, "loss_num": 0.0712890625, "loss_xval": 0.96484375, "num_input_tokens_seen": 28588404, "step": 456 }, { "epoch": 1.5207986688851913, "grad_norm": 12.393025398254395, "learning_rate": 5e-06, "loss": 1.0552, "num_input_tokens_seen": 28652980, "step": 457 }, { "epoch": 1.5207986688851913, "loss": 1.1910388469696045, "loss_ce": 0.0006091375253163278, "loss_iou": 0.439453125, "loss_num": 0.062255859375, "loss_xval": 1.1875, "num_input_tokens_seen": 28652980, "step": 457 }, { "epoch": 1.524126455906822, "grad_norm": 16.389198303222656, "learning_rate": 5e-06, "loss": 0.8484, "num_input_tokens_seen": 28714288, "step": 458 }, { "epoch": 1.524126455906822, "loss": 0.7605820894241333, "loss_ce": 8.401063678320497e-05, "loss_iou": 0.2470703125, "loss_num": 0.05322265625, "loss_xval": 0.76171875, "num_input_tokens_seen": 28714288, "step": 458 }, { "epoch": 1.5274542429284526, "grad_norm": 24.185163497924805, "learning_rate": 5e-06, "loss": 1.0253, "num_input_tokens_seen": 28776688, "step": 459 }, { "epoch": 1.5274542429284526, "loss": 1.1079021692276, "loss_ce": 0.0012127062072977424, "loss_iou": 0.375, "loss_num": 0.07080078125, "loss_xval": 1.109375, "num_input_tokens_seen": 28776688, "step": 459 }, { "epoch": 1.5307820299500832, "grad_norm": 14.731950759887695, "learning_rate": 5e-06, "loss": 0.9914, "num_input_tokens_seen": 28837916, "step": 460 }, { "epoch": 1.5307820299500832, "loss": 0.8773554563522339, "loss_ce": 0.0004023656074423343, "loss_iou": 0.2578125, "loss_num": 0.07275390625, "loss_xval": 0.875, "num_input_tokens_seen": 28837916, "step": 460 }, { "epoch": 1.5341098169717138, "grad_norm": 14.834396362304688, "learning_rate": 5e-06, "loss": 0.9676, "num_input_tokens_seen": 28901104, "step": 461 }, { "epoch": 1.5341098169717138, "loss": 0.960180401802063, "loss_ce": 0.0007077236077748239, "loss_iou": 0.27734375, "loss_num": 0.0810546875, "loss_xval": 0.9609375, "num_input_tokens_seen": 28901104, "step": 461 }, { "epoch": 1.5374376039933444, "grad_norm": 32.69205093383789, "learning_rate": 5e-06, "loss": 1.1904, "num_input_tokens_seen": 28965764, "step": 462 }, { "epoch": 1.5374376039933444, "loss": 1.1993427276611328, "loss_ce": 0.000856377650052309, "loss_iou": 0.41796875, "loss_num": 0.07275390625, "loss_xval": 1.1953125, "num_input_tokens_seen": 28965764, "step": 462 }, { "epoch": 1.540765391014975, "grad_norm": 42.33073806762695, "learning_rate": 5e-06, "loss": 1.0913, "num_input_tokens_seen": 29029176, "step": 463 }, { "epoch": 1.540765391014975, "loss": 1.1216275691986084, "loss_ce": 0.0010221214033663273, "loss_iou": 0.39453125, "loss_num": 0.06640625, "loss_xval": 1.1171875, "num_input_tokens_seen": 29029176, "step": 463 }, { "epoch": 1.5440931780366056, "grad_norm": 18.55644989013672, "learning_rate": 5e-06, "loss": 1.0204, "num_input_tokens_seen": 29093028, "step": 464 }, { "epoch": 1.5440931780366056, "loss": 1.062420129776001, "loss_ce": 0.0005303866928443313, "loss_iou": 0.34375, "loss_num": 0.0751953125, "loss_xval": 1.0625, "num_input_tokens_seen": 29093028, "step": 464 }, { "epoch": 1.5474209650582362, "grad_norm": 13.655355453491211, "learning_rate": 5e-06, "loss": 1.0015, "num_input_tokens_seen": 29156672, "step": 465 }, { "epoch": 1.5474209650582362, "loss": 0.8456342220306396, "loss_ce": 0.0011518537066876888, "loss_iou": 0.2578125, "loss_num": 0.0654296875, "loss_xval": 0.84375, "num_input_tokens_seen": 29156672, "step": 465 }, { "epoch": 1.5507487520798668, "grad_norm": 10.323616981506348, "learning_rate": 5e-06, "loss": 1.0746, "num_input_tokens_seen": 29222096, "step": 466 }, { "epoch": 1.5507487520798668, "loss": 0.8638239502906799, "loss_ce": 0.0012750964378938079, "loss_iou": 0.310546875, "loss_num": 0.048095703125, "loss_xval": 0.86328125, "num_input_tokens_seen": 29222096, "step": 466 }, { "epoch": 1.5540765391014975, "grad_norm": 26.647079467773438, "learning_rate": 5e-06, "loss": 1.1652, "num_input_tokens_seen": 29283692, "step": 467 }, { "epoch": 1.5540765391014975, "loss": 1.2289832830429077, "loss_ce": 0.0016883397474884987, "loss_iou": 0.361328125, "loss_num": 0.10107421875, "loss_xval": 1.2265625, "num_input_tokens_seen": 29283692, "step": 467 }, { "epoch": 1.557404326123128, "grad_norm": 43.2814826965332, "learning_rate": 5e-06, "loss": 0.8369, "num_input_tokens_seen": 29346056, "step": 468 }, { "epoch": 1.557404326123128, "loss": 1.0478403568267822, "loss_ce": 0.00023299624444916844, "loss_iou": 0.365234375, "loss_num": 0.0634765625, "loss_xval": 1.046875, "num_input_tokens_seen": 29346056, "step": 468 }, { "epoch": 1.5607321131447587, "grad_norm": 18.766624450683594, "learning_rate": 5e-06, "loss": 0.9844, "num_input_tokens_seen": 29407640, "step": 469 }, { "epoch": 1.5607321131447587, "loss": 0.979688286781311, "loss_ce": 0.0010506456019356847, "loss_iou": 0.326171875, "loss_num": 0.0654296875, "loss_xval": 0.98046875, "num_input_tokens_seen": 29407640, "step": 469 }, { "epoch": 1.5640599001663893, "grad_norm": 14.187026977539062, "learning_rate": 5e-06, "loss": 0.8348, "num_input_tokens_seen": 29470356, "step": 470 }, { "epoch": 1.5640599001663893, "loss": 0.87372887134552, "loss_ce": 0.0009262266685254872, "loss_iou": 0.302734375, "loss_num": 0.052978515625, "loss_xval": 0.87109375, "num_input_tokens_seen": 29470356, "step": 470 }, { "epoch": 1.56738768718802, "grad_norm": 20.762351989746094, "learning_rate": 5e-06, "loss": 0.849, "num_input_tokens_seen": 29533504, "step": 471 }, { "epoch": 1.56738768718802, "loss": 0.723970353603363, "loss_ce": 0.0013141179224476218, "loss_iou": 0.205078125, "loss_num": 0.0625, "loss_xval": 0.72265625, "num_input_tokens_seen": 29533504, "step": 471 }, { "epoch": 1.5707154742096505, "grad_norm": 33.652610778808594, "learning_rate": 5e-06, "loss": 1.0056, "num_input_tokens_seen": 29596932, "step": 472 }, { "epoch": 1.5707154742096505, "loss": 1.0479035377502441, "loss_ce": 0.0013947292463853955, "loss_iou": 0.390625, "loss_num": 0.052734375, "loss_xval": 1.046875, "num_input_tokens_seen": 29596932, "step": 472 }, { "epoch": 1.5740432612312811, "grad_norm": 14.91476058959961, "learning_rate": 5e-06, "loss": 0.9835, "num_input_tokens_seen": 29658944, "step": 473 }, { "epoch": 1.5740432612312811, "loss": 1.0528045892715454, "loss_ce": 7.020424527581781e-05, "loss_iou": 0.36328125, "loss_num": 0.0654296875, "loss_xval": 1.0546875, "num_input_tokens_seen": 29658944, "step": 473 }, { "epoch": 1.5773710482529117, "grad_norm": 16.2688045501709, "learning_rate": 5e-06, "loss": 0.9865, "num_input_tokens_seen": 29722048, "step": 474 }, { "epoch": 1.5773710482529117, "loss": 0.8592187166213989, "loss_ce": 0.0008203479810617864, "loss_iou": 0.302734375, "loss_num": 0.05029296875, "loss_xval": 0.859375, "num_input_tokens_seen": 29722048, "step": 474 }, { "epoch": 1.5806988352745424, "grad_norm": 17.728527069091797, "learning_rate": 5e-06, "loss": 0.7485, "num_input_tokens_seen": 29783572, "step": 475 }, { "epoch": 1.5806988352745424, "loss": 0.5610158443450928, "loss_ce": 0.0012623894726857543, "loss_iou": 0.11865234375, "loss_num": 0.064453125, "loss_xval": 0.55859375, "num_input_tokens_seen": 29783572, "step": 475 }, { "epoch": 1.584026622296173, "grad_norm": 14.14136028289795, "learning_rate": 5e-06, "loss": 0.7122, "num_input_tokens_seen": 29844864, "step": 476 }, { "epoch": 1.584026622296173, "loss": 0.6858819723129272, "loss_ce": 0.0007012711721472442, "loss_iou": 0.265625, "loss_num": 0.031005859375, "loss_xval": 0.68359375, "num_input_tokens_seen": 29844864, "step": 476 }, { "epoch": 1.5873544093178036, "grad_norm": 10.864879608154297, "learning_rate": 5e-06, "loss": 0.7911, "num_input_tokens_seen": 29907688, "step": 477 }, { "epoch": 1.5873544093178036, "loss": 0.7445006370544434, "loss_ce": 0.001092430087737739, "loss_iou": 0.26171875, "loss_num": 0.0439453125, "loss_xval": 0.7421875, "num_input_tokens_seen": 29907688, "step": 477 }, { "epoch": 1.5906821963394342, "grad_norm": 13.406692504882812, "learning_rate": 5e-06, "loss": 0.9287, "num_input_tokens_seen": 29970336, "step": 478 }, { "epoch": 1.5906821963394342, "loss": 1.076006531715393, "loss_ce": 7.882327190600336e-05, "loss_iou": 0.3828125, "loss_num": 0.06201171875, "loss_xval": 1.078125, "num_input_tokens_seen": 29970336, "step": 478 }, { "epoch": 1.5940099833610648, "grad_norm": 19.617048263549805, "learning_rate": 5e-06, "loss": 0.752, "num_input_tokens_seen": 30033136, "step": 479 }, { "epoch": 1.5940099833610648, "loss": 0.6752302646636963, "loss_ce": 0.0016462799394503236, "loss_iou": 0.1923828125, "loss_num": 0.0576171875, "loss_xval": 0.671875, "num_input_tokens_seen": 30033136, "step": 479 }, { "epoch": 1.5973377703826954, "grad_norm": 12.158072471618652, "learning_rate": 5e-06, "loss": 0.9534, "num_input_tokens_seen": 30095076, "step": 480 }, { "epoch": 1.5973377703826954, "loss": 0.911781370639801, "loss_ce": 0.0021134286653250456, "loss_iou": 0.255859375, "loss_num": 0.07958984375, "loss_xval": 0.91015625, "num_input_tokens_seen": 30095076, "step": 480 }, { "epoch": 1.600665557404326, "grad_norm": 27.628393173217773, "learning_rate": 5e-06, "loss": 0.9826, "num_input_tokens_seen": 30157372, "step": 481 }, { "epoch": 1.600665557404326, "loss": 0.9524816870689392, "loss_ce": 0.0003332292544655502, "loss_iou": 0.33984375, "loss_num": 0.054443359375, "loss_xval": 0.953125, "num_input_tokens_seen": 30157372, "step": 481 }, { "epoch": 1.6039933444259566, "grad_norm": 13.934282302856445, "learning_rate": 5e-06, "loss": 0.9427, "num_input_tokens_seen": 30219644, "step": 482 }, { "epoch": 1.6039933444259566, "loss": 0.8178160190582275, "loss_ce": 0.0014097215607762337, "loss_iou": 0.2255859375, "loss_num": 0.07373046875, "loss_xval": 0.81640625, "num_input_tokens_seen": 30219644, "step": 482 }, { "epoch": 1.6073211314475873, "grad_norm": 24.397123336791992, "learning_rate": 5e-06, "loss": 1.0881, "num_input_tokens_seen": 30281548, "step": 483 }, { "epoch": 1.6073211314475873, "loss": 1.062880277633667, "loss_ce": 0.000624328211415559, "loss_iou": 0.27734375, "loss_num": 0.1015625, "loss_xval": 1.0625, "num_input_tokens_seen": 30281548, "step": 483 }, { "epoch": 1.6106489184692179, "grad_norm": 19.008699417114258, "learning_rate": 5e-06, "loss": 1.1287, "num_input_tokens_seen": 30343352, "step": 484 }, { "epoch": 1.6106489184692179, "loss": 1.2776226997375488, "loss_ce": 0.0007672393112443388, "loss_iou": 0.3671875, "loss_num": 0.10888671875, "loss_xval": 1.2734375, "num_input_tokens_seen": 30343352, "step": 484 }, { "epoch": 1.6139767054908485, "grad_norm": 14.159399032592773, "learning_rate": 5e-06, "loss": 0.7695, "num_input_tokens_seen": 30405128, "step": 485 }, { "epoch": 1.6139767054908485, "loss": 0.5511561632156372, "loss_ce": 0.00010023050708696246, "loss_iou": 0.154296875, "loss_num": 0.048583984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 30405128, "step": 485 }, { "epoch": 1.617304492512479, "grad_norm": 33.96746826171875, "learning_rate": 5e-06, "loss": 1.059, "num_input_tokens_seen": 30468064, "step": 486 }, { "epoch": 1.617304492512479, "loss": 1.1239858865737915, "loss_ce": 0.0004507245612330735, "loss_iou": 0.40625, "loss_num": 0.06201171875, "loss_xval": 1.125, "num_input_tokens_seen": 30468064, "step": 486 }, { "epoch": 1.6206322795341097, "grad_norm": 31.724143981933594, "learning_rate": 5e-06, "loss": 1.0414, "num_input_tokens_seen": 30531744, "step": 487 }, { "epoch": 1.6206322795341097, "loss": 0.9699031114578247, "loss_ce": 0.0011531418422237039, "loss_iou": 0.330078125, "loss_num": 0.061767578125, "loss_xval": 0.96875, "num_input_tokens_seen": 30531744, "step": 487 }, { "epoch": 1.6239600665557403, "grad_norm": 55.814788818359375, "learning_rate": 5e-06, "loss": 0.8095, "num_input_tokens_seen": 30594308, "step": 488 }, { "epoch": 1.6239600665557403, "loss": 0.6811871528625488, "loss_ce": 0.0021100228186696768, "loss_iou": 0.2099609375, "loss_num": 0.051513671875, "loss_xval": 0.6796875, "num_input_tokens_seen": 30594308, "step": 488 }, { "epoch": 1.627287853577371, "grad_norm": 18.444459915161133, "learning_rate": 5e-06, "loss": 0.7932, "num_input_tokens_seen": 30656780, "step": 489 }, { "epoch": 1.627287853577371, "loss": 0.8429890871047974, "loss_ce": 0.001192222349345684, "loss_iou": 0.29296875, "loss_num": 0.051513671875, "loss_xval": 0.84375, "num_input_tokens_seen": 30656780, "step": 489 }, { "epoch": 1.6306156405990015, "grad_norm": 10.98024845123291, "learning_rate": 5e-06, "loss": 0.8646, "num_input_tokens_seen": 30720664, "step": 490 }, { "epoch": 1.6306156405990015, "loss": 0.9497690200805664, "loss_ce": 0.001038507791236043, "loss_iou": 0.35546875, "loss_num": 0.047607421875, "loss_xval": 0.94921875, "num_input_tokens_seen": 30720664, "step": 490 }, { "epoch": 1.6339434276206322, "grad_norm": 25.770841598510742, "learning_rate": 5e-06, "loss": 0.8741, "num_input_tokens_seen": 30782936, "step": 491 }, { "epoch": 1.6339434276206322, "loss": 0.7657675743103027, "loss_ce": 0.00014260777970775962, "loss_iou": 0.2314453125, "loss_num": 0.060546875, "loss_xval": 0.765625, "num_input_tokens_seen": 30782936, "step": 491 }, { "epoch": 1.6372712146422628, "grad_norm": 32.28664779663086, "learning_rate": 5e-06, "loss": 1.0502, "num_input_tokens_seen": 30847572, "step": 492 }, { "epoch": 1.6372712146422628, "loss": 1.0450204610824585, "loss_ce": 9.864305320661515e-05, "loss_iou": 0.37109375, "loss_num": 0.060302734375, "loss_xval": 1.046875, "num_input_tokens_seen": 30847572, "step": 492 }, { "epoch": 1.6405990016638934, "grad_norm": 14.635913848876953, "learning_rate": 5e-06, "loss": 0.7862, "num_input_tokens_seen": 30910236, "step": 493 }, { "epoch": 1.6405990016638934, "loss": 0.8603725433349609, "loss_ce": 0.0019741340074688196, "loss_iou": 0.2353515625, "loss_num": 0.07763671875, "loss_xval": 0.859375, "num_input_tokens_seen": 30910236, "step": 493 }, { "epoch": 1.643926788685524, "grad_norm": 17.99651336669922, "learning_rate": 5e-06, "loss": 1.0147, "num_input_tokens_seen": 30972852, "step": 494 }, { "epoch": 1.643926788685524, "loss": 0.9608148336410522, "loss_ce": 0.0010980231454595923, "loss_iou": 0.3515625, "loss_num": 0.051513671875, "loss_xval": 0.9609375, "num_input_tokens_seen": 30972852, "step": 494 }, { "epoch": 1.6472545757071546, "grad_norm": 19.670230865478516, "learning_rate": 5e-06, "loss": 0.8563, "num_input_tokens_seen": 31036164, "step": 495 }, { "epoch": 1.6472545757071546, "loss": 0.7549432516098022, "loss_ce": 0.0010369757656008005, "loss_iou": 0.224609375, "loss_num": 0.060791015625, "loss_xval": 0.75390625, "num_input_tokens_seen": 31036164, "step": 495 }, { "epoch": 1.6505823627287852, "grad_norm": 15.07642936706543, "learning_rate": 5e-06, "loss": 1.0478, "num_input_tokens_seen": 31100932, "step": 496 }, { "epoch": 1.6505823627287852, "loss": 1.292703628540039, "loss_ce": 0.00022312774672172964, "loss_iou": 0.44140625, "loss_num": 0.08203125, "loss_xval": 1.2890625, "num_input_tokens_seen": 31100932, "step": 496 }, { "epoch": 1.6539101497504158, "grad_norm": 11.568424224853516, "learning_rate": 5e-06, "loss": 0.9857, "num_input_tokens_seen": 31163920, "step": 497 }, { "epoch": 1.6539101497504158, "loss": 0.8776628971099854, "loss_ce": 0.0016863057389855385, "loss_iou": 0.28515625, "loss_num": 0.06103515625, "loss_xval": 0.875, "num_input_tokens_seen": 31163920, "step": 497 }, { "epoch": 1.6572379367720464, "grad_norm": 14.910255432128906, "learning_rate": 5e-06, "loss": 0.7452, "num_input_tokens_seen": 31226416, "step": 498 }, { "epoch": 1.6572379367720464, "loss": 0.6067147850990295, "loss_ce": 0.0002694768481887877, "loss_iou": 0.1943359375, "loss_num": 0.04345703125, "loss_xval": 0.60546875, "num_input_tokens_seen": 31226416, "step": 498 }, { "epoch": 1.660565723793677, "grad_norm": 20.980806350708008, "learning_rate": 5e-06, "loss": 0.8723, "num_input_tokens_seen": 31288532, "step": 499 }, { "epoch": 1.660565723793677, "loss": 1.0256372690200806, "loss_ce": 0.00024659678456373513, "loss_iou": 0.33203125, "loss_num": 0.072265625, "loss_xval": 1.0234375, "num_input_tokens_seen": 31288532, "step": 499 }, { "epoch": 1.6638935108153077, "grad_norm": 10.324045181274414, "learning_rate": 5e-06, "loss": 1.2927, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "eval_seeclick_CIoU": 0.1407727226614952, "eval_seeclick_GIoU": 0.16192460805177689, "eval_seeclick_IoU": 0.24831650406122208, "eval_seeclick_MAE_all": 0.15863317996263504, "eval_seeclick_MAE_h": 0.0617758184671402, "eval_seeclick_MAE_w": 0.1101977601647377, "eval_seeclick_MAE_x_boxes": 0.20691636204719543, "eval_seeclick_MAE_y_boxes": 0.1509951800107956, "eval_seeclick_NUM_probability": 0.999552309513092, "eval_seeclick_inside_bbox": 0.25833334028720856, "eval_seeclick_loss": 2.63395357131958, "eval_seeclick_loss_ce": 0.07620460540056229, "eval_seeclick_loss_iou": 0.8829345703125, "eval_seeclick_loss_num": 0.16785430908203125, "eval_seeclick_loss_xval": 2.605712890625, "eval_seeclick_runtime": 61.3386, "eval_seeclick_samples_per_second": 0.766, "eval_seeclick_steps_per_second": 0.033, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "eval_icons_CIoU": -0.08987009525299072, "eval_icons_GIoU": 0.008922544599045068, "eval_icons_IoU": 0.09777991846203804, "eval_icons_MAE_all": 0.19387147575616837, "eval_icons_MAE_h": 0.17065294086933136, "eval_icons_MAE_w": 0.14830636978149414, "eval_icons_MAE_x_boxes": 0.1475905030965805, "eval_icons_MAE_y_boxes": 0.11944273859262466, "eval_icons_NUM_probability": 0.9998856782913208, "eval_icons_inside_bbox": 0.1770833358168602, "eval_icons_loss": 2.8937652111053467, "eval_icons_loss_ce": 1.5150705166888656e-05, "eval_icons_loss_iou": 0.983154296875, "eval_icons_loss_num": 0.1881866455078125, "eval_icons_loss_xval": 2.908203125, "eval_icons_runtime": 64.9155, "eval_icons_samples_per_second": 0.77, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "eval_screenspot_CIoU": 0.06984660774469376, "eval_screenspot_GIoU": 0.0980309545993805, "eval_screenspot_IoU": 0.20527644455432892, "eval_screenspot_MAE_all": 0.1600045015414556, "eval_screenspot_MAE_h": 0.08737764010826747, "eval_screenspot_MAE_w": 0.12003938108682632, "eval_screenspot_MAE_x_boxes": 0.20897812147935232, "eval_screenspot_MAE_y_boxes": 0.13371336460113525, "eval_screenspot_NUM_probability": 0.9998833537101746, "eval_screenspot_inside_bbox": 0.39666666587193805, "eval_screenspot_loss": 2.6362078189849854, "eval_screenspot_loss_ce": 0.0010837163620938857, "eval_screenspot_loss_iou": 0.91015625, "eval_screenspot_loss_num": 0.16780598958333334, "eval_screenspot_loss_xval": 2.66015625, "eval_screenspot_runtime": 110.1801, "eval_screenspot_samples_per_second": 0.808, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "eval_compot_CIoU": -0.04207657277584076, "eval_compot_GIoU": 0.02166691841557622, "eval_compot_IoU": 0.12593939900398254, "eval_compot_MAE_all": 0.2019805982708931, "eval_compot_MAE_h": 0.1120777502655983, "eval_compot_MAE_w": 0.2181495651602745, "eval_compot_MAE_x_boxes": 0.16451644897460938, "eval_compot_MAE_y_boxes": 0.1350179947912693, "eval_compot_NUM_probability": 0.9998969435691833, "eval_compot_inside_bbox": 0.1493055559694767, "eval_compot_loss": 2.969686985015869, "eval_compot_loss_ce": 0.00425724231172353, "eval_compot_loss_iou": 0.98486328125, "eval_compot_loss_num": 0.21350860595703125, "eval_compot_loss_xval": 3.037109375, "eval_compot_runtime": 74.2021, "eval_compot_samples_per_second": 0.674, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "eval_custom_ui_MAE_all": 0.09835522994399071, "eval_custom_ui_MAE_x": 0.1206330731511116, "eval_custom_ui_MAE_y": 0.076077401638031, "eval_custom_ui_NUM_probability": 0.9998647272586823, "eval_custom_ui_loss": 0.4728478193283081, "eval_custom_ui_loss_ce": 4.36340687883785e-05, "eval_custom_ui_loss_num": 0.095428466796875, "eval_custom_ui_loss_xval": 0.4771728515625, "eval_custom_ui_runtime": 50.5771, "eval_custom_ui_samples_per_second": 0.989, "eval_custom_ui_steps_per_second": 0.04, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6638935108153077, "loss": 0.4966296851634979, "loss_ce": 4.7624427679693326e-05, "loss_iou": 0.0, "loss_num": 0.099609375, "loss_xval": 0.49609375, "num_input_tokens_seen": 31351496, "step": 500 }, { "epoch": 1.6672212978369383, "grad_norm": 15.344446182250977, "learning_rate": 5e-06, "loss": 0.9888, "num_input_tokens_seen": 31413356, "step": 501 }, { "epoch": 1.6672212978369383, "loss": 0.9617945551872253, "loss_ce": 0.0006129179382696748, "loss_iou": 0.30859375, "loss_num": 0.06884765625, "loss_xval": 0.9609375, "num_input_tokens_seen": 31413356, "step": 501 }, { "epoch": 1.670549084858569, "grad_norm": 26.52325439453125, "learning_rate": 5e-06, "loss": 1.1438, "num_input_tokens_seen": 31477812, "step": 502 }, { "epoch": 1.670549084858569, "loss": 0.9673887491226196, "loss_ce": 0.00022571717272512615, "loss_iou": 0.328125, "loss_num": 0.0625, "loss_xval": 0.96875, "num_input_tokens_seen": 31477812, "step": 502 }, { "epoch": 1.6738768718801995, "grad_norm": 7.430693626403809, "learning_rate": 5e-06, "loss": 0.9618, "num_input_tokens_seen": 31539776, "step": 503 }, { "epoch": 1.6738768718801995, "loss": 0.7569930553436279, "loss_ce": 0.00015710438310634345, "loss_iou": 0.1982421875, "loss_num": 0.07177734375, "loss_xval": 0.7578125, "num_input_tokens_seen": 31539776, "step": 503 }, { "epoch": 1.6772046589018301, "grad_norm": 21.236722946166992, "learning_rate": 5e-06, "loss": 1.0258, "num_input_tokens_seen": 31603112, "step": 504 }, { "epoch": 1.6772046589018301, "loss": 1.105105996131897, "loss_ce": 0.000491736049298197, "loss_iou": 0.369140625, "loss_num": 0.0732421875, "loss_xval": 1.1015625, "num_input_tokens_seen": 31603112, "step": 504 }, { "epoch": 1.6805324459234607, "grad_norm": 24.83149528503418, "learning_rate": 5e-06, "loss": 0.8819, "num_input_tokens_seen": 31666548, "step": 505 }, { "epoch": 1.6805324459234607, "loss": 1.06907320022583, "loss_ce": 0.006085007451474667, "loss_iou": 0.376953125, "loss_num": 0.061767578125, "loss_xval": 1.0625, "num_input_tokens_seen": 31666548, "step": 505 }, { "epoch": 1.6838602329450914, "grad_norm": 22.88380241394043, "learning_rate": 5e-06, "loss": 1.1046, "num_input_tokens_seen": 31729064, "step": 506 }, { "epoch": 1.6838602329450914, "loss": 1.034957766532898, "loss_ce": 0.0010222127893939614, "loss_iou": 0.33984375, "loss_num": 0.0703125, "loss_xval": 1.03125, "num_input_tokens_seen": 31729064, "step": 506 }, { "epoch": 1.687188019966722, "grad_norm": 28.19011116027832, "learning_rate": 5e-06, "loss": 1.0829, "num_input_tokens_seen": 31792644, "step": 507 }, { "epoch": 1.687188019966722, "loss": 0.9801334738731384, "loss_ce": 0.0016178734367713332, "loss_iou": 0.365234375, "loss_num": 0.04931640625, "loss_xval": 0.9765625, "num_input_tokens_seen": 31792644, "step": 507 }, { "epoch": 1.6905158069883528, "grad_norm": 11.66897201538086, "learning_rate": 5e-06, "loss": 0.6905, "num_input_tokens_seen": 31855528, "step": 508 }, { "epoch": 1.6905158069883528, "loss": 0.5484713315963745, "loss_ce": 0.0006197449984028935, "loss_iou": 0.1845703125, "loss_num": 0.03564453125, "loss_xval": 0.546875, "num_input_tokens_seen": 31855528, "step": 508 }, { "epoch": 1.6938435940099834, "grad_norm": 10.037336349487305, "learning_rate": 5e-06, "loss": 0.8526, "num_input_tokens_seen": 31918812, "step": 509 }, { "epoch": 1.6938435940099834, "loss": 0.9395761489868164, "loss_ce": 0.00012300777598284185, "loss_iou": 0.31640625, "loss_num": 0.060791015625, "loss_xval": 0.9375, "num_input_tokens_seen": 31918812, "step": 509 }, { "epoch": 1.697171381031614, "grad_norm": 15.060293197631836, "learning_rate": 5e-06, "loss": 0.9654, "num_input_tokens_seen": 31981784, "step": 510 }, { "epoch": 1.697171381031614, "loss": 0.8822901248931885, "loss_ce": 0.0005762342480011284, "loss_iou": 0.2734375, "loss_num": 0.06689453125, "loss_xval": 0.8828125, "num_input_tokens_seen": 31981784, "step": 510 }, { "epoch": 1.7004991680532446, "grad_norm": 24.509607315063477, "learning_rate": 5e-06, "loss": 1.1012, "num_input_tokens_seen": 32044616, "step": 511 }, { "epoch": 1.7004991680532446, "loss": 1.3109240531921387, "loss_ce": 0.0008654608973301947, "loss_iou": 0.421875, "loss_num": 0.0927734375, "loss_xval": 1.3125, "num_input_tokens_seen": 32044616, "step": 511 }, { "epoch": 1.7038269550748752, "grad_norm": 7.957863807678223, "learning_rate": 5e-06, "loss": 0.9465, "num_input_tokens_seen": 32107584, "step": 512 }, { "epoch": 1.7038269550748752, "loss": 1.0313119888305664, "loss_ce": 6.187129474710673e-05, "loss_iou": 0.396484375, "loss_num": 0.0478515625, "loss_xval": 1.03125, "num_input_tokens_seen": 32107584, "step": 512 }, { "epoch": 1.7071547420965059, "grad_norm": 11.338505744934082, "learning_rate": 5e-06, "loss": 0.8749, "num_input_tokens_seen": 32170332, "step": 513 }, { "epoch": 1.7071547420965059, "loss": 0.8663097620010376, "loss_ce": 9.878900164039806e-05, "loss_iou": 0.30078125, "loss_num": 0.05322265625, "loss_xval": 0.8671875, "num_input_tokens_seen": 32170332, "step": 513 }, { "epoch": 1.7104825291181365, "grad_norm": 15.753905296325684, "learning_rate": 5e-06, "loss": 0.8406, "num_input_tokens_seen": 32233800, "step": 514 }, { "epoch": 1.7104825291181365, "loss": 0.735957145690918, "loss_ce": 5.6298536946997046e-05, "loss_iou": 0.255859375, "loss_num": 0.044921875, "loss_xval": 0.734375, "num_input_tokens_seen": 32233800, "step": 514 }, { "epoch": 1.713810316139767, "grad_norm": 11.170035362243652, "learning_rate": 5e-06, "loss": 0.9196, "num_input_tokens_seen": 32295280, "step": 515 }, { "epoch": 1.713810316139767, "loss": 1.0259308815002441, "loss_ce": 0.0005403538234531879, "loss_iou": 0.345703125, "loss_num": 0.06689453125, "loss_xval": 1.0234375, "num_input_tokens_seen": 32295280, "step": 515 }, { "epoch": 1.7171381031613977, "grad_norm": 10.521838188171387, "learning_rate": 5e-06, "loss": 0.8688, "num_input_tokens_seen": 32357524, "step": 516 }, { "epoch": 1.7171381031613977, "loss": 1.1006793975830078, "loss_ce": 0.0003376527747604996, "loss_iou": 0.36328125, "loss_num": 0.07421875, "loss_xval": 1.1015625, "num_input_tokens_seen": 32357524, "step": 516 }, { "epoch": 1.7204658901830283, "grad_norm": 7.562518119812012, "learning_rate": 5e-06, "loss": 0.9232, "num_input_tokens_seen": 32419788, "step": 517 }, { "epoch": 1.7204658901830283, "loss": 0.6823647022247314, "loss_ce": 0.0002358041238039732, "loss_iou": 0.1953125, "loss_num": 0.05810546875, "loss_xval": 0.68359375, "num_input_tokens_seen": 32419788, "step": 517 }, { "epoch": 1.723793677204659, "grad_norm": 10.889182090759277, "learning_rate": 5e-06, "loss": 0.8767, "num_input_tokens_seen": 32483208, "step": 518 }, { "epoch": 1.723793677204659, "loss": 0.7226254940032959, "loss_ce": 9.124017378780991e-05, "loss_iou": 0.193359375, "loss_num": 0.06689453125, "loss_xval": 0.72265625, "num_input_tokens_seen": 32483208, "step": 518 }, { "epoch": 1.7271214642262895, "grad_norm": 10.785301208496094, "learning_rate": 5e-06, "loss": 0.8055, "num_input_tokens_seen": 32546916, "step": 519 }, { "epoch": 1.7271214642262895, "loss": 0.8370875716209412, "loss_ce": 0.0004176485526841134, "loss_iou": 0.271484375, "loss_num": 0.05859375, "loss_xval": 0.8359375, "num_input_tokens_seen": 32546916, "step": 519 }, { "epoch": 1.7304492512479202, "grad_norm": 13.234476089477539, "learning_rate": 5e-06, "loss": 0.7392, "num_input_tokens_seen": 32609956, "step": 520 }, { "epoch": 1.7304492512479202, "loss": 0.7869880795478821, "loss_ce": 0.0006110834656283259, "loss_iou": 0.232421875, "loss_num": 0.06396484375, "loss_xval": 0.78515625, "num_input_tokens_seen": 32609956, "step": 520 }, { "epoch": 1.7337770382695508, "grad_norm": 8.226746559143066, "learning_rate": 5e-06, "loss": 0.904, "num_input_tokens_seen": 32672000, "step": 521 }, { "epoch": 1.7337770382695508, "loss": 0.8872894048690796, "loss_ce": 0.0015472628874704242, "loss_iou": 0.267578125, "loss_num": 0.0703125, "loss_xval": 0.88671875, "num_input_tokens_seen": 32672000, "step": 521 }, { "epoch": 1.7371048252911814, "grad_norm": 11.019255638122559, "learning_rate": 5e-06, "loss": 0.7731, "num_input_tokens_seen": 32735724, "step": 522 }, { "epoch": 1.7371048252911814, "loss": 0.7001251578330994, "loss_ce": 0.0016388462390750647, "loss_iou": 0.236328125, "loss_num": 0.045166015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 32735724, "step": 522 }, { "epoch": 1.740432612312812, "grad_norm": 11.40810489654541, "learning_rate": 5e-06, "loss": 0.988, "num_input_tokens_seen": 32799032, "step": 523 }, { "epoch": 1.740432612312812, "loss": 1.0509389638900757, "loss_ce": 0.0016226019943132997, "loss_iou": 0.369140625, "loss_num": 0.062255859375, "loss_xval": 1.046875, "num_input_tokens_seen": 32799032, "step": 523 }, { "epoch": 1.7437603993344426, "grad_norm": 24.868728637695312, "learning_rate": 5e-06, "loss": 1.1176, "num_input_tokens_seen": 32863204, "step": 524 }, { "epoch": 1.7437603993344426, "loss": 0.9876018166542053, "loss_ce": 0.0012737078359350562, "loss_iou": 0.341796875, "loss_num": 0.060546875, "loss_xval": 0.984375, "num_input_tokens_seen": 32863204, "step": 524 }, { "epoch": 1.7470881863560732, "grad_norm": 24.94504737854004, "learning_rate": 5e-06, "loss": 0.7812, "num_input_tokens_seen": 32925956, "step": 525 }, { "epoch": 1.7470881863560732, "loss": 0.9750490188598633, "loss_ce": 7.341763557633385e-05, "loss_iou": 0.333984375, "loss_num": 0.0615234375, "loss_xval": 0.9765625, "num_input_tokens_seen": 32925956, "step": 525 }, { "epoch": 1.7504159733777038, "grad_norm": 11.349876403808594, "learning_rate": 5e-06, "loss": 1.0901, "num_input_tokens_seen": 32988500, "step": 526 }, { "epoch": 1.7504159733777038, "loss": 1.3412702083587646, "loss_ce": 0.0011823027161881328, "loss_iou": 0.46484375, "loss_num": 0.08203125, "loss_xval": 1.34375, "num_input_tokens_seen": 32988500, "step": 526 }, { "epoch": 1.7537437603993344, "grad_norm": 11.285101890563965, "learning_rate": 5e-06, "loss": 0.8762, "num_input_tokens_seen": 33051032, "step": 527 }, { "epoch": 1.7537437603993344, "loss": 1.0813405513763428, "loss_ce": 0.0011403084499761462, "loss_iou": 0.34375, "loss_num": 0.07861328125, "loss_xval": 1.078125, "num_input_tokens_seen": 33051032, "step": 527 }, { "epoch": 1.757071547420965, "grad_norm": 18.434289932250977, "learning_rate": 5e-06, "loss": 0.672, "num_input_tokens_seen": 33113840, "step": 528 }, { "epoch": 1.757071547420965, "loss": 0.7331857681274414, "loss_ce": 0.0003977270389441401, "loss_iou": 0.25, "loss_num": 0.046630859375, "loss_xval": 0.734375, "num_input_tokens_seen": 33113840, "step": 528 }, { "epoch": 1.7603993344425957, "grad_norm": 27.202333450317383, "learning_rate": 5e-06, "loss": 0.9071, "num_input_tokens_seen": 33173696, "step": 529 }, { "epoch": 1.7603993344425957, "loss": 0.9044928550720215, "loss_ce": 0.0009284402476623654, "loss_iou": 0.283203125, "loss_num": 0.0673828125, "loss_xval": 0.90234375, "num_input_tokens_seen": 33173696, "step": 529 }, { "epoch": 1.7637271214642263, "grad_norm": 16.686687469482422, "learning_rate": 5e-06, "loss": 0.7909, "num_input_tokens_seen": 33235300, "step": 530 }, { "epoch": 1.7637271214642263, "loss": 0.7983999252319336, "loss_ce": 0.0028676651418209076, "loss_iou": 0.2275390625, "loss_num": 0.06787109375, "loss_xval": 0.796875, "num_input_tokens_seen": 33235300, "step": 530 }, { "epoch": 1.767054908485857, "grad_norm": 14.414863586425781, "learning_rate": 5e-06, "loss": 1.095, "num_input_tokens_seen": 33299248, "step": 531 }, { "epoch": 1.767054908485857, "loss": 1.0662611722946167, "loss_ce": 9.906104969559237e-05, "loss_iou": 0.3671875, "loss_num": 0.06640625, "loss_xval": 1.0625, "num_input_tokens_seen": 33299248, "step": 531 }, { "epoch": 1.7703826955074875, "grad_norm": 10.901994705200195, "learning_rate": 5e-06, "loss": 1.1139, "num_input_tokens_seen": 33361488, "step": 532 }, { "epoch": 1.7703826955074875, "loss": 1.0940532684326172, "loss_ce": 5.924535435042344e-05, "loss_iou": 0.34375, "loss_num": 0.08154296875, "loss_xval": 1.09375, "num_input_tokens_seen": 33361488, "step": 532 }, { "epoch": 1.7737104825291181, "grad_norm": 9.02663803100586, "learning_rate": 5e-06, "loss": 0.7596, "num_input_tokens_seen": 33423244, "step": 533 }, { "epoch": 1.7737104825291181, "loss": 0.8010953664779663, "loss_ce": 0.00019202599651180208, "loss_iou": 0.28125, "loss_num": 0.04736328125, "loss_xval": 0.80078125, "num_input_tokens_seen": 33423244, "step": 533 }, { "epoch": 1.7770382695507487, "grad_norm": 23.729066848754883, "learning_rate": 5e-06, "loss": 0.872, "num_input_tokens_seen": 33486120, "step": 534 }, { "epoch": 1.7770382695507487, "loss": 0.9203000068664551, "loss_ce": 0.00025603370158933103, "loss_iou": 0.287109375, "loss_num": 0.06884765625, "loss_xval": 0.921875, "num_input_tokens_seen": 33486120, "step": 534 }, { "epoch": 1.7803660565723793, "grad_norm": 18.88593864440918, "learning_rate": 5e-06, "loss": 0.6667, "num_input_tokens_seen": 33548448, "step": 535 }, { "epoch": 1.7803660565723793, "loss": 0.6696476936340332, "loss_ce": 0.0014347649412229657, "loss_iou": 0.2451171875, "loss_num": 0.035400390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 33548448, "step": 535 }, { "epoch": 1.78369384359401, "grad_norm": 17.30825424194336, "learning_rate": 5e-06, "loss": 1.0589, "num_input_tokens_seen": 33611368, "step": 536 }, { "epoch": 1.78369384359401, "loss": 1.2292940616607666, "loss_ce": 0.0017550288466736674, "loss_iou": 0.43359375, "loss_num": 0.07177734375, "loss_xval": 1.2265625, "num_input_tokens_seen": 33611368, "step": 536 }, { "epoch": 1.7870216306156406, "grad_norm": 19.913711547851562, "learning_rate": 5e-06, "loss": 0.9414, "num_input_tokens_seen": 33674000, "step": 537 }, { "epoch": 1.7870216306156406, "loss": 1.089581847190857, "loss_ce": 0.001935345004312694, "loss_iou": 0.4140625, "loss_num": 0.05126953125, "loss_xval": 1.0859375, "num_input_tokens_seen": 33674000, "step": 537 }, { "epoch": 1.7903494176372712, "grad_norm": 21.72895622253418, "learning_rate": 5e-06, "loss": 1.0908, "num_input_tokens_seen": 33736264, "step": 538 }, { "epoch": 1.7903494176372712, "loss": 1.2249575853347778, "loss_ce": 0.000836513820104301, "loss_iou": 0.4453125, "loss_num": 0.06689453125, "loss_xval": 1.2265625, "num_input_tokens_seen": 33736264, "step": 538 }, { "epoch": 1.7936772046589018, "grad_norm": 28.31559944152832, "learning_rate": 5e-06, "loss": 0.7269, "num_input_tokens_seen": 33799068, "step": 539 }, { "epoch": 1.7936772046589018, "loss": 1.0997110605239868, "loss_ce": 0.0015665038954466581, "loss_iou": 0.380859375, "loss_num": 0.06787109375, "loss_xval": 1.1015625, "num_input_tokens_seen": 33799068, "step": 539 }, { "epoch": 1.7970049916805324, "grad_norm": 21.90004539489746, "learning_rate": 5e-06, "loss": 0.7649, "num_input_tokens_seen": 33862532, "step": 540 }, { "epoch": 1.7970049916805324, "loss": 0.9843000173568726, "loss_ce": 0.004319518804550171, "loss_iou": 0.37109375, "loss_num": 0.047607421875, "loss_xval": 0.98046875, "num_input_tokens_seen": 33862532, "step": 540 }, { "epoch": 1.800332778702163, "grad_norm": 25.75197410583496, "learning_rate": 5e-06, "loss": 0.9036, "num_input_tokens_seen": 33925504, "step": 541 }, { "epoch": 1.800332778702163, "loss": 0.7830278277397156, "loss_ce": 6.884754839120433e-05, "loss_iou": 0.3125, "loss_num": 0.031982421875, "loss_xval": 0.78125, "num_input_tokens_seen": 33925504, "step": 541 }, { "epoch": 1.8036605657237936, "grad_norm": 29.159202575683594, "learning_rate": 5e-06, "loss": 1.0031, "num_input_tokens_seen": 33989520, "step": 542 }, { "epoch": 1.8036605657237936, "loss": 1.0120999813079834, "loss_ce": 0.00038123998092487454, "loss_iou": 0.3359375, "loss_num": 0.06787109375, "loss_xval": 1.015625, "num_input_tokens_seen": 33989520, "step": 542 }, { "epoch": 1.8069883527454242, "grad_norm": 14.283863067626953, "learning_rate": 5e-06, "loss": 0.7717, "num_input_tokens_seen": 34052372, "step": 543 }, { "epoch": 1.8069883527454242, "loss": 0.869968056678772, "loss_ce": 0.0002170940861105919, "loss_iou": 0.2734375, "loss_num": 0.06494140625, "loss_xval": 0.87109375, "num_input_tokens_seen": 34052372, "step": 543 }, { "epoch": 1.8103161397670549, "grad_norm": 16.154088973999023, "learning_rate": 5e-06, "loss": 0.9688, "num_input_tokens_seen": 34115128, "step": 544 }, { "epoch": 1.8103161397670549, "loss": 0.8168380260467529, "loss_ce": 6.555484287673607e-05, "loss_iou": 0.1953125, "loss_num": 0.0849609375, "loss_xval": 0.81640625, "num_input_tokens_seen": 34115128, "step": 544 }, { "epoch": 1.8136439267886857, "grad_norm": 11.533266067504883, "learning_rate": 5e-06, "loss": 0.7305, "num_input_tokens_seen": 34177108, "step": 545 }, { "epoch": 1.8136439267886857, "loss": 0.5452122688293457, "loss_ce": 4.6220542571973056e-05, "loss_iou": 0.2001953125, "loss_num": 0.0289306640625, "loss_xval": 0.546875, "num_input_tokens_seen": 34177108, "step": 545 }, { "epoch": 1.8169717138103163, "grad_norm": 27.09062957763672, "learning_rate": 5e-06, "loss": 1.0109, "num_input_tokens_seen": 34240596, "step": 546 }, { "epoch": 1.8169717138103163, "loss": 0.9228719472885132, "loss_ce": 0.001485195243731141, "loss_iou": 0.341796875, "loss_num": 0.047607421875, "loss_xval": 0.921875, "num_input_tokens_seen": 34240596, "step": 546 }, { "epoch": 1.820299500831947, "grad_norm": 61.28173828125, "learning_rate": 5e-06, "loss": 1.0332, "num_input_tokens_seen": 34303684, "step": 547 }, { "epoch": 1.820299500831947, "loss": 1.129799723625183, "loss_ce": 0.00016104202950373292, "loss_iou": 0.388671875, "loss_num": 0.07080078125, "loss_xval": 1.1328125, "num_input_tokens_seen": 34303684, "step": 547 }, { "epoch": 1.8236272878535775, "grad_norm": 19.194339752197266, "learning_rate": 5e-06, "loss": 0.918, "num_input_tokens_seen": 34366564, "step": 548 }, { "epoch": 1.8236272878535775, "loss": 0.9430897235870361, "loss_ce": 0.00046277401270344853, "loss_iou": 0.341796875, "loss_num": 0.0517578125, "loss_xval": 0.94140625, "num_input_tokens_seen": 34366564, "step": 548 }, { "epoch": 1.8269550748752081, "grad_norm": 26.822731018066406, "learning_rate": 5e-06, "loss": 0.8636, "num_input_tokens_seen": 34428820, "step": 549 }, { "epoch": 1.8269550748752081, "loss": 1.0753934383392334, "loss_ce": 0.000686370360199362, "loss_iou": 0.359375, "loss_num": 0.07080078125, "loss_xval": 1.078125, "num_input_tokens_seen": 34428820, "step": 549 }, { "epoch": 1.8302828618968388, "grad_norm": 8.170743942260742, "learning_rate": 5e-06, "loss": 0.5979, "num_input_tokens_seen": 34489988, "step": 550 }, { "epoch": 1.8302828618968388, "loss": 0.761289119720459, "loss_ce": 5.860634337295778e-05, "loss_iou": 0.2353515625, "loss_num": 0.05859375, "loss_xval": 0.76171875, "num_input_tokens_seen": 34489988, "step": 550 }, { "epoch": 1.8336106489184694, "grad_norm": 16.041730880737305, "learning_rate": 5e-06, "loss": 1.0226, "num_input_tokens_seen": 34552528, "step": 551 }, { "epoch": 1.8336106489184694, "loss": 0.820946455001831, "loss_ce": 0.00014566481695510447, "loss_iou": 0.27734375, "loss_num": 0.053466796875, "loss_xval": 0.8203125, "num_input_tokens_seen": 34552528, "step": 551 }, { "epoch": 1.8369384359401, "grad_norm": 18.457284927368164, "learning_rate": 5e-06, "loss": 0.8108, "num_input_tokens_seen": 34615240, "step": 552 }, { "epoch": 1.8369384359401, "loss": 0.7772719264030457, "loss_ce": 0.00017231859965249896, "loss_iou": 0.287109375, "loss_num": 0.04052734375, "loss_xval": 0.77734375, "num_input_tokens_seen": 34615240, "step": 552 }, { "epoch": 1.8402662229617306, "grad_norm": 9.280012130737305, "learning_rate": 5e-06, "loss": 1.0903, "num_input_tokens_seen": 34678576, "step": 553 }, { "epoch": 1.8402662229617306, "loss": 1.092820405960083, "loss_ce": 0.0005351940635591745, "loss_iou": 0.4140625, "loss_num": 0.05322265625, "loss_xval": 1.09375, "num_input_tokens_seen": 34678576, "step": 553 }, { "epoch": 1.8435940099833612, "grad_norm": 13.509902000427246, "learning_rate": 5e-06, "loss": 0.9977, "num_input_tokens_seen": 34741560, "step": 554 }, { "epoch": 1.8435940099833612, "loss": 0.9331475496292114, "loss_ce": 0.0012017691042274237, "loss_iou": 0.32421875, "loss_num": 0.05615234375, "loss_xval": 0.93359375, "num_input_tokens_seen": 34741560, "step": 554 }, { "epoch": 1.8469217970049918, "grad_norm": 18.509479522705078, "learning_rate": 5e-06, "loss": 0.9555, "num_input_tokens_seen": 34806724, "step": 555 }, { "epoch": 1.8469217970049918, "loss": 1.0366463661193848, "loss_ce": 0.0034432627726346254, "loss_iou": 0.349609375, "loss_num": 0.0673828125, "loss_xval": 1.03125, "num_input_tokens_seen": 34806724, "step": 555 }, { "epoch": 1.8502495840266224, "grad_norm": 39.165348052978516, "learning_rate": 5e-06, "loss": 0.9968, "num_input_tokens_seen": 34869052, "step": 556 }, { "epoch": 1.8502495840266224, "loss": 1.176792860031128, "loss_ce": 0.003208851907402277, "loss_iou": 0.361328125, "loss_num": 0.09033203125, "loss_xval": 1.171875, "num_input_tokens_seen": 34869052, "step": 556 }, { "epoch": 1.853577371048253, "grad_norm": 22.69017791748047, "learning_rate": 5e-06, "loss": 1.0859, "num_input_tokens_seen": 34931472, "step": 557 }, { "epoch": 1.853577371048253, "loss": 1.264911413192749, "loss_ce": 0.0007511947769671679, "loss_iou": 0.427734375, "loss_num": 0.08154296875, "loss_xval": 1.265625, "num_input_tokens_seen": 34931472, "step": 557 }, { "epoch": 1.8569051580698837, "grad_norm": 14.08287525177002, "learning_rate": 5e-06, "loss": 0.9067, "num_input_tokens_seen": 34994332, "step": 558 }, { "epoch": 1.8569051580698837, "loss": 1.3093147277832031, "loss_ce": 0.0007209595059975982, "loss_iou": 0.4765625, "loss_num": 0.0712890625, "loss_xval": 1.3125, "num_input_tokens_seen": 34994332, "step": 558 }, { "epoch": 1.8602329450915143, "grad_norm": 23.314659118652344, "learning_rate": 5e-06, "loss": 1.0927, "num_input_tokens_seen": 35056908, "step": 559 }, { "epoch": 1.8602329450915143, "loss": 0.8283696174621582, "loss_ce": 0.0002446114958729595, "loss_iou": 0.3203125, "loss_num": 0.037353515625, "loss_xval": 0.828125, "num_input_tokens_seen": 35056908, "step": 559 }, { "epoch": 1.8635607321131449, "grad_norm": 25.26405143737793, "learning_rate": 5e-06, "loss": 1.08, "num_input_tokens_seen": 35120496, "step": 560 }, { "epoch": 1.8635607321131449, "loss": 1.3612158298492432, "loss_ce": 0.0008641867898404598, "loss_iou": 0.47265625, "loss_num": 0.08251953125, "loss_xval": 1.359375, "num_input_tokens_seen": 35120496, "step": 560 }, { "epoch": 1.8668885191347755, "grad_norm": 57.47762680053711, "learning_rate": 5e-06, "loss": 1.031, "num_input_tokens_seen": 35184732, "step": 561 }, { "epoch": 1.8668885191347755, "loss": 0.8519899249076843, "loss_ce": 0.000671501096803695, "loss_iou": 0.29296875, "loss_num": 0.052978515625, "loss_xval": 0.8515625, "num_input_tokens_seen": 35184732, "step": 561 }, { "epoch": 1.870216306156406, "grad_norm": 19.90561294555664, "learning_rate": 5e-06, "loss": 0.66, "num_input_tokens_seen": 35247080, "step": 562 }, { "epoch": 1.870216306156406, "loss": 0.6702248454093933, "loss_ce": 0.0007912360597401857, "loss_iou": 0.171875, "loss_num": 0.06494140625, "loss_xval": 0.66796875, "num_input_tokens_seen": 35247080, "step": 562 }, { "epoch": 1.8735440931780367, "grad_norm": 11.36895751953125, "learning_rate": 5e-06, "loss": 0.8835, "num_input_tokens_seen": 35310360, "step": 563 }, { "epoch": 1.8735440931780367, "loss": 0.854943037033081, "loss_ce": 0.00045084196608513594, "loss_iou": 0.302734375, "loss_num": 0.05029296875, "loss_xval": 0.85546875, "num_input_tokens_seen": 35310360, "step": 563 }, { "epoch": 1.8768718801996673, "grad_norm": 59.27011489868164, "learning_rate": 5e-06, "loss": 0.997, "num_input_tokens_seen": 35372132, "step": 564 }, { "epoch": 1.8768718801996673, "loss": 0.7903463840484619, "loss_ce": 0.0003073564439546317, "loss_iou": 0.2890625, "loss_num": 0.0419921875, "loss_xval": 0.7890625, "num_input_tokens_seen": 35372132, "step": 564 }, { "epoch": 1.880199667221298, "grad_norm": 14.463926315307617, "learning_rate": 5e-06, "loss": 0.8219, "num_input_tokens_seen": 35432864, "step": 565 }, { "epoch": 1.880199667221298, "loss": 0.9007686376571655, "loss_ce": 0.004772528540343046, "loss_iou": 0.310546875, "loss_num": 0.0546875, "loss_xval": 0.89453125, "num_input_tokens_seen": 35432864, "step": 565 }, { "epoch": 1.8835274542429286, "grad_norm": 9.443766593933105, "learning_rate": 5e-06, "loss": 0.6903, "num_input_tokens_seen": 35495096, "step": 566 }, { "epoch": 1.8835274542429286, "loss": 0.728258490562439, "loss_ce": 0.00023112227790988982, "loss_iou": 0.25, "loss_num": 0.0458984375, "loss_xval": 0.7265625, "num_input_tokens_seen": 35495096, "step": 566 }, { "epoch": 1.8868552412645592, "grad_norm": 16.495222091674805, "learning_rate": 5e-06, "loss": 0.791, "num_input_tokens_seen": 35557768, "step": 567 }, { "epoch": 1.8868552412645592, "loss": 0.7283313274383545, "loss_ce": 5.9868783864658326e-05, "loss_iou": 0.263671875, "loss_num": 0.0400390625, "loss_xval": 0.7265625, "num_input_tokens_seen": 35557768, "step": 567 }, { "epoch": 1.8901830282861898, "grad_norm": 19.051145553588867, "learning_rate": 5e-06, "loss": 0.9891, "num_input_tokens_seen": 35620732, "step": 568 }, { "epoch": 1.8901830282861898, "loss": 0.9619607329368591, "loss_ce": 0.0010232452768832445, "loss_iou": 0.279296875, "loss_num": 0.08056640625, "loss_xval": 0.9609375, "num_input_tokens_seen": 35620732, "step": 568 }, { "epoch": 1.8935108153078204, "grad_norm": 7.292303085327148, "learning_rate": 5e-06, "loss": 0.7214, "num_input_tokens_seen": 35683260, "step": 569 }, { "epoch": 1.8935108153078204, "loss": 0.8143761157989502, "loss_ce": 0.00016709069313947111, "loss_iou": 0.259765625, "loss_num": 0.0595703125, "loss_xval": 0.8125, "num_input_tokens_seen": 35683260, "step": 569 }, { "epoch": 1.896838602329451, "grad_norm": 8.129812240600586, "learning_rate": 5e-06, "loss": 0.6213, "num_input_tokens_seen": 35744772, "step": 570 }, { "epoch": 1.896838602329451, "loss": 0.5385463237762451, "loss_ce": 0.0005214047851040959, "loss_iou": 0.14453125, "loss_num": 0.0498046875, "loss_xval": 0.5390625, "num_input_tokens_seen": 35744772, "step": 570 }, { "epoch": 1.9001663893510816, "grad_norm": 13.715632438659668, "learning_rate": 5e-06, "loss": 0.7359, "num_input_tokens_seen": 35808032, "step": 571 }, { "epoch": 1.9001663893510816, "loss": 0.7909501791000366, "loss_ce": 0.0007890532724559307, "loss_iou": 0.26953125, "loss_num": 0.051025390625, "loss_xval": 0.7890625, "num_input_tokens_seen": 35808032, "step": 571 }, { "epoch": 1.9034941763727122, "grad_norm": 15.428297996520996, "learning_rate": 5e-06, "loss": 0.9073, "num_input_tokens_seen": 35870888, "step": 572 }, { "epoch": 1.9034941763727122, "loss": 1.0250931978225708, "loss_ce": 0.0016556547489017248, "loss_iou": 0.337890625, "loss_num": 0.06982421875, "loss_xval": 1.0234375, "num_input_tokens_seen": 35870888, "step": 572 }, { "epoch": 1.9068219633943428, "grad_norm": 9.923898696899414, "learning_rate": 5e-06, "loss": 0.992, "num_input_tokens_seen": 35934240, "step": 573 }, { "epoch": 1.9068219633943428, "loss": 0.9211689233779907, "loss_ce": 0.0017353565199300647, "loss_iou": 0.248046875, "loss_num": 0.083984375, "loss_xval": 0.91796875, "num_input_tokens_seen": 35934240, "step": 573 }, { "epoch": 1.9101497504159735, "grad_norm": 15.104902267456055, "learning_rate": 5e-06, "loss": 0.7486, "num_input_tokens_seen": 35996968, "step": 574 }, { "epoch": 1.9101497504159735, "loss": 0.7508370876312256, "loss_ce": 0.0027901488356292248, "loss_iou": 0.25390625, "loss_num": 0.048095703125, "loss_xval": 0.75, "num_input_tokens_seen": 35996968, "step": 574 }, { "epoch": 1.913477537437604, "grad_norm": 27.97852325439453, "learning_rate": 5e-06, "loss": 0.8351, "num_input_tokens_seen": 36059152, "step": 575 }, { "epoch": 1.913477537437604, "loss": 0.7264510989189148, "loss_ce": 0.0008651453536003828, "loss_iou": 0.171875, "loss_num": 0.076171875, "loss_xval": 0.7265625, "num_input_tokens_seen": 36059152, "step": 575 }, { "epoch": 1.9168053244592347, "grad_norm": 20.044519424438477, "learning_rate": 5e-06, "loss": 0.9202, "num_input_tokens_seen": 36121384, "step": 576 }, { "epoch": 1.9168053244592347, "loss": 0.7155839204788208, "loss_ce": 0.0007402128539979458, "loss_iou": 0.171875, "loss_num": 0.07421875, "loss_xval": 0.71484375, "num_input_tokens_seen": 36121384, "step": 576 }, { "epoch": 1.9201331114808653, "grad_norm": 27.610477447509766, "learning_rate": 5e-06, "loss": 1.0044, "num_input_tokens_seen": 36183140, "step": 577 }, { "epoch": 1.9201331114808653, "loss": 1.052855134010315, "loss_ce": 0.0006090715178288519, "loss_iou": 0.376953125, "loss_num": 0.059814453125, "loss_xval": 1.0546875, "num_input_tokens_seen": 36183140, "step": 577 }, { "epoch": 1.923460898502496, "grad_norm": 15.871465682983398, "learning_rate": 5e-06, "loss": 0.7585, "num_input_tokens_seen": 36244504, "step": 578 }, { "epoch": 1.923460898502496, "loss": 0.5033844709396362, "loss_ce": 8.860958041623235e-05, "loss_iou": 0.12255859375, "loss_num": 0.051513671875, "loss_xval": 0.50390625, "num_input_tokens_seen": 36244504, "step": 578 }, { "epoch": 1.9267886855241265, "grad_norm": 14.993875503540039, "learning_rate": 5e-06, "loss": 1.0223, "num_input_tokens_seen": 36307388, "step": 579 }, { "epoch": 1.9267886855241265, "loss": 0.8857296705245972, "loss_ce": 0.0012081291060894728, "loss_iou": 0.34375, "loss_num": 0.03955078125, "loss_xval": 0.8828125, "num_input_tokens_seen": 36307388, "step": 579 }, { "epoch": 1.9301164725457571, "grad_norm": 24.496679306030273, "learning_rate": 5e-06, "loss": 1.0946, "num_input_tokens_seen": 36369840, "step": 580 }, { "epoch": 1.9301164725457571, "loss": 0.9771441221237183, "loss_ce": 0.0007036330061964691, "loss_iou": 0.326171875, "loss_num": 0.06494140625, "loss_xval": 0.9765625, "num_input_tokens_seen": 36369840, "step": 580 }, { "epoch": 1.9334442595673877, "grad_norm": 16.5665225982666, "learning_rate": 5e-06, "loss": 0.9718, "num_input_tokens_seen": 36430680, "step": 581 }, { "epoch": 1.9334442595673877, "loss": 1.3224194049835205, "loss_ce": 0.00039791452581994236, "loss_iou": 0.435546875, "loss_num": 0.08984375, "loss_xval": 1.3203125, "num_input_tokens_seen": 36430680, "step": 581 }, { "epoch": 1.9367720465890184, "grad_norm": 26.21039581298828, "learning_rate": 5e-06, "loss": 0.986, "num_input_tokens_seen": 36493932, "step": 582 }, { "epoch": 1.9367720465890184, "loss": 1.11195969581604, "loss_ce": 0.002096442738547921, "loss_iou": 0.38671875, "loss_num": 0.0673828125, "loss_xval": 1.109375, "num_input_tokens_seen": 36493932, "step": 582 }, { "epoch": 1.940099833610649, "grad_norm": 24.678203582763672, "learning_rate": 5e-06, "loss": 0.9522, "num_input_tokens_seen": 36555844, "step": 583 }, { "epoch": 1.940099833610649, "loss": 1.0085680484771729, "loss_ce": 0.0034411484375596046, "loss_iou": 0.302734375, "loss_num": 0.080078125, "loss_xval": 1.0078125, "num_input_tokens_seen": 36555844, "step": 583 }, { "epoch": 1.9434276206322796, "grad_norm": 12.278304100036621, "learning_rate": 5e-06, "loss": 0.8262, "num_input_tokens_seen": 36619640, "step": 584 }, { "epoch": 1.9434276206322796, "loss": 0.786083459854126, "loss_ce": 0.0004388842498883605, "loss_iou": 0.263671875, "loss_num": 0.052001953125, "loss_xval": 0.78515625, "num_input_tokens_seen": 36619640, "step": 584 }, { "epoch": 1.9467554076539102, "grad_norm": 11.144486427307129, "learning_rate": 5e-06, "loss": 0.7976, "num_input_tokens_seen": 36683012, "step": 585 }, { "epoch": 1.9467554076539102, "loss": 0.9428272843360901, "loss_ce": 0.00044451182475313544, "loss_iou": 0.296875, "loss_num": 0.06982421875, "loss_xval": 0.94140625, "num_input_tokens_seen": 36683012, "step": 585 }, { "epoch": 1.9500831946755408, "grad_norm": 11.807775497436523, "learning_rate": 5e-06, "loss": 0.7514, "num_input_tokens_seen": 36745356, "step": 586 }, { "epoch": 1.9500831946755408, "loss": 0.8311335444450378, "loss_ce": 7.881710916990414e-05, "loss_iou": 0.283203125, "loss_num": 0.052978515625, "loss_xval": 0.83203125, "num_input_tokens_seen": 36745356, "step": 586 }, { "epoch": 1.9534109816971714, "grad_norm": 11.127016067504883, "learning_rate": 5e-06, "loss": 0.7933, "num_input_tokens_seen": 36809432, "step": 587 }, { "epoch": 1.9534109816971714, "loss": 0.9195138216018677, "loss_ce": 0.0015450288774445653, "loss_iou": 0.2890625, "loss_num": 0.06787109375, "loss_xval": 0.91796875, "num_input_tokens_seen": 36809432, "step": 587 }, { "epoch": 1.956738768718802, "grad_norm": 10.099778175354004, "learning_rate": 5e-06, "loss": 1.3049, "num_input_tokens_seen": 36873560, "step": 588 }, { "epoch": 1.956738768718802, "loss": 1.321709394454956, "loss_ce": 0.002007234375923872, "loss_iou": 0.48828125, "loss_num": 0.06884765625, "loss_xval": 1.3203125, "num_input_tokens_seen": 36873560, "step": 588 }, { "epoch": 1.9600665557404326, "grad_norm": 18.197284698486328, "learning_rate": 5e-06, "loss": 1.0251, "num_input_tokens_seen": 36936776, "step": 589 }, { "epoch": 1.9600665557404326, "loss": 0.9879869818687439, "loss_ce": 0.0004381673061288893, "loss_iou": 0.3671875, "loss_num": 0.05078125, "loss_xval": 0.98828125, "num_input_tokens_seen": 36936776, "step": 589 }, { "epoch": 1.9633943427620633, "grad_norm": 30.173526763916016, "learning_rate": 5e-06, "loss": 0.7697, "num_input_tokens_seen": 36998688, "step": 590 }, { "epoch": 1.9633943427620633, "loss": 0.6133254170417786, "loss_ce": 0.00041039640200324357, "loss_iou": 0.1494140625, "loss_num": 0.06298828125, "loss_xval": 0.61328125, "num_input_tokens_seen": 36998688, "step": 590 }, { "epoch": 1.9667221297836939, "grad_norm": 28.585336685180664, "learning_rate": 5e-06, "loss": 0.8288, "num_input_tokens_seen": 37060948, "step": 591 }, { "epoch": 1.9667221297836939, "loss": 0.673335075378418, "loss_ce": 0.0004835330764763057, "loss_iou": 0.232421875, "loss_num": 0.04150390625, "loss_xval": 0.671875, "num_input_tokens_seen": 37060948, "step": 591 }, { "epoch": 1.9700499168053245, "grad_norm": 17.124629974365234, "learning_rate": 5e-06, "loss": 0.8759, "num_input_tokens_seen": 37124284, "step": 592 }, { "epoch": 1.9700499168053245, "loss": 0.8624151945114136, "loss_ce": 0.004382956773042679, "loss_iou": 0.306640625, "loss_num": 0.04931640625, "loss_xval": 0.859375, "num_input_tokens_seen": 37124284, "step": 592 }, { "epoch": 1.973377703826955, "grad_norm": 13.656801223754883, "learning_rate": 5e-06, "loss": 0.7541, "num_input_tokens_seen": 37187368, "step": 593 }, { "epoch": 1.973377703826955, "loss": 0.8068069815635681, "loss_ce": 0.0005325791426002979, "loss_iou": 0.28125, "loss_num": 0.049072265625, "loss_xval": 0.8046875, "num_input_tokens_seen": 37187368, "step": 593 }, { "epoch": 1.9767054908485857, "grad_norm": 50.78049850463867, "learning_rate": 5e-06, "loss": 1.0118, "num_input_tokens_seen": 37248784, "step": 594 }, { "epoch": 1.9767054908485857, "loss": 1.0193705558776855, "loss_ce": 8.3520746557042e-05, "loss_iou": 0.2890625, "loss_num": 0.08837890625, "loss_xval": 1.015625, "num_input_tokens_seen": 37248784, "step": 594 }, { "epoch": 1.9800332778702163, "grad_norm": 12.020565032958984, "learning_rate": 5e-06, "loss": 1.1149, "num_input_tokens_seen": 37313140, "step": 595 }, { "epoch": 1.9800332778702163, "loss": 0.9964249134063721, "loss_ce": 0.0005752903525717556, "loss_iou": 0.318359375, "loss_num": 0.0712890625, "loss_xval": 0.99609375, "num_input_tokens_seen": 37313140, "step": 595 }, { "epoch": 1.983361064891847, "grad_norm": 15.852691650390625, "learning_rate": 5e-06, "loss": 0.9825, "num_input_tokens_seen": 37374648, "step": 596 }, { "epoch": 1.983361064891847, "loss": 1.0676076412200928, "loss_ce": 0.0007131557213142514, "loss_iou": 0.357421875, "loss_num": 0.0703125, "loss_xval": 1.0703125, "num_input_tokens_seen": 37374648, "step": 596 }, { "epoch": 1.9866888519134775, "grad_norm": 13.392165184020996, "learning_rate": 5e-06, "loss": 0.7633, "num_input_tokens_seen": 37435772, "step": 597 }, { "epoch": 1.9866888519134775, "loss": 0.8616316318511963, "loss_ce": 5.93814293097239e-05, "loss_iou": 0.291015625, "loss_num": 0.05615234375, "loss_xval": 0.86328125, "num_input_tokens_seen": 37435772, "step": 597 }, { "epoch": 1.9900166389351082, "grad_norm": 19.98641586303711, "learning_rate": 5e-06, "loss": 0.8829, "num_input_tokens_seen": 37498976, "step": 598 }, { "epoch": 1.9900166389351082, "loss": 0.9794598817825317, "loss_ce": 0.0009442184818908572, "loss_iou": 0.34375, "loss_num": 0.057861328125, "loss_xval": 0.9765625, "num_input_tokens_seen": 37498976, "step": 598 }, { "epoch": 1.9933444259567388, "grad_norm": 20.405685424804688, "learning_rate": 5e-06, "loss": 0.7916, "num_input_tokens_seen": 37562808, "step": 599 }, { "epoch": 1.9933444259567388, "loss": 0.8605334758758545, "loss_ce": 0.0009143200004473329, "loss_iou": 0.33203125, "loss_num": 0.0390625, "loss_xval": 0.859375, "num_input_tokens_seen": 37562808, "step": 599 }, { "epoch": 1.9966722129783694, "grad_norm": 12.160991668701172, "learning_rate": 5e-06, "loss": 1.0432, "num_input_tokens_seen": 37627092, "step": 600 }, { "epoch": 1.9966722129783694, "loss": 1.4242031574249268, "loss_ce": 0.001351633109152317, "loss_iou": 0.486328125, "loss_num": 0.08984375, "loss_xval": 1.421875, "num_input_tokens_seen": 37627092, "step": 600 }, { "epoch": 2.0, "grad_norm": 14.517803192138672, "learning_rate": 5e-06, "loss": 0.8826, "num_input_tokens_seen": 37689136, "step": 601 }, { "epoch": 2.0, "loss": 0.7798376083374023, "loss_ce": 0.0012731285532936454, "loss_iou": 0.212890625, "loss_num": 0.07080078125, "loss_xval": 0.77734375, "num_input_tokens_seen": 37689136, "step": 601 }, { "epoch": 2.0033277870216306, "grad_norm": 30.973886489868164, "learning_rate": 5e-06, "loss": 1.0158, "num_input_tokens_seen": 37752860, "step": 602 }, { "epoch": 2.0033277870216306, "loss": 1.0603678226470947, "loss_ce": 6.51167647447437e-05, "loss_iou": 0.412109375, "loss_num": 0.0478515625, "loss_xval": 1.0625, "num_input_tokens_seen": 37752860, "step": 602 }, { "epoch": 2.0066555740432612, "grad_norm": 25.496232986450195, "learning_rate": 5e-06, "loss": 0.5947, "num_input_tokens_seen": 37813476, "step": 603 }, { "epoch": 2.0066555740432612, "loss": 0.40228742361068726, "loss_ce": 6.573630525963381e-05, "loss_iou": 0.0, "loss_num": 0.08056640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 37813476, "step": 603 }, { "epoch": 2.009983361064892, "grad_norm": 11.61388111114502, "learning_rate": 5e-06, "loss": 0.5502, "num_input_tokens_seen": 37873776, "step": 604 }, { "epoch": 2.009983361064892, "loss": 0.7943422794342041, "loss_ce": 0.0008852038299664855, "loss_iou": 0.2734375, "loss_num": 0.049072265625, "loss_xval": 0.79296875, "num_input_tokens_seen": 37873776, "step": 604 }, { "epoch": 2.0133111480865225, "grad_norm": 15.668353080749512, "learning_rate": 5e-06, "loss": 0.8569, "num_input_tokens_seen": 37936712, "step": 605 }, { "epoch": 2.0133111480865225, "loss": 1.3431980609893799, "loss_ce": 0.000912969873752445, "loss_iou": 0.49609375, "loss_num": 0.06982421875, "loss_xval": 1.34375, "num_input_tokens_seen": 37936712, "step": 605 }, { "epoch": 2.016638935108153, "grad_norm": 8.846783638000488, "learning_rate": 5e-06, "loss": 0.7561, "num_input_tokens_seen": 37997520, "step": 606 }, { "epoch": 2.016638935108153, "loss": 1.0741982460021973, "loss_ce": 0.00046769127948209643, "loss_iou": 0.369140625, "loss_num": 0.0673828125, "loss_xval": 1.0703125, "num_input_tokens_seen": 37997520, "step": 606 }, { "epoch": 2.0199667221297837, "grad_norm": 11.790915489196777, "learning_rate": 5e-06, "loss": 0.9225, "num_input_tokens_seen": 38060864, "step": 607 }, { "epoch": 2.0199667221297837, "loss": 0.7281050086021423, "loss_ce": 0.00044387669186107814, "loss_iou": 0.27734375, "loss_num": 0.034423828125, "loss_xval": 0.7265625, "num_input_tokens_seen": 38060864, "step": 607 }, { "epoch": 2.0232945091514143, "grad_norm": 12.154414176940918, "learning_rate": 5e-06, "loss": 0.8019, "num_input_tokens_seen": 38122352, "step": 608 }, { "epoch": 2.0232945091514143, "loss": 0.825474739074707, "loss_ce": 3.521280086715706e-05, "loss_iou": 0.25390625, "loss_num": 0.0634765625, "loss_xval": 0.82421875, "num_input_tokens_seen": 38122352, "step": 608 }, { "epoch": 2.026622296173045, "grad_norm": 16.283340454101562, "learning_rate": 5e-06, "loss": 1.0835, "num_input_tokens_seen": 38186384, "step": 609 }, { "epoch": 2.026622296173045, "loss": 1.279355525970459, "loss_ce": 0.0005468539893627167, "loss_iou": 0.423828125, "loss_num": 0.0859375, "loss_xval": 1.28125, "num_input_tokens_seen": 38186384, "step": 609 }, { "epoch": 2.0299500831946755, "grad_norm": 21.782196044921875, "learning_rate": 5e-06, "loss": 0.9711, "num_input_tokens_seen": 38249732, "step": 610 }, { "epoch": 2.0299500831946755, "loss": 1.042248010635376, "loss_ce": 0.0012324631679803133, "loss_iou": 0.4296875, "loss_num": 0.0361328125, "loss_xval": 1.0390625, "num_input_tokens_seen": 38249732, "step": 610 }, { "epoch": 2.033277870216306, "grad_norm": 7.99960470199585, "learning_rate": 5e-06, "loss": 0.7002, "num_input_tokens_seen": 38310568, "step": 611 }, { "epoch": 2.033277870216306, "loss": 0.9979411363601685, "loss_ce": 0.00038251292426139116, "loss_iou": 0.3359375, "loss_num": 0.06494140625, "loss_xval": 0.99609375, "num_input_tokens_seen": 38310568, "step": 611 }, { "epoch": 2.0366056572379367, "grad_norm": 14.15860652923584, "learning_rate": 5e-06, "loss": 0.7936, "num_input_tokens_seen": 38374728, "step": 612 }, { "epoch": 2.0366056572379367, "loss": 0.8470059633255005, "loss_ce": 0.00032628036569803953, "loss_iou": 0.28515625, "loss_num": 0.0556640625, "loss_xval": 0.84765625, "num_input_tokens_seen": 38374728, "step": 612 }, { "epoch": 2.0399334442595674, "grad_norm": 21.689844131469727, "learning_rate": 5e-06, "loss": 1.0208, "num_input_tokens_seen": 38440220, "step": 613 }, { "epoch": 2.0399334442595674, "loss": 1.0721608400344849, "loss_ce": 0.0016042077913880348, "loss_iou": 0.41796875, "loss_num": 0.046875, "loss_xval": 1.0703125, "num_input_tokens_seen": 38440220, "step": 613 }, { "epoch": 2.043261231281198, "grad_norm": 29.98301887512207, "learning_rate": 5e-06, "loss": 0.8499, "num_input_tokens_seen": 38503540, "step": 614 }, { "epoch": 2.043261231281198, "loss": 0.7557715773582458, "loss_ce": 0.0025977180339396, "loss_iou": 0.24609375, "loss_num": 0.052001953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 38503540, "step": 614 }, { "epoch": 2.0465890183028286, "grad_norm": 25.389629364013672, "learning_rate": 5e-06, "loss": 1.1726, "num_input_tokens_seen": 38566084, "step": 615 }, { "epoch": 2.0465890183028286, "loss": 1.1096099615097046, "loss_ce": 0.0007232207572087646, "loss_iou": 0.3984375, "loss_num": 0.06298828125, "loss_xval": 1.109375, "num_input_tokens_seen": 38566084, "step": 615 }, { "epoch": 2.049916805324459, "grad_norm": 40.99822998046875, "learning_rate": 5e-06, "loss": 0.8326, "num_input_tokens_seen": 38628036, "step": 616 }, { "epoch": 2.049916805324459, "loss": 1.0969488620758057, "loss_ce": 2.5054428988369182e-05, "loss_iou": 0.388671875, "loss_num": 0.06396484375, "loss_xval": 1.09375, "num_input_tokens_seen": 38628036, "step": 616 }, { "epoch": 2.05324459234609, "grad_norm": 14.922050476074219, "learning_rate": 5e-06, "loss": 0.9263, "num_input_tokens_seen": 38691752, "step": 617 }, { "epoch": 2.05324459234609, "loss": 0.816463828086853, "loss_ce": 5.754768062615767e-05, "loss_iou": 0.310546875, "loss_num": 0.039306640625, "loss_xval": 0.81640625, "num_input_tokens_seen": 38691752, "step": 617 }, { "epoch": 2.0565723793677204, "grad_norm": 7.948287010192871, "learning_rate": 5e-06, "loss": 0.7192, "num_input_tokens_seen": 38753040, "step": 618 }, { "epoch": 2.0565723793677204, "loss": 0.7961825132369995, "loss_ce": 0.0005282175843603909, "loss_iou": 0.2421875, "loss_num": 0.06201171875, "loss_xval": 0.796875, "num_input_tokens_seen": 38753040, "step": 618 }, { "epoch": 2.059900166389351, "grad_norm": 11.027917861938477, "learning_rate": 5e-06, "loss": 0.6517, "num_input_tokens_seen": 38814072, "step": 619 }, { "epoch": 2.059900166389351, "loss": 0.7890029549598694, "loss_ce": 0.0004287601332180202, "loss_iou": 0.2314453125, "loss_num": 0.0654296875, "loss_xval": 0.7890625, "num_input_tokens_seen": 38814072, "step": 619 }, { "epoch": 2.0632279534109816, "grad_norm": 14.607577323913574, "learning_rate": 5e-06, "loss": 1.1518, "num_input_tokens_seen": 38874928, "step": 620 }, { "epoch": 2.0632279534109816, "loss": 0.8943509459495544, "loss_ce": 6.386065069818869e-05, "loss_iou": 0.275390625, "loss_num": 0.06884765625, "loss_xval": 0.89453125, "num_input_tokens_seen": 38874928, "step": 620 }, { "epoch": 2.0665557404326123, "grad_norm": 15.261740684509277, "learning_rate": 5e-06, "loss": 0.943, "num_input_tokens_seen": 38938500, "step": 621 }, { "epoch": 2.0665557404326123, "loss": 0.9635213017463684, "loss_ce": 0.00014242026372812688, "loss_iou": 0.345703125, "loss_num": 0.054931640625, "loss_xval": 0.96484375, "num_input_tokens_seen": 38938500, "step": 621 }, { "epoch": 2.069883527454243, "grad_norm": 24.183643341064453, "learning_rate": 5e-06, "loss": 0.8083, "num_input_tokens_seen": 39000448, "step": 622 }, { "epoch": 2.069883527454243, "loss": 0.7654788494110107, "loss_ce": 9.800932457437739e-05, "loss_iou": 0.2490234375, "loss_num": 0.053466796875, "loss_xval": 0.765625, "num_input_tokens_seen": 39000448, "step": 622 }, { "epoch": 2.0732113144758735, "grad_norm": 12.5536527633667, "learning_rate": 5e-06, "loss": 0.742, "num_input_tokens_seen": 39062856, "step": 623 }, { "epoch": 2.0732113144758735, "loss": 0.737716555595398, "loss_ce": 0.0005339848576113582, "loss_iou": 0.236328125, "loss_num": 0.052978515625, "loss_xval": 0.73828125, "num_input_tokens_seen": 39062856, "step": 623 }, { "epoch": 2.076539101497504, "grad_norm": 17.075580596923828, "learning_rate": 5e-06, "loss": 0.8302, "num_input_tokens_seen": 39125680, "step": 624 }, { "epoch": 2.076539101497504, "loss": 0.80293869972229, "loss_ce": 0.0014250573003664613, "loss_iou": 0.251953125, "loss_num": 0.0595703125, "loss_xval": 0.80078125, "num_input_tokens_seen": 39125680, "step": 624 }, { "epoch": 2.0798668885191347, "grad_norm": 27.300207138061523, "learning_rate": 5e-06, "loss": 0.9867, "num_input_tokens_seen": 39189848, "step": 625 }, { "epoch": 2.0798668885191347, "loss": 1.0080411434173584, "loss_ce": 0.00047270796494558454, "loss_iou": 0.375, "loss_num": 0.05126953125, "loss_xval": 1.0078125, "num_input_tokens_seen": 39189848, "step": 625 }, { "epoch": 2.0831946755407653, "grad_norm": 45.939727783203125, "learning_rate": 5e-06, "loss": 0.8271, "num_input_tokens_seen": 39251912, "step": 626 }, { "epoch": 2.0831946755407653, "loss": 0.9387578964233398, "loss_ce": 0.0012579010799527168, "loss_iou": 0.27734375, "loss_num": 0.07666015625, "loss_xval": 0.9375, "num_input_tokens_seen": 39251912, "step": 626 }, { "epoch": 2.086522462562396, "grad_norm": 6.726690769195557, "learning_rate": 5e-06, "loss": 0.5493, "num_input_tokens_seen": 39313960, "step": 627 }, { "epoch": 2.086522462562396, "loss": 0.49948009848594666, "loss_ce": 0.0003345796139910817, "loss_iou": 0.06640625, "loss_num": 0.0732421875, "loss_xval": 0.5, "num_input_tokens_seen": 39313960, "step": 627 }, { "epoch": 2.0898502495840265, "grad_norm": 13.367768287658691, "learning_rate": 5e-06, "loss": 0.7786, "num_input_tokens_seen": 39377692, "step": 628 }, { "epoch": 2.0898502495840265, "loss": 0.9391250610351562, "loss_ce": 0.0001602199045009911, "loss_iou": 0.32421875, "loss_num": 0.057861328125, "loss_xval": 0.9375, "num_input_tokens_seen": 39377692, "step": 628 }, { "epoch": 2.093178036605657, "grad_norm": 27.149415969848633, "learning_rate": 5e-06, "loss": 1.1158, "num_input_tokens_seen": 39441180, "step": 629 }, { "epoch": 2.093178036605657, "loss": 1.2539303302764893, "loss_ce": 0.0005124328308738768, "loss_iou": 0.4609375, "loss_num": 0.06591796875, "loss_xval": 1.25, "num_input_tokens_seen": 39441180, "step": 629 }, { "epoch": 2.0965058236272878, "grad_norm": 28.47737693786621, "learning_rate": 5e-06, "loss": 0.6266, "num_input_tokens_seen": 39503432, "step": 630 }, { "epoch": 2.0965058236272878, "loss": 0.7614917159080505, "loss_ce": 0.0017260813619941473, "loss_iou": 0.302734375, "loss_num": 0.0308837890625, "loss_xval": 0.7578125, "num_input_tokens_seen": 39503432, "step": 630 }, { "epoch": 2.0998336106489184, "grad_norm": 24.26272201538086, "learning_rate": 5e-06, "loss": 0.8803, "num_input_tokens_seen": 39566748, "step": 631 }, { "epoch": 2.0998336106489184, "loss": 0.9874843955039978, "loss_ce": 0.000790032499935478, "loss_iou": 0.353515625, "loss_num": 0.0556640625, "loss_xval": 0.98828125, "num_input_tokens_seen": 39566748, "step": 631 }, { "epoch": 2.103161397670549, "grad_norm": 38.4467658996582, "learning_rate": 5e-06, "loss": 0.7448, "num_input_tokens_seen": 39629348, "step": 632 }, { "epoch": 2.103161397670549, "loss": 0.6154653429985046, "loss_ce": 0.00023097392113413662, "loss_iou": 0.2060546875, "loss_num": 0.040771484375, "loss_xval": 0.6171875, "num_input_tokens_seen": 39629348, "step": 632 }, { "epoch": 2.1064891846921796, "grad_norm": 22.152023315429688, "learning_rate": 5e-06, "loss": 0.7819, "num_input_tokens_seen": 39691812, "step": 633 }, { "epoch": 2.1064891846921796, "loss": 0.7278599739074707, "loss_ce": 7.679283589823171e-05, "loss_iou": 0.25390625, "loss_num": 0.043701171875, "loss_xval": 0.7265625, "num_input_tokens_seen": 39691812, "step": 633 }, { "epoch": 2.10981697171381, "grad_norm": 8.988296508789062, "learning_rate": 5e-06, "loss": 0.8629, "num_input_tokens_seen": 39753744, "step": 634 }, { "epoch": 2.10981697171381, "loss": 1.1058130264282227, "loss_ce": 0.011330533772706985, "loss_iou": 0.3671875, "loss_num": 0.072265625, "loss_xval": 1.09375, "num_input_tokens_seen": 39753744, "step": 634 }, { "epoch": 2.113144758735441, "grad_norm": 12.001303672790527, "learning_rate": 5e-06, "loss": 0.8412, "num_input_tokens_seen": 39817240, "step": 635 }, { "epoch": 2.113144758735441, "loss": 0.5625143051147461, "loss_ce": 0.0003194359305780381, "loss_iou": 0.17578125, "loss_num": 0.0419921875, "loss_xval": 0.5625, "num_input_tokens_seen": 39817240, "step": 635 }, { "epoch": 2.1164725457570714, "grad_norm": 28.099958419799805, "learning_rate": 5e-06, "loss": 0.9895, "num_input_tokens_seen": 39880520, "step": 636 }, { "epoch": 2.1164725457570714, "loss": 0.6317470073699951, "loss_ce": 3.3161224564537406e-05, "loss_iou": 0.1923828125, "loss_num": 0.04931640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 39880520, "step": 636 }, { "epoch": 2.119800332778702, "grad_norm": 22.06964111328125, "learning_rate": 5e-06, "loss": 0.6823, "num_input_tokens_seen": 39941644, "step": 637 }, { "epoch": 2.119800332778702, "loss": 0.7159059047698975, "loss_ce": 2.454487548675388e-05, "loss_iou": 0.2216796875, "loss_num": 0.054443359375, "loss_xval": 0.71484375, "num_input_tokens_seen": 39941644, "step": 637 }, { "epoch": 2.1231281198003327, "grad_norm": 11.363022804260254, "learning_rate": 5e-06, "loss": 0.6387, "num_input_tokens_seen": 40003996, "step": 638 }, { "epoch": 2.1231281198003327, "loss": 0.6509506702423096, "loss_ce": 7.174572965595871e-05, "loss_iou": 0.2392578125, "loss_num": 0.034423828125, "loss_xval": 0.65234375, "num_input_tokens_seen": 40003996, "step": 638 }, { "epoch": 2.1264559068219633, "grad_norm": 16.36107063293457, "learning_rate": 5e-06, "loss": 0.989, "num_input_tokens_seen": 40068608, "step": 639 }, { "epoch": 2.1264559068219633, "loss": 0.9016914367675781, "loss_ce": 0.0010567284189164639, "loss_iou": 0.33984375, "loss_num": 0.04443359375, "loss_xval": 0.90234375, "num_input_tokens_seen": 40068608, "step": 639 }, { "epoch": 2.129783693843594, "grad_norm": 13.093620300292969, "learning_rate": 5e-06, "loss": 0.9181, "num_input_tokens_seen": 40131000, "step": 640 }, { "epoch": 2.129783693843594, "loss": 1.0685086250305176, "loss_ce": 0.00039345581899397075, "loss_iou": 0.3515625, "loss_num": 0.07373046875, "loss_xval": 1.0703125, "num_input_tokens_seen": 40131000, "step": 640 }, { "epoch": 2.1331114808652245, "grad_norm": 19.350475311279297, "learning_rate": 5e-06, "loss": 0.8279, "num_input_tokens_seen": 40193396, "step": 641 }, { "epoch": 2.1331114808652245, "loss": 0.9883468151092529, "loss_ce": 6.548353121615946e-05, "loss_iou": 0.357421875, "loss_num": 0.054443359375, "loss_xval": 0.98828125, "num_input_tokens_seen": 40193396, "step": 641 }, { "epoch": 2.136439267886855, "grad_norm": 15.550766944885254, "learning_rate": 5e-06, "loss": 0.8769, "num_input_tokens_seen": 40254804, "step": 642 }, { "epoch": 2.136439267886855, "loss": 0.7730555534362793, "loss_ce": 0.0010829265229403973, "loss_iou": 0.17578125, "loss_num": 0.083984375, "loss_xval": 0.7734375, "num_input_tokens_seen": 40254804, "step": 642 }, { "epoch": 2.1397670549084857, "grad_norm": 19.069194793701172, "learning_rate": 5e-06, "loss": 0.8022, "num_input_tokens_seen": 40318044, "step": 643 }, { "epoch": 2.1397670549084857, "loss": 0.8988676071166992, "loss_ce": 0.0006743170088157058, "loss_iou": 0.337890625, "loss_num": 0.04443359375, "loss_xval": 0.8984375, "num_input_tokens_seen": 40318044, "step": 643 }, { "epoch": 2.1430948419301163, "grad_norm": 21.11611557006836, "learning_rate": 5e-06, "loss": 0.8242, "num_input_tokens_seen": 40379508, "step": 644 }, { "epoch": 2.1430948419301163, "loss": 0.5716738104820251, "loss_ce": 0.0003847332263831049, "loss_iou": 0.15625, "loss_num": 0.0517578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 40379508, "step": 644 }, { "epoch": 2.146422628951747, "grad_norm": 9.42721176147461, "learning_rate": 5e-06, "loss": 0.7245, "num_input_tokens_seen": 40441996, "step": 645 }, { "epoch": 2.146422628951747, "loss": 0.39663252234458923, "loss_ce": 0.001063689822331071, "loss_iou": 0.10693359375, "loss_num": 0.0361328125, "loss_xval": 0.396484375, "num_input_tokens_seen": 40441996, "step": 645 }, { "epoch": 2.1497504159733776, "grad_norm": 17.946130752563477, "learning_rate": 5e-06, "loss": 1.3419, "num_input_tokens_seen": 40506504, "step": 646 }, { "epoch": 2.1497504159733776, "loss": 1.1742212772369385, "loss_ce": 0.0013696793466806412, "loss_iou": 0.419921875, "loss_num": 0.06689453125, "loss_xval": 1.171875, "num_input_tokens_seen": 40506504, "step": 646 }, { "epoch": 2.153078202995008, "grad_norm": 15.945223808288574, "learning_rate": 5e-06, "loss": 0.9095, "num_input_tokens_seen": 40569228, "step": 647 }, { "epoch": 2.153078202995008, "loss": 0.8010599613189697, "loss_ce": 3.459819345152937e-05, "loss_iou": 0.2490234375, "loss_num": 0.060546875, "loss_xval": 0.80078125, "num_input_tokens_seen": 40569228, "step": 647 }, { "epoch": 2.156405990016639, "grad_norm": 17.24291229248047, "learning_rate": 5e-06, "loss": 0.7383, "num_input_tokens_seen": 40631988, "step": 648 }, { "epoch": 2.156405990016639, "loss": 0.8338418006896973, "loss_ce": 0.00034570720163173974, "loss_iou": 0.27734375, "loss_num": 0.05517578125, "loss_xval": 0.83203125, "num_input_tokens_seen": 40631988, "step": 648 }, { "epoch": 2.1597337770382694, "grad_norm": 32.635887145996094, "learning_rate": 5e-06, "loss": 0.873, "num_input_tokens_seen": 40694476, "step": 649 }, { "epoch": 2.1597337770382694, "loss": 0.7903171181678772, "loss_ce": 3.390955680515617e-05, "loss_iou": 0.25, "loss_num": 0.057861328125, "loss_xval": 0.7890625, "num_input_tokens_seen": 40694476, "step": 649 }, { "epoch": 2.1630615640599, "grad_norm": 12.058259963989258, "learning_rate": 5e-06, "loss": 0.7904, "num_input_tokens_seen": 40757460, "step": 650 }, { "epoch": 2.1630615640599, "loss": 0.8955097198486328, "loss_ce": 0.0004902560031041503, "loss_iou": 0.31640625, "loss_num": 0.05224609375, "loss_xval": 0.89453125, "num_input_tokens_seen": 40757460, "step": 650 }, { "epoch": 2.1663893510815306, "grad_norm": 15.843310356140137, "learning_rate": 5e-06, "loss": 0.8578, "num_input_tokens_seen": 40820320, "step": 651 }, { "epoch": 2.1663893510815306, "loss": 0.7772939205169678, "loss_ce": 0.0008046207949519157, "loss_iou": 0.25390625, "loss_num": 0.05322265625, "loss_xval": 0.77734375, "num_input_tokens_seen": 40820320, "step": 651 }, { "epoch": 2.1697171381031612, "grad_norm": 9.728944778442383, "learning_rate": 5e-06, "loss": 0.7636, "num_input_tokens_seen": 40883392, "step": 652 }, { "epoch": 2.1697171381031612, "loss": 0.5050860643386841, "loss_ce": 0.0028887807857245207, "loss_iou": 0.1103515625, "loss_num": 0.05615234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 40883392, "step": 652 }, { "epoch": 2.173044925124792, "grad_norm": 19.815095901489258, "learning_rate": 5e-06, "loss": 0.8418, "num_input_tokens_seen": 40945804, "step": 653 }, { "epoch": 2.173044925124792, "loss": 0.5374061465263367, "loss_ce": 5.2633422455983236e-05, "loss_iou": 0.1806640625, "loss_num": 0.03515625, "loss_xval": 0.5390625, "num_input_tokens_seen": 40945804, "step": 653 }, { "epoch": 2.1763727121464225, "grad_norm": 11.395315170288086, "learning_rate": 5e-06, "loss": 0.9589, "num_input_tokens_seen": 41008440, "step": 654 }, { "epoch": 2.1763727121464225, "loss": 0.8540284633636475, "loss_ce": 2.4587085135863163e-05, "loss_iou": 0.267578125, "loss_num": 0.06396484375, "loss_xval": 0.85546875, "num_input_tokens_seen": 41008440, "step": 654 }, { "epoch": 2.179700499168053, "grad_norm": 50.023685455322266, "learning_rate": 5e-06, "loss": 1.2023, "num_input_tokens_seen": 41072936, "step": 655 }, { "epoch": 2.179700499168053, "loss": 1.007543921470642, "loss_ce": 0.0004638732934836298, "loss_iou": 0.373046875, "loss_num": 0.052490234375, "loss_xval": 1.0078125, "num_input_tokens_seen": 41072936, "step": 655 }, { "epoch": 2.1830282861896837, "grad_norm": 10.923096656799316, "learning_rate": 5e-06, "loss": 0.8021, "num_input_tokens_seen": 41136232, "step": 656 }, { "epoch": 2.1830282861896837, "loss": 0.6872982978820801, "loss_ce": 0.0007748391944915056, "loss_iou": 0.23046875, "loss_num": 0.045166015625, "loss_xval": 0.6875, "num_input_tokens_seen": 41136232, "step": 656 }, { "epoch": 2.1863560732113143, "grad_norm": 24.93172836303711, "learning_rate": 5e-06, "loss": 1.0529, "num_input_tokens_seen": 41201032, "step": 657 }, { "epoch": 2.1863560732113143, "loss": 0.9588685035705566, "loss_ce": 0.0011048305314034224, "loss_iou": 0.32421875, "loss_num": 0.062255859375, "loss_xval": 0.95703125, "num_input_tokens_seen": 41201032, "step": 657 }, { "epoch": 2.189683860232945, "grad_norm": 15.047933578491211, "learning_rate": 5e-06, "loss": 0.8714, "num_input_tokens_seen": 41263140, "step": 658 }, { "epoch": 2.189683860232945, "loss": 0.6614832282066345, "loss_ce": 0.0004114416951779276, "loss_iou": 0.1845703125, "loss_num": 0.058349609375, "loss_xval": 0.66015625, "num_input_tokens_seen": 41263140, "step": 658 }, { "epoch": 2.1930116472545755, "grad_norm": 9.536981582641602, "learning_rate": 5e-06, "loss": 0.6494, "num_input_tokens_seen": 41324496, "step": 659 }, { "epoch": 2.1930116472545755, "loss": 0.6628710031509399, "loss_ce": 0.0007616429938934743, "loss_iou": 0.169921875, "loss_num": 0.06396484375, "loss_xval": 0.6640625, "num_input_tokens_seen": 41324496, "step": 659 }, { "epoch": 2.196339434276206, "grad_norm": 17.474279403686523, "learning_rate": 5e-06, "loss": 0.9896, "num_input_tokens_seen": 41387404, "step": 660 }, { "epoch": 2.196339434276206, "loss": 1.0129848718643188, "loss_ce": 0.0012661850778385997, "loss_iou": 0.228515625, "loss_num": 0.11083984375, "loss_xval": 1.015625, "num_input_tokens_seen": 41387404, "step": 660 }, { "epoch": 2.1996672212978368, "grad_norm": 12.42483901977539, "learning_rate": 5e-06, "loss": 0.7325, "num_input_tokens_seen": 41450608, "step": 661 }, { "epoch": 2.1996672212978368, "loss": 0.6042838096618652, "loss_ce": 3.571172783267684e-05, "loss_iou": 0.2119140625, "loss_num": 0.035888671875, "loss_xval": 0.60546875, "num_input_tokens_seen": 41450608, "step": 661 }, { "epoch": 2.2029950083194674, "grad_norm": 10.884109497070312, "learning_rate": 5e-06, "loss": 0.7692, "num_input_tokens_seen": 41513084, "step": 662 }, { "epoch": 2.2029950083194674, "loss": 0.7437999248504639, "loss_ce": 2.553203739807941e-05, "loss_iou": 0.251953125, "loss_num": 0.047607421875, "loss_xval": 0.7421875, "num_input_tokens_seen": 41513084, "step": 662 }, { "epoch": 2.206322795341098, "grad_norm": 18.17184066772461, "learning_rate": 5e-06, "loss": 0.6812, "num_input_tokens_seen": 41574756, "step": 663 }, { "epoch": 2.206322795341098, "loss": 0.6215100288391113, "loss_ce": 5.003004480386153e-05, "loss_iou": 0.1806640625, "loss_num": 0.052001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 41574756, "step": 663 }, { "epoch": 2.2096505823627286, "grad_norm": 7.696889400482178, "learning_rate": 5e-06, "loss": 0.8513, "num_input_tokens_seen": 41637364, "step": 664 }, { "epoch": 2.2096505823627286, "loss": 0.767685055732727, "loss_ce": 0.0003510438255034387, "loss_iou": 0.205078125, "loss_num": 0.07177734375, "loss_xval": 0.765625, "num_input_tokens_seen": 41637364, "step": 664 }, { "epoch": 2.212978369384359, "grad_norm": 8.297330856323242, "learning_rate": 5e-06, "loss": 0.8774, "num_input_tokens_seen": 41699440, "step": 665 }, { "epoch": 2.212978369384359, "loss": 0.7078053951263428, "loss_ce": 0.000774118525441736, "loss_iou": 0.171875, "loss_num": 0.07275390625, "loss_xval": 0.70703125, "num_input_tokens_seen": 41699440, "step": 665 }, { "epoch": 2.21630615640599, "grad_norm": 9.820082664489746, "learning_rate": 5e-06, "loss": 0.628, "num_input_tokens_seen": 41761504, "step": 666 }, { "epoch": 2.21630615640599, "loss": 0.7205036878585815, "loss_ce": 4.47365346190054e-05, "loss_iou": 0.2421875, "loss_num": 0.046875, "loss_xval": 0.71875, "num_input_tokens_seen": 41761504, "step": 666 }, { "epoch": 2.2196339434276204, "grad_norm": 11.582853317260742, "learning_rate": 5e-06, "loss": 0.7409, "num_input_tokens_seen": 41824208, "step": 667 }, { "epoch": 2.2196339434276204, "loss": 1.0360679626464844, "loss_ce": 0.00030133899417705834, "loss_iou": 0.359375, "loss_num": 0.06396484375, "loss_xval": 1.0390625, "num_input_tokens_seen": 41824208, "step": 667 }, { "epoch": 2.222961730449251, "grad_norm": 18.858745574951172, "learning_rate": 5e-06, "loss": 0.8452, "num_input_tokens_seen": 41887632, "step": 668 }, { "epoch": 2.222961730449251, "loss": 0.766628623008728, "loss_ce": 2.7076355763711035e-05, "loss_iou": 0.28125, "loss_num": 0.040771484375, "loss_xval": 0.765625, "num_input_tokens_seen": 41887632, "step": 668 }, { "epoch": 2.2262895174708817, "grad_norm": 26.38115692138672, "learning_rate": 5e-06, "loss": 0.9085, "num_input_tokens_seen": 41950464, "step": 669 }, { "epoch": 2.2262895174708817, "loss": 0.9082783460617065, "loss_ce": 7.520228973589838e-05, "loss_iou": 0.35546875, "loss_num": 0.039794921875, "loss_xval": 0.90625, "num_input_tokens_seen": 41950464, "step": 669 }, { "epoch": 2.2296173044925123, "grad_norm": 29.40104103088379, "learning_rate": 5e-06, "loss": 0.9208, "num_input_tokens_seen": 42013876, "step": 670 }, { "epoch": 2.2296173044925123, "loss": 0.7661808133125305, "loss_ce": 6.753447814844549e-05, "loss_iou": 0.283203125, "loss_num": 0.0400390625, "loss_xval": 0.765625, "num_input_tokens_seen": 42013876, "step": 670 }, { "epoch": 2.232945091514143, "grad_norm": 18.40077018737793, "learning_rate": 5e-06, "loss": 0.7364, "num_input_tokens_seen": 42076504, "step": 671 }, { "epoch": 2.232945091514143, "loss": 0.5976995229721069, "loss_ce": 0.0005315973539836705, "loss_iou": 0.1767578125, "loss_num": 0.048583984375, "loss_xval": 0.59765625, "num_input_tokens_seen": 42076504, "step": 671 }, { "epoch": 2.2362728785357735, "grad_norm": 12.559344291687012, "learning_rate": 5e-06, "loss": 0.8679, "num_input_tokens_seen": 42138656, "step": 672 }, { "epoch": 2.2362728785357735, "loss": 0.9127550721168518, "loss_ce": 0.0016222422709688544, "loss_iou": 0.314453125, "loss_num": 0.055908203125, "loss_xval": 0.91015625, "num_input_tokens_seen": 42138656, "step": 672 }, { "epoch": 2.239600665557404, "grad_norm": 7.66146183013916, "learning_rate": 5e-06, "loss": 0.8083, "num_input_tokens_seen": 42201868, "step": 673 }, { "epoch": 2.239600665557404, "loss": 0.7142347693443298, "loss_ce": 0.0010999977821484208, "loss_iou": 0.25, "loss_num": 0.04248046875, "loss_xval": 0.71484375, "num_input_tokens_seen": 42201868, "step": 673 }, { "epoch": 2.2429284525790347, "grad_norm": 6.687897205352783, "learning_rate": 5e-06, "loss": 0.5835, "num_input_tokens_seen": 42262560, "step": 674 }, { "epoch": 2.2429284525790347, "loss": 0.590713381767273, "loss_ce": 0.0005034485948272049, "loss_iou": 0.193359375, "loss_num": 0.041015625, "loss_xval": 0.58984375, "num_input_tokens_seen": 42262560, "step": 674 }, { "epoch": 2.2462562396006653, "grad_norm": 30.33318328857422, "learning_rate": 5e-06, "loss": 0.6662, "num_input_tokens_seen": 42323624, "step": 675 }, { "epoch": 2.2462562396006653, "loss": 0.38735389709472656, "loss_ce": 2.4773054974502884e-05, "loss_iou": 0.0966796875, "loss_num": 0.038818359375, "loss_xval": 0.38671875, "num_input_tokens_seen": 42323624, "step": 675 }, { "epoch": 2.249584026622296, "grad_norm": 16.869159698486328, "learning_rate": 5e-06, "loss": 0.6675, "num_input_tokens_seen": 42384968, "step": 676 }, { "epoch": 2.249584026622296, "loss": 0.6275444030761719, "loss_ce": 0.00010297947301296517, "loss_iou": 0.2119140625, "loss_num": 0.041015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 42384968, "step": 676 }, { "epoch": 2.2529118136439266, "grad_norm": 35.7281379699707, "learning_rate": 5e-06, "loss": 0.641, "num_input_tokens_seen": 42446912, "step": 677 }, { "epoch": 2.2529118136439266, "loss": 0.5296227931976318, "loss_ce": 0.00032592000206932425, "loss_iou": 0.19140625, "loss_num": 0.029541015625, "loss_xval": 0.53125, "num_input_tokens_seen": 42446912, "step": 677 }, { "epoch": 2.256239600665557, "grad_norm": 33.22330856323242, "learning_rate": 5e-06, "loss": 0.9766, "num_input_tokens_seen": 42510000, "step": 678 }, { "epoch": 2.256239600665557, "loss": 0.9966510534286499, "loss_ce": 0.0005573289236053824, "loss_iou": 0.37109375, "loss_num": 0.050537109375, "loss_xval": 0.99609375, "num_input_tokens_seen": 42510000, "step": 678 }, { "epoch": 2.259567387687188, "grad_norm": 19.551441192626953, "learning_rate": 5e-06, "loss": 0.7846, "num_input_tokens_seen": 42573224, "step": 679 }, { "epoch": 2.259567387687188, "loss": 0.7725611925125122, "loss_ce": 0.0001002717181108892, "loss_iou": 0.298828125, "loss_num": 0.03466796875, "loss_xval": 0.7734375, "num_input_tokens_seen": 42573224, "step": 679 }, { "epoch": 2.2628951747088184, "grad_norm": 10.101374626159668, "learning_rate": 5e-06, "loss": 0.8712, "num_input_tokens_seen": 42634780, "step": 680 }, { "epoch": 2.2628951747088184, "loss": 0.9629559516906738, "loss_ce": 0.0007977750501595438, "loss_iou": 0.298828125, "loss_num": 0.07275390625, "loss_xval": 0.9609375, "num_input_tokens_seen": 42634780, "step": 680 }, { "epoch": 2.266222961730449, "grad_norm": 12.58094596862793, "learning_rate": 5e-06, "loss": 1.0314, "num_input_tokens_seen": 42698976, "step": 681 }, { "epoch": 2.266222961730449, "loss": 0.8273358345031738, "loss_ce": 6.533270789077505e-05, "loss_iou": 0.2890625, "loss_num": 0.04931640625, "loss_xval": 0.828125, "num_input_tokens_seen": 42698976, "step": 681 }, { "epoch": 2.2695507487520796, "grad_norm": 12.096894264221191, "learning_rate": 5e-06, "loss": 0.785, "num_input_tokens_seen": 42762252, "step": 682 }, { "epoch": 2.2695507487520796, "loss": 0.7928378582000732, "loss_ce": 0.0012118633603677154, "loss_iou": 0.23046875, "loss_num": 0.06591796875, "loss_xval": 0.79296875, "num_input_tokens_seen": 42762252, "step": 682 }, { "epoch": 2.2728785357737102, "grad_norm": 19.4775333404541, "learning_rate": 5e-06, "loss": 1.234, "num_input_tokens_seen": 42825968, "step": 683 }, { "epoch": 2.2728785357737102, "loss": 1.1148924827575684, "loss_ce": 0.00039050806662999094, "loss_iou": 0.34375, "loss_num": 0.0859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 42825968, "step": 683 }, { "epoch": 2.276206322795341, "grad_norm": 16.102102279663086, "learning_rate": 5e-06, "loss": 0.8429, "num_input_tokens_seen": 42888344, "step": 684 }, { "epoch": 2.276206322795341, "loss": 0.6326913237571716, "loss_ce": 0.00012292822066228837, "loss_iou": 0.18359375, "loss_num": 0.052734375, "loss_xval": 0.6328125, "num_input_tokens_seen": 42888344, "step": 684 }, { "epoch": 2.2795341098169715, "grad_norm": 9.076275825500488, "learning_rate": 5e-06, "loss": 0.9461, "num_input_tokens_seen": 42951328, "step": 685 }, { "epoch": 2.2795341098169715, "loss": 0.7907319664955139, "loss_ce": 0.0008149757049977779, "loss_iou": 0.216796875, "loss_num": 0.0712890625, "loss_xval": 0.7890625, "num_input_tokens_seen": 42951328, "step": 685 }, { "epoch": 2.2828618968386025, "grad_norm": 21.053485870361328, "learning_rate": 5e-06, "loss": 0.9269, "num_input_tokens_seen": 43014420, "step": 686 }, { "epoch": 2.2828618968386025, "loss": 0.7547416687011719, "loss_ce": 0.00034716277150437236, "loss_iou": 0.2109375, "loss_num": 0.06640625, "loss_xval": 0.75390625, "num_input_tokens_seen": 43014420, "step": 686 }, { "epoch": 2.286189683860233, "grad_norm": 15.344162940979004, "learning_rate": 5e-06, "loss": 0.8238, "num_input_tokens_seen": 43076760, "step": 687 }, { "epoch": 2.286189683860233, "loss": 0.658710241317749, "loss_ce": 1.882428114186041e-05, "loss_iou": 0.244140625, "loss_num": 0.03369140625, "loss_xval": 0.66015625, "num_input_tokens_seen": 43076760, "step": 687 }, { "epoch": 2.2895174708818637, "grad_norm": 12.810603141784668, "learning_rate": 5e-06, "loss": 0.8104, "num_input_tokens_seen": 43140168, "step": 688 }, { "epoch": 2.2895174708818637, "loss": 0.9727632999420166, "loss_ce": 0.00010709227353800088, "loss_iou": 0.3125, "loss_num": 0.0693359375, "loss_xval": 0.97265625, "num_input_tokens_seen": 43140168, "step": 688 }, { "epoch": 2.2928452579034944, "grad_norm": 42.084774017333984, "learning_rate": 5e-06, "loss": 0.7599, "num_input_tokens_seen": 43203960, "step": 689 }, { "epoch": 2.2928452579034944, "loss": 0.8948392868041992, "loss_ce": 0.00043005510815419257, "loss_iou": 0.326171875, "loss_num": 0.048583984375, "loss_xval": 0.89453125, "num_input_tokens_seen": 43203960, "step": 689 }, { "epoch": 2.296173044925125, "grad_norm": 21.960391998291016, "learning_rate": 5e-06, "loss": 0.9685, "num_input_tokens_seen": 43266248, "step": 690 }, { "epoch": 2.296173044925125, "loss": 0.8010978698730469, "loss_ce": 7.245346932904795e-05, "loss_iou": 0.287109375, "loss_num": 0.044677734375, "loss_xval": 0.80078125, "num_input_tokens_seen": 43266248, "step": 690 }, { "epoch": 2.2995008319467556, "grad_norm": 18.489133834838867, "learning_rate": 5e-06, "loss": 0.5364, "num_input_tokens_seen": 43327640, "step": 691 }, { "epoch": 2.2995008319467556, "loss": 0.6377068758010864, "loss_ce": 0.00013360046432353556, "loss_iou": 0.2060546875, "loss_num": 0.045166015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 43327640, "step": 691 }, { "epoch": 2.302828618968386, "grad_norm": 12.483297348022461, "learning_rate": 5e-06, "loss": 0.484, "num_input_tokens_seen": 43388716, "step": 692 }, { "epoch": 2.302828618968386, "loss": 0.4971961975097656, "loss_ce": 0.0001258695701835677, "loss_iou": 0.138671875, "loss_num": 0.0439453125, "loss_xval": 0.49609375, "num_input_tokens_seen": 43388716, "step": 692 }, { "epoch": 2.306156405990017, "grad_norm": 11.531423568725586, "learning_rate": 5e-06, "loss": 0.8333, "num_input_tokens_seen": 43452328, "step": 693 }, { "epoch": 2.306156405990017, "loss": 0.6683314442634583, "loss_ce": 0.00011856696801260114, "loss_iou": 0.2392578125, "loss_num": 0.037841796875, "loss_xval": 0.66796875, "num_input_tokens_seen": 43452328, "step": 693 }, { "epoch": 2.3094841930116474, "grad_norm": 5.993093013763428, "learning_rate": 5e-06, "loss": 0.6532, "num_input_tokens_seen": 43514696, "step": 694 }, { "epoch": 2.3094841930116474, "loss": 0.6426668167114258, "loss_ce": 0.00021079863654449582, "loss_iou": 0.2119140625, "loss_num": 0.04345703125, "loss_xval": 0.640625, "num_input_tokens_seen": 43514696, "step": 694 }, { "epoch": 2.312811980033278, "grad_norm": 18.50348472595215, "learning_rate": 5e-06, "loss": 0.8346, "num_input_tokens_seen": 43575372, "step": 695 }, { "epoch": 2.312811980033278, "loss": 0.7472645044326782, "loss_ce": 0.0006824589800089598, "loss_iou": 0.2734375, "loss_num": 0.0400390625, "loss_xval": 0.74609375, "num_input_tokens_seen": 43575372, "step": 695 }, { "epoch": 2.3161397670549086, "grad_norm": 13.541839599609375, "learning_rate": 5e-06, "loss": 0.8463, "num_input_tokens_seen": 43638652, "step": 696 }, { "epoch": 2.3161397670549086, "loss": 0.7147372961044312, "loss_ce": 0.0003818150726146996, "loss_iou": 0.19921875, "loss_num": 0.0634765625, "loss_xval": 0.71484375, "num_input_tokens_seen": 43638652, "step": 696 }, { "epoch": 2.3194675540765393, "grad_norm": 24.54562759399414, "learning_rate": 5e-06, "loss": 1.0204, "num_input_tokens_seen": 43701632, "step": 697 }, { "epoch": 2.3194675540765393, "loss": 0.892731249332428, "loss_ce": 3.1049828976392746e-05, "loss_iou": 0.28515625, "loss_num": 0.064453125, "loss_xval": 0.89453125, "num_input_tokens_seen": 43701632, "step": 697 }, { "epoch": 2.32279534109817, "grad_norm": 25.402902603149414, "learning_rate": 5e-06, "loss": 0.8464, "num_input_tokens_seen": 43763056, "step": 698 }, { "epoch": 2.32279534109817, "loss": 0.700952410697937, "loss_ce": 2.4683897208888084e-05, "loss_iou": 0.2490234375, "loss_num": 0.04052734375, "loss_xval": 0.69921875, "num_input_tokens_seen": 43763056, "step": 698 }, { "epoch": 2.3261231281198005, "grad_norm": 8.614212989807129, "learning_rate": 5e-06, "loss": 0.7787, "num_input_tokens_seen": 43827112, "step": 699 }, { "epoch": 2.3261231281198005, "loss": 0.6044846773147583, "loss_ce": 0.0008469896856695414, "loss_iou": 0.20703125, "loss_num": 0.0380859375, "loss_xval": 0.60546875, "num_input_tokens_seen": 43827112, "step": 699 }, { "epoch": 2.329450915141431, "grad_norm": 21.64102554321289, "learning_rate": 5e-06, "loss": 0.905, "num_input_tokens_seen": 43889088, "step": 700 }, { "epoch": 2.329450915141431, "loss": 0.5793565511703491, "loss_ce": 1.0861856026167516e-05, "loss_iou": 0.10546875, "loss_num": 0.07373046875, "loss_xval": 0.578125, "num_input_tokens_seen": 43889088, "step": 700 }, { "epoch": 2.3327787021630617, "grad_norm": 12.798100471496582, "learning_rate": 5e-06, "loss": 0.9444, "num_input_tokens_seen": 43952452, "step": 701 }, { "epoch": 2.3327787021630617, "loss": 0.826103687286377, "loss_ce": 0.0011525340378284454, "loss_iou": 0.296875, "loss_num": 0.046142578125, "loss_xval": 0.82421875, "num_input_tokens_seen": 43952452, "step": 701 }, { "epoch": 2.3361064891846923, "grad_norm": 13.59535026550293, "learning_rate": 5e-06, "loss": 0.9917, "num_input_tokens_seen": 44015684, "step": 702 }, { "epoch": 2.3361064891846923, "loss": 0.9468133449554443, "loss_ce": 3.602740980568342e-05, "loss_iou": 0.35546875, "loss_num": 0.047119140625, "loss_xval": 0.9453125, "num_input_tokens_seen": 44015684, "step": 702 }, { "epoch": 2.339434276206323, "grad_norm": 15.593339920043945, "learning_rate": 5e-06, "loss": 1.0275, "num_input_tokens_seen": 44081344, "step": 703 }, { "epoch": 2.339434276206323, "loss": 1.152112364768982, "loss_ce": 0.0002568770432844758, "loss_iou": 0.412109375, "loss_num": 0.06591796875, "loss_xval": 1.1484375, "num_input_tokens_seen": 44081344, "step": 703 }, { "epoch": 2.3427620632279536, "grad_norm": 11.514885902404785, "learning_rate": 5e-06, "loss": 0.7159, "num_input_tokens_seen": 44144356, "step": 704 }, { "epoch": 2.3427620632279536, "loss": 0.5067183971405029, "loss_ce": 0.00012654870806727558, "loss_iou": 0.1806640625, "loss_num": 0.029052734375, "loss_xval": 0.5078125, "num_input_tokens_seen": 44144356, "step": 704 }, { "epoch": 2.346089850249584, "grad_norm": 15.352267265319824, "learning_rate": 5e-06, "loss": 1.0853, "num_input_tokens_seen": 44208192, "step": 705 }, { "epoch": 2.346089850249584, "loss": 0.9742995500564575, "loss_ce": 0.0011550791095942259, "loss_iou": 0.3046875, "loss_num": 0.07275390625, "loss_xval": 0.97265625, "num_input_tokens_seen": 44208192, "step": 705 }, { "epoch": 2.3494176372712148, "grad_norm": 11.715181350708008, "learning_rate": 5e-06, "loss": 0.8735, "num_input_tokens_seen": 44272008, "step": 706 }, { "epoch": 2.3494176372712148, "loss": 0.8866299390792847, "loss_ce": 0.00039946436299942434, "loss_iou": 0.31640625, "loss_num": 0.05078125, "loss_xval": 0.88671875, "num_input_tokens_seen": 44272008, "step": 706 }, { "epoch": 2.3527454242928454, "grad_norm": 8.671246528625488, "learning_rate": 5e-06, "loss": 0.858, "num_input_tokens_seen": 44335544, "step": 707 }, { "epoch": 2.3527454242928454, "loss": 0.6201329827308655, "loss_ce": 1.5798959793755785e-05, "loss_iou": 0.2041015625, "loss_num": 0.04248046875, "loss_xval": 0.62109375, "num_input_tokens_seen": 44335544, "step": 707 }, { "epoch": 2.356073211314476, "grad_norm": 12.736263275146484, "learning_rate": 5e-06, "loss": 0.8517, "num_input_tokens_seen": 44399308, "step": 708 }, { "epoch": 2.356073211314476, "loss": 0.7626245021820068, "loss_ce": 0.0009057295392267406, "loss_iou": 0.2734375, "loss_num": 0.042724609375, "loss_xval": 0.76171875, "num_input_tokens_seen": 44399308, "step": 708 }, { "epoch": 2.3594009983361066, "grad_norm": 23.638330459594727, "learning_rate": 5e-06, "loss": 1.0551, "num_input_tokens_seen": 44463192, "step": 709 }, { "epoch": 2.3594009983361066, "loss": 0.9373329281806946, "loss_ce": 0.00032120716059580445, "loss_iou": 0.376953125, "loss_num": 0.037109375, "loss_xval": 0.9375, "num_input_tokens_seen": 44463192, "step": 709 }, { "epoch": 2.3627287853577372, "grad_norm": 16.6831111907959, "learning_rate": 5e-06, "loss": 0.8864, "num_input_tokens_seen": 44525432, "step": 710 }, { "epoch": 2.3627287853577372, "loss": 0.723406195640564, "loss_ce": 0.0009940601885318756, "loss_iou": 0.25390625, "loss_num": 0.04296875, "loss_xval": 0.72265625, "num_input_tokens_seen": 44525432, "step": 710 }, { "epoch": 2.366056572379368, "grad_norm": 42.67438507080078, "learning_rate": 5e-06, "loss": 0.8198, "num_input_tokens_seen": 44589092, "step": 711 }, { "epoch": 2.366056572379368, "loss": 0.6157445311546326, "loss_ce": 0.0011205194750800729, "loss_iou": 0.1865234375, "loss_num": 0.04833984375, "loss_xval": 0.61328125, "num_input_tokens_seen": 44589092, "step": 711 }, { "epoch": 2.3693843594009985, "grad_norm": 9.488656997680664, "learning_rate": 5e-06, "loss": 0.8397, "num_input_tokens_seen": 44652380, "step": 712 }, { "epoch": 2.3693843594009985, "loss": 0.8618941307067871, "loss_ce": 7.773353718221188e-05, "loss_iou": 0.3203125, "loss_num": 0.044677734375, "loss_xval": 0.86328125, "num_input_tokens_seen": 44652380, "step": 712 }, { "epoch": 2.372712146422629, "grad_norm": 8.377887725830078, "learning_rate": 5e-06, "loss": 0.9821, "num_input_tokens_seen": 44714300, "step": 713 }, { "epoch": 2.372712146422629, "loss": 1.102597951889038, "loss_ce": 0.0015237996121868491, "loss_iou": 0.375, "loss_num": 0.07080078125, "loss_xval": 1.1015625, "num_input_tokens_seen": 44714300, "step": 713 }, { "epoch": 2.3760399334442597, "grad_norm": 8.001431465148926, "learning_rate": 5e-06, "loss": 0.6308, "num_input_tokens_seen": 44776084, "step": 714 }, { "epoch": 2.3760399334442597, "loss": 0.6766414642333984, "loss_ce": 0.0028133769519627094, "loss_iou": 0.216796875, "loss_num": 0.048095703125, "loss_xval": 0.671875, "num_input_tokens_seen": 44776084, "step": 714 }, { "epoch": 2.3793677204658903, "grad_norm": 13.848015785217285, "learning_rate": 5e-06, "loss": 0.6121, "num_input_tokens_seen": 44839196, "step": 715 }, { "epoch": 2.3793677204658903, "loss": 0.5025069117546082, "loss_ce": 0.005558652337640524, "loss_iou": 0.16015625, "loss_num": 0.03515625, "loss_xval": 0.49609375, "num_input_tokens_seen": 44839196, "step": 715 }, { "epoch": 2.382695507487521, "grad_norm": 11.468194007873535, "learning_rate": 5e-06, "loss": 0.9085, "num_input_tokens_seen": 44901004, "step": 716 }, { "epoch": 2.382695507487521, "loss": 0.7659911513328552, "loss_ce": 0.0006102509214542806, "loss_iou": 0.2373046875, "loss_num": 0.05810546875, "loss_xval": 0.765625, "num_input_tokens_seen": 44901004, "step": 716 }, { "epoch": 2.3860232945091515, "grad_norm": 100.81312561035156, "learning_rate": 5e-06, "loss": 0.816, "num_input_tokens_seen": 44964160, "step": 717 }, { "epoch": 2.3860232945091515, "loss": 0.9275991916656494, "loss_ce": 0.00035306636709719896, "loss_iou": 0.375, "loss_num": 0.035400390625, "loss_xval": 0.92578125, "num_input_tokens_seen": 44964160, "step": 717 }, { "epoch": 2.389351081530782, "grad_norm": 8.282294273376465, "learning_rate": 5e-06, "loss": 0.457, "num_input_tokens_seen": 45026476, "step": 718 }, { "epoch": 2.389351081530782, "loss": 0.4619190990924835, "loss_ce": 0.0008595389663241804, "loss_iou": 0.1552734375, "loss_num": 0.0302734375, "loss_xval": 0.4609375, "num_input_tokens_seen": 45026476, "step": 718 }, { "epoch": 2.3926788685524127, "grad_norm": 15.622560501098633, "learning_rate": 5e-06, "loss": 0.8591, "num_input_tokens_seen": 45090048, "step": 719 }, { "epoch": 2.3926788685524127, "loss": 0.8411388397216797, "loss_ce": 7.440555782523006e-05, "loss_iou": 0.3359375, "loss_num": 0.033203125, "loss_xval": 0.83984375, "num_input_tokens_seen": 45090048, "step": 719 }, { "epoch": 2.3960066555740434, "grad_norm": 12.364948272705078, "learning_rate": 5e-06, "loss": 0.6973, "num_input_tokens_seen": 45152144, "step": 720 }, { "epoch": 2.3960066555740434, "loss": 0.8305813074111938, "loss_ce": 1.4855088011245243e-05, "loss_iou": 0.296875, "loss_num": 0.047607421875, "loss_xval": 0.83203125, "num_input_tokens_seen": 45152144, "step": 720 }, { "epoch": 2.399334442595674, "grad_norm": 33.03171157836914, "learning_rate": 5e-06, "loss": 0.7516, "num_input_tokens_seen": 45214144, "step": 721 }, { "epoch": 2.399334442595674, "loss": 0.7860850095748901, "loss_ce": 0.0015391036868095398, "loss_iou": 0.2734375, "loss_num": 0.04736328125, "loss_xval": 0.78515625, "num_input_tokens_seen": 45214144, "step": 721 }, { "epoch": 2.4026622296173046, "grad_norm": 20.416688919067383, "learning_rate": 5e-06, "loss": 0.7633, "num_input_tokens_seen": 45277820, "step": 722 }, { "epoch": 2.4026622296173046, "loss": 0.7667117118835449, "loss_ce": 0.00011011668539140373, "loss_iou": 0.29296875, "loss_num": 0.036376953125, "loss_xval": 0.765625, "num_input_tokens_seen": 45277820, "step": 722 }, { "epoch": 2.405990016638935, "grad_norm": 9.330942153930664, "learning_rate": 5e-06, "loss": 0.7103, "num_input_tokens_seen": 45340388, "step": 723 }, { "epoch": 2.405990016638935, "loss": 0.3887462615966797, "loss_ce": 0.000318503996822983, "loss_iou": 0.0849609375, "loss_num": 0.043701171875, "loss_xval": 0.388671875, "num_input_tokens_seen": 45340388, "step": 723 }, { "epoch": 2.409317803660566, "grad_norm": 16.6447696685791, "learning_rate": 5e-06, "loss": 0.745, "num_input_tokens_seen": 45402584, "step": 724 }, { "epoch": 2.409317803660566, "loss": 0.7268455028533936, "loss_ce": 0.001808908418752253, "loss_iou": 0.28515625, "loss_num": 0.0308837890625, "loss_xval": 0.7265625, "num_input_tokens_seen": 45402584, "step": 724 }, { "epoch": 2.4126455906821964, "grad_norm": 20.65241050720215, "learning_rate": 5e-06, "loss": 0.723, "num_input_tokens_seen": 45464804, "step": 725 }, { "epoch": 2.4126455906821964, "loss": 0.6308388710021973, "loss_ce": 0.0009560787002556026, "loss_iou": 0.2265625, "loss_num": 0.035400390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 45464804, "step": 725 }, { "epoch": 2.415973377703827, "grad_norm": 30.81974220275879, "learning_rate": 5e-06, "loss": 0.9556, "num_input_tokens_seen": 45527936, "step": 726 }, { "epoch": 2.415973377703827, "loss": 0.9199089407920837, "loss_ce": 0.0008415859774686396, "loss_iou": 0.310546875, "loss_num": 0.06005859375, "loss_xval": 0.91796875, "num_input_tokens_seen": 45527936, "step": 726 }, { "epoch": 2.4193011647254576, "grad_norm": 23.18498992919922, "learning_rate": 5e-06, "loss": 0.763, "num_input_tokens_seen": 45590292, "step": 727 }, { "epoch": 2.4193011647254576, "loss": 0.8546520471572876, "loss_ce": 0.0006481358432210982, "loss_iou": 0.26953125, "loss_num": 0.06298828125, "loss_xval": 0.85546875, "num_input_tokens_seen": 45590292, "step": 727 }, { "epoch": 2.4226289517470883, "grad_norm": 11.9873046875, "learning_rate": 5e-06, "loss": 0.9806, "num_input_tokens_seen": 45653976, "step": 728 }, { "epoch": 2.4226289517470883, "loss": 1.0390448570251465, "loss_ce": 0.005109299439936876, "loss_iou": 0.37890625, "loss_num": 0.05517578125, "loss_xval": 1.03125, "num_input_tokens_seen": 45653976, "step": 728 }, { "epoch": 2.425956738768719, "grad_norm": 14.772794723510742, "learning_rate": 5e-06, "loss": 0.9063, "num_input_tokens_seen": 45717436, "step": 729 }, { "epoch": 2.425956738768719, "loss": 0.7487889528274536, "loss_ce": 0.0004979821969754994, "loss_iou": 0.22265625, "loss_num": 0.060546875, "loss_xval": 0.75, "num_input_tokens_seen": 45717436, "step": 729 }, { "epoch": 2.4292845257903495, "grad_norm": 10.266166687011719, "learning_rate": 5e-06, "loss": 0.7886, "num_input_tokens_seen": 45778428, "step": 730 }, { "epoch": 2.4292845257903495, "loss": 0.7389390468597412, "loss_ce": 0.0002610463707242161, "loss_iou": 0.28125, "loss_num": 0.035400390625, "loss_xval": 0.73828125, "num_input_tokens_seen": 45778428, "step": 730 }, { "epoch": 2.43261231281198, "grad_norm": 13.902536392211914, "learning_rate": 5e-06, "loss": 0.6035, "num_input_tokens_seen": 45841592, "step": 731 }, { "epoch": 2.43261231281198, "loss": 0.7757643461227417, "loss_ce": 0.0006178857292979956, "loss_iou": 0.259765625, "loss_num": 0.05126953125, "loss_xval": 0.7734375, "num_input_tokens_seen": 45841592, "step": 731 }, { "epoch": 2.4359400998336107, "grad_norm": 11.125432968139648, "learning_rate": 5e-06, "loss": 0.7436, "num_input_tokens_seen": 45904204, "step": 732 }, { "epoch": 2.4359400998336107, "loss": 0.8660734295845032, "loss_ce": 0.0007170129101723433, "loss_iou": 0.3359375, "loss_num": 0.03857421875, "loss_xval": 0.8671875, "num_input_tokens_seen": 45904204, "step": 732 }, { "epoch": 2.4392678868552413, "grad_norm": 12.151094436645508, "learning_rate": 5e-06, "loss": 0.6954, "num_input_tokens_seen": 45966944, "step": 733 }, { "epoch": 2.4392678868552413, "loss": 0.5378880500793457, "loss_ce": 0.00029038116917945445, "loss_iou": 0.17578125, "loss_num": 0.037353515625, "loss_xval": 0.5390625, "num_input_tokens_seen": 45966944, "step": 733 }, { "epoch": 2.442595673876872, "grad_norm": 7.525664806365967, "learning_rate": 5e-06, "loss": 0.5737, "num_input_tokens_seen": 46028688, "step": 734 }, { "epoch": 2.442595673876872, "loss": 0.6163484454154968, "loss_ce": 0.0006258049979805946, "loss_iou": 0.181640625, "loss_num": 0.050537109375, "loss_xval": 0.6171875, "num_input_tokens_seen": 46028688, "step": 734 }, { "epoch": 2.4459234608985025, "grad_norm": 24.891660690307617, "learning_rate": 5e-06, "loss": 1.014, "num_input_tokens_seen": 46091236, "step": 735 }, { "epoch": 2.4459234608985025, "loss": 1.1315410137176514, "loss_ce": 0.00043744005961343646, "loss_iou": 0.328125, "loss_num": 0.09521484375, "loss_xval": 1.1328125, "num_input_tokens_seen": 46091236, "step": 735 }, { "epoch": 2.449251247920133, "grad_norm": 12.98134708404541, "learning_rate": 5e-06, "loss": 0.6535, "num_input_tokens_seen": 46152740, "step": 736 }, { "epoch": 2.449251247920133, "loss": 0.6800500154495239, "loss_ce": 0.000606651185080409, "loss_iou": 0.197265625, "loss_num": 0.056884765625, "loss_xval": 0.6796875, "num_input_tokens_seen": 46152740, "step": 736 }, { "epoch": 2.4525790349417638, "grad_norm": 17.395307540893555, "learning_rate": 5e-06, "loss": 0.885, "num_input_tokens_seen": 46215172, "step": 737 }, { "epoch": 2.4525790349417638, "loss": 0.9187045097351074, "loss_ce": 0.0007357874419540167, "loss_iou": 0.28515625, "loss_num": 0.06982421875, "loss_xval": 0.91796875, "num_input_tokens_seen": 46215172, "step": 737 }, { "epoch": 2.4559068219633944, "grad_norm": 32.95661926269531, "learning_rate": 5e-06, "loss": 0.7945, "num_input_tokens_seen": 46279364, "step": 738 }, { "epoch": 2.4559068219633944, "loss": 0.7413176894187927, "loss_ce": 0.00035090395249426365, "loss_iou": 0.291015625, "loss_num": 0.03173828125, "loss_xval": 0.7421875, "num_input_tokens_seen": 46279364, "step": 738 }, { "epoch": 2.459234608985025, "grad_norm": 12.986007690429688, "learning_rate": 5e-06, "loss": 0.8186, "num_input_tokens_seen": 46341792, "step": 739 }, { "epoch": 2.459234608985025, "loss": 1.1065441370010376, "loss_ce": 0.0010754023678600788, "loss_iou": 0.380859375, "loss_num": 0.0693359375, "loss_xval": 1.109375, "num_input_tokens_seen": 46341792, "step": 739 }, { "epoch": 2.4625623960066556, "grad_norm": 9.2813138961792, "learning_rate": 5e-06, "loss": 0.6504, "num_input_tokens_seen": 46403348, "step": 740 }, { "epoch": 2.4625623960066556, "loss": 0.6935011148452759, "loss_ce": 0.0005079866969026625, "loss_iou": 0.1826171875, "loss_num": 0.0654296875, "loss_xval": 0.69140625, "num_input_tokens_seen": 46403348, "step": 740 }, { "epoch": 2.465890183028286, "grad_norm": 41.71429443359375, "learning_rate": 5e-06, "loss": 0.9927, "num_input_tokens_seen": 46467084, "step": 741 }, { "epoch": 2.465890183028286, "loss": 1.1221803426742554, "loss_ce": 0.004138402175158262, "loss_iou": 0.392578125, "loss_num": 0.06689453125, "loss_xval": 1.1171875, "num_input_tokens_seen": 46467084, "step": 741 }, { "epoch": 2.469217970049917, "grad_norm": 24.372764587402344, "learning_rate": 5e-06, "loss": 0.7691, "num_input_tokens_seen": 46529216, "step": 742 }, { "epoch": 2.469217970049917, "loss": 0.8545128703117371, "loss_ce": 0.0005089840269647539, "loss_iou": 0.29296875, "loss_num": 0.053955078125, "loss_xval": 0.85546875, "num_input_tokens_seen": 46529216, "step": 742 }, { "epoch": 2.4725457570715474, "grad_norm": 34.00208282470703, "learning_rate": 5e-06, "loss": 1.1629, "num_input_tokens_seen": 46591988, "step": 743 }, { "epoch": 2.4725457570715474, "loss": 1.331702470779419, "loss_ce": 0.00015946310304570943, "loss_iou": 0.41015625, "loss_num": 0.1025390625, "loss_xval": 1.328125, "num_input_tokens_seen": 46591988, "step": 743 }, { "epoch": 2.475873544093178, "grad_norm": 16.292890548706055, "learning_rate": 5e-06, "loss": 0.9306, "num_input_tokens_seen": 46654472, "step": 744 }, { "epoch": 2.475873544093178, "loss": 1.071473479270935, "loss_ce": 0.0006726733408868313, "loss_iou": 0.369140625, "loss_num": 0.06689453125, "loss_xval": 1.0703125, "num_input_tokens_seen": 46654472, "step": 744 }, { "epoch": 2.4792013311148087, "grad_norm": 11.041157722473145, "learning_rate": 5e-06, "loss": 0.7353, "num_input_tokens_seen": 46716728, "step": 745 }, { "epoch": 2.4792013311148087, "loss": 0.6517635583877563, "loss_ce": 0.0011288196546956897, "loss_iou": 0.2255859375, "loss_num": 0.0400390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 46716728, "step": 745 }, { "epoch": 2.4825291181364393, "grad_norm": 12.098727226257324, "learning_rate": 5e-06, "loss": 0.7571, "num_input_tokens_seen": 46779476, "step": 746 }, { "epoch": 2.4825291181364393, "loss": 0.7326780557632446, "loss_ce": 1.2083750334568322e-05, "loss_iou": 0.2041015625, "loss_num": 0.06494140625, "loss_xval": 0.734375, "num_input_tokens_seen": 46779476, "step": 746 }, { "epoch": 2.48585690515807, "grad_norm": 15.153691291809082, "learning_rate": 5e-06, "loss": 0.8021, "num_input_tokens_seen": 46842836, "step": 747 }, { "epoch": 2.48585690515807, "loss": 0.8030003309249878, "loss_ce": 2.181137097068131e-05, "loss_iou": 0.296875, "loss_num": 0.0419921875, "loss_xval": 0.8046875, "num_input_tokens_seen": 46842836, "step": 747 }, { "epoch": 2.4891846921797005, "grad_norm": 19.263431549072266, "learning_rate": 5e-06, "loss": 1.0008, "num_input_tokens_seen": 46904764, "step": 748 }, { "epoch": 2.4891846921797005, "loss": 0.7467707395553589, "loss_ce": 0.001287369173951447, "loss_iou": 0.21875, "loss_num": 0.0615234375, "loss_xval": 0.74609375, "num_input_tokens_seen": 46904764, "step": 748 }, { "epoch": 2.492512479201331, "grad_norm": 18.505611419677734, "learning_rate": 5e-06, "loss": 0.6956, "num_input_tokens_seen": 46967092, "step": 749 }, { "epoch": 2.492512479201331, "loss": 0.8270572423934937, "loss_ce": 0.0008853643084876239, "loss_iou": 0.28125, "loss_num": 0.052978515625, "loss_xval": 0.828125, "num_input_tokens_seen": 46967092, "step": 749 }, { "epoch": 2.4958402662229617, "grad_norm": 10.45076847076416, "learning_rate": 5e-06, "loss": 0.6872, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "eval_seeclick_CIoU": 0.0734884450212121, "eval_seeclick_GIoU": 0.0995967723429203, "eval_seeclick_IoU": 0.19568058103322983, "eval_seeclick_MAE_all": 0.17629285901784897, "eval_seeclick_MAE_h": 0.0558263435959816, "eval_seeclick_MAE_w": 0.1402217000722885, "eval_seeclick_MAE_x_boxes": 0.24939251691102982, "eval_seeclick_MAE_y_boxes": 0.17625297605991364, "eval_seeclick_NUM_probability": 0.999792754650116, "eval_seeclick_inside_bbox": 0.19375000149011612, "eval_seeclick_loss": 2.8479881286621094, "eval_seeclick_loss_ce": 0.08972010388970375, "eval_seeclick_loss_iou": 0.9342041015625, "eval_seeclick_loss_num": 0.182373046875, "eval_seeclick_loss_xval": 2.778076171875, "eval_seeclick_runtime": 61.0363, "eval_seeclick_samples_per_second": 0.77, "eval_seeclick_steps_per_second": 0.033, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "eval_icons_CIoU": -0.07027491927146912, "eval_icons_GIoU": 0.017897209618240595, "eval_icons_IoU": 0.09695422276854515, "eval_icons_MAE_all": 0.1645146682858467, "eval_icons_MAE_h": 0.10222694277763367, "eval_icons_MAE_w": 0.1458854302763939, "eval_icons_MAE_x_boxes": 0.13255972415208817, "eval_icons_MAE_y_boxes": 0.12015364319086075, "eval_icons_NUM_probability": 0.9995680749416351, "eval_icons_inside_bbox": 0.1614583358168602, "eval_icons_loss": 2.7319436073303223, "eval_icons_loss_ce": 6.722200305375736e-05, "eval_icons_loss_iou": 0.977294921875, "eval_icons_loss_num": 0.1565704345703125, "eval_icons_loss_xval": 2.73681640625, "eval_icons_runtime": 64.5641, "eval_icons_samples_per_second": 0.774, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "eval_screenspot_CIoU": 0.07267025051017602, "eval_screenspot_GIoU": 0.10169834891955058, "eval_screenspot_IoU": 0.206079031030337, "eval_screenspot_MAE_all": 0.15985962748527527, "eval_screenspot_MAE_h": 0.07775780310233434, "eval_screenspot_MAE_w": 0.1154338742295901, "eval_screenspot_MAE_x_boxes": 0.22314242521921793, "eval_screenspot_MAE_y_boxes": 0.12497067699829738, "eval_screenspot_NUM_probability": 0.9999499519666036, "eval_screenspot_inside_bbox": 0.36833332975705463, "eval_screenspot_loss": 2.624130964279175, "eval_screenspot_loss_ce": 0.00021677961437186846, "eval_screenspot_loss_iou": 0.9044596354166666, "eval_screenspot_loss_num": 0.16988627115885416, "eval_screenspot_loss_xval": 2.658203125, "eval_screenspot_runtime": 111.031, "eval_screenspot_samples_per_second": 0.802, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "eval_compot_CIoU": -0.0433688135817647, "eval_compot_GIoU": 0.03804280236363411, "eval_compot_IoU": 0.12814124301075935, "eval_compot_MAE_all": 0.2096344456076622, "eval_compot_MAE_h": 0.10326477885246277, "eval_compot_MAE_w": 0.1770506054162979, "eval_compot_MAE_x_boxes": 0.21715683490037918, "eval_compot_MAE_y_boxes": 0.14301852509379387, "eval_compot_NUM_probability": 0.9999598264694214, "eval_compot_inside_bbox": 0.1927083358168602, "eval_compot_loss": 2.9428277015686035, "eval_compot_loss_ce": 0.0044660314451903105, "eval_compot_loss_iou": 0.95849609375, "eval_compot_loss_num": 0.2134246826171875, "eval_compot_loss_xval": 2.98486328125, "eval_compot_runtime": 65.0427, "eval_compot_samples_per_second": 0.769, "eval_compot_steps_per_second": 0.031, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "eval_custom_ui_MAE_all": 0.07819526642560959, "eval_custom_ui_MAE_x": 0.07787354290485382, "eval_custom_ui_MAE_y": 0.07851698994636536, "eval_custom_ui_NUM_probability": 0.9999693334102631, "eval_custom_ui_loss": 0.37869468331336975, "eval_custom_ui_loss_ce": 3.311750515422318e-05, "eval_custom_ui_loss_num": 0.076629638671875, "eval_custom_ui_loss_xval": 0.38311767578125, "eval_custom_ui_runtime": 55.6292, "eval_custom_ui_samples_per_second": 0.899, "eval_custom_ui_steps_per_second": 0.036, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4958402662229617, "loss": 0.3785746693611145, "loss_ce": 3.463430402916856e-05, "loss_iou": 0.0, "loss_num": 0.07568359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 47029460, "step": 750 }, { "epoch": 2.4991680532445923, "grad_norm": 23.789785385131836, "learning_rate": 5e-06, "loss": 0.7497, "num_input_tokens_seen": 47092252, "step": 751 }, { "epoch": 2.4991680532445923, "loss": 0.7207818031311035, "loss_ce": 1.771647839632351e-05, "loss_iou": 0.2138671875, "loss_num": 0.058349609375, "loss_xval": 0.72265625, "num_input_tokens_seen": 47092252, "step": 751 }, { "epoch": 2.502495840266223, "grad_norm": 17.456363677978516, "learning_rate": 5e-06, "loss": 0.7229, "num_input_tokens_seen": 47154632, "step": 752 }, { "epoch": 2.502495840266223, "loss": 0.9199798107147217, "loss_ce": 5.79504958295729e-05, "loss_iou": 0.275390625, "loss_num": 0.07421875, "loss_xval": 0.921875, "num_input_tokens_seen": 47154632, "step": 752 }, { "epoch": 2.5058236272878536, "grad_norm": 15.332846641540527, "learning_rate": 5e-06, "loss": 0.8093, "num_input_tokens_seen": 47216012, "step": 753 }, { "epoch": 2.5058236272878536, "loss": 0.6803818941116333, "loss_ce": 0.00032815078157000244, "loss_iou": 0.259765625, "loss_num": 0.032470703125, "loss_xval": 0.6796875, "num_input_tokens_seen": 47216012, "step": 753 }, { "epoch": 2.509151414309484, "grad_norm": 32.81474685668945, "learning_rate": 5e-06, "loss": 0.7191, "num_input_tokens_seen": 47279696, "step": 754 }, { "epoch": 2.509151414309484, "loss": 0.7503370046615601, "loss_ce": 0.0013135733315721154, "loss_iou": 0.265625, "loss_num": 0.043212890625, "loss_xval": 0.75, "num_input_tokens_seen": 47279696, "step": 754 }, { "epoch": 2.512479201331115, "grad_norm": 22.866039276123047, "learning_rate": 5e-06, "loss": 0.6615, "num_input_tokens_seen": 47341140, "step": 755 }, { "epoch": 2.512479201331115, "loss": 0.8093788623809814, "loss_ce": 0.000540995504707098, "loss_iou": 0.294921875, "loss_num": 0.0439453125, "loss_xval": 0.80859375, "num_input_tokens_seen": 47341140, "step": 755 }, { "epoch": 2.5158069883527454, "grad_norm": 8.103184700012207, "learning_rate": 5e-06, "loss": 0.5868, "num_input_tokens_seen": 47403964, "step": 756 }, { "epoch": 2.5158069883527454, "loss": 0.5880100131034851, "loss_ce": 0.0003635000030044466, "loss_iou": 0.154296875, "loss_num": 0.055908203125, "loss_xval": 0.5859375, "num_input_tokens_seen": 47403964, "step": 756 }, { "epoch": 2.519134775374376, "grad_norm": 5.781442165374756, "learning_rate": 5e-06, "loss": 0.5177, "num_input_tokens_seen": 47465328, "step": 757 }, { "epoch": 2.519134775374376, "loss": 0.609734296798706, "loss_ce": 0.0014579689595848322, "loss_iou": 0.1513671875, "loss_num": 0.06103515625, "loss_xval": 0.609375, "num_input_tokens_seen": 47465328, "step": 757 }, { "epoch": 2.5224625623960066, "grad_norm": 16.77414894104004, "learning_rate": 5e-06, "loss": 0.6903, "num_input_tokens_seen": 47526856, "step": 758 }, { "epoch": 2.5224625623960066, "loss": 1.0022685527801514, "loss_ce": 7.129125879146159e-05, "loss_iou": 0.337890625, "loss_num": 0.0654296875, "loss_xval": 1.0, "num_input_tokens_seen": 47526856, "step": 758 }, { "epoch": 2.5257903494176372, "grad_norm": 39.55748748779297, "learning_rate": 5e-06, "loss": 0.8619, "num_input_tokens_seen": 47589740, "step": 759 }, { "epoch": 2.5257903494176372, "loss": 1.0036578178405762, "loss_ce": 0.0017046760767698288, "loss_iou": 0.328125, "loss_num": 0.06884765625, "loss_xval": 1.0, "num_input_tokens_seen": 47589740, "step": 759 }, { "epoch": 2.529118136439268, "grad_norm": 12.93166446685791, "learning_rate": 5e-06, "loss": 0.9628, "num_input_tokens_seen": 47652696, "step": 760 }, { "epoch": 2.529118136439268, "loss": 0.7494051456451416, "loss_ce": 7.65049408073537e-05, "loss_iou": 0.287109375, "loss_num": 0.03515625, "loss_xval": 0.75, "num_input_tokens_seen": 47652696, "step": 760 }, { "epoch": 2.5324459234608985, "grad_norm": 12.870399475097656, "learning_rate": 5e-06, "loss": 0.9854, "num_input_tokens_seen": 47715496, "step": 761 }, { "epoch": 2.5324459234608985, "loss": 1.0767834186553955, "loss_ce": 0.00012328368029557168, "loss_iou": 0.380859375, "loss_num": 0.0634765625, "loss_xval": 1.078125, "num_input_tokens_seen": 47715496, "step": 761 }, { "epoch": 2.535773710482529, "grad_norm": 18.96577262878418, "learning_rate": 5e-06, "loss": 0.7605, "num_input_tokens_seen": 47778632, "step": 762 }, { "epoch": 2.535773710482529, "loss": 0.8610488772392273, "loss_ce": 0.0002090678462991491, "loss_iou": 0.28515625, "loss_num": 0.05810546875, "loss_xval": 0.859375, "num_input_tokens_seen": 47778632, "step": 762 }, { "epoch": 2.5391014975041597, "grad_norm": 9.927651405334473, "learning_rate": 5e-06, "loss": 0.8039, "num_input_tokens_seen": 47842220, "step": 763 }, { "epoch": 2.5391014975041597, "loss": 0.8016228675842285, "loss_ce": 0.0014519505202770233, "loss_iou": 0.283203125, "loss_num": 0.047119140625, "loss_xval": 0.80078125, "num_input_tokens_seen": 47842220, "step": 763 }, { "epoch": 2.5424292845257903, "grad_norm": 21.00914764404297, "learning_rate": 5e-06, "loss": 0.95, "num_input_tokens_seen": 47905184, "step": 764 }, { "epoch": 2.5424292845257903, "loss": 1.0961568355560303, "loss_ce": 0.0009420657297596335, "loss_iou": 0.3671875, "loss_num": 0.07177734375, "loss_xval": 1.09375, "num_input_tokens_seen": 47905184, "step": 764 }, { "epoch": 2.545757071547421, "grad_norm": 14.406147956848145, "learning_rate": 5e-06, "loss": 0.7262, "num_input_tokens_seen": 47968364, "step": 765 }, { "epoch": 2.545757071547421, "loss": 0.5079929232597351, "loss_ce": 0.0014011403545737267, "loss_iou": 0.12890625, "loss_num": 0.0498046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 47968364, "step": 765 }, { "epoch": 2.5490848585690515, "grad_norm": 9.191800117492676, "learning_rate": 5e-06, "loss": 0.6565, "num_input_tokens_seen": 48027032, "step": 766 }, { "epoch": 2.5490848585690515, "loss": 0.6796262860298157, "loss_ce": 6.083587868488394e-05, "loss_iou": 0.1796875, "loss_num": 0.064453125, "loss_xval": 0.6796875, "num_input_tokens_seen": 48027032, "step": 766 }, { "epoch": 2.552412645590682, "grad_norm": 8.631855010986328, "learning_rate": 5e-06, "loss": 0.6616, "num_input_tokens_seen": 48088040, "step": 767 }, { "epoch": 2.552412645590682, "loss": 0.6877455711364746, "loss_ce": 0.000733867462258786, "loss_iou": 0.267578125, "loss_num": 0.0302734375, "loss_xval": 0.6875, "num_input_tokens_seen": 48088040, "step": 767 }, { "epoch": 2.5557404326123128, "grad_norm": 15.254091262817383, "learning_rate": 5e-06, "loss": 1.0199, "num_input_tokens_seen": 48150880, "step": 768 }, { "epoch": 2.5557404326123128, "loss": 0.9874637126922607, "loss_ce": 0.00211210735142231, "loss_iou": 0.294921875, "loss_num": 0.07958984375, "loss_xval": 0.984375, "num_input_tokens_seen": 48150880, "step": 768 }, { "epoch": 2.5590682196339434, "grad_norm": 21.36817741394043, "learning_rate": 5e-06, "loss": 0.7199, "num_input_tokens_seen": 48213364, "step": 769 }, { "epoch": 2.5590682196339434, "loss": 0.5034525394439697, "loss_ce": 6.508058140752837e-05, "loss_iou": 0.16015625, "loss_num": 0.036865234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 48213364, "step": 769 }, { "epoch": 2.562396006655574, "grad_norm": 17.851638793945312, "learning_rate": 5e-06, "loss": 0.8265, "num_input_tokens_seen": 48275488, "step": 770 }, { "epoch": 2.562396006655574, "loss": 0.6952813863754272, "loss_ce": 9.096534631680697e-05, "loss_iou": 0.248046875, "loss_num": 0.03955078125, "loss_xval": 0.6953125, "num_input_tokens_seen": 48275488, "step": 770 }, { "epoch": 2.5657237936772046, "grad_norm": 19.338138580322266, "learning_rate": 5e-06, "loss": 1.0289, "num_input_tokens_seen": 48338840, "step": 771 }, { "epoch": 2.5657237936772046, "loss": 0.909343957901001, "loss_ce": 0.0006526327924802899, "loss_iou": 0.353515625, "loss_num": 0.040771484375, "loss_xval": 0.91015625, "num_input_tokens_seen": 48338840, "step": 771 }, { "epoch": 2.569051580698835, "grad_norm": 18.85759162902832, "learning_rate": 5e-06, "loss": 0.8179, "num_input_tokens_seen": 48401924, "step": 772 }, { "epoch": 2.569051580698835, "loss": 0.8673095703125, "loss_ce": 0.0018920726142823696, "loss_iou": 0.275390625, "loss_num": 0.0625, "loss_xval": 0.8671875, "num_input_tokens_seen": 48401924, "step": 772 }, { "epoch": 2.572379367720466, "grad_norm": 14.904184341430664, "learning_rate": 5e-06, "loss": 0.8054, "num_input_tokens_seen": 48464296, "step": 773 }, { "epoch": 2.572379367720466, "loss": 0.9306117296218872, "loss_ce": 0.00019183362019248307, "loss_iou": 0.302734375, "loss_num": 0.06494140625, "loss_xval": 0.9296875, "num_input_tokens_seen": 48464296, "step": 773 }, { "epoch": 2.5757071547420964, "grad_norm": 15.765727996826172, "learning_rate": 5e-06, "loss": 0.889, "num_input_tokens_seen": 48527304, "step": 774 }, { "epoch": 2.5757071547420964, "loss": 0.9316340684890747, "loss_ce": 0.0036555223632603884, "loss_iou": 0.31640625, "loss_num": 0.05908203125, "loss_xval": 0.9296875, "num_input_tokens_seen": 48527304, "step": 774 }, { "epoch": 2.579034941763727, "grad_norm": 11.427712440490723, "learning_rate": 5e-06, "loss": 0.76, "num_input_tokens_seen": 48589984, "step": 775 }, { "epoch": 2.579034941763727, "loss": 0.7222743034362793, "loss_ce": 0.00047253709635697305, "loss_iou": 0.2412109375, "loss_num": 0.0478515625, "loss_xval": 0.72265625, "num_input_tokens_seen": 48589984, "step": 775 }, { "epoch": 2.5823627287853577, "grad_norm": 18.097305297851562, "learning_rate": 5e-06, "loss": 0.8925, "num_input_tokens_seen": 48652336, "step": 776 }, { "epoch": 2.5823627287853577, "loss": 0.9065259695053101, "loss_ce": 3.1771924113854766e-05, "loss_iou": 0.326171875, "loss_num": 0.05029296875, "loss_xval": 0.90625, "num_input_tokens_seen": 48652336, "step": 776 }, { "epoch": 2.5856905158069883, "grad_norm": 9.136321067810059, "learning_rate": 5e-06, "loss": 0.9126, "num_input_tokens_seen": 48715440, "step": 777 }, { "epoch": 2.5856905158069883, "loss": 0.801749050617218, "loss_ce": 0.00023535554646514356, "loss_iou": 0.30859375, "loss_num": 0.036865234375, "loss_xval": 0.80078125, "num_input_tokens_seen": 48715440, "step": 777 }, { "epoch": 2.589018302828619, "grad_norm": 39.80327606201172, "learning_rate": 5e-06, "loss": 0.9437, "num_input_tokens_seen": 48778212, "step": 778 }, { "epoch": 2.589018302828619, "loss": 1.0715380907058716, "loss_ce": 0.0004931373405270278, "loss_iou": 0.416015625, "loss_num": 0.048095703125, "loss_xval": 1.0703125, "num_input_tokens_seen": 48778212, "step": 778 }, { "epoch": 2.5923460898502495, "grad_norm": 21.966609954833984, "learning_rate": 5e-06, "loss": 0.9162, "num_input_tokens_seen": 48841016, "step": 779 }, { "epoch": 2.5923460898502495, "loss": 1.1759932041168213, "loss_ce": 0.0007002617930993438, "loss_iou": 0.4453125, "loss_num": 0.056884765625, "loss_xval": 1.171875, "num_input_tokens_seen": 48841016, "step": 779 }, { "epoch": 2.59567387687188, "grad_norm": 11.473250389099121, "learning_rate": 5e-06, "loss": 0.6972, "num_input_tokens_seen": 48904848, "step": 780 }, { "epoch": 2.59567387687188, "loss": 0.8778279423713684, "loss_ce": 0.00038655271055176854, "loss_iou": 0.333984375, "loss_num": 0.0419921875, "loss_xval": 0.87890625, "num_input_tokens_seen": 48904848, "step": 780 }, { "epoch": 2.5990016638935107, "grad_norm": 11.912096977233887, "learning_rate": 5e-06, "loss": 0.5857, "num_input_tokens_seen": 48967260, "step": 781 }, { "epoch": 2.5990016638935107, "loss": 0.5350910425186157, "loss_ce": 0.0005451616598293185, "loss_iou": 0.181640625, "loss_num": 0.034423828125, "loss_xval": 0.53515625, "num_input_tokens_seen": 48967260, "step": 781 }, { "epoch": 2.6023294509151413, "grad_norm": 13.463471412658691, "learning_rate": 5e-06, "loss": 0.4668, "num_input_tokens_seen": 49029024, "step": 782 }, { "epoch": 2.6023294509151413, "loss": 0.31681114435195923, "loss_ce": 6.920869054738432e-05, "loss_iou": 0.09423828125, "loss_num": 0.0255126953125, "loss_xval": 0.31640625, "num_input_tokens_seen": 49029024, "step": 782 }, { "epoch": 2.605657237936772, "grad_norm": 17.505420684814453, "learning_rate": 5e-06, "loss": 0.8343, "num_input_tokens_seen": 49091932, "step": 783 }, { "epoch": 2.605657237936772, "loss": 0.8508896827697754, "loss_ce": 0.0004258063272573054, "loss_iou": 0.298828125, "loss_num": 0.05029296875, "loss_xval": 0.8515625, "num_input_tokens_seen": 49091932, "step": 783 }, { "epoch": 2.6089850249584026, "grad_norm": 26.349285125732422, "learning_rate": 5e-06, "loss": 0.8789, "num_input_tokens_seen": 49154456, "step": 784 }, { "epoch": 2.6089850249584026, "loss": 0.6761670112609863, "loss_ce": 1.9504437659634277e-05, "loss_iou": 0.2158203125, "loss_num": 0.048828125, "loss_xval": 0.67578125, "num_input_tokens_seen": 49154456, "step": 784 }, { "epoch": 2.612312811980033, "grad_norm": 19.235546112060547, "learning_rate": 5e-06, "loss": 0.7823, "num_input_tokens_seen": 49213776, "step": 785 }, { "epoch": 2.612312811980033, "loss": 0.9341618418693542, "loss_ce": 7.980548252817243e-05, "loss_iou": 0.33203125, "loss_num": 0.053955078125, "loss_xval": 0.93359375, "num_input_tokens_seen": 49213776, "step": 785 }, { "epoch": 2.615640599001664, "grad_norm": 8.649251937866211, "learning_rate": 5e-06, "loss": 0.6877, "num_input_tokens_seen": 49275928, "step": 786 }, { "epoch": 2.615640599001664, "loss": 0.7523374557495117, "loss_ce": 0.0006285379640758038, "loss_iou": 0.2255859375, "loss_num": 0.06005859375, "loss_xval": 0.75, "num_input_tokens_seen": 49275928, "step": 786 }, { "epoch": 2.6189683860232944, "grad_norm": 8.426392555236816, "learning_rate": 5e-06, "loss": 0.7155, "num_input_tokens_seen": 49338088, "step": 787 }, { "epoch": 2.6189683860232944, "loss": 0.46052855253219604, "loss_ce": 0.0005675906431861222, "loss_iou": 0.1376953125, "loss_num": 0.036865234375, "loss_xval": 0.4609375, "num_input_tokens_seen": 49338088, "step": 787 }, { "epoch": 2.622296173044925, "grad_norm": 16.39332389831543, "learning_rate": 5e-06, "loss": 0.8291, "num_input_tokens_seen": 49400440, "step": 788 }, { "epoch": 2.622296173044925, "loss": 1.041515827178955, "loss_ce": 0.0006222816882655025, "loss_iou": 0.3828125, "loss_num": 0.054931640625, "loss_xval": 1.0390625, "num_input_tokens_seen": 49400440, "step": 788 }, { "epoch": 2.6256239600665556, "grad_norm": 19.96884536743164, "learning_rate": 5e-06, "loss": 0.8634, "num_input_tokens_seen": 49463176, "step": 789 }, { "epoch": 2.6256239600665556, "loss": 0.7899863123893738, "loss_ce": 6.92872199579142e-05, "loss_iou": 0.2109375, "loss_num": 0.07373046875, "loss_xval": 0.7890625, "num_input_tokens_seen": 49463176, "step": 789 }, { "epoch": 2.6289517470881862, "grad_norm": 16.452699661254883, "learning_rate": 5e-06, "loss": 0.6694, "num_input_tokens_seen": 49524556, "step": 790 }, { "epoch": 2.6289517470881862, "loss": 0.4771328568458557, "loss_ce": 0.000631356961093843, "loss_iou": 0.1484375, "loss_num": 0.03564453125, "loss_xval": 0.4765625, "num_input_tokens_seen": 49524556, "step": 790 }, { "epoch": 2.632279534109817, "grad_norm": 14.13532829284668, "learning_rate": 5e-06, "loss": 0.6136, "num_input_tokens_seen": 49587708, "step": 791 }, { "epoch": 2.632279534109817, "loss": 0.46144992113113403, "loss_ce": 0.0005124062299728394, "loss_iou": 0.1318359375, "loss_num": 0.03955078125, "loss_xval": 0.4609375, "num_input_tokens_seen": 49587708, "step": 791 }, { "epoch": 2.6356073211314475, "grad_norm": 10.96834945678711, "learning_rate": 5e-06, "loss": 0.6557, "num_input_tokens_seen": 49649048, "step": 792 }, { "epoch": 2.6356073211314475, "loss": 0.7462871670722961, "loss_ce": 0.0004375177959445864, "loss_iou": 0.2578125, "loss_num": 0.046142578125, "loss_xval": 0.74609375, "num_input_tokens_seen": 49649048, "step": 792 }, { "epoch": 2.638935108153078, "grad_norm": 10.10130500793457, "learning_rate": 5e-06, "loss": 0.8846, "num_input_tokens_seen": 49711624, "step": 793 }, { "epoch": 2.638935108153078, "loss": 0.7654234170913696, "loss_ce": 4.250594793120399e-05, "loss_iou": 0.255859375, "loss_num": 0.05078125, "loss_xval": 0.765625, "num_input_tokens_seen": 49711624, "step": 793 }, { "epoch": 2.6422628951747087, "grad_norm": 21.53556251525879, "learning_rate": 5e-06, "loss": 0.8802, "num_input_tokens_seen": 49774472, "step": 794 }, { "epoch": 2.6422628951747087, "loss": 0.7851353287696838, "loss_ce": 0.001077704131603241, "loss_iou": 0.287109375, "loss_num": 0.0419921875, "loss_xval": 0.78515625, "num_input_tokens_seen": 49774472, "step": 794 }, { "epoch": 2.6455906821963393, "grad_norm": 16.540266036987305, "learning_rate": 5e-06, "loss": 0.7369, "num_input_tokens_seen": 49835620, "step": 795 }, { "epoch": 2.6455906821963393, "loss": 0.5554940700531006, "loss_ce": 0.0003183374064974487, "loss_iou": 0.1318359375, "loss_num": 0.05859375, "loss_xval": 0.5546875, "num_input_tokens_seen": 49835620, "step": 795 }, { "epoch": 2.64891846921797, "grad_norm": 25.08919334411621, "learning_rate": 5e-06, "loss": 0.894, "num_input_tokens_seen": 49897508, "step": 796 }, { "epoch": 2.64891846921797, "loss": 0.945233166217804, "loss_ce": 0.0011413240572437644, "loss_iou": 0.29296875, "loss_num": 0.0712890625, "loss_xval": 0.9453125, "num_input_tokens_seen": 49897508, "step": 796 }, { "epoch": 2.6522462562396005, "grad_norm": 9.519119262695312, "learning_rate": 5e-06, "loss": 0.7185, "num_input_tokens_seen": 49960940, "step": 797 }, { "epoch": 2.6522462562396005, "loss": 0.6784650087356567, "loss_ce": 0.00036446438753046095, "loss_iou": 0.20703125, "loss_num": 0.052734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 49960940, "step": 797 }, { "epoch": 2.655574043261231, "grad_norm": 8.236225128173828, "learning_rate": 5e-06, "loss": 0.7899, "num_input_tokens_seen": 50022236, "step": 798 }, { "epoch": 2.655574043261231, "loss": 0.7636892199516296, "loss_ce": 1.7315825971309096e-05, "loss_iou": 0.26171875, "loss_num": 0.0478515625, "loss_xval": 0.765625, "num_input_tokens_seen": 50022236, "step": 798 }, { "epoch": 2.6589018302828618, "grad_norm": 16.24251937866211, "learning_rate": 5e-06, "loss": 0.7819, "num_input_tokens_seen": 50085800, "step": 799 }, { "epoch": 2.6589018302828618, "loss": 0.7982438206672668, "loss_ce": 0.00039225342334248126, "loss_iou": 0.314453125, "loss_num": 0.03369140625, "loss_xval": 0.796875, "num_input_tokens_seen": 50085800, "step": 799 }, { "epoch": 2.6622296173044924, "grad_norm": 19.55490493774414, "learning_rate": 5e-06, "loss": 0.9039, "num_input_tokens_seen": 50149700, "step": 800 }, { "epoch": 2.6622296173044924, "loss": 0.9562564492225647, "loss_ce": 0.000812111422419548, "loss_iou": 0.38671875, "loss_num": 0.03662109375, "loss_xval": 0.95703125, "num_input_tokens_seen": 50149700, "step": 800 }, { "epoch": 2.665557404326123, "grad_norm": 10.239023208618164, "learning_rate": 5e-06, "loss": 0.8664, "num_input_tokens_seen": 50212752, "step": 801 }, { "epoch": 2.665557404326123, "loss": 0.7656987905502319, "loss_ce": 7.37954760552384e-05, "loss_iou": 0.2275390625, "loss_num": 0.062255859375, "loss_xval": 0.765625, "num_input_tokens_seen": 50212752, "step": 801 }, { "epoch": 2.6688851913477536, "grad_norm": 9.083358764648438, "learning_rate": 5e-06, "loss": 0.7008, "num_input_tokens_seen": 50275860, "step": 802 }, { "epoch": 2.6688851913477536, "loss": 0.8081342577934265, "loss_ce": 2.879274143197108e-05, "loss_iou": 0.294921875, "loss_num": 0.0439453125, "loss_xval": 0.80859375, "num_input_tokens_seen": 50275860, "step": 802 }, { "epoch": 2.672212978369384, "grad_norm": 9.288610458374023, "learning_rate": 5e-06, "loss": 0.7961, "num_input_tokens_seen": 50337568, "step": 803 }, { "epoch": 2.672212978369384, "loss": 0.6985122561454773, "loss_ce": 0.00039212280535139143, "loss_iou": 0.2451171875, "loss_num": 0.041259765625, "loss_xval": 0.69921875, "num_input_tokens_seen": 50337568, "step": 803 }, { "epoch": 2.675540765391015, "grad_norm": 16.272144317626953, "learning_rate": 5e-06, "loss": 0.9598, "num_input_tokens_seen": 50400580, "step": 804 }, { "epoch": 2.675540765391015, "loss": 0.9980466365814209, "loss_ce": 0.00024393126659560949, "loss_iou": 0.3515625, "loss_num": 0.058837890625, "loss_xval": 0.99609375, "num_input_tokens_seen": 50400580, "step": 804 }, { "epoch": 2.6788685524126454, "grad_norm": 17.62891387939453, "learning_rate": 5e-06, "loss": 1.0274, "num_input_tokens_seen": 50463740, "step": 805 }, { "epoch": 2.6788685524126454, "loss": 1.29103422164917, "loss_ce": 1.8554455891717225e-05, "loss_iou": 0.455078125, "loss_num": 0.076171875, "loss_xval": 1.2890625, "num_input_tokens_seen": 50463740, "step": 805 }, { "epoch": 2.682196339434276, "grad_norm": 9.38234806060791, "learning_rate": 5e-06, "loss": 0.8099, "num_input_tokens_seen": 50528688, "step": 806 }, { "epoch": 2.682196339434276, "loss": 0.883965790271759, "loss_ce": 0.0015194909647107124, "loss_iou": 0.314453125, "loss_num": 0.050537109375, "loss_xval": 0.8828125, "num_input_tokens_seen": 50528688, "step": 806 }, { "epoch": 2.6855241264559067, "grad_norm": 22.780593872070312, "learning_rate": 5e-06, "loss": 0.9114, "num_input_tokens_seen": 50592020, "step": 807 }, { "epoch": 2.6855241264559067, "loss": 0.8689115047454834, "loss_ce": 1.5041307960927952e-05, "loss_iou": 0.318359375, "loss_num": 0.046142578125, "loss_xval": 0.8671875, "num_input_tokens_seen": 50592020, "step": 807 }, { "epoch": 2.6888519134775377, "grad_norm": 18.877073287963867, "learning_rate": 5e-06, "loss": 0.827, "num_input_tokens_seen": 50655348, "step": 808 }, { "epoch": 2.6888519134775377, "loss": 0.7745095491409302, "loss_ce": 0.00033960340078920126, "loss_iou": 0.306640625, "loss_num": 0.031982421875, "loss_xval": 0.7734375, "num_input_tokens_seen": 50655348, "step": 808 }, { "epoch": 2.6921797004991683, "grad_norm": 7.44007682800293, "learning_rate": 5e-06, "loss": 0.6639, "num_input_tokens_seen": 50718640, "step": 809 }, { "epoch": 2.6921797004991683, "loss": 0.7117258310317993, "loss_ce": 0.00030002190032973886, "loss_iou": 0.220703125, "loss_num": 0.053955078125, "loss_xval": 0.7109375, "num_input_tokens_seen": 50718640, "step": 809 }, { "epoch": 2.695507487520799, "grad_norm": 13.671259880065918, "learning_rate": 5e-06, "loss": 0.9001, "num_input_tokens_seen": 50780636, "step": 810 }, { "epoch": 2.695507487520799, "loss": 0.8555184602737427, "loss_ce": 0.0005380017682909966, "loss_iou": 0.263671875, "loss_num": 0.06494140625, "loss_xval": 0.85546875, "num_input_tokens_seen": 50780636, "step": 810 }, { "epoch": 2.6988352745424296, "grad_norm": 11.612728118896484, "learning_rate": 5e-06, "loss": 0.6957, "num_input_tokens_seen": 50842360, "step": 811 }, { "epoch": 2.6988352745424296, "loss": 0.8578015565872192, "loss_ce": 0.0012341841356828809, "loss_iou": 0.2578125, "loss_num": 0.06787109375, "loss_xval": 0.85546875, "num_input_tokens_seen": 50842360, "step": 811 }, { "epoch": 2.70216306156406, "grad_norm": 11.44379997253418, "learning_rate": 5e-06, "loss": 0.9141, "num_input_tokens_seen": 50904836, "step": 812 }, { "epoch": 2.70216306156406, "loss": 1.17952561378479, "loss_ce": 0.00014328735414892435, "loss_iou": 0.42578125, "loss_num": 0.06591796875, "loss_xval": 1.1796875, "num_input_tokens_seen": 50904836, "step": 812 }, { "epoch": 2.7054908485856908, "grad_norm": 25.091814041137695, "learning_rate": 5e-06, "loss": 1.0485, "num_input_tokens_seen": 50969336, "step": 813 }, { "epoch": 2.7054908485856908, "loss": 1.0000128746032715, "loss_ce": 0.0005010617314837873, "loss_iou": 0.375, "loss_num": 0.0498046875, "loss_xval": 1.0, "num_input_tokens_seen": 50969336, "step": 813 }, { "epoch": 2.7088186356073214, "grad_norm": 9.175084114074707, "learning_rate": 5e-06, "loss": 0.7176, "num_input_tokens_seen": 51031912, "step": 814 }, { "epoch": 2.7088186356073214, "loss": 0.7256096601486206, "loss_ce": 0.0002678523596841842, "loss_iou": 0.2490234375, "loss_num": 0.04541015625, "loss_xval": 0.7265625, "num_input_tokens_seen": 51031912, "step": 814 }, { "epoch": 2.712146422628952, "grad_norm": 25.772275924682617, "learning_rate": 5e-06, "loss": 0.7449, "num_input_tokens_seen": 51096024, "step": 815 }, { "epoch": 2.712146422628952, "loss": 0.8133649826049805, "loss_ce": 0.00025464242207817733, "loss_iou": 0.326171875, "loss_num": 0.032470703125, "loss_xval": 0.8125, "num_input_tokens_seen": 51096024, "step": 815 }, { "epoch": 2.7154742096505826, "grad_norm": 43.45026779174805, "learning_rate": 5e-06, "loss": 0.933, "num_input_tokens_seen": 51159052, "step": 816 }, { "epoch": 2.7154742096505826, "loss": 0.9415866732597351, "loss_ce": 5.8369518228573725e-05, "loss_iou": 0.34375, "loss_num": 0.05126953125, "loss_xval": 0.94140625, "num_input_tokens_seen": 51159052, "step": 816 }, { "epoch": 2.7188019966722132, "grad_norm": 28.74701690673828, "learning_rate": 5e-06, "loss": 0.7016, "num_input_tokens_seen": 51220308, "step": 817 }, { "epoch": 2.7188019966722132, "loss": 0.6007994413375854, "loss_ce": 3.0412125852308236e-05, "loss_iou": 0.1845703125, "loss_num": 0.04638671875, "loss_xval": 0.6015625, "num_input_tokens_seen": 51220308, "step": 817 }, { "epoch": 2.722129783693844, "grad_norm": 9.16032600402832, "learning_rate": 5e-06, "loss": 0.9523, "num_input_tokens_seen": 51284628, "step": 818 }, { "epoch": 2.722129783693844, "loss": 0.8527227640151978, "loss_ce": 0.00018370727775618434, "loss_iou": 0.3359375, "loss_num": 0.036376953125, "loss_xval": 0.8515625, "num_input_tokens_seen": 51284628, "step": 818 }, { "epoch": 2.7254575707154745, "grad_norm": 22.749645233154297, "learning_rate": 5e-06, "loss": 0.5959, "num_input_tokens_seen": 51346872, "step": 819 }, { "epoch": 2.7254575707154745, "loss": 0.48708152770996094, "loss_ce": 2.0974162907805294e-05, "loss_iou": 0.1669921875, "loss_num": 0.03076171875, "loss_xval": 0.486328125, "num_input_tokens_seen": 51346872, "step": 819 }, { "epoch": 2.728785357737105, "grad_norm": 20.728750228881836, "learning_rate": 5e-06, "loss": 0.9829, "num_input_tokens_seen": 51408904, "step": 820 }, { "epoch": 2.728785357737105, "loss": 1.020106554031372, "loss_ce": 0.0009415639797225595, "loss_iou": 0.369140625, "loss_num": 0.05615234375, "loss_xval": 1.015625, "num_input_tokens_seen": 51408904, "step": 820 }, { "epoch": 2.7321131447587357, "grad_norm": 20.01232147216797, "learning_rate": 5e-06, "loss": 0.9293, "num_input_tokens_seen": 51470984, "step": 821 }, { "epoch": 2.7321131447587357, "loss": 0.9964734315872192, "loss_ce": 1.3501612556865439e-05, "loss_iou": 0.333984375, "loss_num": 0.06591796875, "loss_xval": 0.99609375, "num_input_tokens_seen": 51470984, "step": 821 }, { "epoch": 2.7354409317803663, "grad_norm": 26.69096565246582, "learning_rate": 5e-06, "loss": 0.7207, "num_input_tokens_seen": 51534144, "step": 822 }, { "epoch": 2.7354409317803663, "loss": 0.870019793510437, "loss_ce": 0.000635035103186965, "loss_iou": 0.31640625, "loss_num": 0.04736328125, "loss_xval": 0.87109375, "num_input_tokens_seen": 51534144, "step": 822 }, { "epoch": 2.738768718801997, "grad_norm": 18.575462341308594, "learning_rate": 5e-06, "loss": 0.9289, "num_input_tokens_seen": 51598260, "step": 823 }, { "epoch": 2.738768718801997, "loss": 0.9322853088378906, "loss_ce": 0.0004004844813607633, "loss_iou": 0.357421875, "loss_num": 0.043212890625, "loss_xval": 0.93359375, "num_input_tokens_seen": 51598260, "step": 823 }, { "epoch": 2.7420965058236275, "grad_norm": 15.447315216064453, "learning_rate": 5e-06, "loss": 0.6488, "num_input_tokens_seen": 51660592, "step": 824 }, { "epoch": 2.7420965058236275, "loss": 0.6367542743682861, "loss_ce": 3.5481833037920296e-05, "loss_iou": 0.2109375, "loss_num": 0.042724609375, "loss_xval": 0.63671875, "num_input_tokens_seen": 51660592, "step": 824 }, { "epoch": 2.745424292845258, "grad_norm": 14.117728233337402, "learning_rate": 5e-06, "loss": 0.8426, "num_input_tokens_seen": 51723816, "step": 825 }, { "epoch": 2.745424292845258, "loss": 0.7834662199020386, "loss_ce": 0.006000405643135309, "loss_iou": 0.279296875, "loss_num": 0.0439453125, "loss_xval": 0.77734375, "num_input_tokens_seen": 51723816, "step": 825 }, { "epoch": 2.7487520798668887, "grad_norm": 13.778304100036621, "learning_rate": 5e-06, "loss": 1.0028, "num_input_tokens_seen": 51788500, "step": 826 }, { "epoch": 2.7487520798668887, "loss": 0.8457362055778503, "loss_ce": 3.3119446015916765e-05, "loss_iou": 0.287109375, "loss_num": 0.0537109375, "loss_xval": 0.84375, "num_input_tokens_seen": 51788500, "step": 826 }, { "epoch": 2.7520798668885194, "grad_norm": 17.102991104125977, "learning_rate": 5e-06, "loss": 0.645, "num_input_tokens_seen": 51851236, "step": 827 }, { "epoch": 2.7520798668885194, "loss": 0.5853976607322693, "loss_ce": 9.483837857260369e-06, "loss_iou": 0.177734375, "loss_num": 0.046142578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 51851236, "step": 827 }, { "epoch": 2.75540765391015, "grad_norm": 17.917057037353516, "learning_rate": 5e-06, "loss": 0.7111, "num_input_tokens_seen": 51913216, "step": 828 }, { "epoch": 2.75540765391015, "loss": 0.8741090297698975, "loss_ce": 0.00032974532223306596, "loss_iou": 0.24609375, "loss_num": 0.076171875, "loss_xval": 0.875, "num_input_tokens_seen": 51913216, "step": 828 }, { "epoch": 2.7587354409317806, "grad_norm": 11.250595092773438, "learning_rate": 5e-06, "loss": 0.5955, "num_input_tokens_seen": 51974740, "step": 829 }, { "epoch": 2.7587354409317806, "loss": 0.7106689214706421, "loss_ce": 0.00021965843916404992, "loss_iou": 0.224609375, "loss_num": 0.05224609375, "loss_xval": 0.7109375, "num_input_tokens_seen": 51974740, "step": 829 }, { "epoch": 2.762063227953411, "grad_norm": 79.73774719238281, "learning_rate": 5e-06, "loss": 1.0182, "num_input_tokens_seen": 52038060, "step": 830 }, { "epoch": 2.762063227953411, "loss": 1.1705613136291504, "loss_ce": 0.0001511987647973001, "loss_iou": 0.453125, "loss_num": 0.052978515625, "loss_xval": 1.171875, "num_input_tokens_seen": 52038060, "step": 830 }, { "epoch": 2.765391014975042, "grad_norm": 32.597877502441406, "learning_rate": 5e-06, "loss": 0.7906, "num_input_tokens_seen": 52100548, "step": 831 }, { "epoch": 2.765391014975042, "loss": 0.744775116443634, "loss_ce": 2.4136337742675096e-05, "loss_iou": 0.255859375, "loss_num": 0.046630859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 52100548, "step": 831 }, { "epoch": 2.7687188019966724, "grad_norm": 25.43697166442871, "learning_rate": 5e-06, "loss": 0.8114, "num_input_tokens_seen": 52162216, "step": 832 }, { "epoch": 2.7687188019966724, "loss": 0.6264140009880066, "loss_ce": 0.0009257167694158852, "loss_iou": 0.1669921875, "loss_num": 0.05859375, "loss_xval": 0.625, "num_input_tokens_seen": 52162216, "step": 832 }, { "epoch": 2.772046589018303, "grad_norm": 12.007596015930176, "learning_rate": 5e-06, "loss": 0.4881, "num_input_tokens_seen": 52224180, "step": 833 }, { "epoch": 2.772046589018303, "loss": 0.46583065390586853, "loss_ce": 4.088615605724044e-05, "loss_iou": 0.14453125, "loss_num": 0.03564453125, "loss_xval": 0.46484375, "num_input_tokens_seen": 52224180, "step": 833 }, { "epoch": 2.7753743760399336, "grad_norm": 15.632837295532227, "learning_rate": 5e-06, "loss": 0.8777, "num_input_tokens_seen": 52287644, "step": 834 }, { "epoch": 2.7753743760399336, "loss": 0.7958810329437256, "loss_ce": 0.0002267022500745952, "loss_iou": 0.279296875, "loss_num": 0.047119140625, "loss_xval": 0.796875, "num_input_tokens_seen": 52287644, "step": 834 }, { "epoch": 2.7787021630615643, "grad_norm": 15.249378204345703, "learning_rate": 5e-06, "loss": 0.6745, "num_input_tokens_seen": 52349544, "step": 835 }, { "epoch": 2.7787021630615643, "loss": 0.7983464598655701, "loss_ce": 6.6373549998388626e-06, "loss_iou": 0.19140625, "loss_num": 0.0830078125, "loss_xval": 0.796875, "num_input_tokens_seen": 52349544, "step": 835 }, { "epoch": 2.782029950083195, "grad_norm": 9.68162727355957, "learning_rate": 5e-06, "loss": 0.797, "num_input_tokens_seen": 52409296, "step": 836 }, { "epoch": 2.782029950083195, "loss": 0.9106274843215942, "loss_ce": 0.00022710610937792808, "loss_iou": 0.251953125, "loss_num": 0.08154296875, "loss_xval": 0.91015625, "num_input_tokens_seen": 52409296, "step": 836 }, { "epoch": 2.7853577371048255, "grad_norm": 14.630462646484375, "learning_rate": 5e-06, "loss": 0.966, "num_input_tokens_seen": 52473192, "step": 837 }, { "epoch": 2.7853577371048255, "loss": 0.8400080800056458, "loss_ce": 0.0018733233446255326, "loss_iou": 0.318359375, "loss_num": 0.0400390625, "loss_xval": 0.83984375, "num_input_tokens_seen": 52473192, "step": 837 }, { "epoch": 2.788685524126456, "grad_norm": 17.682964324951172, "learning_rate": 5e-06, "loss": 0.8121, "num_input_tokens_seen": 52536228, "step": 838 }, { "epoch": 2.788685524126456, "loss": 0.9199892282485962, "loss_ce": 0.0003114935243502259, "loss_iou": 0.328125, "loss_num": 0.052490234375, "loss_xval": 0.91796875, "num_input_tokens_seen": 52536228, "step": 838 }, { "epoch": 2.7920133111480867, "grad_norm": 15.561915397644043, "learning_rate": 5e-06, "loss": 0.6371, "num_input_tokens_seen": 52600284, "step": 839 }, { "epoch": 2.7920133111480867, "loss": 0.7090641260147095, "loss_ce": 0.0008732442511245608, "loss_iou": 0.283203125, "loss_num": 0.0283203125, "loss_xval": 0.70703125, "num_input_tokens_seen": 52600284, "step": 839 }, { "epoch": 2.7953410981697173, "grad_norm": 8.331130981445312, "learning_rate": 5e-06, "loss": 0.9367, "num_input_tokens_seen": 52663872, "step": 840 }, { "epoch": 2.7953410981697173, "loss": 0.8647535443305969, "loss_ce": 0.000739889801479876, "loss_iou": 0.2255859375, "loss_num": 0.08251953125, "loss_xval": 0.86328125, "num_input_tokens_seen": 52663872, "step": 840 }, { "epoch": 2.798668885191348, "grad_norm": 49.02280807495117, "learning_rate": 5e-06, "loss": 0.6277, "num_input_tokens_seen": 52726020, "step": 841 }, { "epoch": 2.798668885191348, "loss": 0.5782544612884521, "loss_ce": 7.363456006714841e-06, "loss_iou": 0.1787109375, "loss_num": 0.044189453125, "loss_xval": 0.578125, "num_input_tokens_seen": 52726020, "step": 841 }, { "epoch": 2.8019966722129785, "grad_norm": 9.815765380859375, "learning_rate": 5e-06, "loss": 0.759, "num_input_tokens_seen": 52790172, "step": 842 }, { "epoch": 2.8019966722129785, "loss": 0.8251725435256958, "loss_ce": 0.0011979036498814821, "loss_iou": 0.306640625, "loss_num": 0.0419921875, "loss_xval": 0.82421875, "num_input_tokens_seen": 52790172, "step": 842 }, { "epoch": 2.805324459234609, "grad_norm": 14.346421241760254, "learning_rate": 5e-06, "loss": 0.8258, "num_input_tokens_seen": 52851380, "step": 843 }, { "epoch": 2.805324459234609, "loss": 0.7079565525054932, "loss_ce": 0.0006811939529143274, "loss_iou": 0.2451171875, "loss_num": 0.043212890625, "loss_xval": 0.70703125, "num_input_tokens_seen": 52851380, "step": 843 }, { "epoch": 2.8086522462562398, "grad_norm": 12.317045211791992, "learning_rate": 5e-06, "loss": 0.8788, "num_input_tokens_seen": 52915068, "step": 844 }, { "epoch": 2.8086522462562398, "loss": 0.4973338842391968, "loss_ce": 0.0003856297116726637, "loss_iou": 0.16796875, "loss_num": 0.0322265625, "loss_xval": 0.49609375, "num_input_tokens_seen": 52915068, "step": 844 }, { "epoch": 2.8119800332778704, "grad_norm": 6.6768083572387695, "learning_rate": 5e-06, "loss": 0.6261, "num_input_tokens_seen": 52976964, "step": 845 }, { "epoch": 2.8119800332778704, "loss": 0.8055537343025208, "loss_ce": 0.00037789929774589837, "loss_iou": 0.283203125, "loss_num": 0.0478515625, "loss_xval": 0.8046875, "num_input_tokens_seen": 52976964, "step": 845 }, { "epoch": 2.815307820299501, "grad_norm": 11.683884620666504, "learning_rate": 5e-06, "loss": 0.6091, "num_input_tokens_seen": 53039924, "step": 846 }, { "epoch": 2.815307820299501, "loss": 0.6055033802986145, "loss_ce": 0.0018656485481187701, "loss_iou": 0.21875, "loss_num": 0.033203125, "loss_xval": 0.60546875, "num_input_tokens_seen": 53039924, "step": 846 }, { "epoch": 2.8186356073211316, "grad_norm": 13.659690856933594, "learning_rate": 5e-06, "loss": 0.9679, "num_input_tokens_seen": 53104456, "step": 847 }, { "epoch": 2.8186356073211316, "loss": 1.0553326606750488, "loss_ce": 0.0040631480515003204, "loss_iou": 0.373046875, "loss_num": 0.06103515625, "loss_xval": 1.0546875, "num_input_tokens_seen": 53104456, "step": 847 }, { "epoch": 2.821963394342762, "grad_norm": 29.937044143676758, "learning_rate": 5e-06, "loss": 0.8535, "num_input_tokens_seen": 53166408, "step": 848 }, { "epoch": 2.821963394342762, "loss": 1.0855714082717896, "loss_ce": 0.00012218697520438582, "loss_iou": 0.35546875, "loss_num": 0.0751953125, "loss_xval": 1.0859375, "num_input_tokens_seen": 53166408, "step": 848 }, { "epoch": 2.825291181364393, "grad_norm": 37.90950012207031, "learning_rate": 5e-06, "loss": 0.9913, "num_input_tokens_seen": 53226280, "step": 849 }, { "epoch": 2.825291181364393, "loss": 1.0137234926223755, "loss_ce": 0.0002957689284812659, "loss_iou": 0.283203125, "loss_num": 0.08935546875, "loss_xval": 1.015625, "num_input_tokens_seen": 53226280, "step": 849 }, { "epoch": 2.8286189683860234, "grad_norm": 40.06550598144531, "learning_rate": 5e-06, "loss": 0.9942, "num_input_tokens_seen": 53289856, "step": 850 }, { "epoch": 2.8286189683860234, "loss": 1.1431384086608887, "loss_ce": 7.198890671133995e-05, "loss_iou": 0.408203125, "loss_num": 0.0654296875, "loss_xval": 1.140625, "num_input_tokens_seen": 53289856, "step": 850 }, { "epoch": 2.831946755407654, "grad_norm": 29.066486358642578, "learning_rate": 5e-06, "loss": 0.9127, "num_input_tokens_seen": 53351764, "step": 851 }, { "epoch": 2.831946755407654, "loss": 0.8141542673110962, "loss_ce": 0.00018944813928101212, "loss_iou": 0.2890625, "loss_num": 0.047607421875, "loss_xval": 0.8125, "num_input_tokens_seen": 53351764, "step": 851 }, { "epoch": 2.8352745424292847, "grad_norm": 10.681289672851562, "learning_rate": 5e-06, "loss": 0.7968, "num_input_tokens_seen": 53414936, "step": 852 }, { "epoch": 2.8352745424292847, "loss": 1.1512733697891235, "loss_ce": 0.0008828034624457359, "loss_iou": 0.44921875, "loss_num": 0.050048828125, "loss_xval": 1.1484375, "num_input_tokens_seen": 53414936, "step": 852 }, { "epoch": 2.8386023294509153, "grad_norm": 9.543261528015137, "learning_rate": 5e-06, "loss": 0.6924, "num_input_tokens_seen": 53477328, "step": 853 }, { "epoch": 2.8386023294509153, "loss": 0.7157683968544006, "loss_ce": 7.013405411271378e-05, "loss_iou": 0.265625, "loss_num": 0.037109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 53477328, "step": 853 }, { "epoch": 2.841930116472546, "grad_norm": 21.23872184753418, "learning_rate": 5e-06, "loss": 0.8171, "num_input_tokens_seen": 53540276, "step": 854 }, { "epoch": 2.841930116472546, "loss": 1.0230469703674316, "loss_ce": 0.0004640130791813135, "loss_iou": 0.37109375, "loss_num": 0.05615234375, "loss_xval": 1.0234375, "num_input_tokens_seen": 53540276, "step": 854 }, { "epoch": 2.8452579034941765, "grad_norm": 32.094459533691406, "learning_rate": 5e-06, "loss": 0.7928, "num_input_tokens_seen": 53603856, "step": 855 }, { "epoch": 2.8452579034941765, "loss": 0.9636791944503784, "loss_ce": 0.0010326473275199533, "loss_iou": 0.3515625, "loss_num": 0.05224609375, "loss_xval": 0.9609375, "num_input_tokens_seen": 53603856, "step": 855 }, { "epoch": 2.848585690515807, "grad_norm": 24.845178604125977, "learning_rate": 5e-06, "loss": 1.2545, "num_input_tokens_seen": 53665868, "step": 856 }, { "epoch": 2.848585690515807, "loss": 0.8841911554336548, "loss_ce": 0.0002800060319714248, "loss_iou": 0.30859375, "loss_num": 0.052978515625, "loss_xval": 0.8828125, "num_input_tokens_seen": 53665868, "step": 856 }, { "epoch": 2.8519134775374377, "grad_norm": 10.065874099731445, "learning_rate": 5e-06, "loss": 0.7501, "num_input_tokens_seen": 53730176, "step": 857 }, { "epoch": 2.8519134775374377, "loss": 0.6680462956428528, "loss_ce": 0.0008099457481876016, "loss_iou": 0.23828125, "loss_num": 0.038330078125, "loss_xval": 0.66796875, "num_input_tokens_seen": 53730176, "step": 857 }, { "epoch": 2.8552412645590683, "grad_norm": 11.404300689697266, "learning_rate": 5e-06, "loss": 0.8141, "num_input_tokens_seen": 53791604, "step": 858 }, { "epoch": 2.8552412645590683, "loss": 0.9741370677947998, "loss_ce": 1.6005247744033113e-05, "loss_iou": 0.337890625, "loss_num": 0.059326171875, "loss_xval": 0.97265625, "num_input_tokens_seen": 53791604, "step": 858 }, { "epoch": 2.858569051580699, "grad_norm": 15.735603332519531, "learning_rate": 5e-06, "loss": 0.9062, "num_input_tokens_seen": 53856684, "step": 859 }, { "epoch": 2.858569051580699, "loss": 1.0561212301254272, "loss_ce": 0.0005792746087536216, "loss_iou": 0.4296875, "loss_num": 0.0390625, "loss_xval": 1.0546875, "num_input_tokens_seen": 53856684, "step": 859 }, { "epoch": 2.8618968386023296, "grad_norm": 20.481887817382812, "learning_rate": 5e-06, "loss": 0.705, "num_input_tokens_seen": 53919896, "step": 860 }, { "epoch": 2.8618968386023296, "loss": 0.677882194519043, "loss_ce": 0.0008802823722362518, "loss_iou": 0.201171875, "loss_num": 0.05517578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 53919896, "step": 860 }, { "epoch": 2.86522462562396, "grad_norm": 17.972482681274414, "learning_rate": 5e-06, "loss": 0.6315, "num_input_tokens_seen": 53981836, "step": 861 }, { "epoch": 2.86522462562396, "loss": 0.4665610194206238, "loss_ce": 8.29627697385149e-06, "loss_iou": 0.1572265625, "loss_num": 0.030517578125, "loss_xval": 0.466796875, "num_input_tokens_seen": 53981836, "step": 861 }, { "epoch": 2.868552412645591, "grad_norm": 21.63575553894043, "learning_rate": 5e-06, "loss": 0.994, "num_input_tokens_seen": 54045632, "step": 862 }, { "epoch": 2.868552412645591, "loss": 0.9137723445892334, "loss_ce": 0.0009305117418989539, "loss_iou": 0.34375, "loss_num": 0.04443359375, "loss_xval": 0.9140625, "num_input_tokens_seen": 54045632, "step": 862 }, { "epoch": 2.8718801996672214, "grad_norm": 11.723126411437988, "learning_rate": 5e-06, "loss": 0.6927, "num_input_tokens_seen": 54108668, "step": 863 }, { "epoch": 2.8718801996672214, "loss": 0.8126630783081055, "loss_ce": 0.0013837890001013875, "loss_iou": 0.287109375, "loss_num": 0.047119140625, "loss_xval": 0.8125, "num_input_tokens_seen": 54108668, "step": 863 }, { "epoch": 2.875207986688852, "grad_norm": 15.467992782592773, "learning_rate": 5e-06, "loss": 0.7921, "num_input_tokens_seen": 54171088, "step": 864 }, { "epoch": 2.875207986688852, "loss": 0.5551916360855103, "loss_ce": 1.581585092935711e-05, "loss_iou": 0.09765625, "loss_num": 0.07177734375, "loss_xval": 0.5546875, "num_input_tokens_seen": 54171088, "step": 864 }, { "epoch": 2.8785357737104826, "grad_norm": 11.699230194091797, "learning_rate": 5e-06, "loss": 0.8306, "num_input_tokens_seen": 54235344, "step": 865 }, { "epoch": 2.8785357737104826, "loss": 0.9300848245620728, "loss_ce": 0.0013738907873630524, "loss_iou": 0.330078125, "loss_num": 0.05322265625, "loss_xval": 0.9296875, "num_input_tokens_seen": 54235344, "step": 865 }, { "epoch": 2.8818635607321132, "grad_norm": 21.204299926757812, "learning_rate": 5e-06, "loss": 0.8931, "num_input_tokens_seen": 54297296, "step": 866 }, { "epoch": 2.8818635607321132, "loss": 0.857491672039032, "loss_ce": 6.98186267982237e-05, "loss_iou": 0.33203125, "loss_num": 0.0390625, "loss_xval": 0.859375, "num_input_tokens_seen": 54297296, "step": 866 }, { "epoch": 2.885191347753744, "grad_norm": 38.91550827026367, "learning_rate": 5e-06, "loss": 0.9858, "num_input_tokens_seen": 54360772, "step": 867 }, { "epoch": 2.885191347753744, "loss": 0.8661935329437256, "loss_ce": 0.00047088958672247827, "loss_iou": 0.279296875, "loss_num": 0.061279296875, "loss_xval": 0.8671875, "num_input_tokens_seen": 54360772, "step": 867 }, { "epoch": 2.8885191347753745, "grad_norm": 30.643917083740234, "learning_rate": 5e-06, "loss": 0.8152, "num_input_tokens_seen": 54423520, "step": 868 }, { "epoch": 2.8885191347753745, "loss": 0.9406991004943848, "loss_ce": 0.00026937766233459115, "loss_iou": 0.291015625, "loss_num": 0.0712890625, "loss_xval": 0.94140625, "num_input_tokens_seen": 54423520, "step": 868 }, { "epoch": 2.891846921797005, "grad_norm": 6.868099689483643, "learning_rate": 5e-06, "loss": 0.9647, "num_input_tokens_seen": 54487024, "step": 869 }, { "epoch": 2.891846921797005, "loss": 1.1396923065185547, "loss_ce": 4.3880856537725776e-05, "loss_iou": 0.39453125, "loss_num": 0.0703125, "loss_xval": 1.140625, "num_input_tokens_seen": 54487024, "step": 869 }, { "epoch": 2.8951747088186357, "grad_norm": 15.951972007751465, "learning_rate": 5e-06, "loss": 0.6826, "num_input_tokens_seen": 54550240, "step": 870 }, { "epoch": 2.8951747088186357, "loss": 0.6227001547813416, "loss_ce": 0.0011181265581399202, "loss_iou": 0.2216796875, "loss_num": 0.035888671875, "loss_xval": 0.62109375, "num_input_tokens_seen": 54550240, "step": 870 }, { "epoch": 2.8985024958402663, "grad_norm": 10.875045776367188, "learning_rate": 5e-06, "loss": 0.818, "num_input_tokens_seen": 54612024, "step": 871 }, { "epoch": 2.8985024958402663, "loss": 0.6584023237228394, "loss_ce": 0.0005654271226376295, "loss_iou": 0.220703125, "loss_num": 0.043212890625, "loss_xval": 0.65625, "num_input_tokens_seen": 54612024, "step": 871 }, { "epoch": 2.901830282861897, "grad_norm": 16.954748153686523, "learning_rate": 5e-06, "loss": 0.7487, "num_input_tokens_seen": 54674712, "step": 872 }, { "epoch": 2.901830282861897, "loss": 0.6434586644172668, "loss_ce": 0.0042984881438314915, "loss_iou": 0.2216796875, "loss_num": 0.039306640625, "loss_xval": 0.640625, "num_input_tokens_seen": 54674712, "step": 872 }, { "epoch": 2.9051580698835275, "grad_norm": 8.517369270324707, "learning_rate": 5e-06, "loss": 0.813, "num_input_tokens_seen": 54737272, "step": 873 }, { "epoch": 2.9051580698835275, "loss": 0.6707565784454346, "loss_ce": 0.0005905504804104567, "loss_iou": 0.203125, "loss_num": 0.052734375, "loss_xval": 0.671875, "num_input_tokens_seen": 54737272, "step": 873 }, { "epoch": 2.908485856905158, "grad_norm": 16.266172409057617, "learning_rate": 5e-06, "loss": 0.5366, "num_input_tokens_seen": 54798988, "step": 874 }, { "epoch": 2.908485856905158, "loss": 0.5999138355255127, "loss_ce": 0.000304465793306008, "loss_iou": 0.2265625, "loss_num": 0.029541015625, "loss_xval": 0.6015625, "num_input_tokens_seen": 54798988, "step": 874 }, { "epoch": 2.9118136439267888, "grad_norm": 115.90511322021484, "learning_rate": 5e-06, "loss": 0.8299, "num_input_tokens_seen": 54861216, "step": 875 }, { "epoch": 2.9118136439267888, "loss": 0.9268336892127991, "loss_ce": 0.0003199669881723821, "loss_iou": 0.326171875, "loss_num": 0.05517578125, "loss_xval": 0.92578125, "num_input_tokens_seen": 54861216, "step": 875 }, { "epoch": 2.9151414309484194, "grad_norm": 20.038921356201172, "learning_rate": 5e-06, "loss": 0.7256, "num_input_tokens_seen": 54924216, "step": 876 }, { "epoch": 2.9151414309484194, "loss": 0.7849488258361816, "loss_ce": 0.00015877312398515642, "loss_iou": 0.2099609375, "loss_num": 0.0732421875, "loss_xval": 0.78515625, "num_input_tokens_seen": 54924216, "step": 876 }, { "epoch": 2.91846921797005, "grad_norm": 11.608718872070312, "learning_rate": 5e-06, "loss": 0.6507, "num_input_tokens_seen": 54988024, "step": 877 }, { "epoch": 2.91846921797005, "loss": 0.7010060548782349, "loss_ce": 7.835189171601087e-05, "loss_iou": 0.265625, "loss_num": 0.03369140625, "loss_xval": 0.69921875, "num_input_tokens_seen": 54988024, "step": 877 }, { "epoch": 2.9217970049916806, "grad_norm": 12.678232192993164, "learning_rate": 5e-06, "loss": 0.8196, "num_input_tokens_seen": 55050876, "step": 878 }, { "epoch": 2.9217970049916806, "loss": 0.702242374420166, "loss_ce": 0.0004601774853654206, "loss_iou": 0.265625, "loss_num": 0.033935546875, "loss_xval": 0.703125, "num_input_tokens_seen": 55050876, "step": 878 }, { "epoch": 2.925124792013311, "grad_norm": 15.374126434326172, "learning_rate": 5e-06, "loss": 0.6451, "num_input_tokens_seen": 55112848, "step": 879 }, { "epoch": 2.925124792013311, "loss": 0.5901122093200684, "loss_ce": 2.4316601411555894e-05, "loss_iou": 0.1962890625, "loss_num": 0.03955078125, "loss_xval": 0.58984375, "num_input_tokens_seen": 55112848, "step": 879 }, { "epoch": 2.928452579034942, "grad_norm": 15.845576286315918, "learning_rate": 5e-06, "loss": 0.7954, "num_input_tokens_seen": 55174940, "step": 880 }, { "epoch": 2.928452579034942, "loss": 0.5542623996734619, "loss_ce": 6.318293162621558e-05, "loss_iou": 0.1748046875, "loss_num": 0.041015625, "loss_xval": 0.5546875, "num_input_tokens_seen": 55174940, "step": 880 }, { "epoch": 2.9317803660565724, "grad_norm": 22.468929290771484, "learning_rate": 5e-06, "loss": 0.7013, "num_input_tokens_seen": 55237608, "step": 881 }, { "epoch": 2.9317803660565724, "loss": 0.6675467491149902, "loss_ce": 0.00018834351794794202, "loss_iou": 0.2197265625, "loss_num": 0.04541015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 55237608, "step": 881 }, { "epoch": 2.935108153078203, "grad_norm": 23.92618179321289, "learning_rate": 5e-06, "loss": 0.5758, "num_input_tokens_seen": 55301544, "step": 882 }, { "epoch": 2.935108153078203, "loss": 0.6431406736373901, "loss_ce": 0.00019637373043224216, "loss_iou": 0.208984375, "loss_num": 0.045166015625, "loss_xval": 0.64453125, "num_input_tokens_seen": 55301544, "step": 882 }, { "epoch": 2.9384359400998337, "grad_norm": 12.396492004394531, "learning_rate": 5e-06, "loss": 1.0115, "num_input_tokens_seen": 55366092, "step": 883 }, { "epoch": 2.9384359400998337, "loss": 1.2334052324295044, "loss_ce": 0.0014715917641296983, "loss_iou": 0.451171875, "loss_num": 0.06591796875, "loss_xval": 1.234375, "num_input_tokens_seen": 55366092, "step": 883 }, { "epoch": 2.9417637271214643, "grad_norm": 11.810384750366211, "learning_rate": 5e-06, "loss": 0.7243, "num_input_tokens_seen": 55429384, "step": 884 }, { "epoch": 2.9417637271214643, "loss": 0.6694672107696533, "loss_ce": 3.362074130563997e-05, "loss_iou": 0.240234375, "loss_num": 0.03759765625, "loss_xval": 0.66796875, "num_input_tokens_seen": 55429384, "step": 884 }, { "epoch": 2.945091514143095, "grad_norm": 45.88756561279297, "learning_rate": 5e-06, "loss": 0.9161, "num_input_tokens_seen": 55492632, "step": 885 }, { "epoch": 2.945091514143095, "loss": 0.845177412033081, "loss_ce": 0.0004507881822064519, "loss_iou": 0.330078125, "loss_num": 0.036865234375, "loss_xval": 0.84375, "num_input_tokens_seen": 55492632, "step": 885 }, { "epoch": 2.9484193011647255, "grad_norm": 20.243730545043945, "learning_rate": 5e-06, "loss": 0.8116, "num_input_tokens_seen": 55553992, "step": 886 }, { "epoch": 2.9484193011647255, "loss": 0.8613571524620056, "loss_ce": 2.9010727303102612e-05, "loss_iou": 0.259765625, "loss_num": 0.068359375, "loss_xval": 0.859375, "num_input_tokens_seen": 55553992, "step": 886 }, { "epoch": 2.951747088186356, "grad_norm": 10.068111419677734, "learning_rate": 5e-06, "loss": 0.8218, "num_input_tokens_seen": 55616712, "step": 887 }, { "epoch": 2.951747088186356, "loss": 0.8360637426376343, "loss_ce": 0.00012628096737898886, "loss_iou": 0.302734375, "loss_num": 0.0458984375, "loss_xval": 0.8359375, "num_input_tokens_seen": 55616712, "step": 887 }, { "epoch": 2.9550748752079867, "grad_norm": 13.990077018737793, "learning_rate": 5e-06, "loss": 0.9699, "num_input_tokens_seen": 55679592, "step": 888 }, { "epoch": 2.9550748752079867, "loss": 1.069408655166626, "loss_ce": 0.002025844529271126, "loss_iou": 0.388671875, "loss_num": 0.058349609375, "loss_xval": 1.0703125, "num_input_tokens_seen": 55679592, "step": 888 }, { "epoch": 2.9584026622296173, "grad_norm": 19.091962814331055, "learning_rate": 5e-06, "loss": 0.9275, "num_input_tokens_seen": 55742324, "step": 889 }, { "epoch": 2.9584026622296173, "loss": 0.8976117968559265, "loss_ce": 0.0006391539354808629, "loss_iou": 0.349609375, "loss_num": 0.03955078125, "loss_xval": 0.8984375, "num_input_tokens_seen": 55742324, "step": 889 }, { "epoch": 2.961730449251248, "grad_norm": 8.14298152923584, "learning_rate": 5e-06, "loss": 0.5047, "num_input_tokens_seen": 55804324, "step": 890 }, { "epoch": 2.961730449251248, "loss": 0.4935609698295593, "loss_ce": 0.0006410854402929544, "loss_iou": 0.1650390625, "loss_num": 0.03271484375, "loss_xval": 0.4921875, "num_input_tokens_seen": 55804324, "step": 890 }, { "epoch": 2.9650582362728786, "grad_norm": 10.041300773620605, "learning_rate": 5e-06, "loss": 0.8786, "num_input_tokens_seen": 55868808, "step": 891 }, { "epoch": 2.9650582362728786, "loss": 1.0165386199951172, "loss_ce": 0.0006695774500258267, "loss_iou": 0.36328125, "loss_num": 0.0576171875, "loss_xval": 1.015625, "num_input_tokens_seen": 55868808, "step": 891 }, { "epoch": 2.968386023294509, "grad_norm": 10.427651405334473, "learning_rate": 5e-06, "loss": 0.7601, "num_input_tokens_seen": 55932628, "step": 892 }, { "epoch": 2.968386023294509, "loss": 0.5908448696136475, "loss_ce": 2.4524768377887085e-05, "loss_iou": 0.2099609375, "loss_num": 0.0341796875, "loss_xval": 0.58984375, "num_input_tokens_seen": 55932628, "step": 892 }, { "epoch": 2.97171381031614, "grad_norm": 10.290075302124023, "learning_rate": 5e-06, "loss": 0.4848, "num_input_tokens_seen": 55994232, "step": 893 }, { "epoch": 2.97171381031614, "loss": 0.44012752175331116, "loss_ce": 0.00018609606195241213, "loss_iou": 0.12109375, "loss_num": 0.03955078125, "loss_xval": 0.439453125, "num_input_tokens_seen": 55994232, "step": 893 }, { "epoch": 2.9750415973377704, "grad_norm": 6.576986312866211, "learning_rate": 5e-06, "loss": 0.9683, "num_input_tokens_seen": 56056172, "step": 894 }, { "epoch": 2.9750415973377704, "loss": 0.9573631882667542, "loss_ce": 8.776094182394445e-05, "loss_iou": 0.275390625, "loss_num": 0.08154296875, "loss_xval": 0.95703125, "num_input_tokens_seen": 56056172, "step": 894 }, { "epoch": 2.978369384359401, "grad_norm": 13.240888595581055, "learning_rate": 5e-06, "loss": 0.9394, "num_input_tokens_seen": 56119328, "step": 895 }, { "epoch": 2.978369384359401, "loss": 0.8229560852050781, "loss_ce": 0.000812539248727262, "loss_iou": 0.271484375, "loss_num": 0.055908203125, "loss_xval": 0.8203125, "num_input_tokens_seen": 56119328, "step": 895 }, { "epoch": 2.9816971713810316, "grad_norm": 11.05700969696045, "learning_rate": 5e-06, "loss": 0.7593, "num_input_tokens_seen": 56182176, "step": 896 }, { "epoch": 2.9816971713810316, "loss": 0.9214538931846619, "loss_ce": 0.0007995938649401069, "loss_iou": 0.2890625, "loss_num": 0.068359375, "loss_xval": 0.921875, "num_input_tokens_seen": 56182176, "step": 896 }, { "epoch": 2.9850249584026622, "grad_norm": 10.255393981933594, "learning_rate": 5e-06, "loss": 0.6812, "num_input_tokens_seen": 56245376, "step": 897 }, { "epoch": 2.9850249584026622, "loss": 0.628034234046936, "loss_ce": 0.0003486672940198332, "loss_iou": 0.2021484375, "loss_num": 0.04443359375, "loss_xval": 0.62890625, "num_input_tokens_seen": 56245376, "step": 897 }, { "epoch": 2.988352745424293, "grad_norm": 11.19140625, "learning_rate": 5e-06, "loss": 1.0147, "num_input_tokens_seen": 56309880, "step": 898 }, { "epoch": 2.988352745424293, "loss": 0.8816752433776855, "loss_ce": 0.0005717527237720788, "loss_iou": 0.3046875, "loss_num": 0.05419921875, "loss_xval": 0.8828125, "num_input_tokens_seen": 56309880, "step": 898 }, { "epoch": 2.9916805324459235, "grad_norm": 7.155737400054932, "learning_rate": 5e-06, "loss": 0.6519, "num_input_tokens_seen": 56371428, "step": 899 }, { "epoch": 2.9916805324459235, "loss": 0.60736083984375, "loss_ce": 0.0007934325258247554, "loss_iou": 0.150390625, "loss_num": 0.06103515625, "loss_xval": 0.60546875, "num_input_tokens_seen": 56371428, "step": 899 }, { "epoch": 2.995008319467554, "grad_norm": 18.8540096282959, "learning_rate": 5e-06, "loss": 0.6362, "num_input_tokens_seen": 56434084, "step": 900 }, { "epoch": 2.995008319467554, "loss": 0.6303885579109192, "loss_ce": 1.7456186469644308e-05, "loss_iou": 0.18359375, "loss_num": 0.052490234375, "loss_xval": 0.62890625, "num_input_tokens_seen": 56434084, "step": 900 }, { "epoch": 2.9983361064891847, "grad_norm": 13.222797393798828, "learning_rate": 5e-06, "loss": 0.8727, "num_input_tokens_seen": 56497992, "step": 901 }, { "epoch": 2.9983361064891847, "loss": 1.109879732131958, "loss_ce": 0.0006269026780501008, "loss_iou": 0.357421875, "loss_num": 0.0791015625, "loss_xval": 1.109375, "num_input_tokens_seen": 56497992, "step": 901 }, { "epoch": 2.9983361064891847, "loss": 0.8237664699554443, "loss_ce": 0.0002801114460453391, "loss_iou": 0.296875, "loss_num": 0.046142578125, "loss_xval": 0.82421875, "num_input_tokens_seen": 56529896, "step": 901 }, { "epoch": 3.0016638935108153, "grad_norm": 21.796117782592773, "learning_rate": 5e-06, "loss": 0.7756, "num_input_tokens_seen": 56561008, "step": 902 }, { "epoch": 3.0016638935108153, "loss": 0.7274184226989746, "loss_ce": 0.00024560184101574123, "loss_iou": 0.2080078125, "loss_num": 0.062255859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 56561008, "step": 902 }, { "epoch": 3.004991680532446, "grad_norm": 22.889524459838867, "learning_rate": 5e-06, "loss": 0.6265, "num_input_tokens_seen": 56622816, "step": 903 }, { "epoch": 3.004991680532446, "loss": 0.8083924055099487, "loss_ce": 0.0006531348917633295, "loss_iou": 0.251953125, "loss_num": 0.060791015625, "loss_xval": 0.80859375, "num_input_tokens_seen": 56622816, "step": 903 }, { "epoch": 3.0083194675540765, "grad_norm": 34.61276626586914, "learning_rate": 5e-06, "loss": 1.0312, "num_input_tokens_seen": 56684616, "step": 904 }, { "epoch": 3.0083194675540765, "loss": 1.007310390472412, "loss_ce": 0.0014510017354041338, "loss_iou": 0.373046875, "loss_num": 0.05224609375, "loss_xval": 1.0078125, "num_input_tokens_seen": 56684616, "step": 904 }, { "epoch": 3.011647254575707, "grad_norm": 14.508090019226074, "learning_rate": 5e-06, "loss": 0.3584, "num_input_tokens_seen": 56745716, "step": 905 }, { "epoch": 3.011647254575707, "loss": 0.25656479597091675, "loss_ce": 0.00024766643764451146, "loss_iou": 0.0, "loss_num": 0.05126953125, "loss_xval": 0.255859375, "num_input_tokens_seen": 56745716, "step": 905 }, { "epoch": 3.0149750415973378, "grad_norm": 10.856987953186035, "learning_rate": 5e-06, "loss": 0.623, "num_input_tokens_seen": 56807540, "step": 906 }, { "epoch": 3.0149750415973378, "loss": 0.8287237286567688, "loss_ce": 4.9377267714589834e-05, "loss_iou": 0.28515625, "loss_num": 0.052001953125, "loss_xval": 0.828125, "num_input_tokens_seen": 56807540, "step": 906 }, { "epoch": 3.0183028286189684, "grad_norm": 14.01893138885498, "learning_rate": 5e-06, "loss": 0.8038, "num_input_tokens_seen": 56870060, "step": 907 }, { "epoch": 3.0183028286189684, "loss": 0.820210337638855, "loss_ce": 0.0011185152688995004, "loss_iou": 0.310546875, "loss_num": 0.03955078125, "loss_xval": 0.8203125, "num_input_tokens_seen": 56870060, "step": 907 }, { "epoch": 3.021630615640599, "grad_norm": 6.372365474700928, "learning_rate": 5e-06, "loss": 0.4368, "num_input_tokens_seen": 56933120, "step": 908 }, { "epoch": 3.021630615640599, "loss": 0.6791857481002808, "loss_ce": 0.00023066448920872062, "loss_iou": 0.263671875, "loss_num": 0.02978515625, "loss_xval": 0.6796875, "num_input_tokens_seen": 56933120, "step": 908 }, { "epoch": 3.0249584026622296, "grad_norm": 16.884952545166016, "learning_rate": 5e-06, "loss": 0.8391, "num_input_tokens_seen": 56996456, "step": 909 }, { "epoch": 3.0249584026622296, "loss": 0.8749474287033081, "loss_ce": 0.0011681468458846211, "loss_iou": 0.33984375, "loss_num": 0.038818359375, "loss_xval": 0.875, "num_input_tokens_seen": 56996456, "step": 909 }, { "epoch": 3.02828618968386, "grad_norm": 17.381507873535156, "learning_rate": 5e-06, "loss": 0.7329, "num_input_tokens_seen": 57059188, "step": 910 }, { "epoch": 3.02828618968386, "loss": 0.370250940322876, "loss_ce": 1.1703556083375588e-05, "loss_iou": 0.1474609375, "loss_num": 0.0152587890625, "loss_xval": 0.37109375, "num_input_tokens_seen": 57059188, "step": 910 }, { "epoch": 3.031613976705491, "grad_norm": 20.502656936645508, "learning_rate": 5e-06, "loss": 0.7003, "num_input_tokens_seen": 57119852, "step": 911 }, { "epoch": 3.031613976705491, "loss": 0.8628953099250793, "loss_ce": 0.00034649402368813753, "loss_iou": 0.3046875, "loss_num": 0.05078125, "loss_xval": 0.86328125, "num_input_tokens_seen": 57119852, "step": 911 }, { "epoch": 3.0349417637271214, "grad_norm": 15.375128746032715, "learning_rate": 5e-06, "loss": 0.6873, "num_input_tokens_seen": 57181036, "step": 912 }, { "epoch": 3.0349417637271214, "loss": 0.5885519981384277, "loss_ce": 0.0009055188274942338, "loss_iou": 0.1953125, "loss_num": 0.039306640625, "loss_xval": 0.5859375, "num_input_tokens_seen": 57181036, "step": 912 }, { "epoch": 3.038269550748752, "grad_norm": 18.92561912536621, "learning_rate": 5e-06, "loss": 0.7695, "num_input_tokens_seen": 57243680, "step": 913 }, { "epoch": 3.038269550748752, "loss": 0.5670027732849121, "loss_ce": 0.00016924660303629935, "loss_iou": 0.1181640625, "loss_num": 0.06591796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 57243680, "step": 913 }, { "epoch": 3.0415973377703827, "grad_norm": 21.369565963745117, "learning_rate": 5e-06, "loss": 0.7097, "num_input_tokens_seen": 57307356, "step": 914 }, { "epoch": 3.0415973377703827, "loss": 0.744112491607666, "loss_ce": 0.00021595595171675086, "loss_iou": 0.287109375, "loss_num": 0.03369140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 57307356, "step": 914 }, { "epoch": 3.0449251247920133, "grad_norm": 8.696249961853027, "learning_rate": 5e-06, "loss": 0.7384, "num_input_tokens_seen": 57371044, "step": 915 }, { "epoch": 3.0449251247920133, "loss": 0.7669232487678528, "loss_ce": 0.0001996213395614177, "loss_iou": 0.271484375, "loss_num": 0.044677734375, "loss_xval": 0.765625, "num_input_tokens_seen": 57371044, "step": 915 }, { "epoch": 3.048252911813644, "grad_norm": 14.845057487487793, "learning_rate": 5e-06, "loss": 0.7057, "num_input_tokens_seen": 57434152, "step": 916 }, { "epoch": 3.048252911813644, "loss": 0.5545109510421753, "loss_ce": 0.0005558902630582452, "loss_iou": 0.2060546875, "loss_num": 0.0281982421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 57434152, "step": 916 }, { "epoch": 3.0515806988352745, "grad_norm": 29.272449493408203, "learning_rate": 5e-06, "loss": 0.7314, "num_input_tokens_seen": 57496376, "step": 917 }, { "epoch": 3.0515806988352745, "loss": 0.7164074182510376, "loss_ce": 0.0013195187784731388, "loss_iou": 0.25390625, "loss_num": 0.041748046875, "loss_xval": 0.71484375, "num_input_tokens_seen": 57496376, "step": 917 }, { "epoch": 3.054908485856905, "grad_norm": 13.444619178771973, "learning_rate": 5e-06, "loss": 0.6064, "num_input_tokens_seen": 57559212, "step": 918 }, { "epoch": 3.054908485856905, "loss": 0.4947461187839508, "loss_ce": 0.00011721412010956556, "loss_iou": 0.1474609375, "loss_num": 0.039794921875, "loss_xval": 0.494140625, "num_input_tokens_seen": 57559212, "step": 918 }, { "epoch": 3.0582362728785357, "grad_norm": 21.77284812927246, "learning_rate": 5e-06, "loss": 0.8155, "num_input_tokens_seen": 57622536, "step": 919 }, { "epoch": 3.0582362728785357, "loss": 0.889853835105896, "loss_ce": 0.002158483723178506, "loss_iou": 0.298828125, "loss_num": 0.05810546875, "loss_xval": 0.88671875, "num_input_tokens_seen": 57622536, "step": 919 }, { "epoch": 3.0615640599001663, "grad_norm": 24.70244026184082, "learning_rate": 5e-06, "loss": 0.9231, "num_input_tokens_seen": 57686580, "step": 920 }, { "epoch": 3.0615640599001663, "loss": 1.1854498386383057, "loss_ce": 2.4984303308883682e-05, "loss_iou": 0.46484375, "loss_num": 0.051513671875, "loss_xval": 1.1875, "num_input_tokens_seen": 57686580, "step": 920 }, { "epoch": 3.064891846921797, "grad_norm": 34.745235443115234, "learning_rate": 5e-06, "loss": 0.7534, "num_input_tokens_seen": 57749852, "step": 921 }, { "epoch": 3.064891846921797, "loss": 0.9552972912788391, "loss_ce": 0.0011957723181694746, "loss_iou": 0.3515625, "loss_num": 0.05029296875, "loss_xval": 0.953125, "num_input_tokens_seen": 57749852, "step": 921 }, { "epoch": 3.0682196339434276, "grad_norm": 17.46992301940918, "learning_rate": 5e-06, "loss": 0.8771, "num_input_tokens_seen": 57813348, "step": 922 }, { "epoch": 3.0682196339434276, "loss": 0.910987138748169, "loss_ce": 0.0009529738454148173, "loss_iou": 0.359375, "loss_num": 0.038818359375, "loss_xval": 0.91015625, "num_input_tokens_seen": 57813348, "step": 922 }, { "epoch": 3.071547420965058, "grad_norm": 16.52344512939453, "learning_rate": 5e-06, "loss": 0.8762, "num_input_tokens_seen": 57878648, "step": 923 }, { "epoch": 3.071547420965058, "loss": 0.8385041952133179, "loss_ce": 0.0014680216554552317, "loss_iou": 0.3203125, "loss_num": 0.038818359375, "loss_xval": 0.8359375, "num_input_tokens_seen": 57878648, "step": 923 }, { "epoch": 3.074875207986689, "grad_norm": 12.111948013305664, "learning_rate": 5e-06, "loss": 0.8092, "num_input_tokens_seen": 57941344, "step": 924 }, { "epoch": 3.074875207986689, "loss": 0.6001216173171997, "loss_ce": 0.0006343690911307931, "loss_iou": 0.203125, "loss_num": 0.038330078125, "loss_xval": 0.59765625, "num_input_tokens_seen": 57941344, "step": 924 }, { "epoch": 3.0782029950083194, "grad_norm": 24.53022003173828, "learning_rate": 5e-06, "loss": 0.8229, "num_input_tokens_seen": 58005836, "step": 925 }, { "epoch": 3.0782029950083194, "loss": 0.788398265838623, "loss_ce": 6.81536112097092e-05, "loss_iou": 0.26171875, "loss_num": 0.052978515625, "loss_xval": 0.7890625, "num_input_tokens_seen": 58005836, "step": 925 }, { "epoch": 3.08153078202995, "grad_norm": 25.884660720825195, "learning_rate": 5e-06, "loss": 0.7543, "num_input_tokens_seen": 58069292, "step": 926 }, { "epoch": 3.08153078202995, "loss": 0.7262783050537109, "loss_ce": 2.1044981622253545e-05, "loss_iou": 0.271484375, "loss_num": 0.03662109375, "loss_xval": 0.7265625, "num_input_tokens_seen": 58069292, "step": 926 }, { "epoch": 3.0848585690515806, "grad_norm": 10.537081718444824, "learning_rate": 5e-06, "loss": 0.4332, "num_input_tokens_seen": 58131228, "step": 927 }, { "epoch": 3.0848585690515806, "loss": 0.5137090682983398, "loss_ce": 0.00015923853788990527, "loss_iou": 0.18359375, "loss_num": 0.029296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 58131228, "step": 927 }, { "epoch": 3.0881863560732112, "grad_norm": 12.702507972717285, "learning_rate": 5e-06, "loss": 0.7231, "num_input_tokens_seen": 58194740, "step": 928 }, { "epoch": 3.0881863560732112, "loss": 0.586982250213623, "loss_ce": 0.00025130249559879303, "loss_iou": 0.201171875, "loss_num": 0.037109375, "loss_xval": 0.5859375, "num_input_tokens_seen": 58194740, "step": 928 }, { "epoch": 3.091514143094842, "grad_norm": 24.366914749145508, "learning_rate": 5e-06, "loss": 0.8089, "num_input_tokens_seen": 58257992, "step": 929 }, { "epoch": 3.091514143094842, "loss": 0.962645411491394, "loss_ce": 0.0009755539940670133, "loss_iou": 0.3515625, "loss_num": 0.0517578125, "loss_xval": 0.9609375, "num_input_tokens_seen": 58257992, "step": 929 }, { "epoch": 3.0948419301164725, "grad_norm": 20.55196762084961, "learning_rate": 5e-06, "loss": 0.7118, "num_input_tokens_seen": 58321524, "step": 930 }, { "epoch": 3.0948419301164725, "loss": 0.5327604413032532, "loss_ce": 0.0005338864284567535, "loss_iou": 0.15234375, "loss_num": 0.045166015625, "loss_xval": 0.53125, "num_input_tokens_seen": 58321524, "step": 930 }, { "epoch": 3.098169717138103, "grad_norm": 12.970935821533203, "learning_rate": 5e-06, "loss": 0.7816, "num_input_tokens_seen": 58383840, "step": 931 }, { "epoch": 3.098169717138103, "loss": 0.6380434036254883, "loss_ce": 0.00010397518053650856, "loss_iou": 0.2490234375, "loss_num": 0.0281982421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 58383840, "step": 931 }, { "epoch": 3.1014975041597337, "grad_norm": 22.602128982543945, "learning_rate": 5e-06, "loss": 0.7884, "num_input_tokens_seen": 58446640, "step": 932 }, { "epoch": 3.1014975041597337, "loss": 0.8503059148788452, "loss_ce": 0.0014289830578491092, "loss_iou": 0.25, "loss_num": 0.06982421875, "loss_xval": 0.84765625, "num_input_tokens_seen": 58446640, "step": 932 }, { "epoch": 3.1048252911813643, "grad_norm": 13.73291301727295, "learning_rate": 5e-06, "loss": 0.6752, "num_input_tokens_seen": 58510196, "step": 933 }, { "epoch": 3.1048252911813643, "loss": 0.6011087894439697, "loss_ce": 0.000278731546131894, "loss_iou": 0.1953125, "loss_num": 0.0419921875, "loss_xval": 0.6015625, "num_input_tokens_seen": 58510196, "step": 933 }, { "epoch": 3.108153078202995, "grad_norm": 13.253497123718262, "learning_rate": 5e-06, "loss": 0.6881, "num_input_tokens_seen": 58571932, "step": 934 }, { "epoch": 3.108153078202995, "loss": 0.5735898613929749, "loss_ce": 0.0011411437299102545, "loss_iou": 0.15625, "loss_num": 0.0517578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 58571932, "step": 934 }, { "epoch": 3.1114808652246255, "grad_norm": 16.51205062866211, "learning_rate": 5e-06, "loss": 0.6767, "num_input_tokens_seen": 58633804, "step": 935 }, { "epoch": 3.1114808652246255, "loss": 0.7584989070892334, "loss_ce": 7.602832920383662e-05, "loss_iou": 0.271484375, "loss_num": 0.04296875, "loss_xval": 0.7578125, "num_input_tokens_seen": 58633804, "step": 935 }, { "epoch": 3.114808652246256, "grad_norm": 12.61417007446289, "learning_rate": 5e-06, "loss": 0.9212, "num_input_tokens_seen": 58698112, "step": 936 }, { "epoch": 3.114808652246256, "loss": 1.0310556888580322, "loss_ce": 0.0010264001321047544, "loss_iou": 0.40234375, "loss_num": 0.045654296875, "loss_xval": 1.03125, "num_input_tokens_seen": 58698112, "step": 936 }, { "epoch": 3.1181364392678868, "grad_norm": 24.36832046508789, "learning_rate": 5e-06, "loss": 0.8457, "num_input_tokens_seen": 58759328, "step": 937 }, { "epoch": 3.1181364392678868, "loss": 0.8562856316566467, "loss_ce": 0.000816886720713228, "loss_iou": 0.3359375, "loss_num": 0.037109375, "loss_xval": 0.85546875, "num_input_tokens_seen": 58759328, "step": 937 }, { "epoch": 3.1214642262895174, "grad_norm": 14.088415145874023, "learning_rate": 5e-06, "loss": 0.7334, "num_input_tokens_seen": 58822164, "step": 938 }, { "epoch": 3.1214642262895174, "loss": 0.488110214471817, "loss_ce": 0.00019515973690431565, "loss_iou": 0.173828125, "loss_num": 0.0281982421875, "loss_xval": 0.48828125, "num_input_tokens_seen": 58822164, "step": 938 }, { "epoch": 3.124792013311148, "grad_norm": 27.538347244262695, "learning_rate": 5e-06, "loss": 0.6484, "num_input_tokens_seen": 58883820, "step": 939 }, { "epoch": 3.124792013311148, "loss": 0.5764639973640442, "loss_ce": 4.79936134070158e-05, "loss_iou": 0.220703125, "loss_num": 0.0269775390625, "loss_xval": 0.578125, "num_input_tokens_seen": 58883820, "step": 939 }, { "epoch": 3.1281198003327786, "grad_norm": 14.995172500610352, "learning_rate": 5e-06, "loss": 0.6867, "num_input_tokens_seen": 58945412, "step": 940 }, { "epoch": 3.1281198003327786, "loss": 0.6922029256820679, "loss_ce": 6.422427395591512e-05, "loss_iou": 0.26953125, "loss_num": 0.0303955078125, "loss_xval": 0.69140625, "num_input_tokens_seen": 58945412, "step": 940 }, { "epoch": 3.131447587354409, "grad_norm": 22.634292602539062, "learning_rate": 5e-06, "loss": 0.7909, "num_input_tokens_seen": 59007688, "step": 941 }, { "epoch": 3.131447587354409, "loss": 1.0647785663604736, "loss_ce": 0.0005695015424862504, "loss_iou": 0.431640625, "loss_num": 0.040283203125, "loss_xval": 1.0625, "num_input_tokens_seen": 59007688, "step": 941 }, { "epoch": 3.13477537437604, "grad_norm": 12.697657585144043, "learning_rate": 5e-06, "loss": 0.7515, "num_input_tokens_seen": 59070980, "step": 942 }, { "epoch": 3.13477537437604, "loss": 0.6128317713737488, "loss_ce": 0.00028294071671552956, "loss_iou": 0.2294921875, "loss_num": 0.0308837890625, "loss_xval": 0.61328125, "num_input_tokens_seen": 59070980, "step": 942 }, { "epoch": 3.1381031613976704, "grad_norm": 10.244808197021484, "learning_rate": 5e-06, "loss": 0.5148, "num_input_tokens_seen": 59132444, "step": 943 }, { "epoch": 3.1381031613976704, "loss": 0.5928980112075806, "loss_ce": 0.0006128327222540975, "loss_iou": 0.1748046875, "loss_num": 0.048095703125, "loss_xval": 0.59375, "num_input_tokens_seen": 59132444, "step": 943 }, { "epoch": 3.141430948419301, "grad_norm": 51.340370178222656, "learning_rate": 5e-06, "loss": 0.7432, "num_input_tokens_seen": 59194092, "step": 944 }, { "epoch": 3.141430948419301, "loss": 0.688306450843811, "loss_ce": 0.00025715091032907367, "loss_iou": 0.2158203125, "loss_num": 0.05126953125, "loss_xval": 0.6875, "num_input_tokens_seen": 59194092, "step": 944 }, { "epoch": 3.1447587354409317, "grad_norm": 28.75864601135254, "learning_rate": 5e-06, "loss": 0.9202, "num_input_tokens_seen": 59257976, "step": 945 }, { "epoch": 3.1447587354409317, "loss": 0.8468874096870422, "loss_ce": 8.563219307688996e-05, "loss_iou": 0.326171875, "loss_num": 0.0390625, "loss_xval": 0.84765625, "num_input_tokens_seen": 59257976, "step": 945 }, { "epoch": 3.1480865224625623, "grad_norm": 14.228109359741211, "learning_rate": 5e-06, "loss": 0.7687, "num_input_tokens_seen": 59319416, "step": 946 }, { "epoch": 3.1480865224625623, "loss": 0.7417944073677063, "loss_ce": 9.51740876189433e-05, "loss_iou": 0.2890625, "loss_num": 0.032958984375, "loss_xval": 0.7421875, "num_input_tokens_seen": 59319416, "step": 946 }, { "epoch": 3.151414309484193, "grad_norm": 42.648712158203125, "learning_rate": 5e-06, "loss": 0.5547, "num_input_tokens_seen": 59382368, "step": 947 }, { "epoch": 3.151414309484193, "loss": 0.567420244216919, "loss_ce": 0.00028155455947853625, "loss_iou": 0.2314453125, "loss_num": 0.020751953125, "loss_xval": 0.56640625, "num_input_tokens_seen": 59382368, "step": 947 }, { "epoch": 3.1547420965058235, "grad_norm": 13.58983039855957, "learning_rate": 5e-06, "loss": 0.6281, "num_input_tokens_seen": 59445036, "step": 948 }, { "epoch": 3.1547420965058235, "loss": 0.7668274641036987, "loss_ce": 0.0007142096292227507, "loss_iou": 0.314453125, "loss_num": 0.027099609375, "loss_xval": 0.765625, "num_input_tokens_seen": 59445036, "step": 948 }, { "epoch": 3.158069883527454, "grad_norm": 15.522891998291016, "learning_rate": 5e-06, "loss": 0.7467, "num_input_tokens_seen": 59507760, "step": 949 }, { "epoch": 3.158069883527454, "loss": 0.8365803360939026, "loss_ce": 0.0003987147647421807, "loss_iou": 0.294921875, "loss_num": 0.04931640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 59507760, "step": 949 }, { "epoch": 3.1613976705490847, "grad_norm": 12.101635932922363, "learning_rate": 5e-06, "loss": 0.7195, "num_input_tokens_seen": 59569884, "step": 950 }, { "epoch": 3.1613976705490847, "loss": 0.9462418556213379, "loss_ce": 0.0015396635280922055, "loss_iou": 0.337890625, "loss_num": 0.053955078125, "loss_xval": 0.9453125, "num_input_tokens_seen": 59569884, "step": 950 }, { "epoch": 3.1647254575707153, "grad_norm": 12.66640853881836, "learning_rate": 5e-06, "loss": 0.546, "num_input_tokens_seen": 59631576, "step": 951 }, { "epoch": 3.1647254575707153, "loss": 0.46537649631500244, "loss_ce": 0.0010210101027041674, "loss_iou": 0.1298828125, "loss_num": 0.040771484375, "loss_xval": 0.46484375, "num_input_tokens_seen": 59631576, "step": 951 }, { "epoch": 3.168053244592346, "grad_norm": 16.23946189880371, "learning_rate": 5e-06, "loss": 0.8974, "num_input_tokens_seen": 59694204, "step": 952 }, { "epoch": 3.168053244592346, "loss": 1.0476288795471191, "loss_ce": 2.1510506485356018e-05, "loss_iou": 0.419921875, "loss_num": 0.041259765625, "loss_xval": 1.046875, "num_input_tokens_seen": 59694204, "step": 952 }, { "epoch": 3.1713810316139766, "grad_norm": 12.098518371582031, "learning_rate": 5e-06, "loss": 0.6835, "num_input_tokens_seen": 59757120, "step": 953 }, { "epoch": 3.1713810316139766, "loss": 0.802497148513794, "loss_ce": 6.950212991796434e-06, "loss_iou": 0.2412109375, "loss_num": 0.06396484375, "loss_xval": 0.80078125, "num_input_tokens_seen": 59757120, "step": 953 }, { "epoch": 3.174708818635607, "grad_norm": 14.556507110595703, "learning_rate": 5e-06, "loss": 0.6771, "num_input_tokens_seen": 59820932, "step": 954 }, { "epoch": 3.174708818635607, "loss": 0.6860435009002686, "loss_ce": 0.0004966831766068935, "loss_iou": 0.2265625, "loss_num": 0.046142578125, "loss_xval": 0.6875, "num_input_tokens_seen": 59820932, "step": 954 }, { "epoch": 3.178036605657238, "grad_norm": 14.193598747253418, "learning_rate": 5e-06, "loss": 0.6814, "num_input_tokens_seen": 59881848, "step": 955 }, { "epoch": 3.178036605657238, "loss": 0.6385188698768616, "loss_ce": 0.0003352407948113978, "loss_iou": 0.208984375, "loss_num": 0.0439453125, "loss_xval": 0.63671875, "num_input_tokens_seen": 59881848, "step": 955 }, { "epoch": 3.1813643926788684, "grad_norm": 16.13985824584961, "learning_rate": 5e-06, "loss": 0.8029, "num_input_tokens_seen": 59944936, "step": 956 }, { "epoch": 3.1813643926788684, "loss": 0.7531977891921997, "loss_ce": 2.3958233214216307e-05, "loss_iou": 0.248046875, "loss_num": 0.05126953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 59944936, "step": 956 }, { "epoch": 3.184692179700499, "grad_norm": 12.17919921875, "learning_rate": 5e-06, "loss": 0.6205, "num_input_tokens_seen": 60007368, "step": 957 }, { "epoch": 3.184692179700499, "loss": 0.6794863343238831, "loss_ce": 0.0002871401375159621, "loss_iou": 0.17578125, "loss_num": 0.0654296875, "loss_xval": 0.6796875, "num_input_tokens_seen": 60007368, "step": 957 }, { "epoch": 3.1880199667221296, "grad_norm": 9.853569030761719, "learning_rate": 5e-06, "loss": 0.8028, "num_input_tokens_seen": 60071296, "step": 958 }, { "epoch": 3.1880199667221296, "loss": 0.9358205795288086, "loss_ce": 0.0014943924034014344, "loss_iou": 0.33203125, "loss_num": 0.05419921875, "loss_xval": 0.93359375, "num_input_tokens_seen": 60071296, "step": 958 }, { "epoch": 3.1913477537437602, "grad_norm": 16.247297286987305, "learning_rate": 5e-06, "loss": 0.7029, "num_input_tokens_seen": 60134888, "step": 959 }, { "epoch": 3.1913477537437602, "loss": 0.6146410703659058, "loss_ce": 0.00026120367692783475, "loss_iou": 0.203125, "loss_num": 0.041748046875, "loss_xval": 0.61328125, "num_input_tokens_seen": 60134888, "step": 959 }, { "epoch": 3.194675540765391, "grad_norm": 25.35675811767578, "learning_rate": 5e-06, "loss": 0.9464, "num_input_tokens_seen": 60198456, "step": 960 }, { "epoch": 3.194675540765391, "loss": 1.11307954788208, "loss_ce": 4.250149504514411e-05, "loss_iou": 0.39453125, "loss_num": 0.0654296875, "loss_xval": 1.109375, "num_input_tokens_seen": 60198456, "step": 960 }, { "epoch": 3.1980033277870215, "grad_norm": 14.904539108276367, "learning_rate": 5e-06, "loss": 0.7424, "num_input_tokens_seen": 60260792, "step": 961 }, { "epoch": 3.1980033277870215, "loss": 0.6098666787147522, "loss_ce": 0.0002475565706845373, "loss_iou": 0.1845703125, "loss_num": 0.04833984375, "loss_xval": 0.609375, "num_input_tokens_seen": 60260792, "step": 961 }, { "epoch": 3.201331114808652, "grad_norm": 11.232295036315918, "learning_rate": 5e-06, "loss": 0.595, "num_input_tokens_seen": 60322996, "step": 962 }, { "epoch": 3.201331114808652, "loss": 0.48930224776268005, "loss_ce": 1.3895768461225089e-05, "loss_iou": 0.189453125, "loss_num": 0.022216796875, "loss_xval": 0.490234375, "num_input_tokens_seen": 60322996, "step": 962 }, { "epoch": 3.2046589018302827, "grad_norm": 8.756627082824707, "learning_rate": 5e-06, "loss": 0.5685, "num_input_tokens_seen": 60385912, "step": 963 }, { "epoch": 3.2046589018302827, "loss": 0.709077000617981, "loss_ce": 9.269396105082706e-05, "loss_iou": 0.265625, "loss_num": 0.03564453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 60385912, "step": 963 }, { "epoch": 3.2079866888519133, "grad_norm": 21.235980987548828, "learning_rate": 5e-06, "loss": 0.6594, "num_input_tokens_seen": 60448868, "step": 964 }, { "epoch": 3.2079866888519133, "loss": 0.5883943438529968, "loss_ce": 1.5452342267963104e-05, "loss_iou": 0.205078125, "loss_num": 0.035400390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 60448868, "step": 964 }, { "epoch": 3.211314475873544, "grad_norm": 14.96311092376709, "learning_rate": 5e-06, "loss": 0.82, "num_input_tokens_seen": 60510212, "step": 965 }, { "epoch": 3.211314475873544, "loss": 0.6608465313911438, "loss_ce": 1.8919981812359765e-05, "loss_iou": 0.220703125, "loss_num": 0.0439453125, "loss_xval": 0.66015625, "num_input_tokens_seen": 60510212, "step": 965 }, { "epoch": 3.2146422628951745, "grad_norm": 14.463862419128418, "learning_rate": 5e-06, "loss": 0.8046, "num_input_tokens_seen": 60573960, "step": 966 }, { "epoch": 3.2146422628951745, "loss": 1.0216771364212036, "loss_ce": 0.0009251884766854346, "loss_iou": 0.388671875, "loss_num": 0.04833984375, "loss_xval": 1.0234375, "num_input_tokens_seen": 60573960, "step": 966 }, { "epoch": 3.217970049916805, "grad_norm": 10.740462303161621, "learning_rate": 5e-06, "loss": 0.8366, "num_input_tokens_seen": 60636348, "step": 967 }, { "epoch": 3.217970049916805, "loss": 0.7922353744506836, "loss_ce": 0.0010976394405588508, "loss_iou": 0.25, "loss_num": 0.058349609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 60636348, "step": 967 }, { "epoch": 3.2212978369384357, "grad_norm": 8.795841217041016, "learning_rate": 5e-06, "loss": 0.6293, "num_input_tokens_seen": 60698992, "step": 968 }, { "epoch": 3.2212978369384357, "loss": 0.6731522083282471, "loss_ce": 0.0006668201531283557, "loss_iou": 0.25390625, "loss_num": 0.033203125, "loss_xval": 0.671875, "num_input_tokens_seen": 60698992, "step": 968 }, { "epoch": 3.2246256239600664, "grad_norm": 7.10551643371582, "learning_rate": 5e-06, "loss": 0.5305, "num_input_tokens_seen": 60761808, "step": 969 }, { "epoch": 3.2246256239600664, "loss": 0.6101760268211365, "loss_ce": 0.002510027028620243, "loss_iou": 0.208984375, "loss_num": 0.03759765625, "loss_xval": 0.609375, "num_input_tokens_seen": 60761808, "step": 969 }, { "epoch": 3.227953410981697, "grad_norm": 18.331600189208984, "learning_rate": 5e-06, "loss": 0.9276, "num_input_tokens_seen": 60823748, "step": 970 }, { "epoch": 3.227953410981697, "loss": 1.0038847923278809, "loss_ce": 0.00046675364137627184, "loss_iou": 0.271484375, "loss_num": 0.09228515625, "loss_xval": 1.0, "num_input_tokens_seen": 60823748, "step": 970 }, { "epoch": 3.2312811980033276, "grad_norm": 16.485706329345703, "learning_rate": 5e-06, "loss": 0.633, "num_input_tokens_seen": 60885436, "step": 971 }, { "epoch": 3.2312811980033276, "loss": 0.8716785311698914, "loss_ce": 0.001561349374242127, "loss_iou": 0.287109375, "loss_num": 0.0595703125, "loss_xval": 0.87109375, "num_input_tokens_seen": 60885436, "step": 971 }, { "epoch": 3.234608985024958, "grad_norm": 8.577469825744629, "learning_rate": 5e-06, "loss": 0.6411, "num_input_tokens_seen": 60947976, "step": 972 }, { "epoch": 3.234608985024958, "loss": 0.6187483668327332, "loss_ce": 9.600786142982543e-05, "loss_iou": 0.2158203125, "loss_num": 0.03759765625, "loss_xval": 0.6171875, "num_input_tokens_seen": 60947976, "step": 972 }, { "epoch": 3.237936772046589, "grad_norm": 8.472862243652344, "learning_rate": 5e-06, "loss": 0.7079, "num_input_tokens_seen": 61011980, "step": 973 }, { "epoch": 3.237936772046589, "loss": 0.8590962886810303, "loss_ce": 0.0003316626534797251, "loss_iou": 0.330078125, "loss_num": 0.039794921875, "loss_xval": 0.859375, "num_input_tokens_seen": 61011980, "step": 973 }, { "epoch": 3.2412645590682194, "grad_norm": 12.240633010864258, "learning_rate": 5e-06, "loss": 0.7009, "num_input_tokens_seen": 61075356, "step": 974 }, { "epoch": 3.2412645590682194, "loss": 0.8444112539291382, "loss_ce": 5.095464803162031e-05, "loss_iou": 0.287109375, "loss_num": 0.05419921875, "loss_xval": 0.84375, "num_input_tokens_seen": 61075356, "step": 974 }, { "epoch": 3.24459234608985, "grad_norm": 9.924382209777832, "learning_rate": 5e-06, "loss": 0.7248, "num_input_tokens_seen": 61140072, "step": 975 }, { "epoch": 3.24459234608985, "loss": 0.9121953248977661, "loss_ce": 0.0008183298632502556, "loss_iou": 0.341796875, "loss_num": 0.04541015625, "loss_xval": 0.91015625, "num_input_tokens_seen": 61140072, "step": 975 }, { "epoch": 3.2479201331114806, "grad_norm": 13.432154655456543, "learning_rate": 5e-06, "loss": 0.6591, "num_input_tokens_seen": 61201616, "step": 976 }, { "epoch": 3.2479201331114806, "loss": 0.5972070693969727, "loss_ce": 0.0008936430094763637, "loss_iou": 0.2333984375, "loss_num": 0.025634765625, "loss_xval": 0.59765625, "num_input_tokens_seen": 61201616, "step": 976 }, { "epoch": 3.2512479201331113, "grad_norm": 22.53764533996582, "learning_rate": 5e-06, "loss": 1.0177, "num_input_tokens_seen": 61265076, "step": 977 }, { "epoch": 3.2512479201331113, "loss": 1.1379694938659668, "loss_ce": 2.999762727995403e-05, "loss_iou": 0.435546875, "loss_num": 0.053466796875, "loss_xval": 1.140625, "num_input_tokens_seen": 61265076, "step": 977 }, { "epoch": 3.254575707154742, "grad_norm": 27.144577026367188, "learning_rate": 5e-06, "loss": 0.9568, "num_input_tokens_seen": 61327436, "step": 978 }, { "epoch": 3.254575707154742, "loss": 0.7128638029098511, "loss_ce": 0.0007055765599943697, "loss_iou": 0.24609375, "loss_num": 0.044189453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 61327436, "step": 978 }, { "epoch": 3.2579034941763725, "grad_norm": 15.161336898803711, "learning_rate": 5e-06, "loss": 0.6589, "num_input_tokens_seen": 61390456, "step": 979 }, { "epoch": 3.2579034941763725, "loss": 0.6246421337127686, "loss_ce": 8.365123903786298e-06, "loss_iou": 0.216796875, "loss_num": 0.038330078125, "loss_xval": 0.625, "num_input_tokens_seen": 61390456, "step": 979 }, { "epoch": 3.261231281198003, "grad_norm": 7.792651653289795, "learning_rate": 5e-06, "loss": 0.4651, "num_input_tokens_seen": 61452176, "step": 980 }, { "epoch": 3.261231281198003, "loss": 0.4168563783168793, "loss_ce": 0.0003524985513649881, "loss_iou": 0.10302734375, "loss_num": 0.042236328125, "loss_xval": 0.416015625, "num_input_tokens_seen": 61452176, "step": 980 }, { "epoch": 3.2645590682196337, "grad_norm": 12.54578971862793, "learning_rate": 5e-06, "loss": 0.728, "num_input_tokens_seen": 61515320, "step": 981 }, { "epoch": 3.2645590682196337, "loss": 0.8815405964851379, "loss_ce": 0.0011695139110088348, "loss_iou": 0.263671875, "loss_num": 0.07080078125, "loss_xval": 0.87890625, "num_input_tokens_seen": 61515320, "step": 981 }, { "epoch": 3.2678868552412643, "grad_norm": 14.04428768157959, "learning_rate": 5e-06, "loss": 0.6063, "num_input_tokens_seen": 61577744, "step": 982 }, { "epoch": 3.2678868552412643, "loss": 0.4867231547832489, "loss_ce": 0.00027296593179926276, "loss_iou": 0.12158203125, "loss_num": 0.048583984375, "loss_xval": 0.486328125, "num_input_tokens_seen": 61577744, "step": 982 }, { "epoch": 3.271214642262895, "grad_norm": 17.12869644165039, "learning_rate": 5e-06, "loss": 0.7536, "num_input_tokens_seen": 61640572, "step": 983 }, { "epoch": 3.271214642262895, "loss": 0.7018469572067261, "loss_ce": 0.0004309040086809546, "loss_iou": 0.2734375, "loss_num": 0.0308837890625, "loss_xval": 0.703125, "num_input_tokens_seen": 61640572, "step": 983 }, { "epoch": 3.2745424292845255, "grad_norm": 41.85936737060547, "learning_rate": 5e-06, "loss": 0.8743, "num_input_tokens_seen": 61703936, "step": 984 }, { "epoch": 3.2745424292845255, "loss": 0.918325662612915, "loss_ce": 0.00011270974937360734, "loss_iou": 0.36328125, "loss_num": 0.03857421875, "loss_xval": 0.91796875, "num_input_tokens_seen": 61703936, "step": 984 }, { "epoch": 3.277870216306156, "grad_norm": 13.673544883728027, "learning_rate": 5e-06, "loss": 0.6591, "num_input_tokens_seen": 61767552, "step": 985 }, { "epoch": 3.277870216306156, "loss": 0.5250503420829773, "loss_ce": 2.5950845156330615e-05, "loss_iou": 0.1494140625, "loss_num": 0.04541015625, "loss_xval": 0.5234375, "num_input_tokens_seen": 61767552, "step": 985 }, { "epoch": 3.2811980033277868, "grad_norm": 11.463003158569336, "learning_rate": 5e-06, "loss": 0.9295, "num_input_tokens_seen": 61830148, "step": 986 }, { "epoch": 3.2811980033277868, "loss": 0.9378980398178101, "loss_ce": 0.00039809508598409593, "loss_iou": 0.32421875, "loss_num": 0.05810546875, "loss_xval": 0.9375, "num_input_tokens_seen": 61830148, "step": 986 }, { "epoch": 3.284525790349418, "grad_norm": 13.278491973876953, "learning_rate": 5e-06, "loss": 0.7629, "num_input_tokens_seen": 61894276, "step": 987 }, { "epoch": 3.284525790349418, "loss": 0.8032286167144775, "loss_ce": 0.0004942516097798944, "loss_iou": 0.310546875, "loss_num": 0.036376953125, "loss_xval": 0.8046875, "num_input_tokens_seen": 61894276, "step": 987 }, { "epoch": 3.2878535773710484, "grad_norm": 7.292080402374268, "learning_rate": 5e-06, "loss": 0.3758, "num_input_tokens_seen": 61955344, "step": 988 }, { "epoch": 3.2878535773710484, "loss": 0.43695181608200073, "loss_ce": 0.0001842538476921618, "loss_iou": 0.130859375, "loss_num": 0.034912109375, "loss_xval": 0.4375, "num_input_tokens_seen": 61955344, "step": 988 }, { "epoch": 3.291181364392679, "grad_norm": 28.49940299987793, "learning_rate": 5e-06, "loss": 0.9496, "num_input_tokens_seen": 62019316, "step": 989 }, { "epoch": 3.291181364392679, "loss": 1.1382167339324951, "loss_ce": 3.3076714316848665e-05, "loss_iou": 0.447265625, "loss_num": 0.048828125, "loss_xval": 1.140625, "num_input_tokens_seen": 62019316, "step": 989 }, { "epoch": 3.2945091514143097, "grad_norm": 35.954505920410156, "learning_rate": 5e-06, "loss": 0.8872, "num_input_tokens_seen": 62082856, "step": 990 }, { "epoch": 3.2945091514143097, "loss": 0.9216300845146179, "loss_ce": 0.0004875370650552213, "loss_iou": 0.33984375, "loss_num": 0.04833984375, "loss_xval": 0.921875, "num_input_tokens_seen": 62082856, "step": 990 }, { "epoch": 3.2978369384359403, "grad_norm": 17.55529022216797, "learning_rate": 5e-06, "loss": 0.5793, "num_input_tokens_seen": 62144984, "step": 991 }, { "epoch": 3.2978369384359403, "loss": 0.6133896112442017, "loss_ce": 0.0003524775383993983, "loss_iou": 0.1748046875, "loss_num": 0.052734375, "loss_xval": 0.61328125, "num_input_tokens_seen": 62144984, "step": 991 }, { "epoch": 3.301164725457571, "grad_norm": 21.352609634399414, "learning_rate": 5e-06, "loss": 0.7561, "num_input_tokens_seen": 62208796, "step": 992 }, { "epoch": 3.301164725457571, "loss": 0.8144993782043457, "loss_ce": 0.0002904089924413711, "loss_iou": 0.28125, "loss_num": 0.05029296875, "loss_xval": 0.8125, "num_input_tokens_seen": 62208796, "step": 992 }, { "epoch": 3.3044925124792015, "grad_norm": 26.83047103881836, "learning_rate": 5e-06, "loss": 0.7166, "num_input_tokens_seen": 62270656, "step": 993 }, { "epoch": 3.3044925124792015, "loss": 0.7601873874664307, "loss_ce": 0.001764480723068118, "loss_iou": 0.2080078125, "loss_num": 0.06884765625, "loss_xval": 0.7578125, "num_input_tokens_seen": 62270656, "step": 993 }, { "epoch": 3.307820299500832, "grad_norm": 15.3873872756958, "learning_rate": 5e-06, "loss": 0.733, "num_input_tokens_seen": 62333124, "step": 994 }, { "epoch": 3.307820299500832, "loss": 0.5635322332382202, "loss_ce": 5.564358434639871e-05, "loss_iou": 0.1865234375, "loss_num": 0.0380859375, "loss_xval": 0.5625, "num_input_tokens_seen": 62333124, "step": 994 }, { "epoch": 3.3111480865224627, "grad_norm": 28.301122665405273, "learning_rate": 5e-06, "loss": 0.9499, "num_input_tokens_seen": 62394508, "step": 995 }, { "epoch": 3.3111480865224627, "loss": 0.9298645257949829, "loss_ce": 0.000177039357367903, "loss_iou": 0.30078125, "loss_num": 0.0654296875, "loss_xval": 0.9296875, "num_input_tokens_seen": 62394508, "step": 995 }, { "epoch": 3.3144758735440933, "grad_norm": 28.92433738708496, "learning_rate": 5e-06, "loss": 0.7486, "num_input_tokens_seen": 62457352, "step": 996 }, { "epoch": 3.3144758735440933, "loss": 0.8685736656188965, "loss_ce": 0.0008979399572126567, "loss_iou": 0.314453125, "loss_num": 0.0478515625, "loss_xval": 0.8671875, "num_input_tokens_seen": 62457352, "step": 996 }, { "epoch": 3.317803660565724, "grad_norm": 36.97626876831055, "learning_rate": 5e-06, "loss": 0.6889, "num_input_tokens_seen": 62519816, "step": 997 }, { "epoch": 3.317803660565724, "loss": 1.0353097915649414, "loss_ce": 0.0013743548188358545, "loss_iou": 0.3359375, "loss_num": 0.07177734375, "loss_xval": 1.03125, "num_input_tokens_seen": 62519816, "step": 997 }, { "epoch": 3.3211314475873546, "grad_norm": 20.735578536987305, "learning_rate": 5e-06, "loss": 0.7328, "num_input_tokens_seen": 62582088, "step": 998 }, { "epoch": 3.3211314475873546, "loss": 0.5872924327850342, "loss_ce": 1.2138524652982596e-05, "loss_iou": 0.2099609375, "loss_num": 0.033203125, "loss_xval": 0.5859375, "num_input_tokens_seen": 62582088, "step": 998 }, { "epoch": 3.324459234608985, "grad_norm": 25.57074737548828, "learning_rate": 5e-06, "loss": 0.7645, "num_input_tokens_seen": 62645988, "step": 999 }, { "epoch": 3.324459234608985, "loss": 0.421669065952301, "loss_ce": 3.820429992629215e-05, "loss_iou": 0.1630859375, "loss_num": 0.01904296875, "loss_xval": 0.421875, "num_input_tokens_seen": 62645988, "step": 999 }, { "epoch": 3.327787021630616, "grad_norm": 28.99658966064453, "learning_rate": 5e-06, "loss": 0.6753, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "eval_seeclick_CIoU": 0.06469295546412468, "eval_seeclick_GIoU": 0.09000834450125694, "eval_seeclick_IoU": 0.17648741602897644, "eval_seeclick_MAE_all": 0.1684766560792923, "eval_seeclick_MAE_h": 0.053172217682003975, "eval_seeclick_MAE_w": 0.14640185236930847, "eval_seeclick_MAE_x_boxes": 0.21364449709653854, "eval_seeclick_MAE_y_boxes": 0.16201536357402802, "eval_seeclick_NUM_probability": 0.9998578131198883, "eval_seeclick_inside_bbox": 0.20937500149011612, "eval_seeclick_loss": 2.809871196746826, "eval_seeclick_loss_ce": 0.11128965765237808, "eval_seeclick_loss_iou": 0.91748046875, "eval_seeclick_loss_num": 0.16803741455078125, "eval_seeclick_loss_xval": 2.67529296875, "eval_seeclick_runtime": 60.7366, "eval_seeclick_samples_per_second": 0.774, "eval_seeclick_steps_per_second": 0.033, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "eval_icons_CIoU": -0.05401154048740864, "eval_icons_GIoU": 0.0392756424844265, "eval_icons_IoU": 0.11743708699941635, "eval_icons_MAE_all": 0.17453473061323166, "eval_icons_MAE_h": 0.12691883370280266, "eval_icons_MAE_w": 0.15283362567424774, "eval_icons_MAE_x_boxes": 0.13812357187271118, "eval_icons_MAE_y_boxes": 0.11175516247749329, "eval_icons_NUM_probability": 0.9998853206634521, "eval_icons_inside_bbox": 0.2083333358168602, "eval_icons_loss": 2.733140230178833, "eval_icons_loss_ce": 1.7001142623485066e-05, "eval_icons_loss_iou": 0.9609375, "eval_icons_loss_num": 0.16851043701171875, "eval_icons_loss_xval": 2.763671875, "eval_icons_runtime": 74.163, "eval_icons_samples_per_second": 0.674, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "eval_screenspot_CIoU": 0.10195153082410495, "eval_screenspot_GIoU": 0.13212568561236063, "eval_screenspot_IoU": 0.23143841326236725, "eval_screenspot_MAE_all": 0.15005385130643845, "eval_screenspot_MAE_h": 0.07803368320067723, "eval_screenspot_MAE_w": 0.12479293594757716, "eval_screenspot_MAE_x_boxes": 0.20538078745206198, "eval_screenspot_MAE_y_boxes": 0.10620040198167165, "eval_screenspot_NUM_probability": 0.9999656081199646, "eval_screenspot_inside_bbox": 0.4262500007947286, "eval_screenspot_loss": 2.5218491554260254, "eval_screenspot_loss_ce": 0.0002643535650956134, "eval_screenspot_loss_iou": 0.87939453125, "eval_screenspot_loss_num": 0.1600341796875, "eval_screenspot_loss_xval": 2.5589192708333335, "eval_screenspot_runtime": 112.9314, "eval_screenspot_samples_per_second": 0.788, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "eval_compot_CIoU": -0.022290704771876335, "eval_compot_GIoU": 0.04790537618100643, "eval_compot_IoU": 0.13918618857860565, "eval_compot_MAE_all": 0.2006586417555809, "eval_compot_MAE_h": 0.10345861688256264, "eval_compot_MAE_w": 0.19712596386671066, "eval_compot_MAE_x_boxes": 0.19054331630468369, "eval_compot_MAE_y_boxes": 0.13909221440553665, "eval_compot_NUM_probability": 0.9999749064445496, "eval_compot_inside_bbox": 0.2673611119389534, "eval_compot_loss": 2.919442653656006, "eval_compot_loss_ce": 0.0077364586759358644, "eval_compot_loss_iou": 0.95849609375, "eval_compot_loss_num": 0.20458984375, "eval_compot_loss_xval": 2.94189453125, "eval_compot_runtime": 64.3758, "eval_compot_samples_per_second": 0.777, "eval_compot_steps_per_second": 0.031, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "eval_custom_ui_MAE_all": 0.08360651507973671, "eval_custom_ui_MAE_x": 0.08679963275790215, "eval_custom_ui_MAE_y": 0.08041340485215187, "eval_custom_ui_NUM_probability": 0.9999817907810211, "eval_custom_ui_loss": 0.4009445607662201, "eval_custom_ui_loss_ce": 7.820694463589462e-06, "eval_custom_ui_loss_num": 0.0805206298828125, "eval_custom_ui_loss_xval": 0.4027099609375, "eval_custom_ui_runtime": 56.1092, "eval_custom_ui_samples_per_second": 0.891, "eval_custom_ui_steps_per_second": 0.036, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.327787021630616, "loss": 0.40503746271133423, "loss_ce": 8.157371667039115e-06, "loss_iou": 0.0, "loss_num": 0.0810546875, "loss_xval": 0.404296875, "num_input_tokens_seen": 62709356, "step": 1000 }, { "epoch": 3.3311148086522464, "grad_norm": 7.776968479156494, "learning_rate": 5e-06, "loss": 0.5117, "num_input_tokens_seen": 62772100, "step": 1001 }, { "epoch": 3.3311148086522464, "loss": 0.2846817374229431, "loss_ce": 1.3767563359579071e-05, "loss_iou": 0.052490234375, "loss_num": 0.035888671875, "loss_xval": 0.28515625, "num_input_tokens_seen": 62772100, "step": 1001 }, { "epoch": 3.334442595673877, "grad_norm": 30.39413070678711, "learning_rate": 5e-06, "loss": 0.5212, "num_input_tokens_seen": 62834048, "step": 1002 }, { "epoch": 3.334442595673877, "loss": 0.511046826839447, "loss_ce": 6.050094816600904e-05, "loss_iou": 0.2001953125, "loss_num": 0.0223388671875, "loss_xval": 0.51171875, "num_input_tokens_seen": 62834048, "step": 1002 }, { "epoch": 3.3377703826955076, "grad_norm": 27.171674728393555, "learning_rate": 5e-06, "loss": 0.8095, "num_input_tokens_seen": 62898200, "step": 1003 }, { "epoch": 3.3377703826955076, "loss": 0.8486383557319641, "loss_ce": 0.001226250664331019, "loss_iou": 0.2734375, "loss_num": 0.059814453125, "loss_xval": 0.84765625, "num_input_tokens_seen": 62898200, "step": 1003 }, { "epoch": 3.3410981697171382, "grad_norm": 16.957368850708008, "learning_rate": 5e-06, "loss": 0.8835, "num_input_tokens_seen": 62961712, "step": 1004 }, { "epoch": 3.3410981697171382, "loss": 0.5378614068031311, "loss_ce": 0.00014167974586598575, "loss_iou": 0.126953125, "loss_num": 0.056884765625, "loss_xval": 0.5390625, "num_input_tokens_seen": 62961712, "step": 1004 }, { "epoch": 3.344425956738769, "grad_norm": 8.965785026550293, "learning_rate": 5e-06, "loss": 0.62, "num_input_tokens_seen": 63023508, "step": 1005 }, { "epoch": 3.344425956738769, "loss": 0.6407850384712219, "loss_ce": 0.0001600124960532412, "loss_iou": 0.1572265625, "loss_num": 0.06494140625, "loss_xval": 0.640625, "num_input_tokens_seen": 63023508, "step": 1005 }, { "epoch": 3.3477537437603995, "grad_norm": 19.833818435668945, "learning_rate": 5e-06, "loss": 0.701, "num_input_tokens_seen": 63084860, "step": 1006 }, { "epoch": 3.3477537437603995, "loss": 0.6166751980781555, "loss_ce": 0.0062015545554459095, "loss_iou": 0.171875, "loss_num": 0.05322265625, "loss_xval": 0.609375, "num_input_tokens_seen": 63084860, "step": 1006 }, { "epoch": 3.35108153078203, "grad_norm": 13.646302223205566, "learning_rate": 5e-06, "loss": 0.6888, "num_input_tokens_seen": 63146644, "step": 1007 }, { "epoch": 3.35108153078203, "loss": 0.8403863906860352, "loss_ce": 0.0002984975290019065, "loss_iou": 0.30859375, "loss_num": 0.04443359375, "loss_xval": 0.83984375, "num_input_tokens_seen": 63146644, "step": 1007 }, { "epoch": 3.3544093178036607, "grad_norm": 25.743032455444336, "learning_rate": 5e-06, "loss": 0.7378, "num_input_tokens_seen": 63209828, "step": 1008 }, { "epoch": 3.3544093178036607, "loss": 0.7881006598472595, "loss_ce": 0.001235442003235221, "loss_iou": 0.28515625, "loss_num": 0.04345703125, "loss_xval": 0.78515625, "num_input_tokens_seen": 63209828, "step": 1008 }, { "epoch": 3.3577371048252913, "grad_norm": 31.79527473449707, "learning_rate": 5e-06, "loss": 0.942, "num_input_tokens_seen": 63273748, "step": 1009 }, { "epoch": 3.3577371048252913, "loss": 0.8840484619140625, "loss_ce": 0.0002593940880615264, "loss_iou": 0.337890625, "loss_num": 0.041748046875, "loss_xval": 0.8828125, "num_input_tokens_seen": 63273748, "step": 1009 }, { "epoch": 3.361064891846922, "grad_norm": 26.88178825378418, "learning_rate": 5e-06, "loss": 0.9688, "num_input_tokens_seen": 63337520, "step": 1010 }, { "epoch": 3.361064891846922, "loss": 0.8791961073875427, "loss_ce": 0.000411954679293558, "loss_iou": 0.2734375, "loss_num": 0.06640625, "loss_xval": 0.87890625, "num_input_tokens_seen": 63337520, "step": 1010 }, { "epoch": 3.3643926788685525, "grad_norm": 36.61302185058594, "learning_rate": 5e-06, "loss": 0.7593, "num_input_tokens_seen": 63400884, "step": 1011 }, { "epoch": 3.3643926788685525, "loss": 0.7895728349685669, "loss_ce": 0.0002662379411049187, "loss_iou": 0.296875, "loss_num": 0.03955078125, "loss_xval": 0.7890625, "num_input_tokens_seen": 63400884, "step": 1011 }, { "epoch": 3.367720465890183, "grad_norm": 21.82211685180664, "learning_rate": 5e-06, "loss": 0.8408, "num_input_tokens_seen": 63461376, "step": 1012 }, { "epoch": 3.367720465890183, "loss": 0.8267179727554321, "loss_ce": 5.779770071967505e-05, "loss_iou": 0.25, "loss_num": 0.0654296875, "loss_xval": 0.828125, "num_input_tokens_seen": 63461376, "step": 1012 }, { "epoch": 3.3710482529118138, "grad_norm": 11.12902545928955, "learning_rate": 5e-06, "loss": 0.7744, "num_input_tokens_seen": 63524316, "step": 1013 }, { "epoch": 3.3710482529118138, "loss": 0.7194188237190247, "loss_ce": 0.0006688575376756489, "loss_iou": 0.2265625, "loss_num": 0.05322265625, "loss_xval": 0.71875, "num_input_tokens_seen": 63524316, "step": 1013 }, { "epoch": 3.3743760399334444, "grad_norm": 14.568037986755371, "learning_rate": 5e-06, "loss": 0.6472, "num_input_tokens_seen": 63586476, "step": 1014 }, { "epoch": 3.3743760399334444, "loss": 0.5252353549003601, "loss_ce": 8.888516458682716e-05, "loss_iou": 0.142578125, "loss_num": 0.048095703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 63586476, "step": 1014 }, { "epoch": 3.377703826955075, "grad_norm": 7.535749912261963, "learning_rate": 5e-06, "loss": 0.7923, "num_input_tokens_seen": 63648060, "step": 1015 }, { "epoch": 3.377703826955075, "loss": 0.6590226888656616, "loss_ce": 0.00075855094473809, "loss_iou": 0.224609375, "loss_num": 0.041748046875, "loss_xval": 0.66015625, "num_input_tokens_seen": 63648060, "step": 1015 }, { "epoch": 3.3810316139767056, "grad_norm": 10.491238594055176, "learning_rate": 5e-06, "loss": 0.8015, "num_input_tokens_seen": 63710016, "step": 1016 }, { "epoch": 3.3810316139767056, "loss": 0.5362685322761536, "loss_ce": 1.3618517186841927e-05, "loss_iou": 0.1953125, "loss_num": 0.0294189453125, "loss_xval": 0.53515625, "num_input_tokens_seen": 63710016, "step": 1016 }, { "epoch": 3.384359400998336, "grad_norm": 10.463374137878418, "learning_rate": 5e-06, "loss": 0.8182, "num_input_tokens_seen": 63772552, "step": 1017 }, { "epoch": 3.384359400998336, "loss": 0.7745547294616699, "loss_ce": 0.0007509992574341595, "loss_iou": 0.2578125, "loss_num": 0.0517578125, "loss_xval": 0.7734375, "num_input_tokens_seen": 63772552, "step": 1017 }, { "epoch": 3.387687188019967, "grad_norm": 12.239856719970703, "learning_rate": 5e-06, "loss": 0.7396, "num_input_tokens_seen": 63835748, "step": 1018 }, { "epoch": 3.387687188019967, "loss": 0.8536421060562134, "loss_ce": 0.0006147997919470072, "loss_iou": 0.298828125, "loss_num": 0.051513671875, "loss_xval": 0.8515625, "num_input_tokens_seen": 63835748, "step": 1018 }, { "epoch": 3.3910149750415974, "grad_norm": 19.16669464111328, "learning_rate": 5e-06, "loss": 0.8739, "num_input_tokens_seen": 63897248, "step": 1019 }, { "epoch": 3.3910149750415974, "loss": 0.9951910376548767, "loss_ce": 7.383939373539761e-05, "loss_iou": 0.353515625, "loss_num": 0.057373046875, "loss_xval": 0.99609375, "num_input_tokens_seen": 63897248, "step": 1019 }, { "epoch": 3.394342762063228, "grad_norm": 21.313337326049805, "learning_rate": 5e-06, "loss": 0.7035, "num_input_tokens_seen": 63959036, "step": 1020 }, { "epoch": 3.394342762063228, "loss": 0.7610794901847839, "loss_ce": 0.0008255698485299945, "loss_iou": 0.291015625, "loss_num": 0.03564453125, "loss_xval": 0.76171875, "num_input_tokens_seen": 63959036, "step": 1020 }, { "epoch": 3.3976705490848587, "grad_norm": 25.196046829223633, "learning_rate": 5e-06, "loss": 0.8431, "num_input_tokens_seen": 64021348, "step": 1021 }, { "epoch": 3.3976705490848587, "loss": 0.9046334028244019, "loss_ce": 0.0003365739539731294, "loss_iou": 0.326171875, "loss_num": 0.05029296875, "loss_xval": 0.90625, "num_input_tokens_seen": 64021348, "step": 1021 }, { "epoch": 3.4009983361064893, "grad_norm": 25.156089782714844, "learning_rate": 5e-06, "loss": 0.6749, "num_input_tokens_seen": 64083396, "step": 1022 }, { "epoch": 3.4009983361064893, "loss": 0.5849233865737915, "loss_ce": 0.00020661459711845964, "loss_iou": 0.193359375, "loss_num": 0.039794921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 64083396, "step": 1022 }, { "epoch": 3.40432612312812, "grad_norm": 12.88276481628418, "learning_rate": 5e-06, "loss": 0.7487, "num_input_tokens_seen": 64146916, "step": 1023 }, { "epoch": 3.40432612312812, "loss": 0.6373682022094727, "loss_ce": 0.0015039595309644938, "loss_iou": 0.2119140625, "loss_num": 0.04248046875, "loss_xval": 0.63671875, "num_input_tokens_seen": 64146916, "step": 1023 }, { "epoch": 3.4076539101497505, "grad_norm": 16.09154510498047, "learning_rate": 5e-06, "loss": 0.7258, "num_input_tokens_seen": 64211168, "step": 1024 }, { "epoch": 3.4076539101497505, "loss": 0.5692362785339355, "loss_ce": 2.2426196665037423e-05, "loss_iou": 0.2080078125, "loss_num": 0.030517578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 64211168, "step": 1024 }, { "epoch": 3.410981697171381, "grad_norm": 21.734678268432617, "learning_rate": 5e-06, "loss": 0.7836, "num_input_tokens_seen": 64273668, "step": 1025 }, { "epoch": 3.410981697171381, "loss": 1.0455684661865234, "loss_ce": 0.0013790387893095613, "loss_iou": 0.39453125, "loss_num": 0.05078125, "loss_xval": 1.046875, "num_input_tokens_seen": 64273668, "step": 1025 }, { "epoch": 3.4143094841930117, "grad_norm": 27.346384048461914, "learning_rate": 5e-06, "loss": 0.8577, "num_input_tokens_seen": 64336972, "step": 1026 }, { "epoch": 3.4143094841930117, "loss": 0.91633141040802, "loss_ce": 0.0003157990868203342, "loss_iou": 0.3046875, "loss_num": 0.060791015625, "loss_xval": 0.9140625, "num_input_tokens_seen": 64336972, "step": 1026 }, { "epoch": 3.4176372712146423, "grad_norm": 18.70287322998047, "learning_rate": 5e-06, "loss": 0.8027, "num_input_tokens_seen": 64400652, "step": 1027 }, { "epoch": 3.4176372712146423, "loss": 0.8191217184066772, "loss_ce": 2.9910979719716124e-05, "loss_iou": 0.328125, "loss_num": 0.032958984375, "loss_xval": 0.8203125, "num_input_tokens_seen": 64400652, "step": 1027 }, { "epoch": 3.420965058236273, "grad_norm": 12.181321144104004, "learning_rate": 5e-06, "loss": 0.7355, "num_input_tokens_seen": 64464860, "step": 1028 }, { "epoch": 3.420965058236273, "loss": 0.8187693357467651, "loss_ce": 0.0007761840242892504, "loss_iou": 0.2890625, "loss_num": 0.04833984375, "loss_xval": 0.81640625, "num_input_tokens_seen": 64464860, "step": 1028 }, { "epoch": 3.4242928452579036, "grad_norm": 18.49558448791504, "learning_rate": 5e-06, "loss": 0.7493, "num_input_tokens_seen": 64525420, "step": 1029 }, { "epoch": 3.4242928452579036, "loss": 0.866330623626709, "loss_ce": 0.0003638428170233965, "loss_iou": 0.283203125, "loss_num": 0.0595703125, "loss_xval": 0.8671875, "num_input_tokens_seen": 64525420, "step": 1029 }, { "epoch": 3.427620632279534, "grad_norm": 9.978404998779297, "learning_rate": 5e-06, "loss": 0.727, "num_input_tokens_seen": 64588796, "step": 1030 }, { "epoch": 3.427620632279534, "loss": 0.9154368042945862, "loss_ce": 3.15469442284666e-05, "loss_iou": 0.34375, "loss_num": 0.044921875, "loss_xval": 0.9140625, "num_input_tokens_seen": 64588796, "step": 1030 }, { "epoch": 3.430948419301165, "grad_norm": 13.115931510925293, "learning_rate": 5e-06, "loss": 0.7031, "num_input_tokens_seen": 64651428, "step": 1031 }, { "epoch": 3.430948419301165, "loss": 0.6643157005310059, "loss_ce": 0.0003752529446501285, "loss_iou": 0.2373046875, "loss_num": 0.0380859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 64651428, "step": 1031 }, { "epoch": 3.4342762063227954, "grad_norm": 9.373936653137207, "learning_rate": 5e-06, "loss": 0.6866, "num_input_tokens_seen": 64714600, "step": 1032 }, { "epoch": 3.4342762063227954, "loss": 0.5706291198730469, "loss_ce": 0.0004386901273392141, "loss_iou": 0.2001953125, "loss_num": 0.033935546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 64714600, "step": 1032 }, { "epoch": 3.437603993344426, "grad_norm": 7.854013919830322, "learning_rate": 5e-06, "loss": 0.5182, "num_input_tokens_seen": 64775332, "step": 1033 }, { "epoch": 3.437603993344426, "loss": 0.4718819856643677, "loss_ce": 8.020136010600254e-05, "loss_iou": 0.12109375, "loss_num": 0.0458984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 64775332, "step": 1033 }, { "epoch": 3.4409317803660566, "grad_norm": 14.210572242736816, "learning_rate": 5e-06, "loss": 0.6678, "num_input_tokens_seen": 64837956, "step": 1034 }, { "epoch": 3.4409317803660566, "loss": 0.7226674556732178, "loss_ce": 0.0014760298654437065, "loss_iou": 0.25390625, "loss_num": 0.04296875, "loss_xval": 0.72265625, "num_input_tokens_seen": 64837956, "step": 1034 }, { "epoch": 3.4442595673876872, "grad_norm": 6.943016529083252, "learning_rate": 5e-06, "loss": 0.5522, "num_input_tokens_seen": 64899764, "step": 1035 }, { "epoch": 3.4442595673876872, "loss": 0.6755484938621521, "loss_ce": 0.00129311578348279, "loss_iou": 0.17578125, "loss_num": 0.06396484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 64899764, "step": 1035 }, { "epoch": 3.447587354409318, "grad_norm": 6.488821029663086, "learning_rate": 5e-06, "loss": 0.6404, "num_input_tokens_seen": 64964300, "step": 1036 }, { "epoch": 3.447587354409318, "loss": 0.7309756875038147, "loss_ce": 1.8662194634089246e-05, "loss_iou": 0.265625, "loss_num": 0.040283203125, "loss_xval": 0.73046875, "num_input_tokens_seen": 64964300, "step": 1036 }, { "epoch": 3.4509151414309485, "grad_norm": 25.04932975769043, "learning_rate": 5e-06, "loss": 0.7355, "num_input_tokens_seen": 65027372, "step": 1037 }, { "epoch": 3.4509151414309485, "loss": 0.6644483804702759, "loss_ce": 0.0021864601876586676, "loss_iou": 0.2080078125, "loss_num": 0.04931640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 65027372, "step": 1037 }, { "epoch": 3.454242928452579, "grad_norm": 27.46053695678711, "learning_rate": 5e-06, "loss": 0.8327, "num_input_tokens_seen": 65089104, "step": 1038 }, { "epoch": 3.454242928452579, "loss": 0.705582857131958, "loss_ce": 0.00026057378272525966, "loss_iou": 0.2333984375, "loss_num": 0.047607421875, "loss_xval": 0.70703125, "num_input_tokens_seen": 65089104, "step": 1038 }, { "epoch": 3.4575707154742097, "grad_norm": 9.879676818847656, "learning_rate": 5e-06, "loss": 1.0361, "num_input_tokens_seen": 65152120, "step": 1039 }, { "epoch": 3.4575707154742097, "loss": 1.1938526630401611, "loss_ce": 0.0004933266900479794, "loss_iou": 0.45703125, "loss_num": 0.055908203125, "loss_xval": 1.1953125, "num_input_tokens_seen": 65152120, "step": 1039 }, { "epoch": 3.4608985024958403, "grad_norm": 13.6068115234375, "learning_rate": 5e-06, "loss": 0.6587, "num_input_tokens_seen": 65213564, "step": 1040 }, { "epoch": 3.4608985024958403, "loss": 0.7951961755752563, "loss_ce": 0.0002742463257163763, "loss_iou": 0.283203125, "loss_num": 0.0458984375, "loss_xval": 0.796875, "num_input_tokens_seen": 65213564, "step": 1040 }, { "epoch": 3.464226289517471, "grad_norm": 5.12733793258667, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 65275644, "step": 1041 }, { "epoch": 3.464226289517471, "loss": 0.5345946550369263, "loss_ce": 4.874749720329419e-05, "loss_iou": 0.2021484375, "loss_num": 0.0260009765625, "loss_xval": 0.53515625, "num_input_tokens_seen": 65275644, "step": 1041 }, { "epoch": 3.4675540765391015, "grad_norm": 13.5778226852417, "learning_rate": 5e-06, "loss": 0.6384, "num_input_tokens_seen": 65339908, "step": 1042 }, { "epoch": 3.4675540765391015, "loss": 0.5538702011108398, "loss_ce": 0.0004033859877381474, "loss_iou": 0.2109375, "loss_num": 0.0263671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 65339908, "step": 1042 }, { "epoch": 3.470881863560732, "grad_norm": 14.015215873718262, "learning_rate": 5e-06, "loss": 0.7393, "num_input_tokens_seen": 65402288, "step": 1043 }, { "epoch": 3.470881863560732, "loss": 0.5048419833183289, "loss_ce": 0.0003253856557421386, "loss_iou": 0.12890625, "loss_num": 0.04931640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 65402288, "step": 1043 }, { "epoch": 3.4742096505823628, "grad_norm": 8.984355926513672, "learning_rate": 5e-06, "loss": 0.8661, "num_input_tokens_seen": 65464148, "step": 1044 }, { "epoch": 3.4742096505823628, "loss": 0.9578490257263184, "loss_ce": 0.00032945917337201536, "loss_iou": 0.33203125, "loss_num": 0.058349609375, "loss_xval": 0.95703125, "num_input_tokens_seen": 65464148, "step": 1044 }, { "epoch": 3.4775374376039934, "grad_norm": 9.052020072937012, "learning_rate": 5e-06, "loss": 0.8591, "num_input_tokens_seen": 65527396, "step": 1045 }, { "epoch": 3.4775374376039934, "loss": 0.634093165397644, "loss_ce": 5.996242180117406e-05, "loss_iou": 0.2314453125, "loss_num": 0.0341796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 65527396, "step": 1045 }, { "epoch": 3.480865224625624, "grad_norm": 9.523575782775879, "learning_rate": 5e-06, "loss": 0.6342, "num_input_tokens_seen": 65590100, "step": 1046 }, { "epoch": 3.480865224625624, "loss": 0.6340984106063843, "loss_ce": 0.0004314111720304936, "loss_iou": 0.21484375, "loss_num": 0.041259765625, "loss_xval": 0.6328125, "num_input_tokens_seen": 65590100, "step": 1046 }, { "epoch": 3.4841930116472546, "grad_norm": 6.930280685424805, "learning_rate": 5e-06, "loss": 0.4848, "num_input_tokens_seen": 65652840, "step": 1047 }, { "epoch": 3.4841930116472546, "loss": 0.2999436557292938, "loss_ce": 1.6897251043701544e-05, "loss_iou": 0.10107421875, "loss_num": 0.0194091796875, "loss_xval": 0.30078125, "num_input_tokens_seen": 65652840, "step": 1047 }, { "epoch": 3.487520798668885, "grad_norm": 11.900886535644531, "learning_rate": 5e-06, "loss": 0.8359, "num_input_tokens_seen": 65715124, "step": 1048 }, { "epoch": 3.487520798668885, "loss": 0.6697213649749756, "loss_ce": 4.359194281278178e-05, "loss_iou": 0.2177734375, "loss_num": 0.046630859375, "loss_xval": 0.66796875, "num_input_tokens_seen": 65715124, "step": 1048 }, { "epoch": 3.490848585690516, "grad_norm": 14.04080581665039, "learning_rate": 5e-06, "loss": 0.8482, "num_input_tokens_seen": 65778484, "step": 1049 }, { "epoch": 3.490848585690516, "loss": 0.7816200256347656, "loss_ce": 0.00293355411849916, "loss_iou": 0.255859375, "loss_num": 0.052978515625, "loss_xval": 0.77734375, "num_input_tokens_seen": 65778484, "step": 1049 }, { "epoch": 3.4941763727121464, "grad_norm": 8.629766464233398, "learning_rate": 5e-06, "loss": 0.7293, "num_input_tokens_seen": 65839936, "step": 1050 }, { "epoch": 3.4941763727121464, "loss": 0.5982376337051392, "loss_ce": 0.0020462353713810444, "loss_iou": 0.1728515625, "loss_num": 0.0498046875, "loss_xval": 0.59765625, "num_input_tokens_seen": 65839936, "step": 1050 }, { "epoch": 3.497504159733777, "grad_norm": 8.150739669799805, "learning_rate": 5e-06, "loss": 0.7565, "num_input_tokens_seen": 65901100, "step": 1051 }, { "epoch": 3.497504159733777, "loss": 0.956345796585083, "loss_ce": 4.695860843639821e-05, "loss_iou": 0.279296875, "loss_num": 0.07958984375, "loss_xval": 0.95703125, "num_input_tokens_seen": 65901100, "step": 1051 }, { "epoch": 3.5008319467554077, "grad_norm": 7.999705791473389, "learning_rate": 5e-06, "loss": 0.6639, "num_input_tokens_seen": 65965192, "step": 1052 }, { "epoch": 3.5008319467554077, "loss": 0.6293894052505493, "loss_ce": 0.0013986685080453753, "loss_iou": 0.2197265625, "loss_num": 0.037841796875, "loss_xval": 0.62890625, "num_input_tokens_seen": 65965192, "step": 1052 }, { "epoch": 3.5041597337770383, "grad_norm": 10.955790519714355, "learning_rate": 5e-06, "loss": 0.7596, "num_input_tokens_seen": 66027448, "step": 1053 }, { "epoch": 3.5041597337770383, "loss": 0.680887758731842, "loss_ce": 4.0574639569967985e-05, "loss_iou": 0.2353515625, "loss_num": 0.0419921875, "loss_xval": 0.6796875, "num_input_tokens_seen": 66027448, "step": 1053 }, { "epoch": 3.507487520798669, "grad_norm": 10.91659927368164, "learning_rate": 5e-06, "loss": 0.7322, "num_input_tokens_seen": 66090372, "step": 1054 }, { "epoch": 3.507487520798669, "loss": 0.7654873132705688, "loss_ce": 0.000350588234141469, "loss_iou": 0.244140625, "loss_num": 0.055419921875, "loss_xval": 0.765625, "num_input_tokens_seen": 66090372, "step": 1054 }, { "epoch": 3.5108153078202995, "grad_norm": 30.876070022583008, "learning_rate": 5e-06, "loss": 0.6355, "num_input_tokens_seen": 66153396, "step": 1055 }, { "epoch": 3.5108153078202995, "loss": 0.6095548272132874, "loss_ce": 0.0008512315107509494, "loss_iou": 0.2099609375, "loss_num": 0.037841796875, "loss_xval": 0.609375, "num_input_tokens_seen": 66153396, "step": 1055 }, { "epoch": 3.51414309484193, "grad_norm": 19.838214874267578, "learning_rate": 5e-06, "loss": 0.656, "num_input_tokens_seen": 66215644, "step": 1056 }, { "epoch": 3.51414309484193, "loss": 1.0556720495224, "loss_ce": 0.0004962346283718944, "loss_iou": 0.376953125, "loss_num": 0.06005859375, "loss_xval": 1.0546875, "num_input_tokens_seen": 66215644, "step": 1056 }, { "epoch": 3.5174708818635607, "grad_norm": 13.517842292785645, "learning_rate": 5e-06, "loss": 0.5506, "num_input_tokens_seen": 66277904, "step": 1057 }, { "epoch": 3.5174708818635607, "loss": 0.421215295791626, "loss_ce": 1.168130347650731e-05, "loss_iou": 0.1416015625, "loss_num": 0.0277099609375, "loss_xval": 0.421875, "num_input_tokens_seen": 66277904, "step": 1057 }, { "epoch": 3.5207986688851913, "grad_norm": 6.226344585418701, "learning_rate": 5e-06, "loss": 0.4628, "num_input_tokens_seen": 66340292, "step": 1058 }, { "epoch": 3.5207986688851913, "loss": 0.5400905609130859, "loss_ce": 0.0005397890927270055, "loss_iou": 0.1787109375, "loss_num": 0.03662109375, "loss_xval": 0.5390625, "num_input_tokens_seen": 66340292, "step": 1058 }, { "epoch": 3.524126455906822, "grad_norm": 20.18250274658203, "learning_rate": 5e-06, "loss": 0.8048, "num_input_tokens_seen": 66403936, "step": 1059 }, { "epoch": 3.524126455906822, "loss": 0.6000856757164001, "loss_ce": 1.8516982891014777e-05, "loss_iou": 0.234375, "loss_num": 0.026123046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 66403936, "step": 1059 }, { "epoch": 3.5274542429284526, "grad_norm": 13.89167594909668, "learning_rate": 5e-06, "loss": 0.8002, "num_input_tokens_seen": 66467492, "step": 1060 }, { "epoch": 3.5274542429284526, "loss": 0.5974984169006348, "loss_ce": 0.00045254032011143863, "loss_iou": 0.1806640625, "loss_num": 0.047119140625, "loss_xval": 0.59765625, "num_input_tokens_seen": 66467492, "step": 1060 }, { "epoch": 3.530782029950083, "grad_norm": 15.044062614440918, "learning_rate": 5e-06, "loss": 0.6599, "num_input_tokens_seen": 66529040, "step": 1061 }, { "epoch": 3.530782029950083, "loss": 0.819593071937561, "loss_ce": 1.297298695135396e-05, "loss_iou": 0.265625, "loss_num": 0.057373046875, "loss_xval": 0.8203125, "num_input_tokens_seen": 66529040, "step": 1061 }, { "epoch": 3.534109816971714, "grad_norm": 18.571321487426758, "learning_rate": 5e-06, "loss": 0.7795, "num_input_tokens_seen": 66591032, "step": 1062 }, { "epoch": 3.534109816971714, "loss": 0.9003162980079651, "loss_ce": 0.0003529402893036604, "loss_iou": 0.328125, "loss_num": 0.04833984375, "loss_xval": 0.8984375, "num_input_tokens_seen": 66591032, "step": 1062 }, { "epoch": 3.5374376039933444, "grad_norm": 10.543642044067383, "learning_rate": 5e-06, "loss": 0.7132, "num_input_tokens_seen": 66652628, "step": 1063 }, { "epoch": 3.5374376039933444, "loss": 0.5837070345878601, "loss_ce": 0.0011875331401824951, "loss_iou": 0.193359375, "loss_num": 0.039306640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 66652628, "step": 1063 }, { "epoch": 3.540765391014975, "grad_norm": 16.399181365966797, "learning_rate": 5e-06, "loss": 1.0372, "num_input_tokens_seen": 66716008, "step": 1064 }, { "epoch": 3.540765391014975, "loss": 0.862764835357666, "loss_ce": 0.0011925328290089965, "loss_iou": 0.2314453125, "loss_num": 0.07958984375, "loss_xval": 0.86328125, "num_input_tokens_seen": 66716008, "step": 1064 }, { "epoch": 3.5440931780366056, "grad_norm": 14.214118957519531, "learning_rate": 5e-06, "loss": 0.6347, "num_input_tokens_seen": 66779004, "step": 1065 }, { "epoch": 3.5440931780366056, "loss": 0.6140273809432983, "loss_ce": 1.3659923752129544e-05, "loss_iou": 0.23046875, "loss_num": 0.030517578125, "loss_xval": 0.61328125, "num_input_tokens_seen": 66779004, "step": 1065 }, { "epoch": 3.5474209650582362, "grad_norm": 11.190632820129395, "learning_rate": 5e-06, "loss": 0.9006, "num_input_tokens_seen": 66841176, "step": 1066 }, { "epoch": 3.5474209650582362, "loss": 0.8209356069564819, "loss_ce": 1.2752819202432875e-05, "loss_iou": 0.23828125, "loss_num": 0.0693359375, "loss_xval": 0.8203125, "num_input_tokens_seen": 66841176, "step": 1066 }, { "epoch": 3.550748752079867, "grad_norm": 10.628087997436523, "learning_rate": 5e-06, "loss": 0.7966, "num_input_tokens_seen": 66903764, "step": 1067 }, { "epoch": 3.550748752079867, "loss": 0.7464427351951599, "loss_ce": 0.002241076435893774, "loss_iou": 0.2314453125, "loss_num": 0.056640625, "loss_xval": 0.74609375, "num_input_tokens_seen": 66903764, "step": 1067 }, { "epoch": 3.5540765391014975, "grad_norm": 10.589995384216309, "learning_rate": 5e-06, "loss": 0.7664, "num_input_tokens_seen": 66965988, "step": 1068 }, { "epoch": 3.5540765391014975, "loss": 0.8199621438980103, "loss_ce": 0.0005041161784902215, "loss_iou": 0.259765625, "loss_num": 0.06005859375, "loss_xval": 0.8203125, "num_input_tokens_seen": 66965988, "step": 1068 }, { "epoch": 3.557404326123128, "grad_norm": 22.26519775390625, "learning_rate": 5e-06, "loss": 0.6614, "num_input_tokens_seen": 67029320, "step": 1069 }, { "epoch": 3.557404326123128, "loss": 0.8162084817886353, "loss_ce": 0.0012670910218730569, "loss_iou": 0.30078125, "loss_num": 0.042724609375, "loss_xval": 0.81640625, "num_input_tokens_seen": 67029320, "step": 1069 }, { "epoch": 3.5607321131447587, "grad_norm": 24.054384231567383, "learning_rate": 5e-06, "loss": 0.6236, "num_input_tokens_seen": 67093124, "step": 1070 }, { "epoch": 3.5607321131447587, "loss": 0.7679802179336548, "loss_ce": 3.58659090125002e-05, "loss_iou": 0.2734375, "loss_num": 0.04443359375, "loss_xval": 0.76953125, "num_input_tokens_seen": 67093124, "step": 1070 }, { "epoch": 3.5640599001663893, "grad_norm": 23.027179718017578, "learning_rate": 5e-06, "loss": 0.8518, "num_input_tokens_seen": 67156372, "step": 1071 }, { "epoch": 3.5640599001663893, "loss": 0.6882663369178772, "loss_ce": 0.00015594727301504463, "loss_iou": 0.23828125, "loss_num": 0.04248046875, "loss_xval": 0.6875, "num_input_tokens_seen": 67156372, "step": 1071 }, { "epoch": 3.56738768718802, "grad_norm": 14.46191692352295, "learning_rate": 5e-06, "loss": 0.5376, "num_input_tokens_seen": 67219472, "step": 1072 }, { "epoch": 3.56738768718802, "loss": 0.4712623953819275, "loss_ce": 0.0005592681118287146, "loss_iou": 0.1298828125, "loss_num": 0.04248046875, "loss_xval": 0.470703125, "num_input_tokens_seen": 67219472, "step": 1072 }, { "epoch": 3.5707154742096505, "grad_norm": 10.546446800231934, "learning_rate": 5e-06, "loss": 0.4898, "num_input_tokens_seen": 67282660, "step": 1073 }, { "epoch": 3.5707154742096505, "loss": 0.578747570514679, "loss_ce": 0.0005004953127354383, "loss_iou": 0.208984375, "loss_num": 0.0322265625, "loss_xval": 0.578125, "num_input_tokens_seen": 67282660, "step": 1073 }, { "epoch": 3.574043261231281, "grad_norm": 10.484993934631348, "learning_rate": 5e-06, "loss": 0.4659, "num_input_tokens_seen": 67344288, "step": 1074 }, { "epoch": 3.574043261231281, "loss": 0.38344889879226685, "loss_ce": 0.00014813469897489995, "loss_iou": 0.134765625, "loss_num": 0.0224609375, "loss_xval": 0.3828125, "num_input_tokens_seen": 67344288, "step": 1074 }, { "epoch": 3.5773710482529117, "grad_norm": 29.584060668945312, "learning_rate": 5e-06, "loss": 0.8105, "num_input_tokens_seen": 67407268, "step": 1075 }, { "epoch": 3.5773710482529117, "loss": 0.8634161949157715, "loss_ce": 0.0006232375744730234, "loss_iou": 0.298828125, "loss_num": 0.05322265625, "loss_xval": 0.86328125, "num_input_tokens_seen": 67407268, "step": 1075 }, { "epoch": 3.5806988352745424, "grad_norm": 19.2270450592041, "learning_rate": 5e-06, "loss": 0.65, "num_input_tokens_seen": 67468188, "step": 1076 }, { "epoch": 3.5806988352745424, "loss": 0.7708810567855835, "loss_ce": 7.029987955320394e-06, "loss_iou": 0.2578125, "loss_num": 0.050537109375, "loss_xval": 0.76953125, "num_input_tokens_seen": 67468188, "step": 1076 }, { "epoch": 3.584026622296173, "grad_norm": 12.855040550231934, "learning_rate": 5e-06, "loss": 0.6479, "num_input_tokens_seen": 67529724, "step": 1077 }, { "epoch": 3.584026622296173, "loss": 0.5244239568710327, "loss_ce": 9.931431122822687e-06, "loss_iou": 0.1318359375, "loss_num": 0.052490234375, "loss_xval": 0.5234375, "num_input_tokens_seen": 67529724, "step": 1077 }, { "epoch": 3.5873544093178036, "grad_norm": 20.388090133666992, "learning_rate": 5e-06, "loss": 0.69, "num_input_tokens_seen": 67591452, "step": 1078 }, { "epoch": 3.5873544093178036, "loss": 0.7222661972045898, "loss_ce": 0.00034232076723128557, "loss_iou": 0.255859375, "loss_num": 0.042236328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 67591452, "step": 1078 }, { "epoch": 3.590682196339434, "grad_norm": 15.090326309204102, "learning_rate": 5e-06, "loss": 0.7673, "num_input_tokens_seen": 67654424, "step": 1079 }, { "epoch": 3.590682196339434, "loss": 1.1384917497634888, "loss_ce": 0.00030815397622063756, "loss_iou": 0.400390625, "loss_num": 0.0673828125, "loss_xval": 1.140625, "num_input_tokens_seen": 67654424, "step": 1079 }, { "epoch": 3.594009983361065, "grad_norm": 13.868745803833008, "learning_rate": 5e-06, "loss": 0.9698, "num_input_tokens_seen": 67715976, "step": 1080 }, { "epoch": 3.594009983361065, "loss": 0.9520023465156555, "loss_ce": 9.806698653846979e-05, "loss_iou": 0.361328125, "loss_num": 0.046142578125, "loss_xval": 0.953125, "num_input_tokens_seen": 67715976, "step": 1080 }, { "epoch": 3.5973377703826954, "grad_norm": 15.400452613830566, "learning_rate": 5e-06, "loss": 0.6667, "num_input_tokens_seen": 67780088, "step": 1081 }, { "epoch": 3.5973377703826954, "loss": 0.7208122611045837, "loss_ce": 0.00010916421160800382, "loss_iou": 0.302734375, "loss_num": 0.0228271484375, "loss_xval": 0.71875, "num_input_tokens_seen": 67780088, "step": 1081 }, { "epoch": 3.600665557404326, "grad_norm": 13.722559928894043, "learning_rate": 5e-06, "loss": 0.7529, "num_input_tokens_seen": 67842448, "step": 1082 }, { "epoch": 3.600665557404326, "loss": 0.877677857875824, "loss_ce": 0.001701317960396409, "loss_iou": 0.294921875, "loss_num": 0.057373046875, "loss_xval": 0.875, "num_input_tokens_seen": 67842448, "step": 1082 }, { "epoch": 3.6039933444259566, "grad_norm": 6.793117523193359, "learning_rate": 5e-06, "loss": 0.5181, "num_input_tokens_seen": 67903352, "step": 1083 }, { "epoch": 3.6039933444259566, "loss": 0.3806585967540741, "loss_ce": 0.00016542298544663936, "loss_iou": 0.08251953125, "loss_num": 0.04296875, "loss_xval": 0.380859375, "num_input_tokens_seen": 67903352, "step": 1083 }, { "epoch": 3.6073211314475873, "grad_norm": 12.197762489318848, "learning_rate": 5e-06, "loss": 0.6014, "num_input_tokens_seen": 67965112, "step": 1084 }, { "epoch": 3.6073211314475873, "loss": 0.6365228891372681, "loss_ce": 0.0007806817884556949, "loss_iou": 0.23828125, "loss_num": 0.03173828125, "loss_xval": 0.63671875, "num_input_tokens_seen": 67965112, "step": 1084 }, { "epoch": 3.610648918469218, "grad_norm": 10.732518196105957, "learning_rate": 5e-06, "loss": 0.5105, "num_input_tokens_seen": 68026472, "step": 1085 }, { "epoch": 3.610648918469218, "loss": 0.5152493715286255, "loss_ce": 0.00023474822228308767, "loss_iou": 0.125, "loss_num": 0.052978515625, "loss_xval": 0.515625, "num_input_tokens_seen": 68026472, "step": 1085 }, { "epoch": 3.6139767054908485, "grad_norm": 7.907350540161133, "learning_rate": 5e-06, "loss": 0.6326, "num_input_tokens_seen": 68089332, "step": 1086 }, { "epoch": 3.6139767054908485, "loss": 0.6826620697975159, "loss_ce": 0.0005331688444130123, "loss_iou": 0.2216796875, "loss_num": 0.0478515625, "loss_xval": 0.68359375, "num_input_tokens_seen": 68089332, "step": 1086 }, { "epoch": 3.617304492512479, "grad_norm": 10.400790214538574, "learning_rate": 5e-06, "loss": 0.5931, "num_input_tokens_seen": 68149524, "step": 1087 }, { "epoch": 3.617304492512479, "loss": 0.505582869052887, "loss_ce": 0.0002728076360654086, "loss_iou": 0.14453125, "loss_num": 0.043212890625, "loss_xval": 0.50390625, "num_input_tokens_seen": 68149524, "step": 1087 }, { "epoch": 3.6206322795341097, "grad_norm": 13.637054443359375, "learning_rate": 5e-06, "loss": 0.5593, "num_input_tokens_seen": 68211488, "step": 1088 }, { "epoch": 3.6206322795341097, "loss": 0.5451844334602356, "loss_ce": 1.8409613403491676e-05, "loss_iou": 0.2080078125, "loss_num": 0.0257568359375, "loss_xval": 0.546875, "num_input_tokens_seen": 68211488, "step": 1088 }, { "epoch": 3.6239600665557403, "grad_norm": 19.496362686157227, "learning_rate": 5e-06, "loss": 0.8044, "num_input_tokens_seen": 68274544, "step": 1089 }, { "epoch": 3.6239600665557403, "loss": 0.8190243244171143, "loss_ce": 0.00017665771883912385, "loss_iou": 0.283203125, "loss_num": 0.05029296875, "loss_xval": 0.8203125, "num_input_tokens_seen": 68274544, "step": 1089 }, { "epoch": 3.627287853577371, "grad_norm": 17.207883834838867, "learning_rate": 5e-06, "loss": 0.8453, "num_input_tokens_seen": 68337468, "step": 1090 }, { "epoch": 3.627287853577371, "loss": 0.8036708831787109, "loss_ce": 0.0005702917696908116, "loss_iou": 0.26953125, "loss_num": 0.052978515625, "loss_xval": 0.8046875, "num_input_tokens_seen": 68337468, "step": 1090 }, { "epoch": 3.6306156405990015, "grad_norm": 19.721860885620117, "learning_rate": 5e-06, "loss": 0.5562, "num_input_tokens_seen": 68400360, "step": 1091 }, { "epoch": 3.6306156405990015, "loss": 0.6015818119049072, "loss_ce": 1.9314107703394257e-05, "loss_iou": 0.2177734375, "loss_num": 0.033203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 68400360, "step": 1091 }, { "epoch": 3.633943427620632, "grad_norm": 21.86146354675293, "learning_rate": 5e-06, "loss": 0.7548, "num_input_tokens_seen": 68463472, "step": 1092 }, { "epoch": 3.633943427620632, "loss": 0.5142448544502258, "loss_ce": 0.0004509001155383885, "loss_iou": 0.177734375, "loss_num": 0.031494140625, "loss_xval": 0.515625, "num_input_tokens_seen": 68463472, "step": 1092 }, { "epoch": 3.6372712146422628, "grad_norm": 13.99090576171875, "learning_rate": 5e-06, "loss": 0.7055, "num_input_tokens_seen": 68526604, "step": 1093 }, { "epoch": 3.6372712146422628, "loss": 0.7171334028244019, "loss_ce": 9.236542973667383e-05, "loss_iou": 0.26171875, "loss_num": 0.03857421875, "loss_xval": 0.71875, "num_input_tokens_seen": 68526604, "step": 1093 }, { "epoch": 3.6405990016638934, "grad_norm": 14.313507080078125, "learning_rate": 5e-06, "loss": 0.8862, "num_input_tokens_seen": 68589700, "step": 1094 }, { "epoch": 3.6405990016638934, "loss": 0.9934593439102173, "loss_ce": 0.0005393972969613969, "loss_iou": 0.357421875, "loss_num": 0.0556640625, "loss_xval": 0.9921875, "num_input_tokens_seen": 68589700, "step": 1094 }, { "epoch": 3.643926788685524, "grad_norm": 30.540050506591797, "learning_rate": 5e-06, "loss": 0.6676, "num_input_tokens_seen": 68651616, "step": 1095 }, { "epoch": 3.643926788685524, "loss": 0.7543338537216187, "loss_ce": 0.002380660967901349, "loss_iou": 0.296875, "loss_num": 0.03125, "loss_xval": 0.75, "num_input_tokens_seen": 68651616, "step": 1095 }, { "epoch": 3.6472545757071546, "grad_norm": 25.96578025817871, "learning_rate": 5e-06, "loss": 0.6844, "num_input_tokens_seen": 68714936, "step": 1096 }, { "epoch": 3.6472545757071546, "loss": 0.6945148706436157, "loss_ce": 5.6843869970180094e-05, "loss_iou": 0.259765625, "loss_num": 0.03515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 68714936, "step": 1096 }, { "epoch": 3.6505823627287852, "grad_norm": 23.38460350036621, "learning_rate": 5e-06, "loss": 0.8717, "num_input_tokens_seen": 68777808, "step": 1097 }, { "epoch": 3.6505823627287852, "loss": 0.8712129592895508, "loss_ce": 0.0006074883858673275, "loss_iou": 0.251953125, "loss_num": 0.07275390625, "loss_xval": 0.87109375, "num_input_tokens_seen": 68777808, "step": 1097 }, { "epoch": 3.653910149750416, "grad_norm": 27.162778854370117, "learning_rate": 5e-06, "loss": 0.9425, "num_input_tokens_seen": 68840680, "step": 1098 }, { "epoch": 3.653910149750416, "loss": 1.1504534482955933, "loss_ce": 0.00030695440364070237, "loss_iou": 0.392578125, "loss_num": 0.0732421875, "loss_xval": 1.1484375, "num_input_tokens_seen": 68840680, "step": 1098 }, { "epoch": 3.6572379367720464, "grad_norm": 30.578449249267578, "learning_rate": 5e-06, "loss": 0.7774, "num_input_tokens_seen": 68903592, "step": 1099 }, { "epoch": 3.6572379367720464, "loss": 0.7029037475585938, "loss_ce": 2.2924290533410385e-05, "loss_iou": 0.1865234375, "loss_num": 0.06591796875, "loss_xval": 0.703125, "num_input_tokens_seen": 68903592, "step": 1099 }, { "epoch": 3.660565723793677, "grad_norm": 26.96510887145996, "learning_rate": 5e-06, "loss": 0.8418, "num_input_tokens_seen": 68964652, "step": 1100 }, { "epoch": 3.660565723793677, "loss": 0.8413865566253662, "loss_ce": 7.792103860992938e-05, "loss_iou": 0.318359375, "loss_num": 0.040771484375, "loss_xval": 0.83984375, "num_input_tokens_seen": 68964652, "step": 1100 }, { "epoch": 3.6638935108153077, "grad_norm": 12.595534324645996, "learning_rate": 5e-06, "loss": 0.6941, "num_input_tokens_seen": 69027888, "step": 1101 }, { "epoch": 3.6638935108153077, "loss": 0.9148514866828918, "loss_ce": 0.0005448164301924407, "loss_iou": 0.3125, "loss_num": 0.05810546875, "loss_xval": 0.9140625, "num_input_tokens_seen": 69027888, "step": 1101 }, { "epoch": 3.6672212978369383, "grad_norm": 14.556594848632812, "learning_rate": 5e-06, "loss": 0.6295, "num_input_tokens_seen": 69089104, "step": 1102 }, { "epoch": 3.6672212978369383, "loss": 0.7465066909790039, "loss_ce": 0.0006570467376150191, "loss_iou": 0.234375, "loss_num": 0.055419921875, "loss_xval": 0.74609375, "num_input_tokens_seen": 69089104, "step": 1102 }, { "epoch": 3.670549084858569, "grad_norm": 18.188962936401367, "learning_rate": 5e-06, "loss": 0.6833, "num_input_tokens_seen": 69151656, "step": 1103 }, { "epoch": 3.670549084858569, "loss": 0.7938522696495056, "loss_ce": 0.0001511187438154593, "loss_iou": 0.267578125, "loss_num": 0.051513671875, "loss_xval": 0.79296875, "num_input_tokens_seen": 69151656, "step": 1103 }, { "epoch": 3.6738768718801995, "grad_norm": 19.146791458129883, "learning_rate": 5e-06, "loss": 0.8265, "num_input_tokens_seen": 69215964, "step": 1104 }, { "epoch": 3.6738768718801995, "loss": 0.9214905500411987, "loss_ce": 0.00010388476948719472, "loss_iou": 0.376953125, "loss_num": 0.033935546875, "loss_xval": 0.921875, "num_input_tokens_seen": 69215964, "step": 1104 }, { "epoch": 3.67720465890183, "grad_norm": 10.40029239654541, "learning_rate": 5e-06, "loss": 0.8867, "num_input_tokens_seen": 69277632, "step": 1105 }, { "epoch": 3.67720465890183, "loss": 1.0277212858200073, "loss_ce": 0.00037753285141661763, "loss_iou": 0.30078125, "loss_num": 0.0849609375, "loss_xval": 1.03125, "num_input_tokens_seen": 69277632, "step": 1105 }, { "epoch": 3.6805324459234607, "grad_norm": 11.056280136108398, "learning_rate": 5e-06, "loss": 0.5556, "num_input_tokens_seen": 69338932, "step": 1106 }, { "epoch": 3.6805324459234607, "loss": 0.41542482376098633, "loss_ce": 1.9551252989913337e-05, "loss_iou": 0.10888671875, "loss_num": 0.03955078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 69338932, "step": 1106 }, { "epoch": 3.6838602329450914, "grad_norm": 10.586615562438965, "learning_rate": 5e-06, "loss": 0.5498, "num_input_tokens_seen": 69400704, "step": 1107 }, { "epoch": 3.6838602329450914, "loss": 0.3909175992012024, "loss_ce": 4.845410876441747e-05, "loss_iou": 0.0654296875, "loss_num": 0.052001953125, "loss_xval": 0.390625, "num_input_tokens_seen": 69400704, "step": 1107 }, { "epoch": 3.687188019966722, "grad_norm": 15.897762298583984, "learning_rate": 5e-06, "loss": 0.7618, "num_input_tokens_seen": 69463240, "step": 1108 }, { "epoch": 3.687188019966722, "loss": 0.6331614851951599, "loss_ce": 0.00025742477737367153, "loss_iou": 0.208984375, "loss_num": 0.043212890625, "loss_xval": 0.6328125, "num_input_tokens_seen": 69463240, "step": 1108 }, { "epoch": 3.690515806988353, "grad_norm": 15.982316017150879, "learning_rate": 5e-06, "loss": 0.8545, "num_input_tokens_seen": 69527000, "step": 1109 }, { "epoch": 3.690515806988353, "loss": 0.8708171844482422, "loss_ce": 0.00021170845138840377, "loss_iou": 0.314453125, "loss_num": 0.04833984375, "loss_xval": 0.87109375, "num_input_tokens_seen": 69527000, "step": 1109 }, { "epoch": 3.6938435940099836, "grad_norm": 16.388246536254883, "learning_rate": 5e-06, "loss": 0.8336, "num_input_tokens_seen": 69587628, "step": 1110 }, { "epoch": 3.6938435940099836, "loss": 0.7268214225769043, "loss_ce": 1.4772957911191043e-05, "loss_iou": 0.25390625, "loss_num": 0.0439453125, "loss_xval": 0.7265625, "num_input_tokens_seen": 69587628, "step": 1110 }, { "epoch": 3.6971713810316142, "grad_norm": 16.924457550048828, "learning_rate": 5e-06, "loss": 0.7034, "num_input_tokens_seen": 69651040, "step": 1111 }, { "epoch": 3.6971713810316142, "loss": 0.9893540740013123, "loss_ce": 0.0010728809284046292, "loss_iou": 0.376953125, "loss_num": 0.046875, "loss_xval": 0.98828125, "num_input_tokens_seen": 69651040, "step": 1111 }, { "epoch": 3.700499168053245, "grad_norm": 14.321928024291992, "learning_rate": 5e-06, "loss": 0.7839, "num_input_tokens_seen": 69714752, "step": 1112 }, { "epoch": 3.700499168053245, "loss": 0.7039906978607178, "loss_ce": 0.00013329835201147944, "loss_iou": 0.23046875, "loss_num": 0.048583984375, "loss_xval": 0.703125, "num_input_tokens_seen": 69714752, "step": 1112 }, { "epoch": 3.7038269550748755, "grad_norm": 17.317468643188477, "learning_rate": 5e-06, "loss": 0.6627, "num_input_tokens_seen": 69776936, "step": 1113 }, { "epoch": 3.7038269550748755, "loss": 0.7631000876426697, "loss_ce": 0.0011372218141332269, "loss_iou": 0.228515625, "loss_num": 0.061279296875, "loss_xval": 0.76171875, "num_input_tokens_seen": 69776936, "step": 1113 }, { "epoch": 3.707154742096506, "grad_norm": 19.601694107055664, "learning_rate": 5e-06, "loss": 0.8247, "num_input_tokens_seen": 69838948, "step": 1114 }, { "epoch": 3.707154742096506, "loss": 1.0055017471313477, "loss_ce": 0.00061885203467682, "loss_iou": 0.357421875, "loss_num": 0.05810546875, "loss_xval": 1.0078125, "num_input_tokens_seen": 69838948, "step": 1114 }, { "epoch": 3.7104825291181367, "grad_norm": 24.8586368560791, "learning_rate": 5e-06, "loss": 0.8095, "num_input_tokens_seen": 69901716, "step": 1115 }, { "epoch": 3.7104825291181367, "loss": 0.7781587839126587, "loss_ce": 0.0008150676148943603, "loss_iou": 0.2333984375, "loss_num": 0.06201171875, "loss_xval": 0.77734375, "num_input_tokens_seen": 69901716, "step": 1115 }, { "epoch": 3.7138103161397673, "grad_norm": 24.516605377197266, "learning_rate": 5e-06, "loss": 0.5332, "num_input_tokens_seen": 69964648, "step": 1116 }, { "epoch": 3.7138103161397673, "loss": 0.4899260997772217, "loss_ce": 0.00014946938608773053, "loss_iou": 0.1650390625, "loss_num": 0.031982421875, "loss_xval": 0.490234375, "num_input_tokens_seen": 69964648, "step": 1116 }, { "epoch": 3.717138103161398, "grad_norm": 7.976213455200195, "learning_rate": 5e-06, "loss": 0.687, "num_input_tokens_seen": 70027600, "step": 1117 }, { "epoch": 3.717138103161398, "loss": 0.7671523690223694, "loss_ce": 0.0006728653679601848, "loss_iou": 0.28515625, "loss_num": 0.0390625, "loss_xval": 0.765625, "num_input_tokens_seen": 70027600, "step": 1117 }, { "epoch": 3.7204658901830285, "grad_norm": 12.2144136428833, "learning_rate": 5e-06, "loss": 0.7044, "num_input_tokens_seen": 70090940, "step": 1118 }, { "epoch": 3.7204658901830285, "loss": 0.820762574672699, "loss_ce": 0.00045003159902989864, "loss_iou": 0.314453125, "loss_num": 0.0380859375, "loss_xval": 0.8203125, "num_input_tokens_seen": 70090940, "step": 1118 }, { "epoch": 3.723793677204659, "grad_norm": 16.47080421447754, "learning_rate": 5e-06, "loss": 1.0177, "num_input_tokens_seen": 70154348, "step": 1119 }, { "epoch": 3.723793677204659, "loss": 0.7807816863059998, "loss_ce": 1.99810692720348e-05, "loss_iou": 0.27734375, "loss_num": 0.045654296875, "loss_xval": 0.78125, "num_input_tokens_seen": 70154348, "step": 1119 }, { "epoch": 3.7271214642262898, "grad_norm": 17.003631591796875, "learning_rate": 5e-06, "loss": 0.7932, "num_input_tokens_seen": 70218336, "step": 1120 }, { "epoch": 3.7271214642262898, "loss": 0.5653332471847534, "loss_ce": 0.0012462872546166182, "loss_iou": 0.1904296875, "loss_num": 0.03662109375, "loss_xval": 0.5625, "num_input_tokens_seen": 70218336, "step": 1120 }, { "epoch": 3.7304492512479204, "grad_norm": 21.877683639526367, "learning_rate": 5e-06, "loss": 0.7007, "num_input_tokens_seen": 70280388, "step": 1121 }, { "epoch": 3.7304492512479204, "loss": 0.8100776672363281, "loss_ce": 0.001972246915102005, "loss_iou": 0.28125, "loss_num": 0.049072265625, "loss_xval": 0.80859375, "num_input_tokens_seen": 70280388, "step": 1121 }, { "epoch": 3.733777038269551, "grad_norm": 37.3043212890625, "learning_rate": 5e-06, "loss": 0.8237, "num_input_tokens_seen": 70342540, "step": 1122 }, { "epoch": 3.733777038269551, "loss": 0.7199664115905762, "loss_ce": 0.0003009082574862987, "loss_iou": 0.259765625, "loss_num": 0.040283203125, "loss_xval": 0.71875, "num_input_tokens_seen": 70342540, "step": 1122 }, { "epoch": 3.7371048252911816, "grad_norm": 33.500179290771484, "learning_rate": 5e-06, "loss": 0.8162, "num_input_tokens_seen": 70406484, "step": 1123 }, { "epoch": 3.7371048252911816, "loss": 0.982054591178894, "loss_ce": 0.003416831837967038, "loss_iou": 0.3046875, "loss_num": 0.07373046875, "loss_xval": 0.98046875, "num_input_tokens_seen": 70406484, "step": 1123 }, { "epoch": 3.740432612312812, "grad_norm": 22.78214454650879, "learning_rate": 5e-06, "loss": 0.6243, "num_input_tokens_seen": 70469180, "step": 1124 }, { "epoch": 3.740432612312812, "loss": 0.569847583770752, "loss_ce": 2.3391572540276684e-05, "loss_iou": 0.2119140625, "loss_num": 0.0291748046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 70469180, "step": 1124 }, { "epoch": 3.743760399334443, "grad_norm": 11.87626838684082, "learning_rate": 5e-06, "loss": 0.6247, "num_input_tokens_seen": 70532816, "step": 1125 }, { "epoch": 3.743760399334443, "loss": 0.6783220171928406, "loss_ce": 0.00034350171335972846, "loss_iou": 0.255859375, "loss_num": 0.033203125, "loss_xval": 0.6796875, "num_input_tokens_seen": 70532816, "step": 1125 }, { "epoch": 3.7470881863560734, "grad_norm": 10.649834632873535, "learning_rate": 5e-06, "loss": 0.7844, "num_input_tokens_seen": 70594144, "step": 1126 }, { "epoch": 3.7470881863560734, "loss": 0.8571797609329224, "loss_ce": 0.0007344760233536363, "loss_iou": 0.28515625, "loss_num": 0.057373046875, "loss_xval": 0.85546875, "num_input_tokens_seen": 70594144, "step": 1126 }, { "epoch": 3.750415973377704, "grad_norm": 10.80798625946045, "learning_rate": 5e-06, "loss": 0.7674, "num_input_tokens_seen": 70656980, "step": 1127 }, { "epoch": 3.750415973377704, "loss": 0.7541730999946594, "loss_ce": 2.2729434931534342e-05, "loss_iou": 0.259765625, "loss_num": 0.046875, "loss_xval": 0.75390625, "num_input_tokens_seen": 70656980, "step": 1127 }, { "epoch": 3.7537437603993347, "grad_norm": 19.034072875976562, "learning_rate": 5e-06, "loss": 0.8665, "num_input_tokens_seen": 70719136, "step": 1128 }, { "epoch": 3.7537437603993347, "loss": 0.6461155414581299, "loss_ce": 0.0006077055586501956, "loss_iou": 0.2021484375, "loss_num": 0.04833984375, "loss_xval": 0.64453125, "num_input_tokens_seen": 70719136, "step": 1128 }, { "epoch": 3.7570715474209653, "grad_norm": 9.131019592285156, "learning_rate": 5e-06, "loss": 0.7573, "num_input_tokens_seen": 70780740, "step": 1129 }, { "epoch": 3.7570715474209653, "loss": 0.5673661231994629, "loss_ce": 0.0004715931718237698, "loss_iou": 0.1845703125, "loss_num": 0.039306640625, "loss_xval": 0.56640625, "num_input_tokens_seen": 70780740, "step": 1129 }, { "epoch": 3.760399334442596, "grad_norm": 28.324533462524414, "learning_rate": 5e-06, "loss": 0.8675, "num_input_tokens_seen": 70844248, "step": 1130 }, { "epoch": 3.760399334442596, "loss": 0.6069740056991577, "loss_ce": 4.039845953229815e-05, "loss_iou": 0.2099609375, "loss_num": 0.037353515625, "loss_xval": 0.60546875, "num_input_tokens_seen": 70844248, "step": 1130 }, { "epoch": 3.7637271214642265, "grad_norm": 60.507694244384766, "learning_rate": 5e-06, "loss": 0.7213, "num_input_tokens_seen": 70908060, "step": 1131 }, { "epoch": 3.7637271214642265, "loss": 0.4517989158630371, "loss_ce": 1.671621430432424e-05, "loss_iou": 0.119140625, "loss_num": 0.04296875, "loss_xval": 0.451171875, "num_input_tokens_seen": 70908060, "step": 1131 }, { "epoch": 3.767054908485857, "grad_norm": 31.26624870300293, "learning_rate": 5e-06, "loss": 0.8275, "num_input_tokens_seen": 70970564, "step": 1132 }, { "epoch": 3.767054908485857, "loss": 0.7981075644493103, "loss_ce": 1.18794669106137e-05, "loss_iou": 0.271484375, "loss_num": 0.05126953125, "loss_xval": 0.796875, "num_input_tokens_seen": 70970564, "step": 1132 }, { "epoch": 3.7703826955074877, "grad_norm": 15.183494567871094, "learning_rate": 5e-06, "loss": 0.8203, "num_input_tokens_seen": 71031756, "step": 1133 }, { "epoch": 3.7703826955074877, "loss": 1.0462652444839478, "loss_ce": 0.0003667599812615663, "loss_iou": 0.337890625, "loss_num": 0.07373046875, "loss_xval": 1.046875, "num_input_tokens_seen": 71031756, "step": 1133 }, { "epoch": 3.7737104825291183, "grad_norm": 7.543668270111084, "learning_rate": 5e-06, "loss": 0.4053, "num_input_tokens_seen": 71093600, "step": 1134 }, { "epoch": 3.7737104825291183, "loss": 0.4197523295879364, "loss_ce": 0.0011732416460290551, "loss_iou": 0.1689453125, "loss_num": 0.0162353515625, "loss_xval": 0.41796875, "num_input_tokens_seen": 71093600, "step": 1134 }, { "epoch": 3.777038269550749, "grad_norm": 8.871347427368164, "learning_rate": 5e-06, "loss": 0.559, "num_input_tokens_seen": 71155160, "step": 1135 }, { "epoch": 3.777038269550749, "loss": 0.5367743372917175, "loss_ce": 0.00015322788385674357, "loss_iou": 0.1337890625, "loss_num": 0.0537109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 71155160, "step": 1135 }, { "epoch": 3.7803660565723796, "grad_norm": 14.673795700073242, "learning_rate": 5e-06, "loss": 0.8169, "num_input_tokens_seen": 71218140, "step": 1136 }, { "epoch": 3.7803660565723796, "loss": 0.9330596923828125, "loss_ce": 0.00141904316842556, "loss_iou": 0.32421875, "loss_num": 0.05712890625, "loss_xval": 0.9296875, "num_input_tokens_seen": 71218140, "step": 1136 }, { "epoch": 3.78369384359401, "grad_norm": 18.478084564208984, "learning_rate": 5e-06, "loss": 0.7599, "num_input_tokens_seen": 71281304, "step": 1137 }, { "epoch": 3.78369384359401, "loss": 0.641684353351593, "loss_ce": 0.0005710768164135516, "loss_iou": 0.2451171875, "loss_num": 0.0302734375, "loss_xval": 0.640625, "num_input_tokens_seen": 71281304, "step": 1137 }, { "epoch": 3.787021630615641, "grad_norm": 19.889446258544922, "learning_rate": 5e-06, "loss": 0.7246, "num_input_tokens_seen": 71343856, "step": 1138 }, { "epoch": 3.787021630615641, "loss": 1.0464940071105957, "loss_ce": 0.0010838445741683245, "loss_iou": 0.36328125, "loss_num": 0.06396484375, "loss_xval": 1.046875, "num_input_tokens_seen": 71343856, "step": 1138 }, { "epoch": 3.7903494176372714, "grad_norm": 22.445941925048828, "learning_rate": 5e-06, "loss": 0.7391, "num_input_tokens_seen": 71407460, "step": 1139 }, { "epoch": 3.7903494176372714, "loss": 0.5872915387153625, "loss_ce": 0.0004995546769350767, "loss_iou": 0.158203125, "loss_num": 0.05419921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 71407460, "step": 1139 }, { "epoch": 3.793677204658902, "grad_norm": 19.468961715698242, "learning_rate": 5e-06, "loss": 0.8335, "num_input_tokens_seen": 71469920, "step": 1140 }, { "epoch": 3.793677204658902, "loss": 0.7757041454315186, "loss_ce": 0.000801790738478303, "loss_iou": 0.26953125, "loss_num": 0.04736328125, "loss_xval": 0.7734375, "num_input_tokens_seen": 71469920, "step": 1140 }, { "epoch": 3.7970049916805326, "grad_norm": 20.548389434814453, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 71533732, "step": 1141 }, { "epoch": 3.7970049916805326, "loss": 0.8974963426589966, "loss_ce": 0.0004626529407687485, "loss_iou": 0.337890625, "loss_num": 0.044189453125, "loss_xval": 0.8984375, "num_input_tokens_seen": 71533732, "step": 1141 }, { "epoch": 3.8003327787021632, "grad_norm": 16.472898483276367, "learning_rate": 5e-06, "loss": 0.9413, "num_input_tokens_seen": 71596956, "step": 1142 }, { "epoch": 3.8003327787021632, "loss": 0.8280500173568726, "loss_ce": 0.0021223120857030153, "loss_iou": 0.28515625, "loss_num": 0.051513671875, "loss_xval": 0.82421875, "num_input_tokens_seen": 71596956, "step": 1142 }, { "epoch": 3.803660565723794, "grad_norm": 21.399391174316406, "learning_rate": 5e-06, "loss": 0.5512, "num_input_tokens_seen": 71658996, "step": 1143 }, { "epoch": 3.803660565723794, "loss": 0.4864642024040222, "loss_ce": 0.0005022773984819651, "loss_iou": 0.1376953125, "loss_num": 0.0419921875, "loss_xval": 0.486328125, "num_input_tokens_seen": 71658996, "step": 1143 }, { "epoch": 3.8069883527454245, "grad_norm": 46.48324966430664, "learning_rate": 5e-06, "loss": 0.919, "num_input_tokens_seen": 71723260, "step": 1144 }, { "epoch": 3.8069883527454245, "loss": 0.8016548156738281, "loss_ce": 0.0006294648628681898, "loss_iou": 0.263671875, "loss_num": 0.05517578125, "loss_xval": 0.80078125, "num_input_tokens_seen": 71723260, "step": 1144 }, { "epoch": 3.810316139767055, "grad_norm": 10.930686950683594, "learning_rate": 5e-06, "loss": 0.5652, "num_input_tokens_seen": 71783848, "step": 1145 }, { "epoch": 3.810316139767055, "loss": 0.5992456674575806, "loss_ce": 6.35108444839716e-05, "loss_iou": 0.1591796875, "loss_num": 0.056640625, "loss_xval": 0.59765625, "num_input_tokens_seen": 71783848, "step": 1145 }, { "epoch": 3.8136439267886857, "grad_norm": 10.352221488952637, "learning_rate": 5e-06, "loss": 0.502, "num_input_tokens_seen": 71845680, "step": 1146 }, { "epoch": 3.8136439267886857, "loss": 0.5386055707931519, "loss_ce": 8.46451712277485e-06, "loss_iou": 0.189453125, "loss_num": 0.031982421875, "loss_xval": 0.5390625, "num_input_tokens_seen": 71845680, "step": 1146 }, { "epoch": 3.8169717138103163, "grad_norm": 22.350732803344727, "learning_rate": 5e-06, "loss": 0.8697, "num_input_tokens_seen": 71909288, "step": 1147 }, { "epoch": 3.8169717138103163, "loss": 0.5688712000846863, "loss_ce": 2.353949639655184e-05, "loss_iou": 0.208984375, "loss_num": 0.0302734375, "loss_xval": 0.5703125, "num_input_tokens_seen": 71909288, "step": 1147 }, { "epoch": 3.820299500831947, "grad_norm": 14.251409530639648, "learning_rate": 5e-06, "loss": 0.8319, "num_input_tokens_seen": 71971296, "step": 1148 }, { "epoch": 3.820299500831947, "loss": 1.011674165725708, "loss_ce": 0.00026065230485983193, "loss_iou": 0.345703125, "loss_num": 0.06396484375, "loss_xval": 1.0078125, "num_input_tokens_seen": 71971296, "step": 1148 }, { "epoch": 3.8236272878535775, "grad_norm": 14.31558609008789, "learning_rate": 5e-06, "loss": 0.8037, "num_input_tokens_seen": 72034244, "step": 1149 }, { "epoch": 3.8236272878535775, "loss": 0.6927176117897034, "loss_ce": 0.00015172874554991722, "loss_iou": 0.2333984375, "loss_num": 0.04541015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 72034244, "step": 1149 }, { "epoch": 3.826955074875208, "grad_norm": 10.787935256958008, "learning_rate": 5e-06, "loss": 0.4841, "num_input_tokens_seen": 72097340, "step": 1150 }, { "epoch": 3.826955074875208, "loss": 0.5313506126403809, "loss_ce": 0.0007719796267338097, "loss_iou": 0.181640625, "loss_num": 0.03369140625, "loss_xval": 0.53125, "num_input_tokens_seen": 72097340, "step": 1150 }, { "epoch": 3.8302828618968388, "grad_norm": 5.37345027923584, "learning_rate": 5e-06, "loss": 0.474, "num_input_tokens_seen": 72158732, "step": 1151 }, { "epoch": 3.8302828618968388, "loss": 0.5978557467460632, "loss_ce": 0.00019951070134993643, "loss_iou": 0.1416015625, "loss_num": 0.06298828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 72158732, "step": 1151 }, { "epoch": 3.8336106489184694, "grad_norm": 9.821433067321777, "learning_rate": 5e-06, "loss": 1.055, "num_input_tokens_seen": 72220200, "step": 1152 }, { "epoch": 3.8336106489184694, "loss": 1.0266846418380737, "loss_ce": 0.0005615626578219235, "loss_iou": 0.36328125, "loss_num": 0.059814453125, "loss_xval": 1.0234375, "num_input_tokens_seen": 72220200, "step": 1152 }, { "epoch": 3.8369384359401, "grad_norm": 9.921424865722656, "learning_rate": 5e-06, "loss": 0.5339, "num_input_tokens_seen": 72282260, "step": 1153 }, { "epoch": 3.8369384359401, "loss": 0.4890683889389038, "loss_ce": 0.0007871561101637781, "loss_iou": 0.1650390625, "loss_num": 0.03173828125, "loss_xval": 0.48828125, "num_input_tokens_seen": 72282260, "step": 1153 }, { "epoch": 3.8402662229617306, "grad_norm": 14.469644546508789, "learning_rate": 5e-06, "loss": 0.6844, "num_input_tokens_seen": 72345424, "step": 1154 }, { "epoch": 3.8402662229617306, "loss": 0.6094980239868164, "loss_ce": 0.0006113078561611474, "loss_iou": 0.232421875, "loss_num": 0.0289306640625, "loss_xval": 0.609375, "num_input_tokens_seen": 72345424, "step": 1154 }, { "epoch": 3.843594009983361, "grad_norm": 30.00448226928711, "learning_rate": 5e-06, "loss": 0.929, "num_input_tokens_seen": 72408536, "step": 1155 }, { "epoch": 3.843594009983361, "loss": 0.7703616619110107, "loss_ce": 0.0010744906030595303, "loss_iou": 0.259765625, "loss_num": 0.049560546875, "loss_xval": 0.76953125, "num_input_tokens_seen": 72408536, "step": 1155 }, { "epoch": 3.846921797004992, "grad_norm": 25.733505249023438, "learning_rate": 5e-06, "loss": 0.7221, "num_input_tokens_seen": 72470900, "step": 1156 }, { "epoch": 3.846921797004992, "loss": 0.7588719129562378, "loss_ce": 0.0005711028934456408, "loss_iou": 0.2138671875, "loss_num": 0.06591796875, "loss_xval": 0.7578125, "num_input_tokens_seen": 72470900, "step": 1156 }, { "epoch": 3.8502495840266224, "grad_norm": 21.96674346923828, "learning_rate": 5e-06, "loss": 0.7692, "num_input_tokens_seen": 72533296, "step": 1157 }, { "epoch": 3.8502495840266224, "loss": 0.9065471887588501, "loss_ce": 0.0005413006874732673, "loss_iou": 0.28515625, "loss_num": 0.06787109375, "loss_xval": 0.90625, "num_input_tokens_seen": 72533296, "step": 1157 }, { "epoch": 3.853577371048253, "grad_norm": 11.02868366241455, "learning_rate": 5e-06, "loss": 0.4585, "num_input_tokens_seen": 72594976, "step": 1158 }, { "epoch": 3.853577371048253, "loss": 0.4103836715221405, "loss_ce": 0.0008377685444429517, "loss_iou": 0.1142578125, "loss_num": 0.036376953125, "loss_xval": 0.41015625, "num_input_tokens_seen": 72594976, "step": 1158 }, { "epoch": 3.8569051580698837, "grad_norm": 8.885700225830078, "learning_rate": 5e-06, "loss": 0.7145, "num_input_tokens_seen": 72657744, "step": 1159 }, { "epoch": 3.8569051580698837, "loss": 0.6634550094604492, "loss_ce": 0.0004911944270133972, "loss_iou": 0.1748046875, "loss_num": 0.06298828125, "loss_xval": 0.6640625, "num_input_tokens_seen": 72657744, "step": 1159 }, { "epoch": 3.8602329450915143, "grad_norm": 15.352354049682617, "learning_rate": 5e-06, "loss": 0.8367, "num_input_tokens_seen": 72720896, "step": 1160 }, { "epoch": 3.8602329450915143, "loss": 0.7889204621315002, "loss_ce": 0.0008344987290911376, "loss_iou": 0.244140625, "loss_num": 0.059814453125, "loss_xval": 0.7890625, "num_input_tokens_seen": 72720896, "step": 1160 }, { "epoch": 3.863560732113145, "grad_norm": 10.166818618774414, "learning_rate": 5e-06, "loss": 0.6161, "num_input_tokens_seen": 72782896, "step": 1161 }, { "epoch": 3.863560732113145, "loss": 0.6218513250350952, "loss_ce": 2.5179688236676157e-05, "loss_iou": 0.1962890625, "loss_num": 0.0458984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 72782896, "step": 1161 }, { "epoch": 3.8668885191347755, "grad_norm": 14.400707244873047, "learning_rate": 5e-06, "loss": 0.5438, "num_input_tokens_seen": 72844244, "step": 1162 }, { "epoch": 3.8668885191347755, "loss": 0.5799353122711182, "loss_ce": 0.0016271895729005337, "loss_iou": 0.2177734375, "loss_num": 0.0286865234375, "loss_xval": 0.578125, "num_input_tokens_seen": 72844244, "step": 1162 }, { "epoch": 3.870216306156406, "grad_norm": 9.334291458129883, "learning_rate": 5e-06, "loss": 0.7988, "num_input_tokens_seen": 72907312, "step": 1163 }, { "epoch": 3.870216306156406, "loss": 0.9044045209884644, "loss_ce": 0.0003517647855915129, "loss_iou": 0.36328125, "loss_num": 0.0361328125, "loss_xval": 0.90234375, "num_input_tokens_seen": 72907312, "step": 1163 }, { "epoch": 3.8735440931780367, "grad_norm": 10.470926284790039, "learning_rate": 5e-06, "loss": 0.6329, "num_input_tokens_seen": 72967600, "step": 1164 }, { "epoch": 3.8735440931780367, "loss": 0.6138835549354553, "loss_ce": 0.0006023072055540979, "loss_iou": 0.2138671875, "loss_num": 0.037109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 72967600, "step": 1164 }, { "epoch": 3.8768718801996673, "grad_norm": 8.372231483459473, "learning_rate": 5e-06, "loss": 0.6501, "num_input_tokens_seen": 73029296, "step": 1165 }, { "epoch": 3.8768718801996673, "loss": 0.4854399263858795, "loss_ce": 0.000439322815509513, "loss_iou": 0.125, "loss_num": 0.046875, "loss_xval": 0.484375, "num_input_tokens_seen": 73029296, "step": 1165 }, { "epoch": 3.880199667221298, "grad_norm": 13.88845157623291, "learning_rate": 5e-06, "loss": 0.6841, "num_input_tokens_seen": 73092408, "step": 1166 }, { "epoch": 3.880199667221298, "loss": 0.5785520076751709, "loss_ce": 0.0009152430575340986, "loss_iou": 0.16796875, "loss_num": 0.04833984375, "loss_xval": 0.578125, "num_input_tokens_seen": 73092408, "step": 1166 }, { "epoch": 3.8835274542429286, "grad_norm": 16.01328468322754, "learning_rate": 5e-06, "loss": 0.8116, "num_input_tokens_seen": 73156212, "step": 1167 }, { "epoch": 3.8835274542429286, "loss": 0.8285574913024902, "loss_ce": 6.626629328820854e-05, "loss_iou": 0.306640625, "loss_num": 0.043212890625, "loss_xval": 0.828125, "num_input_tokens_seen": 73156212, "step": 1167 }, { "epoch": 3.886855241264559, "grad_norm": 24.061256408691406, "learning_rate": 5e-06, "loss": 0.7467, "num_input_tokens_seen": 73220336, "step": 1168 }, { "epoch": 3.886855241264559, "loss": 0.623063325881958, "loss_ce": 0.0005047806189395487, "loss_iou": 0.26171875, "loss_num": 0.020263671875, "loss_xval": 0.62109375, "num_input_tokens_seen": 73220336, "step": 1168 }, { "epoch": 3.89018302828619, "grad_norm": 24.542905807495117, "learning_rate": 5e-06, "loss": 0.8473, "num_input_tokens_seen": 73282624, "step": 1169 }, { "epoch": 3.89018302828619, "loss": 0.9383523464202881, "loss_ce": 0.0006082289037294686, "loss_iou": 0.34765625, "loss_num": 0.04833984375, "loss_xval": 0.9375, "num_input_tokens_seen": 73282624, "step": 1169 }, { "epoch": 3.8935108153078204, "grad_norm": 22.716054916381836, "learning_rate": 5e-06, "loss": 0.9282, "num_input_tokens_seen": 73346652, "step": 1170 }, { "epoch": 3.8935108153078204, "loss": 1.0615354776382446, "loss_ce": 1.2018746929243207e-05, "loss_iou": 0.384765625, "loss_num": 0.05810546875, "loss_xval": 1.0625, "num_input_tokens_seen": 73346652, "step": 1170 }, { "epoch": 3.896838602329451, "grad_norm": 16.922956466674805, "learning_rate": 5e-06, "loss": 0.4869, "num_input_tokens_seen": 73408296, "step": 1171 }, { "epoch": 3.896838602329451, "loss": 0.27920499444007874, "loss_ce": 0.0003353637584950775, "loss_iou": 0.0498046875, "loss_num": 0.035888671875, "loss_xval": 0.279296875, "num_input_tokens_seen": 73408296, "step": 1171 }, { "epoch": 3.9001663893510816, "grad_norm": 20.513999938964844, "learning_rate": 5e-06, "loss": 0.779, "num_input_tokens_seen": 73471588, "step": 1172 }, { "epoch": 3.9001663893510816, "loss": 0.7383794784545898, "loss_ce": 9.824803419178352e-05, "loss_iou": 0.2578125, "loss_num": 0.044189453125, "loss_xval": 0.73828125, "num_input_tokens_seen": 73471588, "step": 1172 }, { "epoch": 3.9034941763727122, "grad_norm": 22.318389892578125, "learning_rate": 5e-06, "loss": 0.8148, "num_input_tokens_seen": 73534576, "step": 1173 }, { "epoch": 3.9034941763727122, "loss": 0.6879972815513611, "loss_ce": 8.99755468708463e-06, "loss_iou": 0.271484375, "loss_num": 0.029052734375, "loss_xval": 0.6875, "num_input_tokens_seen": 73534576, "step": 1173 }, { "epoch": 3.906821963394343, "grad_norm": 14.50841236114502, "learning_rate": 5e-06, "loss": 0.4606, "num_input_tokens_seen": 73595300, "step": 1174 }, { "epoch": 3.906821963394343, "loss": 0.6296467185020447, "loss_ce": 8.019393135327846e-06, "loss_iou": 0.197265625, "loss_num": 0.047119140625, "loss_xval": 0.62890625, "num_input_tokens_seen": 73595300, "step": 1174 }, { "epoch": 3.9101497504159735, "grad_norm": 22.01475715637207, "learning_rate": 5e-06, "loss": 0.9739, "num_input_tokens_seen": 73658032, "step": 1175 }, { "epoch": 3.9101497504159735, "loss": 0.9515210390090942, "loss_ce": 0.0005932895583100617, "loss_iou": 0.34765625, "loss_num": 0.05078125, "loss_xval": 0.94921875, "num_input_tokens_seen": 73658032, "step": 1175 }, { "epoch": 3.913477537437604, "grad_norm": 15.7901611328125, "learning_rate": 5e-06, "loss": 0.7554, "num_input_tokens_seen": 73720572, "step": 1176 }, { "epoch": 3.913477537437604, "loss": 0.7041723132133484, "loss_ce": 0.00028435932472348213, "loss_iou": 0.263671875, "loss_num": 0.035400390625, "loss_xval": 0.703125, "num_input_tokens_seen": 73720572, "step": 1176 }, { "epoch": 3.9168053244592347, "grad_norm": 12.228435516357422, "learning_rate": 5e-06, "loss": 0.6967, "num_input_tokens_seen": 73782192, "step": 1177 }, { "epoch": 3.9168053244592347, "loss": 0.5940466523170471, "loss_ce": 0.00041872059227898717, "loss_iou": 0.2041015625, "loss_num": 0.037109375, "loss_xval": 0.59375, "num_input_tokens_seen": 73782192, "step": 1177 }, { "epoch": 3.9201331114808653, "grad_norm": 7.769267559051514, "learning_rate": 5e-06, "loss": 0.7102, "num_input_tokens_seen": 73842600, "step": 1178 }, { "epoch": 3.9201331114808653, "loss": 0.5037946701049805, "loss_ce": 0.0020857183262705803, "loss_iou": 0.10888671875, "loss_num": 0.056640625, "loss_xval": 0.5, "num_input_tokens_seen": 73842600, "step": 1178 }, { "epoch": 3.923460898502496, "grad_norm": 9.83575439453125, "learning_rate": 5e-06, "loss": 0.6174, "num_input_tokens_seen": 73902976, "step": 1179 }, { "epoch": 3.923460898502496, "loss": 0.7872860431671143, "loss_ce": 0.0006649384740740061, "loss_iou": 0.23828125, "loss_num": 0.061767578125, "loss_xval": 0.78515625, "num_input_tokens_seen": 73902976, "step": 1179 }, { "epoch": 3.9267886855241265, "grad_norm": 11.017817497253418, "learning_rate": 5e-06, "loss": 0.7529, "num_input_tokens_seen": 73965968, "step": 1180 }, { "epoch": 3.9267886855241265, "loss": 0.5365115404129028, "loss_ce": 0.0003787049208767712, "loss_iou": 0.189453125, "loss_num": 0.031494140625, "loss_xval": 0.53515625, "num_input_tokens_seen": 73965968, "step": 1180 }, { "epoch": 3.930116472545757, "grad_norm": 23.81562614440918, "learning_rate": 5e-06, "loss": 0.5873, "num_input_tokens_seen": 74026536, "step": 1181 }, { "epoch": 3.930116472545757, "loss": 0.49497172236442566, "loss_ce": 0.00028176925843581557, "loss_iou": 0.171875, "loss_num": 0.0303955078125, "loss_xval": 0.494140625, "num_input_tokens_seen": 74026536, "step": 1181 }, { "epoch": 3.9334442595673877, "grad_norm": 11.67773151397705, "learning_rate": 5e-06, "loss": 0.6669, "num_input_tokens_seen": 74088552, "step": 1182 }, { "epoch": 3.9334442595673877, "loss": 0.8013591766357422, "loss_ce": 0.0004711093788500875, "loss_iou": 0.275390625, "loss_num": 0.0498046875, "loss_xval": 0.80078125, "num_input_tokens_seen": 74088552, "step": 1182 }, { "epoch": 3.9367720465890184, "grad_norm": 27.18492317199707, "learning_rate": 5e-06, "loss": 0.8022, "num_input_tokens_seen": 74150352, "step": 1183 }, { "epoch": 3.9367720465890184, "loss": 0.6742436289787292, "loss_ce": 4.931000876240432e-05, "loss_iou": 0.181640625, "loss_num": 0.062255859375, "loss_xval": 0.67578125, "num_input_tokens_seen": 74150352, "step": 1183 }, { "epoch": 3.940099833610649, "grad_norm": 40.752525329589844, "learning_rate": 5e-06, "loss": 0.7512, "num_input_tokens_seen": 74213272, "step": 1184 }, { "epoch": 3.940099833610649, "loss": 0.7507821321487427, "loss_ce": 0.0007821011822670698, "loss_iou": 0.287109375, "loss_num": 0.035888671875, "loss_xval": 0.75, "num_input_tokens_seen": 74213272, "step": 1184 }, { "epoch": 3.9434276206322796, "grad_norm": 38.456844329833984, "learning_rate": 5e-06, "loss": 0.8477, "num_input_tokens_seen": 74275112, "step": 1185 }, { "epoch": 3.9434276206322796, "loss": 0.9110428094863892, "loss_ce": 0.0011306656524538994, "loss_iou": 0.310546875, "loss_num": 0.05810546875, "loss_xval": 0.91015625, "num_input_tokens_seen": 74275112, "step": 1185 }, { "epoch": 3.94675540765391, "grad_norm": 12.272015571594238, "learning_rate": 5e-06, "loss": 0.4226, "num_input_tokens_seen": 74338004, "step": 1186 }, { "epoch": 3.94675540765391, "loss": 0.41022905707359314, "loss_ce": 1.1784154594352003e-05, "loss_iou": 0.140625, "loss_num": 0.0260009765625, "loss_xval": 0.41015625, "num_input_tokens_seen": 74338004, "step": 1186 }, { "epoch": 3.950083194675541, "grad_norm": 9.29517650604248, "learning_rate": 5e-06, "loss": 0.8527, "num_input_tokens_seen": 74400360, "step": 1187 }, { "epoch": 3.950083194675541, "loss": 0.9925890564918518, "loss_ce": 0.00052362319547683, "loss_iou": 0.326171875, "loss_num": 0.06787109375, "loss_xval": 0.9921875, "num_input_tokens_seen": 74400360, "step": 1187 }, { "epoch": 3.9534109816971714, "grad_norm": 12.79312515258789, "learning_rate": 5e-06, "loss": 0.7506, "num_input_tokens_seen": 74461460, "step": 1188 }, { "epoch": 3.9534109816971714, "loss": 0.6777966022491455, "loss_ce": 0.0003063697076868266, "loss_iou": 0.2412109375, "loss_num": 0.0390625, "loss_xval": 0.67578125, "num_input_tokens_seen": 74461460, "step": 1188 }, { "epoch": 3.956738768718802, "grad_norm": 14.106738090515137, "learning_rate": 5e-06, "loss": 0.6894, "num_input_tokens_seen": 74524864, "step": 1189 }, { "epoch": 3.956738768718802, "loss": 0.5103456974029541, "loss_ce": 9.175027662422508e-05, "loss_iou": 0.1845703125, "loss_num": 0.0281982421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 74524864, "step": 1189 }, { "epoch": 3.9600665557404326, "grad_norm": 17.29133415222168, "learning_rate": 5e-06, "loss": 0.5445, "num_input_tokens_seen": 74586092, "step": 1190 }, { "epoch": 3.9600665557404326, "loss": 0.3772073984146118, "loss_ce": 1.0112635209225118e-05, "loss_iou": 0.09765625, "loss_num": 0.036376953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 74586092, "step": 1190 }, { "epoch": 3.9633943427620633, "grad_norm": 9.60549545288086, "learning_rate": 5e-06, "loss": 0.7547, "num_input_tokens_seen": 74648440, "step": 1191 }, { "epoch": 3.9633943427620633, "loss": 1.0421998500823975, "loss_ce": 0.0004516915651038289, "loss_iou": 0.396484375, "loss_num": 0.05029296875, "loss_xval": 1.0390625, "num_input_tokens_seen": 74648440, "step": 1191 }, { "epoch": 3.966722129783694, "grad_norm": 24.30030059814453, "learning_rate": 5e-06, "loss": 0.7511, "num_input_tokens_seen": 74711176, "step": 1192 }, { "epoch": 3.966722129783694, "loss": 0.7426217198371887, "loss_ce": 0.001899072783999145, "loss_iou": 0.212890625, "loss_num": 0.06298828125, "loss_xval": 0.7421875, "num_input_tokens_seen": 74711176, "step": 1192 }, { "epoch": 3.9700499168053245, "grad_norm": 22.564708709716797, "learning_rate": 5e-06, "loss": 0.5895, "num_input_tokens_seen": 74771980, "step": 1193 }, { "epoch": 3.9700499168053245, "loss": 0.6924465894699097, "loss_ce": 0.0010403223568573594, "loss_iou": 0.2216796875, "loss_num": 0.04931640625, "loss_xval": 0.69140625, "num_input_tokens_seen": 74771980, "step": 1193 }, { "epoch": 3.973377703826955, "grad_norm": 34.72041320800781, "learning_rate": 5e-06, "loss": 1.0369, "num_input_tokens_seen": 74835232, "step": 1194 }, { "epoch": 3.973377703826955, "loss": 1.1662983894348145, "loss_ce": 0.00028277342789806426, "loss_iou": 0.380859375, "loss_num": 0.08056640625, "loss_xval": 1.1640625, "num_input_tokens_seen": 74835232, "step": 1194 }, { "epoch": 3.9767054908485857, "grad_norm": 32.24665451049805, "learning_rate": 5e-06, "loss": 0.9714, "num_input_tokens_seen": 74899092, "step": 1195 }, { "epoch": 3.9767054908485857, "loss": 0.8533996343612671, "loss_ce": 0.0018371050246059895, "loss_iou": 0.271484375, "loss_num": 0.061279296875, "loss_xval": 0.8515625, "num_input_tokens_seen": 74899092, "step": 1195 }, { "epoch": 3.9800332778702163, "grad_norm": 13.845719337463379, "learning_rate": 5e-06, "loss": 0.578, "num_input_tokens_seen": 74961620, "step": 1196 }, { "epoch": 3.9800332778702163, "loss": 0.5864394307136536, "loss_ce": 1.3658591342391446e-05, "loss_iou": 0.1787109375, "loss_num": 0.045654296875, "loss_xval": 0.5859375, "num_input_tokens_seen": 74961620, "step": 1196 }, { "epoch": 3.983361064891847, "grad_norm": 19.974306106567383, "learning_rate": 5e-06, "loss": 0.4176, "num_input_tokens_seen": 75024436, "step": 1197 }, { "epoch": 3.983361064891847, "loss": 0.48389583826065063, "loss_ce": 8.541123679606244e-05, "loss_iou": 0.169921875, "loss_num": 0.028564453125, "loss_xval": 0.484375, "num_input_tokens_seen": 75024436, "step": 1197 }, { "epoch": 3.9866888519134775, "grad_norm": 33.58815002441406, "learning_rate": 5e-06, "loss": 0.5782, "num_input_tokens_seen": 75086404, "step": 1198 }, { "epoch": 3.9866888519134775, "loss": 0.5042873620986938, "loss_ce": 1.4926767107681371e-05, "loss_iou": 0.158203125, "loss_num": 0.037353515625, "loss_xval": 0.50390625, "num_input_tokens_seen": 75086404, "step": 1198 }, { "epoch": 3.990016638935108, "grad_norm": 26.256038665771484, "learning_rate": 5e-06, "loss": 0.866, "num_input_tokens_seen": 75147788, "step": 1199 }, { "epoch": 3.990016638935108, "loss": 0.9974926114082336, "loss_ce": 0.0004222542338538915, "loss_iou": 0.357421875, "loss_num": 0.056396484375, "loss_xval": 0.99609375, "num_input_tokens_seen": 75147788, "step": 1199 }, { "epoch": 3.9933444259567388, "grad_norm": 10.55799388885498, "learning_rate": 5e-06, "loss": 0.7979, "num_input_tokens_seen": 75208792, "step": 1200 }, { "epoch": 3.9933444259567388, "loss": 0.9055243134498596, "loss_ce": 6.735265742463525e-06, "loss_iou": 0.32421875, "loss_num": 0.0517578125, "loss_xval": 0.90625, "num_input_tokens_seen": 75208792, "step": 1200 }, { "epoch": 3.9966722129783694, "grad_norm": 10.002418518066406, "learning_rate": 5e-06, "loss": 0.629, "num_input_tokens_seen": 75272764, "step": 1201 }, { "epoch": 3.9966722129783694, "loss": 0.5389520525932312, "loss_ce": 1.165019602922257e-05, "loss_iou": 0.1708984375, "loss_num": 0.03955078125, "loss_xval": 0.5390625, "num_input_tokens_seen": 75272764, "step": 1201 }, { "epoch": 4.0, "grad_norm": 13.451079368591309, "learning_rate": 5e-06, "loss": 0.6302, "num_input_tokens_seen": 75335296, "step": 1202 }, { "epoch": 4.0, "loss": 0.653794527053833, "loss_ce": 0.0002300913183717057, "loss_iou": 0.1826171875, "loss_num": 0.0576171875, "loss_xval": 0.65234375, "num_input_tokens_seen": 75335296, "step": 1202 }, { "epoch": 4.003327787021631, "grad_norm": 10.134664535522461, "learning_rate": 5e-06, "loss": 0.5931, "num_input_tokens_seen": 75396240, "step": 1203 }, { "epoch": 4.003327787021631, "loss": 0.9381798505783081, "loss_ce": 0.00013054934970568866, "loss_iou": 0.357421875, "loss_num": 0.04443359375, "loss_xval": 0.9375, "num_input_tokens_seen": 75396240, "step": 1203 }, { "epoch": 4.006655574043261, "grad_norm": 7.342705726623535, "learning_rate": 5e-06, "loss": 0.486, "num_input_tokens_seen": 75458616, "step": 1204 }, { "epoch": 4.006655574043261, "loss": 0.3220045268535614, "loss_ce": 0.0020582363940775394, "loss_iou": 0.0, "loss_num": 0.06396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 75458616, "step": 1204 }, { "epoch": 4.009983361064892, "grad_norm": 9.686637878417969, "learning_rate": 5e-06, "loss": 0.6467, "num_input_tokens_seen": 75521736, "step": 1205 }, { "epoch": 4.009983361064892, "loss": 0.6421002149581909, "loss_ce": 1.0430228940094821e-05, "loss_iou": 0.255859375, "loss_num": 0.02587890625, "loss_xval": 0.640625, "num_input_tokens_seen": 75521736, "step": 1205 }, { "epoch": 4.0133111480865225, "grad_norm": 11.917696952819824, "learning_rate": 5e-06, "loss": 0.4783, "num_input_tokens_seen": 75582716, "step": 1206 }, { "epoch": 4.0133111480865225, "loss": 0.5268844366073608, "loss_ce": 2.898617458413355e-05, "loss_iou": 0.173828125, "loss_num": 0.03564453125, "loss_xval": 0.52734375, "num_input_tokens_seen": 75582716, "step": 1206 }, { "epoch": 4.016638935108153, "grad_norm": 32.53382110595703, "learning_rate": 5e-06, "loss": 0.8453, "num_input_tokens_seen": 75647264, "step": 1207 }, { "epoch": 4.016638935108153, "loss": 0.9277580976486206, "loss_ce": 2.3713711925665848e-05, "loss_iou": 0.388671875, "loss_num": 0.0301513671875, "loss_xval": 0.9296875, "num_input_tokens_seen": 75647264, "step": 1207 }, { "epoch": 4.019966722129784, "grad_norm": 19.00457191467285, "learning_rate": 5e-06, "loss": 0.8042, "num_input_tokens_seen": 75712296, "step": 1208 }, { "epoch": 4.019966722129784, "loss": 0.8812234401702881, "loss_ce": 0.001157482503913343, "loss_iou": 0.314453125, "loss_num": 0.050048828125, "loss_xval": 0.87890625, "num_input_tokens_seen": 75712296, "step": 1208 }, { "epoch": 4.023294509151414, "grad_norm": 7.750196933746338, "learning_rate": 5e-06, "loss": 0.4817, "num_input_tokens_seen": 75772604, "step": 1209 }, { "epoch": 4.023294509151414, "loss": 0.47095412015914917, "loss_ce": 6.830508937127888e-06, "loss_iou": 0.169921875, "loss_num": 0.0260009765625, "loss_xval": 0.470703125, "num_input_tokens_seen": 75772604, "step": 1209 }, { "epoch": 4.026622296173045, "grad_norm": 21.401397705078125, "learning_rate": 5e-06, "loss": 0.8369, "num_input_tokens_seen": 75834900, "step": 1210 }, { "epoch": 4.026622296173045, "loss": 0.9536416530609131, "loss_ce": 0.00027255434542894363, "loss_iou": 0.3359375, "loss_num": 0.056396484375, "loss_xval": 0.953125, "num_input_tokens_seen": 75834900, "step": 1210 }, { "epoch": 4.0299500831946755, "grad_norm": 21.841358184814453, "learning_rate": 5e-06, "loss": 0.7628, "num_input_tokens_seen": 75896776, "step": 1211 }, { "epoch": 4.0299500831946755, "loss": 0.8109995126724243, "loss_ce": 0.00045263877836987376, "loss_iou": 0.29296875, "loss_num": 0.04541015625, "loss_xval": 0.8125, "num_input_tokens_seen": 75896776, "step": 1211 }, { "epoch": 4.033277870216306, "grad_norm": 31.634183883666992, "learning_rate": 5e-06, "loss": 0.7408, "num_input_tokens_seen": 75961288, "step": 1212 }, { "epoch": 4.033277870216306, "loss": 0.6983240842819214, "loss_ce": 0.0006922991015017033, "loss_iou": 0.251953125, "loss_num": 0.038330078125, "loss_xval": 0.69921875, "num_input_tokens_seen": 75961288, "step": 1212 }, { "epoch": 4.036605657237937, "grad_norm": 16.962753295898438, "learning_rate": 5e-06, "loss": 0.8123, "num_input_tokens_seen": 76024648, "step": 1213 }, { "epoch": 4.036605657237937, "loss": 0.8816708326339722, "loss_ce": 0.0003231820010114461, "loss_iou": 0.322265625, "loss_num": 0.047119140625, "loss_xval": 0.8828125, "num_input_tokens_seen": 76024648, "step": 1213 }, { "epoch": 4.039933444259567, "grad_norm": 13.604915618896484, "learning_rate": 5e-06, "loss": 0.6718, "num_input_tokens_seen": 76086868, "step": 1214 }, { "epoch": 4.039933444259567, "loss": 0.7207829356193542, "loss_ce": 0.00020186348410788924, "loss_iou": 0.255859375, "loss_num": 0.042236328125, "loss_xval": 0.71875, "num_input_tokens_seen": 76086868, "step": 1214 }, { "epoch": 4.043261231281198, "grad_norm": 11.45804214477539, "learning_rate": 5e-06, "loss": 0.6918, "num_input_tokens_seen": 76148272, "step": 1215 }, { "epoch": 4.043261231281198, "loss": 0.7739920616149902, "loss_ce": 5.272301677905489e-06, "loss_iou": 0.2373046875, "loss_num": 0.059814453125, "loss_xval": 0.7734375, "num_input_tokens_seen": 76148272, "step": 1215 }, { "epoch": 4.046589018302829, "grad_norm": 6.911097526550293, "learning_rate": 5e-06, "loss": 0.7163, "num_input_tokens_seen": 76212304, "step": 1216 }, { "epoch": 4.046589018302829, "loss": 0.7182790040969849, "loss_ce": 1.725198126223404e-05, "loss_iou": 0.263671875, "loss_num": 0.03857421875, "loss_xval": 0.71875, "num_input_tokens_seen": 76212304, "step": 1216 }, { "epoch": 4.049916805324459, "grad_norm": 11.124464988708496, "learning_rate": 5e-06, "loss": 0.7056, "num_input_tokens_seen": 76276568, "step": 1217 }, { "epoch": 4.049916805324459, "loss": 0.5908313989639282, "loss_ce": 1.1106720194220543e-05, "loss_iou": 0.236328125, "loss_num": 0.02392578125, "loss_xval": 0.58984375, "num_input_tokens_seen": 76276568, "step": 1217 }, { "epoch": 4.05324459234609, "grad_norm": 38.99077224731445, "learning_rate": 5e-06, "loss": 0.6922, "num_input_tokens_seen": 76339120, "step": 1218 }, { "epoch": 4.05324459234609, "loss": 0.6051743030548096, "loss_ce": 1.0704436135711148e-05, "loss_iou": 0.2236328125, "loss_num": 0.03125, "loss_xval": 0.60546875, "num_input_tokens_seen": 76339120, "step": 1218 }, { "epoch": 4.05657237936772, "grad_norm": 14.10145092010498, "learning_rate": 5e-06, "loss": 0.7924, "num_input_tokens_seen": 76403376, "step": 1219 }, { "epoch": 4.05657237936772, "loss": 0.9449859261512756, "loss_ce": 3.9671351260039955e-05, "loss_iou": 0.34765625, "loss_num": 0.049560546875, "loss_xval": 0.9453125, "num_input_tokens_seen": 76403376, "step": 1219 }, { "epoch": 4.059900166389351, "grad_norm": 9.600042343139648, "learning_rate": 5e-06, "loss": 0.5602, "num_input_tokens_seen": 76467168, "step": 1220 }, { "epoch": 4.059900166389351, "loss": 0.6643515825271606, "loss_ce": 4.4928823626833037e-05, "loss_iou": 0.2470703125, "loss_num": 0.033935546875, "loss_xval": 0.6640625, "num_input_tokens_seen": 76467168, "step": 1220 }, { "epoch": 4.063227953410982, "grad_norm": 10.294352531433105, "learning_rate": 5e-06, "loss": 0.7367, "num_input_tokens_seen": 76528424, "step": 1221 }, { "epoch": 4.063227953410982, "loss": 0.708865761756897, "loss_ce": 0.0004917742917314172, "loss_iou": 0.1591796875, "loss_num": 0.07763671875, "loss_xval": 0.70703125, "num_input_tokens_seen": 76528424, "step": 1221 }, { "epoch": 4.066555740432612, "grad_norm": 22.319923400878906, "learning_rate": 5e-06, "loss": 0.8316, "num_input_tokens_seen": 76593204, "step": 1222 }, { "epoch": 4.066555740432612, "loss": 0.9223567247390747, "loss_ce": 0.001214136485941708, "loss_iou": 0.34765625, "loss_num": 0.04541015625, "loss_xval": 0.921875, "num_input_tokens_seen": 76593204, "step": 1222 }, { "epoch": 4.069883527454243, "grad_norm": 30.089197158813477, "learning_rate": 5e-06, "loss": 0.8181, "num_input_tokens_seen": 76657548, "step": 1223 }, { "epoch": 4.069883527454243, "loss": 0.7841982841491699, "loss_ce": 1.858196992543526e-05, "loss_iou": 0.294921875, "loss_num": 0.0390625, "loss_xval": 0.78515625, "num_input_tokens_seen": 76657548, "step": 1223 }, { "epoch": 4.0732113144758735, "grad_norm": 33.664066314697266, "learning_rate": 5e-06, "loss": 0.7967, "num_input_tokens_seen": 76719292, "step": 1224 }, { "epoch": 4.0732113144758735, "loss": 0.8570939302444458, "loss_ce": 0.00016029489052016288, "loss_iou": 0.271484375, "loss_num": 0.06298828125, "loss_xval": 0.85546875, "num_input_tokens_seen": 76719292, "step": 1224 }, { "epoch": 4.076539101497504, "grad_norm": 50.2418327331543, "learning_rate": 5e-06, "loss": 0.8276, "num_input_tokens_seen": 76782492, "step": 1225 }, { "epoch": 4.076539101497504, "loss": 0.8675827383995056, "loss_ce": 2.904290522565134e-05, "loss_iou": 0.318359375, "loss_num": 0.04638671875, "loss_xval": 0.8671875, "num_input_tokens_seen": 76782492, "step": 1225 }, { "epoch": 4.079866888519135, "grad_norm": 12.176660537719727, "learning_rate": 5e-06, "loss": 0.6237, "num_input_tokens_seen": 76844564, "step": 1226 }, { "epoch": 4.079866888519135, "loss": 0.7886014580726624, "loss_ce": 0.0005765281384810805, "loss_iou": 0.3125, "loss_num": 0.032958984375, "loss_xval": 0.7890625, "num_input_tokens_seen": 76844564, "step": 1226 }, { "epoch": 4.083194675540765, "grad_norm": 6.557040214538574, "learning_rate": 5e-06, "loss": 0.7401, "num_input_tokens_seen": 76908992, "step": 1227 }, { "epoch": 4.083194675540765, "loss": 0.750075101852417, "loss_ce": 0.00031925359508022666, "loss_iou": 0.2578125, "loss_num": 0.046875, "loss_xval": 0.75, "num_input_tokens_seen": 76908992, "step": 1227 }, { "epoch": 4.086522462562396, "grad_norm": 16.275442123413086, "learning_rate": 5e-06, "loss": 0.762, "num_input_tokens_seen": 76972528, "step": 1228 }, { "epoch": 4.086522462562396, "loss": 0.6492881774902344, "loss_ce": 0.0006065062480047345, "loss_iou": 0.2490234375, "loss_num": 0.0301513671875, "loss_xval": 0.6484375, "num_input_tokens_seen": 76972528, "step": 1228 }, { "epoch": 4.0898502495840265, "grad_norm": 19.650428771972656, "learning_rate": 5e-06, "loss": 0.8475, "num_input_tokens_seen": 77033472, "step": 1229 }, { "epoch": 4.0898502495840265, "loss": 0.8361536264419556, "loss_ce": 2.5276287942688214e-06, "loss_iou": 0.267578125, "loss_num": 0.06005859375, "loss_xval": 0.8359375, "num_input_tokens_seen": 77033472, "step": 1229 }, { "epoch": 4.093178036605657, "grad_norm": 15.119193077087402, "learning_rate": 5e-06, "loss": 0.5222, "num_input_tokens_seen": 77096260, "step": 1230 }, { "epoch": 4.093178036605657, "loss": 0.512615442276001, "loss_ce": 4.222230563755147e-05, "loss_iou": 0.142578125, "loss_num": 0.04541015625, "loss_xval": 0.51171875, "num_input_tokens_seen": 77096260, "step": 1230 }, { "epoch": 4.096505823627288, "grad_norm": 22.070770263671875, "learning_rate": 5e-06, "loss": 0.8678, "num_input_tokens_seen": 77160616, "step": 1231 }, { "epoch": 4.096505823627288, "loss": 0.8838921189308167, "loss_ce": 0.00010302869486622512, "loss_iou": 0.314453125, "loss_num": 0.051513671875, "loss_xval": 0.8828125, "num_input_tokens_seen": 77160616, "step": 1231 }, { "epoch": 4.099833610648918, "grad_norm": 27.72943687438965, "learning_rate": 5e-06, "loss": 0.6912, "num_input_tokens_seen": 77223116, "step": 1232 }, { "epoch": 4.099833610648918, "loss": 0.6356049180030823, "loss_ce": 0.00022896112932357937, "loss_iou": 0.2080078125, "loss_num": 0.0439453125, "loss_xval": 0.63671875, "num_input_tokens_seen": 77223116, "step": 1232 }, { "epoch": 4.103161397670549, "grad_norm": 15.809282302856445, "learning_rate": 5e-06, "loss": 0.8661, "num_input_tokens_seen": 77285364, "step": 1233 }, { "epoch": 4.103161397670549, "loss": 0.9589939117431641, "loss_ce": 9.549862625135574e-06, "loss_iou": 0.3671875, "loss_num": 0.044677734375, "loss_xval": 0.9609375, "num_input_tokens_seen": 77285364, "step": 1233 }, { "epoch": 4.10648918469218, "grad_norm": 14.031881332397461, "learning_rate": 5e-06, "loss": 0.8008, "num_input_tokens_seen": 77348536, "step": 1234 }, { "epoch": 4.10648918469218, "loss": 0.7415446043014526, "loss_ce": 0.0003336329245939851, "loss_iou": 0.2470703125, "loss_num": 0.049560546875, "loss_xval": 0.7421875, "num_input_tokens_seen": 77348536, "step": 1234 }, { "epoch": 4.10981697171381, "grad_norm": 7.842467784881592, "learning_rate": 5e-06, "loss": 0.7402, "num_input_tokens_seen": 77413080, "step": 1235 }, { "epoch": 4.10981697171381, "loss": 0.740845799446106, "loss_ce": 0.00036727398401126266, "loss_iou": 0.267578125, "loss_num": 0.040771484375, "loss_xval": 0.7421875, "num_input_tokens_seen": 77413080, "step": 1235 }, { "epoch": 4.113144758735441, "grad_norm": 9.866108894348145, "learning_rate": 5e-06, "loss": 0.7857, "num_input_tokens_seen": 77474548, "step": 1236 }, { "epoch": 4.113144758735441, "loss": 0.7509984970092773, "loss_ce": 0.0002660911704879254, "loss_iou": 0.2392578125, "loss_num": 0.054443359375, "loss_xval": 0.75, "num_input_tokens_seen": 77474548, "step": 1236 }, { "epoch": 4.116472545757071, "grad_norm": 19.495542526245117, "learning_rate": 5e-06, "loss": 0.7234, "num_input_tokens_seen": 77537000, "step": 1237 }, { "epoch": 4.116472545757071, "loss": 0.7190049886703491, "loss_ce": 1.082837570720585e-05, "loss_iou": 0.298828125, "loss_num": 0.024658203125, "loss_xval": 0.71875, "num_input_tokens_seen": 77537000, "step": 1237 }, { "epoch": 4.119800332778702, "grad_norm": 21.736934661865234, "learning_rate": 5e-06, "loss": 0.6775, "num_input_tokens_seen": 77600484, "step": 1238 }, { "epoch": 4.119800332778702, "loss": 0.70020592212677, "loss_ce": 1.0643694622558542e-05, "loss_iou": 0.271484375, "loss_num": 0.031494140625, "loss_xval": 0.69921875, "num_input_tokens_seen": 77600484, "step": 1238 }, { "epoch": 4.123128119800333, "grad_norm": 22.383136749267578, "learning_rate": 5e-06, "loss": 0.74, "num_input_tokens_seen": 77663276, "step": 1239 }, { "epoch": 4.123128119800333, "loss": 0.9630830883979797, "loss_ce": 0.0010469970293343067, "loss_iou": 0.33984375, "loss_num": 0.056396484375, "loss_xval": 0.9609375, "num_input_tokens_seen": 77663276, "step": 1239 }, { "epoch": 4.126455906821963, "grad_norm": 32.640625, "learning_rate": 5e-06, "loss": 0.565, "num_input_tokens_seen": 77725788, "step": 1240 }, { "epoch": 4.126455906821963, "loss": 0.5865948796272278, "loss_ce": 0.0010236025555059314, "loss_iou": 0.2109375, "loss_num": 0.03271484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 77725788, "step": 1240 }, { "epoch": 4.129783693843594, "grad_norm": 31.970279693603516, "learning_rate": 5e-06, "loss": 0.7779, "num_input_tokens_seen": 77788220, "step": 1241 }, { "epoch": 4.129783693843594, "loss": 0.7889679670333862, "loss_ce": 0.00014954953803680837, "loss_iou": 0.27734375, "loss_num": 0.046630859375, "loss_xval": 0.7890625, "num_input_tokens_seen": 77788220, "step": 1241 }, { "epoch": 4.1331114808652245, "grad_norm": 32.130428314208984, "learning_rate": 5e-06, "loss": 0.6959, "num_input_tokens_seen": 77852120, "step": 1242 }, { "epoch": 4.1331114808652245, "loss": 0.6154385805130005, "loss_ce": 0.0006924752378836274, "loss_iou": 0.2138671875, "loss_num": 0.037353515625, "loss_xval": 0.61328125, "num_input_tokens_seen": 77852120, "step": 1242 }, { "epoch": 4.136439267886855, "grad_norm": 30.604116439819336, "learning_rate": 5e-06, "loss": 0.8563, "num_input_tokens_seen": 77916316, "step": 1243 }, { "epoch": 4.136439267886855, "loss": 0.9377624988555908, "loss_ce": 1.8331618775846437e-05, "loss_iou": 0.318359375, "loss_num": 0.06005859375, "loss_xval": 0.9375, "num_input_tokens_seen": 77916316, "step": 1243 }, { "epoch": 4.139767054908486, "grad_norm": 16.981704711914062, "learning_rate": 5e-06, "loss": 0.4769, "num_input_tokens_seen": 77979376, "step": 1244 }, { "epoch": 4.139767054908486, "loss": 0.4578045606613159, "loss_ce": 1.040008464769926e-05, "loss_iou": 0.1484375, "loss_num": 0.0322265625, "loss_xval": 0.45703125, "num_input_tokens_seen": 77979376, "step": 1244 }, { "epoch": 4.143094841930116, "grad_norm": 8.534014701843262, "learning_rate": 5e-06, "loss": 0.4721, "num_input_tokens_seen": 78039860, "step": 1245 }, { "epoch": 4.143094841930116, "loss": 0.5085457563400269, "loss_ce": 6.183484947541729e-05, "loss_iou": 0.1396484375, "loss_num": 0.0458984375, "loss_xval": 0.5078125, "num_input_tokens_seen": 78039860, "step": 1245 }, { "epoch": 4.146422628951747, "grad_norm": 18.475008010864258, "learning_rate": 5e-06, "loss": 0.502, "num_input_tokens_seen": 78101256, "step": 1246 }, { "epoch": 4.146422628951747, "loss": 0.5057373642921448, "loss_ce": 0.00036624903441406786, "loss_iou": 0.1591796875, "loss_num": 0.037353515625, "loss_xval": 0.50390625, "num_input_tokens_seen": 78101256, "step": 1246 }, { "epoch": 4.149750415973378, "grad_norm": 14.759221076965332, "learning_rate": 5e-06, "loss": 0.5889, "num_input_tokens_seen": 78163720, "step": 1247 }, { "epoch": 4.149750415973378, "loss": 0.7398657202720642, "loss_ce": 5.860287637915462e-05, "loss_iou": 0.259765625, "loss_num": 0.0439453125, "loss_xval": 0.73828125, "num_input_tokens_seen": 78163720, "step": 1247 }, { "epoch": 4.153078202995008, "grad_norm": 8.236388206481934, "learning_rate": 5e-06, "loss": 0.8215, "num_input_tokens_seen": 78226252, "step": 1248 }, { "epoch": 4.153078202995008, "loss": 0.8308947086334229, "loss_ce": 0.0005724473739974201, "loss_iou": 0.24609375, "loss_num": 0.06787109375, "loss_xval": 0.83203125, "num_input_tokens_seen": 78226252, "step": 1248 }, { "epoch": 4.156405990016639, "grad_norm": 21.270627975463867, "learning_rate": 5e-06, "loss": 0.8277, "num_input_tokens_seen": 78289956, "step": 1249 }, { "epoch": 4.156405990016639, "loss": 0.8488887548446655, "loss_ce": 1.1790533790190239e-05, "loss_iou": 0.306640625, "loss_num": 0.047119140625, "loss_xval": 0.84765625, "num_input_tokens_seen": 78289956, "step": 1249 }, { "epoch": 4.159733777038269, "grad_norm": 46.86408996582031, "learning_rate": 5e-06, "loss": 1.0004, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "eval_seeclick_CIoU": 0.0411010067909956, "eval_seeclick_GIoU": 0.06610255688428879, "eval_seeclick_IoU": 0.15579545497894287, "eval_seeclick_MAE_all": 0.17167094349861145, "eval_seeclick_MAE_h": 0.06063028983771801, "eval_seeclick_MAE_w": 0.13175999373197556, "eval_seeclick_MAE_x_boxes": 0.21394891291856766, "eval_seeclick_MAE_y_boxes": 0.17476319521665573, "eval_seeclick_NUM_probability": 0.9997701644897461, "eval_seeclick_inside_bbox": 0.22500000149011612, "eval_seeclick_loss": 2.8505024909973145, "eval_seeclick_loss_ce": 0.11920517683029175, "eval_seeclick_loss_iou": 0.935546875, "eval_seeclick_loss_num": 0.16717529296875, "eval_seeclick_loss_xval": 2.70849609375, "eval_seeclick_runtime": 63.7626, "eval_seeclick_samples_per_second": 0.737, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "eval_icons_CIoU": -0.05261573940515518, "eval_icons_GIoU": 0.034715624526143074, "eval_icons_IoU": 0.11978563293814659, "eval_icons_MAE_all": 0.16910801827907562, "eval_icons_MAE_h": 0.15430551767349243, "eval_icons_MAE_w": 0.17065216600894928, "eval_icons_MAE_x_boxes": 0.110549446195364, "eval_icons_MAE_y_boxes": 0.08487226068973541, "eval_icons_NUM_probability": 0.999939352273941, "eval_icons_inside_bbox": 0.2083333358168602, "eval_icons_loss": 2.7312588691711426, "eval_icons_loss_ce": 9.676391528046224e-06, "eval_icons_loss_iou": 0.97314453125, "eval_icons_loss_num": 0.1680011749267578, "eval_icons_loss_xval": 2.7841796875, "eval_icons_runtime": 65.1712, "eval_icons_samples_per_second": 0.767, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "eval_screenspot_CIoU": 0.11181284114718437, "eval_screenspot_GIoU": 0.1334042822321256, "eval_screenspot_IoU": 0.23128188649813333, "eval_screenspot_MAE_all": 0.1488722562789917, "eval_screenspot_MAE_h": 0.08053762838244438, "eval_screenspot_MAE_w": 0.14393815149863562, "eval_screenspot_MAE_x_boxes": 0.18041572471459708, "eval_screenspot_MAE_y_boxes": 0.1104625016450882, "eval_screenspot_NUM_probability": 0.9999677936236063, "eval_screenspot_inside_bbox": 0.4662500023841858, "eval_screenspot_loss": 2.5235776901245117, "eval_screenspot_loss_ce": 5.2315189047173284e-05, "eval_screenspot_loss_iou": 0.8854166666666666, "eval_screenspot_loss_num": 0.16054789225260416, "eval_screenspot_loss_xval": 2.5745442708333335, "eval_screenspot_runtime": 112.3936, "eval_screenspot_samples_per_second": 0.792, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "eval_compot_CIoU": -0.008852380327880383, "eval_compot_GIoU": 0.058575043454766273, "eval_compot_IoU": 0.14424490183591843, "eval_compot_MAE_all": 0.19978154450654984, "eval_compot_MAE_h": 0.13088013604283333, "eval_compot_MAE_w": 0.21783354133367538, "eval_compot_MAE_x_boxes": 0.15127842873334885, "eval_compot_MAE_y_boxes": 0.12745947018265724, "eval_compot_NUM_probability": 0.9999769330024719, "eval_compot_inside_bbox": 0.2673611119389534, "eval_compot_loss": 2.8673794269561768, "eval_compot_loss_ce": 0.002981940167956054, "eval_compot_loss_iou": 0.941162109375, "eval_compot_loss_num": 0.20378875732421875, "eval_compot_loss_xval": 2.8994140625, "eval_compot_runtime": 64.5423, "eval_compot_samples_per_second": 0.775, "eval_compot_steps_per_second": 0.031, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "eval_custom_ui_MAE_all": 0.07659339159727097, "eval_custom_ui_MAE_x": 0.08288927748799324, "eval_custom_ui_MAE_y": 0.0702974982559681, "eval_custom_ui_NUM_probability": 0.9999881684780121, "eval_custom_ui_loss": 0.3609429895877838, "eval_custom_ui_loss_ce": 4.916884336125804e-06, "eval_custom_ui_loss_num": 0.0721893310546875, "eval_custom_ui_loss_xval": 0.361328125, "eval_custom_ui_runtime": 55.767, "eval_custom_ui_samples_per_second": 0.897, "eval_custom_ui_steps_per_second": 0.036, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.159733777038269, "loss": 0.3712204396724701, "loss_ce": 4.623752829502337e-06, "loss_iou": 0.0, "loss_num": 0.07421875, "loss_xval": 0.37109375, "num_input_tokens_seen": 78352800, "step": 1250 }, { "epoch": 4.1630615640599, "grad_norm": 27.439401626586914, "learning_rate": 5e-06, "loss": 0.7756, "num_input_tokens_seen": 78415540, "step": 1251 }, { "epoch": 4.1630615640599, "loss": 0.5637520551681519, "loss_ce": 0.0008248202502727509, "loss_iou": 0.205078125, "loss_num": 0.0302734375, "loss_xval": 0.5625, "num_input_tokens_seen": 78415540, "step": 1251 }, { "epoch": 4.166389351081531, "grad_norm": 29.857797622680664, "learning_rate": 5e-06, "loss": 1.0257, "num_input_tokens_seen": 78479392, "step": 1252 }, { "epoch": 4.166389351081531, "loss": 1.061448097229004, "loss_ce": 0.000901204242836684, "loss_iou": 0.423828125, "loss_num": 0.043212890625, "loss_xval": 1.0625, "num_input_tokens_seen": 78479392, "step": 1252 }, { "epoch": 4.169717138103161, "grad_norm": 25.49187469482422, "learning_rate": 5e-06, "loss": 0.7688, "num_input_tokens_seen": 78543512, "step": 1253 }, { "epoch": 4.169717138103161, "loss": 0.699540913105011, "loss_ce": 7.803901098668575e-05, "loss_iou": 0.296875, "loss_num": 0.0213623046875, "loss_xval": 0.69921875, "num_input_tokens_seen": 78543512, "step": 1253 }, { "epoch": 4.173044925124792, "grad_norm": 8.424339294433594, "learning_rate": 5e-06, "loss": 0.6042, "num_input_tokens_seen": 78605204, "step": 1254 }, { "epoch": 4.173044925124792, "loss": 0.4900583028793335, "loss_ce": 0.00012909977522213012, "loss_iou": 0.083984375, "loss_num": 0.064453125, "loss_xval": 0.490234375, "num_input_tokens_seen": 78605204, "step": 1254 }, { "epoch": 4.1763727121464225, "grad_norm": 26.64162254333496, "learning_rate": 5e-06, "loss": 0.8675, "num_input_tokens_seen": 78670004, "step": 1255 }, { "epoch": 4.1763727121464225, "loss": 0.9319003820419312, "loss_ce": 0.002212880179286003, "loss_iou": 0.359375, "loss_num": 0.0419921875, "loss_xval": 0.9296875, "num_input_tokens_seen": 78670004, "step": 1255 }, { "epoch": 4.179700499168053, "grad_norm": 29.97724723815918, "learning_rate": 5e-06, "loss": 0.4917, "num_input_tokens_seen": 78732164, "step": 1256 }, { "epoch": 4.179700499168053, "loss": 0.5948033332824707, "loss_ce": 7.679940608795732e-05, "loss_iou": 0.2255859375, "loss_num": 0.0286865234375, "loss_xval": 0.59375, "num_input_tokens_seen": 78732164, "step": 1256 }, { "epoch": 4.183028286189684, "grad_norm": 24.311927795410156, "learning_rate": 5e-06, "loss": 0.612, "num_input_tokens_seen": 78793820, "step": 1257 }, { "epoch": 4.183028286189684, "loss": 0.8346253633499146, "loss_ce": 3.0578277801396325e-05, "loss_iou": 0.296875, "loss_num": 0.048095703125, "loss_xval": 0.8359375, "num_input_tokens_seen": 78793820, "step": 1257 }, { "epoch": 4.186356073211314, "grad_norm": 18.071741104125977, "learning_rate": 5e-06, "loss": 0.687, "num_input_tokens_seen": 78857168, "step": 1258 }, { "epoch": 4.186356073211314, "loss": 0.7082351446151733, "loss_ce": 0.0009597208700142801, "loss_iou": 0.25390625, "loss_num": 0.0400390625, "loss_xval": 0.70703125, "num_input_tokens_seen": 78857168, "step": 1258 }, { "epoch": 4.189683860232945, "grad_norm": 19.613140106201172, "learning_rate": 5e-06, "loss": 0.6764, "num_input_tokens_seen": 78920452, "step": 1259 }, { "epoch": 4.189683860232945, "loss": 0.7232838273048401, "loss_ce": 0.0027027707546949387, "loss_iou": 0.25390625, "loss_num": 0.042236328125, "loss_xval": 0.71875, "num_input_tokens_seen": 78920452, "step": 1259 }, { "epoch": 4.1930116472545755, "grad_norm": 11.482674598693848, "learning_rate": 5e-06, "loss": 0.7203, "num_input_tokens_seen": 78982192, "step": 1260 }, { "epoch": 4.1930116472545755, "loss": 0.6436780095100403, "loss_ce": 0.00024541548918932676, "loss_iou": 0.1796875, "loss_num": 0.056884765625, "loss_xval": 0.64453125, "num_input_tokens_seen": 78982192, "step": 1260 }, { "epoch": 4.196339434276206, "grad_norm": 13.14754867553711, "learning_rate": 5e-06, "loss": 0.6061, "num_input_tokens_seen": 79043060, "step": 1261 }, { "epoch": 4.196339434276206, "loss": 0.6200226545333862, "loss_ce": 0.000271646014880389, "loss_iou": 0.2080078125, "loss_num": 0.041015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 79043060, "step": 1261 }, { "epoch": 4.199667221297837, "grad_norm": 21.364551544189453, "learning_rate": 5e-06, "loss": 0.7445, "num_input_tokens_seen": 79106228, "step": 1262 }, { "epoch": 4.199667221297837, "loss": 0.5737521052360535, "loss_ce": 2.1653402654919773e-05, "loss_iou": 0.21484375, "loss_num": 0.029052734375, "loss_xval": 0.57421875, "num_input_tokens_seen": 79106228, "step": 1262 }, { "epoch": 4.202995008319467, "grad_norm": 8.903093338012695, "learning_rate": 5e-06, "loss": 0.417, "num_input_tokens_seen": 79164768, "step": 1263 }, { "epoch": 4.202995008319467, "loss": 0.3991989195346832, "loss_ce": 2.8972301151952706e-05, "loss_iou": 0.0888671875, "loss_num": 0.04443359375, "loss_xval": 0.3984375, "num_input_tokens_seen": 79164768, "step": 1263 }, { "epoch": 4.206322795341098, "grad_norm": 11.570475578308105, "learning_rate": 5e-06, "loss": 0.6149, "num_input_tokens_seen": 79227000, "step": 1264 }, { "epoch": 4.206322795341098, "loss": 0.568337619304657, "loss_ce": 0.00022238263045437634, "loss_iou": 0.2099609375, "loss_num": 0.0299072265625, "loss_xval": 0.56640625, "num_input_tokens_seen": 79227000, "step": 1264 }, { "epoch": 4.209650582362729, "grad_norm": 12.106314659118652, "learning_rate": 5e-06, "loss": 0.5246, "num_input_tokens_seen": 79288948, "step": 1265 }, { "epoch": 4.209650582362729, "loss": 0.587887704372406, "loss_ce": 0.00030225006048567593, "loss_iou": 0.185546875, "loss_num": 0.04345703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 79288948, "step": 1265 }, { "epoch": 4.212978369384359, "grad_norm": 12.739585876464844, "learning_rate": 5e-06, "loss": 0.6362, "num_input_tokens_seen": 79351388, "step": 1266 }, { "epoch": 4.212978369384359, "loss": 0.40339529514312744, "loss_ce": 0.0008074131910689175, "loss_iou": 0.14453125, "loss_num": 0.022705078125, "loss_xval": 0.40234375, "num_input_tokens_seen": 79351388, "step": 1266 }, { "epoch": 4.21630615640599, "grad_norm": 43.02858352661133, "learning_rate": 5e-06, "loss": 0.7389, "num_input_tokens_seen": 79413828, "step": 1267 }, { "epoch": 4.21630615640599, "loss": 0.7740235328674316, "loss_ce": 0.00015877322584856302, "loss_iou": 0.2578125, "loss_num": 0.05126953125, "loss_xval": 0.7734375, "num_input_tokens_seen": 79413828, "step": 1267 }, { "epoch": 4.21963394342762, "grad_norm": 16.455984115600586, "learning_rate": 5e-06, "loss": 0.505, "num_input_tokens_seen": 79476808, "step": 1268 }, { "epoch": 4.21963394342762, "loss": 0.48288407921791077, "loss_ce": 0.00030960733420215547, "loss_iou": 0.134765625, "loss_num": 0.042724609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 79476808, "step": 1268 }, { "epoch": 4.222961730449251, "grad_norm": 22.450849533081055, "learning_rate": 5e-06, "loss": 1.1186, "num_input_tokens_seen": 79540864, "step": 1269 }, { "epoch": 4.222961730449251, "loss": 1.1303770542144775, "loss_ce": 0.002691594883799553, "loss_iou": 0.412109375, "loss_num": 0.060302734375, "loss_xval": 1.125, "num_input_tokens_seen": 79540864, "step": 1269 }, { "epoch": 4.226289517470882, "grad_norm": 12.23768424987793, "learning_rate": 5e-06, "loss": 0.8718, "num_input_tokens_seen": 79603976, "step": 1270 }, { "epoch": 4.226289517470882, "loss": 0.9246631860733032, "loss_ce": 0.0005909305764362216, "loss_iou": 0.31640625, "loss_num": 0.05859375, "loss_xval": 0.92578125, "num_input_tokens_seen": 79603976, "step": 1270 }, { "epoch": 4.229617304492512, "grad_norm": 7.6892242431640625, "learning_rate": 5e-06, "loss": 0.6565, "num_input_tokens_seen": 79665516, "step": 1271 }, { "epoch": 4.229617304492512, "loss": 0.8138464689254761, "loss_ce": 0.000980203622020781, "loss_iou": 0.2890625, "loss_num": 0.047119140625, "loss_xval": 0.8125, "num_input_tokens_seen": 79665516, "step": 1271 }, { "epoch": 4.232945091514143, "grad_norm": 14.234360694885254, "learning_rate": 5e-06, "loss": 0.6913, "num_input_tokens_seen": 79728776, "step": 1272 }, { "epoch": 4.232945091514143, "loss": 0.4744793474674225, "loss_ce": 0.0003582719073165208, "loss_iou": 0.1806640625, "loss_num": 0.0225830078125, "loss_xval": 0.474609375, "num_input_tokens_seen": 79728776, "step": 1272 }, { "epoch": 4.2362728785357735, "grad_norm": 10.238362312316895, "learning_rate": 5e-06, "loss": 0.5814, "num_input_tokens_seen": 79791552, "step": 1273 }, { "epoch": 4.2362728785357735, "loss": 0.834240198135376, "loss_ce": 1.1714266292983666e-05, "loss_iou": 0.30078125, "loss_num": 0.046142578125, "loss_xval": 0.8359375, "num_input_tokens_seen": 79791552, "step": 1273 }, { "epoch": 4.239600665557404, "grad_norm": 20.62790870666504, "learning_rate": 5e-06, "loss": 0.3782, "num_input_tokens_seen": 79854184, "step": 1274 }, { "epoch": 4.239600665557404, "loss": 0.2787053883075714, "loss_ce": 1.885721940197982e-05, "loss_iou": 0.05859375, "loss_num": 0.0322265625, "loss_xval": 0.279296875, "num_input_tokens_seen": 79854184, "step": 1274 }, { "epoch": 4.242928452579035, "grad_norm": 12.329323768615723, "learning_rate": 5e-06, "loss": 0.9912, "num_input_tokens_seen": 79917468, "step": 1275 }, { "epoch": 4.242928452579035, "loss": 0.8702594041824341, "loss_ce": 0.0006304816342890263, "loss_iou": 0.306640625, "loss_num": 0.05126953125, "loss_xval": 0.87109375, "num_input_tokens_seen": 79917468, "step": 1275 }, { "epoch": 4.246256239600665, "grad_norm": 15.735509872436523, "learning_rate": 5e-06, "loss": 0.6479, "num_input_tokens_seen": 79979128, "step": 1276 }, { "epoch": 4.246256239600665, "loss": 0.6392305493354797, "loss_ce": 9.386229066876695e-06, "loss_iou": 0.1552734375, "loss_num": 0.0654296875, "loss_xval": 0.640625, "num_input_tokens_seen": 79979128, "step": 1276 }, { "epoch": 4.249584026622296, "grad_norm": 14.464531898498535, "learning_rate": 5e-06, "loss": 0.9374, "num_input_tokens_seen": 80043868, "step": 1277 }, { "epoch": 4.249584026622296, "loss": 1.1296863555908203, "loss_ce": 0.0007800288731232285, "loss_iou": 0.42578125, "loss_num": 0.055908203125, "loss_xval": 1.125, "num_input_tokens_seen": 80043868, "step": 1277 }, { "epoch": 4.252911813643927, "grad_norm": 19.477603912353516, "learning_rate": 5e-06, "loss": 0.6839, "num_input_tokens_seen": 80105736, "step": 1278 }, { "epoch": 4.252911813643927, "loss": 0.5724726915359497, "loss_ce": 0.00014606832701247185, "loss_iou": 0.205078125, "loss_num": 0.03271484375, "loss_xval": 0.57421875, "num_input_tokens_seen": 80105736, "step": 1278 }, { "epoch": 4.256239600665557, "grad_norm": 21.04270362854004, "learning_rate": 5e-06, "loss": 0.7735, "num_input_tokens_seen": 80167208, "step": 1279 }, { "epoch": 4.256239600665557, "loss": 0.8213939070701599, "loss_ce": 0.00022691901540383697, "loss_iou": 0.29296875, "loss_num": 0.047119140625, "loss_xval": 0.8203125, "num_input_tokens_seen": 80167208, "step": 1279 }, { "epoch": 4.259567387687188, "grad_norm": 9.50141716003418, "learning_rate": 5e-06, "loss": 0.832, "num_input_tokens_seen": 80230704, "step": 1280 }, { "epoch": 4.259567387687188, "loss": 0.8658267855644226, "loss_ce": 0.00010415755969006568, "loss_iou": 0.3515625, "loss_num": 0.032958984375, "loss_xval": 0.8671875, "num_input_tokens_seen": 80230704, "step": 1280 }, { "epoch": 4.262895174708818, "grad_norm": 18.833486557006836, "learning_rate": 5e-06, "loss": 0.5111, "num_input_tokens_seen": 80294256, "step": 1281 }, { "epoch": 4.262895174708818, "loss": 0.4673139452934265, "loss_ce": 0.00015087361680343747, "loss_iou": 0.1943359375, "loss_num": 0.0159912109375, "loss_xval": 0.466796875, "num_input_tokens_seen": 80294256, "step": 1281 }, { "epoch": 4.266222961730449, "grad_norm": 22.414358139038086, "learning_rate": 5e-06, "loss": 0.5584, "num_input_tokens_seen": 80355548, "step": 1282 }, { "epoch": 4.266222961730449, "loss": 0.5364385843276978, "loss_ce": 0.00030578914447687566, "loss_iou": 0.134765625, "loss_num": 0.053466796875, "loss_xval": 0.53515625, "num_input_tokens_seen": 80355548, "step": 1282 }, { "epoch": 4.26955074875208, "grad_norm": 12.399456024169922, "learning_rate": 5e-06, "loss": 0.7232, "num_input_tokens_seen": 80419040, "step": 1283 }, { "epoch": 4.26955074875208, "loss": 0.6155070066452026, "loss_ce": 2.848647636710666e-05, "loss_iou": 0.23046875, "loss_num": 0.03076171875, "loss_xval": 0.6171875, "num_input_tokens_seen": 80419040, "step": 1283 }, { "epoch": 4.27287853577371, "grad_norm": 15.996452331542969, "learning_rate": 5e-06, "loss": 0.5117, "num_input_tokens_seen": 80482680, "step": 1284 }, { "epoch": 4.27287853577371, "loss": 0.5829875469207764, "loss_ce": 0.0008342466317117214, "loss_iou": 0.2314453125, "loss_num": 0.02392578125, "loss_xval": 0.58203125, "num_input_tokens_seen": 80482680, "step": 1284 }, { "epoch": 4.276206322795341, "grad_norm": 21.942983627319336, "learning_rate": 5e-06, "loss": 0.9671, "num_input_tokens_seen": 80546192, "step": 1285 }, { "epoch": 4.276206322795341, "loss": 1.1663360595703125, "loss_ce": 0.002273513237014413, "loss_iou": 0.376953125, "loss_num": 0.08203125, "loss_xval": 1.1640625, "num_input_tokens_seen": 80546192, "step": 1285 }, { "epoch": 4.2795341098169715, "grad_norm": 17.45485496520996, "learning_rate": 5e-06, "loss": 0.7379, "num_input_tokens_seen": 80610548, "step": 1286 }, { "epoch": 4.2795341098169715, "loss": 0.6753256320953369, "loss_ce": 3.262608151999302e-05, "loss_iou": 0.232421875, "loss_num": 0.0419921875, "loss_xval": 0.67578125, "num_input_tokens_seen": 80610548, "step": 1286 }, { "epoch": 4.282861896838602, "grad_norm": 9.229435920715332, "learning_rate": 5e-06, "loss": 0.4411, "num_input_tokens_seen": 80671072, "step": 1287 }, { "epoch": 4.282861896838602, "loss": 0.5477426052093506, "loss_ce": 1.3110267900628969e-05, "loss_iou": 0.212890625, "loss_num": 0.0242919921875, "loss_xval": 0.546875, "num_input_tokens_seen": 80671072, "step": 1287 }, { "epoch": 4.286189683860233, "grad_norm": 35.23735809326172, "learning_rate": 5e-06, "loss": 0.7478, "num_input_tokens_seen": 80734592, "step": 1288 }, { "epoch": 4.286189683860233, "loss": 0.9059512615203857, "loss_ce": 0.0006778583629056811, "loss_iou": 0.326171875, "loss_num": 0.050537109375, "loss_xval": 0.90625, "num_input_tokens_seen": 80734592, "step": 1288 }, { "epoch": 4.289517470881863, "grad_norm": 42.050071716308594, "learning_rate": 5e-06, "loss": 0.7224, "num_input_tokens_seen": 80797996, "step": 1289 }, { "epoch": 4.289517470881863, "loss": 0.6101228594779968, "loss_ce": 1.542076097393874e-05, "loss_iou": 0.208984375, "loss_num": 0.03857421875, "loss_xval": 0.609375, "num_input_tokens_seen": 80797996, "step": 1289 }, { "epoch": 4.292845257903494, "grad_norm": 34.539249420166016, "learning_rate": 5e-06, "loss": 0.7275, "num_input_tokens_seen": 80861096, "step": 1290 }, { "epoch": 4.292845257903494, "loss": 0.6886411309242249, "loss_ce": 0.00016455614240840077, "loss_iou": 0.234375, "loss_num": 0.0439453125, "loss_xval": 0.6875, "num_input_tokens_seen": 80861096, "step": 1290 }, { "epoch": 4.2961730449251245, "grad_norm": 23.271974563598633, "learning_rate": 5e-06, "loss": 0.4677, "num_input_tokens_seen": 80923964, "step": 1291 }, { "epoch": 4.2961730449251245, "loss": 0.4971497058868408, "loss_ce": 0.0006897358107380569, "loss_iou": 0.1875, "loss_num": 0.0244140625, "loss_xval": 0.49609375, "num_input_tokens_seen": 80923964, "step": 1291 }, { "epoch": 4.299500831946755, "grad_norm": 12.256768226623535, "learning_rate": 5e-06, "loss": 0.748, "num_input_tokens_seen": 80987240, "step": 1292 }, { "epoch": 4.299500831946755, "loss": 0.7120282649993896, "loss_ce": 0.0003583685902412981, "loss_iou": 0.25, "loss_num": 0.04248046875, "loss_xval": 0.7109375, "num_input_tokens_seen": 80987240, "step": 1292 }, { "epoch": 4.302828618968386, "grad_norm": 28.615036010742188, "learning_rate": 5e-06, "loss": 0.6679, "num_input_tokens_seen": 81049532, "step": 1293 }, { "epoch": 4.302828618968386, "loss": 0.6948373317718506, "loss_ce": 1.3097147530061193e-05, "loss_iou": 0.275390625, "loss_num": 0.028564453125, "loss_xval": 0.6953125, "num_input_tokens_seen": 81049532, "step": 1293 }, { "epoch": 4.306156405990016, "grad_norm": 42.26750564575195, "learning_rate": 5e-06, "loss": 0.8327, "num_input_tokens_seen": 81112472, "step": 1294 }, { "epoch": 4.306156405990016, "loss": 0.6265087127685547, "loss_ce": 0.00028800699510611594, "loss_iou": 0.2353515625, "loss_num": 0.031005859375, "loss_xval": 0.625, "num_input_tokens_seen": 81112472, "step": 1294 }, { "epoch": 4.309484193011647, "grad_norm": 23.361072540283203, "learning_rate": 5e-06, "loss": 0.5348, "num_input_tokens_seen": 81173948, "step": 1295 }, { "epoch": 4.309484193011647, "loss": 0.6413629055023193, "loss_ce": 5.4855458984093275e-06, "loss_iou": 0.2353515625, "loss_num": 0.034423828125, "loss_xval": 0.640625, "num_input_tokens_seen": 81173948, "step": 1295 }, { "epoch": 4.312811980033278, "grad_norm": 17.196260452270508, "learning_rate": 5e-06, "loss": 0.786, "num_input_tokens_seen": 81237616, "step": 1296 }, { "epoch": 4.312811980033278, "loss": 0.9394415616989136, "loss_ce": 4.945868568029255e-05, "loss_iou": 0.26171875, "loss_num": 0.0830078125, "loss_xval": 0.9375, "num_input_tokens_seen": 81237616, "step": 1296 }, { "epoch": 4.316139767054908, "grad_norm": 8.687355041503906, "learning_rate": 5e-06, "loss": 0.7235, "num_input_tokens_seen": 81300704, "step": 1297 }, { "epoch": 4.316139767054908, "loss": 0.7191054821014404, "loss_ce": 0.0007827409426681697, "loss_iou": 0.1884765625, "loss_num": 0.068359375, "loss_xval": 0.71875, "num_input_tokens_seen": 81300704, "step": 1297 }, { "epoch": 4.319467554076539, "grad_norm": 8.467069625854492, "learning_rate": 5e-06, "loss": 0.6741, "num_input_tokens_seen": 81363596, "step": 1298 }, { "epoch": 4.319467554076539, "loss": 0.5166745185852051, "loss_ce": 1.1927927516808268e-05, "loss_iou": 0.158203125, "loss_num": 0.0400390625, "loss_xval": 0.515625, "num_input_tokens_seen": 81363596, "step": 1298 }, { "epoch": 4.322795341098169, "grad_norm": 12.247100830078125, "learning_rate": 5e-06, "loss": 0.6453, "num_input_tokens_seen": 81426572, "step": 1299 }, { "epoch": 4.322795341098169, "loss": 0.6940947771072388, "loss_ce": 0.00012503366451710463, "loss_iou": 0.244140625, "loss_num": 0.040771484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 81426572, "step": 1299 }, { "epoch": 4.3261231281198, "grad_norm": 14.04256820678711, "learning_rate": 5e-06, "loss": 0.4527, "num_input_tokens_seen": 81489276, "step": 1300 }, { "epoch": 4.3261231281198, "loss": 0.39016443490982056, "loss_ce": 2.7704523745342158e-05, "loss_iou": 0.1474609375, "loss_num": 0.0189208984375, "loss_xval": 0.390625, "num_input_tokens_seen": 81489276, "step": 1300 }, { "epoch": 4.329450915141431, "grad_norm": 9.792634963989258, "learning_rate": 5e-06, "loss": 0.6542, "num_input_tokens_seen": 81551796, "step": 1301 }, { "epoch": 4.329450915141431, "loss": 0.5883185267448425, "loss_ce": 0.002686193445697427, "loss_iou": 0.1455078125, "loss_num": 0.05908203125, "loss_xval": 0.5859375, "num_input_tokens_seen": 81551796, "step": 1301 }, { "epoch": 4.332778702163061, "grad_norm": 16.797924041748047, "learning_rate": 5e-06, "loss": 0.5659, "num_input_tokens_seen": 81614932, "step": 1302 }, { "epoch": 4.332778702163061, "loss": 0.4511442184448242, "loss_ce": 0.0011930274777114391, "loss_iou": 0.1494140625, "loss_num": 0.0303955078125, "loss_xval": 0.44921875, "num_input_tokens_seen": 81614932, "step": 1302 }, { "epoch": 4.336106489184692, "grad_norm": 10.221659660339355, "learning_rate": 5e-06, "loss": 0.4192, "num_input_tokens_seen": 81677328, "step": 1303 }, { "epoch": 4.336106489184692, "loss": 0.3609451353549957, "loss_ce": 0.0003494042030069977, "loss_iou": 0.14453125, "loss_num": 0.014404296875, "loss_xval": 0.361328125, "num_input_tokens_seen": 81677328, "step": 1303 }, { "epoch": 4.3394342762063225, "grad_norm": 16.190181732177734, "learning_rate": 5e-06, "loss": 0.9697, "num_input_tokens_seen": 81741144, "step": 1304 }, { "epoch": 4.3394342762063225, "loss": 0.9564138650894165, "loss_ce": 0.00011508618626976386, "loss_iou": 0.365234375, "loss_num": 0.045654296875, "loss_xval": 0.95703125, "num_input_tokens_seen": 81741144, "step": 1304 }, { "epoch": 4.342762063227953, "grad_norm": 10.22618293762207, "learning_rate": 5e-06, "loss": 0.654, "num_input_tokens_seen": 81802700, "step": 1305 }, { "epoch": 4.342762063227953, "loss": 0.5586150884628296, "loss_ce": 0.0003875755937770009, "loss_iou": 0.197265625, "loss_num": 0.03271484375, "loss_xval": 0.55859375, "num_input_tokens_seen": 81802700, "step": 1305 }, { "epoch": 4.346089850249584, "grad_norm": 22.13401222229004, "learning_rate": 5e-06, "loss": 0.767, "num_input_tokens_seen": 81865476, "step": 1306 }, { "epoch": 4.346089850249584, "loss": 0.9452147483825684, "loss_ce": 0.00026843693922273815, "loss_iou": 0.345703125, "loss_num": 0.05078125, "loss_xval": 0.9453125, "num_input_tokens_seen": 81865476, "step": 1306 }, { "epoch": 4.349417637271214, "grad_norm": 28.866174697875977, "learning_rate": 5e-06, "loss": 0.8512, "num_input_tokens_seen": 81930068, "step": 1307 }, { "epoch": 4.349417637271214, "loss": 0.9075159430503845, "loss_ce": 0.00016731294454075396, "loss_iou": 0.306640625, "loss_num": 0.05859375, "loss_xval": 0.90625, "num_input_tokens_seen": 81930068, "step": 1307 }, { "epoch": 4.352745424292845, "grad_norm": 24.100893020629883, "learning_rate": 5e-06, "loss": 0.7954, "num_input_tokens_seen": 81992292, "step": 1308 }, { "epoch": 4.352745424292845, "loss": 0.7724366784095764, "loss_ce": 0.00021988632215652615, "loss_iou": 0.294921875, "loss_num": 0.036376953125, "loss_xval": 0.7734375, "num_input_tokens_seen": 81992292, "step": 1308 }, { "epoch": 4.356073211314476, "grad_norm": 10.286542892456055, "learning_rate": 5e-06, "loss": 0.7113, "num_input_tokens_seen": 82053968, "step": 1309 }, { "epoch": 4.356073211314476, "loss": 0.692034125328064, "loss_ce": 4.808099038200453e-05, "loss_iou": 0.2119140625, "loss_num": 0.0537109375, "loss_xval": 0.69140625, "num_input_tokens_seen": 82053968, "step": 1309 }, { "epoch": 4.359400998336106, "grad_norm": 14.468509674072266, "learning_rate": 5e-06, "loss": 0.8074, "num_input_tokens_seen": 82117760, "step": 1310 }, { "epoch": 4.359400998336106, "loss": 0.6124804615974426, "loss_ce": 0.0011523185530677438, "loss_iou": 0.2236328125, "loss_num": 0.032958984375, "loss_xval": 0.609375, "num_input_tokens_seen": 82117760, "step": 1310 }, { "epoch": 4.362728785357737, "grad_norm": 12.03175163269043, "learning_rate": 5e-06, "loss": 0.9334, "num_input_tokens_seen": 82180340, "step": 1311 }, { "epoch": 4.362728785357737, "loss": 1.097083330154419, "loss_ce": 3.7389592762338e-05, "loss_iou": 0.3984375, "loss_num": 0.0595703125, "loss_xval": 1.09375, "num_input_tokens_seen": 82180340, "step": 1311 }, { "epoch": 4.366056572379367, "grad_norm": 17.09552001953125, "learning_rate": 5e-06, "loss": 0.7349, "num_input_tokens_seen": 82245268, "step": 1312 }, { "epoch": 4.366056572379367, "loss": 0.6913478374481201, "loss_ce": 0.00012475968105718493, "loss_iou": 0.2578125, "loss_num": 0.03515625, "loss_xval": 0.69140625, "num_input_tokens_seen": 82245268, "step": 1312 }, { "epoch": 4.369384359400998, "grad_norm": 20.01402473449707, "learning_rate": 5e-06, "loss": 0.6697, "num_input_tokens_seen": 82306684, "step": 1313 }, { "epoch": 4.369384359400998, "loss": 0.61667400598526, "loss_ce": 0.0009513536933809519, "loss_iou": 0.234375, "loss_num": 0.0294189453125, "loss_xval": 0.6171875, "num_input_tokens_seen": 82306684, "step": 1313 }, { "epoch": 4.372712146422629, "grad_norm": 121.90161895751953, "learning_rate": 5e-06, "loss": 0.7735, "num_input_tokens_seen": 82368880, "step": 1314 }, { "epoch": 4.372712146422629, "loss": 0.8540662527084351, "loss_ce": 6.237881461856887e-05, "loss_iou": 0.302734375, "loss_num": 0.050048828125, "loss_xval": 0.85546875, "num_input_tokens_seen": 82368880, "step": 1314 }, { "epoch": 4.376039933444259, "grad_norm": 12.27063274383545, "learning_rate": 5e-06, "loss": 0.6451, "num_input_tokens_seen": 82431660, "step": 1315 }, { "epoch": 4.376039933444259, "loss": 0.6398316621780396, "loss_ce": 0.00042732813744805753, "loss_iou": 0.2080078125, "loss_num": 0.044921875, "loss_xval": 0.640625, "num_input_tokens_seen": 82431660, "step": 1315 }, { "epoch": 4.37936772046589, "grad_norm": 13.58110523223877, "learning_rate": 5e-06, "loss": 0.8232, "num_input_tokens_seen": 82494624, "step": 1316 }, { "epoch": 4.37936772046589, "loss": 0.6468814611434937, "loss_ce": 0.0001528922002762556, "loss_iou": 0.1474609375, "loss_num": 0.0703125, "loss_xval": 0.6484375, "num_input_tokens_seen": 82494624, "step": 1316 }, { "epoch": 4.3826955074875205, "grad_norm": 9.46317195892334, "learning_rate": 5e-06, "loss": 0.9478, "num_input_tokens_seen": 82557264, "step": 1317 }, { "epoch": 4.3826955074875205, "loss": 0.9231317043304443, "loss_ce": 0.0005242591141723096, "loss_iou": 0.3125, "loss_num": 0.059326171875, "loss_xval": 0.921875, "num_input_tokens_seen": 82557264, "step": 1317 }, { "epoch": 4.386023294509151, "grad_norm": 14.079225540161133, "learning_rate": 5e-06, "loss": 0.8769, "num_input_tokens_seen": 82621528, "step": 1318 }, { "epoch": 4.386023294509151, "loss": 0.9120485782623291, "loss_ce": 0.00042748835403472185, "loss_iou": 0.298828125, "loss_num": 0.062255859375, "loss_xval": 0.91015625, "num_input_tokens_seen": 82621528, "step": 1318 }, { "epoch": 4.389351081530782, "grad_norm": 10.575733184814453, "learning_rate": 5e-06, "loss": 0.5298, "num_input_tokens_seen": 82684528, "step": 1319 }, { "epoch": 4.389351081530782, "loss": 0.5374753475189209, "loss_ce": 0.00024388919700868428, "loss_iou": 0.15234375, "loss_num": 0.04638671875, "loss_xval": 0.5390625, "num_input_tokens_seen": 82684528, "step": 1319 }, { "epoch": 4.392678868552412, "grad_norm": 9.236316680908203, "learning_rate": 5e-06, "loss": 0.6288, "num_input_tokens_seen": 82745868, "step": 1320 }, { "epoch": 4.392678868552412, "loss": 0.6650440692901611, "loss_ce": 4.970107966073556e-06, "loss_iou": 0.263671875, "loss_num": 0.0272216796875, "loss_xval": 0.6640625, "num_input_tokens_seen": 82745868, "step": 1320 }, { "epoch": 4.396006655574043, "grad_norm": 13.09516716003418, "learning_rate": 5e-06, "loss": 0.8366, "num_input_tokens_seen": 82809088, "step": 1321 }, { "epoch": 4.396006655574043, "loss": 0.7077580094337463, "loss_ce": 5.536075332202017e-05, "loss_iou": 0.2578125, "loss_num": 0.03857421875, "loss_xval": 0.70703125, "num_input_tokens_seen": 82809088, "step": 1321 }, { "epoch": 4.3993344425956735, "grad_norm": 22.83343505859375, "learning_rate": 5e-06, "loss": 0.4686, "num_input_tokens_seen": 82870940, "step": 1322 }, { "epoch": 4.3993344425956735, "loss": 0.5811432600021362, "loss_ce": 0.0003327082667965442, "loss_iou": 0.16015625, "loss_num": 0.05224609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 82870940, "step": 1322 }, { "epoch": 4.402662229617304, "grad_norm": 23.519207000732422, "learning_rate": 5e-06, "loss": 0.8076, "num_input_tokens_seen": 82934272, "step": 1323 }, { "epoch": 4.402662229617304, "loss": 0.6759135127067566, "loss_ce": 1.0233001376036555e-05, "loss_iou": 0.2333984375, "loss_num": 0.041748046875, "loss_xval": 0.67578125, "num_input_tokens_seen": 82934272, "step": 1323 }, { "epoch": 4.405990016638935, "grad_norm": 7.860145568847656, "learning_rate": 5e-06, "loss": 0.7258, "num_input_tokens_seen": 82998876, "step": 1324 }, { "epoch": 4.405990016638935, "loss": 0.7730740308761597, "loss_ce": 0.00024693459272384644, "loss_iou": 0.302734375, "loss_num": 0.033203125, "loss_xval": 0.7734375, "num_input_tokens_seen": 82998876, "step": 1324 }, { "epoch": 4.409317803660565, "grad_norm": 19.75787925720215, "learning_rate": 5e-06, "loss": 0.8422, "num_input_tokens_seen": 83062248, "step": 1325 }, { "epoch": 4.409317803660565, "loss": 0.8698641061782837, "loss_ce": 0.0007234690710902214, "loss_iou": 0.333984375, "loss_num": 0.0400390625, "loss_xval": 0.8671875, "num_input_tokens_seen": 83062248, "step": 1325 }, { "epoch": 4.412645590682196, "grad_norm": 21.181398391723633, "learning_rate": 5e-06, "loss": 0.7054, "num_input_tokens_seen": 83126332, "step": 1326 }, { "epoch": 4.412645590682196, "loss": 0.8323586583137512, "loss_ce": 0.0013039561454206705, "loss_iou": 0.306640625, "loss_num": 0.043701171875, "loss_xval": 0.83203125, "num_input_tokens_seen": 83126332, "step": 1326 }, { "epoch": 4.415973377703827, "grad_norm": 10.401611328125, "learning_rate": 5e-06, "loss": 0.8051, "num_input_tokens_seen": 83190668, "step": 1327 }, { "epoch": 4.415973377703827, "loss": 1.088860034942627, "loss_ce": 0.0004811858234461397, "loss_iou": 0.41796875, "loss_num": 0.050048828125, "loss_xval": 1.0859375, "num_input_tokens_seen": 83190668, "step": 1327 }, { "epoch": 4.419301164725457, "grad_norm": 22.405797958374023, "learning_rate": 5e-06, "loss": 0.7423, "num_input_tokens_seen": 83253544, "step": 1328 }, { "epoch": 4.419301164725457, "loss": 0.6563088893890381, "loss_ce": 0.00030303088715299964, "loss_iou": 0.1787109375, "loss_num": 0.059814453125, "loss_xval": 0.65625, "num_input_tokens_seen": 83253544, "step": 1328 }, { "epoch": 4.422628951747088, "grad_norm": 14.80013370513916, "learning_rate": 5e-06, "loss": 0.8032, "num_input_tokens_seen": 83315384, "step": 1329 }, { "epoch": 4.422628951747088, "loss": 0.8288633227348328, "loss_ce": 5.904023510083789e-06, "loss_iou": 0.28515625, "loss_num": 0.051513671875, "loss_xval": 0.828125, "num_input_tokens_seen": 83315384, "step": 1329 }, { "epoch": 4.425956738768718, "grad_norm": 18.27556037902832, "learning_rate": 5e-06, "loss": 0.5839, "num_input_tokens_seen": 83377408, "step": 1330 }, { "epoch": 4.425956738768718, "loss": 0.594611406326294, "loss_ce": 6.921641215740237e-06, "loss_iou": 0.232421875, "loss_num": 0.026123046875, "loss_xval": 0.59375, "num_input_tokens_seen": 83377408, "step": 1330 }, { "epoch": 4.429284525790349, "grad_norm": 18.135482788085938, "learning_rate": 5e-06, "loss": 0.8385, "num_input_tokens_seen": 83440780, "step": 1331 }, { "epoch": 4.429284525790349, "loss": 0.8478043675422668, "loss_ce": 2.60383021668531e-05, "loss_iou": 0.333984375, "loss_num": 0.0361328125, "loss_xval": 0.84765625, "num_input_tokens_seen": 83440780, "step": 1331 }, { "epoch": 4.43261231281198, "grad_norm": 16.065000534057617, "learning_rate": 5e-06, "loss": 0.6789, "num_input_tokens_seen": 83502220, "step": 1332 }, { "epoch": 4.43261231281198, "loss": 0.6492142677307129, "loss_ce": 4.437244206201285e-05, "loss_iou": 0.1845703125, "loss_num": 0.05615234375, "loss_xval": 0.6484375, "num_input_tokens_seen": 83502220, "step": 1332 }, { "epoch": 4.43594009983361, "grad_norm": 27.677690505981445, "learning_rate": 5e-06, "loss": 0.6976, "num_input_tokens_seen": 83566368, "step": 1333 }, { "epoch": 4.43594009983361, "loss": 0.5435962677001953, "loss_ce": 0.0006274799234233797, "loss_iou": 0.1962890625, "loss_num": 0.0301513671875, "loss_xval": 0.54296875, "num_input_tokens_seen": 83566368, "step": 1333 }, { "epoch": 4.439267886855241, "grad_norm": 18.383197784423828, "learning_rate": 5e-06, "loss": 0.5382, "num_input_tokens_seen": 83628728, "step": 1334 }, { "epoch": 4.439267886855241, "loss": 0.4342476427555084, "loss_ce": 0.00028765652677975595, "loss_iou": 0.07763671875, "loss_num": 0.0556640625, "loss_xval": 0.43359375, "num_input_tokens_seen": 83628728, "step": 1334 }, { "epoch": 4.4425956738768715, "grad_norm": 8.024295806884766, "learning_rate": 5e-06, "loss": 0.6467, "num_input_tokens_seen": 83690360, "step": 1335 }, { "epoch": 4.4425956738768715, "loss": 0.9086010456085205, "loss_ce": 0.00015376352530438453, "loss_iou": 0.310546875, "loss_num": 0.056884765625, "loss_xval": 0.91015625, "num_input_tokens_seen": 83690360, "step": 1335 }, { "epoch": 4.445923460898502, "grad_norm": 26.591236114501953, "learning_rate": 5e-06, "loss": 0.362, "num_input_tokens_seen": 83752956, "step": 1336 }, { "epoch": 4.445923460898502, "loss": 0.30534249544143677, "loss_ce": 0.0007770901429466903, "loss_iou": 0.08935546875, "loss_num": 0.0250244140625, "loss_xval": 0.3046875, "num_input_tokens_seen": 83752956, "step": 1336 }, { "epoch": 4.449251247920133, "grad_norm": 33.432376861572266, "learning_rate": 5e-06, "loss": 0.6526, "num_input_tokens_seen": 83815184, "step": 1337 }, { "epoch": 4.449251247920133, "loss": 0.7117882370948792, "loss_ce": 0.00036243151407688856, "loss_iou": 0.279296875, "loss_num": 0.0306396484375, "loss_xval": 0.7109375, "num_input_tokens_seen": 83815184, "step": 1337 }, { "epoch": 4.452579034941763, "grad_norm": 22.944896697998047, "learning_rate": 5e-06, "loss": 0.6759, "num_input_tokens_seen": 83875708, "step": 1338 }, { "epoch": 4.452579034941763, "loss": 0.7695423364639282, "loss_ce": 1.107938624045346e-05, "loss_iou": 0.2001953125, "loss_num": 0.07421875, "loss_xval": 0.76953125, "num_input_tokens_seen": 83875708, "step": 1338 }, { "epoch": 4.455906821963394, "grad_norm": 24.16956329345703, "learning_rate": 5e-06, "loss": 0.7432, "num_input_tokens_seen": 83939132, "step": 1339 }, { "epoch": 4.455906821963394, "loss": 0.6491574048995972, "loss_ce": 0.0007198970997706056, "loss_iou": 0.2431640625, "loss_num": 0.0322265625, "loss_xval": 0.6484375, "num_input_tokens_seen": 83939132, "step": 1339 }, { "epoch": 4.4592346089850246, "grad_norm": 11.392342567443848, "learning_rate": 5e-06, "loss": 0.7587, "num_input_tokens_seen": 84001120, "step": 1340 }, { "epoch": 4.4592346089850246, "loss": 0.9079697728157043, "loss_ce": 1.0750731235020794e-05, "loss_iou": 0.361328125, "loss_num": 0.037109375, "loss_xval": 0.90625, "num_input_tokens_seen": 84001120, "step": 1340 }, { "epoch": 4.462562396006655, "grad_norm": 18.56732940673828, "learning_rate": 5e-06, "loss": 0.7499, "num_input_tokens_seen": 84063012, "step": 1341 }, { "epoch": 4.462562396006655, "loss": 0.7202978134155273, "loss_ce": 8.293554128613323e-05, "loss_iou": 0.2060546875, "loss_num": 0.0615234375, "loss_xval": 0.71875, "num_input_tokens_seen": 84063012, "step": 1341 }, { "epoch": 4.465890183028286, "grad_norm": 11.404187202453613, "learning_rate": 5e-06, "loss": 0.7075, "num_input_tokens_seen": 84124740, "step": 1342 }, { "epoch": 4.465890183028286, "loss": 0.5816681385040283, "loss_ce": 0.0002472280466463417, "loss_iou": 0.1767578125, "loss_num": 0.045654296875, "loss_xval": 0.58203125, "num_input_tokens_seen": 84124740, "step": 1342 }, { "epoch": 4.469217970049916, "grad_norm": 11.722935676574707, "learning_rate": 5e-06, "loss": 0.6558, "num_input_tokens_seen": 84187416, "step": 1343 }, { "epoch": 4.469217970049916, "loss": 0.5433543920516968, "loss_ce": 0.0004466616956051439, "loss_iou": 0.193359375, "loss_num": 0.031494140625, "loss_xval": 0.54296875, "num_input_tokens_seen": 84187416, "step": 1343 }, { "epoch": 4.472545757071547, "grad_norm": 18.490428924560547, "learning_rate": 5e-06, "loss": 0.5917, "num_input_tokens_seen": 84250460, "step": 1344 }, { "epoch": 4.472545757071547, "loss": 0.6309865713119507, "loss_ce": 5.1386250561336055e-06, "loss_iou": 0.23828125, "loss_num": 0.03076171875, "loss_xval": 0.6328125, "num_input_tokens_seen": 84250460, "step": 1344 }, { "epoch": 4.475873544093178, "grad_norm": 24.012821197509766, "learning_rate": 5e-06, "loss": 0.8423, "num_input_tokens_seen": 84313384, "step": 1345 }, { "epoch": 4.475873544093178, "loss": 0.8356732130050659, "loss_ce": 0.0004681595310103148, "loss_iou": 0.310546875, "loss_num": 0.042236328125, "loss_xval": 0.8359375, "num_input_tokens_seen": 84313384, "step": 1345 }, { "epoch": 4.479201331114808, "grad_norm": 10.270696640014648, "learning_rate": 5e-06, "loss": 0.681, "num_input_tokens_seen": 84375172, "step": 1346 }, { "epoch": 4.479201331114808, "loss": 0.812825620174408, "loss_ce": 0.0005698194145224988, "loss_iou": 0.2890625, "loss_num": 0.046875, "loss_xval": 0.8125, "num_input_tokens_seen": 84375172, "step": 1346 }, { "epoch": 4.482529118136439, "grad_norm": 11.441398620605469, "learning_rate": 5e-06, "loss": 0.5946, "num_input_tokens_seen": 84435736, "step": 1347 }, { "epoch": 4.482529118136439, "loss": 0.5803534388542175, "loss_ce": 0.0008856821223162115, "loss_iou": 0.189453125, "loss_num": 0.039794921875, "loss_xval": 0.578125, "num_input_tokens_seen": 84435736, "step": 1347 }, { "epoch": 4.4858569051580695, "grad_norm": 9.025400161743164, "learning_rate": 5e-06, "loss": 0.7007, "num_input_tokens_seen": 84498868, "step": 1348 }, { "epoch": 4.4858569051580695, "loss": 0.7202355861663818, "loss_ce": 2.0708434021798894e-05, "loss_iou": 0.291015625, "loss_num": 0.02734375, "loss_xval": 0.71875, "num_input_tokens_seen": 84498868, "step": 1348 }, { "epoch": 4.4891846921797, "grad_norm": 20.62155532836914, "learning_rate": 5e-06, "loss": 0.6522, "num_input_tokens_seen": 84561996, "step": 1349 }, { "epoch": 4.4891846921797, "loss": 0.5740362405776978, "loss_ce": 6.169134576339275e-05, "loss_iou": 0.1943359375, "loss_num": 0.037109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 84561996, "step": 1349 }, { "epoch": 4.492512479201331, "grad_norm": 27.57354736328125, "learning_rate": 5e-06, "loss": 0.8956, "num_input_tokens_seen": 84625352, "step": 1350 }, { "epoch": 4.492512479201331, "loss": 0.8519359827041626, "loss_ce": 7.229617040138692e-06, "loss_iou": 0.279296875, "loss_num": 0.058837890625, "loss_xval": 0.8515625, "num_input_tokens_seen": 84625352, "step": 1350 }, { "epoch": 4.495840266222961, "grad_norm": 13.722368240356445, "learning_rate": 5e-06, "loss": 0.7336, "num_input_tokens_seen": 84689008, "step": 1351 }, { "epoch": 4.495840266222961, "loss": 0.751276433467865, "loss_ce": 0.0007881558849476278, "loss_iou": 0.26953125, "loss_num": 0.041748046875, "loss_xval": 0.75, "num_input_tokens_seen": 84689008, "step": 1351 }, { "epoch": 4.499168053244592, "grad_norm": 9.512694358825684, "learning_rate": 5e-06, "loss": 0.6014, "num_input_tokens_seen": 84751564, "step": 1352 }, { "epoch": 4.499168053244592, "loss": 0.6554710865020752, "loss_ce": 0.0008079749532043934, "loss_iou": 0.25390625, "loss_num": 0.029296875, "loss_xval": 0.65625, "num_input_tokens_seen": 84751564, "step": 1352 }, { "epoch": 4.5024958402662225, "grad_norm": 15.738521575927734, "learning_rate": 5e-06, "loss": 0.5719, "num_input_tokens_seen": 84813980, "step": 1353 }, { "epoch": 4.5024958402662225, "loss": 0.5394357442855835, "loss_ce": 7.072425432852469e-06, "loss_iou": 0.208984375, "loss_num": 0.024169921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 84813980, "step": 1353 }, { "epoch": 4.505823627287853, "grad_norm": 14.036721229553223, "learning_rate": 5e-06, "loss": 0.5921, "num_input_tokens_seen": 84876628, "step": 1354 }, { "epoch": 4.505823627287853, "loss": 0.5310646295547485, "loss_ce": 0.0001807953231036663, "loss_iou": 0.1455078125, "loss_num": 0.0478515625, "loss_xval": 0.53125, "num_input_tokens_seen": 84876628, "step": 1354 }, { "epoch": 4.509151414309484, "grad_norm": 25.116914749145508, "learning_rate": 5e-06, "loss": 0.6227, "num_input_tokens_seen": 84940328, "step": 1355 }, { "epoch": 4.509151414309484, "loss": 0.6232784390449524, "loss_ce": 0.0019405623897910118, "loss_iou": 0.216796875, "loss_num": 0.037353515625, "loss_xval": 0.62109375, "num_input_tokens_seen": 84940328, "step": 1355 }, { "epoch": 4.512479201331114, "grad_norm": 11.831351280212402, "learning_rate": 5e-06, "loss": 0.6977, "num_input_tokens_seen": 85003364, "step": 1356 }, { "epoch": 4.512479201331114, "loss": 0.8116161823272705, "loss_ce": 0.0005809839349240065, "loss_iou": 0.2734375, "loss_num": 0.05322265625, "loss_xval": 0.8125, "num_input_tokens_seen": 85003364, "step": 1356 }, { "epoch": 4.515806988352745, "grad_norm": 31.03595733642578, "learning_rate": 5e-06, "loss": 0.6383, "num_input_tokens_seen": 85066364, "step": 1357 }, { "epoch": 4.515806988352745, "loss": 0.4333268404006958, "loss_ce": 0.0004655019729398191, "loss_iou": 0.17578125, "loss_num": 0.0164794921875, "loss_xval": 0.43359375, "num_input_tokens_seen": 85066364, "step": 1357 }, { "epoch": 4.519134775374376, "grad_norm": 38.70811462402344, "learning_rate": 5e-06, "loss": 0.7278, "num_input_tokens_seen": 85129812, "step": 1358 }, { "epoch": 4.519134775374376, "loss": 0.4072532057762146, "loss_ce": 0.00014869704318698496, "loss_iou": 0.1669921875, "loss_num": 0.0147705078125, "loss_xval": 0.40625, "num_input_tokens_seen": 85129812, "step": 1358 }, { "epoch": 4.522462562396006, "grad_norm": 20.269290924072266, "learning_rate": 5e-06, "loss": 0.836, "num_input_tokens_seen": 85192756, "step": 1359 }, { "epoch": 4.522462562396006, "loss": 0.9213912487030029, "loss_ce": 4.500491286307806e-06, "loss_iou": 0.31640625, "loss_num": 0.0576171875, "loss_xval": 0.921875, "num_input_tokens_seen": 85192756, "step": 1359 }, { "epoch": 4.525790349417637, "grad_norm": 12.051100730895996, "learning_rate": 5e-06, "loss": 0.6803, "num_input_tokens_seen": 85255636, "step": 1360 }, { "epoch": 4.525790349417637, "loss": 0.6096853017807007, "loss_ce": 5.107871402287856e-06, "loss_iou": 0.197265625, "loss_num": 0.04296875, "loss_xval": 0.609375, "num_input_tokens_seen": 85255636, "step": 1360 }, { "epoch": 4.529118136439267, "grad_norm": 10.959836959838867, "learning_rate": 5e-06, "loss": 0.6622, "num_input_tokens_seen": 85316940, "step": 1361 }, { "epoch": 4.529118136439267, "loss": 0.7610074877738953, "loss_ce": 0.0014860231894999743, "loss_iou": 0.2578125, "loss_num": 0.048583984375, "loss_xval": 0.7578125, "num_input_tokens_seen": 85316940, "step": 1361 }, { "epoch": 4.532445923460898, "grad_norm": 11.152680397033691, "learning_rate": 5e-06, "loss": 0.5283, "num_input_tokens_seen": 85379304, "step": 1362 }, { "epoch": 4.532445923460898, "loss": 0.5571681261062622, "loss_ce": 0.00040544505463913083, "loss_iou": 0.1875, "loss_num": 0.036376953125, "loss_xval": 0.55859375, "num_input_tokens_seen": 85379304, "step": 1362 }, { "epoch": 4.535773710482529, "grad_norm": 13.463736534118652, "learning_rate": 5e-06, "loss": 0.5955, "num_input_tokens_seen": 85442004, "step": 1363 }, { "epoch": 4.535773710482529, "loss": 0.7695366144180298, "loss_ce": 5.329041414370295e-06, "loss_iou": 0.287109375, "loss_num": 0.039306640625, "loss_xval": 0.76953125, "num_input_tokens_seen": 85442004, "step": 1363 }, { "epoch": 4.539101497504159, "grad_norm": 11.949771881103516, "learning_rate": 5e-06, "loss": 0.8034, "num_input_tokens_seen": 85505824, "step": 1364 }, { "epoch": 4.539101497504159, "loss": 0.8794767260551453, "loss_ce": 0.0003263682301621884, "loss_iou": 0.36328125, "loss_num": 0.030517578125, "loss_xval": 0.87890625, "num_input_tokens_seen": 85505824, "step": 1364 }, { "epoch": 4.54242928452579, "grad_norm": 21.069055557250977, "learning_rate": 5e-06, "loss": 0.7048, "num_input_tokens_seen": 85569220, "step": 1365 }, { "epoch": 4.54242928452579, "loss": 0.8421709537506104, "loss_ce": 0.00012992339907214046, "loss_iou": 0.28515625, "loss_num": 0.053955078125, "loss_xval": 0.84375, "num_input_tokens_seen": 85569220, "step": 1365 }, { "epoch": 4.5457570715474205, "grad_norm": 18.99080467224121, "learning_rate": 5e-06, "loss": 0.4548, "num_input_tokens_seen": 85631980, "step": 1366 }, { "epoch": 4.5457570715474205, "loss": 0.3215389549732208, "loss_ce": 5.750360742240446e-06, "loss_iou": 0.10009765625, "loss_num": 0.0242919921875, "loss_xval": 0.322265625, "num_input_tokens_seen": 85631980, "step": 1366 }, { "epoch": 4.549084858569051, "grad_norm": 9.994710922241211, "learning_rate": 5e-06, "loss": 0.69, "num_input_tokens_seen": 85693344, "step": 1367 }, { "epoch": 4.549084858569051, "loss": 0.8808728456497192, "loss_ce": 1.3468677025230136e-05, "loss_iou": 0.333984375, "loss_num": 0.04248046875, "loss_xval": 0.8828125, "num_input_tokens_seen": 85693344, "step": 1367 }, { "epoch": 4.552412645590682, "grad_norm": 8.047813415527344, "learning_rate": 5e-06, "loss": 0.7441, "num_input_tokens_seen": 85755952, "step": 1368 }, { "epoch": 4.552412645590682, "loss": 0.628337562084198, "loss_ce": 0.0016285981982946396, "loss_iou": 0.166015625, "loss_num": 0.059326171875, "loss_xval": 0.625, "num_input_tokens_seen": 85755952, "step": 1368 }, { "epoch": 4.555740432612312, "grad_norm": 9.475415229797363, "learning_rate": 5e-06, "loss": 0.5802, "num_input_tokens_seen": 85819060, "step": 1369 }, { "epoch": 4.555740432612312, "loss": 0.2980998158454895, "loss_ce": 4.109616384084802e-06, "loss_iou": 0.07373046875, "loss_num": 0.0301513671875, "loss_xval": 0.298828125, "num_input_tokens_seen": 85819060, "step": 1369 }, { "epoch": 4.559068219633943, "grad_norm": 17.368986129760742, "learning_rate": 5e-06, "loss": 0.5304, "num_input_tokens_seen": 85880836, "step": 1370 }, { "epoch": 4.559068219633943, "loss": 0.43152907490730286, "loss_ce": 1.0513085726415738e-05, "loss_iou": 0.08203125, "loss_num": 0.053466796875, "loss_xval": 0.431640625, "num_input_tokens_seen": 85880836, "step": 1370 }, { "epoch": 4.5623960066555735, "grad_norm": 14.004551887512207, "learning_rate": 5e-06, "loss": 0.7525, "num_input_tokens_seen": 85944652, "step": 1371 }, { "epoch": 4.5623960066555735, "loss": 0.7391983866691589, "loss_ce": 6.25981847406365e-05, "loss_iou": 0.26953125, "loss_num": 0.039794921875, "loss_xval": 0.73828125, "num_input_tokens_seen": 85944652, "step": 1371 }, { "epoch": 4.565723793677205, "grad_norm": 21.5966739654541, "learning_rate": 5e-06, "loss": 0.6888, "num_input_tokens_seen": 86006448, "step": 1372 }, { "epoch": 4.565723793677205, "loss": 0.74265456199646, "loss_ce": 0.001199502730742097, "loss_iou": 0.26953125, "loss_num": 0.040771484375, "loss_xval": 0.7421875, "num_input_tokens_seen": 86006448, "step": 1372 }, { "epoch": 4.569051580698836, "grad_norm": 10.73180866241455, "learning_rate": 5e-06, "loss": 0.711, "num_input_tokens_seen": 86069348, "step": 1373 }, { "epoch": 4.569051580698836, "loss": 0.8499873876571655, "loss_ce": 0.0001338462607236579, "loss_iou": 0.2578125, "loss_num": 0.06689453125, "loss_xval": 0.8515625, "num_input_tokens_seen": 86069348, "step": 1373 }, { "epoch": 4.572379367720466, "grad_norm": 19.848661422729492, "learning_rate": 5e-06, "loss": 0.8061, "num_input_tokens_seen": 86133076, "step": 1374 }, { "epoch": 4.572379367720466, "loss": 0.6911985278129578, "loss_ce": 0.0007078329799696803, "loss_iou": 0.2421875, "loss_num": 0.041015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 86133076, "step": 1374 }, { "epoch": 4.575707154742097, "grad_norm": 16.093006134033203, "learning_rate": 5e-06, "loss": 0.6143, "num_input_tokens_seen": 86194668, "step": 1375 }, { "epoch": 4.575707154742097, "loss": 0.9343423843383789, "loss_ce": 0.0009927398059517145, "loss_iou": 0.34375, "loss_num": 0.04931640625, "loss_xval": 0.93359375, "num_input_tokens_seen": 86194668, "step": 1375 }, { "epoch": 4.5790349417637275, "grad_norm": 35.044029235839844, "learning_rate": 5e-06, "loss": 0.8292, "num_input_tokens_seen": 86257604, "step": 1376 }, { "epoch": 4.5790349417637275, "loss": 0.8202038407325745, "loss_ce": 1.3448401659843512e-05, "loss_iou": 0.310546875, "loss_num": 0.0400390625, "loss_xval": 0.8203125, "num_input_tokens_seen": 86257604, "step": 1376 }, { "epoch": 4.582362728785358, "grad_norm": 20.022912979125977, "learning_rate": 5e-06, "loss": 0.4297, "num_input_tokens_seen": 86318188, "step": 1377 }, { "epoch": 4.582362728785358, "loss": 0.5561081171035767, "loss_ce": 0.0001999134401557967, "loss_iou": 0.171875, "loss_num": 0.04248046875, "loss_xval": 0.5546875, "num_input_tokens_seen": 86318188, "step": 1377 }, { "epoch": 4.585690515806989, "grad_norm": 28.097261428833008, "learning_rate": 5e-06, "loss": 0.617, "num_input_tokens_seen": 86381132, "step": 1378 }, { "epoch": 4.585690515806989, "loss": 0.5312559604644775, "loss_ce": 6.014421614963794e-06, "loss_iou": 0.193359375, "loss_num": 0.029052734375, "loss_xval": 0.53125, "num_input_tokens_seen": 86381132, "step": 1378 }, { "epoch": 4.589018302828619, "grad_norm": 17.568449020385742, "learning_rate": 5e-06, "loss": 0.5779, "num_input_tokens_seen": 86442004, "step": 1379 }, { "epoch": 4.589018302828619, "loss": 0.30957379937171936, "loss_ce": 3.469650891929632e-06, "loss_iou": 0.076171875, "loss_num": 0.031494140625, "loss_xval": 0.30859375, "num_input_tokens_seen": 86442004, "step": 1379 }, { "epoch": 4.59234608985025, "grad_norm": 10.973957061767578, "learning_rate": 5e-06, "loss": 0.8009, "num_input_tokens_seen": 86505552, "step": 1380 }, { "epoch": 4.59234608985025, "loss": 0.9419060945510864, "loss_ce": 1.1563648513401859e-05, "loss_iou": 0.33203125, "loss_num": 0.055419921875, "loss_xval": 0.94140625, "num_input_tokens_seen": 86505552, "step": 1380 }, { "epoch": 4.595673876871881, "grad_norm": 18.400348663330078, "learning_rate": 5e-06, "loss": 0.6309, "num_input_tokens_seen": 86567092, "step": 1381 }, { "epoch": 4.595673876871881, "loss": 0.35449641942977905, "loss_ce": 4.244451247359393e-06, "loss_iou": 0.08642578125, "loss_num": 0.0361328125, "loss_xval": 0.35546875, "num_input_tokens_seen": 86567092, "step": 1381 }, { "epoch": 4.599001663893511, "grad_norm": 16.44472312927246, "learning_rate": 5e-06, "loss": 0.7037, "num_input_tokens_seen": 86629636, "step": 1382 }, { "epoch": 4.599001663893511, "loss": 0.7437885999679565, "loss_ce": 0.0006245552212931216, "loss_iou": 0.255859375, "loss_num": 0.046142578125, "loss_xval": 0.7421875, "num_input_tokens_seen": 86629636, "step": 1382 }, { "epoch": 4.602329450915142, "grad_norm": 10.367685317993164, "learning_rate": 5e-06, "loss": 0.6743, "num_input_tokens_seen": 86693452, "step": 1383 }, { "epoch": 4.602329450915142, "loss": 0.8325238823890686, "loss_ce": 4.332084245106671e-06, "loss_iou": 0.31640625, "loss_num": 0.040283203125, "loss_xval": 0.83203125, "num_input_tokens_seen": 86693452, "step": 1383 }, { "epoch": 4.605657237936772, "grad_norm": 13.16484260559082, "learning_rate": 5e-06, "loss": 0.7708, "num_input_tokens_seen": 86757464, "step": 1384 }, { "epoch": 4.605657237936772, "loss": 0.7749779224395752, "loss_ce": 0.0005639095325022936, "loss_iou": 0.3125, "loss_num": 0.0299072265625, "loss_xval": 0.7734375, "num_input_tokens_seen": 86757464, "step": 1384 }, { "epoch": 4.608985024958403, "grad_norm": 16.996618270874023, "learning_rate": 5e-06, "loss": 0.6426, "num_input_tokens_seen": 86816280, "step": 1385 }, { "epoch": 4.608985024958403, "loss": 0.44452953338623047, "loss_ce": 1.048046578944195e-05, "loss_iou": 0.1103515625, "loss_num": 0.044677734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 86816280, "step": 1385 }, { "epoch": 4.612312811980034, "grad_norm": 7.123061180114746, "learning_rate": 5e-06, "loss": 0.716, "num_input_tokens_seen": 86879712, "step": 1386 }, { "epoch": 4.612312811980034, "loss": 0.589363694190979, "loss_ce": 8.274035280919634e-06, "loss_iou": 0.1806640625, "loss_num": 0.045654296875, "loss_xval": 0.58984375, "num_input_tokens_seen": 86879712, "step": 1386 }, { "epoch": 4.615640599001664, "grad_norm": 9.968111991882324, "learning_rate": 5e-06, "loss": 0.7198, "num_input_tokens_seen": 86943036, "step": 1387 }, { "epoch": 4.615640599001664, "loss": 0.6882371306419373, "loss_ce": 4.710074335889658e-06, "loss_iou": 0.26953125, "loss_num": 0.0296630859375, "loss_xval": 0.6875, "num_input_tokens_seen": 86943036, "step": 1387 }, { "epoch": 4.618968386023295, "grad_norm": 7.521435737609863, "learning_rate": 5e-06, "loss": 0.7067, "num_input_tokens_seen": 87005832, "step": 1388 }, { "epoch": 4.618968386023295, "loss": 0.6977737545967102, "loss_ce": 0.0005081485724076629, "loss_iou": 0.2216796875, "loss_num": 0.05078125, "loss_xval": 0.6953125, "num_input_tokens_seen": 87005832, "step": 1388 }, { "epoch": 4.6222961730449255, "grad_norm": 41.41930389404297, "learning_rate": 5e-06, "loss": 0.7386, "num_input_tokens_seen": 87067064, "step": 1389 }, { "epoch": 4.6222961730449255, "loss": 0.6512438654899597, "loss_ce": 0.0006091056275181472, "loss_iou": 0.2060546875, "loss_num": 0.0478515625, "loss_xval": 0.65234375, "num_input_tokens_seen": 87067064, "step": 1389 }, { "epoch": 4.625623960066556, "grad_norm": 17.593530654907227, "learning_rate": 5e-06, "loss": 0.7477, "num_input_tokens_seen": 87130024, "step": 1390 }, { "epoch": 4.625623960066556, "loss": 0.5482279062271118, "loss_ce": 0.00013217845116741955, "loss_iou": 0.1875, "loss_num": 0.034912109375, "loss_xval": 0.546875, "num_input_tokens_seen": 87130024, "step": 1390 }, { "epoch": 4.628951747088187, "grad_norm": 21.13423728942871, "learning_rate": 5e-06, "loss": 0.7824, "num_input_tokens_seen": 87194072, "step": 1391 }, { "epoch": 4.628951747088187, "loss": 0.5900379419326782, "loss_ce": 0.0003162088105455041, "loss_iou": 0.2060546875, "loss_num": 0.03564453125, "loss_xval": 0.58984375, "num_input_tokens_seen": 87194072, "step": 1391 }, { "epoch": 4.632279534109817, "grad_norm": 30.159034729003906, "learning_rate": 5e-06, "loss": 0.7439, "num_input_tokens_seen": 87256772, "step": 1392 }, { "epoch": 4.632279534109817, "loss": 0.6605543494224548, "loss_ce": 3.193483280483633e-05, "loss_iou": 0.2373046875, "loss_num": 0.03759765625, "loss_xval": 0.66015625, "num_input_tokens_seen": 87256772, "step": 1392 }, { "epoch": 4.635607321131448, "grad_norm": 17.288198471069336, "learning_rate": 5e-06, "loss": 0.7511, "num_input_tokens_seen": 87320728, "step": 1393 }, { "epoch": 4.635607321131448, "loss": 0.7466317415237427, "loss_ce": 0.0002938044199254364, "loss_iou": 0.265625, "loss_num": 0.042724609375, "loss_xval": 0.74609375, "num_input_tokens_seen": 87320728, "step": 1393 }, { "epoch": 4.6389351081530785, "grad_norm": 19.027690887451172, "learning_rate": 5e-06, "loss": 0.6129, "num_input_tokens_seen": 87381188, "step": 1394 }, { "epoch": 4.6389351081530785, "loss": 0.5858246088027954, "loss_ce": 9.231265721609816e-06, "loss_iou": 0.1875, "loss_num": 0.04248046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 87381188, "step": 1394 }, { "epoch": 4.642262895174709, "grad_norm": 33.02770233154297, "learning_rate": 5e-06, "loss": 0.9622, "num_input_tokens_seen": 87444392, "step": 1395 }, { "epoch": 4.642262895174709, "loss": 0.8132398128509521, "loss_ce": 7.421998816425912e-06, "loss_iou": 0.353515625, "loss_num": 0.0213623046875, "loss_xval": 0.8125, "num_input_tokens_seen": 87444392, "step": 1395 }, { "epoch": 4.64559068219634, "grad_norm": 31.30838966369629, "learning_rate": 5e-06, "loss": 0.7085, "num_input_tokens_seen": 87508216, "step": 1396 }, { "epoch": 4.64559068219634, "loss": 0.715539276599884, "loss_ce": 0.00020728196250274777, "loss_iou": 0.275390625, "loss_num": 0.032958984375, "loss_xval": 0.71484375, "num_input_tokens_seen": 87508216, "step": 1396 }, { "epoch": 4.64891846921797, "grad_norm": 259.32489013671875, "learning_rate": 5e-06, "loss": 0.6664, "num_input_tokens_seen": 87571040, "step": 1397 }, { "epoch": 4.64891846921797, "loss": 1.0068187713623047, "loss_ce": 0.0002270036202389747, "loss_iou": 0.3828125, "loss_num": 0.0478515625, "loss_xval": 1.0078125, "num_input_tokens_seen": 87571040, "step": 1397 }, { "epoch": 4.652246256239601, "grad_norm": 15.726283073425293, "learning_rate": 5e-06, "loss": 0.7203, "num_input_tokens_seen": 87635252, "step": 1398 }, { "epoch": 4.652246256239601, "loss": 0.7017278671264648, "loss_ce": 6.690443569823401e-06, "loss_iou": 0.2412109375, "loss_num": 0.0439453125, "loss_xval": 0.703125, "num_input_tokens_seen": 87635252, "step": 1398 }, { "epoch": 4.655574043261232, "grad_norm": 13.434452056884766, "learning_rate": 5e-06, "loss": 0.8226, "num_input_tokens_seen": 87699088, "step": 1399 }, { "epoch": 4.655574043261232, "loss": 0.8787941932678223, "loss_ce": 1.004004843707662e-05, "loss_iou": 0.318359375, "loss_num": 0.04833984375, "loss_xval": 0.87890625, "num_input_tokens_seen": 87699088, "step": 1399 }, { "epoch": 4.658901830282862, "grad_norm": 10.94394588470459, "learning_rate": 5e-06, "loss": 0.5418, "num_input_tokens_seen": 87761788, "step": 1400 }, { "epoch": 4.658901830282862, "loss": 0.5910704731941223, "loss_ce": 6.02423278905917e-06, "loss_iou": 0.1904296875, "loss_num": 0.0419921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 87761788, "step": 1400 }, { "epoch": 4.662229617304493, "grad_norm": 14.59238052368164, "learning_rate": 5e-06, "loss": 0.7979, "num_input_tokens_seen": 87824600, "step": 1401 }, { "epoch": 4.662229617304493, "loss": 0.6216334104537964, "loss_ce": 0.0007227640016935766, "loss_iou": 0.1826171875, "loss_num": 0.051025390625, "loss_xval": 0.62109375, "num_input_tokens_seen": 87824600, "step": 1401 }, { "epoch": 4.665557404326123, "grad_norm": 35.27493667602539, "learning_rate": 5e-06, "loss": 0.6161, "num_input_tokens_seen": 87887504, "step": 1402 }, { "epoch": 4.665557404326123, "loss": 0.5002973079681396, "loss_ce": 0.0002972899528685957, "loss_iou": 0.181640625, "loss_num": 0.027587890625, "loss_xval": 0.5, "num_input_tokens_seen": 87887504, "step": 1402 }, { "epoch": 4.668885191347754, "grad_norm": 24.9193172454834, "learning_rate": 5e-06, "loss": 0.635, "num_input_tokens_seen": 87950680, "step": 1403 }, { "epoch": 4.668885191347754, "loss": 0.5660473108291626, "loss_ce": 7.2361781349172816e-06, "loss_iou": 0.2138671875, "loss_num": 0.0277099609375, "loss_xval": 0.56640625, "num_input_tokens_seen": 87950680, "step": 1403 }, { "epoch": 4.672212978369385, "grad_norm": 15.197111129760742, "learning_rate": 5e-06, "loss": 0.8246, "num_input_tokens_seen": 88012704, "step": 1404 }, { "epoch": 4.672212978369385, "loss": 0.6560282707214355, "loss_ce": 0.001121073728427291, "loss_iou": 0.21484375, "loss_num": 0.044921875, "loss_xval": 0.65625, "num_input_tokens_seen": 88012704, "step": 1404 }, { "epoch": 4.675540765391015, "grad_norm": 38.91653060913086, "learning_rate": 5e-06, "loss": 0.6649, "num_input_tokens_seen": 88075520, "step": 1405 }, { "epoch": 4.675540765391015, "loss": 0.8304678797721863, "loss_ce": 0.0008779908530414104, "loss_iou": 0.2890625, "loss_num": 0.050048828125, "loss_xval": 0.828125, "num_input_tokens_seen": 88075520, "step": 1405 }, { "epoch": 4.678868552412646, "grad_norm": 9.682918548583984, "learning_rate": 5e-06, "loss": 0.5147, "num_input_tokens_seen": 88139348, "step": 1406 }, { "epoch": 4.678868552412646, "loss": 0.7572877407073975, "loss_ce": 0.0009400282287970185, "loss_iou": 0.259765625, "loss_num": 0.047607421875, "loss_xval": 0.7578125, "num_input_tokens_seen": 88139348, "step": 1406 }, { "epoch": 4.6821963394342765, "grad_norm": 8.297053337097168, "learning_rate": 5e-06, "loss": 0.5352, "num_input_tokens_seen": 88200592, "step": 1407 }, { "epoch": 4.6821963394342765, "loss": 0.42061173915863037, "loss_ce": 1.84620530490065e-05, "loss_iou": 0.1474609375, "loss_num": 0.0252685546875, "loss_xval": 0.419921875, "num_input_tokens_seen": 88200592, "step": 1407 }, { "epoch": 4.685524126455907, "grad_norm": 24.615375518798828, "learning_rate": 5e-06, "loss": 0.8693, "num_input_tokens_seen": 88262280, "step": 1408 }, { "epoch": 4.685524126455907, "loss": 0.8850483298301697, "loss_ce": 3.8560301618417725e-05, "loss_iou": 0.33203125, "loss_num": 0.044189453125, "loss_xval": 0.88671875, "num_input_tokens_seen": 88262280, "step": 1408 }, { "epoch": 4.688851913477538, "grad_norm": 28.879241943359375, "learning_rate": 5e-06, "loss": 0.7895, "num_input_tokens_seen": 88325444, "step": 1409 }, { "epoch": 4.688851913477538, "loss": 0.8084349632263184, "loss_ce": 0.0017943980637937784, "loss_iou": 0.30078125, "loss_num": 0.041259765625, "loss_xval": 0.8046875, "num_input_tokens_seen": 88325444, "step": 1409 }, { "epoch": 4.692179700499168, "grad_norm": 16.046236038208008, "learning_rate": 5e-06, "loss": 0.4716, "num_input_tokens_seen": 88387372, "step": 1410 }, { "epoch": 4.692179700499168, "loss": 0.6305035352706909, "loss_ce": 1.0359564839745872e-05, "loss_iou": 0.203125, "loss_num": 0.044921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 88387372, "step": 1410 }, { "epoch": 4.695507487520799, "grad_norm": 9.54500675201416, "learning_rate": 5e-06, "loss": 0.752, "num_input_tokens_seen": 88450328, "step": 1411 }, { "epoch": 4.695507487520799, "loss": 0.5868390798568726, "loss_ce": 0.0006574149592779577, "loss_iou": 0.1416015625, "loss_num": 0.060791015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 88450328, "step": 1411 }, { "epoch": 4.6988352745424296, "grad_norm": 15.332375526428223, "learning_rate": 5e-06, "loss": 0.6946, "num_input_tokens_seen": 88511536, "step": 1412 }, { "epoch": 4.6988352745424296, "loss": 0.8135286569595337, "loss_ce": 0.0004182563570793718, "loss_iou": 0.279296875, "loss_num": 0.05078125, "loss_xval": 0.8125, "num_input_tokens_seen": 88511536, "step": 1412 }, { "epoch": 4.70216306156406, "grad_norm": 13.92263126373291, "learning_rate": 5e-06, "loss": 0.447, "num_input_tokens_seen": 88573152, "step": 1413 }, { "epoch": 4.70216306156406, "loss": 0.4112605154514313, "loss_ce": 5.630700798064936e-06, "loss_iou": 0.1455078125, "loss_num": 0.0240478515625, "loss_xval": 0.412109375, "num_input_tokens_seen": 88573152, "step": 1413 }, { "epoch": 4.705490848585691, "grad_norm": 23.44112777709961, "learning_rate": 5e-06, "loss": 0.6818, "num_input_tokens_seen": 88636160, "step": 1414 }, { "epoch": 4.705490848585691, "loss": 0.9239535331726074, "loss_ce": 0.0011019170051440597, "loss_iou": 0.33203125, "loss_num": 0.051513671875, "loss_xval": 0.921875, "num_input_tokens_seen": 88636160, "step": 1414 }, { "epoch": 4.708818635607321, "grad_norm": 21.574085235595703, "learning_rate": 5e-06, "loss": 0.6247, "num_input_tokens_seen": 88698896, "step": 1415 }, { "epoch": 4.708818635607321, "loss": 0.6632546782493591, "loss_ce": 0.0002908269816543907, "loss_iou": 0.16796875, "loss_num": 0.0654296875, "loss_xval": 0.6640625, "num_input_tokens_seen": 88698896, "step": 1415 }, { "epoch": 4.712146422628952, "grad_norm": 29.22531509399414, "learning_rate": 5e-06, "loss": 0.6788, "num_input_tokens_seen": 88762200, "step": 1416 }, { "epoch": 4.712146422628952, "loss": 0.4651842713356018, "loss_ce": 0.0011339938500896096, "loss_iou": 0.1875, "loss_num": 0.0177001953125, "loss_xval": 0.46484375, "num_input_tokens_seen": 88762200, "step": 1416 }, { "epoch": 4.715474209650583, "grad_norm": 25.034208297729492, "learning_rate": 5e-06, "loss": 0.6795, "num_input_tokens_seen": 88824484, "step": 1417 }, { "epoch": 4.715474209650583, "loss": 0.6723681688308716, "loss_ce": 4.903771696262993e-06, "loss_iou": 0.240234375, "loss_num": 0.03857421875, "loss_xval": 0.671875, "num_input_tokens_seen": 88824484, "step": 1417 }, { "epoch": 4.718801996672213, "grad_norm": 8.118911743164062, "learning_rate": 5e-06, "loss": 0.5332, "num_input_tokens_seen": 88886292, "step": 1418 }, { "epoch": 4.718801996672213, "loss": 0.6744147539138794, "loss_ce": 0.0005561455618590117, "loss_iou": 0.2236328125, "loss_num": 0.045166015625, "loss_xval": 0.67578125, "num_input_tokens_seen": 88886292, "step": 1418 }, { "epoch": 4.722129783693844, "grad_norm": 8.823586463928223, "learning_rate": 5e-06, "loss": 0.6604, "num_input_tokens_seen": 88949668, "step": 1419 }, { "epoch": 4.722129783693844, "loss": 0.6599315404891968, "loss_ce": 1.941867230925709e-05, "loss_iou": 0.2421875, "loss_num": 0.03515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 88949668, "step": 1419 }, { "epoch": 4.7254575707154745, "grad_norm": 8.537189483642578, "learning_rate": 5e-06, "loss": 0.6307, "num_input_tokens_seen": 89012008, "step": 1420 }, { "epoch": 4.7254575707154745, "loss": 0.5407520532608032, "loss_ce": 0.0007130015874281526, "loss_iou": 0.197265625, "loss_num": 0.0291748046875, "loss_xval": 0.5390625, "num_input_tokens_seen": 89012008, "step": 1420 }, { "epoch": 4.728785357737105, "grad_norm": 6.051877021789551, "learning_rate": 5e-06, "loss": 0.7279, "num_input_tokens_seen": 89076488, "step": 1421 }, { "epoch": 4.728785357737105, "loss": 0.7321413159370422, "loss_ce": 0.00032978906529024243, "loss_iou": 0.259765625, "loss_num": 0.042724609375, "loss_xval": 0.73046875, "num_input_tokens_seen": 89076488, "step": 1421 }, { "epoch": 4.732113144758736, "grad_norm": 13.135444641113281, "learning_rate": 5e-06, "loss": 0.5427, "num_input_tokens_seen": 89138220, "step": 1422 }, { "epoch": 4.732113144758736, "loss": 0.6555225849151611, "loss_ce": 5.03173032484483e-06, "loss_iou": 0.1962890625, "loss_num": 0.052734375, "loss_xval": 0.65625, "num_input_tokens_seen": 89138220, "step": 1422 }, { "epoch": 4.735440931780366, "grad_norm": 16.723936080932617, "learning_rate": 5e-06, "loss": 0.9415, "num_input_tokens_seen": 89199868, "step": 1423 }, { "epoch": 4.735440931780366, "loss": 0.7951744794845581, "loss_ce": 8.43474299472291e-06, "loss_iou": 0.25, "loss_num": 0.058837890625, "loss_xval": 0.796875, "num_input_tokens_seen": 89199868, "step": 1423 }, { "epoch": 4.738768718801997, "grad_norm": 16.47538185119629, "learning_rate": 5e-06, "loss": 0.7721, "num_input_tokens_seen": 89263632, "step": 1424 }, { "epoch": 4.738768718801997, "loss": 0.5803350210189819, "loss_ce": 1.276938655792037e-05, "loss_iou": 0.1826171875, "loss_num": 0.04296875, "loss_xval": 0.58203125, "num_input_tokens_seen": 89263632, "step": 1424 }, { "epoch": 4.7420965058236275, "grad_norm": 16.74688148498535, "learning_rate": 5e-06, "loss": 0.4707, "num_input_tokens_seen": 89325800, "step": 1425 }, { "epoch": 4.7420965058236275, "loss": 0.5409659147262573, "loss_ce": 0.00025547522818669677, "loss_iou": 0.1982421875, "loss_num": 0.029052734375, "loss_xval": 0.5390625, "num_input_tokens_seen": 89325800, "step": 1425 }, { "epoch": 4.745424292845258, "grad_norm": 7.459796905517578, "learning_rate": 5e-06, "loss": 0.3887, "num_input_tokens_seen": 89388640, "step": 1426 }, { "epoch": 4.745424292845258, "loss": 0.44415873289108276, "loss_ce": 6.692421447951347e-05, "loss_iou": 0.1650390625, "loss_num": 0.02294921875, "loss_xval": 0.443359375, "num_input_tokens_seen": 89388640, "step": 1426 }, { "epoch": 4.748752079866889, "grad_norm": 22.47068214416504, "learning_rate": 5e-06, "loss": 0.5686, "num_input_tokens_seen": 89451016, "step": 1427 }, { "epoch": 4.748752079866889, "loss": 0.5648741722106934, "loss_ce": 0.0009093122789636254, "loss_iou": 0.1787109375, "loss_num": 0.041259765625, "loss_xval": 0.5625, "num_input_tokens_seen": 89451016, "step": 1427 }, { "epoch": 4.752079866888519, "grad_norm": 18.738285064697266, "learning_rate": 5e-06, "loss": 0.6894, "num_input_tokens_seen": 89513320, "step": 1428 }, { "epoch": 4.752079866888519, "loss": 0.682348370552063, "loss_ce": 0.00046356869279406965, "loss_iou": 0.234375, "loss_num": 0.042724609375, "loss_xval": 0.68359375, "num_input_tokens_seen": 89513320, "step": 1428 }, { "epoch": 4.75540765391015, "grad_norm": 15.870285987854004, "learning_rate": 5e-06, "loss": 0.6165, "num_input_tokens_seen": 89576172, "step": 1429 }, { "epoch": 4.75540765391015, "loss": 0.6336005926132202, "loss_ce": 5.568322376348078e-05, "loss_iou": 0.2197265625, "loss_num": 0.038818359375, "loss_xval": 0.6328125, "num_input_tokens_seen": 89576172, "step": 1429 }, { "epoch": 4.758735440931781, "grad_norm": 14.812638282775879, "learning_rate": 5e-06, "loss": 0.5155, "num_input_tokens_seen": 89639340, "step": 1430 }, { "epoch": 4.758735440931781, "loss": 0.5822361707687378, "loss_ce": 0.0002049263275694102, "loss_iou": 0.1787109375, "loss_num": 0.044677734375, "loss_xval": 0.58203125, "num_input_tokens_seen": 89639340, "step": 1430 }, { "epoch": 4.762063227953411, "grad_norm": 15.044964790344238, "learning_rate": 5e-06, "loss": 0.7304, "num_input_tokens_seen": 89702648, "step": 1431 }, { "epoch": 4.762063227953411, "loss": 0.8407307863235474, "loss_ce": 3.256250420236029e-05, "loss_iou": 0.310546875, "loss_num": 0.044189453125, "loss_xval": 0.83984375, "num_input_tokens_seen": 89702648, "step": 1431 }, { "epoch": 4.765391014975042, "grad_norm": 32.418582916259766, "learning_rate": 5e-06, "loss": 0.7699, "num_input_tokens_seen": 89766308, "step": 1432 }, { "epoch": 4.765391014975042, "loss": 0.7296576499938965, "loss_ce": 0.00040958679164759815, "loss_iou": 0.255859375, "loss_num": 0.043701171875, "loss_xval": 0.73046875, "num_input_tokens_seen": 89766308, "step": 1432 }, { "epoch": 4.768718801996672, "grad_norm": 19.981163024902344, "learning_rate": 5e-06, "loss": 0.4537, "num_input_tokens_seen": 89828716, "step": 1433 }, { "epoch": 4.768718801996672, "loss": 0.39242231845855713, "loss_ce": 0.0016752248629927635, "loss_iou": 0.146484375, "loss_num": 0.01953125, "loss_xval": 0.390625, "num_input_tokens_seen": 89828716, "step": 1433 }, { "epoch": 4.772046589018303, "grad_norm": 10.683984756469727, "learning_rate": 5e-06, "loss": 0.715, "num_input_tokens_seen": 89891856, "step": 1434 }, { "epoch": 4.772046589018303, "loss": 0.7536187171936035, "loss_ce": 0.0004449051048140973, "loss_iou": 0.2177734375, "loss_num": 0.0634765625, "loss_xval": 0.75390625, "num_input_tokens_seen": 89891856, "step": 1434 }, { "epoch": 4.775374376039934, "grad_norm": 12.289992332458496, "learning_rate": 5e-06, "loss": 0.8538, "num_input_tokens_seen": 89954484, "step": 1435 }, { "epoch": 4.775374376039934, "loss": 0.6435446739196777, "loss_ce": 0.00023415654140990227, "loss_iou": 0.208984375, "loss_num": 0.044921875, "loss_xval": 0.64453125, "num_input_tokens_seen": 89954484, "step": 1435 }, { "epoch": 4.778702163061564, "grad_norm": 16.614036560058594, "learning_rate": 5e-06, "loss": 0.6228, "num_input_tokens_seen": 90016408, "step": 1436 }, { "epoch": 4.778702163061564, "loss": 0.793232798576355, "loss_ce": 0.0002640403981786221, "loss_iou": 0.25, "loss_num": 0.05859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 90016408, "step": 1436 }, { "epoch": 4.782029950083195, "grad_norm": 11.303678512573242, "learning_rate": 5e-06, "loss": 0.7452, "num_input_tokens_seen": 90079488, "step": 1437 }, { "epoch": 4.782029950083195, "loss": 0.62999027967453, "loss_ce": 0.00010747316991910338, "loss_iou": 0.224609375, "loss_num": 0.0361328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 90079488, "step": 1437 }, { "epoch": 4.7853577371048255, "grad_norm": 38.122825622558594, "learning_rate": 5e-06, "loss": 0.8768, "num_input_tokens_seen": 90142916, "step": 1438 }, { "epoch": 4.7853577371048255, "loss": 0.8739835023880005, "loss_ce": 0.0011807718547061086, "loss_iou": 0.328125, "loss_num": 0.043212890625, "loss_xval": 0.87109375, "num_input_tokens_seen": 90142916, "step": 1438 }, { "epoch": 4.788685524126456, "grad_norm": 21.891645431518555, "learning_rate": 5e-06, "loss": 0.7699, "num_input_tokens_seen": 90206372, "step": 1439 }, { "epoch": 4.788685524126456, "loss": 0.9136231541633606, "loss_ce": 4.8945243179332465e-05, "loss_iou": 0.37109375, "loss_num": 0.0341796875, "loss_xval": 0.9140625, "num_input_tokens_seen": 90206372, "step": 1439 }, { "epoch": 4.792013311148087, "grad_norm": 6.0356011390686035, "learning_rate": 5e-06, "loss": 0.5411, "num_input_tokens_seen": 90268296, "step": 1440 }, { "epoch": 4.792013311148087, "loss": 0.5046484470367432, "loss_ce": 9.745693205331918e-06, "loss_iou": 0.17578125, "loss_num": 0.0303955078125, "loss_xval": 0.50390625, "num_input_tokens_seen": 90268296, "step": 1440 }, { "epoch": 4.795341098169717, "grad_norm": 43.16458511352539, "learning_rate": 5e-06, "loss": 0.83, "num_input_tokens_seen": 90329808, "step": 1441 }, { "epoch": 4.795341098169717, "loss": 0.8969796895980835, "loss_ce": 7.049820851534605e-06, "loss_iou": 0.337890625, "loss_num": 0.044677734375, "loss_xval": 0.8984375, "num_input_tokens_seen": 90329808, "step": 1441 }, { "epoch": 4.798668885191348, "grad_norm": 36.28164291381836, "learning_rate": 5e-06, "loss": 0.6166, "num_input_tokens_seen": 90392836, "step": 1442 }, { "epoch": 4.798668885191348, "loss": 0.5822373032569885, "loss_ce": 0.0005722856149077415, "loss_iou": 0.23828125, "loss_num": 0.0211181640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 90392836, "step": 1442 }, { "epoch": 4.8019966722129785, "grad_norm": 17.036026000976562, "learning_rate": 5e-06, "loss": 0.6163, "num_input_tokens_seen": 90456084, "step": 1443 }, { "epoch": 4.8019966722129785, "loss": 0.7121388912200928, "loss_ce": 0.000468974671093747, "loss_iou": 0.234375, "loss_num": 0.048583984375, "loss_xval": 0.7109375, "num_input_tokens_seen": 90456084, "step": 1443 }, { "epoch": 4.805324459234609, "grad_norm": 13.723752975463867, "learning_rate": 5e-06, "loss": 0.537, "num_input_tokens_seen": 90519376, "step": 1444 }, { "epoch": 4.805324459234609, "loss": 0.3949107825756073, "loss_ce": 1.3307385415828321e-05, "loss_iou": 0.1455078125, "loss_num": 0.0208740234375, "loss_xval": 0.39453125, "num_input_tokens_seen": 90519376, "step": 1444 }, { "epoch": 4.80865224625624, "grad_norm": 14.543593406677246, "learning_rate": 5e-06, "loss": 0.6377, "num_input_tokens_seen": 90583048, "step": 1445 }, { "epoch": 4.80865224625624, "loss": 0.6011487245559692, "loss_ce": 0.00031868519727140665, "loss_iou": 0.2001953125, "loss_num": 0.0400390625, "loss_xval": 0.6015625, "num_input_tokens_seen": 90583048, "step": 1445 }, { "epoch": 4.81198003327787, "grad_norm": 10.937410354614258, "learning_rate": 5e-06, "loss": 0.5167, "num_input_tokens_seen": 90646000, "step": 1446 }, { "epoch": 4.81198003327787, "loss": 0.6793892979621887, "loss_ce": 0.0009224972454831004, "loss_iou": 0.25, "loss_num": 0.0361328125, "loss_xval": 0.6796875, "num_input_tokens_seen": 90646000, "step": 1446 }, { "epoch": 4.815307820299501, "grad_norm": 12.456496238708496, "learning_rate": 5e-06, "loss": 0.5521, "num_input_tokens_seen": 90707336, "step": 1447 }, { "epoch": 4.815307820299501, "loss": 0.5406190156936646, "loss_ce": 0.0005188892828300595, "loss_iou": 0.1875, "loss_num": 0.032958984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 90707336, "step": 1447 }, { "epoch": 4.818635607321132, "grad_norm": 5.625698089599609, "learning_rate": 5e-06, "loss": 0.7707, "num_input_tokens_seen": 90769724, "step": 1448 }, { "epoch": 4.818635607321132, "loss": 0.6896767020225525, "loss_ce": 0.0007118177600204945, "loss_iou": 0.240234375, "loss_num": 0.04150390625, "loss_xval": 0.6875, "num_input_tokens_seen": 90769724, "step": 1448 }, { "epoch": 4.821963394342762, "grad_norm": 11.702658653259277, "learning_rate": 5e-06, "loss": 0.3704, "num_input_tokens_seen": 90832580, "step": 1449 }, { "epoch": 4.821963394342762, "loss": 0.41007310152053833, "loss_ce": 0.0004051065188832581, "loss_iou": 0.1357421875, "loss_num": 0.027587890625, "loss_xval": 0.41015625, "num_input_tokens_seen": 90832580, "step": 1449 }, { "epoch": 4.825291181364393, "grad_norm": 25.472347259521484, "learning_rate": 5e-06, "loss": 0.614, "num_input_tokens_seen": 90893812, "step": 1450 }, { "epoch": 4.825291181364393, "loss": 0.4433688521385193, "loss_ce": 9.507555660093203e-06, "loss_iou": 0.1416015625, "loss_num": 0.031982421875, "loss_xval": 0.443359375, "num_input_tokens_seen": 90893812, "step": 1450 }, { "epoch": 4.8286189683860234, "grad_norm": 24.175207138061523, "learning_rate": 5e-06, "loss": 0.5945, "num_input_tokens_seen": 90956160, "step": 1451 }, { "epoch": 4.8286189683860234, "loss": 0.7115362286567688, "loss_ce": 0.0010869718389585614, "loss_iou": 0.2216796875, "loss_num": 0.05322265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 90956160, "step": 1451 }, { "epoch": 4.831946755407654, "grad_norm": 22.367578506469727, "learning_rate": 5e-06, "loss": 0.684, "num_input_tokens_seen": 91018224, "step": 1452 }, { "epoch": 4.831946755407654, "loss": 0.5522526502609253, "loss_ce": 6.579542059625965e-06, "loss_iou": 0.2197265625, "loss_num": 0.0224609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 91018224, "step": 1452 }, { "epoch": 4.835274542429285, "grad_norm": 16.159204483032227, "learning_rate": 5e-06, "loss": 0.618, "num_input_tokens_seen": 91079168, "step": 1453 }, { "epoch": 4.835274542429285, "loss": 0.8116936683654785, "loss_ce": 4.8155787226278335e-05, "loss_iou": 0.27734375, "loss_num": 0.051513671875, "loss_xval": 0.8125, "num_input_tokens_seen": 91079168, "step": 1453 }, { "epoch": 4.838602329450915, "grad_norm": 25.261545181274414, "learning_rate": 5e-06, "loss": 0.8063, "num_input_tokens_seen": 91142508, "step": 1454 }, { "epoch": 4.838602329450915, "loss": 0.7533633708953857, "loss_ce": 0.00018951859965454787, "loss_iou": 0.271484375, "loss_num": 0.04248046875, "loss_xval": 0.75390625, "num_input_tokens_seen": 91142508, "step": 1454 }, { "epoch": 4.841930116472546, "grad_norm": 12.562150001525879, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 91206160, "step": 1455 }, { "epoch": 4.841930116472546, "loss": 0.6163426637649536, "loss_ce": 9.607569154468365e-06, "loss_iou": 0.2255859375, "loss_num": 0.032958984375, "loss_xval": 0.6171875, "num_input_tokens_seen": 91206160, "step": 1455 }, { "epoch": 4.8452579034941765, "grad_norm": 12.291951179504395, "learning_rate": 5e-06, "loss": 0.8524, "num_input_tokens_seen": 91269612, "step": 1456 }, { "epoch": 4.8452579034941765, "loss": 0.866235613822937, "loss_ce": 0.0011233410332351923, "loss_iou": 0.330078125, "loss_num": 0.04150390625, "loss_xval": 0.86328125, "num_input_tokens_seen": 91269612, "step": 1456 }, { "epoch": 4.848585690515807, "grad_norm": 13.571959495544434, "learning_rate": 5e-06, "loss": 0.6948, "num_input_tokens_seen": 91333272, "step": 1457 }, { "epoch": 4.848585690515807, "loss": 0.6613231301307678, "loss_ce": 0.00019031127158086747, "loss_iou": 0.248046875, "loss_num": 0.033203125, "loss_xval": 0.66015625, "num_input_tokens_seen": 91333272, "step": 1457 }, { "epoch": 4.851913477537438, "grad_norm": 10.801651000976562, "learning_rate": 5e-06, "loss": 0.608, "num_input_tokens_seen": 91395984, "step": 1458 }, { "epoch": 4.851913477537438, "loss": 0.6873751878738403, "loss_ce": 0.0006076014251448214, "loss_iou": 0.2451171875, "loss_num": 0.03955078125, "loss_xval": 0.6875, "num_input_tokens_seen": 91395984, "step": 1458 }, { "epoch": 4.855241264559068, "grad_norm": 6.719429969787598, "learning_rate": 5e-06, "loss": 0.6546, "num_input_tokens_seen": 91458932, "step": 1459 }, { "epoch": 4.855241264559068, "loss": 0.5861361026763916, "loss_ce": 1.5493311366299167e-05, "loss_iou": 0.193359375, "loss_num": 0.0400390625, "loss_xval": 0.5859375, "num_input_tokens_seen": 91458932, "step": 1459 }, { "epoch": 4.858569051580699, "grad_norm": 5.881053447723389, "learning_rate": 5e-06, "loss": 0.6877, "num_input_tokens_seen": 91522072, "step": 1460 }, { "epoch": 4.858569051580699, "loss": 0.7815065383911133, "loss_ce": 1.2384867659420706e-05, "loss_iou": 0.275390625, "loss_num": 0.046142578125, "loss_xval": 0.78125, "num_input_tokens_seen": 91522072, "step": 1460 }, { "epoch": 4.86189683860233, "grad_norm": 25.46339988708496, "learning_rate": 5e-06, "loss": 0.7087, "num_input_tokens_seen": 91584964, "step": 1461 }, { "epoch": 4.86189683860233, "loss": 0.6519922614097595, "loss_ce": 1.4735630429640878e-05, "loss_iou": 0.2060546875, "loss_num": 0.048095703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 91584964, "step": 1461 }, { "epoch": 4.86522462562396, "grad_norm": 24.335346221923828, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 91647520, "step": 1462 }, { "epoch": 4.86522462562396, "loss": 0.7870299220085144, "loss_ce": 0.0006529521197080612, "loss_iou": 0.26171875, "loss_num": 0.05224609375, "loss_xval": 0.78515625, "num_input_tokens_seen": 91647520, "step": 1462 }, { "epoch": 4.868552412645591, "grad_norm": 8.697700500488281, "learning_rate": 5e-06, "loss": 0.6499, "num_input_tokens_seen": 91708892, "step": 1463 }, { "epoch": 4.868552412645591, "loss": 0.5759354829788208, "loss_ce": 7.716497748333495e-06, "loss_iou": 0.21484375, "loss_num": 0.029052734375, "loss_xval": 0.57421875, "num_input_tokens_seen": 91708892, "step": 1463 }, { "epoch": 4.871880199667221, "grad_norm": 17.367408752441406, "learning_rate": 5e-06, "loss": 0.6339, "num_input_tokens_seen": 91771392, "step": 1464 }, { "epoch": 4.871880199667221, "loss": 0.47630244493484497, "loss_ce": 0.0009606416570022702, "loss_iou": 0.1650390625, "loss_num": 0.0291748046875, "loss_xval": 0.474609375, "num_input_tokens_seen": 91771392, "step": 1464 }, { "epoch": 4.875207986688852, "grad_norm": 22.917564392089844, "learning_rate": 5e-06, "loss": 0.7947, "num_input_tokens_seen": 91833316, "step": 1465 }, { "epoch": 4.875207986688852, "loss": 0.7659457325935364, "loss_ce": 0.0015414311783388257, "loss_iou": 0.263671875, "loss_num": 0.047119140625, "loss_xval": 0.765625, "num_input_tokens_seen": 91833316, "step": 1465 }, { "epoch": 4.878535773710483, "grad_norm": 14.553898811340332, "learning_rate": 5e-06, "loss": 0.7414, "num_input_tokens_seen": 91896000, "step": 1466 }, { "epoch": 4.878535773710483, "loss": 0.8736675381660461, "loss_ce": 0.0013530435971915722, "loss_iou": 0.30078125, "loss_num": 0.0537109375, "loss_xval": 0.87109375, "num_input_tokens_seen": 91896000, "step": 1466 }, { "epoch": 4.881863560732113, "grad_norm": 6.048940181732178, "learning_rate": 5e-06, "loss": 0.6011, "num_input_tokens_seen": 91958596, "step": 1467 }, { "epoch": 4.881863560732113, "loss": 0.6972119808197021, "loss_ce": 7.424458999594208e-06, "loss_iou": 0.220703125, "loss_num": 0.051513671875, "loss_xval": 0.6953125, "num_input_tokens_seen": 91958596, "step": 1467 }, { "epoch": 4.885191347753744, "grad_norm": 9.67534065246582, "learning_rate": 5e-06, "loss": 0.7041, "num_input_tokens_seen": 92022212, "step": 1468 }, { "epoch": 4.885191347753744, "loss": 0.7177750468254089, "loss_ce": 0.0007340267184190452, "loss_iou": 0.26953125, "loss_num": 0.03515625, "loss_xval": 0.71875, "num_input_tokens_seen": 92022212, "step": 1468 }, { "epoch": 4.8885191347753745, "grad_norm": 8.458477020263672, "learning_rate": 5e-06, "loss": 0.5584, "num_input_tokens_seen": 92083704, "step": 1469 }, { "epoch": 4.8885191347753745, "loss": 0.5981578230857849, "loss_ce": 0.00025747111067175865, "loss_iou": 0.154296875, "loss_num": 0.057861328125, "loss_xval": 0.59765625, "num_input_tokens_seen": 92083704, "step": 1469 }, { "epoch": 4.891846921797005, "grad_norm": 16.016294479370117, "learning_rate": 5e-06, "loss": 0.6156, "num_input_tokens_seen": 92146604, "step": 1470 }, { "epoch": 4.891846921797005, "loss": 0.6351546049118042, "loss_ce": 2.2759943021810614e-05, "loss_iou": 0.2109375, "loss_num": 0.04248046875, "loss_xval": 0.63671875, "num_input_tokens_seen": 92146604, "step": 1470 }, { "epoch": 4.895174708818636, "grad_norm": 15.973528861999512, "learning_rate": 5e-06, "loss": 0.6866, "num_input_tokens_seen": 92208856, "step": 1471 }, { "epoch": 4.895174708818636, "loss": 0.46397870779037476, "loss_ce": 0.0007829050882719457, "loss_iou": 0.1474609375, "loss_num": 0.03369140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 92208856, "step": 1471 }, { "epoch": 4.898502495840266, "grad_norm": 66.14910888671875, "learning_rate": 5e-06, "loss": 0.6034, "num_input_tokens_seen": 92269632, "step": 1472 }, { "epoch": 4.898502495840266, "loss": 0.6951184272766113, "loss_ce": 0.00023322502966038883, "loss_iou": 0.248046875, "loss_num": 0.039794921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 92269632, "step": 1472 }, { "epoch": 4.901830282861897, "grad_norm": 8.95643138885498, "learning_rate": 5e-06, "loss": 0.8607, "num_input_tokens_seen": 92331396, "step": 1473 }, { "epoch": 4.901830282861897, "loss": 0.874767541885376, "loss_ce": 1.1740381523850374e-05, "loss_iou": 0.244140625, "loss_num": 0.07763671875, "loss_xval": 0.875, "num_input_tokens_seen": 92331396, "step": 1473 }, { "epoch": 4.9051580698835275, "grad_norm": 8.05871295928955, "learning_rate": 5e-06, "loss": 0.7398, "num_input_tokens_seen": 92394372, "step": 1474 }, { "epoch": 4.9051580698835275, "loss": 0.6223906874656677, "loss_ce": 0.00044243200682103634, "loss_iou": 0.1806640625, "loss_num": 0.05224609375, "loss_xval": 0.62109375, "num_input_tokens_seen": 92394372, "step": 1474 }, { "epoch": 4.908485856905158, "grad_norm": 9.45444393157959, "learning_rate": 5e-06, "loss": 0.606, "num_input_tokens_seen": 92454668, "step": 1475 }, { "epoch": 4.908485856905158, "loss": 0.5225332975387573, "loss_ce": 0.0009268427966162562, "loss_iou": 0.15234375, "loss_num": 0.043701171875, "loss_xval": 0.5234375, "num_input_tokens_seen": 92454668, "step": 1475 }, { "epoch": 4.911813643926789, "grad_norm": 13.965225219726562, "learning_rate": 5e-06, "loss": 0.6407, "num_input_tokens_seen": 92516668, "step": 1476 }, { "epoch": 4.911813643926789, "loss": 0.7352874279022217, "loss_ce": 0.0004241612332407385, "loss_iou": 0.21875, "loss_num": 0.059326171875, "loss_xval": 0.734375, "num_input_tokens_seen": 92516668, "step": 1476 }, { "epoch": 4.915141430948419, "grad_norm": 31.405977249145508, "learning_rate": 5e-06, "loss": 0.6339, "num_input_tokens_seen": 92578740, "step": 1477 }, { "epoch": 4.915141430948419, "loss": 0.7649446725845337, "loss_ce": 5.21071269758977e-05, "loss_iou": 0.283203125, "loss_num": 0.03955078125, "loss_xval": 0.765625, "num_input_tokens_seen": 92578740, "step": 1477 }, { "epoch": 4.91846921797005, "grad_norm": 38.17366409301758, "learning_rate": 5e-06, "loss": 0.7025, "num_input_tokens_seen": 92641960, "step": 1478 }, { "epoch": 4.91846921797005, "loss": 0.5510313510894775, "loss_ce": 6.01200326855178e-06, "loss_iou": 0.2138671875, "loss_num": 0.0247802734375, "loss_xval": 0.55078125, "num_input_tokens_seen": 92641960, "step": 1478 }, { "epoch": 4.921797004991681, "grad_norm": 26.557897567749023, "learning_rate": 5e-06, "loss": 0.8459, "num_input_tokens_seen": 92705296, "step": 1479 }, { "epoch": 4.921797004991681, "loss": 0.9874820709228516, "loss_ce": 0.001154016237705946, "loss_iou": 0.36328125, "loss_num": 0.052001953125, "loss_xval": 0.984375, "num_input_tokens_seen": 92705296, "step": 1479 }, { "epoch": 4.925124792013311, "grad_norm": 16.19771385192871, "learning_rate": 5e-06, "loss": 0.6112, "num_input_tokens_seen": 92768672, "step": 1480 }, { "epoch": 4.925124792013311, "loss": 0.4784819781780243, "loss_ce": 0.000942906248383224, "loss_iou": 0.2041015625, "loss_num": 0.01397705078125, "loss_xval": 0.4765625, "num_input_tokens_seen": 92768672, "step": 1480 }, { "epoch": 4.928452579034942, "grad_norm": 14.849689483642578, "learning_rate": 5e-06, "loss": 0.5547, "num_input_tokens_seen": 92830956, "step": 1481 }, { "epoch": 4.928452579034942, "loss": 0.3901027739048004, "loss_ce": 0.0006984665524214506, "loss_iou": 0.1328125, "loss_num": 0.024658203125, "loss_xval": 0.388671875, "num_input_tokens_seen": 92830956, "step": 1481 }, { "epoch": 4.931780366056572, "grad_norm": 7.257058143615723, "learning_rate": 5e-06, "loss": 0.5095, "num_input_tokens_seen": 92893536, "step": 1482 }, { "epoch": 4.931780366056572, "loss": 0.5134562253952026, "loss_ce": 0.00012005659664282575, "loss_iou": 0.185546875, "loss_num": 0.0283203125, "loss_xval": 0.51171875, "num_input_tokens_seen": 92893536, "step": 1482 }, { "epoch": 4.935108153078203, "grad_norm": 20.446090698242188, "learning_rate": 5e-06, "loss": 0.779, "num_input_tokens_seen": 92957384, "step": 1483 }, { "epoch": 4.935108153078203, "loss": 0.5768802762031555, "loss_ce": 6.451336957979947e-06, "loss_iou": 0.1796875, "loss_num": 0.04345703125, "loss_xval": 0.578125, "num_input_tokens_seen": 92957384, "step": 1483 }, { "epoch": 4.938435940099834, "grad_norm": 11.192956924438477, "learning_rate": 5e-06, "loss": 0.6112, "num_input_tokens_seen": 93020708, "step": 1484 }, { "epoch": 4.938435940099834, "loss": 0.7152775526046753, "loss_ce": 0.0009221016080118716, "loss_iou": 0.2412109375, "loss_num": 0.04638671875, "loss_xval": 0.71484375, "num_input_tokens_seen": 93020708, "step": 1484 }, { "epoch": 4.941763727121464, "grad_norm": 5.90722131729126, "learning_rate": 5e-06, "loss": 0.5643, "num_input_tokens_seen": 93083832, "step": 1485 }, { "epoch": 4.941763727121464, "loss": 0.6696557998657227, "loss_ce": 0.0005884410347789526, "loss_iou": 0.2177734375, "loss_num": 0.046630859375, "loss_xval": 0.66796875, "num_input_tokens_seen": 93083832, "step": 1485 }, { "epoch": 4.945091514143095, "grad_norm": 10.14202880859375, "learning_rate": 5e-06, "loss": 0.6555, "num_input_tokens_seen": 93146696, "step": 1486 }, { "epoch": 4.945091514143095, "loss": 0.800066351890564, "loss_ce": 0.0007499469793401659, "loss_iou": 0.29296875, "loss_num": 0.04248046875, "loss_xval": 0.80078125, "num_input_tokens_seen": 93146696, "step": 1486 }, { "epoch": 4.9484193011647255, "grad_norm": 26.203956604003906, "learning_rate": 5e-06, "loss": 0.5737, "num_input_tokens_seen": 93209324, "step": 1487 }, { "epoch": 4.9484193011647255, "loss": 0.6305347681045532, "loss_ce": 0.001018121256493032, "loss_iou": 0.224609375, "loss_num": 0.035888671875, "loss_xval": 0.62890625, "num_input_tokens_seen": 93209324, "step": 1487 }, { "epoch": 4.951747088186356, "grad_norm": 23.09466552734375, "learning_rate": 5e-06, "loss": 0.6184, "num_input_tokens_seen": 93272160, "step": 1488 }, { "epoch": 4.951747088186356, "loss": 0.8817948698997498, "loss_ce": 0.0014237661380320787, "loss_iou": 0.3359375, "loss_num": 0.0419921875, "loss_xval": 0.87890625, "num_input_tokens_seen": 93272160, "step": 1488 }, { "epoch": 4.955074875207987, "grad_norm": 9.5159912109375, "learning_rate": 5e-06, "loss": 0.6904, "num_input_tokens_seen": 93334832, "step": 1489 }, { "epoch": 4.955074875207987, "loss": 0.6759295463562012, "loss_ce": 0.0018573238048702478, "loss_iou": 0.1943359375, "loss_num": 0.05712890625, "loss_xval": 0.67578125, "num_input_tokens_seen": 93334832, "step": 1489 }, { "epoch": 4.958402662229617, "grad_norm": 11.571125030517578, "learning_rate": 5e-06, "loss": 0.7879, "num_input_tokens_seen": 93397664, "step": 1490 }, { "epoch": 4.958402662229617, "loss": 0.8480530977249146, "loss_ce": 3.063214535359293e-05, "loss_iou": 0.28125, "loss_num": 0.057373046875, "loss_xval": 0.84765625, "num_input_tokens_seen": 93397664, "step": 1490 }, { "epoch": 4.961730449251248, "grad_norm": 19.706268310546875, "learning_rate": 5e-06, "loss": 0.6867, "num_input_tokens_seen": 93461596, "step": 1491 }, { "epoch": 4.961730449251248, "loss": 0.786535382270813, "loss_ce": 0.0010129549773409963, "loss_iou": 0.2255859375, "loss_num": 0.06689453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 93461596, "step": 1491 }, { "epoch": 4.965058236272879, "grad_norm": 29.474609375, "learning_rate": 5e-06, "loss": 0.6213, "num_input_tokens_seen": 93525408, "step": 1492 }, { "epoch": 4.965058236272879, "loss": 0.4931163191795349, "loss_ce": 0.00019638639059849083, "loss_iou": 0.1728515625, "loss_num": 0.029296875, "loss_xval": 0.4921875, "num_input_tokens_seen": 93525408, "step": 1492 }, { "epoch": 4.968386023294509, "grad_norm": 18.11968421936035, "learning_rate": 5e-06, "loss": 0.7208, "num_input_tokens_seen": 93588728, "step": 1493 }, { "epoch": 4.968386023294509, "loss": 0.5526838302612305, "loss_ce": 7.154500053729862e-05, "loss_iou": 0.169921875, "loss_num": 0.04248046875, "loss_xval": 0.55078125, "num_input_tokens_seen": 93588728, "step": 1493 }, { "epoch": 4.97171381031614, "grad_norm": 23.50432777404785, "learning_rate": 5e-06, "loss": 0.6647, "num_input_tokens_seen": 93651272, "step": 1494 }, { "epoch": 4.97171381031614, "loss": 0.46906614303588867, "loss_ce": 1.0960973668261431e-05, "loss_iou": 0.1484375, "loss_num": 0.034423828125, "loss_xval": 0.46875, "num_input_tokens_seen": 93651272, "step": 1494 }, { "epoch": 4.97504159733777, "grad_norm": 9.207002639770508, "learning_rate": 5e-06, "loss": 0.5717, "num_input_tokens_seen": 93714276, "step": 1495 }, { "epoch": 4.97504159733777, "loss": 0.6608612537384033, "loss_ce": 0.00046087297960184515, "loss_iou": 0.2138671875, "loss_num": 0.046630859375, "loss_xval": 0.66015625, "num_input_tokens_seen": 93714276, "step": 1495 }, { "epoch": 4.978369384359401, "grad_norm": 38.251895904541016, "learning_rate": 5e-06, "loss": 0.6622, "num_input_tokens_seen": 93776808, "step": 1496 }, { "epoch": 4.978369384359401, "loss": 0.6945087909698486, "loss_ce": 0.0005390573642216623, "loss_iou": 0.201171875, "loss_num": 0.05810546875, "loss_xval": 0.6953125, "num_input_tokens_seen": 93776808, "step": 1496 }, { "epoch": 4.981697171381032, "grad_norm": 21.425600051879883, "learning_rate": 5e-06, "loss": 0.5562, "num_input_tokens_seen": 93838552, "step": 1497 }, { "epoch": 4.981697171381032, "loss": 0.5125634670257568, "loss_ce": 0.00011228623043280095, "loss_iou": 0.1728515625, "loss_num": 0.033447265625, "loss_xval": 0.51171875, "num_input_tokens_seen": 93838552, "step": 1497 }, { "epoch": 4.985024958402662, "grad_norm": 14.057490348815918, "learning_rate": 5e-06, "loss": 0.7785, "num_input_tokens_seen": 93901584, "step": 1498 }, { "epoch": 4.985024958402662, "loss": 0.8411925435066223, "loss_ce": 6.0324600781314075e-06, "loss_iou": 0.3046875, "loss_num": 0.04638671875, "loss_xval": 0.83984375, "num_input_tokens_seen": 93901584, "step": 1498 }, { "epoch": 4.988352745424293, "grad_norm": 9.682928085327148, "learning_rate": 5e-06, "loss": 0.7614, "num_input_tokens_seen": 93964140, "step": 1499 }, { "epoch": 4.988352745424293, "loss": 0.6496632099151611, "loss_ce": 4.991494733985746e-06, "loss_iou": 0.2080078125, "loss_num": 0.046630859375, "loss_xval": 0.6484375, "num_input_tokens_seen": 93964140, "step": 1499 }, { "epoch": 4.9916805324459235, "grad_norm": 14.523565292358398, "learning_rate": 5e-06, "loss": 0.5103, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_seeclick_CIoU": 0.04270816780626774, "eval_seeclick_GIoU": 0.05947853717952967, "eval_seeclick_IoU": 0.15783283114433289, "eval_seeclick_MAE_all": 0.17508918046951294, "eval_seeclick_MAE_h": 0.06801742874085903, "eval_seeclick_MAE_w": 0.13476183265447617, "eval_seeclick_MAE_x_boxes": 0.2204396203160286, "eval_seeclick_MAE_y_boxes": 0.17908421158790588, "eval_seeclick_NUM_probability": 0.9998188018798828, "eval_seeclick_inside_bbox": 0.25833334028720856, "eval_seeclick_loss": 2.8822147846221924, "eval_seeclick_loss_ce": 0.11620939522981644, "eval_seeclick_loss_iou": 0.941162109375, "eval_seeclick_loss_num": 0.17176055908203125, "eval_seeclick_loss_xval": 2.74169921875, "eval_seeclick_runtime": 62.1418, "eval_seeclick_samples_per_second": 0.756, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_icons_CIoU": -0.08198189735412598, "eval_icons_GIoU": 0.004217052832245827, "eval_icons_IoU": 0.10965506732463837, "eval_icons_MAE_all": 0.19301337748765945, "eval_icons_MAE_h": 0.18926072120666504, "eval_icons_MAE_w": 0.19488335400819778, "eval_icons_MAE_x_boxes": 0.12957751378417015, "eval_icons_MAE_y_boxes": 0.09386412799358368, "eval_icons_NUM_probability": 0.9999632239341736, "eval_icons_inside_bbox": 0.2204861119389534, "eval_icons_loss": 2.8837552070617676, "eval_icons_loss_ce": 5.943001951891347e-06, "eval_icons_loss_iou": 0.989013671875, "eval_icons_loss_num": 0.18653488159179688, "eval_icons_loss_xval": 2.9111328125, "eval_icons_runtime": 70.1475, "eval_icons_samples_per_second": 0.713, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_screenspot_CIoU": 0.10867175708214442, "eval_screenspot_GIoU": 0.12545882413784662, "eval_screenspot_IoU": 0.2380554179350535, "eval_screenspot_MAE_all": 0.15076185514529547, "eval_screenspot_MAE_h": 0.07169030234217644, "eval_screenspot_MAE_w": 0.16880851487318674, "eval_screenspot_MAE_x_boxes": 0.17548154294490814, "eval_screenspot_MAE_y_boxes": 0.11784802377223969, "eval_screenspot_NUM_probability": 0.9999725421269735, "eval_screenspot_inside_bbox": 0.4529166618982951, "eval_screenspot_loss": 2.5563974380493164, "eval_screenspot_loss_ce": 0.00036748944694409147, "eval_screenspot_loss_iou": 0.89794921875, "eval_screenspot_loss_num": 0.1639862060546875, "eval_screenspot_loss_xval": 2.6149088541666665, "eval_screenspot_runtime": 115.8287, "eval_screenspot_samples_per_second": 0.768, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_compot_CIoU": -0.0008479300886392593, "eval_compot_GIoU": 0.0575521495193243, "eval_compot_IoU": 0.1603601798415184, "eval_compot_MAE_all": 0.20261351019144058, "eval_compot_MAE_h": 0.12544260174036026, "eval_compot_MAE_w": 0.2130431905388832, "eval_compot_MAE_x_boxes": 0.1687571033835411, "eval_compot_MAE_y_boxes": 0.14694544300436974, "eval_compot_NUM_probability": 0.9999663233757019, "eval_compot_inside_bbox": 0.3541666716337204, "eval_compot_loss": 2.897027015686035, "eval_compot_loss_ce": 0.0021384565625339746, "eval_compot_loss_iou": 0.943115234375, "eval_compot_loss_num": 0.2084503173828125, "eval_compot_loss_xval": 2.927734375, "eval_compot_runtime": 78.5096, "eval_compot_samples_per_second": 0.637, "eval_compot_steps_per_second": 0.025, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "eval_custom_ui_MAE_all": 0.07522419467568398, "eval_custom_ui_MAE_x": 0.0817125029861927, "eval_custom_ui_MAE_y": 0.06873589009046555, "eval_custom_ui_NUM_probability": 0.99999138712883, "eval_custom_ui_loss": 0.35816749930381775, "eval_custom_ui_loss_ce": 3.272582830504689e-06, "eval_custom_ui_loss_num": 0.0727081298828125, "eval_custom_ui_loss_xval": 0.363494873046875, "eval_custom_ui_runtime": 50.9693, "eval_custom_ui_samples_per_second": 0.981, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.9916805324459235, "loss": 0.36041486263275146, "loss_ce": 2.289598114657565e-06, "loss_iou": 0.0, "loss_num": 0.072265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 94026304, "step": 1500 }, { "epoch": 4.995008319467554, "grad_norm": 9.621530532836914, "learning_rate": 5e-06, "loss": 0.5141, "num_input_tokens_seen": 94088064, "step": 1501 }, { "epoch": 4.995008319467554, "loss": 0.5835400819778442, "loss_ce": 0.00010507624392630532, "loss_iou": 0.1982421875, "loss_num": 0.03759765625, "loss_xval": 0.58203125, "num_input_tokens_seen": 94088064, "step": 1501 }, { "epoch": 4.998336106489185, "grad_norm": 10.087261199951172, "learning_rate": 5e-06, "loss": 0.6195, "num_input_tokens_seen": 94150448, "step": 1502 }, { "epoch": 4.998336106489185, "loss": 0.6320337057113647, "loss_ce": 1.4681490938528441e-05, "loss_iou": 0.2412109375, "loss_num": 0.030029296875, "loss_xval": 0.6328125, "num_input_tokens_seen": 94150448, "step": 1502 }, { "epoch": 4.998336106489185, "loss": 0.9262059926986694, "loss_ce": 0.00127924676053226, "loss_iou": 0.37109375, "loss_num": 0.03662109375, "loss_xval": 0.92578125, "num_input_tokens_seen": 94181836, "step": 1502 }, { "epoch": 5.001663893510815, "grad_norm": 17.964723587036133, "learning_rate": 5e-06, "loss": 0.7453, "num_input_tokens_seen": 94213536, "step": 1503 }, { "epoch": 5.001663893510815, "loss": 0.5644786953926086, "loss_ce": 0.000757978530600667, "loss_iou": 0.208984375, "loss_num": 0.029052734375, "loss_xval": 0.5625, "num_input_tokens_seen": 94213536, "step": 1503 }, { "epoch": 5.004991680532446, "grad_norm": 24.917987823486328, "learning_rate": 5e-06, "loss": 0.8342, "num_input_tokens_seen": 94277680, "step": 1504 }, { "epoch": 5.004991680532446, "loss": 0.7740134000778198, "loss_ce": 8.76397971296683e-05, "loss_iou": 0.28125, "loss_num": 0.042724609375, "loss_xval": 0.7734375, "num_input_tokens_seen": 94277680, "step": 1504 }, { "epoch": 5.0083194675540765, "grad_norm": 28.204835891723633, "learning_rate": 5e-06, "loss": 0.6461, "num_input_tokens_seen": 94341488, "step": 1505 }, { "epoch": 5.0083194675540765, "loss": 0.7013037204742432, "loss_ce": 0.00037597300251945853, "loss_iou": 0.26953125, "loss_num": 0.0322265625, "loss_xval": 0.69921875, "num_input_tokens_seen": 94341488, "step": 1505 }, { "epoch": 5.011647254575707, "grad_norm": 17.724748611450195, "learning_rate": 5e-06, "loss": 0.7494, "num_input_tokens_seen": 94402736, "step": 1506 }, { "epoch": 5.011647254575707, "loss": 0.6644572019577026, "loss_ce": 0.0002726172679103911, "loss_iou": 0.25, "loss_num": 0.032470703125, "loss_xval": 0.6640625, "num_input_tokens_seen": 94402736, "step": 1506 }, { "epoch": 5.014975041597338, "grad_norm": 8.471407890319824, "learning_rate": 5e-06, "loss": 0.5543, "num_input_tokens_seen": 94466864, "step": 1507 }, { "epoch": 5.014975041597338, "loss": 0.5229892730712891, "loss_ce": 0.0006198729388415813, "loss_iou": 0.2138671875, "loss_num": 0.0191650390625, "loss_xval": 0.5234375, "num_input_tokens_seen": 94466864, "step": 1507 }, { "epoch": 5.018302828618968, "grad_norm": 14.484640121459961, "learning_rate": 5e-06, "loss": 0.5906, "num_input_tokens_seen": 94527952, "step": 1508 }, { "epoch": 5.018302828618968, "loss": 0.7876707315444946, "loss_ce": 0.00031720142578706145, "loss_iou": 0.28515625, "loss_num": 0.0439453125, "loss_xval": 0.7890625, "num_input_tokens_seen": 94527952, "step": 1508 }, { "epoch": 5.021630615640599, "grad_norm": 10.31477165222168, "learning_rate": 5e-06, "loss": 0.681, "num_input_tokens_seen": 94589676, "step": 1509 }, { "epoch": 5.021630615640599, "loss": 0.5929126739501953, "loss_ce": 0.0005054158973507583, "loss_iou": 0.1884765625, "loss_num": 0.04296875, "loss_xval": 0.59375, "num_input_tokens_seen": 94589676, "step": 1509 }, { "epoch": 5.02495840266223, "grad_norm": 25.61829948425293, "learning_rate": 5e-06, "loss": 0.7227, "num_input_tokens_seen": 94653448, "step": 1510 }, { "epoch": 5.02495840266223, "loss": 0.7796362638473511, "loss_ce": 0.0011938437819480896, "loss_iou": 0.326171875, "loss_num": 0.0252685546875, "loss_xval": 0.77734375, "num_input_tokens_seen": 94653448, "step": 1510 }, { "epoch": 5.02828618968386, "grad_norm": 38.60111999511719, "learning_rate": 5e-06, "loss": 0.7575, "num_input_tokens_seen": 94716188, "step": 1511 }, { "epoch": 5.02828618968386, "loss": 0.5527058839797974, "loss_ce": 0.0002461562107782811, "loss_iou": 0.2060546875, "loss_num": 0.028076171875, "loss_xval": 0.55078125, "num_input_tokens_seen": 94716188, "step": 1511 }, { "epoch": 5.031613976705491, "grad_norm": 30.470426559448242, "learning_rate": 5e-06, "loss": 0.6559, "num_input_tokens_seen": 94779124, "step": 1512 }, { "epoch": 5.031613976705491, "loss": 0.561592698097229, "loss_ce": 8.199468538805377e-06, "loss_iou": 0.1767578125, "loss_num": 0.04150390625, "loss_xval": 0.5625, "num_input_tokens_seen": 94779124, "step": 1512 }, { "epoch": 5.034941763727121, "grad_norm": 32.25939178466797, "learning_rate": 5e-06, "loss": 0.6632, "num_input_tokens_seen": 94841752, "step": 1513 }, { "epoch": 5.034941763727121, "loss": 0.662388801574707, "loss_ce": 3.5258635762147605e-05, "loss_iou": 0.2236328125, "loss_num": 0.04296875, "loss_xval": 0.6640625, "num_input_tokens_seen": 94841752, "step": 1513 }, { "epoch": 5.038269550748752, "grad_norm": 20.437286376953125, "learning_rate": 5e-06, "loss": 0.555, "num_input_tokens_seen": 94903848, "step": 1514 }, { "epoch": 5.038269550748752, "loss": 0.5813217163085938, "loss_ce": 0.0011825321707874537, "loss_iou": 0.0986328125, "loss_num": 0.07666015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 94903848, "step": 1514 }, { "epoch": 5.041597337770383, "grad_norm": 15.885831832885742, "learning_rate": 5e-06, "loss": 0.7297, "num_input_tokens_seen": 94966960, "step": 1515 }, { "epoch": 5.041597337770383, "loss": 0.7980432510375977, "loss_ce": 0.0002527273609302938, "loss_iou": 0.28515625, "loss_num": 0.045654296875, "loss_xval": 0.796875, "num_input_tokens_seen": 94966960, "step": 1515 }, { "epoch": 5.044925124792013, "grad_norm": 10.761627197265625, "learning_rate": 5e-06, "loss": 0.5483, "num_input_tokens_seen": 95030764, "step": 1516 }, { "epoch": 5.044925124792013, "loss": 0.608400821685791, "loss_ce": 0.000490694073960185, "loss_iou": 0.234375, "loss_num": 0.0279541015625, "loss_xval": 0.609375, "num_input_tokens_seen": 95030764, "step": 1516 }, { "epoch": 5.048252911813644, "grad_norm": 15.04028606414795, "learning_rate": 5e-06, "loss": 0.8497, "num_input_tokens_seen": 95092396, "step": 1517 }, { "epoch": 5.048252911813644, "loss": 0.7606963515281677, "loss_ce": 0.00019833659462165087, "loss_iou": 0.2578125, "loss_num": 0.048828125, "loss_xval": 0.76171875, "num_input_tokens_seen": 95092396, "step": 1517 }, { "epoch": 5.0515806988352745, "grad_norm": 8.182450294494629, "learning_rate": 5e-06, "loss": 0.74, "num_input_tokens_seen": 95155860, "step": 1518 }, { "epoch": 5.0515806988352745, "loss": 0.6113855242729187, "loss_ce": 0.0017663489561527967, "loss_iou": 0.21875, "loss_num": 0.03466796875, "loss_xval": 0.609375, "num_input_tokens_seen": 95155860, "step": 1518 }, { "epoch": 5.054908485856905, "grad_norm": 11.478755950927734, "learning_rate": 5e-06, "loss": 0.5542, "num_input_tokens_seen": 95218936, "step": 1519 }, { "epoch": 5.054908485856905, "loss": 0.7388406991958618, "loss_ce": 0.0003153325815219432, "loss_iou": 0.2734375, "loss_num": 0.03857421875, "loss_xval": 0.73828125, "num_input_tokens_seen": 95218936, "step": 1519 }, { "epoch": 5.058236272878536, "grad_norm": 10.155901908874512, "learning_rate": 5e-06, "loss": 0.6887, "num_input_tokens_seen": 95281936, "step": 1520 }, { "epoch": 5.058236272878536, "loss": 0.4614357650279999, "loss_ce": 9.986786608351395e-06, "loss_iou": 0.1806640625, "loss_num": 0.0198974609375, "loss_xval": 0.4609375, "num_input_tokens_seen": 95281936, "step": 1520 }, { "epoch": 5.061564059900166, "grad_norm": 37.45384979248047, "learning_rate": 5e-06, "loss": 0.8727, "num_input_tokens_seen": 95344100, "step": 1521 }, { "epoch": 5.061564059900166, "loss": 0.8230183124542236, "loss_ce": 0.00014231627574190497, "loss_iou": 0.296875, "loss_num": 0.045654296875, "loss_xval": 0.82421875, "num_input_tokens_seen": 95344100, "step": 1521 }, { "epoch": 5.064891846921797, "grad_norm": 27.166980743408203, "learning_rate": 5e-06, "loss": 0.5743, "num_input_tokens_seen": 95404464, "step": 1522 }, { "epoch": 5.064891846921797, "loss": 0.7703295946121216, "loss_ce": 0.0003100370813626796, "loss_iou": 0.275390625, "loss_num": 0.044189453125, "loss_xval": 0.76953125, "num_input_tokens_seen": 95404464, "step": 1522 }, { "epoch": 5.068219633943428, "grad_norm": 13.809371948242188, "learning_rate": 5e-06, "loss": 0.5321, "num_input_tokens_seen": 95466952, "step": 1523 }, { "epoch": 5.068219633943428, "loss": 0.5198078155517578, "loss_ce": 0.00015449420607183129, "loss_iou": 0.1533203125, "loss_num": 0.042724609375, "loss_xval": 0.51953125, "num_input_tokens_seen": 95466952, "step": 1523 }, { "epoch": 5.071547420965058, "grad_norm": 10.335877418518066, "learning_rate": 5e-06, "loss": 0.7238, "num_input_tokens_seen": 95529416, "step": 1524 }, { "epoch": 5.071547420965058, "loss": 0.7628429532051086, "loss_ce": 0.00039176808786578476, "loss_iou": 0.271484375, "loss_num": 0.0439453125, "loss_xval": 0.76171875, "num_input_tokens_seen": 95529416, "step": 1524 }, { "epoch": 5.074875207986689, "grad_norm": 14.917417526245117, "learning_rate": 5e-06, "loss": 0.8733, "num_input_tokens_seen": 95593868, "step": 1525 }, { "epoch": 5.074875207986689, "loss": 0.6986047029495239, "loss_ce": 0.0010949805146083236, "loss_iou": 0.28125, "loss_num": 0.0267333984375, "loss_xval": 0.69921875, "num_input_tokens_seen": 95593868, "step": 1525 }, { "epoch": 5.078202995008319, "grad_norm": 12.556556701660156, "learning_rate": 5e-06, "loss": 0.8313, "num_input_tokens_seen": 95655832, "step": 1526 }, { "epoch": 5.078202995008319, "loss": 0.672004222869873, "loss_ce": 7.1917634159035515e-06, "loss_iou": 0.259765625, "loss_num": 0.0301513671875, "loss_xval": 0.671875, "num_input_tokens_seen": 95655832, "step": 1526 }, { "epoch": 5.08153078202995, "grad_norm": 18.913822174072266, "learning_rate": 5e-06, "loss": 0.8958, "num_input_tokens_seen": 95720420, "step": 1527 }, { "epoch": 5.08153078202995, "loss": 0.9158020615577698, "loss_ce": 0.0006409242050722241, "loss_iou": 0.341796875, "loss_num": 0.046142578125, "loss_xval": 0.9140625, "num_input_tokens_seen": 95720420, "step": 1527 }, { "epoch": 5.084858569051581, "grad_norm": 55.04985046386719, "learning_rate": 5e-06, "loss": 0.7202, "num_input_tokens_seen": 95783088, "step": 1528 }, { "epoch": 5.084858569051581, "loss": 0.6768368482589722, "loss_ce": 0.0014828136190772057, "loss_iou": 0.2734375, "loss_num": 0.0260009765625, "loss_xval": 0.67578125, "num_input_tokens_seen": 95783088, "step": 1528 }, { "epoch": 5.088186356073211, "grad_norm": 26.374725341796875, "learning_rate": 5e-06, "loss": 0.8094, "num_input_tokens_seen": 95846684, "step": 1529 }, { "epoch": 5.088186356073211, "loss": 0.7539188861846924, "loss_ce": 1.2668057934206445e-05, "loss_iou": 0.314453125, "loss_num": 0.025146484375, "loss_xval": 0.75390625, "num_input_tokens_seen": 95846684, "step": 1529 }, { "epoch": 5.091514143094842, "grad_norm": 30.568553924560547, "learning_rate": 5e-06, "loss": 0.6748, "num_input_tokens_seen": 95908320, "step": 1530 }, { "epoch": 5.091514143094842, "loss": 0.6049854755401611, "loss_ce": 4.994103164790431e-06, "loss_iou": 0.2421875, "loss_num": 0.0242919921875, "loss_xval": 0.60546875, "num_input_tokens_seen": 95908320, "step": 1530 }, { "epoch": 5.0948419301164725, "grad_norm": 19.20185089111328, "learning_rate": 5e-06, "loss": 0.5684, "num_input_tokens_seen": 95969884, "step": 1531 }, { "epoch": 5.0948419301164725, "loss": 0.611803412437439, "loss_ce": 0.00035322128678672016, "loss_iou": 0.2158203125, "loss_num": 0.035888671875, "loss_xval": 0.61328125, "num_input_tokens_seen": 95969884, "step": 1531 }, { "epoch": 5.098169717138103, "grad_norm": 12.004545211791992, "learning_rate": 5e-06, "loss": 0.9871, "num_input_tokens_seen": 96033172, "step": 1532 }, { "epoch": 5.098169717138103, "loss": 1.2355549335479736, "loss_ce": 0.0014240844175219536, "loss_iou": 0.43359375, "loss_num": 0.07275390625, "loss_xval": 1.234375, "num_input_tokens_seen": 96033172, "step": 1532 }, { "epoch": 5.101497504159734, "grad_norm": 10.294998168945312, "learning_rate": 5e-06, "loss": 0.4265, "num_input_tokens_seen": 96095148, "step": 1533 }, { "epoch": 5.101497504159734, "loss": 0.5855773687362671, "loss_ce": 0.0002502031857147813, "loss_iou": 0.2119140625, "loss_num": 0.032470703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 96095148, "step": 1533 }, { "epoch": 5.104825291181364, "grad_norm": 8.439178466796875, "learning_rate": 5e-06, "loss": 0.4613, "num_input_tokens_seen": 96156736, "step": 1534 }, { "epoch": 5.104825291181364, "loss": 0.546348512172699, "loss_ce": 0.0003890287480317056, "loss_iou": 0.21484375, "loss_num": 0.0234375, "loss_xval": 0.546875, "num_input_tokens_seen": 96156736, "step": 1534 }, { "epoch": 5.108153078202995, "grad_norm": 12.101310729980469, "learning_rate": 5e-06, "loss": 0.7153, "num_input_tokens_seen": 96220384, "step": 1535 }, { "epoch": 5.108153078202995, "loss": 0.9940405488014221, "loss_ce": 0.00038817088352516294, "loss_iou": 0.390625, "loss_num": 0.042236328125, "loss_xval": 0.9921875, "num_input_tokens_seen": 96220384, "step": 1535 }, { "epoch": 5.1114808652246255, "grad_norm": 9.335909843444824, "learning_rate": 5e-06, "loss": 0.4596, "num_input_tokens_seen": 96282636, "step": 1536 }, { "epoch": 5.1114808652246255, "loss": 0.4521195590496063, "loss_ce": 3.2176241802517325e-05, "loss_iou": 0.166015625, "loss_num": 0.024169921875, "loss_xval": 0.451171875, "num_input_tokens_seen": 96282636, "step": 1536 }, { "epoch": 5.114808652246256, "grad_norm": 13.422652244567871, "learning_rate": 5e-06, "loss": 0.6144, "num_input_tokens_seen": 96344824, "step": 1537 }, { "epoch": 5.114808652246256, "loss": 0.7167405486106873, "loss_ce": 0.00018783820269163698, "loss_iou": 0.2734375, "loss_num": 0.033935546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 96344824, "step": 1537 }, { "epoch": 5.118136439267887, "grad_norm": 15.545272827148438, "learning_rate": 5e-06, "loss": 0.5423, "num_input_tokens_seen": 96406092, "step": 1538 }, { "epoch": 5.118136439267887, "loss": 0.5440765619277954, "loss_ce": 9.231320291291922e-06, "loss_iou": 0.201171875, "loss_num": 0.0284423828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 96406092, "step": 1538 }, { "epoch": 5.121464226289517, "grad_norm": 28.56312370300293, "learning_rate": 5e-06, "loss": 0.7871, "num_input_tokens_seen": 96466292, "step": 1539 }, { "epoch": 5.121464226289517, "loss": 0.7364552617073059, "loss_ce": 5.0140715757152066e-06, "loss_iou": 0.2451171875, "loss_num": 0.04931640625, "loss_xval": 0.73828125, "num_input_tokens_seen": 96466292, "step": 1539 }, { "epoch": 5.124792013311148, "grad_norm": 12.332464218139648, "learning_rate": 5e-06, "loss": 0.5316, "num_input_tokens_seen": 96527928, "step": 1540 }, { "epoch": 5.124792013311148, "loss": 0.7799962759017944, "loss_ce": 8.907571464078501e-05, "loss_iou": 0.259765625, "loss_num": 0.052734375, "loss_xval": 0.78125, "num_input_tokens_seen": 96527928, "step": 1540 }, { "epoch": 5.128119800332779, "grad_norm": 8.962374687194824, "learning_rate": 5e-06, "loss": 0.6006, "num_input_tokens_seen": 96590408, "step": 1541 }, { "epoch": 5.128119800332779, "loss": 0.5486743450164795, "loss_ce": 0.000334536365699023, "loss_iou": 0.220703125, "loss_num": 0.0216064453125, "loss_xval": 0.546875, "num_input_tokens_seen": 96590408, "step": 1541 }, { "epoch": 5.131447587354409, "grad_norm": 15.073986053466797, "learning_rate": 5e-06, "loss": 0.7561, "num_input_tokens_seen": 96654032, "step": 1542 }, { "epoch": 5.131447587354409, "loss": 0.8482543230056763, "loss_ce": 0.0003539093304425478, "loss_iou": 0.3125, "loss_num": 0.044921875, "loss_xval": 0.84765625, "num_input_tokens_seen": 96654032, "step": 1542 }, { "epoch": 5.13477537437604, "grad_norm": 22.654132843017578, "learning_rate": 5e-06, "loss": 0.7081, "num_input_tokens_seen": 96714960, "step": 1543 }, { "epoch": 5.13477537437604, "loss": 0.7147778868675232, "loss_ce": 0.0019177908543497324, "loss_iou": 0.212890625, "loss_num": 0.05712890625, "loss_xval": 0.7109375, "num_input_tokens_seen": 96714960, "step": 1543 }, { "epoch": 5.13810316139767, "grad_norm": 22.77555274963379, "learning_rate": 5e-06, "loss": 0.8886, "num_input_tokens_seen": 96778136, "step": 1544 }, { "epoch": 5.13810316139767, "loss": 0.9632219672203064, "loss_ce": 0.0003313274646643549, "loss_iou": 0.37890625, "loss_num": 0.041259765625, "loss_xval": 0.9609375, "num_input_tokens_seen": 96778136, "step": 1544 }, { "epoch": 5.141430948419301, "grad_norm": 8.248432159423828, "learning_rate": 5e-06, "loss": 0.7021, "num_input_tokens_seen": 96841760, "step": 1545 }, { "epoch": 5.141430948419301, "loss": 0.9046546816825867, "loss_ce": 0.0003578167234081775, "loss_iou": 0.357421875, "loss_num": 0.0380859375, "loss_xval": 0.90625, "num_input_tokens_seen": 96841760, "step": 1545 }, { "epoch": 5.144758735440932, "grad_norm": 10.648483276367188, "learning_rate": 5e-06, "loss": 0.6296, "num_input_tokens_seen": 96904744, "step": 1546 }, { "epoch": 5.144758735440932, "loss": 0.4327312707901001, "loss_ce": 0.00011409384023863822, "loss_iou": 0.1533203125, "loss_num": 0.025390625, "loss_xval": 0.43359375, "num_input_tokens_seen": 96904744, "step": 1546 }, { "epoch": 5.148086522462562, "grad_norm": 15.166829109191895, "learning_rate": 5e-06, "loss": 0.6218, "num_input_tokens_seen": 96966696, "step": 1547 }, { "epoch": 5.148086522462562, "loss": 0.4544420838356018, "loss_ce": 4.8301358219760004e-06, "loss_iou": 0.10400390625, "loss_num": 0.04931640625, "loss_xval": 0.455078125, "num_input_tokens_seen": 96966696, "step": 1547 }, { "epoch": 5.151414309484193, "grad_norm": 26.0044002532959, "learning_rate": 5e-06, "loss": 1.0288, "num_input_tokens_seen": 97030960, "step": 1548 }, { "epoch": 5.151414309484193, "loss": 0.7978941202163696, "loss_ce": 0.001629421953111887, "loss_iou": 0.3046875, "loss_num": 0.037109375, "loss_xval": 0.796875, "num_input_tokens_seen": 97030960, "step": 1548 }, { "epoch": 5.1547420965058235, "grad_norm": 24.103193283081055, "learning_rate": 5e-06, "loss": 0.5186, "num_input_tokens_seen": 97091908, "step": 1549 }, { "epoch": 5.1547420965058235, "loss": 0.5128387212753296, "loss_ce": 2.1342875697882846e-05, "loss_iou": 0.1904296875, "loss_num": 0.0263671875, "loss_xval": 0.51171875, "num_input_tokens_seen": 97091908, "step": 1549 }, { "epoch": 5.158069883527454, "grad_norm": 25.78814125061035, "learning_rate": 5e-06, "loss": 0.6246, "num_input_tokens_seen": 97155292, "step": 1550 }, { "epoch": 5.158069883527454, "loss": 0.6603955030441284, "loss_ce": 0.0010327360359951854, "loss_iou": 0.2431640625, "loss_num": 0.03466796875, "loss_xval": 0.66015625, "num_input_tokens_seen": 97155292, "step": 1550 }, { "epoch": 5.161397670549085, "grad_norm": 18.84185028076172, "learning_rate": 5e-06, "loss": 0.6987, "num_input_tokens_seen": 97216868, "step": 1551 }, { "epoch": 5.161397670549085, "loss": 0.464704304933548, "loss_ce": 0.00022673951752949506, "loss_iou": 0.158203125, "loss_num": 0.02978515625, "loss_xval": 0.46484375, "num_input_tokens_seen": 97216868, "step": 1551 }, { "epoch": 5.164725457570715, "grad_norm": 12.514577865600586, "learning_rate": 5e-06, "loss": 0.6761, "num_input_tokens_seen": 97279212, "step": 1552 }, { "epoch": 5.164725457570715, "loss": 0.4405713677406311, "loss_ce": 0.00020271647372283041, "loss_iou": 0.1357421875, "loss_num": 0.033935546875, "loss_xval": 0.439453125, "num_input_tokens_seen": 97279212, "step": 1552 }, { "epoch": 5.168053244592346, "grad_norm": 16.056718826293945, "learning_rate": 5e-06, "loss": 0.7033, "num_input_tokens_seen": 97341400, "step": 1553 }, { "epoch": 5.168053244592346, "loss": 0.5781562328338623, "loss_ce": 3.121720510534942e-05, "loss_iou": 0.205078125, "loss_num": 0.033447265625, "loss_xval": 0.578125, "num_input_tokens_seen": 97341400, "step": 1553 }, { "epoch": 5.1713810316139766, "grad_norm": 11.489456176757812, "learning_rate": 5e-06, "loss": 0.5641, "num_input_tokens_seen": 97404004, "step": 1554 }, { "epoch": 5.1713810316139766, "loss": 0.6566271781921387, "loss_ce": 1.0952774573524948e-05, "loss_iou": 0.2451171875, "loss_num": 0.033203125, "loss_xval": 0.65625, "num_input_tokens_seen": 97404004, "step": 1554 }, { "epoch": 5.174708818635607, "grad_norm": 20.704303741455078, "learning_rate": 5e-06, "loss": 0.6354, "num_input_tokens_seen": 97467080, "step": 1555 }, { "epoch": 5.174708818635607, "loss": 0.5469754934310913, "loss_ce": 0.00022257471573539078, "loss_iou": 0.185546875, "loss_num": 0.03515625, "loss_xval": 0.546875, "num_input_tokens_seen": 97467080, "step": 1555 }, { "epoch": 5.178036605657238, "grad_norm": 10.664935111999512, "learning_rate": 5e-06, "loss": 0.5433, "num_input_tokens_seen": 97529676, "step": 1556 }, { "epoch": 5.178036605657238, "loss": 0.4614706039428711, "loss_ce": 1.4318410649138968e-05, "loss_iou": 0.1787109375, "loss_num": 0.020751953125, "loss_xval": 0.4609375, "num_input_tokens_seen": 97529676, "step": 1556 }, { "epoch": 5.181364392678868, "grad_norm": 56.418243408203125, "learning_rate": 5e-06, "loss": 0.6639, "num_input_tokens_seen": 97592376, "step": 1557 }, { "epoch": 5.181364392678868, "loss": 0.7802623510360718, "loss_ce": 0.0004161929537076503, "loss_iou": 0.283203125, "loss_num": 0.04296875, "loss_xval": 0.78125, "num_input_tokens_seen": 97592376, "step": 1557 }, { "epoch": 5.184692179700499, "grad_norm": 12.19267749786377, "learning_rate": 5e-06, "loss": 0.6051, "num_input_tokens_seen": 97653928, "step": 1558 }, { "epoch": 5.184692179700499, "loss": 0.35268425941467285, "loss_ce": 2.311077514605131e-05, "loss_iou": 0.06396484375, "loss_num": 0.044921875, "loss_xval": 0.353515625, "num_input_tokens_seen": 97653928, "step": 1558 }, { "epoch": 5.18801996672213, "grad_norm": 26.610260009765625, "learning_rate": 5e-06, "loss": 0.8163, "num_input_tokens_seen": 97718852, "step": 1559 }, { "epoch": 5.18801996672213, "loss": 0.8163248300552368, "loss_ce": 4.0587485273135826e-05, "loss_iou": 0.326171875, "loss_num": 0.032470703125, "loss_xval": 0.81640625, "num_input_tokens_seen": 97718852, "step": 1559 }, { "epoch": 5.19134775374376, "grad_norm": 10.392890930175781, "learning_rate": 5e-06, "loss": 0.6213, "num_input_tokens_seen": 97780956, "step": 1560 }, { "epoch": 5.19134775374376, "loss": 0.7370084524154663, "loss_ce": 0.0003751750919036567, "loss_iou": 0.25, "loss_num": 0.046875, "loss_xval": 0.73828125, "num_input_tokens_seen": 97780956, "step": 1560 }, { "epoch": 5.194675540765391, "grad_norm": 11.72368049621582, "learning_rate": 5e-06, "loss": 0.6657, "num_input_tokens_seen": 97843592, "step": 1561 }, { "epoch": 5.194675540765391, "loss": 0.6524834036827087, "loss_ce": 0.00013966135156806558, "loss_iou": 0.2255859375, "loss_num": 0.040283203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 97843592, "step": 1561 }, { "epoch": 5.1980033277870215, "grad_norm": 24.580183029174805, "learning_rate": 5e-06, "loss": 0.7505, "num_input_tokens_seen": 97906788, "step": 1562 }, { "epoch": 5.1980033277870215, "loss": 0.8106405735015869, "loss_ce": 9.368562314193696e-05, "loss_iou": 0.3125, "loss_num": 0.037109375, "loss_xval": 0.8125, "num_input_tokens_seen": 97906788, "step": 1562 }, { "epoch": 5.201331114808652, "grad_norm": 28.45431137084961, "learning_rate": 5e-06, "loss": 0.8591, "num_input_tokens_seen": 97970892, "step": 1563 }, { "epoch": 5.201331114808652, "loss": 0.8156955242156982, "loss_ce": 0.0001437873870600015, "loss_iou": 0.3125, "loss_num": 0.038330078125, "loss_xval": 0.81640625, "num_input_tokens_seen": 97970892, "step": 1563 }, { "epoch": 5.204658901830283, "grad_norm": 20.867841720581055, "learning_rate": 5e-06, "loss": 0.7299, "num_input_tokens_seen": 98031988, "step": 1564 }, { "epoch": 5.204658901830283, "loss": 0.5688271522521973, "loss_ce": 0.0001015645029838197, "loss_iou": 0.1875, "loss_num": 0.03857421875, "loss_xval": 0.5703125, "num_input_tokens_seen": 98031988, "step": 1564 }, { "epoch": 5.207986688851913, "grad_norm": 17.65668296813965, "learning_rate": 5e-06, "loss": 0.7998, "num_input_tokens_seen": 98095004, "step": 1565 }, { "epoch": 5.207986688851913, "loss": 0.8811174035072327, "loss_ce": 1.3924514860264026e-05, "loss_iou": 0.328125, "loss_num": 0.044921875, "loss_xval": 0.8828125, "num_input_tokens_seen": 98095004, "step": 1565 }, { "epoch": 5.211314475873544, "grad_norm": 40.313148498535156, "learning_rate": 5e-06, "loss": 0.6859, "num_input_tokens_seen": 98158228, "step": 1566 }, { "epoch": 5.211314475873544, "loss": 0.6108708381652832, "loss_ce": 3.099464811384678e-05, "loss_iou": 0.2421875, "loss_num": 0.025146484375, "loss_xval": 0.609375, "num_input_tokens_seen": 98158228, "step": 1566 }, { "epoch": 5.2146422628951745, "grad_norm": 37.769710540771484, "learning_rate": 5e-06, "loss": 0.9572, "num_input_tokens_seen": 98222420, "step": 1567 }, { "epoch": 5.2146422628951745, "loss": 1.3382879495620728, "loss_ce": 0.00015320628881454468, "loss_iou": 0.462890625, "loss_num": 0.08251953125, "loss_xval": 1.3359375, "num_input_tokens_seen": 98222420, "step": 1567 }, { "epoch": 5.217970049916805, "grad_norm": 25.49356460571289, "learning_rate": 5e-06, "loss": 0.6006, "num_input_tokens_seen": 98285384, "step": 1568 }, { "epoch": 5.217970049916805, "loss": 0.6685374975204468, "loss_ce": 0.00044664012966677547, "loss_iou": 0.25, "loss_num": 0.033447265625, "loss_xval": 0.66796875, "num_input_tokens_seen": 98285384, "step": 1568 }, { "epoch": 5.221297836938436, "grad_norm": 56.28620529174805, "learning_rate": 5e-06, "loss": 0.5859, "num_input_tokens_seen": 98348264, "step": 1569 }, { "epoch": 5.221297836938436, "loss": 0.7834440469741821, "loss_ce": 0.0010953641030937433, "loss_iou": 0.28125, "loss_num": 0.0439453125, "loss_xval": 0.78125, "num_input_tokens_seen": 98348264, "step": 1569 }, { "epoch": 5.224625623960066, "grad_norm": 12.540011405944824, "learning_rate": 5e-06, "loss": 0.6585, "num_input_tokens_seen": 98412316, "step": 1570 }, { "epoch": 5.224625623960066, "loss": 0.6923198699951172, "loss_ce": 0.00070000602863729, "loss_iou": 0.263671875, "loss_num": 0.03271484375, "loss_xval": 0.69140625, "num_input_tokens_seen": 98412316, "step": 1570 }, { "epoch": 5.227953410981697, "grad_norm": 18.63296890258789, "learning_rate": 5e-06, "loss": 0.7227, "num_input_tokens_seen": 98473628, "step": 1571 }, { "epoch": 5.227953410981697, "loss": 0.5489916801452637, "loss_ce": 0.0004077033372595906, "loss_iou": 0.22265625, "loss_num": 0.0205078125, "loss_xval": 0.546875, "num_input_tokens_seen": 98473628, "step": 1571 }, { "epoch": 5.231281198003328, "grad_norm": 20.73672866821289, "learning_rate": 5e-06, "loss": 0.8009, "num_input_tokens_seen": 98537644, "step": 1572 }, { "epoch": 5.231281198003328, "loss": 0.7523382902145386, "loss_ce": 0.0011175735853612423, "loss_iou": 0.29296875, "loss_num": 0.032958984375, "loss_xval": 0.75, "num_input_tokens_seen": 98537644, "step": 1572 }, { "epoch": 5.234608985024958, "grad_norm": 12.437963485717773, "learning_rate": 5e-06, "loss": 0.4193, "num_input_tokens_seen": 98600652, "step": 1573 }, { "epoch": 5.234608985024958, "loss": 0.4980931878089905, "loss_ce": 4.6317738451762125e-05, "loss_iou": 0.203125, "loss_num": 0.0184326171875, "loss_xval": 0.498046875, "num_input_tokens_seen": 98600652, "step": 1573 }, { "epoch": 5.237936772046589, "grad_norm": 15.847247123718262, "learning_rate": 5e-06, "loss": 0.7185, "num_input_tokens_seen": 98662588, "step": 1574 }, { "epoch": 5.237936772046589, "loss": 0.640664279460907, "loss_ce": 3.931096216547303e-05, "loss_iou": 0.23046875, "loss_num": 0.035888671875, "loss_xval": 0.640625, "num_input_tokens_seen": 98662588, "step": 1574 }, { "epoch": 5.241264559068219, "grad_norm": 10.624329566955566, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 98724744, "step": 1575 }, { "epoch": 5.241264559068219, "loss": 0.5354199409484863, "loss_ce": 1.9564831745810807e-05, "loss_iou": 0.1943359375, "loss_num": 0.029296875, "loss_xval": 0.53515625, "num_input_tokens_seen": 98724744, "step": 1575 }, { "epoch": 5.24459234608985, "grad_norm": 7.233494758605957, "learning_rate": 5e-06, "loss": 0.608, "num_input_tokens_seen": 98787868, "step": 1576 }, { "epoch": 5.24459234608985, "loss": 0.5527728796005249, "loss_ce": 3.850481516565196e-05, "loss_iou": 0.2060546875, "loss_num": 0.028076171875, "loss_xval": 0.5546875, "num_input_tokens_seen": 98787868, "step": 1576 }, { "epoch": 5.247920133111481, "grad_norm": 14.444751739501953, "learning_rate": 5e-06, "loss": 0.6454, "num_input_tokens_seen": 98849748, "step": 1577 }, { "epoch": 5.247920133111481, "loss": 0.7664744853973389, "loss_ce": 0.00011705853830790147, "loss_iou": 0.25390625, "loss_num": 0.0517578125, "loss_xval": 0.765625, "num_input_tokens_seen": 98849748, "step": 1577 }, { "epoch": 5.251247920133111, "grad_norm": 47.16133117675781, "learning_rate": 5e-06, "loss": 0.6949, "num_input_tokens_seen": 98913688, "step": 1578 }, { "epoch": 5.251247920133111, "loss": 0.48933345079421997, "loss_ce": 0.0005639244918711483, "loss_iou": 0.1767578125, "loss_num": 0.027099609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 98913688, "step": 1578 }, { "epoch": 5.254575707154742, "grad_norm": 8.695144653320312, "learning_rate": 5e-06, "loss": 0.6434, "num_input_tokens_seen": 98975412, "step": 1579 }, { "epoch": 5.254575707154742, "loss": 0.674487829208374, "loss_ce": 0.00041555988718755543, "loss_iou": 0.2470703125, "loss_num": 0.035888671875, "loss_xval": 0.67578125, "num_input_tokens_seen": 98975412, "step": 1579 }, { "epoch": 5.2579034941763725, "grad_norm": 16.159177780151367, "learning_rate": 5e-06, "loss": 0.4007, "num_input_tokens_seen": 99036360, "step": 1580 }, { "epoch": 5.2579034941763725, "loss": 0.35921430587768555, "loss_ce": 2.2437618099502288e-05, "loss_iou": 0.1298828125, "loss_num": 0.019775390625, "loss_xval": 0.359375, "num_input_tokens_seen": 99036360, "step": 1580 }, { "epoch": 5.261231281198003, "grad_norm": 23.034101486206055, "learning_rate": 5e-06, "loss": 0.6136, "num_input_tokens_seen": 99099260, "step": 1581 }, { "epoch": 5.261231281198003, "loss": 0.5759777426719666, "loss_ce": 0.0002941254642792046, "loss_iou": 0.193359375, "loss_num": 0.0380859375, "loss_xval": 0.57421875, "num_input_tokens_seen": 99099260, "step": 1581 }, { "epoch": 5.264559068219634, "grad_norm": 6.690896987915039, "learning_rate": 5e-06, "loss": 0.4405, "num_input_tokens_seen": 99162144, "step": 1582 }, { "epoch": 5.264559068219634, "loss": 0.33105891942977905, "loss_ce": 4.204376182315173e-06, "loss_iou": 0.09619140625, "loss_num": 0.0277099609375, "loss_xval": 0.33203125, "num_input_tokens_seen": 99162144, "step": 1582 }, { "epoch": 5.267886855241264, "grad_norm": 15.207876205444336, "learning_rate": 5e-06, "loss": 0.962, "num_input_tokens_seen": 99225572, "step": 1583 }, { "epoch": 5.267886855241264, "loss": 0.906517505645752, "loss_ce": 2.33937862503808e-05, "loss_iou": 0.3359375, "loss_num": 0.046875, "loss_xval": 0.90625, "num_input_tokens_seen": 99225572, "step": 1583 }, { "epoch": 5.271214642262895, "grad_norm": 18.985828399658203, "learning_rate": 5e-06, "loss": 0.6445, "num_input_tokens_seen": 99287628, "step": 1584 }, { "epoch": 5.271214642262895, "loss": 0.6738436222076416, "loss_ce": 0.0002596047124825418, "loss_iou": 0.228515625, "loss_num": 0.043212890625, "loss_xval": 0.671875, "num_input_tokens_seen": 99287628, "step": 1584 }, { "epoch": 5.2745424292845255, "grad_norm": 37.433284759521484, "learning_rate": 5e-06, "loss": 0.7042, "num_input_tokens_seen": 99350756, "step": 1585 }, { "epoch": 5.2745424292845255, "loss": 0.7724255323410034, "loss_ce": 2.5673223717603832e-05, "loss_iou": 0.31640625, "loss_num": 0.027587890625, "loss_xval": 0.7734375, "num_input_tokens_seen": 99350756, "step": 1585 }, { "epoch": 5.277870216306156, "grad_norm": 31.332178115844727, "learning_rate": 5e-06, "loss": 0.6629, "num_input_tokens_seen": 99414312, "step": 1586 }, { "epoch": 5.277870216306156, "loss": 0.7176036834716797, "loss_ce": 7.437036401825026e-05, "loss_iou": 0.291015625, "loss_num": 0.0272216796875, "loss_xval": 0.71875, "num_input_tokens_seen": 99414312, "step": 1586 }, { "epoch": 5.281198003327787, "grad_norm": 25.67292594909668, "learning_rate": 5e-06, "loss": 0.5839, "num_input_tokens_seen": 99476892, "step": 1587 }, { "epoch": 5.281198003327787, "loss": 0.5565001964569092, "loss_ce": 0.0005309820408001542, "loss_iou": 0.1953125, "loss_num": 0.033203125, "loss_xval": 0.5546875, "num_input_tokens_seen": 99476892, "step": 1587 }, { "epoch": 5.284525790349417, "grad_norm": 11.792510986328125, "learning_rate": 5e-06, "loss": 0.5957, "num_input_tokens_seen": 99537236, "step": 1588 }, { "epoch": 5.284525790349417, "loss": 0.5811848640441895, "loss_ce": 8.147982953232713e-06, "loss_iou": 0.2080078125, "loss_num": 0.032958984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 99537236, "step": 1588 }, { "epoch": 5.287853577371048, "grad_norm": 8.043577194213867, "learning_rate": 5e-06, "loss": 0.4007, "num_input_tokens_seen": 99598516, "step": 1589 }, { "epoch": 5.287853577371048, "loss": 0.5172300934791565, "loss_ce": 0.0006285394774749875, "loss_iou": 0.1591796875, "loss_num": 0.03955078125, "loss_xval": 0.515625, "num_input_tokens_seen": 99598516, "step": 1589 }, { "epoch": 5.291181364392679, "grad_norm": 8.817167282104492, "learning_rate": 5e-06, "loss": 0.7817, "num_input_tokens_seen": 99661384, "step": 1590 }, { "epoch": 5.291181364392679, "loss": 0.8474394679069519, "loss_ce": 0.0003020001749973744, "loss_iou": 0.33984375, "loss_num": 0.033447265625, "loss_xval": 0.84765625, "num_input_tokens_seen": 99661384, "step": 1590 }, { "epoch": 5.294509151414309, "grad_norm": 8.858013153076172, "learning_rate": 5e-06, "loss": 0.6231, "num_input_tokens_seen": 99723804, "step": 1591 }, { "epoch": 5.294509151414309, "loss": 0.6893442869186401, "loss_ce": 0.0008677446166984737, "loss_iou": 0.2421875, "loss_num": 0.041015625, "loss_xval": 0.6875, "num_input_tokens_seen": 99723804, "step": 1591 }, { "epoch": 5.29783693843594, "grad_norm": 14.866471290588379, "learning_rate": 5e-06, "loss": 0.5656, "num_input_tokens_seen": 99786560, "step": 1592 }, { "epoch": 5.29783693843594, "loss": 0.7030919194221497, "loss_ce": 0.001187638845294714, "loss_iou": 0.220703125, "loss_num": 0.05224609375, "loss_xval": 0.703125, "num_input_tokens_seen": 99786560, "step": 1592 }, { "epoch": 5.3011647254575704, "grad_norm": 30.753992080688477, "learning_rate": 5e-06, "loss": 0.5998, "num_input_tokens_seen": 99848612, "step": 1593 }, { "epoch": 5.3011647254575704, "loss": 0.5466430187225342, "loss_ce": 1.2159855032223277e-05, "loss_iou": 0.1806640625, "loss_num": 0.037109375, "loss_xval": 0.546875, "num_input_tokens_seen": 99848612, "step": 1593 }, { "epoch": 5.304492512479201, "grad_norm": 21.648799896240234, "learning_rate": 5e-06, "loss": 0.4731, "num_input_tokens_seen": 99911172, "step": 1594 }, { "epoch": 5.304492512479201, "loss": 0.49872225522994995, "loss_ce": 3.981073859904427e-06, "loss_iou": 0.166015625, "loss_num": 0.033203125, "loss_xval": 0.498046875, "num_input_tokens_seen": 99911172, "step": 1594 }, { "epoch": 5.307820299500832, "grad_norm": 10.105705261230469, "learning_rate": 5e-06, "loss": 0.6273, "num_input_tokens_seen": 99974132, "step": 1595 }, { "epoch": 5.307820299500832, "loss": 0.756218433380127, "loss_ce": 0.00023700158635620028, "loss_iou": 0.29296875, "loss_num": 0.034423828125, "loss_xval": 0.7578125, "num_input_tokens_seen": 99974132, "step": 1595 }, { "epoch": 5.311148086522462, "grad_norm": 9.880415916442871, "learning_rate": 5e-06, "loss": 0.5729, "num_input_tokens_seen": 100037056, "step": 1596 }, { "epoch": 5.311148086522462, "loss": 0.7500790357589722, "loss_ce": 0.0004452507710084319, "loss_iou": 0.267578125, "loss_num": 0.04296875, "loss_xval": 0.75, "num_input_tokens_seen": 100037056, "step": 1596 }, { "epoch": 5.314475873544093, "grad_norm": 11.304596900939941, "learning_rate": 5e-06, "loss": 0.6915, "num_input_tokens_seen": 100099736, "step": 1597 }, { "epoch": 5.314475873544093, "loss": 0.6385194063186646, "loss_ce": 0.0005799396312795579, "loss_iou": 0.2255859375, "loss_num": 0.037353515625, "loss_xval": 0.63671875, "num_input_tokens_seen": 100099736, "step": 1597 }, { "epoch": 5.3178036605657235, "grad_norm": 15.554214477539062, "learning_rate": 5e-06, "loss": 0.4008, "num_input_tokens_seen": 100162876, "step": 1598 }, { "epoch": 5.3178036605657235, "loss": 0.4724809527397156, "loss_ce": 7.81012568040751e-06, "loss_iou": 0.1640625, "loss_num": 0.029052734375, "loss_xval": 0.47265625, "num_input_tokens_seen": 100162876, "step": 1598 }, { "epoch": 5.321131447587354, "grad_norm": 15.841327667236328, "learning_rate": 5e-06, "loss": 0.4647, "num_input_tokens_seen": 100223888, "step": 1599 }, { "epoch": 5.321131447587354, "loss": 0.5920575857162476, "loss_ce": 1.6615902495686896e-05, "loss_iou": 0.220703125, "loss_num": 0.0301513671875, "loss_xval": 0.59375, "num_input_tokens_seen": 100223888, "step": 1599 }, { "epoch": 5.324459234608985, "grad_norm": 14.503348350524902, "learning_rate": 5e-06, "loss": 0.5838, "num_input_tokens_seen": 100287936, "step": 1600 }, { "epoch": 5.324459234608985, "loss": 0.7094786167144775, "loss_ce": 5.972106009721756e-06, "loss_iou": 0.279296875, "loss_num": 0.0301513671875, "loss_xval": 0.7109375, "num_input_tokens_seen": 100287936, "step": 1600 }, { "epoch": 5.327787021630615, "grad_norm": 16.300003051757812, "learning_rate": 5e-06, "loss": 0.8038, "num_input_tokens_seen": 100350724, "step": 1601 }, { "epoch": 5.327787021630615, "loss": 0.7446346282958984, "loss_ce": 5.705600869987393e-06, "loss_iou": 0.263671875, "loss_num": 0.04345703125, "loss_xval": 0.74609375, "num_input_tokens_seen": 100350724, "step": 1601 }, { "epoch": 5.331114808652246, "grad_norm": 9.915740966796875, "learning_rate": 5e-06, "loss": 0.5478, "num_input_tokens_seen": 100413628, "step": 1602 }, { "epoch": 5.331114808652246, "loss": 0.6431159377098083, "loss_ce": 0.0007209106115624309, "loss_iou": 0.2314453125, "loss_num": 0.0361328125, "loss_xval": 0.640625, "num_input_tokens_seen": 100413628, "step": 1602 }, { "epoch": 5.334442595673877, "grad_norm": 11.628096580505371, "learning_rate": 5e-06, "loss": 0.7347, "num_input_tokens_seen": 100477348, "step": 1603 }, { "epoch": 5.334442595673877, "loss": 0.8101913928985596, "loss_ce": 1.0693570402509067e-05, "loss_iou": 0.267578125, "loss_num": 0.05517578125, "loss_xval": 0.80859375, "num_input_tokens_seen": 100477348, "step": 1603 }, { "epoch": 5.337770382695507, "grad_norm": 13.016229629516602, "learning_rate": 5e-06, "loss": 0.7153, "num_input_tokens_seen": 100540256, "step": 1604 }, { "epoch": 5.337770382695507, "loss": 0.8160589933395386, "loss_ce": 0.00014098671090323478, "loss_iou": 0.306640625, "loss_num": 0.040771484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 100540256, "step": 1604 }, { "epoch": 5.341098169717138, "grad_norm": 12.405158042907715, "learning_rate": 5e-06, "loss": 0.7691, "num_input_tokens_seen": 100602364, "step": 1605 }, { "epoch": 5.341098169717138, "loss": 0.7265721559524536, "loss_ce": 9.668710845289752e-06, "loss_iou": 0.2158203125, "loss_num": 0.05908203125, "loss_xval": 0.7265625, "num_input_tokens_seen": 100602364, "step": 1605 }, { "epoch": 5.344425956738768, "grad_norm": 8.555596351623535, "learning_rate": 5e-06, "loss": 0.5018, "num_input_tokens_seen": 100666176, "step": 1606 }, { "epoch": 5.344425956738768, "loss": 0.5672014951705933, "loss_ce": 0.000917275610845536, "loss_iou": 0.2412109375, "loss_num": 0.0169677734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 100666176, "step": 1606 }, { "epoch": 5.347753743760399, "grad_norm": 9.452916145324707, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 100728252, "step": 1607 }, { "epoch": 5.347753743760399, "loss": 0.45034635066986084, "loss_ce": 2.8950918931514025e-05, "loss_iou": 0.125, "loss_num": 0.0400390625, "loss_xval": 0.451171875, "num_input_tokens_seen": 100728252, "step": 1607 }, { "epoch": 5.35108153078203, "grad_norm": 15.707732200622559, "learning_rate": 5e-06, "loss": 0.7111, "num_input_tokens_seen": 100792808, "step": 1608 }, { "epoch": 5.35108153078203, "loss": 0.6466152667999268, "loss_ce": 8.794582754489966e-06, "loss_iou": 0.232421875, "loss_num": 0.03662109375, "loss_xval": 0.6484375, "num_input_tokens_seen": 100792808, "step": 1608 }, { "epoch": 5.35440931780366, "grad_norm": 7.950328350067139, "learning_rate": 5e-06, "loss": 0.703, "num_input_tokens_seen": 100854960, "step": 1609 }, { "epoch": 5.35440931780366, "loss": 0.4984325170516968, "loss_ce": 4.995334893465042e-05, "loss_iou": 0.134765625, "loss_num": 0.0458984375, "loss_xval": 0.498046875, "num_input_tokens_seen": 100854960, "step": 1609 }, { "epoch": 5.357737104825291, "grad_norm": 28.904659271240234, "learning_rate": 5e-06, "loss": 0.4373, "num_input_tokens_seen": 100917808, "step": 1610 }, { "epoch": 5.357737104825291, "loss": 0.3865182399749756, "loss_ce": 4.365260974736884e-05, "loss_iou": 0.154296875, "loss_num": 0.01556396484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 100917808, "step": 1610 }, { "epoch": 5.3610648918469215, "grad_norm": 27.833314895629883, "learning_rate": 5e-06, "loss": 0.7168, "num_input_tokens_seen": 100980612, "step": 1611 }, { "epoch": 5.3610648918469215, "loss": 0.7097264528274536, "loss_ce": 9.63068669079803e-06, "loss_iou": 0.2734375, "loss_num": 0.0322265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 100980612, "step": 1611 }, { "epoch": 5.364392678868552, "grad_norm": 21.03418731689453, "learning_rate": 5e-06, "loss": 0.392, "num_input_tokens_seen": 101043280, "step": 1612 }, { "epoch": 5.364392678868552, "loss": 0.437528520822525, "loss_ce": 0.001249218126758933, "loss_iou": 0.1669921875, "loss_num": 0.0203857421875, "loss_xval": 0.435546875, "num_input_tokens_seen": 101043280, "step": 1612 }, { "epoch": 5.367720465890183, "grad_norm": 22.735187530517578, "learning_rate": 5e-06, "loss": 0.6771, "num_input_tokens_seen": 101106612, "step": 1613 }, { "epoch": 5.367720465890183, "loss": 0.597554087638855, "loss_ce": 0.0002640723541844636, "loss_iou": 0.2392578125, "loss_num": 0.0238037109375, "loss_xval": 0.59765625, "num_input_tokens_seen": 101106612, "step": 1613 }, { "epoch": 5.371048252911813, "grad_norm": 25.894041061401367, "learning_rate": 5e-06, "loss": 0.6754, "num_input_tokens_seen": 101171044, "step": 1614 }, { "epoch": 5.371048252911813, "loss": 0.8543329834938049, "loss_ce": 0.0027704958338290453, "loss_iou": 0.33984375, "loss_num": 0.034423828125, "loss_xval": 0.8515625, "num_input_tokens_seen": 101171044, "step": 1614 }, { "epoch": 5.374376039933444, "grad_norm": 10.28608226776123, "learning_rate": 5e-06, "loss": 0.494, "num_input_tokens_seen": 101232712, "step": 1615 }, { "epoch": 5.374376039933444, "loss": 0.38373783230781555, "loss_ce": 9.81677385425428e-06, "loss_iou": 0.11181640625, "loss_num": 0.031982421875, "loss_xval": 0.3828125, "num_input_tokens_seen": 101232712, "step": 1615 }, { "epoch": 5.3777038269550745, "grad_norm": 8.205290794372559, "learning_rate": 5e-06, "loss": 0.7475, "num_input_tokens_seen": 101296196, "step": 1616 }, { "epoch": 5.3777038269550745, "loss": 0.8132391571998596, "loss_ce": 6.710686648148112e-06, "loss_iou": 0.287109375, "loss_num": 0.047607421875, "loss_xval": 0.8125, "num_input_tokens_seen": 101296196, "step": 1616 }, { "epoch": 5.381031613976705, "grad_norm": 27.368783950805664, "learning_rate": 5e-06, "loss": 0.8961, "num_input_tokens_seen": 101358188, "step": 1617 }, { "epoch": 5.381031613976705, "loss": 0.590094268321991, "loss_ce": 6.354800916597014e-06, "loss_iou": 0.224609375, "loss_num": 0.028076171875, "loss_xval": 0.58984375, "num_input_tokens_seen": 101358188, "step": 1617 }, { "epoch": 5.384359400998336, "grad_norm": 19.155134201049805, "learning_rate": 5e-06, "loss": 0.499, "num_input_tokens_seen": 101419724, "step": 1618 }, { "epoch": 5.384359400998336, "loss": 0.47583693265914917, "loss_ce": 6.851345005998155e-06, "loss_iou": 0.150390625, "loss_num": 0.034912109375, "loss_xval": 0.4765625, "num_input_tokens_seen": 101419724, "step": 1618 }, { "epoch": 5.387687188019966, "grad_norm": 10.29180908203125, "learning_rate": 5e-06, "loss": 0.6789, "num_input_tokens_seen": 101483016, "step": 1619 }, { "epoch": 5.387687188019966, "loss": 0.8924827575683594, "loss_ce": 0.0011253345292061567, "loss_iou": 0.318359375, "loss_num": 0.05078125, "loss_xval": 0.890625, "num_input_tokens_seen": 101483016, "step": 1619 }, { "epoch": 5.391014975041597, "grad_norm": 9.877927780151367, "learning_rate": 5e-06, "loss": 0.6422, "num_input_tokens_seen": 101545020, "step": 1620 }, { "epoch": 5.391014975041597, "loss": 0.4606378972530365, "loss_ce": 6.662701343884692e-05, "loss_iou": 0.1396484375, "loss_num": 0.036376953125, "loss_xval": 0.4609375, "num_input_tokens_seen": 101545020, "step": 1620 }, { "epoch": 5.394342762063228, "grad_norm": 6.684299945831299, "learning_rate": 5e-06, "loss": 0.6279, "num_input_tokens_seen": 101606944, "step": 1621 }, { "epoch": 5.394342762063228, "loss": 0.5572009086608887, "loss_ce": 7.198992534540594e-05, "loss_iou": 0.1982421875, "loss_num": 0.031982421875, "loss_xval": 0.55859375, "num_input_tokens_seen": 101606944, "step": 1621 }, { "epoch": 5.397670549084858, "grad_norm": 9.343018531799316, "learning_rate": 5e-06, "loss": 0.485, "num_input_tokens_seen": 101670488, "step": 1622 }, { "epoch": 5.397670549084858, "loss": 0.359870970249176, "loss_ce": 7.678188922000118e-06, "loss_iou": 0.1201171875, "loss_num": 0.0240478515625, "loss_xval": 0.359375, "num_input_tokens_seen": 101670488, "step": 1622 }, { "epoch": 5.400998336106489, "grad_norm": 11.42383098602295, "learning_rate": 5e-06, "loss": 0.4616, "num_input_tokens_seen": 101732492, "step": 1623 }, { "epoch": 5.400998336106489, "loss": 0.526130199432373, "loss_ce": 7.132602604542626e-06, "loss_iou": 0.1708984375, "loss_num": 0.03662109375, "loss_xval": 0.52734375, "num_input_tokens_seen": 101732492, "step": 1623 }, { "epoch": 5.404326123128119, "grad_norm": 23.553722381591797, "learning_rate": 5e-06, "loss": 0.6852, "num_input_tokens_seen": 101796144, "step": 1624 }, { "epoch": 5.404326123128119, "loss": 0.7048993110656738, "loss_ce": 6.532372935907915e-05, "loss_iou": 0.2734375, "loss_num": 0.031982421875, "loss_xval": 0.703125, "num_input_tokens_seen": 101796144, "step": 1624 }, { "epoch": 5.40765391014975, "grad_norm": 17.543825149536133, "learning_rate": 5e-06, "loss": 0.6133, "num_input_tokens_seen": 101858992, "step": 1625 }, { "epoch": 5.40765391014975, "loss": 0.6574287414550781, "loss_ce": 0.0002022029075305909, "loss_iou": 0.2158203125, "loss_num": 0.044921875, "loss_xval": 0.65625, "num_input_tokens_seen": 101858992, "step": 1625 }, { "epoch": 5.410981697171381, "grad_norm": 9.773475646972656, "learning_rate": 5e-06, "loss": 0.6865, "num_input_tokens_seen": 101920736, "step": 1626 }, { "epoch": 5.410981697171381, "loss": 0.8123984336853027, "loss_ce": 2.056111225101631e-05, "loss_iou": 0.294921875, "loss_num": 0.044189453125, "loss_xval": 0.8125, "num_input_tokens_seen": 101920736, "step": 1626 }, { "epoch": 5.414309484193011, "grad_norm": 7.398290634155273, "learning_rate": 5e-06, "loss": 0.7631, "num_input_tokens_seen": 101984852, "step": 1627 }, { "epoch": 5.414309484193011, "loss": 0.7693369388580322, "loss_ce": 4.986457497579977e-05, "loss_iou": 0.291015625, "loss_num": 0.037109375, "loss_xval": 0.76953125, "num_input_tokens_seen": 101984852, "step": 1627 }, { "epoch": 5.417637271214642, "grad_norm": 16.37160873413086, "learning_rate": 5e-06, "loss": 0.7596, "num_input_tokens_seen": 102048276, "step": 1628 }, { "epoch": 5.417637271214642, "loss": 1.0821470022201538, "loss_ce": 0.0006040593725629151, "loss_iou": 0.4140625, "loss_num": 0.050537109375, "loss_xval": 1.078125, "num_input_tokens_seen": 102048276, "step": 1628 }, { "epoch": 5.4209650582362725, "grad_norm": 11.9351806640625, "learning_rate": 5e-06, "loss": 0.5921, "num_input_tokens_seen": 102111908, "step": 1629 }, { "epoch": 5.4209650582362725, "loss": 0.5010902881622314, "loss_ce": 0.00011371778964530677, "loss_iou": 0.2001953125, "loss_num": 0.0201416015625, "loss_xval": 0.5, "num_input_tokens_seen": 102111908, "step": 1629 }, { "epoch": 5.424292845257903, "grad_norm": 12.436307907104492, "learning_rate": 5e-06, "loss": 0.4667, "num_input_tokens_seen": 102173512, "step": 1630 }, { "epoch": 5.424292845257903, "loss": 0.4460484981536865, "loss_ce": 3.5583727822086075e-06, "loss_iou": 0.12890625, "loss_num": 0.03759765625, "loss_xval": 0.4453125, "num_input_tokens_seen": 102173512, "step": 1630 }, { "epoch": 5.427620632279534, "grad_norm": 5.2960286140441895, "learning_rate": 5e-06, "loss": 0.5887, "num_input_tokens_seen": 102233948, "step": 1631 }, { "epoch": 5.427620632279534, "loss": 0.7739818692207336, "loss_ce": 0.0010325934272259474, "loss_iou": 0.26171875, "loss_num": 0.049560546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 102233948, "step": 1631 }, { "epoch": 5.430948419301164, "grad_norm": 26.14053726196289, "learning_rate": 5e-06, "loss": 0.6227, "num_input_tokens_seen": 102296700, "step": 1632 }, { "epoch": 5.430948419301164, "loss": 0.5990396738052368, "loss_ce": 0.0001627473975531757, "loss_iou": 0.21875, "loss_num": 0.0322265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 102296700, "step": 1632 }, { "epoch": 5.434276206322795, "grad_norm": 31.809429168701172, "learning_rate": 5e-06, "loss": 0.8462, "num_input_tokens_seen": 102359204, "step": 1633 }, { "epoch": 5.434276206322795, "loss": 0.9356186985969543, "loss_ce": 7.183669367805123e-05, "loss_iou": 0.359375, "loss_num": 0.04345703125, "loss_xval": 0.9375, "num_input_tokens_seen": 102359204, "step": 1633 }, { "epoch": 5.437603993344426, "grad_norm": 14.257760047912598, "learning_rate": 5e-06, "loss": 0.5789, "num_input_tokens_seen": 102422568, "step": 1634 }, { "epoch": 5.437603993344426, "loss": 0.4764263927936554, "loss_ce": 0.00010802644828800112, "loss_iou": 0.1787109375, "loss_num": 0.02392578125, "loss_xval": 0.4765625, "num_input_tokens_seen": 102422568, "step": 1634 }, { "epoch": 5.440931780366056, "grad_norm": 8.30467414855957, "learning_rate": 5e-06, "loss": 0.6839, "num_input_tokens_seen": 102485644, "step": 1635 }, { "epoch": 5.440931780366056, "loss": 0.6356232762336731, "loss_ce": 3.1584886528435163e-06, "loss_iou": 0.224609375, "loss_num": 0.037353515625, "loss_xval": 0.63671875, "num_input_tokens_seen": 102485644, "step": 1635 }, { "epoch": 5.444259567387687, "grad_norm": 9.754112243652344, "learning_rate": 5e-06, "loss": 0.6331, "num_input_tokens_seen": 102549036, "step": 1636 }, { "epoch": 5.444259567387687, "loss": 0.5296279788017273, "loss_ce": 2.5940513296518475e-05, "loss_iou": 0.1552734375, "loss_num": 0.04345703125, "loss_xval": 0.53125, "num_input_tokens_seen": 102549036, "step": 1636 }, { "epoch": 5.447587354409317, "grad_norm": 23.580772399902344, "learning_rate": 5e-06, "loss": 0.6052, "num_input_tokens_seen": 102611512, "step": 1637 }, { "epoch": 5.447587354409317, "loss": 0.6287937164306641, "loss_ce": 0.0008640490705147386, "loss_iou": 0.248046875, "loss_num": 0.0264892578125, "loss_xval": 0.62890625, "num_input_tokens_seen": 102611512, "step": 1637 }, { "epoch": 5.450915141430948, "grad_norm": 28.77077865600586, "learning_rate": 5e-06, "loss": 0.4949, "num_input_tokens_seen": 102674132, "step": 1638 }, { "epoch": 5.450915141430948, "loss": 0.3735789656639099, "loss_ce": 0.0004100346704944968, "loss_iou": 0.142578125, "loss_num": 0.0174560546875, "loss_xval": 0.373046875, "num_input_tokens_seen": 102674132, "step": 1638 }, { "epoch": 5.454242928452579, "grad_norm": 16.224525451660156, "learning_rate": 5e-06, "loss": 0.4872, "num_input_tokens_seen": 102734756, "step": 1639 }, { "epoch": 5.454242928452579, "loss": 0.5118467211723328, "loss_ce": 0.0004941736115142703, "loss_iou": 0.1875, "loss_num": 0.02734375, "loss_xval": 0.51171875, "num_input_tokens_seen": 102734756, "step": 1639 }, { "epoch": 5.457570715474209, "grad_norm": 13.900581359863281, "learning_rate": 5e-06, "loss": 0.8763, "num_input_tokens_seen": 102798876, "step": 1640 }, { "epoch": 5.457570715474209, "loss": 0.7113577723503113, "loss_ce": 0.00029820884810760617, "loss_iou": 0.265625, "loss_num": 0.035888671875, "loss_xval": 0.7109375, "num_input_tokens_seen": 102798876, "step": 1640 }, { "epoch": 5.46089850249584, "grad_norm": 15.232627868652344, "learning_rate": 5e-06, "loss": 0.6676, "num_input_tokens_seen": 102861604, "step": 1641 }, { "epoch": 5.46089850249584, "loss": 0.8082327246665955, "loss_ce": 5.173581939743599e-06, "loss_iou": 0.291015625, "loss_num": 0.04541015625, "loss_xval": 0.80859375, "num_input_tokens_seen": 102861604, "step": 1641 }, { "epoch": 5.4642262895174705, "grad_norm": 27.81981086730957, "learning_rate": 5e-06, "loss": 0.7923, "num_input_tokens_seen": 102924404, "step": 1642 }, { "epoch": 5.4642262895174705, "loss": 0.9987320899963379, "loss_ce": 7.487506081815809e-05, "loss_iou": 0.330078125, "loss_num": 0.0673828125, "loss_xval": 1.0, "num_input_tokens_seen": 102924404, "step": 1642 }, { "epoch": 5.467554076539101, "grad_norm": 47.511383056640625, "learning_rate": 5e-06, "loss": 0.6262, "num_input_tokens_seen": 102987668, "step": 1643 }, { "epoch": 5.467554076539101, "loss": 0.7327072620391846, "loss_ce": 4.125821578782052e-05, "loss_iou": 0.2412109375, "loss_num": 0.0498046875, "loss_xval": 0.734375, "num_input_tokens_seen": 102987668, "step": 1643 }, { "epoch": 5.470881863560733, "grad_norm": 20.063034057617188, "learning_rate": 5e-06, "loss": 0.4772, "num_input_tokens_seen": 103050484, "step": 1644 }, { "epoch": 5.470881863560733, "loss": 0.39718499779701233, "loss_ce": 2.924094587797299e-05, "loss_iou": 0.1357421875, "loss_num": 0.025146484375, "loss_xval": 0.396484375, "num_input_tokens_seen": 103050484, "step": 1644 }, { "epoch": 5.474209650582363, "grad_norm": 10.728224754333496, "learning_rate": 5e-06, "loss": 0.7005, "num_input_tokens_seen": 103113536, "step": 1645 }, { "epoch": 5.474209650582363, "loss": 0.7891544103622437, "loss_ce": 9.195022721542045e-05, "loss_iou": 0.302734375, "loss_num": 0.036865234375, "loss_xval": 0.7890625, "num_input_tokens_seen": 103113536, "step": 1645 }, { "epoch": 5.477537437603994, "grad_norm": 15.080349922180176, "learning_rate": 5e-06, "loss": 0.6859, "num_input_tokens_seen": 103174900, "step": 1646 }, { "epoch": 5.477537437603994, "loss": 0.5258921980857849, "loss_ce": 0.0008677899022586644, "loss_iou": 0.181640625, "loss_num": 0.032470703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 103174900, "step": 1646 }, { "epoch": 5.480865224625624, "grad_norm": 8.437542915344238, "learning_rate": 5e-06, "loss": 0.5857, "num_input_tokens_seen": 103238100, "step": 1647 }, { "epoch": 5.480865224625624, "loss": 0.5032402276992798, "loss_ce": 0.00018848066974896938, "loss_iou": 0.1904296875, "loss_num": 0.024658203125, "loss_xval": 0.50390625, "num_input_tokens_seen": 103238100, "step": 1647 }, { "epoch": 5.484193011647255, "grad_norm": 22.979642868041992, "learning_rate": 5e-06, "loss": 0.5582, "num_input_tokens_seen": 103300436, "step": 1648 }, { "epoch": 5.484193011647255, "loss": 0.5222282409667969, "loss_ce": 1.1491346413095016e-05, "loss_iou": 0.2158203125, "loss_num": 0.01806640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 103300436, "step": 1648 }, { "epoch": 5.487520798668886, "grad_norm": 16.576942443847656, "learning_rate": 5e-06, "loss": 0.4938, "num_input_tokens_seen": 103361948, "step": 1649 }, { "epoch": 5.487520798668886, "loss": 0.5874297022819519, "loss_ce": 2.737017348408699e-05, "loss_iou": 0.212890625, "loss_num": 0.0322265625, "loss_xval": 0.5859375, "num_input_tokens_seen": 103361948, "step": 1649 }, { "epoch": 5.490848585690516, "grad_norm": 12.247213363647461, "learning_rate": 5e-06, "loss": 0.5248, "num_input_tokens_seen": 103423472, "step": 1650 }, { "epoch": 5.490848585690516, "loss": 0.7788193821907043, "loss_ce": 1.0813319022418e-05, "loss_iou": 0.2578125, "loss_num": 0.052734375, "loss_xval": 0.77734375, "num_input_tokens_seen": 103423472, "step": 1650 }, { "epoch": 5.494176372712147, "grad_norm": 25.354419708251953, "learning_rate": 5e-06, "loss": 0.7204, "num_input_tokens_seen": 103486688, "step": 1651 }, { "epoch": 5.494176372712147, "loss": 0.9855786561965942, "loss_ce": 0.00034913705894723535, "loss_iou": 0.33203125, "loss_num": 0.064453125, "loss_xval": 0.984375, "num_input_tokens_seen": 103486688, "step": 1651 }, { "epoch": 5.4975041597337775, "grad_norm": 45.347530364990234, "learning_rate": 5e-06, "loss": 0.9044, "num_input_tokens_seen": 103549404, "step": 1652 }, { "epoch": 5.4975041597337775, "loss": 0.7556682825088501, "loss_ce": 0.0024945084005594254, "loss_iou": 0.208984375, "loss_num": 0.06689453125, "loss_xval": 0.75390625, "num_input_tokens_seen": 103549404, "step": 1652 }, { "epoch": 5.500831946755408, "grad_norm": 19.855636596679688, "learning_rate": 5e-06, "loss": 0.7169, "num_input_tokens_seen": 103612296, "step": 1653 }, { "epoch": 5.500831946755408, "loss": 0.9769325852394104, "loss_ce": 0.00012593074643518776, "loss_iou": 0.35546875, "loss_num": 0.05322265625, "loss_xval": 0.9765625, "num_input_tokens_seen": 103612296, "step": 1653 }, { "epoch": 5.504159733777039, "grad_norm": 11.011178970336914, "learning_rate": 5e-06, "loss": 0.7839, "num_input_tokens_seen": 103673296, "step": 1654 }, { "epoch": 5.504159733777039, "loss": 0.7490344047546387, "loss_ce": 1.089822944777552e-05, "loss_iou": 0.263671875, "loss_num": 0.044677734375, "loss_xval": 0.75, "num_input_tokens_seen": 103673296, "step": 1654 }, { "epoch": 5.507487520798669, "grad_norm": 9.313552856445312, "learning_rate": 5e-06, "loss": 0.6408, "num_input_tokens_seen": 103735916, "step": 1655 }, { "epoch": 5.507487520798669, "loss": 0.7944145202636719, "loss_ce": 0.00022508336405735463, "loss_iou": 0.251953125, "loss_num": 0.0576171875, "loss_xval": 0.79296875, "num_input_tokens_seen": 103735916, "step": 1655 }, { "epoch": 5.5108153078203, "grad_norm": 40.64817810058594, "learning_rate": 5e-06, "loss": 0.5265, "num_input_tokens_seen": 103799644, "step": 1656 }, { "epoch": 5.5108153078203, "loss": 0.523930549621582, "loss_ce": 4.7979447117540985e-06, "loss_iou": 0.1875, "loss_num": 0.02978515625, "loss_xval": 0.5234375, "num_input_tokens_seen": 103799644, "step": 1656 }, { "epoch": 5.5141430948419305, "grad_norm": 25.617191314697266, "learning_rate": 5e-06, "loss": 0.6049, "num_input_tokens_seen": 103862508, "step": 1657 }, { "epoch": 5.5141430948419305, "loss": 0.6131722331047058, "loss_ce": 1.3055303497822024e-05, "loss_iou": 0.2294921875, "loss_num": 0.03076171875, "loss_xval": 0.61328125, "num_input_tokens_seen": 103862508, "step": 1657 }, { "epoch": 5.517470881863561, "grad_norm": 16.35482406616211, "learning_rate": 5e-06, "loss": 0.4502, "num_input_tokens_seen": 103924828, "step": 1658 }, { "epoch": 5.517470881863561, "loss": 0.4366779923439026, "loss_ce": 3.2524490961804986e-05, "loss_iou": 0.14453125, "loss_num": 0.029541015625, "loss_xval": 0.4375, "num_input_tokens_seen": 103924828, "step": 1658 }, { "epoch": 5.520798668885192, "grad_norm": 7.07016658782959, "learning_rate": 5e-06, "loss": 0.6634, "num_input_tokens_seen": 103989120, "step": 1659 }, { "epoch": 5.520798668885192, "loss": 0.7409207820892334, "loss_ce": 0.0012968007940798998, "loss_iou": 0.296875, "loss_num": 0.029541015625, "loss_xval": 0.73828125, "num_input_tokens_seen": 103989120, "step": 1659 }, { "epoch": 5.524126455906822, "grad_norm": 13.07374382019043, "learning_rate": 5e-06, "loss": 0.664, "num_input_tokens_seen": 104053128, "step": 1660 }, { "epoch": 5.524126455906822, "loss": 0.6540793180465698, "loss_ce": 2.6540050384937786e-05, "loss_iou": 0.23828125, "loss_num": 0.035400390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 104053128, "step": 1660 }, { "epoch": 5.527454242928453, "grad_norm": 14.145462036132812, "learning_rate": 5e-06, "loss": 0.6692, "num_input_tokens_seen": 104115052, "step": 1661 }, { "epoch": 5.527454242928453, "loss": 0.6679408550262451, "loss_ce": 0.00021623028442263603, "loss_iou": 0.23046875, "loss_num": 0.04150390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 104115052, "step": 1661 }, { "epoch": 5.530782029950084, "grad_norm": 26.904611587524414, "learning_rate": 5e-06, "loss": 0.6505, "num_input_tokens_seen": 104177688, "step": 1662 }, { "epoch": 5.530782029950084, "loss": 0.5422796010971069, "loss_ce": 0.00010432445560581982, "loss_iou": 0.189453125, "loss_num": 0.03271484375, "loss_xval": 0.54296875, "num_input_tokens_seen": 104177688, "step": 1662 }, { "epoch": 5.534109816971714, "grad_norm": 19.420194625854492, "learning_rate": 5e-06, "loss": 0.6672, "num_input_tokens_seen": 104241480, "step": 1663 }, { "epoch": 5.534109816971714, "loss": 0.7687850594520569, "loss_ce": 0.0001692876685410738, "loss_iou": 0.296875, "loss_num": 0.035400390625, "loss_xval": 0.76953125, "num_input_tokens_seen": 104241480, "step": 1663 }, { "epoch": 5.537437603993345, "grad_norm": 13.972832679748535, "learning_rate": 5e-06, "loss": 0.7583, "num_input_tokens_seen": 104304524, "step": 1664 }, { "epoch": 5.537437603993345, "loss": 0.7085192203521729, "loss_ce": 0.0010912283323705196, "loss_iou": 0.255859375, "loss_num": 0.038818359375, "loss_xval": 0.70703125, "num_input_tokens_seen": 104304524, "step": 1664 }, { "epoch": 5.5407653910149754, "grad_norm": 16.88719367980957, "learning_rate": 5e-06, "loss": 0.7004, "num_input_tokens_seen": 104365776, "step": 1665 }, { "epoch": 5.5407653910149754, "loss": 0.4905754327774048, "loss_ce": 5.361266175896162e-06, "loss_iou": 0.1591796875, "loss_num": 0.034423828125, "loss_xval": 0.490234375, "num_input_tokens_seen": 104365776, "step": 1665 }, { "epoch": 5.544093178036606, "grad_norm": 40.14616775512695, "learning_rate": 5e-06, "loss": 0.7665, "num_input_tokens_seen": 104428768, "step": 1666 }, { "epoch": 5.544093178036606, "loss": 0.9921320676803589, "loss_ce": 6.667529669357464e-05, "loss_iou": 0.341796875, "loss_num": 0.0615234375, "loss_xval": 0.9921875, "num_input_tokens_seen": 104428768, "step": 1666 }, { "epoch": 5.547420965058237, "grad_norm": 30.98312759399414, "learning_rate": 5e-06, "loss": 0.6109, "num_input_tokens_seen": 104490428, "step": 1667 }, { "epoch": 5.547420965058237, "loss": 0.5373561978340149, "loss_ce": 2.6737072857940802e-06, "loss_iou": 0.1923828125, "loss_num": 0.030517578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 104490428, "step": 1667 }, { "epoch": 5.550748752079867, "grad_norm": 16.116302490234375, "learning_rate": 5e-06, "loss": 0.4699, "num_input_tokens_seen": 104552180, "step": 1668 }, { "epoch": 5.550748752079867, "loss": 0.488040030002594, "loss_ce": 2.9427542358462233e-06, "loss_iou": 0.126953125, "loss_num": 0.046630859375, "loss_xval": 0.48828125, "num_input_tokens_seen": 104552180, "step": 1668 }, { "epoch": 5.554076539101498, "grad_norm": 21.750362396240234, "learning_rate": 5e-06, "loss": 0.7576, "num_input_tokens_seen": 104615784, "step": 1669 }, { "epoch": 5.554076539101498, "loss": 0.6832214593887329, "loss_ce": 0.00011601504229474813, "loss_iou": 0.2060546875, "loss_num": 0.05419921875, "loss_xval": 0.68359375, "num_input_tokens_seen": 104615784, "step": 1669 }, { "epoch": 5.5574043261231285, "grad_norm": 13.395889282226562, "learning_rate": 5e-06, "loss": 0.6447, "num_input_tokens_seen": 104679044, "step": 1670 }, { "epoch": 5.5574043261231285, "loss": 0.48271387815475464, "loss_ce": 0.0002919872058555484, "loss_iou": 0.140625, "loss_num": 0.040283203125, "loss_xval": 0.482421875, "num_input_tokens_seen": 104679044, "step": 1670 }, { "epoch": 5.560732113144759, "grad_norm": 10.361615180969238, "learning_rate": 5e-06, "loss": 0.5725, "num_input_tokens_seen": 104741748, "step": 1671 }, { "epoch": 5.560732113144759, "loss": 0.6794488430023193, "loss_ce": 5.4566316975979134e-06, "loss_iou": 0.23828125, "loss_num": 0.04052734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 104741748, "step": 1671 }, { "epoch": 5.56405990016639, "grad_norm": 9.87459945678711, "learning_rate": 5e-06, "loss": 0.6702, "num_input_tokens_seen": 104805184, "step": 1672 }, { "epoch": 5.56405990016639, "loss": 0.875102698802948, "loss_ce": 0.00028576701879501343, "loss_iou": 0.310546875, "loss_num": 0.05078125, "loss_xval": 0.875, "num_input_tokens_seen": 104805184, "step": 1672 }, { "epoch": 5.56738768718802, "grad_norm": 15.05173397064209, "learning_rate": 5e-06, "loss": 0.944, "num_input_tokens_seen": 104869336, "step": 1673 }, { "epoch": 5.56738768718802, "loss": 0.9187459945678711, "loss_ce": 0.0006551960250362754, "loss_iou": 0.263671875, "loss_num": 0.07763671875, "loss_xval": 0.91796875, "num_input_tokens_seen": 104869336, "step": 1673 }, { "epoch": 5.570715474209651, "grad_norm": 9.039440155029297, "learning_rate": 5e-06, "loss": 0.7632, "num_input_tokens_seen": 104931472, "step": 1674 }, { "epoch": 5.570715474209651, "loss": 0.8769832849502563, "loss_ce": 0.0002743012737482786, "loss_iou": 0.326171875, "loss_num": 0.044921875, "loss_xval": 0.875, "num_input_tokens_seen": 104931472, "step": 1674 }, { "epoch": 5.574043261231282, "grad_norm": 13.609042167663574, "learning_rate": 5e-06, "loss": 0.6203, "num_input_tokens_seen": 104995356, "step": 1675 }, { "epoch": 5.574043261231282, "loss": 0.49927204847335815, "loss_ce": 0.0006148211541585624, "loss_iou": 0.1787109375, "loss_num": 0.0281982421875, "loss_xval": 0.498046875, "num_input_tokens_seen": 104995356, "step": 1675 }, { "epoch": 5.577371048252912, "grad_norm": 19.690515518188477, "learning_rate": 5e-06, "loss": 0.7057, "num_input_tokens_seen": 105058256, "step": 1676 }, { "epoch": 5.577371048252912, "loss": 0.8930066227912903, "loss_ce": 0.000672618392854929, "loss_iou": 0.328125, "loss_num": 0.047607421875, "loss_xval": 0.890625, "num_input_tokens_seen": 105058256, "step": 1676 }, { "epoch": 5.580698835274543, "grad_norm": 27.50149154663086, "learning_rate": 5e-06, "loss": 0.5587, "num_input_tokens_seen": 105120488, "step": 1677 }, { "epoch": 5.580698835274543, "loss": 0.6127033829689026, "loss_ce": 0.0005207486101426184, "loss_iou": 0.216796875, "loss_num": 0.03564453125, "loss_xval": 0.61328125, "num_input_tokens_seen": 105120488, "step": 1677 }, { "epoch": 5.584026622296173, "grad_norm": 19.621849060058594, "learning_rate": 5e-06, "loss": 0.5183, "num_input_tokens_seen": 105181856, "step": 1678 }, { "epoch": 5.584026622296173, "loss": 0.38672128319740295, "loss_ce": 2.5411072783754207e-06, "loss_iou": 0.1396484375, "loss_num": 0.0216064453125, "loss_xval": 0.38671875, "num_input_tokens_seen": 105181856, "step": 1678 }, { "epoch": 5.587354409317804, "grad_norm": 9.598878860473633, "learning_rate": 5e-06, "loss": 0.6327, "num_input_tokens_seen": 105245088, "step": 1679 }, { "epoch": 5.587354409317804, "loss": 0.5732457637786865, "loss_ce": 3.539852059475379e-06, "loss_iou": 0.2080078125, "loss_num": 0.031494140625, "loss_xval": 0.57421875, "num_input_tokens_seen": 105245088, "step": 1679 }, { "epoch": 5.590682196339435, "grad_norm": 22.08926010131836, "learning_rate": 5e-06, "loss": 0.769, "num_input_tokens_seen": 105307928, "step": 1680 }, { "epoch": 5.590682196339435, "loss": 0.7176547050476074, "loss_ce": 3.3723208616720513e-06, "loss_iou": 0.26171875, "loss_num": 0.038818359375, "loss_xval": 0.71875, "num_input_tokens_seen": 105307928, "step": 1680 }, { "epoch": 5.594009983361065, "grad_norm": 168.8852081298828, "learning_rate": 5e-06, "loss": 0.901, "num_input_tokens_seen": 105372148, "step": 1681 }, { "epoch": 5.594009983361065, "loss": 0.7313987016677856, "loss_ce": 0.0009299588855355978, "loss_iou": 0.251953125, "loss_num": 0.045654296875, "loss_xval": 0.73046875, "num_input_tokens_seen": 105372148, "step": 1681 }, { "epoch": 5.597337770382696, "grad_norm": 22.258724212646484, "learning_rate": 5e-06, "loss": 0.503, "num_input_tokens_seen": 105433068, "step": 1682 }, { "epoch": 5.597337770382696, "loss": 0.41324421763420105, "loss_ce": 0.0003413849917706102, "loss_iou": 0.15625, "loss_num": 0.0198974609375, "loss_xval": 0.412109375, "num_input_tokens_seen": 105433068, "step": 1682 }, { "epoch": 5.6006655574043265, "grad_norm": 15.447331428527832, "learning_rate": 5e-06, "loss": 0.7073, "num_input_tokens_seen": 105495692, "step": 1683 }, { "epoch": 5.6006655574043265, "loss": 0.585699737071991, "loss_ce": 0.0025088500697165728, "loss_iou": 0.1962890625, "loss_num": 0.0380859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 105495692, "step": 1683 }, { "epoch": 5.603993344425957, "grad_norm": 6.542105674743652, "learning_rate": 5e-06, "loss": 0.6754, "num_input_tokens_seen": 105559148, "step": 1684 }, { "epoch": 5.603993344425957, "loss": 0.7631018757820129, "loss_ce": 0.0010779737494885921, "loss_iou": 0.267578125, "loss_num": 0.04541015625, "loss_xval": 0.76171875, "num_input_tokens_seen": 105559148, "step": 1684 }, { "epoch": 5.607321131447588, "grad_norm": 5.566797256469727, "learning_rate": 5e-06, "loss": 0.6803, "num_input_tokens_seen": 105621324, "step": 1685 }, { "epoch": 5.607321131447588, "loss": 0.4558660686016083, "loss_ce": 0.0006200757343322039, "loss_iou": 0.1396484375, "loss_num": 0.034912109375, "loss_xval": 0.455078125, "num_input_tokens_seen": 105621324, "step": 1685 }, { "epoch": 5.610648918469218, "grad_norm": 31.55694580078125, "learning_rate": 5e-06, "loss": 0.6664, "num_input_tokens_seen": 105684584, "step": 1686 }, { "epoch": 5.610648918469218, "loss": 0.7554718255996704, "loss_ce": 0.00034485722426325083, "loss_iou": 0.25, "loss_num": 0.05078125, "loss_xval": 0.75390625, "num_input_tokens_seen": 105684584, "step": 1686 }, { "epoch": 5.613976705490849, "grad_norm": 46.99787902832031, "learning_rate": 5e-06, "loss": 0.6837, "num_input_tokens_seen": 105746404, "step": 1687 }, { "epoch": 5.613976705490849, "loss": 0.6141798496246338, "loss_ce": 4.411762347444892e-05, "loss_iou": 0.201171875, "loss_num": 0.042236328125, "loss_xval": 0.61328125, "num_input_tokens_seen": 105746404, "step": 1687 }, { "epoch": 5.6173044925124795, "grad_norm": 11.044576644897461, "learning_rate": 5e-06, "loss": 0.7171, "num_input_tokens_seen": 105809520, "step": 1688 }, { "epoch": 5.6173044925124795, "loss": 0.8241130113601685, "loss_ce": 0.0005046174628660083, "loss_iou": 0.30859375, "loss_num": 0.041259765625, "loss_xval": 0.82421875, "num_input_tokens_seen": 105809520, "step": 1688 }, { "epoch": 5.62063227953411, "grad_norm": 15.941658973693848, "learning_rate": 5e-06, "loss": 0.5886, "num_input_tokens_seen": 105872924, "step": 1689 }, { "epoch": 5.62063227953411, "loss": 0.55299973487854, "loss_ce": 0.0002653474803082645, "loss_iou": 0.20703125, "loss_num": 0.0277099609375, "loss_xval": 0.5546875, "num_input_tokens_seen": 105872924, "step": 1689 }, { "epoch": 5.623960066555741, "grad_norm": 17.497533798217773, "learning_rate": 5e-06, "loss": 0.7076, "num_input_tokens_seen": 105934368, "step": 1690 }, { "epoch": 5.623960066555741, "loss": 0.966990053653717, "loss_ce": 0.0004373170086182654, "loss_iou": 0.359375, "loss_num": 0.049560546875, "loss_xval": 0.96484375, "num_input_tokens_seen": 105934368, "step": 1690 }, { "epoch": 5.627287853577371, "grad_norm": 9.360628128051758, "learning_rate": 5e-06, "loss": 0.469, "num_input_tokens_seen": 105996356, "step": 1691 }, { "epoch": 5.627287853577371, "loss": 0.5639382600784302, "loss_ce": 0.00021752640896011144, "loss_iou": 0.181640625, "loss_num": 0.0400390625, "loss_xval": 0.5625, "num_input_tokens_seen": 105996356, "step": 1691 }, { "epoch": 5.630615640599002, "grad_norm": 14.039356231689453, "learning_rate": 5e-06, "loss": 0.6005, "num_input_tokens_seen": 106059068, "step": 1692 }, { "epoch": 5.630615640599002, "loss": 0.6978870630264282, "loss_ce": 0.00013320930884219706, "loss_iou": 0.251953125, "loss_num": 0.03857421875, "loss_xval": 0.69921875, "num_input_tokens_seen": 106059068, "step": 1692 }, { "epoch": 5.633943427620633, "grad_norm": 22.922609329223633, "learning_rate": 5e-06, "loss": 0.6155, "num_input_tokens_seen": 106122956, "step": 1693 }, { "epoch": 5.633943427620633, "loss": 0.7696903944015503, "loss_ce": 0.00015912571689113975, "loss_iou": 0.296875, "loss_num": 0.035400390625, "loss_xval": 0.76953125, "num_input_tokens_seen": 106122956, "step": 1693 }, { "epoch": 5.637271214642263, "grad_norm": 29.654361724853516, "learning_rate": 5e-06, "loss": 0.6266, "num_input_tokens_seen": 106186776, "step": 1694 }, { "epoch": 5.637271214642263, "loss": 0.7533686757087708, "loss_ce": 0.0006831216160207987, "loss_iou": 0.28515625, "loss_num": 0.03662109375, "loss_xval": 0.75390625, "num_input_tokens_seen": 106186776, "step": 1694 }, { "epoch": 5.640599001663894, "grad_norm": 24.80531883239746, "learning_rate": 5e-06, "loss": 0.7552, "num_input_tokens_seen": 106250400, "step": 1695 }, { "epoch": 5.640599001663894, "loss": 0.7820540070533752, "loss_ce": 0.0008039996610023081, "loss_iou": 0.29296875, "loss_num": 0.03857421875, "loss_xval": 0.78125, "num_input_tokens_seen": 106250400, "step": 1695 }, { "epoch": 5.643926788685524, "grad_norm": 21.132755279541016, "learning_rate": 5e-06, "loss": 0.7273, "num_input_tokens_seen": 106312868, "step": 1696 }, { "epoch": 5.643926788685524, "loss": 0.8165279626846313, "loss_ce": 0.0006099676829762757, "loss_iou": 0.33203125, "loss_num": 0.030517578125, "loss_xval": 0.81640625, "num_input_tokens_seen": 106312868, "step": 1696 }, { "epoch": 5.647254575707155, "grad_norm": 6.561370372772217, "learning_rate": 5e-06, "loss": 0.4159, "num_input_tokens_seen": 106374608, "step": 1697 }, { "epoch": 5.647254575707155, "loss": 0.2832570970058441, "loss_ce": 0.0009084665798582137, "loss_iou": 0.049072265625, "loss_num": 0.036865234375, "loss_xval": 0.283203125, "num_input_tokens_seen": 106374608, "step": 1697 }, { "epoch": 5.650582362728786, "grad_norm": 69.27725219726562, "learning_rate": 5e-06, "loss": 0.5177, "num_input_tokens_seen": 106437156, "step": 1698 }, { "epoch": 5.650582362728786, "loss": 0.44986850023269653, "loss_ce": 0.0007718338747508824, "loss_iou": 0.13671875, "loss_num": 0.034912109375, "loss_xval": 0.44921875, "num_input_tokens_seen": 106437156, "step": 1698 }, { "epoch": 5.653910149750416, "grad_norm": 14.340590476989746, "learning_rate": 5e-06, "loss": 0.682, "num_input_tokens_seen": 106500236, "step": 1699 }, { "epoch": 5.653910149750416, "loss": 0.6428415179252625, "loss_ce": 1.9202825569664128e-05, "loss_iou": 0.25390625, "loss_num": 0.026611328125, "loss_xval": 0.64453125, "num_input_tokens_seen": 106500236, "step": 1699 }, { "epoch": 5.657237936772047, "grad_norm": 17.766748428344727, "learning_rate": 5e-06, "loss": 0.5629, "num_input_tokens_seen": 106561988, "step": 1700 }, { "epoch": 5.657237936772047, "loss": 0.5454926490783691, "loss_ce": 0.00032663418096490204, "loss_iou": 0.16796875, "loss_num": 0.0419921875, "loss_xval": 0.546875, "num_input_tokens_seen": 106561988, "step": 1700 }, { "epoch": 5.6605657237936775, "grad_norm": 12.290051460266113, "learning_rate": 5e-06, "loss": 0.5762, "num_input_tokens_seen": 106625212, "step": 1701 }, { "epoch": 5.6605657237936775, "loss": 0.8226503133773804, "loss_ce": 0.00026262665051035583, "loss_iou": 0.296875, "loss_num": 0.0458984375, "loss_xval": 0.82421875, "num_input_tokens_seen": 106625212, "step": 1701 }, { "epoch": 5.663893510815308, "grad_norm": 12.397076606750488, "learning_rate": 5e-06, "loss": 0.4598, "num_input_tokens_seen": 106687368, "step": 1702 }, { "epoch": 5.663893510815308, "loss": 0.4560442566871643, "loss_ce": 0.0007219562539830804, "loss_iou": 0.16015625, "loss_num": 0.02685546875, "loss_xval": 0.455078125, "num_input_tokens_seen": 106687368, "step": 1702 }, { "epoch": 5.667221297836939, "grad_norm": 11.242500305175781, "learning_rate": 5e-06, "loss": 0.6388, "num_input_tokens_seen": 106750504, "step": 1703 }, { "epoch": 5.667221297836939, "loss": 0.7236143350601196, "loss_ce": 0.0005308025283738971, "loss_iou": 0.30078125, "loss_num": 0.0240478515625, "loss_xval": 0.72265625, "num_input_tokens_seen": 106750504, "step": 1703 }, { "epoch": 5.670549084858569, "grad_norm": 10.13815689086914, "learning_rate": 5e-06, "loss": 0.6542, "num_input_tokens_seen": 106811856, "step": 1704 }, { "epoch": 5.670549084858569, "loss": 0.7757124900817871, "loss_ce": 0.0002608740178402513, "loss_iou": 0.251953125, "loss_num": 0.054443359375, "loss_xval": 0.77734375, "num_input_tokens_seen": 106811856, "step": 1704 }, { "epoch": 5.6738768718802, "grad_norm": 7.466888904571533, "learning_rate": 5e-06, "loss": 0.5185, "num_input_tokens_seen": 106873476, "step": 1705 }, { "epoch": 5.6738768718802, "loss": 0.4416944980621338, "loss_ce": 1.3564701475843322e-05, "loss_iou": 0.1318359375, "loss_num": 0.03564453125, "loss_xval": 0.44140625, "num_input_tokens_seen": 106873476, "step": 1705 }, { "epoch": 5.677204658901831, "grad_norm": 12.144712448120117, "learning_rate": 5e-06, "loss": 0.7171, "num_input_tokens_seen": 106936228, "step": 1706 }, { "epoch": 5.677204658901831, "loss": 0.59663987159729, "loss_ce": 0.00020431567099876702, "loss_iou": 0.224609375, "loss_num": 0.029296875, "loss_xval": 0.59765625, "num_input_tokens_seen": 106936228, "step": 1706 }, { "epoch": 5.680532445923461, "grad_norm": 14.107093811035156, "learning_rate": 5e-06, "loss": 0.4935, "num_input_tokens_seen": 106997544, "step": 1707 }, { "epoch": 5.680532445923461, "loss": 0.5952327251434326, "loss_ce": 7.892050052760169e-05, "loss_iou": 0.181640625, "loss_num": 0.046142578125, "loss_xval": 0.59375, "num_input_tokens_seen": 106997544, "step": 1707 }, { "epoch": 5.683860232945092, "grad_norm": 17.524843215942383, "learning_rate": 5e-06, "loss": 0.6913, "num_input_tokens_seen": 107060108, "step": 1708 }, { "epoch": 5.683860232945092, "loss": 0.6572305560112, "loss_ce": 4.020728283649078e-06, "loss_iou": 0.2236328125, "loss_num": 0.0419921875, "loss_xval": 0.65625, "num_input_tokens_seen": 107060108, "step": 1708 }, { "epoch": 5.687188019966722, "grad_norm": 6.087458610534668, "learning_rate": 5e-06, "loss": 0.6424, "num_input_tokens_seen": 107122596, "step": 1709 }, { "epoch": 5.687188019966722, "loss": 0.5598827004432678, "loss_ce": 7.200603704404784e-06, "loss_iou": 0.1708984375, "loss_num": 0.043701171875, "loss_xval": 0.55859375, "num_input_tokens_seen": 107122596, "step": 1709 }, { "epoch": 5.690515806988353, "grad_norm": 8.854772567749023, "learning_rate": 5e-06, "loss": 0.6383, "num_input_tokens_seen": 107183304, "step": 1710 }, { "epoch": 5.690515806988353, "loss": 0.3946506381034851, "loss_ce": 0.0007297521806322038, "loss_iou": 0.11669921875, "loss_num": 0.0322265625, "loss_xval": 0.39453125, "num_input_tokens_seen": 107183304, "step": 1710 }, { "epoch": 5.693843594009984, "grad_norm": 11.708149909973145, "learning_rate": 5e-06, "loss": 0.7584, "num_input_tokens_seen": 107247384, "step": 1711 }, { "epoch": 5.693843594009984, "loss": 0.8736662864685059, "loss_ce": 0.00037525969673879445, "loss_iou": 0.314453125, "loss_num": 0.049072265625, "loss_xval": 0.875, "num_input_tokens_seen": 107247384, "step": 1711 }, { "epoch": 5.697171381031614, "grad_norm": 10.559351921081543, "learning_rate": 5e-06, "loss": 0.6289, "num_input_tokens_seen": 107310668, "step": 1712 }, { "epoch": 5.697171381031614, "loss": 0.6340956687927246, "loss_ce": 0.000306632777210325, "loss_iou": 0.1943359375, "loss_num": 0.049072265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 107310668, "step": 1712 }, { "epoch": 5.700499168053245, "grad_norm": 18.490694046020508, "learning_rate": 5e-06, "loss": 0.5318, "num_input_tokens_seen": 107370972, "step": 1713 }, { "epoch": 5.700499168053245, "loss": 0.38782674074172974, "loss_ce": 0.00013142020907253027, "loss_iou": 0.13671875, "loss_num": 0.0228271484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 107370972, "step": 1713 }, { "epoch": 5.7038269550748755, "grad_norm": 16.43109893798828, "learning_rate": 5e-06, "loss": 0.7263, "num_input_tokens_seen": 107434928, "step": 1714 }, { "epoch": 5.7038269550748755, "loss": 0.9202851057052612, "loss_ce": 0.0013398126466199756, "loss_iou": 0.318359375, "loss_num": 0.056396484375, "loss_xval": 0.91796875, "num_input_tokens_seen": 107434928, "step": 1714 }, { "epoch": 5.707154742096506, "grad_norm": 7.379390716552734, "learning_rate": 5e-06, "loss": 0.4464, "num_input_tokens_seen": 107498380, "step": 1715 }, { "epoch": 5.707154742096506, "loss": 0.39358004927635193, "loss_ce": 2.5351406293339096e-05, "loss_iou": 0.10546875, "loss_num": 0.03662109375, "loss_xval": 0.39453125, "num_input_tokens_seen": 107498380, "step": 1715 }, { "epoch": 5.710482529118137, "grad_norm": 14.778491973876953, "learning_rate": 5e-06, "loss": 0.6498, "num_input_tokens_seen": 107561568, "step": 1716 }, { "epoch": 5.710482529118137, "loss": 0.6542474031448364, "loss_ce": 0.000438811257481575, "loss_iou": 0.2333984375, "loss_num": 0.037353515625, "loss_xval": 0.65234375, "num_input_tokens_seen": 107561568, "step": 1716 }, { "epoch": 5.713810316139767, "grad_norm": 7.23340368270874, "learning_rate": 5e-06, "loss": 0.3932, "num_input_tokens_seen": 107622300, "step": 1717 }, { "epoch": 5.713810316139767, "loss": 0.3672172725200653, "loss_ce": 0.0010063358349725604, "loss_iou": 0.083984375, "loss_num": 0.03955078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 107622300, "step": 1717 }, { "epoch": 5.717138103161398, "grad_norm": 17.978878021240234, "learning_rate": 5e-06, "loss": 0.6873, "num_input_tokens_seen": 107685104, "step": 1718 }, { "epoch": 5.717138103161398, "loss": 0.6887626647949219, "loss_ce": 4.1962564864661545e-05, "loss_iou": 0.2138671875, "loss_num": 0.052490234375, "loss_xval": 0.6875, "num_input_tokens_seen": 107685104, "step": 1718 }, { "epoch": 5.7204658901830285, "grad_norm": 28.955184936523438, "learning_rate": 5e-06, "loss": 0.5497, "num_input_tokens_seen": 107746612, "step": 1719 }, { "epoch": 5.7204658901830285, "loss": 0.6328150033950806, "loss_ce": 2.510774720576592e-06, "loss_iou": 0.23828125, "loss_num": 0.031005859375, "loss_xval": 0.6328125, "num_input_tokens_seen": 107746612, "step": 1719 }, { "epoch": 5.723793677204659, "grad_norm": 16.940858840942383, "learning_rate": 5e-06, "loss": 0.658, "num_input_tokens_seen": 107810612, "step": 1720 }, { "epoch": 5.723793677204659, "loss": 0.706731915473938, "loss_ce": 0.00018891351646743715, "loss_iou": 0.2314453125, "loss_num": 0.048828125, "loss_xval": 0.70703125, "num_input_tokens_seen": 107810612, "step": 1720 }, { "epoch": 5.72712146422629, "grad_norm": 11.316946029663086, "learning_rate": 5e-06, "loss": 0.6643, "num_input_tokens_seen": 107873468, "step": 1721 }, { "epoch": 5.72712146422629, "loss": 0.620333194732666, "loss_ce": 0.0007042735815048218, "loss_iou": 0.18359375, "loss_num": 0.05029296875, "loss_xval": 0.62109375, "num_input_tokens_seen": 107873468, "step": 1721 }, { "epoch": 5.73044925124792, "grad_norm": 15.305099487304688, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 107936044, "step": 1722 }, { "epoch": 5.73044925124792, "loss": 0.5398041009902954, "loss_ce": 9.173478247248568e-06, "loss_iou": 0.197265625, "loss_num": 0.0289306640625, "loss_xval": 0.5390625, "num_input_tokens_seen": 107936044, "step": 1722 }, { "epoch": 5.733777038269551, "grad_norm": 22.33281135559082, "learning_rate": 5e-06, "loss": 0.6276, "num_input_tokens_seen": 107999576, "step": 1723 }, { "epoch": 5.733777038269551, "loss": 0.8423997163772583, "loss_ce": 0.0004502593947108835, "loss_iou": 0.28515625, "loss_num": 0.05419921875, "loss_xval": 0.84375, "num_input_tokens_seen": 107999576, "step": 1723 }, { "epoch": 5.737104825291182, "grad_norm": 30.52532196044922, "learning_rate": 5e-06, "loss": 0.6446, "num_input_tokens_seen": 108061644, "step": 1724 }, { "epoch": 5.737104825291182, "loss": 0.9165446162223816, "loss_ce": 0.0007731046061962843, "loss_iou": 0.357421875, "loss_num": 0.0400390625, "loss_xval": 0.9140625, "num_input_tokens_seen": 108061644, "step": 1724 }, { "epoch": 5.740432612312812, "grad_norm": 37.41810607910156, "learning_rate": 5e-06, "loss": 0.7034, "num_input_tokens_seen": 108125384, "step": 1725 }, { "epoch": 5.740432612312812, "loss": 0.6823127865791321, "loss_ce": 0.0005500713596120477, "loss_iou": 0.2412109375, "loss_num": 0.039794921875, "loss_xval": 0.68359375, "num_input_tokens_seen": 108125384, "step": 1725 }, { "epoch": 5.743760399334443, "grad_norm": 47.83035659790039, "learning_rate": 5e-06, "loss": 0.8563, "num_input_tokens_seen": 108189064, "step": 1726 }, { "epoch": 5.743760399334443, "loss": 0.8046451807022095, "loss_ce": 0.00020182624575681984, "loss_iou": 0.279296875, "loss_num": 0.049072265625, "loss_xval": 0.8046875, "num_input_tokens_seen": 108189064, "step": 1726 }, { "epoch": 5.747088186356073, "grad_norm": 32.13214874267578, "learning_rate": 5e-06, "loss": 0.8543, "num_input_tokens_seen": 108250384, "step": 1727 }, { "epoch": 5.747088186356073, "loss": 1.0282011032104492, "loss_ce": 2.907304406107869e-06, "loss_iou": 0.34765625, "loss_num": 0.06640625, "loss_xval": 1.03125, "num_input_tokens_seen": 108250384, "step": 1727 }, { "epoch": 5.750415973377704, "grad_norm": 10.281401634216309, "learning_rate": 5e-06, "loss": 0.7168, "num_input_tokens_seen": 108313460, "step": 1728 }, { "epoch": 5.750415973377704, "loss": 0.6618569493293762, "loss_ce": 0.0004189417522866279, "loss_iou": 0.2451171875, "loss_num": 0.0341796875, "loss_xval": 0.66015625, "num_input_tokens_seen": 108313460, "step": 1728 }, { "epoch": 5.753743760399335, "grad_norm": 8.915122985839844, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 108377684, "step": 1729 }, { "epoch": 5.753743760399335, "loss": 0.62421715259552, "loss_ce": 0.00043785208254121244, "loss_iou": 0.2158203125, "loss_num": 0.03857421875, "loss_xval": 0.625, "num_input_tokens_seen": 108377684, "step": 1729 }, { "epoch": 5.757071547420965, "grad_norm": 9.063756942749023, "learning_rate": 5e-06, "loss": 0.6516, "num_input_tokens_seen": 108438956, "step": 1730 }, { "epoch": 5.757071547420965, "loss": 0.6943504810333252, "loss_ce": 0.0009911722736433148, "loss_iou": 0.2431640625, "loss_num": 0.041259765625, "loss_xval": 0.6953125, "num_input_tokens_seen": 108438956, "step": 1730 }, { "epoch": 5.760399334442596, "grad_norm": 6.283392429351807, "learning_rate": 5e-06, "loss": 0.5892, "num_input_tokens_seen": 108501164, "step": 1731 }, { "epoch": 5.760399334442596, "loss": 0.6234263777732849, "loss_ce": 0.0001353639963781461, "loss_iou": 0.2236328125, "loss_num": 0.035400390625, "loss_xval": 0.625, "num_input_tokens_seen": 108501164, "step": 1731 }, { "epoch": 5.7637271214642265, "grad_norm": 27.98554801940918, "learning_rate": 5e-06, "loss": 0.7058, "num_input_tokens_seen": 108563452, "step": 1732 }, { "epoch": 5.7637271214642265, "loss": 0.7763590812683105, "loss_ce": 0.000602235842961818, "loss_iou": 0.310546875, "loss_num": 0.0306396484375, "loss_xval": 0.77734375, "num_input_tokens_seen": 108563452, "step": 1732 }, { "epoch": 5.767054908485857, "grad_norm": 37.64786911010742, "learning_rate": 5e-06, "loss": 0.857, "num_input_tokens_seen": 108626900, "step": 1733 }, { "epoch": 5.767054908485857, "loss": 0.6566188931465149, "loss_ce": 2.669273499122937e-06, "loss_iou": 0.2265625, "loss_num": 0.040771484375, "loss_xval": 0.65625, "num_input_tokens_seen": 108626900, "step": 1733 }, { "epoch": 5.770382695507488, "grad_norm": 25.70686149597168, "learning_rate": 5e-06, "loss": 0.6413, "num_input_tokens_seen": 108689136, "step": 1734 }, { "epoch": 5.770382695507488, "loss": 0.6587027907371521, "loss_ce": 1.1369125786586665e-05, "loss_iou": 0.2216796875, "loss_num": 0.043212890625, "loss_xval": 0.66015625, "num_input_tokens_seen": 108689136, "step": 1734 }, { "epoch": 5.773710482529118, "grad_norm": 41.048892974853516, "learning_rate": 5e-06, "loss": 0.9081, "num_input_tokens_seen": 108752268, "step": 1735 }, { "epoch": 5.773710482529118, "loss": 0.8428740501403809, "loss_ce": 0.00010060011118184775, "loss_iou": 0.287109375, "loss_num": 0.05419921875, "loss_xval": 0.84375, "num_input_tokens_seen": 108752268, "step": 1735 }, { "epoch": 5.777038269550749, "grad_norm": 26.29478645324707, "learning_rate": 5e-06, "loss": 0.6343, "num_input_tokens_seen": 108814612, "step": 1736 }, { "epoch": 5.777038269550749, "loss": 0.6782281994819641, "loss_ce": 0.0007380052120424807, "loss_iou": 0.2294921875, "loss_num": 0.043701171875, "loss_xval": 0.67578125, "num_input_tokens_seen": 108814612, "step": 1736 }, { "epoch": 5.78036605657238, "grad_norm": 12.207544326782227, "learning_rate": 5e-06, "loss": 0.5949, "num_input_tokens_seen": 108878500, "step": 1737 }, { "epoch": 5.78036605657238, "loss": 0.6717415452003479, "loss_ce": 0.0007210183539427817, "loss_iou": 0.251953125, "loss_num": 0.033935546875, "loss_xval": 0.671875, "num_input_tokens_seen": 108878500, "step": 1737 }, { "epoch": 5.78369384359401, "grad_norm": 125.72965240478516, "learning_rate": 5e-06, "loss": 0.844, "num_input_tokens_seen": 108940380, "step": 1738 }, { "epoch": 5.78369384359401, "loss": 1.2214411497116089, "loss_ce": 5.610462267213734e-06, "loss_iou": 0.447265625, "loss_num": 0.0654296875, "loss_xval": 1.21875, "num_input_tokens_seen": 108940380, "step": 1738 }, { "epoch": 5.787021630615641, "grad_norm": 36.35127258300781, "learning_rate": 5e-06, "loss": 0.664, "num_input_tokens_seen": 109004572, "step": 1739 }, { "epoch": 5.787021630615641, "loss": 0.5705589056015015, "loss_ce": 0.0008872911566868424, "loss_iou": 0.2099609375, "loss_num": 0.02978515625, "loss_xval": 0.5703125, "num_input_tokens_seen": 109004572, "step": 1739 }, { "epoch": 5.790349417637271, "grad_norm": 24.79090118408203, "learning_rate": 5e-06, "loss": 0.7072, "num_input_tokens_seen": 109068984, "step": 1740 }, { "epoch": 5.790349417637271, "loss": 0.49274492263793945, "loss_ce": 0.0005574416136369109, "loss_iou": 0.173828125, "loss_num": 0.02880859375, "loss_xval": 0.4921875, "num_input_tokens_seen": 109068984, "step": 1740 }, { "epoch": 5.793677204658902, "grad_norm": 22.55794334411621, "learning_rate": 5e-06, "loss": 0.6506, "num_input_tokens_seen": 109130736, "step": 1741 }, { "epoch": 5.793677204658902, "loss": 0.7202203273773193, "loss_ce": 5.447911462397315e-06, "loss_iou": 0.20703125, "loss_num": 0.06103515625, "loss_xval": 0.71875, "num_input_tokens_seen": 109130736, "step": 1741 }, { "epoch": 5.797004991680533, "grad_norm": 27.762441635131836, "learning_rate": 5e-06, "loss": 0.7022, "num_input_tokens_seen": 109194620, "step": 1742 }, { "epoch": 5.797004991680533, "loss": 0.689822793006897, "loss_ce": 3.4941667763632722e-06, "loss_iou": 0.248046875, "loss_num": 0.03857421875, "loss_xval": 0.69140625, "num_input_tokens_seen": 109194620, "step": 1742 }, { "epoch": 5.800332778702163, "grad_norm": 21.479480743408203, "learning_rate": 5e-06, "loss": 0.7576, "num_input_tokens_seen": 109256136, "step": 1743 }, { "epoch": 5.800332778702163, "loss": 0.7364202737808228, "loss_ce": 9.21435421332717e-05, "loss_iou": 0.291015625, "loss_num": 0.0311279296875, "loss_xval": 0.734375, "num_input_tokens_seen": 109256136, "step": 1743 }, { "epoch": 5.803660565723794, "grad_norm": 10.55981159210205, "learning_rate": 5e-06, "loss": 0.5755, "num_input_tokens_seen": 109318796, "step": 1744 }, { "epoch": 5.803660565723794, "loss": 0.4672881066799164, "loss_ce": 0.0008574322564527392, "loss_iou": 0.10986328125, "loss_num": 0.04931640625, "loss_xval": 0.466796875, "num_input_tokens_seen": 109318796, "step": 1744 }, { "epoch": 5.8069883527454245, "grad_norm": 12.515119552612305, "learning_rate": 5e-06, "loss": 0.5652, "num_input_tokens_seen": 109381860, "step": 1745 }, { "epoch": 5.8069883527454245, "loss": 0.3233053684234619, "loss_ce": 2.160295935027534e-06, "loss_iou": 0.10986328125, "loss_num": 0.0206298828125, "loss_xval": 0.32421875, "num_input_tokens_seen": 109381860, "step": 1745 }, { "epoch": 5.810316139767055, "grad_norm": 5.954592227935791, "learning_rate": 5e-06, "loss": 0.326, "num_input_tokens_seen": 109443628, "step": 1746 }, { "epoch": 5.810316139767055, "loss": 0.3413779139518738, "loss_ce": 8.268460987892468e-06, "loss_iou": 0.0849609375, "loss_num": 0.034423828125, "loss_xval": 0.341796875, "num_input_tokens_seen": 109443628, "step": 1746 }, { "epoch": 5.813643926788686, "grad_norm": 13.875561714172363, "learning_rate": 5e-06, "loss": 0.8563, "num_input_tokens_seen": 109508128, "step": 1747 }, { "epoch": 5.813643926788686, "loss": 0.9743266701698303, "loss_ce": 0.0020366478711366653, "loss_iou": 0.33984375, "loss_num": 0.05810546875, "loss_xval": 0.97265625, "num_input_tokens_seen": 109508128, "step": 1747 }, { "epoch": 5.816971713810316, "grad_norm": 123.25865173339844, "learning_rate": 5e-06, "loss": 0.725, "num_input_tokens_seen": 109570428, "step": 1748 }, { "epoch": 5.816971713810316, "loss": 0.7866584658622742, "loss_ce": 0.0005866786232218146, "loss_iou": 0.296875, "loss_num": 0.038330078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 109570428, "step": 1748 }, { "epoch": 5.820299500831947, "grad_norm": 18.198345184326172, "learning_rate": 5e-06, "loss": 0.7065, "num_input_tokens_seen": 109633324, "step": 1749 }, { "epoch": 5.820299500831947, "loss": 0.8565720915794373, "loss_ce": 4.681164682551753e-06, "loss_iou": 0.326171875, "loss_num": 0.04052734375, "loss_xval": 0.85546875, "num_input_tokens_seen": 109633324, "step": 1749 }, { "epoch": 5.8236272878535775, "grad_norm": 6.5098772048950195, "learning_rate": 5e-06, "loss": 0.4992, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_seeclick_CIoU": 0.04558872431516647, "eval_seeclick_GIoU": 0.05588574334979057, "eval_seeclick_IoU": 0.15699758380651474, "eval_seeclick_MAE_all": 0.17700505256652832, "eval_seeclick_MAE_h": 0.06537941843271255, "eval_seeclick_MAE_w": 0.14101681485772133, "eval_seeclick_MAE_x_boxes": 0.20457972586154938, "eval_seeclick_MAE_y_boxes": 0.19252178817987442, "eval_seeclick_NUM_probability": 0.9999195039272308, "eval_seeclick_inside_bbox": 0.20937500149011612, "eval_seeclick_loss": 2.903984546661377, "eval_seeclick_loss_ce": 0.12477785721421242, "eval_seeclick_loss_iou": 0.947998046875, "eval_seeclick_loss_num": 0.17411041259765625, "eval_seeclick_loss_xval": 2.76806640625, "eval_seeclick_runtime": 60.2834, "eval_seeclick_samples_per_second": 0.78, "eval_seeclick_steps_per_second": 0.033, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_icons_CIoU": -0.06363356672227383, "eval_icons_GIoU": 0.02578481985256076, "eval_icons_IoU": 0.1152363270521164, "eval_icons_MAE_all": 0.17808056622743607, "eval_icons_MAE_h": 0.13277868926525116, "eval_icons_MAE_w": 0.20845502614974976, "eval_icons_MAE_x_boxes": 0.13157816603779793, "eval_icons_MAE_y_boxes": 0.08782356604933739, "eval_icons_NUM_probability": 0.9999783635139465, "eval_icons_inside_bbox": 0.2916666716337204, "eval_icons_loss": 2.7893776893615723, "eval_icons_loss_ce": 3.021459178853547e-06, "eval_icons_loss_iou": 0.967529296875, "eval_icons_loss_num": 0.177490234375, "eval_icons_loss_xval": 2.82080078125, "eval_icons_runtime": 74.2764, "eval_icons_samples_per_second": 0.673, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_screenspot_CIoU": 0.10973412171006203, "eval_screenspot_GIoU": 0.13653232902288437, "eval_screenspot_IoU": 0.24227474629878998, "eval_screenspot_MAE_all": 0.14987651507059732, "eval_screenspot_MAE_h": 0.06570250665148099, "eval_screenspot_MAE_w": 0.15486965080102286, "eval_screenspot_MAE_x_boxes": 0.19311866164207458, "eval_screenspot_MAE_y_boxes": 0.11810305714607239, "eval_screenspot_NUM_probability": 0.9999792377154032, "eval_screenspot_inside_bbox": 0.45000000794728595, "eval_screenspot_loss": 2.5161385536193848, "eval_screenspot_loss_ce": 0.00015203603106783703, "eval_screenspot_loss_iou": 0.8806966145833334, "eval_screenspot_loss_num": 0.15731302897135416, "eval_screenspot_loss_xval": 2.5491536458333335, "eval_screenspot_runtime": 111.664, "eval_screenspot_samples_per_second": 0.797, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_compot_CIoU": -0.003148819785565138, "eval_compot_GIoU": 0.05417838133871555, "eval_compot_IoU": 0.1631442978978157, "eval_compot_MAE_all": 0.19288938492536545, "eval_compot_MAE_h": 0.08088488504290581, "eval_compot_MAE_w": 0.2187066450715065, "eval_compot_MAE_x_boxes": 0.1669960431754589, "eval_compot_MAE_y_boxes": 0.16191855818033218, "eval_compot_NUM_probability": 0.9999774992465973, "eval_compot_inside_bbox": 0.3263888955116272, "eval_compot_loss": 2.8791656494140625, "eval_compot_loss_ce": 0.0026283912593498826, "eval_compot_loss_iou": 0.95751953125, "eval_compot_loss_num": 0.2052154541015625, "eval_compot_loss_xval": 2.94189453125, "eval_compot_runtime": 66.0943, "eval_compot_samples_per_second": 0.756, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "eval_custom_ui_MAE_all": 0.07984431087970734, "eval_custom_ui_MAE_x": 0.08612548559904099, "eval_custom_ui_MAE_y": 0.07356313616037369, "eval_custom_ui_NUM_probability": 0.9999949038028717, "eval_custom_ui_loss": 0.382111519575119, "eval_custom_ui_loss_ce": 1.850072749220999e-06, "eval_custom_ui_loss_num": 0.0765228271484375, "eval_custom_ui_loss_xval": 0.38287353515625, "eval_custom_ui_runtime": 50.2288, "eval_custom_ui_samples_per_second": 0.995, "eval_custom_ui_steps_per_second": 0.04, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.8236272878535775, "loss": 0.3747571110725403, "loss_ce": 1.2315579169808188e-06, "loss_iou": 0.0, "loss_num": 0.07470703125, "loss_xval": 0.375, "num_input_tokens_seen": 109696240, "step": 1750 }, { "epoch": 5.826955074875208, "grad_norm": 9.669290542602539, "learning_rate": 5e-06, "loss": 0.5534, "num_input_tokens_seen": 109759952, "step": 1751 }, { "epoch": 5.826955074875208, "loss": 0.5026319026947021, "loss_ce": 7.391112831101054e-06, "loss_iou": 0.1640625, "loss_num": 0.034912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 109759952, "step": 1751 }, { "epoch": 5.830282861896839, "grad_norm": 11.64921760559082, "learning_rate": 5e-06, "loss": 0.7147, "num_input_tokens_seen": 109822100, "step": 1752 }, { "epoch": 5.830282861896839, "loss": 0.4949053227901459, "loss_ce": 3.227880006306805e-05, "loss_iou": 0.173828125, "loss_num": 0.029541015625, "loss_xval": 0.494140625, "num_input_tokens_seen": 109822100, "step": 1752 }, { "epoch": 5.833610648918469, "grad_norm": 10.443007469177246, "learning_rate": 5e-06, "loss": 0.5797, "num_input_tokens_seen": 109883936, "step": 1753 }, { "epoch": 5.833610648918469, "loss": 0.7714334726333618, "loss_ce": 1.0098984603246208e-05, "loss_iou": 0.2294921875, "loss_num": 0.0625, "loss_xval": 0.76953125, "num_input_tokens_seen": 109883936, "step": 1753 }, { "epoch": 5.8369384359401, "grad_norm": 17.915584564208984, "learning_rate": 5e-06, "loss": 0.5889, "num_input_tokens_seen": 109946000, "step": 1754 }, { "epoch": 5.8369384359401, "loss": 0.36335277557373047, "loss_ce": 0.0008039638050831854, "loss_iou": 0.1376953125, "loss_num": 0.017333984375, "loss_xval": 0.36328125, "num_input_tokens_seen": 109946000, "step": 1754 }, { "epoch": 5.840266222961731, "grad_norm": 12.162701606750488, "learning_rate": 5e-06, "loss": 0.6661, "num_input_tokens_seen": 110008884, "step": 1755 }, { "epoch": 5.840266222961731, "loss": 0.774360179901123, "loss_ce": 0.0006785123841837049, "loss_iou": 0.28125, "loss_num": 0.042236328125, "loss_xval": 0.7734375, "num_input_tokens_seen": 110008884, "step": 1755 }, { "epoch": 5.843594009983361, "grad_norm": 13.172515869140625, "learning_rate": 5e-06, "loss": 0.6538, "num_input_tokens_seen": 110071696, "step": 1756 }, { "epoch": 5.843594009983361, "loss": 0.5380659699440002, "loss_ce": 0.0005293394206091762, "loss_iou": 0.1962890625, "loss_num": 0.029052734375, "loss_xval": 0.5390625, "num_input_tokens_seen": 110071696, "step": 1756 }, { "epoch": 5.846921797004992, "grad_norm": 16.609647750854492, "learning_rate": 5e-06, "loss": 0.6284, "num_input_tokens_seen": 110133908, "step": 1757 }, { "epoch": 5.846921797004992, "loss": 0.43046775460243225, "loss_ce": 0.00029198676929809153, "loss_iou": 0.1513671875, "loss_num": 0.0252685546875, "loss_xval": 0.4296875, "num_input_tokens_seen": 110133908, "step": 1757 }, { "epoch": 5.850249584026622, "grad_norm": 17.745349884033203, "learning_rate": 5e-06, "loss": 0.4801, "num_input_tokens_seen": 110197060, "step": 1758 }, { "epoch": 5.850249584026622, "loss": 0.4054111838340759, "loss_ce": 1.5682009689044207e-05, "loss_iou": 0.173828125, "loss_num": 0.0113525390625, "loss_xval": 0.40625, "num_input_tokens_seen": 110197060, "step": 1758 }, { "epoch": 5.853577371048253, "grad_norm": 30.772167205810547, "learning_rate": 5e-06, "loss": 0.6799, "num_input_tokens_seen": 110260308, "step": 1759 }, { "epoch": 5.853577371048253, "loss": 0.5879861116409302, "loss_ce": 0.0003396476968191564, "loss_iou": 0.177734375, "loss_num": 0.046142578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 110260308, "step": 1759 }, { "epoch": 5.856905158069884, "grad_norm": 19.761573791503906, "learning_rate": 5e-06, "loss": 0.7012, "num_input_tokens_seen": 110322836, "step": 1760 }, { "epoch": 5.856905158069884, "loss": 0.5329855680465698, "loss_ce": 0.0003927953075617552, "loss_iou": 0.1953125, "loss_num": 0.028564453125, "loss_xval": 0.53125, "num_input_tokens_seen": 110322836, "step": 1760 }, { "epoch": 5.860232945091514, "grad_norm": 7.510958671569824, "learning_rate": 5e-06, "loss": 0.5683, "num_input_tokens_seen": 110385456, "step": 1761 }, { "epoch": 5.860232945091514, "loss": 0.7871187329292297, "loss_ce": 0.000894367229193449, "loss_iou": 0.283203125, "loss_num": 0.0439453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 110385456, "step": 1761 }, { "epoch": 5.863560732113145, "grad_norm": 17.104408264160156, "learning_rate": 5e-06, "loss": 0.7455, "num_input_tokens_seen": 110447816, "step": 1762 }, { "epoch": 5.863560732113145, "loss": 0.7688043713569641, "loss_ce": 0.0008599988650530577, "loss_iou": 0.28515625, "loss_num": 0.039794921875, "loss_xval": 0.76953125, "num_input_tokens_seen": 110447816, "step": 1762 }, { "epoch": 5.8668885191347755, "grad_norm": 17.898662567138672, "learning_rate": 5e-06, "loss": 0.604, "num_input_tokens_seen": 110510432, "step": 1763 }, { "epoch": 5.8668885191347755, "loss": 0.6344746351242065, "loss_ce": 1.4242864381230902e-05, "loss_iou": 0.2197265625, "loss_num": 0.0390625, "loss_xval": 0.6328125, "num_input_tokens_seen": 110510432, "step": 1763 }, { "epoch": 5.870216306156406, "grad_norm": 14.683412551879883, "learning_rate": 5e-06, "loss": 0.4767, "num_input_tokens_seen": 110573480, "step": 1764 }, { "epoch": 5.870216306156406, "loss": 0.4930281341075897, "loss_ce": 0.00035233181552030146, "loss_iou": 0.193359375, "loss_num": 0.0213623046875, "loss_xval": 0.4921875, "num_input_tokens_seen": 110573480, "step": 1764 }, { "epoch": 5.873544093178037, "grad_norm": 10.970849990844727, "learning_rate": 5e-06, "loss": 0.6713, "num_input_tokens_seen": 110636184, "step": 1765 }, { "epoch": 5.873544093178037, "loss": 0.6815940737724304, "loss_ce": 1.4488330634776503e-05, "loss_iou": 0.2451171875, "loss_num": 0.038330078125, "loss_xval": 0.6796875, "num_input_tokens_seen": 110636184, "step": 1765 }, { "epoch": 5.876871880199667, "grad_norm": 14.49959945678711, "learning_rate": 5e-06, "loss": 0.5965, "num_input_tokens_seen": 110698788, "step": 1766 }, { "epoch": 5.876871880199667, "loss": 0.6318074464797974, "loss_ce": 9.355986549053341e-05, "loss_iou": 0.265625, "loss_num": 0.0196533203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 110698788, "step": 1766 }, { "epoch": 5.880199667221298, "grad_norm": 9.622454643249512, "learning_rate": 5e-06, "loss": 0.836, "num_input_tokens_seen": 110762872, "step": 1767 }, { "epoch": 5.880199667221298, "loss": 0.9278162121772766, "loss_ce": 0.00032593755167908967, "loss_iou": 0.314453125, "loss_num": 0.0595703125, "loss_xval": 0.92578125, "num_input_tokens_seen": 110762872, "step": 1767 }, { "epoch": 5.883527454242929, "grad_norm": 11.421942710876465, "learning_rate": 5e-06, "loss": 0.4977, "num_input_tokens_seen": 110825212, "step": 1768 }, { "epoch": 5.883527454242929, "loss": 0.5902063846588135, "loss_ce": 0.00011848886788357049, "loss_iou": 0.1943359375, "loss_num": 0.040283203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 110825212, "step": 1768 }, { "epoch": 5.886855241264559, "grad_norm": 17.882009506225586, "learning_rate": 5e-06, "loss": 0.6975, "num_input_tokens_seen": 110889092, "step": 1769 }, { "epoch": 5.886855241264559, "loss": 0.7640447616577148, "loss_ce": 6.69338760417304e-06, "loss_iou": 0.267578125, "loss_num": 0.0458984375, "loss_xval": 0.765625, "num_input_tokens_seen": 110889092, "step": 1769 }, { "epoch": 5.89018302828619, "grad_norm": 12.932168960571289, "learning_rate": 5e-06, "loss": 0.5011, "num_input_tokens_seen": 110951528, "step": 1770 }, { "epoch": 5.89018302828619, "loss": 0.3435995578765869, "loss_ce": 3.2694166293367743e-05, "loss_iou": 0.09521484375, "loss_num": 0.0306396484375, "loss_xval": 0.34375, "num_input_tokens_seen": 110951528, "step": 1770 }, { "epoch": 5.89351081530782, "grad_norm": 21.379364013671875, "learning_rate": 5e-06, "loss": 0.6458, "num_input_tokens_seen": 111015084, "step": 1771 }, { "epoch": 5.89351081530782, "loss": 0.5193750858306885, "loss_ce": 2.696138108149171e-05, "loss_iou": 0.1630859375, "loss_num": 0.03857421875, "loss_xval": 0.51953125, "num_input_tokens_seen": 111015084, "step": 1771 }, { "epoch": 5.896838602329451, "grad_norm": 29.157508850097656, "learning_rate": 5e-06, "loss": 0.5928, "num_input_tokens_seen": 111078152, "step": 1772 }, { "epoch": 5.896838602329451, "loss": 0.560308039188385, "loss_ce": 0.00021890524658374488, "loss_iou": 0.171875, "loss_num": 0.043701171875, "loss_xval": 0.55859375, "num_input_tokens_seen": 111078152, "step": 1772 }, { "epoch": 5.900166389351082, "grad_norm": 27.28708267211914, "learning_rate": 5e-06, "loss": 0.7852, "num_input_tokens_seen": 111141916, "step": 1773 }, { "epoch": 5.900166389351082, "loss": 0.614811897277832, "loss_ce": 6.58001663396135e-05, "loss_iou": 0.2373046875, "loss_num": 0.02783203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 111141916, "step": 1773 }, { "epoch": 5.903494176372712, "grad_norm": 7.881934642791748, "learning_rate": 5e-06, "loss": 0.8055, "num_input_tokens_seen": 111206120, "step": 1774 }, { "epoch": 5.903494176372712, "loss": 0.7207139730453491, "loss_ce": 0.0002549611672293395, "loss_iou": 0.259765625, "loss_num": 0.0400390625, "loss_xval": 0.71875, "num_input_tokens_seen": 111206120, "step": 1774 }, { "epoch": 5.906821963394343, "grad_norm": 16.97408676147461, "learning_rate": 5e-06, "loss": 0.6079, "num_input_tokens_seen": 111269428, "step": 1775 }, { "epoch": 5.906821963394343, "loss": 0.6962382793426514, "loss_ce": 0.0005900564137846231, "loss_iou": 0.26953125, "loss_num": 0.031494140625, "loss_xval": 0.6953125, "num_input_tokens_seen": 111269428, "step": 1775 }, { "epoch": 5.9101497504159735, "grad_norm": 22.03546142578125, "learning_rate": 5e-06, "loss": 0.755, "num_input_tokens_seen": 111332632, "step": 1776 }, { "epoch": 5.9101497504159735, "loss": 0.6281803250312805, "loss_ce": 6.494733497675043e-06, "loss_iou": 0.2373046875, "loss_num": 0.0306396484375, "loss_xval": 0.62890625, "num_input_tokens_seen": 111332632, "step": 1776 }, { "epoch": 5.913477537437604, "grad_norm": 12.667680740356445, "learning_rate": 5e-06, "loss": 0.4636, "num_input_tokens_seen": 111394840, "step": 1777 }, { "epoch": 5.913477537437604, "loss": 0.6527183651924133, "loss_ce": 8.42046119942097e-06, "loss_iou": 0.236328125, "loss_num": 0.035888671875, "loss_xval": 0.65234375, "num_input_tokens_seen": 111394840, "step": 1777 }, { "epoch": 5.916805324459235, "grad_norm": 13.780396461486816, "learning_rate": 5e-06, "loss": 0.8361, "num_input_tokens_seen": 111458408, "step": 1778 }, { "epoch": 5.916805324459235, "loss": 0.9675428867340088, "loss_ce": 1.3609411325887777e-05, "loss_iou": 0.369140625, "loss_num": 0.045654296875, "loss_xval": 0.96875, "num_input_tokens_seen": 111458408, "step": 1778 }, { "epoch": 5.920133111480865, "grad_norm": 14.493585586547852, "learning_rate": 5e-06, "loss": 1.0169, "num_input_tokens_seen": 111520932, "step": 1779 }, { "epoch": 5.920133111480865, "loss": 0.8924142718315125, "loss_ce": 0.00032439938513562083, "loss_iou": 0.33984375, "loss_num": 0.04248046875, "loss_xval": 0.890625, "num_input_tokens_seen": 111520932, "step": 1779 }, { "epoch": 5.923460898502496, "grad_norm": 9.426305770874023, "learning_rate": 5e-06, "loss": 0.7083, "num_input_tokens_seen": 111584548, "step": 1780 }, { "epoch": 5.923460898502496, "loss": 0.5540918111801147, "loss_ce": 1.4637488675361965e-05, "loss_iou": 0.1982421875, "loss_num": 0.03125, "loss_xval": 0.5546875, "num_input_tokens_seen": 111584548, "step": 1780 }, { "epoch": 5.9267886855241265, "grad_norm": 16.58771324157715, "learning_rate": 5e-06, "loss": 0.5497, "num_input_tokens_seen": 111647528, "step": 1781 }, { "epoch": 5.9267886855241265, "loss": 0.5545692443847656, "loss_ce": 3.764728944588569e-06, "loss_iou": 0.1923828125, "loss_num": 0.0341796875, "loss_xval": 0.5546875, "num_input_tokens_seen": 111647528, "step": 1781 }, { "epoch": 5.930116472545757, "grad_norm": 19.532678604125977, "learning_rate": 5e-06, "loss": 0.6149, "num_input_tokens_seen": 111709116, "step": 1782 }, { "epoch": 5.930116472545757, "loss": 0.6148645877838135, "loss_ce": 0.00036261696368455887, "loss_iou": 0.1865234375, "loss_num": 0.04833984375, "loss_xval": 0.61328125, "num_input_tokens_seen": 111709116, "step": 1782 }, { "epoch": 5.933444259567388, "grad_norm": 10.731025695800781, "learning_rate": 5e-06, "loss": 0.638, "num_input_tokens_seen": 111772360, "step": 1783 }, { "epoch": 5.933444259567388, "loss": 0.6903102397918701, "loss_ce": 2.6692309802456293e-06, "loss_iou": 0.291015625, "loss_num": 0.021484375, "loss_xval": 0.69140625, "num_input_tokens_seen": 111772360, "step": 1783 }, { "epoch": 5.936772046589018, "grad_norm": 7.659904479980469, "learning_rate": 5e-06, "loss": 0.6265, "num_input_tokens_seen": 111834520, "step": 1784 }, { "epoch": 5.936772046589018, "loss": 0.660835862159729, "loss_ce": 0.000923778279684484, "loss_iou": 0.2080078125, "loss_num": 0.048828125, "loss_xval": 0.66015625, "num_input_tokens_seen": 111834520, "step": 1784 }, { "epoch": 5.940099833610649, "grad_norm": 10.833303451538086, "learning_rate": 5e-06, "loss": 0.6411, "num_input_tokens_seen": 111897812, "step": 1785 }, { "epoch": 5.940099833610649, "loss": 0.48204123973846436, "loss_ce": 0.0015724744880571961, "loss_iou": 0.1552734375, "loss_num": 0.03369140625, "loss_xval": 0.48046875, "num_input_tokens_seen": 111897812, "step": 1785 }, { "epoch": 5.94342762063228, "grad_norm": 13.161028861999512, "learning_rate": 5e-06, "loss": 0.739, "num_input_tokens_seen": 111960736, "step": 1786 }, { "epoch": 5.94342762063228, "loss": 0.6385595798492432, "loss_ce": 9.793347089726012e-06, "loss_iou": 0.259765625, "loss_num": 0.0240478515625, "loss_xval": 0.63671875, "num_input_tokens_seen": 111960736, "step": 1786 }, { "epoch": 5.94675540765391, "grad_norm": 17.966291427612305, "learning_rate": 5e-06, "loss": 0.6649, "num_input_tokens_seen": 112023060, "step": 1787 }, { "epoch": 5.94675540765391, "loss": 0.559450626373291, "loss_ce": 2.389829205640126e-06, "loss_iou": 0.17578125, "loss_num": 0.041259765625, "loss_xval": 0.55859375, "num_input_tokens_seen": 112023060, "step": 1787 }, { "epoch": 5.950083194675541, "grad_norm": 13.08378791809082, "learning_rate": 5e-06, "loss": 0.6958, "num_input_tokens_seen": 112086652, "step": 1788 }, { "epoch": 5.950083194675541, "loss": 0.5189230442047119, "loss_ce": 2.1638900307152653e-06, "loss_iou": 0.169921875, "loss_num": 0.03564453125, "loss_xval": 0.51953125, "num_input_tokens_seen": 112086652, "step": 1788 }, { "epoch": 5.953410981697171, "grad_norm": 7.4864935874938965, "learning_rate": 5e-06, "loss": 0.64, "num_input_tokens_seen": 112149356, "step": 1789 }, { "epoch": 5.953410981697171, "loss": 0.735355019569397, "loss_ce": 3.5002217373403255e-06, "loss_iou": 0.2490234375, "loss_num": 0.047607421875, "loss_xval": 0.734375, "num_input_tokens_seen": 112149356, "step": 1789 }, { "epoch": 5.956738768718802, "grad_norm": 15.17789363861084, "learning_rate": 5e-06, "loss": 0.6652, "num_input_tokens_seen": 112213516, "step": 1790 }, { "epoch": 5.956738768718802, "loss": 0.46006709337234497, "loss_ce": 0.00083856878336519, "loss_iou": 0.185546875, "loss_num": 0.017822265625, "loss_xval": 0.458984375, "num_input_tokens_seen": 112213516, "step": 1790 }, { "epoch": 5.960066555740433, "grad_norm": 15.437460899353027, "learning_rate": 5e-06, "loss": 0.6772, "num_input_tokens_seen": 112276920, "step": 1791 }, { "epoch": 5.960066555740433, "loss": 0.7749212980270386, "loss_ce": 1.895882269309368e-05, "loss_iou": 0.30078125, "loss_num": 0.03466796875, "loss_xval": 0.7734375, "num_input_tokens_seen": 112276920, "step": 1791 }, { "epoch": 5.963394342762063, "grad_norm": 19.293550491333008, "learning_rate": 5e-06, "loss": 0.645, "num_input_tokens_seen": 112340056, "step": 1792 }, { "epoch": 5.963394342762063, "loss": 0.5155013203620911, "loss_ce": 5.9425779909361154e-05, "loss_iou": 0.169921875, "loss_num": 0.03515625, "loss_xval": 0.515625, "num_input_tokens_seen": 112340056, "step": 1792 }, { "epoch": 5.966722129783694, "grad_norm": 163.175048828125, "learning_rate": 5e-06, "loss": 0.5563, "num_input_tokens_seen": 112403884, "step": 1793 }, { "epoch": 5.966722129783694, "loss": 0.4368853271007538, "loss_ce": 0.00023983999562915415, "loss_iou": 0.1640625, "loss_num": 0.0216064453125, "loss_xval": 0.4375, "num_input_tokens_seen": 112403884, "step": 1793 }, { "epoch": 5.9700499168053245, "grad_norm": 5.786351203918457, "learning_rate": 5e-06, "loss": 0.5125, "num_input_tokens_seen": 112466060, "step": 1794 }, { "epoch": 5.9700499168053245, "loss": 0.5404790639877319, "loss_ce": 1.2786626029992476e-05, "loss_iou": 0.1826171875, "loss_num": 0.034912109375, "loss_xval": 0.5390625, "num_input_tokens_seen": 112466060, "step": 1794 }, { "epoch": 5.973377703826955, "grad_norm": 9.815216064453125, "learning_rate": 5e-06, "loss": 0.5406, "num_input_tokens_seen": 112529052, "step": 1795 }, { "epoch": 5.973377703826955, "loss": 0.519108235836029, "loss_ce": 0.0007976829074323177, "loss_iou": 0.193359375, "loss_num": 0.0264892578125, "loss_xval": 0.51953125, "num_input_tokens_seen": 112529052, "step": 1795 }, { "epoch": 5.976705490848586, "grad_norm": 8.976653099060059, "learning_rate": 5e-06, "loss": 0.5399, "num_input_tokens_seen": 112590984, "step": 1796 }, { "epoch": 5.976705490848586, "loss": 0.3640413284301758, "loss_ce": 2.7628357202047482e-05, "loss_iou": 0.07666015625, "loss_num": 0.042236328125, "loss_xval": 0.36328125, "num_input_tokens_seen": 112590984, "step": 1796 }, { "epoch": 5.980033277870216, "grad_norm": 7.841921329498291, "learning_rate": 5e-06, "loss": 0.3822, "num_input_tokens_seen": 112651508, "step": 1797 }, { "epoch": 5.980033277870216, "loss": 0.4589163661003113, "loss_ce": 0.0007865179213695228, "loss_iou": 0.130859375, "loss_num": 0.039306640625, "loss_xval": 0.458984375, "num_input_tokens_seen": 112651508, "step": 1797 }, { "epoch": 5.983361064891847, "grad_norm": 11.313789367675781, "learning_rate": 5e-06, "loss": 0.4139, "num_input_tokens_seen": 112713492, "step": 1798 }, { "epoch": 5.983361064891847, "loss": 0.4683811664581299, "loss_ce": 0.0003635825705714524, "loss_iou": 0.150390625, "loss_num": 0.033447265625, "loss_xval": 0.46875, "num_input_tokens_seen": 112713492, "step": 1798 }, { "epoch": 5.9866888519134775, "grad_norm": 19.052278518676758, "learning_rate": 5e-06, "loss": 0.5904, "num_input_tokens_seen": 112776436, "step": 1799 }, { "epoch": 5.9866888519134775, "loss": 0.5506741404533386, "loss_ce": 1.4955488040868659e-05, "loss_iou": 0.20703125, "loss_num": 0.0274658203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 112776436, "step": 1799 }, { "epoch": 5.990016638935108, "grad_norm": 11.906286239624023, "learning_rate": 5e-06, "loss": 0.834, "num_input_tokens_seen": 112839184, "step": 1800 }, { "epoch": 5.990016638935108, "loss": 0.9059774875640869, "loss_ce": 0.0002158021816285327, "loss_iou": 0.34765625, "loss_num": 0.04248046875, "loss_xval": 0.90625, "num_input_tokens_seen": 112839184, "step": 1800 }, { "epoch": 5.993344425956739, "grad_norm": 11.046154975891113, "learning_rate": 5e-06, "loss": 0.6103, "num_input_tokens_seen": 112902480, "step": 1801 }, { "epoch": 5.993344425956739, "loss": 0.6410876512527466, "loss_ce": 0.000767821678891778, "loss_iou": 0.265625, "loss_num": 0.021728515625, "loss_xval": 0.640625, "num_input_tokens_seen": 112902480, "step": 1801 }, { "epoch": 5.996672212978369, "grad_norm": 8.254463195800781, "learning_rate": 5e-06, "loss": 0.5668, "num_input_tokens_seen": 112965684, "step": 1802 }, { "epoch": 5.996672212978369, "loss": 0.5886285901069641, "loss_ce": 5.582224730460439e-06, "loss_iou": 0.2099609375, "loss_num": 0.03369140625, "loss_xval": 0.58984375, "num_input_tokens_seen": 112965684, "step": 1802 }, { "epoch": 6.0, "grad_norm": 17.944581985473633, "learning_rate": 5e-06, "loss": 0.5016, "num_input_tokens_seen": 113027952, "step": 1803 }, { "epoch": 6.0, "loss": 0.541974663734436, "loss_ce": 0.001325235585682094, "loss_iou": 0.1708984375, "loss_num": 0.0400390625, "loss_xval": 0.5390625, "num_input_tokens_seen": 113027952, "step": 1803 }, { "epoch": 6.003327787021631, "grad_norm": 9.674674987792969, "learning_rate": 5e-06, "loss": 0.6921, "num_input_tokens_seen": 113091996, "step": 1804 }, { "epoch": 6.003327787021631, "loss": 0.6527393460273743, "loss_ce": 2.937300450867042e-05, "loss_iou": 0.2578125, "loss_num": 0.027587890625, "loss_xval": 0.65234375, "num_input_tokens_seen": 113091996, "step": 1804 }, { "epoch": 6.006655574043261, "grad_norm": 9.173537254333496, "learning_rate": 5e-06, "loss": 0.4881, "num_input_tokens_seen": 113154104, "step": 1805 }, { "epoch": 6.006655574043261, "loss": 0.6374631524085999, "loss_ce": 1.1972469110332895e-05, "loss_iou": 0.20703125, "loss_num": 0.044921875, "loss_xval": 0.63671875, "num_input_tokens_seen": 113154104, "step": 1805 }, { "epoch": 6.009983361064892, "grad_norm": 8.455547332763672, "learning_rate": 5e-06, "loss": 0.6016, "num_input_tokens_seen": 113217076, "step": 1806 }, { "epoch": 6.009983361064892, "loss": 0.7122929096221924, "loss_ce": 1.2594562576850876e-05, "loss_iou": 0.267578125, "loss_num": 0.035400390625, "loss_xval": 0.7109375, "num_input_tokens_seen": 113217076, "step": 1806 }, { "epoch": 6.0133111480865225, "grad_norm": 9.247243881225586, "learning_rate": 5e-06, "loss": 0.648, "num_input_tokens_seen": 113278848, "step": 1807 }, { "epoch": 6.0133111480865225, "loss": 0.74269700050354, "loss_ce": 2.1219018890406005e-05, "loss_iou": 0.28125, "loss_num": 0.035888671875, "loss_xval": 0.7421875, "num_input_tokens_seen": 113278848, "step": 1807 }, { "epoch": 6.016638935108153, "grad_norm": 27.487987518310547, "learning_rate": 5e-06, "loss": 0.585, "num_input_tokens_seen": 113342420, "step": 1808 }, { "epoch": 6.016638935108153, "loss": 0.6871448159217834, "loss_ce": 0.0009876075200736523, "loss_iou": 0.25, "loss_num": 0.037353515625, "loss_xval": 0.6875, "num_input_tokens_seen": 113342420, "step": 1808 }, { "epoch": 6.019966722129784, "grad_norm": 19.232105255126953, "learning_rate": 5e-06, "loss": 0.4805, "num_input_tokens_seen": 113404988, "step": 1809 }, { "epoch": 6.019966722129784, "loss": 0.5036937594413757, "loss_ce": 3.1642361136619e-05, "loss_iou": 0.1767578125, "loss_num": 0.0299072265625, "loss_xval": 0.50390625, "num_input_tokens_seen": 113404988, "step": 1809 }, { "epoch": 6.023294509151414, "grad_norm": 12.246728897094727, "learning_rate": 5e-06, "loss": 0.3926, "num_input_tokens_seen": 113465256, "step": 1810 }, { "epoch": 6.023294509151414, "loss": 0.46057966351509094, "loss_ce": 8.364679160877131e-06, "loss_iou": 0.1435546875, "loss_num": 0.034912109375, "loss_xval": 0.4609375, "num_input_tokens_seen": 113465256, "step": 1810 }, { "epoch": 6.026622296173045, "grad_norm": 13.189004898071289, "learning_rate": 5e-06, "loss": 0.5578, "num_input_tokens_seen": 113529688, "step": 1811 }, { "epoch": 6.026622296173045, "loss": 0.29475241899490356, "loss_ce": 0.0006850441568531096, "loss_iou": 0.09326171875, "loss_num": 0.021484375, "loss_xval": 0.294921875, "num_input_tokens_seen": 113529688, "step": 1811 }, { "epoch": 6.0299500831946755, "grad_norm": 13.761101722717285, "learning_rate": 5e-06, "loss": 0.4934, "num_input_tokens_seen": 113591872, "step": 1812 }, { "epoch": 6.0299500831946755, "loss": 0.6085264086723328, "loss_ce": 5.928580321779009e-06, "loss_iou": 0.236328125, "loss_num": 0.0274658203125, "loss_xval": 0.609375, "num_input_tokens_seen": 113591872, "step": 1812 }, { "epoch": 6.033277870216306, "grad_norm": 20.645238876342773, "learning_rate": 5e-06, "loss": 0.6173, "num_input_tokens_seen": 113655208, "step": 1813 }, { "epoch": 6.033277870216306, "loss": 0.3555956482887268, "loss_ce": 4.832388185604941e-06, "loss_iou": 0.1376953125, "loss_num": 0.01611328125, "loss_xval": 0.35546875, "num_input_tokens_seen": 113655208, "step": 1813 }, { "epoch": 6.036605657237937, "grad_norm": 22.53754997253418, "learning_rate": 5e-06, "loss": 0.5212, "num_input_tokens_seen": 113716792, "step": 1814 }, { "epoch": 6.036605657237937, "loss": 0.5042844414710999, "loss_ce": 1.197168148792116e-05, "loss_iou": 0.18359375, "loss_num": 0.0274658203125, "loss_xval": 0.50390625, "num_input_tokens_seen": 113716792, "step": 1814 }, { "epoch": 6.039933444259567, "grad_norm": 9.635963439941406, "learning_rate": 5e-06, "loss": 0.5187, "num_input_tokens_seen": 113779264, "step": 1815 }, { "epoch": 6.039933444259567, "loss": 0.5786181092262268, "loss_ce": 4.849131073569879e-06, "loss_iou": 0.236328125, "loss_num": 0.021240234375, "loss_xval": 0.578125, "num_input_tokens_seen": 113779264, "step": 1815 }, { "epoch": 6.043261231281198, "grad_norm": 10.88184928894043, "learning_rate": 5e-06, "loss": 0.5676, "num_input_tokens_seen": 113842512, "step": 1816 }, { "epoch": 6.043261231281198, "loss": 0.554840087890625, "loss_ce": 3.054763510590419e-05, "loss_iou": 0.19921875, "loss_num": 0.031494140625, "loss_xval": 0.5546875, "num_input_tokens_seen": 113842512, "step": 1816 }, { "epoch": 6.046589018302829, "grad_norm": 11.561516761779785, "learning_rate": 5e-06, "loss": 0.6489, "num_input_tokens_seen": 113905996, "step": 1817 }, { "epoch": 6.046589018302829, "loss": 0.714735209941864, "loss_ce": 0.000990099273622036, "loss_iou": 0.291015625, "loss_num": 0.026123046875, "loss_xval": 0.71484375, "num_input_tokens_seen": 113905996, "step": 1817 }, { "epoch": 6.049916805324459, "grad_norm": 12.262077331542969, "learning_rate": 5e-06, "loss": 0.534, "num_input_tokens_seen": 113968400, "step": 1818 }, { "epoch": 6.049916805324459, "loss": 0.3480096459388733, "loss_ce": 0.00047548062866553664, "loss_iou": 0.091796875, "loss_num": 0.032958984375, "loss_xval": 0.34765625, "num_input_tokens_seen": 113968400, "step": 1818 }, { "epoch": 6.05324459234609, "grad_norm": 31.599517822265625, "learning_rate": 5e-06, "loss": 0.7502, "num_input_tokens_seen": 114031864, "step": 1819 }, { "epoch": 6.05324459234609, "loss": 0.8395349979400635, "loss_ce": 5.742616122006439e-05, "loss_iou": 0.302734375, "loss_num": 0.047119140625, "loss_xval": 0.83984375, "num_input_tokens_seen": 114031864, "step": 1819 }, { "epoch": 6.05657237936772, "grad_norm": 22.028301239013672, "learning_rate": 5e-06, "loss": 0.4261, "num_input_tokens_seen": 114094760, "step": 1820 }, { "epoch": 6.05657237936772, "loss": 0.3527792692184448, "loss_ce": 0.0005453916382975876, "loss_iou": 0.1376953125, "loss_num": 0.0155029296875, "loss_xval": 0.3515625, "num_input_tokens_seen": 114094760, "step": 1820 }, { "epoch": 6.059900166389351, "grad_norm": 10.08108901977539, "learning_rate": 5e-06, "loss": 0.7445, "num_input_tokens_seen": 114157800, "step": 1821 }, { "epoch": 6.059900166389351, "loss": 0.47491908073425293, "loss_ce": 0.0001876326132332906, "loss_iou": 0.1396484375, "loss_num": 0.0390625, "loss_xval": 0.474609375, "num_input_tokens_seen": 114157800, "step": 1821 }, { "epoch": 6.063227953410982, "grad_norm": 13.524107933044434, "learning_rate": 5e-06, "loss": 0.7475, "num_input_tokens_seen": 114219300, "step": 1822 }, { "epoch": 6.063227953410982, "loss": 0.8414603471755981, "loss_ce": 0.0005179790314286947, "loss_iou": 0.318359375, "loss_num": 0.040771484375, "loss_xval": 0.83984375, "num_input_tokens_seen": 114219300, "step": 1822 }, { "epoch": 6.066555740432612, "grad_norm": 36.742916107177734, "learning_rate": 5e-06, "loss": 0.5156, "num_input_tokens_seen": 114282356, "step": 1823 }, { "epoch": 6.066555740432612, "loss": 0.6236370801925659, "loss_ce": 0.0003460935549810529, "loss_iou": 0.251953125, "loss_num": 0.0240478515625, "loss_xval": 0.625, "num_input_tokens_seen": 114282356, "step": 1823 }, { "epoch": 6.069883527454243, "grad_norm": 23.24175262451172, "learning_rate": 5e-06, "loss": 0.4682, "num_input_tokens_seen": 114343184, "step": 1824 }, { "epoch": 6.069883527454243, "loss": 0.6115790605545044, "loss_ce": 6.832242434029467e-06, "loss_iou": 0.2265625, "loss_num": 0.031982421875, "loss_xval": 0.61328125, "num_input_tokens_seen": 114343184, "step": 1824 }, { "epoch": 6.0732113144758735, "grad_norm": 3.49735164642334, "learning_rate": 5e-06, "loss": 0.5489, "num_input_tokens_seen": 114402052, "step": 1825 }, { "epoch": 6.0732113144758735, "loss": 0.5846163034439087, "loss_ce": 0.0003877862764056772, "loss_iou": 0.185546875, "loss_num": 0.04248046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 114402052, "step": 1825 }, { "epoch": 6.076539101497504, "grad_norm": 14.314276695251465, "learning_rate": 5e-06, "loss": 0.6319, "num_input_tokens_seen": 114465592, "step": 1826 }, { "epoch": 6.076539101497504, "loss": 0.6413317322731018, "loss_ce": 0.0001573978952364996, "loss_iou": 0.2158203125, "loss_num": 0.041748046875, "loss_xval": 0.640625, "num_input_tokens_seen": 114465592, "step": 1826 }, { "epoch": 6.079866888519135, "grad_norm": 14.094009399414062, "learning_rate": 5e-06, "loss": 0.8465, "num_input_tokens_seen": 114529128, "step": 1827 }, { "epoch": 6.079866888519135, "loss": 1.044364333152771, "loss_ce": 0.00041907295235432684, "loss_iou": 0.40625, "loss_num": 0.046142578125, "loss_xval": 1.046875, "num_input_tokens_seen": 114529128, "step": 1827 }, { "epoch": 6.083194675540765, "grad_norm": 24.5706729888916, "learning_rate": 5e-06, "loss": 0.545, "num_input_tokens_seen": 114591280, "step": 1828 }, { "epoch": 6.083194675540765, "loss": 0.23401130735874176, "loss_ce": 2.510785179765662e-06, "loss_iou": 0.048828125, "loss_num": 0.02734375, "loss_xval": 0.234375, "num_input_tokens_seen": 114591280, "step": 1828 }, { "epoch": 6.086522462562396, "grad_norm": 12.082476615905762, "learning_rate": 5e-06, "loss": 0.6638, "num_input_tokens_seen": 114653484, "step": 1829 }, { "epoch": 6.086522462562396, "loss": 0.6505266427993774, "loss_ce": 1.3973678505863063e-05, "loss_iou": 0.185546875, "loss_num": 0.0556640625, "loss_xval": 0.65234375, "num_input_tokens_seen": 114653484, "step": 1829 }, { "epoch": 6.0898502495840265, "grad_norm": 8.548013687133789, "learning_rate": 5e-06, "loss": 0.6343, "num_input_tokens_seen": 114716464, "step": 1830 }, { "epoch": 6.0898502495840265, "loss": 0.7071037292480469, "loss_ce": 0.0006828161422163248, "loss_iou": 0.244140625, "loss_num": 0.04345703125, "loss_xval": 0.70703125, "num_input_tokens_seen": 114716464, "step": 1830 }, { "epoch": 6.093178036605657, "grad_norm": 16.813053131103516, "learning_rate": 5e-06, "loss": 0.7246, "num_input_tokens_seen": 114780024, "step": 1831 }, { "epoch": 6.093178036605657, "loss": 0.812924861907959, "loss_ce": 0.00018073590763378888, "loss_iou": 0.31640625, "loss_num": 0.035888671875, "loss_xval": 0.8125, "num_input_tokens_seen": 114780024, "step": 1831 }, { "epoch": 6.096505823627288, "grad_norm": 8.985211372375488, "learning_rate": 5e-06, "loss": 0.3769, "num_input_tokens_seen": 114841372, "step": 1832 }, { "epoch": 6.096505823627288, "loss": 0.4680231213569641, "loss_ce": 5.536644493986387e-06, "loss_iou": 0.1904296875, "loss_num": 0.0177001953125, "loss_xval": 0.46875, "num_input_tokens_seen": 114841372, "step": 1832 }, { "epoch": 6.099833610648918, "grad_norm": 8.535601615905762, "learning_rate": 5e-06, "loss": 0.7946, "num_input_tokens_seen": 114904420, "step": 1833 }, { "epoch": 6.099833610648918, "loss": 1.0373455286026, "loss_ce": 0.00023611943470314145, "loss_iou": 0.37890625, "loss_num": 0.055908203125, "loss_xval": 1.0390625, "num_input_tokens_seen": 114904420, "step": 1833 }, { "epoch": 6.103161397670549, "grad_norm": 93.50047302246094, "learning_rate": 5e-06, "loss": 0.6555, "num_input_tokens_seen": 114967060, "step": 1834 }, { "epoch": 6.103161397670549, "loss": 0.5249112844467163, "loss_ce": 0.000771891325712204, "loss_iou": 0.1318359375, "loss_num": 0.052001953125, "loss_xval": 0.5234375, "num_input_tokens_seen": 114967060, "step": 1834 }, { "epoch": 6.10648918469218, "grad_norm": 14.458890914916992, "learning_rate": 5e-06, "loss": 0.8666, "num_input_tokens_seen": 115029480, "step": 1835 }, { "epoch": 6.10648918469218, "loss": 0.7600847482681274, "loss_ce": 0.0003191790310665965, "loss_iou": 0.26953125, "loss_num": 0.044189453125, "loss_xval": 0.7578125, "num_input_tokens_seen": 115029480, "step": 1835 }, { "epoch": 6.10981697171381, "grad_norm": 27.348670959472656, "learning_rate": 5e-06, "loss": 0.7703, "num_input_tokens_seen": 115091824, "step": 1836 }, { "epoch": 6.10981697171381, "loss": 0.897258996963501, "loss_ce": 0.0002863441768568009, "loss_iou": 0.341796875, "loss_num": 0.042724609375, "loss_xval": 0.8984375, "num_input_tokens_seen": 115091824, "step": 1836 }, { "epoch": 6.113144758735441, "grad_norm": 35.71364212036133, "learning_rate": 5e-06, "loss": 0.7033, "num_input_tokens_seen": 115155864, "step": 1837 }, { "epoch": 6.113144758735441, "loss": 0.6706986427307129, "loss_ce": 0.0002884720452129841, "loss_iou": 0.28125, "loss_num": 0.0213623046875, "loss_xval": 0.671875, "num_input_tokens_seen": 115155864, "step": 1837 }, { "epoch": 6.116472545757071, "grad_norm": 40.142601013183594, "learning_rate": 5e-06, "loss": 0.692, "num_input_tokens_seen": 115218072, "step": 1838 }, { "epoch": 6.116472545757071, "loss": 0.5009810328483582, "loss_ce": 4.504245680436725e-06, "loss_iou": 0.177734375, "loss_num": 0.029296875, "loss_xval": 0.5, "num_input_tokens_seen": 115218072, "step": 1838 }, { "epoch": 6.119800332778702, "grad_norm": 63.8580207824707, "learning_rate": 5e-06, "loss": 0.8776, "num_input_tokens_seen": 115281140, "step": 1839 }, { "epoch": 6.119800332778702, "loss": 1.3138946294784546, "loss_ce": 0.00041809308459050953, "loss_iou": 0.5, "loss_num": 0.0625, "loss_xval": 1.3125, "num_input_tokens_seen": 115281140, "step": 1839 }, { "epoch": 6.123128119800333, "grad_norm": 38.08137893676758, "learning_rate": 5e-06, "loss": 0.8016, "num_input_tokens_seen": 115345264, "step": 1840 }, { "epoch": 6.123128119800333, "loss": 0.7194625735282898, "loss_ce": 0.0014449949376285076, "loss_iou": 0.28125, "loss_num": 0.031494140625, "loss_xval": 0.71875, "num_input_tokens_seen": 115345264, "step": 1840 }, { "epoch": 6.126455906821963, "grad_norm": 16.620935440063477, "learning_rate": 5e-06, "loss": 0.5626, "num_input_tokens_seen": 115408084, "step": 1841 }, { "epoch": 6.126455906821963, "loss": 0.4897364377975464, "loss_ce": 0.0002344690728932619, "loss_iou": 0.166015625, "loss_num": 0.03173828125, "loss_xval": 0.490234375, "num_input_tokens_seen": 115408084, "step": 1841 }, { "epoch": 6.129783693843594, "grad_norm": 15.078736305236816, "learning_rate": 5e-06, "loss": 0.426, "num_input_tokens_seen": 115468608, "step": 1842 }, { "epoch": 6.129783693843594, "loss": 0.41058778762817383, "loss_ce": 4.296011411497602e-06, "loss_iou": 0.11669921875, "loss_num": 0.035400390625, "loss_xval": 0.41015625, "num_input_tokens_seen": 115468608, "step": 1842 }, { "epoch": 6.1331114808652245, "grad_norm": 13.707100868225098, "learning_rate": 5e-06, "loss": 0.4833, "num_input_tokens_seen": 115531128, "step": 1843 }, { "epoch": 6.1331114808652245, "loss": 0.32538461685180664, "loss_ce": 6.226244295248762e-06, "loss_iou": 0.11279296875, "loss_num": 0.0198974609375, "loss_xval": 0.326171875, "num_input_tokens_seen": 115531128, "step": 1843 }, { "epoch": 6.136439267886855, "grad_norm": 9.169779777526855, "learning_rate": 5e-06, "loss": 0.4214, "num_input_tokens_seen": 115594968, "step": 1844 }, { "epoch": 6.136439267886855, "loss": 0.44348764419555664, "loss_ce": 6.204313649504911e-06, "loss_iou": 0.1572265625, "loss_num": 0.025634765625, "loss_xval": 0.443359375, "num_input_tokens_seen": 115594968, "step": 1844 }, { "epoch": 6.139767054908486, "grad_norm": 13.466840744018555, "learning_rate": 5e-06, "loss": 0.5918, "num_input_tokens_seen": 115658540, "step": 1845 }, { "epoch": 6.139767054908486, "loss": 0.4453752040863037, "loss_ce": 1.6507557347722468e-06, "loss_iou": 0.1640625, "loss_num": 0.0234375, "loss_xval": 0.4453125, "num_input_tokens_seen": 115658540, "step": 1845 }, { "epoch": 6.143094841930116, "grad_norm": 21.461875915527344, "learning_rate": 5e-06, "loss": 0.6385, "num_input_tokens_seen": 115720368, "step": 1846 }, { "epoch": 6.143094841930116, "loss": 0.46438270807266235, "loss_ce": 0.0002713671128731221, "loss_iou": 0.1845703125, "loss_num": 0.0191650390625, "loss_xval": 0.46484375, "num_input_tokens_seen": 115720368, "step": 1846 }, { "epoch": 6.146422628951747, "grad_norm": 13.239744186401367, "learning_rate": 5e-06, "loss": 0.6392, "num_input_tokens_seen": 115783808, "step": 1847 }, { "epoch": 6.146422628951747, "loss": 0.526348352432251, "loss_ce": 0.0012934907572343946, "loss_iou": 0.2021484375, "loss_num": 0.02392578125, "loss_xval": 0.5234375, "num_input_tokens_seen": 115783808, "step": 1847 }, { "epoch": 6.149750415973378, "grad_norm": 10.293550491333008, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 115846332, "step": 1848 }, { "epoch": 6.149750415973378, "loss": 0.5129554271697998, "loss_ce": 7.699175330344588e-05, "loss_iou": 0.177734375, "loss_num": 0.03173828125, "loss_xval": 0.51171875, "num_input_tokens_seen": 115846332, "step": 1848 }, { "epoch": 6.153078202995008, "grad_norm": 29.244150161743164, "learning_rate": 5e-06, "loss": 0.4974, "num_input_tokens_seen": 115909080, "step": 1849 }, { "epoch": 6.153078202995008, "loss": 0.4363846480846405, "loss_ce": 0.00016639340901747346, "loss_iou": 0.185546875, "loss_num": 0.012939453125, "loss_xval": 0.435546875, "num_input_tokens_seen": 115909080, "step": 1849 }, { "epoch": 6.156405990016639, "grad_norm": 24.0628719329834, "learning_rate": 5e-06, "loss": 0.6768, "num_input_tokens_seen": 115972612, "step": 1850 }, { "epoch": 6.156405990016639, "loss": 0.6228271126747131, "loss_ce": 2.4393082640017383e-05, "loss_iou": 0.2294921875, "loss_num": 0.032958984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 115972612, "step": 1850 }, { "epoch": 6.159733777038269, "grad_norm": 11.995357513427734, "learning_rate": 5e-06, "loss": 0.6627, "num_input_tokens_seen": 116034864, "step": 1851 }, { "epoch": 6.159733777038269, "loss": 0.6822923421859741, "loss_ce": 0.0002855151833500713, "loss_iou": 0.2412109375, "loss_num": 0.0400390625, "loss_xval": 0.68359375, "num_input_tokens_seen": 116034864, "step": 1851 }, { "epoch": 6.1630615640599, "grad_norm": 16.793289184570312, "learning_rate": 5e-06, "loss": 0.4512, "num_input_tokens_seen": 116096048, "step": 1852 }, { "epoch": 6.1630615640599, "loss": 0.4262848496437073, "loss_ce": 1.5310766684706323e-05, "loss_iou": 0.1328125, "loss_num": 0.031982421875, "loss_xval": 0.42578125, "num_input_tokens_seen": 116096048, "step": 1852 }, { "epoch": 6.166389351081531, "grad_norm": 29.745019912719727, "learning_rate": 5e-06, "loss": 0.5301, "num_input_tokens_seen": 116160116, "step": 1853 }, { "epoch": 6.166389351081531, "loss": 0.5290565490722656, "loss_ce": 3.857565388898365e-06, "loss_iou": 0.224609375, "loss_num": 0.0162353515625, "loss_xval": 0.52734375, "num_input_tokens_seen": 116160116, "step": 1853 }, { "epoch": 6.169717138103161, "grad_norm": 28.486772537231445, "learning_rate": 5e-06, "loss": 0.9159, "num_input_tokens_seen": 116224180, "step": 1854 }, { "epoch": 6.169717138103161, "loss": 0.8901699781417847, "loss_ce": 3.323800046928227e-05, "loss_iou": 0.294921875, "loss_num": 0.059814453125, "loss_xval": 0.890625, "num_input_tokens_seen": 116224180, "step": 1854 }, { "epoch": 6.173044925124792, "grad_norm": 21.615991592407227, "learning_rate": 5e-06, "loss": 0.7607, "num_input_tokens_seen": 116288236, "step": 1855 }, { "epoch": 6.173044925124792, "loss": 0.7306792140007019, "loss_ce": 8.840014925226569e-05, "loss_iou": 0.263671875, "loss_num": 0.041015625, "loss_xval": 0.73046875, "num_input_tokens_seen": 116288236, "step": 1855 }, { "epoch": 6.1763727121464225, "grad_norm": 13.898966789245605, "learning_rate": 5e-06, "loss": 0.887, "num_input_tokens_seen": 116351732, "step": 1856 }, { "epoch": 6.1763727121464225, "loss": 0.8519605398178101, "loss_ce": 0.0006421194411814213, "loss_iou": 0.28515625, "loss_num": 0.056396484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 116351732, "step": 1856 }, { "epoch": 6.179700499168053, "grad_norm": 13.027920722961426, "learning_rate": 5e-06, "loss": 0.4988, "num_input_tokens_seen": 116413132, "step": 1857 }, { "epoch": 6.179700499168053, "loss": 0.28084152936935425, "loss_ce": 0.00021712988382205367, "loss_iou": 0.09228515625, "loss_num": 0.019287109375, "loss_xval": 0.28125, "num_input_tokens_seen": 116413132, "step": 1857 }, { "epoch": 6.183028286189684, "grad_norm": 10.372034072875977, "learning_rate": 5e-06, "loss": 0.7474, "num_input_tokens_seen": 116475332, "step": 1858 }, { "epoch": 6.183028286189684, "loss": 0.6742005348205566, "loss_ce": 6.238299192773411e-06, "loss_iou": 0.21484375, "loss_num": 0.048828125, "loss_xval": 0.67578125, "num_input_tokens_seen": 116475332, "step": 1858 }, { "epoch": 6.186356073211314, "grad_norm": 9.677175521850586, "learning_rate": 5e-06, "loss": 0.5668, "num_input_tokens_seen": 116538180, "step": 1859 }, { "epoch": 6.186356073211314, "loss": 0.3914852440357208, "loss_ce": 5.770006282546092e-06, "loss_iou": 0.1455078125, "loss_num": 0.0198974609375, "loss_xval": 0.390625, "num_input_tokens_seen": 116538180, "step": 1859 }, { "epoch": 6.189683860232945, "grad_norm": 21.819242477416992, "learning_rate": 5e-06, "loss": 0.7448, "num_input_tokens_seen": 116601436, "step": 1860 }, { "epoch": 6.189683860232945, "loss": 0.8663401007652283, "loss_ce": 0.0006174079608172178, "loss_iou": 0.2890625, "loss_num": 0.057373046875, "loss_xval": 0.8671875, "num_input_tokens_seen": 116601436, "step": 1860 }, { "epoch": 6.1930116472545755, "grad_norm": 13.650229454040527, "learning_rate": 5e-06, "loss": 0.4914, "num_input_tokens_seen": 116664152, "step": 1861 }, { "epoch": 6.1930116472545755, "loss": 0.5534778833389282, "loss_ce": 1.1048641681554727e-05, "loss_iou": 0.189453125, "loss_num": 0.034912109375, "loss_xval": 0.5546875, "num_input_tokens_seen": 116664152, "step": 1861 }, { "epoch": 6.196339434276206, "grad_norm": 10.993196487426758, "learning_rate": 5e-06, "loss": 0.6325, "num_input_tokens_seen": 116727620, "step": 1862 }, { "epoch": 6.196339434276206, "loss": 0.564603328704834, "loss_ce": 0.0001502439408795908, "loss_iou": 0.203125, "loss_num": 0.03173828125, "loss_xval": 0.5625, "num_input_tokens_seen": 116727620, "step": 1862 }, { "epoch": 6.199667221297837, "grad_norm": 10.632341384887695, "learning_rate": 5e-06, "loss": 0.8296, "num_input_tokens_seen": 116790384, "step": 1863 }, { "epoch": 6.199667221297837, "loss": 0.9421427845954895, "loss_ce": 0.00024822127306833863, "loss_iou": 0.359375, "loss_num": 0.044677734375, "loss_xval": 0.94140625, "num_input_tokens_seen": 116790384, "step": 1863 }, { "epoch": 6.202995008319467, "grad_norm": 12.55924129486084, "learning_rate": 5e-06, "loss": 0.6508, "num_input_tokens_seen": 116853404, "step": 1864 }, { "epoch": 6.202995008319467, "loss": 0.7379372715950012, "loss_ce": 2.2252330381888896e-05, "loss_iou": 0.251953125, "loss_num": 0.04638671875, "loss_xval": 0.73828125, "num_input_tokens_seen": 116853404, "step": 1864 }, { "epoch": 6.206322795341098, "grad_norm": 9.573348999023438, "learning_rate": 5e-06, "loss": 0.4899, "num_input_tokens_seen": 116915180, "step": 1865 }, { "epoch": 6.206322795341098, "loss": 0.5604289770126343, "loss_ce": 4.176784386800136e-06, "loss_iou": 0.212890625, "loss_num": 0.027099609375, "loss_xval": 0.55859375, "num_input_tokens_seen": 116915180, "step": 1865 }, { "epoch": 6.209650582362729, "grad_norm": 13.672651290893555, "learning_rate": 5e-06, "loss": 0.8266, "num_input_tokens_seen": 116977432, "step": 1866 }, { "epoch": 6.209650582362729, "loss": 0.8442440032958984, "loss_ce": 5.746081569668604e-06, "loss_iou": 0.34765625, "loss_num": 0.0296630859375, "loss_xval": 0.84375, "num_input_tokens_seen": 116977432, "step": 1866 }, { "epoch": 6.212978369384359, "grad_norm": 21.70526695251465, "learning_rate": 5e-06, "loss": 0.6817, "num_input_tokens_seen": 117040464, "step": 1867 }, { "epoch": 6.212978369384359, "loss": 0.44515183568000793, "loss_ce": 8.34673919598572e-05, "loss_iou": 0.173828125, "loss_num": 0.0194091796875, "loss_xval": 0.4453125, "num_input_tokens_seen": 117040464, "step": 1867 }, { "epoch": 6.21630615640599, "grad_norm": 30.76572608947754, "learning_rate": 5e-06, "loss": 0.6901, "num_input_tokens_seen": 117102624, "step": 1868 }, { "epoch": 6.21630615640599, "loss": 0.6595780849456787, "loss_ce": 0.0002763564989436418, "loss_iou": 0.2412109375, "loss_num": 0.03515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 117102624, "step": 1868 }, { "epoch": 6.21963394342762, "grad_norm": 34.83747863769531, "learning_rate": 5e-06, "loss": 0.635, "num_input_tokens_seen": 117164784, "step": 1869 }, { "epoch": 6.21963394342762, "loss": 0.5983932018280029, "loss_ce": 4.4655953388428316e-06, "loss_iou": 0.2158203125, "loss_num": 0.033447265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 117164784, "step": 1869 }, { "epoch": 6.222961730449251, "grad_norm": 17.383846282958984, "learning_rate": 5e-06, "loss": 0.7551, "num_input_tokens_seen": 117228512, "step": 1870 }, { "epoch": 6.222961730449251, "loss": 0.789067268371582, "loss_ce": 4.7702878873678856e-06, "loss_iou": 0.29296875, "loss_num": 0.040771484375, "loss_xval": 0.7890625, "num_input_tokens_seen": 117228512, "step": 1870 }, { "epoch": 6.226289517470882, "grad_norm": 7.634170055389404, "learning_rate": 5e-06, "loss": 0.5058, "num_input_tokens_seen": 117290664, "step": 1871 }, { "epoch": 6.226289517470882, "loss": 0.4545946717262268, "loss_ce": 0.0002489521575625986, "loss_iou": 0.1806640625, "loss_num": 0.0184326171875, "loss_xval": 0.455078125, "num_input_tokens_seen": 117290664, "step": 1871 }, { "epoch": 6.229617304492512, "grad_norm": 10.459074974060059, "learning_rate": 5e-06, "loss": 0.6708, "num_input_tokens_seen": 117352208, "step": 1872 }, { "epoch": 6.229617304492512, "loss": 0.6733894348144531, "loss_ce": 0.0022468536626547575, "loss_iou": 0.220703125, "loss_num": 0.046142578125, "loss_xval": 0.671875, "num_input_tokens_seen": 117352208, "step": 1872 }, { "epoch": 6.232945091514143, "grad_norm": 10.635107040405273, "learning_rate": 5e-06, "loss": 0.6496, "num_input_tokens_seen": 117415324, "step": 1873 }, { "epoch": 6.232945091514143, "loss": 0.5553305149078369, "loss_ce": 3.267010106355883e-05, "loss_iou": 0.2060546875, "loss_num": 0.0286865234375, "loss_xval": 0.5546875, "num_input_tokens_seen": 117415324, "step": 1873 }, { "epoch": 6.2362728785357735, "grad_norm": 11.903627395629883, "learning_rate": 5e-06, "loss": 0.4879, "num_input_tokens_seen": 117476524, "step": 1874 }, { "epoch": 6.2362728785357735, "loss": 0.5665339827537537, "loss_ce": 5.682074515789282e-06, "loss_iou": 0.1552734375, "loss_num": 0.05126953125, "loss_xval": 0.56640625, "num_input_tokens_seen": 117476524, "step": 1874 }, { "epoch": 6.239600665557404, "grad_norm": 26.258224487304688, "learning_rate": 5e-06, "loss": 0.4816, "num_input_tokens_seen": 117538104, "step": 1875 }, { "epoch": 6.239600665557404, "loss": 0.45252174139022827, "loss_ce": 0.0006174290319904685, "loss_iou": 0.080078125, "loss_num": 0.058349609375, "loss_xval": 0.451171875, "num_input_tokens_seen": 117538104, "step": 1875 }, { "epoch": 6.242928452579035, "grad_norm": 19.102313995361328, "learning_rate": 5e-06, "loss": 0.5417, "num_input_tokens_seen": 117601844, "step": 1876 }, { "epoch": 6.242928452579035, "loss": 0.7448785305023193, "loss_ce": 5.478878847497981e-06, "loss_iou": 0.267578125, "loss_num": 0.0419921875, "loss_xval": 0.74609375, "num_input_tokens_seen": 117601844, "step": 1876 }, { "epoch": 6.246256239600665, "grad_norm": 28.73021697998047, "learning_rate": 5e-06, "loss": 0.776, "num_input_tokens_seen": 117665936, "step": 1877 }, { "epoch": 6.246256239600665, "loss": 1.0089422464370728, "loss_ce": 0.0008856566273607314, "loss_iou": 0.380859375, "loss_num": 0.048828125, "loss_xval": 1.0078125, "num_input_tokens_seen": 117665936, "step": 1877 }, { "epoch": 6.249584026622296, "grad_norm": 18.161848068237305, "learning_rate": 5e-06, "loss": 0.5941, "num_input_tokens_seen": 117728540, "step": 1878 }, { "epoch": 6.249584026622296, "loss": 0.5405303835868835, "loss_ce": 3.0245664675021544e-06, "loss_iou": 0.181640625, "loss_num": 0.035400390625, "loss_xval": 0.5390625, "num_input_tokens_seen": 117728540, "step": 1878 }, { "epoch": 6.252911813643927, "grad_norm": 26.851957321166992, "learning_rate": 5e-06, "loss": 0.5273, "num_input_tokens_seen": 117790244, "step": 1879 }, { "epoch": 6.252911813643927, "loss": 0.6641072034835815, "loss_ce": 0.0013875153381377459, "loss_iou": 0.25390625, "loss_num": 0.031005859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 117790244, "step": 1879 }, { "epoch": 6.256239600665557, "grad_norm": 12.159568786621094, "learning_rate": 5e-06, "loss": 0.3864, "num_input_tokens_seen": 117851164, "step": 1880 }, { "epoch": 6.256239600665557, "loss": 0.43371862173080444, "loss_ce": 2.7776063689088915e-06, "loss_iou": 0.1376953125, "loss_num": 0.03173828125, "loss_xval": 0.43359375, "num_input_tokens_seen": 117851164, "step": 1880 }, { "epoch": 6.259567387687188, "grad_norm": 9.506256103515625, "learning_rate": 5e-06, "loss": 0.8167, "num_input_tokens_seen": 117914440, "step": 1881 }, { "epoch": 6.259567387687188, "loss": 0.6575681567192078, "loss_ce": 9.747529838932678e-05, "loss_iou": 0.23828125, "loss_num": 0.0361328125, "loss_xval": 0.65625, "num_input_tokens_seen": 117914440, "step": 1881 }, { "epoch": 6.262895174708818, "grad_norm": 20.55832862854004, "learning_rate": 5e-06, "loss": 0.6532, "num_input_tokens_seen": 117976484, "step": 1882 }, { "epoch": 6.262895174708818, "loss": 0.817456841468811, "loss_ce": 1.30117896333104e-05, "loss_iou": 0.294921875, "loss_num": 0.045166015625, "loss_xval": 0.81640625, "num_input_tokens_seen": 117976484, "step": 1882 }, { "epoch": 6.266222961730449, "grad_norm": 18.518428802490234, "learning_rate": 5e-06, "loss": 0.5995, "num_input_tokens_seen": 118039700, "step": 1883 }, { "epoch": 6.266222961730449, "loss": 0.7166527509689331, "loss_ce": 8.421022357651964e-06, "loss_iou": 0.306640625, "loss_num": 0.0206298828125, "loss_xval": 0.71484375, "num_input_tokens_seen": 118039700, "step": 1883 }, { "epoch": 6.26955074875208, "grad_norm": 11.69603157043457, "learning_rate": 5e-06, "loss": 0.6937, "num_input_tokens_seen": 118102780, "step": 1884 }, { "epoch": 6.26955074875208, "loss": 0.7009948492050171, "loss_ce": 0.000738509523216635, "loss_iou": 0.2138671875, "loss_num": 0.054443359375, "loss_xval": 0.69921875, "num_input_tokens_seen": 118102780, "step": 1884 }, { "epoch": 6.27287853577371, "grad_norm": 12.617609977722168, "learning_rate": 5e-06, "loss": 0.5843, "num_input_tokens_seen": 118165944, "step": 1885 }, { "epoch": 6.27287853577371, "loss": 0.6710278987884521, "loss_ce": 7.402048595395172e-06, "loss_iou": 0.2451171875, "loss_num": 0.036376953125, "loss_xval": 0.671875, "num_input_tokens_seen": 118165944, "step": 1885 }, { "epoch": 6.276206322795341, "grad_norm": 23.12303924560547, "learning_rate": 5e-06, "loss": 0.5396, "num_input_tokens_seen": 118228200, "step": 1886 }, { "epoch": 6.276206322795341, "loss": 0.4650565981864929, "loss_ce": 9.078408766072243e-05, "loss_iou": 0.14453125, "loss_num": 0.03515625, "loss_xval": 0.46484375, "num_input_tokens_seen": 118228200, "step": 1886 }, { "epoch": 6.2795341098169715, "grad_norm": 23.25615882873535, "learning_rate": 5e-06, "loss": 0.7863, "num_input_tokens_seen": 118290532, "step": 1887 }, { "epoch": 6.2795341098169715, "loss": 0.5749128460884094, "loss_ce": 0.0005110011552460492, "loss_iou": 0.21875, "loss_num": 0.0274658203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 118290532, "step": 1887 }, { "epoch": 6.282861896838602, "grad_norm": 11.193317413330078, "learning_rate": 5e-06, "loss": 0.4665, "num_input_tokens_seen": 118353648, "step": 1888 }, { "epoch": 6.282861896838602, "loss": 0.54447340965271, "loss_ce": 0.00040606883703731, "loss_iou": 0.197265625, "loss_num": 0.0299072265625, "loss_xval": 0.54296875, "num_input_tokens_seen": 118353648, "step": 1888 }, { "epoch": 6.286189683860233, "grad_norm": 7.910712718963623, "learning_rate": 5e-06, "loss": 0.6236, "num_input_tokens_seen": 118415828, "step": 1889 }, { "epoch": 6.286189683860233, "loss": 0.6780449151992798, "loss_ce": 5.3703006415162235e-06, "loss_iou": 0.232421875, "loss_num": 0.04248046875, "loss_xval": 0.6796875, "num_input_tokens_seen": 118415828, "step": 1889 }, { "epoch": 6.289517470881863, "grad_norm": 11.725362777709961, "learning_rate": 5e-06, "loss": 0.2913, "num_input_tokens_seen": 118476732, "step": 1890 }, { "epoch": 6.289517470881863, "loss": 0.28724896907806396, "loss_ce": 1.75142522493843e-05, "loss_iou": 0.0732421875, "loss_num": 0.0281982421875, "loss_xval": 0.287109375, "num_input_tokens_seen": 118476732, "step": 1890 }, { "epoch": 6.292845257903494, "grad_norm": 8.73429012298584, "learning_rate": 5e-06, "loss": 0.6006, "num_input_tokens_seen": 118539264, "step": 1891 }, { "epoch": 6.292845257903494, "loss": 0.6598675847053528, "loss_ce": 0.00013859081082046032, "loss_iou": 0.1767578125, "loss_num": 0.0615234375, "loss_xval": 0.66015625, "num_input_tokens_seen": 118539264, "step": 1891 }, { "epoch": 6.2961730449251245, "grad_norm": 18.457820892333984, "learning_rate": 5e-06, "loss": 0.7737, "num_input_tokens_seen": 118600992, "step": 1892 }, { "epoch": 6.2961730449251245, "loss": 0.8073782324790955, "loss_ce": 5.173724730411777e-06, "loss_iou": 0.26953125, "loss_num": 0.0537109375, "loss_xval": 0.80859375, "num_input_tokens_seen": 118600992, "step": 1892 }, { "epoch": 6.299500831946755, "grad_norm": 17.106287002563477, "learning_rate": 5e-06, "loss": 0.543, "num_input_tokens_seen": 118662868, "step": 1893 }, { "epoch": 6.299500831946755, "loss": 0.7287246584892273, "loss_ce": 0.0006973082781769335, "loss_iou": 0.212890625, "loss_num": 0.060546875, "loss_xval": 0.7265625, "num_input_tokens_seen": 118662868, "step": 1893 }, { "epoch": 6.302828618968386, "grad_norm": 8.667593002319336, "learning_rate": 5e-06, "loss": 0.475, "num_input_tokens_seen": 118724928, "step": 1894 }, { "epoch": 6.302828618968386, "loss": 0.4340454041957855, "loss_ce": 2.4409535399172455e-05, "loss_iou": 0.1572265625, "loss_num": 0.024169921875, "loss_xval": 0.43359375, "num_input_tokens_seen": 118724928, "step": 1894 }, { "epoch": 6.306156405990016, "grad_norm": 13.89818286895752, "learning_rate": 5e-06, "loss": 0.6325, "num_input_tokens_seen": 118787460, "step": 1895 }, { "epoch": 6.306156405990016, "loss": 0.7812597751617432, "loss_ce": 9.770903488970362e-06, "loss_iou": 0.287109375, "loss_num": 0.04150390625, "loss_xval": 0.78125, "num_input_tokens_seen": 118787460, "step": 1895 }, { "epoch": 6.309484193011647, "grad_norm": 18.24062728881836, "learning_rate": 5e-06, "loss": 0.601, "num_input_tokens_seen": 118850520, "step": 1896 }, { "epoch": 6.309484193011647, "loss": 0.5813175439834595, "loss_ce": 0.00026285299099981785, "loss_iou": 0.228515625, "loss_num": 0.02490234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 118850520, "step": 1896 }, { "epoch": 6.312811980033278, "grad_norm": 29.606103897094727, "learning_rate": 5e-06, "loss": 0.8921, "num_input_tokens_seen": 118913128, "step": 1897 }, { "epoch": 6.312811980033278, "loss": 0.9781937599182129, "loss_ce": 0.00016637261433061212, "loss_iou": 0.38671875, "loss_num": 0.040771484375, "loss_xval": 0.9765625, "num_input_tokens_seen": 118913128, "step": 1897 }, { "epoch": 6.316139767054908, "grad_norm": 18.117826461791992, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 118976000, "step": 1898 }, { "epoch": 6.316139767054908, "loss": 0.5369445085525513, "loss_ce": 7.92514329077676e-05, "loss_iou": 0.203125, "loss_num": 0.0262451171875, "loss_xval": 0.53515625, "num_input_tokens_seen": 118976000, "step": 1898 }, { "epoch": 6.319467554076539, "grad_norm": 7.8375563621521, "learning_rate": 5e-06, "loss": 0.4849, "num_input_tokens_seen": 119038484, "step": 1899 }, { "epoch": 6.319467554076539, "loss": 0.45295053720474243, "loss_ce": 0.00019175911438651383, "loss_iou": 0.16796875, "loss_num": 0.0234375, "loss_xval": 0.453125, "num_input_tokens_seen": 119038484, "step": 1899 }, { "epoch": 6.322795341098169, "grad_norm": 13.073270797729492, "learning_rate": 5e-06, "loss": 0.7471, "num_input_tokens_seen": 119101732, "step": 1900 }, { "epoch": 6.322795341098169, "loss": 0.6276774406433105, "loss_ce": 5.2915853302692994e-05, "loss_iou": 0.21875, "loss_num": 0.037841796875, "loss_xval": 0.62890625, "num_input_tokens_seen": 119101732, "step": 1900 }, { "epoch": 6.3261231281198, "grad_norm": 18.98079490661621, "learning_rate": 5e-06, "loss": 0.7389, "num_input_tokens_seen": 119166388, "step": 1901 }, { "epoch": 6.3261231281198, "loss": 0.732851505279541, "loss_ce": 0.0004296669503673911, "loss_iou": 0.267578125, "loss_num": 0.0390625, "loss_xval": 0.734375, "num_input_tokens_seen": 119166388, "step": 1901 }, { "epoch": 6.329450915141431, "grad_norm": 15.860111236572266, "learning_rate": 5e-06, "loss": 0.6563, "num_input_tokens_seen": 119227856, "step": 1902 }, { "epoch": 6.329450915141431, "loss": 0.3609044551849365, "loss_ce": 3.5475788990879664e-06, "loss_iou": 0.0888671875, "loss_num": 0.03662109375, "loss_xval": 0.361328125, "num_input_tokens_seen": 119227856, "step": 1902 }, { "epoch": 6.332778702163061, "grad_norm": 10.316705703735352, "learning_rate": 5e-06, "loss": 0.6524, "num_input_tokens_seen": 119289240, "step": 1903 }, { "epoch": 6.332778702163061, "loss": 0.5905210971832275, "loss_ce": 0.00031114081502892077, "loss_iou": 0.1953125, "loss_num": 0.039794921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 119289240, "step": 1903 }, { "epoch": 6.336106489184692, "grad_norm": 9.102293014526367, "learning_rate": 5e-06, "loss": 0.6413, "num_input_tokens_seen": 119351260, "step": 1904 }, { "epoch": 6.336106489184692, "loss": 0.6257441639900208, "loss_ce": 1.1760233974200673e-05, "loss_iou": 0.2080078125, "loss_num": 0.042236328125, "loss_xval": 0.625, "num_input_tokens_seen": 119351260, "step": 1904 }, { "epoch": 6.3394342762063225, "grad_norm": 11.729586601257324, "learning_rate": 5e-06, "loss": 0.4582, "num_input_tokens_seen": 119412504, "step": 1905 }, { "epoch": 6.3394342762063225, "loss": 0.3936731815338135, "loss_ce": 2.6947098376695067e-05, "loss_iou": 0.1376953125, "loss_num": 0.023681640625, "loss_xval": 0.39453125, "num_input_tokens_seen": 119412504, "step": 1905 }, { "epoch": 6.342762063227953, "grad_norm": 7.044126033782959, "learning_rate": 5e-06, "loss": 0.3498, "num_input_tokens_seen": 119473268, "step": 1906 }, { "epoch": 6.342762063227953, "loss": 0.3633645176887512, "loss_ce": 8.325102680828422e-05, "loss_iou": 0.09521484375, "loss_num": 0.03466796875, "loss_xval": 0.36328125, "num_input_tokens_seen": 119473268, "step": 1906 }, { "epoch": 6.346089850249584, "grad_norm": 15.250025749206543, "learning_rate": 5e-06, "loss": 0.5319, "num_input_tokens_seen": 119536564, "step": 1907 }, { "epoch": 6.346089850249584, "loss": 0.6647416949272156, "loss_ce": 0.0006791893974877894, "loss_iou": 0.255859375, "loss_num": 0.0302734375, "loss_xval": 0.6640625, "num_input_tokens_seen": 119536564, "step": 1907 }, { "epoch": 6.349417637271214, "grad_norm": 8.832159996032715, "learning_rate": 5e-06, "loss": 0.4767, "num_input_tokens_seen": 119599260, "step": 1908 }, { "epoch": 6.349417637271214, "loss": 0.4554310441017151, "loss_ce": 1.9810768208117224e-06, "loss_iou": 0.1484375, "loss_num": 0.03173828125, "loss_xval": 0.455078125, "num_input_tokens_seen": 119599260, "step": 1908 }, { "epoch": 6.352745424292845, "grad_norm": 20.940961837768555, "learning_rate": 5e-06, "loss": 0.5773, "num_input_tokens_seen": 119659672, "step": 1909 }, { "epoch": 6.352745424292845, "loss": 0.7618123292922974, "loss_ce": 0.00033771333983168006, "loss_iou": 0.267578125, "loss_num": 0.045166015625, "loss_xval": 0.76171875, "num_input_tokens_seen": 119659672, "step": 1909 }, { "epoch": 6.356073211314476, "grad_norm": 24.2661075592041, "learning_rate": 5e-06, "loss": 0.696, "num_input_tokens_seen": 119722528, "step": 1910 }, { "epoch": 6.356073211314476, "loss": 0.7429176568984985, "loss_ce": 0.0019508958794176579, "loss_iou": 0.2412109375, "loss_num": 0.0517578125, "loss_xval": 0.7421875, "num_input_tokens_seen": 119722528, "step": 1910 }, { "epoch": 6.359400998336106, "grad_norm": 37.242706298828125, "learning_rate": 5e-06, "loss": 0.634, "num_input_tokens_seen": 119785720, "step": 1911 }, { "epoch": 6.359400998336106, "loss": 0.5460271835327148, "loss_ce": 0.0003728592419065535, "loss_iou": 0.1572265625, "loss_num": 0.046142578125, "loss_xval": 0.546875, "num_input_tokens_seen": 119785720, "step": 1911 }, { "epoch": 6.362728785357737, "grad_norm": 31.73426628112793, "learning_rate": 5e-06, "loss": 0.3969, "num_input_tokens_seen": 119845744, "step": 1912 }, { "epoch": 6.362728785357737, "loss": 0.410375714302063, "loss_ce": 9.737786604091525e-05, "loss_iou": 0.1201171875, "loss_num": 0.033935546875, "loss_xval": 0.41015625, "num_input_tokens_seen": 119845744, "step": 1912 }, { "epoch": 6.366056572379367, "grad_norm": 14.353583335876465, "learning_rate": 5e-06, "loss": 0.744, "num_input_tokens_seen": 119908044, "step": 1913 }, { "epoch": 6.366056572379367, "loss": 0.6974769234657288, "loss_ce": 0.0003333640634082258, "loss_iou": 0.255859375, "loss_num": 0.037353515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 119908044, "step": 1913 }, { "epoch": 6.369384359400998, "grad_norm": 20.74404525756836, "learning_rate": 5e-06, "loss": 0.7444, "num_input_tokens_seen": 119971476, "step": 1914 }, { "epoch": 6.369384359400998, "loss": 0.809575080871582, "loss_ce": 0.0004930697614327073, "loss_iou": 0.33203125, "loss_num": 0.0286865234375, "loss_xval": 0.80859375, "num_input_tokens_seen": 119971476, "step": 1914 }, { "epoch": 6.372712146422629, "grad_norm": 30.654464721679688, "learning_rate": 5e-06, "loss": 0.8526, "num_input_tokens_seen": 120035104, "step": 1915 }, { "epoch": 6.372712146422629, "loss": 0.6872565746307373, "loss_ce": 0.0003058834408875555, "loss_iou": 0.244140625, "loss_num": 0.03955078125, "loss_xval": 0.6875, "num_input_tokens_seen": 120035104, "step": 1915 }, { "epoch": 6.376039933444259, "grad_norm": 30.053728103637695, "learning_rate": 5e-06, "loss": 0.5157, "num_input_tokens_seen": 120095480, "step": 1916 }, { "epoch": 6.376039933444259, "loss": 0.44497495889663696, "loss_ce": 0.0003948814992327243, "loss_iou": 0.1376953125, "loss_num": 0.03369140625, "loss_xval": 0.4453125, "num_input_tokens_seen": 120095480, "step": 1916 }, { "epoch": 6.37936772046589, "grad_norm": 21.59766960144043, "learning_rate": 5e-06, "loss": 0.6, "num_input_tokens_seen": 120159084, "step": 1917 }, { "epoch": 6.37936772046589, "loss": 0.4307330250740051, "loss_ce": 0.00031313335057348013, "loss_iou": 0.1689453125, "loss_num": 0.018310546875, "loss_xval": 0.4296875, "num_input_tokens_seen": 120159084, "step": 1917 }, { "epoch": 6.3826955074875205, "grad_norm": 22.481719970703125, "learning_rate": 5e-06, "loss": 0.4955, "num_input_tokens_seen": 120221212, "step": 1918 }, { "epoch": 6.3826955074875205, "loss": 0.38188186287879944, "loss_ce": 0.00010695134551497176, "loss_iou": 0.10693359375, "loss_num": 0.03369140625, "loss_xval": 0.380859375, "num_input_tokens_seen": 120221212, "step": 1918 }, { "epoch": 6.386023294509151, "grad_norm": 14.502326011657715, "learning_rate": 5e-06, "loss": 0.591, "num_input_tokens_seen": 120282008, "step": 1919 }, { "epoch": 6.386023294509151, "loss": 0.5068145990371704, "loss_ce": 3.9744641981087625e-05, "loss_iou": 0.134765625, "loss_num": 0.04736328125, "loss_xval": 0.5078125, "num_input_tokens_seen": 120282008, "step": 1919 }, { "epoch": 6.389351081530782, "grad_norm": 18.54855728149414, "learning_rate": 5e-06, "loss": 0.5545, "num_input_tokens_seen": 120343332, "step": 1920 }, { "epoch": 6.389351081530782, "loss": 0.5725500583648682, "loss_ce": 0.0002844818227458745, "loss_iou": 0.193359375, "loss_num": 0.036865234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 120343332, "step": 1920 }, { "epoch": 6.392678868552412, "grad_norm": 14.879735946655273, "learning_rate": 5e-06, "loss": 0.5655, "num_input_tokens_seen": 120405376, "step": 1921 }, { "epoch": 6.392678868552412, "loss": 0.3202165961265564, "loss_ce": 2.6154777515330352e-05, "loss_iou": 0.0947265625, "loss_num": 0.026123046875, "loss_xval": 0.3203125, "num_input_tokens_seen": 120405376, "step": 1921 }, { "epoch": 6.396006655574043, "grad_norm": 8.68228816986084, "learning_rate": 5e-06, "loss": 0.6403, "num_input_tokens_seen": 120468084, "step": 1922 }, { "epoch": 6.396006655574043, "loss": 0.7539181709289551, "loss_ce": 1.1935225302295294e-05, "loss_iou": 0.296875, "loss_num": 0.031494140625, "loss_xval": 0.75390625, "num_input_tokens_seen": 120468084, "step": 1922 }, { "epoch": 6.3993344425956735, "grad_norm": 13.649946212768555, "learning_rate": 5e-06, "loss": 0.4563, "num_input_tokens_seen": 120527552, "step": 1923 }, { "epoch": 6.3993344425956735, "loss": 0.5272024869918823, "loss_ce": 0.0005301763885654509, "loss_iou": 0.1875, "loss_num": 0.030517578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 120527552, "step": 1923 }, { "epoch": 6.402662229617304, "grad_norm": 16.22941780090332, "learning_rate": 5e-06, "loss": 0.5914, "num_input_tokens_seen": 120591932, "step": 1924 }, { "epoch": 6.402662229617304, "loss": 0.6250810623168945, "loss_ce": 8.112353680189699e-05, "loss_iou": 0.248046875, "loss_num": 0.02587890625, "loss_xval": 0.625, "num_input_tokens_seen": 120591932, "step": 1924 }, { "epoch": 6.405990016638935, "grad_norm": 10.798599243164062, "learning_rate": 5e-06, "loss": 0.7191, "num_input_tokens_seen": 120654760, "step": 1925 }, { "epoch": 6.405990016638935, "loss": 0.8389195203781128, "loss_ce": 5.2269249863456935e-05, "loss_iou": 0.2470703125, "loss_num": 0.06884765625, "loss_xval": 0.83984375, "num_input_tokens_seen": 120654760, "step": 1925 }, { "epoch": 6.409317803660565, "grad_norm": 19.232425689697266, "learning_rate": 5e-06, "loss": 0.6786, "num_input_tokens_seen": 120719032, "step": 1926 }, { "epoch": 6.409317803660565, "loss": 0.6705414652824402, "loss_ce": 0.00013132776075508446, "loss_iou": 0.224609375, "loss_num": 0.044189453125, "loss_xval": 0.671875, "num_input_tokens_seen": 120719032, "step": 1926 }, { "epoch": 6.412645590682196, "grad_norm": 16.992197036743164, "learning_rate": 5e-06, "loss": 0.6058, "num_input_tokens_seen": 120782040, "step": 1927 }, { "epoch": 6.412645590682196, "loss": 0.49470213055610657, "loss_ce": 0.0007751373923383653, "loss_iou": 0.1669921875, "loss_num": 0.0322265625, "loss_xval": 0.494140625, "num_input_tokens_seen": 120782040, "step": 1927 }, { "epoch": 6.415973377703827, "grad_norm": 24.596677780151367, "learning_rate": 5e-06, "loss": 0.5817, "num_input_tokens_seen": 120844944, "step": 1928 }, { "epoch": 6.415973377703827, "loss": 0.8382806777954102, "loss_ce": 0.0006341689731925726, "loss_iou": 0.337890625, "loss_num": 0.0322265625, "loss_xval": 0.8359375, "num_input_tokens_seen": 120844944, "step": 1928 }, { "epoch": 6.419301164725457, "grad_norm": 57.29833221435547, "learning_rate": 5e-06, "loss": 0.6712, "num_input_tokens_seen": 120908508, "step": 1929 }, { "epoch": 6.419301164725457, "loss": 0.9020666480064392, "loss_ce": 0.0009436344844289124, "loss_iou": 0.357421875, "loss_num": 0.03759765625, "loss_xval": 0.90234375, "num_input_tokens_seen": 120908508, "step": 1929 }, { "epoch": 6.422628951747088, "grad_norm": 34.022457122802734, "learning_rate": 5e-06, "loss": 0.7903, "num_input_tokens_seen": 120971296, "step": 1930 }, { "epoch": 6.422628951747088, "loss": 0.9732491970062256, "loss_ce": 0.0003488144720904529, "loss_iou": 0.318359375, "loss_num": 0.0673828125, "loss_xval": 0.97265625, "num_input_tokens_seen": 120971296, "step": 1930 }, { "epoch": 6.425956738768718, "grad_norm": 9.980318069458008, "learning_rate": 5e-06, "loss": 0.7862, "num_input_tokens_seen": 121033336, "step": 1931 }, { "epoch": 6.425956738768718, "loss": 0.950108528137207, "loss_ce": 0.0002794343454297632, "loss_iou": 0.357421875, "loss_num": 0.046875, "loss_xval": 0.94921875, "num_input_tokens_seen": 121033336, "step": 1931 }, { "epoch": 6.429284525790349, "grad_norm": 32.449562072753906, "learning_rate": 5e-06, "loss": 0.8398, "num_input_tokens_seen": 121096428, "step": 1932 }, { "epoch": 6.429284525790349, "loss": 0.9572634696960449, "loss_ce": 0.0016970571596175432, "loss_iou": 0.337890625, "loss_num": 0.0556640625, "loss_xval": 0.95703125, "num_input_tokens_seen": 121096428, "step": 1932 }, { "epoch": 6.43261231281198, "grad_norm": 35.117149353027344, "learning_rate": 5e-06, "loss": 0.5623, "num_input_tokens_seen": 121159848, "step": 1933 }, { "epoch": 6.43261231281198, "loss": 0.5546911358833313, "loss_ce": 3.627412752393866e-06, "loss_iou": 0.2421875, "loss_num": 0.01416015625, "loss_xval": 0.5546875, "num_input_tokens_seen": 121159848, "step": 1933 }, { "epoch": 6.43594009983361, "grad_norm": 15.779326438903809, "learning_rate": 5e-06, "loss": 0.7291, "num_input_tokens_seen": 121223532, "step": 1934 }, { "epoch": 6.43594009983361, "loss": 0.5407153367996216, "loss_ce": 4.906645699520595e-06, "loss_iou": 0.173828125, "loss_num": 0.03857421875, "loss_xval": 0.5390625, "num_input_tokens_seen": 121223532, "step": 1934 }, { "epoch": 6.439267886855241, "grad_norm": 9.247297286987305, "learning_rate": 5e-06, "loss": 0.6451, "num_input_tokens_seen": 121286572, "step": 1935 }, { "epoch": 6.439267886855241, "loss": 0.7102029323577881, "loss_ce": 0.00011988512414973229, "loss_iou": 0.275390625, "loss_num": 0.03173828125, "loss_xval": 0.7109375, "num_input_tokens_seen": 121286572, "step": 1935 }, { "epoch": 6.4425956738768715, "grad_norm": 14.492554664611816, "learning_rate": 5e-06, "loss": 0.7337, "num_input_tokens_seen": 121349904, "step": 1936 }, { "epoch": 6.4425956738768715, "loss": 0.624147891998291, "loss_ce": 2.418019448668929e-06, "loss_iou": 0.234375, "loss_num": 0.031005859375, "loss_xval": 0.625, "num_input_tokens_seen": 121349904, "step": 1936 }, { "epoch": 6.445923460898502, "grad_norm": 8.146483421325684, "learning_rate": 5e-06, "loss": 0.643, "num_input_tokens_seen": 121412904, "step": 1937 }, { "epoch": 6.445923460898502, "loss": 0.7984688878059387, "loss_ce": 0.0006173126748763025, "loss_iou": 0.2578125, "loss_num": 0.056396484375, "loss_xval": 0.796875, "num_input_tokens_seen": 121412904, "step": 1937 }, { "epoch": 6.449251247920133, "grad_norm": 16.592941284179688, "learning_rate": 5e-06, "loss": 0.7215, "num_input_tokens_seen": 121476308, "step": 1938 }, { "epoch": 6.449251247920133, "loss": 0.5246685743331909, "loss_ce": 1.0389683666289784e-05, "loss_iou": 0.203125, "loss_num": 0.0238037109375, "loss_xval": 0.5234375, "num_input_tokens_seen": 121476308, "step": 1938 }, { "epoch": 6.452579034941763, "grad_norm": 31.58148765563965, "learning_rate": 5e-06, "loss": 0.5718, "num_input_tokens_seen": 121539036, "step": 1939 }, { "epoch": 6.452579034941763, "loss": 0.5909501314163208, "loss_ce": 7.782642569509335e-06, "loss_iou": 0.1796875, "loss_num": 0.04638671875, "loss_xval": 0.58984375, "num_input_tokens_seen": 121539036, "step": 1939 }, { "epoch": 6.455906821963394, "grad_norm": 18.935075759887695, "learning_rate": 5e-06, "loss": 0.5605, "num_input_tokens_seen": 121602176, "step": 1940 }, { "epoch": 6.455906821963394, "loss": 0.6536628007888794, "loss_ce": 9.836910612648353e-05, "loss_iou": 0.259765625, "loss_num": 0.02685546875, "loss_xval": 0.65234375, "num_input_tokens_seen": 121602176, "step": 1940 }, { "epoch": 6.4592346089850246, "grad_norm": 21.790918350219727, "learning_rate": 5e-06, "loss": 0.5405, "num_input_tokens_seen": 121665172, "step": 1941 }, { "epoch": 6.4592346089850246, "loss": 0.50648033618927, "loss_ce": 1.0590497367957141e-05, "loss_iou": 0.208984375, "loss_num": 0.017578125, "loss_xval": 0.5078125, "num_input_tokens_seen": 121665172, "step": 1941 }, { "epoch": 6.462562396006655, "grad_norm": 13.132232666015625, "learning_rate": 5e-06, "loss": 0.5395, "num_input_tokens_seen": 121728200, "step": 1942 }, { "epoch": 6.462562396006655, "loss": 0.5707424283027649, "loss_ce": 0.0006130391266196966, "loss_iou": 0.2060546875, "loss_num": 0.03173828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 121728200, "step": 1942 }, { "epoch": 6.465890183028286, "grad_norm": 17.78367805480957, "learning_rate": 5e-06, "loss": 0.6586, "num_input_tokens_seen": 121792512, "step": 1943 }, { "epoch": 6.465890183028286, "loss": 0.5325543880462646, "loss_ce": 8.367840200662613e-05, "loss_iou": 0.2158203125, "loss_num": 0.0203857421875, "loss_xval": 0.53125, "num_input_tokens_seen": 121792512, "step": 1943 }, { "epoch": 6.469217970049916, "grad_norm": 15.686144828796387, "learning_rate": 5e-06, "loss": 0.6654, "num_input_tokens_seen": 121856412, "step": 1944 }, { "epoch": 6.469217970049916, "loss": 0.7615238428115845, "loss_ce": 0.0012699364451691508, "loss_iou": 0.314453125, "loss_num": 0.026123046875, "loss_xval": 0.76171875, "num_input_tokens_seen": 121856412, "step": 1944 }, { "epoch": 6.472545757071547, "grad_norm": 7.845279216766357, "learning_rate": 5e-06, "loss": 0.4848, "num_input_tokens_seen": 121917020, "step": 1945 }, { "epoch": 6.472545757071547, "loss": 0.688970685005188, "loss_ce": 5.831275302625727e-06, "loss_iou": 0.234375, "loss_num": 0.043701171875, "loss_xval": 0.6875, "num_input_tokens_seen": 121917020, "step": 1945 }, { "epoch": 6.475873544093178, "grad_norm": 20.568973541259766, "learning_rate": 5e-06, "loss": 0.7828, "num_input_tokens_seen": 121979608, "step": 1946 }, { "epoch": 6.475873544093178, "loss": 0.8081650733947754, "loss_ce": 5.954839434707537e-05, "loss_iou": 0.29296875, "loss_num": 0.044677734375, "loss_xval": 0.80859375, "num_input_tokens_seen": 121979608, "step": 1946 }, { "epoch": 6.479201331114808, "grad_norm": 16.98797035217285, "learning_rate": 5e-06, "loss": 0.5843, "num_input_tokens_seen": 122041476, "step": 1947 }, { "epoch": 6.479201331114808, "loss": 0.7684845328330994, "loss_ce": 0.00023504139971919358, "loss_iou": 0.2412109375, "loss_num": 0.05712890625, "loss_xval": 0.76953125, "num_input_tokens_seen": 122041476, "step": 1947 }, { "epoch": 6.482529118136439, "grad_norm": 28.3812198638916, "learning_rate": 5e-06, "loss": 0.5155, "num_input_tokens_seen": 122104280, "step": 1948 }, { "epoch": 6.482529118136439, "loss": 0.5390049815177917, "loss_ce": 3.5391926758165937e-06, "loss_iou": 0.19921875, "loss_num": 0.0283203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 122104280, "step": 1948 }, { "epoch": 6.4858569051580695, "grad_norm": 20.950366973876953, "learning_rate": 5e-06, "loss": 0.6033, "num_input_tokens_seen": 122167244, "step": 1949 }, { "epoch": 6.4858569051580695, "loss": 0.5729150772094727, "loss_ce": 3.912730608135462e-05, "loss_iou": 0.1943359375, "loss_num": 0.037109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 122167244, "step": 1949 }, { "epoch": 6.4891846921797, "grad_norm": 169.8596954345703, "learning_rate": 5e-06, "loss": 0.6967, "num_input_tokens_seen": 122230148, "step": 1950 }, { "epoch": 6.4891846921797, "loss": 0.7146044373512268, "loss_ce": 4.7948406063369475e-06, "loss_iou": 0.2490234375, "loss_num": 0.043212890625, "loss_xval": 0.71484375, "num_input_tokens_seen": 122230148, "step": 1950 }, { "epoch": 6.492512479201331, "grad_norm": 17.167802810668945, "learning_rate": 5e-06, "loss": 0.6851, "num_input_tokens_seen": 122293260, "step": 1951 }, { "epoch": 6.492512479201331, "loss": 0.9411917924880981, "loss_ce": 0.00015173610881902277, "loss_iou": 0.310546875, "loss_num": 0.06396484375, "loss_xval": 0.94140625, "num_input_tokens_seen": 122293260, "step": 1951 }, { "epoch": 6.495840266222961, "grad_norm": 16.642410278320312, "learning_rate": 5e-06, "loss": 0.6711, "num_input_tokens_seen": 122357024, "step": 1952 }, { "epoch": 6.495840266222961, "loss": 0.7117819786071777, "loss_ce": 0.0008444524137303233, "loss_iou": 0.22265625, "loss_num": 0.052978515625, "loss_xval": 0.7109375, "num_input_tokens_seen": 122357024, "step": 1952 }, { "epoch": 6.499168053244592, "grad_norm": 42.91002655029297, "learning_rate": 5e-06, "loss": 0.4998, "num_input_tokens_seen": 122419040, "step": 1953 }, { "epoch": 6.499168053244592, "loss": 0.5494789481163025, "loss_ce": 9.958090231521055e-06, "loss_iou": 0.1201171875, "loss_num": 0.061767578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 122419040, "step": 1953 }, { "epoch": 6.5024958402662225, "grad_norm": 13.140467643737793, "learning_rate": 5e-06, "loss": 0.4957, "num_input_tokens_seen": 122481344, "step": 1954 }, { "epoch": 6.5024958402662225, "loss": 0.5514519810676575, "loss_ce": 0.0006096858996897936, "loss_iou": 0.19140625, "loss_num": 0.033447265625, "loss_xval": 0.55078125, "num_input_tokens_seen": 122481344, "step": 1954 }, { "epoch": 6.505823627287853, "grad_norm": 10.665213584899902, "learning_rate": 5e-06, "loss": 0.4857, "num_input_tokens_seen": 122543032, "step": 1955 }, { "epoch": 6.505823627287853, "loss": 0.5029000639915466, "loss_ce": 0.0010384938213974237, "loss_iou": 0.146484375, "loss_num": 0.041748046875, "loss_xval": 0.5, "num_input_tokens_seen": 122543032, "step": 1955 }, { "epoch": 6.509151414309484, "grad_norm": 13.343578338623047, "learning_rate": 5e-06, "loss": 0.677, "num_input_tokens_seen": 122606856, "step": 1956 }, { "epoch": 6.509151414309484, "loss": 0.6026760935783386, "loss_ce": 1.4985269444878213e-05, "loss_iou": 0.2060546875, "loss_num": 0.037841796875, "loss_xval": 0.6015625, "num_input_tokens_seen": 122606856, "step": 1956 }, { "epoch": 6.512479201331114, "grad_norm": 28.366371154785156, "learning_rate": 5e-06, "loss": 0.6724, "num_input_tokens_seen": 122670248, "step": 1957 }, { "epoch": 6.512479201331114, "loss": 0.7314993143081665, "loss_ce": 5.398355642682873e-05, "loss_iou": 0.28125, "loss_num": 0.033935546875, "loss_xval": 0.73046875, "num_input_tokens_seen": 122670248, "step": 1957 }, { "epoch": 6.515806988352745, "grad_norm": 27.247312545776367, "learning_rate": 5e-06, "loss": 0.7705, "num_input_tokens_seen": 122732860, "step": 1958 }, { "epoch": 6.515806988352745, "loss": 0.8790924549102783, "loss_ce": 3.0824662644590717e-06, "loss_iou": 0.30078125, "loss_num": 0.05517578125, "loss_xval": 0.87890625, "num_input_tokens_seen": 122732860, "step": 1958 }, { "epoch": 6.519134775374376, "grad_norm": 21.155467987060547, "learning_rate": 5e-06, "loss": 0.4622, "num_input_tokens_seen": 122796200, "step": 1959 }, { "epoch": 6.519134775374376, "loss": 0.5186492204666138, "loss_ce": 3.3504758903291076e-05, "loss_iou": 0.216796875, "loss_num": 0.0172119140625, "loss_xval": 0.51953125, "num_input_tokens_seen": 122796200, "step": 1959 }, { "epoch": 6.522462562396006, "grad_norm": 23.27404022216797, "learning_rate": 5e-06, "loss": 0.4168, "num_input_tokens_seen": 122860296, "step": 1960 }, { "epoch": 6.522462562396006, "loss": 0.4012794494628906, "loss_ce": 0.000156403926666826, "loss_iou": 0.1650390625, "loss_num": 0.0142822265625, "loss_xval": 0.400390625, "num_input_tokens_seen": 122860296, "step": 1960 }, { "epoch": 6.525790349417637, "grad_norm": 13.030220985412598, "learning_rate": 5e-06, "loss": 0.3663, "num_input_tokens_seen": 122921044, "step": 1961 }, { "epoch": 6.525790349417637, "loss": 0.3120866119861603, "loss_ce": 0.0003190397401340306, "loss_iou": 0.0625, "loss_num": 0.037353515625, "loss_xval": 0.3125, "num_input_tokens_seen": 122921044, "step": 1961 }, { "epoch": 6.529118136439267, "grad_norm": 10.037192344665527, "learning_rate": 5e-06, "loss": 0.7406, "num_input_tokens_seen": 122984868, "step": 1962 }, { "epoch": 6.529118136439267, "loss": 0.5707440376281738, "loss_ce": 4.282629561203066e-06, "loss_iou": 0.158203125, "loss_num": 0.05126953125, "loss_xval": 0.5703125, "num_input_tokens_seen": 122984868, "step": 1962 }, { "epoch": 6.532445923460898, "grad_norm": 10.589938163757324, "learning_rate": 5e-06, "loss": 0.6566, "num_input_tokens_seen": 123048232, "step": 1963 }, { "epoch": 6.532445923460898, "loss": 0.7610204219818115, "loss_ce": 3.5769035093835555e-06, "loss_iou": 0.271484375, "loss_num": 0.04345703125, "loss_xval": 0.76171875, "num_input_tokens_seen": 123048232, "step": 1963 }, { "epoch": 6.535773710482529, "grad_norm": 8.422849655151367, "learning_rate": 5e-06, "loss": 0.6621, "num_input_tokens_seen": 123111472, "step": 1964 }, { "epoch": 6.535773710482529, "loss": 0.9324227571487427, "loss_ce": 0.0006600393098779023, "loss_iou": 0.359375, "loss_num": 0.04296875, "loss_xval": 0.93359375, "num_input_tokens_seen": 123111472, "step": 1964 }, { "epoch": 6.539101497504159, "grad_norm": 15.22018051147461, "learning_rate": 5e-06, "loss": 0.5645, "num_input_tokens_seen": 123173888, "step": 1965 }, { "epoch": 6.539101497504159, "loss": 0.6508817076683044, "loss_ce": 2.81253505818313e-06, "loss_iou": 0.2216796875, "loss_num": 0.04150390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 123173888, "step": 1965 }, { "epoch": 6.54242928452579, "grad_norm": 24.73088836669922, "learning_rate": 5e-06, "loss": 0.5783, "num_input_tokens_seen": 123238200, "step": 1966 }, { "epoch": 6.54242928452579, "loss": 0.5465837121009827, "loss_ce": 0.00041062149102799594, "loss_iou": 0.212890625, "loss_num": 0.024169921875, "loss_xval": 0.546875, "num_input_tokens_seen": 123238200, "step": 1966 }, { "epoch": 6.5457570715474205, "grad_norm": 12.23395824432373, "learning_rate": 5e-06, "loss": 0.4848, "num_input_tokens_seen": 123300532, "step": 1967 }, { "epoch": 6.5457570715474205, "loss": 0.31552085280418396, "loss_ce": 3.0131395760690793e-05, "loss_iou": 0.08447265625, "loss_num": 0.029296875, "loss_xval": 0.31640625, "num_input_tokens_seen": 123300532, "step": 1967 }, { "epoch": 6.549084858569051, "grad_norm": 9.520037651062012, "learning_rate": 5e-06, "loss": 0.5345, "num_input_tokens_seen": 123363960, "step": 1968 }, { "epoch": 6.549084858569051, "loss": 0.6138197183609009, "loss_ce": 0.00017227133503183722, "loss_iou": 0.255859375, "loss_num": 0.0206298828125, "loss_xval": 0.61328125, "num_input_tokens_seen": 123363960, "step": 1968 }, { "epoch": 6.552412645590682, "grad_norm": 14.548693656921387, "learning_rate": 5e-06, "loss": 0.7731, "num_input_tokens_seen": 123425812, "step": 1969 }, { "epoch": 6.552412645590682, "loss": 0.6923923492431641, "loss_ce": 9.576291631674394e-06, "loss_iou": 0.2578125, "loss_num": 0.03515625, "loss_xval": 0.69140625, "num_input_tokens_seen": 123425812, "step": 1969 }, { "epoch": 6.555740432612312, "grad_norm": 8.019848823547363, "learning_rate": 5e-06, "loss": 0.6374, "num_input_tokens_seen": 123489716, "step": 1970 }, { "epoch": 6.555740432612312, "loss": 0.8213018178939819, "loss_ce": 0.0011114223161712289, "loss_iou": 0.30859375, "loss_num": 0.040771484375, "loss_xval": 0.8203125, "num_input_tokens_seen": 123489716, "step": 1970 }, { "epoch": 6.559068219633943, "grad_norm": 16.91303825378418, "learning_rate": 5e-06, "loss": 0.7287, "num_input_tokens_seen": 123552712, "step": 1971 }, { "epoch": 6.559068219633943, "loss": 1.0515234470367432, "loss_ce": 9.81136690825224e-06, "loss_iou": 0.390625, "loss_num": 0.05419921875, "loss_xval": 1.0546875, "num_input_tokens_seen": 123552712, "step": 1971 }, { "epoch": 6.5623960066555735, "grad_norm": 21.37618637084961, "learning_rate": 5e-06, "loss": 0.6965, "num_input_tokens_seen": 123617200, "step": 1972 }, { "epoch": 6.5623960066555735, "loss": 0.7568812370300293, "loss_ce": 0.0005336235626600683, "loss_iou": 0.296875, "loss_num": 0.03271484375, "loss_xval": 0.7578125, "num_input_tokens_seen": 123617200, "step": 1972 }, { "epoch": 6.565723793677205, "grad_norm": 22.937864303588867, "learning_rate": 5e-06, "loss": 0.5355, "num_input_tokens_seen": 123680176, "step": 1973 }, { "epoch": 6.565723793677205, "loss": 0.5937581658363342, "loss_ce": 8.172046364052221e-06, "loss_iou": 0.1953125, "loss_num": 0.04052734375, "loss_xval": 0.59375, "num_input_tokens_seen": 123680176, "step": 1973 }, { "epoch": 6.569051580698836, "grad_norm": 24.594566345214844, "learning_rate": 5e-06, "loss": 0.3389, "num_input_tokens_seen": 123743100, "step": 1974 }, { "epoch": 6.569051580698836, "loss": 0.30621254444122314, "loss_ce": 0.0013419582974165678, "loss_iou": 0.09765625, "loss_num": 0.02197265625, "loss_xval": 0.3046875, "num_input_tokens_seen": 123743100, "step": 1974 }, { "epoch": 6.572379367720466, "grad_norm": 25.608503341674805, "learning_rate": 5e-06, "loss": 0.8212, "num_input_tokens_seen": 123805448, "step": 1975 }, { "epoch": 6.572379367720466, "loss": 0.7135950922966003, "loss_ce": 0.00012463887105695903, "loss_iou": 0.216796875, "loss_num": 0.055908203125, "loss_xval": 0.71484375, "num_input_tokens_seen": 123805448, "step": 1975 }, { "epoch": 6.575707154742097, "grad_norm": 24.193180084228516, "learning_rate": 5e-06, "loss": 0.6803, "num_input_tokens_seen": 123869248, "step": 1976 }, { "epoch": 6.575707154742097, "loss": 0.6412417888641357, "loss_ce": 6.492140528280288e-06, "loss_iou": 0.2294921875, "loss_num": 0.03662109375, "loss_xval": 0.640625, "num_input_tokens_seen": 123869248, "step": 1976 }, { "epoch": 6.5790349417637275, "grad_norm": 10.406999588012695, "learning_rate": 5e-06, "loss": 0.4479, "num_input_tokens_seen": 123928860, "step": 1977 }, { "epoch": 6.5790349417637275, "loss": 0.41016751527786255, "loss_ce": 1.1262142834311817e-05, "loss_iou": 0.1064453125, "loss_num": 0.039306640625, "loss_xval": 0.41015625, "num_input_tokens_seen": 123928860, "step": 1977 }, { "epoch": 6.582362728785358, "grad_norm": 8.002711296081543, "learning_rate": 5e-06, "loss": 0.567, "num_input_tokens_seen": 123990632, "step": 1978 }, { "epoch": 6.582362728785358, "loss": 0.494340181350708, "loss_ce": 0.0027019870467483997, "loss_iou": 0.1455078125, "loss_num": 0.0400390625, "loss_xval": 0.4921875, "num_input_tokens_seen": 123990632, "step": 1978 }, { "epoch": 6.585690515806989, "grad_norm": 16.56126594543457, "learning_rate": 5e-06, "loss": 0.5104, "num_input_tokens_seen": 124054400, "step": 1979 }, { "epoch": 6.585690515806989, "loss": 0.6907404065132141, "loss_ce": 0.0001886559184640646, "loss_iou": 0.232421875, "loss_num": 0.045166015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 124054400, "step": 1979 }, { "epoch": 6.589018302828619, "grad_norm": 25.32575035095215, "learning_rate": 5e-06, "loss": 0.9261, "num_input_tokens_seen": 124116464, "step": 1980 }, { "epoch": 6.589018302828619, "loss": 0.9043534994125366, "loss_ce": 0.0009415812091901898, "loss_iou": 0.3515625, "loss_num": 0.0400390625, "loss_xval": 0.90234375, "num_input_tokens_seen": 124116464, "step": 1980 }, { "epoch": 6.59234608985025, "grad_norm": 16.51534080505371, "learning_rate": 5e-06, "loss": 0.4392, "num_input_tokens_seen": 124178408, "step": 1981 }, { "epoch": 6.59234608985025, "loss": 0.41714999079704285, "loss_ce": 3.5727938666241243e-05, "loss_iou": 0.140625, "loss_num": 0.027099609375, "loss_xval": 0.41796875, "num_input_tokens_seen": 124178408, "step": 1981 }, { "epoch": 6.595673876871881, "grad_norm": 7.1235480308532715, "learning_rate": 5e-06, "loss": 0.5644, "num_input_tokens_seen": 124240520, "step": 1982 }, { "epoch": 6.595673876871881, "loss": 0.4954923391342163, "loss_ce": 8.92287880560616e-06, "loss_iou": 0.171875, "loss_num": 0.0303955078125, "loss_xval": 0.49609375, "num_input_tokens_seen": 124240520, "step": 1982 }, { "epoch": 6.599001663893511, "grad_norm": 24.982694625854492, "learning_rate": 5e-06, "loss": 0.6272, "num_input_tokens_seen": 124302512, "step": 1983 }, { "epoch": 6.599001663893511, "loss": 0.5436439514160156, "loss_ce": 0.00043108168756589293, "loss_iou": 0.1708984375, "loss_num": 0.04052734375, "loss_xval": 0.54296875, "num_input_tokens_seen": 124302512, "step": 1983 }, { "epoch": 6.602329450915142, "grad_norm": 33.017826080322266, "learning_rate": 5e-06, "loss": 0.3919, "num_input_tokens_seen": 124362800, "step": 1984 }, { "epoch": 6.602329450915142, "loss": 0.46781930327415466, "loss_ce": 0.00047312505193986, "loss_iou": 0.1513671875, "loss_num": 0.03271484375, "loss_xval": 0.466796875, "num_input_tokens_seen": 124362800, "step": 1984 }, { "epoch": 6.605657237936772, "grad_norm": 22.29727554321289, "learning_rate": 5e-06, "loss": 0.7233, "num_input_tokens_seen": 124426224, "step": 1985 }, { "epoch": 6.605657237936772, "loss": 0.5487404465675354, "loss_ce": 0.00015649232955183834, "loss_iou": 0.228515625, "loss_num": 0.0181884765625, "loss_xval": 0.546875, "num_input_tokens_seen": 124426224, "step": 1985 }, { "epoch": 6.608985024958403, "grad_norm": 20.616010665893555, "learning_rate": 5e-06, "loss": 0.672, "num_input_tokens_seen": 124488524, "step": 1986 }, { "epoch": 6.608985024958403, "loss": 0.8039915561676025, "loss_ce": 3.646014738478698e-05, "loss_iou": 0.29296875, "loss_num": 0.0439453125, "loss_xval": 0.8046875, "num_input_tokens_seen": 124488524, "step": 1986 }, { "epoch": 6.612312811980034, "grad_norm": 23.763381958007812, "learning_rate": 5e-06, "loss": 0.3352, "num_input_tokens_seen": 124550424, "step": 1987 }, { "epoch": 6.612312811980034, "loss": 0.43114519119262695, "loss_ce": 0.0008473432390019298, "loss_iou": 0.16796875, "loss_num": 0.018798828125, "loss_xval": 0.4296875, "num_input_tokens_seen": 124550424, "step": 1987 }, { "epoch": 6.615640599001664, "grad_norm": 27.879549026489258, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 124613560, "step": 1988 }, { "epoch": 6.615640599001664, "loss": 0.37632501125335693, "loss_ce": 0.00022637727670371532, "loss_iou": 0.1083984375, "loss_num": 0.03173828125, "loss_xval": 0.376953125, "num_input_tokens_seen": 124613560, "step": 1988 }, { "epoch": 6.618968386023295, "grad_norm": 10.46766471862793, "learning_rate": 5e-06, "loss": 0.7217, "num_input_tokens_seen": 124675324, "step": 1989 }, { "epoch": 6.618968386023295, "loss": 0.9369086623191833, "loss_ce": 0.0006293991464190185, "loss_iou": 0.330078125, "loss_num": 0.054931640625, "loss_xval": 0.9375, "num_input_tokens_seen": 124675324, "step": 1989 }, { "epoch": 6.6222961730449255, "grad_norm": 9.375749588012695, "learning_rate": 5e-06, "loss": 0.5045, "num_input_tokens_seen": 124737360, "step": 1990 }, { "epoch": 6.6222961730449255, "loss": 0.5972847938537598, "loss_ce": 0.00011687100777635351, "loss_iou": 0.15625, "loss_num": 0.05712890625, "loss_xval": 0.59765625, "num_input_tokens_seen": 124737360, "step": 1990 }, { "epoch": 6.625623960066556, "grad_norm": 11.062143325805664, "learning_rate": 5e-06, "loss": 0.6695, "num_input_tokens_seen": 124800240, "step": 1991 }, { "epoch": 6.625623960066556, "loss": 0.7425123453140259, "loss_ce": 0.00014171643124427646, "loss_iou": 0.259765625, "loss_num": 0.044677734375, "loss_xval": 0.7421875, "num_input_tokens_seen": 124800240, "step": 1991 }, { "epoch": 6.628951747088187, "grad_norm": 20.793167114257812, "learning_rate": 5e-06, "loss": 0.716, "num_input_tokens_seen": 124862880, "step": 1992 }, { "epoch": 6.628951747088187, "loss": 0.7573298215866089, "loss_ce": 0.0008600892615504563, "loss_iou": 0.3125, "loss_num": 0.0260009765625, "loss_xval": 0.7578125, "num_input_tokens_seen": 124862880, "step": 1992 }, { "epoch": 6.632279534109817, "grad_norm": 14.1843843460083, "learning_rate": 5e-06, "loss": 0.7859, "num_input_tokens_seen": 124926248, "step": 1993 }, { "epoch": 6.632279534109817, "loss": 0.7015025615692139, "loss_ce": 0.0005137378466315567, "loss_iou": 0.24609375, "loss_num": 0.0419921875, "loss_xval": 0.69921875, "num_input_tokens_seen": 124926248, "step": 1993 }, { "epoch": 6.635607321131448, "grad_norm": 11.194124221801758, "learning_rate": 5e-06, "loss": 0.7268, "num_input_tokens_seen": 124988676, "step": 1994 }, { "epoch": 6.635607321131448, "loss": 0.6264942288398743, "loss_ce": 0.00015145396173465997, "loss_iou": 0.2275390625, "loss_num": 0.034423828125, "loss_xval": 0.625, "num_input_tokens_seen": 124988676, "step": 1994 }, { "epoch": 6.6389351081530785, "grad_norm": 25.166015625, "learning_rate": 5e-06, "loss": 0.8144, "num_input_tokens_seen": 125052608, "step": 1995 }, { "epoch": 6.6389351081530785, "loss": 0.6432565450668335, "loss_ce": 0.0005563285085372627, "loss_iou": 0.26953125, "loss_num": 0.0211181640625, "loss_xval": 0.64453125, "num_input_tokens_seen": 125052608, "step": 1995 }, { "epoch": 6.642262895174709, "grad_norm": 13.27176284790039, "learning_rate": 5e-06, "loss": 0.5501, "num_input_tokens_seen": 125115068, "step": 1996 }, { "epoch": 6.642262895174709, "loss": 0.636357307434082, "loss_ce": 4.755427653435618e-06, "loss_iou": 0.236328125, "loss_num": 0.03271484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 125115068, "step": 1996 }, { "epoch": 6.64559068219634, "grad_norm": 9.809295654296875, "learning_rate": 5e-06, "loss": 0.4838, "num_input_tokens_seen": 125178836, "step": 1997 }, { "epoch": 6.64559068219634, "loss": 0.4671699106693268, "loss_ce": 6.814506832597544e-06, "loss_iou": 0.17578125, "loss_num": 0.023193359375, "loss_xval": 0.466796875, "num_input_tokens_seen": 125178836, "step": 1997 }, { "epoch": 6.64891846921797, "grad_norm": 12.373933792114258, "learning_rate": 5e-06, "loss": 0.6771, "num_input_tokens_seen": 125240156, "step": 1998 }, { "epoch": 6.64891846921797, "loss": 0.8025074005126953, "loss_ce": 0.000261329987552017, "loss_iou": 0.3125, "loss_num": 0.03564453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 125240156, "step": 1998 }, { "epoch": 6.652246256239601, "grad_norm": 9.41878890991211, "learning_rate": 5e-06, "loss": 0.3868, "num_input_tokens_seen": 125301936, "step": 1999 }, { "epoch": 6.652246256239601, "loss": 0.34799274802207947, "loss_ce": 0.0012367584276944399, "loss_iou": 0.1044921875, "loss_num": 0.0277099609375, "loss_xval": 0.34765625, "num_input_tokens_seen": 125301936, "step": 1999 }, { "epoch": 6.655574043261232, "grad_norm": 14.496203422546387, "learning_rate": 5e-06, "loss": 0.5779, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "eval_seeclick_CIoU": 0.049663716927170753, "eval_seeclick_GIoU": 0.06374245136976242, "eval_seeclick_IoU": 0.16034941375255585, "eval_seeclick_MAE_all": 0.17226766049861908, "eval_seeclick_MAE_h": 0.06194095313549042, "eval_seeclick_MAE_w": 0.14316179975867271, "eval_seeclick_MAE_x_boxes": 0.1952127441763878, "eval_seeclick_MAE_y_boxes": 0.17947708815336227, "eval_seeclick_NUM_probability": 0.9999348521232605, "eval_seeclick_inside_bbox": 0.22500000149011612, "eval_seeclick_loss": 2.8665401935577393, "eval_seeclick_loss_ce": 0.13720494508743286, "eval_seeclick_loss_iou": 0.944091796875, "eval_seeclick_loss_num": 0.1699981689453125, "eval_seeclick_loss_xval": 2.73779296875, "eval_seeclick_runtime": 67.6924, "eval_seeclick_samples_per_second": 0.694, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "eval_icons_CIoU": -0.025080785155296326, "eval_icons_GIoU": 0.060260893777012825, "eval_icons_IoU": 0.1339002624154091, "eval_icons_MAE_all": 0.1575927510857582, "eval_icons_MAE_h": 0.11913510411977768, "eval_icons_MAE_w": 0.1782698780298233, "eval_icons_MAE_x_boxes": 0.11928394436836243, "eval_icons_MAE_y_boxes": 0.05949154309928417, "eval_icons_NUM_probability": 0.999984860420227, "eval_icons_inside_bbox": 0.3194444477558136, "eval_icons_loss": 2.695627212524414, "eval_icons_loss_ce": 2.2335291305353167e-06, "eval_icons_loss_iou": 0.9580078125, "eval_icons_loss_num": 0.1633148193359375, "eval_icons_loss_xval": 2.73291015625, "eval_icons_runtime": 64.762, "eval_icons_samples_per_second": 0.772, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "eval_screenspot_CIoU": 0.15519791344801584, "eval_screenspot_GIoU": 0.18316218753655752, "eval_screenspot_IoU": 0.2752463718255361, "eval_screenspot_MAE_all": 0.12586518128712973, "eval_screenspot_MAE_h": 0.06649150823553403, "eval_screenspot_MAE_w": 0.11607248336076736, "eval_screenspot_MAE_x_boxes": 0.15282956510782242, "eval_screenspot_MAE_y_boxes": 0.10026555508375168, "eval_screenspot_NUM_probability": 0.9999839663505554, "eval_screenspot_inside_bbox": 0.4662500023841858, "eval_screenspot_loss": 2.317641496658325, "eval_screenspot_loss_ce": 4.679763090583341e-05, "eval_screenspot_loss_iou": 0.84130859375, "eval_screenspot_loss_num": 0.13783772786458334, "eval_screenspot_loss_xval": 2.3727213541666665, "eval_screenspot_runtime": 118.2468, "eval_screenspot_samples_per_second": 0.753, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "eval_compot_CIoU": -0.0015120906755328178, "eval_compot_GIoU": 0.05934638902544975, "eval_compot_IoU": 0.16132647544145584, "eval_compot_MAE_all": 0.18283969908952713, "eval_compot_MAE_h": 0.07420476526021957, "eval_compot_MAE_w": 0.2161550223827362, "eval_compot_MAE_x_boxes": 0.1663825437426567, "eval_compot_MAE_y_boxes": 0.13507302105426788, "eval_compot_NUM_probability": 0.9999814033508301, "eval_compot_inside_bbox": 0.3263888955116272, "eval_compot_loss": 2.8217058181762695, "eval_compot_loss_ce": 0.003316763788461685, "eval_compot_loss_iou": 0.955322265625, "eval_compot_loss_num": 0.19614028930664062, "eval_compot_loss_xval": 2.8935546875, "eval_compot_runtime": 73.1289, "eval_compot_samples_per_second": 0.684, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "eval_custom_ui_MAE_all": 0.07874628901481628, "eval_custom_ui_MAE_x": 0.0833187997341156, "eval_custom_ui_MAE_y": 0.07417377084493637, "eval_custom_ui_NUM_probability": 0.9999963641166687, "eval_custom_ui_loss": 0.36914271116256714, "eval_custom_ui_loss_ce": 1.776359056293586e-06, "eval_custom_ui_loss_num": 0.074951171875, "eval_custom_ui_loss_xval": 0.37506103515625, "eval_custom_ui_runtime": 50.8936, "eval_custom_ui_samples_per_second": 0.982, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.655574043261232, "loss": 0.3878188133239746, "loss_ce": 1.4133303238850203e-06, "loss_iou": 0.0, "loss_num": 0.07763671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 125362816, "step": 2000 }, { "epoch": 6.658901830282862, "grad_norm": 14.186056137084961, "learning_rate": 5e-06, "loss": 0.5285, "num_input_tokens_seen": 125425272, "step": 2001 }, { "epoch": 6.658901830282862, "loss": 0.5756962299346924, "loss_ce": 1.261890065507032e-05, "loss_iou": 0.1767578125, "loss_num": 0.04443359375, "loss_xval": 0.57421875, "num_input_tokens_seen": 125425272, "step": 2001 }, { "epoch": 6.662229617304493, "grad_norm": 10.43306827545166, "learning_rate": 5e-06, "loss": 0.4228, "num_input_tokens_seen": 125487528, "step": 2002 }, { "epoch": 6.662229617304493, "loss": 0.25102782249450684, "loss_ce": 0.00035644686431623995, "loss_iou": 0.0732421875, "loss_num": 0.02099609375, "loss_xval": 0.25, "num_input_tokens_seen": 125487528, "step": 2002 }, { "epoch": 6.665557404326123, "grad_norm": 20.973928451538086, "learning_rate": 5e-06, "loss": 0.5455, "num_input_tokens_seen": 125549488, "step": 2003 }, { "epoch": 6.665557404326123, "loss": 0.5683923363685608, "loss_ce": 3.297496004961431e-05, "loss_iou": 0.1904296875, "loss_num": 0.03759765625, "loss_xval": 0.5703125, "num_input_tokens_seen": 125549488, "step": 2003 }, { "epoch": 6.668885191347754, "grad_norm": 29.850643157958984, "learning_rate": 5e-06, "loss": 0.5505, "num_input_tokens_seen": 125609572, "step": 2004 }, { "epoch": 6.668885191347754, "loss": 0.5029670000076294, "loss_ce": 3.7360387068474665e-05, "loss_iou": 0.1611328125, "loss_num": 0.0361328125, "loss_xval": 0.50390625, "num_input_tokens_seen": 125609572, "step": 2004 }, { "epoch": 6.672212978369385, "grad_norm": 21.895559310913086, "learning_rate": 5e-06, "loss": 0.5828, "num_input_tokens_seen": 125671396, "step": 2005 }, { "epoch": 6.672212978369385, "loss": 0.485055536031723, "loss_ce": 0.0002532901708036661, "loss_iou": 0.1484375, "loss_num": 0.03759765625, "loss_xval": 0.484375, "num_input_tokens_seen": 125671396, "step": 2005 }, { "epoch": 6.675540765391015, "grad_norm": 18.204973220825195, "learning_rate": 5e-06, "loss": 0.5182, "num_input_tokens_seen": 125734056, "step": 2006 }, { "epoch": 6.675540765391015, "loss": 0.5263726711273193, "loss_ce": 5.517941190191777e-06, "loss_iou": 0.2275390625, "loss_num": 0.01422119140625, "loss_xval": 0.52734375, "num_input_tokens_seen": 125734056, "step": 2006 }, { "epoch": 6.678868552412646, "grad_norm": 18.863664627075195, "learning_rate": 5e-06, "loss": 0.75, "num_input_tokens_seen": 125797056, "step": 2007 }, { "epoch": 6.678868552412646, "loss": 0.4690133333206177, "loss_ce": 1.9206974684493616e-05, "loss_iou": 0.1328125, "loss_num": 0.04052734375, "loss_xval": 0.46875, "num_input_tokens_seen": 125797056, "step": 2007 }, { "epoch": 6.6821963394342765, "grad_norm": 18.244375228881836, "learning_rate": 5e-06, "loss": 0.6438, "num_input_tokens_seen": 125860240, "step": 2008 }, { "epoch": 6.6821963394342765, "loss": 0.4698897898197174, "loss_ce": 2.9896518753957935e-06, "loss_iou": 0.1748046875, "loss_num": 0.0240478515625, "loss_xval": 0.470703125, "num_input_tokens_seen": 125860240, "step": 2008 }, { "epoch": 6.685524126455907, "grad_norm": 9.29032039642334, "learning_rate": 5e-06, "loss": 0.7089, "num_input_tokens_seen": 125923096, "step": 2009 }, { "epoch": 6.685524126455907, "loss": 0.7341325283050537, "loss_ce": 1.6860747109603835e-06, "loss_iou": 0.26953125, "loss_num": 0.038818359375, "loss_xval": 0.734375, "num_input_tokens_seen": 125923096, "step": 2009 }, { "epoch": 6.688851913477538, "grad_norm": 8.804953575134277, "learning_rate": 5e-06, "loss": 0.5586, "num_input_tokens_seen": 125986016, "step": 2010 }, { "epoch": 6.688851913477538, "loss": 0.7141602635383606, "loss_ce": 0.0003541393089108169, "loss_iou": 0.291015625, "loss_num": 0.0267333984375, "loss_xval": 0.71484375, "num_input_tokens_seen": 125986016, "step": 2010 }, { "epoch": 6.692179700499168, "grad_norm": 10.328939437866211, "learning_rate": 5e-06, "loss": 0.6132, "num_input_tokens_seen": 126048944, "step": 2011 }, { "epoch": 6.692179700499168, "loss": 0.4991012215614319, "loss_ce": 0.0012984691420570016, "loss_iou": 0.2021484375, "loss_num": 0.0189208984375, "loss_xval": 0.498046875, "num_input_tokens_seen": 126048944, "step": 2011 }, { "epoch": 6.695507487520799, "grad_norm": 14.701022148132324, "learning_rate": 5e-06, "loss": 0.3903, "num_input_tokens_seen": 126108936, "step": 2012 }, { "epoch": 6.695507487520799, "loss": 0.41955673694610596, "loss_ce": 0.0006114620482549071, "loss_iou": 0.11279296875, "loss_num": 0.03857421875, "loss_xval": 0.41796875, "num_input_tokens_seen": 126108936, "step": 2012 }, { "epoch": 6.6988352745424296, "grad_norm": 6.501366138458252, "learning_rate": 5e-06, "loss": 0.7083, "num_input_tokens_seen": 126171720, "step": 2013 }, { "epoch": 6.6988352745424296, "loss": 0.5719038248062134, "loss_ce": 4.408991571835941e-06, "loss_iou": 0.2080078125, "loss_num": 0.03125, "loss_xval": 0.5703125, "num_input_tokens_seen": 126171720, "step": 2013 }, { "epoch": 6.70216306156406, "grad_norm": 14.552860260009766, "learning_rate": 5e-06, "loss": 0.6624, "num_input_tokens_seen": 126234992, "step": 2014 }, { "epoch": 6.70216306156406, "loss": 0.7105560898780823, "loss_ce": 0.00047308087232522666, "loss_iou": 0.255859375, "loss_num": 0.039794921875, "loss_xval": 0.7109375, "num_input_tokens_seen": 126234992, "step": 2014 }, { "epoch": 6.705490848585691, "grad_norm": 8.832845687866211, "learning_rate": 5e-06, "loss": 0.3492, "num_input_tokens_seen": 126296556, "step": 2015 }, { "epoch": 6.705490848585691, "loss": 0.27500104904174805, "loss_ce": 0.002540139015763998, "loss_iou": 0.052001953125, "loss_num": 0.03369140625, "loss_xval": 0.2734375, "num_input_tokens_seen": 126296556, "step": 2015 }, { "epoch": 6.708818635607321, "grad_norm": 27.097858428955078, "learning_rate": 5e-06, "loss": 0.7891, "num_input_tokens_seen": 126359288, "step": 2016 }, { "epoch": 6.708818635607321, "loss": 0.8859854340553284, "loss_ce": 0.001830164808779955, "loss_iou": 0.298828125, "loss_num": 0.057861328125, "loss_xval": 0.8828125, "num_input_tokens_seen": 126359288, "step": 2016 }, { "epoch": 6.712146422628952, "grad_norm": 25.380584716796875, "learning_rate": 5e-06, "loss": 0.5383, "num_input_tokens_seen": 126420024, "step": 2017 }, { "epoch": 6.712146422628952, "loss": 0.47374069690704346, "loss_ce": 0.00010790046508191153, "loss_iou": 0.1611328125, "loss_num": 0.0301513671875, "loss_xval": 0.47265625, "num_input_tokens_seen": 126420024, "step": 2017 }, { "epoch": 6.715474209650583, "grad_norm": 23.486085891723633, "learning_rate": 5e-06, "loss": 0.6775, "num_input_tokens_seen": 126483000, "step": 2018 }, { "epoch": 6.715474209650583, "loss": 0.5473673343658447, "loss_ce": 4.071555849805009e-06, "loss_iou": 0.2060546875, "loss_num": 0.0269775390625, "loss_xval": 0.546875, "num_input_tokens_seen": 126483000, "step": 2018 }, { "epoch": 6.718801996672213, "grad_norm": 44.5804328918457, "learning_rate": 5e-06, "loss": 0.661, "num_input_tokens_seen": 126546428, "step": 2019 }, { "epoch": 6.718801996672213, "loss": 0.6915466785430908, "loss_ce": 1.8373744751443155e-05, "loss_iou": 0.283203125, "loss_num": 0.0252685546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 126546428, "step": 2019 }, { "epoch": 6.722129783693844, "grad_norm": 37.45322036743164, "learning_rate": 5e-06, "loss": 0.7419, "num_input_tokens_seen": 126611048, "step": 2020 }, { "epoch": 6.722129783693844, "loss": 0.6283579468727112, "loss_ce": 0.00018413460929878056, "loss_iou": 0.26171875, "loss_num": 0.02099609375, "loss_xval": 0.62890625, "num_input_tokens_seen": 126611048, "step": 2020 }, { "epoch": 6.7254575707154745, "grad_norm": 22.551172256469727, "learning_rate": 5e-06, "loss": 0.7573, "num_input_tokens_seen": 126675088, "step": 2021 }, { "epoch": 6.7254575707154745, "loss": 0.6667607426643372, "loss_ce": 1.2725381566269789e-05, "loss_iou": 0.240234375, "loss_num": 0.036865234375, "loss_xval": 0.66796875, "num_input_tokens_seen": 126675088, "step": 2021 }, { "epoch": 6.728785357737105, "grad_norm": 26.919328689575195, "learning_rate": 5e-06, "loss": 0.6641, "num_input_tokens_seen": 126738020, "step": 2022 }, { "epoch": 6.728785357737105, "loss": 0.8174043893814087, "loss_ce": 2.1600082618533634e-05, "loss_iou": 0.32421875, "loss_num": 0.03369140625, "loss_xval": 0.81640625, "num_input_tokens_seen": 126738020, "step": 2022 }, { "epoch": 6.732113144758736, "grad_norm": 14.623039245605469, "learning_rate": 5e-06, "loss": 0.5223, "num_input_tokens_seen": 126800740, "step": 2023 }, { "epoch": 6.732113144758736, "loss": 0.48008039593696594, "loss_ce": 0.0005882148398086429, "loss_iou": 0.1201171875, "loss_num": 0.0478515625, "loss_xval": 0.48046875, "num_input_tokens_seen": 126800740, "step": 2023 }, { "epoch": 6.735440931780366, "grad_norm": 17.909788131713867, "learning_rate": 5e-06, "loss": 0.7749, "num_input_tokens_seen": 126864348, "step": 2024 }, { "epoch": 6.735440931780366, "loss": 0.7843947410583496, "loss_ce": 0.00045923038851469755, "loss_iou": 0.3046875, "loss_num": 0.03466796875, "loss_xval": 0.78515625, "num_input_tokens_seen": 126864348, "step": 2024 }, { "epoch": 6.738768718801997, "grad_norm": 7.9001336097717285, "learning_rate": 5e-06, "loss": 0.3627, "num_input_tokens_seen": 126925644, "step": 2025 }, { "epoch": 6.738768718801997, "loss": 0.4097365140914917, "loss_ce": 0.0004347754002083093, "loss_iou": 0.1572265625, "loss_num": 0.0189208984375, "loss_xval": 0.41015625, "num_input_tokens_seen": 126925644, "step": 2025 }, { "epoch": 6.7420965058236275, "grad_norm": 14.718302726745605, "learning_rate": 5e-06, "loss": 0.8282, "num_input_tokens_seen": 126989728, "step": 2026 }, { "epoch": 6.7420965058236275, "loss": 0.7629995346069336, "loss_ce": 0.0007924530073069036, "loss_iou": 0.298828125, "loss_num": 0.033203125, "loss_xval": 0.76171875, "num_input_tokens_seen": 126989728, "step": 2026 }, { "epoch": 6.745424292845258, "grad_norm": 24.248943328857422, "learning_rate": 5e-06, "loss": 0.6663, "num_input_tokens_seen": 127052264, "step": 2027 }, { "epoch": 6.745424292845258, "loss": 0.7646561861038208, "loss_ce": 7.78716457716655e-06, "loss_iou": 0.2412109375, "loss_num": 0.05615234375, "loss_xval": 0.765625, "num_input_tokens_seen": 127052264, "step": 2027 }, { "epoch": 6.748752079866889, "grad_norm": 20.369253158569336, "learning_rate": 5e-06, "loss": 0.5408, "num_input_tokens_seen": 127113320, "step": 2028 }, { "epoch": 6.748752079866889, "loss": 0.5985287427902222, "loss_ce": 1.7999178453464992e-05, "loss_iou": 0.20703125, "loss_num": 0.037109375, "loss_xval": 0.59765625, "num_input_tokens_seen": 127113320, "step": 2028 }, { "epoch": 6.752079866888519, "grad_norm": 10.595885276794434, "learning_rate": 5e-06, "loss": 0.5684, "num_input_tokens_seen": 127175768, "step": 2029 }, { "epoch": 6.752079866888519, "loss": 0.5575874447822571, "loss_ce": 3.1296531233238056e-05, "loss_iou": 0.20703125, "loss_num": 0.02880859375, "loss_xval": 0.55859375, "num_input_tokens_seen": 127175768, "step": 2029 }, { "epoch": 6.75540765391015, "grad_norm": 9.724506378173828, "learning_rate": 5e-06, "loss": 0.6905, "num_input_tokens_seen": 127239488, "step": 2030 }, { "epoch": 6.75540765391015, "loss": 0.4640352427959442, "loss_ce": 0.0001680646528257057, "loss_iou": 0.16796875, "loss_num": 0.0255126953125, "loss_xval": 0.46484375, "num_input_tokens_seen": 127239488, "step": 2030 }, { "epoch": 6.758735440931781, "grad_norm": 14.413116455078125, "learning_rate": 5e-06, "loss": 0.7187, "num_input_tokens_seen": 127302644, "step": 2031 }, { "epoch": 6.758735440931781, "loss": 0.7189881801605225, "loss_ce": 0.0006044059991836548, "loss_iou": 0.259765625, "loss_num": 0.039794921875, "loss_xval": 0.71875, "num_input_tokens_seen": 127302644, "step": 2031 }, { "epoch": 6.762063227953411, "grad_norm": 12.814227104187012, "learning_rate": 5e-06, "loss": 0.5472, "num_input_tokens_seen": 127365888, "step": 2032 }, { "epoch": 6.762063227953411, "loss": 0.5584053993225098, "loss_ce": 0.000544054782949388, "loss_iou": 0.2001953125, "loss_num": 0.031494140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 127365888, "step": 2032 }, { "epoch": 6.765391014975042, "grad_norm": 10.193601608276367, "learning_rate": 5e-06, "loss": 0.6058, "num_input_tokens_seen": 127428156, "step": 2033 }, { "epoch": 6.765391014975042, "loss": 0.7717403173446655, "loss_ce": 1.1823009117506444e-05, "loss_iou": 0.298828125, "loss_num": 0.034423828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 127428156, "step": 2033 }, { "epoch": 6.768718801996672, "grad_norm": 16.16275405883789, "learning_rate": 5e-06, "loss": 0.5138, "num_input_tokens_seen": 127491248, "step": 2034 }, { "epoch": 6.768718801996672, "loss": 0.37994009256362915, "loss_ce": 3.902232947439188e-06, "loss_iou": 0.1279296875, "loss_num": 0.0247802734375, "loss_xval": 0.380859375, "num_input_tokens_seen": 127491248, "step": 2034 }, { "epoch": 6.772046589018303, "grad_norm": 11.477570533752441, "learning_rate": 5e-06, "loss": 0.6085, "num_input_tokens_seen": 127555728, "step": 2035 }, { "epoch": 6.772046589018303, "loss": 0.4243267774581909, "loss_ce": 1.0379582818131894e-05, "loss_iou": 0.123046875, "loss_num": 0.03564453125, "loss_xval": 0.423828125, "num_input_tokens_seen": 127555728, "step": 2035 }, { "epoch": 6.775374376039934, "grad_norm": 21.08380126953125, "learning_rate": 5e-06, "loss": 0.6649, "num_input_tokens_seen": 127618872, "step": 2036 }, { "epoch": 6.775374376039934, "loss": 0.7904080152511597, "loss_ce": 2.750139628915349e-06, "loss_iou": 0.31640625, "loss_num": 0.031982421875, "loss_xval": 0.7890625, "num_input_tokens_seen": 127618872, "step": 2036 }, { "epoch": 6.778702163061564, "grad_norm": 12.41211986541748, "learning_rate": 5e-06, "loss": 0.5988, "num_input_tokens_seen": 127683040, "step": 2037 }, { "epoch": 6.778702163061564, "loss": 0.7977491021156311, "loss_ce": 0.00136237358674407, "loss_iou": 0.3125, "loss_num": 0.034423828125, "loss_xval": 0.796875, "num_input_tokens_seen": 127683040, "step": 2037 }, { "epoch": 6.782029950083195, "grad_norm": 30.21271514892578, "learning_rate": 5e-06, "loss": 0.6923, "num_input_tokens_seen": 127747056, "step": 2038 }, { "epoch": 6.782029950083195, "loss": 0.6862843036651611, "loss_ce": 5.008545031159883e-06, "loss_iou": 0.220703125, "loss_num": 0.049072265625, "loss_xval": 0.6875, "num_input_tokens_seen": 127747056, "step": 2038 }, { "epoch": 6.7853577371048255, "grad_norm": 18.77168083190918, "learning_rate": 5e-06, "loss": 0.5475, "num_input_tokens_seen": 127810348, "step": 2039 }, { "epoch": 6.7853577371048255, "loss": 0.6531755924224854, "loss_ce": 0.0011980710551142693, "loss_iou": 0.255859375, "loss_num": 0.0283203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 127810348, "step": 2039 }, { "epoch": 6.788685524126456, "grad_norm": 29.515806198120117, "learning_rate": 5e-06, "loss": 0.6291, "num_input_tokens_seen": 127873440, "step": 2040 }, { "epoch": 6.788685524126456, "loss": 0.7723410129547119, "loss_ce": 2.1204255062912125e-06, "loss_iou": 0.263671875, "loss_num": 0.048583984375, "loss_xval": 0.7734375, "num_input_tokens_seen": 127873440, "step": 2040 }, { "epoch": 6.792013311148087, "grad_norm": 11.260506629943848, "learning_rate": 5e-06, "loss": 0.6274, "num_input_tokens_seen": 127936520, "step": 2041 }, { "epoch": 6.792013311148087, "loss": 0.6940391063690186, "loss_ce": 0.00031353323720395565, "loss_iou": 0.251953125, "loss_num": 0.0380859375, "loss_xval": 0.6953125, "num_input_tokens_seen": 127936520, "step": 2041 }, { "epoch": 6.795341098169717, "grad_norm": 9.247034072875977, "learning_rate": 5e-06, "loss": 0.6442, "num_input_tokens_seen": 127999544, "step": 2042 }, { "epoch": 6.795341098169717, "loss": 0.36908620595932007, "loss_ce": 6.636597390752286e-06, "loss_iou": 0.1171875, "loss_num": 0.0269775390625, "loss_xval": 0.369140625, "num_input_tokens_seen": 127999544, "step": 2042 }, { "epoch": 6.798668885191348, "grad_norm": 33.01329803466797, "learning_rate": 5e-06, "loss": 0.8701, "num_input_tokens_seen": 128062564, "step": 2043 }, { "epoch": 6.798668885191348, "loss": 0.9910855293273926, "loss_ce": 0.0003628195554483682, "loss_iou": 0.4140625, "loss_num": 0.0322265625, "loss_xval": 0.9921875, "num_input_tokens_seen": 128062564, "step": 2043 }, { "epoch": 6.8019966722129785, "grad_norm": 33.03346633911133, "learning_rate": 5e-06, "loss": 0.8302, "num_input_tokens_seen": 128125212, "step": 2044 }, { "epoch": 6.8019966722129785, "loss": 0.47632086277008057, "loss_ce": 2.505117663531564e-06, "loss_iou": 0.185546875, "loss_num": 0.020751953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 128125212, "step": 2044 }, { "epoch": 6.805324459234609, "grad_norm": 10.370849609375, "learning_rate": 5e-06, "loss": 0.4539, "num_input_tokens_seen": 128185708, "step": 2045 }, { "epoch": 6.805324459234609, "loss": 0.45429426431655884, "loss_ce": 9.597249118087348e-06, "loss_iou": 0.1328125, "loss_num": 0.03759765625, "loss_xval": 0.455078125, "num_input_tokens_seen": 128185708, "step": 2045 }, { "epoch": 6.80865224625624, "grad_norm": 9.65555191040039, "learning_rate": 5e-06, "loss": 0.4212, "num_input_tokens_seen": 128249556, "step": 2046 }, { "epoch": 6.80865224625624, "loss": 0.3541676998138428, "loss_ce": 4.1715149563970044e-05, "loss_iou": 0.1201171875, "loss_num": 0.0228271484375, "loss_xval": 0.353515625, "num_input_tokens_seen": 128249556, "step": 2046 }, { "epoch": 6.81198003327787, "grad_norm": 7.117613792419434, "learning_rate": 5e-06, "loss": 0.5734, "num_input_tokens_seen": 128313596, "step": 2047 }, { "epoch": 6.81198003327787, "loss": 0.5445963144302368, "loss_ce": 0.0011392920278012753, "loss_iou": 0.1904296875, "loss_num": 0.03271484375, "loss_xval": 0.54296875, "num_input_tokens_seen": 128313596, "step": 2047 }, { "epoch": 6.815307820299501, "grad_norm": 8.703977584838867, "learning_rate": 5e-06, "loss": 0.4121, "num_input_tokens_seen": 128374948, "step": 2048 }, { "epoch": 6.815307820299501, "loss": 0.3772086501121521, "loss_ce": 1.1357213224982843e-05, "loss_iou": 0.107421875, "loss_num": 0.0322265625, "loss_xval": 0.376953125, "num_input_tokens_seen": 128374948, "step": 2048 }, { "epoch": 6.818635607321132, "grad_norm": 19.232887268066406, "learning_rate": 5e-06, "loss": 0.6211, "num_input_tokens_seen": 128439652, "step": 2049 }, { "epoch": 6.818635607321132, "loss": 0.6167006492614746, "loss_ce": 0.0007338491268455982, "loss_iou": 0.236328125, "loss_num": 0.0289306640625, "loss_xval": 0.6171875, "num_input_tokens_seen": 128439652, "step": 2049 }, { "epoch": 6.821963394342762, "grad_norm": 12.786202430725098, "learning_rate": 5e-06, "loss": 0.5593, "num_input_tokens_seen": 128502012, "step": 2050 }, { "epoch": 6.821963394342762, "loss": 0.5961904525756836, "loss_ce": 0.00012110465468140319, "loss_iou": 0.177734375, "loss_num": 0.04833984375, "loss_xval": 0.59765625, "num_input_tokens_seen": 128502012, "step": 2050 }, { "epoch": 6.825291181364393, "grad_norm": 10.602361679077148, "learning_rate": 5e-06, "loss": 0.3063, "num_input_tokens_seen": 128564460, "step": 2051 }, { "epoch": 6.825291181364393, "loss": 0.3625529408454895, "loss_ce": 4.135474227950908e-06, "loss_iou": 0.12060546875, "loss_num": 0.0242919921875, "loss_xval": 0.36328125, "num_input_tokens_seen": 128564460, "step": 2051 }, { "epoch": 6.8286189683860234, "grad_norm": 8.027532577514648, "learning_rate": 5e-06, "loss": 0.8377, "num_input_tokens_seen": 128628028, "step": 2052 }, { "epoch": 6.8286189683860234, "loss": 0.9404404163360596, "loss_ce": 1.0742259291873779e-05, "loss_iou": 0.337890625, "loss_num": 0.052978515625, "loss_xval": 0.94140625, "num_input_tokens_seen": 128628028, "step": 2052 }, { "epoch": 6.831946755407654, "grad_norm": 18.537893295288086, "learning_rate": 5e-06, "loss": 0.6, "num_input_tokens_seen": 128691964, "step": 2053 }, { "epoch": 6.831946755407654, "loss": 0.6519191265106201, "loss_ce": 2.656855485838605e-06, "loss_iou": 0.2314453125, "loss_num": 0.0380859375, "loss_xval": 0.65234375, "num_input_tokens_seen": 128691964, "step": 2053 }, { "epoch": 6.835274542429285, "grad_norm": 6.270979404449463, "learning_rate": 5e-06, "loss": 0.421, "num_input_tokens_seen": 128753220, "step": 2054 }, { "epoch": 6.835274542429285, "loss": 0.4683782458305359, "loss_ce": 0.00011653243564069271, "loss_iou": 0.1484375, "loss_num": 0.0341796875, "loss_xval": 0.46875, "num_input_tokens_seen": 128753220, "step": 2054 }, { "epoch": 6.838602329450915, "grad_norm": 11.007545471191406, "learning_rate": 5e-06, "loss": 0.8062, "num_input_tokens_seen": 128816684, "step": 2055 }, { "epoch": 6.838602329450915, "loss": 0.732926607131958, "loss_ce": 0.0002605952031444758, "loss_iou": 0.287109375, "loss_num": 0.03173828125, "loss_xval": 0.734375, "num_input_tokens_seen": 128816684, "step": 2055 }, { "epoch": 6.841930116472546, "grad_norm": 16.74416160583496, "learning_rate": 5e-06, "loss": 0.6739, "num_input_tokens_seen": 128878812, "step": 2056 }, { "epoch": 6.841930116472546, "loss": 0.7158355712890625, "loss_ce": 0.0006256001070141792, "loss_iou": 0.240234375, "loss_num": 0.047119140625, "loss_xval": 0.71484375, "num_input_tokens_seen": 128878812, "step": 2056 }, { "epoch": 6.8452579034941765, "grad_norm": 9.694169044494629, "learning_rate": 5e-06, "loss": 0.8048, "num_input_tokens_seen": 128942252, "step": 2057 }, { "epoch": 6.8452579034941765, "loss": 0.5953434705734253, "loss_ce": 6.563391252711881e-06, "loss_iou": 0.24609375, "loss_num": 0.0205078125, "loss_xval": 0.59375, "num_input_tokens_seen": 128942252, "step": 2057 }, { "epoch": 6.848585690515807, "grad_norm": 9.638193130493164, "learning_rate": 5e-06, "loss": 0.5957, "num_input_tokens_seen": 129003712, "step": 2058 }, { "epoch": 6.848585690515807, "loss": 0.6989909410476685, "loss_ce": 0.0006876841653138399, "loss_iou": 0.25, "loss_num": 0.03955078125, "loss_xval": 0.69921875, "num_input_tokens_seen": 129003712, "step": 2058 }, { "epoch": 6.851913477537438, "grad_norm": 31.37020492553711, "learning_rate": 5e-06, "loss": 0.5928, "num_input_tokens_seen": 129066428, "step": 2059 }, { "epoch": 6.851913477537438, "loss": 0.5930197238922119, "loss_ce": 2.146906354028033e-06, "loss_iou": 0.25, "loss_num": 0.0186767578125, "loss_xval": 0.59375, "num_input_tokens_seen": 129066428, "step": 2059 }, { "epoch": 6.855241264559068, "grad_norm": 24.857219696044922, "learning_rate": 5e-06, "loss": 0.6166, "num_input_tokens_seen": 129129856, "step": 2060 }, { "epoch": 6.855241264559068, "loss": 0.7651916146278381, "loss_ce": 5.489503746503033e-05, "loss_iou": 0.2890625, "loss_num": 0.03759765625, "loss_xval": 0.765625, "num_input_tokens_seen": 129129856, "step": 2060 }, { "epoch": 6.858569051580699, "grad_norm": 35.20671081542969, "learning_rate": 5e-06, "loss": 0.4982, "num_input_tokens_seen": 129192724, "step": 2061 }, { "epoch": 6.858569051580699, "loss": 0.4035661518573761, "loss_ce": 1.69701752383844e-06, "loss_iou": 0.158203125, "loss_num": 0.017578125, "loss_xval": 0.404296875, "num_input_tokens_seen": 129192724, "step": 2061 }, { "epoch": 6.86189683860233, "grad_norm": 38.629539489746094, "learning_rate": 5e-06, "loss": 0.4976, "num_input_tokens_seen": 129255228, "step": 2062 }, { "epoch": 6.86189683860233, "loss": 0.41774308681488037, "loss_ce": 1.8458322301739827e-05, "loss_iou": 0.134765625, "loss_num": 0.029541015625, "loss_xval": 0.41796875, "num_input_tokens_seen": 129255228, "step": 2062 }, { "epoch": 6.86522462562396, "grad_norm": 7.631236553192139, "learning_rate": 5e-06, "loss": 0.6045, "num_input_tokens_seen": 129318460, "step": 2063 }, { "epoch": 6.86522462562396, "loss": 0.5400491952896118, "loss_ce": 1.0135449883819092e-05, "loss_iou": 0.201171875, "loss_num": 0.0277099609375, "loss_xval": 0.5390625, "num_input_tokens_seen": 129318460, "step": 2063 }, { "epoch": 6.868552412645591, "grad_norm": 15.044014930725098, "learning_rate": 5e-06, "loss": 0.919, "num_input_tokens_seen": 129383464, "step": 2064 }, { "epoch": 6.868552412645591, "loss": 0.7456334829330444, "loss_ce": 2.7992284231004305e-05, "loss_iou": 0.26953125, "loss_num": 0.041259765625, "loss_xval": 0.74609375, "num_input_tokens_seen": 129383464, "step": 2064 }, { "epoch": 6.871880199667221, "grad_norm": 18.491485595703125, "learning_rate": 5e-06, "loss": 0.5914, "num_input_tokens_seen": 129445500, "step": 2065 }, { "epoch": 6.871880199667221, "loss": 0.6128226518630981, "loss_ce": 2.965956628031563e-05, "loss_iou": 0.224609375, "loss_num": 0.03271484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 129445500, "step": 2065 }, { "epoch": 6.875207986688852, "grad_norm": 8.384293556213379, "learning_rate": 5e-06, "loss": 0.5302, "num_input_tokens_seen": 129508484, "step": 2066 }, { "epoch": 6.875207986688852, "loss": 0.29032838344573975, "loss_ce": 0.0010827973019331694, "loss_iou": 0.09521484375, "loss_num": 0.019775390625, "loss_xval": 0.2890625, "num_input_tokens_seen": 129508484, "step": 2066 }, { "epoch": 6.878535773710483, "grad_norm": 30.570682525634766, "learning_rate": 5e-06, "loss": 0.7456, "num_input_tokens_seen": 129571368, "step": 2067 }, { "epoch": 6.878535773710483, "loss": 0.5278448462486267, "loss_ce": 1.2810547559638508e-05, "loss_iou": 0.173828125, "loss_num": 0.0361328125, "loss_xval": 0.52734375, "num_input_tokens_seen": 129571368, "step": 2067 }, { "epoch": 6.881863560732113, "grad_norm": 26.541101455688477, "learning_rate": 5e-06, "loss": 0.5533, "num_input_tokens_seen": 129632828, "step": 2068 }, { "epoch": 6.881863560732113, "loss": 0.44568026065826416, "loss_ce": 1.5455581205969793e-06, "loss_iou": 0.1142578125, "loss_num": 0.043701171875, "loss_xval": 0.4453125, "num_input_tokens_seen": 129632828, "step": 2068 }, { "epoch": 6.885191347753744, "grad_norm": 27.95594596862793, "learning_rate": 5e-06, "loss": 0.7074, "num_input_tokens_seen": 129697224, "step": 2069 }, { "epoch": 6.885191347753744, "loss": 0.7104054689407349, "loss_ce": 0.0004445456143002957, "loss_iou": 0.265625, "loss_num": 0.035888671875, "loss_xval": 0.7109375, "num_input_tokens_seen": 129697224, "step": 2069 }, { "epoch": 6.8885191347753745, "grad_norm": 24.946670532226562, "learning_rate": 5e-06, "loss": 0.3574, "num_input_tokens_seen": 129759740, "step": 2070 }, { "epoch": 6.8885191347753745, "loss": 0.2868678867816925, "loss_ce": 2.6489242372917943e-06, "loss_iou": 0.076171875, "loss_num": 0.02685546875, "loss_xval": 0.287109375, "num_input_tokens_seen": 129759740, "step": 2070 }, { "epoch": 6.891846921797005, "grad_norm": 15.590705871582031, "learning_rate": 5e-06, "loss": 0.6807, "num_input_tokens_seen": 129822948, "step": 2071 }, { "epoch": 6.891846921797005, "loss": 0.6475872993469238, "loss_ce": 0.0008587598567828536, "loss_iou": 0.244140625, "loss_num": 0.03173828125, "loss_xval": 0.6484375, "num_input_tokens_seen": 129822948, "step": 2071 }, { "epoch": 6.895174708818636, "grad_norm": 16.893766403198242, "learning_rate": 5e-06, "loss": 0.5091, "num_input_tokens_seen": 129885448, "step": 2072 }, { "epoch": 6.895174708818636, "loss": 0.4554440379142761, "loss_ce": 0.0003659002832137048, "loss_iou": 0.158203125, "loss_num": 0.0279541015625, "loss_xval": 0.455078125, "num_input_tokens_seen": 129885448, "step": 2072 }, { "epoch": 6.898502495840266, "grad_norm": 19.797025680541992, "learning_rate": 5e-06, "loss": 0.5874, "num_input_tokens_seen": 129947980, "step": 2073 }, { "epoch": 6.898502495840266, "loss": 0.647710919380188, "loss_ce": 5.832657734572422e-06, "loss_iou": 0.279296875, "loss_num": 0.01806640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 129947980, "step": 2073 }, { "epoch": 6.901830282861897, "grad_norm": 35.54941177368164, "learning_rate": 5e-06, "loss": 0.5312, "num_input_tokens_seen": 130011140, "step": 2074 }, { "epoch": 6.901830282861897, "loss": 0.6007834076881409, "loss_ce": 0.0005636924761347473, "loss_iou": 0.240234375, "loss_num": 0.0240478515625, "loss_xval": 0.6015625, "num_input_tokens_seen": 130011140, "step": 2074 }, { "epoch": 6.9051580698835275, "grad_norm": 33.2252311706543, "learning_rate": 5e-06, "loss": 0.6403, "num_input_tokens_seen": 130074152, "step": 2075 }, { "epoch": 6.9051580698835275, "loss": 0.6493196487426758, "loss_ce": 2.764021155599039e-05, "loss_iou": 0.26953125, "loss_num": 0.021728515625, "loss_xval": 0.6484375, "num_input_tokens_seen": 130074152, "step": 2075 }, { "epoch": 6.908485856905158, "grad_norm": 10.355801582336426, "learning_rate": 5e-06, "loss": 0.8395, "num_input_tokens_seen": 130137024, "step": 2076 }, { "epoch": 6.908485856905158, "loss": 1.1387450695037842, "loss_ce": 0.00025634057237766683, "loss_iou": 0.390625, "loss_num": 0.07177734375, "loss_xval": 1.140625, "num_input_tokens_seen": 130137024, "step": 2076 }, { "epoch": 6.911813643926789, "grad_norm": 17.79325294494629, "learning_rate": 5e-06, "loss": 0.7184, "num_input_tokens_seen": 130199476, "step": 2077 }, { "epoch": 6.911813643926789, "loss": 0.764284610748291, "loss_ce": 0.0009789575124159455, "loss_iou": 0.291015625, "loss_num": 0.0361328125, "loss_xval": 0.76171875, "num_input_tokens_seen": 130199476, "step": 2077 }, { "epoch": 6.915141430948419, "grad_norm": 15.910724639892578, "learning_rate": 5e-06, "loss": 0.5689, "num_input_tokens_seen": 130261976, "step": 2078 }, { "epoch": 6.915141430948419, "loss": 0.6021930575370789, "loss_ce": 2.0164037778158672e-05, "loss_iou": 0.2392578125, "loss_num": 0.024658203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 130261976, "step": 2078 }, { "epoch": 6.91846921797005, "grad_norm": 14.110590934753418, "learning_rate": 5e-06, "loss": 0.6201, "num_input_tokens_seen": 130324716, "step": 2079 }, { "epoch": 6.91846921797005, "loss": 0.5781773328781128, "loss_ce": 5.237920777290128e-05, "loss_iou": 0.185546875, "loss_num": 0.041259765625, "loss_xval": 0.578125, "num_input_tokens_seen": 130324716, "step": 2079 }, { "epoch": 6.921797004991681, "grad_norm": 15.305957794189453, "learning_rate": 5e-06, "loss": 0.4047, "num_input_tokens_seen": 130387324, "step": 2080 }, { "epoch": 6.921797004991681, "loss": 0.5393695831298828, "loss_ce": 1.924355728988303e-06, "loss_iou": 0.20703125, "loss_num": 0.0252685546875, "loss_xval": 0.5390625, "num_input_tokens_seen": 130387324, "step": 2080 }, { "epoch": 6.925124792013311, "grad_norm": 14.415096282958984, "learning_rate": 5e-06, "loss": 0.6996, "num_input_tokens_seen": 130448092, "step": 2081 }, { "epoch": 6.925124792013311, "loss": 0.4480394423007965, "loss_ce": 0.0008959031547419727, "loss_iou": 0.1552734375, "loss_num": 0.02734375, "loss_xval": 0.447265625, "num_input_tokens_seen": 130448092, "step": 2081 }, { "epoch": 6.928452579034942, "grad_norm": 16.91680335998535, "learning_rate": 5e-06, "loss": 0.4149, "num_input_tokens_seen": 130510668, "step": 2082 }, { "epoch": 6.928452579034942, "loss": 0.22287049889564514, "loss_ce": 3.115561412414536e-05, "loss_iou": 0.087890625, "loss_num": 0.00946044921875, "loss_xval": 0.22265625, "num_input_tokens_seen": 130510668, "step": 2082 }, { "epoch": 6.931780366056572, "grad_norm": 21.434885025024414, "learning_rate": 5e-06, "loss": 0.6823, "num_input_tokens_seen": 130573952, "step": 2083 }, { "epoch": 6.931780366056572, "loss": 0.6594431400299072, "loss_ce": 0.002277584746479988, "loss_iou": 0.236328125, "loss_num": 0.036865234375, "loss_xval": 0.65625, "num_input_tokens_seen": 130573952, "step": 2083 }, { "epoch": 6.935108153078203, "grad_norm": 29.567922592163086, "learning_rate": 5e-06, "loss": 0.6679, "num_input_tokens_seen": 130637412, "step": 2084 }, { "epoch": 6.935108153078203, "loss": 0.752985954284668, "loss_ce": 5.6241711718030274e-05, "loss_iou": 0.265625, "loss_num": 0.044677734375, "loss_xval": 0.75390625, "num_input_tokens_seen": 130637412, "step": 2084 }, { "epoch": 6.938435940099834, "grad_norm": 27.37392807006836, "learning_rate": 5e-06, "loss": 0.6757, "num_input_tokens_seen": 130700356, "step": 2085 }, { "epoch": 6.938435940099834, "loss": 0.6538108587265015, "loss_ce": 2.2474218894785736e-06, "loss_iou": 0.2216796875, "loss_num": 0.0419921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 130700356, "step": 2085 }, { "epoch": 6.941763727121464, "grad_norm": 20.880840301513672, "learning_rate": 5e-06, "loss": 0.809, "num_input_tokens_seen": 130763664, "step": 2086 }, { "epoch": 6.941763727121464, "loss": 0.8932182192802429, "loss_ce": 0.00039596963324584067, "loss_iou": 0.26953125, "loss_num": 0.07080078125, "loss_xval": 0.89453125, "num_input_tokens_seen": 130763664, "step": 2086 }, { "epoch": 6.945091514143095, "grad_norm": 14.646089553833008, "learning_rate": 5e-06, "loss": 0.5301, "num_input_tokens_seen": 130824480, "step": 2087 }, { "epoch": 6.945091514143095, "loss": 0.5801482200622559, "loss_ce": 0.0005583561141975224, "loss_iou": 0.201171875, "loss_num": 0.035400390625, "loss_xval": 0.578125, "num_input_tokens_seen": 130824480, "step": 2087 }, { "epoch": 6.9484193011647255, "grad_norm": 10.873848915100098, "learning_rate": 5e-06, "loss": 0.6527, "num_input_tokens_seen": 130887144, "step": 2088 }, { "epoch": 6.9484193011647255, "loss": 0.6155133247375488, "loss_ce": 3.482702959445305e-05, "loss_iou": 0.2255859375, "loss_num": 0.032958984375, "loss_xval": 0.6171875, "num_input_tokens_seen": 130887144, "step": 2088 }, { "epoch": 6.951747088186356, "grad_norm": 13.644984245300293, "learning_rate": 5e-06, "loss": 0.5628, "num_input_tokens_seen": 130949656, "step": 2089 }, { "epoch": 6.951747088186356, "loss": 0.3859878182411194, "loss_ce": 1.4590775663236855e-06, "loss_iou": 0.10986328125, "loss_num": 0.033203125, "loss_xval": 0.38671875, "num_input_tokens_seen": 130949656, "step": 2089 }, { "epoch": 6.955074875207987, "grad_norm": 12.300344467163086, "learning_rate": 5e-06, "loss": 0.5655, "num_input_tokens_seen": 131012156, "step": 2090 }, { "epoch": 6.955074875207987, "loss": 0.7430570125579834, "loss_ce": 0.0007474847952835262, "loss_iou": 0.248046875, "loss_num": 0.049072265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 131012156, "step": 2090 }, { "epoch": 6.958402662229617, "grad_norm": 8.354609489440918, "learning_rate": 5e-06, "loss": 0.5565, "num_input_tokens_seen": 131075440, "step": 2091 }, { "epoch": 6.958402662229617, "loss": 0.5676477551460266, "loss_ce": 0.00017340901831630617, "loss_iou": 0.197265625, "loss_num": 0.03466796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 131075440, "step": 2091 }, { "epoch": 6.961730449251248, "grad_norm": 20.184093475341797, "learning_rate": 5e-06, "loss": 0.9461, "num_input_tokens_seen": 131140056, "step": 2092 }, { "epoch": 6.961730449251248, "loss": 1.046269178390503, "loss_ce": 0.0011030529858544469, "loss_iou": 0.369140625, "loss_num": 0.0615234375, "loss_xval": 1.046875, "num_input_tokens_seen": 131140056, "step": 2092 }, { "epoch": 6.965058236272879, "grad_norm": 35.20927810668945, "learning_rate": 5e-06, "loss": 0.6384, "num_input_tokens_seen": 131203472, "step": 2093 }, { "epoch": 6.965058236272879, "loss": 0.5683769583702087, "loss_ce": 0.0006279383087530732, "loss_iou": 0.23046875, "loss_num": 0.021484375, "loss_xval": 0.56640625, "num_input_tokens_seen": 131203472, "step": 2093 }, { "epoch": 6.968386023294509, "grad_norm": 28.997852325439453, "learning_rate": 5e-06, "loss": 0.4333, "num_input_tokens_seen": 131264452, "step": 2094 }, { "epoch": 6.968386023294509, "loss": 0.6776357293128967, "loss_ce": 2.345820212212857e-05, "loss_iou": 0.2470703125, "loss_num": 0.036865234375, "loss_xval": 0.67578125, "num_input_tokens_seen": 131264452, "step": 2094 }, { "epoch": 6.97171381031614, "grad_norm": 14.191515922546387, "learning_rate": 5e-06, "loss": 0.5978, "num_input_tokens_seen": 131327560, "step": 2095 }, { "epoch": 6.97171381031614, "loss": 0.6406088471412659, "loss_ce": 0.0007162687252275646, "loss_iou": 0.2734375, "loss_num": 0.0186767578125, "loss_xval": 0.640625, "num_input_tokens_seen": 131327560, "step": 2095 }, { "epoch": 6.97504159733777, "grad_norm": 13.989681243896484, "learning_rate": 5e-06, "loss": 0.6243, "num_input_tokens_seen": 131390084, "step": 2096 }, { "epoch": 6.97504159733777, "loss": 0.639626145362854, "loss_ce": 0.0007101118098944426, "loss_iou": 0.220703125, "loss_num": 0.03955078125, "loss_xval": 0.640625, "num_input_tokens_seen": 131390084, "step": 2096 }, { "epoch": 6.978369384359401, "grad_norm": 19.350723266601562, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 131452696, "step": 2097 }, { "epoch": 6.978369384359401, "loss": 0.5311285257339478, "loss_ce": 0.00012263574171811342, "loss_iou": 0.177734375, "loss_num": 0.035400390625, "loss_xval": 0.53125, "num_input_tokens_seen": 131452696, "step": 2097 }, { "epoch": 6.981697171381032, "grad_norm": 17.37226676940918, "learning_rate": 5e-06, "loss": 0.5591, "num_input_tokens_seen": 131515468, "step": 2098 }, { "epoch": 6.981697171381032, "loss": 0.5075392127037048, "loss_ce": 0.00021499168360605836, "loss_iou": 0.1796875, "loss_num": 0.02978515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 131515468, "step": 2098 }, { "epoch": 6.985024958402662, "grad_norm": 7.955191135406494, "learning_rate": 5e-06, "loss": 0.5154, "num_input_tokens_seen": 131579304, "step": 2099 }, { "epoch": 6.985024958402662, "loss": 0.4880402088165283, "loss_ce": 0.00024723957176320255, "loss_iou": 0.1611328125, "loss_num": 0.032958984375, "loss_xval": 0.48828125, "num_input_tokens_seen": 131579304, "step": 2099 }, { "epoch": 6.988352745424293, "grad_norm": 20.51535415649414, "learning_rate": 5e-06, "loss": 0.693, "num_input_tokens_seen": 131642824, "step": 2100 }, { "epoch": 6.988352745424293, "loss": 0.7713662385940552, "loss_ce": 3.9287588151637465e-06, "loss_iou": 0.263671875, "loss_num": 0.049072265625, "loss_xval": 0.76953125, "num_input_tokens_seen": 131642824, "step": 2100 }, { "epoch": 6.9916805324459235, "grad_norm": 29.88747215270996, "learning_rate": 5e-06, "loss": 0.7045, "num_input_tokens_seen": 131705896, "step": 2101 }, { "epoch": 6.9916805324459235, "loss": 0.9383728504180908, "loss_ce": 0.000506624230183661, "loss_iou": 0.361328125, "loss_num": 0.04296875, "loss_xval": 0.9375, "num_input_tokens_seen": 131705896, "step": 2101 }, { "epoch": 6.995008319467554, "grad_norm": 16.69076919555664, "learning_rate": 5e-06, "loss": 0.6428, "num_input_tokens_seen": 131768916, "step": 2102 }, { "epoch": 6.995008319467554, "loss": 0.7973799109458923, "loss_ce": 1.66679219546495e-05, "loss_iou": 0.26953125, "loss_num": 0.0517578125, "loss_xval": 0.796875, "num_input_tokens_seen": 131768916, "step": 2102 }, { "epoch": 6.998336106489185, "grad_norm": 8.810283660888672, "learning_rate": 5e-06, "loss": 0.517, "num_input_tokens_seen": 131830836, "step": 2103 }, { "epoch": 6.998336106489185, "loss": 0.3123791217803955, "loss_ce": 1.186176859846455e-06, "loss_iou": 0.11474609375, "loss_num": 0.0166015625, "loss_xval": 0.3125, "num_input_tokens_seen": 131830836, "step": 2103 }, { "epoch": 6.998336106489185, "loss": 0.6518619060516357, "loss_ce": 6.421546004276024e-06, "loss_iou": 0.2060546875, "loss_num": 0.048095703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 131862292, "step": 2103 }, { "epoch": 7.001663893510815, "grad_norm": 22.60040855407715, "learning_rate": 5e-06, "loss": 0.7185, "num_input_tokens_seen": 131893472, "step": 2104 }, { "epoch": 7.001663893510815, "loss": 0.785060465335846, "loss_ce": 0.0015521487221121788, "loss_iou": 0.296875, "loss_num": 0.038330078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 131893472, "step": 2104 }, { "epoch": 7.004991680532446, "grad_norm": 34.42317199707031, "learning_rate": 5e-06, "loss": 0.5658, "num_input_tokens_seen": 131956572, "step": 2105 }, { "epoch": 7.004991680532446, "loss": 0.5000066161155701, "loss_ce": 0.0004948793794028461, "loss_iou": 0.201171875, "loss_num": 0.019287109375, "loss_xval": 0.5, "num_input_tokens_seen": 131956572, "step": 2105 }, { "epoch": 7.0083194675540765, "grad_norm": 34.3641357421875, "learning_rate": 5e-06, "loss": 0.7585, "num_input_tokens_seen": 132019600, "step": 2106 }, { "epoch": 7.0083194675540765, "loss": 0.7151626348495483, "loss_ce": 0.00031885469797998667, "loss_iou": 0.251953125, "loss_num": 0.0419921875, "loss_xval": 0.71484375, "num_input_tokens_seen": 132019600, "step": 2106 }, { "epoch": 7.011647254575707, "grad_norm": 23.11007308959961, "learning_rate": 5e-06, "loss": 0.675, "num_input_tokens_seen": 132083152, "step": 2107 }, { "epoch": 7.011647254575707, "loss": 0.7283380627632141, "loss_ce": 0.0004328204959165305, "loss_iou": 0.298828125, "loss_num": 0.02587890625, "loss_xval": 0.7265625, "num_input_tokens_seen": 132083152, "step": 2107 }, { "epoch": 7.014975041597338, "grad_norm": 10.93664836883545, "learning_rate": 5e-06, "loss": 0.6969, "num_input_tokens_seen": 132146988, "step": 2108 }, { "epoch": 7.014975041597338, "loss": 0.9235168695449829, "loss_ce": 5.496757512446493e-05, "loss_iou": 0.3203125, "loss_num": 0.056640625, "loss_xval": 0.921875, "num_input_tokens_seen": 132146988, "step": 2108 }, { "epoch": 7.018302828618968, "grad_norm": 32.23749923706055, "learning_rate": 5e-06, "loss": 0.7922, "num_input_tokens_seen": 132211324, "step": 2109 }, { "epoch": 7.018302828618968, "loss": 0.5865480899810791, "loss_ce": 6.12727235420607e-05, "loss_iou": 0.2265625, "loss_num": 0.0263671875, "loss_xval": 0.5859375, "num_input_tokens_seen": 132211324, "step": 2109 }, { "epoch": 7.021630615640599, "grad_norm": 14.129114151000977, "learning_rate": 5e-06, "loss": 0.5205, "num_input_tokens_seen": 132270284, "step": 2110 }, { "epoch": 7.021630615640599, "loss": 0.17486637830734253, "loss_ce": 6.626144113397459e-07, "loss_iou": 0.0, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 132270284, "step": 2110 }, { "epoch": 7.02495840266223, "grad_norm": 24.958602905273438, "learning_rate": 5e-06, "loss": 0.5935, "num_input_tokens_seen": 132333892, "step": 2111 }, { "epoch": 7.02495840266223, "loss": 0.7141265869140625, "loss_ce": 0.0007476459722965956, "loss_iou": 0.275390625, "loss_num": 0.0322265625, "loss_xval": 0.71484375, "num_input_tokens_seen": 132333892, "step": 2111 }, { "epoch": 7.02828618968386, "grad_norm": 14.144628524780273, "learning_rate": 5e-06, "loss": 0.548, "num_input_tokens_seen": 132397308, "step": 2112 }, { "epoch": 7.02828618968386, "loss": 0.4922676384449005, "loss_ce": 1.9095172319794074e-05, "loss_iou": 0.205078125, "loss_num": 0.016357421875, "loss_xval": 0.4921875, "num_input_tokens_seen": 132397308, "step": 2112 }, { "epoch": 7.031613976705491, "grad_norm": 6.050787448883057, "learning_rate": 5e-06, "loss": 0.4729, "num_input_tokens_seen": 132458960, "step": 2113 }, { "epoch": 7.031613976705491, "loss": 0.12198328971862793, "loss_ce": 4.526739758148324e-06, "loss_iou": 0.01385498046875, "loss_num": 0.0189208984375, "loss_xval": 0.1220703125, "num_input_tokens_seen": 132458960, "step": 2113 }, { "epoch": 7.034941763727121, "grad_norm": 11.335150718688965, "learning_rate": 5e-06, "loss": 0.644, "num_input_tokens_seen": 132522248, "step": 2114 }, { "epoch": 7.034941763727121, "loss": 0.5466324090957642, "loss_ce": 1.5256084680004278e-06, "loss_iou": 0.2080078125, "loss_num": 0.0262451171875, "loss_xval": 0.546875, "num_input_tokens_seen": 132522248, "step": 2114 }, { "epoch": 7.038269550748752, "grad_norm": 8.750298500061035, "learning_rate": 5e-06, "loss": 0.6537, "num_input_tokens_seen": 132585732, "step": 2115 }, { "epoch": 7.038269550748752, "loss": 0.32825011014938354, "loss_ce": 3.050979103136342e-06, "loss_iou": 0.1357421875, "loss_num": 0.01129150390625, "loss_xval": 0.328125, "num_input_tokens_seen": 132585732, "step": 2115 }, { "epoch": 7.041597337770383, "grad_norm": 9.573755264282227, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 132648144, "step": 2116 }, { "epoch": 7.041597337770383, "loss": 0.5163666605949402, "loss_ce": 9.265577318728901e-06, "loss_iou": 0.2109375, "loss_num": 0.018798828125, "loss_xval": 0.515625, "num_input_tokens_seen": 132648144, "step": 2116 }, { "epoch": 7.044925124792013, "grad_norm": 9.657639503479004, "learning_rate": 5e-06, "loss": 0.4635, "num_input_tokens_seen": 132710872, "step": 2117 }, { "epoch": 7.044925124792013, "loss": 0.27575814723968506, "loss_ce": 1.32057891732984e-06, "loss_iou": 0.08642578125, "loss_num": 0.0205078125, "loss_xval": 0.275390625, "num_input_tokens_seen": 132710872, "step": 2117 }, { "epoch": 7.048252911813644, "grad_norm": 23.95869255065918, "learning_rate": 5e-06, "loss": 0.5998, "num_input_tokens_seen": 132774160, "step": 2118 }, { "epoch": 7.048252911813644, "loss": 0.6318380236625671, "loss_ce": 0.0001241380232386291, "loss_iou": 0.2265625, "loss_num": 0.03564453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 132774160, "step": 2118 }, { "epoch": 7.0515806988352745, "grad_norm": 25.69697380065918, "learning_rate": 5e-06, "loss": 0.8364, "num_input_tokens_seen": 132836612, "step": 2119 }, { "epoch": 7.0515806988352745, "loss": 1.0410590171813965, "loss_ce": 4.3432712118374184e-05, "loss_iou": 0.37109375, "loss_num": 0.06005859375, "loss_xval": 1.0390625, "num_input_tokens_seen": 132836612, "step": 2119 }, { "epoch": 7.054908485856905, "grad_norm": 17.810001373291016, "learning_rate": 5e-06, "loss": 0.5047, "num_input_tokens_seen": 132900060, "step": 2120 }, { "epoch": 7.054908485856905, "loss": 0.6560269594192505, "loss_ce": 0.000631491478998214, "loss_iou": 0.251953125, "loss_num": 0.0303955078125, "loss_xval": 0.65625, "num_input_tokens_seen": 132900060, "step": 2120 }, { "epoch": 7.058236272878536, "grad_norm": 5.995996952056885, "learning_rate": 5e-06, "loss": 0.3272, "num_input_tokens_seen": 132962460, "step": 2121 }, { "epoch": 7.058236272878536, "loss": 0.22238288819789886, "loss_ce": 3.1820909498492256e-05, "loss_iou": 0.0419921875, "loss_num": 0.0277099609375, "loss_xval": 0.22265625, "num_input_tokens_seen": 132962460, "step": 2121 }, { "epoch": 7.061564059900166, "grad_norm": 13.972514152526855, "learning_rate": 5e-06, "loss": 0.4911, "num_input_tokens_seen": 133025832, "step": 2122 }, { "epoch": 7.061564059900166, "loss": 0.3984716832637787, "loss_ce": 0.000156262336531654, "loss_iou": 0.169921875, "loss_num": 0.01171875, "loss_xval": 0.3984375, "num_input_tokens_seen": 133025832, "step": 2122 }, { "epoch": 7.064891846921797, "grad_norm": 24.692224502563477, "learning_rate": 5e-06, "loss": 0.3271, "num_input_tokens_seen": 133087196, "step": 2123 }, { "epoch": 7.064891846921797, "loss": 0.3001381754875183, "loss_ce": 0.0002724815276451409, "loss_iou": 0.10546875, "loss_num": 0.017822265625, "loss_xval": 0.30078125, "num_input_tokens_seen": 133087196, "step": 2123 }, { "epoch": 7.068219633943428, "grad_norm": 23.639907836914062, "learning_rate": 5e-06, "loss": 0.7239, "num_input_tokens_seen": 133151864, "step": 2124 }, { "epoch": 7.068219633943428, "loss": 0.8269512057304382, "loss_ce": 0.0008403375977650285, "loss_iou": 0.267578125, "loss_num": 0.057861328125, "loss_xval": 0.82421875, "num_input_tokens_seen": 133151864, "step": 2124 }, { "epoch": 7.071547420965058, "grad_norm": 24.74835968017578, "learning_rate": 5e-06, "loss": 0.4631, "num_input_tokens_seen": 133213772, "step": 2125 }, { "epoch": 7.071547420965058, "loss": 0.4257923364639282, "loss_ce": 0.00040779763367027044, "loss_iou": 0.162109375, "loss_num": 0.0201416015625, "loss_xval": 0.42578125, "num_input_tokens_seen": 133213772, "step": 2125 }, { "epoch": 7.074875207986689, "grad_norm": 19.926782608032227, "learning_rate": 5e-06, "loss": 0.7907, "num_input_tokens_seen": 133276116, "step": 2126 }, { "epoch": 7.074875207986689, "loss": 0.8679744601249695, "loss_ce": 0.0007869648397900164, "loss_iou": 0.3203125, "loss_num": 0.045654296875, "loss_xval": 0.8671875, "num_input_tokens_seen": 133276116, "step": 2126 }, { "epoch": 7.078202995008319, "grad_norm": 16.30159568786621, "learning_rate": 5e-06, "loss": 0.5164, "num_input_tokens_seen": 133339176, "step": 2127 }, { "epoch": 7.078202995008319, "loss": 0.5031135082244873, "loss_ce": 0.0002448575687594712, "loss_iou": 0.2041015625, "loss_num": 0.0189208984375, "loss_xval": 0.50390625, "num_input_tokens_seen": 133339176, "step": 2127 }, { "epoch": 7.08153078202995, "grad_norm": 8.151956558227539, "learning_rate": 5e-06, "loss": 0.421, "num_input_tokens_seen": 133400304, "step": 2128 }, { "epoch": 7.08153078202995, "loss": 0.3930218815803528, "loss_ce": 1.2608427368832054e-06, "loss_iou": 0.0908203125, "loss_num": 0.0419921875, "loss_xval": 0.392578125, "num_input_tokens_seen": 133400304, "step": 2128 }, { "epoch": 7.084858569051581, "grad_norm": 13.133843421936035, "learning_rate": 5e-06, "loss": 0.4159, "num_input_tokens_seen": 133462648, "step": 2129 }, { "epoch": 7.084858569051581, "loss": 0.4501465857028961, "loss_ce": 0.0003479903098195791, "loss_iou": 0.158203125, "loss_num": 0.026611328125, "loss_xval": 0.44921875, "num_input_tokens_seen": 133462648, "step": 2129 }, { "epoch": 7.088186356073211, "grad_norm": 8.622377395629883, "learning_rate": 5e-06, "loss": 0.6023, "num_input_tokens_seen": 133526648, "step": 2130 }, { "epoch": 7.088186356073211, "loss": 0.4499531388282776, "loss_ce": 1.972313157239114e-06, "loss_iou": 0.166015625, "loss_num": 0.0235595703125, "loss_xval": 0.44921875, "num_input_tokens_seen": 133526648, "step": 2130 }, { "epoch": 7.091514143094842, "grad_norm": 7.122912883758545, "learning_rate": 5e-06, "loss": 0.5123, "num_input_tokens_seen": 133588968, "step": 2131 }, { "epoch": 7.091514143094842, "loss": 0.44815200567245483, "loss_ce": 0.0008863582042977214, "loss_iou": 0.1357421875, "loss_num": 0.035400390625, "loss_xval": 0.447265625, "num_input_tokens_seen": 133588968, "step": 2131 }, { "epoch": 7.0948419301164725, "grad_norm": 18.7135066986084, "learning_rate": 5e-06, "loss": 0.3894, "num_input_tokens_seen": 133649712, "step": 2132 }, { "epoch": 7.0948419301164725, "loss": 0.4233492612838745, "loss_ce": 0.00025354442186653614, "loss_iou": 0.1416015625, "loss_num": 0.0283203125, "loss_xval": 0.423828125, "num_input_tokens_seen": 133649712, "step": 2132 }, { "epoch": 7.098169717138103, "grad_norm": 13.719013214111328, "learning_rate": 5e-06, "loss": 0.39, "num_input_tokens_seen": 133712552, "step": 2133 }, { "epoch": 7.098169717138103, "loss": 0.49302032589912415, "loss_ce": 0.0003140392655041069, "loss_iou": 0.1552734375, "loss_num": 0.03662109375, "loss_xval": 0.4921875, "num_input_tokens_seen": 133712552, "step": 2133 }, { "epoch": 7.101497504159734, "grad_norm": 19.051218032836914, "learning_rate": 5e-06, "loss": 0.6902, "num_input_tokens_seen": 133777008, "step": 2134 }, { "epoch": 7.101497504159734, "loss": 0.6656725406646729, "loss_ce": 2.3110767870093696e-05, "loss_iou": 0.2490234375, "loss_num": 0.033447265625, "loss_xval": 0.6640625, "num_input_tokens_seen": 133777008, "step": 2134 }, { "epoch": 7.104825291181364, "grad_norm": 27.850263595581055, "learning_rate": 5e-06, "loss": 0.8101, "num_input_tokens_seen": 133841120, "step": 2135 }, { "epoch": 7.104825291181364, "loss": 0.7834853529930115, "loss_ce": 0.00040428288048133254, "loss_iou": 0.287109375, "loss_num": 0.041748046875, "loss_xval": 0.78125, "num_input_tokens_seen": 133841120, "step": 2135 }, { "epoch": 7.108153078202995, "grad_norm": 11.196677207946777, "learning_rate": 5e-06, "loss": 0.6175, "num_input_tokens_seen": 133903880, "step": 2136 }, { "epoch": 7.108153078202995, "loss": 0.5809149742126465, "loss_ce": 0.0010504369856789708, "loss_iou": 0.205078125, "loss_num": 0.033935546875, "loss_xval": 0.578125, "num_input_tokens_seen": 133903880, "step": 2136 }, { "epoch": 7.1114808652246255, "grad_norm": 14.843448638916016, "learning_rate": 5e-06, "loss": 0.669, "num_input_tokens_seen": 133967252, "step": 2137 }, { "epoch": 7.1114808652246255, "loss": 0.607893705368042, "loss_ce": 0.00022766689653508365, "loss_iou": 0.2333984375, "loss_num": 0.0281982421875, "loss_xval": 0.609375, "num_input_tokens_seen": 133967252, "step": 2137 }, { "epoch": 7.114808652246256, "grad_norm": 14.351639747619629, "learning_rate": 5e-06, "loss": 0.5826, "num_input_tokens_seen": 134030000, "step": 2138 }, { "epoch": 7.114808652246256, "loss": 0.6054301261901855, "loss_ce": 0.0003275854396633804, "loss_iou": 0.248046875, "loss_num": 0.0218505859375, "loss_xval": 0.60546875, "num_input_tokens_seen": 134030000, "step": 2138 }, { "epoch": 7.118136439267887, "grad_norm": 13.315241813659668, "learning_rate": 5e-06, "loss": 0.3675, "num_input_tokens_seen": 134091832, "step": 2139 }, { "epoch": 7.118136439267887, "loss": 0.2843996584415436, "loss_ce": 3.687216667458415e-05, "loss_iou": 0.099609375, "loss_num": 0.0169677734375, "loss_xval": 0.28515625, "num_input_tokens_seen": 134091832, "step": 2139 }, { "epoch": 7.121464226289517, "grad_norm": 8.605134963989258, "learning_rate": 5e-06, "loss": 0.7246, "num_input_tokens_seen": 134154940, "step": 2140 }, { "epoch": 7.121464226289517, "loss": 0.7148795127868652, "loss_ce": 3.583366560633294e-05, "loss_iou": 0.2890625, "loss_num": 0.027587890625, "loss_xval": 0.71484375, "num_input_tokens_seen": 134154940, "step": 2140 }, { "epoch": 7.124792013311148, "grad_norm": 11.956085205078125, "learning_rate": 5e-06, "loss": 0.6273, "num_input_tokens_seen": 134217168, "step": 2141 }, { "epoch": 7.124792013311148, "loss": 0.8701420426368713, "loss_ce": 2.4833516363287345e-05, "loss_iou": 0.349609375, "loss_num": 0.034423828125, "loss_xval": 0.87109375, "num_input_tokens_seen": 134217168, "step": 2141 }, { "epoch": 7.128119800332779, "grad_norm": 12.167706489562988, "learning_rate": 5e-06, "loss": 0.5856, "num_input_tokens_seen": 134280644, "step": 2142 }, { "epoch": 7.128119800332779, "loss": 0.4021015465259552, "loss_ce": 1.92600100490381e-06, "loss_iou": 0.1455078125, "loss_num": 0.02197265625, "loss_xval": 0.40234375, "num_input_tokens_seen": 134280644, "step": 2142 }, { "epoch": 7.131447587354409, "grad_norm": 8.307374954223633, "learning_rate": 5e-06, "loss": 0.5468, "num_input_tokens_seen": 134343288, "step": 2143 }, { "epoch": 7.131447587354409, "loss": 0.4531511664390564, "loss_ce": 2.616040364955552e-05, "loss_iou": 0.1318359375, "loss_num": 0.0380859375, "loss_xval": 0.453125, "num_input_tokens_seen": 134343288, "step": 2143 }, { "epoch": 7.13477537437604, "grad_norm": 13.545499801635742, "learning_rate": 5e-06, "loss": 0.5719, "num_input_tokens_seen": 134405408, "step": 2144 }, { "epoch": 7.13477537437604, "loss": 0.6209876537322998, "loss_ce": 1.599979259481188e-05, "loss_iou": 0.2099609375, "loss_num": 0.040283203125, "loss_xval": 0.62109375, "num_input_tokens_seen": 134405408, "step": 2144 }, { "epoch": 7.13810316139767, "grad_norm": 10.958232879638672, "learning_rate": 5e-06, "loss": 0.4651, "num_input_tokens_seen": 134468492, "step": 2145 }, { "epoch": 7.13810316139767, "loss": 0.4312274754047394, "loss_ce": 7.5122581620235e-05, "loss_iou": 0.1572265625, "loss_num": 0.023193359375, "loss_xval": 0.431640625, "num_input_tokens_seen": 134468492, "step": 2145 }, { "epoch": 7.141430948419301, "grad_norm": 13.61579418182373, "learning_rate": 5e-06, "loss": 0.685, "num_input_tokens_seen": 134530964, "step": 2146 }, { "epoch": 7.141430948419301, "loss": 0.6885131001472473, "loss_ce": 0.000402759644202888, "loss_iou": 0.25, "loss_num": 0.037841796875, "loss_xval": 0.6875, "num_input_tokens_seen": 134530964, "step": 2146 }, { "epoch": 7.144758735440932, "grad_norm": 22.375713348388672, "learning_rate": 5e-06, "loss": 0.4229, "num_input_tokens_seen": 134591868, "step": 2147 }, { "epoch": 7.144758735440932, "loss": 0.3761381208896637, "loss_ce": 2.4225275410572067e-05, "loss_iou": 0.09716796875, "loss_num": 0.036376953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 134591868, "step": 2147 }, { "epoch": 7.148086522462562, "grad_norm": 44.56144714355469, "learning_rate": 5e-06, "loss": 0.8937, "num_input_tokens_seen": 134656396, "step": 2148 }, { "epoch": 7.148086522462562, "loss": 1.0236693620681763, "loss_ce": 0.0007201368571259081, "loss_iou": 0.40234375, "loss_num": 0.043701171875, "loss_xval": 1.0234375, "num_input_tokens_seen": 134656396, "step": 2148 }, { "epoch": 7.151414309484193, "grad_norm": 39.387786865234375, "learning_rate": 5e-06, "loss": 0.8271, "num_input_tokens_seen": 134719168, "step": 2149 }, { "epoch": 7.151414309484193, "loss": 0.8682547807693481, "loss_ce": 0.0008231945685110986, "loss_iou": 0.32421875, "loss_num": 0.043701171875, "loss_xval": 0.8671875, "num_input_tokens_seen": 134719168, "step": 2149 }, { "epoch": 7.1547420965058235, "grad_norm": 20.380252838134766, "learning_rate": 5e-06, "loss": 0.8397, "num_input_tokens_seen": 134782724, "step": 2150 }, { "epoch": 7.1547420965058235, "loss": 0.7608818411827087, "loss_ce": 1.756585515977349e-05, "loss_iou": 0.2265625, "loss_num": 0.061767578125, "loss_xval": 0.76171875, "num_input_tokens_seen": 134782724, "step": 2150 }, { "epoch": 7.158069883527454, "grad_norm": 31.273258209228516, "learning_rate": 5e-06, "loss": 0.5032, "num_input_tokens_seen": 134845868, "step": 2151 }, { "epoch": 7.158069883527454, "loss": 0.6265900731086731, "loss_ce": 0.0002472873020451516, "loss_iou": 0.2421875, "loss_num": 0.0283203125, "loss_xval": 0.625, "num_input_tokens_seen": 134845868, "step": 2151 }, { "epoch": 7.161397670549085, "grad_norm": 107.38353729248047, "learning_rate": 5e-06, "loss": 0.5315, "num_input_tokens_seen": 134908612, "step": 2152 }, { "epoch": 7.161397670549085, "loss": 0.5550842881202698, "loss_ce": 0.00015266229456756264, "loss_iou": 0.181640625, "loss_num": 0.0380859375, "loss_xval": 0.5546875, "num_input_tokens_seen": 134908612, "step": 2152 }, { "epoch": 7.164725457570715, "grad_norm": 19.38658905029297, "learning_rate": 5e-06, "loss": 0.6502, "num_input_tokens_seen": 134972560, "step": 2153 }, { "epoch": 7.164725457570715, "loss": 0.7336630821228027, "loss_ce": 2.0465968191274442e-05, "loss_iou": 0.25390625, "loss_num": 0.044921875, "loss_xval": 0.734375, "num_input_tokens_seen": 134972560, "step": 2153 }, { "epoch": 7.168053244592346, "grad_norm": 7.492495536804199, "learning_rate": 5e-06, "loss": 0.6256, "num_input_tokens_seen": 135034692, "step": 2154 }, { "epoch": 7.168053244592346, "loss": 0.46116214990615845, "loss_ce": 0.00022465427173301578, "loss_iou": 0.142578125, "loss_num": 0.03515625, "loss_xval": 0.4609375, "num_input_tokens_seen": 135034692, "step": 2154 }, { "epoch": 7.1713810316139766, "grad_norm": 7.443278789520264, "learning_rate": 5e-06, "loss": 0.3557, "num_input_tokens_seen": 135097568, "step": 2155 }, { "epoch": 7.1713810316139766, "loss": 0.3782976269721985, "loss_ce": 1.700637426438334e-06, "loss_iou": 0.150390625, "loss_num": 0.01531982421875, "loss_xval": 0.37890625, "num_input_tokens_seen": 135097568, "step": 2155 }, { "epoch": 7.174708818635607, "grad_norm": 14.635993957519531, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 135161024, "step": 2156 }, { "epoch": 7.174708818635607, "loss": 0.6834495067596436, "loss_ce": 0.0003440164728090167, "loss_iou": 0.240234375, "loss_num": 0.04052734375, "loss_xval": 0.68359375, "num_input_tokens_seen": 135161024, "step": 2156 }, { "epoch": 7.178036605657238, "grad_norm": 12.840943336486816, "learning_rate": 5e-06, "loss": 0.3605, "num_input_tokens_seen": 135223448, "step": 2157 }, { "epoch": 7.178036605657238, "loss": 0.3703942596912384, "loss_ce": 2.410541583230952e-06, "loss_iou": 0.1513671875, "loss_num": 0.01336669921875, "loss_xval": 0.37109375, "num_input_tokens_seen": 135223448, "step": 2157 }, { "epoch": 7.181364392678868, "grad_norm": 8.797908782958984, "learning_rate": 5e-06, "loss": 0.6635, "num_input_tokens_seen": 135284944, "step": 2158 }, { "epoch": 7.181364392678868, "loss": 0.6668326258659363, "loss_ce": 0.00020665349438786507, "loss_iou": 0.2138671875, "loss_num": 0.0478515625, "loss_xval": 0.66796875, "num_input_tokens_seen": 135284944, "step": 2158 }, { "epoch": 7.184692179700499, "grad_norm": 8.726399421691895, "learning_rate": 5e-06, "loss": 0.4715, "num_input_tokens_seen": 135346504, "step": 2159 }, { "epoch": 7.184692179700499, "loss": 0.32166117429733276, "loss_ce": 5.90826630286756e-06, "loss_iou": 0.091796875, "loss_num": 0.027587890625, "loss_xval": 0.322265625, "num_input_tokens_seen": 135346504, "step": 2159 }, { "epoch": 7.18801996672213, "grad_norm": 8.025845527648926, "learning_rate": 5e-06, "loss": 0.495, "num_input_tokens_seen": 135406064, "step": 2160 }, { "epoch": 7.18801996672213, "loss": 0.5212523937225342, "loss_ce": 1.2177071766927838e-05, "loss_iou": 0.1279296875, "loss_num": 0.052978515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 135406064, "step": 2160 }, { "epoch": 7.19134775374376, "grad_norm": 31.28653907775879, "learning_rate": 5e-06, "loss": 0.6667, "num_input_tokens_seen": 135469200, "step": 2161 }, { "epoch": 7.19134775374376, "loss": 0.7990618944168091, "loss_ce": 0.00011170025391038507, "loss_iou": 0.310546875, "loss_num": 0.03564453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 135469200, "step": 2161 }, { "epoch": 7.194675540765391, "grad_norm": 54.90666198730469, "learning_rate": 5e-06, "loss": 0.9046, "num_input_tokens_seen": 135532464, "step": 2162 }, { "epoch": 7.194675540765391, "loss": 1.0472854375839233, "loss_ce": 0.0008986816392280161, "loss_iou": 0.40625, "loss_num": 0.046875, "loss_xval": 1.046875, "num_input_tokens_seen": 135532464, "step": 2162 }, { "epoch": 7.1980033277870215, "grad_norm": 50.0347785949707, "learning_rate": 5e-06, "loss": 0.6674, "num_input_tokens_seen": 135596124, "step": 2163 }, { "epoch": 7.1980033277870215, "loss": 0.7793710231781006, "loss_ce": 0.0009286506101489067, "loss_iou": 0.30078125, "loss_num": 0.035888671875, "loss_xval": 0.77734375, "num_input_tokens_seen": 135596124, "step": 2163 }, { "epoch": 7.201331114808652, "grad_norm": 27.123538970947266, "learning_rate": 5e-06, "loss": 0.7014, "num_input_tokens_seen": 135659412, "step": 2164 }, { "epoch": 7.201331114808652, "loss": 0.9253852367401123, "loss_ce": 9.2299647803884e-05, "loss_iou": 0.369140625, "loss_num": 0.03759765625, "loss_xval": 0.92578125, "num_input_tokens_seen": 135659412, "step": 2164 }, { "epoch": 7.204658901830283, "grad_norm": 19.49197769165039, "learning_rate": 5e-06, "loss": 0.6537, "num_input_tokens_seen": 135723352, "step": 2165 }, { "epoch": 7.204658901830283, "loss": 0.8150896430015564, "loss_ce": 0.0006365244626067579, "loss_iou": 0.314453125, "loss_num": 0.037109375, "loss_xval": 0.8125, "num_input_tokens_seen": 135723352, "step": 2165 }, { "epoch": 7.207986688851913, "grad_norm": 14.17443561553955, "learning_rate": 5e-06, "loss": 0.5658, "num_input_tokens_seen": 135785900, "step": 2166 }, { "epoch": 7.207986688851913, "loss": 0.5921099185943604, "loss_ce": 0.0005571962101384997, "loss_iou": 0.19140625, "loss_num": 0.041748046875, "loss_xval": 0.58984375, "num_input_tokens_seen": 135785900, "step": 2166 }, { "epoch": 7.211314475873544, "grad_norm": 19.05231285095215, "learning_rate": 5e-06, "loss": 0.6358, "num_input_tokens_seen": 135848144, "step": 2167 }, { "epoch": 7.211314475873544, "loss": 0.6873797178268433, "loss_ce": 1.776991211954737e-06, "loss_iou": 0.2421875, "loss_num": 0.04052734375, "loss_xval": 0.6875, "num_input_tokens_seen": 135848144, "step": 2167 }, { "epoch": 7.2146422628951745, "grad_norm": 18.096403121948242, "learning_rate": 5e-06, "loss": 0.5271, "num_input_tokens_seen": 135910460, "step": 2168 }, { "epoch": 7.2146422628951745, "loss": 0.5621370673179626, "loss_ce": 3.3101055123552214e-06, "loss_iou": 0.1923828125, "loss_num": 0.035400390625, "loss_xval": 0.5625, "num_input_tokens_seen": 135910460, "step": 2168 }, { "epoch": 7.217970049916805, "grad_norm": 20.059396743774414, "learning_rate": 5e-06, "loss": 0.6003, "num_input_tokens_seen": 135975380, "step": 2169 }, { "epoch": 7.217970049916805, "loss": 0.838268518447876, "loss_ce": 0.0003778632963076234, "loss_iou": 0.310546875, "loss_num": 0.043212890625, "loss_xval": 0.8359375, "num_input_tokens_seen": 135975380, "step": 2169 }, { "epoch": 7.221297836938436, "grad_norm": 8.007813453674316, "learning_rate": 5e-06, "loss": 0.5115, "num_input_tokens_seen": 136038064, "step": 2170 }, { "epoch": 7.221297836938436, "loss": 0.3074108958244324, "loss_ce": 0.0016552694141864777, "loss_iou": 0.0771484375, "loss_num": 0.0303955078125, "loss_xval": 0.306640625, "num_input_tokens_seen": 136038064, "step": 2170 }, { "epoch": 7.224625623960066, "grad_norm": 21.28730583190918, "learning_rate": 5e-06, "loss": 0.574, "num_input_tokens_seen": 136099920, "step": 2171 }, { "epoch": 7.224625623960066, "loss": 0.4126671552658081, "loss_ce": 6.950089300516993e-05, "loss_iou": 0.083984375, "loss_num": 0.049072265625, "loss_xval": 0.412109375, "num_input_tokens_seen": 136099920, "step": 2171 }, { "epoch": 7.227953410981697, "grad_norm": 8.285771369934082, "learning_rate": 5e-06, "loss": 0.4561, "num_input_tokens_seen": 136161848, "step": 2172 }, { "epoch": 7.227953410981697, "loss": 0.5040911436080933, "loss_ce": 1.798846142264665e-06, "loss_iou": 0.19140625, "loss_num": 0.0242919921875, "loss_xval": 0.50390625, "num_input_tokens_seen": 136161848, "step": 2172 }, { "epoch": 7.231281198003328, "grad_norm": 14.594169616699219, "learning_rate": 5e-06, "loss": 0.6573, "num_input_tokens_seen": 136225140, "step": 2173 }, { "epoch": 7.231281198003328, "loss": 0.8803286552429199, "loss_ce": 0.00032380284392274916, "loss_iou": 0.349609375, "loss_num": 0.035888671875, "loss_xval": 0.87890625, "num_input_tokens_seen": 136225140, "step": 2173 }, { "epoch": 7.234608985024958, "grad_norm": 30.296472549438477, "learning_rate": 5e-06, "loss": 0.4099, "num_input_tokens_seen": 136288212, "step": 2174 }, { "epoch": 7.234608985024958, "loss": 0.5052783489227295, "loss_ce": 2.9340175387915224e-05, "loss_iou": 0.193359375, "loss_num": 0.02392578125, "loss_xval": 0.50390625, "num_input_tokens_seen": 136288212, "step": 2174 }, { "epoch": 7.237936772046589, "grad_norm": 26.452207565307617, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 136350880, "step": 2175 }, { "epoch": 7.237936772046589, "loss": 0.67698734998703, "loss_ce": 1.968817196029704e-05, "loss_iou": 0.23828125, "loss_num": 0.0400390625, "loss_xval": 0.67578125, "num_input_tokens_seen": 136350880, "step": 2175 }, { "epoch": 7.241264559068219, "grad_norm": 12.72392749786377, "learning_rate": 5e-06, "loss": 0.4882, "num_input_tokens_seen": 136412816, "step": 2176 }, { "epoch": 7.241264559068219, "loss": 0.44616037607192993, "loss_ce": 0.0003596358874347061, "loss_iou": 0.150390625, "loss_num": 0.02880859375, "loss_xval": 0.4453125, "num_input_tokens_seen": 136412816, "step": 2176 }, { "epoch": 7.24459234608985, "grad_norm": 8.840194702148438, "learning_rate": 5e-06, "loss": 0.5647, "num_input_tokens_seen": 136476156, "step": 2177 }, { "epoch": 7.24459234608985, "loss": 0.4819354712963104, "loss_ce": 1.8873446379075176e-06, "loss_iou": 0.181640625, "loss_num": 0.02392578125, "loss_xval": 0.482421875, "num_input_tokens_seen": 136476156, "step": 2177 }, { "epoch": 7.247920133111481, "grad_norm": 7.642147541046143, "learning_rate": 5e-06, "loss": 0.5221, "num_input_tokens_seen": 136537200, "step": 2178 }, { "epoch": 7.247920133111481, "loss": 0.4028283953666687, "loss_ce": 0.00030153203988447785, "loss_iou": 0.11083984375, "loss_num": 0.0361328125, "loss_xval": 0.40234375, "num_input_tokens_seen": 136537200, "step": 2178 }, { "epoch": 7.251247920133111, "grad_norm": 11.908682823181152, "learning_rate": 5e-06, "loss": 0.5747, "num_input_tokens_seen": 136599616, "step": 2179 }, { "epoch": 7.251247920133111, "loss": 0.6730359196662903, "loss_ce": 1.2189480003144126e-06, "loss_iou": 0.2392578125, "loss_num": 0.038818359375, "loss_xval": 0.671875, "num_input_tokens_seen": 136599616, "step": 2179 }, { "epoch": 7.254575707154742, "grad_norm": 15.636880874633789, "learning_rate": 5e-06, "loss": 0.4868, "num_input_tokens_seen": 136662144, "step": 2180 }, { "epoch": 7.254575707154742, "loss": 0.4792640507221222, "loss_ce": 0.00010754182585515082, "loss_iou": 0.173828125, "loss_num": 0.0262451171875, "loss_xval": 0.478515625, "num_input_tokens_seen": 136662144, "step": 2180 }, { "epoch": 7.2579034941763725, "grad_norm": 14.893200874328613, "learning_rate": 5e-06, "loss": 0.5396, "num_input_tokens_seen": 136724824, "step": 2181 }, { "epoch": 7.2579034941763725, "loss": 0.7730810046195984, "loss_ce": 9.706915079732426e-06, "loss_iou": 0.2890625, "loss_num": 0.0390625, "loss_xval": 0.7734375, "num_input_tokens_seen": 136724824, "step": 2181 }, { "epoch": 7.261231281198003, "grad_norm": 24.566614151000977, "learning_rate": 5e-06, "loss": 0.5443, "num_input_tokens_seen": 136788196, "step": 2182 }, { "epoch": 7.261231281198003, "loss": 0.46374863386154175, "loss_ce": 3.5248613130534068e-06, "loss_iou": 0.1865234375, "loss_num": 0.0181884765625, "loss_xval": 0.462890625, "num_input_tokens_seen": 136788196, "step": 2182 }, { "epoch": 7.264559068219634, "grad_norm": 15.425735473632812, "learning_rate": 5e-06, "loss": 0.4932, "num_input_tokens_seen": 136850088, "step": 2183 }, { "epoch": 7.264559068219634, "loss": 0.2756975591182709, "loss_ce": 1.7433937955502188e-06, "loss_iou": 0.06640625, "loss_num": 0.028564453125, "loss_xval": 0.275390625, "num_input_tokens_seen": 136850088, "step": 2183 }, { "epoch": 7.267886855241264, "grad_norm": 16.81602668762207, "learning_rate": 5e-06, "loss": 0.8287, "num_input_tokens_seen": 136913232, "step": 2184 }, { "epoch": 7.267886855241264, "loss": 0.8084591627120972, "loss_ce": 0.00035373945138417184, "loss_iou": 0.345703125, "loss_num": 0.0235595703125, "loss_xval": 0.80859375, "num_input_tokens_seen": 136913232, "step": 2184 }, { "epoch": 7.271214642262895, "grad_norm": 13.915985107421875, "learning_rate": 5e-06, "loss": 0.5439, "num_input_tokens_seen": 136976388, "step": 2185 }, { "epoch": 7.271214642262895, "loss": 0.33465680480003357, "loss_ce": 1.0548567388468655e-06, "loss_iou": 0.11865234375, "loss_num": 0.01953125, "loss_xval": 0.333984375, "num_input_tokens_seen": 136976388, "step": 2185 }, { "epoch": 7.2745424292845255, "grad_norm": 20.259357452392578, "learning_rate": 5e-06, "loss": 0.5915, "num_input_tokens_seen": 137040244, "step": 2186 }, { "epoch": 7.2745424292845255, "loss": 0.6317089796066284, "loss_ce": 0.0008496057707816362, "loss_iou": 0.240234375, "loss_num": 0.030029296875, "loss_xval": 0.6328125, "num_input_tokens_seen": 137040244, "step": 2186 }, { "epoch": 7.277870216306156, "grad_norm": 48.91740036010742, "learning_rate": 5e-06, "loss": 0.7124, "num_input_tokens_seen": 137104496, "step": 2187 }, { "epoch": 7.277870216306156, "loss": 0.743898868560791, "loss_ce": 2.3123652681533713e-06, "loss_iou": 0.294921875, "loss_num": 0.0308837890625, "loss_xval": 0.7421875, "num_input_tokens_seen": 137104496, "step": 2187 }, { "epoch": 7.281198003327787, "grad_norm": 36.946380615234375, "learning_rate": 5e-06, "loss": 0.7402, "num_input_tokens_seen": 137168028, "step": 2188 }, { "epoch": 7.281198003327787, "loss": 0.8287315368652344, "loss_ce": 0.0002097858814522624, "loss_iou": 0.345703125, "loss_num": 0.0272216796875, "loss_xval": 0.828125, "num_input_tokens_seen": 137168028, "step": 2188 }, { "epoch": 7.284525790349417, "grad_norm": 9.247626304626465, "learning_rate": 5e-06, "loss": 0.6259, "num_input_tokens_seen": 137229732, "step": 2189 }, { "epoch": 7.284525790349417, "loss": 0.4721830189228058, "loss_ce": 1.5054575669637416e-05, "loss_iou": 0.1171875, "loss_num": 0.047607421875, "loss_xval": 0.47265625, "num_input_tokens_seen": 137229732, "step": 2189 }, { "epoch": 7.287853577371048, "grad_norm": 19.686338424682617, "learning_rate": 5e-06, "loss": 0.4015, "num_input_tokens_seen": 137291156, "step": 2190 }, { "epoch": 7.287853577371048, "loss": 0.5028140544891357, "loss_ce": 6.472037057392299e-06, "loss_iou": 0.154296875, "loss_num": 0.038818359375, "loss_xval": 0.50390625, "num_input_tokens_seen": 137291156, "step": 2190 }, { "epoch": 7.291181364392679, "grad_norm": 13.738224029541016, "learning_rate": 5e-06, "loss": 0.635, "num_input_tokens_seen": 137352632, "step": 2191 }, { "epoch": 7.291181364392679, "loss": 0.6826224327087402, "loss_ce": 5.20381308888318e-06, "loss_iou": 0.275390625, "loss_num": 0.026123046875, "loss_xval": 0.68359375, "num_input_tokens_seen": 137352632, "step": 2191 }, { "epoch": 7.294509151414309, "grad_norm": 9.715002059936523, "learning_rate": 5e-06, "loss": 0.5425, "num_input_tokens_seen": 137414092, "step": 2192 }, { "epoch": 7.294509151414309, "loss": 0.6846959590911865, "loss_ce": 3.579268650355516e-06, "loss_iou": 0.232421875, "loss_num": 0.044189453125, "loss_xval": 0.68359375, "num_input_tokens_seen": 137414092, "step": 2192 }, { "epoch": 7.29783693843594, "grad_norm": 28.617979049682617, "learning_rate": 5e-06, "loss": 0.7235, "num_input_tokens_seen": 137476172, "step": 2193 }, { "epoch": 7.29783693843594, "loss": 0.8596231341362, "loss_ce": 4.00542194256559e-06, "loss_iou": 0.32421875, "loss_num": 0.042236328125, "loss_xval": 0.859375, "num_input_tokens_seen": 137476172, "step": 2193 }, { "epoch": 7.3011647254575704, "grad_norm": 40.17922592163086, "learning_rate": 5e-06, "loss": 0.6294, "num_input_tokens_seen": 137539904, "step": 2194 }, { "epoch": 7.3011647254575704, "loss": 0.768734335899353, "loss_ce": 0.00017961469711735845, "loss_iou": 0.291015625, "loss_num": 0.03759765625, "loss_xval": 0.76953125, "num_input_tokens_seen": 137539904, "step": 2194 }, { "epoch": 7.304492512479201, "grad_norm": 29.39878273010254, "learning_rate": 5e-06, "loss": 0.6412, "num_input_tokens_seen": 137602712, "step": 2195 }, { "epoch": 7.304492512479201, "loss": 0.8083170652389526, "loss_ce": 0.00021154813293833286, "loss_iou": 0.318359375, "loss_num": 0.033935546875, "loss_xval": 0.80859375, "num_input_tokens_seen": 137602712, "step": 2195 }, { "epoch": 7.307820299500832, "grad_norm": 26.26633644104004, "learning_rate": 5e-06, "loss": 0.6482, "num_input_tokens_seen": 137665076, "step": 2196 }, { "epoch": 7.307820299500832, "loss": 0.9297459721565247, "loss_ce": 0.00030264025554060936, "loss_iou": 0.34765625, "loss_num": 0.04638671875, "loss_xval": 0.9296875, "num_input_tokens_seen": 137665076, "step": 2196 }, { "epoch": 7.311148086522462, "grad_norm": 29.523025512695312, "learning_rate": 5e-06, "loss": 0.9805, "num_input_tokens_seen": 137727904, "step": 2197 }, { "epoch": 7.311148086522462, "loss": 1.2526119947433472, "loss_ce": 0.00029262248426675797, "loss_iou": 0.421875, "loss_num": 0.08203125, "loss_xval": 1.25, "num_input_tokens_seen": 137727904, "step": 2197 }, { "epoch": 7.314475873544093, "grad_norm": 10.501200675964355, "learning_rate": 5e-06, "loss": 0.5847, "num_input_tokens_seen": 137791072, "step": 2198 }, { "epoch": 7.314475873544093, "loss": 0.47004061937332153, "loss_ce": 0.00019196512585040182, "loss_iou": 0.15625, "loss_num": 0.03125, "loss_xval": 0.470703125, "num_input_tokens_seen": 137791072, "step": 2198 }, { "epoch": 7.3178036605657235, "grad_norm": 39.24136734008789, "learning_rate": 5e-06, "loss": 0.542, "num_input_tokens_seen": 137852956, "step": 2199 }, { "epoch": 7.3178036605657235, "loss": 0.3972112238407135, "loss_ce": 0.0006047880742698908, "loss_iou": 0.1611328125, "loss_num": 0.014892578125, "loss_xval": 0.396484375, "num_input_tokens_seen": 137852956, "step": 2199 }, { "epoch": 7.321131447587354, "grad_norm": 35.60654067993164, "learning_rate": 5e-06, "loss": 0.6981, "num_input_tokens_seen": 137916176, "step": 2200 }, { "epoch": 7.321131447587354, "loss": 0.9853753447532654, "loss_ce": 2.3854707251302898e-05, "loss_iou": 0.37890625, "loss_num": 0.04541015625, "loss_xval": 0.984375, "num_input_tokens_seen": 137916176, "step": 2200 }, { "epoch": 7.324459234608985, "grad_norm": 22.625009536743164, "learning_rate": 5e-06, "loss": 0.5768, "num_input_tokens_seen": 137978320, "step": 2201 }, { "epoch": 7.324459234608985, "loss": 0.6514400243759155, "loss_ce": 0.0011714803986251354, "loss_iou": 0.21875, "loss_num": 0.042724609375, "loss_xval": 0.6484375, "num_input_tokens_seen": 137978320, "step": 2201 }, { "epoch": 7.327787021630615, "grad_norm": 21.0862979888916, "learning_rate": 5e-06, "loss": 0.6942, "num_input_tokens_seen": 138041700, "step": 2202 }, { "epoch": 7.327787021630615, "loss": 0.7108654975891113, "loss_ce": 0.00017217599088326097, "loss_iou": 0.263671875, "loss_num": 0.036865234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 138041700, "step": 2202 }, { "epoch": 7.331114808652246, "grad_norm": 12.749632835388184, "learning_rate": 5e-06, "loss": 0.6096, "num_input_tokens_seen": 138105400, "step": 2203 }, { "epoch": 7.331114808652246, "loss": 0.6893917322158813, "loss_ce": 0.0006710394518449903, "loss_iou": 0.244140625, "loss_num": 0.0400390625, "loss_xval": 0.6875, "num_input_tokens_seen": 138105400, "step": 2203 }, { "epoch": 7.334442595673877, "grad_norm": 6.5325798988342285, "learning_rate": 5e-06, "loss": 0.5942, "num_input_tokens_seen": 138168816, "step": 2204 }, { "epoch": 7.334442595673877, "loss": 0.722283124923706, "loss_ce": 0.0007254565134644508, "loss_iou": 0.267578125, "loss_num": 0.037353515625, "loss_xval": 0.72265625, "num_input_tokens_seen": 138168816, "step": 2204 }, { "epoch": 7.337770382695507, "grad_norm": 8.450157165527344, "learning_rate": 5e-06, "loss": 0.4747, "num_input_tokens_seen": 138231252, "step": 2205 }, { "epoch": 7.337770382695507, "loss": 0.3077167868614197, "loss_ce": 8.024676390050445e-06, "loss_iou": 0.0966796875, "loss_num": 0.0228271484375, "loss_xval": 0.30859375, "num_input_tokens_seen": 138231252, "step": 2205 }, { "epoch": 7.341098169717138, "grad_norm": 6.746068477630615, "learning_rate": 5e-06, "loss": 0.4976, "num_input_tokens_seen": 138292864, "step": 2206 }, { "epoch": 7.341098169717138, "loss": 0.6572527885437012, "loss_ce": 2.625125489430502e-05, "loss_iou": 0.1943359375, "loss_num": 0.0537109375, "loss_xval": 0.65625, "num_input_tokens_seen": 138292864, "step": 2206 }, { "epoch": 7.344425956738768, "grad_norm": 11.310198783874512, "learning_rate": 5e-06, "loss": 0.595, "num_input_tokens_seen": 138354632, "step": 2207 }, { "epoch": 7.344425956738768, "loss": 0.8515398502349854, "loss_ce": 0.0002214901614934206, "loss_iou": 0.353515625, "loss_num": 0.0289306640625, "loss_xval": 0.8515625, "num_input_tokens_seen": 138354632, "step": 2207 }, { "epoch": 7.347753743760399, "grad_norm": 8.63731861114502, "learning_rate": 5e-06, "loss": 0.5567, "num_input_tokens_seen": 138416896, "step": 2208 }, { "epoch": 7.347753743760399, "loss": 0.727154552936554, "loss_ce": 1.2276163033675402e-05, "loss_iou": 0.275390625, "loss_num": 0.03515625, "loss_xval": 0.7265625, "num_input_tokens_seen": 138416896, "step": 2208 }, { "epoch": 7.35108153078203, "grad_norm": 9.029618263244629, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 138479288, "step": 2209 }, { "epoch": 7.35108153078203, "loss": 0.5669015645980835, "loss_ce": 0.00012912850070279092, "loss_iou": 0.228515625, "loss_num": 0.02197265625, "loss_xval": 0.56640625, "num_input_tokens_seen": 138479288, "step": 2209 }, { "epoch": 7.35440931780366, "grad_norm": 9.962007522583008, "learning_rate": 5e-06, "loss": 0.5648, "num_input_tokens_seen": 138542280, "step": 2210 }, { "epoch": 7.35440931780366, "loss": 0.647117018699646, "loss_ce": 2.233907434856519e-05, "loss_iou": 0.224609375, "loss_num": 0.039794921875, "loss_xval": 0.6484375, "num_input_tokens_seen": 138542280, "step": 2210 }, { "epoch": 7.357737104825291, "grad_norm": 13.882840156555176, "learning_rate": 5e-06, "loss": 0.4917, "num_input_tokens_seen": 138604104, "step": 2211 }, { "epoch": 7.357737104825291, "loss": 0.4311217665672302, "loss_ce": 3.0484174203593284e-05, "loss_iou": 0.16015625, "loss_num": 0.0220947265625, "loss_xval": 0.431640625, "num_input_tokens_seen": 138604104, "step": 2211 }, { "epoch": 7.3610648918469215, "grad_norm": 10.99440860748291, "learning_rate": 5e-06, "loss": 0.6163, "num_input_tokens_seen": 138668156, "step": 2212 }, { "epoch": 7.3610648918469215, "loss": 0.5804467797279358, "loss_ce": 2.4466739887429867e-06, "loss_iou": 0.2373046875, "loss_num": 0.0211181640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 138668156, "step": 2212 }, { "epoch": 7.364392678868552, "grad_norm": 36.95643615722656, "learning_rate": 5e-06, "loss": 0.7682, "num_input_tokens_seen": 138730268, "step": 2213 }, { "epoch": 7.364392678868552, "loss": 0.7193616628646851, "loss_ce": 1.2985758530703606e-06, "loss_iou": 0.251953125, "loss_num": 0.04296875, "loss_xval": 0.71875, "num_input_tokens_seen": 138730268, "step": 2213 }, { "epoch": 7.367720465890183, "grad_norm": 32.78574752807617, "learning_rate": 5e-06, "loss": 0.5979, "num_input_tokens_seen": 138793436, "step": 2214 }, { "epoch": 7.367720465890183, "loss": 0.7063370943069458, "loss_ce": 3.822601865977049e-05, "loss_iou": 0.267578125, "loss_num": 0.03369140625, "loss_xval": 0.70703125, "num_input_tokens_seen": 138793436, "step": 2214 }, { "epoch": 7.371048252911813, "grad_norm": 13.223318099975586, "learning_rate": 5e-06, "loss": 0.5658, "num_input_tokens_seen": 138856840, "step": 2215 }, { "epoch": 7.371048252911813, "loss": 0.5589154362678528, "loss_ce": 0.00019964051898568869, "loss_iou": 0.2001953125, "loss_num": 0.03173828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 138856840, "step": 2215 }, { "epoch": 7.374376039933444, "grad_norm": 14.709639549255371, "learning_rate": 5e-06, "loss": 0.4572, "num_input_tokens_seen": 138920368, "step": 2216 }, { "epoch": 7.374376039933444, "loss": 0.3885447382926941, "loss_ce": 0.0006663264939561486, "loss_iou": 0.1328125, "loss_num": 0.0245361328125, "loss_xval": 0.388671875, "num_input_tokens_seen": 138920368, "step": 2216 }, { "epoch": 7.3777038269550745, "grad_norm": 16.93494415283203, "learning_rate": 5e-06, "loss": 0.6125, "num_input_tokens_seen": 138980696, "step": 2217 }, { "epoch": 7.3777038269550745, "loss": 0.6335501670837402, "loss_ce": 5.2527407206071075e-06, "loss_iou": 0.2109375, "loss_num": 0.04248046875, "loss_xval": 0.6328125, "num_input_tokens_seen": 138980696, "step": 2217 }, { "epoch": 7.381031613976705, "grad_norm": 17.167526245117188, "learning_rate": 5e-06, "loss": 0.522, "num_input_tokens_seen": 139042056, "step": 2218 }, { "epoch": 7.381031613976705, "loss": 0.44043320417404175, "loss_ce": 3.5211305657867342e-06, "loss_iou": 0.1298828125, "loss_num": 0.0361328125, "loss_xval": 0.44140625, "num_input_tokens_seen": 139042056, "step": 2218 }, { "epoch": 7.384359400998336, "grad_norm": 17.030746459960938, "learning_rate": 5e-06, "loss": 0.7257, "num_input_tokens_seen": 139106308, "step": 2219 }, { "epoch": 7.384359400998336, "loss": 0.666380763053894, "loss_ce": 0.000487148470710963, "loss_iou": 0.2294921875, "loss_num": 0.04150390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 139106308, "step": 2219 }, { "epoch": 7.387687188019966, "grad_norm": 14.87932014465332, "learning_rate": 5e-06, "loss": 0.636, "num_input_tokens_seen": 139169272, "step": 2220 }, { "epoch": 7.387687188019966, "loss": 0.6396602392196655, "loss_ce": 1.1841842024296056e-05, "loss_iou": 0.25390625, "loss_num": 0.0264892578125, "loss_xval": 0.640625, "num_input_tokens_seen": 139169272, "step": 2220 }, { "epoch": 7.391014975041597, "grad_norm": 11.073452949523926, "learning_rate": 5e-06, "loss": 0.6228, "num_input_tokens_seen": 139233268, "step": 2221 }, { "epoch": 7.391014975041597, "loss": 0.673436164855957, "loss_ce": 9.630218846723437e-05, "loss_iou": 0.25390625, "loss_num": 0.032958984375, "loss_xval": 0.671875, "num_input_tokens_seen": 139233268, "step": 2221 }, { "epoch": 7.394342762063228, "grad_norm": 11.26318073272705, "learning_rate": 5e-06, "loss": 0.6642, "num_input_tokens_seen": 139296540, "step": 2222 }, { "epoch": 7.394342762063228, "loss": 0.5657228231430054, "loss_ce": 4.9032354581868276e-05, "loss_iou": 0.20703125, "loss_num": 0.0302734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 139296540, "step": 2222 }, { "epoch": 7.397670549084858, "grad_norm": 15.113363265991211, "learning_rate": 5e-06, "loss": 0.7418, "num_input_tokens_seen": 139360116, "step": 2223 }, { "epoch": 7.397670549084858, "loss": 0.6828631162643433, "loss_ce": 1.7772672435967252e-06, "loss_iou": 0.283203125, "loss_num": 0.0234375, "loss_xval": 0.68359375, "num_input_tokens_seen": 139360116, "step": 2223 }, { "epoch": 7.400998336106489, "grad_norm": 18.52653694152832, "learning_rate": 5e-06, "loss": 0.5989, "num_input_tokens_seen": 139422848, "step": 2224 }, { "epoch": 7.400998336106489, "loss": 0.6897132396697998, "loss_ce": 0.0012366485316306353, "loss_iou": 0.2431640625, "loss_num": 0.040283203125, "loss_xval": 0.6875, "num_input_tokens_seen": 139422848, "step": 2224 }, { "epoch": 7.404326123128119, "grad_norm": 13.065515518188477, "learning_rate": 5e-06, "loss": 0.7091, "num_input_tokens_seen": 139486628, "step": 2225 }, { "epoch": 7.404326123128119, "loss": 0.7603779435157776, "loss_ce": 1.9716978840733645e-06, "loss_iou": 0.27734375, "loss_num": 0.04150390625, "loss_xval": 0.76171875, "num_input_tokens_seen": 139486628, "step": 2225 }, { "epoch": 7.40765391014975, "grad_norm": 15.903464317321777, "learning_rate": 5e-06, "loss": 0.766, "num_input_tokens_seen": 139550408, "step": 2226 }, { "epoch": 7.40765391014975, "loss": 0.7958680391311646, "loss_ce": 0.0007020551711320877, "loss_iou": 0.30859375, "loss_num": 0.03515625, "loss_xval": 0.796875, "num_input_tokens_seen": 139550408, "step": 2226 }, { "epoch": 7.410981697171381, "grad_norm": 9.467540740966797, "learning_rate": 5e-06, "loss": 0.6279, "num_input_tokens_seen": 139613796, "step": 2227 }, { "epoch": 7.410981697171381, "loss": 0.634937584400177, "loss_ce": 0.00017195659165736288, "loss_iou": 0.2060546875, "loss_num": 0.044677734375, "loss_xval": 0.6328125, "num_input_tokens_seen": 139613796, "step": 2227 }, { "epoch": 7.414309484193011, "grad_norm": 8.689229011535645, "learning_rate": 5e-06, "loss": 0.6209, "num_input_tokens_seen": 139677200, "step": 2228 }, { "epoch": 7.414309484193011, "loss": 0.6817660927772522, "loss_ce": 3.414712637095363e-06, "loss_iou": 0.2314453125, "loss_num": 0.043701171875, "loss_xval": 0.68359375, "num_input_tokens_seen": 139677200, "step": 2228 }, { "epoch": 7.417637271214642, "grad_norm": 13.251606941223145, "learning_rate": 5e-06, "loss": 0.4906, "num_input_tokens_seen": 139740068, "step": 2229 }, { "epoch": 7.417637271214642, "loss": 0.5885028839111328, "loss_ce": 1.938191189765348e-06, "loss_iou": 0.1943359375, "loss_num": 0.0400390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 139740068, "step": 2229 }, { "epoch": 7.4209650582362725, "grad_norm": 11.008500099182129, "learning_rate": 5e-06, "loss": 0.5164, "num_input_tokens_seen": 139803112, "step": 2230 }, { "epoch": 7.4209650582362725, "loss": 0.5260682702064514, "loss_ce": 6.267149728955701e-06, "loss_iou": 0.2099609375, "loss_num": 0.0213623046875, "loss_xval": 0.52734375, "num_input_tokens_seen": 139803112, "step": 2230 }, { "epoch": 7.424292845257903, "grad_norm": 9.735112190246582, "learning_rate": 5e-06, "loss": 0.8534, "num_input_tokens_seen": 139867240, "step": 2231 }, { "epoch": 7.424292845257903, "loss": 0.8774033188819885, "loss_ce": 0.0005112169310450554, "loss_iou": 0.302734375, "loss_num": 0.054443359375, "loss_xval": 0.875, "num_input_tokens_seen": 139867240, "step": 2231 }, { "epoch": 7.427620632279534, "grad_norm": 14.215788841247559, "learning_rate": 5e-06, "loss": 0.6226, "num_input_tokens_seen": 139929484, "step": 2232 }, { "epoch": 7.427620632279534, "loss": 0.5658280849456787, "loss_ce": 9.323460835730657e-05, "loss_iou": 0.224609375, "loss_num": 0.0230712890625, "loss_xval": 0.56640625, "num_input_tokens_seen": 139929484, "step": 2232 }, { "epoch": 7.430948419301164, "grad_norm": 21.460582733154297, "learning_rate": 5e-06, "loss": 0.5453, "num_input_tokens_seen": 139992020, "step": 2233 }, { "epoch": 7.430948419301164, "loss": 0.6169675588607788, "loss_ce": 0.0001462907821405679, "loss_iou": 0.19140625, "loss_num": 0.046630859375, "loss_xval": 0.6171875, "num_input_tokens_seen": 139992020, "step": 2233 }, { "epoch": 7.434276206322795, "grad_norm": 33.52012634277344, "learning_rate": 5e-06, "loss": 0.3928, "num_input_tokens_seen": 140052256, "step": 2234 }, { "epoch": 7.434276206322795, "loss": 0.5726792216300964, "loss_ce": 0.0002914984943345189, "loss_iou": 0.2236328125, "loss_num": 0.02490234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 140052256, "step": 2234 }, { "epoch": 7.437603993344426, "grad_norm": 26.063772201538086, "learning_rate": 5e-06, "loss": 0.4534, "num_input_tokens_seen": 140114512, "step": 2235 }, { "epoch": 7.437603993344426, "loss": 0.4469048082828522, "loss_ce": 5.386709744925611e-06, "loss_iou": 0.1796875, "loss_num": 0.0177001953125, "loss_xval": 0.447265625, "num_input_tokens_seen": 140114512, "step": 2235 }, { "epoch": 7.440931780366056, "grad_norm": 11.77233600616455, "learning_rate": 5e-06, "loss": 0.7861, "num_input_tokens_seen": 140178400, "step": 2236 }, { "epoch": 7.440931780366056, "loss": 0.9004534482955933, "loss_ce": 6.283719267230481e-05, "loss_iou": 0.32421875, "loss_num": 0.050537109375, "loss_xval": 0.8984375, "num_input_tokens_seen": 140178400, "step": 2236 }, { "epoch": 7.444259567387687, "grad_norm": 19.813735961914062, "learning_rate": 5e-06, "loss": 0.564, "num_input_tokens_seen": 140240616, "step": 2237 }, { "epoch": 7.444259567387687, "loss": 0.6536905169487, "loss_ce": 0.0009805441368371248, "loss_iou": 0.216796875, "loss_num": 0.043701171875, "loss_xval": 0.65234375, "num_input_tokens_seen": 140240616, "step": 2237 }, { "epoch": 7.447587354409317, "grad_norm": 9.354145050048828, "learning_rate": 5e-06, "loss": 0.6029, "num_input_tokens_seen": 140304124, "step": 2238 }, { "epoch": 7.447587354409317, "loss": 0.6453248858451843, "loss_ce": 0.0001832567504607141, "loss_iou": 0.25390625, "loss_num": 0.027587890625, "loss_xval": 0.64453125, "num_input_tokens_seen": 140304124, "step": 2238 }, { "epoch": 7.450915141430948, "grad_norm": 13.90583324432373, "learning_rate": 5e-06, "loss": 0.5418, "num_input_tokens_seen": 140365780, "step": 2239 }, { "epoch": 7.450915141430948, "loss": 0.4816911220550537, "loss_ce": 1.6704357221897226e-06, "loss_iou": 0.1572265625, "loss_num": 0.033447265625, "loss_xval": 0.482421875, "num_input_tokens_seen": 140365780, "step": 2239 }, { "epoch": 7.454242928452579, "grad_norm": 11.93407154083252, "learning_rate": 5e-06, "loss": 0.4393, "num_input_tokens_seen": 140426728, "step": 2240 }, { "epoch": 7.454242928452579, "loss": 0.3569353520870209, "loss_ce": 1.783795710252889e-06, "loss_iou": 0.1171875, "loss_num": 0.0244140625, "loss_xval": 0.357421875, "num_input_tokens_seen": 140426728, "step": 2240 }, { "epoch": 7.457570715474209, "grad_norm": 11.00178337097168, "learning_rate": 5e-06, "loss": 0.6598, "num_input_tokens_seen": 140491416, "step": 2241 }, { "epoch": 7.457570715474209, "loss": 0.5615384578704834, "loss_ce": 1.5008969967311714e-05, "loss_iou": 0.20703125, "loss_num": 0.029541015625, "loss_xval": 0.5625, "num_input_tokens_seen": 140491416, "step": 2241 }, { "epoch": 7.46089850249584, "grad_norm": 9.441125869750977, "learning_rate": 5e-06, "loss": 0.6705, "num_input_tokens_seen": 140554596, "step": 2242 }, { "epoch": 7.46089850249584, "loss": 0.5933608412742615, "loss_ce": 9.91330889519304e-05, "loss_iou": 0.212890625, "loss_num": 0.033447265625, "loss_xval": 0.59375, "num_input_tokens_seen": 140554596, "step": 2242 }, { "epoch": 7.4642262895174705, "grad_norm": 101.01548767089844, "learning_rate": 5e-06, "loss": 0.5166, "num_input_tokens_seen": 140617556, "step": 2243 }, { "epoch": 7.4642262895174705, "loss": 0.5115799903869629, "loss_ce": 0.00041056566988117993, "loss_iou": 0.1806640625, "loss_num": 0.030029296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 140617556, "step": 2243 }, { "epoch": 7.467554076539101, "grad_norm": 18.899377822875977, "learning_rate": 5e-06, "loss": 0.582, "num_input_tokens_seen": 140680292, "step": 2244 }, { "epoch": 7.467554076539101, "loss": 0.5535838603973389, "loss_ce": 0.00036123624886386096, "loss_iou": 0.205078125, "loss_num": 0.028564453125, "loss_xval": 0.5546875, "num_input_tokens_seen": 140680292, "step": 2244 }, { "epoch": 7.470881863560733, "grad_norm": 13.207154273986816, "learning_rate": 5e-06, "loss": 0.4969, "num_input_tokens_seen": 140741376, "step": 2245 }, { "epoch": 7.470881863560733, "loss": 0.3703635334968567, "loss_ce": 2.1828875560458982e-06, "loss_iou": 0.150390625, "loss_num": 0.013916015625, "loss_xval": 0.37109375, "num_input_tokens_seen": 140741376, "step": 2245 }, { "epoch": 7.474209650582363, "grad_norm": 15.315943717956543, "learning_rate": 5e-06, "loss": 0.6504, "num_input_tokens_seen": 140805776, "step": 2246 }, { "epoch": 7.474209650582363, "loss": 0.5884134769439697, "loss_ce": 9.563988714944571e-05, "loss_iou": 0.21875, "loss_num": 0.0301513671875, "loss_xval": 0.58984375, "num_input_tokens_seen": 140805776, "step": 2246 }, { "epoch": 7.477537437603994, "grad_norm": 5.673097133636475, "learning_rate": 5e-06, "loss": 0.3685, "num_input_tokens_seen": 140867756, "step": 2247 }, { "epoch": 7.477537437603994, "loss": 0.3981379568576813, "loss_ce": 0.0007990803569555283, "loss_iou": 0.11962890625, "loss_num": 0.031494140625, "loss_xval": 0.396484375, "num_input_tokens_seen": 140867756, "step": 2247 }, { "epoch": 7.480865224625624, "grad_norm": 7.4531121253967285, "learning_rate": 5e-06, "loss": 0.604, "num_input_tokens_seen": 140931224, "step": 2248 }, { "epoch": 7.480865224625624, "loss": 0.6574175357818604, "loss_ce": 7.892101166362409e-06, "loss_iou": 0.2265625, "loss_num": 0.041015625, "loss_xval": 0.65625, "num_input_tokens_seen": 140931224, "step": 2248 }, { "epoch": 7.484193011647255, "grad_norm": 16.258926391601562, "learning_rate": 5e-06, "loss": 0.5589, "num_input_tokens_seen": 140992168, "step": 2249 }, { "epoch": 7.484193011647255, "loss": 0.4230063557624817, "loss_ce": 0.00039895670488476753, "loss_iou": 0.08935546875, "loss_num": 0.048583984375, "loss_xval": 0.421875, "num_input_tokens_seen": 140992168, "step": 2249 }, { "epoch": 7.487520798668886, "grad_norm": 8.96210765838623, "learning_rate": 5e-06, "loss": 0.4833, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "eval_seeclick_CIoU": 0.04868214577436447, "eval_seeclick_GIoU": 0.051491254940629005, "eval_seeclick_IoU": 0.1618114709854126, "eval_seeclick_MAE_all": 0.16479428857564926, "eval_seeclick_MAE_h": 0.05561095289885998, "eval_seeclick_MAE_w": 0.12737343087792397, "eval_seeclick_MAE_x_boxes": 0.20155268162488937, "eval_seeclick_MAE_y_boxes": 0.17707742750644684, "eval_seeclick_NUM_probability": 0.9999283850193024, "eval_seeclick_inside_bbox": 0.24270834028720856, "eval_seeclick_loss": 2.858401298522949, "eval_seeclick_loss_ce": 0.14430225640535355, "eval_seeclick_loss_iou": 0.945556640625, "eval_seeclick_loss_num": 0.16275787353515625, "eval_seeclick_loss_xval": 2.705078125, "eval_seeclick_runtime": 62.4681, "eval_seeclick_samples_per_second": 0.752, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "eval_icons_CIoU": -0.026562778279185295, "eval_icons_GIoU": 0.06010494381189346, "eval_icons_IoU": 0.138919860124588, "eval_icons_MAE_all": 0.1639707162976265, "eval_icons_MAE_h": 0.12445582449436188, "eval_icons_MAE_w": 0.1814054735004902, "eval_icons_MAE_x_boxes": 0.12949233502149582, "eval_icons_MAE_y_boxes": 0.061991749331355095, "eval_icons_NUM_probability": 0.9999834895133972, "eval_icons_inside_bbox": 0.3038194477558136, "eval_icons_loss": 2.7037525177001953, "eval_icons_loss_ce": 2.5522297164570773e-06, "eval_icons_loss_iou": 0.9501953125, "eval_icons_loss_num": 0.167999267578125, "eval_icons_loss_xval": 2.73974609375, "eval_icons_runtime": 64.3887, "eval_icons_samples_per_second": 0.777, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "eval_screenspot_CIoU": 0.1789665644367536, "eval_screenspot_GIoU": 0.2078845351934433, "eval_screenspot_IoU": 0.29446254173914593, "eval_screenspot_MAE_all": 0.12250990668932597, "eval_screenspot_MAE_h": 0.06907364477713902, "eval_screenspot_MAE_w": 0.11186742410063744, "eval_screenspot_MAE_x_boxes": 0.15160935620466867, "eval_screenspot_MAE_y_boxes": 0.0891392504175504, "eval_screenspot_NUM_probability": 0.9999844233194987, "eval_screenspot_inside_bbox": 0.4870833357175191, "eval_screenspot_loss": 2.2591538429260254, "eval_screenspot_loss_ce": 2.3037793956367143e-05, "eval_screenspot_loss_iou": 0.8191731770833334, "eval_screenspot_loss_num": 0.13759867350260416, "eval_screenspot_loss_xval": 2.3264973958333335, "eval_screenspot_runtime": 118.5019, "eval_screenspot_samples_per_second": 0.751, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "eval_compot_CIoU": 0.010449499706737697, "eval_compot_GIoU": 0.06850353255867958, "eval_compot_IoU": 0.1718440130352974, "eval_compot_MAE_all": 0.18534545600414276, "eval_compot_MAE_h": 0.08798845484852791, "eval_compot_MAE_w": 0.20128058642148972, "eval_compot_MAE_x_boxes": 0.15803614631295204, "eval_compot_MAE_y_boxes": 0.1437467709183693, "eval_compot_NUM_probability": 0.9999820590019226, "eval_compot_inside_bbox": 0.3541666716337204, "eval_compot_loss": 2.8078675270080566, "eval_compot_loss_ce": 0.002573121862951666, "eval_compot_loss_iou": 0.944091796875, "eval_compot_loss_num": 0.204193115234375, "eval_compot_loss_xval": 2.90869140625, "eval_compot_runtime": 68.1916, "eval_compot_samples_per_second": 0.733, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "eval_custom_ui_MAE_all": 0.07385894656181335, "eval_custom_ui_MAE_x": 0.07941742613911629, "eval_custom_ui_MAE_y": 0.06830045953392982, "eval_custom_ui_NUM_probability": 0.9999962449073792, "eval_custom_ui_loss": 0.33961018919944763, "eval_custom_ui_loss_ce": 7.585402386212081e-07, "eval_custom_ui_loss_num": 0.068603515625, "eval_custom_ui_loss_xval": 0.3427734375, "eval_custom_ui_runtime": 52.9265, "eval_custom_ui_samples_per_second": 0.945, "eval_custom_ui_steps_per_second": 0.038, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.487520798668886, "loss": 0.36547935009002686, "loss_ce": 8.513787861375022e-07, "loss_iou": 0.0, "loss_num": 0.0732421875, "loss_xval": 0.365234375, "num_input_tokens_seen": 141053456, "step": 2250 }, { "epoch": 7.490848585690516, "grad_norm": 28.496105194091797, "learning_rate": 5e-06, "loss": 0.658, "num_input_tokens_seen": 141115928, "step": 2251 }, { "epoch": 7.490848585690516, "loss": 0.5133959650993347, "loss_ce": 0.0007006514351814985, "loss_iou": 0.158203125, "loss_num": 0.03955078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 141115928, "step": 2251 }, { "epoch": 7.494176372712147, "grad_norm": 18.858518600463867, "learning_rate": 5e-06, "loss": 0.6112, "num_input_tokens_seen": 141178272, "step": 2252 }, { "epoch": 7.494176372712147, "loss": 0.7322477102279663, "loss_ce": 6.996921729296446e-05, "loss_iou": 0.27734375, "loss_num": 0.035888671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 141178272, "step": 2252 }, { "epoch": 7.4975041597337775, "grad_norm": 17.32355499267578, "learning_rate": 5e-06, "loss": 0.6079, "num_input_tokens_seen": 141242128, "step": 2253 }, { "epoch": 7.4975041597337775, "loss": 0.652695894241333, "loss_ce": 0.00023008455173112452, "loss_iou": 0.259765625, "loss_num": 0.0269775390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 141242128, "step": 2253 }, { "epoch": 7.500831946755408, "grad_norm": 25.166427612304688, "learning_rate": 5e-06, "loss": 0.7791, "num_input_tokens_seen": 141306648, "step": 2254 }, { "epoch": 7.500831946755408, "loss": 0.6692072749137878, "loss_ce": 0.0005060855764895678, "loss_iou": 0.271484375, "loss_num": 0.02490234375, "loss_xval": 0.66796875, "num_input_tokens_seen": 141306648, "step": 2254 }, { "epoch": 7.504159733777039, "grad_norm": 23.0955753326416, "learning_rate": 5e-06, "loss": 0.6876, "num_input_tokens_seen": 141369472, "step": 2255 }, { "epoch": 7.504159733777039, "loss": 0.8727232813835144, "loss_ce": 0.000652988557703793, "loss_iou": 0.30078125, "loss_num": 0.05419921875, "loss_xval": 0.87109375, "num_input_tokens_seen": 141369472, "step": 2255 }, { "epoch": 7.507487520798669, "grad_norm": 10.286460876464844, "learning_rate": 5e-06, "loss": 0.4259, "num_input_tokens_seen": 141432016, "step": 2256 }, { "epoch": 7.507487520798669, "loss": 0.40814074873924255, "loss_ce": 9.021385631058365e-05, "loss_iou": 0.154296875, "loss_num": 0.019775390625, "loss_xval": 0.408203125, "num_input_tokens_seen": 141432016, "step": 2256 }, { "epoch": 7.5108153078203, "grad_norm": 12.377793312072754, "learning_rate": 5e-06, "loss": 0.6123, "num_input_tokens_seen": 141494792, "step": 2257 }, { "epoch": 7.5108153078203, "loss": 0.7302893400192261, "loss_ce": 0.0001257470721611753, "loss_iou": 0.275390625, "loss_num": 0.0361328125, "loss_xval": 0.73046875, "num_input_tokens_seen": 141494792, "step": 2257 }, { "epoch": 7.5141430948419305, "grad_norm": 5.3923540115356445, "learning_rate": 5e-06, "loss": 0.5446, "num_input_tokens_seen": 141557004, "step": 2258 }, { "epoch": 7.5141430948419305, "loss": 0.37164705991744995, "loss_ce": 4.0002778405323625e-06, "loss_iou": 0.10986328125, "loss_num": 0.0303955078125, "loss_xval": 0.37109375, "num_input_tokens_seen": 141557004, "step": 2258 }, { "epoch": 7.517470881863561, "grad_norm": 14.250569343566895, "learning_rate": 5e-06, "loss": 0.5696, "num_input_tokens_seen": 141620716, "step": 2259 }, { "epoch": 7.517470881863561, "loss": 0.6698814630508423, "loss_ce": 0.0011802929220721126, "loss_iou": 0.23046875, "loss_num": 0.04150390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 141620716, "step": 2259 }, { "epoch": 7.520798668885192, "grad_norm": 12.357198715209961, "learning_rate": 5e-06, "loss": 0.6668, "num_input_tokens_seen": 141684548, "step": 2260 }, { "epoch": 7.520798668885192, "loss": 0.7958953380584717, "loss_ce": 0.00024107249919325113, "loss_iou": 0.291015625, "loss_num": 0.04248046875, "loss_xval": 0.796875, "num_input_tokens_seen": 141684548, "step": 2260 }, { "epoch": 7.524126455906822, "grad_norm": 14.868229866027832, "learning_rate": 5e-06, "loss": 0.4741, "num_input_tokens_seen": 141747828, "step": 2261 }, { "epoch": 7.524126455906822, "loss": 0.5542324185371399, "loss_ce": 0.0005214751581661403, "loss_iou": 0.2109375, "loss_num": 0.0264892578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 141747828, "step": 2261 }, { "epoch": 7.527454242928453, "grad_norm": 14.062470436096191, "learning_rate": 5e-06, "loss": 0.4715, "num_input_tokens_seen": 141811020, "step": 2262 }, { "epoch": 7.527454242928453, "loss": 0.3584764003753662, "loss_ce": 0.0008103977306745946, "loss_iou": 0.1142578125, "loss_num": 0.02587890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 141811020, "step": 2262 }, { "epoch": 7.530782029950084, "grad_norm": 9.819461822509766, "learning_rate": 5e-06, "loss": 0.3989, "num_input_tokens_seen": 141874964, "step": 2263 }, { "epoch": 7.530782029950084, "loss": 0.3567330241203308, "loss_ce": 4.359758167993277e-05, "loss_iou": 0.142578125, "loss_num": 0.01434326171875, "loss_xval": 0.357421875, "num_input_tokens_seen": 141874964, "step": 2263 }, { "epoch": 7.534109816971714, "grad_norm": 22.8493595123291, "learning_rate": 5e-06, "loss": 0.6885, "num_input_tokens_seen": 141937512, "step": 2264 }, { "epoch": 7.534109816971714, "loss": 0.7072858810424805, "loss_ce": 1.051372782967519e-05, "loss_iou": 0.265625, "loss_num": 0.034912109375, "loss_xval": 0.70703125, "num_input_tokens_seen": 141937512, "step": 2264 }, { "epoch": 7.537437603993345, "grad_norm": 29.124679565429688, "learning_rate": 5e-06, "loss": 0.5657, "num_input_tokens_seen": 142000732, "step": 2265 }, { "epoch": 7.537437603993345, "loss": 0.536865234375, "loss_ce": 0.0003662196977529675, "loss_iou": 0.162109375, "loss_num": 0.042236328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 142000732, "step": 2265 }, { "epoch": 7.5407653910149754, "grad_norm": 21.29768943786621, "learning_rate": 5e-06, "loss": 0.5597, "num_input_tokens_seen": 142062212, "step": 2266 }, { "epoch": 7.5407653910149754, "loss": 0.6205000877380371, "loss_ce": 0.0011153186205774546, "loss_iou": 0.224609375, "loss_num": 0.033935546875, "loss_xval": 0.62109375, "num_input_tokens_seen": 142062212, "step": 2266 }, { "epoch": 7.544093178036606, "grad_norm": 19.763662338256836, "learning_rate": 5e-06, "loss": 0.5987, "num_input_tokens_seen": 142124788, "step": 2267 }, { "epoch": 7.544093178036606, "loss": 0.6311676502227783, "loss_ce": 3.101161382801365e-06, "loss_iou": 0.2275390625, "loss_num": 0.035400390625, "loss_xval": 0.6328125, "num_input_tokens_seen": 142124788, "step": 2267 }, { "epoch": 7.547420965058237, "grad_norm": 21.890323638916016, "learning_rate": 5e-06, "loss": 0.6335, "num_input_tokens_seen": 142188824, "step": 2268 }, { "epoch": 7.547420965058237, "loss": 0.6932433843612671, "loss_ce": 6.039998879714403e-06, "loss_iou": 0.248046875, "loss_num": 0.03955078125, "loss_xval": 0.69140625, "num_input_tokens_seen": 142188824, "step": 2268 }, { "epoch": 7.550748752079867, "grad_norm": 21.54544448852539, "learning_rate": 5e-06, "loss": 0.4933, "num_input_tokens_seen": 142250564, "step": 2269 }, { "epoch": 7.550748752079867, "loss": 0.4507805407047272, "loss_ce": 5.38805852556834e-06, "loss_iou": 0.1884765625, "loss_num": 0.01470947265625, "loss_xval": 0.451171875, "num_input_tokens_seen": 142250564, "step": 2269 }, { "epoch": 7.554076539101498, "grad_norm": 10.234813690185547, "learning_rate": 5e-06, "loss": 0.6407, "num_input_tokens_seen": 142313060, "step": 2270 }, { "epoch": 7.554076539101498, "loss": 0.4982016980648041, "loss_ce": 2.217929250036832e-06, "loss_iou": 0.2119140625, "loss_num": 0.0146484375, "loss_xval": 0.498046875, "num_input_tokens_seen": 142313060, "step": 2270 }, { "epoch": 7.5574043261231285, "grad_norm": 11.792405128479004, "learning_rate": 5e-06, "loss": 0.5364, "num_input_tokens_seen": 142376540, "step": 2271 }, { "epoch": 7.5574043261231285, "loss": 0.4926389157772064, "loss_ce": 0.0005734919686801732, "loss_iou": 0.1796875, "loss_num": 0.0263671875, "loss_xval": 0.4921875, "num_input_tokens_seen": 142376540, "step": 2271 }, { "epoch": 7.560732113144759, "grad_norm": 109.84727478027344, "learning_rate": 5e-06, "loss": 0.6426, "num_input_tokens_seen": 142439640, "step": 2272 }, { "epoch": 7.560732113144759, "loss": 0.8490808010101318, "loss_ce": 0.0005700996844097972, "loss_iou": 0.31640625, "loss_num": 0.04296875, "loss_xval": 0.84765625, "num_input_tokens_seen": 142439640, "step": 2272 }, { "epoch": 7.56405990016639, "grad_norm": 22.729698181152344, "learning_rate": 5e-06, "loss": 0.4732, "num_input_tokens_seen": 142501372, "step": 2273 }, { "epoch": 7.56405990016639, "loss": 0.4337175488471985, "loss_ce": 1.7566994756634813e-06, "loss_iou": 0.138671875, "loss_num": 0.031494140625, "loss_xval": 0.43359375, "num_input_tokens_seen": 142501372, "step": 2273 }, { "epoch": 7.56738768718802, "grad_norm": 34.67817306518555, "learning_rate": 5e-06, "loss": 0.75, "num_input_tokens_seen": 142565392, "step": 2274 }, { "epoch": 7.56738768718802, "loss": 0.7129374146461487, "loss_ce": 0.0015116410795599222, "loss_iou": 0.2392578125, "loss_num": 0.04638671875, "loss_xval": 0.7109375, "num_input_tokens_seen": 142565392, "step": 2274 }, { "epoch": 7.570715474209651, "grad_norm": 15.34155559539795, "learning_rate": 5e-06, "loss": 0.2921, "num_input_tokens_seen": 142625392, "step": 2275 }, { "epoch": 7.570715474209651, "loss": 0.3977106213569641, "loss_ce": 5.54428470422863e-06, "loss_iou": 0.126953125, "loss_num": 0.02880859375, "loss_xval": 0.3984375, "num_input_tokens_seen": 142625392, "step": 2275 }, { "epoch": 7.574043261231282, "grad_norm": 14.919290542602539, "learning_rate": 5e-06, "loss": 0.6842, "num_input_tokens_seen": 142688852, "step": 2276 }, { "epoch": 7.574043261231282, "loss": 0.6416488885879517, "loss_ce": 0.0010239144321531057, "loss_iou": 0.2255859375, "loss_num": 0.037841796875, "loss_xval": 0.640625, "num_input_tokens_seen": 142688852, "step": 2276 }, { "epoch": 7.577371048252912, "grad_norm": 20.433574676513672, "learning_rate": 5e-06, "loss": 0.5465, "num_input_tokens_seen": 142752416, "step": 2277 }, { "epoch": 7.577371048252912, "loss": 0.42586106061935425, "loss_ce": 3.54246367351152e-06, "loss_iou": 0.14453125, "loss_num": 0.0274658203125, "loss_xval": 0.42578125, "num_input_tokens_seen": 142752416, "step": 2277 }, { "epoch": 7.580698835274543, "grad_norm": 20.729290008544922, "learning_rate": 5e-06, "loss": 0.7225, "num_input_tokens_seen": 142817092, "step": 2278 }, { "epoch": 7.580698835274543, "loss": 0.713057816028595, "loss_ce": 0.00016715926176402718, "loss_iou": 0.294921875, "loss_num": 0.02490234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 142817092, "step": 2278 }, { "epoch": 7.584026622296173, "grad_norm": 24.117467880249023, "learning_rate": 5e-06, "loss": 0.4804, "num_input_tokens_seen": 142878608, "step": 2279 }, { "epoch": 7.584026622296173, "loss": 0.3950633406639099, "loss_ce": 1.328250982624013e-05, "loss_iou": 0.16796875, "loss_num": 0.01165771484375, "loss_xval": 0.39453125, "num_input_tokens_seen": 142878608, "step": 2279 }, { "epoch": 7.587354409317804, "grad_norm": 23.54152488708496, "learning_rate": 5e-06, "loss": 0.5926, "num_input_tokens_seen": 142942172, "step": 2280 }, { "epoch": 7.587354409317804, "loss": 0.6235460042953491, "loss_ce": 1.0880641639232635e-05, "loss_iou": 0.25, "loss_num": 0.0247802734375, "loss_xval": 0.625, "num_input_tokens_seen": 142942172, "step": 2280 }, { "epoch": 7.590682196339435, "grad_norm": 10.788458824157715, "learning_rate": 5e-06, "loss": 0.468, "num_input_tokens_seen": 143005172, "step": 2281 }, { "epoch": 7.590682196339435, "loss": 0.5332049131393433, "loss_ce": 1.7618580159250996e-06, "loss_iou": 0.18359375, "loss_num": 0.033447265625, "loss_xval": 0.53125, "num_input_tokens_seen": 143005172, "step": 2281 }, { "epoch": 7.594009983361065, "grad_norm": 20.500139236450195, "learning_rate": 5e-06, "loss": 0.8531, "num_input_tokens_seen": 143068628, "step": 2282 }, { "epoch": 7.594009983361065, "loss": 0.7287222146987915, "loss_ce": 0.0010000152979046106, "loss_iou": 0.24609375, "loss_num": 0.046875, "loss_xval": 0.7265625, "num_input_tokens_seen": 143068628, "step": 2282 }, { "epoch": 7.597337770382696, "grad_norm": 17.355417251586914, "learning_rate": 5e-06, "loss": 0.5014, "num_input_tokens_seen": 143132104, "step": 2283 }, { "epoch": 7.597337770382696, "loss": 0.4443753957748413, "loss_ce": 0.0002530909259803593, "loss_iou": 0.1474609375, "loss_num": 0.0296630859375, "loss_xval": 0.443359375, "num_input_tokens_seen": 143132104, "step": 2283 }, { "epoch": 7.6006655574043265, "grad_norm": 8.896656036376953, "learning_rate": 5e-06, "loss": 0.5089, "num_input_tokens_seen": 143193592, "step": 2284 }, { "epoch": 7.6006655574043265, "loss": 0.5104822516441345, "loss_ce": 4.526367047219537e-05, "loss_iou": 0.1845703125, "loss_num": 0.028076171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 143193592, "step": 2284 }, { "epoch": 7.603993344425957, "grad_norm": 11.21259593963623, "learning_rate": 5e-06, "loss": 0.6531, "num_input_tokens_seen": 143255012, "step": 2285 }, { "epoch": 7.603993344425957, "loss": 0.616536557674408, "loss_ce": 0.00026458536740392447, "loss_iou": 0.234375, "loss_num": 0.029541015625, "loss_xval": 0.6171875, "num_input_tokens_seen": 143255012, "step": 2285 }, { "epoch": 7.607321131447588, "grad_norm": 15.692087173461914, "learning_rate": 5e-06, "loss": 0.6683, "num_input_tokens_seen": 143318616, "step": 2286 }, { "epoch": 7.607321131447588, "loss": 0.4470236599445343, "loss_ce": 2.1694991119147744e-06, "loss_iou": 0.1591796875, "loss_num": 0.02587890625, "loss_xval": 0.447265625, "num_input_tokens_seen": 143318616, "step": 2286 }, { "epoch": 7.610648918469218, "grad_norm": 7.317299842834473, "learning_rate": 5e-06, "loss": 0.3521, "num_input_tokens_seen": 143381020, "step": 2287 }, { "epoch": 7.610648918469218, "loss": 0.416263610124588, "loss_ce": 3.87332693208009e-06, "loss_iou": 0.1259765625, "loss_num": 0.03271484375, "loss_xval": 0.416015625, "num_input_tokens_seen": 143381020, "step": 2287 }, { "epoch": 7.613976705490849, "grad_norm": 12.07167911529541, "learning_rate": 5e-06, "loss": 0.5995, "num_input_tokens_seen": 143443344, "step": 2288 }, { "epoch": 7.613976705490849, "loss": 0.45160427689552307, "loss_ce": 6.618063343921676e-05, "loss_iou": 0.146484375, "loss_num": 0.03173828125, "loss_xval": 0.451171875, "num_input_tokens_seen": 143443344, "step": 2288 }, { "epoch": 7.6173044925124795, "grad_norm": 11.88379955291748, "learning_rate": 5e-06, "loss": 0.4995, "num_input_tokens_seen": 143506224, "step": 2289 }, { "epoch": 7.6173044925124795, "loss": 0.4618697166442871, "loss_ce": 3.195551107637584e-05, "loss_iou": 0.1494140625, "loss_num": 0.032470703125, "loss_xval": 0.4609375, "num_input_tokens_seen": 143506224, "step": 2289 }, { "epoch": 7.62063227953411, "grad_norm": 68.98832702636719, "learning_rate": 5e-06, "loss": 0.5626, "num_input_tokens_seen": 143568140, "step": 2290 }, { "epoch": 7.62063227953411, "loss": 0.5182206630706787, "loss_ce": 0.0005204376066103578, "loss_iou": 0.17578125, "loss_num": 0.033447265625, "loss_xval": 0.51953125, "num_input_tokens_seen": 143568140, "step": 2290 }, { "epoch": 7.623960066555741, "grad_norm": 20.953100204467773, "learning_rate": 5e-06, "loss": 0.8377, "num_input_tokens_seen": 143631744, "step": 2291 }, { "epoch": 7.623960066555741, "loss": 0.7692372798919678, "loss_ce": 7.221214036690071e-05, "loss_iou": 0.296875, "loss_num": 0.03515625, "loss_xval": 0.76953125, "num_input_tokens_seen": 143631744, "step": 2291 }, { "epoch": 7.627287853577371, "grad_norm": 31.06548500061035, "learning_rate": 5e-06, "loss": 0.5776, "num_input_tokens_seen": 143694432, "step": 2292 }, { "epoch": 7.627287853577371, "loss": 0.6338036060333252, "loss_ce": 0.00013659281830769032, "loss_iou": 0.2021484375, "loss_num": 0.046142578125, "loss_xval": 0.6328125, "num_input_tokens_seen": 143694432, "step": 2292 }, { "epoch": 7.630615640599002, "grad_norm": 25.51969337463379, "learning_rate": 5e-06, "loss": 0.4778, "num_input_tokens_seen": 143756732, "step": 2293 }, { "epoch": 7.630615640599002, "loss": 0.4081454873085022, "loss_ce": 3.4113231777155306e-06, "loss_iou": 0.17578125, "loss_num": 0.01153564453125, "loss_xval": 0.408203125, "num_input_tokens_seen": 143756732, "step": 2293 }, { "epoch": 7.633943427620633, "grad_norm": 23.810195922851562, "learning_rate": 5e-06, "loss": 0.6558, "num_input_tokens_seen": 143819380, "step": 2294 }, { "epoch": 7.633943427620633, "loss": 0.5564870834350586, "loss_ce": 0.0003347487945575267, "loss_iou": 0.2109375, "loss_num": 0.026611328125, "loss_xval": 0.5546875, "num_input_tokens_seen": 143819380, "step": 2294 }, { "epoch": 7.637271214642263, "grad_norm": 16.409740447998047, "learning_rate": 5e-06, "loss": 0.5978, "num_input_tokens_seen": 143881780, "step": 2295 }, { "epoch": 7.637271214642263, "loss": 0.8087245225906372, "loss_ce": 8.700639227754436e-06, "loss_iou": 0.283203125, "loss_num": 0.048583984375, "loss_xval": 0.80859375, "num_input_tokens_seen": 143881780, "step": 2295 }, { "epoch": 7.640599001663894, "grad_norm": 6.4450764656066895, "learning_rate": 5e-06, "loss": 0.5419, "num_input_tokens_seen": 143945304, "step": 2296 }, { "epoch": 7.640599001663894, "loss": 0.593813419342041, "loss_ce": 2.359945028729271e-06, "loss_iou": 0.2001953125, "loss_num": 0.038818359375, "loss_xval": 0.59375, "num_input_tokens_seen": 143945304, "step": 2296 }, { "epoch": 7.643926788685524, "grad_norm": 12.708121299743652, "learning_rate": 5e-06, "loss": 0.4997, "num_input_tokens_seen": 144007752, "step": 2297 }, { "epoch": 7.643926788685524, "loss": 0.5552474856376648, "loss_ce": 0.00043791189091280103, "loss_iou": 0.1962890625, "loss_num": 0.03271484375, "loss_xval": 0.5546875, "num_input_tokens_seen": 144007752, "step": 2297 }, { "epoch": 7.647254575707155, "grad_norm": 12.28138542175293, "learning_rate": 5e-06, "loss": 0.4717, "num_input_tokens_seen": 144070008, "step": 2298 }, { "epoch": 7.647254575707155, "loss": 0.4582750201225281, "loss_ce": 0.00014512574125546962, "loss_iou": 0.173828125, "loss_num": 0.022216796875, "loss_xval": 0.458984375, "num_input_tokens_seen": 144070008, "step": 2298 }, { "epoch": 7.650582362728786, "grad_norm": 15.309338569641113, "learning_rate": 5e-06, "loss": 0.5539, "num_input_tokens_seen": 144133332, "step": 2299 }, { "epoch": 7.650582362728786, "loss": 0.49819236993789673, "loss_ce": 0.00014551982167176902, "loss_iou": 0.16015625, "loss_num": 0.03564453125, "loss_xval": 0.498046875, "num_input_tokens_seen": 144133332, "step": 2299 }, { "epoch": 7.653910149750416, "grad_norm": 27.782732009887695, "learning_rate": 5e-06, "loss": 0.4947, "num_input_tokens_seen": 144195192, "step": 2300 }, { "epoch": 7.653910149750416, "loss": 0.43646007776260376, "loss_ce": 5.8725199778564274e-05, "loss_iou": 0.1630859375, "loss_num": 0.0220947265625, "loss_xval": 0.435546875, "num_input_tokens_seen": 144195192, "step": 2300 }, { "epoch": 7.657237936772047, "grad_norm": 24.944934844970703, "learning_rate": 5e-06, "loss": 0.7025, "num_input_tokens_seen": 144258712, "step": 2301 }, { "epoch": 7.657237936772047, "loss": 0.4864397644996643, "loss_ce": 0.00023369600239675492, "loss_iou": 0.1875, "loss_num": 0.0223388671875, "loss_xval": 0.486328125, "num_input_tokens_seen": 144258712, "step": 2301 }, { "epoch": 7.6605657237936775, "grad_norm": 16.83536148071289, "learning_rate": 5e-06, "loss": 0.59, "num_input_tokens_seen": 144321896, "step": 2302 }, { "epoch": 7.6605657237936775, "loss": 0.5722736120223999, "loss_ce": 8.019840606721118e-06, "loss_iou": 0.216796875, "loss_num": 0.02783203125, "loss_xval": 0.5703125, "num_input_tokens_seen": 144321896, "step": 2302 }, { "epoch": 7.663893510815308, "grad_norm": 9.036219596862793, "learning_rate": 5e-06, "loss": 0.4311, "num_input_tokens_seen": 144383300, "step": 2303 }, { "epoch": 7.663893510815308, "loss": 0.5116596221923828, "loss_ce": 1.915680059028091e-06, "loss_iou": 0.1767578125, "loss_num": 0.031494140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 144383300, "step": 2303 }, { "epoch": 7.667221297836939, "grad_norm": 19.19466781616211, "learning_rate": 5e-06, "loss": 0.6339, "num_input_tokens_seen": 144446176, "step": 2304 }, { "epoch": 7.667221297836939, "loss": 0.584559977054596, "loss_ce": 0.003017015289515257, "loss_iou": 0.1953125, "loss_num": 0.0380859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 144446176, "step": 2304 }, { "epoch": 7.670549084858569, "grad_norm": 15.914405822753906, "learning_rate": 5e-06, "loss": 0.4653, "num_input_tokens_seen": 144508736, "step": 2305 }, { "epoch": 7.670549084858569, "loss": 0.5055596828460693, "loss_ce": 6.651376315858215e-05, "loss_iou": 0.162109375, "loss_num": 0.0361328125, "loss_xval": 0.50390625, "num_input_tokens_seen": 144508736, "step": 2305 }, { "epoch": 7.6738768718802, "grad_norm": 19.158828735351562, "learning_rate": 5e-06, "loss": 0.7279, "num_input_tokens_seen": 144572204, "step": 2306 }, { "epoch": 7.6738768718802, "loss": 0.9824299812316895, "loss_ce": 8.114659067359753e-06, "loss_iou": 0.3515625, "loss_num": 0.05615234375, "loss_xval": 0.984375, "num_input_tokens_seen": 144572204, "step": 2306 }, { "epoch": 7.677204658901831, "grad_norm": 13.835026741027832, "learning_rate": 5e-06, "loss": 0.4144, "num_input_tokens_seen": 144634064, "step": 2307 }, { "epoch": 7.677204658901831, "loss": 0.37560293078422546, "loss_ce": 5.3617244702763855e-05, "loss_iou": 0.1220703125, "loss_num": 0.026123046875, "loss_xval": 0.375, "num_input_tokens_seen": 144634064, "step": 2307 }, { "epoch": 7.680532445923461, "grad_norm": 14.74710750579834, "learning_rate": 5e-06, "loss": 0.7669, "num_input_tokens_seen": 144697512, "step": 2308 }, { "epoch": 7.680532445923461, "loss": 0.990049421787262, "loss_ce": 0.0003032838285434991, "loss_iou": 0.38671875, "loss_num": 0.04345703125, "loss_xval": 0.98828125, "num_input_tokens_seen": 144697512, "step": 2308 }, { "epoch": 7.683860232945092, "grad_norm": 9.0419282913208, "learning_rate": 5e-06, "loss": 0.6051, "num_input_tokens_seen": 144760664, "step": 2309 }, { "epoch": 7.683860232945092, "loss": 0.5725328922271729, "loss_ce": 8.417123171966523e-05, "loss_iou": 0.19921875, "loss_num": 0.034912109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 144760664, "step": 2309 }, { "epoch": 7.687188019966722, "grad_norm": 21.533023834228516, "learning_rate": 5e-06, "loss": 0.67, "num_input_tokens_seen": 144824796, "step": 2310 }, { "epoch": 7.687188019966722, "loss": 0.45595037937164307, "loss_ce": 0.00038394785951822996, "loss_iou": 0.173828125, "loss_num": 0.021728515625, "loss_xval": 0.455078125, "num_input_tokens_seen": 144824796, "step": 2310 }, { "epoch": 7.690515806988353, "grad_norm": 26.46889877319336, "learning_rate": 5e-06, "loss": 0.6291, "num_input_tokens_seen": 144887604, "step": 2311 }, { "epoch": 7.690515806988353, "loss": 0.5724204778671265, "loss_ce": 3.275485505582765e-05, "loss_iou": 0.177734375, "loss_num": 0.043212890625, "loss_xval": 0.57421875, "num_input_tokens_seen": 144887604, "step": 2311 }, { "epoch": 7.693843594009984, "grad_norm": 13.528558731079102, "learning_rate": 5e-06, "loss": 0.5453, "num_input_tokens_seen": 144950576, "step": 2312 }, { "epoch": 7.693843594009984, "loss": 0.5879080295562744, "loss_ce": 0.00020050497550982982, "loss_iou": 0.2294921875, "loss_num": 0.02587890625, "loss_xval": 0.5859375, "num_input_tokens_seen": 144950576, "step": 2312 }, { "epoch": 7.697171381031614, "grad_norm": 7.634233474731445, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 145013020, "step": 2313 }, { "epoch": 7.697171381031614, "loss": 0.35935062170028687, "loss_ce": 0.0003418382548261434, "loss_iou": 0.09716796875, "loss_num": 0.03271484375, "loss_xval": 0.359375, "num_input_tokens_seen": 145013020, "step": 2313 }, { "epoch": 7.700499168053245, "grad_norm": 17.464176177978516, "learning_rate": 5e-06, "loss": 0.5532, "num_input_tokens_seen": 145076552, "step": 2314 }, { "epoch": 7.700499168053245, "loss": 0.5963326692581177, "loss_ce": 0.0009041736484505236, "loss_iou": 0.244140625, "loss_num": 0.0213623046875, "loss_xval": 0.59375, "num_input_tokens_seen": 145076552, "step": 2314 }, { "epoch": 7.7038269550748755, "grad_norm": 15.60339641571045, "learning_rate": 5e-06, "loss": 0.4201, "num_input_tokens_seen": 145138404, "step": 2315 }, { "epoch": 7.7038269550748755, "loss": 0.3221888244152069, "loss_ce": 0.0013422694755718112, "loss_iou": 0.034912109375, "loss_num": 0.050048828125, "loss_xval": 0.3203125, "num_input_tokens_seen": 145138404, "step": 2315 }, { "epoch": 7.707154742096506, "grad_norm": 9.446913719177246, "learning_rate": 5e-06, "loss": 0.5933, "num_input_tokens_seen": 145201580, "step": 2316 }, { "epoch": 7.707154742096506, "loss": 0.6647981405258179, "loss_ce": 3.2738314530433854e-06, "loss_iou": 0.259765625, "loss_num": 0.02880859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 145201580, "step": 2316 }, { "epoch": 7.710482529118137, "grad_norm": 12.91634750366211, "learning_rate": 5e-06, "loss": 0.6812, "num_input_tokens_seen": 145264296, "step": 2317 }, { "epoch": 7.710482529118137, "loss": 0.651868462562561, "loss_ce": 1.3039256373303942e-05, "loss_iou": 0.19921875, "loss_num": 0.05078125, "loss_xval": 0.65234375, "num_input_tokens_seen": 145264296, "step": 2317 }, { "epoch": 7.713810316139767, "grad_norm": 19.873207092285156, "learning_rate": 5e-06, "loss": 0.6908, "num_input_tokens_seen": 145327972, "step": 2318 }, { "epoch": 7.713810316139767, "loss": 0.7034003138542175, "loss_ce": 0.0007635843940079212, "loss_iou": 0.283203125, "loss_num": 0.0272216796875, "loss_xval": 0.703125, "num_input_tokens_seen": 145327972, "step": 2318 }, { "epoch": 7.717138103161398, "grad_norm": 9.83537769317627, "learning_rate": 5e-06, "loss": 0.5024, "num_input_tokens_seen": 145390496, "step": 2319 }, { "epoch": 7.717138103161398, "loss": 0.7008110284805298, "loss_ce": 5.349003004084807e-06, "loss_iou": 0.251953125, "loss_num": 0.039306640625, "loss_xval": 0.69921875, "num_input_tokens_seen": 145390496, "step": 2319 }, { "epoch": 7.7204658901830285, "grad_norm": 16.17021369934082, "learning_rate": 5e-06, "loss": 0.5825, "num_input_tokens_seen": 145452760, "step": 2320 }, { "epoch": 7.7204658901830285, "loss": 0.5186789631843567, "loss_ce": 2.1908138023718493e-06, "loss_iou": 0.1796875, "loss_num": 0.03173828125, "loss_xval": 0.51953125, "num_input_tokens_seen": 145452760, "step": 2320 }, { "epoch": 7.723793677204659, "grad_norm": 25.943056106567383, "learning_rate": 5e-06, "loss": 0.6088, "num_input_tokens_seen": 145515940, "step": 2321 }, { "epoch": 7.723793677204659, "loss": 0.5606353878974915, "loss_ce": 0.0003631455183494836, "loss_iou": 0.1572265625, "loss_num": 0.049072265625, "loss_xval": 0.55859375, "num_input_tokens_seen": 145515940, "step": 2321 }, { "epoch": 7.72712146422629, "grad_norm": 18.087554931640625, "learning_rate": 5e-06, "loss": 0.6088, "num_input_tokens_seen": 145580196, "step": 2322 }, { "epoch": 7.72712146422629, "loss": 0.716313898563385, "loss_ce": 5.26880876350333e-06, "loss_iou": 0.2490234375, "loss_num": 0.04345703125, "loss_xval": 0.71484375, "num_input_tokens_seen": 145580196, "step": 2322 }, { "epoch": 7.73044925124792, "grad_norm": 9.075494766235352, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 145642056, "step": 2323 }, { "epoch": 7.73044925124792, "loss": 0.4884061813354492, "loss_ce": 2.8649069463426713e-06, "loss_iou": 0.1689453125, "loss_num": 0.0303955078125, "loss_xval": 0.48828125, "num_input_tokens_seen": 145642056, "step": 2323 }, { "epoch": 7.733777038269551, "grad_norm": 8.690394401550293, "learning_rate": 5e-06, "loss": 0.6698, "num_input_tokens_seen": 145704696, "step": 2324 }, { "epoch": 7.733777038269551, "loss": 0.7379202842712402, "loss_ce": 5.256584699964151e-06, "loss_iou": 0.275390625, "loss_num": 0.037353515625, "loss_xval": 0.73828125, "num_input_tokens_seen": 145704696, "step": 2324 }, { "epoch": 7.737104825291182, "grad_norm": 27.690406799316406, "learning_rate": 5e-06, "loss": 0.5538, "num_input_tokens_seen": 145767192, "step": 2325 }, { "epoch": 7.737104825291182, "loss": 0.48852768540382385, "loss_ce": 2.325112973267096e-06, "loss_iou": 0.1728515625, "loss_num": 0.0286865234375, "loss_xval": 0.48828125, "num_input_tokens_seen": 145767192, "step": 2325 }, { "epoch": 7.740432612312812, "grad_norm": 31.589567184448242, "learning_rate": 5e-06, "loss": 0.5443, "num_input_tokens_seen": 145830196, "step": 2326 }, { "epoch": 7.740432612312812, "loss": 0.6278164982795715, "loss_ce": 8.885833267413545e-06, "loss_iou": 0.248046875, "loss_num": 0.0263671875, "loss_xval": 0.62890625, "num_input_tokens_seen": 145830196, "step": 2326 }, { "epoch": 7.743760399334443, "grad_norm": 11.081936836242676, "learning_rate": 5e-06, "loss": 0.5364, "num_input_tokens_seen": 145893440, "step": 2327 }, { "epoch": 7.743760399334443, "loss": 0.565431535243988, "loss_ce": 1.8363116396358237e-06, "loss_iou": 0.2060546875, "loss_num": 0.0308837890625, "loss_xval": 0.56640625, "num_input_tokens_seen": 145893440, "step": 2327 }, { "epoch": 7.747088186356073, "grad_norm": 19.1000919342041, "learning_rate": 5e-06, "loss": 0.5158, "num_input_tokens_seen": 145956380, "step": 2328 }, { "epoch": 7.747088186356073, "loss": 0.387780100107193, "loss_ce": 2.3756949303788133e-05, "loss_iou": 0.14453125, "loss_num": 0.0196533203125, "loss_xval": 0.388671875, "num_input_tokens_seen": 145956380, "step": 2328 }, { "epoch": 7.750415973377704, "grad_norm": 17.6572265625, "learning_rate": 5e-06, "loss": 0.7041, "num_input_tokens_seen": 146016456, "step": 2329 }, { "epoch": 7.750415973377704, "loss": 0.505922794342041, "loss_ce": 2.325915829715086e-06, "loss_iou": 0.1630859375, "loss_num": 0.03564453125, "loss_xval": 0.5078125, "num_input_tokens_seen": 146016456, "step": 2329 }, { "epoch": 7.753743760399335, "grad_norm": 9.671598434448242, "learning_rate": 5e-06, "loss": 0.4584, "num_input_tokens_seen": 146078908, "step": 2330 }, { "epoch": 7.753743760399335, "loss": 0.562719464302063, "loss_ce": 3.630670835264027e-05, "loss_iou": 0.1982421875, "loss_num": 0.033447265625, "loss_xval": 0.5625, "num_input_tokens_seen": 146078908, "step": 2330 }, { "epoch": 7.757071547420965, "grad_norm": 12.324542045593262, "learning_rate": 5e-06, "loss": 0.5502, "num_input_tokens_seen": 146142048, "step": 2331 }, { "epoch": 7.757071547420965, "loss": 0.42419546842575073, "loss_ce": 1.1486179118946893e-06, "loss_iou": 0.169921875, "loss_num": 0.016845703125, "loss_xval": 0.423828125, "num_input_tokens_seen": 146142048, "step": 2331 }, { "epoch": 7.760399334442596, "grad_norm": 28.509401321411133, "learning_rate": 5e-06, "loss": 0.7678, "num_input_tokens_seen": 146207704, "step": 2332 }, { "epoch": 7.760399334442596, "loss": 0.9033709764480591, "loss_ce": 0.00020321847114246339, "loss_iou": 0.345703125, "loss_num": 0.042724609375, "loss_xval": 0.90234375, "num_input_tokens_seen": 146207704, "step": 2332 }, { "epoch": 7.7637271214642265, "grad_norm": 9.252092361450195, "learning_rate": 5e-06, "loss": 0.524, "num_input_tokens_seen": 146271092, "step": 2333 }, { "epoch": 7.7637271214642265, "loss": 0.5001236200332642, "loss_ce": 1.5533704527115333e-06, "loss_iou": 0.18359375, "loss_num": 0.0267333984375, "loss_xval": 0.5, "num_input_tokens_seen": 146271092, "step": 2333 }, { "epoch": 7.767054908485857, "grad_norm": 18.240367889404297, "learning_rate": 5e-06, "loss": 0.6018, "num_input_tokens_seen": 146334720, "step": 2334 }, { "epoch": 7.767054908485857, "loss": 0.4027436375617981, "loss_ce": 3.3661573979770765e-05, "loss_iou": 0.1474609375, "loss_num": 0.021484375, "loss_xval": 0.40234375, "num_input_tokens_seen": 146334720, "step": 2334 }, { "epoch": 7.770382695507488, "grad_norm": 30.29695701599121, "learning_rate": 5e-06, "loss": 0.6799, "num_input_tokens_seen": 146397212, "step": 2335 }, { "epoch": 7.770382695507488, "loss": 0.5450466275215149, "loss_ce": 2.6856564545596484e-06, "loss_iou": 0.1669921875, "loss_num": 0.042236328125, "loss_xval": 0.546875, "num_input_tokens_seen": 146397212, "step": 2335 }, { "epoch": 7.773710482529118, "grad_norm": 21.53461456298828, "learning_rate": 5e-06, "loss": 0.4944, "num_input_tokens_seen": 146458508, "step": 2336 }, { "epoch": 7.773710482529118, "loss": 0.5303375720977783, "loss_ce": 0.0020172488875687122, "loss_iou": 0.1865234375, "loss_num": 0.0311279296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 146458508, "step": 2336 }, { "epoch": 7.777038269550749, "grad_norm": 9.095748901367188, "learning_rate": 5e-06, "loss": 0.5641, "num_input_tokens_seen": 146521480, "step": 2337 }, { "epoch": 7.777038269550749, "loss": 0.44714677333831787, "loss_ce": 3.2378779906139243e-06, "loss_iou": 0.17578125, "loss_num": 0.0191650390625, "loss_xval": 0.447265625, "num_input_tokens_seen": 146521480, "step": 2337 }, { "epoch": 7.78036605657238, "grad_norm": 17.774215698242188, "learning_rate": 5e-06, "loss": 0.421, "num_input_tokens_seen": 146583188, "step": 2338 }, { "epoch": 7.78036605657238, "loss": 0.4039463400840759, "loss_ce": 1.568140032759402e-05, "loss_iou": 0.1640625, "loss_num": 0.015380859375, "loss_xval": 0.404296875, "num_input_tokens_seen": 146583188, "step": 2338 }, { "epoch": 7.78369384359401, "grad_norm": 24.037382125854492, "learning_rate": 5e-06, "loss": 0.6551, "num_input_tokens_seen": 146645420, "step": 2339 }, { "epoch": 7.78369384359401, "loss": 0.849486768245697, "loss_ce": 0.001464241067878902, "loss_iou": 0.3046875, "loss_num": 0.04736328125, "loss_xval": 0.84765625, "num_input_tokens_seen": 146645420, "step": 2339 }, { "epoch": 7.787021630615641, "grad_norm": 17.54924774169922, "learning_rate": 5e-06, "loss": 0.5547, "num_input_tokens_seen": 146709024, "step": 2340 }, { "epoch": 7.787021630615641, "loss": 0.6887951493263245, "loss_ce": 0.0005627042846754193, "loss_iou": 0.259765625, "loss_num": 0.033935546875, "loss_xval": 0.6875, "num_input_tokens_seen": 146709024, "step": 2340 }, { "epoch": 7.790349417637271, "grad_norm": 30.691261291503906, "learning_rate": 5e-06, "loss": 0.738, "num_input_tokens_seen": 146772456, "step": 2341 }, { "epoch": 7.790349417637271, "loss": 1.040102481842041, "loss_ce": 6.349569594021887e-05, "loss_iou": 0.412109375, "loss_num": 0.04345703125, "loss_xval": 1.0390625, "num_input_tokens_seen": 146772456, "step": 2341 }, { "epoch": 7.793677204658902, "grad_norm": 32.27275466918945, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 146835344, "step": 2342 }, { "epoch": 7.793677204658902, "loss": 0.5181585550308228, "loss_ce": 0.0002141979057341814, "loss_iou": 0.1650390625, "loss_num": 0.03759765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 146835344, "step": 2342 }, { "epoch": 7.797004991680533, "grad_norm": 18.2391414642334, "learning_rate": 5e-06, "loss": 0.4142, "num_input_tokens_seen": 146897448, "step": 2343 }, { "epoch": 7.797004991680533, "loss": 0.48322445154190063, "loss_ce": 0.00043634313624352217, "loss_iou": 0.1748046875, "loss_num": 0.02685546875, "loss_xval": 0.482421875, "num_input_tokens_seen": 146897448, "step": 2343 }, { "epoch": 7.800332778702163, "grad_norm": 28.72343635559082, "learning_rate": 5e-06, "loss": 0.4673, "num_input_tokens_seen": 146959752, "step": 2344 }, { "epoch": 7.800332778702163, "loss": 0.500123143196106, "loss_ce": 1.070698885996535e-06, "loss_iou": 0.1767578125, "loss_num": 0.0291748046875, "loss_xval": 0.5, "num_input_tokens_seen": 146959752, "step": 2344 }, { "epoch": 7.803660565723794, "grad_norm": 13.166524887084961, "learning_rate": 5e-06, "loss": 0.443, "num_input_tokens_seen": 147023036, "step": 2345 }, { "epoch": 7.803660565723794, "loss": 0.478252649307251, "loss_ce": 0.0002252986014354974, "loss_iou": 0.177734375, "loss_num": 0.0244140625, "loss_xval": 0.478515625, "num_input_tokens_seen": 147023036, "step": 2345 }, { "epoch": 7.8069883527454245, "grad_norm": 15.563891410827637, "learning_rate": 5e-06, "loss": 0.6214, "num_input_tokens_seen": 147085096, "step": 2346 }, { "epoch": 7.8069883527454245, "loss": 0.7249844670295715, "loss_ce": 8.89886086952174e-06, "loss_iou": 0.28125, "loss_num": 0.0322265625, "loss_xval": 0.7265625, "num_input_tokens_seen": 147085096, "step": 2346 }, { "epoch": 7.810316139767055, "grad_norm": 12.216253280639648, "learning_rate": 5e-06, "loss": 0.6297, "num_input_tokens_seen": 147148632, "step": 2347 }, { "epoch": 7.810316139767055, "loss": 0.6089211702346802, "loss_ce": 0.0010720957070589066, "loss_iou": 0.22265625, "loss_num": 0.0322265625, "loss_xval": 0.609375, "num_input_tokens_seen": 147148632, "step": 2347 }, { "epoch": 7.813643926788686, "grad_norm": 10.345468521118164, "learning_rate": 5e-06, "loss": 0.7692, "num_input_tokens_seen": 147210988, "step": 2348 }, { "epoch": 7.813643926788686, "loss": 0.7181065082550049, "loss_ce": 8.888819138519466e-05, "loss_iou": 0.248046875, "loss_num": 0.044677734375, "loss_xval": 0.71875, "num_input_tokens_seen": 147210988, "step": 2348 }, { "epoch": 7.816971713810316, "grad_norm": 17.722108840942383, "learning_rate": 5e-06, "loss": 0.5034, "num_input_tokens_seen": 147273824, "step": 2349 }, { "epoch": 7.816971713810316, "loss": 0.3487628996372223, "loss_ce": 8.011365935089998e-06, "loss_iou": 0.08349609375, "loss_num": 0.036376953125, "loss_xval": 0.349609375, "num_input_tokens_seen": 147273824, "step": 2349 }, { "epoch": 7.820299500831947, "grad_norm": 31.35045623779297, "learning_rate": 5e-06, "loss": 0.5662, "num_input_tokens_seen": 147336816, "step": 2350 }, { "epoch": 7.820299500831947, "loss": 0.5998554229736328, "loss_ce": 1.8970739574797335e-06, "loss_iou": 0.2236328125, "loss_num": 0.0303955078125, "loss_xval": 0.6015625, "num_input_tokens_seen": 147336816, "step": 2350 }, { "epoch": 7.8236272878535775, "grad_norm": 30.567657470703125, "learning_rate": 5e-06, "loss": 0.5362, "num_input_tokens_seen": 147399360, "step": 2351 }, { "epoch": 7.8236272878535775, "loss": 0.42389237880706787, "loss_ce": 0.00043046631617471576, "loss_iou": 0.171875, "loss_num": 0.01611328125, "loss_xval": 0.423828125, "num_input_tokens_seen": 147399360, "step": 2351 }, { "epoch": 7.826955074875208, "grad_norm": 28.7270565032959, "learning_rate": 5e-06, "loss": 0.5194, "num_input_tokens_seen": 147461648, "step": 2352 }, { "epoch": 7.826955074875208, "loss": 0.648038923740387, "loss_ce": 8.966919267550111e-05, "loss_iou": 0.248046875, "loss_num": 0.030517578125, "loss_xval": 0.6484375, "num_input_tokens_seen": 147461648, "step": 2352 }, { "epoch": 7.830282861896839, "grad_norm": 18.714345932006836, "learning_rate": 5e-06, "loss": 0.5247, "num_input_tokens_seen": 147523612, "step": 2353 }, { "epoch": 7.830282861896839, "loss": 0.5159938931465149, "loss_ce": 2.703457539610099e-06, "loss_iou": 0.2109375, "loss_num": 0.0189208984375, "loss_xval": 0.515625, "num_input_tokens_seen": 147523612, "step": 2353 }, { "epoch": 7.833610648918469, "grad_norm": 21.440248489379883, "learning_rate": 5e-06, "loss": 0.7369, "num_input_tokens_seen": 147585784, "step": 2354 }, { "epoch": 7.833610648918469, "loss": 0.6678215861320496, "loss_ce": 0.0007073488086462021, "loss_iou": 0.2197265625, "loss_num": 0.04541015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 147585784, "step": 2354 }, { "epoch": 7.8369384359401, "grad_norm": 12.692069053649902, "learning_rate": 5e-06, "loss": 0.4814, "num_input_tokens_seen": 147648948, "step": 2355 }, { "epoch": 7.8369384359401, "loss": 0.49195215106010437, "loss_ce": 8.784380952420179e-06, "loss_iou": 0.197265625, "loss_num": 0.01953125, "loss_xval": 0.4921875, "num_input_tokens_seen": 147648948, "step": 2355 }, { "epoch": 7.840266222961731, "grad_norm": 10.984856605529785, "learning_rate": 5e-06, "loss": 0.5291, "num_input_tokens_seen": 147711984, "step": 2356 }, { "epoch": 7.840266222961731, "loss": 0.4536631107330322, "loss_ce": 4.9827915063360706e-05, "loss_iou": 0.1669921875, "loss_num": 0.024169921875, "loss_xval": 0.453125, "num_input_tokens_seen": 147711984, "step": 2356 }, { "epoch": 7.843594009983361, "grad_norm": 11.892590522766113, "learning_rate": 5e-06, "loss": 0.5538, "num_input_tokens_seen": 147775384, "step": 2357 }, { "epoch": 7.843594009983361, "loss": 0.7309591770172119, "loss_ce": 0.0009786862647160888, "loss_iou": 0.279296875, "loss_num": 0.0341796875, "loss_xval": 0.73046875, "num_input_tokens_seen": 147775384, "step": 2357 }, { "epoch": 7.846921797004992, "grad_norm": 9.510071754455566, "learning_rate": 5e-06, "loss": 0.6095, "num_input_tokens_seen": 147838388, "step": 2358 }, { "epoch": 7.846921797004992, "loss": 0.4072890877723694, "loss_ce": 1.4935619674361078e-06, "loss_iou": 0.10498046875, "loss_num": 0.039306640625, "loss_xval": 0.408203125, "num_input_tokens_seen": 147838388, "step": 2358 }, { "epoch": 7.850249584026622, "grad_norm": 12.00825309753418, "learning_rate": 5e-06, "loss": 0.6217, "num_input_tokens_seen": 147900520, "step": 2359 }, { "epoch": 7.850249584026622, "loss": 0.6626142859458923, "loss_ce": 0.0007490179850719869, "loss_iou": 0.224609375, "loss_num": 0.04248046875, "loss_xval": 0.66015625, "num_input_tokens_seen": 147900520, "step": 2359 }, { "epoch": 7.853577371048253, "grad_norm": 8.75156307220459, "learning_rate": 5e-06, "loss": 0.7008, "num_input_tokens_seen": 147962584, "step": 2360 }, { "epoch": 7.853577371048253, "loss": 0.797311544418335, "loss_ce": 7.033840665826574e-05, "loss_iou": 0.26953125, "loss_num": 0.051513671875, "loss_xval": 0.796875, "num_input_tokens_seen": 147962584, "step": 2360 }, { "epoch": 7.856905158069884, "grad_norm": 6.89595365524292, "learning_rate": 5e-06, "loss": 0.4839, "num_input_tokens_seen": 148025856, "step": 2361 }, { "epoch": 7.856905158069884, "loss": 0.2785659432411194, "loss_ce": 1.4824133813817753e-06, "loss_iou": 0.0888671875, "loss_num": 0.020263671875, "loss_xval": 0.279296875, "num_input_tokens_seen": 148025856, "step": 2361 }, { "epoch": 7.860232945091514, "grad_norm": 6.703277111053467, "learning_rate": 5e-06, "loss": 0.5748, "num_input_tokens_seen": 148088968, "step": 2362 }, { "epoch": 7.860232945091514, "loss": 0.6625131368637085, "loss_ce": 0.0005258401506580412, "loss_iou": 0.216796875, "loss_num": 0.04541015625, "loss_xval": 0.66015625, "num_input_tokens_seen": 148088968, "step": 2362 }, { "epoch": 7.863560732113145, "grad_norm": 18.188783645629883, "learning_rate": 5e-06, "loss": 0.5556, "num_input_tokens_seen": 148149792, "step": 2363 }, { "epoch": 7.863560732113145, "loss": 0.639039158821106, "loss_ce": 1.0946615702778217e-06, "loss_iou": 0.2216796875, "loss_num": 0.0390625, "loss_xval": 0.640625, "num_input_tokens_seen": 148149792, "step": 2363 }, { "epoch": 7.8668885191347755, "grad_norm": 29.201555252075195, "learning_rate": 5e-06, "loss": 0.5279, "num_input_tokens_seen": 148212668, "step": 2364 }, { "epoch": 7.8668885191347755, "loss": 0.3579132556915283, "loss_ce": 3.0817495826340746e-06, "loss_iou": 0.1123046875, "loss_num": 0.0264892578125, "loss_xval": 0.357421875, "num_input_tokens_seen": 148212668, "step": 2364 }, { "epoch": 7.870216306156406, "grad_norm": 33.83229446411133, "learning_rate": 5e-06, "loss": 0.4734, "num_input_tokens_seen": 148275920, "step": 2365 }, { "epoch": 7.870216306156406, "loss": 0.40930452942848206, "loss_ce": 2.780019713100046e-06, "loss_iou": 0.10546875, "loss_num": 0.03955078125, "loss_xval": 0.41015625, "num_input_tokens_seen": 148275920, "step": 2365 }, { "epoch": 7.873544093178037, "grad_norm": 54.3865966796875, "learning_rate": 5e-06, "loss": 0.7978, "num_input_tokens_seen": 148339376, "step": 2366 }, { "epoch": 7.873544093178037, "loss": 0.5963486433029175, "loss_ce": 4.655184966395609e-06, "loss_iou": 0.197265625, "loss_num": 0.0400390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 148339376, "step": 2366 }, { "epoch": 7.876871880199667, "grad_norm": 40.385196685791016, "learning_rate": 5e-06, "loss": 0.7978, "num_input_tokens_seen": 148402588, "step": 2367 }, { "epoch": 7.876871880199667, "loss": 0.9389755129814148, "loss_ce": 1.0665748050087132e-05, "loss_iou": 0.396484375, "loss_num": 0.0289306640625, "loss_xval": 0.9375, "num_input_tokens_seen": 148402588, "step": 2367 }, { "epoch": 7.880199667221298, "grad_norm": 10.402012825012207, "learning_rate": 5e-06, "loss": 0.5361, "num_input_tokens_seen": 148466136, "step": 2368 }, { "epoch": 7.880199667221298, "loss": 0.49829232692718506, "loss_ce": 1.3146844821676495e-06, "loss_iou": 0.1796875, "loss_num": 0.0277099609375, "loss_xval": 0.498046875, "num_input_tokens_seen": 148466136, "step": 2368 }, { "epoch": 7.883527454242929, "grad_norm": 21.844648361206055, "learning_rate": 5e-06, "loss": 0.6145, "num_input_tokens_seen": 148530216, "step": 2369 }, { "epoch": 7.883527454242929, "loss": 0.5205156803131104, "loss_ce": 0.0007402912597171962, "loss_iou": 0.181640625, "loss_num": 0.03125, "loss_xval": 0.51953125, "num_input_tokens_seen": 148530216, "step": 2369 }, { "epoch": 7.886855241264559, "grad_norm": 24.081764221191406, "learning_rate": 5e-06, "loss": 0.6235, "num_input_tokens_seen": 148590816, "step": 2370 }, { "epoch": 7.886855241264559, "loss": 0.8015685081481934, "loss_ce": 0.004006906412541866, "loss_iou": 0.3125, "loss_num": 0.034423828125, "loss_xval": 0.796875, "num_input_tokens_seen": 148590816, "step": 2370 }, { "epoch": 7.89018302828619, "grad_norm": 11.873494148254395, "learning_rate": 5e-06, "loss": 0.5982, "num_input_tokens_seen": 148653580, "step": 2371 }, { "epoch": 7.89018302828619, "loss": 0.5470032691955566, "loss_ce": 6.2137487475411035e-06, "loss_iou": 0.193359375, "loss_num": 0.031982421875, "loss_xval": 0.546875, "num_input_tokens_seen": 148653580, "step": 2371 }, { "epoch": 7.89351081530782, "grad_norm": 18.19002342224121, "learning_rate": 5e-06, "loss": 0.4555, "num_input_tokens_seen": 148715544, "step": 2372 }, { "epoch": 7.89351081530782, "loss": 0.581056535243988, "loss_ce": 1.8852560970117338e-06, "loss_iou": 0.2177734375, "loss_num": 0.0291748046875, "loss_xval": 0.58203125, "num_input_tokens_seen": 148715544, "step": 2372 }, { "epoch": 7.896838602329451, "grad_norm": 9.300827980041504, "learning_rate": 5e-06, "loss": 0.528, "num_input_tokens_seen": 148779628, "step": 2373 }, { "epoch": 7.896838602329451, "loss": 0.4032377004623413, "loss_ce": 3.943583578802645e-05, "loss_iou": 0.1513671875, "loss_num": 0.02001953125, "loss_xval": 0.40234375, "num_input_tokens_seen": 148779628, "step": 2373 }, { "epoch": 7.900166389351082, "grad_norm": 9.580190658569336, "learning_rate": 5e-06, "loss": 0.3823, "num_input_tokens_seen": 148842432, "step": 2374 }, { "epoch": 7.900166389351082, "loss": 0.49036872386932373, "loss_ce": 1.2248729944985826e-05, "loss_iou": 0.1708984375, "loss_num": 0.0296630859375, "loss_xval": 0.490234375, "num_input_tokens_seen": 148842432, "step": 2374 }, { "epoch": 7.903494176372712, "grad_norm": 10.321649551391602, "learning_rate": 5e-06, "loss": 0.6282, "num_input_tokens_seen": 148905380, "step": 2375 }, { "epoch": 7.903494176372712, "loss": 0.5415287017822266, "loss_ce": 5.5345866712741554e-05, "loss_iou": 0.1884765625, "loss_num": 0.032958984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 148905380, "step": 2375 }, { "epoch": 7.906821963394343, "grad_norm": 16.251270294189453, "learning_rate": 5e-06, "loss": 0.6128, "num_input_tokens_seen": 148969636, "step": 2376 }, { "epoch": 7.906821963394343, "loss": 0.7741854190826416, "loss_ce": 0.00025961361825466156, "loss_iou": 0.318359375, "loss_num": 0.02685546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 148969636, "step": 2376 }, { "epoch": 7.9101497504159735, "grad_norm": 9.08081340789795, "learning_rate": 5e-06, "loss": 0.7223, "num_input_tokens_seen": 149032696, "step": 2377 }, { "epoch": 7.9101497504159735, "loss": 0.5799582004547119, "loss_ce": 2.159239102184074e-06, "loss_iou": 0.2392578125, "loss_num": 0.0203857421875, "loss_xval": 0.578125, "num_input_tokens_seen": 149032696, "step": 2377 }, { "epoch": 7.913477537437604, "grad_norm": 60.43627166748047, "learning_rate": 5e-06, "loss": 0.6083, "num_input_tokens_seen": 149094312, "step": 2378 }, { "epoch": 7.913477537437604, "loss": 0.6017109155654907, "loss_ce": 8.739640179555863e-05, "loss_iou": 0.2373046875, "loss_num": 0.025390625, "loss_xval": 0.6015625, "num_input_tokens_seen": 149094312, "step": 2378 }, { "epoch": 7.916805324459235, "grad_norm": 10.877464294433594, "learning_rate": 5e-06, "loss": 0.6033, "num_input_tokens_seen": 149157168, "step": 2379 }, { "epoch": 7.916805324459235, "loss": 0.6978522539138794, "loss_ce": 0.0003425328468438238, "loss_iou": 0.2578125, "loss_num": 0.036376953125, "loss_xval": 0.69921875, "num_input_tokens_seen": 149157168, "step": 2379 }, { "epoch": 7.920133111480865, "grad_norm": 9.518467903137207, "learning_rate": 5e-06, "loss": 0.743, "num_input_tokens_seen": 149221484, "step": 2380 }, { "epoch": 7.920133111480865, "loss": 0.700452446937561, "loss_ce": 0.002698545577004552, "loss_iou": 0.228515625, "loss_num": 0.048095703125, "loss_xval": 0.69921875, "num_input_tokens_seen": 149221484, "step": 2380 }, { "epoch": 7.923460898502496, "grad_norm": 7.324571132659912, "learning_rate": 5e-06, "loss": 0.5427, "num_input_tokens_seen": 149283676, "step": 2381 }, { "epoch": 7.923460898502496, "loss": 0.5743650197982788, "loss_ce": 2.4192306227632798e-05, "loss_iou": 0.232421875, "loss_num": 0.0220947265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 149283676, "step": 2381 }, { "epoch": 7.9267886855241265, "grad_norm": 8.265053749084473, "learning_rate": 5e-06, "loss": 0.4718, "num_input_tokens_seen": 149344460, "step": 2382 }, { "epoch": 7.9267886855241265, "loss": 0.25451797246932983, "loss_ce": 1.368559196635033e-06, "loss_iou": 0.1005859375, "loss_num": 0.01055908203125, "loss_xval": 0.25390625, "num_input_tokens_seen": 149344460, "step": 2382 }, { "epoch": 7.930116472545757, "grad_norm": 14.823125839233398, "learning_rate": 5e-06, "loss": 0.6832, "num_input_tokens_seen": 149405820, "step": 2383 }, { "epoch": 7.930116472545757, "loss": 0.9761956930160522, "loss_ce": 0.00012149095709901303, "loss_iou": 0.341796875, "loss_num": 0.058349609375, "loss_xval": 0.9765625, "num_input_tokens_seen": 149405820, "step": 2383 }, { "epoch": 7.933444259567388, "grad_norm": 11.668529510498047, "learning_rate": 5e-06, "loss": 0.5891, "num_input_tokens_seen": 149467460, "step": 2384 }, { "epoch": 7.933444259567388, "loss": 0.6653825044631958, "loss_ce": 0.00022135495964903384, "loss_iou": 0.21875, "loss_num": 0.04541015625, "loss_xval": 0.6640625, "num_input_tokens_seen": 149467460, "step": 2384 }, { "epoch": 7.936772046589018, "grad_norm": 12.773284912109375, "learning_rate": 5e-06, "loss": 0.5844, "num_input_tokens_seen": 149530776, "step": 2385 }, { "epoch": 7.936772046589018, "loss": 0.4907369613647461, "loss_ce": 1.4291425031842664e-05, "loss_iou": 0.19140625, "loss_num": 0.021728515625, "loss_xval": 0.490234375, "num_input_tokens_seen": 149530776, "step": 2385 }, { "epoch": 7.940099833610649, "grad_norm": 8.36546802520752, "learning_rate": 5e-06, "loss": 0.5011, "num_input_tokens_seen": 149593028, "step": 2386 }, { "epoch": 7.940099833610649, "loss": 0.5706056356430054, "loss_ce": 0.00029314306448213756, "loss_iou": 0.2001953125, "loss_num": 0.033935546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 149593028, "step": 2386 }, { "epoch": 7.94342762063228, "grad_norm": 12.976410865783691, "learning_rate": 5e-06, "loss": 0.4776, "num_input_tokens_seen": 149656644, "step": 2387 }, { "epoch": 7.94342762063228, "loss": 0.41238707304000854, "loss_ce": 0.00015561465988866985, "loss_iou": 0.1708984375, "loss_num": 0.0140380859375, "loss_xval": 0.412109375, "num_input_tokens_seen": 149656644, "step": 2387 }, { "epoch": 7.94675540765391, "grad_norm": 14.215097427368164, "learning_rate": 5e-06, "loss": 0.5386, "num_input_tokens_seen": 149719100, "step": 2388 }, { "epoch": 7.94675540765391, "loss": 0.6402016878128052, "loss_ce": 3.877669314533705e-06, "loss_iou": 0.248046875, "loss_num": 0.02880859375, "loss_xval": 0.640625, "num_input_tokens_seen": 149719100, "step": 2388 }, { "epoch": 7.950083194675541, "grad_norm": 25.129207611083984, "learning_rate": 5e-06, "loss": 0.5153, "num_input_tokens_seen": 149781072, "step": 2389 }, { "epoch": 7.950083194675541, "loss": 0.4898923933506012, "loss_ce": 2.422572833893355e-05, "loss_iou": 0.1748046875, "loss_num": 0.028076171875, "loss_xval": 0.490234375, "num_input_tokens_seen": 149781072, "step": 2389 }, { "epoch": 7.953410981697171, "grad_norm": 25.4522647857666, "learning_rate": 5e-06, "loss": 0.5679, "num_input_tokens_seen": 149842756, "step": 2390 }, { "epoch": 7.953410981697171, "loss": 0.6536891460418701, "loss_ce": 2.6219738629151834e-06, "loss_iou": 0.2216796875, "loss_num": 0.0419921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 149842756, "step": 2390 }, { "epoch": 7.956738768718802, "grad_norm": 15.125835418701172, "learning_rate": 5e-06, "loss": 0.4868, "num_input_tokens_seen": 149905308, "step": 2391 }, { "epoch": 7.956738768718802, "loss": 0.5996114015579224, "loss_ce": 2.022020908043487e-06, "loss_iou": 0.24609375, "loss_num": 0.0216064453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 149905308, "step": 2391 }, { "epoch": 7.960066555740433, "grad_norm": 15.37833309173584, "learning_rate": 5e-06, "loss": 0.5377, "num_input_tokens_seen": 149967532, "step": 2392 }, { "epoch": 7.960066555740433, "loss": 0.40106362104415894, "loss_ce": 1.5977386738086352e-06, "loss_iou": 0.1240234375, "loss_num": 0.030517578125, "loss_xval": 0.400390625, "num_input_tokens_seen": 149967532, "step": 2392 }, { "epoch": 7.963394342762063, "grad_norm": 16.598520278930664, "learning_rate": 5e-06, "loss": 0.4238, "num_input_tokens_seen": 150027648, "step": 2393 }, { "epoch": 7.963394342762063, "loss": 0.32995736598968506, "loss_ce": 1.320679302807548e-06, "loss_iou": 0.09033203125, "loss_num": 0.02978515625, "loss_xval": 0.330078125, "num_input_tokens_seen": 150027648, "step": 2393 }, { "epoch": 7.966722129783694, "grad_norm": 21.397706985473633, "learning_rate": 5e-06, "loss": 0.5789, "num_input_tokens_seen": 150090464, "step": 2394 }, { "epoch": 7.966722129783694, "loss": 0.5835870504379272, "loss_ce": 0.0009759521344676614, "loss_iou": 0.2353515625, "loss_num": 0.0224609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 150090464, "step": 2394 }, { "epoch": 7.9700499168053245, "grad_norm": 16.042980194091797, "learning_rate": 5e-06, "loss": 0.4311, "num_input_tokens_seen": 150151888, "step": 2395 }, { "epoch": 7.9700499168053245, "loss": 0.5340585708618164, "loss_ce": 9.44493478982622e-07, "loss_iou": 0.1796875, "loss_num": 0.034912109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 150151888, "step": 2395 }, { "epoch": 7.973377703826955, "grad_norm": 24.508251190185547, "learning_rate": 5e-06, "loss": 0.5662, "num_input_tokens_seen": 150213516, "step": 2396 }, { "epoch": 7.973377703826955, "loss": 0.4852312505245209, "loss_ce": 1.7506299627711996e-06, "loss_iou": 0.1455078125, "loss_num": 0.038818359375, "loss_xval": 0.484375, "num_input_tokens_seen": 150213516, "step": 2396 }, { "epoch": 7.976705490848586, "grad_norm": 10.518195152282715, "learning_rate": 5e-06, "loss": 0.4442, "num_input_tokens_seen": 150277576, "step": 2397 }, { "epoch": 7.976705490848586, "loss": 0.4083232283592224, "loss_ce": 0.00024218307225964963, "loss_iou": 0.158203125, "loss_num": 0.0184326171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 150277576, "step": 2397 }, { "epoch": 7.980033277870216, "grad_norm": 13.654875755310059, "learning_rate": 5e-06, "loss": 0.5606, "num_input_tokens_seen": 150341524, "step": 2398 }, { "epoch": 7.980033277870216, "loss": 0.5705694556236267, "loss_ce": 1.2777829397236928e-05, "loss_iou": 0.240234375, "loss_num": 0.017822265625, "loss_xval": 0.5703125, "num_input_tokens_seen": 150341524, "step": 2398 }, { "epoch": 7.983361064891847, "grad_norm": 16.801708221435547, "learning_rate": 5e-06, "loss": 0.5418, "num_input_tokens_seen": 150402712, "step": 2399 }, { "epoch": 7.983361064891847, "loss": 0.8060579299926758, "loss_ce": 0.00014974072109907866, "loss_iou": 0.259765625, "loss_num": 0.057373046875, "loss_xval": 0.8046875, "num_input_tokens_seen": 150402712, "step": 2399 }, { "epoch": 7.9866888519134775, "grad_norm": 18.47366714477539, "learning_rate": 5e-06, "loss": 0.462, "num_input_tokens_seen": 150465508, "step": 2400 }, { "epoch": 7.9866888519134775, "loss": 0.37381842732429504, "loss_ce": 0.0005274119321256876, "loss_iou": 0.12353515625, "loss_num": 0.0252685546875, "loss_xval": 0.373046875, "num_input_tokens_seen": 150465508, "step": 2400 }, { "epoch": 7.990016638935108, "grad_norm": 18.120771408081055, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 150527700, "step": 2401 }, { "epoch": 7.990016638935108, "loss": 0.37521040439605713, "loss_ce": 2.7321235393173993e-05, "loss_iou": 0.11083984375, "loss_num": 0.03076171875, "loss_xval": 0.375, "num_input_tokens_seen": 150527700, "step": 2401 }, { "epoch": 7.993344425956739, "grad_norm": 7.402740478515625, "learning_rate": 5e-06, "loss": 0.5944, "num_input_tokens_seen": 150590612, "step": 2402 }, { "epoch": 7.993344425956739, "loss": 0.7119214534759521, "loss_ce": 7.3846513259923086e-06, "loss_iou": 0.263671875, "loss_num": 0.037353515625, "loss_xval": 0.7109375, "num_input_tokens_seen": 150590612, "step": 2402 }, { "epoch": 7.996672212978369, "grad_norm": 24.257715225219727, "learning_rate": 5e-06, "loss": 0.4791, "num_input_tokens_seen": 150653248, "step": 2403 }, { "epoch": 7.996672212978369, "loss": 0.5422375202178955, "loss_ce": 1.1802052313214517e-06, "loss_iou": 0.19921875, "loss_num": 0.0289306640625, "loss_xval": 0.54296875, "num_input_tokens_seen": 150653248, "step": 2403 }, { "epoch": 8.0, "grad_norm": 27.688859939575195, "learning_rate": 5e-06, "loss": 0.5704, "num_input_tokens_seen": 150716448, "step": 2404 }, { "epoch": 8.0, "loss": 0.5572729110717773, "loss_ce": 0.00014399091014638543, "loss_iou": 0.203125, "loss_num": 0.0303955078125, "loss_xval": 0.55859375, "num_input_tokens_seen": 150716448, "step": 2404 }, { "epoch": 8.00332778702163, "grad_norm": 18.1149959564209, "learning_rate": 5e-06, "loss": 0.6812, "num_input_tokens_seen": 150779480, "step": 2405 }, { "epoch": 8.00332778702163, "loss": 0.8981951475143433, "loss_ce": 1.78021946339868e-06, "loss_iou": 0.333984375, "loss_num": 0.04638671875, "loss_xval": 0.8984375, "num_input_tokens_seen": 150779480, "step": 2405 }, { "epoch": 8.006655574043261, "grad_norm": 7.36126184463501, "learning_rate": 5e-06, "loss": 0.3407, "num_input_tokens_seen": 150840844, "step": 2406 }, { "epoch": 8.006655574043261, "loss": 0.2751489281654358, "loss_ce": 2.453669367241673e-06, "loss_iou": 0.10107421875, "loss_num": 0.01470947265625, "loss_xval": 0.275390625, "num_input_tokens_seen": 150840844, "step": 2406 }, { "epoch": 8.009983361064892, "grad_norm": 9.0914945602417, "learning_rate": 5e-06, "loss": 0.5433, "num_input_tokens_seen": 150905236, "step": 2407 }, { "epoch": 8.009983361064892, "loss": 0.6044920086860657, "loss_ce": 0.0007322192541323602, "loss_iou": 0.1923828125, "loss_num": 0.0439453125, "loss_xval": 0.60546875, "num_input_tokens_seen": 150905236, "step": 2407 }, { "epoch": 8.013311148086522, "grad_norm": 9.11225414276123, "learning_rate": 5e-06, "loss": 0.4171, "num_input_tokens_seen": 150964372, "step": 2408 }, { "epoch": 8.013311148086522, "loss": 0.5028600692749023, "loss_ce": 0.0001745099143590778, "loss_iou": 0.1640625, "loss_num": 0.03466796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 150964372, "step": 2408 }, { "epoch": 8.016638935108153, "grad_norm": 25.22624969482422, "learning_rate": 5e-06, "loss": 0.6851, "num_input_tokens_seen": 151028124, "step": 2409 }, { "epoch": 8.016638935108153, "loss": 0.6237929463386536, "loss_ce": 1.3650748769578058e-05, "loss_iou": 0.232421875, "loss_num": 0.031982421875, "loss_xval": 0.625, "num_input_tokens_seen": 151028124, "step": 2409 }, { "epoch": 8.019966722129784, "grad_norm": 23.616727828979492, "learning_rate": 5e-06, "loss": 0.542, "num_input_tokens_seen": 151088856, "step": 2410 }, { "epoch": 8.019966722129784, "loss": 0.5690186023712158, "loss_ce": 4.891072239843197e-05, "loss_iou": 0.2216796875, "loss_num": 0.025390625, "loss_xval": 0.5703125, "num_input_tokens_seen": 151088856, "step": 2410 }, { "epoch": 8.023294509151414, "grad_norm": 17.05337142944336, "learning_rate": 5e-06, "loss": 0.421, "num_input_tokens_seen": 151151052, "step": 2411 }, { "epoch": 8.023294509151414, "loss": 0.3356812298297882, "loss_ce": 0.00010993298201356083, "loss_iou": 0.125, "loss_num": 0.01708984375, "loss_xval": 0.3359375, "num_input_tokens_seen": 151151052, "step": 2411 }, { "epoch": 8.026622296173045, "grad_norm": 15.094304084777832, "learning_rate": 5e-06, "loss": 0.459, "num_input_tokens_seen": 151212932, "step": 2412 }, { "epoch": 8.026622296173045, "loss": 0.6263443231582642, "loss_ce": 1.5726018318673596e-06, "loss_iou": 0.1748046875, "loss_num": 0.055419921875, "loss_xval": 0.625, "num_input_tokens_seen": 151212932, "step": 2412 }, { "epoch": 8.029950083194676, "grad_norm": 15.563193321228027, "learning_rate": 5e-06, "loss": 0.4865, "num_input_tokens_seen": 151273996, "step": 2413 }, { "epoch": 8.029950083194676, "loss": 0.4824829697608948, "loss_ce": 0.0005188671057112515, "loss_iou": 0.185546875, "loss_num": 0.0224609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 151273996, "step": 2413 }, { "epoch": 8.033277870216306, "grad_norm": 32.03296661376953, "learning_rate": 5e-06, "loss": 0.5699, "num_input_tokens_seen": 151338364, "step": 2414 }, { "epoch": 8.033277870216306, "loss": 0.45406216382980347, "loss_ce": 8.267463999800384e-05, "loss_iou": 0.1748046875, "loss_num": 0.0208740234375, "loss_xval": 0.453125, "num_input_tokens_seen": 151338364, "step": 2414 }, { "epoch": 8.036605657237937, "grad_norm": 32.94428253173828, "learning_rate": 5e-06, "loss": 0.6587, "num_input_tokens_seen": 151402624, "step": 2415 }, { "epoch": 8.036605657237937, "loss": 0.5931004285812378, "loss_ce": 0.0015477320412173867, "loss_iou": 0.2265625, "loss_num": 0.027587890625, "loss_xval": 0.58984375, "num_input_tokens_seen": 151402624, "step": 2415 }, { "epoch": 8.039933444259567, "grad_norm": 11.829931259155273, "learning_rate": 5e-06, "loss": 0.4489, "num_input_tokens_seen": 151465300, "step": 2416 }, { "epoch": 8.039933444259567, "loss": 0.5881564617156982, "loss_ce": 2.1727586499764584e-05, "loss_iou": 0.248046875, "loss_num": 0.018310546875, "loss_xval": 0.58984375, "num_input_tokens_seen": 151465300, "step": 2416 }, { "epoch": 8.043261231281198, "grad_norm": 12.115344047546387, "learning_rate": 5e-06, "loss": 0.6476, "num_input_tokens_seen": 151529704, "step": 2417 }, { "epoch": 8.043261231281198, "loss": 0.5628034472465515, "loss_ce": 0.000791735015809536, "loss_iou": 0.228515625, "loss_num": 0.0211181640625, "loss_xval": 0.5625, "num_input_tokens_seen": 151529704, "step": 2417 }, { "epoch": 8.046589018302829, "grad_norm": 15.642353057861328, "learning_rate": 5e-06, "loss": 0.582, "num_input_tokens_seen": 151593792, "step": 2418 }, { "epoch": 8.046589018302829, "loss": 0.7199845314025879, "loss_ce": 1.3774792023468763e-05, "loss_iou": 0.28515625, "loss_num": 0.030029296875, "loss_xval": 0.71875, "num_input_tokens_seen": 151593792, "step": 2418 }, { "epoch": 8.04991680532446, "grad_norm": 16.085811614990234, "learning_rate": 5e-06, "loss": 0.4757, "num_input_tokens_seen": 151655832, "step": 2419 }, { "epoch": 8.04991680532446, "loss": 0.5246597528457642, "loss_ce": 1.5337647027990897e-06, "loss_iou": 0.2021484375, "loss_num": 0.024169921875, "loss_xval": 0.5234375, "num_input_tokens_seen": 151655832, "step": 2419 }, { "epoch": 8.05324459234609, "grad_norm": 22.611623764038086, "learning_rate": 5e-06, "loss": 0.5547, "num_input_tokens_seen": 151718536, "step": 2420 }, { "epoch": 8.05324459234609, "loss": 0.5084238052368164, "loss_ce": 9.541911367705325e-07, "loss_iou": 0.212890625, "loss_num": 0.0164794921875, "loss_xval": 0.5078125, "num_input_tokens_seen": 151718536, "step": 2420 }, { "epoch": 8.05657237936772, "grad_norm": 26.871212005615234, "learning_rate": 5e-06, "loss": 0.5195, "num_input_tokens_seen": 151780060, "step": 2421 }, { "epoch": 8.05657237936772, "loss": 0.4436456859111786, "loss_ce": 0.00013371184468269348, "loss_iou": 0.177734375, "loss_num": 0.017333984375, "loss_xval": 0.443359375, "num_input_tokens_seen": 151780060, "step": 2421 }, { "epoch": 8.059900166389351, "grad_norm": 16.326187133789062, "learning_rate": 5e-06, "loss": 0.4651, "num_input_tokens_seen": 151844348, "step": 2422 }, { "epoch": 8.059900166389351, "loss": 0.48791730403900146, "loss_ce": 2.2628007627645275e-06, "loss_iou": 0.1904296875, "loss_num": 0.0216064453125, "loss_xval": 0.48828125, "num_input_tokens_seen": 151844348, "step": 2422 }, { "epoch": 8.063227953410982, "grad_norm": 13.009902000427246, "learning_rate": 5e-06, "loss": 0.5375, "num_input_tokens_seen": 151907712, "step": 2423 }, { "epoch": 8.063227953410982, "loss": 0.5223662853240967, "loss_ce": 0.0006377990357577801, "loss_iou": 0.203125, "loss_num": 0.023193359375, "loss_xval": 0.5234375, "num_input_tokens_seen": 151907712, "step": 2423 }, { "epoch": 8.066555740432612, "grad_norm": 6.712582588195801, "learning_rate": 5e-06, "loss": 0.4222, "num_input_tokens_seen": 151969636, "step": 2424 }, { "epoch": 8.066555740432612, "loss": 0.41247832775115967, "loss_ce": 0.0004300192231312394, "loss_iou": 0.1748046875, "loss_num": 0.0125732421875, "loss_xval": 0.412109375, "num_input_tokens_seen": 151969636, "step": 2424 }, { "epoch": 8.069883527454243, "grad_norm": 10.352930068969727, "learning_rate": 5e-06, "loss": 0.5744, "num_input_tokens_seen": 152032880, "step": 2425 }, { "epoch": 8.069883527454243, "loss": 0.8300479054450989, "loss_ce": 0.00021388895402196795, "loss_iou": 0.330078125, "loss_num": 0.03369140625, "loss_xval": 0.828125, "num_input_tokens_seen": 152032880, "step": 2425 }, { "epoch": 8.073211314475873, "grad_norm": 57.63808822631836, "learning_rate": 5e-06, "loss": 0.5143, "num_input_tokens_seen": 152096488, "step": 2426 }, { "epoch": 8.073211314475873, "loss": 0.4737561345100403, "loss_ce": 1.2529344530776143e-06, "loss_iou": 0.1904296875, "loss_num": 0.018798828125, "loss_xval": 0.474609375, "num_input_tokens_seen": 152096488, "step": 2426 }, { "epoch": 8.076539101497504, "grad_norm": 23.341392517089844, "learning_rate": 5e-06, "loss": 0.5826, "num_input_tokens_seen": 152160104, "step": 2427 }, { "epoch": 8.076539101497504, "loss": 0.678065299987793, "loss_ce": 0.0006970908725634217, "loss_iou": 0.2734375, "loss_num": 0.026611328125, "loss_xval": 0.67578125, "num_input_tokens_seen": 152160104, "step": 2427 }, { "epoch": 8.079866888519135, "grad_norm": 18.72080421447754, "learning_rate": 5e-06, "loss": 0.6317, "num_input_tokens_seen": 152223992, "step": 2428 }, { "epoch": 8.079866888519135, "loss": 0.42368650436401367, "loss_ce": 0.00010252871288685128, "loss_iou": 0.125, "loss_num": 0.03466796875, "loss_xval": 0.423828125, "num_input_tokens_seen": 152223992, "step": 2428 }, { "epoch": 8.083194675540765, "grad_norm": 14.705158233642578, "learning_rate": 5e-06, "loss": 0.5572, "num_input_tokens_seen": 152286828, "step": 2429 }, { "epoch": 8.083194675540765, "loss": 0.676171064376831, "loss_ce": 2.362320628890302e-05, "loss_iou": 0.294921875, "loss_num": 0.017333984375, "loss_xval": 0.67578125, "num_input_tokens_seen": 152286828, "step": 2429 }, { "epoch": 8.086522462562396, "grad_norm": 11.723017692565918, "learning_rate": 5e-06, "loss": 0.6171, "num_input_tokens_seen": 152349552, "step": 2430 }, { "epoch": 8.086522462562396, "loss": 0.6467012166976929, "loss_ce": 0.00021685042884200811, "loss_iou": 0.2412109375, "loss_num": 0.033203125, "loss_xval": 0.6484375, "num_input_tokens_seen": 152349552, "step": 2430 }, { "epoch": 8.089850249584027, "grad_norm": 35.75983810424805, "learning_rate": 5e-06, "loss": 0.6164, "num_input_tokens_seen": 152412668, "step": 2431 }, { "epoch": 8.089850249584027, "loss": 0.6807894706726074, "loss_ce": 0.0002475123037584126, "loss_iou": 0.255859375, "loss_num": 0.033935546875, "loss_xval": 0.6796875, "num_input_tokens_seen": 152412668, "step": 2431 }, { "epoch": 8.093178036605657, "grad_norm": 29.226917266845703, "learning_rate": 5e-06, "loss": 0.7367, "num_input_tokens_seen": 152474972, "step": 2432 }, { "epoch": 8.093178036605657, "loss": 0.8186821341514587, "loss_ce": 7.858355820644647e-05, "loss_iou": 0.322265625, "loss_num": 0.03466796875, "loss_xval": 0.8203125, "num_input_tokens_seen": 152474972, "step": 2432 }, { "epoch": 8.096505823627288, "grad_norm": 42.15151596069336, "learning_rate": 5e-06, "loss": 0.5939, "num_input_tokens_seen": 152537620, "step": 2433 }, { "epoch": 8.096505823627288, "loss": 0.4473893642425537, "loss_ce": 1.674603936407948e-06, "loss_iou": 0.1484375, "loss_num": 0.02978515625, "loss_xval": 0.447265625, "num_input_tokens_seen": 152537620, "step": 2433 }, { "epoch": 8.099833610648918, "grad_norm": 8.637619972229004, "learning_rate": 5e-06, "loss": 0.5339, "num_input_tokens_seen": 152598384, "step": 2434 }, { "epoch": 8.099833610648918, "loss": 0.37832561135292053, "loss_ce": 0.0002433224581182003, "loss_iou": 0.1123046875, "loss_num": 0.03076171875, "loss_xval": 0.37890625, "num_input_tokens_seen": 152598384, "step": 2434 }, { "epoch": 8.103161397670549, "grad_norm": 29.386947631835938, "learning_rate": 5e-06, "loss": 0.7366, "num_input_tokens_seen": 152662040, "step": 2435 }, { "epoch": 8.103161397670549, "loss": 0.7205250263214111, "loss_ce": 6.606460374314338e-05, "loss_iou": 0.291015625, "loss_num": 0.0274658203125, "loss_xval": 0.71875, "num_input_tokens_seen": 152662040, "step": 2435 }, { "epoch": 8.10648918469218, "grad_norm": 40.0624885559082, "learning_rate": 5e-06, "loss": 0.4312, "num_input_tokens_seen": 152723624, "step": 2436 }, { "epoch": 8.10648918469218, "loss": 0.45446550846099854, "loss_ce": 0.000119820237159729, "loss_iou": 0.1484375, "loss_num": 0.031494140625, "loss_xval": 0.455078125, "num_input_tokens_seen": 152723624, "step": 2436 }, { "epoch": 8.10981697171381, "grad_norm": 10.424002647399902, "learning_rate": 5e-06, "loss": 0.5024, "num_input_tokens_seen": 152786032, "step": 2437 }, { "epoch": 8.10981697171381, "loss": 0.40211498737335205, "loss_ce": 1.5366931620519608e-05, "loss_iou": 0.16015625, "loss_num": 0.01611328125, "loss_xval": 0.40234375, "num_input_tokens_seen": 152786032, "step": 2437 }, { "epoch": 8.11314475873544, "grad_norm": 9.189531326293945, "learning_rate": 5e-06, "loss": 0.4893, "num_input_tokens_seen": 152849280, "step": 2438 }, { "epoch": 8.11314475873544, "loss": 0.5251463651657104, "loss_ce": 0.00024404437863267958, "loss_iou": 0.177734375, "loss_num": 0.033935546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 152849280, "step": 2438 }, { "epoch": 8.116472545757071, "grad_norm": 9.56404972076416, "learning_rate": 5e-06, "loss": 0.4746, "num_input_tokens_seen": 152911752, "step": 2439 }, { "epoch": 8.116472545757071, "loss": 0.4675353467464447, "loss_ce": 6.056262009224156e-06, "loss_iou": 0.1806640625, "loss_num": 0.021240234375, "loss_xval": 0.466796875, "num_input_tokens_seen": 152911752, "step": 2439 }, { "epoch": 8.119800332778702, "grad_norm": 9.372553825378418, "learning_rate": 5e-06, "loss": 0.5694, "num_input_tokens_seen": 152975308, "step": 2440 }, { "epoch": 8.119800332778702, "loss": 0.591071605682373, "loss_ce": 7.141482910810737e-06, "loss_iou": 0.228515625, "loss_num": 0.02685546875, "loss_xval": 0.58984375, "num_input_tokens_seen": 152975308, "step": 2440 }, { "epoch": 8.123128119800333, "grad_norm": 12.370013236999512, "learning_rate": 5e-06, "loss": 0.6476, "num_input_tokens_seen": 153038296, "step": 2441 }, { "epoch": 8.123128119800333, "loss": 0.3000500500202179, "loss_ce": 1.2275347671675263e-06, "loss_iou": 0.099609375, "loss_num": 0.020263671875, "loss_xval": 0.30078125, "num_input_tokens_seen": 153038296, "step": 2441 }, { "epoch": 8.126455906821963, "grad_norm": 15.418782234191895, "learning_rate": 5e-06, "loss": 0.5086, "num_input_tokens_seen": 153097300, "step": 2442 }, { "epoch": 8.126455906821963, "loss": 0.3468443751335144, "loss_ce": 4.25894686486572e-05, "loss_iou": 0.125, "loss_num": 0.0194091796875, "loss_xval": 0.34765625, "num_input_tokens_seen": 153097300, "step": 2442 }, { "epoch": 8.129783693843594, "grad_norm": 13.893099784851074, "learning_rate": 5e-06, "loss": 0.4593, "num_input_tokens_seen": 153161424, "step": 2443 }, { "epoch": 8.129783693843594, "loss": 0.4725702404975891, "loss_ce": 0.00021917115373071283, "loss_iou": 0.1845703125, "loss_num": 0.020751953125, "loss_xval": 0.47265625, "num_input_tokens_seen": 153161424, "step": 2443 }, { "epoch": 8.133111480865225, "grad_norm": 7.253799915313721, "learning_rate": 5e-06, "loss": 0.5749, "num_input_tokens_seen": 153224468, "step": 2444 }, { "epoch": 8.133111480865225, "loss": 0.31199324131011963, "loss_ce": 4.2538693378446624e-05, "loss_iou": 0.1181640625, "loss_num": 0.01507568359375, "loss_xval": 0.3125, "num_input_tokens_seen": 153224468, "step": 2444 }, { "epoch": 8.136439267886855, "grad_norm": 12.632119178771973, "learning_rate": 5e-06, "loss": 0.4008, "num_input_tokens_seen": 153287228, "step": 2445 }, { "epoch": 8.136439267886855, "loss": 0.4356737732887268, "loss_ce": 4.833314051211346e-06, "loss_iou": 0.1875, "loss_num": 0.01226806640625, "loss_xval": 0.435546875, "num_input_tokens_seen": 153287228, "step": 2445 }, { "epoch": 8.139767054908486, "grad_norm": 19.96878433227539, "learning_rate": 5e-06, "loss": 0.6359, "num_input_tokens_seen": 153349768, "step": 2446 }, { "epoch": 8.139767054908486, "loss": 0.5901039242744446, "loss_ce": 1.6032543499022722e-05, "loss_iou": 0.17578125, "loss_num": 0.0478515625, "loss_xval": 0.58984375, "num_input_tokens_seen": 153349768, "step": 2446 }, { "epoch": 8.143094841930116, "grad_norm": 29.680932998657227, "learning_rate": 5e-06, "loss": 0.5605, "num_input_tokens_seen": 153413840, "step": 2447 }, { "epoch": 8.143094841930116, "loss": 0.7876870632171631, "loss_ce": 0.0005776546895503998, "loss_iou": 0.28515625, "loss_num": 0.043212890625, "loss_xval": 0.7890625, "num_input_tokens_seen": 153413840, "step": 2447 }, { "epoch": 8.146422628951747, "grad_norm": 19.14665412902832, "learning_rate": 5e-06, "loss": 0.2737, "num_input_tokens_seen": 153474836, "step": 2448 }, { "epoch": 8.146422628951747, "loss": 0.2937498688697815, "loss_ce": 4.869881013291888e-05, "loss_iou": 0.09619140625, "loss_num": 0.020263671875, "loss_xval": 0.29296875, "num_input_tokens_seen": 153474836, "step": 2448 }, { "epoch": 8.149750415973378, "grad_norm": 15.778740882873535, "learning_rate": 5e-06, "loss": 0.5655, "num_input_tokens_seen": 153537536, "step": 2449 }, { "epoch": 8.149750415973378, "loss": 0.5182590484619141, "loss_ce": 0.00019263816648162901, "loss_iou": 0.1826171875, "loss_num": 0.030517578125, "loss_xval": 0.51953125, "num_input_tokens_seen": 153537536, "step": 2449 }, { "epoch": 8.153078202995008, "grad_norm": 21.42453956604004, "learning_rate": 5e-06, "loss": 0.5633, "num_input_tokens_seen": 153599124, "step": 2450 }, { "epoch": 8.153078202995008, "loss": 0.6198490858078003, "loss_ce": 9.811091149458662e-05, "loss_iou": 0.212890625, "loss_num": 0.038818359375, "loss_xval": 0.62109375, "num_input_tokens_seen": 153599124, "step": 2450 }, { "epoch": 8.156405990016639, "grad_norm": 34.63134002685547, "learning_rate": 5e-06, "loss": 0.6157, "num_input_tokens_seen": 153661960, "step": 2451 }, { "epoch": 8.156405990016639, "loss": 0.7873554229736328, "loss_ce": 1.9057225699725677e-06, "loss_iou": 0.31640625, "loss_num": 0.0311279296875, "loss_xval": 0.7890625, "num_input_tokens_seen": 153661960, "step": 2451 }, { "epoch": 8.15973377703827, "grad_norm": 57.15771484375, "learning_rate": 5e-06, "loss": 0.6547, "num_input_tokens_seen": 153725148, "step": 2452 }, { "epoch": 8.15973377703827, "loss": 0.4613756835460663, "loss_ce": 1.0944631867459975e-05, "loss_iou": 0.125, "loss_num": 0.042236328125, "loss_xval": 0.4609375, "num_input_tokens_seen": 153725148, "step": 2452 }, { "epoch": 8.1630615640599, "grad_norm": 27.680862426757812, "learning_rate": 5e-06, "loss": 0.5433, "num_input_tokens_seen": 153787864, "step": 2453 }, { "epoch": 8.1630615640599, "loss": 0.6128358840942383, "loss_ce": 0.0006532339612022042, "loss_iou": 0.203125, "loss_num": 0.04150390625, "loss_xval": 0.61328125, "num_input_tokens_seen": 153787864, "step": 2453 }, { "epoch": 8.16638935108153, "grad_norm": 8.831384658813477, "learning_rate": 5e-06, "loss": 0.3657, "num_input_tokens_seen": 153849384, "step": 2454 }, { "epoch": 8.16638935108153, "loss": 0.2893179655075073, "loss_ce": 0.00019441285985521972, "loss_iou": 0.07421875, "loss_num": 0.0281982421875, "loss_xval": 0.2890625, "num_input_tokens_seen": 153849384, "step": 2454 }, { "epoch": 8.169717138103161, "grad_norm": 35.82368469238281, "learning_rate": 5e-06, "loss": 0.6475, "num_input_tokens_seen": 153913056, "step": 2455 }, { "epoch": 8.169717138103161, "loss": 0.4591553509235382, "loss_ce": 0.0004151106986682862, "loss_iou": 0.166015625, "loss_num": 0.025390625, "loss_xval": 0.458984375, "num_input_tokens_seen": 153913056, "step": 2455 }, { "epoch": 8.173044925124792, "grad_norm": 20.591894149780273, "learning_rate": 5e-06, "loss": 0.5232, "num_input_tokens_seen": 153976140, "step": 2456 }, { "epoch": 8.173044925124792, "loss": 0.6609939336776733, "loss_ce": 0.0016921632923185825, "loss_iou": 0.2333984375, "loss_num": 0.038330078125, "loss_xval": 0.66015625, "num_input_tokens_seen": 153976140, "step": 2456 }, { "epoch": 8.176372712146422, "grad_norm": 10.39164924621582, "learning_rate": 5e-06, "loss": 0.622, "num_input_tokens_seen": 154039004, "step": 2457 }, { "epoch": 8.176372712146422, "loss": 0.7399322986602783, "loss_ce": 0.00043033823021687567, "loss_iou": 0.27734375, "loss_num": 0.037109375, "loss_xval": 0.73828125, "num_input_tokens_seen": 154039004, "step": 2457 }, { "epoch": 8.179700499168053, "grad_norm": 8.35913372039795, "learning_rate": 5e-06, "loss": 0.4956, "num_input_tokens_seen": 154102048, "step": 2458 }, { "epoch": 8.179700499168053, "loss": 0.6295105218887329, "loss_ce": 0.00017702500917948782, "loss_iou": 0.228515625, "loss_num": 0.034423828125, "loss_xval": 0.62890625, "num_input_tokens_seen": 154102048, "step": 2458 }, { "epoch": 8.183028286189684, "grad_norm": 20.717138290405273, "learning_rate": 5e-06, "loss": 0.5084, "num_input_tokens_seen": 154165060, "step": 2459 }, { "epoch": 8.183028286189684, "loss": 0.5961953997612, "loss_ce": 3.9954029489308596e-06, "loss_iou": 0.23828125, "loss_num": 0.02392578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 154165060, "step": 2459 }, { "epoch": 8.186356073211314, "grad_norm": 19.188718795776367, "learning_rate": 5e-06, "loss": 0.4807, "num_input_tokens_seen": 154227612, "step": 2460 }, { "epoch": 8.186356073211314, "loss": 0.48298630118370056, "loss_ce": 0.0001982160029001534, "loss_iou": 0.15234375, "loss_num": 0.03564453125, "loss_xval": 0.482421875, "num_input_tokens_seen": 154227612, "step": 2460 }, { "epoch": 8.189683860232945, "grad_norm": 12.763959884643555, "learning_rate": 5e-06, "loss": 0.5967, "num_input_tokens_seen": 154290924, "step": 2461 }, { "epoch": 8.189683860232945, "loss": 0.4651564061641693, "loss_ce": 0.0004347363719716668, "loss_iou": 0.1669921875, "loss_num": 0.0262451171875, "loss_xval": 0.46484375, "num_input_tokens_seen": 154290924, "step": 2461 }, { "epoch": 8.193011647254576, "grad_norm": 11.874279975891113, "learning_rate": 5e-06, "loss": 0.6729, "num_input_tokens_seen": 154354104, "step": 2462 }, { "epoch": 8.193011647254576, "loss": 0.6199962496757507, "loss_ce": 1.1306653959763935e-06, "loss_iou": 0.20703125, "loss_num": 0.041259765625, "loss_xval": 0.62109375, "num_input_tokens_seen": 154354104, "step": 2462 }, { "epoch": 8.196339434276206, "grad_norm": 15.436007499694824, "learning_rate": 5e-06, "loss": 0.5786, "num_input_tokens_seen": 154417052, "step": 2463 }, { "epoch": 8.196339434276206, "loss": 0.6340981125831604, "loss_ce": 3.906352048943518e-06, "loss_iou": 0.236328125, "loss_num": 0.0322265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 154417052, "step": 2463 }, { "epoch": 8.199667221297837, "grad_norm": 18.182641983032227, "learning_rate": 5e-06, "loss": 0.4539, "num_input_tokens_seen": 154479924, "step": 2464 }, { "epoch": 8.199667221297837, "loss": 0.4256572723388672, "loss_ce": 0.0003642738447524607, "loss_iou": 0.126953125, "loss_num": 0.0341796875, "loss_xval": 0.42578125, "num_input_tokens_seen": 154479924, "step": 2464 }, { "epoch": 8.202995008319467, "grad_norm": 21.872167587280273, "learning_rate": 5e-06, "loss": 0.5356, "num_input_tokens_seen": 154543136, "step": 2465 }, { "epoch": 8.202995008319467, "loss": 0.6230975389480591, "loss_ce": 0.00029479750082828104, "loss_iou": 0.251953125, "loss_num": 0.0240478515625, "loss_xval": 0.62109375, "num_input_tokens_seen": 154543136, "step": 2465 }, { "epoch": 8.206322795341098, "grad_norm": 23.36290168762207, "learning_rate": 5e-06, "loss": 0.5504, "num_input_tokens_seen": 154606444, "step": 2466 }, { "epoch": 8.206322795341098, "loss": 0.4408794939517975, "loss_ce": 8.360129140783101e-05, "loss_iou": 0.1708984375, "loss_num": 0.02001953125, "loss_xval": 0.44140625, "num_input_tokens_seen": 154606444, "step": 2466 }, { "epoch": 8.209650582362729, "grad_norm": 12.096077919006348, "learning_rate": 5e-06, "loss": 0.3805, "num_input_tokens_seen": 154665244, "step": 2467 }, { "epoch": 8.209650582362729, "loss": 0.4396175146102905, "loss_ce": 4.2326264519942924e-05, "loss_iou": 0.1123046875, "loss_num": 0.04296875, "loss_xval": 0.439453125, "num_input_tokens_seen": 154665244, "step": 2467 }, { "epoch": 8.21297836938436, "grad_norm": 12.722036361694336, "learning_rate": 5e-06, "loss": 0.5659, "num_input_tokens_seen": 154726924, "step": 2468 }, { "epoch": 8.21297836938436, "loss": 0.6334260702133179, "loss_ce": 3.2235163871519035e-06, "loss_iou": 0.171875, "loss_num": 0.0576171875, "loss_xval": 0.6328125, "num_input_tokens_seen": 154726924, "step": 2468 }, { "epoch": 8.21630615640599, "grad_norm": 27.916044235229492, "learning_rate": 5e-06, "loss": 0.589, "num_input_tokens_seen": 154789396, "step": 2469 }, { "epoch": 8.21630615640599, "loss": 0.5427565574645996, "loss_ce": 1.4041515896678902e-06, "loss_iou": 0.177734375, "loss_num": 0.03759765625, "loss_xval": 0.54296875, "num_input_tokens_seen": 154789396, "step": 2469 }, { "epoch": 8.21963394342762, "grad_norm": 35.45465087890625, "learning_rate": 5e-06, "loss": 0.6524, "num_input_tokens_seen": 154852476, "step": 2470 }, { "epoch": 8.21963394342762, "loss": 0.5512363314628601, "loss_ce": 0.00021094981639180332, "loss_iou": 0.205078125, "loss_num": 0.0283203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 154852476, "step": 2470 }, { "epoch": 8.222961730449251, "grad_norm": 6.636467933654785, "learning_rate": 5e-06, "loss": 0.3911, "num_input_tokens_seen": 154914844, "step": 2471 }, { "epoch": 8.222961730449251, "loss": 0.3936777412891388, "loss_ce": 9.63363959272101e-07, "loss_iou": 0.14453125, "loss_num": 0.0211181640625, "loss_xval": 0.39453125, "num_input_tokens_seen": 154914844, "step": 2471 }, { "epoch": 8.226289517470882, "grad_norm": 10.995253562927246, "learning_rate": 5e-06, "loss": 0.5364, "num_input_tokens_seen": 154977152, "step": 2472 }, { "epoch": 8.226289517470882, "loss": 0.5563974380493164, "loss_ce": 9.414006854058243e-07, "loss_iou": 0.1884765625, "loss_num": 0.035888671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 154977152, "step": 2472 }, { "epoch": 8.229617304492512, "grad_norm": 12.64140510559082, "learning_rate": 5e-06, "loss": 0.6529, "num_input_tokens_seen": 155041040, "step": 2473 }, { "epoch": 8.229617304492512, "loss": 0.8103291988372803, "loss_ce": 2.6566573069430888e-05, "loss_iou": 0.33203125, "loss_num": 0.029296875, "loss_xval": 0.80859375, "num_input_tokens_seen": 155041040, "step": 2473 }, { "epoch": 8.232945091514143, "grad_norm": 4.252609729766846, "learning_rate": 5e-06, "loss": 0.2958, "num_input_tokens_seen": 155103064, "step": 2474 }, { "epoch": 8.232945091514143, "loss": 0.2659931778907776, "loss_ce": 1.956154619620065e-06, "loss_iou": 0.09033203125, "loss_num": 0.0172119140625, "loss_xval": 0.265625, "num_input_tokens_seen": 155103064, "step": 2474 }, { "epoch": 8.236272878535774, "grad_norm": 10.922815322875977, "learning_rate": 5e-06, "loss": 0.5138, "num_input_tokens_seen": 155165824, "step": 2475 }, { "epoch": 8.236272878535774, "loss": 0.41961944103240967, "loss_ce": 2.7222399694437627e-06, "loss_iou": 0.1591796875, "loss_num": 0.0201416015625, "loss_xval": 0.419921875, "num_input_tokens_seen": 155165824, "step": 2475 }, { "epoch": 8.239600665557404, "grad_norm": 14.53327465057373, "learning_rate": 5e-06, "loss": 0.5771, "num_input_tokens_seen": 155229108, "step": 2476 }, { "epoch": 8.239600665557404, "loss": 0.3875596523284912, "loss_ce": 0.0006577945314347744, "loss_iou": 0.1005859375, "loss_num": 0.037353515625, "loss_xval": 0.38671875, "num_input_tokens_seen": 155229108, "step": 2476 }, { "epoch": 8.242928452579035, "grad_norm": 11.884573936462402, "learning_rate": 5e-06, "loss": 0.6193, "num_input_tokens_seen": 155292348, "step": 2477 }, { "epoch": 8.242928452579035, "loss": 0.7795412540435791, "loss_ce": 0.0002443709527142346, "loss_iou": 0.3046875, "loss_num": 0.0341796875, "loss_xval": 0.78125, "num_input_tokens_seen": 155292348, "step": 2477 }, { "epoch": 8.246256239600665, "grad_norm": 16.318918228149414, "learning_rate": 5e-06, "loss": 0.5409, "num_input_tokens_seen": 155354912, "step": 2478 }, { "epoch": 8.246256239600665, "loss": 0.479867160320282, "loss_ce": 8.79096842254512e-06, "loss_iou": 0.14453125, "loss_num": 0.038330078125, "loss_xval": 0.48046875, "num_input_tokens_seen": 155354912, "step": 2478 }, { "epoch": 8.249584026622296, "grad_norm": 14.785120964050293, "learning_rate": 5e-06, "loss": 0.6726, "num_input_tokens_seen": 155417444, "step": 2479 }, { "epoch": 8.249584026622296, "loss": 0.6091422438621521, "loss_ce": 0.0003775875084102154, "loss_iou": 0.2021484375, "loss_num": 0.040771484375, "loss_xval": 0.609375, "num_input_tokens_seen": 155417444, "step": 2479 }, { "epoch": 8.252911813643927, "grad_norm": 10.74803352355957, "learning_rate": 5e-06, "loss": 0.6509, "num_input_tokens_seen": 155480860, "step": 2480 }, { "epoch": 8.252911813643927, "loss": 0.6075456738471985, "loss_ce": 1.7003012544591911e-06, "loss_iou": 0.208984375, "loss_num": 0.037841796875, "loss_xval": 0.609375, "num_input_tokens_seen": 155480860, "step": 2480 }, { "epoch": 8.256239600665557, "grad_norm": 9.276893615722656, "learning_rate": 5e-06, "loss": 0.5354, "num_input_tokens_seen": 155542984, "step": 2481 }, { "epoch": 8.256239600665557, "loss": 0.5825490951538086, "loss_ce": 2.9578255634987727e-05, "loss_iou": 0.2080078125, "loss_num": 0.033203125, "loss_xval": 0.58203125, "num_input_tokens_seen": 155542984, "step": 2481 }, { "epoch": 8.259567387687188, "grad_norm": 15.122297286987305, "learning_rate": 5e-06, "loss": 0.5749, "num_input_tokens_seen": 155607132, "step": 2482 }, { "epoch": 8.259567387687188, "loss": 0.41847485303878784, "loss_ce": 0.00020093029888812453, "loss_iou": 0.1259765625, "loss_num": 0.033203125, "loss_xval": 0.41796875, "num_input_tokens_seen": 155607132, "step": 2482 }, { "epoch": 8.262895174708818, "grad_norm": 25.431367874145508, "learning_rate": 5e-06, "loss": 0.8218, "num_input_tokens_seen": 155670820, "step": 2483 }, { "epoch": 8.262895174708818, "loss": 0.7718661427497864, "loss_ce": 1.5555149730062112e-05, "loss_iou": 0.294921875, "loss_num": 0.035888671875, "loss_xval": 0.7734375, "num_input_tokens_seen": 155670820, "step": 2483 }, { "epoch": 8.266222961730449, "grad_norm": 12.312417030334473, "learning_rate": 5e-06, "loss": 0.6586, "num_input_tokens_seen": 155733672, "step": 2484 }, { "epoch": 8.266222961730449, "loss": 0.6406716108322144, "loss_ce": 0.0015725099947303534, "loss_iou": 0.21875, "loss_num": 0.04052734375, "loss_xval": 0.640625, "num_input_tokens_seen": 155733672, "step": 2484 }, { "epoch": 8.26955074875208, "grad_norm": 6.2763190269470215, "learning_rate": 5e-06, "loss": 0.63, "num_input_tokens_seen": 155796480, "step": 2485 }, { "epoch": 8.26955074875208, "loss": 0.6902257204055786, "loss_ce": 9.670462532085367e-06, "loss_iou": 0.2265625, "loss_num": 0.047607421875, "loss_xval": 0.69140625, "num_input_tokens_seen": 155796480, "step": 2485 }, { "epoch": 8.27287853577371, "grad_norm": 14.841312408447266, "learning_rate": 5e-06, "loss": 0.446, "num_input_tokens_seen": 155858312, "step": 2486 }, { "epoch": 8.27287853577371, "loss": 0.5637843608856201, "loss_ce": 2.63432229985483e-06, "loss_iou": 0.2109375, "loss_num": 0.028564453125, "loss_xval": 0.5625, "num_input_tokens_seen": 155858312, "step": 2486 }, { "epoch": 8.27620632279534, "grad_norm": 10.321304321289062, "learning_rate": 5e-06, "loss": 0.4844, "num_input_tokens_seen": 155921480, "step": 2487 }, { "epoch": 8.27620632279534, "loss": 0.3402569591999054, "loss_ce": 0.0004742431337945163, "loss_iou": 0.109375, "loss_num": 0.0242919921875, "loss_xval": 0.33984375, "num_input_tokens_seen": 155921480, "step": 2487 }, { "epoch": 8.279534109816971, "grad_norm": 6.322237968444824, "learning_rate": 5e-06, "loss": 0.3087, "num_input_tokens_seen": 155981980, "step": 2488 }, { "epoch": 8.279534109816971, "loss": 0.38029998540878296, "loss_ce": 2.047650377789978e-05, "loss_iou": 0.1513671875, "loss_num": 0.0157470703125, "loss_xval": 0.380859375, "num_input_tokens_seen": 155981980, "step": 2488 }, { "epoch": 8.282861896838602, "grad_norm": 7.140236854553223, "learning_rate": 5e-06, "loss": 0.3548, "num_input_tokens_seen": 156042284, "step": 2489 }, { "epoch": 8.282861896838602, "loss": 0.38679540157318115, "loss_ce": 3.619849451297341e-07, "loss_iou": 0.0888671875, "loss_num": 0.041748046875, "loss_xval": 0.38671875, "num_input_tokens_seen": 156042284, "step": 2489 }, { "epoch": 8.286189683860233, "grad_norm": 10.104727745056152, "learning_rate": 5e-06, "loss": 0.6041, "num_input_tokens_seen": 156104740, "step": 2490 }, { "epoch": 8.286189683860233, "loss": 0.5026873350143433, "loss_ce": 1.7432555523555493e-06, "loss_iou": 0.1787109375, "loss_num": 0.0291748046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 156104740, "step": 2490 }, { "epoch": 8.289517470881863, "grad_norm": 6.5530104637146, "learning_rate": 5e-06, "loss": 0.5038, "num_input_tokens_seen": 156166868, "step": 2491 }, { "epoch": 8.289517470881863, "loss": 0.2386511266231537, "loss_ce": 3.6533210732159205e-06, "loss_iou": 0.07666015625, "loss_num": 0.01708984375, "loss_xval": 0.23828125, "num_input_tokens_seen": 156166868, "step": 2491 }, { "epoch": 8.292845257903494, "grad_norm": 12.56015396118164, "learning_rate": 5e-06, "loss": 0.5523, "num_input_tokens_seen": 156230304, "step": 2492 }, { "epoch": 8.292845257903494, "loss": 0.38745206594467163, "loss_ce": 8.996341307465627e-07, "loss_iou": 0.16015625, "loss_num": 0.01336669921875, "loss_xval": 0.38671875, "num_input_tokens_seen": 156230304, "step": 2492 }, { "epoch": 8.296173044925125, "grad_norm": 20.144866943359375, "learning_rate": 5e-06, "loss": 0.5688, "num_input_tokens_seen": 156294008, "step": 2493 }, { "epoch": 8.296173044925125, "loss": 0.7101075649261475, "loss_ce": 0.0007569859153591096, "loss_iou": 0.2373046875, "loss_num": 0.046875, "loss_xval": 0.7109375, "num_input_tokens_seen": 156294008, "step": 2493 }, { "epoch": 8.299500831946755, "grad_norm": 13.705316543579102, "learning_rate": 5e-06, "loss": 0.8762, "num_input_tokens_seen": 156357604, "step": 2494 }, { "epoch": 8.299500831946755, "loss": 0.9153454899787903, "loss_ce": 0.00030645259539596736, "loss_iou": 0.302734375, "loss_num": 0.061767578125, "loss_xval": 0.9140625, "num_input_tokens_seen": 156357604, "step": 2494 }, { "epoch": 8.302828618968386, "grad_norm": 6.725883483886719, "learning_rate": 5e-06, "loss": 0.6093, "num_input_tokens_seen": 156420776, "step": 2495 }, { "epoch": 8.302828618968386, "loss": 0.6404510736465454, "loss_ce": 9.190831406158395e-06, "loss_iou": 0.24609375, "loss_num": 0.0296630859375, "loss_xval": 0.640625, "num_input_tokens_seen": 156420776, "step": 2495 }, { "epoch": 8.306156405990016, "grad_norm": 14.249688148498535, "learning_rate": 5e-06, "loss": 0.509, "num_input_tokens_seen": 156483912, "step": 2496 }, { "epoch": 8.306156405990016, "loss": 0.5704067945480347, "loss_ce": 3.3265965612372383e-05, "loss_iou": 0.2001953125, "loss_num": 0.033935546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 156483912, "step": 2496 }, { "epoch": 8.309484193011647, "grad_norm": 34.63666534423828, "learning_rate": 5e-06, "loss": 0.379, "num_input_tokens_seen": 156546880, "step": 2497 }, { "epoch": 8.309484193011647, "loss": 0.35534900426864624, "loss_ce": 2.314334551556385e-06, "loss_iou": 0.10595703125, "loss_num": 0.0286865234375, "loss_xval": 0.35546875, "num_input_tokens_seen": 156546880, "step": 2497 }, { "epoch": 8.312811980033278, "grad_norm": 7.164108753204346, "learning_rate": 5e-06, "loss": 0.4524, "num_input_tokens_seen": 156610144, "step": 2498 }, { "epoch": 8.312811980033278, "loss": 0.48853427171707153, "loss_ce": 8.897723091649823e-06, "loss_iou": 0.1806640625, "loss_num": 0.025634765625, "loss_xval": 0.48828125, "num_input_tokens_seen": 156610144, "step": 2498 }, { "epoch": 8.316139767054908, "grad_norm": 22.78814697265625, "learning_rate": 5e-06, "loss": 0.4521, "num_input_tokens_seen": 156672712, "step": 2499 }, { "epoch": 8.316139767054908, "loss": 0.37561333179473877, "loss_ce": 2.981432089654845e-06, "loss_iou": 0.154296875, "loss_num": 0.01348876953125, "loss_xval": 0.375, "num_input_tokens_seen": 156672712, "step": 2499 }, { "epoch": 8.319467554076539, "grad_norm": 40.50959777832031, "learning_rate": 5e-06, "loss": 0.6141, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "eval_seeclick_CIoU": 0.0621575228869915, "eval_seeclick_GIoU": 0.0670594647526741, "eval_seeclick_IoU": 0.17269698530435562, "eval_seeclick_MAE_all": 0.16260817646980286, "eval_seeclick_MAE_h": 0.0522994976490736, "eval_seeclick_MAE_w": 0.12316643074154854, "eval_seeclick_MAE_x_boxes": 0.2016635462641716, "eval_seeclick_MAE_y_boxes": 0.17584563046693802, "eval_seeclick_NUM_probability": 0.9999386966228485, "eval_seeclick_inside_bbox": 0.24270834028720856, "eval_seeclick_loss": 2.8373279571533203, "eval_seeclick_loss_ce": 0.15481698513031006, "eval_seeclick_loss_iou": 0.9365234375, "eval_seeclick_loss_num": 0.16153717041015625, "eval_seeclick_loss_xval": 2.68017578125, "eval_seeclick_runtime": 61.9237, "eval_seeclick_samples_per_second": 0.759, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "eval_icons_CIoU": -0.05338548123836517, "eval_icons_GIoU": 0.03951136860996485, "eval_icons_IoU": 0.12723936140537262, "eval_icons_MAE_all": 0.1708369180560112, "eval_icons_MAE_h": 0.12086945027112961, "eval_icons_MAE_w": 0.21488387882709503, "eval_icons_MAE_x_boxes": 0.12955006211996078, "eval_icons_MAE_y_boxes": 0.06595474667847157, "eval_icons_NUM_probability": 0.9999790191650391, "eval_icons_inside_bbox": 0.2916666716337204, "eval_icons_loss": 2.7506277561187744, "eval_icons_loss_ce": 2.827531147886475e-06, "eval_icons_loss_iou": 0.959228515625, "eval_icons_loss_num": 0.17169189453125, "eval_icons_loss_xval": 2.77685546875, "eval_icons_runtime": 64.4101, "eval_icons_samples_per_second": 0.776, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "eval_screenspot_CIoU": 0.16885255525509515, "eval_screenspot_GIoU": 0.20235247413317362, "eval_screenspot_IoU": 0.2874511629343033, "eval_screenspot_MAE_all": 0.12440182268619537, "eval_screenspot_MAE_h": 0.06770160421729088, "eval_screenspot_MAE_w": 0.10852090145150821, "eval_screenspot_MAE_x_boxes": 0.16760768989721933, "eval_screenspot_MAE_y_boxes": 0.0879176730910937, "eval_screenspot_NUM_probability": 0.9999856154123942, "eval_screenspot_inside_bbox": 0.49458332856496173, "eval_screenspot_loss": 2.269505500793457, "eval_screenspot_loss_ce": 1.7213376850122586e-05, "eval_screenspot_loss_iou": 0.8216145833333334, "eval_screenspot_loss_num": 0.138092041015625, "eval_screenspot_loss_xval": 2.3343098958333335, "eval_screenspot_runtime": 128.1904, "eval_screenspot_samples_per_second": 0.694, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "eval_compot_CIoU": 0.02278616465628147, "eval_compot_GIoU": 0.09313322231173515, "eval_compot_IoU": 0.18145453929901123, "eval_compot_MAE_all": 0.17492558062076569, "eval_compot_MAE_h": 0.08428217843174934, "eval_compot_MAE_w": 0.1881752908229828, "eval_compot_MAE_x_boxes": 0.14925488084554672, "eval_compot_MAE_y_boxes": 0.1275140568614006, "eval_compot_NUM_probability": 0.9999840259552002, "eval_compot_inside_bbox": 0.3541666716337204, "eval_compot_loss": 2.687747001647949, "eval_compot_loss_ce": 0.0038672068621963263, "eval_compot_loss_iou": 0.91552734375, "eval_compot_loss_num": 0.19329452514648438, "eval_compot_loss_xval": 2.79931640625, "eval_compot_runtime": 74.029, "eval_compot_samples_per_second": 0.675, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "eval_custom_ui_MAE_all": 0.06784218549728394, "eval_custom_ui_MAE_x": 0.07681642472743988, "eval_custom_ui_MAE_y": 0.05886794440448284, "eval_custom_ui_NUM_probability": 0.999996542930603, "eval_custom_ui_loss": 0.30898517370224, "eval_custom_ui_loss_ce": 8.088518370641395e-07, "eval_custom_ui_loss_num": 0.0627288818359375, "eval_custom_ui_loss_xval": 0.31353759765625, "eval_custom_ui_runtime": 52.1041, "eval_custom_ui_samples_per_second": 0.96, "eval_custom_ui_steps_per_second": 0.038, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.319467554076539, "loss": 0.33825749158859253, "loss_ce": 6.661663292106823e-07, "loss_iou": 0.0, "loss_num": 0.06787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 156735632, "step": 2500 }, { "epoch": 8.32279534109817, "grad_norm": 18.61150360107422, "learning_rate": 5e-06, "loss": 0.5538, "num_input_tokens_seen": 156798260, "step": 2501 }, { "epoch": 8.32279534109817, "loss": 0.5912804007530212, "loss_ce": 2.3564516595797613e-06, "loss_iou": 0.224609375, "loss_num": 0.0283203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 156798260, "step": 2501 }, { "epoch": 8.3261231281198, "grad_norm": 32.894187927246094, "learning_rate": 5e-06, "loss": 0.504, "num_input_tokens_seen": 156861952, "step": 2502 }, { "epoch": 8.3261231281198, "loss": 0.5520709753036499, "loss_ce": 7.96058611740591e-06, "loss_iou": 0.216796875, "loss_num": 0.0234375, "loss_xval": 0.55078125, "num_input_tokens_seen": 156861952, "step": 2502 }, { "epoch": 8.32945091514143, "grad_norm": 13.269022941589355, "learning_rate": 5e-06, "loss": 0.5789, "num_input_tokens_seen": 156925340, "step": 2503 }, { "epoch": 8.32945091514143, "loss": 0.5295536518096924, "loss_ce": 1.2644528396776877e-05, "loss_iou": 0.2236328125, "loss_num": 0.016357421875, "loss_xval": 0.53125, "num_input_tokens_seen": 156925340, "step": 2503 }, { "epoch": 8.332778702163061, "grad_norm": 8.135456085205078, "learning_rate": 5e-06, "loss": 0.4552, "num_input_tokens_seen": 156987656, "step": 2504 }, { "epoch": 8.332778702163061, "loss": 0.4360538125038147, "loss_ce": 1.8656459360499866e-05, "loss_iou": 0.1484375, "loss_num": 0.0279541015625, "loss_xval": 0.435546875, "num_input_tokens_seen": 156987656, "step": 2504 }, { "epoch": 8.336106489184692, "grad_norm": 24.425687789916992, "learning_rate": 5e-06, "loss": 0.6434, "num_input_tokens_seen": 157050064, "step": 2505 }, { "epoch": 8.336106489184692, "loss": 0.4534982442855835, "loss_ce": 7.046838618407492e-06, "loss_iou": 0.1826171875, "loss_num": 0.017822265625, "loss_xval": 0.453125, "num_input_tokens_seen": 157050064, "step": 2505 }, { "epoch": 8.339434276206322, "grad_norm": 26.16646957397461, "learning_rate": 5e-06, "loss": 0.6365, "num_input_tokens_seen": 157113028, "step": 2506 }, { "epoch": 8.339434276206322, "loss": 0.950733482837677, "loss_ce": 0.00023298736778087914, "loss_iou": 0.38671875, "loss_num": 0.035400390625, "loss_xval": 0.94921875, "num_input_tokens_seen": 157113028, "step": 2506 }, { "epoch": 8.342762063227953, "grad_norm": 15.704716682434082, "learning_rate": 5e-06, "loss": 0.4665, "num_input_tokens_seen": 157174272, "step": 2507 }, { "epoch": 8.342762063227953, "loss": 0.5097678899765015, "loss_ce": 2.2550557332579046e-06, "loss_iou": 0.193359375, "loss_num": 0.0245361328125, "loss_xval": 0.5078125, "num_input_tokens_seen": 157174272, "step": 2507 }, { "epoch": 8.346089850249584, "grad_norm": 15.796403884887695, "learning_rate": 5e-06, "loss": 0.6218, "num_input_tokens_seen": 157237700, "step": 2508 }, { "epoch": 8.346089850249584, "loss": 0.8442539572715759, "loss_ce": 1.572317887621466e-05, "loss_iou": 0.3203125, "loss_num": 0.04052734375, "loss_xval": 0.84375, "num_input_tokens_seen": 157237700, "step": 2508 }, { "epoch": 8.349417637271214, "grad_norm": 13.575068473815918, "learning_rate": 5e-06, "loss": 0.5411, "num_input_tokens_seen": 157300228, "step": 2509 }, { "epoch": 8.349417637271214, "loss": 0.43475377559661865, "loss_ce": 0.00018348608864471316, "loss_iou": 0.1181640625, "loss_num": 0.039794921875, "loss_xval": 0.43359375, "num_input_tokens_seen": 157300228, "step": 2509 }, { "epoch": 8.352745424292845, "grad_norm": 11.119095802307129, "learning_rate": 5e-06, "loss": 0.5658, "num_input_tokens_seen": 157363912, "step": 2510 }, { "epoch": 8.352745424292845, "loss": 0.5700771808624268, "loss_ce": 8.799183888186235e-06, "loss_iou": 0.251953125, "loss_num": 0.01287841796875, "loss_xval": 0.5703125, "num_input_tokens_seen": 157363912, "step": 2510 }, { "epoch": 8.356073211314476, "grad_norm": 12.673629760742188, "learning_rate": 5e-06, "loss": 0.6139, "num_input_tokens_seen": 157425980, "step": 2511 }, { "epoch": 8.356073211314476, "loss": 0.5944139361381531, "loss_ce": 5.3611478506354615e-05, "loss_iou": 0.19921875, "loss_num": 0.039306640625, "loss_xval": 0.59375, "num_input_tokens_seen": 157425980, "step": 2511 }, { "epoch": 8.359400998336106, "grad_norm": 7.757660865783691, "learning_rate": 5e-06, "loss": 0.5401, "num_input_tokens_seen": 157489864, "step": 2512 }, { "epoch": 8.359400998336106, "loss": 0.6958622932434082, "loss_ce": 6.149343971628696e-05, "loss_iou": 0.275390625, "loss_num": 0.0289306640625, "loss_xval": 0.6953125, "num_input_tokens_seen": 157489864, "step": 2512 }, { "epoch": 8.362728785357737, "grad_norm": 15.645263671875, "learning_rate": 5e-06, "loss": 0.5724, "num_input_tokens_seen": 157553216, "step": 2513 }, { "epoch": 8.362728785357737, "loss": 0.4384782910346985, "loss_ce": 1.704424335002841e-06, "loss_iou": 0.1591796875, "loss_num": 0.0238037109375, "loss_xval": 0.4375, "num_input_tokens_seen": 157553216, "step": 2513 }, { "epoch": 8.366056572379367, "grad_norm": 21.691728591918945, "learning_rate": 5e-06, "loss": 0.3959, "num_input_tokens_seen": 157616552, "step": 2514 }, { "epoch": 8.366056572379367, "loss": 0.38067665696144104, "loss_ce": 0.0005496871890500188, "loss_iou": 0.1357421875, "loss_num": 0.0216064453125, "loss_xval": 0.380859375, "num_input_tokens_seen": 157616552, "step": 2514 }, { "epoch": 8.369384359400998, "grad_norm": 8.640152931213379, "learning_rate": 5e-06, "loss": 0.4099, "num_input_tokens_seen": 157678940, "step": 2515 }, { "epoch": 8.369384359400998, "loss": 0.352906197309494, "loss_ce": 9.26579843962827e-07, "loss_iou": 0.1455078125, "loss_num": 0.01226806640625, "loss_xval": 0.353515625, "num_input_tokens_seen": 157678940, "step": 2515 }, { "epoch": 8.372712146422629, "grad_norm": 8.657814025878906, "learning_rate": 5e-06, "loss": 0.5511, "num_input_tokens_seen": 157741696, "step": 2516 }, { "epoch": 8.372712146422629, "loss": 0.5018146634101868, "loss_ce": 0.00010568110155873, "loss_iou": 0.1708984375, "loss_num": 0.031982421875, "loss_xval": 0.5, "num_input_tokens_seen": 157741696, "step": 2516 }, { "epoch": 8.37603993344426, "grad_norm": 14.109491348266602, "learning_rate": 5e-06, "loss": 0.6139, "num_input_tokens_seen": 157804344, "step": 2517 }, { "epoch": 8.37603993344426, "loss": 0.6678574085235596, "loss_ce": 0.00025487542734481394, "loss_iou": 0.2265625, "loss_num": 0.042724609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 157804344, "step": 2517 }, { "epoch": 8.37936772046589, "grad_norm": 24.283531188964844, "learning_rate": 5e-06, "loss": 0.6752, "num_input_tokens_seen": 157867404, "step": 2518 }, { "epoch": 8.37936772046589, "loss": 0.8516241312026978, "loss_ce": 6.157417374197394e-05, "loss_iou": 0.349609375, "loss_num": 0.0303955078125, "loss_xval": 0.8515625, "num_input_tokens_seen": 157867404, "step": 2518 }, { "epoch": 8.38269550748752, "grad_norm": 22.426895141601562, "learning_rate": 5e-06, "loss": 0.6696, "num_input_tokens_seen": 157928728, "step": 2519 }, { "epoch": 8.38269550748752, "loss": 0.755038857460022, "loss_ce": 0.0005222304025664926, "loss_iou": 0.296875, "loss_num": 0.0322265625, "loss_xval": 0.75390625, "num_input_tokens_seen": 157928728, "step": 2519 }, { "epoch": 8.386023294509151, "grad_norm": 12.39080810546875, "learning_rate": 5e-06, "loss": 0.6586, "num_input_tokens_seen": 157992160, "step": 2520 }, { "epoch": 8.386023294509151, "loss": 0.5690947771072388, "loss_ce": 2.9503210043912986e-06, "loss_iou": 0.189453125, "loss_num": 0.0380859375, "loss_xval": 0.5703125, "num_input_tokens_seen": 157992160, "step": 2520 }, { "epoch": 8.389351081530782, "grad_norm": 12.792807579040527, "learning_rate": 5e-06, "loss": 0.4425, "num_input_tokens_seen": 158055504, "step": 2521 }, { "epoch": 8.389351081530782, "loss": 0.5219136476516724, "loss_ce": 2.029714323725784e-06, "loss_iou": 0.181640625, "loss_num": 0.031982421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 158055504, "step": 2521 }, { "epoch": 8.392678868552412, "grad_norm": 19.38718032836914, "learning_rate": 5e-06, "loss": 0.5534, "num_input_tokens_seen": 158117144, "step": 2522 }, { "epoch": 8.392678868552412, "loss": 0.614152193069458, "loss_ce": 1.6467489331262186e-05, "loss_iou": 0.263671875, "loss_num": 0.017578125, "loss_xval": 0.61328125, "num_input_tokens_seen": 158117144, "step": 2522 }, { "epoch": 8.396006655574043, "grad_norm": 25.542036056518555, "learning_rate": 5e-06, "loss": 0.6625, "num_input_tokens_seen": 158179744, "step": 2523 }, { "epoch": 8.396006655574043, "loss": 0.5997140407562256, "loss_ce": 0.0005929746548645198, "loss_iou": 0.23046875, "loss_num": 0.0274658203125, "loss_xval": 0.59765625, "num_input_tokens_seen": 158179744, "step": 2523 }, { "epoch": 8.399334442595674, "grad_norm": 17.39588165283203, "learning_rate": 5e-06, "loss": 0.4661, "num_input_tokens_seen": 158242528, "step": 2524 }, { "epoch": 8.399334442595674, "loss": 0.46451112627983093, "loss_ce": 0.0001556577772134915, "loss_iou": 0.15625, "loss_num": 0.0303955078125, "loss_xval": 0.46484375, "num_input_tokens_seen": 158242528, "step": 2524 }, { "epoch": 8.402662229617304, "grad_norm": 8.051742553710938, "learning_rate": 5e-06, "loss": 0.4679, "num_input_tokens_seen": 158304920, "step": 2525 }, { "epoch": 8.402662229617304, "loss": 0.4890361428260803, "loss_ce": 2.2462723791250028e-05, "loss_iou": 0.1748046875, "loss_num": 0.02783203125, "loss_xval": 0.48828125, "num_input_tokens_seen": 158304920, "step": 2525 }, { "epoch": 8.405990016638935, "grad_norm": 9.084053039550781, "learning_rate": 5e-06, "loss": 0.5462, "num_input_tokens_seen": 158366936, "step": 2526 }, { "epoch": 8.405990016638935, "loss": 0.2924574911594391, "loss_ce": 0.000404273479944095, "loss_iou": 0.08642578125, "loss_num": 0.0238037109375, "loss_xval": 0.29296875, "num_input_tokens_seen": 158366936, "step": 2526 }, { "epoch": 8.409317803660565, "grad_norm": 10.446352005004883, "learning_rate": 5e-06, "loss": 0.6599, "num_input_tokens_seen": 158428652, "step": 2527 }, { "epoch": 8.409317803660565, "loss": 0.5303962826728821, "loss_ce": 7.651402142982988e-07, "loss_iou": 0.1962890625, "loss_num": 0.02783203125, "loss_xval": 0.53125, "num_input_tokens_seen": 158428652, "step": 2527 }, { "epoch": 8.412645590682196, "grad_norm": 43.01057052612305, "learning_rate": 5e-06, "loss": 0.5498, "num_input_tokens_seen": 158490372, "step": 2528 }, { "epoch": 8.412645590682196, "loss": 0.4998488426208496, "loss_ce": 0.0007033411529846489, "loss_iou": 0.17578125, "loss_num": 0.02978515625, "loss_xval": 0.5, "num_input_tokens_seen": 158490372, "step": 2528 }, { "epoch": 8.415973377703827, "grad_norm": 13.538323402404785, "learning_rate": 5e-06, "loss": 0.7307, "num_input_tokens_seen": 158554140, "step": 2529 }, { "epoch": 8.415973377703827, "loss": 0.6824997663497925, "loss_ce": 4.620101208274718e-06, "loss_iou": 0.263671875, "loss_num": 0.0311279296875, "loss_xval": 0.68359375, "num_input_tokens_seen": 158554140, "step": 2529 }, { "epoch": 8.419301164725457, "grad_norm": 7.717212677001953, "learning_rate": 5e-06, "loss": 0.4052, "num_input_tokens_seen": 158615192, "step": 2530 }, { "epoch": 8.419301164725457, "loss": 0.29168790578842163, "loss_ce": 9.063656420948973e-07, "loss_iou": 0.09130859375, "loss_num": 0.02197265625, "loss_xval": 0.291015625, "num_input_tokens_seen": 158615192, "step": 2530 }, { "epoch": 8.422628951747088, "grad_norm": 15.609619140625, "learning_rate": 5e-06, "loss": 0.4591, "num_input_tokens_seen": 158677224, "step": 2531 }, { "epoch": 8.422628951747088, "loss": 0.4130549430847168, "loss_ce": 0.0005793655873276293, "loss_iou": 0.1328125, "loss_num": 0.0294189453125, "loss_xval": 0.412109375, "num_input_tokens_seen": 158677224, "step": 2531 }, { "epoch": 8.425956738768718, "grad_norm": 35.589500427246094, "learning_rate": 5e-06, "loss": 0.5909, "num_input_tokens_seen": 158740752, "step": 2532 }, { "epoch": 8.425956738768718, "loss": 0.5754365921020508, "loss_ce": 0.0019502720097079873, "loss_iou": 0.1845703125, "loss_num": 0.041015625, "loss_xval": 0.57421875, "num_input_tokens_seen": 158740752, "step": 2532 }, { "epoch": 8.429284525790349, "grad_norm": 21.833093643188477, "learning_rate": 5e-06, "loss": 0.3925, "num_input_tokens_seen": 158802780, "step": 2533 }, { "epoch": 8.429284525790349, "loss": 0.5529793500900269, "loss_ce": 8.059317906372598e-07, "loss_iou": 0.1572265625, "loss_num": 0.047607421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 158802780, "step": 2533 }, { "epoch": 8.43261231281198, "grad_norm": 8.355440139770508, "learning_rate": 5e-06, "loss": 0.3451, "num_input_tokens_seen": 158865624, "step": 2534 }, { "epoch": 8.43261231281198, "loss": 0.3248395621776581, "loss_ce": 1.0465357263456099e-05, "loss_iou": 0.115234375, "loss_num": 0.018798828125, "loss_xval": 0.32421875, "num_input_tokens_seen": 158865624, "step": 2534 }, { "epoch": 8.43594009983361, "grad_norm": 15.777928352355957, "learning_rate": 5e-06, "loss": 0.5581, "num_input_tokens_seen": 158928612, "step": 2535 }, { "epoch": 8.43594009983361, "loss": 0.6276968121528625, "loss_ce": 1.1282990271865856e-05, "loss_iou": 0.23828125, "loss_num": 0.030029296875, "loss_xval": 0.62890625, "num_input_tokens_seen": 158928612, "step": 2535 }, { "epoch": 8.43926788685524, "grad_norm": 13.055331230163574, "learning_rate": 5e-06, "loss": 0.4884, "num_input_tokens_seen": 158988924, "step": 2536 }, { "epoch": 8.43926788685524, "loss": 0.5241285562515259, "loss_ce": 1.9643723135231994e-05, "loss_iou": 0.19140625, "loss_num": 0.0281982421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 158988924, "step": 2536 }, { "epoch": 8.442595673876871, "grad_norm": 18.49947166442871, "learning_rate": 5e-06, "loss": 0.4483, "num_input_tokens_seen": 159050652, "step": 2537 }, { "epoch": 8.442595673876871, "loss": 0.5077347755432129, "loss_ce": 1.3810964446747676e-05, "loss_iou": 0.2060546875, "loss_num": 0.0189208984375, "loss_xval": 0.5078125, "num_input_tokens_seen": 159050652, "step": 2537 }, { "epoch": 8.445923460898502, "grad_norm": 38.57990264892578, "learning_rate": 5e-06, "loss": 0.813, "num_input_tokens_seen": 159113712, "step": 2538 }, { "epoch": 8.445923460898502, "loss": 0.690744161605835, "loss_ce": 0.00031445518834516406, "loss_iou": 0.25, "loss_num": 0.037841796875, "loss_xval": 0.69140625, "num_input_tokens_seen": 159113712, "step": 2538 }, { "epoch": 8.449251247920133, "grad_norm": 26.82927131652832, "learning_rate": 5e-06, "loss": 0.4015, "num_input_tokens_seen": 159176488, "step": 2539 }, { "epoch": 8.449251247920133, "loss": 0.4396427571773529, "loss_ce": 6.538172783621121e-06, "loss_iou": 0.1611328125, "loss_num": 0.0234375, "loss_xval": 0.439453125, "num_input_tokens_seen": 159176488, "step": 2539 }, { "epoch": 8.452579034941763, "grad_norm": 12.029715538024902, "learning_rate": 5e-06, "loss": 0.3319, "num_input_tokens_seen": 159238896, "step": 2540 }, { "epoch": 8.452579034941763, "loss": 0.2290416955947876, "loss_ce": 3.7790585338370875e-05, "loss_iou": 0.047607421875, "loss_num": 0.0267333984375, "loss_xval": 0.228515625, "num_input_tokens_seen": 159238896, "step": 2540 }, { "epoch": 8.455906821963394, "grad_norm": 12.14916706085205, "learning_rate": 5e-06, "loss": 0.525, "num_input_tokens_seen": 159301688, "step": 2541 }, { "epoch": 8.455906821963394, "loss": 0.5366224646568298, "loss_ce": 0.0002455082430969924, "loss_iou": 0.21484375, "loss_num": 0.021484375, "loss_xval": 0.53515625, "num_input_tokens_seen": 159301688, "step": 2541 }, { "epoch": 8.459234608985025, "grad_norm": 27.401453018188477, "learning_rate": 5e-06, "loss": 0.7655, "num_input_tokens_seen": 159366428, "step": 2542 }, { "epoch": 8.459234608985025, "loss": 0.7718775272369385, "loss_ce": 0.0005152634694240987, "loss_iou": 0.310546875, "loss_num": 0.0301513671875, "loss_xval": 0.76953125, "num_input_tokens_seen": 159366428, "step": 2542 }, { "epoch": 8.462562396006655, "grad_norm": 32.893585205078125, "learning_rate": 5e-06, "loss": 0.5718, "num_input_tokens_seen": 159429564, "step": 2543 }, { "epoch": 8.462562396006655, "loss": 0.5463824272155762, "loss_ce": 0.0006060699815861881, "loss_iou": 0.2119140625, "loss_num": 0.0242919921875, "loss_xval": 0.546875, "num_input_tokens_seen": 159429564, "step": 2543 }, { "epoch": 8.465890183028286, "grad_norm": 37.4318962097168, "learning_rate": 5e-06, "loss": 0.6064, "num_input_tokens_seen": 159492088, "step": 2544 }, { "epoch": 8.465890183028286, "loss": 0.46649304032325745, "loss_ce": 1.3392184428084875e-06, "loss_iou": 0.1650390625, "loss_num": 0.0272216796875, "loss_xval": 0.466796875, "num_input_tokens_seen": 159492088, "step": 2544 }, { "epoch": 8.469217970049916, "grad_norm": 23.849897384643555, "learning_rate": 5e-06, "loss": 0.5951, "num_input_tokens_seen": 159554828, "step": 2545 }, { "epoch": 8.469217970049916, "loss": 0.6194241046905518, "loss_ce": 0.002358713187277317, "loss_iou": 0.228515625, "loss_num": 0.032470703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 159554828, "step": 2545 }, { "epoch": 8.472545757071547, "grad_norm": 13.373376846313477, "learning_rate": 5e-06, "loss": 0.5236, "num_input_tokens_seen": 159617792, "step": 2546 }, { "epoch": 8.472545757071547, "loss": 0.4137875437736511, "loss_ce": 0.000701591488905251, "loss_iou": 0.142578125, "loss_num": 0.0255126953125, "loss_xval": 0.4140625, "num_input_tokens_seen": 159617792, "step": 2546 }, { "epoch": 8.475873544093178, "grad_norm": 17.73874282836914, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 159681160, "step": 2547 }, { "epoch": 8.475873544093178, "loss": 0.3553406000137329, "loss_ce": 0.0004822305927518755, "loss_iou": 0.1396484375, "loss_num": 0.01513671875, "loss_xval": 0.35546875, "num_input_tokens_seen": 159681160, "step": 2547 }, { "epoch": 8.479201331114808, "grad_norm": 12.72988510131836, "learning_rate": 5e-06, "loss": 0.4259, "num_input_tokens_seen": 159743336, "step": 2548 }, { "epoch": 8.479201331114808, "loss": 0.29593223333358765, "loss_ce": 0.00021690991707146168, "loss_iou": 0.060302734375, "loss_num": 0.034912109375, "loss_xval": 0.294921875, "num_input_tokens_seen": 159743336, "step": 2548 }, { "epoch": 8.482529118136439, "grad_norm": 18.69135284423828, "learning_rate": 5e-06, "loss": 0.5856, "num_input_tokens_seen": 159805232, "step": 2549 }, { "epoch": 8.482529118136439, "loss": 0.4781320095062256, "loss_ce": 0.00016569950093980879, "loss_iou": 0.189453125, "loss_num": 0.0198974609375, "loss_xval": 0.478515625, "num_input_tokens_seen": 159805232, "step": 2549 }, { "epoch": 8.48585690515807, "grad_norm": 31.58562660217285, "learning_rate": 5e-06, "loss": 0.5295, "num_input_tokens_seen": 159868320, "step": 2550 }, { "epoch": 8.48585690515807, "loss": 0.5475597977638245, "loss_ce": 0.00044062710367143154, "loss_iou": 0.1826171875, "loss_num": 0.036376953125, "loss_xval": 0.546875, "num_input_tokens_seen": 159868320, "step": 2550 }, { "epoch": 8.4891846921797, "grad_norm": 28.00341033935547, "learning_rate": 5e-06, "loss": 0.5901, "num_input_tokens_seen": 159930432, "step": 2551 }, { "epoch": 8.4891846921797, "loss": 0.5617691874504089, "loss_ce": 1.61368234330439e-06, "loss_iou": 0.212890625, "loss_num": 0.0269775390625, "loss_xval": 0.5625, "num_input_tokens_seen": 159930432, "step": 2551 }, { "epoch": 8.49251247920133, "grad_norm": 17.852930068969727, "learning_rate": 5e-06, "loss": 0.5136, "num_input_tokens_seen": 159992708, "step": 2552 }, { "epoch": 8.49251247920133, "loss": 0.572786808013916, "loss_ce": 2.3867264644650277e-06, "loss_iou": 0.21484375, "loss_num": 0.0286865234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 159992708, "step": 2552 }, { "epoch": 8.495840266222961, "grad_norm": 16.18577766418457, "learning_rate": 5e-06, "loss": 0.4059, "num_input_tokens_seen": 160054488, "step": 2553 }, { "epoch": 8.495840266222961, "loss": 0.2463880479335785, "loss_ce": 0.0001722233573673293, "loss_iou": 0.051025390625, "loss_num": 0.02880859375, "loss_xval": 0.24609375, "num_input_tokens_seen": 160054488, "step": 2553 }, { "epoch": 8.499168053244592, "grad_norm": 16.345096588134766, "learning_rate": 5e-06, "loss": 0.496, "num_input_tokens_seen": 160117260, "step": 2554 }, { "epoch": 8.499168053244592, "loss": 0.4603882133960724, "loss_ce": 0.00030522237648256123, "loss_iou": 0.1435546875, "loss_num": 0.03466796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 160117260, "step": 2554 }, { "epoch": 8.502495840266223, "grad_norm": 15.297966003417969, "learning_rate": 5e-06, "loss": 0.4259, "num_input_tokens_seen": 160180096, "step": 2555 }, { "epoch": 8.502495840266223, "loss": 0.2536148726940155, "loss_ce": 1.378984688926721e-05, "loss_iou": 0.07958984375, "loss_num": 0.0189208984375, "loss_xval": 0.25390625, "num_input_tokens_seen": 160180096, "step": 2555 }, { "epoch": 8.505823627287853, "grad_norm": 9.780435562133789, "learning_rate": 5e-06, "loss": 0.4449, "num_input_tokens_seen": 160242432, "step": 2556 }, { "epoch": 8.505823627287853, "loss": 0.49249687790870667, "loss_ce": 4.1849107219604775e-06, "loss_iou": 0.2119140625, "loss_num": 0.013671875, "loss_xval": 0.4921875, "num_input_tokens_seen": 160242432, "step": 2556 }, { "epoch": 8.509151414309484, "grad_norm": 18.829652786254883, "learning_rate": 5e-06, "loss": 0.6334, "num_input_tokens_seen": 160305776, "step": 2557 }, { "epoch": 8.509151414309484, "loss": 0.5541389584541321, "loss_ce": 0.0006721552344970405, "loss_iou": 0.2333984375, "loss_num": 0.01708984375, "loss_xval": 0.5546875, "num_input_tokens_seen": 160305776, "step": 2557 }, { "epoch": 8.512479201331114, "grad_norm": 27.24390411376953, "learning_rate": 5e-06, "loss": 0.4896, "num_input_tokens_seen": 160368840, "step": 2558 }, { "epoch": 8.512479201331114, "loss": 0.48400986194610596, "loss_ce": 1.0848632427951088e-06, "loss_iou": 0.19140625, "loss_num": 0.020263671875, "loss_xval": 0.484375, "num_input_tokens_seen": 160368840, "step": 2558 }, { "epoch": 8.515806988352745, "grad_norm": 28.425018310546875, "learning_rate": 5e-06, "loss": 0.6756, "num_input_tokens_seen": 160432164, "step": 2559 }, { "epoch": 8.515806988352745, "loss": 0.6684607267379761, "loss_ce": 3.646892992037465e-06, "loss_iou": 0.259765625, "loss_num": 0.02978515625, "loss_xval": 0.66796875, "num_input_tokens_seen": 160432164, "step": 2559 }, { "epoch": 8.519134775374376, "grad_norm": 16.901491165161133, "learning_rate": 5e-06, "loss": 0.5266, "num_input_tokens_seen": 160495680, "step": 2560 }, { "epoch": 8.519134775374376, "loss": 0.5603169798851013, "loss_ce": 1.4270461178966798e-05, "loss_iou": 0.189453125, "loss_num": 0.0361328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 160495680, "step": 2560 }, { "epoch": 8.522462562396006, "grad_norm": 11.458944320678711, "learning_rate": 5e-06, "loss": 0.5675, "num_input_tokens_seen": 160560280, "step": 2561 }, { "epoch": 8.522462562396006, "loss": 0.7348663806915283, "loss_ce": 3.1309864425566047e-06, "loss_iou": 0.294921875, "loss_num": 0.029296875, "loss_xval": 0.734375, "num_input_tokens_seen": 160560280, "step": 2561 }, { "epoch": 8.525790349417637, "grad_norm": 14.062773704528809, "learning_rate": 5e-06, "loss": 0.5522, "num_input_tokens_seen": 160623656, "step": 2562 }, { "epoch": 8.525790349417637, "loss": 0.4510602056980133, "loss_ce": 1.0410053619125392e-05, "loss_iou": 0.1865234375, "loss_num": 0.01556396484375, "loss_xval": 0.451171875, "num_input_tokens_seen": 160623656, "step": 2562 }, { "epoch": 8.529118136439267, "grad_norm": 13.908117294311523, "learning_rate": 5e-06, "loss": 0.348, "num_input_tokens_seen": 160685336, "step": 2563 }, { "epoch": 8.529118136439267, "loss": 0.296863853931427, "loss_ce": 4.987372813047841e-05, "loss_iou": 0.083984375, "loss_num": 0.0257568359375, "loss_xval": 0.296875, "num_input_tokens_seen": 160685336, "step": 2563 }, { "epoch": 8.532445923460898, "grad_norm": 12.855901718139648, "learning_rate": 5e-06, "loss": 0.5979, "num_input_tokens_seen": 160748828, "step": 2564 }, { "epoch": 8.532445923460898, "loss": 0.6564036011695862, "loss_ce": 0.001618452137336135, "loss_iou": 0.2216796875, "loss_num": 0.042236328125, "loss_xval": 0.65625, "num_input_tokens_seen": 160748828, "step": 2564 }, { "epoch": 8.535773710482529, "grad_norm": 9.006926536560059, "learning_rate": 5e-06, "loss": 0.5736, "num_input_tokens_seen": 160811376, "step": 2565 }, { "epoch": 8.535773710482529, "loss": 0.7576814293861389, "loss_ce": 0.0017610173672437668, "loss_iou": 0.2890625, "loss_num": 0.03515625, "loss_xval": 0.7578125, "num_input_tokens_seen": 160811376, "step": 2565 }, { "epoch": 8.53910149750416, "grad_norm": 10.868459701538086, "learning_rate": 5e-06, "loss": 0.5701, "num_input_tokens_seen": 160874652, "step": 2566 }, { "epoch": 8.53910149750416, "loss": 0.5102552771568298, "loss_ce": 1.3844024806530797e-06, "loss_iou": 0.1796875, "loss_num": 0.030029296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 160874652, "step": 2566 }, { "epoch": 8.54242928452579, "grad_norm": 12.974596977233887, "learning_rate": 5e-06, "loss": 0.3803, "num_input_tokens_seen": 160937272, "step": 2567 }, { "epoch": 8.54242928452579, "loss": 0.429630309343338, "loss_ce": 3.835672941931989e-06, "loss_iou": 0.138671875, "loss_num": 0.0303955078125, "loss_xval": 0.4296875, "num_input_tokens_seen": 160937272, "step": 2567 }, { "epoch": 8.54575707154742, "grad_norm": 13.257129669189453, "learning_rate": 5e-06, "loss": 0.3902, "num_input_tokens_seen": 160999744, "step": 2568 }, { "epoch": 8.54575707154742, "loss": 0.36145779490470886, "loss_ce": 7.591223493363941e-06, "loss_iou": 0.1220703125, "loss_num": 0.0234375, "loss_xval": 0.361328125, "num_input_tokens_seen": 160999744, "step": 2568 }, { "epoch": 8.549084858569051, "grad_norm": 10.106705665588379, "learning_rate": 5e-06, "loss": 0.4466, "num_input_tokens_seen": 161062132, "step": 2569 }, { "epoch": 8.549084858569051, "loss": 0.20788881182670593, "loss_ce": 3.0809662803221727e-06, "loss_iou": 0.0634765625, "loss_num": 0.0162353515625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 161062132, "step": 2569 }, { "epoch": 8.552412645590682, "grad_norm": 10.386427879333496, "learning_rate": 5e-06, "loss": 0.5971, "num_input_tokens_seen": 161124404, "step": 2570 }, { "epoch": 8.552412645590682, "loss": 0.6997551918029785, "loss_ce": 4.8191763198701665e-05, "loss_iou": 0.263671875, "loss_num": 0.034912109375, "loss_xval": 0.69921875, "num_input_tokens_seen": 161124404, "step": 2570 }, { "epoch": 8.555740432612312, "grad_norm": 11.598371505737305, "learning_rate": 5e-06, "loss": 0.5282, "num_input_tokens_seen": 161187244, "step": 2571 }, { "epoch": 8.555740432612312, "loss": 0.48213207721710205, "loss_ce": 0.002220242517068982, "loss_iou": 0.158203125, "loss_num": 0.032470703125, "loss_xval": 0.48046875, "num_input_tokens_seen": 161187244, "step": 2571 }, { "epoch": 8.559068219633943, "grad_norm": 19.122591018676758, "learning_rate": 5e-06, "loss": 0.7354, "num_input_tokens_seen": 161251392, "step": 2572 }, { "epoch": 8.559068219633943, "loss": 0.6357005834579468, "loss_ce": 0.00014147856563795358, "loss_iou": 0.2314453125, "loss_num": 0.034423828125, "loss_xval": 0.63671875, "num_input_tokens_seen": 161251392, "step": 2572 }, { "epoch": 8.562396006655574, "grad_norm": 10.077008247375488, "learning_rate": 5e-06, "loss": 0.3102, "num_input_tokens_seen": 161314316, "step": 2573 }, { "epoch": 8.562396006655574, "loss": 0.4179697632789612, "loss_ce": 9.882079439194058e-07, "loss_iou": 0.158203125, "loss_num": 0.020263671875, "loss_xval": 0.41796875, "num_input_tokens_seen": 161314316, "step": 2573 }, { "epoch": 8.565723793677204, "grad_norm": 6.904613971710205, "learning_rate": 5e-06, "loss": 0.5353, "num_input_tokens_seen": 161376264, "step": 2574 }, { "epoch": 8.565723793677204, "loss": 0.5064716339111328, "loss_ce": 1.8997932329511968e-06, "loss_iou": 0.185546875, "loss_num": 0.02685546875, "loss_xval": 0.5078125, "num_input_tokens_seen": 161376264, "step": 2574 }, { "epoch": 8.569051580698835, "grad_norm": 28.415937423706055, "learning_rate": 5e-06, "loss": 0.7243, "num_input_tokens_seen": 161440088, "step": 2575 }, { "epoch": 8.569051580698835, "loss": 0.6772851347923279, "loss_ce": 8.502975106239319e-06, "loss_iou": 0.251953125, "loss_num": 0.03466796875, "loss_xval": 0.67578125, "num_input_tokens_seen": 161440088, "step": 2575 }, { "epoch": 8.572379367720465, "grad_norm": 12.901464462280273, "learning_rate": 5e-06, "loss": 0.4387, "num_input_tokens_seen": 161503220, "step": 2576 }, { "epoch": 8.572379367720465, "loss": 0.3778802454471588, "loss_ce": 0.0011102157877758145, "loss_iou": 0.1298828125, "loss_num": 0.023193359375, "loss_xval": 0.376953125, "num_input_tokens_seen": 161503220, "step": 2576 }, { "epoch": 8.575707154742096, "grad_norm": 12.385323524475098, "learning_rate": 5e-06, "loss": 0.481, "num_input_tokens_seen": 161565160, "step": 2577 }, { "epoch": 8.575707154742096, "loss": 0.6777368783950806, "loss_ce": 2.497131845302647e-06, "loss_iou": 0.2451171875, "loss_num": 0.03759765625, "loss_xval": 0.6796875, "num_input_tokens_seen": 161565160, "step": 2577 }, { "epoch": 8.579034941763727, "grad_norm": 6.737237453460693, "learning_rate": 5e-06, "loss": 0.4389, "num_input_tokens_seen": 161627656, "step": 2578 }, { "epoch": 8.579034941763727, "loss": 0.42138952016830444, "loss_ce": 2.770086211967282e-06, "loss_iou": 0.1728515625, "loss_num": 0.01513671875, "loss_xval": 0.421875, "num_input_tokens_seen": 161627656, "step": 2578 }, { "epoch": 8.582362728785357, "grad_norm": 11.025839805603027, "learning_rate": 5e-06, "loss": 0.5066, "num_input_tokens_seen": 161690684, "step": 2579 }, { "epoch": 8.582362728785357, "loss": 0.45585301518440247, "loss_ce": 0.0007748928037472069, "loss_iou": 0.173828125, "loss_num": 0.0213623046875, "loss_xval": 0.455078125, "num_input_tokens_seen": 161690684, "step": 2579 }, { "epoch": 8.585690515806988, "grad_norm": 25.683406829833984, "learning_rate": 5e-06, "loss": 0.7002, "num_input_tokens_seen": 161752612, "step": 2580 }, { "epoch": 8.585690515806988, "loss": 0.5723000764846802, "loss_ce": 3.4451306419214234e-05, "loss_iou": 0.2216796875, "loss_num": 0.02587890625, "loss_xval": 0.5703125, "num_input_tokens_seen": 161752612, "step": 2580 }, { "epoch": 8.589018302828618, "grad_norm": 59.10491180419922, "learning_rate": 5e-06, "loss": 0.7706, "num_input_tokens_seen": 161817220, "step": 2581 }, { "epoch": 8.589018302828618, "loss": 0.8546428680419922, "loss_ce": 0.0011272061383351684, "loss_iou": 0.32421875, "loss_num": 0.040771484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 161817220, "step": 2581 }, { "epoch": 8.592346089850249, "grad_norm": 23.72857666015625, "learning_rate": 5e-06, "loss": 0.5565, "num_input_tokens_seen": 161881092, "step": 2582 }, { "epoch": 8.592346089850249, "loss": 0.46124857664108276, "loss_ce": 0.00018901658768299967, "loss_iou": 0.169921875, "loss_num": 0.0242919921875, "loss_xval": 0.4609375, "num_input_tokens_seen": 161881092, "step": 2582 }, { "epoch": 8.59567387687188, "grad_norm": 15.71208381652832, "learning_rate": 5e-06, "loss": 0.5233, "num_input_tokens_seen": 161944048, "step": 2583 }, { "epoch": 8.59567387687188, "loss": 0.6312089562416077, "loss_ce": 0.000105427170637995, "loss_iou": 0.19140625, "loss_num": 0.0498046875, "loss_xval": 0.6328125, "num_input_tokens_seen": 161944048, "step": 2583 }, { "epoch": 8.59900166389351, "grad_norm": 11.194658279418945, "learning_rate": 5e-06, "loss": 0.4896, "num_input_tokens_seen": 162005480, "step": 2584 }, { "epoch": 8.59900166389351, "loss": 0.33762127161026, "loss_ce": 5.328947281668661e-06, "loss_iou": 0.10302734375, "loss_num": 0.0263671875, "loss_xval": 0.337890625, "num_input_tokens_seen": 162005480, "step": 2584 }, { "epoch": 8.602329450915141, "grad_norm": 11.085467338562012, "learning_rate": 5e-06, "loss": 0.5454, "num_input_tokens_seen": 162067456, "step": 2585 }, { "epoch": 8.602329450915141, "loss": 0.7172982692718506, "loss_ce": 4.366732173366472e-05, "loss_iou": 0.2490234375, "loss_num": 0.043701171875, "loss_xval": 0.71875, "num_input_tokens_seen": 162067456, "step": 2585 }, { "epoch": 8.605657237936772, "grad_norm": 10.360188484191895, "learning_rate": 5e-06, "loss": 0.4719, "num_input_tokens_seen": 162129484, "step": 2586 }, { "epoch": 8.605657237936772, "loss": 0.41778719425201416, "loss_ce": 1.5880120827205246e-06, "loss_iou": 0.173828125, "loss_num": 0.013916015625, "loss_xval": 0.41796875, "num_input_tokens_seen": 162129484, "step": 2586 }, { "epoch": 8.608985024958402, "grad_norm": 8.305403709411621, "learning_rate": 5e-06, "loss": 0.3696, "num_input_tokens_seen": 162191776, "step": 2587 }, { "epoch": 8.608985024958402, "loss": 0.336800217628479, "loss_ce": 3.876361370203085e-05, "loss_iou": 0.10595703125, "loss_num": 0.02490234375, "loss_xval": 0.3359375, "num_input_tokens_seen": 162191776, "step": 2587 }, { "epoch": 8.612312811980033, "grad_norm": 11.546163558959961, "learning_rate": 5e-06, "loss": 0.471, "num_input_tokens_seen": 162254096, "step": 2588 }, { "epoch": 8.612312811980033, "loss": 0.5014094710350037, "loss_ce": 5.674646217812551e-06, "loss_iou": 0.1796875, "loss_num": 0.0284423828125, "loss_xval": 0.5, "num_input_tokens_seen": 162254096, "step": 2588 }, { "epoch": 8.615640599001663, "grad_norm": 5.613809585571289, "learning_rate": 5e-06, "loss": 0.449, "num_input_tokens_seen": 162314564, "step": 2589 }, { "epoch": 8.615640599001663, "loss": 0.30285775661468506, "loss_ce": 1.2939040061610285e-06, "loss_iou": 0.10302734375, "loss_num": 0.019287109375, "loss_xval": 0.302734375, "num_input_tokens_seen": 162314564, "step": 2589 }, { "epoch": 8.618968386023294, "grad_norm": 13.15559196472168, "learning_rate": 5e-06, "loss": 0.7566, "num_input_tokens_seen": 162377268, "step": 2590 }, { "epoch": 8.618968386023294, "loss": 0.7530189752578735, "loss_ce": 8.929365139920264e-05, "loss_iou": 0.291015625, "loss_num": 0.0341796875, "loss_xval": 0.75390625, "num_input_tokens_seen": 162377268, "step": 2590 }, { "epoch": 8.622296173044925, "grad_norm": 11.493435859680176, "learning_rate": 5e-06, "loss": 0.5152, "num_input_tokens_seen": 162439620, "step": 2591 }, { "epoch": 8.622296173044925, "loss": 0.3789670169353485, "loss_ce": 0.0007931980071589351, "loss_iou": 0.11669921875, "loss_num": 0.02880859375, "loss_xval": 0.37890625, "num_input_tokens_seen": 162439620, "step": 2591 }, { "epoch": 8.625623960066555, "grad_norm": 10.092290878295898, "learning_rate": 5e-06, "loss": 0.5022, "num_input_tokens_seen": 162504028, "step": 2592 }, { "epoch": 8.625623960066555, "loss": 0.28314438462257385, "loss_ce": 2.2904284833202837e-06, "loss_iou": 0.12451171875, "loss_num": 0.006805419921875, "loss_xval": 0.283203125, "num_input_tokens_seen": 162504028, "step": 2592 }, { "epoch": 8.628951747088186, "grad_norm": 24.66574478149414, "learning_rate": 5e-06, "loss": 0.5965, "num_input_tokens_seen": 162566764, "step": 2593 }, { "epoch": 8.628951747088186, "loss": 0.6608072519302368, "loss_ce": 0.00016271659114863724, "loss_iou": 0.21484375, "loss_num": 0.046142578125, "loss_xval": 0.66015625, "num_input_tokens_seen": 162566764, "step": 2593 }, { "epoch": 8.632279534109816, "grad_norm": 11.436604499816895, "learning_rate": 5e-06, "loss": 0.668, "num_input_tokens_seen": 162630800, "step": 2594 }, { "epoch": 8.632279534109816, "loss": 0.5808868408203125, "loss_ce": 0.00013734347885474563, "loss_iou": 0.224609375, "loss_num": 0.0263671875, "loss_xval": 0.58203125, "num_input_tokens_seen": 162630800, "step": 2594 }, { "epoch": 8.635607321131447, "grad_norm": 18.783933639526367, "learning_rate": 5e-06, "loss": 0.5909, "num_input_tokens_seen": 162694880, "step": 2595 }, { "epoch": 8.635607321131447, "loss": 0.6555695533752441, "loss_ce": 0.000296160695143044, "loss_iou": 0.2578125, "loss_num": 0.0277099609375, "loss_xval": 0.65625, "num_input_tokens_seen": 162694880, "step": 2595 }, { "epoch": 8.638935108153078, "grad_norm": 38.576412200927734, "learning_rate": 5e-06, "loss": 0.6731, "num_input_tokens_seen": 162757312, "step": 2596 }, { "epoch": 8.638935108153078, "loss": 0.6694474220275879, "loss_ce": 1.386600524710957e-05, "loss_iou": 0.2255859375, "loss_num": 0.043701171875, "loss_xval": 0.66796875, "num_input_tokens_seen": 162757312, "step": 2596 }, { "epoch": 8.642262895174708, "grad_norm": 19.749004364013672, "learning_rate": 5e-06, "loss": 0.6392, "num_input_tokens_seen": 162820224, "step": 2597 }, { "epoch": 8.642262895174708, "loss": 0.4840720295906067, "loss_ce": 2.2034266748960363e-06, "loss_iou": 0.14453125, "loss_num": 0.0390625, "loss_xval": 0.484375, "num_input_tokens_seen": 162820224, "step": 2597 }, { "epoch": 8.645590682196339, "grad_norm": 48.174163818359375, "learning_rate": 5e-06, "loss": 0.6002, "num_input_tokens_seen": 162882940, "step": 2598 }, { "epoch": 8.645590682196339, "loss": 0.6938499212265015, "loss_ce": 2.2609694951825077e-06, "loss_iou": 0.2451171875, "loss_num": 0.040771484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 162882940, "step": 2598 }, { "epoch": 8.64891846921797, "grad_norm": 17.961458206176758, "learning_rate": 5e-06, "loss": 0.5589, "num_input_tokens_seen": 162944860, "step": 2599 }, { "epoch": 8.64891846921797, "loss": 0.27713140845298767, "loss_ce": 1.2781802070094272e-06, "loss_iou": 0.0986328125, "loss_num": 0.01611328125, "loss_xval": 0.27734375, "num_input_tokens_seen": 162944860, "step": 2599 }, { "epoch": 8.6522462562396, "grad_norm": 17.12492561340332, "learning_rate": 5e-06, "loss": 0.4984, "num_input_tokens_seen": 163006680, "step": 2600 }, { "epoch": 8.6522462562396, "loss": 0.4886104464530945, "loss_ce": 2.4024253434618004e-05, "loss_iou": 0.1630859375, "loss_num": 0.03271484375, "loss_xval": 0.48828125, "num_input_tokens_seen": 163006680, "step": 2600 }, { "epoch": 8.65557404326123, "grad_norm": 6.443399429321289, "learning_rate": 5e-06, "loss": 0.5364, "num_input_tokens_seen": 163069448, "step": 2601 }, { "epoch": 8.65557404326123, "loss": 0.4713142514228821, "loss_ce": 7.797708576617879e-07, "loss_iou": 0.12255859375, "loss_num": 0.04541015625, "loss_xval": 0.470703125, "num_input_tokens_seen": 163069448, "step": 2601 }, { "epoch": 8.658901830282861, "grad_norm": 19.23252296447754, "learning_rate": 5e-06, "loss": 0.6609, "num_input_tokens_seen": 163132548, "step": 2602 }, { "epoch": 8.658901830282861, "loss": 0.47451192140579224, "loss_ce": 8.566172618884593e-05, "loss_iou": 0.17578125, "loss_num": 0.0244140625, "loss_xval": 0.474609375, "num_input_tokens_seen": 163132548, "step": 2602 }, { "epoch": 8.662229617304492, "grad_norm": 19.472877502441406, "learning_rate": 5e-06, "loss": 0.4106, "num_input_tokens_seen": 163194472, "step": 2603 }, { "epoch": 8.662229617304492, "loss": 0.4780290722846985, "loss_ce": 1.7346720824207296e-06, "loss_iou": 0.173828125, "loss_num": 0.0260009765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 163194472, "step": 2603 }, { "epoch": 8.665557404326123, "grad_norm": 13.35362720489502, "learning_rate": 5e-06, "loss": 0.6408, "num_input_tokens_seen": 163258172, "step": 2604 }, { "epoch": 8.665557404326123, "loss": 0.7598928213119507, "loss_ce": 5.092465471534524e-06, "loss_iou": 0.29296875, "loss_num": 0.03515625, "loss_xval": 0.76171875, "num_input_tokens_seen": 163258172, "step": 2604 }, { "epoch": 8.668885191347753, "grad_norm": 21.191377639770508, "learning_rate": 5e-06, "loss": 0.6911, "num_input_tokens_seen": 163320420, "step": 2605 }, { "epoch": 8.668885191347753, "loss": 0.5891138315200806, "loss_ce": 2.487584652044461e-06, "loss_iou": 0.1865234375, "loss_num": 0.043212890625, "loss_xval": 0.58984375, "num_input_tokens_seen": 163320420, "step": 2605 }, { "epoch": 8.672212978369384, "grad_norm": 27.505525588989258, "learning_rate": 5e-06, "loss": 0.6181, "num_input_tokens_seen": 163384120, "step": 2606 }, { "epoch": 8.672212978369384, "loss": 0.7866055369377136, "loss_ce": 0.00035065022530034184, "loss_iou": 0.2890625, "loss_num": 0.04150390625, "loss_xval": 0.78515625, "num_input_tokens_seen": 163384120, "step": 2606 }, { "epoch": 8.675540765391014, "grad_norm": 28.213647842407227, "learning_rate": 5e-06, "loss": 0.5859, "num_input_tokens_seen": 163446192, "step": 2607 }, { "epoch": 8.675540765391014, "loss": 0.5129499435424805, "loss_ce": 1.048034209816251e-05, "loss_iou": 0.1982421875, "loss_num": 0.0233154296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 163446192, "step": 2607 }, { "epoch": 8.678868552412645, "grad_norm": 18.515117645263672, "learning_rate": 5e-06, "loss": 0.6942, "num_input_tokens_seen": 163509128, "step": 2608 }, { "epoch": 8.678868552412645, "loss": 0.8513048887252808, "loss_ce": 1.7387551451975014e-06, "loss_iou": 0.341796875, "loss_num": 0.03369140625, "loss_xval": 0.8515625, "num_input_tokens_seen": 163509128, "step": 2608 }, { "epoch": 8.682196339434276, "grad_norm": 8.365104675292969, "learning_rate": 5e-06, "loss": 0.5006, "num_input_tokens_seen": 163570800, "step": 2609 }, { "epoch": 8.682196339434276, "loss": 0.4860261082649231, "loss_ce": 3.1493764254264534e-06, "loss_iou": 0.1826171875, "loss_num": 0.0240478515625, "loss_xval": 0.486328125, "num_input_tokens_seen": 163570800, "step": 2609 }, { "epoch": 8.685524126455906, "grad_norm": 14.244555473327637, "learning_rate": 5e-06, "loss": 0.6116, "num_input_tokens_seen": 163633080, "step": 2610 }, { "epoch": 8.685524126455906, "loss": 0.4235767126083374, "loss_ce": 0.00011478186934255064, "loss_iou": 0.1357421875, "loss_num": 0.030517578125, "loss_xval": 0.423828125, "num_input_tokens_seen": 163633080, "step": 2610 }, { "epoch": 8.688851913477537, "grad_norm": 26.318683624267578, "learning_rate": 5e-06, "loss": 0.6206, "num_input_tokens_seen": 163695772, "step": 2611 }, { "epoch": 8.688851913477537, "loss": 0.4914668798446655, "loss_ce": 1.179387436422985e-05, "loss_iou": 0.1689453125, "loss_num": 0.0308837890625, "loss_xval": 0.4921875, "num_input_tokens_seen": 163695772, "step": 2611 }, { "epoch": 8.692179700499167, "grad_norm": 25.076519012451172, "learning_rate": 5e-06, "loss": 0.6026, "num_input_tokens_seen": 163758672, "step": 2612 }, { "epoch": 8.692179700499167, "loss": 0.5386121869087219, "loss_ce": 3.794700023718178e-05, "loss_iou": 0.193359375, "loss_num": 0.030517578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 163758672, "step": 2612 }, { "epoch": 8.695507487520798, "grad_norm": 65.98371124267578, "learning_rate": 5e-06, "loss": 0.6414, "num_input_tokens_seen": 163821680, "step": 2613 }, { "epoch": 8.695507487520798, "loss": 0.41736340522766113, "loss_ce": 5.0008220568997785e-06, "loss_iou": 0.1806640625, "loss_num": 0.0113525390625, "loss_xval": 0.41796875, "num_input_tokens_seen": 163821680, "step": 2613 }, { "epoch": 8.698835274542429, "grad_norm": 40.74962615966797, "learning_rate": 5e-06, "loss": 0.5967, "num_input_tokens_seen": 163884432, "step": 2614 }, { "epoch": 8.698835274542429, "loss": 0.6979994773864746, "loss_ce": 1.455628535040887e-06, "loss_iou": 0.25, "loss_num": 0.03955078125, "loss_xval": 0.69921875, "num_input_tokens_seen": 163884432, "step": 2614 }, { "epoch": 8.70216306156406, "grad_norm": 32.56483840942383, "learning_rate": 5e-06, "loss": 0.4028, "num_input_tokens_seen": 163947288, "step": 2615 }, { "epoch": 8.70216306156406, "loss": 0.43441683053970337, "loss_ce": 9.063594916369766e-05, "loss_iou": 0.1533203125, "loss_num": 0.0255126953125, "loss_xval": 0.43359375, "num_input_tokens_seen": 163947288, "step": 2615 }, { "epoch": 8.70549084858569, "grad_norm": 25.95416831970215, "learning_rate": 5e-06, "loss": 0.4828, "num_input_tokens_seen": 164009992, "step": 2616 }, { "epoch": 8.70549084858569, "loss": 0.3466024398803711, "loss_ce": 0.0006246463744901121, "loss_iou": 0.080078125, "loss_num": 0.037109375, "loss_xval": 0.345703125, "num_input_tokens_seen": 164009992, "step": 2616 }, { "epoch": 8.70881863560732, "grad_norm": 19.83292579650879, "learning_rate": 5e-06, "loss": 0.5835, "num_input_tokens_seen": 164072796, "step": 2617 }, { "epoch": 8.70881863560732, "loss": 0.43209221959114075, "loss_ce": 2.4360571842407808e-05, "loss_iou": 0.12890625, "loss_num": 0.034912109375, "loss_xval": 0.431640625, "num_input_tokens_seen": 164072796, "step": 2617 }, { "epoch": 8.712146422628951, "grad_norm": 23.249391555786133, "learning_rate": 5e-06, "loss": 0.4463, "num_input_tokens_seen": 164135764, "step": 2618 }, { "epoch": 8.712146422628951, "loss": 0.3320271372795105, "loss_ce": 0.00017898494843393564, "loss_iou": 0.1357421875, "loss_num": 0.01214599609375, "loss_xval": 0.33203125, "num_input_tokens_seen": 164135764, "step": 2618 }, { "epoch": 8.715474209650582, "grad_norm": 8.63182544708252, "learning_rate": 5e-06, "loss": 0.5264, "num_input_tokens_seen": 164198644, "step": 2619 }, { "epoch": 8.715474209650582, "loss": 0.4586324989795685, "loss_ce": 1.4335352716443595e-05, "loss_iou": 0.1552734375, "loss_num": 0.02978515625, "loss_xval": 0.458984375, "num_input_tokens_seen": 164198644, "step": 2619 }, { "epoch": 8.718801996672212, "grad_norm": 11.102532386779785, "learning_rate": 5e-06, "loss": 0.6117, "num_input_tokens_seen": 164261460, "step": 2620 }, { "epoch": 8.718801996672212, "loss": 0.3844517171382904, "loss_ce": 5.229059388511814e-05, "loss_iou": 0.1298828125, "loss_num": 0.024658203125, "loss_xval": 0.384765625, "num_input_tokens_seen": 164261460, "step": 2620 }, { "epoch": 8.722129783693843, "grad_norm": 9.561666488647461, "learning_rate": 5e-06, "loss": 0.4767, "num_input_tokens_seen": 164323656, "step": 2621 }, { "epoch": 8.722129783693843, "loss": 0.49207940697669983, "loss_ce": 1.3976500667922664e-05, "loss_iou": 0.2119140625, "loss_num": 0.01373291015625, "loss_xval": 0.4921875, "num_input_tokens_seen": 164323656, "step": 2621 }, { "epoch": 8.725457570715474, "grad_norm": 7.227001667022705, "learning_rate": 5e-06, "loss": 0.4765, "num_input_tokens_seen": 164385784, "step": 2622 }, { "epoch": 8.725457570715474, "loss": 0.5397161245346069, "loss_ce": 0.0004095190088264644, "loss_iou": 0.2158203125, "loss_num": 0.021484375, "loss_xval": 0.5390625, "num_input_tokens_seen": 164385784, "step": 2622 }, { "epoch": 8.728785357737104, "grad_norm": 16.507421493530273, "learning_rate": 5e-06, "loss": 0.4704, "num_input_tokens_seen": 164448792, "step": 2623 }, { "epoch": 8.728785357737104, "loss": 0.5310739278793335, "loss_ce": 7.072923835949041e-06, "loss_iou": 0.205078125, "loss_num": 0.02392578125, "loss_xval": 0.53125, "num_input_tokens_seen": 164448792, "step": 2623 }, { "epoch": 8.732113144758735, "grad_norm": 9.803139686584473, "learning_rate": 5e-06, "loss": 0.4463, "num_input_tokens_seen": 164511872, "step": 2624 }, { "epoch": 8.732113144758735, "loss": 0.47733116149902344, "loss_ce": 0.0005245241918601096, "loss_iou": 0.1640625, "loss_num": 0.02978515625, "loss_xval": 0.4765625, "num_input_tokens_seen": 164511872, "step": 2624 }, { "epoch": 8.735440931780365, "grad_norm": 17.92215347290039, "learning_rate": 5e-06, "loss": 0.4268, "num_input_tokens_seen": 164575384, "step": 2625 }, { "epoch": 8.735440931780365, "loss": 0.5671224594116211, "loss_ce": 0.00010586697317194194, "loss_iou": 0.2314453125, "loss_num": 0.0208740234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 164575384, "step": 2625 }, { "epoch": 8.738768718801996, "grad_norm": 29.20288848876953, "learning_rate": 5e-06, "loss": 0.5889, "num_input_tokens_seen": 164638456, "step": 2626 }, { "epoch": 8.738768718801996, "loss": 0.475547194480896, "loss_ce": 0.00020538826356641948, "loss_iou": 0.1484375, "loss_num": 0.035400390625, "loss_xval": 0.474609375, "num_input_tokens_seen": 164638456, "step": 2626 }, { "epoch": 8.742096505823627, "grad_norm": 17.16310691833496, "learning_rate": 5e-06, "loss": 0.5969, "num_input_tokens_seen": 164702336, "step": 2627 }, { "epoch": 8.742096505823627, "loss": 0.5690972805023193, "loss_ce": 5.500802672031568e-06, "loss_iou": 0.2275390625, "loss_num": 0.0225830078125, "loss_xval": 0.5703125, "num_input_tokens_seen": 164702336, "step": 2627 }, { "epoch": 8.745424292845257, "grad_norm": 11.235305786132812, "learning_rate": 5e-06, "loss": 0.5006, "num_input_tokens_seen": 164765452, "step": 2628 }, { "epoch": 8.745424292845257, "loss": 0.5440701842308044, "loss_ce": 2.790181042655604e-06, "loss_iou": 0.220703125, "loss_num": 0.020751953125, "loss_xval": 0.54296875, "num_input_tokens_seen": 164765452, "step": 2628 }, { "epoch": 8.748752079866888, "grad_norm": 42.0651969909668, "learning_rate": 5e-06, "loss": 0.4788, "num_input_tokens_seen": 164827480, "step": 2629 }, { "epoch": 8.748752079866888, "loss": 0.527103066444397, "loss_ce": 0.00018658065528143197, "loss_iou": 0.1826171875, "loss_num": 0.0322265625, "loss_xval": 0.52734375, "num_input_tokens_seen": 164827480, "step": 2629 }, { "epoch": 8.752079866888518, "grad_norm": 31.08562660217285, "learning_rate": 5e-06, "loss": 0.5327, "num_input_tokens_seen": 164888656, "step": 2630 }, { "epoch": 8.752079866888518, "loss": 0.5981454849243164, "loss_ce": 9.463648211749387e-07, "loss_iou": 0.2041015625, "loss_num": 0.037841796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 164888656, "step": 2630 }, { "epoch": 8.755407653910149, "grad_norm": 17.113222122192383, "learning_rate": 5e-06, "loss": 0.4415, "num_input_tokens_seen": 164952028, "step": 2631 }, { "epoch": 8.755407653910149, "loss": 0.3336372375488281, "loss_ce": 1.909946877276525e-05, "loss_iou": 0.125, "loss_num": 0.0164794921875, "loss_xval": 0.333984375, "num_input_tokens_seen": 164952028, "step": 2631 }, { "epoch": 8.75873544093178, "grad_norm": 18.15458869934082, "learning_rate": 5e-06, "loss": 0.7117, "num_input_tokens_seen": 165015824, "step": 2632 }, { "epoch": 8.75873544093178, "loss": 0.5313310027122498, "loss_ce": 0.00014203149476088583, "loss_iou": 0.189453125, "loss_num": 0.0303955078125, "loss_xval": 0.53125, "num_input_tokens_seen": 165015824, "step": 2632 }, { "epoch": 8.76206322795341, "grad_norm": 7.1659111976623535, "learning_rate": 5e-06, "loss": 0.7076, "num_input_tokens_seen": 165077932, "step": 2633 }, { "epoch": 8.76206322795341, "loss": 0.7382878065109253, "loss_ce": 6.544657480844762e-06, "loss_iou": 0.25390625, "loss_num": 0.04638671875, "loss_xval": 0.73828125, "num_input_tokens_seen": 165077932, "step": 2633 }, { "epoch": 8.765391014975041, "grad_norm": 7.968719005584717, "learning_rate": 5e-06, "loss": 0.4108, "num_input_tokens_seen": 165139380, "step": 2634 }, { "epoch": 8.765391014975041, "loss": 0.3980604410171509, "loss_ce": 0.0010877473978325725, "loss_iou": 0.13671875, "loss_num": 0.02490234375, "loss_xval": 0.396484375, "num_input_tokens_seen": 165139380, "step": 2634 }, { "epoch": 8.768718801996672, "grad_norm": 12.53625202178955, "learning_rate": 5e-06, "loss": 0.4035, "num_input_tokens_seen": 165202888, "step": 2635 }, { "epoch": 8.768718801996672, "loss": 0.22742587327957153, "loss_ce": 8.883545888238586e-06, "loss_iou": 0.0771484375, "loss_num": 0.01458740234375, "loss_xval": 0.2275390625, "num_input_tokens_seen": 165202888, "step": 2635 }, { "epoch": 8.772046589018302, "grad_norm": 14.167214393615723, "learning_rate": 5e-06, "loss": 0.708, "num_input_tokens_seen": 165266020, "step": 2636 }, { "epoch": 8.772046589018302, "loss": 0.6256218552589417, "loss_ce": 0.00046926370123401284, "loss_iou": 0.2138671875, "loss_num": 0.03955078125, "loss_xval": 0.625, "num_input_tokens_seen": 165266020, "step": 2636 }, { "epoch": 8.775374376039933, "grad_norm": 22.661975860595703, "learning_rate": 5e-06, "loss": 0.6752, "num_input_tokens_seen": 165329272, "step": 2637 }, { "epoch": 8.775374376039933, "loss": 0.9430770874023438, "loss_ce": 0.0003281228127889335, "loss_iou": 0.30859375, "loss_num": 0.0654296875, "loss_xval": 0.94140625, "num_input_tokens_seen": 165329272, "step": 2637 }, { "epoch": 8.778702163061563, "grad_norm": 35.86247253417969, "learning_rate": 5e-06, "loss": 0.7095, "num_input_tokens_seen": 165392420, "step": 2638 }, { "epoch": 8.778702163061563, "loss": 0.549805760383606, "loss_ce": 1.0293617833667668e-06, "loss_iou": 0.216796875, "loss_num": 0.023193359375, "loss_xval": 0.55078125, "num_input_tokens_seen": 165392420, "step": 2638 }, { "epoch": 8.782029950083194, "grad_norm": 31.708436965942383, "learning_rate": 5e-06, "loss": 0.6365, "num_input_tokens_seen": 165455036, "step": 2639 }, { "epoch": 8.782029950083194, "loss": 0.7686181664466858, "loss_ce": 6.353753269650042e-05, "loss_iou": 0.30078125, "loss_num": 0.03369140625, "loss_xval": 0.76953125, "num_input_tokens_seen": 165455036, "step": 2639 }, { "epoch": 8.785357737104825, "grad_norm": 17.050518035888672, "learning_rate": 5e-06, "loss": 0.4273, "num_input_tokens_seen": 165518012, "step": 2640 }, { "epoch": 8.785357737104825, "loss": 0.3162250220775604, "loss_ce": 0.0007343136239796877, "loss_iou": 0.1142578125, "loss_num": 0.017333984375, "loss_xval": 0.31640625, "num_input_tokens_seen": 165518012, "step": 2640 }, { "epoch": 8.788685524126455, "grad_norm": 12.636157989501953, "learning_rate": 5e-06, "loss": 0.5568, "num_input_tokens_seen": 165580220, "step": 2641 }, { "epoch": 8.788685524126455, "loss": 0.47808837890625, "loss_ce": 0.001403809990733862, "loss_iou": 0.1796875, "loss_num": 0.023681640625, "loss_xval": 0.4765625, "num_input_tokens_seen": 165580220, "step": 2641 }, { "epoch": 8.792013311148086, "grad_norm": 11.59026050567627, "learning_rate": 5e-06, "loss": 0.6309, "num_input_tokens_seen": 165643800, "step": 2642 }, { "epoch": 8.792013311148086, "loss": 0.46340662240982056, "loss_ce": 2.7736045012716204e-05, "loss_iou": 0.1591796875, "loss_num": 0.02880859375, "loss_xval": 0.462890625, "num_input_tokens_seen": 165643800, "step": 2642 }, { "epoch": 8.795341098169716, "grad_norm": 10.767539024353027, "learning_rate": 5e-06, "loss": 0.4572, "num_input_tokens_seen": 165707012, "step": 2643 }, { "epoch": 8.795341098169716, "loss": 0.3974631428718567, "loss_ce": 2.2147823983686976e-06, "loss_iou": 0.1640625, "loss_num": 0.013671875, "loss_xval": 0.3984375, "num_input_tokens_seen": 165707012, "step": 2643 }, { "epoch": 8.798668885191347, "grad_norm": 9.26812744140625, "learning_rate": 5e-06, "loss": 0.3935, "num_input_tokens_seen": 165769208, "step": 2644 }, { "epoch": 8.798668885191347, "loss": 0.39911314845085144, "loss_ce": 4.279268523532664e-06, "loss_iou": 0.162109375, "loss_num": 0.0147705078125, "loss_xval": 0.3984375, "num_input_tokens_seen": 165769208, "step": 2644 }, { "epoch": 8.801996672212978, "grad_norm": 9.654847145080566, "learning_rate": 5e-06, "loss": 0.3698, "num_input_tokens_seen": 165830508, "step": 2645 }, { "epoch": 8.801996672212978, "loss": 0.31747594475746155, "loss_ce": 1.5821939314264455e-06, "loss_iou": 0.1142578125, "loss_num": 0.0177001953125, "loss_xval": 0.318359375, "num_input_tokens_seen": 165830508, "step": 2645 }, { "epoch": 8.805324459234608, "grad_norm": 14.914422035217285, "learning_rate": 5e-06, "loss": 0.3932, "num_input_tokens_seen": 165892712, "step": 2646 }, { "epoch": 8.805324459234608, "loss": 0.27504193782806396, "loss_ce": 4.804755008080974e-05, "loss_iou": 0.09228515625, "loss_num": 0.01806640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 165892712, "step": 2646 }, { "epoch": 8.808652246256239, "grad_norm": 20.909866333007812, "learning_rate": 5e-06, "loss": 0.6091, "num_input_tokens_seen": 165956660, "step": 2647 }, { "epoch": 8.808652246256239, "loss": 0.7597754001617432, "loss_ce": 9.747442163643427e-06, "loss_iou": 0.30078125, "loss_num": 0.03125, "loss_xval": 0.7578125, "num_input_tokens_seen": 165956660, "step": 2647 }, { "epoch": 8.81198003327787, "grad_norm": 14.562281608581543, "learning_rate": 5e-06, "loss": 0.4858, "num_input_tokens_seen": 166019580, "step": 2648 }, { "epoch": 8.81198003327787, "loss": 0.43079134821891785, "loss_ce": 5.223499101703055e-06, "loss_iou": 0.1357421875, "loss_num": 0.031982421875, "loss_xval": 0.431640625, "num_input_tokens_seen": 166019580, "step": 2648 }, { "epoch": 8.8153078202995, "grad_norm": 9.276741981506348, "learning_rate": 5e-06, "loss": 0.4561, "num_input_tokens_seen": 166080820, "step": 2649 }, { "epoch": 8.8153078202995, "loss": 0.43725699186325073, "loss_ce": 1.135577235800156e-06, "loss_iou": 0.1806640625, "loss_num": 0.0152587890625, "loss_xval": 0.4375, "num_input_tokens_seen": 166080820, "step": 2649 }, { "epoch": 8.81863560732113, "grad_norm": 6.810776710510254, "learning_rate": 5e-06, "loss": 0.5901, "num_input_tokens_seen": 166140788, "step": 2650 }, { "epoch": 8.81863560732113, "loss": 0.4802318215370178, "loss_ce": 7.2112934503820725e-06, "loss_iou": 0.1162109375, "loss_num": 0.0498046875, "loss_xval": 0.48046875, "num_input_tokens_seen": 166140788, "step": 2650 }, { "epoch": 8.821963394342761, "grad_norm": 8.609230995178223, "learning_rate": 5e-06, "loss": 0.5667, "num_input_tokens_seen": 166203304, "step": 2651 }, { "epoch": 8.821963394342761, "loss": 0.5621353387832642, "loss_ce": 1.5513187463511713e-06, "loss_iou": 0.2236328125, "loss_num": 0.0230712890625, "loss_xval": 0.5625, "num_input_tokens_seen": 166203304, "step": 2651 }, { "epoch": 8.825291181364392, "grad_norm": 19.36880874633789, "learning_rate": 5e-06, "loss": 0.5814, "num_input_tokens_seen": 166265360, "step": 2652 }, { "epoch": 8.825291181364392, "loss": 0.3197976052761078, "loss_ce": 9.545496141072363e-05, "loss_iou": 0.09814453125, "loss_num": 0.024658203125, "loss_xval": 0.3203125, "num_input_tokens_seen": 166265360, "step": 2652 }, { "epoch": 8.828618968386023, "grad_norm": 17.841169357299805, "learning_rate": 5e-06, "loss": 0.5238, "num_input_tokens_seen": 166328276, "step": 2653 }, { "epoch": 8.828618968386023, "loss": 0.7275412082672119, "loss_ce": 2.098976665365626e-06, "loss_iou": 0.28515625, "loss_num": 0.03173828125, "loss_xval": 0.7265625, "num_input_tokens_seen": 166328276, "step": 2653 }, { "epoch": 8.831946755407653, "grad_norm": 23.220767974853516, "learning_rate": 5e-06, "loss": 0.8706, "num_input_tokens_seen": 166391676, "step": 2654 }, { "epoch": 8.831946755407653, "loss": 0.9284038543701172, "loss_ce": 0.0009135938598774374, "loss_iou": 0.33203125, "loss_num": 0.052978515625, "loss_xval": 0.92578125, "num_input_tokens_seen": 166391676, "step": 2654 }, { "epoch": 8.835274542429284, "grad_norm": 17.233768463134766, "learning_rate": 5e-06, "loss": 0.3696, "num_input_tokens_seen": 166454156, "step": 2655 }, { "epoch": 8.835274542429284, "loss": 0.5088720321655273, "loss_ce": 0.00044921974767930806, "loss_iou": 0.14453125, "loss_num": 0.043701171875, "loss_xval": 0.5078125, "num_input_tokens_seen": 166454156, "step": 2655 }, { "epoch": 8.838602329450914, "grad_norm": 9.756841659545898, "learning_rate": 5e-06, "loss": 0.4684, "num_input_tokens_seen": 166514516, "step": 2656 }, { "epoch": 8.838602329450914, "loss": 0.43786710500717163, "loss_ce": 8.842661145536113e-07, "loss_iou": 0.134765625, "loss_num": 0.033447265625, "loss_xval": 0.4375, "num_input_tokens_seen": 166514516, "step": 2656 }, { "epoch": 8.841930116472545, "grad_norm": 5.191752910614014, "learning_rate": 5e-06, "loss": 0.4136, "num_input_tokens_seen": 166576612, "step": 2657 }, { "epoch": 8.841930116472545, "loss": 0.3831273913383484, "loss_ce": 7.076036126818508e-05, "loss_iou": 0.09619140625, "loss_num": 0.0380859375, "loss_xval": 0.3828125, "num_input_tokens_seen": 166576612, "step": 2657 }, { "epoch": 8.845257903494176, "grad_norm": 24.358196258544922, "learning_rate": 5e-06, "loss": 0.5497, "num_input_tokens_seen": 166639664, "step": 2658 }, { "epoch": 8.845257903494176, "loss": 0.4469009041786194, "loss_ce": 1.508597279098467e-06, "loss_iou": 0.1357421875, "loss_num": 0.034912109375, "loss_xval": 0.447265625, "num_input_tokens_seen": 166639664, "step": 2658 }, { "epoch": 8.848585690515806, "grad_norm": 37.56662368774414, "learning_rate": 5e-06, "loss": 0.6608, "num_input_tokens_seen": 166701416, "step": 2659 }, { "epoch": 8.848585690515806, "loss": 0.594239354133606, "loss_ce": 1.079844196283375e-06, "loss_iou": 0.193359375, "loss_num": 0.041748046875, "loss_xval": 0.59375, "num_input_tokens_seen": 166701416, "step": 2659 }, { "epoch": 8.851913477537437, "grad_norm": 28.871477127075195, "learning_rate": 5e-06, "loss": 0.5801, "num_input_tokens_seen": 166764852, "step": 2660 }, { "epoch": 8.851913477537437, "loss": 0.471131294965744, "loss_ce": 9.28295037283533e-07, "loss_iou": 0.154296875, "loss_num": 0.0322265625, "loss_xval": 0.470703125, "num_input_tokens_seen": 166764852, "step": 2660 }, { "epoch": 8.855241264559067, "grad_norm": 16.83323097229004, "learning_rate": 5e-06, "loss": 0.5827, "num_input_tokens_seen": 166827752, "step": 2661 }, { "epoch": 8.855241264559067, "loss": 0.6159947514533997, "loss_ce": 2.7999261874356307e-05, "loss_iou": 0.2265625, "loss_num": 0.032470703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 166827752, "step": 2661 }, { "epoch": 8.858569051580698, "grad_norm": 9.028475761413574, "learning_rate": 5e-06, "loss": 0.6174, "num_input_tokens_seen": 166889508, "step": 2662 }, { "epoch": 8.858569051580698, "loss": 0.5328382849693298, "loss_ce": 1.3875932154405746e-06, "loss_iou": 0.173828125, "loss_num": 0.037109375, "loss_xval": 0.53125, "num_input_tokens_seen": 166889508, "step": 2662 }, { "epoch": 8.861896838602329, "grad_norm": 13.672860145568848, "learning_rate": 5e-06, "loss": 0.3277, "num_input_tokens_seen": 166952464, "step": 2663 }, { "epoch": 8.861896838602329, "loss": 0.2861974537372589, "loss_ce": 0.00018671242287382483, "loss_iou": 0.0947265625, "loss_num": 0.019287109375, "loss_xval": 0.28515625, "num_input_tokens_seen": 166952464, "step": 2663 }, { "epoch": 8.86522462562396, "grad_norm": 23.795124053955078, "learning_rate": 5e-06, "loss": 0.64, "num_input_tokens_seen": 167015144, "step": 2664 }, { "epoch": 8.86522462562396, "loss": 0.5717785954475403, "loss_ce": 0.000367454020306468, "loss_iou": 0.1875, "loss_num": 0.0390625, "loss_xval": 0.5703125, "num_input_tokens_seen": 167015144, "step": 2664 }, { "epoch": 8.86855241264559, "grad_norm": 24.243633270263672, "learning_rate": 5e-06, "loss": 0.3329, "num_input_tokens_seen": 167077716, "step": 2665 }, { "epoch": 8.86855241264559, "loss": 0.449888676404953, "loss_ce": 0.00012058133143000305, "loss_iou": 0.162109375, "loss_num": 0.0250244140625, "loss_xval": 0.44921875, "num_input_tokens_seen": 167077716, "step": 2665 }, { "epoch": 8.87188019966722, "grad_norm": 43.51081466674805, "learning_rate": 5e-06, "loss": 0.6884, "num_input_tokens_seen": 167140764, "step": 2666 }, { "epoch": 8.87188019966722, "loss": 0.9655192494392395, "loss_ce": 0.00012614778825081885, "loss_iou": 0.328125, "loss_num": 0.061279296875, "loss_xval": 0.96484375, "num_input_tokens_seen": 167140764, "step": 2666 }, { "epoch": 8.875207986688851, "grad_norm": 13.640003204345703, "learning_rate": 5e-06, "loss": 0.3429, "num_input_tokens_seen": 167203048, "step": 2667 }, { "epoch": 8.875207986688851, "loss": 0.3342365026473999, "loss_ce": 7.994059160409961e-06, "loss_iou": 0.12451171875, "loss_num": 0.01708984375, "loss_xval": 0.333984375, "num_input_tokens_seen": 167203048, "step": 2667 }, { "epoch": 8.878535773710482, "grad_norm": 18.239809036254883, "learning_rate": 5e-06, "loss": 0.6821, "num_input_tokens_seen": 167263140, "step": 2668 }, { "epoch": 8.878535773710482, "loss": 0.7690868973731995, "loss_ce": 4.393236304167658e-05, "loss_iou": 0.279296875, "loss_num": 0.042236328125, "loss_xval": 0.76953125, "num_input_tokens_seen": 167263140, "step": 2668 }, { "epoch": 8.881863560732112, "grad_norm": 17.22405433654785, "learning_rate": 5e-06, "loss": 0.5247, "num_input_tokens_seen": 167326224, "step": 2669 }, { "epoch": 8.881863560732112, "loss": 0.3908403515815735, "loss_ce": 1.7319248399871867e-06, "loss_iou": 0.1494140625, "loss_num": 0.0184326171875, "loss_xval": 0.390625, "num_input_tokens_seen": 167326224, "step": 2669 }, { "epoch": 8.885191347753743, "grad_norm": 11.331925392150879, "learning_rate": 5e-06, "loss": 0.4538, "num_input_tokens_seen": 167388244, "step": 2670 }, { "epoch": 8.885191347753743, "loss": 0.3231828510761261, "loss_ce": 0.0005510023911483586, "loss_iou": 0.10888671875, "loss_num": 0.0208740234375, "loss_xval": 0.322265625, "num_input_tokens_seen": 167388244, "step": 2670 }, { "epoch": 8.888519134775374, "grad_norm": 24.963266372680664, "learning_rate": 5e-06, "loss": 0.4951, "num_input_tokens_seen": 167451944, "step": 2671 }, { "epoch": 8.888519134775374, "loss": 0.5492954254150391, "loss_ce": 0.0008335388265550137, "loss_iou": 0.2236328125, "loss_num": 0.020263671875, "loss_xval": 0.546875, "num_input_tokens_seen": 167451944, "step": 2671 }, { "epoch": 8.891846921797004, "grad_norm": 7.833455562591553, "learning_rate": 5e-06, "loss": 0.5269, "num_input_tokens_seen": 167514196, "step": 2672 }, { "epoch": 8.891846921797004, "loss": 0.38208454847335815, "loss_ce": 0.001164114917628467, "loss_iou": 0.11376953125, "loss_num": 0.0306396484375, "loss_xval": 0.380859375, "num_input_tokens_seen": 167514196, "step": 2672 }, { "epoch": 8.895174708818635, "grad_norm": 7.524612903594971, "learning_rate": 5e-06, "loss": 0.4999, "num_input_tokens_seen": 167576088, "step": 2673 }, { "epoch": 8.895174708818635, "loss": 0.7075351476669312, "loss_ce": 0.00010719084821175784, "loss_iou": 0.267578125, "loss_num": 0.0341796875, "loss_xval": 0.70703125, "num_input_tokens_seen": 167576088, "step": 2673 }, { "epoch": 8.898502495840265, "grad_norm": 21.40434455871582, "learning_rate": 5e-06, "loss": 0.8159, "num_input_tokens_seen": 167640548, "step": 2674 }, { "epoch": 8.898502495840265, "loss": 1.0485985279083252, "loss_ce": 0.0005029482999816537, "loss_iou": 0.373046875, "loss_num": 0.060302734375, "loss_xval": 1.046875, "num_input_tokens_seen": 167640548, "step": 2674 }, { "epoch": 8.901830282861896, "grad_norm": 47.54486083984375, "learning_rate": 5e-06, "loss": 0.4583, "num_input_tokens_seen": 167703304, "step": 2675 }, { "epoch": 8.901830282861896, "loss": 0.46423467993736267, "loss_ce": 1.285072585233138e-06, "loss_iou": 0.15625, "loss_num": 0.0301513671875, "loss_xval": 0.46484375, "num_input_tokens_seen": 167703304, "step": 2675 }, { "epoch": 8.905158069883527, "grad_norm": 10.597177505493164, "learning_rate": 5e-06, "loss": 0.292, "num_input_tokens_seen": 167765560, "step": 2676 }, { "epoch": 8.905158069883527, "loss": 0.3576675355434418, "loss_ce": 1.5191774309641914e-06, "loss_iou": 0.146484375, "loss_num": 0.012939453125, "loss_xval": 0.357421875, "num_input_tokens_seen": 167765560, "step": 2676 }, { "epoch": 8.908485856905157, "grad_norm": 12.820937156677246, "learning_rate": 5e-06, "loss": 0.609, "num_input_tokens_seen": 167828376, "step": 2677 }, { "epoch": 8.908485856905157, "loss": 0.5501753091812134, "loss_ce": 4.429482942214236e-06, "loss_iou": 0.2197265625, "loss_num": 0.022216796875, "loss_xval": 0.55078125, "num_input_tokens_seen": 167828376, "step": 2677 }, { "epoch": 8.911813643926788, "grad_norm": 13.128549575805664, "learning_rate": 5e-06, "loss": 0.5751, "num_input_tokens_seen": 167890860, "step": 2678 }, { "epoch": 8.911813643926788, "loss": 0.3802506625652313, "loss_ce": 1.637358309380943e-06, "loss_iou": 0.09619140625, "loss_num": 0.03759765625, "loss_xval": 0.380859375, "num_input_tokens_seen": 167890860, "step": 2678 }, { "epoch": 8.915141430948418, "grad_norm": 8.900168418884277, "learning_rate": 5e-06, "loss": 0.5614, "num_input_tokens_seen": 167953844, "step": 2679 }, { "epoch": 8.915141430948418, "loss": 0.34907323122024536, "loss_ce": 1.3173314073355868e-05, "loss_iou": 0.12109375, "loss_num": 0.0213623046875, "loss_xval": 0.349609375, "num_input_tokens_seen": 167953844, "step": 2679 }, { "epoch": 8.918469217970049, "grad_norm": 14.629141807556152, "learning_rate": 5e-06, "loss": 0.6464, "num_input_tokens_seen": 168017792, "step": 2680 }, { "epoch": 8.918469217970049, "loss": 0.759157657623291, "loss_ce": 2.3447612420568475e-06, "loss_iou": 0.298828125, "loss_num": 0.031982421875, "loss_xval": 0.7578125, "num_input_tokens_seen": 168017792, "step": 2680 }, { "epoch": 8.92179700499168, "grad_norm": 21.198070526123047, "learning_rate": 5e-06, "loss": 0.674, "num_input_tokens_seen": 168081864, "step": 2681 }, { "epoch": 8.92179700499168, "loss": 0.6480833292007446, "loss_ce": 4.2603729525581e-05, "loss_iou": 0.2470703125, "loss_num": 0.03076171875, "loss_xval": 0.6484375, "num_input_tokens_seen": 168081864, "step": 2681 }, { "epoch": 8.92512479201331, "grad_norm": 19.035846710205078, "learning_rate": 5e-06, "loss": 0.587, "num_input_tokens_seen": 168144056, "step": 2682 }, { "epoch": 8.92512479201331, "loss": 0.5480601787567139, "loss_ce": 0.0008799948263913393, "loss_iou": 0.1884765625, "loss_num": 0.0341796875, "loss_xval": 0.546875, "num_input_tokens_seen": 168144056, "step": 2682 }, { "epoch": 8.928452579034941, "grad_norm": 9.774259567260742, "learning_rate": 5e-06, "loss": 0.6753, "num_input_tokens_seen": 168207732, "step": 2683 }, { "epoch": 8.928452579034941, "loss": 0.670180082321167, "loss_ce": 1.4096419363340829e-05, "loss_iou": 0.259765625, "loss_num": 0.0302734375, "loss_xval": 0.671875, "num_input_tokens_seen": 168207732, "step": 2683 }, { "epoch": 8.931780366056572, "grad_norm": 30.552696228027344, "learning_rate": 5e-06, "loss": 0.5897, "num_input_tokens_seen": 168271188, "step": 2684 }, { "epoch": 8.931780366056572, "loss": 0.6347341537475586, "loss_ce": 9.062641765922308e-05, "loss_iou": 0.25, "loss_num": 0.02685546875, "loss_xval": 0.6328125, "num_input_tokens_seen": 168271188, "step": 2684 }, { "epoch": 8.935108153078202, "grad_norm": 30.102340698242188, "learning_rate": 5e-06, "loss": 0.3753, "num_input_tokens_seen": 168333400, "step": 2685 }, { "epoch": 8.935108153078202, "loss": 0.4498347043991089, "loss_ce": 0.0008906264556571841, "loss_iou": 0.1904296875, "loss_num": 0.01373291015625, "loss_xval": 0.44921875, "num_input_tokens_seen": 168333400, "step": 2685 }, { "epoch": 8.938435940099833, "grad_norm": 11.782821655273438, "learning_rate": 5e-06, "loss": 0.4484, "num_input_tokens_seen": 168396540, "step": 2686 }, { "epoch": 8.938435940099833, "loss": 0.5144100189208984, "loss_ce": 5.754771791544044e-06, "loss_iou": 0.181640625, "loss_num": 0.0302734375, "loss_xval": 0.515625, "num_input_tokens_seen": 168396540, "step": 2686 }, { "epoch": 8.941763727121465, "grad_norm": 9.617989540100098, "learning_rate": 5e-06, "loss": 0.8457, "num_input_tokens_seen": 168460464, "step": 2687 }, { "epoch": 8.941763727121465, "loss": 0.9661890864372253, "loss_ce": 2.5237577574444003e-06, "loss_iou": 0.32421875, "loss_num": 0.06396484375, "loss_xval": 0.96484375, "num_input_tokens_seen": 168460464, "step": 2687 }, { "epoch": 8.945091514143094, "grad_norm": 11.526954650878906, "learning_rate": 5e-06, "loss": 0.6394, "num_input_tokens_seen": 168524204, "step": 2688 }, { "epoch": 8.945091514143094, "loss": 0.7776308059692383, "loss_ce": 4.2937102989526466e-05, "loss_iou": 0.26953125, "loss_num": 0.0478515625, "loss_xval": 0.77734375, "num_input_tokens_seen": 168524204, "step": 2688 }, { "epoch": 8.948419301164726, "grad_norm": 22.83611297607422, "learning_rate": 5e-06, "loss": 0.4675, "num_input_tokens_seen": 168586936, "step": 2689 }, { "epoch": 8.948419301164726, "loss": 0.5030264854431152, "loss_ce": 5.271287591313012e-06, "loss_iou": 0.2060546875, "loss_num": 0.0179443359375, "loss_xval": 0.50390625, "num_input_tokens_seen": 168586936, "step": 2689 }, { "epoch": 8.951747088186355, "grad_norm": 30.83562660217285, "learning_rate": 5e-06, "loss": 0.7406, "num_input_tokens_seen": 168648856, "step": 2690 }, { "epoch": 8.951747088186355, "loss": 0.7299227118492126, "loss_ce": 0.0006746797007508576, "loss_iou": 0.287109375, "loss_num": 0.0308837890625, "loss_xval": 0.73046875, "num_input_tokens_seen": 168648856, "step": 2690 }, { "epoch": 8.955074875207988, "grad_norm": 17.343488693237305, "learning_rate": 5e-06, "loss": 0.4942, "num_input_tokens_seen": 168711500, "step": 2691 }, { "epoch": 8.955074875207988, "loss": 0.5665715336799622, "loss_ce": 0.00043992584687657654, "loss_iou": 0.220703125, "loss_num": 0.0247802734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 168711500, "step": 2691 }, { "epoch": 8.958402662229616, "grad_norm": 11.925487518310547, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 168774868, "step": 2692 }, { "epoch": 8.958402662229616, "loss": 0.7759591341018677, "loss_ce": 0.00032440933864563704, "loss_iou": 0.306640625, "loss_num": 0.0322265625, "loss_xval": 0.77734375, "num_input_tokens_seen": 168774868, "step": 2692 }, { "epoch": 8.961730449251249, "grad_norm": 20.610261917114258, "learning_rate": 5e-06, "loss": 0.6392, "num_input_tokens_seen": 168838240, "step": 2693 }, { "epoch": 8.961730449251249, "loss": 0.8284150958061218, "loss_ce": 0.001754939672537148, "loss_iou": 0.33984375, "loss_num": 0.029541015625, "loss_xval": 0.828125, "num_input_tokens_seen": 168838240, "step": 2693 }, { "epoch": 8.965058236272878, "grad_norm": 97.65433502197266, "learning_rate": 5e-06, "loss": 0.6372, "num_input_tokens_seen": 168900348, "step": 2694 }, { "epoch": 8.965058236272878, "loss": 0.496358186006546, "loss_ce": 0.0002644681080710143, "loss_iou": 0.1455078125, "loss_num": 0.040771484375, "loss_xval": 0.49609375, "num_input_tokens_seen": 168900348, "step": 2694 }, { "epoch": 8.96838602329451, "grad_norm": 17.934450149536133, "learning_rate": 5e-06, "loss": 0.6689, "num_input_tokens_seen": 168961824, "step": 2695 }, { "epoch": 8.96838602329451, "loss": 0.5545665621757507, "loss_ce": 1.1236714954065974e-06, "loss_iou": 0.1884765625, "loss_num": 0.035400390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 168961824, "step": 2695 }, { "epoch": 8.971713810316139, "grad_norm": 10.624424934387207, "learning_rate": 5e-06, "loss": 0.6765, "num_input_tokens_seen": 169024744, "step": 2696 }, { "epoch": 8.971713810316139, "loss": 0.5974724292755127, "loss_ce": 6.032624878571369e-05, "loss_iou": 0.21484375, "loss_num": 0.03369140625, "loss_xval": 0.59765625, "num_input_tokens_seen": 169024744, "step": 2696 }, { "epoch": 8.975041597337771, "grad_norm": 12.104410171508789, "learning_rate": 5e-06, "loss": 0.557, "num_input_tokens_seen": 169087820, "step": 2697 }, { "epoch": 8.975041597337771, "loss": 0.4194393754005432, "loss_ce": 0.002019947860389948, "loss_iou": 0.16796875, "loss_num": 0.0164794921875, "loss_xval": 0.41796875, "num_input_tokens_seen": 169087820, "step": 2697 }, { "epoch": 8.9783693843594, "grad_norm": 13.122030258178711, "learning_rate": 5e-06, "loss": 0.7816, "num_input_tokens_seen": 169149692, "step": 2698 }, { "epoch": 8.9783693843594, "loss": 0.3458264470100403, "loss_ce": 1.2438408703019377e-06, "loss_iou": 0.1064453125, "loss_num": 0.026611328125, "loss_xval": 0.345703125, "num_input_tokens_seen": 169149692, "step": 2698 }, { "epoch": 8.981697171381033, "grad_norm": 18.86631202697754, "learning_rate": 5e-06, "loss": 0.6563, "num_input_tokens_seen": 169212824, "step": 2699 }, { "epoch": 8.981697171381033, "loss": 0.6881248950958252, "loss_ce": 0.0001365854259347543, "loss_iou": 0.2216796875, "loss_num": 0.048828125, "loss_xval": 0.6875, "num_input_tokens_seen": 169212824, "step": 2699 }, { "epoch": 8.985024958402661, "grad_norm": 11.301755905151367, "learning_rate": 5e-06, "loss": 0.5444, "num_input_tokens_seen": 169276872, "step": 2700 }, { "epoch": 8.985024958402661, "loss": 0.6372315287590027, "loss_ce": 2.4507255147909746e-05, "loss_iou": 0.244140625, "loss_num": 0.029541015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 169276872, "step": 2700 }, { "epoch": 8.988352745424294, "grad_norm": 5.262257099151611, "learning_rate": 5e-06, "loss": 0.4703, "num_input_tokens_seen": 169340160, "step": 2701 }, { "epoch": 8.988352745424294, "loss": 0.4246836304664612, "loss_ce": 1.0118308182427427e-06, "loss_iou": 0.1357421875, "loss_num": 0.0306396484375, "loss_xval": 0.423828125, "num_input_tokens_seen": 169340160, "step": 2701 }, { "epoch": 8.991680532445923, "grad_norm": 7.663944244384766, "learning_rate": 5e-06, "loss": 0.4597, "num_input_tokens_seen": 169402848, "step": 2702 }, { "epoch": 8.991680532445923, "loss": 0.30487698316574097, "loss_ce": 6.40138841845328e-06, "loss_iou": 0.09033203125, "loss_num": 0.02490234375, "loss_xval": 0.3046875, "num_input_tokens_seen": 169402848, "step": 2702 }, { "epoch": 8.995008319467555, "grad_norm": 6.93941068649292, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 169464620, "step": 2703 }, { "epoch": 8.995008319467555, "loss": 0.4304252564907074, "loss_ce": 5.324491212377325e-06, "loss_iou": 0.10986328125, "loss_num": 0.042236328125, "loss_xval": 0.4296875, "num_input_tokens_seen": 169464620, "step": 2703 }, { "epoch": 8.998336106489184, "grad_norm": 7.04376220703125, "learning_rate": 5e-06, "loss": 0.6619, "num_input_tokens_seen": 169528172, "step": 2704 }, { "epoch": 8.998336106489184, "loss": 0.6223162412643433, "loss_ce": 1.7747397578204982e-06, "loss_iou": 0.24609375, "loss_num": 0.0262451171875, "loss_xval": 0.62109375, "num_input_tokens_seen": 169528172, "step": 2704 }, { "epoch": 8.998336106489184, "loss": 0.959973156452179, "loss_ce": 1.2224787496961653e-05, "loss_iou": 0.3203125, "loss_num": 0.0634765625, "loss_xval": 0.9609375, "num_input_tokens_seen": 169558832, "step": 2704 }, { "epoch": 9.001663893510816, "grad_norm": 13.652216911315918, "learning_rate": 5e-06, "loss": 0.7842, "num_input_tokens_seen": 169590728, "step": 2705 }, { "epoch": 9.001663893510816, "loss": 0.6083753705024719, "loss_ce": 0.0002210831007687375, "loss_iou": 0.25390625, "loss_num": 0.020263671875, "loss_xval": 0.609375, "num_input_tokens_seen": 169590728, "step": 2705 }, { "epoch": 9.004991680532447, "grad_norm": 11.147074699401855, "learning_rate": 5e-06, "loss": 0.3023, "num_input_tokens_seen": 169652268, "step": 2706 }, { "epoch": 9.004991680532447, "loss": 0.19669443368911743, "loss_ce": 3.915288107236847e-05, "loss_iou": 0.03955078125, "loss_num": 0.0235595703125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 169652268, "step": 2706 }, { "epoch": 9.008319467554077, "grad_norm": 7.115983963012695, "learning_rate": 5e-06, "loss": 0.4782, "num_input_tokens_seen": 169714752, "step": 2707 }, { "epoch": 9.008319467554077, "loss": 0.42755308747291565, "loss_ce": 1.8141753344025346e-06, "loss_iou": 0.1376953125, "loss_num": 0.0303955078125, "loss_xval": 0.427734375, "num_input_tokens_seen": 169714752, "step": 2707 }, { "epoch": 9.011647254575708, "grad_norm": 15.33203411102295, "learning_rate": 5e-06, "loss": 0.6444, "num_input_tokens_seen": 169778236, "step": 2708 }, { "epoch": 9.011647254575708, "loss": 0.8031663298606873, "loss_ce": 0.0004319301515351981, "loss_iou": 0.310546875, "loss_num": 0.03662109375, "loss_xval": 0.8046875, "num_input_tokens_seen": 169778236, "step": 2708 }, { "epoch": 9.014975041597339, "grad_norm": 23.726062774658203, "learning_rate": 5e-06, "loss": 0.578, "num_input_tokens_seen": 169841652, "step": 2709 }, { "epoch": 9.014975041597339, "loss": 0.5748501420021057, "loss_ce": 0.0003872677334584296, "loss_iou": 0.21484375, "loss_num": 0.02880859375, "loss_xval": 0.57421875, "num_input_tokens_seen": 169841652, "step": 2709 }, { "epoch": 9.01830282861897, "grad_norm": 15.519218444824219, "learning_rate": 5e-06, "loss": 0.6462, "num_input_tokens_seen": 169904084, "step": 2710 }, { "epoch": 9.01830282861897, "loss": 0.8565700054168701, "loss_ce": 0.0005824435502290726, "loss_iou": 0.2734375, "loss_num": 0.0615234375, "loss_xval": 0.85546875, "num_input_tokens_seen": 169904084, "step": 2710 }, { "epoch": 9.0216306156406, "grad_norm": 15.396018028259277, "learning_rate": 5e-06, "loss": 0.5181, "num_input_tokens_seen": 169966376, "step": 2711 }, { "epoch": 9.0216306156406, "loss": 0.5503358840942383, "loss_ce": 4.292500307201408e-05, "loss_iou": 0.18359375, "loss_num": 0.036376953125, "loss_xval": 0.55078125, "num_input_tokens_seen": 169966376, "step": 2711 }, { "epoch": 9.02495840266223, "grad_norm": 35.249698638916016, "learning_rate": 5e-06, "loss": 0.7318, "num_input_tokens_seen": 170029028, "step": 2712 }, { "epoch": 9.02495840266223, "loss": 0.7787384986877441, "loss_ce": 0.00029614046798087656, "loss_iou": 0.314453125, "loss_num": 0.0301513671875, "loss_xval": 0.77734375, "num_input_tokens_seen": 170029028, "step": 2712 }, { "epoch": 9.028286189683861, "grad_norm": 39.24230194091797, "learning_rate": 5e-06, "loss": 0.4301, "num_input_tokens_seen": 170091936, "step": 2713 }, { "epoch": 9.028286189683861, "loss": 0.2884552478790283, "loss_ce": 3.0876740311214235e-06, "loss_iou": 0.09228515625, "loss_num": 0.0206298828125, "loss_xval": 0.2890625, "num_input_tokens_seen": 170091936, "step": 2713 }, { "epoch": 9.031613976705492, "grad_norm": 18.050870895385742, "learning_rate": 5e-06, "loss": 0.2986, "num_input_tokens_seen": 170151076, "step": 2714 }, { "epoch": 9.031613976705492, "loss": 0.34296002984046936, "loss_ce": 3.4835511542041786e-06, "loss_iou": 0.10302734375, "loss_num": 0.02734375, "loss_xval": 0.34375, "num_input_tokens_seen": 170151076, "step": 2714 }, { "epoch": 9.034941763727122, "grad_norm": 12.596750259399414, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 170215504, "step": 2715 }, { "epoch": 9.034941763727122, "loss": 0.6197566986083984, "loss_ce": 5.703982878912939e-06, "loss_iou": 0.2333984375, "loss_num": 0.0303955078125, "loss_xval": 0.62109375, "num_input_tokens_seen": 170215504, "step": 2715 }, { "epoch": 9.038269550748753, "grad_norm": 27.407209396362305, "learning_rate": 5e-06, "loss": 0.5802, "num_input_tokens_seen": 170277724, "step": 2716 }, { "epoch": 9.038269550748753, "loss": 0.4334884285926819, "loss_ce": 0.0001388303644489497, "loss_iou": 0.150390625, "loss_num": 0.026611328125, "loss_xval": 0.43359375, "num_input_tokens_seen": 170277724, "step": 2716 }, { "epoch": 9.041597337770384, "grad_norm": 20.459091186523438, "learning_rate": 5e-06, "loss": 0.719, "num_input_tokens_seen": 170341188, "step": 2717 }, { "epoch": 9.041597337770384, "loss": 0.7030481696128845, "loss_ce": 0.00041144975693896413, "loss_iou": 0.29296875, "loss_num": 0.0230712890625, "loss_xval": 0.703125, "num_input_tokens_seen": 170341188, "step": 2717 }, { "epoch": 9.044925124792014, "grad_norm": 13.981463432312012, "learning_rate": 5e-06, "loss": 0.483, "num_input_tokens_seen": 170403872, "step": 2718 }, { "epoch": 9.044925124792014, "loss": 0.5364397764205933, "loss_ce": 1.7581903648533626e-06, "loss_iou": 0.208984375, "loss_num": 0.0238037109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 170403872, "step": 2718 }, { "epoch": 9.048252911813645, "grad_norm": 11.303633689880371, "learning_rate": 5e-06, "loss": 0.4914, "num_input_tokens_seen": 170466996, "step": 2719 }, { "epoch": 9.048252911813645, "loss": 0.444522887468338, "loss_ce": 0.0001259240525541827, "loss_iou": 0.169921875, "loss_num": 0.0206298828125, "loss_xval": 0.4453125, "num_input_tokens_seen": 170466996, "step": 2719 }, { "epoch": 9.051580698835275, "grad_norm": 17.028438568115234, "learning_rate": 5e-06, "loss": 0.702, "num_input_tokens_seen": 170531136, "step": 2720 }, { "epoch": 9.051580698835275, "loss": 1.0484275817871094, "loss_ce": 0.0010642717825248837, "loss_iou": 0.376953125, "loss_num": 0.05859375, "loss_xval": 1.046875, "num_input_tokens_seen": 170531136, "step": 2720 }, { "epoch": 9.054908485856906, "grad_norm": 6.848005294799805, "learning_rate": 5e-06, "loss": 0.2079, "num_input_tokens_seen": 170592420, "step": 2721 }, { "epoch": 9.054908485856906, "loss": 0.14693525433540344, "loss_ce": 5.415128543972969e-05, "loss_iou": 0.0135498046875, "loss_num": 0.02392578125, "loss_xval": 0.146484375, "num_input_tokens_seen": 170592420, "step": 2721 }, { "epoch": 9.058236272878537, "grad_norm": 10.784342765808105, "learning_rate": 5e-06, "loss": 0.7395, "num_input_tokens_seen": 170656984, "step": 2722 }, { "epoch": 9.058236272878537, "loss": 0.8115100860595703, "loss_ce": 0.00023073975171428174, "loss_iou": 0.326171875, "loss_num": 0.031494140625, "loss_xval": 0.8125, "num_input_tokens_seen": 170656984, "step": 2722 }, { "epoch": 9.061564059900167, "grad_norm": 17.1337947845459, "learning_rate": 5e-06, "loss": 0.6539, "num_input_tokens_seen": 170719468, "step": 2723 }, { "epoch": 9.061564059900167, "loss": 0.8414515256881714, "loss_ce": 0.0006311996257863939, "loss_iou": 0.3203125, "loss_num": 0.039794921875, "loss_xval": 0.83984375, "num_input_tokens_seen": 170719468, "step": 2723 }, { "epoch": 9.064891846921798, "grad_norm": 18.44516944885254, "learning_rate": 5e-06, "loss": 0.5409, "num_input_tokens_seen": 170783296, "step": 2724 }, { "epoch": 9.064891846921798, "loss": 0.6411939859390259, "loss_ce": 0.00020275494898669422, "loss_iou": 0.25, "loss_num": 0.0281982421875, "loss_xval": 0.640625, "num_input_tokens_seen": 170783296, "step": 2724 }, { "epoch": 9.068219633943428, "grad_norm": 16.612060546875, "learning_rate": 5e-06, "loss": 0.5058, "num_input_tokens_seen": 170846008, "step": 2725 }, { "epoch": 9.068219633943428, "loss": 0.4974035322666168, "loss_ce": 2.8039890821673907e-05, "loss_iou": 0.1982421875, "loss_num": 0.0201416015625, "loss_xval": 0.498046875, "num_input_tokens_seen": 170846008, "step": 2725 }, { "epoch": 9.071547420965059, "grad_norm": 10.429084777832031, "learning_rate": 5e-06, "loss": 0.5718, "num_input_tokens_seen": 170909784, "step": 2726 }, { "epoch": 9.071547420965059, "loss": 0.584109902381897, "loss_ce": 3.4446729841874912e-06, "loss_iou": 0.2109375, "loss_num": 0.03271484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 170909784, "step": 2726 }, { "epoch": 9.07487520798669, "grad_norm": 8.421403884887695, "learning_rate": 5e-06, "loss": 0.2619, "num_input_tokens_seen": 170971684, "step": 2727 }, { "epoch": 9.07487520798669, "loss": 0.1774003505706787, "loss_ce": 1.6690398751961766e-06, "loss_iou": 0.059814453125, "loss_num": 0.01153564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 170971684, "step": 2727 }, { "epoch": 9.07820299500832, "grad_norm": 13.845110893249512, "learning_rate": 5e-06, "loss": 0.5743, "num_input_tokens_seen": 171034948, "step": 2728 }, { "epoch": 9.07820299500832, "loss": 0.7243666052818298, "loss_ce": 0.0004896331811323762, "loss_iou": 0.2578125, "loss_num": 0.041748046875, "loss_xval": 0.72265625, "num_input_tokens_seen": 171034948, "step": 2728 }, { "epoch": 9.081530782029951, "grad_norm": 14.171822547912598, "learning_rate": 5e-06, "loss": 0.4901, "num_input_tokens_seen": 171097008, "step": 2729 }, { "epoch": 9.081530782029951, "loss": 0.6414907574653625, "loss_ce": 1.1279277714493219e-05, "loss_iou": 0.2314453125, "loss_num": 0.03564453125, "loss_xval": 0.640625, "num_input_tokens_seen": 171097008, "step": 2729 }, { "epoch": 9.084858569051582, "grad_norm": 13.765453338623047, "learning_rate": 5e-06, "loss": 0.53, "num_input_tokens_seen": 171160476, "step": 2730 }, { "epoch": 9.084858569051582, "loss": 0.6902370452880859, "loss_ce": 5.14911298523657e-05, "loss_iou": 0.2578125, "loss_num": 0.035400390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 171160476, "step": 2730 }, { "epoch": 9.088186356073212, "grad_norm": 13.126117706298828, "learning_rate": 5e-06, "loss": 0.5603, "num_input_tokens_seen": 171222940, "step": 2731 }, { "epoch": 9.088186356073212, "loss": 0.7285605072975159, "loss_ce": 4.486861143959686e-05, "loss_iou": 0.271484375, "loss_num": 0.036865234375, "loss_xval": 0.7265625, "num_input_tokens_seen": 171222940, "step": 2731 }, { "epoch": 9.091514143094843, "grad_norm": 16.399322509765625, "learning_rate": 5e-06, "loss": 0.3682, "num_input_tokens_seen": 171284808, "step": 2732 }, { "epoch": 9.091514143094843, "loss": 0.4369552433490753, "loss_ce": 4.553273356577847e-06, "loss_iou": 0.1796875, "loss_num": 0.01556396484375, "loss_xval": 0.4375, "num_input_tokens_seen": 171284808, "step": 2732 }, { "epoch": 9.094841930116473, "grad_norm": 18.18143081665039, "learning_rate": 5e-06, "loss": 0.6586, "num_input_tokens_seen": 171345776, "step": 2733 }, { "epoch": 9.094841930116473, "loss": 0.9724295735359192, "loss_ce": 1.7464280972490087e-05, "loss_iou": 0.34765625, "loss_num": 0.05517578125, "loss_xval": 0.97265625, "num_input_tokens_seen": 171345776, "step": 2733 }, { "epoch": 9.098169717138104, "grad_norm": 14.916769981384277, "learning_rate": 5e-06, "loss": 0.3805, "num_input_tokens_seen": 171408280, "step": 2734 }, { "epoch": 9.098169717138104, "loss": 0.4874306917190552, "loss_ce": 0.0003701338719110936, "loss_iou": 0.1806640625, "loss_num": 0.025146484375, "loss_xval": 0.486328125, "num_input_tokens_seen": 171408280, "step": 2734 }, { "epoch": 9.101497504159735, "grad_norm": 10.510838508605957, "learning_rate": 5e-06, "loss": 0.4826, "num_input_tokens_seen": 171470928, "step": 2735 }, { "epoch": 9.101497504159735, "loss": 0.5706886649131775, "loss_ce": 9.965518074750435e-06, "loss_iou": 0.2001953125, "loss_num": 0.034423828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 171470928, "step": 2735 }, { "epoch": 9.104825291181365, "grad_norm": 8.274164199829102, "learning_rate": 5e-06, "loss": 0.5602, "num_input_tokens_seen": 171534656, "step": 2736 }, { "epoch": 9.104825291181365, "loss": 0.41030192375183105, "loss_ce": 2.3632663214812055e-05, "loss_iou": 0.146484375, "loss_num": 0.023681640625, "loss_xval": 0.41015625, "num_input_tokens_seen": 171534656, "step": 2736 }, { "epoch": 9.108153078202996, "grad_norm": 5.749737739562988, "learning_rate": 5e-06, "loss": 0.3749, "num_input_tokens_seen": 171596248, "step": 2737 }, { "epoch": 9.108153078202996, "loss": 0.4064052104949951, "loss_ce": 2.634807060530875e-06, "loss_iou": 0.1240234375, "loss_num": 0.03173828125, "loss_xval": 0.40625, "num_input_tokens_seen": 171596248, "step": 2737 }, { "epoch": 9.111480865224626, "grad_norm": 7.882076263427734, "learning_rate": 5e-06, "loss": 0.4144, "num_input_tokens_seen": 171658944, "step": 2738 }, { "epoch": 9.111480865224626, "loss": 0.3450947403907776, "loss_ce": 1.96473683899967e-06, "loss_iou": 0.10986328125, "loss_num": 0.02490234375, "loss_xval": 0.345703125, "num_input_tokens_seen": 171658944, "step": 2738 }, { "epoch": 9.114808652246257, "grad_norm": 23.37879180908203, "learning_rate": 5e-06, "loss": 0.6729, "num_input_tokens_seen": 171722740, "step": 2739 }, { "epoch": 9.114808652246257, "loss": 0.8356950879096985, "loss_ce": 1.7175163975480245e-06, "loss_iou": 0.306640625, "loss_num": 0.04443359375, "loss_xval": 0.8359375, "num_input_tokens_seen": 171722740, "step": 2739 }, { "epoch": 9.118136439267888, "grad_norm": 27.243497848510742, "learning_rate": 5e-06, "loss": 0.5185, "num_input_tokens_seen": 171785616, "step": 2740 }, { "epoch": 9.118136439267888, "loss": 0.4735810160636902, "loss_ce": 9.228238923242316e-06, "loss_iou": 0.16796875, "loss_num": 0.02734375, "loss_xval": 0.47265625, "num_input_tokens_seen": 171785616, "step": 2740 }, { "epoch": 9.121464226289518, "grad_norm": 31.74296760559082, "learning_rate": 5e-06, "loss": 0.629, "num_input_tokens_seen": 171847480, "step": 2741 }, { "epoch": 9.121464226289518, "loss": 0.4412848949432373, "loss_ce": 7.00203429460089e-07, "loss_iou": 0.1630859375, "loss_num": 0.02294921875, "loss_xval": 0.44140625, "num_input_tokens_seen": 171847480, "step": 2741 }, { "epoch": 9.124792013311149, "grad_norm": 41.023223876953125, "learning_rate": 5e-06, "loss": 0.5946, "num_input_tokens_seen": 171909972, "step": 2742 }, { "epoch": 9.124792013311149, "loss": 0.7530545592308044, "loss_ce": 2.8247814043425024e-06, "loss_iou": 0.28515625, "loss_num": 0.03662109375, "loss_xval": 0.75390625, "num_input_tokens_seen": 171909972, "step": 2742 }, { "epoch": 9.12811980033278, "grad_norm": 31.096200942993164, "learning_rate": 5e-06, "loss": 0.5958, "num_input_tokens_seen": 171972424, "step": 2743 }, { "epoch": 9.12811980033278, "loss": 0.7417131662368774, "loss_ce": 1.3965724065201357e-05, "loss_iou": 0.255859375, "loss_num": 0.0458984375, "loss_xval": 0.7421875, "num_input_tokens_seen": 171972424, "step": 2743 }, { "epoch": 9.13144758735441, "grad_norm": 15.148573875427246, "learning_rate": 5e-06, "loss": 0.5329, "num_input_tokens_seen": 172035764, "step": 2744 }, { "epoch": 9.13144758735441, "loss": 0.6782916784286499, "loss_ce": 7.997306965989992e-06, "loss_iou": 0.236328125, "loss_num": 0.041015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 172035764, "step": 2744 }, { "epoch": 9.13477537437604, "grad_norm": 25.202062606811523, "learning_rate": 5e-06, "loss": 0.4139, "num_input_tokens_seen": 172098584, "step": 2745 }, { "epoch": 9.13477537437604, "loss": 0.42631563544273376, "loss_ce": 0.00022920458286534995, "loss_iou": 0.16015625, "loss_num": 0.02099609375, "loss_xval": 0.42578125, "num_input_tokens_seen": 172098584, "step": 2745 }, { "epoch": 9.138103161397671, "grad_norm": 38.29652786254883, "learning_rate": 5e-06, "loss": 0.568, "num_input_tokens_seen": 172160412, "step": 2746 }, { "epoch": 9.138103161397671, "loss": 0.7039110660552979, "loss_ce": 0.0002977612311951816, "loss_iou": 0.251953125, "loss_num": 0.039794921875, "loss_xval": 0.703125, "num_input_tokens_seen": 172160412, "step": 2746 }, { "epoch": 9.141430948419302, "grad_norm": 29.625240325927734, "learning_rate": 5e-06, "loss": 0.4914, "num_input_tokens_seen": 172222796, "step": 2747 }, { "epoch": 9.141430948419302, "loss": 0.2858741879463196, "loss_ce": 4.6526962250936776e-05, "loss_iou": 0.10986328125, "loss_num": 0.01318359375, "loss_xval": 0.28515625, "num_input_tokens_seen": 172222796, "step": 2747 }, { "epoch": 9.144758735440933, "grad_norm": 65.04967498779297, "learning_rate": 5e-06, "loss": 0.5045, "num_input_tokens_seen": 172285204, "step": 2748 }, { "epoch": 9.144758735440933, "loss": 0.4827478528022766, "loss_ce": 0.0006921901949681342, "loss_iou": 0.1767578125, "loss_num": 0.025634765625, "loss_xval": 0.482421875, "num_input_tokens_seen": 172285204, "step": 2748 }, { "epoch": 9.148086522462563, "grad_norm": 8.8961763381958, "learning_rate": 5e-06, "loss": 0.4652, "num_input_tokens_seen": 172348980, "step": 2749 }, { "epoch": 9.148086522462563, "loss": 0.652510941028595, "loss_ce": 0.00028924556681886315, "loss_iou": 0.248046875, "loss_num": 0.031005859375, "loss_xval": 0.65234375, "num_input_tokens_seen": 172348980, "step": 2749 }, { "epoch": 9.151414309484194, "grad_norm": 11.915822982788086, "learning_rate": 5e-06, "loss": 0.538, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "eval_seeclick_CIoU": 0.06947783194482327, "eval_seeclick_GIoU": 0.07308981381356716, "eval_seeclick_IoU": 0.17966796457767487, "eval_seeclick_MAE_all": 0.16444826126098633, "eval_seeclick_MAE_h": 0.05615009553730488, "eval_seeclick_MAE_w": 0.12581640109419823, "eval_seeclick_MAE_x_boxes": 0.20301883667707443, "eval_seeclick_MAE_y_boxes": 0.1761958822607994, "eval_seeclick_NUM_probability": 0.9999235570430756, "eval_seeclick_inside_bbox": 0.20937500149011612, "eval_seeclick_loss": 2.8503429889678955, "eval_seeclick_loss_ce": 0.15844284743070602, "eval_seeclick_loss_iou": 0.942138671875, "eval_seeclick_loss_num": 0.16451263427734375, "eval_seeclick_loss_xval": 2.705078125, "eval_seeclick_runtime": 65.6753, "eval_seeclick_samples_per_second": 0.716, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "eval_icons_CIoU": -0.023895956110209227, "eval_icons_GIoU": 0.07082461938261986, "eval_icons_IoU": 0.14338011294603348, "eval_icons_MAE_all": 0.17763781547546387, "eval_icons_MAE_h": 0.13453229516744614, "eval_icons_MAE_w": 0.18049750477075577, "eval_icons_MAE_x_boxes": 0.13495558127760887, "eval_icons_MAE_y_boxes": 0.09269100055098534, "eval_icons_NUM_probability": 0.9999789297580719, "eval_icons_inside_bbox": 0.2916666716337204, "eval_icons_loss": 2.7318778038024902, "eval_icons_loss_ce": 3.102192181358987e-06, "eval_icons_loss_iou": 0.939697265625, "eval_icons_loss_num": 0.1769866943359375, "eval_icons_loss_xval": 2.76416015625, "eval_icons_runtime": 73.4338, "eval_icons_samples_per_second": 0.681, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "eval_screenspot_CIoU": 0.1709420457482338, "eval_screenspot_GIoU": 0.20991336305936178, "eval_screenspot_IoU": 0.28911465406417847, "eval_screenspot_MAE_all": 0.12488498042027156, "eval_screenspot_MAE_h": 0.0680040717124939, "eval_screenspot_MAE_w": 0.10874731590350468, "eval_screenspot_MAE_x_boxes": 0.17782202859719595, "eval_screenspot_MAE_y_boxes": 0.08481001233061154, "eval_screenspot_NUM_probability": 0.9999839266141256, "eval_screenspot_inside_bbox": 0.49458332856496173, "eval_screenspot_loss": 2.2436914443969727, "eval_screenspot_loss_ce": 1.1287289908068487e-05, "eval_screenspot_loss_iou": 0.8058268229166666, "eval_screenspot_loss_num": 0.13721466064453125, "eval_screenspot_loss_xval": 2.2975260416666665, "eval_screenspot_runtime": 124.7942, "eval_screenspot_samples_per_second": 0.713, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "eval_compot_CIoU": 0.030097968876361847, "eval_compot_GIoU": 0.0925954096019268, "eval_compot_IoU": 0.18918800354003906, "eval_compot_MAE_all": 0.1833871304988861, "eval_compot_MAE_h": 0.08791621681302786, "eval_compot_MAE_w": 0.18676985800266266, "eval_compot_MAE_x_boxes": 0.1735633909702301, "eval_compot_MAE_y_boxes": 0.1339048519730568, "eval_compot_NUM_probability": 0.9999822080135345, "eval_compot_inside_bbox": 0.3229166716337204, "eval_compot_loss": 2.7316160202026367, "eval_compot_loss_ce": 0.001829457498388365, "eval_compot_loss_iou": 0.916015625, "eval_compot_loss_num": 0.19844818115234375, "eval_compot_loss_xval": 2.8251953125, "eval_compot_runtime": 71.6233, "eval_compot_samples_per_second": 0.698, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "eval_custom_ui_MAE_all": 0.06560781225562096, "eval_custom_ui_MAE_x": 0.07491783797740936, "eval_custom_ui_MAE_y": 0.05629779398441315, "eval_custom_ui_NUM_probability": 0.9999980330467224, "eval_custom_ui_loss": 0.30281370878219604, "eval_custom_ui_loss_ce": 1.1578626981645357e-06, "eval_custom_ui_loss_num": 0.0614166259765625, "eval_custom_ui_loss_xval": 0.30712890625, "eval_custom_ui_runtime": 58.6318, "eval_custom_ui_samples_per_second": 0.853, "eval_custom_ui_steps_per_second": 0.034, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.151414309484194, "loss": 0.3197028934955597, "loss_ce": 7.324169928324409e-07, "loss_iou": 0.0, "loss_num": 0.06396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 172413008, "step": 2750 }, { "epoch": 9.154742096505824, "grad_norm": 10.048532485961914, "learning_rate": 5e-06, "loss": 0.6047, "num_input_tokens_seen": 172475600, "step": 2751 }, { "epoch": 9.154742096505824, "loss": 0.5075211524963379, "loss_ce": 0.0003190401184838265, "loss_iou": 0.1689453125, "loss_num": 0.03369140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 172475600, "step": 2751 }, { "epoch": 9.158069883527455, "grad_norm": 9.929963111877441, "learning_rate": 5e-06, "loss": 0.4248, "num_input_tokens_seen": 172538876, "step": 2752 }, { "epoch": 9.158069883527455, "loss": 0.510750412940979, "loss_ce": 0.00037446091300807893, "loss_iou": 0.1826171875, "loss_num": 0.0289306640625, "loss_xval": 0.51171875, "num_input_tokens_seen": 172538876, "step": 2752 }, { "epoch": 9.161397670549086, "grad_norm": 10.752462387084961, "learning_rate": 5e-06, "loss": 0.6541, "num_input_tokens_seen": 172600572, "step": 2753 }, { "epoch": 9.161397670549086, "loss": 0.566318154335022, "loss_ce": 0.0005413366016000509, "loss_iou": 0.203125, "loss_num": 0.031982421875, "loss_xval": 0.56640625, "num_input_tokens_seen": 172600572, "step": 2753 }, { "epoch": 9.164725457570716, "grad_norm": 10.673312187194824, "learning_rate": 5e-06, "loss": 0.52, "num_input_tokens_seen": 172662944, "step": 2754 }, { "epoch": 9.164725457570716, "loss": 0.409506618976593, "loss_ce": 8.278216409962624e-05, "loss_iou": 0.1328125, "loss_num": 0.028564453125, "loss_xval": 0.41015625, "num_input_tokens_seen": 172662944, "step": 2754 }, { "epoch": 9.168053244592347, "grad_norm": 46.93904495239258, "learning_rate": 5e-06, "loss": 0.3953, "num_input_tokens_seen": 172725668, "step": 2755 }, { "epoch": 9.168053244592347, "loss": 0.4391924738883972, "loss_ce": 7.501443906221539e-05, "loss_iou": 0.1728515625, "loss_num": 0.0184326171875, "loss_xval": 0.439453125, "num_input_tokens_seen": 172725668, "step": 2755 }, { "epoch": 9.171381031613977, "grad_norm": 9.46899127960205, "learning_rate": 5e-06, "loss": 0.4246, "num_input_tokens_seen": 172788584, "step": 2756 }, { "epoch": 9.171381031613977, "loss": 0.4191989600658417, "loss_ce": 9.471514204051346e-06, "loss_iou": 0.12890625, "loss_num": 0.031982421875, "loss_xval": 0.419921875, "num_input_tokens_seen": 172788584, "step": 2756 }, { "epoch": 9.174708818635608, "grad_norm": 16.74191665649414, "learning_rate": 5e-06, "loss": 0.5423, "num_input_tokens_seen": 172851220, "step": 2757 }, { "epoch": 9.174708818635608, "loss": 0.7331981062889099, "loss_ce": 4.3821703002322465e-05, "loss_iou": 0.248046875, "loss_num": 0.047607421875, "loss_xval": 0.734375, "num_input_tokens_seen": 172851220, "step": 2757 }, { "epoch": 9.178036605657239, "grad_norm": 19.963937759399414, "learning_rate": 5e-06, "loss": 0.4182, "num_input_tokens_seen": 172913664, "step": 2758 }, { "epoch": 9.178036605657239, "loss": 0.25976815819740295, "loss_ce": 2.528965978854103e-06, "loss_iou": 0.091796875, "loss_num": 0.01513671875, "loss_xval": 0.259765625, "num_input_tokens_seen": 172913664, "step": 2758 }, { "epoch": 9.18136439267887, "grad_norm": 26.211523056030273, "learning_rate": 5e-06, "loss": 0.531, "num_input_tokens_seen": 172976800, "step": 2759 }, { "epoch": 9.18136439267887, "loss": 0.2998438775539398, "loss_ce": 1.035507011692971e-06, "loss_iou": 0.09375, "loss_num": 0.0224609375, "loss_xval": 0.30078125, "num_input_tokens_seen": 172976800, "step": 2759 }, { "epoch": 9.1846921797005, "grad_norm": 58.50944137573242, "learning_rate": 5e-06, "loss": 0.4934, "num_input_tokens_seen": 173040792, "step": 2760 }, { "epoch": 9.1846921797005, "loss": 0.3176276683807373, "loss_ce": 6.853003924334189e-07, "loss_iou": 0.12158203125, "loss_num": 0.01483154296875, "loss_xval": 0.318359375, "num_input_tokens_seen": 173040792, "step": 2760 }, { "epoch": 9.18801996672213, "grad_norm": 38.231998443603516, "learning_rate": 5e-06, "loss": 0.4761, "num_input_tokens_seen": 173104616, "step": 2761 }, { "epoch": 9.18801996672213, "loss": 0.4772014617919922, "loss_ce": 0.0001506824919488281, "loss_iou": 0.2197265625, "loss_num": 0.007659912109375, "loss_xval": 0.4765625, "num_input_tokens_seen": 173104616, "step": 2761 }, { "epoch": 9.191347753743761, "grad_norm": 21.3988094329834, "learning_rate": 5e-06, "loss": 0.576, "num_input_tokens_seen": 173168084, "step": 2762 }, { "epoch": 9.191347753743761, "loss": 0.6923550367355347, "loss_ce": 0.0005215106648392975, "loss_iou": 0.2734375, "loss_num": 0.029052734375, "loss_xval": 0.69140625, "num_input_tokens_seen": 173168084, "step": 2762 }, { "epoch": 9.194675540765392, "grad_norm": 22.348876953125, "learning_rate": 5e-06, "loss": 0.4898, "num_input_tokens_seen": 173229844, "step": 2763 }, { "epoch": 9.194675540765392, "loss": 0.33520936965942383, "loss_ce": 4.317801540310029e-06, "loss_iou": 0.1396484375, "loss_num": 0.0113525390625, "loss_xval": 0.3359375, "num_input_tokens_seen": 173229844, "step": 2763 }, { "epoch": 9.198003327787022, "grad_norm": 13.418854713439941, "learning_rate": 5e-06, "loss": 0.6033, "num_input_tokens_seen": 173294000, "step": 2764 }, { "epoch": 9.198003327787022, "loss": 0.4378677308559418, "loss_ce": 1.5161267583607696e-06, "loss_iou": 0.189453125, "loss_num": 0.0118408203125, "loss_xval": 0.4375, "num_input_tokens_seen": 173294000, "step": 2764 }, { "epoch": 9.201331114808653, "grad_norm": 25.52296257019043, "learning_rate": 5e-06, "loss": 0.7482, "num_input_tokens_seen": 173358708, "step": 2765 }, { "epoch": 9.201331114808653, "loss": 0.6247121691703796, "loss_ce": 0.00023090995091479272, "loss_iou": 0.251953125, "loss_num": 0.0244140625, "loss_xval": 0.625, "num_input_tokens_seen": 173358708, "step": 2765 }, { "epoch": 9.204658901830284, "grad_norm": 38.03092956542969, "learning_rate": 5e-06, "loss": 0.3735, "num_input_tokens_seen": 173420372, "step": 2766 }, { "epoch": 9.204658901830284, "loss": 0.3793972134590149, "loss_ce": 2.6909960979537573e-06, "loss_iou": 0.1435546875, "loss_num": 0.018310546875, "loss_xval": 0.37890625, "num_input_tokens_seen": 173420372, "step": 2766 }, { "epoch": 9.207986688851914, "grad_norm": 39.38788986206055, "learning_rate": 5e-06, "loss": 0.4965, "num_input_tokens_seen": 173485044, "step": 2767 }, { "epoch": 9.207986688851914, "loss": 0.4639807939529419, "loss_ce": 0.0008460237295366824, "loss_iou": 0.2041015625, "loss_num": 0.0111083984375, "loss_xval": 0.462890625, "num_input_tokens_seen": 173485044, "step": 2767 }, { "epoch": 9.211314475873545, "grad_norm": 22.26861000061035, "learning_rate": 5e-06, "loss": 0.5243, "num_input_tokens_seen": 173547780, "step": 2768 }, { "epoch": 9.211314475873545, "loss": 0.3463151752948761, "loss_ce": 1.6854107798280893e-06, "loss_iou": 0.1376953125, "loss_num": 0.01434326171875, "loss_xval": 0.345703125, "num_input_tokens_seen": 173547780, "step": 2768 }, { "epoch": 9.214642262895175, "grad_norm": 14.41019344329834, "learning_rate": 5e-06, "loss": 0.5068, "num_input_tokens_seen": 173608740, "step": 2769 }, { "epoch": 9.214642262895175, "loss": 0.4049099385738373, "loss_ce": 2.704691269173054e-06, "loss_iou": 0.09521484375, "loss_num": 0.042724609375, "loss_xval": 0.404296875, "num_input_tokens_seen": 173608740, "step": 2769 }, { "epoch": 9.217970049916806, "grad_norm": 8.507943153381348, "learning_rate": 5e-06, "loss": 0.506, "num_input_tokens_seen": 173670616, "step": 2770 }, { "epoch": 9.217970049916806, "loss": 0.25177162885665894, "loss_ce": 1.6148906070156954e-06, "loss_iou": 0.06591796875, "loss_num": 0.02392578125, "loss_xval": 0.251953125, "num_input_tokens_seen": 173670616, "step": 2770 }, { "epoch": 9.221297836938437, "grad_norm": 6.104926586151123, "learning_rate": 5e-06, "loss": 0.5259, "num_input_tokens_seen": 173731484, "step": 2771 }, { "epoch": 9.221297836938437, "loss": 0.7128210067749023, "loss_ce": 6.6632255766307935e-06, "loss_iou": 0.298828125, "loss_num": 0.0230712890625, "loss_xval": 0.7109375, "num_input_tokens_seen": 173731484, "step": 2771 }, { "epoch": 9.224625623960067, "grad_norm": 7.453578948974609, "learning_rate": 5e-06, "loss": 0.5535, "num_input_tokens_seen": 173795048, "step": 2772 }, { "epoch": 9.224625623960067, "loss": 0.6473469734191895, "loss_ce": 8.074549441516865e-06, "loss_iou": 0.2216796875, "loss_num": 0.040771484375, "loss_xval": 0.6484375, "num_input_tokens_seen": 173795048, "step": 2772 }, { "epoch": 9.227953410981698, "grad_norm": 12.145768165588379, "learning_rate": 5e-06, "loss": 0.472, "num_input_tokens_seen": 173857412, "step": 2773 }, { "epoch": 9.227953410981698, "loss": 0.43049514293670654, "loss_ce": 1.416546820109943e-05, "loss_iou": 0.15234375, "loss_num": 0.0250244140625, "loss_xval": 0.4296875, "num_input_tokens_seen": 173857412, "step": 2773 }, { "epoch": 9.231281198003328, "grad_norm": 13.712037086486816, "learning_rate": 5e-06, "loss": 0.5034, "num_input_tokens_seen": 173920296, "step": 2774 }, { "epoch": 9.231281198003328, "loss": 0.4371587038040161, "loss_ce": 2.492428757250309e-05, "loss_iou": 0.15625, "loss_num": 0.0250244140625, "loss_xval": 0.4375, "num_input_tokens_seen": 173920296, "step": 2774 }, { "epoch": 9.234608985024959, "grad_norm": 10.31271743774414, "learning_rate": 5e-06, "loss": 0.5305, "num_input_tokens_seen": 173982140, "step": 2775 }, { "epoch": 9.234608985024959, "loss": 0.705445408821106, "loss_ce": 1.0682998663469334e-06, "loss_iou": 0.294921875, "loss_num": 0.0234375, "loss_xval": 0.70703125, "num_input_tokens_seen": 173982140, "step": 2775 }, { "epoch": 9.23793677204659, "grad_norm": 16.602554321289062, "learning_rate": 5e-06, "loss": 0.5221, "num_input_tokens_seen": 174046008, "step": 2776 }, { "epoch": 9.23793677204659, "loss": 0.49756234884262085, "loss_ce": 3.7706142848037416e-06, "loss_iou": 0.1953125, "loss_num": 0.0213623046875, "loss_xval": 0.498046875, "num_input_tokens_seen": 174046008, "step": 2776 }, { "epoch": 9.24126455906822, "grad_norm": 14.97239875793457, "learning_rate": 5e-06, "loss": 0.5523, "num_input_tokens_seen": 174109072, "step": 2777 }, { "epoch": 9.24126455906822, "loss": 0.39726150035858154, "loss_ce": 0.00010573832696536556, "loss_iou": 0.11669921875, "loss_num": 0.03271484375, "loss_xval": 0.396484375, "num_input_tokens_seen": 174109072, "step": 2777 }, { "epoch": 9.244592346089851, "grad_norm": 10.991167068481445, "learning_rate": 5e-06, "loss": 0.3605, "num_input_tokens_seen": 174171724, "step": 2778 }, { "epoch": 9.244592346089851, "loss": 0.42676156759262085, "loss_ce": 3.7799536585225724e-06, "loss_iou": 0.158203125, "loss_num": 0.02197265625, "loss_xval": 0.42578125, "num_input_tokens_seen": 174171724, "step": 2778 }, { "epoch": 9.247920133111482, "grad_norm": 8.223103523254395, "learning_rate": 5e-06, "loss": 0.5997, "num_input_tokens_seen": 174233804, "step": 2779 }, { "epoch": 9.247920133111482, "loss": 0.6258002519607544, "loss_ce": 6.770993877580622e-06, "loss_iou": 0.2421875, "loss_num": 0.0281982421875, "loss_xval": 0.625, "num_input_tokens_seen": 174233804, "step": 2779 }, { "epoch": 9.251247920133112, "grad_norm": 10.543940544128418, "learning_rate": 5e-06, "loss": 0.5913, "num_input_tokens_seen": 174297804, "step": 2780 }, { "epoch": 9.251247920133112, "loss": 0.5330869555473328, "loss_ce": 5.9048343246104196e-06, "loss_iou": 0.2001953125, "loss_num": 0.0263671875, "loss_xval": 0.53125, "num_input_tokens_seen": 174297804, "step": 2780 }, { "epoch": 9.254575707154743, "grad_norm": 8.248653411865234, "learning_rate": 5e-06, "loss": 0.5982, "num_input_tokens_seen": 174361812, "step": 2781 }, { "epoch": 9.254575707154743, "loss": 0.5761756896972656, "loss_ce": 3.7968316064507235e-06, "loss_iou": 0.2197265625, "loss_num": 0.0272216796875, "loss_xval": 0.578125, "num_input_tokens_seen": 174361812, "step": 2781 }, { "epoch": 9.257903494176373, "grad_norm": 18.96731185913086, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 174424620, "step": 2782 }, { "epoch": 9.257903494176373, "loss": 0.29916518926620483, "loss_ce": 1.3715236946154619e-06, "loss_iou": 0.0927734375, "loss_num": 0.022705078125, "loss_xval": 0.298828125, "num_input_tokens_seen": 174424620, "step": 2782 }, { "epoch": 9.261231281198004, "grad_norm": 8.779787063598633, "learning_rate": 5e-06, "loss": 0.463, "num_input_tokens_seen": 174487456, "step": 2783 }, { "epoch": 9.261231281198004, "loss": 0.40821564197540283, "loss_ce": 1.252237689186586e-05, "loss_iou": 0.146484375, "loss_num": 0.0230712890625, "loss_xval": 0.408203125, "num_input_tokens_seen": 174487456, "step": 2783 }, { "epoch": 9.264559068219635, "grad_norm": 10.011458396911621, "learning_rate": 5e-06, "loss": 0.5854, "num_input_tokens_seen": 174550140, "step": 2784 }, { "epoch": 9.264559068219635, "loss": 0.5038581490516663, "loss_ce": 1.2918044376419857e-05, "loss_iou": 0.171875, "loss_num": 0.031982421875, "loss_xval": 0.50390625, "num_input_tokens_seen": 174550140, "step": 2784 }, { "epoch": 9.267886855241265, "grad_norm": 13.366437911987305, "learning_rate": 5e-06, "loss": 0.5405, "num_input_tokens_seen": 174612024, "step": 2785 }, { "epoch": 9.267886855241265, "loss": 0.42850345373153687, "loss_ce": 0.00015874754171818495, "loss_iou": 0.1494140625, "loss_num": 0.0257568359375, "loss_xval": 0.427734375, "num_input_tokens_seen": 174612024, "step": 2785 }, { "epoch": 9.271214642262896, "grad_norm": 12.592924118041992, "learning_rate": 5e-06, "loss": 0.7583, "num_input_tokens_seen": 174675744, "step": 2786 }, { "epoch": 9.271214642262896, "loss": 0.6576433777809143, "loss_ce": 0.0001726491464069113, "loss_iou": 0.2578125, "loss_num": 0.0284423828125, "loss_xval": 0.65625, "num_input_tokens_seen": 174675744, "step": 2786 }, { "epoch": 9.274542429284526, "grad_norm": 11.616028785705566, "learning_rate": 5e-06, "loss": 0.8347, "num_input_tokens_seen": 174738332, "step": 2787 }, { "epoch": 9.274542429284526, "loss": 1.1231741905212402, "loss_ce": 5.285221050144173e-06, "loss_iou": 0.37890625, "loss_num": 0.0732421875, "loss_xval": 1.125, "num_input_tokens_seen": 174738332, "step": 2787 }, { "epoch": 9.277870216306157, "grad_norm": 10.812943458557129, "learning_rate": 5e-06, "loss": 0.7801, "num_input_tokens_seen": 174802848, "step": 2788 }, { "epoch": 9.277870216306157, "loss": 0.7474313974380493, "loss_ce": 0.00231424393132329, "loss_iou": 0.2734375, "loss_num": 0.0400390625, "loss_xval": 0.74609375, "num_input_tokens_seen": 174802848, "step": 2788 }, { "epoch": 9.281198003327788, "grad_norm": 9.942484855651855, "learning_rate": 5e-06, "loss": 0.7004, "num_input_tokens_seen": 174865212, "step": 2789 }, { "epoch": 9.281198003327788, "loss": 0.6783484220504761, "loss_ce": 3.7371573853306472e-06, "loss_iou": 0.267578125, "loss_num": 0.02880859375, "loss_xval": 0.6796875, "num_input_tokens_seen": 174865212, "step": 2789 }, { "epoch": 9.284525790349418, "grad_norm": 7.543990612030029, "learning_rate": 5e-06, "loss": 0.5031, "num_input_tokens_seen": 174926992, "step": 2790 }, { "epoch": 9.284525790349418, "loss": 0.6254899501800537, "loss_ce": 1.6843619050632697e-06, "loss_iou": 0.2197265625, "loss_num": 0.03759765625, "loss_xval": 0.625, "num_input_tokens_seen": 174926992, "step": 2790 }, { "epoch": 9.287853577371049, "grad_norm": 17.07059097290039, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 174988932, "step": 2791 }, { "epoch": 9.287853577371049, "loss": 0.5114823579788208, "loss_ce": 7.750173608656041e-06, "loss_iou": 0.1962890625, "loss_num": 0.0240478515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 174988932, "step": 2791 }, { "epoch": 9.29118136439268, "grad_norm": 17.189598083496094, "learning_rate": 5e-06, "loss": 0.5448, "num_input_tokens_seen": 175053512, "step": 2792 }, { "epoch": 9.29118136439268, "loss": 0.46077588200569153, "loss_ce": 3.6774814361706376e-05, "loss_iou": 0.1923828125, "loss_num": 0.0152587890625, "loss_xval": 0.4609375, "num_input_tokens_seen": 175053512, "step": 2792 }, { "epoch": 9.29450915141431, "grad_norm": 15.60692310333252, "learning_rate": 5e-06, "loss": 0.6067, "num_input_tokens_seen": 175116692, "step": 2793 }, { "epoch": 9.29450915141431, "loss": 0.834723949432373, "loss_ce": 7.1391132223652676e-06, "loss_iou": 0.291015625, "loss_num": 0.05029296875, "loss_xval": 0.8359375, "num_input_tokens_seen": 175116692, "step": 2793 }, { "epoch": 9.29783693843594, "grad_norm": 8.088654518127441, "learning_rate": 5e-06, "loss": 0.6013, "num_input_tokens_seen": 175179644, "step": 2794 }, { "epoch": 9.29783693843594, "loss": 0.7104343175888062, "loss_ce": 0.00010718373232521117, "loss_iou": 0.275390625, "loss_num": 0.0322265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 175179644, "step": 2794 }, { "epoch": 9.301164725457571, "grad_norm": 26.151933670043945, "learning_rate": 5e-06, "loss": 0.4868, "num_input_tokens_seen": 175243384, "step": 2795 }, { "epoch": 9.301164725457571, "loss": 0.3956838548183441, "loss_ce": 0.00017606203618925065, "loss_iou": 0.16015625, "loss_num": 0.01519775390625, "loss_xval": 0.39453125, "num_input_tokens_seen": 175243384, "step": 2795 }, { "epoch": 9.304492512479202, "grad_norm": 25.242366790771484, "learning_rate": 5e-06, "loss": 0.5228, "num_input_tokens_seen": 175306924, "step": 2796 }, { "epoch": 9.304492512479202, "loss": 0.49152469635009766, "loss_ce": 0.00037478923331946135, "loss_iou": 0.162109375, "loss_num": 0.033447265625, "loss_xval": 0.490234375, "num_input_tokens_seen": 175306924, "step": 2796 }, { "epoch": 9.307820299500833, "grad_norm": 17.728410720825195, "learning_rate": 5e-06, "loss": 0.5733, "num_input_tokens_seen": 175369964, "step": 2797 }, { "epoch": 9.307820299500833, "loss": 0.61460280418396, "loss_ce": 1.698776031844318e-06, "loss_iou": 0.224609375, "loss_num": 0.033447265625, "loss_xval": 0.61328125, "num_input_tokens_seen": 175369964, "step": 2797 }, { "epoch": 9.311148086522463, "grad_norm": 21.072357177734375, "learning_rate": 5e-06, "loss": 0.5018, "num_input_tokens_seen": 175432952, "step": 2798 }, { "epoch": 9.311148086522463, "loss": 0.34997648000717163, "loss_ce": 8.984144415080664e-07, "loss_iou": 0.1328125, "loss_num": 0.016845703125, "loss_xval": 0.349609375, "num_input_tokens_seen": 175432952, "step": 2798 }, { "epoch": 9.314475873544094, "grad_norm": 23.05731964111328, "learning_rate": 5e-06, "loss": 0.6237, "num_input_tokens_seen": 175495260, "step": 2799 }, { "epoch": 9.314475873544094, "loss": 0.6182025074958801, "loss_ce": 3.840016870526597e-05, "loss_iou": 0.2412109375, "loss_num": 0.027099609375, "loss_xval": 0.6171875, "num_input_tokens_seen": 175495260, "step": 2799 }, { "epoch": 9.317803660565724, "grad_norm": 20.148698806762695, "learning_rate": 5e-06, "loss": 0.5762, "num_input_tokens_seen": 175557168, "step": 2800 }, { "epoch": 9.317803660565724, "loss": 0.4740031659603119, "loss_ce": 4.132334197493037e-06, "loss_iou": 0.138671875, "loss_num": 0.039306640625, "loss_xval": 0.474609375, "num_input_tokens_seen": 175557168, "step": 2800 }, { "epoch": 9.321131447587355, "grad_norm": 16.143217086791992, "learning_rate": 5e-06, "loss": 0.5625, "num_input_tokens_seen": 175620516, "step": 2801 }, { "epoch": 9.321131447587355, "loss": 0.675630509853363, "loss_ce": 9.341766417492181e-05, "loss_iou": 0.2578125, "loss_num": 0.031982421875, "loss_xval": 0.67578125, "num_input_tokens_seen": 175620516, "step": 2801 }, { "epoch": 9.324459234608986, "grad_norm": 10.721323013305664, "learning_rate": 5e-06, "loss": 0.5053, "num_input_tokens_seen": 175683968, "step": 2802 }, { "epoch": 9.324459234608986, "loss": 0.5024441480636597, "loss_ce": 2.759701828836114e-06, "loss_iou": 0.1865234375, "loss_num": 0.0260009765625, "loss_xval": 0.50390625, "num_input_tokens_seen": 175683968, "step": 2802 }, { "epoch": 9.327787021630616, "grad_norm": 21.367809295654297, "learning_rate": 5e-06, "loss": 0.5044, "num_input_tokens_seen": 175745980, "step": 2803 }, { "epoch": 9.327787021630616, "loss": 0.6001753211021423, "loss_ce": 0.00044388434616848826, "loss_iou": 0.2138671875, "loss_num": 0.034423828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 175745980, "step": 2803 }, { "epoch": 9.331114808652247, "grad_norm": 29.714889526367188, "learning_rate": 5e-06, "loss": 0.6784, "num_input_tokens_seen": 175809404, "step": 2804 }, { "epoch": 9.331114808652247, "loss": 0.7390154004096985, "loss_ce": 6.277194188442081e-05, "loss_iou": 0.263671875, "loss_num": 0.042236328125, "loss_xval": 0.73828125, "num_input_tokens_seen": 175809404, "step": 2804 }, { "epoch": 9.334442595673877, "grad_norm": 11.817981719970703, "learning_rate": 5e-06, "loss": 0.556, "num_input_tokens_seen": 175872648, "step": 2805 }, { "epoch": 9.334442595673877, "loss": 0.437641441822052, "loss_ce": 0.0003855900140479207, "loss_iou": 0.142578125, "loss_num": 0.0303955078125, "loss_xval": 0.4375, "num_input_tokens_seen": 175872648, "step": 2805 }, { "epoch": 9.337770382695508, "grad_norm": 17.187898635864258, "learning_rate": 5e-06, "loss": 0.5783, "num_input_tokens_seen": 175933996, "step": 2806 }, { "epoch": 9.337770382695508, "loss": 0.7237929105758667, "loss_ce": 3.7953141145408154e-05, "loss_iou": 0.263671875, "loss_num": 0.039306640625, "loss_xval": 0.72265625, "num_input_tokens_seen": 175933996, "step": 2806 }, { "epoch": 9.341098169717139, "grad_norm": 40.25809860229492, "learning_rate": 5e-06, "loss": 0.6483, "num_input_tokens_seen": 175996284, "step": 2807 }, { "epoch": 9.341098169717139, "loss": 0.9205341935157776, "loss_ce": 1.94854692381341e-06, "loss_iou": 0.345703125, "loss_num": 0.045654296875, "loss_xval": 0.921875, "num_input_tokens_seen": 175996284, "step": 2807 }, { "epoch": 9.34442595673877, "grad_norm": 23.069408416748047, "learning_rate": 5e-06, "loss": 0.7172, "num_input_tokens_seen": 176060056, "step": 2808 }, { "epoch": 9.34442595673877, "loss": 0.6950420141220093, "loss_ce": 3.4653727198019624e-05, "loss_iou": 0.25390625, "loss_num": 0.037841796875, "loss_xval": 0.6953125, "num_input_tokens_seen": 176060056, "step": 2808 }, { "epoch": 9.3477537437604, "grad_norm": 7.635735988616943, "learning_rate": 5e-06, "loss": 0.5942, "num_input_tokens_seen": 176122068, "step": 2809 }, { "epoch": 9.3477537437604, "loss": 0.3765885531902313, "loss_ce": 1.6480935300933197e-06, "loss_iou": 0.10546875, "loss_num": 0.033203125, "loss_xval": 0.376953125, "num_input_tokens_seen": 176122068, "step": 2809 }, { "epoch": 9.35108153078203, "grad_norm": 9.234174728393555, "learning_rate": 5e-06, "loss": 0.458, "num_input_tokens_seen": 176184328, "step": 2810 }, { "epoch": 9.35108153078203, "loss": 0.5166089534759521, "loss_ce": 7.344293408095837e-06, "loss_iou": 0.216796875, "loss_num": 0.016845703125, "loss_xval": 0.515625, "num_input_tokens_seen": 176184328, "step": 2810 }, { "epoch": 9.354409317803661, "grad_norm": 8.30647087097168, "learning_rate": 5e-06, "loss": 0.5591, "num_input_tokens_seen": 176247504, "step": 2811 }, { "epoch": 9.354409317803661, "loss": 0.5996133089065552, "loss_ce": 3.879905307258014e-06, "loss_iou": 0.2060546875, "loss_num": 0.037353515625, "loss_xval": 0.6015625, "num_input_tokens_seen": 176247504, "step": 2811 }, { "epoch": 9.357737104825292, "grad_norm": 21.501352310180664, "learning_rate": 5e-06, "loss": 0.5835, "num_input_tokens_seen": 176310244, "step": 2812 }, { "epoch": 9.357737104825292, "loss": 0.7054111957550049, "loss_ce": 0.00021104140614625067, "loss_iou": 0.28125, "loss_num": 0.028564453125, "loss_xval": 0.70703125, "num_input_tokens_seen": 176310244, "step": 2812 }, { "epoch": 9.361064891846922, "grad_norm": 30.854997634887695, "learning_rate": 5e-06, "loss": 0.4154, "num_input_tokens_seen": 176372252, "step": 2813 }, { "epoch": 9.361064891846922, "loss": 0.44800567626953125, "loss_ce": 7.633727364009246e-06, "loss_iou": 0.1494140625, "loss_num": 0.030029296875, "loss_xval": 0.447265625, "num_input_tokens_seen": 176372252, "step": 2813 }, { "epoch": 9.364392678868553, "grad_norm": 22.772724151611328, "learning_rate": 5e-06, "loss": 0.5675, "num_input_tokens_seen": 176434952, "step": 2814 }, { "epoch": 9.364392678868553, "loss": 0.848635196685791, "loss_ce": 2.4447733721899567e-06, "loss_iou": 0.333984375, "loss_num": 0.0361328125, "loss_xval": 0.84765625, "num_input_tokens_seen": 176434952, "step": 2814 }, { "epoch": 9.367720465890184, "grad_norm": 14.045674324035645, "learning_rate": 5e-06, "loss": 0.6294, "num_input_tokens_seen": 176499376, "step": 2815 }, { "epoch": 9.367720465890184, "loss": 0.7235807180404663, "loss_ce": 0.0005582500016316772, "loss_iou": 0.283203125, "loss_num": 0.0308837890625, "loss_xval": 0.72265625, "num_input_tokens_seen": 176499376, "step": 2815 }, { "epoch": 9.371048252911814, "grad_norm": 25.543506622314453, "learning_rate": 5e-06, "loss": 0.5517, "num_input_tokens_seen": 176561408, "step": 2816 }, { "epoch": 9.371048252911814, "loss": 0.512883186340332, "loss_ce": 0.0004320646112319082, "loss_iou": 0.1962890625, "loss_num": 0.0238037109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 176561408, "step": 2816 }, { "epoch": 9.374376039933445, "grad_norm": 25.772268295288086, "learning_rate": 5e-06, "loss": 0.4737, "num_input_tokens_seen": 176624000, "step": 2817 }, { "epoch": 9.374376039933445, "loss": 0.4772736132144928, "loss_ce": 0.000650081958156079, "loss_iou": 0.162109375, "loss_num": 0.03076171875, "loss_xval": 0.4765625, "num_input_tokens_seen": 176624000, "step": 2817 }, { "epoch": 9.377703826955075, "grad_norm": 7.208528518676758, "learning_rate": 5e-06, "loss": 0.3208, "num_input_tokens_seen": 176686520, "step": 2818 }, { "epoch": 9.377703826955075, "loss": 0.38058602809906006, "loss_ce": 1.2911316389363492e-06, "loss_iou": 0.142578125, "loss_num": 0.0191650390625, "loss_xval": 0.380859375, "num_input_tokens_seen": 176686520, "step": 2818 }, { "epoch": 9.381031613976706, "grad_norm": 21.487192153930664, "learning_rate": 5e-06, "loss": 0.4346, "num_input_tokens_seen": 176748024, "step": 2819 }, { "epoch": 9.381031613976706, "loss": 0.4281339645385742, "loss_ce": 2.8750453111570096e-06, "loss_iou": 0.11474609375, "loss_num": 0.03955078125, "loss_xval": 0.427734375, "num_input_tokens_seen": 176748024, "step": 2819 }, { "epoch": 9.384359400998337, "grad_norm": 17.75954246520996, "learning_rate": 5e-06, "loss": 0.6609, "num_input_tokens_seen": 176811896, "step": 2820 }, { "epoch": 9.384359400998337, "loss": 0.7659556269645691, "loss_ce": 0.0012461732840165496, "loss_iou": 0.271484375, "loss_num": 0.04443359375, "loss_xval": 0.765625, "num_input_tokens_seen": 176811896, "step": 2820 }, { "epoch": 9.387687188019967, "grad_norm": 11.192198753356934, "learning_rate": 5e-06, "loss": 0.3933, "num_input_tokens_seen": 176874660, "step": 2821 }, { "epoch": 9.387687188019967, "loss": 0.4472675025463104, "loss_ce": 1.8862117485696217e-06, "loss_iou": 0.13671875, "loss_num": 0.03466796875, "loss_xval": 0.447265625, "num_input_tokens_seen": 176874660, "step": 2821 }, { "epoch": 9.391014975041598, "grad_norm": 14.579025268554688, "learning_rate": 5e-06, "loss": 0.6258, "num_input_tokens_seen": 176937604, "step": 2822 }, { "epoch": 9.391014975041598, "loss": 0.6241573095321655, "loss_ce": 0.0007288857595995069, "loss_iou": 0.1875, "loss_num": 0.0498046875, "loss_xval": 0.625, "num_input_tokens_seen": 176937604, "step": 2822 }, { "epoch": 9.394342762063228, "grad_norm": 12.839289665222168, "learning_rate": 5e-06, "loss": 0.6168, "num_input_tokens_seen": 177000932, "step": 2823 }, { "epoch": 9.394342762063228, "loss": 0.5619252920150757, "loss_ce": 5.150438028067583e-06, "loss_iou": 0.1904296875, "loss_num": 0.036376953125, "loss_xval": 0.5625, "num_input_tokens_seen": 177000932, "step": 2823 }, { "epoch": 9.397670549084859, "grad_norm": 9.213924407958984, "learning_rate": 5e-06, "loss": 0.4719, "num_input_tokens_seen": 177063808, "step": 2824 }, { "epoch": 9.397670549084859, "loss": 0.5537137389183044, "loss_ce": 2.792497753034695e-06, "loss_iou": 0.189453125, "loss_num": 0.034912109375, "loss_xval": 0.5546875, "num_input_tokens_seen": 177063808, "step": 2824 }, { "epoch": 9.40099833610649, "grad_norm": 7.611035346984863, "learning_rate": 5e-06, "loss": 0.4859, "num_input_tokens_seen": 177126396, "step": 2825 }, { "epoch": 9.40099833610649, "loss": 0.5038723945617676, "loss_ce": 0.0006985652726143599, "loss_iou": 0.146484375, "loss_num": 0.042236328125, "loss_xval": 0.50390625, "num_input_tokens_seen": 177126396, "step": 2825 }, { "epoch": 9.40432612312812, "grad_norm": 9.468575477600098, "learning_rate": 5e-06, "loss": 0.5623, "num_input_tokens_seen": 177189056, "step": 2826 }, { "epoch": 9.40432612312812, "loss": 0.6276854872703552, "loss_ce": 0.00024407866294495761, "loss_iou": 0.2119140625, "loss_num": 0.041015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 177189056, "step": 2826 }, { "epoch": 9.407653910149751, "grad_norm": 19.075319290161133, "learning_rate": 5e-06, "loss": 0.5034, "num_input_tokens_seen": 177251428, "step": 2827 }, { "epoch": 9.407653910149751, "loss": 0.4444577693939209, "loss_ce": 6.076990393921733e-05, "loss_iou": 0.177734375, "loss_num": 0.017822265625, "loss_xval": 0.4453125, "num_input_tokens_seen": 177251428, "step": 2827 }, { "epoch": 9.410981697171382, "grad_norm": 11.264944076538086, "learning_rate": 5e-06, "loss": 0.4954, "num_input_tokens_seen": 177312672, "step": 2828 }, { "epoch": 9.410981697171382, "loss": 0.5504429936408997, "loss_ce": 0.0005925772711634636, "loss_iou": 0.18359375, "loss_num": 0.036376953125, "loss_xval": 0.55078125, "num_input_tokens_seen": 177312672, "step": 2828 }, { "epoch": 9.414309484193012, "grad_norm": 8.11817455291748, "learning_rate": 5e-06, "loss": 0.6285, "num_input_tokens_seen": 177377204, "step": 2829 }, { "epoch": 9.414309484193012, "loss": 0.4997619092464447, "loss_ce": 6.708334694849327e-05, "loss_iou": 0.1357421875, "loss_num": 0.04541015625, "loss_xval": 0.5, "num_input_tokens_seen": 177377204, "step": 2829 }, { "epoch": 9.417637271214643, "grad_norm": 18.523183822631836, "learning_rate": 5e-06, "loss": 0.6501, "num_input_tokens_seen": 177439244, "step": 2830 }, { "epoch": 9.417637271214643, "loss": 0.5413879156112671, "loss_ce": 6.078982551116496e-06, "loss_iou": 0.2109375, "loss_num": 0.02392578125, "loss_xval": 0.54296875, "num_input_tokens_seen": 177439244, "step": 2830 }, { "epoch": 9.420965058236273, "grad_norm": 48.59062576293945, "learning_rate": 5e-06, "loss": 0.7548, "num_input_tokens_seen": 177503124, "step": 2831 }, { "epoch": 9.420965058236273, "loss": 0.8572800159454346, "loss_ce": 0.0005905752768740058, "loss_iou": 0.328125, "loss_num": 0.0400390625, "loss_xval": 0.85546875, "num_input_tokens_seen": 177503124, "step": 2831 }, { "epoch": 9.424292845257904, "grad_norm": 51.041690826416016, "learning_rate": 5e-06, "loss": 0.9129, "num_input_tokens_seen": 177565984, "step": 2832 }, { "epoch": 9.424292845257904, "loss": 0.6420590877532959, "loss_ce": 3.032462154806126e-05, "loss_iou": 0.251953125, "loss_num": 0.0274658203125, "loss_xval": 0.640625, "num_input_tokens_seen": 177565984, "step": 2832 }, { "epoch": 9.427620632279535, "grad_norm": 15.153587341308594, "learning_rate": 5e-06, "loss": 0.4854, "num_input_tokens_seen": 177625764, "step": 2833 }, { "epoch": 9.427620632279535, "loss": 0.443972110748291, "loss_ce": 2.3767706807120703e-06, "loss_iou": 0.1357421875, "loss_num": 0.034423828125, "loss_xval": 0.443359375, "num_input_tokens_seen": 177625764, "step": 2833 }, { "epoch": 9.430948419301165, "grad_norm": 11.895259857177734, "learning_rate": 5e-06, "loss": 0.6362, "num_input_tokens_seen": 177688336, "step": 2834 }, { "epoch": 9.430948419301165, "loss": 0.7607632875442505, "loss_ce": 2.1101297534187324e-05, "loss_iou": 0.2890625, "loss_num": 0.036376953125, "loss_xval": 0.76171875, "num_input_tokens_seen": 177688336, "step": 2834 }, { "epoch": 9.434276206322796, "grad_norm": 15.875905990600586, "learning_rate": 5e-06, "loss": 0.593, "num_input_tokens_seen": 177751400, "step": 2835 }, { "epoch": 9.434276206322796, "loss": 0.6878855228424072, "loss_ce": 1.9327684640302323e-05, "loss_iou": 0.271484375, "loss_num": 0.0289306640625, "loss_xval": 0.6875, "num_input_tokens_seen": 177751400, "step": 2835 }, { "epoch": 9.437603993344426, "grad_norm": 34.34024429321289, "learning_rate": 5e-06, "loss": 0.8352, "num_input_tokens_seen": 177812924, "step": 2836 }, { "epoch": 9.437603993344426, "loss": 1.0024514198303223, "loss_ce": 1.010709547699662e-05, "loss_iou": 0.34765625, "loss_num": 0.061279296875, "loss_xval": 1.0, "num_input_tokens_seen": 177812924, "step": 2836 }, { "epoch": 9.440931780366057, "grad_norm": 30.03080177307129, "learning_rate": 5e-06, "loss": 0.5621, "num_input_tokens_seen": 177876372, "step": 2837 }, { "epoch": 9.440931780366057, "loss": 0.6658174991607666, "loss_ce": 4.603321576723829e-05, "loss_iou": 0.232421875, "loss_num": 0.04052734375, "loss_xval": 0.6640625, "num_input_tokens_seen": 177876372, "step": 2837 }, { "epoch": 9.444259567387688, "grad_norm": 48.5079460144043, "learning_rate": 5e-06, "loss": 0.7197, "num_input_tokens_seen": 177940356, "step": 2838 }, { "epoch": 9.444259567387688, "loss": 0.7403602004051208, "loss_ce": 3.7246804822643753e-06, "loss_iou": 0.296875, "loss_num": 0.0296630859375, "loss_xval": 0.7421875, "num_input_tokens_seen": 177940356, "step": 2838 }, { "epoch": 9.447587354409318, "grad_norm": 31.359596252441406, "learning_rate": 5e-06, "loss": 0.6751, "num_input_tokens_seen": 178002756, "step": 2839 }, { "epoch": 9.447587354409318, "loss": 0.7134596705436707, "loss_ce": 0.00014179470599628985, "loss_iou": 0.2490234375, "loss_num": 0.043212890625, "loss_xval": 0.71484375, "num_input_tokens_seen": 178002756, "step": 2839 }, { "epoch": 9.450915141430949, "grad_norm": 5.147191047668457, "learning_rate": 5e-06, "loss": 0.4251, "num_input_tokens_seen": 178064208, "step": 2840 }, { "epoch": 9.450915141430949, "loss": 0.4888937771320343, "loss_ce": 2.169248546124436e-06, "loss_iou": 0.162109375, "loss_num": 0.033203125, "loss_xval": 0.48828125, "num_input_tokens_seen": 178064208, "step": 2840 }, { "epoch": 9.45424292845258, "grad_norm": 8.288912773132324, "learning_rate": 5e-06, "loss": 0.4888, "num_input_tokens_seen": 178127236, "step": 2841 }, { "epoch": 9.45424292845258, "loss": 0.26867374777793884, "loss_ce": 0.00011905832798220217, "loss_iou": 0.103515625, "loss_num": 0.012451171875, "loss_xval": 0.26953125, "num_input_tokens_seen": 178127236, "step": 2841 }, { "epoch": 9.45757071547421, "grad_norm": 7.269131183624268, "learning_rate": 5e-06, "loss": 0.5197, "num_input_tokens_seen": 178190516, "step": 2842 }, { "epoch": 9.45757071547421, "loss": 0.3814705014228821, "loss_ce": 7.613942898387904e-07, "loss_iou": 0.1298828125, "loss_num": 0.0244140625, "loss_xval": 0.380859375, "num_input_tokens_seen": 178190516, "step": 2842 }, { "epoch": 9.46089850249584, "grad_norm": 50.7984619140625, "learning_rate": 5e-06, "loss": 0.5005, "num_input_tokens_seen": 178253212, "step": 2843 }, { "epoch": 9.46089850249584, "loss": 0.49420326948165894, "loss_ce": 1.606902401363186e-06, "loss_iou": 0.203125, "loss_num": 0.017578125, "loss_xval": 0.494140625, "num_input_tokens_seen": 178253212, "step": 2843 }, { "epoch": 9.464226289517471, "grad_norm": 40.14226150512695, "learning_rate": 5e-06, "loss": 0.7844, "num_input_tokens_seen": 178315656, "step": 2844 }, { "epoch": 9.464226289517471, "loss": 0.80559241771698, "loss_ce": 4.673010607803008e-06, "loss_iou": 0.294921875, "loss_num": 0.043212890625, "loss_xval": 0.8046875, "num_input_tokens_seen": 178315656, "step": 2844 }, { "epoch": 9.467554076539102, "grad_norm": 16.32898712158203, "learning_rate": 5e-06, "loss": 0.4149, "num_input_tokens_seen": 178376000, "step": 2845 }, { "epoch": 9.467554076539102, "loss": 0.35608112812042236, "loss_ce": 2.0209176909702364e-06, "loss_iou": 0.1279296875, "loss_num": 0.0198974609375, "loss_xval": 0.35546875, "num_input_tokens_seen": 178376000, "step": 2845 }, { "epoch": 9.470881863560733, "grad_norm": 11.813389778137207, "learning_rate": 5e-06, "loss": 0.5371, "num_input_tokens_seen": 178438116, "step": 2846 }, { "epoch": 9.470881863560733, "loss": 0.5391201376914978, "loss_ce": 0.00017969519831240177, "loss_iou": 0.19140625, "loss_num": 0.03125, "loss_xval": 0.5390625, "num_input_tokens_seen": 178438116, "step": 2846 }, { "epoch": 9.474209650582363, "grad_norm": 14.114508628845215, "learning_rate": 5e-06, "loss": 0.4602, "num_input_tokens_seen": 178500764, "step": 2847 }, { "epoch": 9.474209650582363, "loss": 0.5838631391525269, "loss_ce": 0.00012285553384572268, "loss_iou": 0.244140625, "loss_num": 0.0191650390625, "loss_xval": 0.58203125, "num_input_tokens_seen": 178500764, "step": 2847 }, { "epoch": 9.477537437603994, "grad_norm": 79.6426773071289, "learning_rate": 5e-06, "loss": 0.4757, "num_input_tokens_seen": 178562416, "step": 2848 }, { "epoch": 9.477537437603994, "loss": 0.2491786628961563, "loss_ce": 2.6319423795939656e-06, "loss_iou": 0.078125, "loss_num": 0.0185546875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 178562416, "step": 2848 }, { "epoch": 9.480865224625624, "grad_norm": 16.811922073364258, "learning_rate": 5e-06, "loss": 0.5544, "num_input_tokens_seen": 178624668, "step": 2849 }, { "epoch": 9.480865224625624, "loss": 0.6341567039489746, "loss_ce": 1.4434843933486263e-06, "loss_iou": 0.255859375, "loss_num": 0.0245361328125, "loss_xval": 0.6328125, "num_input_tokens_seen": 178624668, "step": 2849 }, { "epoch": 9.484193011647255, "grad_norm": 29.526060104370117, "learning_rate": 5e-06, "loss": 0.7433, "num_input_tokens_seen": 178687968, "step": 2850 }, { "epoch": 9.484193011647255, "loss": 0.873848021030426, "loss_ce": 6.871817458886653e-05, "loss_iou": 0.306640625, "loss_num": 0.052001953125, "loss_xval": 0.875, "num_input_tokens_seen": 178687968, "step": 2850 }, { "epoch": 9.487520798668886, "grad_norm": 37.23567199707031, "learning_rate": 5e-06, "loss": 0.692, "num_input_tokens_seen": 178750484, "step": 2851 }, { "epoch": 9.487520798668886, "loss": 0.759718656539917, "loss_ce": 0.00044129352318122983, "loss_iou": 0.265625, "loss_num": 0.045654296875, "loss_xval": 0.7578125, "num_input_tokens_seen": 178750484, "step": 2851 }, { "epoch": 9.490848585690516, "grad_norm": 23.00562286376953, "learning_rate": 5e-06, "loss": 0.4484, "num_input_tokens_seen": 178812532, "step": 2852 }, { "epoch": 9.490848585690516, "loss": 0.39591747522354126, "loss_ce": 0.0003486370842438191, "loss_iou": 0.158203125, "loss_num": 0.015625, "loss_xval": 0.396484375, "num_input_tokens_seen": 178812532, "step": 2852 }, { "epoch": 9.494176372712147, "grad_norm": 10.81666374206543, "learning_rate": 5e-06, "loss": 0.6804, "num_input_tokens_seen": 178873956, "step": 2853 }, { "epoch": 9.494176372712147, "loss": 0.5655537843704224, "loss_ce": 2.0092106751690153e-06, "loss_iou": 0.1689453125, "loss_num": 0.045654296875, "loss_xval": 0.56640625, "num_input_tokens_seen": 178873956, "step": 2853 }, { "epoch": 9.497504159733777, "grad_norm": 9.439098358154297, "learning_rate": 5e-06, "loss": 0.7262, "num_input_tokens_seen": 178936012, "step": 2854 }, { "epoch": 9.497504159733777, "loss": 0.9835778474807739, "loss_ce": 0.0008508089231327176, "loss_iou": 0.35546875, "loss_num": 0.0537109375, "loss_xval": 0.984375, "num_input_tokens_seen": 178936012, "step": 2854 }, { "epoch": 9.500831946755408, "grad_norm": 20.610403060913086, "learning_rate": 5e-06, "loss": 0.4584, "num_input_tokens_seen": 178997060, "step": 2855 }, { "epoch": 9.500831946755408, "loss": 0.5353657007217407, "loss_ce": 0.0003315026988275349, "loss_iou": 0.197265625, "loss_num": 0.028076171875, "loss_xval": 0.53515625, "num_input_tokens_seen": 178997060, "step": 2855 }, { "epoch": 9.504159733777039, "grad_norm": 10.486116409301758, "learning_rate": 5e-06, "loss": 0.3713, "num_input_tokens_seen": 179058876, "step": 2856 }, { "epoch": 9.504159733777039, "loss": 0.4641799032688141, "loss_ce": 7.550125246780226e-06, "loss_iou": 0.1650390625, "loss_num": 0.0269775390625, "loss_xval": 0.46484375, "num_input_tokens_seen": 179058876, "step": 2856 }, { "epoch": 9.50748752079867, "grad_norm": 12.751068115234375, "learning_rate": 5e-06, "loss": 0.5818, "num_input_tokens_seen": 179122340, "step": 2857 }, { "epoch": 9.50748752079867, "loss": 0.7475054860115051, "loss_ce": 0.0003131235425826162, "loss_iou": 0.30078125, "loss_num": 0.02880859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 179122340, "step": 2857 }, { "epoch": 9.5108153078203, "grad_norm": 17.553115844726562, "learning_rate": 5e-06, "loss": 0.5349, "num_input_tokens_seen": 179186192, "step": 2858 }, { "epoch": 9.5108153078203, "loss": 0.778904914855957, "loss_ce": 0.0009507741197012365, "loss_iou": 0.294921875, "loss_num": 0.037353515625, "loss_xval": 0.77734375, "num_input_tokens_seen": 179186192, "step": 2858 }, { "epoch": 9.51414309484193, "grad_norm": 47.813724517822266, "learning_rate": 5e-06, "loss": 0.6812, "num_input_tokens_seen": 179247968, "step": 2859 }, { "epoch": 9.51414309484193, "loss": 0.8396366834640503, "loss_ce": 3.705518611241132e-05, "loss_iou": 0.330078125, "loss_num": 0.0361328125, "loss_xval": 0.83984375, "num_input_tokens_seen": 179247968, "step": 2859 }, { "epoch": 9.517470881863561, "grad_norm": 33.87745666503906, "learning_rate": 5e-06, "loss": 0.6968, "num_input_tokens_seen": 179310508, "step": 2860 }, { "epoch": 9.517470881863561, "loss": 0.725839376449585, "loss_ce": 0.0002534246305003762, "loss_iou": 0.27734375, "loss_num": 0.03369140625, "loss_xval": 0.7265625, "num_input_tokens_seen": 179310508, "step": 2860 }, { "epoch": 9.520798668885192, "grad_norm": 24.744770050048828, "learning_rate": 5e-06, "loss": 0.6927, "num_input_tokens_seen": 179373524, "step": 2861 }, { "epoch": 9.520798668885192, "loss": 0.5446223020553589, "loss_ce": 6.664014654234052e-05, "loss_iou": 0.2060546875, "loss_num": 0.026611328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 179373524, "step": 2861 }, { "epoch": 9.524126455906822, "grad_norm": 17.483234405517578, "learning_rate": 5e-06, "loss": 0.605, "num_input_tokens_seen": 179435608, "step": 2862 }, { "epoch": 9.524126455906822, "loss": 0.5129495859146118, "loss_ce": 1.0159601515624672e-05, "loss_iou": 0.2109375, "loss_num": 0.01806640625, "loss_xval": 0.51171875, "num_input_tokens_seen": 179435608, "step": 2862 }, { "epoch": 9.527454242928453, "grad_norm": 15.151481628417969, "learning_rate": 5e-06, "loss": 0.501, "num_input_tokens_seen": 179498696, "step": 2863 }, { "epoch": 9.527454242928453, "loss": 0.7478040456771851, "loss_ce": 1.3369547104957746e-06, "loss_iou": 0.2734375, "loss_num": 0.039794921875, "loss_xval": 0.74609375, "num_input_tokens_seen": 179498696, "step": 2863 }, { "epoch": 9.530782029950084, "grad_norm": 9.191595077514648, "learning_rate": 5e-06, "loss": 0.5611, "num_input_tokens_seen": 179561196, "step": 2864 }, { "epoch": 9.530782029950084, "loss": 0.3617577850818634, "loss_ce": 2.4161467990779784e-06, "loss_iou": 0.111328125, "loss_num": 0.02783203125, "loss_xval": 0.361328125, "num_input_tokens_seen": 179561196, "step": 2864 }, { "epoch": 9.534109816971714, "grad_norm": 8.483697891235352, "learning_rate": 5e-06, "loss": 0.5128, "num_input_tokens_seen": 179624368, "step": 2865 }, { "epoch": 9.534109816971714, "loss": 0.4834626317024231, "loss_ce": 0.00043040141463279724, "loss_iou": 0.1767578125, "loss_num": 0.025634765625, "loss_xval": 0.482421875, "num_input_tokens_seen": 179624368, "step": 2865 }, { "epoch": 9.537437603993345, "grad_norm": 24.02324676513672, "learning_rate": 5e-06, "loss": 0.5273, "num_input_tokens_seen": 179686408, "step": 2866 }, { "epoch": 9.537437603993345, "loss": 0.8202345371246338, "loss_ce": 0.0006544209318235517, "loss_iou": 0.345703125, "loss_num": 0.025634765625, "loss_xval": 0.8203125, "num_input_tokens_seen": 179686408, "step": 2866 }, { "epoch": 9.540765391014975, "grad_norm": 17.56954574584961, "learning_rate": 5e-06, "loss": 0.5138, "num_input_tokens_seen": 179748444, "step": 2867 }, { "epoch": 9.540765391014975, "loss": 0.3165045976638794, "loss_ce": 0.00022041713236831129, "loss_iou": 0.087890625, "loss_num": 0.028076171875, "loss_xval": 0.31640625, "num_input_tokens_seen": 179748444, "step": 2867 }, { "epoch": 9.544093178036606, "grad_norm": 13.504623413085938, "learning_rate": 5e-06, "loss": 0.6758, "num_input_tokens_seen": 179810748, "step": 2868 }, { "epoch": 9.544093178036606, "loss": 0.6726351976394653, "loss_ce": 0.0013095358153805137, "loss_iou": 0.26171875, "loss_num": 0.029296875, "loss_xval": 0.671875, "num_input_tokens_seen": 179810748, "step": 2868 }, { "epoch": 9.547420965058237, "grad_norm": 14.49623966217041, "learning_rate": 5e-06, "loss": 0.5531, "num_input_tokens_seen": 179874420, "step": 2869 }, { "epoch": 9.547420965058237, "loss": 0.44169145822525024, "loss_ce": 0.0005293316207826138, "loss_iou": 0.142578125, "loss_num": 0.031005859375, "loss_xval": 0.44140625, "num_input_tokens_seen": 179874420, "step": 2869 }, { "epoch": 9.550748752079867, "grad_norm": 8.936128616333008, "learning_rate": 5e-06, "loss": 0.5063, "num_input_tokens_seen": 179935580, "step": 2870 }, { "epoch": 9.550748752079867, "loss": 0.667414665222168, "loss_ce": 5.6251592468470335e-05, "loss_iou": 0.265625, "loss_num": 0.02734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 179935580, "step": 2870 }, { "epoch": 9.554076539101498, "grad_norm": 14.153772354125977, "learning_rate": 5e-06, "loss": 0.5099, "num_input_tokens_seen": 179997036, "step": 2871 }, { "epoch": 9.554076539101498, "loss": 0.6683484315872192, "loss_ce": 0.0002576397091615945, "loss_iou": 0.26171875, "loss_num": 0.0291748046875, "loss_xval": 0.66796875, "num_input_tokens_seen": 179997036, "step": 2871 }, { "epoch": 9.557404326123129, "grad_norm": 21.987409591674805, "learning_rate": 5e-06, "loss": 0.4353, "num_input_tokens_seen": 180059256, "step": 2872 }, { "epoch": 9.557404326123129, "loss": 0.4719490706920624, "loss_ce": 0.00014728913083672523, "loss_iou": 0.166015625, "loss_num": 0.0279541015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 180059256, "step": 2872 }, { "epoch": 9.56073211314476, "grad_norm": 19.800947189331055, "learning_rate": 5e-06, "loss": 0.4392, "num_input_tokens_seen": 180121388, "step": 2873 }, { "epoch": 9.56073211314476, "loss": 0.5235124826431274, "loss_ce": 0.00019701708515640348, "loss_iou": 0.1943359375, "loss_num": 0.02685546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 180121388, "step": 2873 }, { "epoch": 9.56405990016639, "grad_norm": 10.601434707641602, "learning_rate": 5e-06, "loss": 0.4846, "num_input_tokens_seen": 180184132, "step": 2874 }, { "epoch": 9.56405990016639, "loss": 0.31836026906967163, "loss_ce": 8.875696835275448e-07, "loss_iou": 0.09375, "loss_num": 0.026123046875, "loss_xval": 0.318359375, "num_input_tokens_seen": 180184132, "step": 2874 }, { "epoch": 9.56738768718802, "grad_norm": 11.481494903564453, "learning_rate": 5e-06, "loss": 0.4257, "num_input_tokens_seen": 180247272, "step": 2875 }, { "epoch": 9.56738768718802, "loss": 0.33598166704177856, "loss_ce": 0.00010521389049245045, "loss_iou": 0.12890625, "loss_num": 0.015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 180247272, "step": 2875 }, { "epoch": 9.570715474209651, "grad_norm": 10.37196159362793, "learning_rate": 5e-06, "loss": 0.5299, "num_input_tokens_seen": 180310324, "step": 2876 }, { "epoch": 9.570715474209651, "loss": 0.6017493009567261, "loss_ce": 3.6752658161276486e-06, "loss_iou": 0.224609375, "loss_num": 0.0303955078125, "loss_xval": 0.6015625, "num_input_tokens_seen": 180310324, "step": 2876 }, { "epoch": 9.574043261231282, "grad_norm": 14.631085395812988, "learning_rate": 5e-06, "loss": 0.5968, "num_input_tokens_seen": 180373596, "step": 2877 }, { "epoch": 9.574043261231282, "loss": 0.5733333230018616, "loss_ce": 3.0057406547712162e-05, "loss_iou": 0.21875, "loss_num": 0.0272216796875, "loss_xval": 0.57421875, "num_input_tokens_seen": 180373596, "step": 2877 }, { "epoch": 9.577371048252912, "grad_norm": 19.152679443359375, "learning_rate": 5e-06, "loss": 0.5592, "num_input_tokens_seen": 180436424, "step": 2878 }, { "epoch": 9.577371048252912, "loss": 0.6669936180114746, "loss_ce": 1.4542742974299472e-06, "loss_iou": 0.2421875, "loss_num": 0.036376953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 180436424, "step": 2878 }, { "epoch": 9.580698835274543, "grad_norm": 14.985772132873535, "learning_rate": 5e-06, "loss": 0.4739, "num_input_tokens_seen": 180497540, "step": 2879 }, { "epoch": 9.580698835274543, "loss": 0.7291678190231323, "loss_ce": 0.0009574173600412905, "loss_iou": 0.263671875, "loss_num": 0.0400390625, "loss_xval": 0.7265625, "num_input_tokens_seen": 180497540, "step": 2879 }, { "epoch": 9.584026622296173, "grad_norm": 8.409133911132812, "learning_rate": 5e-06, "loss": 0.3915, "num_input_tokens_seen": 180558920, "step": 2880 }, { "epoch": 9.584026622296173, "loss": 0.287842720746994, "loss_ce": 9.367062148157856e-07, "loss_iou": 0.11279296875, "loss_num": 0.01251220703125, "loss_xval": 0.287109375, "num_input_tokens_seen": 180558920, "step": 2880 }, { "epoch": 9.587354409317804, "grad_norm": 16.330278396606445, "learning_rate": 5e-06, "loss": 0.6191, "num_input_tokens_seen": 180622320, "step": 2881 }, { "epoch": 9.587354409317804, "loss": 0.6613394021987915, "loss_ce": 8.448536391369998e-05, "loss_iou": 0.2314453125, "loss_num": 0.03955078125, "loss_xval": 0.66015625, "num_input_tokens_seen": 180622320, "step": 2881 }, { "epoch": 9.590682196339435, "grad_norm": 36.16854476928711, "learning_rate": 5e-06, "loss": 0.535, "num_input_tokens_seen": 180686192, "step": 2882 }, { "epoch": 9.590682196339435, "loss": 0.5808942317962646, "loss_ce": 0.000449948973255232, "loss_iou": 0.251953125, "loss_num": 0.014892578125, "loss_xval": 0.58203125, "num_input_tokens_seen": 180686192, "step": 2882 }, { "epoch": 9.594009983361065, "grad_norm": 27.17749786376953, "learning_rate": 5e-06, "loss": 0.5662, "num_input_tokens_seen": 180748728, "step": 2883 }, { "epoch": 9.594009983361065, "loss": 0.5452895164489746, "loss_ce": 1.4504679484161898e-06, "loss_iou": 0.1875, "loss_num": 0.033935546875, "loss_xval": 0.546875, "num_input_tokens_seen": 180748728, "step": 2883 }, { "epoch": 9.597337770382696, "grad_norm": 15.235310554504395, "learning_rate": 5e-06, "loss": 0.6021, "num_input_tokens_seen": 180811784, "step": 2884 }, { "epoch": 9.597337770382696, "loss": 0.6982437968254089, "loss_ce": 1.6466378838231321e-06, "loss_iou": 0.2890625, "loss_num": 0.0238037109375, "loss_xval": 0.69921875, "num_input_tokens_seen": 180811784, "step": 2884 }, { "epoch": 9.600665557404326, "grad_norm": 11.813919067382812, "learning_rate": 5e-06, "loss": 0.5298, "num_input_tokens_seen": 180871996, "step": 2885 }, { "epoch": 9.600665557404326, "loss": 0.4959532916545868, "loss_ce": 0.00010368620860390365, "loss_iou": 0.1630859375, "loss_num": 0.033935546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 180871996, "step": 2885 }, { "epoch": 9.603993344425957, "grad_norm": 19.253658294677734, "learning_rate": 5e-06, "loss": 0.578, "num_input_tokens_seen": 180934672, "step": 2886 }, { "epoch": 9.603993344425957, "loss": 0.556031346321106, "loss_ce": 0.0010997041827067733, "loss_iou": 0.19921875, "loss_num": 0.031494140625, "loss_xval": 0.5546875, "num_input_tokens_seen": 180934672, "step": 2886 }, { "epoch": 9.607321131447588, "grad_norm": 17.63612174987793, "learning_rate": 5e-06, "loss": 0.4544, "num_input_tokens_seen": 180996132, "step": 2887 }, { "epoch": 9.607321131447588, "loss": 0.474445104598999, "loss_ce": 1.884983248601202e-05, "loss_iou": 0.2021484375, "loss_num": 0.0137939453125, "loss_xval": 0.474609375, "num_input_tokens_seen": 180996132, "step": 2887 }, { "epoch": 9.610648918469218, "grad_norm": 17.657238006591797, "learning_rate": 5e-06, "loss": 0.4489, "num_input_tokens_seen": 181059620, "step": 2888 }, { "epoch": 9.610648918469218, "loss": 0.4900582432746887, "loss_ce": 0.0010445851366966963, "loss_iou": 0.1875, "loss_num": 0.022705078125, "loss_xval": 0.48828125, "num_input_tokens_seen": 181059620, "step": 2888 }, { "epoch": 9.613976705490849, "grad_norm": 11.642956733703613, "learning_rate": 5e-06, "loss": 0.5664, "num_input_tokens_seen": 181122012, "step": 2889 }, { "epoch": 9.613976705490849, "loss": 0.7675122022628784, "loss_ce": 2.5674018615973182e-05, "loss_iou": 0.296875, "loss_num": 0.03466796875, "loss_xval": 0.765625, "num_input_tokens_seen": 181122012, "step": 2889 }, { "epoch": 9.61730449251248, "grad_norm": 25.64425277709961, "learning_rate": 5e-06, "loss": 0.5598, "num_input_tokens_seen": 181185028, "step": 2890 }, { "epoch": 9.61730449251248, "loss": 0.5267351269721985, "loss_ce": 1.717099280540424e-06, "loss_iou": 0.2119140625, "loss_num": 0.0206298828125, "loss_xval": 0.52734375, "num_input_tokens_seen": 181185028, "step": 2890 }, { "epoch": 9.62063227953411, "grad_norm": 14.39548110961914, "learning_rate": 5e-06, "loss": 0.4817, "num_input_tokens_seen": 181247668, "step": 2891 }, { "epoch": 9.62063227953411, "loss": 0.44513314962387085, "loss_ce": 3.7619822705892147e-06, "loss_iou": 0.1728515625, "loss_num": 0.0198974609375, "loss_xval": 0.4453125, "num_input_tokens_seen": 181247668, "step": 2891 }, { "epoch": 9.62396006655574, "grad_norm": 10.032007217407227, "learning_rate": 5e-06, "loss": 0.3571, "num_input_tokens_seen": 181309540, "step": 2892 }, { "epoch": 9.62396006655574, "loss": 0.39188215136528015, "loss_ce": 3.6451107007451355e-05, "loss_iou": 0.125, "loss_num": 0.0284423828125, "loss_xval": 0.392578125, "num_input_tokens_seen": 181309540, "step": 2892 }, { "epoch": 9.627287853577371, "grad_norm": 9.790216445922852, "learning_rate": 5e-06, "loss": 0.6386, "num_input_tokens_seen": 181371588, "step": 2893 }, { "epoch": 9.627287853577371, "loss": 0.42482489347457886, "loss_ce": 2.0203247913741507e-05, "loss_iou": 0.1181640625, "loss_num": 0.037841796875, "loss_xval": 0.42578125, "num_input_tokens_seen": 181371588, "step": 2893 }, { "epoch": 9.630615640599002, "grad_norm": 17.605876922607422, "learning_rate": 5e-06, "loss": 0.6122, "num_input_tokens_seen": 181434584, "step": 2894 }, { "epoch": 9.630615640599002, "loss": 0.49892154335975647, "loss_ce": 2.018469422182534e-05, "loss_iou": 0.1962890625, "loss_num": 0.021240234375, "loss_xval": 0.498046875, "num_input_tokens_seen": 181434584, "step": 2894 }, { "epoch": 9.633943427620633, "grad_norm": 25.41998291015625, "learning_rate": 5e-06, "loss": 0.4016, "num_input_tokens_seen": 181498312, "step": 2895 }, { "epoch": 9.633943427620633, "loss": 0.36003467440605164, "loss_ce": 0.0002629423688631505, "loss_iou": 0.1064453125, "loss_num": 0.029296875, "loss_xval": 0.359375, "num_input_tokens_seen": 181498312, "step": 2895 }, { "epoch": 9.637271214642263, "grad_norm": 23.614757537841797, "learning_rate": 5e-06, "loss": 0.3173, "num_input_tokens_seen": 181561288, "step": 2896 }, { "epoch": 9.637271214642263, "loss": 0.18891361355781555, "loss_ce": 9.81022367341211e-06, "loss_iou": 0.0556640625, "loss_num": 0.0155029296875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 181561288, "step": 2896 }, { "epoch": 9.640599001663894, "grad_norm": 14.82948112487793, "learning_rate": 5e-06, "loss": 0.5204, "num_input_tokens_seen": 181622620, "step": 2897 }, { "epoch": 9.640599001663894, "loss": 0.43384477496147156, "loss_ce": 6.861679594294401e-06, "loss_iou": 0.14453125, "loss_num": 0.029296875, "loss_xval": 0.43359375, "num_input_tokens_seen": 181622620, "step": 2897 }, { "epoch": 9.643926788685524, "grad_norm": 12.184654235839844, "learning_rate": 5e-06, "loss": 0.6075, "num_input_tokens_seen": 181685668, "step": 2898 }, { "epoch": 9.643926788685524, "loss": 0.4072582721710205, "loss_ce": 1.199368398374645e-06, "loss_iou": 0.1064453125, "loss_num": 0.038818359375, "loss_xval": 0.408203125, "num_input_tokens_seen": 181685668, "step": 2898 }, { "epoch": 9.647254575707155, "grad_norm": 10.665274620056152, "learning_rate": 5e-06, "loss": 0.6722, "num_input_tokens_seen": 181749280, "step": 2899 }, { "epoch": 9.647254575707155, "loss": 0.5776404142379761, "loss_ce": 3.6620429000322474e-06, "loss_iou": 0.1875, "loss_num": 0.040771484375, "loss_xval": 0.578125, "num_input_tokens_seen": 181749280, "step": 2899 }, { "epoch": 9.650582362728786, "grad_norm": 10.453713417053223, "learning_rate": 5e-06, "loss": 0.5457, "num_input_tokens_seen": 181812348, "step": 2900 }, { "epoch": 9.650582362728786, "loss": 0.5622572302818298, "loss_ce": 1.3879771358915605e-06, "loss_iou": 0.234375, "loss_num": 0.0186767578125, "loss_xval": 0.5625, "num_input_tokens_seen": 181812348, "step": 2900 }, { "epoch": 9.653910149750416, "grad_norm": 29.289257049560547, "learning_rate": 5e-06, "loss": 0.505, "num_input_tokens_seen": 181874804, "step": 2901 }, { "epoch": 9.653910149750416, "loss": 0.6137291789054871, "loss_ce": 5.4018310038372874e-06, "loss_iou": 0.25390625, "loss_num": 0.021484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 181874804, "step": 2901 }, { "epoch": 9.657237936772047, "grad_norm": 41.40977478027344, "learning_rate": 5e-06, "loss": 0.7906, "num_input_tokens_seen": 181938680, "step": 2902 }, { "epoch": 9.657237936772047, "loss": 1.0443272590637207, "loss_ce": 0.0001378083834424615, "loss_iou": 0.326171875, "loss_num": 0.078125, "loss_xval": 1.046875, "num_input_tokens_seen": 181938680, "step": 2902 }, { "epoch": 9.660565723793678, "grad_norm": 20.28166961669922, "learning_rate": 5e-06, "loss": 0.4957, "num_input_tokens_seen": 181999780, "step": 2903 }, { "epoch": 9.660565723793678, "loss": 0.4646109938621521, "loss_ce": 0.0001334706466877833, "loss_iou": 0.1650390625, "loss_num": 0.0267333984375, "loss_xval": 0.46484375, "num_input_tokens_seen": 181999780, "step": 2903 }, { "epoch": 9.663893510815308, "grad_norm": 13.747200012207031, "learning_rate": 5e-06, "loss": 0.5312, "num_input_tokens_seen": 182064192, "step": 2904 }, { "epoch": 9.663893510815308, "loss": 0.6682794094085693, "loss_ce": 0.001653485232964158, "loss_iou": 0.263671875, "loss_num": 0.0277099609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 182064192, "step": 2904 }, { "epoch": 9.667221297836939, "grad_norm": 16.626697540283203, "learning_rate": 5e-06, "loss": 0.6192, "num_input_tokens_seen": 182127156, "step": 2905 }, { "epoch": 9.667221297836939, "loss": 0.7842123508453369, "loss_ce": 3.269535591243766e-05, "loss_iou": 0.271484375, "loss_num": 0.048095703125, "loss_xval": 0.78515625, "num_input_tokens_seen": 182127156, "step": 2905 }, { "epoch": 9.67054908485857, "grad_norm": 25.02231788635254, "learning_rate": 5e-06, "loss": 0.5325, "num_input_tokens_seen": 182189792, "step": 2906 }, { "epoch": 9.67054908485857, "loss": 0.6210551261901855, "loss_ce": 8.345707465196028e-05, "loss_iou": 0.2294921875, "loss_num": 0.032470703125, "loss_xval": 0.62109375, "num_input_tokens_seen": 182189792, "step": 2906 }, { "epoch": 9.6738768718802, "grad_norm": 23.580507278442383, "learning_rate": 5e-06, "loss": 0.5169, "num_input_tokens_seen": 182251536, "step": 2907 }, { "epoch": 9.6738768718802, "loss": 0.4550899565219879, "loss_ce": 1.1837126294267364e-05, "loss_iou": 0.1181640625, "loss_num": 0.0439453125, "loss_xval": 0.455078125, "num_input_tokens_seen": 182251536, "step": 2907 }, { "epoch": 9.67720465890183, "grad_norm": 10.676534652709961, "learning_rate": 5e-06, "loss": 0.566, "num_input_tokens_seen": 182314184, "step": 2908 }, { "epoch": 9.67720465890183, "loss": 0.6040498614311218, "loss_ce": 4.5958542614243925e-05, "loss_iou": 0.232421875, "loss_num": 0.0277099609375, "loss_xval": 0.60546875, "num_input_tokens_seen": 182314184, "step": 2908 }, { "epoch": 9.680532445923461, "grad_norm": 23.31178092956543, "learning_rate": 5e-06, "loss": 0.5553, "num_input_tokens_seen": 182377332, "step": 2909 }, { "epoch": 9.680532445923461, "loss": 0.37121710181236267, "loss_ce": 1.276160901397816e-06, "loss_iou": 0.15234375, "loss_num": 0.01348876953125, "loss_xval": 0.37109375, "num_input_tokens_seen": 182377332, "step": 2909 }, { "epoch": 9.683860232945092, "grad_norm": 61.50010299682617, "learning_rate": 5e-06, "loss": 0.5359, "num_input_tokens_seen": 182440368, "step": 2910 }, { "epoch": 9.683860232945092, "loss": 0.5208009481430054, "loss_ce": 0.0002931583148892969, "loss_iou": 0.1826171875, "loss_num": 0.0308837890625, "loss_xval": 0.51953125, "num_input_tokens_seen": 182440368, "step": 2910 }, { "epoch": 9.687188019966722, "grad_norm": 22.887176513671875, "learning_rate": 5e-06, "loss": 0.4298, "num_input_tokens_seen": 182502128, "step": 2911 }, { "epoch": 9.687188019966722, "loss": 0.4003071188926697, "loss_ce": 0.000862547371070832, "loss_iou": 0.15625, "loss_num": 0.017333984375, "loss_xval": 0.400390625, "num_input_tokens_seen": 182502128, "step": 2911 }, { "epoch": 9.690515806988353, "grad_norm": 19.855134963989258, "learning_rate": 5e-06, "loss": 0.7554, "num_input_tokens_seen": 182565828, "step": 2912 }, { "epoch": 9.690515806988353, "loss": 0.5266812443733215, "loss_ce": 7.756317791063339e-05, "loss_iou": 0.203125, "loss_num": 0.024169921875, "loss_xval": 0.52734375, "num_input_tokens_seen": 182565828, "step": 2912 }, { "epoch": 9.693843594009984, "grad_norm": 25.20647430419922, "learning_rate": 5e-06, "loss": 0.8871, "num_input_tokens_seen": 182629376, "step": 2913 }, { "epoch": 9.693843594009984, "loss": 0.8542974591255188, "loss_ce": 0.0001715070684440434, "loss_iou": 0.322265625, "loss_num": 0.041748046875, "loss_xval": 0.85546875, "num_input_tokens_seen": 182629376, "step": 2913 }, { "epoch": 9.697171381031614, "grad_norm": 21.838083267211914, "learning_rate": 5e-06, "loss": 0.5599, "num_input_tokens_seen": 182691284, "step": 2914 }, { "epoch": 9.697171381031614, "loss": 0.5457344055175781, "loss_ce": 8.01321366452612e-05, "loss_iou": 0.2001953125, "loss_num": 0.02880859375, "loss_xval": 0.546875, "num_input_tokens_seen": 182691284, "step": 2914 }, { "epoch": 9.700499168053245, "grad_norm": 20.264493942260742, "learning_rate": 5e-06, "loss": 0.4326, "num_input_tokens_seen": 182755244, "step": 2915 }, { "epoch": 9.700499168053245, "loss": 0.4876824915409088, "loss_ce": 1.1605930922087282e-05, "loss_iou": 0.1875, "loss_num": 0.0224609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 182755244, "step": 2915 }, { "epoch": 9.703826955074875, "grad_norm": 20.834692001342773, "learning_rate": 5e-06, "loss": 0.4843, "num_input_tokens_seen": 182818236, "step": 2916 }, { "epoch": 9.703826955074875, "loss": 0.29776865243911743, "loss_ce": 0.00013070134446024895, "loss_iou": 0.0849609375, "loss_num": 0.025634765625, "loss_xval": 0.296875, "num_input_tokens_seen": 182818236, "step": 2916 }, { "epoch": 9.707154742096506, "grad_norm": 8.280861854553223, "learning_rate": 5e-06, "loss": 0.4889, "num_input_tokens_seen": 182882376, "step": 2917 }, { "epoch": 9.707154742096506, "loss": 0.4441372752189636, "loss_ce": 0.0005337632610462606, "loss_iou": 0.1796875, "loss_num": 0.016845703125, "loss_xval": 0.443359375, "num_input_tokens_seen": 182882376, "step": 2917 }, { "epoch": 9.710482529118137, "grad_norm": 10.78421401977539, "learning_rate": 5e-06, "loss": 0.4709, "num_input_tokens_seen": 182944684, "step": 2918 }, { "epoch": 9.710482529118137, "loss": 0.4492223858833313, "loss_ce": 3.6031333365826868e-06, "loss_iou": 0.1259765625, "loss_num": 0.039306640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 182944684, "step": 2918 }, { "epoch": 9.713810316139767, "grad_norm": 22.87497329711914, "learning_rate": 5e-06, "loss": 0.5183, "num_input_tokens_seen": 183008292, "step": 2919 }, { "epoch": 9.713810316139767, "loss": 0.6346837282180786, "loss_ce": 0.00016221043188124895, "loss_iou": 0.25390625, "loss_num": 0.025146484375, "loss_xval": 0.6328125, "num_input_tokens_seen": 183008292, "step": 2919 }, { "epoch": 9.717138103161398, "grad_norm": 15.675990104675293, "learning_rate": 5e-06, "loss": 0.4804, "num_input_tokens_seen": 183071516, "step": 2920 }, { "epoch": 9.717138103161398, "loss": 0.4415128231048584, "loss_ce": 0.00028968745027668774, "loss_iou": 0.1396484375, "loss_num": 0.0322265625, "loss_xval": 0.44140625, "num_input_tokens_seen": 183071516, "step": 2920 }, { "epoch": 9.720465890183029, "grad_norm": 19.108692169189453, "learning_rate": 5e-06, "loss": 0.4842, "num_input_tokens_seen": 183134508, "step": 2921 }, { "epoch": 9.720465890183029, "loss": 0.5877708196640015, "loss_ce": 0.00012434810923878103, "loss_iou": 0.251953125, "loss_num": 0.0164794921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 183134508, "step": 2921 }, { "epoch": 9.72379367720466, "grad_norm": 16.29625129699707, "learning_rate": 5e-06, "loss": 0.5883, "num_input_tokens_seen": 183197068, "step": 2922 }, { "epoch": 9.72379367720466, "loss": 0.8499786257743835, "loss_ce": 3.034261226275703e-06, "loss_iou": 0.314453125, "loss_num": 0.044189453125, "loss_xval": 0.8515625, "num_input_tokens_seen": 183197068, "step": 2922 }, { "epoch": 9.72712146422629, "grad_norm": 10.96635913848877, "learning_rate": 5e-06, "loss": 0.3945, "num_input_tokens_seen": 183259988, "step": 2923 }, { "epoch": 9.72712146422629, "loss": 0.46448180079460144, "loss_ce": 4.236604581819847e-06, "loss_iou": 0.1767578125, "loss_num": 0.0223388671875, "loss_xval": 0.46484375, "num_input_tokens_seen": 183259988, "step": 2923 }, { "epoch": 9.73044925124792, "grad_norm": 10.945318222045898, "learning_rate": 5e-06, "loss": 0.3403, "num_input_tokens_seen": 183322008, "step": 2924 }, { "epoch": 9.73044925124792, "loss": 0.34558433294296265, "loss_ce": 3.2646182717144256e-06, "loss_iou": 0.10107421875, "loss_num": 0.0286865234375, "loss_xval": 0.345703125, "num_input_tokens_seen": 183322008, "step": 2924 }, { "epoch": 9.733777038269551, "grad_norm": 18.17209243774414, "learning_rate": 5e-06, "loss": 0.5286, "num_input_tokens_seen": 183383892, "step": 2925 }, { "epoch": 9.733777038269551, "loss": 0.6787197589874268, "loss_ce": 0.0004970963927917182, "loss_iou": 0.2314453125, "loss_num": 0.04296875, "loss_xval": 0.6796875, "num_input_tokens_seen": 183383892, "step": 2925 }, { "epoch": 9.737104825291182, "grad_norm": 17.843584060668945, "learning_rate": 5e-06, "loss": 0.5339, "num_input_tokens_seen": 183446644, "step": 2926 }, { "epoch": 9.737104825291182, "loss": 0.47083163261413574, "loss_ce": 6.443891834351234e-06, "loss_iou": 0.171875, "loss_num": 0.0255126953125, "loss_xval": 0.470703125, "num_input_tokens_seen": 183446644, "step": 2926 }, { "epoch": 9.740432612312812, "grad_norm": 14.67287826538086, "learning_rate": 5e-06, "loss": 0.536, "num_input_tokens_seen": 183509356, "step": 2927 }, { "epoch": 9.740432612312812, "loss": 0.3606500029563904, "loss_ce": 0.0001763652398949489, "loss_iou": 0.115234375, "loss_num": 0.0260009765625, "loss_xval": 0.361328125, "num_input_tokens_seen": 183509356, "step": 2927 }, { "epoch": 9.743760399334443, "grad_norm": 43.77641677856445, "learning_rate": 5e-06, "loss": 0.6395, "num_input_tokens_seen": 183573224, "step": 2928 }, { "epoch": 9.743760399334443, "loss": 0.777004063129425, "loss_ce": 0.0005758479819633067, "loss_iou": 0.2734375, "loss_num": 0.0458984375, "loss_xval": 0.77734375, "num_input_tokens_seen": 183573224, "step": 2928 }, { "epoch": 9.747088186356073, "grad_norm": 22.967857360839844, "learning_rate": 5e-06, "loss": 0.6244, "num_input_tokens_seen": 183634480, "step": 2929 }, { "epoch": 9.747088186356073, "loss": 0.41754454374313354, "loss_ce": 3.033622078874032e-06, "loss_iou": 0.130859375, "loss_num": 0.0311279296875, "loss_xval": 0.41796875, "num_input_tokens_seen": 183634480, "step": 2929 }, { "epoch": 9.750415973377704, "grad_norm": 11.340044975280762, "learning_rate": 5e-06, "loss": 0.7507, "num_input_tokens_seen": 183697716, "step": 2930 }, { "epoch": 9.750415973377704, "loss": 0.9725834131240845, "loss_ce": 0.0004154295311309397, "loss_iou": 0.353515625, "loss_num": 0.052734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 183697716, "step": 2930 }, { "epoch": 9.753743760399335, "grad_norm": 7.111901760101318, "learning_rate": 5e-06, "loss": 0.492, "num_input_tokens_seen": 183760628, "step": 2931 }, { "epoch": 9.753743760399335, "loss": 0.23556654155254364, "loss_ce": 1.3585929536930053e-06, "loss_iou": 0.0751953125, "loss_num": 0.0169677734375, "loss_xval": 0.2353515625, "num_input_tokens_seen": 183760628, "step": 2931 }, { "epoch": 9.757071547420965, "grad_norm": 20.44097137451172, "learning_rate": 5e-06, "loss": 0.4706, "num_input_tokens_seen": 183818892, "step": 2932 }, { "epoch": 9.757071547420965, "loss": 0.3630286455154419, "loss_ce": 0.0003577587194740772, "loss_iou": 0.12890625, "loss_num": 0.0211181640625, "loss_xval": 0.36328125, "num_input_tokens_seen": 183818892, "step": 2932 }, { "epoch": 9.760399334442596, "grad_norm": 30.950407028198242, "learning_rate": 5e-06, "loss": 0.4902, "num_input_tokens_seen": 183880796, "step": 2933 }, { "epoch": 9.760399334442596, "loss": 0.657253623008728, "loss_ce": 2.7076372134615667e-05, "loss_iou": 0.248046875, "loss_num": 0.0322265625, "loss_xval": 0.65625, "num_input_tokens_seen": 183880796, "step": 2933 }, { "epoch": 9.763727121464226, "grad_norm": 18.537403106689453, "learning_rate": 5e-06, "loss": 0.4359, "num_input_tokens_seen": 183943420, "step": 2934 }, { "epoch": 9.763727121464226, "loss": 0.4472185969352722, "loss_ce": 0.00016661055269651115, "loss_iou": 0.1611328125, "loss_num": 0.0250244140625, "loss_xval": 0.447265625, "num_input_tokens_seen": 183943420, "step": 2934 }, { "epoch": 9.767054908485857, "grad_norm": 21.21495819091797, "learning_rate": 5e-06, "loss": 0.5467, "num_input_tokens_seen": 184006920, "step": 2935 }, { "epoch": 9.767054908485857, "loss": 0.5021676421165466, "loss_ce": 0.0007028186228126287, "loss_iou": 0.1689453125, "loss_num": 0.032958984375, "loss_xval": 0.5, "num_input_tokens_seen": 184006920, "step": 2935 }, { "epoch": 9.770382695507488, "grad_norm": 23.89349937438965, "learning_rate": 5e-06, "loss": 0.6967, "num_input_tokens_seen": 184068492, "step": 2936 }, { "epoch": 9.770382695507488, "loss": 0.6308439373970032, "loss_ce": 0.0004118177166674286, "loss_iou": 0.2373046875, "loss_num": 0.03125, "loss_xval": 0.62890625, "num_input_tokens_seen": 184068492, "step": 2936 }, { "epoch": 9.773710482529118, "grad_norm": 20.2669620513916, "learning_rate": 5e-06, "loss": 0.4629, "num_input_tokens_seen": 184131720, "step": 2937 }, { "epoch": 9.773710482529118, "loss": 0.28981783986091614, "loss_ce": 0.0002517920802347362, "loss_iou": 0.11962890625, "loss_num": 0.01007080078125, "loss_xval": 0.2890625, "num_input_tokens_seen": 184131720, "step": 2937 }, { "epoch": 9.777038269550749, "grad_norm": 15.572280883789062, "learning_rate": 5e-06, "loss": 0.6071, "num_input_tokens_seen": 184194528, "step": 2938 }, { "epoch": 9.777038269550749, "loss": 0.7286790013313293, "loss_ce": 0.0004075277829542756, "loss_iou": 0.287109375, "loss_num": 0.0306396484375, "loss_xval": 0.7265625, "num_input_tokens_seen": 184194528, "step": 2938 }, { "epoch": 9.78036605657238, "grad_norm": 5.90762996673584, "learning_rate": 5e-06, "loss": 0.4751, "num_input_tokens_seen": 184256676, "step": 2939 }, { "epoch": 9.78036605657238, "loss": 0.5109905004501343, "loss_ce": 4.183239980193321e-06, "loss_iou": 0.173828125, "loss_num": 0.03271484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 184256676, "step": 2939 }, { "epoch": 9.78369384359401, "grad_norm": 12.259622573852539, "learning_rate": 5e-06, "loss": 0.7213, "num_input_tokens_seen": 184320648, "step": 2940 }, { "epoch": 9.78369384359401, "loss": 0.8315571546554565, "loss_ce": 1.4160917089611758e-05, "loss_iou": 0.3515625, "loss_num": 0.026123046875, "loss_xval": 0.83203125, "num_input_tokens_seen": 184320648, "step": 2940 }, { "epoch": 9.78702163061564, "grad_norm": 6.491352081298828, "learning_rate": 5e-06, "loss": 0.4534, "num_input_tokens_seen": 184383516, "step": 2941 }, { "epoch": 9.78702163061564, "loss": 0.4263755679130554, "loss_ce": 0.0005943034193478525, "loss_iou": 0.150390625, "loss_num": 0.0250244140625, "loss_xval": 0.42578125, "num_input_tokens_seen": 184383516, "step": 2941 }, { "epoch": 9.790349417637271, "grad_norm": 11.50826358795166, "learning_rate": 5e-06, "loss": 0.3717, "num_input_tokens_seen": 184445944, "step": 2942 }, { "epoch": 9.790349417637271, "loss": 0.5536211729049683, "loss_ce": 0.0008257832378149033, "loss_iou": 0.2099609375, "loss_num": 0.0264892578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 184445944, "step": 2942 }, { "epoch": 9.793677204658902, "grad_norm": 12.576164245605469, "learning_rate": 5e-06, "loss": 0.4042, "num_input_tokens_seen": 184508832, "step": 2943 }, { "epoch": 9.793677204658902, "loss": 0.34631186723709106, "loss_ce": 0.0004866549570579082, "loss_iou": 0.1357421875, "loss_num": 0.0150146484375, "loss_xval": 0.345703125, "num_input_tokens_seen": 184508832, "step": 2943 }, { "epoch": 9.797004991680533, "grad_norm": 38.45964813232422, "learning_rate": 5e-06, "loss": 0.7263, "num_input_tokens_seen": 184571876, "step": 2944 }, { "epoch": 9.797004991680533, "loss": 1.0240578651428223, "loss_ce": 0.0023293346166610718, "loss_iou": 0.423828125, "loss_num": 0.03466796875, "loss_xval": 1.0234375, "num_input_tokens_seen": 184571876, "step": 2944 }, { "epoch": 9.800332778702163, "grad_norm": 8.401918411254883, "learning_rate": 5e-06, "loss": 0.4407, "num_input_tokens_seen": 184634436, "step": 2945 }, { "epoch": 9.800332778702163, "loss": 0.5190562605857849, "loss_ce": 1.3288834452396259e-05, "loss_iou": 0.1650390625, "loss_num": 0.03759765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 184634436, "step": 2945 }, { "epoch": 9.803660565723794, "grad_norm": 16.73207664489746, "learning_rate": 5e-06, "loss": 0.4604, "num_input_tokens_seen": 184697808, "step": 2946 }, { "epoch": 9.803660565723794, "loss": 0.6504992246627808, "loss_ce": 4.759197690873407e-05, "loss_iou": 0.263671875, "loss_num": 0.02490234375, "loss_xval": 0.65234375, "num_input_tokens_seen": 184697808, "step": 2946 }, { "epoch": 9.806988352745424, "grad_norm": 50.72026062011719, "learning_rate": 5e-06, "loss": 0.5647, "num_input_tokens_seen": 184761836, "step": 2947 }, { "epoch": 9.806988352745424, "loss": 0.7493867874145508, "loss_ce": 0.0007295781979337335, "loss_iou": 0.302734375, "loss_num": 0.028564453125, "loss_xval": 0.75, "num_input_tokens_seen": 184761836, "step": 2947 }, { "epoch": 9.810316139767055, "grad_norm": 31.340721130371094, "learning_rate": 5e-06, "loss": 0.5829, "num_input_tokens_seen": 184824456, "step": 2948 }, { "epoch": 9.810316139767055, "loss": 0.5891902446746826, "loss_ce": 0.00032305007334798574, "loss_iou": 0.2216796875, "loss_num": 0.029296875, "loss_xval": 0.58984375, "num_input_tokens_seen": 184824456, "step": 2948 }, { "epoch": 9.813643926788686, "grad_norm": 14.0571928024292, "learning_rate": 5e-06, "loss": 0.568, "num_input_tokens_seen": 184886536, "step": 2949 }, { "epoch": 9.813643926788686, "loss": 0.694050669670105, "loss_ce": 0.00014192562957759947, "loss_iou": 0.2275390625, "loss_num": 0.0478515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 184886536, "step": 2949 }, { "epoch": 9.816971713810316, "grad_norm": 20.58320426940918, "learning_rate": 5e-06, "loss": 0.708, "num_input_tokens_seen": 184948652, "step": 2950 }, { "epoch": 9.816971713810316, "loss": 0.6847269535064697, "loss_ce": 4.02244813813013e-06, "loss_iou": 0.2451171875, "loss_num": 0.038818359375, "loss_xval": 0.68359375, "num_input_tokens_seen": 184948652, "step": 2950 }, { "epoch": 9.820299500831947, "grad_norm": 28.178550720214844, "learning_rate": 5e-06, "loss": 0.6767, "num_input_tokens_seen": 185011692, "step": 2951 }, { "epoch": 9.820299500831947, "loss": 0.6380277872085571, "loss_ce": 0.0014311012346297503, "loss_iou": 0.2060546875, "loss_num": 0.045166015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 185011692, "step": 2951 }, { "epoch": 9.823627287853578, "grad_norm": 22.895492553710938, "learning_rate": 5e-06, "loss": 0.3948, "num_input_tokens_seen": 185074480, "step": 2952 }, { "epoch": 9.823627287853578, "loss": 0.3174755275249481, "loss_ce": 1.1557795005501248e-06, "loss_iou": 0.1181640625, "loss_num": 0.016357421875, "loss_xval": 0.318359375, "num_input_tokens_seen": 185074480, "step": 2952 }, { "epoch": 9.826955074875208, "grad_norm": 19.94285011291504, "learning_rate": 5e-06, "loss": 0.609, "num_input_tokens_seen": 185137884, "step": 2953 }, { "epoch": 9.826955074875208, "loss": 0.8048732280731201, "loss_ce": 2.606492216727929e-06, "loss_iou": 0.306640625, "loss_num": 0.03857421875, "loss_xval": 0.8046875, "num_input_tokens_seen": 185137884, "step": 2953 }, { "epoch": 9.830282861896839, "grad_norm": 18.113679885864258, "learning_rate": 5e-06, "loss": 0.5319, "num_input_tokens_seen": 185199120, "step": 2954 }, { "epoch": 9.830282861896839, "loss": 0.49911385774612427, "loss_ce": 0.00021249095152597874, "loss_iou": 0.1875, "loss_num": 0.0247802734375, "loss_xval": 0.498046875, "num_input_tokens_seen": 185199120, "step": 2954 }, { "epoch": 9.83361064891847, "grad_norm": 21.8562068939209, "learning_rate": 5e-06, "loss": 0.5532, "num_input_tokens_seen": 185260432, "step": 2955 }, { "epoch": 9.83361064891847, "loss": 0.4795268774032593, "loss_ce": 0.0005229542148299515, "loss_iou": 0.1376953125, "loss_num": 0.040771484375, "loss_xval": 0.478515625, "num_input_tokens_seen": 185260432, "step": 2955 }, { "epoch": 9.8369384359401, "grad_norm": 8.37660026550293, "learning_rate": 5e-06, "loss": 0.5321, "num_input_tokens_seen": 185321944, "step": 2956 }, { "epoch": 9.8369384359401, "loss": 0.6521629095077515, "loss_ce": 2.2415692910726648e-06, "loss_iou": 0.2451171875, "loss_num": 0.032470703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 185321944, "step": 2956 }, { "epoch": 9.84026622296173, "grad_norm": 24.36867904663086, "learning_rate": 5e-06, "loss": 0.4546, "num_input_tokens_seen": 185383804, "step": 2957 }, { "epoch": 9.84026622296173, "loss": 0.4386008083820343, "loss_ce": 2.199285063397838e-06, "loss_iou": 0.1494140625, "loss_num": 0.028076171875, "loss_xval": 0.439453125, "num_input_tokens_seen": 185383804, "step": 2957 }, { "epoch": 9.843594009983361, "grad_norm": 21.701900482177734, "learning_rate": 5e-06, "loss": 0.4497, "num_input_tokens_seen": 185445216, "step": 2958 }, { "epoch": 9.843594009983361, "loss": 0.5024458169937134, "loss_ce": 4.45495015810593e-06, "loss_iou": 0.1494140625, "loss_num": 0.04052734375, "loss_xval": 0.50390625, "num_input_tokens_seen": 185445216, "step": 2958 }, { "epoch": 9.846921797004992, "grad_norm": 10.770414352416992, "learning_rate": 5e-06, "loss": 0.6184, "num_input_tokens_seen": 185507996, "step": 2959 }, { "epoch": 9.846921797004992, "loss": 0.7926148176193237, "loss_ce": 1.2229816093167756e-05, "loss_iou": 0.25, "loss_num": 0.058349609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 185507996, "step": 2959 }, { "epoch": 9.850249584026622, "grad_norm": 11.392953872680664, "learning_rate": 5e-06, "loss": 0.6254, "num_input_tokens_seen": 185570800, "step": 2960 }, { "epoch": 9.850249584026622, "loss": 0.3653448224067688, "loss_ce": 0.0005071785999462008, "loss_iou": 0.12158203125, "loss_num": 0.0244140625, "loss_xval": 0.365234375, "num_input_tokens_seen": 185570800, "step": 2960 }, { "epoch": 9.853577371048253, "grad_norm": 23.492708206176758, "learning_rate": 5e-06, "loss": 0.4777, "num_input_tokens_seen": 185633248, "step": 2961 }, { "epoch": 9.853577371048253, "loss": 0.6120617389678955, "loss_ce": 0.0007183490670286119, "loss_iou": 0.2158203125, "loss_num": 0.035888671875, "loss_xval": 0.61328125, "num_input_tokens_seen": 185633248, "step": 2961 }, { "epoch": 9.856905158069884, "grad_norm": 30.233816146850586, "learning_rate": 5e-06, "loss": 0.3369, "num_input_tokens_seen": 185695904, "step": 2962 }, { "epoch": 9.856905158069884, "loss": 0.35925430059432983, "loss_ce": 1.3526805560104549e-06, "loss_iou": 0.1328125, "loss_num": 0.0186767578125, "loss_xval": 0.359375, "num_input_tokens_seen": 185695904, "step": 2962 }, { "epoch": 9.860232945091514, "grad_norm": 27.25787925720215, "learning_rate": 5e-06, "loss": 0.7145, "num_input_tokens_seen": 185759112, "step": 2963 }, { "epoch": 9.860232945091514, "loss": 0.6496385335922241, "loss_ce": 0.00040764789446257055, "loss_iou": 0.2216796875, "loss_num": 0.041259765625, "loss_xval": 0.6484375, "num_input_tokens_seen": 185759112, "step": 2963 }, { "epoch": 9.863560732113145, "grad_norm": 15.232561111450195, "learning_rate": 5e-06, "loss": 0.7525, "num_input_tokens_seen": 185823552, "step": 2964 }, { "epoch": 9.863560732113145, "loss": 0.7345577478408813, "loss_ce": 0.0006710804300382733, "loss_iou": 0.2734375, "loss_num": 0.037353515625, "loss_xval": 0.734375, "num_input_tokens_seen": 185823552, "step": 2964 }, { "epoch": 9.866888519134775, "grad_norm": 10.656804084777832, "learning_rate": 5e-06, "loss": 0.4664, "num_input_tokens_seen": 185886768, "step": 2965 }, { "epoch": 9.866888519134775, "loss": 0.4633829593658447, "loss_ce": 4.064297172590159e-06, "loss_iou": 0.1669921875, "loss_num": 0.026123046875, "loss_xval": 0.462890625, "num_input_tokens_seen": 185886768, "step": 2965 }, { "epoch": 9.870216306156406, "grad_norm": 17.09125328063965, "learning_rate": 5e-06, "loss": 0.5879, "num_input_tokens_seen": 185947556, "step": 2966 }, { "epoch": 9.870216306156406, "loss": 0.5537671446800232, "loss_ce": 0.00011720253678504378, "loss_iou": 0.19921875, "loss_num": 0.03125, "loss_xval": 0.5546875, "num_input_tokens_seen": 185947556, "step": 2966 }, { "epoch": 9.873544093178037, "grad_norm": 17.664875030517578, "learning_rate": 5e-06, "loss": 0.5581, "num_input_tokens_seen": 186009144, "step": 2967 }, { "epoch": 9.873544093178037, "loss": 0.7634984254837036, "loss_ce": 0.00031478816526941955, "loss_iou": 0.267578125, "loss_num": 0.0458984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 186009144, "step": 2967 }, { "epoch": 9.876871880199667, "grad_norm": 12.547320365905762, "learning_rate": 5e-06, "loss": 0.4809, "num_input_tokens_seen": 186070604, "step": 2968 }, { "epoch": 9.876871880199667, "loss": 0.4118204712867737, "loss_ce": 1.6269259504042566e-05, "loss_iou": 0.1494140625, "loss_num": 0.022705078125, "loss_xval": 0.412109375, "num_input_tokens_seen": 186070604, "step": 2968 }, { "epoch": 9.880199667221298, "grad_norm": 20.54847526550293, "learning_rate": 5e-06, "loss": 0.7324, "num_input_tokens_seen": 186133792, "step": 2969 }, { "epoch": 9.880199667221298, "loss": 0.8450015783309937, "loss_ce": 0.00015295481716748327, "loss_iou": 0.298828125, "loss_num": 0.049560546875, "loss_xval": 0.84375, "num_input_tokens_seen": 186133792, "step": 2969 }, { "epoch": 9.883527454242929, "grad_norm": 25.140666961669922, "learning_rate": 5e-06, "loss": 0.5339, "num_input_tokens_seen": 186195792, "step": 2970 }, { "epoch": 9.883527454242929, "loss": 0.6265375018119812, "loss_ce": 1.1630965673248284e-05, "loss_iou": 0.232421875, "loss_num": 0.032470703125, "loss_xval": 0.625, "num_input_tokens_seen": 186195792, "step": 2970 }, { "epoch": 9.88685524126456, "grad_norm": 7.222326755523682, "learning_rate": 5e-06, "loss": 0.6697, "num_input_tokens_seen": 186258380, "step": 2971 }, { "epoch": 9.88685524126456, "loss": 0.8968218564987183, "loss_ce": 0.00015437515685334802, "loss_iou": 0.326171875, "loss_num": 0.048583984375, "loss_xval": 0.8984375, "num_input_tokens_seen": 186258380, "step": 2971 }, { "epoch": 9.89018302828619, "grad_norm": 20.897594451904297, "learning_rate": 5e-06, "loss": 0.5948, "num_input_tokens_seen": 186321540, "step": 2972 }, { "epoch": 9.89018302828619, "loss": 0.5382413268089294, "loss_ce": 2.842073627107311e-06, "loss_iou": 0.2060546875, "loss_num": 0.0255126953125, "loss_xval": 0.5390625, "num_input_tokens_seen": 186321540, "step": 2972 }, { "epoch": 9.89351081530782, "grad_norm": 11.164376258850098, "learning_rate": 5e-06, "loss": 0.484, "num_input_tokens_seen": 186381864, "step": 2973 }, { "epoch": 9.89351081530782, "loss": 0.5809335708618164, "loss_ce": 9.929600537361694e-07, "loss_iou": 0.208984375, "loss_num": 0.032470703125, "loss_xval": 0.58203125, "num_input_tokens_seen": 186381864, "step": 2973 }, { "epoch": 9.896838602329451, "grad_norm": 10.127054214477539, "learning_rate": 5e-06, "loss": 0.517, "num_input_tokens_seen": 186444584, "step": 2974 }, { "epoch": 9.896838602329451, "loss": 0.5854883193969727, "loss_ce": 0.0006342055276036263, "loss_iou": 0.1884765625, "loss_num": 0.041259765625, "loss_xval": 0.5859375, "num_input_tokens_seen": 186444584, "step": 2974 }, { "epoch": 9.900166389351082, "grad_norm": 7.6226277351379395, "learning_rate": 5e-06, "loss": 0.5166, "num_input_tokens_seen": 186507108, "step": 2975 }, { "epoch": 9.900166389351082, "loss": 0.505378007888794, "loss_ce": 0.00014422145613934845, "loss_iou": 0.1611328125, "loss_num": 0.036376953125, "loss_xval": 0.50390625, "num_input_tokens_seen": 186507108, "step": 2975 }, { "epoch": 9.903494176372712, "grad_norm": 9.699047088623047, "learning_rate": 5e-06, "loss": 0.4695, "num_input_tokens_seen": 186569748, "step": 2976 }, { "epoch": 9.903494176372712, "loss": 0.3668079376220703, "loss_ce": 1.9091828562523006e-06, "loss_iou": 0.134765625, "loss_num": 0.01953125, "loss_xval": 0.3671875, "num_input_tokens_seen": 186569748, "step": 2976 }, { "epoch": 9.906821963394343, "grad_norm": 5.780817985534668, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 186630908, "step": 2977 }, { "epoch": 9.906821963394343, "loss": 0.41220828890800476, "loss_ce": 9.891945956042036e-05, "loss_iou": 0.146484375, "loss_num": 0.02392578125, "loss_xval": 0.412109375, "num_input_tokens_seen": 186630908, "step": 2977 }, { "epoch": 9.910149750415973, "grad_norm": 25.51630210876465, "learning_rate": 5e-06, "loss": 0.3194, "num_input_tokens_seen": 186693884, "step": 2978 }, { "epoch": 9.910149750415973, "loss": 0.3703857660293579, "loss_ce": 2.4441334971925244e-05, "loss_iou": 0.1201171875, "loss_num": 0.0260009765625, "loss_xval": 0.37109375, "num_input_tokens_seen": 186693884, "step": 2978 }, { "epoch": 9.913477537437604, "grad_norm": 22.48125648498535, "learning_rate": 5e-06, "loss": 0.6152, "num_input_tokens_seen": 186757060, "step": 2979 }, { "epoch": 9.913477537437604, "loss": 0.7153621912002563, "loss_ce": 0.0006404991145245731, "loss_iou": 0.259765625, "loss_num": 0.0390625, "loss_xval": 0.71484375, "num_input_tokens_seen": 186757060, "step": 2979 }, { "epoch": 9.916805324459235, "grad_norm": 18.029312133789062, "learning_rate": 5e-06, "loss": 0.6236, "num_input_tokens_seen": 186821292, "step": 2980 }, { "epoch": 9.916805324459235, "loss": 0.5619189739227295, "loss_ce": 2.934921212727204e-05, "loss_iou": 0.2490234375, "loss_num": 0.0128173828125, "loss_xval": 0.5625, "num_input_tokens_seen": 186821292, "step": 2980 }, { "epoch": 9.920133111480865, "grad_norm": 23.220338821411133, "learning_rate": 5e-06, "loss": 0.3737, "num_input_tokens_seen": 186884212, "step": 2981 }, { "epoch": 9.920133111480865, "loss": 0.4064289629459381, "loss_ce": 0.00036208020173944533, "loss_iou": 0.1572265625, "loss_num": 0.0185546875, "loss_xval": 0.40625, "num_input_tokens_seen": 186884212, "step": 2981 }, { "epoch": 9.923460898502496, "grad_norm": 94.67524719238281, "learning_rate": 5e-06, "loss": 0.6576, "num_input_tokens_seen": 186946928, "step": 2982 }, { "epoch": 9.923460898502496, "loss": 0.7172431349754333, "loss_ce": 8.006451389519498e-05, "loss_iou": 0.263671875, "loss_num": 0.037841796875, "loss_xval": 0.71875, "num_input_tokens_seen": 186946928, "step": 2982 }, { "epoch": 9.926788685524127, "grad_norm": 8.6438570022583, "learning_rate": 5e-06, "loss": 0.3155, "num_input_tokens_seen": 187009164, "step": 2983 }, { "epoch": 9.926788685524127, "loss": 0.17397019267082214, "loss_ce": 4.731091394205578e-06, "loss_iou": 0.0537109375, "loss_num": 0.01336669921875, "loss_xval": 0.173828125, "num_input_tokens_seen": 187009164, "step": 2983 }, { "epoch": 9.930116472545757, "grad_norm": 10.318918228149414, "learning_rate": 5e-06, "loss": 0.4179, "num_input_tokens_seen": 187072052, "step": 2984 }, { "epoch": 9.930116472545757, "loss": 0.3106231689453125, "loss_ce": 1.524603612779174e-05, "loss_iou": 0.11328125, "loss_num": 0.0167236328125, "loss_xval": 0.310546875, "num_input_tokens_seen": 187072052, "step": 2984 }, { "epoch": 9.933444259567388, "grad_norm": 14.053253173828125, "learning_rate": 5e-06, "loss": 0.4104, "num_input_tokens_seen": 187133740, "step": 2985 }, { "epoch": 9.933444259567388, "loss": 0.3394802510738373, "loss_ce": 2.695783223316539e-06, "loss_iou": 0.12353515625, "loss_num": 0.0184326171875, "loss_xval": 0.33984375, "num_input_tokens_seen": 187133740, "step": 2985 }, { "epoch": 9.936772046589018, "grad_norm": 8.857370376586914, "learning_rate": 5e-06, "loss": 0.5852, "num_input_tokens_seen": 187196832, "step": 2986 }, { "epoch": 9.936772046589018, "loss": 0.3771408796310425, "loss_ce": 4.639761300495593e-06, "loss_iou": 0.15625, "loss_num": 0.0126953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 187196832, "step": 2986 }, { "epoch": 9.940099833610649, "grad_norm": 7.722167491912842, "learning_rate": 5e-06, "loss": 0.4955, "num_input_tokens_seen": 187258412, "step": 2987 }, { "epoch": 9.940099833610649, "loss": 0.48202943801879883, "loss_ce": 1.9546418116078712e-05, "loss_iou": 0.185546875, "loss_num": 0.02197265625, "loss_xval": 0.482421875, "num_input_tokens_seen": 187258412, "step": 2987 }, { "epoch": 9.94342762063228, "grad_norm": 10.538965225219727, "learning_rate": 5e-06, "loss": 0.4354, "num_input_tokens_seen": 187322048, "step": 2988 }, { "epoch": 9.94342762063228, "loss": 0.46878063678741455, "loss_ce": 0.00039687997195869684, "loss_iou": 0.1962890625, "loss_num": 0.01519775390625, "loss_xval": 0.46875, "num_input_tokens_seen": 187322048, "step": 2988 }, { "epoch": 9.94675540765391, "grad_norm": 11.949478149414062, "learning_rate": 5e-06, "loss": 0.559, "num_input_tokens_seen": 187384916, "step": 2989 }, { "epoch": 9.94675540765391, "loss": 0.4809683859348297, "loss_ce": 0.00013342870806809515, "loss_iou": 0.1796875, "loss_num": 0.0242919921875, "loss_xval": 0.48046875, "num_input_tokens_seen": 187384916, "step": 2989 }, { "epoch": 9.95008319467554, "grad_norm": 12.851706504821777, "learning_rate": 5e-06, "loss": 0.5684, "num_input_tokens_seen": 187448560, "step": 2990 }, { "epoch": 9.95008319467554, "loss": 0.5823075771331787, "loss_ce": 1.6918929759413004e-06, "loss_iou": 0.2255859375, "loss_num": 0.026123046875, "loss_xval": 0.58203125, "num_input_tokens_seen": 187448560, "step": 2990 }, { "epoch": 9.953410981697171, "grad_norm": 10.396065711975098, "learning_rate": 5e-06, "loss": 0.4766, "num_input_tokens_seen": 187510624, "step": 2991 }, { "epoch": 9.953410981697171, "loss": 0.6372580528259277, "loss_ce": 0.0002951490751001984, "loss_iou": 0.2353515625, "loss_num": 0.033447265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 187510624, "step": 2991 }, { "epoch": 9.956738768718802, "grad_norm": 9.510651588439941, "learning_rate": 5e-06, "loss": 0.4408, "num_input_tokens_seen": 187572164, "step": 2992 }, { "epoch": 9.956738768718802, "loss": 0.3732092082500458, "loss_ce": 4.0275022911373526e-05, "loss_iou": 0.130859375, "loss_num": 0.02197265625, "loss_xval": 0.373046875, "num_input_tokens_seen": 187572164, "step": 2992 }, { "epoch": 9.960066555740433, "grad_norm": 20.076208114624023, "learning_rate": 5e-06, "loss": 0.541, "num_input_tokens_seen": 187635272, "step": 2993 }, { "epoch": 9.960066555740433, "loss": 0.4024675786495209, "loss_ce": 1.7401100649294676e-06, "loss_iou": 0.1689453125, "loss_num": 0.01263427734375, "loss_xval": 0.40234375, "num_input_tokens_seen": 187635272, "step": 2993 }, { "epoch": 9.963394342762063, "grad_norm": 35.81378173828125, "learning_rate": 5e-06, "loss": 0.7217, "num_input_tokens_seen": 187698796, "step": 2994 }, { "epoch": 9.963394342762063, "loss": 0.7675830125808716, "loss_ce": 4.944665306538809e-06, "loss_iou": 0.255859375, "loss_num": 0.051025390625, "loss_xval": 0.765625, "num_input_tokens_seen": 187698796, "step": 2994 }, { "epoch": 9.966722129783694, "grad_norm": 21.198890686035156, "learning_rate": 5e-06, "loss": 0.4449, "num_input_tokens_seen": 187760952, "step": 2995 }, { "epoch": 9.966722129783694, "loss": 0.4654337763786316, "loss_ce": 1.018742477754131e-05, "loss_iou": 0.1708984375, "loss_num": 0.024658203125, "loss_xval": 0.46484375, "num_input_tokens_seen": 187760952, "step": 2995 }, { "epoch": 9.970049916805324, "grad_norm": 13.340892791748047, "learning_rate": 5e-06, "loss": 0.5066, "num_input_tokens_seen": 187823388, "step": 2996 }, { "epoch": 9.970049916805324, "loss": 0.4313995838165283, "loss_ce": 3.110358193225693e-06, "loss_iou": 0.1728515625, "loss_num": 0.0169677734375, "loss_xval": 0.431640625, "num_input_tokens_seen": 187823388, "step": 2996 }, { "epoch": 9.973377703826955, "grad_norm": 17.230812072753906, "learning_rate": 5e-06, "loss": 0.7108, "num_input_tokens_seen": 187887332, "step": 2997 }, { "epoch": 9.973377703826955, "loss": 0.7832648754119873, "loss_ce": 0.0006720570381730795, "loss_iou": 0.27734375, "loss_num": 0.04541015625, "loss_xval": 0.78125, "num_input_tokens_seen": 187887332, "step": 2997 }, { "epoch": 9.976705490848586, "grad_norm": 9.343647956848145, "learning_rate": 5e-06, "loss": 0.4911, "num_input_tokens_seen": 187948964, "step": 2998 }, { "epoch": 9.976705490848586, "loss": 0.6791399717330933, "loss_ce": 1.761499333952088e-06, "loss_iou": 0.2392578125, "loss_num": 0.0400390625, "loss_xval": 0.6796875, "num_input_tokens_seen": 187948964, "step": 2998 }, { "epoch": 9.980033277870216, "grad_norm": 19.095632553100586, "learning_rate": 5e-06, "loss": 0.7061, "num_input_tokens_seen": 188010760, "step": 2999 }, { "epoch": 9.980033277870216, "loss": 0.9483574628829956, "loss_ce": 0.0013360142474994063, "loss_iou": 0.375, "loss_num": 0.0400390625, "loss_xval": 0.9453125, "num_input_tokens_seen": 188010760, "step": 2999 }, { "epoch": 9.983361064891847, "grad_norm": 35.92034912109375, "learning_rate": 5e-06, "loss": 0.4326, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "eval_seeclick_CIoU": 0.059540221467614174, "eval_seeclick_GIoU": 0.06474006548523903, "eval_seeclick_IoU": 0.17442839592695236, "eval_seeclick_MAE_all": 0.16842354834079742, "eval_seeclick_MAE_h": 0.05648810602724552, "eval_seeclick_MAE_w": 0.1372782401740551, "eval_seeclick_MAE_x_boxes": 0.20087267458438873, "eval_seeclick_MAE_y_boxes": 0.17900529503822327, "eval_seeclick_NUM_probability": 0.9999480545520782, "eval_seeclick_inside_bbox": 0.22500000149011612, "eval_seeclick_loss": 2.8763561248779297, "eval_seeclick_loss_ce": 0.15939994156360626, "eval_seeclick_loss_iou": 0.9462890625, "eval_seeclick_loss_num": 0.17037200927734375, "eval_seeclick_loss_xval": 2.74267578125, "eval_seeclick_runtime": 74.6392, "eval_seeclick_samples_per_second": 0.63, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "eval_icons_CIoU": -0.0347603764384985, "eval_icons_GIoU": 0.06389028578996658, "eval_icons_IoU": 0.13180043548345566, "eval_icons_MAE_all": 0.17505701631307602, "eval_icons_MAE_h": 0.1184907928109169, "eval_icons_MAE_w": 0.18472349643707275, "eval_icons_MAE_x_boxes": 0.13223521783947945, "eval_icons_MAE_y_boxes": 0.09800738841295242, "eval_icons_NUM_probability": 0.9999829232692719, "eval_icons_inside_bbox": 0.2916666716337204, "eval_icons_loss": 2.7200021743774414, "eval_icons_loss_ce": 2.4341628659385606e-06, "eval_icons_loss_iou": 0.930419921875, "eval_icons_loss_num": 0.173248291015625, "eval_icons_loss_xval": 2.728515625, "eval_icons_runtime": 79.4167, "eval_icons_samples_per_second": 0.63, "eval_icons_steps_per_second": 0.025, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "eval_screenspot_CIoU": 0.17780398080746332, "eval_screenspot_GIoU": 0.21460233628749847, "eval_screenspot_IoU": 0.2911508282025655, "eval_screenspot_MAE_all": 0.12231641262769699, "eval_screenspot_MAE_h": 0.07238687202334404, "eval_screenspot_MAE_w": 0.09640180319547653, "eval_screenspot_MAE_x_boxes": 0.1741406818230947, "eval_screenspot_MAE_y_boxes": 0.08766034493843715, "eval_screenspot_NUM_probability": 0.9999868075052897, "eval_screenspot_inside_bbox": 0.5049999952316284, "eval_screenspot_loss": 2.2178843021392822, "eval_screenspot_loss_ce": 1.1339401529160872e-05, "eval_screenspot_loss_iou": 0.7994791666666666, "eval_screenspot_loss_num": 0.13478597005208334, "eval_screenspot_loss_xval": 2.2724609375, "eval_screenspot_runtime": 140.3736, "eval_screenspot_samples_per_second": 0.634, "eval_screenspot_steps_per_second": 0.021, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "eval_compot_CIoU": 0.039684439077973366, "eval_compot_GIoU": 0.09787866845726967, "eval_compot_IoU": 0.19241078943014145, "eval_compot_MAE_all": 0.17171455174684525, "eval_compot_MAE_h": 0.07504569459706545, "eval_compot_MAE_w": 0.17828496545553207, "eval_compot_MAE_x_boxes": 0.16543401777744293, "eval_compot_MAE_y_boxes": 0.13084300979971886, "eval_compot_NUM_probability": 0.9999890923500061, "eval_compot_inside_bbox": 0.3229166716337204, "eval_compot_loss": 2.656287908554077, "eval_compot_loss_ce": 0.0020744651556015015, "eval_compot_loss_iou": 0.9189453125, "eval_compot_loss_num": 0.176727294921875, "eval_compot_loss_xval": 2.72021484375, "eval_compot_runtime": 90.7233, "eval_compot_samples_per_second": 0.551, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "eval_custom_ui_MAE_all": 0.06391769461333752, "eval_custom_ui_MAE_x": 0.07262291014194489, "eval_custom_ui_MAE_y": 0.05521249212324619, "eval_custom_ui_NUM_probability": 0.9999982118606567, "eval_custom_ui_loss": 0.2948858141899109, "eval_custom_ui_loss_ce": 2.6010940246123937e-06, "eval_custom_ui_loss_num": 0.0615081787109375, "eval_custom_ui_loss_xval": 0.307464599609375, "eval_custom_ui_runtime": 69.5466, "eval_custom_ui_samples_per_second": 0.719, "eval_custom_ui_steps_per_second": 0.029, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.983361064891847, "loss": 0.3301408886909485, "loss_ce": 1.7051454506145092e-06, "loss_iou": 0.0, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 188070872, "step": 3000 }, { "epoch": 9.986688851913478, "grad_norm": 40.65644454956055, "learning_rate": 5e-06, "loss": 0.6914, "num_input_tokens_seen": 188135068, "step": 3001 }, { "epoch": 9.986688851913478, "loss": 0.6115773916244507, "loss_ce": 5.134588718647137e-06, "loss_iou": 0.2578125, "loss_num": 0.0194091796875, "loss_xval": 0.61328125, "num_input_tokens_seen": 188135068, "step": 3001 }, { "epoch": 9.990016638935108, "grad_norm": 53.838687896728516, "learning_rate": 5e-06, "loss": 0.8539, "num_input_tokens_seen": 188199000, "step": 3002 }, { "epoch": 9.990016638935108, "loss": 0.8233672380447388, "loss_ce": 2.9494638056348776e-06, "loss_iou": 0.2890625, "loss_num": 0.049072265625, "loss_xval": 0.82421875, "num_input_tokens_seen": 188199000, "step": 3002 }, { "epoch": 9.993344425956739, "grad_norm": 20.16024398803711, "learning_rate": 5e-06, "loss": 0.5723, "num_input_tokens_seen": 188261612, "step": 3003 }, { "epoch": 9.993344425956739, "loss": 0.5471505522727966, "loss_ce": 9.080844165509916e-07, "loss_iou": 0.1884765625, "loss_num": 0.033935546875, "loss_xval": 0.546875, "num_input_tokens_seen": 188261612, "step": 3003 }, { "epoch": 9.99667221297837, "grad_norm": 15.831808090209961, "learning_rate": 5e-06, "loss": 0.5267, "num_input_tokens_seen": 188325528, "step": 3004 }, { "epoch": 9.99667221297837, "loss": 0.700700044631958, "loss_ce": 0.00026064683333970606, "loss_iou": 0.306640625, "loss_num": 0.0172119140625, "loss_xval": 0.69921875, "num_input_tokens_seen": 188325528, "step": 3004 }, { "epoch": 10.0, "grad_norm": 9.404145240783691, "learning_rate": 5e-06, "loss": 0.3537, "num_input_tokens_seen": 188387628, "step": 3005 }, { "epoch": 10.0, "loss": 0.4381360411643982, "loss_ce": 2.5682262275950052e-05, "loss_iou": 0.1708984375, "loss_num": 0.0191650390625, "loss_xval": 0.4375, "num_input_tokens_seen": 188387628, "step": 3005 }, { "epoch": 10.00332778702163, "grad_norm": 10.982834815979004, "learning_rate": 5e-06, "loss": 0.7208, "num_input_tokens_seen": 188451080, "step": 3006 }, { "epoch": 10.00332778702163, "loss": 0.6706142425537109, "loss_ce": 8.206519851228222e-05, "loss_iou": 0.26171875, "loss_num": 0.029296875, "loss_xval": 0.671875, "num_input_tokens_seen": 188451080, "step": 3006 }, { "epoch": 10.006655574043261, "grad_norm": 7.176749229431152, "learning_rate": 5e-06, "loss": 0.429, "num_input_tokens_seen": 188514448, "step": 3007 }, { "epoch": 10.006655574043261, "loss": 0.34011411666870117, "loss_ce": 0.00014829964493401349, "loss_iou": 0.125, "loss_num": 0.0179443359375, "loss_xval": 0.33984375, "num_input_tokens_seen": 188514448, "step": 3007 }, { "epoch": 10.009983361064892, "grad_norm": 9.905712127685547, "learning_rate": 5e-06, "loss": 0.4083, "num_input_tokens_seen": 188576168, "step": 3008 }, { "epoch": 10.009983361064892, "loss": 0.5211450457572937, "loss_ce": 0.0005151022924110293, "loss_iou": 0.1806640625, "loss_num": 0.03173828125, "loss_xval": 0.51953125, "num_input_tokens_seen": 188576168, "step": 3008 }, { "epoch": 10.013311148086522, "grad_norm": 10.175334930419922, "learning_rate": 5e-06, "loss": 0.522, "num_input_tokens_seen": 188639812, "step": 3009 }, { "epoch": 10.013311148086522, "loss": 0.5417640209197998, "loss_ce": 1.5957270079525188e-05, "loss_iou": 0.19921875, "loss_num": 0.02880859375, "loss_xval": 0.54296875, "num_input_tokens_seen": 188639812, "step": 3009 }, { "epoch": 10.016638935108153, "grad_norm": 10.259011268615723, "learning_rate": 5e-06, "loss": 0.6119, "num_input_tokens_seen": 188702848, "step": 3010 }, { "epoch": 10.016638935108153, "loss": 0.598767101764679, "loss_ce": 1.2256194168003276e-05, "loss_iou": 0.224609375, "loss_num": 0.0299072265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 188702848, "step": 3010 }, { "epoch": 10.019966722129784, "grad_norm": 6.279110908508301, "learning_rate": 5e-06, "loss": 0.2726, "num_input_tokens_seen": 188763228, "step": 3011 }, { "epoch": 10.019966722129784, "loss": 0.2634589374065399, "loss_ce": 6.819274176450563e-07, "loss_iou": 0.0, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 188763228, "step": 3011 }, { "epoch": 10.023294509151414, "grad_norm": 10.614663124084473, "learning_rate": 5e-06, "loss": 0.4853, "num_input_tokens_seen": 188826504, "step": 3012 }, { "epoch": 10.023294509151414, "loss": 0.551394522190094, "loss_ce": 2.949188456113916e-06, "loss_iou": 0.2197265625, "loss_num": 0.0224609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 188826504, "step": 3012 }, { "epoch": 10.026622296173045, "grad_norm": 13.265521049499512, "learning_rate": 5e-06, "loss": 0.6662, "num_input_tokens_seen": 188890620, "step": 3013 }, { "epoch": 10.026622296173045, "loss": 0.5429857969284058, "loss_ce": 0.00020019143994431943, "loss_iou": 0.1962890625, "loss_num": 0.030029296875, "loss_xval": 0.54296875, "num_input_tokens_seen": 188890620, "step": 3013 }, { "epoch": 10.029950083194676, "grad_norm": 14.208629608154297, "learning_rate": 5e-06, "loss": 0.6541, "num_input_tokens_seen": 188954308, "step": 3014 }, { "epoch": 10.029950083194676, "loss": 0.6669336557388306, "loss_ce": 2.5172653295157943e-06, "loss_iou": 0.248046875, "loss_num": 0.0341796875, "loss_xval": 0.66796875, "num_input_tokens_seen": 188954308, "step": 3014 }, { "epoch": 10.033277870216306, "grad_norm": 17.447580337524414, "learning_rate": 5e-06, "loss": 0.3617, "num_input_tokens_seen": 189016572, "step": 3015 }, { "epoch": 10.033277870216306, "loss": 0.45203158259391785, "loss_ce": 5.199141924094874e-06, "loss_iou": 0.171875, "loss_num": 0.0218505859375, "loss_xval": 0.451171875, "num_input_tokens_seen": 189016572, "step": 3015 }, { "epoch": 10.036605657237937, "grad_norm": 7.900660514831543, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 189080904, "step": 3016 }, { "epoch": 10.036605657237937, "loss": 0.4187104105949402, "loss_ce": 9.255587428924628e-06, "loss_iou": 0.1650390625, "loss_num": 0.017822265625, "loss_xval": 0.41796875, "num_input_tokens_seen": 189080904, "step": 3016 }, { "epoch": 10.039933444259567, "grad_norm": 8.302372932434082, "learning_rate": 5e-06, "loss": 0.6312, "num_input_tokens_seen": 189141808, "step": 3017 }, { "epoch": 10.039933444259567, "loss": 0.7612323760986328, "loss_ce": 1.9046354964302736e-06, "loss_iou": 0.27734375, "loss_num": 0.041015625, "loss_xval": 0.76171875, "num_input_tokens_seen": 189141808, "step": 3017 }, { "epoch": 10.043261231281198, "grad_norm": 8.327303886413574, "learning_rate": 5e-06, "loss": 0.535, "num_input_tokens_seen": 189205804, "step": 3018 }, { "epoch": 10.043261231281198, "loss": 0.47005999088287354, "loss_ce": 8.92794705578126e-05, "loss_iou": 0.2001953125, "loss_num": 0.0140380859375, "loss_xval": 0.470703125, "num_input_tokens_seen": 189205804, "step": 3018 }, { "epoch": 10.046589018302829, "grad_norm": 7.597850322723389, "learning_rate": 5e-06, "loss": 0.5108, "num_input_tokens_seen": 189269776, "step": 3019 }, { "epoch": 10.046589018302829, "loss": 0.4864760935306549, "loss_ce": 2.5872610422084108e-05, "loss_iou": 0.140625, "loss_num": 0.041259765625, "loss_xval": 0.486328125, "num_input_tokens_seen": 189269776, "step": 3019 }, { "epoch": 10.04991680532446, "grad_norm": 10.545403480529785, "learning_rate": 5e-06, "loss": 0.4227, "num_input_tokens_seen": 189332696, "step": 3020 }, { "epoch": 10.04991680532446, "loss": 0.49829307198524475, "loss_ce": 2.0778766156581696e-06, "loss_iou": 0.2236328125, "loss_num": 0.01007080078125, "loss_xval": 0.498046875, "num_input_tokens_seen": 189332696, "step": 3020 }, { "epoch": 10.05324459234609, "grad_norm": 25.575084686279297, "learning_rate": 5e-06, "loss": 0.6434, "num_input_tokens_seen": 189395432, "step": 3021 }, { "epoch": 10.05324459234609, "loss": 0.4554736018180847, "loss_ce": 2.9259337679832242e-05, "loss_iou": 0.1787109375, "loss_num": 0.019775390625, "loss_xval": 0.455078125, "num_input_tokens_seen": 189395432, "step": 3021 }, { "epoch": 10.05657237936772, "grad_norm": 20.497854232788086, "learning_rate": 5e-06, "loss": 0.3678, "num_input_tokens_seen": 189458204, "step": 3022 }, { "epoch": 10.05657237936772, "loss": 0.37878894805908203, "loss_ce": 4.748573246615706e-06, "loss_iou": 0.154296875, "loss_num": 0.013916015625, "loss_xval": 0.37890625, "num_input_tokens_seen": 189458204, "step": 3022 }, { "epoch": 10.059900166389351, "grad_norm": 12.98408031463623, "learning_rate": 5e-06, "loss": 0.4049, "num_input_tokens_seen": 189521232, "step": 3023 }, { "epoch": 10.059900166389351, "loss": 0.31691911816596985, "loss_ce": 0.00039078487316146493, "loss_iou": 0.1259765625, "loss_num": 0.012939453125, "loss_xval": 0.31640625, "num_input_tokens_seen": 189521232, "step": 3023 }, { "epoch": 10.063227953410982, "grad_norm": 7.8746867179870605, "learning_rate": 5e-06, "loss": 0.5735, "num_input_tokens_seen": 189584044, "step": 3024 }, { "epoch": 10.063227953410982, "loss": 0.5365042686462402, "loss_ce": 5.216595127421897e-06, "loss_iou": 0.203125, "loss_num": 0.0260009765625, "loss_xval": 0.53515625, "num_input_tokens_seen": 189584044, "step": 3024 }, { "epoch": 10.066555740432612, "grad_norm": 22.620357513427734, "learning_rate": 5e-06, "loss": 0.6676, "num_input_tokens_seen": 189646168, "step": 3025 }, { "epoch": 10.066555740432612, "loss": 0.6565216183662415, "loss_ce": 2.7446232707006857e-05, "loss_iou": 0.2392578125, "loss_num": 0.035400390625, "loss_xval": 0.65625, "num_input_tokens_seen": 189646168, "step": 3025 }, { "epoch": 10.069883527454243, "grad_norm": 32.430763244628906, "learning_rate": 5e-06, "loss": 0.5995, "num_input_tokens_seen": 189709516, "step": 3026 }, { "epoch": 10.069883527454243, "loss": 0.646669328212738, "loss_ce": 0.0006732175243087113, "loss_iou": 0.267578125, "loss_num": 0.0225830078125, "loss_xval": 0.64453125, "num_input_tokens_seen": 189709516, "step": 3026 }, { "epoch": 10.073211314475873, "grad_norm": 19.34904670715332, "learning_rate": 5e-06, "loss": 0.5311, "num_input_tokens_seen": 189771756, "step": 3027 }, { "epoch": 10.073211314475873, "loss": 0.5482203364372253, "loss_ce": 0.0004908722476102412, "loss_iou": 0.1533203125, "loss_num": 0.04833984375, "loss_xval": 0.546875, "num_input_tokens_seen": 189771756, "step": 3027 }, { "epoch": 10.076539101497504, "grad_norm": 14.083971977233887, "learning_rate": 5e-06, "loss": 0.4123, "num_input_tokens_seen": 189834828, "step": 3028 }, { "epoch": 10.076539101497504, "loss": 0.3039546608924866, "loss_ce": 6.061364183551632e-05, "loss_iou": 0.12109375, "loss_num": 0.0123291015625, "loss_xval": 0.3046875, "num_input_tokens_seen": 189834828, "step": 3028 }, { "epoch": 10.079866888519135, "grad_norm": 11.137245178222656, "learning_rate": 5e-06, "loss": 0.5062, "num_input_tokens_seen": 189898444, "step": 3029 }, { "epoch": 10.079866888519135, "loss": 0.5593129396438599, "loss_ce": 0.001329529914073646, "loss_iou": 0.232421875, "loss_num": 0.0185546875, "loss_xval": 0.55859375, "num_input_tokens_seen": 189898444, "step": 3029 }, { "epoch": 10.083194675540765, "grad_norm": 17.324718475341797, "learning_rate": 5e-06, "loss": 0.5347, "num_input_tokens_seen": 189961160, "step": 3030 }, { "epoch": 10.083194675540765, "loss": 0.4331216812133789, "loss_ce": 9.69301026998437e-07, "loss_iou": 0.1494140625, "loss_num": 0.0269775390625, "loss_xval": 0.43359375, "num_input_tokens_seen": 189961160, "step": 3030 }, { "epoch": 10.086522462562396, "grad_norm": 18.3051815032959, "learning_rate": 5e-06, "loss": 0.6733, "num_input_tokens_seen": 190025272, "step": 3031 }, { "epoch": 10.086522462562396, "loss": 0.5705600380897522, "loss_ce": 3.4003985547315096e-06, "loss_iou": 0.228515625, "loss_num": 0.0228271484375, "loss_xval": 0.5703125, "num_input_tokens_seen": 190025272, "step": 3031 }, { "epoch": 10.089850249584027, "grad_norm": 10.145862579345703, "learning_rate": 5e-06, "loss": 0.5114, "num_input_tokens_seen": 190089084, "step": 3032 }, { "epoch": 10.089850249584027, "loss": 0.6036412715911865, "loss_ce": 3.5967414078186266e-06, "loss_iou": 0.2314453125, "loss_num": 0.0281982421875, "loss_xval": 0.60546875, "num_input_tokens_seen": 190089084, "step": 3032 }, { "epoch": 10.093178036605657, "grad_norm": 11.641491889953613, "learning_rate": 5e-06, "loss": 0.3448, "num_input_tokens_seen": 190150960, "step": 3033 }, { "epoch": 10.093178036605657, "loss": 0.3454905152320862, "loss_ce": 9.25664571695961e-05, "loss_iou": 0.1396484375, "loss_num": 0.01318359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 190150960, "step": 3033 }, { "epoch": 10.096505823627288, "grad_norm": 10.1557035446167, "learning_rate": 5e-06, "loss": 0.5527, "num_input_tokens_seen": 190213752, "step": 3034 }, { "epoch": 10.096505823627288, "loss": 0.4658280611038208, "loss_ce": 7.748703865217976e-06, "loss_iou": 0.1875, "loss_num": 0.01806640625, "loss_xval": 0.46484375, "num_input_tokens_seen": 190213752, "step": 3034 }, { "epoch": 10.099833610648918, "grad_norm": 19.44452667236328, "learning_rate": 5e-06, "loss": 0.5249, "num_input_tokens_seen": 190276052, "step": 3035 }, { "epoch": 10.099833610648918, "loss": 0.5429807901382446, "loss_ce": 1.2047621567035094e-05, "loss_iou": 0.1806640625, "loss_num": 0.0361328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 190276052, "step": 3035 }, { "epoch": 10.103161397670549, "grad_norm": 8.697257995605469, "learning_rate": 5e-06, "loss": 0.5537, "num_input_tokens_seen": 190338260, "step": 3036 }, { "epoch": 10.103161397670549, "loss": 0.43021589517593384, "loss_ce": 0.00016217224765568972, "loss_iou": 0.1298828125, "loss_num": 0.0341796875, "loss_xval": 0.4296875, "num_input_tokens_seen": 190338260, "step": 3036 }, { "epoch": 10.10648918469218, "grad_norm": 9.91154670715332, "learning_rate": 5e-06, "loss": 0.5312, "num_input_tokens_seen": 190400296, "step": 3037 }, { "epoch": 10.10648918469218, "loss": 0.5304298400878906, "loss_ce": 3.431052027735859e-05, "loss_iou": 0.21484375, "loss_num": 0.0203857421875, "loss_xval": 0.53125, "num_input_tokens_seen": 190400296, "step": 3037 }, { "epoch": 10.10981697171381, "grad_norm": 12.569117546081543, "learning_rate": 5e-06, "loss": 0.6394, "num_input_tokens_seen": 190463836, "step": 3038 }, { "epoch": 10.10981697171381, "loss": 0.715487003326416, "loss_ce": 0.0007653248612768948, "loss_iou": 0.2578125, "loss_num": 0.040283203125, "loss_xval": 0.71484375, "num_input_tokens_seen": 190463836, "step": 3038 }, { "epoch": 10.11314475873544, "grad_norm": 9.417847633361816, "learning_rate": 5e-06, "loss": 0.4183, "num_input_tokens_seen": 190526364, "step": 3039 }, { "epoch": 10.11314475873544, "loss": 0.3923393189907074, "loss_ce": 5.328005499904975e-06, "loss_iou": 0.134765625, "loss_num": 0.0245361328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 190526364, "step": 3039 }, { "epoch": 10.116472545757071, "grad_norm": 6.080514907836914, "learning_rate": 5e-06, "loss": 0.2853, "num_input_tokens_seen": 190587532, "step": 3040 }, { "epoch": 10.116472545757071, "loss": 0.24054569005966187, "loss_ce": 6.145877705421299e-06, "loss_iou": 0.061279296875, "loss_num": 0.0235595703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 190587532, "step": 3040 }, { "epoch": 10.119800332778702, "grad_norm": 10.045504570007324, "learning_rate": 5e-06, "loss": 0.4962, "num_input_tokens_seen": 190651088, "step": 3041 }, { "epoch": 10.119800332778702, "loss": 0.38061732053756714, "loss_ce": 2.081745378745836e-06, "loss_iou": 0.1123046875, "loss_num": 0.03125, "loss_xval": 0.380859375, "num_input_tokens_seen": 190651088, "step": 3041 }, { "epoch": 10.123128119800333, "grad_norm": 28.564218521118164, "learning_rate": 5e-06, "loss": 0.5924, "num_input_tokens_seen": 190714220, "step": 3042 }, { "epoch": 10.123128119800333, "loss": 0.6246444582939148, "loss_ce": 1.0683411346690264e-05, "loss_iou": 0.1923828125, "loss_num": 0.048095703125, "loss_xval": 0.625, "num_input_tokens_seen": 190714220, "step": 3042 }, { "epoch": 10.126455906821963, "grad_norm": 20.12108612060547, "learning_rate": 5e-06, "loss": 0.4384, "num_input_tokens_seen": 190776896, "step": 3043 }, { "epoch": 10.126455906821963, "loss": 0.436653733253479, "loss_ce": 8.220132258429658e-06, "loss_iou": 0.15625, "loss_num": 0.0250244140625, "loss_xval": 0.4375, "num_input_tokens_seen": 190776896, "step": 3043 }, { "epoch": 10.129783693843594, "grad_norm": 10.866451263427734, "learning_rate": 5e-06, "loss": 0.3711, "num_input_tokens_seen": 190839632, "step": 3044 }, { "epoch": 10.129783693843594, "loss": 0.26520782709121704, "loss_ce": 2.4345720248675207e-06, "loss_iou": 0.068359375, "loss_num": 0.0257568359375, "loss_xval": 0.265625, "num_input_tokens_seen": 190839632, "step": 3044 }, { "epoch": 10.133111480865225, "grad_norm": 15.497193336486816, "learning_rate": 5e-06, "loss": 0.4369, "num_input_tokens_seen": 190901356, "step": 3045 }, { "epoch": 10.133111480865225, "loss": 0.3753911554813385, "loss_ce": 2.4935565306805074e-05, "loss_iou": 0.12255859375, "loss_num": 0.0260009765625, "loss_xval": 0.375, "num_input_tokens_seen": 190901356, "step": 3045 }, { "epoch": 10.136439267886855, "grad_norm": 14.93649673461914, "learning_rate": 5e-06, "loss": 0.3421, "num_input_tokens_seen": 190964744, "step": 3046 }, { "epoch": 10.136439267886855, "loss": 0.2631550431251526, "loss_ce": 1.9539129425538704e-06, "loss_iou": 0.09814453125, "loss_num": 0.01336669921875, "loss_xval": 0.263671875, "num_input_tokens_seen": 190964744, "step": 3046 }, { "epoch": 10.139767054908486, "grad_norm": 16.877704620361328, "learning_rate": 5e-06, "loss": 0.6482, "num_input_tokens_seen": 191028968, "step": 3047 }, { "epoch": 10.139767054908486, "loss": 0.5950077772140503, "loss_ce": 3.706754432641901e-05, "loss_iou": 0.2255859375, "loss_num": 0.028564453125, "loss_xval": 0.59375, "num_input_tokens_seen": 191028968, "step": 3047 }, { "epoch": 10.143094841930116, "grad_norm": 20.004287719726562, "learning_rate": 5e-06, "loss": 0.4617, "num_input_tokens_seen": 191092156, "step": 3048 }, { "epoch": 10.143094841930116, "loss": 0.4247455894947052, "loss_ce": 0.0006122913910076022, "loss_iou": 0.1484375, "loss_num": 0.025390625, "loss_xval": 0.423828125, "num_input_tokens_seen": 191092156, "step": 3048 }, { "epoch": 10.146422628951747, "grad_norm": 25.07771873474121, "learning_rate": 5e-06, "loss": 0.4454, "num_input_tokens_seen": 191156124, "step": 3049 }, { "epoch": 10.146422628951747, "loss": 0.569155752658844, "loss_ce": 6.394248339347541e-05, "loss_iou": 0.2177734375, "loss_num": 0.0267333984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 191156124, "step": 3049 }, { "epoch": 10.149750415973378, "grad_norm": 24.994394302368164, "learning_rate": 5e-06, "loss": 0.7121, "num_input_tokens_seen": 191220980, "step": 3050 }, { "epoch": 10.149750415973378, "loss": 0.629396378993988, "loss_ce": 1.8700096688917256e-06, "loss_iou": 0.259765625, "loss_num": 0.021728515625, "loss_xval": 0.62890625, "num_input_tokens_seen": 191220980, "step": 3050 }, { "epoch": 10.153078202995008, "grad_norm": 9.349181175231934, "learning_rate": 5e-06, "loss": 0.5115, "num_input_tokens_seen": 191284448, "step": 3051 }, { "epoch": 10.153078202995008, "loss": 0.4700396656990051, "loss_ce": 7.930599167593755e-06, "loss_iou": 0.1611328125, "loss_num": 0.02978515625, "loss_xval": 0.470703125, "num_input_tokens_seen": 191284448, "step": 3051 }, { "epoch": 10.156405990016639, "grad_norm": 14.521713256835938, "learning_rate": 5e-06, "loss": 0.4979, "num_input_tokens_seen": 191347032, "step": 3052 }, { "epoch": 10.156405990016639, "loss": 0.45709407329559326, "loss_ce": 1.7887315379994106e-06, "loss_iou": 0.1552734375, "loss_num": 0.0294189453125, "loss_xval": 0.45703125, "num_input_tokens_seen": 191347032, "step": 3052 }, { "epoch": 10.15973377703827, "grad_norm": 7.539127349853516, "learning_rate": 5e-06, "loss": 0.5543, "num_input_tokens_seen": 191409352, "step": 3053 }, { "epoch": 10.15973377703827, "loss": 0.47942155599594116, "loss_ce": 5.143308226251975e-05, "loss_iou": 0.205078125, "loss_num": 0.013671875, "loss_xval": 0.478515625, "num_input_tokens_seen": 191409352, "step": 3053 }, { "epoch": 10.1630615640599, "grad_norm": 7.065848350524902, "learning_rate": 5e-06, "loss": 0.5706, "num_input_tokens_seen": 191471980, "step": 3054 }, { "epoch": 10.1630615640599, "loss": 0.750413179397583, "loss_ce": 0.00016900789341889322, "loss_iou": 0.306640625, "loss_num": 0.02783203125, "loss_xval": 0.75, "num_input_tokens_seen": 191471980, "step": 3054 }, { "epoch": 10.16638935108153, "grad_norm": 5.362648010253906, "learning_rate": 5e-06, "loss": 0.493, "num_input_tokens_seen": 191535912, "step": 3055 }, { "epoch": 10.16638935108153, "loss": 0.5290584564208984, "loss_ce": 5.745611815655138e-06, "loss_iou": 0.224609375, "loss_num": 0.01611328125, "loss_xval": 0.52734375, "num_input_tokens_seen": 191535912, "step": 3055 }, { "epoch": 10.169717138103161, "grad_norm": 4.862673282623291, "learning_rate": 5e-06, "loss": 0.2707, "num_input_tokens_seen": 191596608, "step": 3056 }, { "epoch": 10.169717138103161, "loss": 0.2966657876968384, "loss_ce": 9.594694711267948e-05, "loss_iou": 0.09130859375, "loss_num": 0.0228271484375, "loss_xval": 0.296875, "num_input_tokens_seen": 191596608, "step": 3056 }, { "epoch": 10.173044925124792, "grad_norm": 7.769982814788818, "learning_rate": 5e-06, "loss": 0.636, "num_input_tokens_seen": 191658552, "step": 3057 }, { "epoch": 10.173044925124792, "loss": 0.706032931804657, "loss_ce": 8.745970262680203e-06, "loss_iou": 0.279296875, "loss_num": 0.02978515625, "loss_xval": 0.70703125, "num_input_tokens_seen": 191658552, "step": 3057 }, { "epoch": 10.176372712146422, "grad_norm": 24.126157760620117, "learning_rate": 5e-06, "loss": 0.9376, "num_input_tokens_seen": 191722496, "step": 3058 }, { "epoch": 10.176372712146422, "loss": 0.7786626815795898, "loss_ce": 0.00034243357367813587, "loss_iou": 0.2890625, "loss_num": 0.0400390625, "loss_xval": 0.77734375, "num_input_tokens_seen": 191722496, "step": 3058 }, { "epoch": 10.179700499168053, "grad_norm": 32.29357147216797, "learning_rate": 5e-06, "loss": 0.584, "num_input_tokens_seen": 191785704, "step": 3059 }, { "epoch": 10.179700499168053, "loss": 0.44250717759132385, "loss_ce": 2.281553861394059e-06, "loss_iou": 0.173828125, "loss_num": 0.01904296875, "loss_xval": 0.443359375, "num_input_tokens_seen": 191785704, "step": 3059 }, { "epoch": 10.183028286189684, "grad_norm": 32.1975212097168, "learning_rate": 5e-06, "loss": 0.6317, "num_input_tokens_seen": 191848496, "step": 3060 }, { "epoch": 10.183028286189684, "loss": 0.5443167686462402, "loss_ce": 5.253312338027172e-06, "loss_iou": 0.15234375, "loss_num": 0.0478515625, "loss_xval": 0.54296875, "num_input_tokens_seen": 191848496, "step": 3060 }, { "epoch": 10.186356073211314, "grad_norm": 25.340356826782227, "learning_rate": 5e-06, "loss": 0.4889, "num_input_tokens_seen": 191910408, "step": 3061 }, { "epoch": 10.186356073211314, "loss": 0.47789621353149414, "loss_ce": 0.00029610138153657317, "loss_iou": 0.1689453125, "loss_num": 0.02783203125, "loss_xval": 0.478515625, "num_input_tokens_seen": 191910408, "step": 3061 }, { "epoch": 10.189683860232945, "grad_norm": 16.824565887451172, "learning_rate": 5e-06, "loss": 0.3482, "num_input_tokens_seen": 191972564, "step": 3062 }, { "epoch": 10.189683860232945, "loss": 0.351199746131897, "loss_ce": 3.4757617868308444e-06, "loss_iou": 0.11376953125, "loss_num": 0.024658203125, "loss_xval": 0.3515625, "num_input_tokens_seen": 191972564, "step": 3062 }, { "epoch": 10.193011647254576, "grad_norm": 19.056201934814453, "learning_rate": 5e-06, "loss": 0.4946, "num_input_tokens_seen": 192036520, "step": 3063 }, { "epoch": 10.193011647254576, "loss": 0.430402934551239, "loss_ce": 4.406685184221715e-05, "loss_iou": 0.185546875, "loss_num": 0.0115966796875, "loss_xval": 0.4296875, "num_input_tokens_seen": 192036520, "step": 3063 }, { "epoch": 10.196339434276206, "grad_norm": 28.761810302734375, "learning_rate": 5e-06, "loss": 0.5554, "num_input_tokens_seen": 192099040, "step": 3064 }, { "epoch": 10.196339434276206, "loss": 0.659856915473938, "loss_ce": 0.00034157236223109066, "loss_iou": 0.255859375, "loss_num": 0.0296630859375, "loss_xval": 0.66015625, "num_input_tokens_seen": 192099040, "step": 3064 }, { "epoch": 10.199667221297837, "grad_norm": 28.498756408691406, "learning_rate": 5e-06, "loss": 0.6641, "num_input_tokens_seen": 192162380, "step": 3065 }, { "epoch": 10.199667221297837, "loss": 0.6944209337234497, "loss_ce": 0.0020381463691592216, "loss_iou": 0.283203125, "loss_num": 0.0252685546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 192162380, "step": 3065 }, { "epoch": 10.202995008319467, "grad_norm": 28.610897064208984, "learning_rate": 5e-06, "loss": 0.613, "num_input_tokens_seen": 192226888, "step": 3066 }, { "epoch": 10.202995008319467, "loss": 0.6037613153457642, "loss_ce": 1.5383794789158856e-06, "loss_iou": 0.2412109375, "loss_num": 0.02392578125, "loss_xval": 0.60546875, "num_input_tokens_seen": 192226888, "step": 3066 }, { "epoch": 10.206322795341098, "grad_norm": 32.06743240356445, "learning_rate": 5e-06, "loss": 0.5405, "num_input_tokens_seen": 192289964, "step": 3067 }, { "epoch": 10.206322795341098, "loss": 0.5583080053329468, "loss_ce": 0.0003169428964611143, "loss_iou": 0.2353515625, "loss_num": 0.0174560546875, "loss_xval": 0.55859375, "num_input_tokens_seen": 192289964, "step": 3067 }, { "epoch": 10.209650582362729, "grad_norm": 11.078126907348633, "learning_rate": 5e-06, "loss": 0.5424, "num_input_tokens_seen": 192353696, "step": 3068 }, { "epoch": 10.209650582362729, "loss": 0.4965316951274872, "loss_ce": 0.0004989890148863196, "loss_iou": 0.1865234375, "loss_num": 0.0245361328125, "loss_xval": 0.49609375, "num_input_tokens_seen": 192353696, "step": 3068 }, { "epoch": 10.21297836938436, "grad_norm": 6.875300884246826, "learning_rate": 5e-06, "loss": 0.4594, "num_input_tokens_seen": 192416064, "step": 3069 }, { "epoch": 10.21297836938436, "loss": 0.41626161336898804, "loss_ce": 1.8693312995310407e-06, "loss_iou": 0.1298828125, "loss_num": 0.03125, "loss_xval": 0.416015625, "num_input_tokens_seen": 192416064, "step": 3069 }, { "epoch": 10.21630615640599, "grad_norm": 23.199928283691406, "learning_rate": 5e-06, "loss": 0.4675, "num_input_tokens_seen": 192479344, "step": 3070 }, { "epoch": 10.21630615640599, "loss": 0.46121320128440857, "loss_ce": 1.026628410727426e-06, "loss_iou": 0.1484375, "loss_num": 0.032958984375, "loss_xval": 0.4609375, "num_input_tokens_seen": 192479344, "step": 3070 }, { "epoch": 10.21963394342762, "grad_norm": 26.034263610839844, "learning_rate": 5e-06, "loss": 0.5815, "num_input_tokens_seen": 192543316, "step": 3071 }, { "epoch": 10.21963394342762, "loss": 0.41426169872283936, "loss_ce": 1.6064879673649557e-05, "loss_iou": 0.1591796875, "loss_num": 0.0194091796875, "loss_xval": 0.4140625, "num_input_tokens_seen": 192543316, "step": 3071 }, { "epoch": 10.222961730449251, "grad_norm": 23.661075592041016, "learning_rate": 5e-06, "loss": 0.4116, "num_input_tokens_seen": 192606532, "step": 3072 }, { "epoch": 10.222961730449251, "loss": 0.38391339778900146, "loss_ce": 2.2775270736019593e-06, "loss_iou": 0.11083984375, "loss_num": 0.032470703125, "loss_xval": 0.384765625, "num_input_tokens_seen": 192606532, "step": 3072 }, { "epoch": 10.226289517470882, "grad_norm": 20.008886337280273, "learning_rate": 5e-06, "loss": 0.557, "num_input_tokens_seen": 192669472, "step": 3073 }, { "epoch": 10.226289517470882, "loss": 0.6502923965454102, "loss_ce": 0.00026799103943631053, "loss_iou": 0.267578125, "loss_num": 0.023193359375, "loss_xval": 0.6484375, "num_input_tokens_seen": 192669472, "step": 3073 }, { "epoch": 10.229617304492512, "grad_norm": 20.163618087768555, "learning_rate": 5e-06, "loss": 0.4192, "num_input_tokens_seen": 192732196, "step": 3074 }, { "epoch": 10.229617304492512, "loss": 0.3582490086555481, "loss_ce": 3.370467675267719e-05, "loss_iou": 0.1494140625, "loss_num": 0.01190185546875, "loss_xval": 0.357421875, "num_input_tokens_seen": 192732196, "step": 3074 }, { "epoch": 10.232945091514143, "grad_norm": 19.058269500732422, "learning_rate": 5e-06, "loss": 0.5733, "num_input_tokens_seen": 192795320, "step": 3075 }, { "epoch": 10.232945091514143, "loss": 0.6328134536743164, "loss_ce": 9.538081258142483e-07, "loss_iou": 0.2294921875, "loss_num": 0.03466796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 192795320, "step": 3075 }, { "epoch": 10.236272878535774, "grad_norm": 24.825345993041992, "learning_rate": 5e-06, "loss": 0.5958, "num_input_tokens_seen": 192859020, "step": 3076 }, { "epoch": 10.236272878535774, "loss": 0.5857861042022705, "loss_ce": 1.166250285677961e-06, "loss_iou": 0.2470703125, "loss_num": 0.018310546875, "loss_xval": 0.5859375, "num_input_tokens_seen": 192859020, "step": 3076 }, { "epoch": 10.239600665557404, "grad_norm": 23.23516845703125, "learning_rate": 5e-06, "loss": 0.7906, "num_input_tokens_seen": 192923356, "step": 3077 }, { "epoch": 10.239600665557404, "loss": 0.598469614982605, "loss_ce": 1.9913110008928925e-05, "loss_iou": 0.2060546875, "loss_num": 0.03759765625, "loss_xval": 0.59765625, "num_input_tokens_seen": 192923356, "step": 3077 }, { "epoch": 10.242928452579035, "grad_norm": 29.959075927734375, "learning_rate": 5e-06, "loss": 0.7423, "num_input_tokens_seen": 192984544, "step": 3078 }, { "epoch": 10.242928452579035, "loss": 0.741845965385437, "loss_ce": 0.0008792080916464329, "loss_iou": 0.2578125, "loss_num": 0.045166015625, "loss_xval": 0.7421875, "num_input_tokens_seen": 192984544, "step": 3078 }, { "epoch": 10.246256239600665, "grad_norm": 37.191585540771484, "learning_rate": 5e-06, "loss": 0.5805, "num_input_tokens_seen": 193047440, "step": 3079 }, { "epoch": 10.246256239600665, "loss": 0.5592362880706787, "loss_ce": 0.0007645573932677507, "loss_iou": 0.2275390625, "loss_num": 0.0205078125, "loss_xval": 0.55859375, "num_input_tokens_seen": 193047440, "step": 3079 }, { "epoch": 10.249584026622296, "grad_norm": 29.718374252319336, "learning_rate": 5e-06, "loss": 0.4163, "num_input_tokens_seen": 193110932, "step": 3080 }, { "epoch": 10.249584026622296, "loss": 0.3393867611885071, "loss_ce": 0.0006721497047692537, "loss_iou": 0.1416015625, "loss_num": 0.01092529296875, "loss_xval": 0.337890625, "num_input_tokens_seen": 193110932, "step": 3080 }, { "epoch": 10.252911813643927, "grad_norm": 23.894695281982422, "learning_rate": 5e-06, "loss": 0.547, "num_input_tokens_seen": 193173644, "step": 3081 }, { "epoch": 10.252911813643927, "loss": 0.4709550738334656, "loss_ce": 7.797201760695316e-06, "loss_iou": 0.1533203125, "loss_num": 0.03271484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 193173644, "step": 3081 }, { "epoch": 10.256239600665557, "grad_norm": 6.970986366271973, "learning_rate": 5e-06, "loss": 0.4056, "num_input_tokens_seen": 193235684, "step": 3082 }, { "epoch": 10.256239600665557, "loss": 0.29149943590164185, "loss_ce": 8.706864173291251e-05, "loss_iou": 0.037109375, "loss_num": 0.04345703125, "loss_xval": 0.291015625, "num_input_tokens_seen": 193235684, "step": 3082 }, { "epoch": 10.259567387687188, "grad_norm": 15.20837688446045, "learning_rate": 5e-06, "loss": 0.4557, "num_input_tokens_seen": 193298660, "step": 3083 }, { "epoch": 10.259567387687188, "loss": 0.4956831634044647, "loss_ce": 1.403331680194242e-06, "loss_iou": 0.1708984375, "loss_num": 0.03076171875, "loss_xval": 0.49609375, "num_input_tokens_seen": 193298660, "step": 3083 }, { "epoch": 10.262895174708818, "grad_norm": 9.477559089660645, "learning_rate": 5e-06, "loss": 0.6239, "num_input_tokens_seen": 193362168, "step": 3084 }, { "epoch": 10.262895174708818, "loss": 0.6364237070083618, "loss_ce": 1.0118887075805105e-05, "loss_iou": 0.2431640625, "loss_num": 0.0299072265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 193362168, "step": 3084 }, { "epoch": 10.266222961730449, "grad_norm": 11.925027847290039, "learning_rate": 5e-06, "loss": 0.3024, "num_input_tokens_seen": 193423956, "step": 3085 }, { "epoch": 10.266222961730449, "loss": 0.29871463775634766, "loss_ce": 8.582259397371672e-06, "loss_iou": 0.099609375, "loss_num": 0.019775390625, "loss_xval": 0.298828125, "num_input_tokens_seen": 193423956, "step": 3085 }, { "epoch": 10.26955074875208, "grad_norm": 9.44187068939209, "learning_rate": 5e-06, "loss": 0.455, "num_input_tokens_seen": 193484700, "step": 3086 }, { "epoch": 10.26955074875208, "loss": 0.5214810371398926, "loss_ce": 0.000729041057638824, "loss_iou": 0.146484375, "loss_num": 0.04541015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 193484700, "step": 3086 }, { "epoch": 10.27287853577371, "grad_norm": 11.455476760864258, "learning_rate": 5e-06, "loss": 0.4226, "num_input_tokens_seen": 193544952, "step": 3087 }, { "epoch": 10.27287853577371, "loss": 0.45529183745384216, "loss_ce": 0.00024425247102044523, "loss_iou": 0.1357421875, "loss_num": 0.036865234375, "loss_xval": 0.455078125, "num_input_tokens_seen": 193544952, "step": 3087 }, { "epoch": 10.27620632279534, "grad_norm": 13.431963920593262, "learning_rate": 5e-06, "loss": 0.443, "num_input_tokens_seen": 193606072, "step": 3088 }, { "epoch": 10.27620632279534, "loss": 0.5231263637542725, "loss_ce": 0.0005433732294477522, "loss_iou": 0.15625, "loss_num": 0.0419921875, "loss_xval": 0.5234375, "num_input_tokens_seen": 193606072, "step": 3088 }, { "epoch": 10.279534109816971, "grad_norm": 10.464255332946777, "learning_rate": 5e-06, "loss": 0.5561, "num_input_tokens_seen": 193667884, "step": 3089 }, { "epoch": 10.279534109816971, "loss": 0.5365639925003052, "loss_ce": 3.889118943334324e-06, "loss_iou": 0.169921875, "loss_num": 0.03955078125, "loss_xval": 0.53515625, "num_input_tokens_seen": 193667884, "step": 3089 }, { "epoch": 10.282861896838602, "grad_norm": 12.759673118591309, "learning_rate": 5e-06, "loss": 0.4347, "num_input_tokens_seen": 193729400, "step": 3090 }, { "epoch": 10.282861896838602, "loss": 0.6359878182411194, "loss_ce": 1.4766865206183866e-06, "loss_iou": 0.201171875, "loss_num": 0.04638671875, "loss_xval": 0.63671875, "num_input_tokens_seen": 193729400, "step": 3090 }, { "epoch": 10.286189683860233, "grad_norm": 12.847373962402344, "learning_rate": 5e-06, "loss": 0.5564, "num_input_tokens_seen": 193792436, "step": 3091 }, { "epoch": 10.286189683860233, "loss": 0.619999349117279, "loss_ce": 4.217282366880681e-06, "loss_iou": 0.21875, "loss_num": 0.036865234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 193792436, "step": 3091 }, { "epoch": 10.289517470881863, "grad_norm": 15.703346252441406, "learning_rate": 5e-06, "loss": 0.5579, "num_input_tokens_seen": 193854496, "step": 3092 }, { "epoch": 10.289517470881863, "loss": 0.5279616117477417, "loss_ce": 7.4915737968694884e-06, "loss_iou": 0.1884765625, "loss_num": 0.0301513671875, "loss_xval": 0.52734375, "num_input_tokens_seen": 193854496, "step": 3092 }, { "epoch": 10.292845257903494, "grad_norm": 17.75246810913086, "learning_rate": 5e-06, "loss": 0.6193, "num_input_tokens_seen": 193916944, "step": 3093 }, { "epoch": 10.292845257903494, "loss": 0.21536491811275482, "loss_ce": 2.378702447458636e-06, "loss_iou": 0.054931640625, "loss_num": 0.0211181640625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 193916944, "step": 3093 }, { "epoch": 10.296173044925125, "grad_norm": 10.570527076721191, "learning_rate": 5e-06, "loss": 0.5837, "num_input_tokens_seen": 193979852, "step": 3094 }, { "epoch": 10.296173044925125, "loss": 0.5061784982681274, "loss_ce": 0.00044118057121522725, "loss_iou": 0.1572265625, "loss_num": 0.0380859375, "loss_xval": 0.50390625, "num_input_tokens_seen": 193979852, "step": 3094 }, { "epoch": 10.299500831946755, "grad_norm": 12.654913902282715, "learning_rate": 5e-06, "loss": 0.4302, "num_input_tokens_seen": 194042360, "step": 3095 }, { "epoch": 10.299500831946755, "loss": 0.48602521419525146, "loss_ce": 2.2938597794563975e-06, "loss_iou": 0.19140625, "loss_num": 0.0206298828125, "loss_xval": 0.486328125, "num_input_tokens_seen": 194042360, "step": 3095 }, { "epoch": 10.302828618968386, "grad_norm": 16.550018310546875, "learning_rate": 5e-06, "loss": 0.3589, "num_input_tokens_seen": 194105496, "step": 3096 }, { "epoch": 10.302828618968386, "loss": 0.36742842197418213, "loss_ce": 0.00030194222927093506, "loss_iou": 0.1513671875, "loss_num": 0.01300048828125, "loss_xval": 0.3671875, "num_input_tokens_seen": 194105496, "step": 3096 }, { "epoch": 10.306156405990016, "grad_norm": 10.485855102539062, "learning_rate": 5e-06, "loss": 0.3395, "num_input_tokens_seen": 194167056, "step": 3097 }, { "epoch": 10.306156405990016, "loss": 0.37208351492881775, "loss_ce": 0.0011728565441444516, "loss_iou": 0.08837890625, "loss_num": 0.038818359375, "loss_xval": 0.37109375, "num_input_tokens_seen": 194167056, "step": 3097 }, { "epoch": 10.309484193011647, "grad_norm": 7.026902675628662, "learning_rate": 5e-06, "loss": 0.6702, "num_input_tokens_seen": 194231576, "step": 3098 }, { "epoch": 10.309484193011647, "loss": 0.5810527801513672, "loss_ce": 0.0008830744191072881, "loss_iou": 0.2431640625, "loss_num": 0.018798828125, "loss_xval": 0.58203125, "num_input_tokens_seen": 194231576, "step": 3098 }, { "epoch": 10.312811980033278, "grad_norm": 14.910632133483887, "learning_rate": 5e-06, "loss": 0.4101, "num_input_tokens_seen": 194294192, "step": 3099 }, { "epoch": 10.312811980033278, "loss": 0.49103254079818726, "loss_ce": 3.522088809404522e-05, "loss_iou": 0.205078125, "loss_num": 0.0162353515625, "loss_xval": 0.490234375, "num_input_tokens_seen": 194294192, "step": 3099 }, { "epoch": 10.316139767054908, "grad_norm": 12.54509449005127, "learning_rate": 5e-06, "loss": 0.4323, "num_input_tokens_seen": 194356568, "step": 3100 }, { "epoch": 10.316139767054908, "loss": 0.49328428506851196, "loss_ce": 0.0009136786684393883, "loss_iou": 0.1796875, "loss_num": 0.0262451171875, "loss_xval": 0.4921875, "num_input_tokens_seen": 194356568, "step": 3100 }, { "epoch": 10.319467554076539, "grad_norm": 8.624955177307129, "learning_rate": 5e-06, "loss": 0.521, "num_input_tokens_seen": 194420332, "step": 3101 }, { "epoch": 10.319467554076539, "loss": 0.5790767669677734, "loss_ce": 0.0002803581883199513, "loss_iou": 0.208984375, "loss_num": 0.0322265625, "loss_xval": 0.578125, "num_input_tokens_seen": 194420332, "step": 3101 }, { "epoch": 10.32279534109817, "grad_norm": 12.264798164367676, "learning_rate": 5e-06, "loss": 0.5136, "num_input_tokens_seen": 194483056, "step": 3102 }, { "epoch": 10.32279534109817, "loss": 0.5368785262107849, "loss_ce": 0.0002574215177446604, "loss_iou": 0.193359375, "loss_num": 0.0299072265625, "loss_xval": 0.53515625, "num_input_tokens_seen": 194483056, "step": 3102 }, { "epoch": 10.3261231281198, "grad_norm": 23.379796981811523, "learning_rate": 5e-06, "loss": 0.4628, "num_input_tokens_seen": 194544688, "step": 3103 }, { "epoch": 10.3261231281198, "loss": 0.42459118366241455, "loss_ce": 9.168320684693754e-05, "loss_iou": 0.1396484375, "loss_num": 0.029296875, "loss_xval": 0.423828125, "num_input_tokens_seen": 194544688, "step": 3103 }, { "epoch": 10.32945091514143, "grad_norm": 13.6691312789917, "learning_rate": 5e-06, "loss": 0.3835, "num_input_tokens_seen": 194607484, "step": 3104 }, { "epoch": 10.32945091514143, "loss": 0.2846556007862091, "loss_ce": 0.00010970523726427928, "loss_iou": 0.0634765625, "loss_num": 0.031494140625, "loss_xval": 0.28515625, "num_input_tokens_seen": 194607484, "step": 3104 }, { "epoch": 10.332778702163061, "grad_norm": 9.86731243133545, "learning_rate": 5e-06, "loss": 0.4318, "num_input_tokens_seen": 194668268, "step": 3105 }, { "epoch": 10.332778702163061, "loss": 0.1989775449037552, "loss_ce": 2.9333814381971024e-06, "loss_iou": 0.038330078125, "loss_num": 0.0245361328125, "loss_xval": 0.19921875, "num_input_tokens_seen": 194668268, "step": 3105 }, { "epoch": 10.336106489184692, "grad_norm": 22.68243408203125, "learning_rate": 5e-06, "loss": 0.5399, "num_input_tokens_seen": 194731736, "step": 3106 }, { "epoch": 10.336106489184692, "loss": 0.5155133008956909, "loss_ce": 0.0007428178796544671, "loss_iou": 0.2060546875, "loss_num": 0.0206298828125, "loss_xval": 0.515625, "num_input_tokens_seen": 194731736, "step": 3106 }, { "epoch": 10.339434276206322, "grad_norm": 8.105544090270996, "learning_rate": 5e-06, "loss": 0.5312, "num_input_tokens_seen": 194794644, "step": 3107 }, { "epoch": 10.339434276206322, "loss": 0.4062131941318512, "loss_ce": 0.0006040580337867141, "loss_iou": 0.1630859375, "loss_num": 0.0159912109375, "loss_xval": 0.40625, "num_input_tokens_seen": 194794644, "step": 3107 }, { "epoch": 10.342762063227953, "grad_norm": 7.307918548583984, "learning_rate": 5e-06, "loss": 0.3346, "num_input_tokens_seen": 194857116, "step": 3108 }, { "epoch": 10.342762063227953, "loss": 0.34685957431793213, "loss_ce": 0.00017987354658544064, "loss_iou": 0.1396484375, "loss_num": 0.013671875, "loss_xval": 0.34765625, "num_input_tokens_seen": 194857116, "step": 3108 }, { "epoch": 10.346089850249584, "grad_norm": 9.559342384338379, "learning_rate": 5e-06, "loss": 0.4829, "num_input_tokens_seen": 194919784, "step": 3109 }, { "epoch": 10.346089850249584, "loss": 0.44881635904312134, "loss_ce": 0.0007573028560727835, "loss_iou": 0.138671875, "loss_num": 0.033935546875, "loss_xval": 0.447265625, "num_input_tokens_seen": 194919784, "step": 3109 }, { "epoch": 10.349417637271214, "grad_norm": 20.00583839416504, "learning_rate": 5e-06, "loss": 0.4658, "num_input_tokens_seen": 194982280, "step": 3110 }, { "epoch": 10.349417637271214, "loss": 0.6431305408477783, "loss_ce": 3.083094270550646e-06, "loss_iou": 0.224609375, "loss_num": 0.038818359375, "loss_xval": 0.64453125, "num_input_tokens_seen": 194982280, "step": 3110 }, { "epoch": 10.352745424292845, "grad_norm": 23.72500991821289, "learning_rate": 5e-06, "loss": 0.5671, "num_input_tokens_seen": 195044956, "step": 3111 }, { "epoch": 10.352745424292845, "loss": 0.6478495597839355, "loss_ce": 3.7687012081732973e-05, "loss_iou": 0.2373046875, "loss_num": 0.03466796875, "loss_xval": 0.6484375, "num_input_tokens_seen": 195044956, "step": 3111 }, { "epoch": 10.356073211314476, "grad_norm": 13.791129112243652, "learning_rate": 5e-06, "loss": 0.6162, "num_input_tokens_seen": 195107616, "step": 3112 }, { "epoch": 10.356073211314476, "loss": 0.6540576219558716, "loss_ce": 4.829793397220783e-06, "loss_iou": 0.25, "loss_num": 0.0306396484375, "loss_xval": 0.65234375, "num_input_tokens_seen": 195107616, "step": 3112 }, { "epoch": 10.359400998336106, "grad_norm": 6.7067952156066895, "learning_rate": 5e-06, "loss": 0.4177, "num_input_tokens_seen": 195169548, "step": 3113 }, { "epoch": 10.359400998336106, "loss": 0.40686917304992676, "loss_ce": 8.827409146761056e-06, "loss_iou": 0.1455078125, "loss_num": 0.023193359375, "loss_xval": 0.40625, "num_input_tokens_seen": 195169548, "step": 3113 }, { "epoch": 10.362728785357737, "grad_norm": 9.176291465759277, "learning_rate": 5e-06, "loss": 0.4606, "num_input_tokens_seen": 195231380, "step": 3114 }, { "epoch": 10.362728785357737, "loss": 0.3578280806541443, "loss_ce": 0.0008334572776220739, "loss_iou": 0.08154296875, "loss_num": 0.038818359375, "loss_xval": 0.357421875, "num_input_tokens_seen": 195231380, "step": 3114 }, { "epoch": 10.366056572379367, "grad_norm": 22.324018478393555, "learning_rate": 5e-06, "loss": 0.7595, "num_input_tokens_seen": 195294020, "step": 3115 }, { "epoch": 10.366056572379367, "loss": 0.7246437072753906, "loss_ce": 0.00021747812570538372, "loss_iou": 0.263671875, "loss_num": 0.039794921875, "loss_xval": 0.72265625, "num_input_tokens_seen": 195294020, "step": 3115 }, { "epoch": 10.369384359400998, "grad_norm": 20.822927474975586, "learning_rate": 5e-06, "loss": 0.514, "num_input_tokens_seen": 195358112, "step": 3116 }, { "epoch": 10.369384359400998, "loss": 0.6169407367706299, "loss_ce": 0.0002109689376084134, "loss_iou": 0.24609375, "loss_num": 0.0252685546875, "loss_xval": 0.6171875, "num_input_tokens_seen": 195358112, "step": 3116 }, { "epoch": 10.372712146422629, "grad_norm": 23.408584594726562, "learning_rate": 5e-06, "loss": 0.7954, "num_input_tokens_seen": 195422244, "step": 3117 }, { "epoch": 10.372712146422629, "loss": 0.9132540225982666, "loss_ce": 0.00016802029858808964, "loss_iou": 0.36328125, "loss_num": 0.037109375, "loss_xval": 0.9140625, "num_input_tokens_seen": 195422244, "step": 3117 }, { "epoch": 10.37603993344426, "grad_norm": 25.502975463867188, "learning_rate": 5e-06, "loss": 0.4733, "num_input_tokens_seen": 195483700, "step": 3118 }, { "epoch": 10.37603993344426, "loss": 0.5538331866264343, "loss_ce": 6.121165642980486e-05, "loss_iou": 0.19140625, "loss_num": 0.034423828125, "loss_xval": 0.5546875, "num_input_tokens_seen": 195483700, "step": 3118 }, { "epoch": 10.37936772046589, "grad_norm": 29.945476531982422, "learning_rate": 5e-06, "loss": 0.4281, "num_input_tokens_seen": 195547132, "step": 3119 }, { "epoch": 10.37936772046589, "loss": 0.3587566018104553, "loss_ce": 2.2465446818387136e-05, "loss_iou": 0.1484375, "loss_num": 0.01263427734375, "loss_xval": 0.359375, "num_input_tokens_seen": 195547132, "step": 3119 }, { "epoch": 10.38269550748752, "grad_norm": 140.21304321289062, "learning_rate": 5e-06, "loss": 0.375, "num_input_tokens_seen": 195610304, "step": 3120 }, { "epoch": 10.38269550748752, "loss": 0.44570237398147583, "loss_ce": 0.0005119582638144493, "loss_iou": 0.1953125, "loss_num": 0.0107421875, "loss_xval": 0.4453125, "num_input_tokens_seen": 195610304, "step": 3120 }, { "epoch": 10.386023294509151, "grad_norm": 7.883390426635742, "learning_rate": 5e-06, "loss": 0.3443, "num_input_tokens_seen": 195673012, "step": 3121 }, { "epoch": 10.386023294509151, "loss": 0.4079606533050537, "loss_ce": 1.6582054058744689e-06, "loss_iou": 0.15234375, "loss_num": 0.0205078125, "loss_xval": 0.408203125, "num_input_tokens_seen": 195673012, "step": 3121 }, { "epoch": 10.389351081530782, "grad_norm": 6.584036350250244, "learning_rate": 5e-06, "loss": 0.4558, "num_input_tokens_seen": 195735756, "step": 3122 }, { "epoch": 10.389351081530782, "loss": 0.5374500751495361, "loss_ce": 4.950232323608361e-06, "loss_iou": 0.1865234375, "loss_num": 0.033203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 195735756, "step": 3122 }, { "epoch": 10.392678868552412, "grad_norm": 18.09377098083496, "learning_rate": 5e-06, "loss": 0.4439, "num_input_tokens_seen": 195798480, "step": 3123 }, { "epoch": 10.392678868552412, "loss": 0.3458314538002014, "loss_ce": 6.23269670541049e-06, "loss_iou": 0.0927734375, "loss_num": 0.0322265625, "loss_xval": 0.345703125, "num_input_tokens_seen": 195798480, "step": 3123 }, { "epoch": 10.396006655574043, "grad_norm": 15.338449478149414, "learning_rate": 5e-06, "loss": 0.4668, "num_input_tokens_seen": 195860160, "step": 3124 }, { "epoch": 10.396006655574043, "loss": 0.3224650025367737, "loss_ce": 1.6281897842418402e-05, "loss_iou": 0.11669921875, "loss_num": 0.0177001953125, "loss_xval": 0.322265625, "num_input_tokens_seen": 195860160, "step": 3124 }, { "epoch": 10.399334442595674, "grad_norm": 20.96832847595215, "learning_rate": 5e-06, "loss": 0.6287, "num_input_tokens_seen": 195922844, "step": 3125 }, { "epoch": 10.399334442595674, "loss": 0.6337583065032959, "loss_ce": 3.0237466489779763e-05, "loss_iou": 0.244140625, "loss_num": 0.02880859375, "loss_xval": 0.6328125, "num_input_tokens_seen": 195922844, "step": 3125 }, { "epoch": 10.402662229617304, "grad_norm": 18.269514083862305, "learning_rate": 5e-06, "loss": 0.3438, "num_input_tokens_seen": 195984248, "step": 3126 }, { "epoch": 10.402662229617304, "loss": 0.390931636095047, "loss_ce": 1.436807110621885e-06, "loss_iou": 0.1337890625, "loss_num": 0.024658203125, "loss_xval": 0.390625, "num_input_tokens_seen": 195984248, "step": 3126 }, { "epoch": 10.405990016638935, "grad_norm": 21.377254486083984, "learning_rate": 5e-06, "loss": 0.4849, "num_input_tokens_seen": 196046184, "step": 3127 }, { "epoch": 10.405990016638935, "loss": 0.5809249877929688, "loss_ce": 0.00016017329471651465, "loss_iou": 0.203125, "loss_num": 0.03515625, "loss_xval": 0.58203125, "num_input_tokens_seen": 196046184, "step": 3127 }, { "epoch": 10.409317803660565, "grad_norm": 34.336177825927734, "learning_rate": 5e-06, "loss": 0.5646, "num_input_tokens_seen": 196111432, "step": 3128 }, { "epoch": 10.409317803660565, "loss": 0.3972820043563843, "loss_ce": 4.1816101656877436e-06, "loss_iou": 0.1767578125, "loss_num": 0.00885009765625, "loss_xval": 0.396484375, "num_input_tokens_seen": 196111432, "step": 3128 }, { "epoch": 10.412645590682196, "grad_norm": 25.270919799804688, "learning_rate": 5e-06, "loss": 0.4972, "num_input_tokens_seen": 196172844, "step": 3129 }, { "epoch": 10.412645590682196, "loss": 0.5598230361938477, "loss_ce": 8.566583346691914e-06, "loss_iou": 0.2421875, "loss_num": 0.0152587890625, "loss_xval": 0.55859375, "num_input_tokens_seen": 196172844, "step": 3129 }, { "epoch": 10.415973377703827, "grad_norm": 22.688373565673828, "learning_rate": 5e-06, "loss": 0.5855, "num_input_tokens_seen": 196236572, "step": 3130 }, { "epoch": 10.415973377703827, "loss": 0.4187075197696686, "loss_ce": 6.3505613070447e-06, "loss_iou": 0.1259765625, "loss_num": 0.033203125, "loss_xval": 0.41796875, "num_input_tokens_seen": 196236572, "step": 3130 }, { "epoch": 10.419301164725457, "grad_norm": 37.3753662109375, "learning_rate": 5e-06, "loss": 0.5543, "num_input_tokens_seen": 196300192, "step": 3131 }, { "epoch": 10.419301164725457, "loss": 0.5390644073486328, "loss_ce": 1.9501246697473107e-06, "loss_iou": 0.212890625, "loss_num": 0.022705078125, "loss_xval": 0.5390625, "num_input_tokens_seen": 196300192, "step": 3131 }, { "epoch": 10.422628951747088, "grad_norm": 28.438404083251953, "learning_rate": 5e-06, "loss": 0.4185, "num_input_tokens_seen": 196363836, "step": 3132 }, { "epoch": 10.422628951747088, "loss": 0.47962385416030884, "loss_ce": 9.608128493709955e-06, "loss_iou": 0.20703125, "loss_num": 0.01300048828125, "loss_xval": 0.48046875, "num_input_tokens_seen": 196363836, "step": 3132 }, { "epoch": 10.425956738768718, "grad_norm": 23.482799530029297, "learning_rate": 5e-06, "loss": 0.6844, "num_input_tokens_seen": 196428084, "step": 3133 }, { "epoch": 10.425956738768718, "loss": 0.8150649070739746, "loss_ce": 1.4517198678731802e-06, "loss_iou": 0.326171875, "loss_num": 0.032470703125, "loss_xval": 0.81640625, "num_input_tokens_seen": 196428084, "step": 3133 }, { "epoch": 10.429284525790349, "grad_norm": 19.535551071166992, "learning_rate": 5e-06, "loss": 0.4722, "num_input_tokens_seen": 196490060, "step": 3134 }, { "epoch": 10.429284525790349, "loss": 0.4555947780609131, "loss_ce": 0.0005166547489352524, "loss_iou": 0.1572265625, "loss_num": 0.028076171875, "loss_xval": 0.455078125, "num_input_tokens_seen": 196490060, "step": 3134 }, { "epoch": 10.43261231281198, "grad_norm": 19.09634780883789, "learning_rate": 5e-06, "loss": 0.5731, "num_input_tokens_seen": 196553224, "step": 3135 }, { "epoch": 10.43261231281198, "loss": 0.5106952786445618, "loss_ce": 1.414250709785847e-05, "loss_iou": 0.1806640625, "loss_num": 0.0299072265625, "loss_xval": 0.51171875, "num_input_tokens_seen": 196553224, "step": 3135 }, { "epoch": 10.43594009983361, "grad_norm": 12.403202056884766, "learning_rate": 5e-06, "loss": 0.419, "num_input_tokens_seen": 196615424, "step": 3136 }, { "epoch": 10.43594009983361, "loss": 0.38159260153770447, "loss_ce": 7.98605356067128e-07, "loss_iou": 0.140625, "loss_num": 0.02001953125, "loss_xval": 0.380859375, "num_input_tokens_seen": 196615424, "step": 3136 }, { "epoch": 10.43926788685524, "grad_norm": 7.544939994812012, "learning_rate": 5e-06, "loss": 0.5649, "num_input_tokens_seen": 196678632, "step": 3137 }, { "epoch": 10.43926788685524, "loss": 0.59173583984375, "loss_ce": 0.0001831005502026528, "loss_iou": 0.216796875, "loss_num": 0.031494140625, "loss_xval": 0.58984375, "num_input_tokens_seen": 196678632, "step": 3137 }, { "epoch": 10.442595673876871, "grad_norm": 26.970592498779297, "learning_rate": 5e-06, "loss": 0.5718, "num_input_tokens_seen": 196741932, "step": 3138 }, { "epoch": 10.442595673876871, "loss": 0.49341416358947754, "loss_ce": 5.966597200313117e-06, "loss_iou": 0.197265625, "loss_num": 0.019775390625, "loss_xval": 0.494140625, "num_input_tokens_seen": 196741932, "step": 3138 }, { "epoch": 10.445923460898502, "grad_norm": 13.625749588012695, "learning_rate": 5e-06, "loss": 0.4189, "num_input_tokens_seen": 196805836, "step": 3139 }, { "epoch": 10.445923460898502, "loss": 0.39596301317214966, "loss_ce": 0.0003331073676235974, "loss_iou": 0.1240234375, "loss_num": 0.0296630859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 196805836, "step": 3139 }, { "epoch": 10.449251247920133, "grad_norm": 14.967351913452148, "learning_rate": 5e-06, "loss": 0.4158, "num_input_tokens_seen": 196867348, "step": 3140 }, { "epoch": 10.449251247920133, "loss": 0.43680763244628906, "loss_ce": 0.0008335243328474462, "loss_iou": 0.1318359375, "loss_num": 0.03466796875, "loss_xval": 0.435546875, "num_input_tokens_seen": 196867348, "step": 3140 }, { "epoch": 10.452579034941763, "grad_norm": 14.749964714050293, "learning_rate": 5e-06, "loss": 0.3762, "num_input_tokens_seen": 196929520, "step": 3141 }, { "epoch": 10.452579034941763, "loss": 0.43615156412124634, "loss_ce": 0.0003910852537956089, "loss_iou": 0.16796875, "loss_num": 0.020263671875, "loss_xval": 0.435546875, "num_input_tokens_seen": 196929520, "step": 3141 }, { "epoch": 10.455906821963394, "grad_norm": 13.218477249145508, "learning_rate": 5e-06, "loss": 0.555, "num_input_tokens_seen": 196992484, "step": 3142 }, { "epoch": 10.455906821963394, "loss": 0.6889394521713257, "loss_ce": 5.090315426059533e-06, "loss_iou": 0.25390625, "loss_num": 0.036376953125, "loss_xval": 0.6875, "num_input_tokens_seen": 196992484, "step": 3142 }, { "epoch": 10.459234608985025, "grad_norm": 9.939301490783691, "learning_rate": 5e-06, "loss": 0.4456, "num_input_tokens_seen": 197053956, "step": 3143 }, { "epoch": 10.459234608985025, "loss": 0.3953554034233093, "loss_ce": 0.00021382884006015956, "loss_iou": 0.123046875, "loss_num": 0.02978515625, "loss_xval": 0.39453125, "num_input_tokens_seen": 197053956, "step": 3143 }, { "epoch": 10.462562396006655, "grad_norm": 10.877116203308105, "learning_rate": 5e-06, "loss": 0.5004, "num_input_tokens_seen": 197116308, "step": 3144 }, { "epoch": 10.462562396006655, "loss": 0.426567018032074, "loss_ce": 5.3353182011051103e-05, "loss_iou": 0.1748046875, "loss_num": 0.0152587890625, "loss_xval": 0.42578125, "num_input_tokens_seen": 197116308, "step": 3144 }, { "epoch": 10.465890183028286, "grad_norm": 8.264814376831055, "learning_rate": 5e-06, "loss": 0.5265, "num_input_tokens_seen": 197179764, "step": 3145 }, { "epoch": 10.465890183028286, "loss": 0.6860368847846985, "loss_ce": 0.00012381038686726242, "loss_iou": 0.287109375, "loss_num": 0.0220947265625, "loss_xval": 0.6875, "num_input_tokens_seen": 197179764, "step": 3145 }, { "epoch": 10.469217970049916, "grad_norm": 9.48729419708252, "learning_rate": 5e-06, "loss": 0.4124, "num_input_tokens_seen": 197240992, "step": 3146 }, { "epoch": 10.469217970049916, "loss": 0.48975008726119995, "loss_ce": 3.996933628513943e-06, "loss_iou": 0.1796875, "loss_num": 0.0260009765625, "loss_xval": 0.490234375, "num_input_tokens_seen": 197240992, "step": 3146 }, { "epoch": 10.472545757071547, "grad_norm": 17.525287628173828, "learning_rate": 5e-06, "loss": 0.4512, "num_input_tokens_seen": 197304224, "step": 3147 }, { "epoch": 10.472545757071547, "loss": 0.3575773239135742, "loss_ce": 0.00030801582033745944, "loss_iou": 0.125, "loss_num": 0.0213623046875, "loss_xval": 0.357421875, "num_input_tokens_seen": 197304224, "step": 3147 }, { "epoch": 10.475873544093178, "grad_norm": 18.673843383789062, "learning_rate": 5e-06, "loss": 0.6824, "num_input_tokens_seen": 197368424, "step": 3148 }, { "epoch": 10.475873544093178, "loss": 0.720625638961792, "loss_ce": 0.0007770396769046783, "loss_iou": 0.3125, "loss_num": 0.0191650390625, "loss_xval": 0.71875, "num_input_tokens_seen": 197368424, "step": 3148 }, { "epoch": 10.479201331114808, "grad_norm": 12.04966926574707, "learning_rate": 5e-06, "loss": 0.32, "num_input_tokens_seen": 197429972, "step": 3149 }, { "epoch": 10.479201331114808, "loss": 0.3271048069000244, "loss_ce": 2.159032419513096e-06, "loss_iou": 0.1123046875, "loss_num": 0.0205078125, "loss_xval": 0.326171875, "num_input_tokens_seen": 197429972, "step": 3149 }, { "epoch": 10.482529118136439, "grad_norm": 17.758878707885742, "learning_rate": 5e-06, "loss": 0.4651, "num_input_tokens_seen": 197491936, "step": 3150 }, { "epoch": 10.482529118136439, "loss": 0.5349693894386292, "loss_ce": 0.000301414227578789, "loss_iou": 0.2119140625, "loss_num": 0.022216796875, "loss_xval": 0.53515625, "num_input_tokens_seen": 197491936, "step": 3150 }, { "epoch": 10.48585690515807, "grad_norm": 19.94994354248047, "learning_rate": 5e-06, "loss": 0.696, "num_input_tokens_seen": 197553124, "step": 3151 }, { "epoch": 10.48585690515807, "loss": 0.6158182621002197, "loss_ce": 3.455540718277916e-05, "loss_iou": 0.2216796875, "loss_num": 0.034423828125, "loss_xval": 0.6171875, "num_input_tokens_seen": 197553124, "step": 3151 }, { "epoch": 10.4891846921797, "grad_norm": 27.638120651245117, "learning_rate": 5e-06, "loss": 0.4179, "num_input_tokens_seen": 197617728, "step": 3152 }, { "epoch": 10.4891846921797, "loss": 0.42849215865135193, "loss_ce": 0.00014742666098754853, "loss_iou": 0.1533203125, "loss_num": 0.0244140625, "loss_xval": 0.427734375, "num_input_tokens_seen": 197617728, "step": 3152 }, { "epoch": 10.49251247920133, "grad_norm": 25.869277954101562, "learning_rate": 5e-06, "loss": 0.6217, "num_input_tokens_seen": 197681596, "step": 3153 }, { "epoch": 10.49251247920133, "loss": 0.7978588342666626, "loss_ce": 0.0009227616246789694, "loss_iou": 0.267578125, "loss_num": 0.052490234375, "loss_xval": 0.796875, "num_input_tokens_seen": 197681596, "step": 3153 }, { "epoch": 10.495840266222961, "grad_norm": 22.334674835205078, "learning_rate": 5e-06, "loss": 0.3607, "num_input_tokens_seen": 197744928, "step": 3154 }, { "epoch": 10.495840266222961, "loss": 0.43449538946151733, "loss_ce": 1.359198677164386e-06, "loss_iou": 0.1826171875, "loss_num": 0.013916015625, "loss_xval": 0.43359375, "num_input_tokens_seen": 197744928, "step": 3154 }, { "epoch": 10.499168053244592, "grad_norm": 24.99123764038086, "learning_rate": 5e-06, "loss": 0.5013, "num_input_tokens_seen": 197807924, "step": 3155 }, { "epoch": 10.499168053244592, "loss": 0.43491873145103455, "loss_ce": 0.00010429159738123417, "loss_iou": 0.1533203125, "loss_num": 0.025634765625, "loss_xval": 0.435546875, "num_input_tokens_seen": 197807924, "step": 3155 }, { "epoch": 10.502495840266223, "grad_norm": 23.797060012817383, "learning_rate": 5e-06, "loss": 0.4883, "num_input_tokens_seen": 197870640, "step": 3156 }, { "epoch": 10.502495840266223, "loss": 0.3052990734577179, "loss_ce": 1.2382990917103598e-06, "loss_iou": 0.1083984375, "loss_num": 0.017822265625, "loss_xval": 0.3046875, "num_input_tokens_seen": 197870640, "step": 3156 }, { "epoch": 10.505823627287853, "grad_norm": 17.461458206176758, "learning_rate": 5e-06, "loss": 0.4953, "num_input_tokens_seen": 197933808, "step": 3157 }, { "epoch": 10.505823627287853, "loss": 0.6093829870223999, "loss_ce": 0.00025210363673977554, "loss_iou": 0.2421875, "loss_num": 0.025146484375, "loss_xval": 0.609375, "num_input_tokens_seen": 197933808, "step": 3157 }, { "epoch": 10.509151414309484, "grad_norm": 9.03349494934082, "learning_rate": 5e-06, "loss": 0.3479, "num_input_tokens_seen": 197993876, "step": 3158 }, { "epoch": 10.509151414309484, "loss": 0.36255180835723877, "loss_ce": 3.0015144147910178e-06, "loss_iou": 0.076171875, "loss_num": 0.0419921875, "loss_xval": 0.36328125, "num_input_tokens_seen": 197993876, "step": 3158 }, { "epoch": 10.512479201331114, "grad_norm": 15.596484184265137, "learning_rate": 5e-06, "loss": 0.5397, "num_input_tokens_seen": 198055284, "step": 3159 }, { "epoch": 10.512479201331114, "loss": 0.43726176023483276, "loss_ce": 5.905304078623885e-06, "loss_iou": 0.15234375, "loss_num": 0.0264892578125, "loss_xval": 0.4375, "num_input_tokens_seen": 198055284, "step": 3159 }, { "epoch": 10.515806988352745, "grad_norm": 15.159150123596191, "learning_rate": 5e-06, "loss": 0.8302, "num_input_tokens_seen": 198119132, "step": 3160 }, { "epoch": 10.515806988352745, "loss": 1.0969740152359009, "loss_ce": 0.0012708548456430435, "loss_iou": 0.4296875, "loss_num": 0.047119140625, "loss_xval": 1.09375, "num_input_tokens_seen": 198119132, "step": 3160 }, { "epoch": 10.519134775374376, "grad_norm": 8.75708293914795, "learning_rate": 5e-06, "loss": 0.3498, "num_input_tokens_seen": 198178848, "step": 3161 }, { "epoch": 10.519134775374376, "loss": 0.4310537576675415, "loss_ce": 0.0003591941494960338, "loss_iou": 0.11572265625, "loss_num": 0.03955078125, "loss_xval": 0.431640625, "num_input_tokens_seen": 198178848, "step": 3161 }, { "epoch": 10.522462562396006, "grad_norm": 9.524789810180664, "learning_rate": 5e-06, "loss": 0.5433, "num_input_tokens_seen": 198241480, "step": 3162 }, { "epoch": 10.522462562396006, "loss": 0.6954172849655151, "loss_ce": 0.0003489176742732525, "loss_iou": 0.267578125, "loss_num": 0.0322265625, "loss_xval": 0.6953125, "num_input_tokens_seen": 198241480, "step": 3162 }, { "epoch": 10.525790349417637, "grad_norm": 9.931750297546387, "learning_rate": 5e-06, "loss": 0.3163, "num_input_tokens_seen": 198302172, "step": 3163 }, { "epoch": 10.525790349417637, "loss": 0.2914140820503235, "loss_ce": 3.223785824957304e-05, "loss_iou": 0.107421875, "loss_num": 0.015380859375, "loss_xval": 0.291015625, "num_input_tokens_seen": 198302172, "step": 3163 }, { "epoch": 10.529118136439267, "grad_norm": 20.365432739257812, "learning_rate": 5e-06, "loss": 0.4261, "num_input_tokens_seen": 198365328, "step": 3164 }, { "epoch": 10.529118136439267, "loss": 0.4057645797729492, "loss_ce": 2.8530230338219553e-06, "loss_iou": 0.1689453125, "loss_num": 0.01348876953125, "loss_xval": 0.40625, "num_input_tokens_seen": 198365328, "step": 3164 }, { "epoch": 10.532445923460898, "grad_norm": 18.614635467529297, "learning_rate": 5e-06, "loss": 0.4827, "num_input_tokens_seen": 198426364, "step": 3165 }, { "epoch": 10.532445923460898, "loss": 0.6155465245246887, "loss_ce": 7.002046913839877e-06, "loss_iou": 0.23046875, "loss_num": 0.031005859375, "loss_xval": 0.6171875, "num_input_tokens_seen": 198426364, "step": 3165 }, { "epoch": 10.535773710482529, "grad_norm": 12.060473442077637, "learning_rate": 5e-06, "loss": 0.399, "num_input_tokens_seen": 198488436, "step": 3166 }, { "epoch": 10.535773710482529, "loss": 0.3417341113090515, "loss_ce": 0.00024243911320809275, "loss_iou": 0.1259765625, "loss_num": 0.0179443359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 198488436, "step": 3166 }, { "epoch": 10.53910149750416, "grad_norm": 7.74644660949707, "learning_rate": 5e-06, "loss": 0.5598, "num_input_tokens_seen": 198551320, "step": 3167 }, { "epoch": 10.53910149750416, "loss": 0.44363734126091003, "loss_ce": 3.382530121598393e-05, "loss_iou": 0.1767578125, "loss_num": 0.01806640625, "loss_xval": 0.443359375, "num_input_tokens_seen": 198551320, "step": 3167 }, { "epoch": 10.54242928452579, "grad_norm": 10.755378723144531, "learning_rate": 5e-06, "loss": 0.5824, "num_input_tokens_seen": 198615384, "step": 3168 }, { "epoch": 10.54242928452579, "loss": 0.5051895976066589, "loss_ce": 1.6242489664364257e-06, "loss_iou": 0.2041015625, "loss_num": 0.019287109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 198615384, "step": 3168 }, { "epoch": 10.54575707154742, "grad_norm": 11.815871238708496, "learning_rate": 5e-06, "loss": 0.5159, "num_input_tokens_seen": 198677392, "step": 3169 }, { "epoch": 10.54575707154742, "loss": 0.5515753626823425, "loss_ce": 6.437771844503004e-07, "loss_iou": 0.1962890625, "loss_num": 0.03173828125, "loss_xval": 0.55078125, "num_input_tokens_seen": 198677392, "step": 3169 }, { "epoch": 10.549084858569051, "grad_norm": 11.534939765930176, "learning_rate": 5e-06, "loss": 0.3703, "num_input_tokens_seen": 198738456, "step": 3170 }, { "epoch": 10.549084858569051, "loss": 0.4279818832874298, "loss_ce": 3.3901364986377303e-06, "loss_iou": 0.1240234375, "loss_num": 0.035888671875, "loss_xval": 0.427734375, "num_input_tokens_seen": 198738456, "step": 3170 }, { "epoch": 10.552412645590682, "grad_norm": 12.667360305786133, "learning_rate": 5e-06, "loss": 0.6141, "num_input_tokens_seen": 198801816, "step": 3171 }, { "epoch": 10.552412645590682, "loss": 0.5931175947189331, "loss_ce": 0.0003441515436861664, "loss_iou": 0.22265625, "loss_num": 0.0294189453125, "loss_xval": 0.59375, "num_input_tokens_seen": 198801816, "step": 3171 }, { "epoch": 10.555740432612312, "grad_norm": 9.192916870117188, "learning_rate": 5e-06, "loss": 0.4965, "num_input_tokens_seen": 198863028, "step": 3172 }, { "epoch": 10.555740432612312, "loss": 0.7036224007606506, "loss_ce": 9.119192327489145e-06, "loss_iou": 0.275390625, "loss_num": 0.0303955078125, "loss_xval": 0.703125, "num_input_tokens_seen": 198863028, "step": 3172 }, { "epoch": 10.559068219633943, "grad_norm": 13.471563339233398, "learning_rate": 5e-06, "loss": 0.6965, "num_input_tokens_seen": 198925388, "step": 3173 }, { "epoch": 10.559068219633943, "loss": 0.44262903928756714, "loss_ce": 2.1075393306091428e-06, "loss_iou": 0.173828125, "loss_num": 0.018798828125, "loss_xval": 0.443359375, "num_input_tokens_seen": 198925388, "step": 3173 }, { "epoch": 10.562396006655574, "grad_norm": 7.238758563995361, "learning_rate": 5e-06, "loss": 0.5821, "num_input_tokens_seen": 198988672, "step": 3174 }, { "epoch": 10.562396006655574, "loss": 0.5336349606513977, "loss_ce": 4.5499195948650595e-06, "loss_iou": 0.1669921875, "loss_num": 0.039794921875, "loss_xval": 0.53515625, "num_input_tokens_seen": 198988672, "step": 3174 }, { "epoch": 10.565723793677204, "grad_norm": 18.416780471801758, "learning_rate": 5e-06, "loss": 0.5126, "num_input_tokens_seen": 199053712, "step": 3175 }, { "epoch": 10.565723793677204, "loss": 0.5397998690605164, "loss_ce": 4.941748557030223e-06, "loss_iou": 0.2060546875, "loss_num": 0.025390625, "loss_xval": 0.5390625, "num_input_tokens_seen": 199053712, "step": 3175 }, { "epoch": 10.569051580698835, "grad_norm": 33.10842514038086, "learning_rate": 5e-06, "loss": 0.6892, "num_input_tokens_seen": 199117520, "step": 3176 }, { "epoch": 10.569051580698835, "loss": 0.5805074572563171, "loss_ce": 2.103258566421573e-06, "loss_iou": 0.21484375, "loss_num": 0.0302734375, "loss_xval": 0.58203125, "num_input_tokens_seen": 199117520, "step": 3176 }, { "epoch": 10.572379367720465, "grad_norm": 33.647586822509766, "learning_rate": 5e-06, "loss": 0.5091, "num_input_tokens_seen": 199179760, "step": 3177 }, { "epoch": 10.572379367720465, "loss": 0.39654338359832764, "loss_ce": 5.899169991607778e-05, "loss_iou": 0.142578125, "loss_num": 0.0223388671875, "loss_xval": 0.396484375, "num_input_tokens_seen": 199179760, "step": 3177 }, { "epoch": 10.575707154742096, "grad_norm": 23.931730270385742, "learning_rate": 5e-06, "loss": 0.6313, "num_input_tokens_seen": 199241856, "step": 3178 }, { "epoch": 10.575707154742096, "loss": 0.4221808910369873, "loss_ce": 7.017567895672983e-07, "loss_iou": 0.1328125, "loss_num": 0.031494140625, "loss_xval": 0.421875, "num_input_tokens_seen": 199241856, "step": 3178 }, { "epoch": 10.579034941763727, "grad_norm": 20.246000289916992, "learning_rate": 5e-06, "loss": 0.457, "num_input_tokens_seen": 199304860, "step": 3179 }, { "epoch": 10.579034941763727, "loss": 0.5771558284759521, "loss_ce": 7.431153790093958e-06, "loss_iou": 0.24609375, "loss_num": 0.0169677734375, "loss_xval": 0.578125, "num_input_tokens_seen": 199304860, "step": 3179 }, { "epoch": 10.582362728785357, "grad_norm": 9.476475715637207, "learning_rate": 5e-06, "loss": 0.5099, "num_input_tokens_seen": 199366920, "step": 3180 }, { "epoch": 10.582362728785357, "loss": 0.5145886540412903, "loss_ce": 1.2462237464205828e-06, "loss_iou": 0.189453125, "loss_num": 0.0272216796875, "loss_xval": 0.515625, "num_input_tokens_seen": 199366920, "step": 3180 }, { "epoch": 10.585690515806988, "grad_norm": 9.485166549682617, "learning_rate": 5e-06, "loss": 0.3965, "num_input_tokens_seen": 199427756, "step": 3181 }, { "epoch": 10.585690515806988, "loss": 0.39102184772491455, "loss_ce": 0.00021376398217398673, "loss_iou": 0.1376953125, "loss_num": 0.023193359375, "loss_xval": 0.390625, "num_input_tokens_seen": 199427756, "step": 3181 }, { "epoch": 10.589018302828618, "grad_norm": 14.954903602600098, "learning_rate": 5e-06, "loss": 0.6309, "num_input_tokens_seen": 199491004, "step": 3182 }, { "epoch": 10.589018302828618, "loss": 0.5178244709968567, "loss_ce": 2.2201086267159553e-06, "loss_iou": 0.1923828125, "loss_num": 0.026611328125, "loss_xval": 0.51953125, "num_input_tokens_seen": 199491004, "step": 3182 }, { "epoch": 10.592346089850249, "grad_norm": 13.890663146972656, "learning_rate": 5e-06, "loss": 0.5036, "num_input_tokens_seen": 199554728, "step": 3183 }, { "epoch": 10.592346089850249, "loss": 0.4558119773864746, "loss_ce": 1.4313685596789583e-06, "loss_iou": 0.1865234375, "loss_num": 0.0164794921875, "loss_xval": 0.455078125, "num_input_tokens_seen": 199554728, "step": 3183 }, { "epoch": 10.59567387687188, "grad_norm": 19.80140495300293, "learning_rate": 5e-06, "loss": 0.3987, "num_input_tokens_seen": 199616616, "step": 3184 }, { "epoch": 10.59567387687188, "loss": 0.47398653626441956, "loss_ce": 0.0004757922433782369, "loss_iou": 0.1904296875, "loss_num": 0.0184326171875, "loss_xval": 0.47265625, "num_input_tokens_seen": 199616616, "step": 3184 }, { "epoch": 10.59900166389351, "grad_norm": 31.115558624267578, "learning_rate": 5e-06, "loss": 0.5136, "num_input_tokens_seen": 199679900, "step": 3185 }, { "epoch": 10.59900166389351, "loss": 0.6156057119369507, "loss_ce": 0.00024927526828832924, "loss_iou": 0.2392578125, "loss_num": 0.02734375, "loss_xval": 0.6171875, "num_input_tokens_seen": 199679900, "step": 3185 }, { "epoch": 10.602329450915141, "grad_norm": 9.04697036743164, "learning_rate": 5e-06, "loss": 0.3674, "num_input_tokens_seen": 199741672, "step": 3186 }, { "epoch": 10.602329450915141, "loss": 0.3618794083595276, "loss_ce": 1.9843614609271754e-06, "loss_iou": 0.12255859375, "loss_num": 0.0233154296875, "loss_xval": 0.361328125, "num_input_tokens_seen": 199741672, "step": 3186 }, { "epoch": 10.605657237936772, "grad_norm": 13.348240852355957, "learning_rate": 5e-06, "loss": 0.7487, "num_input_tokens_seen": 199803304, "step": 3187 }, { "epoch": 10.605657237936772, "loss": 0.598301887512207, "loss_ce": 0.0023546332959085703, "loss_iou": 0.2099609375, "loss_num": 0.035400390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 199803304, "step": 3187 }, { "epoch": 10.608985024958402, "grad_norm": 22.485300064086914, "learning_rate": 5e-06, "loss": 0.3838, "num_input_tokens_seen": 199864588, "step": 3188 }, { "epoch": 10.608985024958402, "loss": 0.38281404972076416, "loss_ce": 1.5730497580079827e-06, "loss_iou": 0.140625, "loss_num": 0.0201416015625, "loss_xval": 0.3828125, "num_input_tokens_seen": 199864588, "step": 3188 }, { "epoch": 10.612312811980033, "grad_norm": 19.964641571044922, "learning_rate": 5e-06, "loss": 0.4405, "num_input_tokens_seen": 199928276, "step": 3189 }, { "epoch": 10.612312811980033, "loss": 0.27045729756355286, "loss_ce": 1.052311017701868e-05, "loss_iou": 0.115234375, "loss_num": 0.0079345703125, "loss_xval": 0.26953125, "num_input_tokens_seen": 199928276, "step": 3189 }, { "epoch": 10.615640599001663, "grad_norm": 12.223901748657227, "learning_rate": 5e-06, "loss": 0.5082, "num_input_tokens_seen": 199991440, "step": 3190 }, { "epoch": 10.615640599001663, "loss": 0.4593600034713745, "loss_ce": 9.392253559781238e-06, "loss_iou": 0.17578125, "loss_num": 0.0213623046875, "loss_xval": 0.458984375, "num_input_tokens_seen": 199991440, "step": 3190 }, { "epoch": 10.618968386023294, "grad_norm": 17.094640731811523, "learning_rate": 5e-06, "loss": 0.6435, "num_input_tokens_seen": 200054544, "step": 3191 }, { "epoch": 10.618968386023294, "loss": 0.6876361966133118, "loss_ce": 0.0009296812349930406, "loss_iou": 0.275390625, "loss_num": 0.02734375, "loss_xval": 0.6875, "num_input_tokens_seen": 200054544, "step": 3191 }, { "epoch": 10.622296173044925, "grad_norm": 17.13616943359375, "learning_rate": 5e-06, "loss": 0.4815, "num_input_tokens_seen": 200117980, "step": 3192 }, { "epoch": 10.622296173044925, "loss": 0.5218002796173096, "loss_ce": 7.176816870924085e-05, "loss_iou": 0.2177734375, "loss_num": 0.0172119140625, "loss_xval": 0.5234375, "num_input_tokens_seen": 200117980, "step": 3192 }, { "epoch": 10.625623960066555, "grad_norm": 18.959232330322266, "learning_rate": 5e-06, "loss": 0.6358, "num_input_tokens_seen": 200182204, "step": 3193 }, { "epoch": 10.625623960066555, "loss": 0.44638779759407043, "loss_ce": 0.000586993875913322, "loss_iou": 0.154296875, "loss_num": 0.0274658203125, "loss_xval": 0.4453125, "num_input_tokens_seen": 200182204, "step": 3193 }, { "epoch": 10.628951747088186, "grad_norm": 26.962236404418945, "learning_rate": 5e-06, "loss": 0.5399, "num_input_tokens_seen": 200245180, "step": 3194 }, { "epoch": 10.628951747088186, "loss": 0.360384464263916, "loss_ce": 0.0006432256777770817, "loss_iou": 0.12060546875, "loss_num": 0.0238037109375, "loss_xval": 0.359375, "num_input_tokens_seen": 200245180, "step": 3194 }, { "epoch": 10.632279534109816, "grad_norm": 15.262831687927246, "learning_rate": 5e-06, "loss": 0.5125, "num_input_tokens_seen": 200306228, "step": 3195 }, { "epoch": 10.632279534109816, "loss": 0.7021516561508179, "loss_ce": 3.2019431728258496e-06, "loss_iou": 0.265625, "loss_num": 0.034423828125, "loss_xval": 0.703125, "num_input_tokens_seen": 200306228, "step": 3195 }, { "epoch": 10.635607321131447, "grad_norm": 8.113373756408691, "learning_rate": 5e-06, "loss": 0.5905, "num_input_tokens_seen": 200368748, "step": 3196 }, { "epoch": 10.635607321131447, "loss": 0.6698644161224365, "loss_ce": 3.5804732760880142e-06, "loss_iou": 0.2333984375, "loss_num": 0.040771484375, "loss_xval": 0.66796875, "num_input_tokens_seen": 200368748, "step": 3196 }, { "epoch": 10.638935108153078, "grad_norm": 12.542490005493164, "learning_rate": 5e-06, "loss": 0.5694, "num_input_tokens_seen": 200429428, "step": 3197 }, { "epoch": 10.638935108153078, "loss": 0.49064576625823975, "loss_ce": 0.00022827088832855225, "loss_iou": 0.1484375, "loss_num": 0.038818359375, "loss_xval": 0.490234375, "num_input_tokens_seen": 200429428, "step": 3197 }, { "epoch": 10.642262895174708, "grad_norm": 5.51321268081665, "learning_rate": 5e-06, "loss": 0.4776, "num_input_tokens_seen": 200491208, "step": 3198 }, { "epoch": 10.642262895174708, "loss": 0.48917853832244873, "loss_ce": 4.2779764044098556e-05, "loss_iou": 0.171875, "loss_num": 0.0289306640625, "loss_xval": 0.48828125, "num_input_tokens_seen": 200491208, "step": 3198 }, { "epoch": 10.645590682196339, "grad_norm": 5.676181793212891, "learning_rate": 5e-06, "loss": 0.3416, "num_input_tokens_seen": 200551924, "step": 3199 }, { "epoch": 10.645590682196339, "loss": 0.22441360354423523, "loss_ce": 2.5812892090470996e-06, "loss_iou": 0.04150390625, "loss_num": 0.0283203125, "loss_xval": 0.224609375, "num_input_tokens_seen": 200551924, "step": 3199 }, { "epoch": 10.64891846921797, "grad_norm": 18.426841735839844, "learning_rate": 5e-06, "loss": 0.552, "num_input_tokens_seen": 200616324, "step": 3200 }, { "epoch": 10.64891846921797, "loss": 0.5798373222351074, "loss_ce": 3.2986088172037853e-06, "loss_iou": 0.2353515625, "loss_num": 0.02197265625, "loss_xval": 0.578125, "num_input_tokens_seen": 200616324, "step": 3200 }, { "epoch": 10.6522462562396, "grad_norm": 21.973159790039062, "learning_rate": 5e-06, "loss": 0.4648, "num_input_tokens_seen": 200678852, "step": 3201 }, { "epoch": 10.6522462562396, "loss": 0.3732374310493469, "loss_ce": 0.000129519437905401, "loss_iou": 0.1328125, "loss_num": 0.0213623046875, "loss_xval": 0.373046875, "num_input_tokens_seen": 200678852, "step": 3201 }, { "epoch": 10.65557404326123, "grad_norm": 25.09562110900879, "learning_rate": 5e-06, "loss": 0.6374, "num_input_tokens_seen": 200741980, "step": 3202 }, { "epoch": 10.65557404326123, "loss": 0.6488202214241028, "loss_ce": 1.2852623285652953e-06, "loss_iou": 0.263671875, "loss_num": 0.024169921875, "loss_xval": 0.6484375, "num_input_tokens_seen": 200741980, "step": 3202 }, { "epoch": 10.658901830282861, "grad_norm": 28.943723678588867, "learning_rate": 5e-06, "loss": 0.3993, "num_input_tokens_seen": 200804536, "step": 3203 }, { "epoch": 10.658901830282861, "loss": 0.4503249526023865, "loss_ce": 0.0001296316331718117, "loss_iou": 0.185546875, "loss_num": 0.0159912109375, "loss_xval": 0.44921875, "num_input_tokens_seen": 200804536, "step": 3203 }, { "epoch": 10.662229617304492, "grad_norm": 19.329702377319336, "learning_rate": 5e-06, "loss": 0.5334, "num_input_tokens_seen": 200865168, "step": 3204 }, { "epoch": 10.662229617304492, "loss": 0.4484238028526306, "loss_ce": 5.9564266848610714e-05, "loss_iou": 0.126953125, "loss_num": 0.03857421875, "loss_xval": 0.44921875, "num_input_tokens_seen": 200865168, "step": 3204 }, { "epoch": 10.665557404326123, "grad_norm": 5.265035152435303, "learning_rate": 5e-06, "loss": 0.3153, "num_input_tokens_seen": 200926840, "step": 3205 }, { "epoch": 10.665557404326123, "loss": 0.24121254682540894, "loss_ce": 1.61381956331752e-06, "loss_iou": 0.07763671875, "loss_num": 0.0172119140625, "loss_xval": 0.2412109375, "num_input_tokens_seen": 200926840, "step": 3205 }, { "epoch": 10.668885191347753, "grad_norm": 9.888785362243652, "learning_rate": 5e-06, "loss": 0.3634, "num_input_tokens_seen": 200988760, "step": 3206 }, { "epoch": 10.668885191347753, "loss": 0.3609628677368164, "loss_ce": 9.510457630312885e-07, "loss_iou": 0.140625, "loss_num": 0.015869140625, "loss_xval": 0.361328125, "num_input_tokens_seen": 200988760, "step": 3206 }, { "epoch": 10.672212978369384, "grad_norm": 11.212008476257324, "learning_rate": 5e-06, "loss": 0.7029, "num_input_tokens_seen": 201052192, "step": 3207 }, { "epoch": 10.672212978369384, "loss": 0.8856221437454224, "loss_ce": 1.9921765215258347e-06, "loss_iou": 0.328125, "loss_num": 0.0458984375, "loss_xval": 0.88671875, "num_input_tokens_seen": 201052192, "step": 3207 }, { "epoch": 10.675540765391014, "grad_norm": 19.561853408813477, "learning_rate": 5e-06, "loss": 0.4965, "num_input_tokens_seen": 201115988, "step": 3208 }, { "epoch": 10.675540765391014, "loss": 0.45325347781181335, "loss_ce": 6.408513399946969e-06, "loss_iou": 0.17578125, "loss_num": 0.0205078125, "loss_xval": 0.453125, "num_input_tokens_seen": 201115988, "step": 3208 }, { "epoch": 10.678868552412645, "grad_norm": 17.706567764282227, "learning_rate": 5e-06, "loss": 0.3854, "num_input_tokens_seen": 201179252, "step": 3209 }, { "epoch": 10.678868552412645, "loss": 0.39864641427993774, "loss_ce": 0.00014789022679906338, "loss_iou": 0.1455078125, "loss_num": 0.0213623046875, "loss_xval": 0.3984375, "num_input_tokens_seen": 201179252, "step": 3209 }, { "epoch": 10.682196339434276, "grad_norm": 24.22810935974121, "learning_rate": 5e-06, "loss": 0.5096, "num_input_tokens_seen": 201241864, "step": 3210 }, { "epoch": 10.682196339434276, "loss": 0.4078432023525238, "loss_ce": 6.284335540840402e-06, "loss_iou": 0.162109375, "loss_num": 0.016845703125, "loss_xval": 0.408203125, "num_input_tokens_seen": 201241864, "step": 3210 }, { "epoch": 10.685524126455906, "grad_norm": 30.552120208740234, "learning_rate": 5e-06, "loss": 0.5508, "num_input_tokens_seen": 201304716, "step": 3211 }, { "epoch": 10.685524126455906, "loss": 0.5156716108322144, "loss_ce": 4.6629000280518085e-05, "loss_iou": 0.1943359375, "loss_num": 0.0255126953125, "loss_xval": 0.515625, "num_input_tokens_seen": 201304716, "step": 3211 }, { "epoch": 10.688851913477537, "grad_norm": 39.056640625, "learning_rate": 5e-06, "loss": 0.5201, "num_input_tokens_seen": 201367852, "step": 3212 }, { "epoch": 10.688851913477537, "loss": 0.5144665837287903, "loss_ce": 1.2665874464801163e-06, "loss_iou": 0.21484375, "loss_num": 0.0167236328125, "loss_xval": 0.515625, "num_input_tokens_seen": 201367852, "step": 3212 }, { "epoch": 10.692179700499167, "grad_norm": 120.22439575195312, "learning_rate": 5e-06, "loss": 0.4706, "num_input_tokens_seen": 201432100, "step": 3213 }, { "epoch": 10.692179700499167, "loss": 0.46857044100761414, "loss_ce": 3.528039997036103e-06, "loss_iou": 0.177734375, "loss_num": 0.0223388671875, "loss_xval": 0.46875, "num_input_tokens_seen": 201432100, "step": 3213 }, { "epoch": 10.695507487520798, "grad_norm": 38.853660583496094, "learning_rate": 5e-06, "loss": 0.6417, "num_input_tokens_seen": 201495160, "step": 3214 }, { "epoch": 10.695507487520798, "loss": 0.6792020797729492, "loss_ce": 2.8723725336021744e-06, "loss_iou": 0.2392578125, "loss_num": 0.039794921875, "loss_xval": 0.6796875, "num_input_tokens_seen": 201495160, "step": 3214 }, { "epoch": 10.698835274542429, "grad_norm": 41.596500396728516, "learning_rate": 5e-06, "loss": 0.5606, "num_input_tokens_seen": 201557900, "step": 3215 }, { "epoch": 10.698835274542429, "loss": 0.49036189913749695, "loss_ce": 5.448148840514477e-06, "loss_iou": 0.162109375, "loss_num": 0.032958984375, "loss_xval": 0.490234375, "num_input_tokens_seen": 201557900, "step": 3215 }, { "epoch": 10.70216306156406, "grad_norm": 33.32130432128906, "learning_rate": 5e-06, "loss": 0.7592, "num_input_tokens_seen": 201621392, "step": 3216 }, { "epoch": 10.70216306156406, "loss": 0.7030688524246216, "loss_ce": 0.00018802333215717226, "loss_iou": 0.298828125, "loss_num": 0.0208740234375, "loss_xval": 0.703125, "num_input_tokens_seen": 201621392, "step": 3216 }, { "epoch": 10.70549084858569, "grad_norm": 15.807605743408203, "learning_rate": 5e-06, "loss": 0.6973, "num_input_tokens_seen": 201684948, "step": 3217 }, { "epoch": 10.70549084858569, "loss": 0.6929945945739746, "loss_ce": 1.3989454146212665e-06, "loss_iou": 0.2470703125, "loss_num": 0.039794921875, "loss_xval": 0.69140625, "num_input_tokens_seen": 201684948, "step": 3217 }, { "epoch": 10.70881863560732, "grad_norm": 10.403249740600586, "learning_rate": 5e-06, "loss": 0.4986, "num_input_tokens_seen": 201747516, "step": 3218 }, { "epoch": 10.70881863560732, "loss": 0.5282003879547119, "loss_ce": 2.1590926735370886e-06, "loss_iou": 0.2041015625, "loss_num": 0.02392578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 201747516, "step": 3218 }, { "epoch": 10.712146422628951, "grad_norm": 10.333413124084473, "learning_rate": 5e-06, "loss": 0.3624, "num_input_tokens_seen": 201808332, "step": 3219 }, { "epoch": 10.712146422628951, "loss": 0.49318575859069824, "loss_ce": 0.0003879130817949772, "loss_iou": 0.177734375, "loss_num": 0.0274658203125, "loss_xval": 0.4921875, "num_input_tokens_seen": 201808332, "step": 3219 }, { "epoch": 10.715474209650582, "grad_norm": 9.031089782714844, "learning_rate": 5e-06, "loss": 0.4294, "num_input_tokens_seen": 201871468, "step": 3220 }, { "epoch": 10.715474209650582, "loss": 0.412561297416687, "loss_ce": 0.00020778155885636806, "loss_iou": 0.1748046875, "loss_num": 0.012451171875, "loss_xval": 0.412109375, "num_input_tokens_seen": 201871468, "step": 3220 }, { "epoch": 10.718801996672212, "grad_norm": 5.487795352935791, "learning_rate": 5e-06, "loss": 0.472, "num_input_tokens_seen": 201934344, "step": 3221 }, { "epoch": 10.718801996672212, "loss": 0.5174592137336731, "loss_ce": 0.0003083410847466439, "loss_iou": 0.1875, "loss_num": 0.0284423828125, "loss_xval": 0.515625, "num_input_tokens_seen": 201934344, "step": 3221 }, { "epoch": 10.722129783693843, "grad_norm": 19.773242950439453, "learning_rate": 5e-06, "loss": 0.5409, "num_input_tokens_seen": 201998060, "step": 3222 }, { "epoch": 10.722129783693843, "loss": 0.5085470676422119, "loss_ce": 2.1468156319315312e-06, "loss_iou": 0.1806640625, "loss_num": 0.0294189453125, "loss_xval": 0.5078125, "num_input_tokens_seen": 201998060, "step": 3222 }, { "epoch": 10.725457570715474, "grad_norm": 21.78604507446289, "learning_rate": 5e-06, "loss": 0.4888, "num_input_tokens_seen": 202059676, "step": 3223 }, { "epoch": 10.725457570715474, "loss": 0.2922218441963196, "loss_ce": 4.6570694394176826e-05, "loss_iou": 0.1044921875, "loss_num": 0.0167236328125, "loss_xval": 0.29296875, "num_input_tokens_seen": 202059676, "step": 3223 }, { "epoch": 10.728785357737104, "grad_norm": 22.984146118164062, "learning_rate": 5e-06, "loss": 0.4171, "num_input_tokens_seen": 202122632, "step": 3224 }, { "epoch": 10.728785357737104, "loss": 0.5228381156921387, "loss_ce": 0.0006213147426024079, "loss_iou": 0.189453125, "loss_num": 0.02880859375, "loss_xval": 0.5234375, "num_input_tokens_seen": 202122632, "step": 3224 }, { "epoch": 10.732113144758735, "grad_norm": 9.722721099853516, "learning_rate": 5e-06, "loss": 0.489, "num_input_tokens_seen": 202185020, "step": 3225 }, { "epoch": 10.732113144758735, "loss": 0.5637834668159485, "loss_ce": 3.2249256037175655e-05, "loss_iou": 0.2109375, "loss_num": 0.0284423828125, "loss_xval": 0.5625, "num_input_tokens_seen": 202185020, "step": 3225 }, { "epoch": 10.735440931780365, "grad_norm": 17.096384048461914, "learning_rate": 5e-06, "loss": 0.4397, "num_input_tokens_seen": 202248316, "step": 3226 }, { "epoch": 10.735440931780365, "loss": 0.36713075637817383, "loss_ce": 4.286400326236617e-06, "loss_iou": 0.1201171875, "loss_num": 0.025390625, "loss_xval": 0.3671875, "num_input_tokens_seen": 202248316, "step": 3226 }, { "epoch": 10.738768718801996, "grad_norm": 18.449420928955078, "learning_rate": 5e-06, "loss": 0.5932, "num_input_tokens_seen": 202310384, "step": 3227 }, { "epoch": 10.738768718801996, "loss": 0.7733438014984131, "loss_ce": 0.00039459351683035493, "loss_iou": 0.2412109375, "loss_num": 0.05810546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 202310384, "step": 3227 }, { "epoch": 10.742096505823627, "grad_norm": 38.62018966674805, "learning_rate": 5e-06, "loss": 0.4747, "num_input_tokens_seen": 202371776, "step": 3228 }, { "epoch": 10.742096505823627, "loss": 0.3140300512313843, "loss_ce": 4.1566495383449364e-06, "loss_iou": 0.09814453125, "loss_num": 0.0235595703125, "loss_xval": 0.314453125, "num_input_tokens_seen": 202371776, "step": 3228 }, { "epoch": 10.745424292845257, "grad_norm": 26.273462295532227, "learning_rate": 5e-06, "loss": 0.419, "num_input_tokens_seen": 202434116, "step": 3229 }, { "epoch": 10.745424292845257, "loss": 0.26697850227355957, "loss_ce": 1.0720829777710605e-05, "loss_iou": 0.06494140625, "loss_num": 0.0274658203125, "loss_xval": 0.267578125, "num_input_tokens_seen": 202434116, "step": 3229 }, { "epoch": 10.748752079866888, "grad_norm": 11.736581802368164, "learning_rate": 5e-06, "loss": 0.4031, "num_input_tokens_seen": 202495968, "step": 3230 }, { "epoch": 10.748752079866888, "loss": 0.28161680698394775, "loss_ce": 6.136577326287806e-07, "loss_iou": 0.10205078125, "loss_num": 0.01556396484375, "loss_xval": 0.28125, "num_input_tokens_seen": 202495968, "step": 3230 }, { "epoch": 10.752079866888518, "grad_norm": 11.263690948486328, "learning_rate": 5e-06, "loss": 0.3931, "num_input_tokens_seen": 202559328, "step": 3231 }, { "epoch": 10.752079866888518, "loss": 0.24508076906204224, "loss_ce": 0.0020387666299939156, "loss_iou": 0.06494140625, "loss_num": 0.022705078125, "loss_xval": 0.2431640625, "num_input_tokens_seen": 202559328, "step": 3231 }, { "epoch": 10.755407653910149, "grad_norm": 11.104879379272461, "learning_rate": 5e-06, "loss": 0.6702, "num_input_tokens_seen": 202621520, "step": 3232 }, { "epoch": 10.755407653910149, "loss": 0.8420716524124146, "loss_ce": 0.00036630340036936104, "loss_iou": 0.294921875, "loss_num": 0.050537109375, "loss_xval": 0.83984375, "num_input_tokens_seen": 202621520, "step": 3232 }, { "epoch": 10.75873544093178, "grad_norm": 13.672425270080566, "learning_rate": 5e-06, "loss": 0.5873, "num_input_tokens_seen": 202684748, "step": 3233 }, { "epoch": 10.75873544093178, "loss": 0.5370587110519409, "loss_ce": 1.0393386219220702e-05, "loss_iou": 0.220703125, "loss_num": 0.0189208984375, "loss_xval": 0.53515625, "num_input_tokens_seen": 202684748, "step": 3233 }, { "epoch": 10.76206322795341, "grad_norm": 10.926619529724121, "learning_rate": 5e-06, "loss": 0.4028, "num_input_tokens_seen": 202747088, "step": 3234 }, { "epoch": 10.76206322795341, "loss": 0.3883204460144043, "loss_ce": 0.00038100697565823793, "loss_iou": 0.154296875, "loss_num": 0.01611328125, "loss_xval": 0.388671875, "num_input_tokens_seen": 202747088, "step": 3234 }, { "epoch": 10.765391014975041, "grad_norm": 12.71107292175293, "learning_rate": 5e-06, "loss": 0.5013, "num_input_tokens_seen": 202809672, "step": 3235 }, { "epoch": 10.765391014975041, "loss": 0.4365600645542145, "loss_ce": 0.00015868655464146286, "loss_iou": 0.1123046875, "loss_num": 0.042236328125, "loss_xval": 0.435546875, "num_input_tokens_seen": 202809672, "step": 3235 }, { "epoch": 10.768718801996672, "grad_norm": 34.576839447021484, "learning_rate": 5e-06, "loss": 0.4722, "num_input_tokens_seen": 202871804, "step": 3236 }, { "epoch": 10.768718801996672, "loss": 0.3540046811103821, "loss_ce": 7.776924348945613e-07, "loss_iou": 0.1298828125, "loss_num": 0.018798828125, "loss_xval": 0.353515625, "num_input_tokens_seen": 202871804, "step": 3236 }, { "epoch": 10.772046589018302, "grad_norm": 41.7801399230957, "learning_rate": 5e-06, "loss": 0.6519, "num_input_tokens_seen": 202934432, "step": 3237 }, { "epoch": 10.772046589018302, "loss": 0.6197620630264282, "loss_ce": 0.0009876032127067447, "loss_iou": 0.26953125, "loss_num": 0.0159912109375, "loss_xval": 0.6171875, "num_input_tokens_seen": 202934432, "step": 3237 }, { "epoch": 10.775374376039933, "grad_norm": 37.027130126953125, "learning_rate": 5e-06, "loss": 0.5055, "num_input_tokens_seen": 202996436, "step": 3238 }, { "epoch": 10.775374376039933, "loss": 0.3240392208099365, "loss_ce": 0.0007970117731019855, "loss_iou": 0.11376953125, "loss_num": 0.0191650390625, "loss_xval": 0.32421875, "num_input_tokens_seen": 202996436, "step": 3238 }, { "epoch": 10.778702163061563, "grad_norm": 40.41749572753906, "learning_rate": 5e-06, "loss": 0.5796, "num_input_tokens_seen": 203059600, "step": 3239 }, { "epoch": 10.778702163061563, "loss": 0.5472980737686157, "loss_ce": 5.687751399818808e-05, "loss_iou": 0.2216796875, "loss_num": 0.0208740234375, "loss_xval": 0.546875, "num_input_tokens_seen": 203059600, "step": 3239 }, { "epoch": 10.782029950083194, "grad_norm": 29.158180236816406, "learning_rate": 5e-06, "loss": 0.5602, "num_input_tokens_seen": 203121024, "step": 3240 }, { "epoch": 10.782029950083194, "loss": 0.7230173945426941, "loss_ce": 0.00011700851609930396, "loss_iou": 0.296875, "loss_num": 0.02587890625, "loss_xval": 0.72265625, "num_input_tokens_seen": 203121024, "step": 3240 }, { "epoch": 10.785357737104825, "grad_norm": 33.21638107299805, "learning_rate": 5e-06, "loss": 0.6427, "num_input_tokens_seen": 203182688, "step": 3241 }, { "epoch": 10.785357737104825, "loss": 0.4714270234107971, "loss_ce": 5.2501203754218295e-05, "loss_iou": 0.1796875, "loss_num": 0.022216796875, "loss_xval": 0.470703125, "num_input_tokens_seen": 203182688, "step": 3241 }, { "epoch": 10.788685524126455, "grad_norm": 21.46150779724121, "learning_rate": 5e-06, "loss": 0.4027, "num_input_tokens_seen": 203243708, "step": 3242 }, { "epoch": 10.788685524126455, "loss": 0.2962700128555298, "loss_ce": 5.379179128794931e-06, "loss_iou": 0.08447265625, "loss_num": 0.0255126953125, "loss_xval": 0.296875, "num_input_tokens_seen": 203243708, "step": 3242 }, { "epoch": 10.792013311148086, "grad_norm": 6.62225866317749, "learning_rate": 5e-06, "loss": 0.3734, "num_input_tokens_seen": 203305792, "step": 3243 }, { "epoch": 10.792013311148086, "loss": 0.46657416224479675, "loss_ce": 0.00014348202967084944, "loss_iou": 0.1767578125, "loss_num": 0.0224609375, "loss_xval": 0.466796875, "num_input_tokens_seen": 203305792, "step": 3243 }, { "epoch": 10.795341098169716, "grad_norm": 17.803699493408203, "learning_rate": 5e-06, "loss": 0.4822, "num_input_tokens_seen": 203365592, "step": 3244 }, { "epoch": 10.795341098169716, "loss": 0.6164565682411194, "loss_ce": 1.452749870622938e-06, "loss_iou": 0.197265625, "loss_num": 0.04443359375, "loss_xval": 0.6171875, "num_input_tokens_seen": 203365592, "step": 3244 }, { "epoch": 10.798668885191347, "grad_norm": 21.75189781188965, "learning_rate": 5e-06, "loss": 0.362, "num_input_tokens_seen": 203429136, "step": 3245 }, { "epoch": 10.798668885191347, "loss": 0.2635517418384552, "loss_ce": 1.9474832697596867e-06, "loss_iou": 0.0859375, "loss_num": 0.0181884765625, "loss_xval": 0.263671875, "num_input_tokens_seen": 203429136, "step": 3245 }, { "epoch": 10.801996672212978, "grad_norm": 30.185544967651367, "learning_rate": 5e-06, "loss": 0.8818, "num_input_tokens_seen": 203492904, "step": 3246 }, { "epoch": 10.801996672212978, "loss": 0.8896151781082153, "loss_ce": 2.771599974948913e-05, "loss_iou": 0.33203125, "loss_num": 0.045166015625, "loss_xval": 0.890625, "num_input_tokens_seen": 203492904, "step": 3246 }, { "epoch": 10.805324459234608, "grad_norm": 22.433570861816406, "learning_rate": 5e-06, "loss": 0.4476, "num_input_tokens_seen": 203555696, "step": 3247 }, { "epoch": 10.805324459234608, "loss": 0.6403331756591797, "loss_ce": 0.000654177158139646, "loss_iou": 0.2333984375, "loss_num": 0.034912109375, "loss_xval": 0.640625, "num_input_tokens_seen": 203555696, "step": 3247 }, { "epoch": 10.808652246256239, "grad_norm": 14.813493728637695, "learning_rate": 5e-06, "loss": 0.4956, "num_input_tokens_seen": 203617952, "step": 3248 }, { "epoch": 10.808652246256239, "loss": 0.37531745433807373, "loss_ce": 0.0003479677834548056, "loss_iou": 0.1083984375, "loss_num": 0.03173828125, "loss_xval": 0.375, "num_input_tokens_seen": 203617952, "step": 3248 }, { "epoch": 10.81198003327787, "grad_norm": 17.64182472229004, "learning_rate": 5e-06, "loss": 0.5013, "num_input_tokens_seen": 203681380, "step": 3249 }, { "epoch": 10.81198003327787, "loss": 0.5271783471107483, "loss_ce": 1.7695874703349546e-05, "loss_iou": 0.224609375, "loss_num": 0.0155029296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 203681380, "step": 3249 }, { "epoch": 10.8153078202995, "grad_norm": 15.654128074645996, "learning_rate": 5e-06, "loss": 0.6485, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "eval_seeclick_CIoU": 0.056656209751963615, "eval_seeclick_GIoU": 0.06025896035134792, "eval_seeclick_IoU": 0.17274213582277298, "eval_seeclick_MAE_all": 0.1677640825510025, "eval_seeclick_MAE_h": 0.05629223212599754, "eval_seeclick_MAE_w": 0.13848064094781876, "eval_seeclick_MAE_x_boxes": 0.19955745339393616, "eval_seeclick_MAE_y_boxes": 0.1762779951095581, "eval_seeclick_NUM_probability": 0.9999544322490692, "eval_seeclick_inside_bbox": 0.19375000149011612, "eval_seeclick_loss": 2.8975889682769775, "eval_seeclick_loss_ce": 0.1650034710764885, "eval_seeclick_loss_iou": 0.95263671875, "eval_seeclick_loss_num": 0.1708221435546875, "eval_seeclick_loss_xval": 2.76025390625, "eval_seeclick_runtime": 72.0769, "eval_seeclick_samples_per_second": 0.652, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "eval_icons_CIoU": -0.03681820537894964, "eval_icons_GIoU": 0.04891681671142578, "eval_icons_IoU": 0.12414276599884033, "eval_icons_MAE_all": 0.17678464204072952, "eval_icons_MAE_h": 0.1401018127799034, "eval_icons_MAE_w": 0.17375428974628448, "eval_icons_MAE_x_boxes": 0.1361026018857956, "eval_icons_MAE_y_boxes": 0.090394776314497, "eval_icons_NUM_probability": 0.9999855756759644, "eval_icons_inside_bbox": 0.3072916716337204, "eval_icons_loss": 2.756878137588501, "eval_icons_loss_ce": 3.795301154241315e-06, "eval_icons_loss_iou": 0.951904296875, "eval_icons_loss_num": 0.1739959716796875, "eval_icons_loss_xval": 2.775390625, "eval_icons_runtime": 73.15, "eval_icons_samples_per_second": 0.684, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "eval_screenspot_CIoU": 0.18034246812264124, "eval_screenspot_GIoU": 0.21565457681814829, "eval_screenspot_IoU": 0.2895529766877492, "eval_screenspot_MAE_all": 0.1154794047276179, "eval_screenspot_MAE_h": 0.07627355555693309, "eval_screenspot_MAE_w": 0.09403730928897858, "eval_screenspot_MAE_x_boxes": 0.15675128002961478, "eval_screenspot_MAE_y_boxes": 0.07865347961584727, "eval_screenspot_NUM_probability": 0.999988853931427, "eval_screenspot_inside_bbox": 0.512500007947286, "eval_screenspot_loss": 2.1868228912353516, "eval_screenspot_loss_ce": 2.3794259201774064e-05, "eval_screenspot_loss_iou": 0.8028971354166666, "eval_screenspot_loss_num": 0.1273040771484375, "eval_screenspot_loss_xval": 2.2425130208333335, "eval_screenspot_runtime": 116.4545, "eval_screenspot_samples_per_second": 0.764, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "eval_compot_CIoU": 0.049945903941988945, "eval_compot_GIoU": 0.1016257293522358, "eval_compot_IoU": 0.1970420628786087, "eval_compot_MAE_all": 0.16960866749286652, "eval_compot_MAE_h": 0.0800932114943862, "eval_compot_MAE_w": 0.19288084656000137, "eval_compot_MAE_x_boxes": 0.14919423311948776, "eval_compot_MAE_y_boxes": 0.12724914029240608, "eval_compot_NUM_probability": 0.9999901056289673, "eval_compot_inside_bbox": 0.2986111119389534, "eval_compot_loss": 2.6252388954162598, "eval_compot_loss_ce": 0.0016628538724035025, "eval_compot_loss_iou": 0.904052734375, "eval_compot_loss_num": 0.1769084930419922, "eval_compot_loss_xval": 2.6943359375, "eval_compot_runtime": 69.736, "eval_compot_samples_per_second": 0.717, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "eval_custom_ui_MAE_all": 0.06346129439771175, "eval_custom_ui_MAE_x": 0.06945383176207542, "eval_custom_ui_MAE_y": 0.057468755170702934, "eval_custom_ui_NUM_probability": 0.9999981224536896, "eval_custom_ui_loss": 0.2966082990169525, "eval_custom_ui_loss_ce": 5.5326763686025515e-06, "eval_custom_ui_loss_num": 0.0627899169921875, "eval_custom_ui_loss_xval": 0.313751220703125, "eval_custom_ui_runtime": 51.7514, "eval_custom_ui_samples_per_second": 0.966, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.8153078202995, "loss": 0.32959598302841187, "loss_ce": 6.142689016996883e-06, "loss_iou": 0.0, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 203745148, "step": 3250 }, { "epoch": 10.81863560732113, "grad_norm": 19.500164031982422, "learning_rate": 5e-06, "loss": 0.4793, "num_input_tokens_seen": 203806948, "step": 3251 }, { "epoch": 10.81863560732113, "loss": 0.36294740438461304, "loss_ce": 1.8342342400501366e-06, "loss_iou": 0.13671875, "loss_num": 0.017822265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 203806948, "step": 3251 }, { "epoch": 10.821963394342761, "grad_norm": 23.915363311767578, "learning_rate": 5e-06, "loss": 0.6391, "num_input_tokens_seen": 203869148, "step": 3252 }, { "epoch": 10.821963394342761, "loss": 0.691957950592041, "loss_ce": 2.406623025308363e-06, "loss_iou": 0.23046875, "loss_num": 0.04638671875, "loss_xval": 0.69140625, "num_input_tokens_seen": 203869148, "step": 3252 }, { "epoch": 10.825291181364392, "grad_norm": 9.050357818603516, "learning_rate": 5e-06, "loss": 0.3968, "num_input_tokens_seen": 203931176, "step": 3253 }, { "epoch": 10.825291181364392, "loss": 0.3356984555721283, "loss_ce": 6.613643927266821e-05, "loss_iou": 0.1396484375, "loss_num": 0.01141357421875, "loss_xval": 0.3359375, "num_input_tokens_seen": 203931176, "step": 3253 }, { "epoch": 10.828618968386023, "grad_norm": 22.97841453552246, "learning_rate": 5e-06, "loss": 0.5462, "num_input_tokens_seen": 203994920, "step": 3254 }, { "epoch": 10.828618968386023, "loss": 0.7527483105659485, "loss_ce": 1.7116703929787036e-06, "loss_iou": 0.3125, "loss_num": 0.02587890625, "loss_xval": 0.75390625, "num_input_tokens_seen": 203994920, "step": 3254 }, { "epoch": 10.831946755407653, "grad_norm": 28.681312561035156, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 204059004, "step": 3255 }, { "epoch": 10.831946755407653, "loss": 0.6167087554931641, "loss_ce": 9.52705795498332e-06, "loss_iou": 0.267578125, "loss_num": 0.01611328125, "loss_xval": 0.6171875, "num_input_tokens_seen": 204059004, "step": 3255 }, { "epoch": 10.835274542429284, "grad_norm": 5.087278842926025, "learning_rate": 5e-06, "loss": 0.3637, "num_input_tokens_seen": 204121656, "step": 3256 }, { "epoch": 10.835274542429284, "loss": 0.3670705258846283, "loss_ce": 5.097794200992212e-06, "loss_iou": 0.14453125, "loss_num": 0.015625, "loss_xval": 0.3671875, "num_input_tokens_seen": 204121656, "step": 3256 }, { "epoch": 10.838602329450914, "grad_norm": 9.670217514038086, "learning_rate": 5e-06, "loss": 0.5085, "num_input_tokens_seen": 204185792, "step": 3257 }, { "epoch": 10.838602329450914, "loss": 0.5141081213951111, "loss_ce": 8.985247404780239e-06, "loss_iou": 0.1904296875, "loss_num": 0.0267333984375, "loss_xval": 0.515625, "num_input_tokens_seen": 204185792, "step": 3257 }, { "epoch": 10.841930116472545, "grad_norm": 12.542338371276855, "learning_rate": 5e-06, "loss": 0.5124, "num_input_tokens_seen": 204248388, "step": 3258 }, { "epoch": 10.841930116472545, "loss": 0.607423722743988, "loss_ce": 1.854933657341462e-06, "loss_iou": 0.2578125, "loss_num": 0.01806640625, "loss_xval": 0.609375, "num_input_tokens_seen": 204248388, "step": 3258 }, { "epoch": 10.845257903494176, "grad_norm": 25.696374893188477, "learning_rate": 5e-06, "loss": 0.5517, "num_input_tokens_seen": 204311264, "step": 3259 }, { "epoch": 10.845257903494176, "loss": 0.43165290355682373, "loss_ce": 1.2300725757086184e-05, "loss_iou": 0.1708984375, "loss_num": 0.0179443359375, "loss_xval": 0.431640625, "num_input_tokens_seen": 204311264, "step": 3259 }, { "epoch": 10.848585690515806, "grad_norm": 20.952449798583984, "learning_rate": 5e-06, "loss": 0.3591, "num_input_tokens_seen": 204373108, "step": 3260 }, { "epoch": 10.848585690515806, "loss": 0.2205154299736023, "loss_ce": 0.0003005805774591863, "loss_iou": 0.0712890625, "loss_num": 0.0155029296875, "loss_xval": 0.220703125, "num_input_tokens_seen": 204373108, "step": 3260 }, { "epoch": 10.851913477537437, "grad_norm": 7.9592437744140625, "learning_rate": 5e-06, "loss": 0.4141, "num_input_tokens_seen": 204435164, "step": 3261 }, { "epoch": 10.851913477537437, "loss": 0.3361702859401703, "loss_ce": 0.00015651097055524588, "loss_iou": 0.1103515625, "loss_num": 0.0230712890625, "loss_xval": 0.3359375, "num_input_tokens_seen": 204435164, "step": 3261 }, { "epoch": 10.855241264559067, "grad_norm": 9.420581817626953, "learning_rate": 5e-06, "loss": 0.4597, "num_input_tokens_seen": 204497548, "step": 3262 }, { "epoch": 10.855241264559067, "loss": 0.6635773181915283, "loss_ce": 3.054047738260124e-06, "loss_iou": 0.26171875, "loss_num": 0.0284423828125, "loss_xval": 0.6640625, "num_input_tokens_seen": 204497548, "step": 3262 }, { "epoch": 10.858569051580698, "grad_norm": 17.0826358795166, "learning_rate": 5e-06, "loss": 0.5172, "num_input_tokens_seen": 204561076, "step": 3263 }, { "epoch": 10.858569051580698, "loss": 0.42720162868499756, "loss_ce": 1.6577134374529123e-05, "loss_iou": 0.1884765625, "loss_num": 0.00994873046875, "loss_xval": 0.427734375, "num_input_tokens_seen": 204561076, "step": 3263 }, { "epoch": 10.861896838602329, "grad_norm": 10.89204216003418, "learning_rate": 5e-06, "loss": 0.5434, "num_input_tokens_seen": 204623956, "step": 3264 }, { "epoch": 10.861896838602329, "loss": 0.41191911697387695, "loss_ce": 2.3379441699944437e-05, "loss_iou": 0.15234375, "loss_num": 0.0213623046875, "loss_xval": 0.412109375, "num_input_tokens_seen": 204623956, "step": 3264 }, { "epoch": 10.86522462562396, "grad_norm": 13.792790412902832, "learning_rate": 5e-06, "loss": 0.5102, "num_input_tokens_seen": 204687624, "step": 3265 }, { "epoch": 10.86522462562396, "loss": 0.5496379137039185, "loss_ce": 0.0001994360500248149, "loss_iou": 0.1904296875, "loss_num": 0.033447265625, "loss_xval": 0.55078125, "num_input_tokens_seen": 204687624, "step": 3265 }, { "epoch": 10.86855241264559, "grad_norm": 30.16346549987793, "learning_rate": 5e-06, "loss": 0.5641, "num_input_tokens_seen": 204750768, "step": 3266 }, { "epoch": 10.86855241264559, "loss": 0.46053346991539, "loss_ce": 0.000694595102686435, "loss_iou": 0.1845703125, "loss_num": 0.01806640625, "loss_xval": 0.458984375, "num_input_tokens_seen": 204750768, "step": 3266 }, { "epoch": 10.87188019966722, "grad_norm": 30.0433292388916, "learning_rate": 5e-06, "loss": 0.4097, "num_input_tokens_seen": 204813684, "step": 3267 }, { "epoch": 10.87188019966722, "loss": 0.6063628792762756, "loss_ce": 0.00028377078706398606, "loss_iou": 0.21484375, "loss_num": 0.03515625, "loss_xval": 0.60546875, "num_input_tokens_seen": 204813684, "step": 3267 }, { "epoch": 10.875207986688851, "grad_norm": 13.966445922851562, "learning_rate": 5e-06, "loss": 0.4036, "num_input_tokens_seen": 204876328, "step": 3268 }, { "epoch": 10.875207986688851, "loss": 0.2893088161945343, "loss_ce": 2.159887571906438e-06, "loss_iou": 0.08837890625, "loss_num": 0.0225830078125, "loss_xval": 0.2890625, "num_input_tokens_seen": 204876328, "step": 3268 }, { "epoch": 10.878535773710482, "grad_norm": 21.4516544342041, "learning_rate": 5e-06, "loss": 0.6255, "num_input_tokens_seen": 204938652, "step": 3269 }, { "epoch": 10.878535773710482, "loss": 0.8481483459472656, "loss_ce": 3.877681592712179e-06, "loss_iou": 0.326171875, "loss_num": 0.0390625, "loss_xval": 0.84765625, "num_input_tokens_seen": 204938652, "step": 3269 }, { "epoch": 10.881863560732112, "grad_norm": 7.414470195770264, "learning_rate": 5e-06, "loss": 0.4167, "num_input_tokens_seen": 205001616, "step": 3270 }, { "epoch": 10.881863560732112, "loss": 0.5581079721450806, "loss_ce": 2.5527224352117628e-06, "loss_iou": 0.212890625, "loss_num": 0.026611328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 205001616, "step": 3270 }, { "epoch": 10.885191347753743, "grad_norm": 21.355422973632812, "learning_rate": 5e-06, "loss": 0.5965, "num_input_tokens_seen": 205065792, "step": 3271 }, { "epoch": 10.885191347753743, "loss": 0.7156021595001221, "loss_ce": 0.0002701705088838935, "loss_iou": 0.294921875, "loss_num": 0.025390625, "loss_xval": 0.71484375, "num_input_tokens_seen": 205065792, "step": 3271 }, { "epoch": 10.888519134775374, "grad_norm": 36.2135009765625, "learning_rate": 5e-06, "loss": 0.6426, "num_input_tokens_seen": 205126020, "step": 3272 }, { "epoch": 10.888519134775374, "loss": 0.6474828124046326, "loss_ce": 0.0007390384562313557, "loss_iou": 0.2373046875, "loss_num": 0.0341796875, "loss_xval": 0.6484375, "num_input_tokens_seen": 205126020, "step": 3272 }, { "epoch": 10.891846921797004, "grad_norm": 29.724729537963867, "learning_rate": 5e-06, "loss": 0.4874, "num_input_tokens_seen": 205188536, "step": 3273 }, { "epoch": 10.891846921797004, "loss": 0.4392111301422119, "loss_ce": 2.170806055801222e-06, "loss_iou": 0.171875, "loss_num": 0.0191650390625, "loss_xval": 0.439453125, "num_input_tokens_seen": 205188536, "step": 3273 }, { "epoch": 10.895174708818635, "grad_norm": 23.0649471282959, "learning_rate": 5e-06, "loss": 0.6723, "num_input_tokens_seen": 205251868, "step": 3274 }, { "epoch": 10.895174708818635, "loss": 0.7898342609405518, "loss_ce": 3.936688881367445e-05, "loss_iou": 0.328125, "loss_num": 0.027099609375, "loss_xval": 0.7890625, "num_input_tokens_seen": 205251868, "step": 3274 }, { "epoch": 10.898502495840265, "grad_norm": 30.016801834106445, "learning_rate": 5e-06, "loss": 0.423, "num_input_tokens_seen": 205314968, "step": 3275 }, { "epoch": 10.898502495840265, "loss": 0.3730838894844055, "loss_ce": 0.0001590859319549054, "loss_iou": 0.1484375, "loss_num": 0.01513671875, "loss_xval": 0.373046875, "num_input_tokens_seen": 205314968, "step": 3275 }, { "epoch": 10.901830282861896, "grad_norm": 8.61716079711914, "learning_rate": 5e-06, "loss": 0.42, "num_input_tokens_seen": 205377872, "step": 3276 }, { "epoch": 10.901830282861896, "loss": 0.48672643303871155, "loss_ce": 1.586046778356831e-06, "loss_iou": 0.19140625, "loss_num": 0.020751953125, "loss_xval": 0.486328125, "num_input_tokens_seen": 205377872, "step": 3276 }, { "epoch": 10.905158069883527, "grad_norm": 7.146363258361816, "learning_rate": 5e-06, "loss": 0.5403, "num_input_tokens_seen": 205440712, "step": 3277 }, { "epoch": 10.905158069883527, "loss": 0.5486406683921814, "loss_ce": 0.0003618651535362005, "loss_iou": 0.2119140625, "loss_num": 0.0247802734375, "loss_xval": 0.546875, "num_input_tokens_seen": 205440712, "step": 3277 }, { "epoch": 10.908485856905157, "grad_norm": 14.710494995117188, "learning_rate": 5e-06, "loss": 0.6044, "num_input_tokens_seen": 205504360, "step": 3278 }, { "epoch": 10.908485856905157, "loss": 0.5494498014450073, "loss_ce": 1.1359736163285561e-05, "loss_iou": 0.21484375, "loss_num": 0.0242919921875, "loss_xval": 0.55078125, "num_input_tokens_seen": 205504360, "step": 3278 }, { "epoch": 10.911813643926788, "grad_norm": 25.678834915161133, "learning_rate": 5e-06, "loss": 0.5216, "num_input_tokens_seen": 205567904, "step": 3279 }, { "epoch": 10.911813643926788, "loss": 0.5419096350669861, "loss_ce": 0.00046674092300236225, "loss_iou": 0.201171875, "loss_num": 0.02783203125, "loss_xval": 0.54296875, "num_input_tokens_seen": 205567904, "step": 3279 }, { "epoch": 10.915141430948418, "grad_norm": 28.51442527770996, "learning_rate": 5e-06, "loss": 0.8048, "num_input_tokens_seen": 205632148, "step": 3280 }, { "epoch": 10.915141430948418, "loss": 0.7815250158309937, "loss_ce": 0.00027503492310643196, "loss_iou": 0.3359375, "loss_num": 0.0218505859375, "loss_xval": 0.78125, "num_input_tokens_seen": 205632148, "step": 3280 }, { "epoch": 10.918469217970049, "grad_norm": 23.239221572875977, "learning_rate": 5e-06, "loss": 0.5748, "num_input_tokens_seen": 205694000, "step": 3281 }, { "epoch": 10.918469217970049, "loss": 0.49702706933021545, "loss_ce": 0.001421620137989521, "loss_iou": 0.1552734375, "loss_num": 0.036865234375, "loss_xval": 0.49609375, "num_input_tokens_seen": 205694000, "step": 3281 }, { "epoch": 10.92179700499168, "grad_norm": 9.778992652893066, "learning_rate": 5e-06, "loss": 0.3526, "num_input_tokens_seen": 205755292, "step": 3282 }, { "epoch": 10.92179700499168, "loss": 0.22308574616909027, "loss_ce": 2.2501233161165146e-06, "loss_iou": 0.0361328125, "loss_num": 0.0301513671875, "loss_xval": 0.22265625, "num_input_tokens_seen": 205755292, "step": 3282 }, { "epoch": 10.92512479201331, "grad_norm": 11.450294494628906, "learning_rate": 5e-06, "loss": 0.4155, "num_input_tokens_seen": 205817344, "step": 3283 }, { "epoch": 10.92512479201331, "loss": 0.30084437131881714, "loss_ce": 2.103481165249832e-06, "loss_iou": 0.09912109375, "loss_num": 0.0205078125, "loss_xval": 0.30078125, "num_input_tokens_seen": 205817344, "step": 3283 }, { "epoch": 10.928452579034941, "grad_norm": 5.760648250579834, "learning_rate": 5e-06, "loss": 0.4445, "num_input_tokens_seen": 205880504, "step": 3284 }, { "epoch": 10.928452579034941, "loss": 0.5923076868057251, "loss_ce": 0.0006329065072350204, "loss_iou": 0.2119140625, "loss_num": 0.03369140625, "loss_xval": 0.58984375, "num_input_tokens_seen": 205880504, "step": 3284 }, { "epoch": 10.931780366056572, "grad_norm": 10.627935409545898, "learning_rate": 5e-06, "loss": 0.3613, "num_input_tokens_seen": 205943392, "step": 3285 }, { "epoch": 10.931780366056572, "loss": 0.2754357159137726, "loss_ce": 0.0004113083705306053, "loss_iou": 0.11376953125, "loss_num": 0.0093994140625, "loss_xval": 0.275390625, "num_input_tokens_seen": 205943392, "step": 3285 }, { "epoch": 10.935108153078202, "grad_norm": 5.765439033508301, "learning_rate": 5e-06, "loss": 0.4205, "num_input_tokens_seen": 206003512, "step": 3286 }, { "epoch": 10.935108153078202, "loss": 0.27417653799057007, "loss_ce": 0.0006780011462979019, "loss_iou": 0.08984375, "loss_num": 0.0186767578125, "loss_xval": 0.2734375, "num_input_tokens_seen": 206003512, "step": 3286 }, { "epoch": 10.938435940099833, "grad_norm": 16.43202018737793, "learning_rate": 5e-06, "loss": 0.5622, "num_input_tokens_seen": 206066308, "step": 3287 }, { "epoch": 10.938435940099833, "loss": 0.6480729579925537, "loss_ce": 1.666274556555436e-06, "loss_iou": 0.2041015625, "loss_num": 0.0478515625, "loss_xval": 0.6484375, "num_input_tokens_seen": 206066308, "step": 3287 }, { "epoch": 10.941763727121465, "grad_norm": 22.203262329101562, "learning_rate": 5e-06, "loss": 0.6136, "num_input_tokens_seen": 206129948, "step": 3288 }, { "epoch": 10.941763727121465, "loss": 0.6691297292709351, "loss_ce": 0.00015385517326649278, "loss_iou": 0.2421875, "loss_num": 0.036865234375, "loss_xval": 0.66796875, "num_input_tokens_seen": 206129948, "step": 3288 }, { "epoch": 10.945091514143094, "grad_norm": 22.110502243041992, "learning_rate": 5e-06, "loss": 0.6091, "num_input_tokens_seen": 206193532, "step": 3289 }, { "epoch": 10.945091514143094, "loss": 0.8260629773139954, "loss_ce": 0.0007455982267856598, "loss_iou": 0.3203125, "loss_num": 0.036865234375, "loss_xval": 0.82421875, "num_input_tokens_seen": 206193532, "step": 3289 }, { "epoch": 10.948419301164726, "grad_norm": 42.004817962646484, "learning_rate": 5e-06, "loss": 0.7121, "num_input_tokens_seen": 206256804, "step": 3290 }, { "epoch": 10.948419301164726, "loss": 1.0056182146072388, "loss_ce": 2.9624491162394406e-06, "loss_iou": 0.361328125, "loss_num": 0.056640625, "loss_xval": 1.0078125, "num_input_tokens_seen": 206256804, "step": 3290 }, { "epoch": 10.951747088186355, "grad_norm": 35.22500228881836, "learning_rate": 5e-06, "loss": 0.4381, "num_input_tokens_seen": 206317916, "step": 3291 }, { "epoch": 10.951747088186355, "loss": 0.3925095498561859, "loss_ce": 0.00017557885439600796, "loss_iou": 0.08203125, "loss_num": 0.045654296875, "loss_xval": 0.392578125, "num_input_tokens_seen": 206317916, "step": 3291 }, { "epoch": 10.955074875207988, "grad_norm": 16.054805755615234, "learning_rate": 5e-06, "loss": 0.3313, "num_input_tokens_seen": 206378220, "step": 3292 }, { "epoch": 10.955074875207988, "loss": 0.31341779232025146, "loss_ce": 2.2512081159220543e-06, "loss_iou": 0.07373046875, "loss_num": 0.033203125, "loss_xval": 0.3125, "num_input_tokens_seen": 206378220, "step": 3292 }, { "epoch": 10.958402662229616, "grad_norm": 14.142621040344238, "learning_rate": 5e-06, "loss": 0.4641, "num_input_tokens_seen": 206441272, "step": 3293 }, { "epoch": 10.958402662229616, "loss": 0.49635446071624756, "loss_ce": 0.00026069776504300535, "loss_iou": 0.1884765625, "loss_num": 0.0240478515625, "loss_xval": 0.49609375, "num_input_tokens_seen": 206441272, "step": 3293 }, { "epoch": 10.961730449251249, "grad_norm": 44.88037109375, "learning_rate": 5e-06, "loss": 0.7772, "num_input_tokens_seen": 206503388, "step": 3294 }, { "epoch": 10.961730449251249, "loss": 0.7530589699745178, "loss_ce": 7.221136002044659e-06, "loss_iou": 0.2294921875, "loss_num": 0.05859375, "loss_xval": 0.75390625, "num_input_tokens_seen": 206503388, "step": 3294 }, { "epoch": 10.965058236272878, "grad_norm": 76.14167785644531, "learning_rate": 5e-06, "loss": 0.6792, "num_input_tokens_seen": 206566156, "step": 3295 }, { "epoch": 10.965058236272878, "loss": 0.8689050674438477, "loss_ce": 8.593149686930701e-06, "loss_iou": 0.359375, "loss_num": 0.030517578125, "loss_xval": 0.8671875, "num_input_tokens_seen": 206566156, "step": 3295 }, { "epoch": 10.96838602329451, "grad_norm": 24.908950805664062, "learning_rate": 5e-06, "loss": 0.6291, "num_input_tokens_seen": 206629236, "step": 3296 }, { "epoch": 10.96838602329451, "loss": 0.511483907699585, "loss_ce": 9.333186426374596e-06, "loss_iou": 0.2080078125, "loss_num": 0.0189208984375, "loss_xval": 0.51171875, "num_input_tokens_seen": 206629236, "step": 3296 }, { "epoch": 10.971713810316139, "grad_norm": 33.68063735961914, "learning_rate": 5e-06, "loss": 0.6719, "num_input_tokens_seen": 206691528, "step": 3297 }, { "epoch": 10.971713810316139, "loss": 0.5567221641540527, "loss_ce": 2.048153692157939e-05, "loss_iou": 0.2119140625, "loss_num": 0.0264892578125, "loss_xval": 0.55859375, "num_input_tokens_seen": 206691528, "step": 3297 }, { "epoch": 10.975041597337771, "grad_norm": 29.158540725708008, "learning_rate": 5e-06, "loss": 0.5439, "num_input_tokens_seen": 206755108, "step": 3298 }, { "epoch": 10.975041597337771, "loss": 0.38196301460266113, "loss_ce": 5.0166595428891014e-06, "loss_iou": 0.154296875, "loss_num": 0.01483154296875, "loss_xval": 0.3828125, "num_input_tokens_seen": 206755108, "step": 3298 }, { "epoch": 10.9783693843594, "grad_norm": 24.4852294921875, "learning_rate": 5e-06, "loss": 0.3807, "num_input_tokens_seen": 206816476, "step": 3299 }, { "epoch": 10.9783693843594, "loss": 0.3983447849750519, "loss_ce": 9.038842108566314e-05, "loss_iou": 0.11376953125, "loss_num": 0.0341796875, "loss_xval": 0.3984375, "num_input_tokens_seen": 206816476, "step": 3299 }, { "epoch": 10.981697171381033, "grad_norm": 17.541980743408203, "learning_rate": 5e-06, "loss": 0.5148, "num_input_tokens_seen": 206879772, "step": 3300 }, { "epoch": 10.981697171381033, "loss": 0.5191359519958496, "loss_ce": 9.299909288529307e-05, "loss_iou": 0.21875, "loss_num": 0.016357421875, "loss_xval": 0.51953125, "num_input_tokens_seen": 206879772, "step": 3300 }, { "epoch": 10.985024958402661, "grad_norm": 17.79423713684082, "learning_rate": 5e-06, "loss": 0.5578, "num_input_tokens_seen": 206942384, "step": 3301 }, { "epoch": 10.985024958402661, "loss": 0.5588536858558655, "loss_ce": 1.5814010112080723e-05, "loss_iou": 0.203125, "loss_num": 0.0306396484375, "loss_xval": 0.55859375, "num_input_tokens_seen": 206942384, "step": 3301 }, { "epoch": 10.988352745424294, "grad_norm": 17.613571166992188, "learning_rate": 5e-06, "loss": 0.563, "num_input_tokens_seen": 207004288, "step": 3302 }, { "epoch": 10.988352745424294, "loss": 0.7579607963562012, "loss_ce": 0.0005144801107235253, "loss_iou": 0.291015625, "loss_num": 0.035400390625, "loss_xval": 0.7578125, "num_input_tokens_seen": 207004288, "step": 3302 }, { "epoch": 10.991680532445923, "grad_norm": 28.980024337768555, "learning_rate": 5e-06, "loss": 0.575, "num_input_tokens_seen": 207066588, "step": 3303 }, { "epoch": 10.991680532445923, "loss": 0.5439819693565369, "loss_ce": 0.0004791583924088627, "loss_iou": 0.21875, "loss_num": 0.0213623046875, "loss_xval": 0.54296875, "num_input_tokens_seen": 207066588, "step": 3303 }, { "epoch": 10.995008319467555, "grad_norm": 32.20448303222656, "learning_rate": 5e-06, "loss": 0.5791, "num_input_tokens_seen": 207130044, "step": 3304 }, { "epoch": 10.995008319467555, "loss": 0.583682656288147, "loss_ce": 6.450540968216956e-05, "loss_iou": 0.2431640625, "loss_num": 0.0196533203125, "loss_xval": 0.58203125, "num_input_tokens_seen": 207130044, "step": 3304 }, { "epoch": 10.998336106489184, "grad_norm": 20.353527069091797, "learning_rate": 5e-06, "loss": 0.4329, "num_input_tokens_seen": 207191832, "step": 3305 }, { "epoch": 10.998336106489184, "loss": 0.44604626297950745, "loss_ce": 1.3430681065074168e-06, "loss_iou": 0.1640625, "loss_num": 0.0234375, "loss_xval": 0.4453125, "num_input_tokens_seen": 207191832, "step": 3305 }, { "epoch": 10.998336106489184, "loss": 0.6204924583435059, "loss_ce": 9.013368980959058e-06, "loss_iou": 0.255859375, "loss_num": 0.0218505859375, "loss_xval": 0.62109375, "num_input_tokens_seen": 207222236, "step": 3305 }, { "epoch": 11.001663893510816, "grad_norm": 12.57391357421875, "learning_rate": 5e-06, "loss": 0.6169, "num_input_tokens_seen": 207254248, "step": 3306 }, { "epoch": 11.001663893510816, "loss": 0.6133512854576111, "loss_ce": 9.029296961671207e-06, "loss_iou": 0.244140625, "loss_num": 0.0250244140625, "loss_xval": 0.61328125, "num_input_tokens_seen": 207254248, "step": 3306 }, { "epoch": 11.004991680532447, "grad_norm": 7.424095630645752, "learning_rate": 5e-06, "loss": 0.2925, "num_input_tokens_seen": 207316896, "step": 3307 }, { "epoch": 11.004991680532447, "loss": 0.31033891439437866, "loss_ce": 0.00037189171416684985, "loss_iou": 0.1064453125, "loss_num": 0.0194091796875, "loss_xval": 0.310546875, "num_input_tokens_seen": 207316896, "step": 3307 }, { "epoch": 11.008319467554077, "grad_norm": 13.86965274810791, "learning_rate": 5e-06, "loss": 0.3735, "num_input_tokens_seen": 207378480, "step": 3308 }, { "epoch": 11.008319467554077, "loss": 0.28919899463653564, "loss_ce": 1.442568282072898e-05, "loss_iou": 0.05810546875, "loss_num": 0.03466796875, "loss_xval": 0.2890625, "num_input_tokens_seen": 207378480, "step": 3308 }, { "epoch": 11.011647254575708, "grad_norm": 9.50208854675293, "learning_rate": 5e-06, "loss": 0.5147, "num_input_tokens_seen": 207442180, "step": 3309 }, { "epoch": 11.011647254575708, "loss": 0.5457166433334351, "loss_ce": 1.320983983532642e-06, "loss_iou": 0.1904296875, "loss_num": 0.03271484375, "loss_xval": 0.546875, "num_input_tokens_seen": 207442180, "step": 3309 }, { "epoch": 11.014975041597339, "grad_norm": 15.967816352844238, "learning_rate": 5e-06, "loss": 0.6663, "num_input_tokens_seen": 207507444, "step": 3310 }, { "epoch": 11.014975041597339, "loss": 0.6505467295646667, "loss_ce": 0.0002781808143481612, "loss_iou": 0.2578125, "loss_num": 0.0272216796875, "loss_xval": 0.6484375, "num_input_tokens_seen": 207507444, "step": 3310 }, { "epoch": 11.01830282861897, "grad_norm": 12.475397109985352, "learning_rate": 5e-06, "loss": 0.4435, "num_input_tokens_seen": 207571044, "step": 3311 }, { "epoch": 11.01830282861897, "loss": 0.44828125834465027, "loss_ce": 0.0001611513434909284, "loss_iou": 0.1376953125, "loss_num": 0.034912109375, "loss_xval": 0.447265625, "num_input_tokens_seen": 207571044, "step": 3311 }, { "epoch": 11.0216306156406, "grad_norm": 17.22987174987793, "learning_rate": 5e-06, "loss": 0.5442, "num_input_tokens_seen": 207635120, "step": 3312 }, { "epoch": 11.0216306156406, "loss": 0.5152022242546082, "loss_ce": 4.4743069338437635e-06, "loss_iou": 0.1875, "loss_num": 0.02783203125, "loss_xval": 0.515625, "num_input_tokens_seen": 207635120, "step": 3312 }, { "epoch": 11.02495840266223, "grad_norm": 8.820594787597656, "learning_rate": 5e-06, "loss": 0.329, "num_input_tokens_seen": 207697784, "step": 3313 }, { "epoch": 11.02495840266223, "loss": 0.38241589069366455, "loss_ce": 0.00033580412855371833, "loss_iou": 0.1298828125, "loss_num": 0.0244140625, "loss_xval": 0.3828125, "num_input_tokens_seen": 207697784, "step": 3313 }, { "epoch": 11.028286189683861, "grad_norm": 12.78525447845459, "learning_rate": 5e-06, "loss": 0.5048, "num_input_tokens_seen": 207760716, "step": 3314 }, { "epoch": 11.028286189683861, "loss": 0.4716256856918335, "loss_ce": 0.00037325185257941484, "loss_iou": 0.177734375, "loss_num": 0.0233154296875, "loss_xval": 0.470703125, "num_input_tokens_seen": 207760716, "step": 3314 }, { "epoch": 11.031613976705492, "grad_norm": 9.478928565979004, "learning_rate": 5e-06, "loss": 0.4867, "num_input_tokens_seen": 207823184, "step": 3315 }, { "epoch": 11.031613976705492, "loss": 0.3928849697113037, "loss_ce": 1.6631020116619766e-06, "loss_iou": 0.1201171875, "loss_num": 0.030517578125, "loss_xval": 0.392578125, "num_input_tokens_seen": 207823184, "step": 3315 }, { "epoch": 11.034941763727122, "grad_norm": 9.980057716369629, "learning_rate": 5e-06, "loss": 0.4318, "num_input_tokens_seen": 207886228, "step": 3316 }, { "epoch": 11.034941763727122, "loss": 0.28340965509414673, "loss_ce": 5.527857638298883e-07, "loss_iou": 0.1083984375, "loss_num": 0.013427734375, "loss_xval": 0.283203125, "num_input_tokens_seen": 207886228, "step": 3316 }, { "epoch": 11.038269550748753, "grad_norm": 17.494918823242188, "learning_rate": 5e-06, "loss": 0.3613, "num_input_tokens_seen": 207949884, "step": 3317 }, { "epoch": 11.038269550748753, "loss": 0.42725759744644165, "loss_ce": 0.00013358065916690975, "loss_iou": 0.1640625, "loss_num": 0.02001953125, "loss_xval": 0.427734375, "num_input_tokens_seen": 207949884, "step": 3317 }, { "epoch": 11.041597337770384, "grad_norm": 26.103118896484375, "learning_rate": 5e-06, "loss": 0.6932, "num_input_tokens_seen": 208013476, "step": 3318 }, { "epoch": 11.041597337770384, "loss": 0.5456943511962891, "loss_ce": 4.008870018878952e-05, "loss_iou": 0.2119140625, "loss_num": 0.0245361328125, "loss_xval": 0.546875, "num_input_tokens_seen": 208013476, "step": 3318 }, { "epoch": 11.044925124792014, "grad_norm": 34.500640869140625, "learning_rate": 5e-06, "loss": 0.5642, "num_input_tokens_seen": 208074488, "step": 3319 }, { "epoch": 11.044925124792014, "loss": 0.6722356081008911, "loss_ce": 5.547124965232797e-05, "loss_iou": 0.279296875, "loss_num": 0.02294921875, "loss_xval": 0.671875, "num_input_tokens_seen": 208074488, "step": 3319 }, { "epoch": 11.048252911813645, "grad_norm": 29.740283966064453, "learning_rate": 5e-06, "loss": 0.4442, "num_input_tokens_seen": 208135772, "step": 3320 }, { "epoch": 11.048252911813645, "loss": 0.5949976444244385, "loss_ce": 2.6937759685097262e-05, "loss_iou": 0.2392578125, "loss_num": 0.0233154296875, "loss_xval": 0.59375, "num_input_tokens_seen": 208135772, "step": 3320 }, { "epoch": 11.051580698835275, "grad_norm": 29.314697265625, "learning_rate": 5e-06, "loss": 0.4742, "num_input_tokens_seen": 208199100, "step": 3321 }, { "epoch": 11.051580698835275, "loss": 0.5375991463661194, "loss_ce": 1.4704094155604253e-06, "loss_iou": 0.2392578125, "loss_num": 0.01202392578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 208199100, "step": 3321 }, { "epoch": 11.054908485856906, "grad_norm": 33.036720275878906, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 208263248, "step": 3322 }, { "epoch": 11.054908485856906, "loss": 0.5124043226242065, "loss_ce": 0.0001972641475731507, "loss_iou": 0.2294921875, "loss_num": 0.010498046875, "loss_xval": 0.51171875, "num_input_tokens_seen": 208263248, "step": 3322 }, { "epoch": 11.058236272878537, "grad_norm": 45.271697998046875, "learning_rate": 5e-06, "loss": 0.4742, "num_input_tokens_seen": 208326964, "step": 3323 }, { "epoch": 11.058236272878537, "loss": 0.551560640335083, "loss_ce": 4.696972609963268e-05, "loss_iou": 0.2099609375, "loss_num": 0.0262451171875, "loss_xval": 0.55078125, "num_input_tokens_seen": 208326964, "step": 3323 }, { "epoch": 11.061564059900167, "grad_norm": 32.799930572509766, "learning_rate": 5e-06, "loss": 0.5306, "num_input_tokens_seen": 208388776, "step": 3324 }, { "epoch": 11.061564059900167, "loss": 0.651814341545105, "loss_ce": 0.0001419505279045552, "loss_iou": 0.216796875, "loss_num": 0.04345703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 208388776, "step": 3324 }, { "epoch": 11.064891846921798, "grad_norm": 18.02379608154297, "learning_rate": 5e-06, "loss": 0.4083, "num_input_tokens_seen": 208450080, "step": 3325 }, { "epoch": 11.064891846921798, "loss": 0.3852813243865967, "loss_ce": 0.0002715623704716563, "loss_iou": 0.126953125, "loss_num": 0.026123046875, "loss_xval": 0.384765625, "num_input_tokens_seen": 208450080, "step": 3325 }, { "epoch": 11.068219633943428, "grad_norm": 19.595748901367188, "learning_rate": 5e-06, "loss": 0.7423, "num_input_tokens_seen": 208512812, "step": 3326 }, { "epoch": 11.068219633943428, "loss": 1.0821542739868164, "loss_ce": 9.342853672933416e-07, "loss_iou": 0.44140625, "loss_num": 0.040283203125, "loss_xval": 1.0859375, "num_input_tokens_seen": 208512812, "step": 3326 }, { "epoch": 11.071547420965059, "grad_norm": 21.645172119140625, "learning_rate": 5e-06, "loss": 0.5221, "num_input_tokens_seen": 208575340, "step": 3327 }, { "epoch": 11.071547420965059, "loss": 0.6057476997375488, "loss_ce": 3.480708983261138e-05, "loss_iou": 0.236328125, "loss_num": 0.026611328125, "loss_xval": 0.60546875, "num_input_tokens_seen": 208575340, "step": 3327 }, { "epoch": 11.07487520798669, "grad_norm": 19.947349548339844, "learning_rate": 5e-06, "loss": 0.6134, "num_input_tokens_seen": 208637364, "step": 3328 }, { "epoch": 11.07487520798669, "loss": 0.6662614345550537, "loss_ce": 1.6799004924905603e-06, "loss_iou": 0.2060546875, "loss_num": 0.051025390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 208637364, "step": 3328 }, { "epoch": 11.07820299500832, "grad_norm": 32.66655349731445, "learning_rate": 5e-06, "loss": 0.4883, "num_input_tokens_seen": 208701292, "step": 3329 }, { "epoch": 11.07820299500832, "loss": 0.6123229265213013, "loss_ce": 1.824634637159761e-05, "loss_iou": 0.2734375, "loss_num": 0.01263427734375, "loss_xval": 0.61328125, "num_input_tokens_seen": 208701292, "step": 3329 }, { "epoch": 11.081530782029951, "grad_norm": 36.79994583129883, "learning_rate": 5e-06, "loss": 0.5879, "num_input_tokens_seen": 208763380, "step": 3330 }, { "epoch": 11.081530782029951, "loss": 0.6383075714111328, "loss_ce": 1.9004201021743938e-06, "loss_iou": 0.2734375, "loss_num": 0.0185546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 208763380, "step": 3330 }, { "epoch": 11.084858569051582, "grad_norm": 19.27757453918457, "learning_rate": 5e-06, "loss": 0.4146, "num_input_tokens_seen": 208826708, "step": 3331 }, { "epoch": 11.084858569051582, "loss": 0.44850045442581177, "loss_ce": 0.0009906796040013433, "loss_iou": 0.181640625, "loss_num": 0.016845703125, "loss_xval": 0.447265625, "num_input_tokens_seen": 208826708, "step": 3331 }, { "epoch": 11.088186356073212, "grad_norm": 9.157150268554688, "learning_rate": 5e-06, "loss": 0.5076, "num_input_tokens_seen": 208890456, "step": 3332 }, { "epoch": 11.088186356073212, "loss": 0.5103991031646729, "loss_ce": 2.3139413315220736e-05, "loss_iou": 0.2255859375, "loss_num": 0.01171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 208890456, "step": 3332 }, { "epoch": 11.091514143094843, "grad_norm": 11.910614013671875, "learning_rate": 5e-06, "loss": 0.4372, "num_input_tokens_seen": 208952252, "step": 3333 }, { "epoch": 11.091514143094843, "loss": 0.2857721745967865, "loss_ce": 5.562942533288151e-06, "loss_iou": 0.10546875, "loss_num": 0.01495361328125, "loss_xval": 0.28515625, "num_input_tokens_seen": 208952252, "step": 3333 }, { "epoch": 11.094841930116473, "grad_norm": 15.679207801818848, "learning_rate": 5e-06, "loss": 0.63, "num_input_tokens_seen": 209016900, "step": 3334 }, { "epoch": 11.094841930116473, "loss": 0.8339930772781372, "loss_ce": 8.733704817132093e-06, "loss_iou": 0.326171875, "loss_num": 0.036376953125, "loss_xval": 0.8359375, "num_input_tokens_seen": 209016900, "step": 3334 }, { "epoch": 11.098169717138104, "grad_norm": 23.414913177490234, "learning_rate": 5e-06, "loss": 0.6942, "num_input_tokens_seen": 209079416, "step": 3335 }, { "epoch": 11.098169717138104, "loss": 0.5245986580848694, "loss_ce": 1.4993006516306195e-06, "loss_iou": 0.1728515625, "loss_num": 0.035888671875, "loss_xval": 0.5234375, "num_input_tokens_seen": 209079416, "step": 3335 }, { "epoch": 11.101497504159735, "grad_norm": 23.038705825805664, "learning_rate": 5e-06, "loss": 0.341, "num_input_tokens_seen": 209141128, "step": 3336 }, { "epoch": 11.101497504159735, "loss": 0.19441525638103485, "loss_ce": 1.828773383749649e-05, "loss_iou": 0.06640625, "loss_num": 0.01226806640625, "loss_xval": 0.1943359375, "num_input_tokens_seen": 209141128, "step": 3336 }, { "epoch": 11.104825291181365, "grad_norm": 12.398072242736816, "learning_rate": 5e-06, "loss": 0.3945, "num_input_tokens_seen": 209204696, "step": 3337 }, { "epoch": 11.104825291181365, "loss": 0.5769065618515015, "loss_ce": 2.2560707293450832e-06, "loss_iou": 0.2333984375, "loss_num": 0.0220947265625, "loss_xval": 0.578125, "num_input_tokens_seen": 209204696, "step": 3337 }, { "epoch": 11.108153078202996, "grad_norm": 9.335553169250488, "learning_rate": 5e-06, "loss": 0.6075, "num_input_tokens_seen": 209266180, "step": 3338 }, { "epoch": 11.108153078202996, "loss": 0.613347053527832, "loss_ce": 4.759307557833381e-06, "loss_iou": 0.2275390625, "loss_num": 0.031494140625, "loss_xval": 0.61328125, "num_input_tokens_seen": 209266180, "step": 3338 }, { "epoch": 11.111480865224626, "grad_norm": 11.141563415527344, "learning_rate": 5e-06, "loss": 0.561, "num_input_tokens_seen": 209330740, "step": 3339 }, { "epoch": 11.111480865224626, "loss": 0.5312932133674622, "loss_ce": 0.0002873589110095054, "loss_iou": 0.2177734375, "loss_num": 0.0189208984375, "loss_xval": 0.53125, "num_input_tokens_seen": 209330740, "step": 3339 }, { "epoch": 11.114808652246257, "grad_norm": 4.414292812347412, "learning_rate": 5e-06, "loss": 0.4893, "num_input_tokens_seen": 209392876, "step": 3340 }, { "epoch": 11.114808652246257, "loss": 0.5155174136161804, "loss_ce": 1.4460183592746034e-05, "loss_iou": 0.1962890625, "loss_num": 0.0247802734375, "loss_xval": 0.515625, "num_input_tokens_seen": 209392876, "step": 3340 }, { "epoch": 11.118136439267888, "grad_norm": 11.01452350616455, "learning_rate": 5e-06, "loss": 0.5223, "num_input_tokens_seen": 209456420, "step": 3341 }, { "epoch": 11.118136439267888, "loss": 0.5212640762329102, "loss_ce": 2.37896620092215e-05, "loss_iou": 0.2216796875, "loss_num": 0.01544189453125, "loss_xval": 0.51953125, "num_input_tokens_seen": 209456420, "step": 3341 }, { "epoch": 11.121464226289518, "grad_norm": 8.93923568725586, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 209521084, "step": 3342 }, { "epoch": 11.121464226289518, "loss": 0.39057299494743347, "loss_ce": 9.028810382005759e-06, "loss_iou": 0.15234375, "loss_num": 0.01708984375, "loss_xval": 0.390625, "num_input_tokens_seen": 209521084, "step": 3342 }, { "epoch": 11.124792013311149, "grad_norm": 12.138001441955566, "learning_rate": 5e-06, "loss": 0.3754, "num_input_tokens_seen": 209582444, "step": 3343 }, { "epoch": 11.124792013311149, "loss": 0.34802699089050293, "loss_ce": 4.554894985631108e-06, "loss_iou": 0.08544921875, "loss_num": 0.035400390625, "loss_xval": 0.34765625, "num_input_tokens_seen": 209582444, "step": 3343 }, { "epoch": 11.12811980033278, "grad_norm": 15.958658218383789, "learning_rate": 5e-06, "loss": 0.544, "num_input_tokens_seen": 209645224, "step": 3344 }, { "epoch": 11.12811980033278, "loss": 0.5966376066207886, "loss_ce": 0.0002020784158958122, "loss_iou": 0.271484375, "loss_num": 0.01068115234375, "loss_xval": 0.59765625, "num_input_tokens_seen": 209645224, "step": 3344 }, { "epoch": 11.13144758735441, "grad_norm": 20.32972526550293, "learning_rate": 5e-06, "loss": 0.5731, "num_input_tokens_seen": 209705312, "step": 3345 }, { "epoch": 11.13144758735441, "loss": 0.41970908641815186, "loss_ce": 8.574571666031261e-07, "loss_iou": 0.1279296875, "loss_num": 0.032958984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 209705312, "step": 3345 }, { "epoch": 11.13477537437604, "grad_norm": 18.78236961364746, "learning_rate": 5e-06, "loss": 0.3359, "num_input_tokens_seen": 209766096, "step": 3346 }, { "epoch": 11.13477537437604, "loss": 0.4704223871231079, "loss_ce": 8.548003825126216e-05, "loss_iou": 0.1650390625, "loss_num": 0.028076171875, "loss_xval": 0.470703125, "num_input_tokens_seen": 209766096, "step": 3346 }, { "epoch": 11.138103161397671, "grad_norm": 12.494746208190918, "learning_rate": 5e-06, "loss": 0.5513, "num_input_tokens_seen": 209827916, "step": 3347 }, { "epoch": 11.138103161397671, "loss": 0.4626549184322357, "loss_ce": 0.0006187908002175391, "loss_iou": 0.173828125, "loss_num": 0.022705078125, "loss_xval": 0.462890625, "num_input_tokens_seen": 209827916, "step": 3347 }, { "epoch": 11.141430948419302, "grad_norm": 12.125555992126465, "learning_rate": 5e-06, "loss": 0.5227, "num_input_tokens_seen": 209890612, "step": 3348 }, { "epoch": 11.141430948419302, "loss": 0.5962186455726624, "loss_ce": 0.0005155237740837038, "loss_iou": 0.2421875, "loss_num": 0.0220947265625, "loss_xval": 0.59375, "num_input_tokens_seen": 209890612, "step": 3348 }, { "epoch": 11.144758735440933, "grad_norm": 6.307405948638916, "learning_rate": 5e-06, "loss": 0.4731, "num_input_tokens_seen": 209953188, "step": 3349 }, { "epoch": 11.144758735440933, "loss": 0.44371214509010315, "loss_ce": 0.00041380408219993114, "loss_iou": 0.166015625, "loss_num": 0.0220947265625, "loss_xval": 0.443359375, "num_input_tokens_seen": 209953188, "step": 3349 }, { "epoch": 11.148086522462563, "grad_norm": 8.800322532653809, "learning_rate": 5e-06, "loss": 0.4835, "num_input_tokens_seen": 210014996, "step": 3350 }, { "epoch": 11.148086522462563, "loss": 0.2558647394180298, "loss_ce": 5.364325261325575e-06, "loss_iou": 0.0576171875, "loss_num": 0.0281982421875, "loss_xval": 0.255859375, "num_input_tokens_seen": 210014996, "step": 3350 }, { "epoch": 11.151414309484194, "grad_norm": 21.071828842163086, "learning_rate": 5e-06, "loss": 0.5773, "num_input_tokens_seen": 210078216, "step": 3351 }, { "epoch": 11.151414309484194, "loss": 0.5444000959396362, "loss_ce": 8.858892397256568e-05, "loss_iou": 0.1943359375, "loss_num": 0.0311279296875, "loss_xval": 0.54296875, "num_input_tokens_seen": 210078216, "step": 3351 }, { "epoch": 11.154742096505824, "grad_norm": 25.049301147460938, "learning_rate": 5e-06, "loss": 0.5707, "num_input_tokens_seen": 210139044, "step": 3352 }, { "epoch": 11.154742096505824, "loss": 0.8177282810211182, "loss_ce": 0.00010133983596460894, "loss_iou": 0.330078125, "loss_num": 0.03173828125, "loss_xval": 0.81640625, "num_input_tokens_seen": 210139044, "step": 3352 }, { "epoch": 11.158069883527455, "grad_norm": 23.882822036743164, "learning_rate": 5e-06, "loss": 0.5712, "num_input_tokens_seen": 210201648, "step": 3353 }, { "epoch": 11.158069883527455, "loss": 0.5269463062286377, "loss_ce": 0.0002128811029251665, "loss_iou": 0.1943359375, "loss_num": 0.027587890625, "loss_xval": 0.52734375, "num_input_tokens_seen": 210201648, "step": 3353 }, { "epoch": 11.161397670549086, "grad_norm": 26.75176239013672, "learning_rate": 5e-06, "loss": 0.6382, "num_input_tokens_seen": 210264484, "step": 3354 }, { "epoch": 11.161397670549086, "loss": 0.6261005997657776, "loss_ce": 1.948735189216677e-06, "loss_iou": 0.279296875, "loss_num": 0.013916015625, "loss_xval": 0.625, "num_input_tokens_seen": 210264484, "step": 3354 }, { "epoch": 11.164725457570716, "grad_norm": 34.572776794433594, "learning_rate": 5e-06, "loss": 0.7796, "num_input_tokens_seen": 210327324, "step": 3355 }, { "epoch": 11.164725457570716, "loss": 0.702437698841095, "loss_ce": 0.00010614506754791364, "loss_iou": 0.251953125, "loss_num": 0.03955078125, "loss_xval": 0.703125, "num_input_tokens_seen": 210327324, "step": 3355 }, { "epoch": 11.168053244592347, "grad_norm": 46.818965911865234, "learning_rate": 5e-06, "loss": 0.5824, "num_input_tokens_seen": 210389732, "step": 3356 }, { "epoch": 11.168053244592347, "loss": 0.5811402797698975, "loss_ce": 0.00029925937997177243, "loss_iou": 0.240234375, "loss_num": 0.0201416015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 210389732, "step": 3356 }, { "epoch": 11.171381031613977, "grad_norm": 29.99363136291504, "learning_rate": 5e-06, "loss": 0.5685, "num_input_tokens_seen": 210452728, "step": 3357 }, { "epoch": 11.171381031613977, "loss": 0.759689450263977, "loss_ce": 0.00016792438691481948, "loss_iou": 0.2890625, "loss_num": 0.036376953125, "loss_xval": 0.7578125, "num_input_tokens_seen": 210452728, "step": 3357 }, { "epoch": 11.174708818635608, "grad_norm": 17.245424270629883, "learning_rate": 5e-06, "loss": 0.6224, "num_input_tokens_seen": 210515900, "step": 3358 }, { "epoch": 11.174708818635608, "loss": 0.5367443561553955, "loss_ce": 1.2347879874141654e-06, "loss_iou": 0.2236328125, "loss_num": 0.0179443359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 210515900, "step": 3358 }, { "epoch": 11.178036605657239, "grad_norm": 9.638038635253906, "learning_rate": 5e-06, "loss": 0.3662, "num_input_tokens_seen": 210579280, "step": 3359 }, { "epoch": 11.178036605657239, "loss": 0.24621699750423431, "loss_ce": 1.161768750534975e-06, "loss_iou": 0.0927734375, "loss_num": 0.0120849609375, "loss_xval": 0.24609375, "num_input_tokens_seen": 210579280, "step": 3359 }, { "epoch": 11.18136439267887, "grad_norm": 12.079889297485352, "learning_rate": 5e-06, "loss": 0.4546, "num_input_tokens_seen": 210642884, "step": 3360 }, { "epoch": 11.18136439267887, "loss": 0.5816894769668579, "loss_ce": 2.4466400645906106e-05, "loss_iou": 0.1962890625, "loss_num": 0.037841796875, "loss_xval": 0.58203125, "num_input_tokens_seen": 210642884, "step": 3360 }, { "epoch": 11.1846921797005, "grad_norm": 5.400938510894775, "learning_rate": 5e-06, "loss": 0.3467, "num_input_tokens_seen": 210704524, "step": 3361 }, { "epoch": 11.1846921797005, "loss": 0.23669511079788208, "loss_ce": 7.688555569984601e-07, "loss_iou": 0.06298828125, "loss_num": 0.0220947265625, "loss_xval": 0.236328125, "num_input_tokens_seen": 210704524, "step": 3361 }, { "epoch": 11.18801996672213, "grad_norm": 11.549376487731934, "learning_rate": 5e-06, "loss": 0.36, "num_input_tokens_seen": 210766864, "step": 3362 }, { "epoch": 11.18801996672213, "loss": 0.3484129309654236, "loss_ce": 0.00023789459373801947, "loss_iou": 0.12890625, "loss_num": 0.01806640625, "loss_xval": 0.34765625, "num_input_tokens_seen": 210766864, "step": 3362 }, { "epoch": 11.191347753743761, "grad_norm": 19.65756607055664, "learning_rate": 5e-06, "loss": 0.4177, "num_input_tokens_seen": 210829420, "step": 3363 }, { "epoch": 11.191347753743761, "loss": 0.3625582456588745, "loss_ce": 0.0017794453306123614, "loss_iou": 0.12109375, "loss_num": 0.023681640625, "loss_xval": 0.361328125, "num_input_tokens_seen": 210829420, "step": 3363 }, { "epoch": 11.194675540765392, "grad_norm": 29.322952270507812, "learning_rate": 5e-06, "loss": 0.4357, "num_input_tokens_seen": 210892080, "step": 3364 }, { "epoch": 11.194675540765392, "loss": 0.3580339252948761, "loss_ce": 1.7024769931595074e-06, "loss_iou": 0.123046875, "loss_num": 0.0224609375, "loss_xval": 0.357421875, "num_input_tokens_seen": 210892080, "step": 3364 }, { "epoch": 11.198003327787022, "grad_norm": 9.68643569946289, "learning_rate": 5e-06, "loss": 0.4015, "num_input_tokens_seen": 210954448, "step": 3365 }, { "epoch": 11.198003327787022, "loss": 0.44549286365509033, "loss_ce": 5.831208181916736e-05, "loss_iou": 0.162109375, "loss_num": 0.0244140625, "loss_xval": 0.4453125, "num_input_tokens_seen": 210954448, "step": 3365 }, { "epoch": 11.201331114808653, "grad_norm": 14.82058048248291, "learning_rate": 5e-06, "loss": 0.5707, "num_input_tokens_seen": 211018180, "step": 3366 }, { "epoch": 11.201331114808653, "loss": 0.6648024320602417, "loss_ce": 0.000312704942189157, "loss_iou": 0.259765625, "loss_num": 0.02880859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 211018180, "step": 3366 }, { "epoch": 11.204658901830284, "grad_norm": 9.430171966552734, "learning_rate": 5e-06, "loss": 0.3779, "num_input_tokens_seen": 211078632, "step": 3367 }, { "epoch": 11.204658901830284, "loss": 0.19323796033859253, "loss_ce": 6.527611162709945e-07, "loss_iou": 0.056884765625, "loss_num": 0.0159912109375, "loss_xval": 0.193359375, "num_input_tokens_seen": 211078632, "step": 3367 }, { "epoch": 11.207986688851914, "grad_norm": 17.07076072692871, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 211140628, "step": 3368 }, { "epoch": 11.207986688851914, "loss": 0.5188108682632446, "loss_ce": 1.2038321074214764e-05, "loss_iou": 0.1806640625, "loss_num": 0.03173828125, "loss_xval": 0.51953125, "num_input_tokens_seen": 211140628, "step": 3368 }, { "epoch": 11.211314475873545, "grad_norm": 16.402246475219727, "learning_rate": 5e-06, "loss": 0.7848, "num_input_tokens_seen": 211202348, "step": 3369 }, { "epoch": 11.211314475873545, "loss": 0.8639352917671204, "loss_ce": 0.0001047362748067826, "loss_iou": 0.310546875, "loss_num": 0.04833984375, "loss_xval": 0.86328125, "num_input_tokens_seen": 211202348, "step": 3369 }, { "epoch": 11.214642262895175, "grad_norm": 7.523104667663574, "learning_rate": 5e-06, "loss": 0.423, "num_input_tokens_seen": 211264808, "step": 3370 }, { "epoch": 11.214642262895175, "loss": 0.5613157749176025, "loss_ce": 0.00015856519166845828, "loss_iou": 0.2177734375, "loss_num": 0.0250244140625, "loss_xval": 0.5625, "num_input_tokens_seen": 211264808, "step": 3370 }, { "epoch": 11.217970049916806, "grad_norm": 6.877736568450928, "learning_rate": 5e-06, "loss": 0.3878, "num_input_tokens_seen": 211327184, "step": 3371 }, { "epoch": 11.217970049916806, "loss": 0.4388206899166107, "loss_ce": 0.00031360649154521525, "loss_iou": 0.16796875, "loss_num": 0.0205078125, "loss_xval": 0.439453125, "num_input_tokens_seen": 211327184, "step": 3371 }, { "epoch": 11.221297836938437, "grad_norm": 9.58187484741211, "learning_rate": 5e-06, "loss": 0.5393, "num_input_tokens_seen": 211390244, "step": 3372 }, { "epoch": 11.221297836938437, "loss": 0.532960832118988, "loss_ce": 1.8207501852884889e-06, "loss_iou": 0.2001953125, "loss_num": 0.0264892578125, "loss_xval": 0.53125, "num_input_tokens_seen": 211390244, "step": 3372 }, { "epoch": 11.224625623960067, "grad_norm": 10.796221733093262, "learning_rate": 5e-06, "loss": 0.4618, "num_input_tokens_seen": 211453612, "step": 3373 }, { "epoch": 11.224625623960067, "loss": 0.4304216504096985, "loss_ce": 1.7106275436162832e-06, "loss_iou": 0.16015625, "loss_num": 0.0220947265625, "loss_xval": 0.4296875, "num_input_tokens_seen": 211453612, "step": 3373 }, { "epoch": 11.227953410981698, "grad_norm": 14.314764022827148, "learning_rate": 5e-06, "loss": 0.479, "num_input_tokens_seen": 211516472, "step": 3374 }, { "epoch": 11.227953410981698, "loss": 0.5441311597824097, "loss_ce": 2.7590008357947227e-06, "loss_iou": 0.1953125, "loss_num": 0.0303955078125, "loss_xval": 0.54296875, "num_input_tokens_seen": 211516472, "step": 3374 }, { "epoch": 11.231281198003328, "grad_norm": 18.28016471862793, "learning_rate": 5e-06, "loss": 0.5823, "num_input_tokens_seen": 211579792, "step": 3375 }, { "epoch": 11.231281198003328, "loss": 0.6611372232437134, "loss_ce": 0.000980975804850459, "loss_iou": 0.2421875, "loss_num": 0.035400390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 211579792, "step": 3375 }, { "epoch": 11.234608985024959, "grad_norm": 6.622052192687988, "learning_rate": 5e-06, "loss": 0.487, "num_input_tokens_seen": 211642028, "step": 3376 }, { "epoch": 11.234608985024959, "loss": 0.641849160194397, "loss_ce": 3.524876774463337e-06, "loss_iou": 0.220703125, "loss_num": 0.040283203125, "loss_xval": 0.640625, "num_input_tokens_seen": 211642028, "step": 3376 }, { "epoch": 11.23793677204659, "grad_norm": 14.657718658447266, "learning_rate": 5e-06, "loss": 0.3834, "num_input_tokens_seen": 211704948, "step": 3377 }, { "epoch": 11.23793677204659, "loss": 0.4299467206001282, "loss_ce": 1.505272575741401e-05, "loss_iou": 0.1650390625, "loss_num": 0.0201416015625, "loss_xval": 0.4296875, "num_input_tokens_seen": 211704948, "step": 3377 }, { "epoch": 11.24126455906822, "grad_norm": 24.41314125061035, "learning_rate": 5e-06, "loss": 0.4885, "num_input_tokens_seen": 211767048, "step": 3378 }, { "epoch": 11.24126455906822, "loss": 0.27829137444496155, "loss_ce": 1.5755671256556525e-06, "loss_iou": 0.09619140625, "loss_num": 0.0172119140625, "loss_xval": 0.27734375, "num_input_tokens_seen": 211767048, "step": 3378 }, { "epoch": 11.244592346089851, "grad_norm": 7.939348220825195, "learning_rate": 5e-06, "loss": 0.3702, "num_input_tokens_seen": 211828412, "step": 3379 }, { "epoch": 11.244592346089851, "loss": 0.40565502643585205, "loss_ce": 3.0639886972494423e-05, "loss_iou": 0.1201171875, "loss_num": 0.033203125, "loss_xval": 0.40625, "num_input_tokens_seen": 211828412, "step": 3379 }, { "epoch": 11.247920133111482, "grad_norm": 12.43234920501709, "learning_rate": 5e-06, "loss": 0.4837, "num_input_tokens_seen": 211892104, "step": 3380 }, { "epoch": 11.247920133111482, "loss": 0.5193054676055908, "loss_ce": 1.830406836234033e-05, "loss_iou": 0.1708984375, "loss_num": 0.03564453125, "loss_xval": 0.51953125, "num_input_tokens_seen": 211892104, "step": 3380 }, { "epoch": 11.251247920133112, "grad_norm": 14.769754409790039, "learning_rate": 5e-06, "loss": 0.4395, "num_input_tokens_seen": 211954660, "step": 3381 }, { "epoch": 11.251247920133112, "loss": 0.313024640083313, "loss_ce": 0.00047885539242997766, "loss_iou": 0.0771484375, "loss_num": 0.03173828125, "loss_xval": 0.3125, "num_input_tokens_seen": 211954660, "step": 3381 }, { "epoch": 11.254575707154743, "grad_norm": 13.457191467285156, "learning_rate": 5e-06, "loss": 0.3198, "num_input_tokens_seen": 212017444, "step": 3382 }, { "epoch": 11.254575707154743, "loss": 0.28302115201950073, "loss_ce": 1.1107069894933375e-06, "loss_iou": 0.09814453125, "loss_num": 0.017333984375, "loss_xval": 0.283203125, "num_input_tokens_seen": 212017444, "step": 3382 }, { "epoch": 11.257903494176373, "grad_norm": 16.170909881591797, "learning_rate": 5e-06, "loss": 0.5886, "num_input_tokens_seen": 212081064, "step": 3383 }, { "epoch": 11.257903494176373, "loss": 0.5495759844779968, "loss_ce": 1.5413490473292768e-05, "loss_iou": 0.240234375, "loss_num": 0.013671875, "loss_xval": 0.55078125, "num_input_tokens_seen": 212081064, "step": 3383 }, { "epoch": 11.261231281198004, "grad_norm": 23.0472412109375, "learning_rate": 5e-06, "loss": 0.5113, "num_input_tokens_seen": 212143824, "step": 3384 }, { "epoch": 11.261231281198004, "loss": 0.3989904522895813, "loss_ce": 3.6127908060734626e-06, "loss_iou": 0.1220703125, "loss_num": 0.031005859375, "loss_xval": 0.3984375, "num_input_tokens_seen": 212143824, "step": 3384 }, { "epoch": 11.264559068219635, "grad_norm": 19.477458953857422, "learning_rate": 5e-06, "loss": 0.4648, "num_input_tokens_seen": 212207224, "step": 3385 }, { "epoch": 11.264559068219635, "loss": 0.4903796911239624, "loss_ce": 2.32157799473498e-05, "loss_iou": 0.1923828125, "loss_num": 0.021484375, "loss_xval": 0.490234375, "num_input_tokens_seen": 212207224, "step": 3385 }, { "epoch": 11.267886855241265, "grad_norm": 12.424951553344727, "learning_rate": 5e-06, "loss": 0.4447, "num_input_tokens_seen": 212270200, "step": 3386 }, { "epoch": 11.267886855241265, "loss": 0.3161046504974365, "loss_ce": 3.590731921576662e-06, "loss_iou": 0.119140625, "loss_num": 0.015625, "loss_xval": 0.31640625, "num_input_tokens_seen": 212270200, "step": 3386 }, { "epoch": 11.271214642262896, "grad_norm": 13.345970153808594, "learning_rate": 5e-06, "loss": 0.3489, "num_input_tokens_seen": 212333256, "step": 3387 }, { "epoch": 11.271214642262896, "loss": 0.3471885025501251, "loss_ce": 0.0005088262842036784, "loss_iou": 0.1435546875, "loss_num": 0.011962890625, "loss_xval": 0.34765625, "num_input_tokens_seen": 212333256, "step": 3387 }, { "epoch": 11.274542429284526, "grad_norm": 17.8961124420166, "learning_rate": 5e-06, "loss": 0.5206, "num_input_tokens_seen": 212395272, "step": 3388 }, { "epoch": 11.274542429284526, "loss": 0.5088512301445007, "loss_ce": 1.1313163668091875e-06, "loss_iou": 0.19921875, "loss_num": 0.02197265625, "loss_xval": 0.5078125, "num_input_tokens_seen": 212395272, "step": 3388 }, { "epoch": 11.277870216306157, "grad_norm": 25.25090217590332, "learning_rate": 5e-06, "loss": 0.4829, "num_input_tokens_seen": 212456900, "step": 3389 }, { "epoch": 11.277870216306157, "loss": 0.6870266199111938, "loss_ce": 1.4901274880685378e-05, "loss_iou": 0.275390625, "loss_num": 0.027099609375, "loss_xval": 0.6875, "num_input_tokens_seen": 212456900, "step": 3389 }, { "epoch": 11.281198003327788, "grad_norm": 24.631311416625977, "learning_rate": 5e-06, "loss": 0.5684, "num_input_tokens_seen": 212520184, "step": 3390 }, { "epoch": 11.281198003327788, "loss": 0.48858755826950073, "loss_ce": 1.1510389867908088e-06, "loss_iou": 0.1806640625, "loss_num": 0.0255126953125, "loss_xval": 0.48828125, "num_input_tokens_seen": 212520184, "step": 3390 }, { "epoch": 11.284525790349418, "grad_norm": 12.505722045898438, "learning_rate": 5e-06, "loss": 0.4485, "num_input_tokens_seen": 212582248, "step": 3391 }, { "epoch": 11.284525790349418, "loss": 0.5717790126800537, "loss_ce": 1.643845848775527e-06, "loss_iou": 0.2255859375, "loss_num": 0.02392578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 212582248, "step": 3391 }, { "epoch": 11.287853577371049, "grad_norm": 23.03280258178711, "learning_rate": 5e-06, "loss": 0.5268, "num_input_tokens_seen": 212645852, "step": 3392 }, { "epoch": 11.287853577371049, "loss": 0.6284734010696411, "loss_ce": 0.0004216255038045347, "loss_iou": 0.2421875, "loss_num": 0.02880859375, "loss_xval": 0.62890625, "num_input_tokens_seen": 212645852, "step": 3392 }, { "epoch": 11.29118136439268, "grad_norm": 12.421693801879883, "learning_rate": 5e-06, "loss": 0.9012, "num_input_tokens_seen": 212709292, "step": 3393 }, { "epoch": 11.29118136439268, "loss": 1.1222984790802002, "loss_ce": 0.00022819254081696272, "loss_iou": 0.404296875, "loss_num": 0.06298828125, "loss_xval": 1.125, "num_input_tokens_seen": 212709292, "step": 3393 }, { "epoch": 11.29450915141431, "grad_norm": 26.848878860473633, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 212769252, "step": 3394 }, { "epoch": 11.29450915141431, "loss": 0.3660937249660492, "loss_ce": 4.884193003817927e-06, "loss_iou": 0.107421875, "loss_num": 0.0301513671875, "loss_xval": 0.365234375, "num_input_tokens_seen": 212769252, "step": 3394 }, { "epoch": 11.29783693843594, "grad_norm": 35.5877799987793, "learning_rate": 5e-06, "loss": 0.5554, "num_input_tokens_seen": 212832652, "step": 3395 }, { "epoch": 11.29783693843594, "loss": 0.29928892850875854, "loss_ce": 3.052543661397067e-06, "loss_iou": 0.10791015625, "loss_num": 0.0166015625, "loss_xval": 0.298828125, "num_input_tokens_seen": 212832652, "step": 3395 }, { "epoch": 11.301164725457571, "grad_norm": 25.83563232421875, "learning_rate": 5e-06, "loss": 0.5483, "num_input_tokens_seen": 212895916, "step": 3396 }, { "epoch": 11.301164725457571, "loss": 0.5456006526947021, "loss_ce": 6.84588958392851e-05, "loss_iou": 0.1982421875, "loss_num": 0.02978515625, "loss_xval": 0.546875, "num_input_tokens_seen": 212895916, "step": 3396 }, { "epoch": 11.304492512479202, "grad_norm": 16.490707397460938, "learning_rate": 5e-06, "loss": 0.5392, "num_input_tokens_seen": 212959416, "step": 3397 }, { "epoch": 11.304492512479202, "loss": 0.5911589860916138, "loss_ce": 2.9511679713323247e-06, "loss_iou": 0.236328125, "loss_num": 0.0238037109375, "loss_xval": 0.58984375, "num_input_tokens_seen": 212959416, "step": 3397 }, { "epoch": 11.307820299500833, "grad_norm": 12.089359283447266, "learning_rate": 5e-06, "loss": 0.6823, "num_input_tokens_seen": 213022172, "step": 3398 }, { "epoch": 11.307820299500833, "loss": 0.5559098720550537, "loss_ce": 1.6623985175101552e-06, "loss_iou": 0.2021484375, "loss_num": 0.0302734375, "loss_xval": 0.5546875, "num_input_tokens_seen": 213022172, "step": 3398 }, { "epoch": 11.311148086522463, "grad_norm": 22.218358993530273, "learning_rate": 5e-06, "loss": 0.4922, "num_input_tokens_seen": 213085272, "step": 3399 }, { "epoch": 11.311148086522463, "loss": 0.5730011463165283, "loss_ce": 3.141061142741819e-06, "loss_iou": 0.2119140625, "loss_num": 0.02978515625, "loss_xval": 0.57421875, "num_input_tokens_seen": 213085272, "step": 3399 }, { "epoch": 11.314475873544094, "grad_norm": 20.80188751220703, "learning_rate": 5e-06, "loss": 0.6882, "num_input_tokens_seen": 213149072, "step": 3400 }, { "epoch": 11.314475873544094, "loss": 0.6808702945709229, "loss_ce": 8.414199692197144e-05, "loss_iou": 0.2890625, "loss_num": 0.0203857421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 213149072, "step": 3400 }, { "epoch": 11.317803660565724, "grad_norm": 6.295179843902588, "learning_rate": 5e-06, "loss": 0.6447, "num_input_tokens_seen": 213211492, "step": 3401 }, { "epoch": 11.317803660565724, "loss": 0.913345992565155, "loss_ce": 1.5890533177298494e-05, "loss_iou": 0.3203125, "loss_num": 0.05419921875, "loss_xval": 0.9140625, "num_input_tokens_seen": 213211492, "step": 3401 }, { "epoch": 11.321131447587355, "grad_norm": 14.70199966430664, "learning_rate": 5e-06, "loss": 0.5698, "num_input_tokens_seen": 213276024, "step": 3402 }, { "epoch": 11.321131447587355, "loss": 0.41964805126190186, "loss_ce": 8.420539643338998e-07, "loss_iou": 0.158203125, "loss_num": 0.0205078125, "loss_xval": 0.419921875, "num_input_tokens_seen": 213276024, "step": 3402 }, { "epoch": 11.324459234608986, "grad_norm": 18.00429916381836, "learning_rate": 5e-06, "loss": 0.5221, "num_input_tokens_seen": 213338544, "step": 3403 }, { "epoch": 11.324459234608986, "loss": 0.345974326133728, "loss_ce": 0.00016438915918115526, "loss_iou": 0.13671875, "loss_num": 0.01446533203125, "loss_xval": 0.345703125, "num_input_tokens_seen": 213338544, "step": 3403 }, { "epoch": 11.327787021630616, "grad_norm": 12.928200721740723, "learning_rate": 5e-06, "loss": 0.5287, "num_input_tokens_seen": 213399812, "step": 3404 }, { "epoch": 11.327787021630616, "loss": 0.5290573835372925, "loss_ce": 0.0004929386195726693, "loss_iou": 0.220703125, "loss_num": 0.017333984375, "loss_xval": 0.52734375, "num_input_tokens_seen": 213399812, "step": 3404 }, { "epoch": 11.331114808652247, "grad_norm": 12.411236763000488, "learning_rate": 5e-06, "loss": 0.4884, "num_input_tokens_seen": 213463168, "step": 3405 }, { "epoch": 11.331114808652247, "loss": 0.5254011750221252, "loss_ce": 1.055449502018746e-05, "loss_iou": 0.205078125, "loss_num": 0.0230712890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 213463168, "step": 3405 }, { "epoch": 11.334442595673877, "grad_norm": 22.23971176147461, "learning_rate": 5e-06, "loss": 0.6383, "num_input_tokens_seen": 213526852, "step": 3406 }, { "epoch": 11.334442595673877, "loss": 0.838417649269104, "loss_ce": 3.871129229082726e-05, "loss_iou": 0.333984375, "loss_num": 0.0341796875, "loss_xval": 0.83984375, "num_input_tokens_seen": 213526852, "step": 3406 }, { "epoch": 11.337770382695508, "grad_norm": 9.166321754455566, "learning_rate": 5e-06, "loss": 0.4428, "num_input_tokens_seen": 213589732, "step": 3407 }, { "epoch": 11.337770382695508, "loss": 0.4437301754951477, "loss_ce": 4.576714673021343e-06, "loss_iou": 0.1552734375, "loss_num": 0.02685546875, "loss_xval": 0.443359375, "num_input_tokens_seen": 213589732, "step": 3407 }, { "epoch": 11.341098169717139, "grad_norm": 12.367635726928711, "learning_rate": 5e-06, "loss": 0.5189, "num_input_tokens_seen": 213653416, "step": 3408 }, { "epoch": 11.341098169717139, "loss": 0.33713793754577637, "loss_ce": 0.0005900840042158961, "loss_iou": 0.1328125, "loss_num": 0.01416015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 213653416, "step": 3408 }, { "epoch": 11.34442595673877, "grad_norm": 19.60237693786621, "learning_rate": 5e-06, "loss": 0.6866, "num_input_tokens_seen": 213716788, "step": 3409 }, { "epoch": 11.34442595673877, "loss": 0.6438875794410706, "loss_ce": 5.824194522574544e-05, "loss_iou": 0.2353515625, "loss_num": 0.034423828125, "loss_xval": 0.64453125, "num_input_tokens_seen": 213716788, "step": 3409 }, { "epoch": 11.3477537437604, "grad_norm": 19.858064651489258, "learning_rate": 5e-06, "loss": 0.4334, "num_input_tokens_seen": 213779724, "step": 3410 }, { "epoch": 11.3477537437604, "loss": 0.3901156485080719, "loss_ce": 0.0005282529746182263, "loss_iou": 0.1376953125, "loss_num": 0.0230712890625, "loss_xval": 0.388671875, "num_input_tokens_seen": 213779724, "step": 3410 }, { "epoch": 11.35108153078203, "grad_norm": 7.780879497528076, "learning_rate": 5e-06, "loss": 0.4453, "num_input_tokens_seen": 213840904, "step": 3411 }, { "epoch": 11.35108153078203, "loss": 0.5611635446548462, "loss_ce": 6.293532806012081e-06, "loss_iou": 0.19921875, "loss_num": 0.03271484375, "loss_xval": 0.5625, "num_input_tokens_seen": 213840904, "step": 3411 }, { "epoch": 11.354409317803661, "grad_norm": 15.984895706176758, "learning_rate": 5e-06, "loss": 0.452, "num_input_tokens_seen": 213904140, "step": 3412 }, { "epoch": 11.354409317803661, "loss": 0.4715519845485687, "loss_ce": 0.00036057931720279157, "loss_iou": 0.1943359375, "loss_num": 0.0162353515625, "loss_xval": 0.470703125, "num_input_tokens_seen": 213904140, "step": 3412 }, { "epoch": 11.357737104825292, "grad_norm": 9.239534378051758, "learning_rate": 5e-06, "loss": 0.36, "num_input_tokens_seen": 213967556, "step": 3413 }, { "epoch": 11.357737104825292, "loss": 0.5131917595863342, "loss_ce": 8.15928797237575e-06, "loss_iou": 0.1748046875, "loss_num": 0.032470703125, "loss_xval": 0.51171875, "num_input_tokens_seen": 213967556, "step": 3413 }, { "epoch": 11.361064891846922, "grad_norm": 10.108990669250488, "learning_rate": 5e-06, "loss": 0.6256, "num_input_tokens_seen": 214030108, "step": 3414 }, { "epoch": 11.361064891846922, "loss": 0.691166877746582, "loss_ce": 4.796277607965749e-06, "loss_iou": 0.28515625, "loss_num": 0.0245361328125, "loss_xval": 0.69140625, "num_input_tokens_seen": 214030108, "step": 3414 }, { "epoch": 11.364392678868553, "grad_norm": 13.287681579589844, "learning_rate": 5e-06, "loss": 0.502, "num_input_tokens_seen": 214094132, "step": 3415 }, { "epoch": 11.364392678868553, "loss": 0.5253951549530029, "loss_ce": 4.576358151098248e-06, "loss_iou": 0.2177734375, "loss_num": 0.01806640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 214094132, "step": 3415 }, { "epoch": 11.367720465890184, "grad_norm": 29.500104904174805, "learning_rate": 5e-06, "loss": 0.5285, "num_input_tokens_seen": 214156772, "step": 3416 }, { "epoch": 11.367720465890184, "loss": 0.5335703492164612, "loss_ce": 9.835530363488942e-07, "loss_iou": 0.2265625, "loss_num": 0.01611328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 214156772, "step": 3416 }, { "epoch": 11.371048252911814, "grad_norm": 41.063079833984375, "learning_rate": 5e-06, "loss": 0.5374, "num_input_tokens_seen": 214220380, "step": 3417 }, { "epoch": 11.371048252911814, "loss": 0.6097512245178223, "loss_ce": 0.00013208728341851383, "loss_iou": 0.23828125, "loss_num": 0.0262451171875, "loss_xval": 0.609375, "num_input_tokens_seen": 214220380, "step": 3417 }, { "epoch": 11.374376039933445, "grad_norm": 28.95822525024414, "learning_rate": 5e-06, "loss": 0.6624, "num_input_tokens_seen": 214282300, "step": 3418 }, { "epoch": 11.374376039933445, "loss": 0.6441663503646851, "loss_ce": 1.2746381798933726e-06, "loss_iou": 0.26171875, "loss_num": 0.02392578125, "loss_xval": 0.64453125, "num_input_tokens_seen": 214282300, "step": 3418 }, { "epoch": 11.377703826955075, "grad_norm": 38.047080993652344, "learning_rate": 5e-06, "loss": 0.4236, "num_input_tokens_seen": 214344956, "step": 3419 }, { "epoch": 11.377703826955075, "loss": 0.4488297700881958, "loss_ce": 9.928193321684375e-05, "loss_iou": 0.1884765625, "loss_num": 0.014404296875, "loss_xval": 0.44921875, "num_input_tokens_seen": 214344956, "step": 3419 }, { "epoch": 11.381031613976706, "grad_norm": 29.38704490661621, "learning_rate": 5e-06, "loss": 0.5082, "num_input_tokens_seen": 214408068, "step": 3420 }, { "epoch": 11.381031613976706, "loss": 0.3907485604286194, "loss_ce": 1.4747547538718209e-06, "loss_iou": 0.1669921875, "loss_num": 0.01141357421875, "loss_xval": 0.390625, "num_input_tokens_seen": 214408068, "step": 3420 }, { "epoch": 11.384359400998337, "grad_norm": 27.72543716430664, "learning_rate": 5e-06, "loss": 0.4101, "num_input_tokens_seen": 214470652, "step": 3421 }, { "epoch": 11.384359400998337, "loss": 0.3531828224658966, "loss_ce": 2.8934150577697437e-06, "loss_iou": 0.1337890625, "loss_num": 0.0169677734375, "loss_xval": 0.353515625, "num_input_tokens_seen": 214470652, "step": 3421 }, { "epoch": 11.387687188019967, "grad_norm": 15.533161163330078, "learning_rate": 5e-06, "loss": 0.4468, "num_input_tokens_seen": 214533804, "step": 3422 }, { "epoch": 11.387687188019967, "loss": 0.3163697123527527, "loss_ce": 2.448669692967087e-05, "loss_iou": 0.12109375, "loss_num": 0.0147705078125, "loss_xval": 0.31640625, "num_input_tokens_seen": 214533804, "step": 3422 }, { "epoch": 11.391014975041598, "grad_norm": 11.47153091430664, "learning_rate": 5e-06, "loss": 0.3358, "num_input_tokens_seen": 214597328, "step": 3423 }, { "epoch": 11.391014975041598, "loss": 0.26200538873672485, "loss_ce": 0.00016456423327326775, "loss_iou": 0.08544921875, "loss_num": 0.0181884765625, "loss_xval": 0.26171875, "num_input_tokens_seen": 214597328, "step": 3423 }, { "epoch": 11.394342762063228, "grad_norm": 13.557439804077148, "learning_rate": 5e-06, "loss": 0.6224, "num_input_tokens_seen": 214659956, "step": 3424 }, { "epoch": 11.394342762063228, "loss": 0.6181806325912476, "loss_ce": 0.00026066400459967554, "loss_iou": 0.1845703125, "loss_num": 0.0498046875, "loss_xval": 0.6171875, "num_input_tokens_seen": 214659956, "step": 3424 }, { "epoch": 11.397670549084859, "grad_norm": 8.34886360168457, "learning_rate": 5e-06, "loss": 0.5523, "num_input_tokens_seen": 214723116, "step": 3425 }, { "epoch": 11.397670549084859, "loss": 0.7662197947502136, "loss_ce": 0.0007168528391048312, "loss_iou": 0.283203125, "loss_num": 0.03955078125, "loss_xval": 0.765625, "num_input_tokens_seen": 214723116, "step": 3425 }, { "epoch": 11.40099833610649, "grad_norm": 19.906719207763672, "learning_rate": 5e-06, "loss": 0.4835, "num_input_tokens_seen": 214785716, "step": 3426 }, { "epoch": 11.40099833610649, "loss": 0.5484092235565186, "loss_ce": 8.339980922755785e-06, "loss_iou": 0.20703125, "loss_num": 0.0269775390625, "loss_xval": 0.546875, "num_input_tokens_seen": 214785716, "step": 3426 }, { "epoch": 11.40432612312812, "grad_norm": 17.579544067382812, "learning_rate": 5e-06, "loss": 0.578, "num_input_tokens_seen": 214848448, "step": 3427 }, { "epoch": 11.40432612312812, "loss": 0.6235864162445068, "loss_ce": 0.0003259488439653069, "loss_iou": 0.2255859375, "loss_num": 0.034423828125, "loss_xval": 0.625, "num_input_tokens_seen": 214848448, "step": 3427 }, { "epoch": 11.407653910149751, "grad_norm": 18.205581665039062, "learning_rate": 5e-06, "loss": 0.6892, "num_input_tokens_seen": 214910400, "step": 3428 }, { "epoch": 11.407653910149751, "loss": 0.8579701781272888, "loss_ce": 0.0003041746676899493, "loss_iou": 0.3359375, "loss_num": 0.036865234375, "loss_xval": 0.859375, "num_input_tokens_seen": 214910400, "step": 3428 }, { "epoch": 11.410981697171382, "grad_norm": 13.816856384277344, "learning_rate": 5e-06, "loss": 0.5188, "num_input_tokens_seen": 214972252, "step": 3429 }, { "epoch": 11.410981697171382, "loss": 0.6876459717750549, "loss_ce": 0.00039011400076560676, "loss_iou": 0.244140625, "loss_num": 0.039794921875, "loss_xval": 0.6875, "num_input_tokens_seen": 214972252, "step": 3429 }, { "epoch": 11.414309484193012, "grad_norm": 18.94110679626465, "learning_rate": 5e-06, "loss": 0.5729, "num_input_tokens_seen": 215035068, "step": 3430 }, { "epoch": 11.414309484193012, "loss": 0.7335920333862305, "loss_ce": 0.0003156510938424617, "loss_iou": 0.28125, "loss_num": 0.0341796875, "loss_xval": 0.734375, "num_input_tokens_seen": 215035068, "step": 3430 }, { "epoch": 11.417637271214643, "grad_norm": 25.502443313598633, "learning_rate": 5e-06, "loss": 0.3445, "num_input_tokens_seen": 215097932, "step": 3431 }, { "epoch": 11.417637271214643, "loss": 0.4162612557411194, "loss_ce": 1.4885433756717248e-06, "loss_iou": 0.1826171875, "loss_num": 0.0101318359375, "loss_xval": 0.416015625, "num_input_tokens_seen": 215097932, "step": 3431 }, { "epoch": 11.420965058236273, "grad_norm": 29.48317527770996, "learning_rate": 5e-06, "loss": 0.4855, "num_input_tokens_seen": 215160352, "step": 3432 }, { "epoch": 11.420965058236273, "loss": 0.7080239057540894, "loss_ce": 8.076999620243441e-07, "loss_iou": 0.25, "loss_num": 0.041748046875, "loss_xval": 0.70703125, "num_input_tokens_seen": 215160352, "step": 3432 }, { "epoch": 11.424292845257904, "grad_norm": 9.498638153076172, "learning_rate": 5e-06, "loss": 0.6174, "num_input_tokens_seen": 215223756, "step": 3433 }, { "epoch": 11.424292845257904, "loss": 0.4360114336013794, "loss_ce": 0.00022042967611923814, "loss_iou": 0.14453125, "loss_num": 0.0294189453125, "loss_xval": 0.435546875, "num_input_tokens_seen": 215223756, "step": 3433 }, { "epoch": 11.427620632279535, "grad_norm": 10.60441780090332, "learning_rate": 5e-06, "loss": 0.3396, "num_input_tokens_seen": 215285216, "step": 3434 }, { "epoch": 11.427620632279535, "loss": 0.2794690728187561, "loss_ce": 1.9590961528592743e-05, "loss_iou": 0.0751953125, "loss_num": 0.0257568359375, "loss_xval": 0.279296875, "num_input_tokens_seen": 215285216, "step": 3434 }, { "epoch": 11.430948419301165, "grad_norm": 20.43962860107422, "learning_rate": 5e-06, "loss": 0.4254, "num_input_tokens_seen": 215347824, "step": 3435 }, { "epoch": 11.430948419301165, "loss": 0.43124955892562866, "loss_ce": 5.672713086823933e-06, "loss_iou": 0.1640625, "loss_num": 0.0206298828125, "loss_xval": 0.431640625, "num_input_tokens_seen": 215347824, "step": 3435 }, { "epoch": 11.434276206322796, "grad_norm": 19.176536560058594, "learning_rate": 5e-06, "loss": 0.6131, "num_input_tokens_seen": 215410540, "step": 3436 }, { "epoch": 11.434276206322796, "loss": 0.818851888179779, "loss_ce": 4.238702786096837e-06, "loss_iou": 0.306640625, "loss_num": 0.041015625, "loss_xval": 0.8203125, "num_input_tokens_seen": 215410540, "step": 3436 }, { "epoch": 11.437603993344426, "grad_norm": 12.163830757141113, "learning_rate": 5e-06, "loss": 0.4114, "num_input_tokens_seen": 215473512, "step": 3437 }, { "epoch": 11.437603993344426, "loss": 0.27012115716934204, "loss_ce": 4.060037463204935e-05, "loss_iou": 0.061279296875, "loss_num": 0.029541015625, "loss_xval": 0.26953125, "num_input_tokens_seen": 215473512, "step": 3437 }, { "epoch": 11.440931780366057, "grad_norm": 8.751675605773926, "learning_rate": 5e-06, "loss": 0.5299, "num_input_tokens_seen": 215536880, "step": 3438 }, { "epoch": 11.440931780366057, "loss": 0.5208498239517212, "loss_ce": 0.00022758070554118603, "loss_iou": 0.2041015625, "loss_num": 0.0225830078125, "loss_xval": 0.51953125, "num_input_tokens_seen": 215536880, "step": 3438 }, { "epoch": 11.444259567387688, "grad_norm": 9.835968971252441, "learning_rate": 5e-06, "loss": 0.3337, "num_input_tokens_seen": 215599652, "step": 3439 }, { "epoch": 11.444259567387688, "loss": 0.3313302993774414, "loss_ce": 9.384751251673151e-07, "loss_iou": 0.1181640625, "loss_num": 0.0189208984375, "loss_xval": 0.33203125, "num_input_tokens_seen": 215599652, "step": 3439 }, { "epoch": 11.447587354409318, "grad_norm": 10.25355339050293, "learning_rate": 5e-06, "loss": 0.5387, "num_input_tokens_seen": 215661768, "step": 3440 }, { "epoch": 11.447587354409318, "loss": 0.5723901987075806, "loss_ce": 2.4799787752272096e-06, "loss_iou": 0.2236328125, "loss_num": 0.02490234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 215661768, "step": 3440 }, { "epoch": 11.450915141430949, "grad_norm": 8.601304054260254, "learning_rate": 5e-06, "loss": 0.3398, "num_input_tokens_seen": 215723140, "step": 3441 }, { "epoch": 11.450915141430949, "loss": 0.21281567215919495, "loss_ce": 1.3349157370612375e-06, "loss_iou": 0.049072265625, "loss_num": 0.02294921875, "loss_xval": 0.212890625, "num_input_tokens_seen": 215723140, "step": 3441 }, { "epoch": 11.45424292845258, "grad_norm": 16.138690948486328, "learning_rate": 5e-06, "loss": 0.8537, "num_input_tokens_seen": 215786968, "step": 3442 }, { "epoch": 11.45424292845258, "loss": 1.056840181350708, "loss_ce": 0.0005656966823153198, "loss_iou": 0.412109375, "loss_num": 0.04638671875, "loss_xval": 1.0546875, "num_input_tokens_seen": 215786968, "step": 3442 }, { "epoch": 11.45757071547421, "grad_norm": 17.24413299560547, "learning_rate": 5e-06, "loss": 0.5615, "num_input_tokens_seen": 215850536, "step": 3443 }, { "epoch": 11.45757071547421, "loss": 0.5545469522476196, "loss_ce": 0.0002638145233504474, "loss_iou": 0.2080078125, "loss_num": 0.02783203125, "loss_xval": 0.5546875, "num_input_tokens_seen": 215850536, "step": 3443 }, { "epoch": 11.46089850249584, "grad_norm": 18.573286056518555, "learning_rate": 5e-06, "loss": 0.462, "num_input_tokens_seen": 215913148, "step": 3444 }, { "epoch": 11.46089850249584, "loss": 0.6288305521011353, "loss_ce": 0.00035155288060195744, "loss_iou": 0.2255859375, "loss_num": 0.03564453125, "loss_xval": 0.62890625, "num_input_tokens_seen": 215913148, "step": 3444 }, { "epoch": 11.464226289517471, "grad_norm": 9.16270923614502, "learning_rate": 5e-06, "loss": 0.4667, "num_input_tokens_seen": 215976020, "step": 3445 }, { "epoch": 11.464226289517471, "loss": 0.5113560557365417, "loss_ce": 3.516503284117789e-06, "loss_iou": 0.1904296875, "loss_num": 0.0263671875, "loss_xval": 0.51171875, "num_input_tokens_seen": 215976020, "step": 3445 }, { "epoch": 11.467554076539102, "grad_norm": 18.042343139648438, "learning_rate": 5e-06, "loss": 0.6258, "num_input_tokens_seen": 216038952, "step": 3446 }, { "epoch": 11.467554076539102, "loss": 0.616923987865448, "loss_ce": 4.1647574107628316e-05, "loss_iou": 0.251953125, "loss_num": 0.023193359375, "loss_xval": 0.6171875, "num_input_tokens_seen": 216038952, "step": 3446 }, { "epoch": 11.470881863560733, "grad_norm": 26.65952491760254, "learning_rate": 5e-06, "loss": 0.6154, "num_input_tokens_seen": 216103360, "step": 3447 }, { "epoch": 11.470881863560733, "loss": 0.6269551515579224, "loss_ce": 2.0248467080818955e-06, "loss_iou": 0.228515625, "loss_num": 0.03369140625, "loss_xval": 0.625, "num_input_tokens_seen": 216103360, "step": 3447 }, { "epoch": 11.474209650582363, "grad_norm": 30.748014450073242, "learning_rate": 5e-06, "loss": 0.5254, "num_input_tokens_seen": 216165220, "step": 3448 }, { "epoch": 11.474209650582363, "loss": 0.33963072299957275, "loss_ce": 6.267359822231811e-07, "loss_iou": 0.10107421875, "loss_num": 0.02734375, "loss_xval": 0.33984375, "num_input_tokens_seen": 216165220, "step": 3448 }, { "epoch": 11.477537437603994, "grad_norm": 16.46575355529785, "learning_rate": 5e-06, "loss": 0.3932, "num_input_tokens_seen": 216227468, "step": 3449 }, { "epoch": 11.477537437603994, "loss": 0.5158704519271851, "loss_ce": 1.3444207525026286e-06, "loss_iou": 0.1669921875, "loss_num": 0.0361328125, "loss_xval": 0.515625, "num_input_tokens_seen": 216227468, "step": 3449 }, { "epoch": 11.480865224625624, "grad_norm": 12.867646217346191, "learning_rate": 5e-06, "loss": 0.6103, "num_input_tokens_seen": 216290252, "step": 3450 }, { "epoch": 11.480865224625624, "loss": 0.5968178510665894, "loss_ce": 0.00013816282444167882, "loss_iou": 0.2099609375, "loss_num": 0.035400390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 216290252, "step": 3450 }, { "epoch": 11.484193011647255, "grad_norm": 19.748159408569336, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 216353944, "step": 3451 }, { "epoch": 11.484193011647255, "loss": 0.5722155570983887, "loss_ce": 0.0008043647976592183, "loss_iou": 0.1796875, "loss_num": 0.042236328125, "loss_xval": 0.5703125, "num_input_tokens_seen": 216353944, "step": 3451 }, { "epoch": 11.487520798668886, "grad_norm": 12.289689064025879, "learning_rate": 5e-06, "loss": 0.5057, "num_input_tokens_seen": 216416972, "step": 3452 }, { "epoch": 11.487520798668886, "loss": 0.40985777974128723, "loss_ce": 0.00038815996958874166, "loss_iou": 0.173828125, "loss_num": 0.01226806640625, "loss_xval": 0.41015625, "num_input_tokens_seen": 216416972, "step": 3452 }, { "epoch": 11.490848585690516, "grad_norm": 8.861686706542969, "learning_rate": 5e-06, "loss": 0.475, "num_input_tokens_seen": 216480008, "step": 3453 }, { "epoch": 11.490848585690516, "loss": 0.5598204135894775, "loss_ce": 0.00012804195284843445, "loss_iou": 0.212890625, "loss_num": 0.0269775390625, "loss_xval": 0.55859375, "num_input_tokens_seen": 216480008, "step": 3453 }, { "epoch": 11.494176372712147, "grad_norm": 17.1367130279541, "learning_rate": 5e-06, "loss": 0.5687, "num_input_tokens_seen": 216544644, "step": 3454 }, { "epoch": 11.494176372712147, "loss": 0.6093776822090149, "loss_ce": 2.6572747628961224e-06, "loss_iou": 0.255859375, "loss_num": 0.0196533203125, "loss_xval": 0.609375, "num_input_tokens_seen": 216544644, "step": 3454 }, { "epoch": 11.497504159733777, "grad_norm": 12.502127647399902, "learning_rate": 5e-06, "loss": 0.5035, "num_input_tokens_seen": 216607412, "step": 3455 }, { "epoch": 11.497504159733777, "loss": 0.3921874165534973, "loss_ce": 3.65586020052433e-05, "loss_iou": 0.126953125, "loss_num": 0.0277099609375, "loss_xval": 0.392578125, "num_input_tokens_seen": 216607412, "step": 3455 }, { "epoch": 11.500831946755408, "grad_norm": 11.693992614746094, "learning_rate": 5e-06, "loss": 0.4975, "num_input_tokens_seen": 216670968, "step": 3456 }, { "epoch": 11.500831946755408, "loss": 0.6839096546173096, "loss_ce": 0.0006211085710674524, "loss_iou": 0.26953125, "loss_num": 0.02880859375, "loss_xval": 0.68359375, "num_input_tokens_seen": 216670968, "step": 3456 }, { "epoch": 11.504159733777039, "grad_norm": 19.36003303527832, "learning_rate": 5e-06, "loss": 0.6733, "num_input_tokens_seen": 216734768, "step": 3457 }, { "epoch": 11.504159733777039, "loss": 0.6083134412765503, "loss_ce": 3.706954521476291e-05, "loss_iou": 0.2275390625, "loss_num": 0.0308837890625, "loss_xval": 0.609375, "num_input_tokens_seen": 216734768, "step": 3457 }, { "epoch": 11.50748752079867, "grad_norm": 22.45104217529297, "learning_rate": 5e-06, "loss": 0.4068, "num_input_tokens_seen": 216795516, "step": 3458 }, { "epoch": 11.50748752079867, "loss": 0.3477317690849304, "loss_ce": 0.00041123153641819954, "loss_iou": 0.12451171875, "loss_num": 0.0198974609375, "loss_xval": 0.34765625, "num_input_tokens_seen": 216795516, "step": 3458 }, { "epoch": 11.5108153078203, "grad_norm": 21.334108352661133, "learning_rate": 5e-06, "loss": 0.6646, "num_input_tokens_seen": 216858816, "step": 3459 }, { "epoch": 11.5108153078203, "loss": 0.4955092668533325, "loss_ce": 0.00014793846639804542, "loss_iou": 0.1748046875, "loss_num": 0.0291748046875, "loss_xval": 0.49609375, "num_input_tokens_seen": 216858816, "step": 3459 }, { "epoch": 11.51414309484193, "grad_norm": 8.968974113464355, "learning_rate": 5e-06, "loss": 0.3424, "num_input_tokens_seen": 216921696, "step": 3460 }, { "epoch": 11.51414309484193, "loss": 0.2936971187591553, "loss_ce": 2.6444669856573455e-05, "loss_iou": 0.08349609375, "loss_num": 0.0255126953125, "loss_xval": 0.29296875, "num_input_tokens_seen": 216921696, "step": 3460 }, { "epoch": 11.517470881863561, "grad_norm": 15.085412979125977, "learning_rate": 5e-06, "loss": 0.5257, "num_input_tokens_seen": 216985028, "step": 3461 }, { "epoch": 11.517470881863561, "loss": 0.48291176557540894, "loss_ce": 1.6170602066267747e-06, "loss_iou": 0.1884765625, "loss_num": 0.0213623046875, "loss_xval": 0.482421875, "num_input_tokens_seen": 216985028, "step": 3461 }, { "epoch": 11.520798668885192, "grad_norm": 19.322677612304688, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 217048284, "step": 3462 }, { "epoch": 11.520798668885192, "loss": 0.6103538274765015, "loss_ce": 2.3059981231199345e-06, "loss_iou": 0.234375, "loss_num": 0.028076171875, "loss_xval": 0.609375, "num_input_tokens_seen": 217048284, "step": 3462 }, { "epoch": 11.524126455906822, "grad_norm": 28.263582229614258, "learning_rate": 5e-06, "loss": 0.5806, "num_input_tokens_seen": 217111512, "step": 3463 }, { "epoch": 11.524126455906822, "loss": 0.7131632566452026, "loss_ce": 0.0013712530490010977, "loss_iou": 0.28125, "loss_num": 0.030029296875, "loss_xval": 0.7109375, "num_input_tokens_seen": 217111512, "step": 3463 }, { "epoch": 11.527454242928453, "grad_norm": 61.78733825683594, "learning_rate": 5e-06, "loss": 0.7584, "num_input_tokens_seen": 217175656, "step": 3464 }, { "epoch": 11.527454242928453, "loss": 0.7672163248062134, "loss_ce": 4.371685918158619e-06, "loss_iou": 0.310546875, "loss_num": 0.0294189453125, "loss_xval": 0.765625, "num_input_tokens_seen": 217175656, "step": 3464 }, { "epoch": 11.530782029950084, "grad_norm": 33.49160385131836, "learning_rate": 5e-06, "loss": 0.5733, "num_input_tokens_seen": 217238304, "step": 3465 }, { "epoch": 11.530782029950084, "loss": 0.6738321781158447, "loss_ce": 4.086127319169464e-06, "loss_iou": 0.26171875, "loss_num": 0.0299072265625, "loss_xval": 0.671875, "num_input_tokens_seen": 217238304, "step": 3465 }, { "epoch": 11.534109816971714, "grad_norm": 15.601361274719238, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 217301672, "step": 3466 }, { "epoch": 11.534109816971714, "loss": 0.3643876314163208, "loss_ce": 7.721272595517803e-06, "loss_iou": 0.1357421875, "loss_num": 0.0185546875, "loss_xval": 0.365234375, "num_input_tokens_seen": 217301672, "step": 3466 }, { "epoch": 11.537437603993345, "grad_norm": 14.162123680114746, "learning_rate": 5e-06, "loss": 0.4751, "num_input_tokens_seen": 217364596, "step": 3467 }, { "epoch": 11.537437603993345, "loss": 0.4647921323776245, "loss_ce": 0.00022302583965938538, "loss_iou": 0.2060546875, "loss_num": 0.01068115234375, "loss_xval": 0.46484375, "num_input_tokens_seen": 217364596, "step": 3467 }, { "epoch": 11.540765391014975, "grad_norm": 7.534528732299805, "learning_rate": 5e-06, "loss": 0.576, "num_input_tokens_seen": 217426864, "step": 3468 }, { "epoch": 11.540765391014975, "loss": 0.5969950556755066, "loss_ce": 1.0221011507383082e-05, "loss_iou": 0.2392578125, "loss_num": 0.0238037109375, "loss_xval": 0.59765625, "num_input_tokens_seen": 217426864, "step": 3468 }, { "epoch": 11.544093178036606, "grad_norm": 7.742190361022949, "learning_rate": 5e-06, "loss": 0.3304, "num_input_tokens_seen": 217486792, "step": 3469 }, { "epoch": 11.544093178036606, "loss": 0.28906363248825073, "loss_ce": 0.0001231908390764147, "loss_iou": 0.09814453125, "loss_num": 0.0186767578125, "loss_xval": 0.2890625, "num_input_tokens_seen": 217486792, "step": 3469 }, { "epoch": 11.547420965058237, "grad_norm": 9.111221313476562, "learning_rate": 5e-06, "loss": 0.3173, "num_input_tokens_seen": 217549368, "step": 3470 }, { "epoch": 11.547420965058237, "loss": 0.360109806060791, "loss_ce": 2.355821607125108e-06, "loss_iou": 0.11865234375, "loss_num": 0.0245361328125, "loss_xval": 0.359375, "num_input_tokens_seen": 217549368, "step": 3470 }, { "epoch": 11.550748752079867, "grad_norm": 9.266467094421387, "learning_rate": 5e-06, "loss": 0.5366, "num_input_tokens_seen": 217612616, "step": 3471 }, { "epoch": 11.550748752079867, "loss": 0.5578884482383728, "loss_ce": 2.7117877834825777e-05, "loss_iou": 0.1982421875, "loss_num": 0.031982421875, "loss_xval": 0.55859375, "num_input_tokens_seen": 217612616, "step": 3471 }, { "epoch": 11.554076539101498, "grad_norm": 23.567066192626953, "learning_rate": 5e-06, "loss": 0.3987, "num_input_tokens_seen": 217675852, "step": 3472 }, { "epoch": 11.554076539101498, "loss": 0.3721940219402313, "loss_ce": 1.6242804576904746e-06, "loss_iou": 0.1484375, "loss_num": 0.01531982421875, "loss_xval": 0.373046875, "num_input_tokens_seen": 217675852, "step": 3472 }, { "epoch": 11.557404326123129, "grad_norm": 27.93842124938965, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 217738944, "step": 3473 }, { "epoch": 11.557404326123129, "loss": 0.5508066415786743, "loss_ce": 0.0008798525086604059, "loss_iou": 0.2099609375, "loss_num": 0.0260009765625, "loss_xval": 0.55078125, "num_input_tokens_seen": 217738944, "step": 3473 }, { "epoch": 11.56073211314476, "grad_norm": 11.653983116149902, "learning_rate": 5e-06, "loss": 0.5181, "num_input_tokens_seen": 217803684, "step": 3474 }, { "epoch": 11.56073211314476, "loss": 0.6239655613899231, "loss_ce": 3.1423053314938443e-06, "loss_iou": 0.23828125, "loss_num": 0.0294189453125, "loss_xval": 0.625, "num_input_tokens_seen": 217803684, "step": 3474 }, { "epoch": 11.56405990016639, "grad_norm": 17.29817008972168, "learning_rate": 5e-06, "loss": 0.3498, "num_input_tokens_seen": 217866628, "step": 3475 }, { "epoch": 11.56405990016639, "loss": 0.3882454037666321, "loss_ce": 7.498421155105461e-07, "loss_iou": 0.1533203125, "loss_num": 0.0166015625, "loss_xval": 0.388671875, "num_input_tokens_seen": 217866628, "step": 3475 }, { "epoch": 11.56738768718802, "grad_norm": 22.488197326660156, "learning_rate": 5e-06, "loss": 0.4228, "num_input_tokens_seen": 217928952, "step": 3476 }, { "epoch": 11.56738768718802, "loss": 0.5019124746322632, "loss_ce": 0.0006917447317391634, "loss_iou": 0.166015625, "loss_num": 0.033935546875, "loss_xval": 0.5, "num_input_tokens_seen": 217928952, "step": 3476 }, { "epoch": 11.570715474209651, "grad_norm": 11.320959091186523, "learning_rate": 5e-06, "loss": 0.4266, "num_input_tokens_seen": 217990788, "step": 3477 }, { "epoch": 11.570715474209651, "loss": 0.4171241521835327, "loss_ce": 9.887447959044948e-06, "loss_iou": 0.12255859375, "loss_num": 0.034423828125, "loss_xval": 0.41796875, "num_input_tokens_seen": 217990788, "step": 3477 }, { "epoch": 11.574043261231282, "grad_norm": 9.35457706451416, "learning_rate": 5e-06, "loss": 0.4248, "num_input_tokens_seen": 218054476, "step": 3478 }, { "epoch": 11.574043261231282, "loss": 0.24473607540130615, "loss_ce": 1.5630474081262946e-05, "loss_iou": 0.08740234375, "loss_num": 0.013916015625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 218054476, "step": 3478 }, { "epoch": 11.577371048252912, "grad_norm": 11.13575267791748, "learning_rate": 5e-06, "loss": 0.4505, "num_input_tokens_seen": 218117236, "step": 3479 }, { "epoch": 11.577371048252912, "loss": 0.6233565807342529, "loss_ce": 0.0017134789377450943, "loss_iou": 0.255859375, "loss_num": 0.0223388671875, "loss_xval": 0.62109375, "num_input_tokens_seen": 218117236, "step": 3479 }, { "epoch": 11.580698835274543, "grad_norm": 36.582454681396484, "learning_rate": 5e-06, "loss": 0.5941, "num_input_tokens_seen": 218181292, "step": 3480 }, { "epoch": 11.580698835274543, "loss": 0.45822417736053467, "loss_ce": 0.00015532842371612787, "loss_iou": 0.1904296875, "loss_num": 0.0152587890625, "loss_xval": 0.458984375, "num_input_tokens_seen": 218181292, "step": 3480 }, { "epoch": 11.584026622296173, "grad_norm": 9.784381866455078, "learning_rate": 5e-06, "loss": 0.4179, "num_input_tokens_seen": 218244748, "step": 3481 }, { "epoch": 11.584026622296173, "loss": 0.46349063515663147, "loss_ce": 0.0003558751486707479, "loss_iou": 0.1962890625, "loss_num": 0.01422119140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 218244748, "step": 3481 }, { "epoch": 11.587354409317804, "grad_norm": 7.703891277313232, "learning_rate": 5e-06, "loss": 0.6976, "num_input_tokens_seen": 218306764, "step": 3482 }, { "epoch": 11.587354409317804, "loss": 0.48275673389434814, "loss_ce": 0.00033484402229078114, "loss_iou": 0.16015625, "loss_num": 0.032470703125, "loss_xval": 0.482421875, "num_input_tokens_seen": 218306764, "step": 3482 }, { "epoch": 11.590682196339435, "grad_norm": 6.9311604499816895, "learning_rate": 5e-06, "loss": 0.4781, "num_input_tokens_seen": 218370192, "step": 3483 }, { "epoch": 11.590682196339435, "loss": 0.4528202712535858, "loss_ce": 0.00042769996798597276, "loss_iou": 0.1669921875, "loss_num": 0.0238037109375, "loss_xval": 0.453125, "num_input_tokens_seen": 218370192, "step": 3483 }, { "epoch": 11.594009983361065, "grad_norm": 16.214101791381836, "learning_rate": 5e-06, "loss": 0.5062, "num_input_tokens_seen": 218432100, "step": 3484 }, { "epoch": 11.594009983361065, "loss": 0.4556869864463806, "loss_ce": 0.0003036742564290762, "loss_iou": 0.162109375, "loss_num": 0.0264892578125, "loss_xval": 0.455078125, "num_input_tokens_seen": 218432100, "step": 3484 }, { "epoch": 11.597337770382696, "grad_norm": 22.169532775878906, "learning_rate": 5e-06, "loss": 0.4908, "num_input_tokens_seen": 218494068, "step": 3485 }, { "epoch": 11.597337770382696, "loss": 0.5675075054168701, "loss_ce": 2.6459001674083993e-06, "loss_iou": 0.2197265625, "loss_num": 0.025634765625, "loss_xval": 0.56640625, "num_input_tokens_seen": 218494068, "step": 3485 }, { "epoch": 11.600665557404326, "grad_norm": 40.3531379699707, "learning_rate": 5e-06, "loss": 0.5566, "num_input_tokens_seen": 218557152, "step": 3486 }, { "epoch": 11.600665557404326, "loss": 0.36689504981040955, "loss_ce": 1.274495298275724e-05, "loss_iou": 0.10107421875, "loss_num": 0.03271484375, "loss_xval": 0.3671875, "num_input_tokens_seen": 218557152, "step": 3486 }, { "epoch": 11.603993344425957, "grad_norm": 23.778518676757812, "learning_rate": 5e-06, "loss": 0.3801, "num_input_tokens_seen": 218619668, "step": 3487 }, { "epoch": 11.603993344425957, "loss": 0.40087997913360596, "loss_ce": 1.051680101227248e-06, "loss_iou": 0.1669921875, "loss_num": 0.0135498046875, "loss_xval": 0.400390625, "num_input_tokens_seen": 218619668, "step": 3487 }, { "epoch": 11.607321131447588, "grad_norm": 10.52493667602539, "learning_rate": 5e-06, "loss": 0.5528, "num_input_tokens_seen": 218682584, "step": 3488 }, { "epoch": 11.607321131447588, "loss": 0.5113567113876343, "loss_ce": 4.162156983511522e-06, "loss_iou": 0.2158203125, "loss_num": 0.015869140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 218682584, "step": 3488 }, { "epoch": 11.610648918469218, "grad_norm": 11.507619857788086, "learning_rate": 5e-06, "loss": 0.3107, "num_input_tokens_seen": 218745620, "step": 3489 }, { "epoch": 11.610648918469218, "loss": 0.24684518575668335, "loss_ce": 0.00044624408474192023, "loss_iou": 0.08447265625, "loss_num": 0.015625, "loss_xval": 0.24609375, "num_input_tokens_seen": 218745620, "step": 3489 }, { "epoch": 11.613976705490849, "grad_norm": 9.783194541931152, "learning_rate": 5e-06, "loss": 0.4939, "num_input_tokens_seen": 218807464, "step": 3490 }, { "epoch": 11.613976705490849, "loss": 0.5549371838569641, "loss_ce": 5.521426828636322e-06, "loss_iou": 0.2333984375, "loss_num": 0.0177001953125, "loss_xval": 0.5546875, "num_input_tokens_seen": 218807464, "step": 3490 }, { "epoch": 11.61730449251248, "grad_norm": 17.09255027770996, "learning_rate": 5e-06, "loss": 0.6676, "num_input_tokens_seen": 218873320, "step": 3491 }, { "epoch": 11.61730449251248, "loss": 0.6556416749954224, "loss_ce": 2.002896053454606e-06, "loss_iou": 0.240234375, "loss_num": 0.03515625, "loss_xval": 0.65625, "num_input_tokens_seen": 218873320, "step": 3491 }, { "epoch": 11.62063227953411, "grad_norm": 16.8724365234375, "learning_rate": 5e-06, "loss": 0.494, "num_input_tokens_seen": 218934480, "step": 3492 }, { "epoch": 11.62063227953411, "loss": 0.5572545528411865, "loss_ce": 3.5638290682982188e-06, "loss_iou": 0.2001953125, "loss_num": 0.031494140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 218934480, "step": 3492 }, { "epoch": 11.62396006655574, "grad_norm": 8.903665542602539, "learning_rate": 5e-06, "loss": 0.612, "num_input_tokens_seen": 218998176, "step": 3493 }, { "epoch": 11.62396006655574, "loss": 0.7124406695365906, "loss_ce": 0.00016038533067330718, "loss_iou": 0.294921875, "loss_num": 0.0244140625, "loss_xval": 0.7109375, "num_input_tokens_seen": 218998176, "step": 3493 }, { "epoch": 11.627287853577371, "grad_norm": 21.76995849609375, "learning_rate": 5e-06, "loss": 0.5182, "num_input_tokens_seen": 219060700, "step": 3494 }, { "epoch": 11.627287853577371, "loss": 0.43091124296188354, "loss_ce": 3.064657448703656e-06, "loss_iou": 0.1552734375, "loss_num": 0.0240478515625, "loss_xval": 0.431640625, "num_input_tokens_seen": 219060700, "step": 3494 }, { "epoch": 11.630615640599002, "grad_norm": 18.293724060058594, "learning_rate": 5e-06, "loss": 0.5915, "num_input_tokens_seen": 219124972, "step": 3495 }, { "epoch": 11.630615640599002, "loss": 0.6171760559082031, "loss_ce": 0.0008430538000538945, "loss_iou": 0.21875, "loss_num": 0.03564453125, "loss_xval": 0.6171875, "num_input_tokens_seen": 219124972, "step": 3495 }, { "epoch": 11.633943427620633, "grad_norm": 15.716788291931152, "learning_rate": 5e-06, "loss": 0.4207, "num_input_tokens_seen": 219187120, "step": 3496 }, { "epoch": 11.633943427620633, "loss": 0.4183405339717865, "loss_ce": 5.576021067099646e-06, "loss_iou": 0.1484375, "loss_num": 0.024169921875, "loss_xval": 0.41796875, "num_input_tokens_seen": 219187120, "step": 3496 }, { "epoch": 11.637271214642263, "grad_norm": 21.618167877197266, "learning_rate": 5e-06, "loss": 0.7394, "num_input_tokens_seen": 219250080, "step": 3497 }, { "epoch": 11.637271214642263, "loss": 0.7108774781227112, "loss_ce": 1.003699821922055e-06, "loss_iou": 0.271484375, "loss_num": 0.033447265625, "loss_xval": 0.7109375, "num_input_tokens_seen": 219250080, "step": 3497 }, { "epoch": 11.640599001663894, "grad_norm": 9.311153411865234, "learning_rate": 5e-06, "loss": 0.5163, "num_input_tokens_seen": 219313108, "step": 3498 }, { "epoch": 11.640599001663894, "loss": 0.6300204992294312, "loss_ce": 0.001480484614148736, "loss_iou": 0.25390625, "loss_num": 0.023681640625, "loss_xval": 0.62890625, "num_input_tokens_seen": 219313108, "step": 3498 }, { "epoch": 11.643926788685524, "grad_norm": 7.875514030456543, "learning_rate": 5e-06, "loss": 0.4441, "num_input_tokens_seen": 219373852, "step": 3499 }, { "epoch": 11.643926788685524, "loss": 0.3963712453842163, "loss_ce": 8.956414603744633e-06, "loss_iou": 0.1318359375, "loss_num": 0.0264892578125, "loss_xval": 0.396484375, "num_input_tokens_seen": 219373852, "step": 3499 }, { "epoch": 11.647254575707155, "grad_norm": 12.24455451965332, "learning_rate": 5e-06, "loss": 0.4104, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "eval_seeclick_CIoU": 0.04145405255258083, "eval_seeclick_GIoU": 0.0433823186904192, "eval_seeclick_IoU": 0.1595434918999672, "eval_seeclick_MAE_all": 0.17026817798614502, "eval_seeclick_MAE_h": 0.05937519669532776, "eval_seeclick_MAE_w": 0.14418015256524086, "eval_seeclick_MAE_x_boxes": 0.20008638501167297, "eval_seeclick_MAE_y_boxes": 0.1773783266544342, "eval_seeclick_NUM_probability": 0.999977171421051, "eval_seeclick_inside_bbox": 0.17812500149011612, "eval_seeclick_loss": 2.9515390396118164, "eval_seeclick_loss_ce": 0.16706125438213348, "eval_seeclick_loss_iou": 0.968017578125, "eval_seeclick_loss_num": 0.17383575439453125, "eval_seeclick_loss_xval": 2.80615234375, "eval_seeclick_runtime": 67.36, "eval_seeclick_samples_per_second": 0.698, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "eval_icons_CIoU": -0.046459888108074665, "eval_icons_GIoU": 0.05916312523186207, "eval_icons_IoU": 0.12030323967337608, "eval_icons_MAE_all": 0.1644785925745964, "eval_icons_MAE_h": 0.11820004135370255, "eval_icons_MAE_w": 0.174150962382555, "eval_icons_MAE_x_boxes": 0.1330418810248375, "eval_icons_MAE_y_boxes": 0.06877180561423302, "eval_icons_NUM_probability": 0.9999887943267822, "eval_icons_inside_bbox": 0.2760416716337204, "eval_icons_loss": 2.6837527751922607, "eval_icons_loss_ce": 3.067219154218037e-06, "eval_icons_loss_iou": 0.953125, "eval_icons_loss_num": 0.16757965087890625, "eval_icons_loss_xval": 2.74365234375, "eval_icons_runtime": 73.0438, "eval_icons_samples_per_second": 0.685, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "eval_screenspot_CIoU": 0.17989537368218103, "eval_screenspot_GIoU": 0.2155372897783915, "eval_screenspot_IoU": 0.2897418936093648, "eval_screenspot_MAE_all": 0.11611270407835643, "eval_screenspot_MAE_h": 0.07238364592194557, "eval_screenspot_MAE_w": 0.09275218099355698, "eval_screenspot_MAE_x_boxes": 0.15620492895444235, "eval_screenspot_MAE_y_boxes": 0.08326464643081029, "eval_screenspot_NUM_probability": 0.9999940196673075, "eval_screenspot_inside_bbox": 0.5020833412806193, "eval_screenspot_loss": 2.196298599243164, "eval_screenspot_loss_ce": 2.1036333540299285e-05, "eval_screenspot_loss_iou": 0.8053385416666666, "eval_screenspot_loss_num": 0.1282806396484375, "eval_screenspot_loss_xval": 2.2513020833333335, "eval_screenspot_runtime": 122.4466, "eval_screenspot_samples_per_second": 0.727, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "eval_compot_CIoU": 0.09882057458162308, "eval_compot_GIoU": 0.14507890492677689, "eval_compot_IoU": 0.23971349000930786, "eval_compot_MAE_all": 0.145487941801548, "eval_compot_MAE_h": 0.06269853748381138, "eval_compot_MAE_w": 0.17866067588329315, "eval_compot_MAE_x_boxes": 0.1263837218284607, "eval_compot_MAE_y_boxes": 0.10911508649587631, "eval_compot_NUM_probability": 0.9999947845935822, "eval_compot_inside_bbox": 0.4010416716337204, "eval_compot_loss": 2.4088857173919678, "eval_compot_loss_ce": 0.001836341805756092, "eval_compot_loss_iou": 0.8626708984375, "eval_compot_loss_num": 0.1470813751220703, "eval_compot_loss_xval": 2.4609375, "eval_compot_runtime": 72.0166, "eval_compot_samples_per_second": 0.694, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "eval_custom_ui_MAE_all": 0.06419179029762745, "eval_custom_ui_MAE_x": 0.07677025347948074, "eval_custom_ui_MAE_y": 0.05161333829164505, "eval_custom_ui_NUM_probability": 0.9999985694885254, "eval_custom_ui_loss": 0.3060508668422699, "eval_custom_ui_loss_ce": 2.780430168058956e-05, "eval_custom_ui_loss_num": 0.063201904296875, "eval_custom_ui_loss_xval": 0.31622314453125, "eval_custom_ui_runtime": 54.3452, "eval_custom_ui_samples_per_second": 0.92, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.647254575707155, "loss": 0.32729968428611755, "loss_ce": 2.917419988079928e-05, "loss_iou": 0.0, "loss_num": 0.0654296875, "loss_xval": 0.328125, "num_input_tokens_seen": 219434188, "step": 3500 }, { "epoch": 11.650582362728786, "grad_norm": 8.559126853942871, "learning_rate": 5e-06, "loss": 0.494, "num_input_tokens_seen": 219497068, "step": 3501 }, { "epoch": 11.650582362728786, "loss": 0.5286893844604492, "loss_ce": 2.8845001907029655e-06, "loss_iou": 0.1728515625, "loss_num": 0.036865234375, "loss_xval": 0.52734375, "num_input_tokens_seen": 219497068, "step": 3501 }, { "epoch": 11.653910149750416, "grad_norm": 25.051130294799805, "learning_rate": 5e-06, "loss": 0.4111, "num_input_tokens_seen": 219557688, "step": 3502 }, { "epoch": 11.653910149750416, "loss": 0.3852543830871582, "loss_ce": 4.5820033278687333e-07, "loss_iou": 0.13671875, "loss_num": 0.022216796875, "loss_xval": 0.384765625, "num_input_tokens_seen": 219557688, "step": 3502 }, { "epoch": 11.657237936772047, "grad_norm": 24.257904052734375, "learning_rate": 5e-06, "loss": 0.4207, "num_input_tokens_seen": 219620448, "step": 3503 }, { "epoch": 11.657237936772047, "loss": 0.29239925742149353, "loss_ce": 1.0357220162404701e-05, "loss_iou": 0.10546875, "loss_num": 0.0162353515625, "loss_xval": 0.29296875, "num_input_tokens_seen": 219620448, "step": 3503 }, { "epoch": 11.660565723793678, "grad_norm": 9.481152534484863, "learning_rate": 5e-06, "loss": 0.5903, "num_input_tokens_seen": 219683704, "step": 3504 }, { "epoch": 11.660565723793678, "loss": 0.7026574015617371, "loss_ce": 0.00038688615313731134, "loss_iou": 0.275390625, "loss_num": 0.0303955078125, "loss_xval": 0.703125, "num_input_tokens_seen": 219683704, "step": 3504 }, { "epoch": 11.663893510815308, "grad_norm": 15.99586009979248, "learning_rate": 5e-06, "loss": 0.5536, "num_input_tokens_seen": 219747248, "step": 3505 }, { "epoch": 11.663893510815308, "loss": 0.6690737009048462, "loss_ce": 6.293374099186622e-06, "loss_iou": 0.2392578125, "loss_num": 0.0380859375, "loss_xval": 0.66796875, "num_input_tokens_seen": 219747248, "step": 3505 }, { "epoch": 11.667221297836939, "grad_norm": 22.224111557006836, "learning_rate": 5e-06, "loss": 0.4178, "num_input_tokens_seen": 219809776, "step": 3506 }, { "epoch": 11.667221297836939, "loss": 0.3154001235961914, "loss_ce": 9.419070465810364e-07, "loss_iou": 0.11572265625, "loss_num": 0.0169677734375, "loss_xval": 0.314453125, "num_input_tokens_seen": 219809776, "step": 3506 }, { "epoch": 11.67054908485857, "grad_norm": 20.683507919311523, "learning_rate": 5e-06, "loss": 0.5667, "num_input_tokens_seen": 219873096, "step": 3507 }, { "epoch": 11.67054908485857, "loss": 0.6531944274902344, "loss_ce": 0.00011829871073132381, "loss_iou": 0.26953125, "loss_num": 0.0224609375, "loss_xval": 0.65234375, "num_input_tokens_seen": 219873096, "step": 3507 }, { "epoch": 11.6738768718802, "grad_norm": 11.971016883850098, "learning_rate": 5e-06, "loss": 0.4874, "num_input_tokens_seen": 219936168, "step": 3508 }, { "epoch": 11.6738768718802, "loss": 0.4031430184841156, "loss_ce": 5.837212484038901e-06, "loss_iou": 0.158203125, "loss_num": 0.017333984375, "loss_xval": 0.40234375, "num_input_tokens_seen": 219936168, "step": 3508 }, { "epoch": 11.67720465890183, "grad_norm": 10.524487495422363, "learning_rate": 5e-06, "loss": 0.4217, "num_input_tokens_seen": 219998476, "step": 3509 }, { "epoch": 11.67720465890183, "loss": 0.3714013695716858, "loss_ce": 2.4515852601325605e-06, "loss_iou": 0.142578125, "loss_num": 0.0172119140625, "loss_xval": 0.37109375, "num_input_tokens_seen": 219998476, "step": 3509 }, { "epoch": 11.680532445923461, "grad_norm": 12.585525512695312, "learning_rate": 5e-06, "loss": 0.4195, "num_input_tokens_seen": 220060212, "step": 3510 }, { "epoch": 11.680532445923461, "loss": 0.5460212230682373, "loss_ce": 7.372340178335435e-07, "loss_iou": 0.2255859375, "loss_num": 0.01904296875, "loss_xval": 0.546875, "num_input_tokens_seen": 220060212, "step": 3510 }, { "epoch": 11.683860232945092, "grad_norm": 22.08562660217285, "learning_rate": 5e-06, "loss": 0.4475, "num_input_tokens_seen": 220123868, "step": 3511 }, { "epoch": 11.683860232945092, "loss": 0.3176925778388977, "loss_ce": 0.0006149325054138899, "loss_iou": 0.125, "loss_num": 0.01348876953125, "loss_xval": 0.31640625, "num_input_tokens_seen": 220123868, "step": 3511 }, { "epoch": 11.687188019966722, "grad_norm": 29.639217376708984, "learning_rate": 5e-06, "loss": 0.6056, "num_input_tokens_seen": 220188252, "step": 3512 }, { "epoch": 11.687188019966722, "loss": 0.5878997445106506, "loss_ce": 9.093329026654828e-06, "loss_iou": 0.2255859375, "loss_num": 0.0272216796875, "loss_xval": 0.5859375, "num_input_tokens_seen": 220188252, "step": 3512 }, { "epoch": 11.690515806988353, "grad_norm": 54.26960372924805, "learning_rate": 5e-06, "loss": 0.6923, "num_input_tokens_seen": 220252608, "step": 3513 }, { "epoch": 11.690515806988353, "loss": 0.6254927515983582, "loss_ce": 4.485933004616527e-06, "loss_iou": 0.25390625, "loss_num": 0.0233154296875, "loss_xval": 0.625, "num_input_tokens_seen": 220252608, "step": 3513 }, { "epoch": 11.693843594009984, "grad_norm": 23.02007293701172, "learning_rate": 5e-06, "loss": 0.3535, "num_input_tokens_seen": 220314040, "step": 3514 }, { "epoch": 11.693843594009984, "loss": 0.3257761299610138, "loss_ce": 9.84004827842e-07, "loss_iou": 0.11474609375, "loss_num": 0.019287109375, "loss_xval": 0.326171875, "num_input_tokens_seen": 220314040, "step": 3514 }, { "epoch": 11.697171381031614, "grad_norm": 7.533168792724609, "learning_rate": 5e-06, "loss": 0.3344, "num_input_tokens_seen": 220376380, "step": 3515 }, { "epoch": 11.697171381031614, "loss": 0.48004746437072754, "loss_ce": 5.978110493742861e-06, "loss_iou": 0.1845703125, "loss_num": 0.022216796875, "loss_xval": 0.48046875, "num_input_tokens_seen": 220376380, "step": 3515 }, { "epoch": 11.700499168053245, "grad_norm": 10.153213500976562, "learning_rate": 5e-06, "loss": 0.5528, "num_input_tokens_seen": 220438960, "step": 3516 }, { "epoch": 11.700499168053245, "loss": 0.7763609290122986, "loss_ce": 0.00048201606841757894, "loss_iou": 0.28515625, "loss_num": 0.041259765625, "loss_xval": 0.77734375, "num_input_tokens_seen": 220438960, "step": 3516 }, { "epoch": 11.703826955074875, "grad_norm": 9.452091217041016, "learning_rate": 5e-06, "loss": 0.4467, "num_input_tokens_seen": 220502440, "step": 3517 }, { "epoch": 11.703826955074875, "loss": 0.4608350992202759, "loss_ce": 0.00026381408679299057, "loss_iou": 0.150390625, "loss_num": 0.031982421875, "loss_xval": 0.4609375, "num_input_tokens_seen": 220502440, "step": 3517 }, { "epoch": 11.707154742096506, "grad_norm": 22.010047912597656, "learning_rate": 5e-06, "loss": 0.5923, "num_input_tokens_seen": 220565868, "step": 3518 }, { "epoch": 11.707154742096506, "loss": 0.757597029209137, "loss_ce": 2.8664464480243623e-05, "loss_iou": 0.26953125, "loss_num": 0.043701171875, "loss_xval": 0.7578125, "num_input_tokens_seen": 220565868, "step": 3518 }, { "epoch": 11.710482529118137, "grad_norm": 15.119479179382324, "learning_rate": 5e-06, "loss": 0.386, "num_input_tokens_seen": 220629076, "step": 3519 }, { "epoch": 11.710482529118137, "loss": 0.388003408908844, "loss_ce": 2.9244661163829733e-06, "loss_iou": 0.11669921875, "loss_num": 0.0308837890625, "loss_xval": 0.388671875, "num_input_tokens_seen": 220629076, "step": 3519 }, { "epoch": 11.713810316139767, "grad_norm": 19.25171661376953, "learning_rate": 5e-06, "loss": 0.5061, "num_input_tokens_seen": 220693200, "step": 3520 }, { "epoch": 11.713810316139767, "loss": 0.5588202476501465, "loss_ce": 0.00010440753976581618, "loss_iou": 0.1865234375, "loss_num": 0.037353515625, "loss_xval": 0.55859375, "num_input_tokens_seen": 220693200, "step": 3520 }, { "epoch": 11.717138103161398, "grad_norm": 14.505496978759766, "learning_rate": 5e-06, "loss": 0.4169, "num_input_tokens_seen": 220754400, "step": 3521 }, { "epoch": 11.717138103161398, "loss": 0.522443413734436, "loss_ce": 0.0009743173723109066, "loss_iou": 0.205078125, "loss_num": 0.0224609375, "loss_xval": 0.51953125, "num_input_tokens_seen": 220754400, "step": 3521 }, { "epoch": 11.720465890183029, "grad_norm": 14.704300880432129, "learning_rate": 5e-06, "loss": 0.4551, "num_input_tokens_seen": 220817924, "step": 3522 }, { "epoch": 11.720465890183029, "loss": 0.39044666290283203, "loss_ce": 4.756139333039755e-06, "loss_iou": 0.146484375, "loss_num": 0.019775390625, "loss_xval": 0.390625, "num_input_tokens_seen": 220817924, "step": 3522 }, { "epoch": 11.72379367720466, "grad_norm": 9.576857566833496, "learning_rate": 5e-06, "loss": 0.4292, "num_input_tokens_seen": 220880076, "step": 3523 }, { "epoch": 11.72379367720466, "loss": 0.31898048520088196, "loss_ce": 1.076033731806092e-05, "loss_iou": 0.11865234375, "loss_num": 0.0162353515625, "loss_xval": 0.318359375, "num_input_tokens_seen": 220880076, "step": 3523 }, { "epoch": 11.72712146422629, "grad_norm": 15.378133773803711, "learning_rate": 5e-06, "loss": 0.5104, "num_input_tokens_seen": 220943500, "step": 3524 }, { "epoch": 11.72712146422629, "loss": 0.5056197047233582, "loss_ce": 4.479845756577561e-06, "loss_iou": 0.1875, "loss_num": 0.026123046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 220943500, "step": 3524 }, { "epoch": 11.73044925124792, "grad_norm": 8.97268009185791, "learning_rate": 5e-06, "loss": 0.4028, "num_input_tokens_seen": 221006632, "step": 3525 }, { "epoch": 11.73044925124792, "loss": 0.37133973836898804, "loss_ce": 1.8418363652017433e-06, "loss_iou": 0.1611328125, "loss_num": 0.00958251953125, "loss_xval": 0.37109375, "num_input_tokens_seen": 221006632, "step": 3525 }, { "epoch": 11.733777038269551, "grad_norm": 26.393774032592773, "learning_rate": 5e-06, "loss": 0.6125, "num_input_tokens_seen": 221069900, "step": 3526 }, { "epoch": 11.733777038269551, "loss": 0.7500627636909485, "loss_ce": 1.724401499814121e-06, "loss_iou": 0.287109375, "loss_num": 0.035400390625, "loss_xval": 0.75, "num_input_tokens_seen": 221069900, "step": 3526 }, { "epoch": 11.737104825291182, "grad_norm": 24.31625747680664, "learning_rate": 5e-06, "loss": 0.5406, "num_input_tokens_seen": 221132332, "step": 3527 }, { "epoch": 11.737104825291182, "loss": 0.4568498730659485, "loss_ce": 1.7498539364169119e-06, "loss_iou": 0.1474609375, "loss_num": 0.032470703125, "loss_xval": 0.45703125, "num_input_tokens_seen": 221132332, "step": 3527 }, { "epoch": 11.740432612312812, "grad_norm": 30.655399322509766, "learning_rate": 5e-06, "loss": 0.5677, "num_input_tokens_seen": 221197808, "step": 3528 }, { "epoch": 11.740432612312812, "loss": 0.5651890635490417, "loss_ce": 3.50237860402558e-06, "loss_iou": 0.2333984375, "loss_num": 0.0194091796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 221197808, "step": 3528 }, { "epoch": 11.743760399334443, "grad_norm": 28.278818130493164, "learning_rate": 5e-06, "loss": 0.5099, "num_input_tokens_seen": 221261236, "step": 3529 }, { "epoch": 11.743760399334443, "loss": 0.5142279863357544, "loss_ce": 6.818133442720864e-06, "loss_iou": 0.189453125, "loss_num": 0.0269775390625, "loss_xval": 0.515625, "num_input_tokens_seen": 221261236, "step": 3529 }, { "epoch": 11.747088186356073, "grad_norm": 13.968306541442871, "learning_rate": 5e-06, "loss": 0.4074, "num_input_tokens_seen": 221322948, "step": 3530 }, { "epoch": 11.747088186356073, "loss": 0.49926865100860596, "loss_ce": 1.076849912351463e-06, "loss_iou": 0.2060546875, "loss_num": 0.0174560546875, "loss_xval": 0.5, "num_input_tokens_seen": 221322948, "step": 3530 }, { "epoch": 11.750415973377704, "grad_norm": 8.058409690856934, "learning_rate": 5e-06, "loss": 0.5343, "num_input_tokens_seen": 221385800, "step": 3531 }, { "epoch": 11.750415973377704, "loss": 0.6283369064331055, "loss_ce": 4.0987237298395485e-05, "loss_iou": 0.236328125, "loss_num": 0.0308837890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 221385800, "step": 3531 }, { "epoch": 11.753743760399335, "grad_norm": 16.460447311401367, "learning_rate": 5e-06, "loss": 0.5142, "num_input_tokens_seen": 221447588, "step": 3532 }, { "epoch": 11.753743760399335, "loss": 0.5996144413948059, "loss_ce": 5.06465403304901e-06, "loss_iou": 0.212890625, "loss_num": 0.03466796875, "loss_xval": 0.6015625, "num_input_tokens_seen": 221447588, "step": 3532 }, { "epoch": 11.757071547420965, "grad_norm": 14.226716041564941, "learning_rate": 5e-06, "loss": 0.3144, "num_input_tokens_seen": 221510088, "step": 3533 }, { "epoch": 11.757071547420965, "loss": 0.32690557837486267, "loss_ce": 1.2713733212876832e-06, "loss_iou": 0.1240234375, "loss_num": 0.0157470703125, "loss_xval": 0.326171875, "num_input_tokens_seen": 221510088, "step": 3533 }, { "epoch": 11.760399334442596, "grad_norm": 14.855547904968262, "learning_rate": 5e-06, "loss": 0.6099, "num_input_tokens_seen": 221571304, "step": 3534 }, { "epoch": 11.760399334442596, "loss": 0.6240004301071167, "loss_ce": 7.4598833634809125e-06, "loss_iou": 0.2373046875, "loss_num": 0.0299072265625, "loss_xval": 0.625, "num_input_tokens_seen": 221571304, "step": 3534 }, { "epoch": 11.763727121464226, "grad_norm": 18.697784423828125, "learning_rate": 5e-06, "loss": 0.5152, "num_input_tokens_seen": 221634616, "step": 3535 }, { "epoch": 11.763727121464226, "loss": 0.45215314626693726, "loss_ce": 4.717503998108441e-06, "loss_iou": 0.140625, "loss_num": 0.0341796875, "loss_xval": 0.453125, "num_input_tokens_seen": 221634616, "step": 3535 }, { "epoch": 11.767054908485857, "grad_norm": 13.55033016204834, "learning_rate": 5e-06, "loss": 0.8265, "num_input_tokens_seen": 221698976, "step": 3536 }, { "epoch": 11.767054908485857, "loss": 0.8255966901779175, "loss_ce": 0.0007065454265102744, "loss_iou": 0.365234375, "loss_num": 0.0189208984375, "loss_xval": 0.82421875, "num_input_tokens_seen": 221698976, "step": 3536 }, { "epoch": 11.770382695507488, "grad_norm": 18.96334457397461, "learning_rate": 5e-06, "loss": 0.4723, "num_input_tokens_seen": 221762432, "step": 3537 }, { "epoch": 11.770382695507488, "loss": 0.5790195465087891, "loss_ce": 0.0005894032074138522, "loss_iou": 0.2294921875, "loss_num": 0.02392578125, "loss_xval": 0.578125, "num_input_tokens_seen": 221762432, "step": 3537 }, { "epoch": 11.773710482529118, "grad_norm": 20.583208084106445, "learning_rate": 5e-06, "loss": 0.4629, "num_input_tokens_seen": 221825824, "step": 3538 }, { "epoch": 11.773710482529118, "loss": 0.6283589601516724, "loss_ce": 2.0155605398031184e-06, "loss_iou": 0.2373046875, "loss_num": 0.0308837890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 221825824, "step": 3538 }, { "epoch": 11.777038269550749, "grad_norm": 20.015644073486328, "learning_rate": 5e-06, "loss": 0.3671, "num_input_tokens_seen": 221889072, "step": 3539 }, { "epoch": 11.777038269550749, "loss": 0.30737775564193726, "loss_ce": 4.7260937208193354e-06, "loss_iou": 0.126953125, "loss_num": 0.0107421875, "loss_xval": 0.306640625, "num_input_tokens_seen": 221889072, "step": 3539 }, { "epoch": 11.78036605657238, "grad_norm": 12.256757736206055, "learning_rate": 5e-06, "loss": 0.5804, "num_input_tokens_seen": 221951628, "step": 3540 }, { "epoch": 11.78036605657238, "loss": 0.6481873393058777, "loss_ce": 0.0007263936568051577, "loss_iou": 0.2734375, "loss_num": 0.0205078125, "loss_xval": 0.6484375, "num_input_tokens_seen": 221951628, "step": 3540 }, { "epoch": 11.78369384359401, "grad_norm": 12.399874687194824, "learning_rate": 5e-06, "loss": 0.3193, "num_input_tokens_seen": 222013704, "step": 3541 }, { "epoch": 11.78369384359401, "loss": 0.5067779421806335, "loss_ce": 3.0596293072449043e-06, "loss_iou": 0.197265625, "loss_num": 0.0223388671875, "loss_xval": 0.5078125, "num_input_tokens_seen": 222013704, "step": 3541 }, { "epoch": 11.78702163061564, "grad_norm": 15.797649383544922, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 222077244, "step": 3542 }, { "epoch": 11.78702163061564, "loss": 0.591759979724884, "loss_ce": 2.4108259822241962e-05, "loss_iou": 0.2451171875, "loss_num": 0.02001953125, "loss_xval": 0.58984375, "num_input_tokens_seen": 222077244, "step": 3542 }, { "epoch": 11.790349417637271, "grad_norm": 11.963024139404297, "learning_rate": 5e-06, "loss": 0.5347, "num_input_tokens_seen": 222140860, "step": 3543 }, { "epoch": 11.790349417637271, "loss": 0.5724523663520813, "loss_ce": 3.6299632029113127e-06, "loss_iou": 0.2236328125, "loss_num": 0.0250244140625, "loss_xval": 0.57421875, "num_input_tokens_seen": 222140860, "step": 3543 }, { "epoch": 11.793677204658902, "grad_norm": 12.224693298339844, "learning_rate": 5e-06, "loss": 0.3819, "num_input_tokens_seen": 222203716, "step": 3544 }, { "epoch": 11.793677204658902, "loss": 0.4454392194747925, "loss_ce": 0.0006607878603972495, "loss_iou": 0.173828125, "loss_num": 0.0194091796875, "loss_xval": 0.4453125, "num_input_tokens_seen": 222203716, "step": 3544 }, { "epoch": 11.797004991680533, "grad_norm": 9.761552810668945, "learning_rate": 5e-06, "loss": 0.3603, "num_input_tokens_seen": 222264992, "step": 3545 }, { "epoch": 11.797004991680533, "loss": 0.4728774428367615, "loss_ce": 0.0013198177330195904, "loss_iou": 0.154296875, "loss_num": 0.03271484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 222264992, "step": 3545 }, { "epoch": 11.800332778702163, "grad_norm": 19.934093475341797, "learning_rate": 5e-06, "loss": 0.4654, "num_input_tokens_seen": 222328780, "step": 3546 }, { "epoch": 11.800332778702163, "loss": 0.5421138405799866, "loss_ce": 0.0006099700112827122, "loss_iou": 0.22265625, "loss_num": 0.019287109375, "loss_xval": 0.54296875, "num_input_tokens_seen": 222328780, "step": 3546 }, { "epoch": 11.803660565723794, "grad_norm": 29.226486206054688, "learning_rate": 5e-06, "loss": 0.4344, "num_input_tokens_seen": 222390816, "step": 3547 }, { "epoch": 11.803660565723794, "loss": 0.5584104657173157, "loss_ce": 6.0859048971906304e-05, "loss_iou": 0.2041015625, "loss_num": 0.030029296875, "loss_xval": 0.55859375, "num_input_tokens_seen": 222390816, "step": 3547 }, { "epoch": 11.806988352745424, "grad_norm": 17.3790225982666, "learning_rate": 5e-06, "loss": 0.3379, "num_input_tokens_seen": 222454260, "step": 3548 }, { "epoch": 11.806988352745424, "loss": 0.42244166135787964, "loss_ce": 7.838210876798257e-05, "loss_iou": 0.1640625, "loss_num": 0.0189208984375, "loss_xval": 0.421875, "num_input_tokens_seen": 222454260, "step": 3548 }, { "epoch": 11.810316139767055, "grad_norm": 14.205162048339844, "learning_rate": 5e-06, "loss": 0.4326, "num_input_tokens_seen": 222516688, "step": 3549 }, { "epoch": 11.810316139767055, "loss": 0.45849689841270447, "loss_ce": 7.894451528045465e-07, "loss_iou": 0.1669921875, "loss_num": 0.0247802734375, "loss_xval": 0.458984375, "num_input_tokens_seen": 222516688, "step": 3549 }, { "epoch": 11.813643926788686, "grad_norm": 7.465273380279541, "learning_rate": 5e-06, "loss": 0.3171, "num_input_tokens_seen": 222578480, "step": 3550 }, { "epoch": 11.813643926788686, "loss": 0.264527291059494, "loss_ce": 9.263141578230716e-07, "loss_iou": 0.1025390625, "loss_num": 0.01171875, "loss_xval": 0.263671875, "num_input_tokens_seen": 222578480, "step": 3550 }, { "epoch": 11.816971713810316, "grad_norm": 13.565558433532715, "learning_rate": 5e-06, "loss": 0.5928, "num_input_tokens_seen": 222641492, "step": 3551 }, { "epoch": 11.816971713810316, "loss": 0.6602325439453125, "loss_ce": 1.5281220839824528e-05, "loss_iou": 0.2421875, "loss_num": 0.03515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 222641492, "step": 3551 }, { "epoch": 11.820299500831947, "grad_norm": 8.734349250793457, "learning_rate": 5e-06, "loss": 0.5912, "num_input_tokens_seen": 222704656, "step": 3552 }, { "epoch": 11.820299500831947, "loss": 0.5346720814704895, "loss_ce": 4.092928065801971e-06, "loss_iou": 0.197265625, "loss_num": 0.0281982421875, "loss_xval": 0.53515625, "num_input_tokens_seen": 222704656, "step": 3552 }, { "epoch": 11.823627287853578, "grad_norm": 20.29110336303711, "learning_rate": 5e-06, "loss": 0.3718, "num_input_tokens_seen": 222766600, "step": 3553 }, { "epoch": 11.823627287853578, "loss": 0.3203747570514679, "loss_ce": 1.2150272823419073e-06, "loss_iou": 0.1337890625, "loss_num": 0.0106201171875, "loss_xval": 0.3203125, "num_input_tokens_seen": 222766600, "step": 3553 }, { "epoch": 11.826955074875208, "grad_norm": 19.782983779907227, "learning_rate": 5e-06, "loss": 0.3955, "num_input_tokens_seen": 222828816, "step": 3554 }, { "epoch": 11.826955074875208, "loss": 0.3985621929168701, "loss_ce": 2.6472780518815853e-06, "loss_iou": 0.125, "loss_num": 0.0296630859375, "loss_xval": 0.3984375, "num_input_tokens_seen": 222828816, "step": 3554 }, { "epoch": 11.830282861896839, "grad_norm": 8.067495346069336, "learning_rate": 5e-06, "loss": 0.37, "num_input_tokens_seen": 222890924, "step": 3555 }, { "epoch": 11.830282861896839, "loss": 0.3916335105895996, "loss_ce": 1.415005272065173e-06, "loss_iou": 0.1494140625, "loss_num": 0.0186767578125, "loss_xval": 0.392578125, "num_input_tokens_seen": 222890924, "step": 3555 }, { "epoch": 11.83361064891847, "grad_norm": 14.176030158996582, "learning_rate": 5e-06, "loss": 0.5956, "num_input_tokens_seen": 222953988, "step": 3556 }, { "epoch": 11.83361064891847, "loss": 0.5911171436309814, "loss_ce": 0.00011374703899491578, "loss_iou": 0.236328125, "loss_num": 0.0238037109375, "loss_xval": 0.58984375, "num_input_tokens_seen": 222953988, "step": 3556 }, { "epoch": 11.8369384359401, "grad_norm": 39.198787689208984, "learning_rate": 5e-06, "loss": 0.6789, "num_input_tokens_seen": 223016716, "step": 3557 }, { "epoch": 11.8369384359401, "loss": 0.7756354808807373, "loss_ce": 6.940294383639412e-07, "loss_iou": 0.333984375, "loss_num": 0.02197265625, "loss_xval": 0.77734375, "num_input_tokens_seen": 223016716, "step": 3557 }, { "epoch": 11.84026622296173, "grad_norm": 23.616535186767578, "learning_rate": 5e-06, "loss": 0.4856, "num_input_tokens_seen": 223080056, "step": 3558 }, { "epoch": 11.84026622296173, "loss": 0.31555697321891785, "loss_ce": 6.62723250570707e-05, "loss_iou": 0.140625, "loss_num": 0.0068359375, "loss_xval": 0.31640625, "num_input_tokens_seen": 223080056, "step": 3558 }, { "epoch": 11.843594009983361, "grad_norm": 22.97985076904297, "learning_rate": 5e-06, "loss": 0.7535, "num_input_tokens_seen": 223142784, "step": 3559 }, { "epoch": 11.843594009983361, "loss": 0.8857477903366089, "loss_ce": 5.66840753890574e-06, "loss_iou": 0.35546875, "loss_num": 0.03515625, "loss_xval": 0.88671875, "num_input_tokens_seen": 223142784, "step": 3559 }, { "epoch": 11.846921797004992, "grad_norm": 39.94523239135742, "learning_rate": 5e-06, "loss": 0.5018, "num_input_tokens_seen": 223205220, "step": 3560 }, { "epoch": 11.846921797004992, "loss": 0.36049434542655945, "loss_ce": 0.0003869244537781924, "loss_iou": 0.1484375, "loss_num": 0.0126953125, "loss_xval": 0.359375, "num_input_tokens_seen": 223205220, "step": 3560 }, { "epoch": 11.850249584026622, "grad_norm": 37.986602783203125, "learning_rate": 5e-06, "loss": 0.4589, "num_input_tokens_seen": 223267984, "step": 3561 }, { "epoch": 11.850249584026622, "loss": 0.38489872217178345, "loss_ce": 0.0010486195096746087, "loss_iou": 0.14453125, "loss_num": 0.0186767578125, "loss_xval": 0.384765625, "num_input_tokens_seen": 223267984, "step": 3561 }, { "epoch": 11.853577371048253, "grad_norm": 41.77482986450195, "learning_rate": 5e-06, "loss": 0.6053, "num_input_tokens_seen": 223331520, "step": 3562 }, { "epoch": 11.853577371048253, "loss": 0.7701247930526733, "loss_ce": 0.0007156134815886617, "loss_iou": 0.31640625, "loss_num": 0.0274658203125, "loss_xval": 0.76953125, "num_input_tokens_seen": 223331520, "step": 3562 }, { "epoch": 11.856905158069884, "grad_norm": 23.879619598388672, "learning_rate": 5e-06, "loss": 0.4491, "num_input_tokens_seen": 223393012, "step": 3563 }, { "epoch": 11.856905158069884, "loss": 0.6469746828079224, "loss_ce": 2.043438144028187e-06, "loss_iou": 0.28125, "loss_num": 0.0169677734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 223393012, "step": 3563 }, { "epoch": 11.860232945091514, "grad_norm": 15.75499439239502, "learning_rate": 5e-06, "loss": 0.5882, "num_input_tokens_seen": 223456072, "step": 3564 }, { "epoch": 11.860232945091514, "loss": 0.6172223687171936, "loss_ce": 3.488008587737568e-05, "loss_iou": 0.248046875, "loss_num": 0.024169921875, "loss_xval": 0.6171875, "num_input_tokens_seen": 223456072, "step": 3564 }, { "epoch": 11.863560732113145, "grad_norm": 39.80231475830078, "learning_rate": 5e-06, "loss": 0.5506, "num_input_tokens_seen": 223519292, "step": 3565 }, { "epoch": 11.863560732113145, "loss": 0.5271011590957642, "loss_ce": 1.5451578292413615e-06, "loss_iou": 0.20703125, "loss_num": 0.0224609375, "loss_xval": 0.52734375, "num_input_tokens_seen": 223519292, "step": 3565 }, { "epoch": 11.866888519134775, "grad_norm": 41.10429382324219, "learning_rate": 5e-06, "loss": 0.5101, "num_input_tokens_seen": 223582684, "step": 3566 }, { "epoch": 11.866888519134775, "loss": 0.6300239562988281, "loss_ce": 1.9095623429166153e-05, "loss_iou": 0.2578125, "loss_num": 0.022705078125, "loss_xval": 0.62890625, "num_input_tokens_seen": 223582684, "step": 3566 }, { "epoch": 11.870216306156406, "grad_norm": 34.14036178588867, "learning_rate": 5e-06, "loss": 0.5805, "num_input_tokens_seen": 223644524, "step": 3567 }, { "epoch": 11.870216306156406, "loss": 0.7253423929214478, "loss_ce": 5.721730644836498e-07, "loss_iou": 0.32421875, "loss_num": 0.01507568359375, "loss_xval": 0.7265625, "num_input_tokens_seen": 223644524, "step": 3567 }, { "epoch": 11.873544093178037, "grad_norm": 20.468687057495117, "learning_rate": 5e-06, "loss": 0.4402, "num_input_tokens_seen": 223707012, "step": 3568 }, { "epoch": 11.873544093178037, "loss": 0.46942704916000366, "loss_ce": 0.0005854758201166987, "loss_iou": 0.169921875, "loss_num": 0.025634765625, "loss_xval": 0.46875, "num_input_tokens_seen": 223707012, "step": 3568 }, { "epoch": 11.876871880199667, "grad_norm": 12.284894943237305, "learning_rate": 5e-06, "loss": 0.5368, "num_input_tokens_seen": 223770632, "step": 3569 }, { "epoch": 11.876871880199667, "loss": 0.5924705862998962, "loss_ce": 2.352222963963868e-06, "loss_iou": 0.244140625, "loss_num": 0.0208740234375, "loss_xval": 0.59375, "num_input_tokens_seen": 223770632, "step": 3569 }, { "epoch": 11.880199667221298, "grad_norm": 12.67453384399414, "learning_rate": 5e-06, "loss": 0.6036, "num_input_tokens_seen": 223833456, "step": 3570 }, { "epoch": 11.880199667221298, "loss": 0.6144264936447144, "loss_ce": 0.00016871416301000863, "loss_iou": 0.244140625, "loss_num": 0.0252685546875, "loss_xval": 0.61328125, "num_input_tokens_seen": 223833456, "step": 3570 }, { "epoch": 11.883527454242929, "grad_norm": 19.113454818725586, "learning_rate": 5e-06, "loss": 0.4811, "num_input_tokens_seen": 223896228, "step": 3571 }, { "epoch": 11.883527454242929, "loss": 0.5026337504386902, "loss_ce": 9.230750947608612e-06, "loss_iou": 0.2119140625, "loss_num": 0.015869140625, "loss_xval": 0.50390625, "num_input_tokens_seen": 223896228, "step": 3571 }, { "epoch": 11.88685524126456, "grad_norm": 14.744512557983398, "learning_rate": 5e-06, "loss": 0.4439, "num_input_tokens_seen": 223958908, "step": 3572 }, { "epoch": 11.88685524126456, "loss": 0.5912655591964722, "loss_ce": 1.7980717530008405e-05, "loss_iou": 0.2021484375, "loss_num": 0.03759765625, "loss_xval": 0.58984375, "num_input_tokens_seen": 223958908, "step": 3572 }, { "epoch": 11.89018302828619, "grad_norm": 13.608325958251953, "learning_rate": 5e-06, "loss": 0.5123, "num_input_tokens_seen": 224019752, "step": 3573 }, { "epoch": 11.89018302828619, "loss": 0.6452793478965759, "loss_ce": 4.5912787527413457e-07, "loss_iou": 0.228515625, "loss_num": 0.03759765625, "loss_xval": 0.64453125, "num_input_tokens_seen": 224019752, "step": 3573 }, { "epoch": 11.89351081530782, "grad_norm": 31.046213150024414, "learning_rate": 5e-06, "loss": 0.6426, "num_input_tokens_seen": 224083392, "step": 3574 }, { "epoch": 11.89351081530782, "loss": 0.6525874137878418, "loss_ce": 0.00012159140169387683, "loss_iou": 0.259765625, "loss_num": 0.026611328125, "loss_xval": 0.65234375, "num_input_tokens_seen": 224083392, "step": 3574 }, { "epoch": 11.896838602329451, "grad_norm": 28.231470108032227, "learning_rate": 5e-06, "loss": 0.5917, "num_input_tokens_seen": 224146784, "step": 3575 }, { "epoch": 11.896838602329451, "loss": 0.34351760149002075, "loss_ce": 1.175187026092317e-05, "loss_iou": 0.1171875, "loss_num": 0.0218505859375, "loss_xval": 0.34375, "num_input_tokens_seen": 224146784, "step": 3575 }, { "epoch": 11.900166389351082, "grad_norm": 9.482150077819824, "learning_rate": 5e-06, "loss": 0.3357, "num_input_tokens_seen": 224208384, "step": 3576 }, { "epoch": 11.900166389351082, "loss": 0.45229771733283997, "loss_ce": 0.00039341123192571104, "loss_iou": 0.087890625, "loss_num": 0.055419921875, "loss_xval": 0.451171875, "num_input_tokens_seen": 224208384, "step": 3576 }, { "epoch": 11.903494176372712, "grad_norm": 9.780954360961914, "learning_rate": 5e-06, "loss": 0.5876, "num_input_tokens_seen": 224271944, "step": 3577 }, { "epoch": 11.903494176372712, "loss": 0.7033237814903259, "loss_ce": 7.668440957786515e-05, "loss_iou": 0.263671875, "loss_num": 0.03515625, "loss_xval": 0.703125, "num_input_tokens_seen": 224271944, "step": 3577 }, { "epoch": 11.906821963394343, "grad_norm": 10.047873497009277, "learning_rate": 5e-06, "loss": 0.4701, "num_input_tokens_seen": 224334108, "step": 3578 }, { "epoch": 11.906821963394343, "loss": 0.3383442759513855, "loss_ce": 0.00011794007878052071, "loss_iou": 0.1181640625, "loss_num": 0.0205078125, "loss_xval": 0.337890625, "num_input_tokens_seen": 224334108, "step": 3578 }, { "epoch": 11.910149750415973, "grad_norm": 15.045981407165527, "learning_rate": 5e-06, "loss": 0.6061, "num_input_tokens_seen": 224396484, "step": 3579 }, { "epoch": 11.910149750415973, "loss": 0.5881359577178955, "loss_ce": 1.1534530131029896e-06, "loss_iou": 0.2177734375, "loss_num": 0.030517578125, "loss_xval": 0.58984375, "num_input_tokens_seen": 224396484, "step": 3579 }, { "epoch": 11.913477537437604, "grad_norm": 7.511158466339111, "learning_rate": 5e-06, "loss": 0.5496, "num_input_tokens_seen": 224458068, "step": 3580 }, { "epoch": 11.913477537437604, "loss": 0.5705637335777283, "loss_ce": 9.86446175375022e-05, "loss_iou": 0.1796875, "loss_num": 0.042236328125, "loss_xval": 0.5703125, "num_input_tokens_seen": 224458068, "step": 3580 }, { "epoch": 11.916805324459235, "grad_norm": 8.993900299072266, "learning_rate": 5e-06, "loss": 0.3592, "num_input_tokens_seen": 224518932, "step": 3581 }, { "epoch": 11.916805324459235, "loss": 0.25341227650642395, "loss_ce": 0.0006656988989561796, "loss_iou": 0.07421875, "loss_num": 0.020751953125, "loss_xval": 0.251953125, "num_input_tokens_seen": 224518932, "step": 3581 }, { "epoch": 11.920133111480865, "grad_norm": 12.807104110717773, "learning_rate": 5e-06, "loss": 0.545, "num_input_tokens_seen": 224582440, "step": 3582 }, { "epoch": 11.920133111480865, "loss": 0.6594873070716858, "loss_ce": 0.00018555697170086205, "loss_iou": 0.1923828125, "loss_num": 0.05517578125, "loss_xval": 0.66015625, "num_input_tokens_seen": 224582440, "step": 3582 }, { "epoch": 11.923460898502496, "grad_norm": 8.517556190490723, "learning_rate": 5e-06, "loss": 0.4585, "num_input_tokens_seen": 224644836, "step": 3583 }, { "epoch": 11.923460898502496, "loss": 0.4253217577934265, "loss_ce": 0.0003339825489092618, "loss_iou": 0.10791015625, "loss_num": 0.041748046875, "loss_xval": 0.42578125, "num_input_tokens_seen": 224644836, "step": 3583 }, { "epoch": 11.926788685524127, "grad_norm": 14.915726661682129, "learning_rate": 5e-06, "loss": 0.3358, "num_input_tokens_seen": 224705464, "step": 3584 }, { "epoch": 11.926788685524127, "loss": 0.4076545834541321, "loss_ce": 0.0004280407156329602, "loss_iou": 0.1298828125, "loss_num": 0.029541015625, "loss_xval": 0.40625, "num_input_tokens_seen": 224705464, "step": 3584 }, { "epoch": 11.930116472545757, "grad_norm": 24.456527709960938, "learning_rate": 5e-06, "loss": 0.7183, "num_input_tokens_seen": 224769048, "step": 3585 }, { "epoch": 11.930116472545757, "loss": 0.5177053213119507, "loss_ce": 5.151061031938298e-06, "loss_iou": 0.1982421875, "loss_num": 0.0240478515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 224769048, "step": 3585 }, { "epoch": 11.933444259567388, "grad_norm": 6.406157493591309, "learning_rate": 5e-06, "loss": 0.4977, "num_input_tokens_seen": 224830764, "step": 3586 }, { "epoch": 11.933444259567388, "loss": 0.5045808553695679, "loss_ce": 3.221236511308234e-06, "loss_iou": 0.2001953125, "loss_num": 0.0208740234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 224830764, "step": 3586 }, { "epoch": 11.936772046589018, "grad_norm": 14.206915855407715, "learning_rate": 5e-06, "loss": 0.4819, "num_input_tokens_seen": 224892900, "step": 3587 }, { "epoch": 11.936772046589018, "loss": 0.46016210317611694, "loss_ce": 0.0002011738979490474, "loss_iou": 0.15625, "loss_num": 0.029296875, "loss_xval": 0.4609375, "num_input_tokens_seen": 224892900, "step": 3587 }, { "epoch": 11.940099833610649, "grad_norm": 4.954067230224609, "learning_rate": 5e-06, "loss": 0.412, "num_input_tokens_seen": 224954508, "step": 3588 }, { "epoch": 11.940099833610649, "loss": 0.416653037071228, "loss_ce": 0.00014914048369973898, "loss_iou": 0.1591796875, "loss_num": 0.0196533203125, "loss_xval": 0.416015625, "num_input_tokens_seen": 224954508, "step": 3588 }, { "epoch": 11.94342762063228, "grad_norm": 18.628999710083008, "learning_rate": 5e-06, "loss": 0.4154, "num_input_tokens_seen": 225017936, "step": 3589 }, { "epoch": 11.94342762063228, "loss": 0.3900908827781677, "loss_ce": 7.576824827992823e-06, "loss_iou": 0.119140625, "loss_num": 0.0302734375, "loss_xval": 0.390625, "num_input_tokens_seen": 225017936, "step": 3589 }, { "epoch": 11.94675540765391, "grad_norm": 21.633724212646484, "learning_rate": 5e-06, "loss": 0.3575, "num_input_tokens_seen": 225079532, "step": 3590 }, { "epoch": 11.94675540765391, "loss": 0.22882996499538422, "loss_ce": 9.166803465632256e-06, "loss_iou": 0.06494140625, "loss_num": 0.0198974609375, "loss_xval": 0.228515625, "num_input_tokens_seen": 225079532, "step": 3590 }, { "epoch": 11.95008319467554, "grad_norm": 11.669150352478027, "learning_rate": 5e-06, "loss": 0.5058, "num_input_tokens_seen": 225142328, "step": 3591 }, { "epoch": 11.95008319467554, "loss": 0.5285732746124268, "loss_ce": 8.867129508871585e-06, "loss_iou": 0.20703125, "loss_num": 0.022705078125, "loss_xval": 0.52734375, "num_input_tokens_seen": 225142328, "step": 3591 }, { "epoch": 11.953410981697171, "grad_norm": 11.280741691589355, "learning_rate": 5e-06, "loss": 0.3391, "num_input_tokens_seen": 225204812, "step": 3592 }, { "epoch": 11.953410981697171, "loss": 0.3194628655910492, "loss_ce": 4.8533706831221934e-06, "loss_iou": 0.09814453125, "loss_num": 0.0245361328125, "loss_xval": 0.3203125, "num_input_tokens_seen": 225204812, "step": 3592 }, { "epoch": 11.956738768718802, "grad_norm": 16.818256378173828, "learning_rate": 5e-06, "loss": 0.6285, "num_input_tokens_seen": 225267104, "step": 3593 }, { "epoch": 11.956738768718802, "loss": 0.5338151454925537, "loss_ce": 1.6487189213876263e-06, "loss_iou": 0.208984375, "loss_num": 0.023193359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 225267104, "step": 3593 }, { "epoch": 11.960066555740433, "grad_norm": 21.73577880859375, "learning_rate": 5e-06, "loss": 0.5644, "num_input_tokens_seen": 225330436, "step": 3594 }, { "epoch": 11.960066555740433, "loss": 0.5425610542297363, "loss_ce": 0.00020265298371668905, "loss_iou": 0.2021484375, "loss_num": 0.0279541015625, "loss_xval": 0.54296875, "num_input_tokens_seen": 225330436, "step": 3594 }, { "epoch": 11.963394342762063, "grad_norm": 12.222944259643555, "learning_rate": 5e-06, "loss": 0.4721, "num_input_tokens_seen": 225393232, "step": 3595 }, { "epoch": 11.963394342762063, "loss": 0.6120119690895081, "loss_ce": 7.352505053859204e-05, "loss_iou": 0.2451171875, "loss_num": 0.0245361328125, "loss_xval": 0.61328125, "num_input_tokens_seen": 225393232, "step": 3595 }, { "epoch": 11.966722129783694, "grad_norm": 8.467930793762207, "learning_rate": 5e-06, "loss": 0.5068, "num_input_tokens_seen": 225456236, "step": 3596 }, { "epoch": 11.966722129783694, "loss": 0.6729846596717834, "loss_ce": 1.0975919394695666e-05, "loss_iou": 0.26171875, "loss_num": 0.030029296875, "loss_xval": 0.671875, "num_input_tokens_seen": 225456236, "step": 3596 }, { "epoch": 11.970049916805324, "grad_norm": 10.18317985534668, "learning_rate": 5e-06, "loss": 0.3878, "num_input_tokens_seen": 225518696, "step": 3597 }, { "epoch": 11.970049916805324, "loss": 0.24841386079788208, "loss_ce": 7.718440429016482e-07, "loss_iou": 0.08984375, "loss_num": 0.01385498046875, "loss_xval": 0.248046875, "num_input_tokens_seen": 225518696, "step": 3597 }, { "epoch": 11.973377703826955, "grad_norm": 10.094636917114258, "learning_rate": 5e-06, "loss": 0.5594, "num_input_tokens_seen": 225580988, "step": 3598 }, { "epoch": 11.973377703826955, "loss": 0.704042911529541, "loss_ce": 0.0006737210205756128, "loss_iou": 0.28125, "loss_num": 0.0279541015625, "loss_xval": 0.703125, "num_input_tokens_seen": 225580988, "step": 3598 }, { "epoch": 11.976705490848586, "grad_norm": 13.175618171691895, "learning_rate": 5e-06, "loss": 0.4872, "num_input_tokens_seen": 225641496, "step": 3599 }, { "epoch": 11.976705490848586, "loss": 0.5009208917617798, "loss_ce": 5.319694537320174e-06, "loss_iou": 0.1728515625, "loss_num": 0.03125, "loss_xval": 0.5, "num_input_tokens_seen": 225641496, "step": 3599 }, { "epoch": 11.980033277870216, "grad_norm": 15.480469703674316, "learning_rate": 5e-06, "loss": 0.596, "num_input_tokens_seen": 225704276, "step": 3600 }, { "epoch": 11.980033277870216, "loss": 0.8280503153800964, "loss_ce": 0.0004136155766900629, "loss_iou": 0.294921875, "loss_num": 0.04736328125, "loss_xval": 0.828125, "num_input_tokens_seen": 225704276, "step": 3600 }, { "epoch": 11.983361064891847, "grad_norm": 15.52588939666748, "learning_rate": 5e-06, "loss": 0.3801, "num_input_tokens_seen": 225767888, "step": 3601 }, { "epoch": 11.983361064891847, "loss": 0.3965487480163574, "loss_ce": 3.333892209411715e-06, "loss_iou": 0.1708984375, "loss_num": 0.01104736328125, "loss_xval": 0.396484375, "num_input_tokens_seen": 225767888, "step": 3601 }, { "epoch": 11.986688851913478, "grad_norm": 12.219480514526367, "learning_rate": 5e-06, "loss": 0.6122, "num_input_tokens_seen": 225830172, "step": 3602 }, { "epoch": 11.986688851913478, "loss": 0.7155794501304626, "loss_ce": 3.311846285214415e-06, "loss_iou": 0.271484375, "loss_num": 0.0341796875, "loss_xval": 0.71484375, "num_input_tokens_seen": 225830172, "step": 3602 }, { "epoch": 11.990016638935108, "grad_norm": 17.817956924438477, "learning_rate": 5e-06, "loss": 0.58, "num_input_tokens_seen": 225893388, "step": 3603 }, { "epoch": 11.990016638935108, "loss": 0.7027574181556702, "loss_ce": 0.0003648407000582665, "loss_iou": 0.2490234375, "loss_num": 0.040771484375, "loss_xval": 0.703125, "num_input_tokens_seen": 225893388, "step": 3603 }, { "epoch": 11.993344425956739, "grad_norm": 9.410577774047852, "learning_rate": 5e-06, "loss": 0.4694, "num_input_tokens_seen": 225955356, "step": 3604 }, { "epoch": 11.993344425956739, "loss": 0.5113706588745117, "loss_ce": 1.8130265743820928e-05, "loss_iou": 0.1904296875, "loss_num": 0.02587890625, "loss_xval": 0.51171875, "num_input_tokens_seen": 225955356, "step": 3604 }, { "epoch": 11.99667221297837, "grad_norm": 11.882787704467773, "learning_rate": 5e-06, "loss": 0.2523, "num_input_tokens_seen": 226016120, "step": 3605 }, { "epoch": 11.99667221297837, "loss": 0.3414000868797302, "loss_ce": 9.149447578238323e-05, "loss_iou": 0.1259765625, "loss_num": 0.0177001953125, "loss_xval": 0.341796875, "num_input_tokens_seen": 226016120, "step": 3605 }, { "epoch": 12.0, "grad_norm": 6.815561294555664, "learning_rate": 5e-06, "loss": 0.4756, "num_input_tokens_seen": 226079544, "step": 3606 }, { "epoch": 12.0, "loss": 0.5995486974716187, "loss_ce": 0.00018348342564422637, "loss_iou": 0.236328125, "loss_num": 0.0252685546875, "loss_xval": 0.59765625, "num_input_tokens_seen": 226079544, "step": 3606 }, { "epoch": 12.00332778702163, "grad_norm": 8.781085968017578, "learning_rate": 5e-06, "loss": 0.5342, "num_input_tokens_seen": 226143308, "step": 3607 }, { "epoch": 12.00332778702163, "loss": 0.4077243208885193, "loss_ce": 9.486148883297574e-06, "loss_iou": 0.1328125, "loss_num": 0.0283203125, "loss_xval": 0.408203125, "num_input_tokens_seen": 226143308, "step": 3607 }, { "epoch": 12.006655574043261, "grad_norm": 17.34701919555664, "learning_rate": 5e-06, "loss": 0.565, "num_input_tokens_seen": 226206520, "step": 3608 }, { "epoch": 12.006655574043261, "loss": 0.4869929552078247, "loss_ce": 5.4449130402645096e-05, "loss_iou": 0.2060546875, "loss_num": 0.01483154296875, "loss_xval": 0.486328125, "num_input_tokens_seen": 226206520, "step": 3608 }, { "epoch": 12.009983361064892, "grad_norm": 17.614789962768555, "learning_rate": 5e-06, "loss": 0.5705, "num_input_tokens_seen": 226269744, "step": 3609 }, { "epoch": 12.009983361064892, "loss": 0.3466750383377075, "loss_ce": 1.0613476661092136e-05, "loss_iou": 0.1064453125, "loss_num": 0.0269775390625, "loss_xval": 0.345703125, "num_input_tokens_seen": 226269744, "step": 3609 }, { "epoch": 12.013311148086522, "grad_norm": 27.91145896911621, "learning_rate": 5e-06, "loss": 0.5638, "num_input_tokens_seen": 226333304, "step": 3610 }, { "epoch": 12.013311148086522, "loss": 0.6107625961303711, "loss_ce": 0.00041100100497715175, "loss_iou": 0.236328125, "loss_num": 0.027587890625, "loss_xval": 0.609375, "num_input_tokens_seen": 226333304, "step": 3610 }, { "epoch": 12.016638935108153, "grad_norm": 32.357383728027344, "learning_rate": 5e-06, "loss": 0.497, "num_input_tokens_seen": 226395860, "step": 3611 }, { "epoch": 12.016638935108153, "loss": 0.689986526966095, "loss_ce": 4.511252336669713e-05, "loss_iou": 0.27734375, "loss_num": 0.0269775390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 226395860, "step": 3611 }, { "epoch": 12.019966722129784, "grad_norm": 22.94788932800293, "learning_rate": 5e-06, "loss": 0.4379, "num_input_tokens_seen": 226458088, "step": 3612 }, { "epoch": 12.019966722129784, "loss": 0.2799735963344574, "loss_ce": 5.333593890100019e-06, "loss_iou": 0.068359375, "loss_num": 0.028564453125, "loss_xval": 0.279296875, "num_input_tokens_seen": 226458088, "step": 3612 }, { "epoch": 12.023294509151414, "grad_norm": 23.780651092529297, "learning_rate": 5e-06, "loss": 0.6233, "num_input_tokens_seen": 226520672, "step": 3613 }, { "epoch": 12.023294509151414, "loss": 0.5970170497894287, "loss_ce": 0.00033731100847944617, "loss_iou": 0.2490234375, "loss_num": 0.0194091796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 226520672, "step": 3613 }, { "epoch": 12.026622296173045, "grad_norm": 26.930875778198242, "learning_rate": 5e-06, "loss": 0.472, "num_input_tokens_seen": 226583416, "step": 3614 }, { "epoch": 12.026622296173045, "loss": 0.5517615079879761, "loss_ce": 3.6974074646423105e-06, "loss_iou": 0.2265625, "loss_num": 0.0198974609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 226583416, "step": 3614 }, { "epoch": 12.029950083194676, "grad_norm": 22.417985916137695, "learning_rate": 5e-06, "loss": 0.6581, "num_input_tokens_seen": 226644188, "step": 3615 }, { "epoch": 12.029950083194676, "loss": 0.6123055219650269, "loss_ce": 8.572826573072234e-07, "loss_iou": 0.2294921875, "loss_num": 0.030517578125, "loss_xval": 0.61328125, "num_input_tokens_seen": 226644188, "step": 3615 }, { "epoch": 12.033277870216306, "grad_norm": 10.989538192749023, "learning_rate": 5e-06, "loss": 0.2629, "num_input_tokens_seen": 226706260, "step": 3616 }, { "epoch": 12.033277870216306, "loss": 0.26330268383026123, "loss_ce": 5.80622763663996e-05, "loss_iou": 0.103515625, "loss_num": 0.01116943359375, "loss_xval": 0.263671875, "num_input_tokens_seen": 226706260, "step": 3616 }, { "epoch": 12.036605657237937, "grad_norm": 29.382143020629883, "learning_rate": 5e-06, "loss": 0.5065, "num_input_tokens_seen": 226768980, "step": 3617 }, { "epoch": 12.036605657237937, "loss": 0.5752057433128357, "loss_ce": 1.0448078683111817e-05, "loss_iou": 0.2216796875, "loss_num": 0.0263671875, "loss_xval": 0.57421875, "num_input_tokens_seen": 226768980, "step": 3617 }, { "epoch": 12.039933444259567, "grad_norm": 27.026630401611328, "learning_rate": 5e-06, "loss": 0.5031, "num_input_tokens_seen": 226831032, "step": 3618 }, { "epoch": 12.039933444259567, "loss": 0.5082716345787048, "loss_ce": 1.3644530554302037e-06, "loss_iou": 0.197265625, "loss_num": 0.02294921875, "loss_xval": 0.5078125, "num_input_tokens_seen": 226831032, "step": 3618 }, { "epoch": 12.043261231281198, "grad_norm": 14.441183090209961, "learning_rate": 5e-06, "loss": 0.4977, "num_input_tokens_seen": 226895936, "step": 3619 }, { "epoch": 12.043261231281198, "loss": 0.3484007716178894, "loss_ce": 1.2104188499506563e-05, "loss_iou": 0.15234375, "loss_num": 0.00872802734375, "loss_xval": 0.34765625, "num_input_tokens_seen": 226895936, "step": 3619 }, { "epoch": 12.046589018302829, "grad_norm": 6.6318745613098145, "learning_rate": 5e-06, "loss": 0.5349, "num_input_tokens_seen": 226958588, "step": 3620 }, { "epoch": 12.046589018302829, "loss": 0.29362064599990845, "loss_ce": 1.1010197340510786e-05, "loss_iou": 0.09912109375, "loss_num": 0.0189208984375, "loss_xval": 0.29296875, "num_input_tokens_seen": 226958588, "step": 3620 }, { "epoch": 12.04991680532446, "grad_norm": 16.22369384765625, "learning_rate": 5e-06, "loss": 0.5314, "num_input_tokens_seen": 227020536, "step": 3621 }, { "epoch": 12.04991680532446, "loss": 0.3035435676574707, "loss_ce": 4.541063276519708e-07, "loss_iou": 0.10302734375, "loss_num": 0.01953125, "loss_xval": 0.302734375, "num_input_tokens_seen": 227020536, "step": 3621 }, { "epoch": 12.05324459234609, "grad_norm": 19.868885040283203, "learning_rate": 5e-06, "loss": 0.3565, "num_input_tokens_seen": 227083736, "step": 3622 }, { "epoch": 12.05324459234609, "loss": 0.3149426281452179, "loss_ce": 1.2296095519559458e-06, "loss_iou": 0.1103515625, "loss_num": 0.0189208984375, "loss_xval": 0.314453125, "num_input_tokens_seen": 227083736, "step": 3622 }, { "epoch": 12.05657237936772, "grad_norm": 21.965768814086914, "learning_rate": 5e-06, "loss": 0.4156, "num_input_tokens_seen": 227145868, "step": 3623 }, { "epoch": 12.05657237936772, "loss": 0.4776461124420166, "loss_ce": 0.0008394730975851417, "loss_iou": 0.1435546875, "loss_num": 0.0380859375, "loss_xval": 0.4765625, "num_input_tokens_seen": 227145868, "step": 3623 }, { "epoch": 12.059900166389351, "grad_norm": 19.99475860595703, "learning_rate": 5e-06, "loss": 0.4895, "num_input_tokens_seen": 227207972, "step": 3624 }, { "epoch": 12.059900166389351, "loss": 0.2570817172527313, "loss_ce": 1.6339297417289345e-06, "loss_iou": 0.07861328125, "loss_num": 0.02001953125, "loss_xval": 0.2578125, "num_input_tokens_seen": 227207972, "step": 3624 }, { "epoch": 12.063227953410982, "grad_norm": 14.230467796325684, "learning_rate": 5e-06, "loss": 0.5462, "num_input_tokens_seen": 227270696, "step": 3625 }, { "epoch": 12.063227953410982, "loss": 0.6326147317886353, "loss_ce": 0.00010737713455455378, "loss_iou": 0.2060546875, "loss_num": 0.044189453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 227270696, "step": 3625 }, { "epoch": 12.066555740432612, "grad_norm": 7.9542999267578125, "learning_rate": 5e-06, "loss": 0.4007, "num_input_tokens_seen": 227334648, "step": 3626 }, { "epoch": 12.066555740432612, "loss": 0.4038970470428467, "loss_ce": 2.7415268050390296e-05, "loss_iou": 0.173828125, "loss_num": 0.0113525390625, "loss_xval": 0.404296875, "num_input_tokens_seen": 227334648, "step": 3626 }, { "epoch": 12.069883527454243, "grad_norm": 10.886966705322266, "learning_rate": 5e-06, "loss": 0.4137, "num_input_tokens_seen": 227398396, "step": 3627 }, { "epoch": 12.069883527454243, "loss": 0.335619181394577, "loss_ce": 0.0006582419737242162, "loss_iou": 0.1279296875, "loss_num": 0.0159912109375, "loss_xval": 0.3359375, "num_input_tokens_seen": 227398396, "step": 3627 }, { "epoch": 12.073211314475873, "grad_norm": 8.991013526916504, "learning_rate": 5e-06, "loss": 0.344, "num_input_tokens_seen": 227459828, "step": 3628 }, { "epoch": 12.073211314475873, "loss": 0.32029807567596436, "loss_ce": 8.380115446016134e-07, "loss_iou": 0.0751953125, "loss_num": 0.033935546875, "loss_xval": 0.3203125, "num_input_tokens_seen": 227459828, "step": 3628 }, { "epoch": 12.076539101497504, "grad_norm": 24.334083557128906, "learning_rate": 5e-06, "loss": 0.5271, "num_input_tokens_seen": 227522324, "step": 3629 }, { "epoch": 12.076539101497504, "loss": 0.5346799492835999, "loss_ce": 0.0001340369926765561, "loss_iou": 0.1474609375, "loss_num": 0.0478515625, "loss_xval": 0.53515625, "num_input_tokens_seen": 227522324, "step": 3629 }, { "epoch": 12.079866888519135, "grad_norm": 13.799015998840332, "learning_rate": 5e-06, "loss": 0.5423, "num_input_tokens_seen": 227583128, "step": 3630 }, { "epoch": 12.079866888519135, "loss": 0.5261608362197876, "loss_ce": 3.776444646064192e-05, "loss_iou": 0.2021484375, "loss_num": 0.0242919921875, "loss_xval": 0.52734375, "num_input_tokens_seen": 227583128, "step": 3630 }, { "epoch": 12.083194675540765, "grad_norm": 10.043610572814941, "learning_rate": 5e-06, "loss": 0.4598, "num_input_tokens_seen": 227643428, "step": 3631 }, { "epoch": 12.083194675540765, "loss": 0.5852952599525452, "loss_ce": 0.00021222778013907373, "loss_iou": 0.224609375, "loss_num": 0.0272216796875, "loss_xval": 0.5859375, "num_input_tokens_seen": 227643428, "step": 3631 }, { "epoch": 12.086522462562396, "grad_norm": 7.048380374908447, "learning_rate": 5e-06, "loss": 0.4063, "num_input_tokens_seen": 227706988, "step": 3632 }, { "epoch": 12.086522462562396, "loss": 0.5335097312927246, "loss_ce": 1.4042277598491637e-06, "loss_iou": 0.2080078125, "loss_num": 0.0234375, "loss_xval": 0.53515625, "num_input_tokens_seen": 227706988, "step": 3632 }, { "epoch": 12.089850249584027, "grad_norm": 12.409662246704102, "learning_rate": 5e-06, "loss": 0.5281, "num_input_tokens_seen": 227769668, "step": 3633 }, { "epoch": 12.089850249584027, "loss": 0.28881949186325073, "loss_ce": 1.134649323830672e-06, "loss_iou": 0.10302734375, "loss_num": 0.0164794921875, "loss_xval": 0.2890625, "num_input_tokens_seen": 227769668, "step": 3633 }, { "epoch": 12.093178036605657, "grad_norm": 11.196216583251953, "learning_rate": 5e-06, "loss": 0.4676, "num_input_tokens_seen": 227833140, "step": 3634 }, { "epoch": 12.093178036605657, "loss": 0.3458382189273834, "loss_ce": 1.3044584193266928e-05, "loss_iou": 0.1376953125, "loss_num": 0.01409912109375, "loss_xval": 0.345703125, "num_input_tokens_seen": 227833140, "step": 3634 }, { "epoch": 12.096505823627288, "grad_norm": 14.385194778442383, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 227896080, "step": 3635 }, { "epoch": 12.096505823627288, "loss": 0.3058025538921356, "loss_ce": 7.745559560135007e-05, "loss_iou": 0.091796875, "loss_num": 0.0242919921875, "loss_xval": 0.306640625, "num_input_tokens_seen": 227896080, "step": 3635 }, { "epoch": 12.099833610648918, "grad_norm": 13.85834789276123, "learning_rate": 5e-06, "loss": 0.5537, "num_input_tokens_seen": 227959316, "step": 3636 }, { "epoch": 12.099833610648918, "loss": 0.49659591913223267, "loss_ce": 1.3897312783228699e-05, "loss_iou": 0.18359375, "loss_num": 0.0260009765625, "loss_xval": 0.49609375, "num_input_tokens_seen": 227959316, "step": 3636 }, { "epoch": 12.103161397670549, "grad_norm": 20.504776000976562, "learning_rate": 5e-06, "loss": 0.4711, "num_input_tokens_seen": 228022972, "step": 3637 }, { "epoch": 12.103161397670549, "loss": 0.39112985134124756, "loss_ce": 1.305156729358714e-06, "loss_iou": 0.142578125, "loss_num": 0.0213623046875, "loss_xval": 0.390625, "num_input_tokens_seen": 228022972, "step": 3637 }, { "epoch": 12.10648918469218, "grad_norm": 9.968609809875488, "learning_rate": 5e-06, "loss": 0.4271, "num_input_tokens_seen": 228084136, "step": 3638 }, { "epoch": 12.10648918469218, "loss": 0.3968530297279358, "loss_ce": 2.4321855107700685e-06, "loss_iou": 0.1162109375, "loss_num": 0.032958984375, "loss_xval": 0.396484375, "num_input_tokens_seen": 228084136, "step": 3638 }, { "epoch": 12.10981697171381, "grad_norm": 15.962533950805664, "learning_rate": 5e-06, "loss": 0.7935, "num_input_tokens_seen": 228148052, "step": 3639 }, { "epoch": 12.10981697171381, "loss": 0.6260138750076294, "loss_ce": 6.757577466487419e-06, "loss_iou": 0.2373046875, "loss_num": 0.030029296875, "loss_xval": 0.625, "num_input_tokens_seen": 228148052, "step": 3639 }, { "epoch": 12.11314475873544, "grad_norm": 5.88557767868042, "learning_rate": 5e-06, "loss": 0.476, "num_input_tokens_seen": 228209904, "step": 3640 }, { "epoch": 12.11314475873544, "loss": 0.5297883749008179, "loss_ce": 3.276815732533578e-06, "loss_iou": 0.1552734375, "loss_num": 0.043701171875, "loss_xval": 0.53125, "num_input_tokens_seen": 228209904, "step": 3640 }, { "epoch": 12.116472545757071, "grad_norm": 8.009611129760742, "learning_rate": 5e-06, "loss": 0.4153, "num_input_tokens_seen": 228272668, "step": 3641 }, { "epoch": 12.116472545757071, "loss": 0.5428498983383179, "loss_ce": 3.212908495697775e-06, "loss_iou": 0.181640625, "loss_num": 0.03564453125, "loss_xval": 0.54296875, "num_input_tokens_seen": 228272668, "step": 3641 }, { "epoch": 12.119800332778702, "grad_norm": 9.692776679992676, "learning_rate": 5e-06, "loss": 0.3983, "num_input_tokens_seen": 228333000, "step": 3642 }, { "epoch": 12.119800332778702, "loss": 0.4154307246208191, "loss_ce": 0.00043742245179601014, "loss_iou": 0.1083984375, "loss_num": 0.039794921875, "loss_xval": 0.4140625, "num_input_tokens_seen": 228333000, "step": 3642 }, { "epoch": 12.123128119800333, "grad_norm": 32.1581916809082, "learning_rate": 5e-06, "loss": 0.5175, "num_input_tokens_seen": 228395968, "step": 3643 }, { "epoch": 12.123128119800333, "loss": 0.3020749092102051, "loss_ce": 0.0001340005692327395, "loss_iou": 0.10107421875, "loss_num": 0.02001953125, "loss_xval": 0.302734375, "num_input_tokens_seen": 228395968, "step": 3643 }, { "epoch": 12.126455906821963, "grad_norm": 19.25678253173828, "learning_rate": 5e-06, "loss": 0.4605, "num_input_tokens_seen": 228457924, "step": 3644 }, { "epoch": 12.126455906821963, "loss": 0.4884178340435028, "loss_ce": 0.0006858932902105153, "loss_iou": 0.181640625, "loss_num": 0.0250244140625, "loss_xval": 0.48828125, "num_input_tokens_seen": 228457924, "step": 3644 }, { "epoch": 12.129783693843594, "grad_norm": 15.412956237792969, "learning_rate": 5e-06, "loss": 0.3346, "num_input_tokens_seen": 228521740, "step": 3645 }, { "epoch": 12.129783693843594, "loss": 0.41527366638183594, "loss_ce": 5.153231177246198e-05, "loss_iou": 0.173828125, "loss_num": 0.01361083984375, "loss_xval": 0.416015625, "num_input_tokens_seen": 228521740, "step": 3645 }, { "epoch": 12.133111480865225, "grad_norm": 11.183586120605469, "learning_rate": 5e-06, "loss": 0.4073, "num_input_tokens_seen": 228584092, "step": 3646 }, { "epoch": 12.133111480865225, "loss": 0.553970217704773, "loss_ce": 1.5161427654675208e-05, "loss_iou": 0.203125, "loss_num": 0.029296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 228584092, "step": 3646 }, { "epoch": 12.136439267886855, "grad_norm": 15.658541679382324, "learning_rate": 5e-06, "loss": 0.7162, "num_input_tokens_seen": 228648868, "step": 3647 }, { "epoch": 12.136439267886855, "loss": 0.8057396411895752, "loss_ce": 0.00019770894141402096, "loss_iou": 0.30078125, "loss_num": 0.040771484375, "loss_xval": 0.8046875, "num_input_tokens_seen": 228648868, "step": 3647 }, { "epoch": 12.139767054908486, "grad_norm": 29.069164276123047, "learning_rate": 5e-06, "loss": 0.4786, "num_input_tokens_seen": 228711972, "step": 3648 }, { "epoch": 12.139767054908486, "loss": 0.4234861433506012, "loss_ce": 2.4231168936239555e-05, "loss_iou": 0.15234375, "loss_num": 0.0235595703125, "loss_xval": 0.423828125, "num_input_tokens_seen": 228711972, "step": 3648 }, { "epoch": 12.143094841930116, "grad_norm": 35.97545623779297, "learning_rate": 5e-06, "loss": 0.6636, "num_input_tokens_seen": 228774024, "step": 3649 }, { "epoch": 12.143094841930116, "loss": 0.47935038805007935, "loss_ce": 0.0001023375807562843, "loss_iou": 0.1640625, "loss_num": 0.0301513671875, "loss_xval": 0.478515625, "num_input_tokens_seen": 228774024, "step": 3649 }, { "epoch": 12.146422628951747, "grad_norm": 32.43571090698242, "learning_rate": 5e-06, "loss": 0.5853, "num_input_tokens_seen": 228837112, "step": 3650 }, { "epoch": 12.146422628951747, "loss": 0.6774919033050537, "loss_ce": 1.692945147624414e-06, "loss_iou": 0.30078125, "loss_num": 0.01531982421875, "loss_xval": 0.67578125, "num_input_tokens_seen": 228837112, "step": 3650 }, { "epoch": 12.149750415973378, "grad_norm": 27.52887535095215, "learning_rate": 5e-06, "loss": 0.4564, "num_input_tokens_seen": 228899136, "step": 3651 }, { "epoch": 12.149750415973378, "loss": 0.561587393283844, "loss_ce": 6.396190292434767e-05, "loss_iou": 0.1982421875, "loss_num": 0.033203125, "loss_xval": 0.5625, "num_input_tokens_seen": 228899136, "step": 3651 }, { "epoch": 12.153078202995008, "grad_norm": 22.283403396606445, "learning_rate": 5e-06, "loss": 0.443, "num_input_tokens_seen": 228961392, "step": 3652 }, { "epoch": 12.153078202995008, "loss": 0.5658779740333557, "loss_ce": 0.0002041479165200144, "loss_iou": 0.2109375, "loss_num": 0.02880859375, "loss_xval": 0.56640625, "num_input_tokens_seen": 228961392, "step": 3652 }, { "epoch": 12.156405990016639, "grad_norm": 20.275049209594727, "learning_rate": 5e-06, "loss": 0.613, "num_input_tokens_seen": 229025644, "step": 3653 }, { "epoch": 12.156405990016639, "loss": 0.6084173321723938, "loss_ce": 0.0007512961747124791, "loss_iou": 0.2333984375, "loss_num": 0.0279541015625, "loss_xval": 0.609375, "num_input_tokens_seen": 229025644, "step": 3653 }, { "epoch": 12.15973377703827, "grad_norm": 32.40563201904297, "learning_rate": 5e-06, "loss": 0.449, "num_input_tokens_seen": 229087892, "step": 3654 }, { "epoch": 12.15973377703827, "loss": 0.3071431517601013, "loss_ce": 4.475590685615316e-05, "loss_iou": 0.11474609375, "loss_num": 0.015625, "loss_xval": 0.306640625, "num_input_tokens_seen": 229087892, "step": 3654 }, { "epoch": 12.1630615640599, "grad_norm": 117.33587646484375, "learning_rate": 5e-06, "loss": 0.5981, "num_input_tokens_seen": 229150976, "step": 3655 }, { "epoch": 12.1630615640599, "loss": 0.618297815322876, "loss_ce": 1.169552342616953e-05, "loss_iou": 0.2275390625, "loss_num": 0.03271484375, "loss_xval": 0.6171875, "num_input_tokens_seen": 229150976, "step": 3655 }, { "epoch": 12.16638935108153, "grad_norm": 23.933963775634766, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 229211648, "step": 3656 }, { "epoch": 12.16638935108153, "loss": 0.44000357389450073, "loss_ce": 1.150398134086572e-06, "loss_iou": 0.150390625, "loss_num": 0.0277099609375, "loss_xval": 0.439453125, "num_input_tokens_seen": 229211648, "step": 3656 }, { "epoch": 12.169717138103161, "grad_norm": 8.790037155151367, "learning_rate": 5e-06, "loss": 0.3233, "num_input_tokens_seen": 229273768, "step": 3657 }, { "epoch": 12.169717138103161, "loss": 0.37289655208587646, "loss_ce": 2.2499725673696958e-06, "loss_iou": 0.1455078125, "loss_num": 0.016357421875, "loss_xval": 0.373046875, "num_input_tokens_seen": 229273768, "step": 3657 }, { "epoch": 12.173044925124792, "grad_norm": 6.526882171630859, "learning_rate": 5e-06, "loss": 0.2056, "num_input_tokens_seen": 229334812, "step": 3658 }, { "epoch": 12.173044925124792, "loss": 0.2861352562904358, "loss_ce": 2.4268231300084153e-06, "loss_iou": 0.111328125, "loss_num": 0.0125732421875, "loss_xval": 0.28515625, "num_input_tokens_seen": 229334812, "step": 3658 }, { "epoch": 12.176372712146422, "grad_norm": 12.70242691040039, "learning_rate": 5e-06, "loss": 0.468, "num_input_tokens_seen": 229396536, "step": 3659 }, { "epoch": 12.176372712146422, "loss": 0.32021182775497437, "loss_ce": 6.165778358990792e-06, "loss_iou": 0.09130859375, "loss_num": 0.027587890625, "loss_xval": 0.3203125, "num_input_tokens_seen": 229396536, "step": 3659 }, { "epoch": 12.179700499168053, "grad_norm": 23.68262481689453, "learning_rate": 5e-06, "loss": 0.4456, "num_input_tokens_seen": 229460156, "step": 3660 }, { "epoch": 12.179700499168053, "loss": 0.6194669604301453, "loss_ce": 0.00020426094124559313, "loss_iou": 0.259765625, "loss_num": 0.02001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 229460156, "step": 3660 }, { "epoch": 12.183028286189684, "grad_norm": 17.400352478027344, "learning_rate": 5e-06, "loss": 0.3537, "num_input_tokens_seen": 229523388, "step": 3661 }, { "epoch": 12.183028286189684, "loss": 0.3036530911922455, "loss_ce": 3.1773222417541547e-06, "loss_iou": 0.12158203125, "loss_num": 0.01202392578125, "loss_xval": 0.302734375, "num_input_tokens_seen": 229523388, "step": 3661 }, { "epoch": 12.186356073211314, "grad_norm": 8.795613288879395, "learning_rate": 5e-06, "loss": 0.5132, "num_input_tokens_seen": 229585320, "step": 3662 }, { "epoch": 12.186356073211314, "loss": 0.4938979744911194, "loss_ce": 1.504932470197673e-06, "loss_iou": 0.166015625, "loss_num": 0.0322265625, "loss_xval": 0.494140625, "num_input_tokens_seen": 229585320, "step": 3662 }, { "epoch": 12.189683860232945, "grad_norm": 13.124205589294434, "learning_rate": 5e-06, "loss": 0.7097, "num_input_tokens_seen": 229647640, "step": 3663 }, { "epoch": 12.189683860232945, "loss": 0.6273373365402222, "loss_ce": 1.7999896954279393e-05, "loss_iou": 0.205078125, "loss_num": 0.043212890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 229647640, "step": 3663 }, { "epoch": 12.193011647254576, "grad_norm": 18.556976318359375, "learning_rate": 5e-06, "loss": 0.4923, "num_input_tokens_seen": 229710012, "step": 3664 }, { "epoch": 12.193011647254576, "loss": 0.44056540727615356, "loss_ce": 0.0009902343153953552, "loss_iou": 0.138671875, "loss_num": 0.032470703125, "loss_xval": 0.439453125, "num_input_tokens_seen": 229710012, "step": 3664 }, { "epoch": 12.196339434276206, "grad_norm": 21.199731826782227, "learning_rate": 5e-06, "loss": 0.4241, "num_input_tokens_seen": 229773228, "step": 3665 }, { "epoch": 12.196339434276206, "loss": 0.43921002745628357, "loss_ce": 1.0333361615266767e-06, "loss_iou": 0.169921875, "loss_num": 0.020263671875, "loss_xval": 0.439453125, "num_input_tokens_seen": 229773228, "step": 3665 }, { "epoch": 12.199667221297837, "grad_norm": 28.286821365356445, "learning_rate": 5e-06, "loss": 0.711, "num_input_tokens_seen": 229834652, "step": 3666 }, { "epoch": 12.199667221297837, "loss": 0.8312239646911621, "loss_ce": 1.451658704354486e-06, "loss_iou": 0.298828125, "loss_num": 0.046630859375, "loss_xval": 0.83203125, "num_input_tokens_seen": 229834652, "step": 3666 }, { "epoch": 12.202995008319467, "grad_norm": 32.32391357421875, "learning_rate": 5e-06, "loss": 0.4255, "num_input_tokens_seen": 229895244, "step": 3667 }, { "epoch": 12.202995008319467, "loss": 0.39245694875717163, "loss_ce": 9.166213317257643e-07, "loss_iou": 0.142578125, "loss_num": 0.0216064453125, "loss_xval": 0.392578125, "num_input_tokens_seen": 229895244, "step": 3667 }, { "epoch": 12.206322795341098, "grad_norm": 35.548824310302734, "learning_rate": 5e-06, "loss": 0.5643, "num_input_tokens_seen": 229957560, "step": 3668 }, { "epoch": 12.206322795341098, "loss": 0.682237982749939, "loss_ce": 0.00010907315299846232, "loss_iou": 0.28515625, "loss_num": 0.0224609375, "loss_xval": 0.68359375, "num_input_tokens_seen": 229957560, "step": 3668 }, { "epoch": 12.209650582362729, "grad_norm": 23.75815773010254, "learning_rate": 5e-06, "loss": 0.5991, "num_input_tokens_seen": 230021660, "step": 3669 }, { "epoch": 12.209650582362729, "loss": 0.5705752968788147, "loss_ce": 0.00038485892582684755, "loss_iou": 0.224609375, "loss_num": 0.024169921875, "loss_xval": 0.5703125, "num_input_tokens_seen": 230021660, "step": 3669 }, { "epoch": 12.21297836938436, "grad_norm": 12.681497573852539, "learning_rate": 5e-06, "loss": 0.3732, "num_input_tokens_seen": 230084276, "step": 3670 }, { "epoch": 12.21297836938436, "loss": 0.49249422550201416, "loss_ce": 1.5364787486760179e-06, "loss_iou": 0.19921875, "loss_num": 0.018798828125, "loss_xval": 0.4921875, "num_input_tokens_seen": 230084276, "step": 3670 }, { "epoch": 12.21630615640599, "grad_norm": 16.6107234954834, "learning_rate": 5e-06, "loss": 0.4826, "num_input_tokens_seen": 230147796, "step": 3671 }, { "epoch": 12.21630615640599, "loss": 0.4271872639656067, "loss_ce": 0.00018530410306993872, "loss_iou": 0.18359375, "loss_num": 0.0120849609375, "loss_xval": 0.427734375, "num_input_tokens_seen": 230147796, "step": 3671 }, { "epoch": 12.21963394342762, "grad_norm": 15.590807914733887, "learning_rate": 5e-06, "loss": 0.6288, "num_input_tokens_seen": 230211616, "step": 3672 }, { "epoch": 12.21963394342762, "loss": 0.5012725591659546, "loss_ce": 8.234033157350495e-05, "loss_iou": 0.1943359375, "loss_num": 0.0224609375, "loss_xval": 0.5, "num_input_tokens_seen": 230211616, "step": 3672 }, { "epoch": 12.222961730449251, "grad_norm": 19.385725021362305, "learning_rate": 5e-06, "loss": 0.5102, "num_input_tokens_seen": 230273164, "step": 3673 }, { "epoch": 12.222961730449251, "loss": 0.5537664294242859, "loss_ce": 2.497113928257022e-05, "loss_iou": 0.1982421875, "loss_num": 0.03173828125, "loss_xval": 0.5546875, "num_input_tokens_seen": 230273164, "step": 3673 }, { "epoch": 12.226289517470882, "grad_norm": 23.928598403930664, "learning_rate": 5e-06, "loss": 0.5562, "num_input_tokens_seen": 230335900, "step": 3674 }, { "epoch": 12.226289517470882, "loss": 0.5327159762382507, "loss_ce": 1.127923724197899e-06, "loss_iou": 0.2275390625, "loss_num": 0.01556396484375, "loss_xval": 0.53125, "num_input_tokens_seen": 230335900, "step": 3674 }, { "epoch": 12.229617304492512, "grad_norm": 34.358482360839844, "learning_rate": 5e-06, "loss": 0.4797, "num_input_tokens_seen": 230398240, "step": 3675 }, { "epoch": 12.229617304492512, "loss": 0.5817902088165283, "loss_ce": 3.0469584544334793e-06, "loss_iou": 0.216796875, "loss_num": 0.0294189453125, "loss_xval": 0.58203125, "num_input_tokens_seen": 230398240, "step": 3675 }, { "epoch": 12.232945091514143, "grad_norm": 27.785236358642578, "learning_rate": 5e-06, "loss": 0.5657, "num_input_tokens_seen": 230461844, "step": 3676 }, { "epoch": 12.232945091514143, "loss": 0.5925767421722412, "loss_ce": 0.00010844002827070653, "loss_iou": 0.26171875, "loss_num": 0.01348876953125, "loss_xval": 0.59375, "num_input_tokens_seen": 230461844, "step": 3676 }, { "epoch": 12.236272878535774, "grad_norm": 22.6358699798584, "learning_rate": 5e-06, "loss": 0.5678, "num_input_tokens_seen": 230523568, "step": 3677 }, { "epoch": 12.236272878535774, "loss": 0.33268463611602783, "loss_ce": 1.2503771358751692e-05, "loss_iou": 0.09814453125, "loss_num": 0.0272216796875, "loss_xval": 0.33203125, "num_input_tokens_seen": 230523568, "step": 3677 }, { "epoch": 12.239600665557404, "grad_norm": 10.111736297607422, "learning_rate": 5e-06, "loss": 0.3131, "num_input_tokens_seen": 230586912, "step": 3678 }, { "epoch": 12.239600665557404, "loss": 0.3600076735019684, "loss_ce": 2.230254904134199e-05, "loss_iou": 0.15234375, "loss_num": 0.010986328125, "loss_xval": 0.359375, "num_input_tokens_seen": 230586912, "step": 3678 }, { "epoch": 12.242928452579035, "grad_norm": 12.585610389709473, "learning_rate": 5e-06, "loss": 0.2656, "num_input_tokens_seen": 230648724, "step": 3679 }, { "epoch": 12.242928452579035, "loss": 0.31457599997520447, "loss_ce": 7.978240432748862e-07, "loss_iou": 0.111328125, "loss_num": 0.0184326171875, "loss_xval": 0.314453125, "num_input_tokens_seen": 230648724, "step": 3679 }, { "epoch": 12.246256239600665, "grad_norm": 29.194082260131836, "learning_rate": 5e-06, "loss": 0.5135, "num_input_tokens_seen": 230711344, "step": 3680 }, { "epoch": 12.246256239600665, "loss": 0.3827005624771118, "loss_ce": 1.0131998351425864e-05, "loss_iou": 0.1376953125, "loss_num": 0.0213623046875, "loss_xval": 0.3828125, "num_input_tokens_seen": 230711344, "step": 3680 }, { "epoch": 12.249584026622296, "grad_norm": 29.68413543701172, "learning_rate": 5e-06, "loss": 0.4798, "num_input_tokens_seen": 230773292, "step": 3681 }, { "epoch": 12.249584026622296, "loss": 0.4005741477012634, "loss_ce": 4.1084564372795285e-07, "loss_iou": 0.1396484375, "loss_num": 0.0242919921875, "loss_xval": 0.400390625, "num_input_tokens_seen": 230773292, "step": 3681 }, { "epoch": 12.252911813643927, "grad_norm": 16.442899703979492, "learning_rate": 5e-06, "loss": 0.551, "num_input_tokens_seen": 230837960, "step": 3682 }, { "epoch": 12.252911813643927, "loss": 0.6426308155059814, "loss_ce": 5.270661858958192e-05, "loss_iou": 0.2578125, "loss_num": 0.025634765625, "loss_xval": 0.640625, "num_input_tokens_seen": 230837960, "step": 3682 }, { "epoch": 12.256239600665557, "grad_norm": 8.353416442871094, "learning_rate": 5e-06, "loss": 0.6489, "num_input_tokens_seen": 230901616, "step": 3683 }, { "epoch": 12.256239600665557, "loss": 0.6857982873916626, "loss_ce": 7.269713023561053e-06, "loss_iou": 0.234375, "loss_num": 0.043212890625, "loss_xval": 0.6875, "num_input_tokens_seen": 230901616, "step": 3683 }, { "epoch": 12.259567387687188, "grad_norm": 16.903249740600586, "learning_rate": 5e-06, "loss": 0.4105, "num_input_tokens_seen": 230965880, "step": 3684 }, { "epoch": 12.259567387687188, "loss": 0.3366122841835022, "loss_ce": 3.425740942475386e-06, "loss_iou": 0.1455078125, "loss_num": 0.00927734375, "loss_xval": 0.3359375, "num_input_tokens_seen": 230965880, "step": 3684 }, { "epoch": 12.262895174708818, "grad_norm": 10.665207862854004, "learning_rate": 5e-06, "loss": 0.3678, "num_input_tokens_seen": 231027264, "step": 3685 }, { "epoch": 12.262895174708818, "loss": 0.2913898825645447, "loss_ce": 8.037482075451408e-06, "loss_iou": 0.0986328125, "loss_num": 0.0189208984375, "loss_xval": 0.291015625, "num_input_tokens_seen": 231027264, "step": 3685 }, { "epoch": 12.266222961730449, "grad_norm": 23.535675048828125, "learning_rate": 5e-06, "loss": 0.514, "num_input_tokens_seen": 231089768, "step": 3686 }, { "epoch": 12.266222961730449, "loss": 0.7083240747451782, "loss_ce": 0.0003162726934533566, "loss_iou": 0.25390625, "loss_num": 0.039794921875, "loss_xval": 0.70703125, "num_input_tokens_seen": 231089768, "step": 3686 }, { "epoch": 12.26955074875208, "grad_norm": 23.223861694335938, "learning_rate": 5e-06, "loss": 0.4617, "num_input_tokens_seen": 231152860, "step": 3687 }, { "epoch": 12.26955074875208, "loss": 0.5789802670478821, "loss_ce": 7.717234780102444e-07, "loss_iou": 0.20703125, "loss_num": 0.033203125, "loss_xval": 0.578125, "num_input_tokens_seen": 231152860, "step": 3687 }, { "epoch": 12.27287853577371, "grad_norm": 17.758737564086914, "learning_rate": 5e-06, "loss": 0.4671, "num_input_tokens_seen": 231216092, "step": 3688 }, { "epoch": 12.27287853577371, "loss": 0.40698373317718506, "loss_ce": 1.2952673387189861e-06, "loss_iou": 0.1669921875, "loss_num": 0.0146484375, "loss_xval": 0.40625, "num_input_tokens_seen": 231216092, "step": 3688 }, { "epoch": 12.27620632279534, "grad_norm": 20.84171485900879, "learning_rate": 5e-06, "loss": 0.5697, "num_input_tokens_seen": 231279784, "step": 3689 }, { "epoch": 12.27620632279534, "loss": 0.6758971214294434, "loss_ce": 0.00048204176709987223, "loss_iou": 0.251953125, "loss_num": 0.0341796875, "loss_xval": 0.67578125, "num_input_tokens_seen": 231279784, "step": 3689 }, { "epoch": 12.279534109816971, "grad_norm": 22.22574806213379, "learning_rate": 5e-06, "loss": 0.5914, "num_input_tokens_seen": 231342708, "step": 3690 }, { "epoch": 12.279534109816971, "loss": 0.5830093026161194, "loss_ce": 1.4851719924990903e-06, "loss_iou": 0.220703125, "loss_num": 0.0286865234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 231342708, "step": 3690 }, { "epoch": 12.282861896838602, "grad_norm": 25.204105377197266, "learning_rate": 5e-06, "loss": 0.4636, "num_input_tokens_seen": 231405628, "step": 3691 }, { "epoch": 12.282861896838602, "loss": 0.41278183460235596, "loss_ce": 1.0622394484016695e-06, "loss_iou": 0.126953125, "loss_num": 0.031982421875, "loss_xval": 0.412109375, "num_input_tokens_seen": 231405628, "step": 3691 }, { "epoch": 12.286189683860233, "grad_norm": 32.40183639526367, "learning_rate": 5e-06, "loss": 0.4897, "num_input_tokens_seen": 231469500, "step": 3692 }, { "epoch": 12.286189683860233, "loss": 0.5823505520820618, "loss_ce": 0.00019722813158296049, "loss_iou": 0.232421875, "loss_num": 0.0235595703125, "loss_xval": 0.58203125, "num_input_tokens_seen": 231469500, "step": 3692 }, { "epoch": 12.289517470881863, "grad_norm": 16.425233840942383, "learning_rate": 5e-06, "loss": 0.3834, "num_input_tokens_seen": 231532000, "step": 3693 }, { "epoch": 12.289517470881863, "loss": 0.440933495759964, "loss_ce": 1.5534975318587385e-05, "loss_iou": 0.1728515625, "loss_num": 0.019287109375, "loss_xval": 0.44140625, "num_input_tokens_seen": 231532000, "step": 3693 }, { "epoch": 12.292845257903494, "grad_norm": 15.689983367919922, "learning_rate": 5e-06, "loss": 0.5042, "num_input_tokens_seen": 231594876, "step": 3694 }, { "epoch": 12.292845257903494, "loss": 0.44730114936828613, "loss_ce": 4.967775566910859e-06, "loss_iou": 0.1708984375, "loss_num": 0.0211181640625, "loss_xval": 0.447265625, "num_input_tokens_seen": 231594876, "step": 3694 }, { "epoch": 12.296173044925125, "grad_norm": 11.674674987792969, "learning_rate": 5e-06, "loss": 0.5687, "num_input_tokens_seen": 231657256, "step": 3695 }, { "epoch": 12.296173044925125, "loss": 0.42815595865249634, "loss_ce": 1.9838325897580944e-06, "loss_iou": 0.13671875, "loss_num": 0.0311279296875, "loss_xval": 0.427734375, "num_input_tokens_seen": 231657256, "step": 3695 }, { "epoch": 12.299500831946755, "grad_norm": 284.0730285644531, "learning_rate": 5e-06, "loss": 0.7308, "num_input_tokens_seen": 231719680, "step": 3696 }, { "epoch": 12.299500831946755, "loss": 0.7981675267219543, "loss_ce": 1.0744190149125643e-05, "loss_iou": 0.2890625, "loss_num": 0.0439453125, "loss_xval": 0.796875, "num_input_tokens_seen": 231719680, "step": 3696 }, { "epoch": 12.302828618968386, "grad_norm": 16.88345718383789, "learning_rate": 5e-06, "loss": 0.4468, "num_input_tokens_seen": 231783308, "step": 3697 }, { "epoch": 12.302828618968386, "loss": 0.4687206745147705, "loss_ce": 1.1807312603195896e-06, "loss_iou": 0.2001953125, "loss_num": 0.01361083984375, "loss_xval": 0.46875, "num_input_tokens_seen": 231783308, "step": 3697 }, { "epoch": 12.306156405990016, "grad_norm": 11.841068267822266, "learning_rate": 5e-06, "loss": 0.5096, "num_input_tokens_seen": 231846768, "step": 3698 }, { "epoch": 12.306156405990016, "loss": 0.4613052010536194, "loss_ce": 1.4969406265663565e-06, "loss_iou": 0.1435546875, "loss_num": 0.03466796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 231846768, "step": 3698 }, { "epoch": 12.309484193011647, "grad_norm": 10.751826286315918, "learning_rate": 5e-06, "loss": 0.3873, "num_input_tokens_seen": 231907620, "step": 3699 }, { "epoch": 12.309484193011647, "loss": 0.2768259048461914, "loss_ce": 9.56456233325298e-07, "loss_iou": 0.058837890625, "loss_num": 0.03173828125, "loss_xval": 0.27734375, "num_input_tokens_seen": 231907620, "step": 3699 }, { "epoch": 12.312811980033278, "grad_norm": 18.689571380615234, "learning_rate": 5e-06, "loss": 0.6703, "num_input_tokens_seen": 231970728, "step": 3700 }, { "epoch": 12.312811980033278, "loss": 0.5368751883506775, "loss_ce": 9.9799153758795e-06, "loss_iou": 0.1962890625, "loss_num": 0.029052734375, "loss_xval": 0.53515625, "num_input_tokens_seen": 231970728, "step": 3700 }, { "epoch": 12.316139767054908, "grad_norm": 12.697893142700195, "learning_rate": 5e-06, "loss": 0.5692, "num_input_tokens_seen": 232035008, "step": 3701 }, { "epoch": 12.316139767054908, "loss": 0.4069279432296753, "loss_ce": 0.000433776673162356, "loss_iou": 0.1640625, "loss_num": 0.0157470703125, "loss_xval": 0.40625, "num_input_tokens_seen": 232035008, "step": 3701 }, { "epoch": 12.319467554076539, "grad_norm": 14.091324806213379, "learning_rate": 5e-06, "loss": 0.3908, "num_input_tokens_seen": 232094672, "step": 3702 }, { "epoch": 12.319467554076539, "loss": 0.4635047912597656, "loss_ce": 3.839922555926023e-06, "loss_iou": 0.1650390625, "loss_num": 0.026611328125, "loss_xval": 0.462890625, "num_input_tokens_seen": 232094672, "step": 3702 }, { "epoch": 12.32279534109817, "grad_norm": 17.948162078857422, "learning_rate": 5e-06, "loss": 0.5232, "num_input_tokens_seen": 232157948, "step": 3703 }, { "epoch": 12.32279534109817, "loss": 0.23475351929664612, "loss_ce": 1.2307420547585934e-05, "loss_iou": 0.0908203125, "loss_num": 0.0106201171875, "loss_xval": 0.234375, "num_input_tokens_seen": 232157948, "step": 3703 }, { "epoch": 12.3261231281198, "grad_norm": 14.404576301574707, "learning_rate": 5e-06, "loss": 0.3506, "num_input_tokens_seen": 232220500, "step": 3704 }, { "epoch": 12.3261231281198, "loss": 0.4848410189151764, "loss_ce": 0.00022191116295289248, "loss_iou": 0.134765625, "loss_num": 0.042724609375, "loss_xval": 0.484375, "num_input_tokens_seen": 232220500, "step": 3704 }, { "epoch": 12.32945091514143, "grad_norm": 6.486727714538574, "learning_rate": 5e-06, "loss": 0.4202, "num_input_tokens_seen": 232283052, "step": 3705 }, { "epoch": 12.32945091514143, "loss": 0.2749040126800537, "loss_ce": 1.6679757663951023e-06, "loss_iou": 0.0791015625, "loss_num": 0.0233154296875, "loss_xval": 0.275390625, "num_input_tokens_seen": 232283052, "step": 3705 }, { "epoch": 12.332778702163061, "grad_norm": 5.345829963684082, "learning_rate": 5e-06, "loss": 0.2902, "num_input_tokens_seen": 232343500, "step": 3706 }, { "epoch": 12.332778702163061, "loss": 0.2248854637145996, "loss_ce": 1.4281310996011598e-06, "loss_iou": 0.0224609375, "loss_num": 0.035888671875, "loss_xval": 0.224609375, "num_input_tokens_seen": 232343500, "step": 3706 }, { "epoch": 12.336106489184692, "grad_norm": 19.14884376525879, "learning_rate": 5e-06, "loss": 0.428, "num_input_tokens_seen": 232406008, "step": 3707 }, { "epoch": 12.336106489184692, "loss": 0.632783830165863, "loss_ce": 3.236802149331197e-05, "loss_iou": 0.267578125, "loss_num": 0.0194091796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 232406008, "step": 3707 }, { "epoch": 12.339434276206322, "grad_norm": 16.92552947998047, "learning_rate": 5e-06, "loss": 0.3775, "num_input_tokens_seen": 232470312, "step": 3708 }, { "epoch": 12.339434276206322, "loss": 0.4561805725097656, "loss_ce": 3.829568413493689e-06, "loss_iou": 0.1904296875, "loss_num": 0.014892578125, "loss_xval": 0.45703125, "num_input_tokens_seen": 232470312, "step": 3708 }, { "epoch": 12.342762063227953, "grad_norm": 11.256321907043457, "learning_rate": 5e-06, "loss": 0.3098, "num_input_tokens_seen": 232532888, "step": 3709 }, { "epoch": 12.342762063227953, "loss": 0.31873443722724915, "loss_ce": 8.844984222378116e-06, "loss_iou": 0.1240234375, "loss_num": 0.01409912109375, "loss_xval": 0.318359375, "num_input_tokens_seen": 232532888, "step": 3709 }, { "epoch": 12.346089850249584, "grad_norm": 10.919595718383789, "learning_rate": 5e-06, "loss": 0.5723, "num_input_tokens_seen": 232594672, "step": 3710 }, { "epoch": 12.346089850249584, "loss": 0.5385823845863342, "loss_ce": 3.866082624881528e-05, "loss_iou": 0.2158203125, "loss_num": 0.021240234375, "loss_xval": 0.5390625, "num_input_tokens_seen": 232594672, "step": 3710 }, { "epoch": 12.349417637271214, "grad_norm": 41.25916290283203, "learning_rate": 5e-06, "loss": 0.5533, "num_input_tokens_seen": 232657876, "step": 3711 }, { "epoch": 12.349417637271214, "loss": 0.49431830644607544, "loss_ce": 5.558085467782803e-05, "loss_iou": 0.169921875, "loss_num": 0.031005859375, "loss_xval": 0.494140625, "num_input_tokens_seen": 232657876, "step": 3711 }, { "epoch": 12.352745424292845, "grad_norm": 6.918086051940918, "learning_rate": 5e-06, "loss": 0.3256, "num_input_tokens_seen": 232720316, "step": 3712 }, { "epoch": 12.352745424292845, "loss": 0.1880282759666443, "loss_ce": 9.484840120421723e-06, "loss_iou": 0.05126953125, "loss_num": 0.01708984375, "loss_xval": 0.1884765625, "num_input_tokens_seen": 232720316, "step": 3712 }, { "epoch": 12.356073211314476, "grad_norm": 13.547465324401855, "learning_rate": 5e-06, "loss": 0.3734, "num_input_tokens_seen": 232781240, "step": 3713 }, { "epoch": 12.356073211314476, "loss": 0.43420594930648804, "loss_ce": 1.8495244376026676e-06, "loss_iou": 0.1572265625, "loss_num": 0.0238037109375, "loss_xval": 0.43359375, "num_input_tokens_seen": 232781240, "step": 3713 }, { "epoch": 12.359400998336106, "grad_norm": 13.32815170288086, "learning_rate": 5e-06, "loss": 0.374, "num_input_tokens_seen": 232842820, "step": 3714 }, { "epoch": 12.359400998336106, "loss": 0.26577818393707275, "loss_ce": 6.14148461863806e-07, "loss_iou": 0.08251953125, "loss_num": 0.02001953125, "loss_xval": 0.265625, "num_input_tokens_seen": 232842820, "step": 3714 }, { "epoch": 12.362728785357737, "grad_norm": 8.34607982635498, "learning_rate": 5e-06, "loss": 0.7169, "num_input_tokens_seen": 232905704, "step": 3715 }, { "epoch": 12.362728785357737, "loss": 0.6201179027557373, "loss_ce": 7.201313678706356e-07, "loss_iou": 0.2158203125, "loss_num": 0.037841796875, "loss_xval": 0.62109375, "num_input_tokens_seen": 232905704, "step": 3715 }, { "epoch": 12.366056572379367, "grad_norm": 19.19894027709961, "learning_rate": 5e-06, "loss": 0.5984, "num_input_tokens_seen": 232969528, "step": 3716 }, { "epoch": 12.366056572379367, "loss": 0.4860396683216095, "loss_ce": 1.437010496374569e-06, "loss_iou": 0.212890625, "loss_num": 0.0120849609375, "loss_xval": 0.486328125, "num_input_tokens_seen": 232969528, "step": 3716 }, { "epoch": 12.369384359400998, "grad_norm": 27.8046817779541, "learning_rate": 5e-06, "loss": 0.4598, "num_input_tokens_seen": 233032368, "step": 3717 }, { "epoch": 12.369384359400998, "loss": 0.3823345899581909, "loss_ce": 1.03554239103687e-05, "loss_iou": 0.1572265625, "loss_num": 0.0137939453125, "loss_xval": 0.3828125, "num_input_tokens_seen": 233032368, "step": 3717 }, { "epoch": 12.372712146422629, "grad_norm": 19.445667266845703, "learning_rate": 5e-06, "loss": 0.524, "num_input_tokens_seen": 233096224, "step": 3718 }, { "epoch": 12.372712146422629, "loss": 0.6174051761627197, "loss_ce": 0.00015662649821024388, "loss_iou": 0.259765625, "loss_num": 0.0198974609375, "loss_xval": 0.6171875, "num_input_tokens_seen": 233096224, "step": 3718 }, { "epoch": 12.37603993344426, "grad_norm": 7.643185615539551, "learning_rate": 5e-06, "loss": 0.343, "num_input_tokens_seen": 233158892, "step": 3719 }, { "epoch": 12.37603993344426, "loss": 0.42028990387916565, "loss_ce": 1.7950603705685353e-06, "loss_iou": 0.1650390625, "loss_num": 0.0179443359375, "loss_xval": 0.419921875, "num_input_tokens_seen": 233158892, "step": 3719 }, { "epoch": 12.37936772046589, "grad_norm": 6.845453262329102, "learning_rate": 5e-06, "loss": 0.4534, "num_input_tokens_seen": 233221384, "step": 3720 }, { "epoch": 12.37936772046589, "loss": 0.6591840982437134, "loss_ce": 4.392376467876602e-06, "loss_iou": 0.27734375, "loss_num": 0.0208740234375, "loss_xval": 0.66015625, "num_input_tokens_seen": 233221384, "step": 3720 }, { "epoch": 12.38269550748752, "grad_norm": 7.377474784851074, "learning_rate": 5e-06, "loss": 0.3804, "num_input_tokens_seen": 233283004, "step": 3721 }, { "epoch": 12.38269550748752, "loss": 0.29821932315826416, "loss_ce": 1.5312302821257617e-06, "loss_iou": 0.1025390625, "loss_num": 0.0186767578125, "loss_xval": 0.298828125, "num_input_tokens_seen": 233283004, "step": 3721 }, { "epoch": 12.386023294509151, "grad_norm": 23.928085327148438, "learning_rate": 5e-06, "loss": 0.4567, "num_input_tokens_seen": 233345660, "step": 3722 }, { "epoch": 12.386023294509151, "loss": 0.2813146710395813, "loss_ce": 3.6373603506945074e-06, "loss_iou": 0.09912109375, "loss_num": 0.0167236328125, "loss_xval": 0.28125, "num_input_tokens_seen": 233345660, "step": 3722 }, { "epoch": 12.389351081530782, "grad_norm": 18.58507537841797, "learning_rate": 5e-06, "loss": 0.6013, "num_input_tokens_seen": 233408764, "step": 3723 }, { "epoch": 12.389351081530782, "loss": 0.5954127907752991, "loss_ce": 7.588303560623899e-05, "loss_iou": 0.2265625, "loss_num": 0.0281982421875, "loss_xval": 0.59375, "num_input_tokens_seen": 233408764, "step": 3723 }, { "epoch": 12.392678868552412, "grad_norm": 9.877181053161621, "learning_rate": 5e-06, "loss": 0.5215, "num_input_tokens_seen": 233471956, "step": 3724 }, { "epoch": 12.392678868552412, "loss": 0.6219655871391296, "loss_ce": 7.841934711905196e-05, "loss_iou": 0.2236328125, "loss_num": 0.03515625, "loss_xval": 0.62109375, "num_input_tokens_seen": 233471956, "step": 3724 }, { "epoch": 12.396006655574043, "grad_norm": 16.271574020385742, "learning_rate": 5e-06, "loss": 0.4889, "num_input_tokens_seen": 233533956, "step": 3725 }, { "epoch": 12.396006655574043, "loss": 0.6629677414894104, "loss_ce": 3.869759893859737e-06, "loss_iou": 0.267578125, "loss_num": 0.025634765625, "loss_xval": 0.6640625, "num_input_tokens_seen": 233533956, "step": 3725 }, { "epoch": 12.399334442595674, "grad_norm": 24.879911422729492, "learning_rate": 5e-06, "loss": 0.5977, "num_input_tokens_seen": 233597696, "step": 3726 }, { "epoch": 12.399334442595674, "loss": 0.6900641918182373, "loss_ce": 6.412707875824708e-07, "loss_iou": 0.26171875, "loss_num": 0.033203125, "loss_xval": 0.69140625, "num_input_tokens_seen": 233597696, "step": 3726 }, { "epoch": 12.402662229617304, "grad_norm": 31.49070167541504, "learning_rate": 5e-06, "loss": 0.7146, "num_input_tokens_seen": 233661792, "step": 3727 }, { "epoch": 12.402662229617304, "loss": 0.6923010349273682, "loss_ce": 0.0001623404968995601, "loss_iou": 0.259765625, "loss_num": 0.03466796875, "loss_xval": 0.69140625, "num_input_tokens_seen": 233661792, "step": 3727 }, { "epoch": 12.405990016638935, "grad_norm": 30.303892135620117, "learning_rate": 5e-06, "loss": 0.5678, "num_input_tokens_seen": 233724648, "step": 3728 }, { "epoch": 12.405990016638935, "loss": 0.5066385269165039, "loss_ce": 4.678855475503951e-05, "loss_iou": 0.169921875, "loss_num": 0.033447265625, "loss_xval": 0.5078125, "num_input_tokens_seen": 233724648, "step": 3728 }, { "epoch": 12.409317803660565, "grad_norm": 15.219099998474121, "learning_rate": 5e-06, "loss": 0.3593, "num_input_tokens_seen": 233787964, "step": 3729 }, { "epoch": 12.409317803660565, "loss": 0.4229809045791626, "loss_ce": 7.253333933476824e-06, "loss_iou": 0.154296875, "loss_num": 0.02294921875, "loss_xval": 0.423828125, "num_input_tokens_seen": 233787964, "step": 3729 }, { "epoch": 12.412645590682196, "grad_norm": 15.763005256652832, "learning_rate": 5e-06, "loss": 0.4368, "num_input_tokens_seen": 233851144, "step": 3730 }, { "epoch": 12.412645590682196, "loss": 0.48169025778770447, "loss_ce": 7.991923780537036e-07, "loss_iou": 0.185546875, "loss_num": 0.02197265625, "loss_xval": 0.482421875, "num_input_tokens_seen": 233851144, "step": 3730 }, { "epoch": 12.415973377703827, "grad_norm": 19.008567810058594, "learning_rate": 5e-06, "loss": 0.5305, "num_input_tokens_seen": 233913624, "step": 3731 }, { "epoch": 12.415973377703827, "loss": 0.7049521207809448, "loss_ce": 0.0020407086703926325, "loss_iou": 0.275390625, "loss_num": 0.0302734375, "loss_xval": 0.703125, "num_input_tokens_seen": 233913624, "step": 3731 }, { "epoch": 12.419301164725457, "grad_norm": 11.476460456848145, "learning_rate": 5e-06, "loss": 0.591, "num_input_tokens_seen": 233976560, "step": 3732 }, { "epoch": 12.419301164725457, "loss": 0.6463944911956787, "loss_ce": 3.221923907403834e-05, "loss_iou": 0.2421875, "loss_num": 0.032470703125, "loss_xval": 0.64453125, "num_input_tokens_seen": 233976560, "step": 3732 }, { "epoch": 12.422628951747088, "grad_norm": 12.683476448059082, "learning_rate": 5e-06, "loss": 0.6159, "num_input_tokens_seen": 234039796, "step": 3733 }, { "epoch": 12.422628951747088, "loss": 0.5401417016983032, "loss_ce": 0.00031631573801860213, "loss_iou": 0.24609375, "loss_num": 0.0096435546875, "loss_xval": 0.5390625, "num_input_tokens_seen": 234039796, "step": 3733 }, { "epoch": 12.425956738768718, "grad_norm": 13.382314682006836, "learning_rate": 5e-06, "loss": 0.433, "num_input_tokens_seen": 234103440, "step": 3734 }, { "epoch": 12.425956738768718, "loss": 0.2769189178943634, "loss_ce": 2.4169389689632226e-06, "loss_iou": 0.11083984375, "loss_num": 0.0111083984375, "loss_xval": 0.27734375, "num_input_tokens_seen": 234103440, "step": 3734 }, { "epoch": 12.429284525790349, "grad_norm": 15.604523658752441, "learning_rate": 5e-06, "loss": 0.3894, "num_input_tokens_seen": 234166444, "step": 3735 }, { "epoch": 12.429284525790349, "loss": 0.3989883065223694, "loss_ce": 1.4764231082153856e-06, "loss_iou": 0.1494140625, "loss_num": 0.0201416015625, "loss_xval": 0.3984375, "num_input_tokens_seen": 234166444, "step": 3735 }, { "epoch": 12.43261231281198, "grad_norm": 20.673295974731445, "learning_rate": 5e-06, "loss": 0.4288, "num_input_tokens_seen": 234230600, "step": 3736 }, { "epoch": 12.43261231281198, "loss": 0.3911149799823761, "loss_ce": 1.696121898930869e-06, "loss_iou": 0.1591796875, "loss_num": 0.01470947265625, "loss_xval": 0.390625, "num_input_tokens_seen": 234230600, "step": 3736 }, { "epoch": 12.43594009983361, "grad_norm": 42.617427825927734, "learning_rate": 5e-06, "loss": 0.6181, "num_input_tokens_seen": 234293928, "step": 3737 }, { "epoch": 12.43594009983361, "loss": 0.7442426681518555, "loss_ce": 0.0003766668087337166, "loss_iou": 0.291015625, "loss_num": 0.032470703125, "loss_xval": 0.7421875, "num_input_tokens_seen": 234293928, "step": 3737 }, { "epoch": 12.43926788685524, "grad_norm": 28.987735748291016, "learning_rate": 5e-06, "loss": 0.4479, "num_input_tokens_seen": 234357820, "step": 3738 }, { "epoch": 12.43926788685524, "loss": 0.5932722091674805, "loss_ce": 1.0525476682232693e-05, "loss_iou": 0.2333984375, "loss_num": 0.0255126953125, "loss_xval": 0.59375, "num_input_tokens_seen": 234357820, "step": 3738 }, { "epoch": 12.442595673876871, "grad_norm": 20.860637664794922, "learning_rate": 5e-06, "loss": 0.6595, "num_input_tokens_seen": 234421340, "step": 3739 }, { "epoch": 12.442595673876871, "loss": 0.6846110820770264, "loss_ce": 0.0007121558883227408, "loss_iou": 0.2490234375, "loss_num": 0.037109375, "loss_xval": 0.68359375, "num_input_tokens_seen": 234421340, "step": 3739 }, { "epoch": 12.445923460898502, "grad_norm": 11.62499713897705, "learning_rate": 5e-06, "loss": 0.3908, "num_input_tokens_seen": 234484764, "step": 3740 }, { "epoch": 12.445923460898502, "loss": 0.4673497676849365, "loss_ce": 3.5612833926279563e-06, "loss_iou": 0.2021484375, "loss_num": 0.012451171875, "loss_xval": 0.466796875, "num_input_tokens_seen": 234484764, "step": 3740 }, { "epoch": 12.449251247920133, "grad_norm": 18.36492347717285, "learning_rate": 5e-06, "loss": 0.4266, "num_input_tokens_seen": 234547860, "step": 3741 }, { "epoch": 12.449251247920133, "loss": 0.26782622933387756, "loss_ce": 0.00012603640789166093, "loss_iou": 0.10498046875, "loss_num": 0.0115966796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 234547860, "step": 3741 }, { "epoch": 12.452579034941763, "grad_norm": 11.828195571899414, "learning_rate": 5e-06, "loss": 0.4996, "num_input_tokens_seen": 234610164, "step": 3742 }, { "epoch": 12.452579034941763, "loss": 0.224489226937294, "loss_ce": 1.9137175968353404e-06, "loss_iou": 0.043701171875, "loss_num": 0.0274658203125, "loss_xval": 0.224609375, "num_input_tokens_seen": 234610164, "step": 3742 }, { "epoch": 12.455906821963394, "grad_norm": 10.733247756958008, "learning_rate": 5e-06, "loss": 0.3827, "num_input_tokens_seen": 234673924, "step": 3743 }, { "epoch": 12.455906821963394, "loss": 0.3317621946334839, "loss_ce": 5.601774319075048e-06, "loss_iou": 0.1357421875, "loss_num": 0.0120849609375, "loss_xval": 0.33203125, "num_input_tokens_seen": 234673924, "step": 3743 }, { "epoch": 12.459234608985025, "grad_norm": 10.34771728515625, "learning_rate": 5e-06, "loss": 0.6301, "num_input_tokens_seen": 234737932, "step": 3744 }, { "epoch": 12.459234608985025, "loss": 0.6927498579025269, "loss_ce": 7.815427238710981e-07, "loss_iou": 0.267578125, "loss_num": 0.03173828125, "loss_xval": 0.69140625, "num_input_tokens_seen": 234737932, "step": 3744 }, { "epoch": 12.462562396006655, "grad_norm": 9.746963500976562, "learning_rate": 5e-06, "loss": 0.5245, "num_input_tokens_seen": 234801424, "step": 3745 }, { "epoch": 12.462562396006655, "loss": 0.48498600721359253, "loss_ce": 6.532977181450406e-07, "loss_iou": 0.1982421875, "loss_num": 0.0174560546875, "loss_xval": 0.484375, "num_input_tokens_seen": 234801424, "step": 3745 }, { "epoch": 12.465890183028286, "grad_norm": 17.172203063964844, "learning_rate": 5e-06, "loss": 0.6716, "num_input_tokens_seen": 234864532, "step": 3746 }, { "epoch": 12.465890183028286, "loss": 0.8371988534927368, "loss_ce": 0.00016269145999103785, "loss_iou": 0.328125, "loss_num": 0.0361328125, "loss_xval": 0.8359375, "num_input_tokens_seen": 234864532, "step": 3746 }, { "epoch": 12.469217970049916, "grad_norm": 17.39699363708496, "learning_rate": 5e-06, "loss": 0.4517, "num_input_tokens_seen": 234927496, "step": 3747 }, { "epoch": 12.469217970049916, "loss": 0.5053513050079346, "loss_ce": 4.126534622628242e-05, "loss_iou": 0.1982421875, "loss_num": 0.0216064453125, "loss_xval": 0.50390625, "num_input_tokens_seen": 234927496, "step": 3747 }, { "epoch": 12.472545757071547, "grad_norm": 9.007098197937012, "learning_rate": 5e-06, "loss": 0.5868, "num_input_tokens_seen": 234989608, "step": 3748 }, { "epoch": 12.472545757071547, "loss": 0.680741548538208, "loss_ce": 1.6429743482149206e-05, "loss_iou": 0.1845703125, "loss_num": 0.062255859375, "loss_xval": 0.6796875, "num_input_tokens_seen": 234989608, "step": 3748 }, { "epoch": 12.475873544093178, "grad_norm": 10.419960021972656, "learning_rate": 5e-06, "loss": 0.3924, "num_input_tokens_seen": 235051460, "step": 3749 }, { "epoch": 12.475873544093178, "loss": 0.4091986417770386, "loss_ce": 1.893247645057272e-05, "loss_iou": 0.173828125, "loss_num": 0.01239013671875, "loss_xval": 0.41015625, "num_input_tokens_seen": 235051460, "step": 3749 }, { "epoch": 12.479201331114808, "grad_norm": 21.960668563842773, "learning_rate": 5e-06, "loss": 0.4474, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "eval_seeclick_CIoU": 0.041715556755661964, "eval_seeclick_GIoU": 0.042995328083634377, "eval_seeclick_IoU": 0.16147079318761826, "eval_seeclick_MAE_all": 0.17059868574142456, "eval_seeclick_MAE_h": 0.05768238380551338, "eval_seeclick_MAE_w": 0.13999300450086594, "eval_seeclick_MAE_x_boxes": 0.20860368013381958, "eval_seeclick_MAE_y_boxes": 0.18143728375434875, "eval_seeclick_NUM_probability": 0.9999815821647644, "eval_seeclick_inside_bbox": 0.17812500149011612, "eval_seeclick_loss": 2.95798659324646, "eval_seeclick_loss_ce": 0.1649812012910843, "eval_seeclick_loss_iou": 0.970947265625, "eval_seeclick_loss_num": 0.1735687255859375, "eval_seeclick_loss_xval": 2.810546875, "eval_seeclick_runtime": 66.7514, "eval_seeclick_samples_per_second": 0.704, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "eval_icons_CIoU": -0.06343189813196659, "eval_icons_GIoU": 0.037338437512516975, "eval_icons_IoU": 0.1199527159333229, "eval_icons_MAE_all": 0.1952221468091011, "eval_icons_MAE_h": 0.15716271847486496, "eval_icons_MAE_w": 0.21476763486862183, "eval_icons_MAE_x_boxes": 0.13614078238606453, "eval_icons_MAE_y_boxes": 0.0957186184823513, "eval_icons_NUM_probability": 0.9999814331531525, "eval_icons_inside_bbox": 0.2204861119389534, "eval_icons_loss": 2.8575026988983154, "eval_icons_loss_ce": 2.4552280137868365e-06, "eval_icons_loss_iou": 0.9580078125, "eval_icons_loss_num": 0.1923828125, "eval_icons_loss_xval": 2.876953125, "eval_icons_runtime": 69.6821, "eval_icons_samples_per_second": 0.718, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "eval_screenspot_CIoU": 0.16704998910427094, "eval_screenspot_GIoU": 0.20313948392868042, "eval_screenspot_IoU": 0.2816409021615982, "eval_screenspot_MAE_all": 0.11510271330674489, "eval_screenspot_MAE_h": 0.0711837795873483, "eval_screenspot_MAE_w": 0.08903796225786209, "eval_screenspot_MAE_x_boxes": 0.1584967076778412, "eval_screenspot_MAE_y_boxes": 0.08810579528411229, "eval_screenspot_NUM_probability": 0.999993900458018, "eval_screenspot_inside_bbox": 0.512500007947286, "eval_screenspot_loss": 2.2205288410186768, "eval_screenspot_loss_ce": 2.2631903296617868e-05, "eval_screenspot_loss_iou": 0.8157552083333334, "eval_screenspot_loss_num": 0.12469228108723958, "eval_screenspot_loss_xval": 2.2555338541666665, "eval_screenspot_runtime": 118.6093, "eval_screenspot_samples_per_second": 0.75, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "eval_compot_CIoU": 0.12303280457854271, "eval_compot_GIoU": 0.16174692660570145, "eval_compot_IoU": 0.2542417496442795, "eval_compot_MAE_all": 0.13973219692707062, "eval_compot_MAE_h": 0.07503979466855526, "eval_compot_MAE_w": 0.17071513086557388, "eval_compot_MAE_x_boxes": 0.10623523220419884, "eval_compot_MAE_y_boxes": 0.10725085437297821, "eval_compot_NUM_probability": 0.9999958276748657, "eval_compot_inside_bbox": 0.4288194477558136, "eval_compot_loss": 2.341046094894409, "eval_compot_loss_ce": 0.002560611057560891, "eval_compot_loss_iou": 0.84423828125, "eval_compot_loss_num": 0.144073486328125, "eval_compot_loss_xval": 2.408203125, "eval_compot_runtime": 68.3789, "eval_compot_samples_per_second": 0.731, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "eval_custom_ui_MAE_all": 0.06386992894113064, "eval_custom_ui_MAE_x": 0.07593025639653206, "eval_custom_ui_MAE_y": 0.051809605211019516, "eval_custom_ui_NUM_probability": 0.999998927116394, "eval_custom_ui_loss": 0.296334832906723, "eval_custom_ui_loss_ce": 5.566233312492841e-06, "eval_custom_ui_loss_num": 0.0607452392578125, "eval_custom_ui_loss_xval": 0.30364990234375, "eval_custom_ui_runtime": 51.9466, "eval_custom_ui_samples_per_second": 0.963, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.479201331114808, "loss": 0.3286204934120178, "loss_ce": 7.232087682496058e-06, "loss_iou": 0.0, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 235115648, "step": 3750 }, { "epoch": 12.482529118136439, "grad_norm": 26.682798385620117, "learning_rate": 5e-06, "loss": 0.376, "num_input_tokens_seen": 235178404, "step": 3751 }, { "epoch": 12.482529118136439, "loss": 0.4586907923221588, "loss_ce": 1.1596400327107403e-05, "loss_iou": 0.173828125, "loss_num": 0.0220947265625, "loss_xval": 0.458984375, "num_input_tokens_seen": 235178404, "step": 3751 }, { "epoch": 12.48585690515807, "grad_norm": 16.239173889160156, "learning_rate": 5e-06, "loss": 0.3824, "num_input_tokens_seen": 235241172, "step": 3752 }, { "epoch": 12.48585690515807, "loss": 0.19443093240261078, "loss_ce": 3.3963504392886534e-05, "loss_iou": 0.068359375, "loss_num": 0.01153564453125, "loss_xval": 0.1943359375, "num_input_tokens_seen": 235241172, "step": 3752 }, { "epoch": 12.4891846921797, "grad_norm": 20.623336791992188, "learning_rate": 5e-06, "loss": 0.631, "num_input_tokens_seen": 235302604, "step": 3753 }, { "epoch": 12.4891846921797, "loss": 0.7632473707199097, "loss_ce": 2.7797466373158386e-06, "loss_iou": 0.279296875, "loss_num": 0.04052734375, "loss_xval": 0.76171875, "num_input_tokens_seen": 235302604, "step": 3753 }, { "epoch": 12.49251247920133, "grad_norm": 36.480690002441406, "learning_rate": 5e-06, "loss": 0.5436, "num_input_tokens_seen": 235367076, "step": 3754 }, { "epoch": 12.49251247920133, "loss": 0.5117820501327515, "loss_ce": 2.28353974307538e-06, "loss_iou": 0.19140625, "loss_num": 0.025634765625, "loss_xval": 0.51171875, "num_input_tokens_seen": 235367076, "step": 3754 }, { "epoch": 12.495840266222961, "grad_norm": 29.562986373901367, "learning_rate": 5e-06, "loss": 0.4911, "num_input_tokens_seen": 235429468, "step": 3755 }, { "epoch": 12.495840266222961, "loss": 0.35681402683258057, "loss_ce": 2.515098231015145e-06, "loss_iou": 0.1396484375, "loss_num": 0.0155029296875, "loss_xval": 0.357421875, "num_input_tokens_seen": 235429468, "step": 3755 }, { "epoch": 12.499168053244592, "grad_norm": 11.737725257873535, "learning_rate": 5e-06, "loss": 0.6963, "num_input_tokens_seen": 235493936, "step": 3756 }, { "epoch": 12.499168053244592, "loss": 0.8258066177368164, "loss_ce": 0.00012302302639000118, "loss_iou": 0.33984375, "loss_num": 0.0296630859375, "loss_xval": 0.82421875, "num_input_tokens_seen": 235493936, "step": 3756 }, { "epoch": 12.502495840266223, "grad_norm": 17.461950302124023, "learning_rate": 5e-06, "loss": 0.5442, "num_input_tokens_seen": 235556988, "step": 3757 }, { "epoch": 12.502495840266223, "loss": 0.4331299066543579, "loss_ce": 8.54562022141181e-05, "loss_iou": 0.181640625, "loss_num": 0.013916015625, "loss_xval": 0.43359375, "num_input_tokens_seen": 235556988, "step": 3757 }, { "epoch": 12.505823627287853, "grad_norm": 10.058496475219727, "learning_rate": 5e-06, "loss": 0.3874, "num_input_tokens_seen": 235619364, "step": 3758 }, { "epoch": 12.505823627287853, "loss": 0.37945982813835144, "loss_ce": 0.00040100261685438454, "loss_iou": 0.140625, "loss_num": 0.0194091796875, "loss_xval": 0.37890625, "num_input_tokens_seen": 235619364, "step": 3758 }, { "epoch": 12.509151414309484, "grad_norm": 15.424076080322266, "learning_rate": 5e-06, "loss": 0.3752, "num_input_tokens_seen": 235681068, "step": 3759 }, { "epoch": 12.509151414309484, "loss": 0.42548471689224243, "loss_ce": 8.6501931946259e-06, "loss_iou": 0.1787109375, "loss_num": 0.0137939453125, "loss_xval": 0.42578125, "num_input_tokens_seen": 235681068, "step": 3759 }, { "epoch": 12.512479201331114, "grad_norm": 9.391256332397461, "learning_rate": 5e-06, "loss": 0.3717, "num_input_tokens_seen": 235744532, "step": 3760 }, { "epoch": 12.512479201331114, "loss": 0.4770270884037018, "loss_ce": 6.840450168965617e-06, "loss_iou": 0.1630859375, "loss_num": 0.0301513671875, "loss_xval": 0.4765625, "num_input_tokens_seen": 235744532, "step": 3760 }, { "epoch": 12.515806988352745, "grad_norm": 10.259928703308105, "learning_rate": 5e-06, "loss": 0.308, "num_input_tokens_seen": 235806296, "step": 3761 }, { "epoch": 12.515806988352745, "loss": 0.3894050717353821, "loss_ce": 7.658254617126659e-07, "loss_iou": 0.138671875, "loss_num": 0.0224609375, "loss_xval": 0.388671875, "num_input_tokens_seen": 235806296, "step": 3761 }, { "epoch": 12.519134775374376, "grad_norm": 29.895326614379883, "learning_rate": 5e-06, "loss": 0.4663, "num_input_tokens_seen": 235869048, "step": 3762 }, { "epoch": 12.519134775374376, "loss": 0.407509982585907, "loss_ce": 3.9287551771849394e-05, "loss_iou": 0.154296875, "loss_num": 0.0196533203125, "loss_xval": 0.408203125, "num_input_tokens_seen": 235869048, "step": 3762 }, { "epoch": 12.522462562396006, "grad_norm": 20.528553009033203, "learning_rate": 5e-06, "loss": 0.5031, "num_input_tokens_seen": 235930288, "step": 3763 }, { "epoch": 12.522462562396006, "loss": 0.40908747911453247, "loss_ce": 2.9851038561901078e-05, "loss_iou": 0.138671875, "loss_num": 0.0262451171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 235930288, "step": 3763 }, { "epoch": 12.525790349417637, "grad_norm": 10.902203559875488, "learning_rate": 5e-06, "loss": 0.4244, "num_input_tokens_seen": 235992664, "step": 3764 }, { "epoch": 12.525790349417637, "loss": 0.5156962871551514, "loss_ce": 1.0223078788840212e-05, "loss_iou": 0.1845703125, "loss_num": 0.029296875, "loss_xval": 0.515625, "num_input_tokens_seen": 235992664, "step": 3764 }, { "epoch": 12.529118136439267, "grad_norm": 6.961420059204102, "learning_rate": 5e-06, "loss": 0.6088, "num_input_tokens_seen": 236055408, "step": 3765 }, { "epoch": 12.529118136439267, "loss": 0.5346723794937134, "loss_ce": 4.428592092153849e-06, "loss_iou": 0.21484375, "loss_num": 0.02099609375, "loss_xval": 0.53515625, "num_input_tokens_seen": 236055408, "step": 3765 }, { "epoch": 12.532445923460898, "grad_norm": 17.291385650634766, "learning_rate": 5e-06, "loss": 0.3153, "num_input_tokens_seen": 236115952, "step": 3766 }, { "epoch": 12.532445923460898, "loss": 0.30525320768356323, "loss_ce": 7.743141031824052e-05, "loss_iou": 0.1064453125, "loss_num": 0.018310546875, "loss_xval": 0.3046875, "num_input_tokens_seen": 236115952, "step": 3766 }, { "epoch": 12.535773710482529, "grad_norm": 22.863636016845703, "learning_rate": 5e-06, "loss": 0.3739, "num_input_tokens_seen": 236179600, "step": 3767 }, { "epoch": 12.535773710482529, "loss": 0.3933734893798828, "loss_ce": 1.914947461045813e-06, "loss_iou": 0.1767578125, "loss_num": 0.0081787109375, "loss_xval": 0.392578125, "num_input_tokens_seen": 236179600, "step": 3767 }, { "epoch": 12.53910149750416, "grad_norm": 20.337724685668945, "learning_rate": 5e-06, "loss": 0.6797, "num_input_tokens_seen": 236242456, "step": 3768 }, { "epoch": 12.53910149750416, "loss": 0.6878678798675537, "loss_ce": 1.648585111979628e-06, "loss_iou": 0.267578125, "loss_num": 0.0303955078125, "loss_xval": 0.6875, "num_input_tokens_seen": 236242456, "step": 3768 }, { "epoch": 12.54242928452579, "grad_norm": 12.610240936279297, "learning_rate": 5e-06, "loss": 0.3768, "num_input_tokens_seen": 236305264, "step": 3769 }, { "epoch": 12.54242928452579, "loss": 0.3897746801376343, "loss_ce": 4.155329406785313e-06, "loss_iou": 0.142578125, "loss_num": 0.02099609375, "loss_xval": 0.390625, "num_input_tokens_seen": 236305264, "step": 3769 }, { "epoch": 12.54575707154742, "grad_norm": 8.464836120605469, "learning_rate": 5e-06, "loss": 0.5754, "num_input_tokens_seen": 236368620, "step": 3770 }, { "epoch": 12.54575707154742, "loss": 0.5493173599243164, "loss_ce": 9.000067393571953e-07, "loss_iou": 0.2041015625, "loss_num": 0.028076171875, "loss_xval": 0.55078125, "num_input_tokens_seen": 236368620, "step": 3770 }, { "epoch": 12.549084858569051, "grad_norm": 11.005350112915039, "learning_rate": 5e-06, "loss": 0.5182, "num_input_tokens_seen": 236432084, "step": 3771 }, { "epoch": 12.549084858569051, "loss": 0.6344714164733887, "loss_ce": 0.0004992254544049501, "loss_iou": 0.236328125, "loss_num": 0.032470703125, "loss_xval": 0.6328125, "num_input_tokens_seen": 236432084, "step": 3771 }, { "epoch": 12.552412645590682, "grad_norm": 27.612234115600586, "learning_rate": 5e-06, "loss": 0.4237, "num_input_tokens_seen": 236493560, "step": 3772 }, { "epoch": 12.552412645590682, "loss": 0.5661652088165283, "loss_ce": 3.088631956416066e-06, "loss_iou": 0.2109375, "loss_num": 0.029052734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 236493560, "step": 3772 }, { "epoch": 12.555740432612312, "grad_norm": 41.974613189697266, "learning_rate": 5e-06, "loss": 0.5583, "num_input_tokens_seen": 236556360, "step": 3773 }, { "epoch": 12.555740432612312, "loss": 0.4926479756832123, "loss_ce": 2.7105011213279795e-06, "loss_iou": 0.2119140625, "loss_num": 0.01361083984375, "loss_xval": 0.4921875, "num_input_tokens_seen": 236556360, "step": 3773 }, { "epoch": 12.559068219633943, "grad_norm": 25.714746475219727, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 236617832, "step": 3774 }, { "epoch": 12.559068219633943, "loss": 0.46679970622062683, "loss_ce": 2.8282795483391965e-06, "loss_iou": 0.1767578125, "loss_num": 0.022705078125, "loss_xval": 0.466796875, "num_input_tokens_seen": 236617832, "step": 3774 }, { "epoch": 12.562396006655574, "grad_norm": 8.746572494506836, "learning_rate": 5e-06, "loss": 0.6246, "num_input_tokens_seen": 236679556, "step": 3775 }, { "epoch": 12.562396006655574, "loss": 0.8901417851448059, "loss_ce": 5.053834684076719e-06, "loss_iou": 0.34765625, "loss_num": 0.0390625, "loss_xval": 0.890625, "num_input_tokens_seen": 236679556, "step": 3775 }, { "epoch": 12.565723793677204, "grad_norm": 27.096590042114258, "learning_rate": 5e-06, "loss": 0.725, "num_input_tokens_seen": 236743392, "step": 3776 }, { "epoch": 12.565723793677204, "loss": 0.7533008456230164, "loss_ce": 6.59946963423863e-05, "loss_iou": 0.296875, "loss_num": 0.031982421875, "loss_xval": 0.75390625, "num_input_tokens_seen": 236743392, "step": 3776 }, { "epoch": 12.569051580698835, "grad_norm": 25.670040130615234, "learning_rate": 5e-06, "loss": 0.6192, "num_input_tokens_seen": 236804548, "step": 3777 }, { "epoch": 12.569051580698835, "loss": 0.5271010994911194, "loss_ce": 1.4906622709531803e-06, "loss_iou": 0.20703125, "loss_num": 0.022705078125, "loss_xval": 0.52734375, "num_input_tokens_seen": 236804548, "step": 3777 }, { "epoch": 12.572379367720465, "grad_norm": 11.287446022033691, "learning_rate": 5e-06, "loss": 0.8185, "num_input_tokens_seen": 236867964, "step": 3778 }, { "epoch": 12.572379367720465, "loss": 0.65547114610672, "loss_ce": 7.564974657725543e-05, "loss_iou": 0.28515625, "loss_num": 0.017333984375, "loss_xval": 0.65625, "num_input_tokens_seen": 236867964, "step": 3778 }, { "epoch": 12.575707154742096, "grad_norm": 18.620737075805664, "learning_rate": 5e-06, "loss": 0.5811, "num_input_tokens_seen": 236932256, "step": 3779 }, { "epoch": 12.575707154742096, "loss": 0.5243432521820068, "loss_ce": 5.124375456944108e-05, "loss_iou": 0.205078125, "loss_num": 0.0225830078125, "loss_xval": 0.5234375, "num_input_tokens_seen": 236932256, "step": 3779 }, { "epoch": 12.579034941763727, "grad_norm": 27.987060546875, "learning_rate": 5e-06, "loss": 0.5243, "num_input_tokens_seen": 236995164, "step": 3780 }, { "epoch": 12.579034941763727, "loss": 0.47717374563217163, "loss_ce": 8.759080856179935e-07, "loss_iou": 0.1806640625, "loss_num": 0.0234375, "loss_xval": 0.4765625, "num_input_tokens_seen": 236995164, "step": 3780 }, { "epoch": 12.582362728785357, "grad_norm": 27.89103889465332, "learning_rate": 5e-06, "loss": 0.5441, "num_input_tokens_seen": 237057964, "step": 3781 }, { "epoch": 12.582362728785357, "loss": 0.5661492943763733, "loss_ce": 0.00017028136062435806, "loss_iou": 0.1865234375, "loss_num": 0.03857421875, "loss_xval": 0.56640625, "num_input_tokens_seen": 237057964, "step": 3781 }, { "epoch": 12.585690515806988, "grad_norm": 18.122779846191406, "learning_rate": 5e-06, "loss": 0.5207, "num_input_tokens_seen": 237121876, "step": 3782 }, { "epoch": 12.585690515806988, "loss": 0.6241471767425537, "loss_ce": 1.6555050024180673e-06, "loss_iou": 0.259765625, "loss_num": 0.0208740234375, "loss_xval": 0.625, "num_input_tokens_seen": 237121876, "step": 3782 }, { "epoch": 12.589018302828618, "grad_norm": 11.77163028717041, "learning_rate": 5e-06, "loss": 0.4778, "num_input_tokens_seen": 237183744, "step": 3783 }, { "epoch": 12.589018302828618, "loss": 0.5204194188117981, "loss_ce": 0.00039989184006117284, "loss_iou": 0.2138671875, "loss_num": 0.0184326171875, "loss_xval": 0.51953125, "num_input_tokens_seen": 237183744, "step": 3783 }, { "epoch": 12.592346089850249, "grad_norm": 8.46462631225586, "learning_rate": 5e-06, "loss": 0.2988, "num_input_tokens_seen": 237247056, "step": 3784 }, { "epoch": 12.592346089850249, "loss": 0.1800607144832611, "loss_ce": 6.9971597440599e-06, "loss_iou": 0.07373046875, "loss_num": 0.006591796875, "loss_xval": 0.1796875, "num_input_tokens_seen": 237247056, "step": 3784 }, { "epoch": 12.59567387687188, "grad_norm": 16.377792358398438, "learning_rate": 5e-06, "loss": 0.3789, "num_input_tokens_seen": 237310300, "step": 3785 }, { "epoch": 12.59567387687188, "loss": 0.39544522762298584, "loss_ce": 5.949291153228842e-05, "loss_iou": 0.1826171875, "loss_num": 0.0059814453125, "loss_xval": 0.39453125, "num_input_tokens_seen": 237310300, "step": 3785 }, { "epoch": 12.59900166389351, "grad_norm": 9.251996040344238, "learning_rate": 5e-06, "loss": 0.4269, "num_input_tokens_seen": 237372420, "step": 3786 }, { "epoch": 12.59900166389351, "loss": 0.35577571392059326, "loss_ce": 1.7719611378197442e-06, "loss_iou": 0.12109375, "loss_num": 0.022705078125, "loss_xval": 0.35546875, "num_input_tokens_seen": 237372420, "step": 3786 }, { "epoch": 12.602329450915141, "grad_norm": 15.54635238647461, "learning_rate": 5e-06, "loss": 0.5067, "num_input_tokens_seen": 237434916, "step": 3787 }, { "epoch": 12.602329450915141, "loss": 0.41357946395874023, "loss_ce": 5.25965197084588e-06, "loss_iou": 0.18359375, "loss_num": 0.00927734375, "loss_xval": 0.4140625, "num_input_tokens_seen": 237434916, "step": 3787 }, { "epoch": 12.605657237936772, "grad_norm": 33.5209846496582, "learning_rate": 5e-06, "loss": 0.4902, "num_input_tokens_seen": 237498716, "step": 3788 }, { "epoch": 12.605657237936772, "loss": 0.5400415658950806, "loss_ce": 2.494265345376334e-06, "loss_iou": 0.2060546875, "loss_num": 0.025634765625, "loss_xval": 0.5390625, "num_input_tokens_seen": 237498716, "step": 3788 }, { "epoch": 12.608985024958402, "grad_norm": 26.23750877380371, "learning_rate": 5e-06, "loss": 0.5979, "num_input_tokens_seen": 237560492, "step": 3789 }, { "epoch": 12.608985024958402, "loss": 0.6588156223297119, "loss_ce": 2.1331147763703484e-06, "loss_iou": 0.2421875, "loss_num": 0.03515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 237560492, "step": 3789 }, { "epoch": 12.612312811980033, "grad_norm": 14.710817337036133, "learning_rate": 5e-06, "loss": 0.6281, "num_input_tokens_seen": 237624276, "step": 3790 }, { "epoch": 12.612312811980033, "loss": 0.7763148546218872, "loss_ce": 6.968952220631763e-05, "loss_iou": 0.28125, "loss_num": 0.04296875, "loss_xval": 0.77734375, "num_input_tokens_seen": 237624276, "step": 3790 }, { "epoch": 12.615640599001663, "grad_norm": 36.709285736083984, "learning_rate": 5e-06, "loss": 0.7989, "num_input_tokens_seen": 237687560, "step": 3791 }, { "epoch": 12.615640599001663, "loss": 0.7837679982185364, "loss_ce": 0.00019868536037392914, "loss_iou": 0.287109375, "loss_num": 0.0419921875, "loss_xval": 0.78515625, "num_input_tokens_seen": 237687560, "step": 3791 }, { "epoch": 12.618968386023294, "grad_norm": 39.20414352416992, "learning_rate": 5e-06, "loss": 0.511, "num_input_tokens_seen": 237750928, "step": 3792 }, { "epoch": 12.618968386023294, "loss": 0.7578154802322388, "loss_ce": 2.9356751838349737e-06, "loss_iou": 0.28515625, "loss_num": 0.037109375, "loss_xval": 0.7578125, "num_input_tokens_seen": 237750928, "step": 3792 }, { "epoch": 12.622296173044925, "grad_norm": 26.30007553100586, "learning_rate": 5e-06, "loss": 0.4589, "num_input_tokens_seen": 237813756, "step": 3793 }, { "epoch": 12.622296173044925, "loss": 0.4475182890892029, "loss_ce": 8.51997083373135e-06, "loss_iou": 0.1904296875, "loss_num": 0.01312255859375, "loss_xval": 0.447265625, "num_input_tokens_seen": 237813756, "step": 3793 }, { "epoch": 12.625623960066555, "grad_norm": 17.114482879638672, "learning_rate": 5e-06, "loss": 0.5277, "num_input_tokens_seen": 237876428, "step": 3794 }, { "epoch": 12.625623960066555, "loss": 0.4005761742591858, "loss_ce": 2.418644271529047e-06, "loss_iou": 0.126953125, "loss_num": 0.029296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 237876428, "step": 3794 }, { "epoch": 12.628951747088186, "grad_norm": 6.393742084503174, "learning_rate": 5e-06, "loss": 0.35, "num_input_tokens_seen": 237938580, "step": 3795 }, { "epoch": 12.628951747088186, "loss": 0.26906484365463257, "loss_ce": 2.1856973035028204e-05, "loss_iou": 0.06689453125, "loss_num": 0.0269775390625, "loss_xval": 0.26953125, "num_input_tokens_seen": 237938580, "step": 3795 }, { "epoch": 12.632279534109816, "grad_norm": 10.822392463684082, "learning_rate": 5e-06, "loss": 0.4561, "num_input_tokens_seen": 237999952, "step": 3796 }, { "epoch": 12.632279534109816, "loss": 0.48053035140037537, "loss_ce": 0.0010992023162543774, "loss_iou": 0.1689453125, "loss_num": 0.0284423828125, "loss_xval": 0.478515625, "num_input_tokens_seen": 237999952, "step": 3796 }, { "epoch": 12.635607321131447, "grad_norm": 15.406231880187988, "learning_rate": 5e-06, "loss": 0.4091, "num_input_tokens_seen": 238063244, "step": 3797 }, { "epoch": 12.635607321131447, "loss": 0.46155521273612976, "loss_ce": 7.353270575549686e-06, "loss_iou": 0.14453125, "loss_num": 0.034423828125, "loss_xval": 0.4609375, "num_input_tokens_seen": 238063244, "step": 3797 }, { "epoch": 12.638935108153078, "grad_norm": 16.78388023376465, "learning_rate": 5e-06, "loss": 0.5535, "num_input_tokens_seen": 238127424, "step": 3798 }, { "epoch": 12.638935108153078, "loss": 0.7066671252250671, "loss_ce": 2.100206756949774e-06, "loss_iou": 0.259765625, "loss_num": 0.03759765625, "loss_xval": 0.70703125, "num_input_tokens_seen": 238127424, "step": 3798 }, { "epoch": 12.642262895174708, "grad_norm": 9.44034194946289, "learning_rate": 5e-06, "loss": 0.6186, "num_input_tokens_seen": 238191892, "step": 3799 }, { "epoch": 12.642262895174708, "loss": 0.6955592632293701, "loss_ce": 2.5953813747037202e-06, "loss_iou": 0.283203125, "loss_num": 0.0255126953125, "loss_xval": 0.6953125, "num_input_tokens_seen": 238191892, "step": 3799 }, { "epoch": 12.645590682196339, "grad_norm": 25.788143157958984, "learning_rate": 5e-06, "loss": 0.5583, "num_input_tokens_seen": 238253304, "step": 3800 }, { "epoch": 12.645590682196339, "loss": 0.5506696701049805, "loss_ce": 1.0451874004502315e-05, "loss_iou": 0.1943359375, "loss_num": 0.032470703125, "loss_xval": 0.55078125, "num_input_tokens_seen": 238253304, "step": 3800 }, { "epoch": 12.64891846921797, "grad_norm": 14.829249382019043, "learning_rate": 5e-06, "loss": 0.492, "num_input_tokens_seen": 238315392, "step": 3801 }, { "epoch": 12.64891846921797, "loss": 0.47880709171295166, "loss_ce": 4.734347021440044e-05, "loss_iou": 0.1484375, "loss_num": 0.03662109375, "loss_xval": 0.478515625, "num_input_tokens_seen": 238315392, "step": 3801 }, { "epoch": 12.6522462562396, "grad_norm": 9.16076946258545, "learning_rate": 5e-06, "loss": 0.5476, "num_input_tokens_seen": 238379160, "step": 3802 }, { "epoch": 12.6522462562396, "loss": 0.5791024565696716, "loss_ce": 9.104804803428124e-07, "loss_iou": 0.2333984375, "loss_num": 0.0225830078125, "loss_xval": 0.578125, "num_input_tokens_seen": 238379160, "step": 3802 }, { "epoch": 12.65557404326123, "grad_norm": 11.214707374572754, "learning_rate": 5e-06, "loss": 0.4048, "num_input_tokens_seen": 238441284, "step": 3803 }, { "epoch": 12.65557404326123, "loss": 0.32288575172424316, "loss_ce": 1.3570950613939203e-05, "loss_iou": 0.130859375, "loss_num": 0.0123291015625, "loss_xval": 0.322265625, "num_input_tokens_seen": 238441284, "step": 3803 }, { "epoch": 12.658901830282861, "grad_norm": 11.121038436889648, "learning_rate": 5e-06, "loss": 0.3323, "num_input_tokens_seen": 238503624, "step": 3804 }, { "epoch": 12.658901830282861, "loss": 0.2935224175453186, "loss_ce": 4.346682544564828e-06, "loss_iou": 0.11572265625, "loss_num": 0.01251220703125, "loss_xval": 0.29296875, "num_input_tokens_seen": 238503624, "step": 3804 }, { "epoch": 12.662229617304492, "grad_norm": 24.762792587280273, "learning_rate": 5e-06, "loss": 0.5474, "num_input_tokens_seen": 238567084, "step": 3805 }, { "epoch": 12.662229617304492, "loss": 0.577521562576294, "loss_ce": 6.939269951544702e-06, "loss_iou": 0.2470703125, "loss_num": 0.0164794921875, "loss_xval": 0.578125, "num_input_tokens_seen": 238567084, "step": 3805 }, { "epoch": 12.665557404326123, "grad_norm": 24.13604164123535, "learning_rate": 5e-06, "loss": 0.6292, "num_input_tokens_seen": 238630532, "step": 3806 }, { "epoch": 12.665557404326123, "loss": 0.5954920649528503, "loss_ce": 3.312312765046954e-05, "loss_iou": 0.1953125, "loss_num": 0.041015625, "loss_xval": 0.59375, "num_input_tokens_seen": 238630532, "step": 3806 }, { "epoch": 12.668885191347753, "grad_norm": 13.250652313232422, "learning_rate": 5e-06, "loss": 0.4037, "num_input_tokens_seen": 238694004, "step": 3807 }, { "epoch": 12.668885191347753, "loss": 0.5115330815315247, "loss_ce": 0.0004246938624419272, "loss_iou": 0.18359375, "loss_num": 0.02880859375, "loss_xval": 0.51171875, "num_input_tokens_seen": 238694004, "step": 3807 }, { "epoch": 12.672212978369384, "grad_norm": 10.077059745788574, "learning_rate": 5e-06, "loss": 0.2972, "num_input_tokens_seen": 238757300, "step": 3808 }, { "epoch": 12.672212978369384, "loss": 0.3602331280708313, "loss_ce": 3.660452421172522e-06, "loss_iou": 0.146484375, "loss_num": 0.0135498046875, "loss_xval": 0.359375, "num_input_tokens_seen": 238757300, "step": 3808 }, { "epoch": 12.675540765391014, "grad_norm": 8.029096603393555, "learning_rate": 5e-06, "loss": 0.3679, "num_input_tokens_seen": 238820604, "step": 3809 }, { "epoch": 12.675540765391014, "loss": 0.4616379737854004, "loss_ce": 0.001066724769771099, "loss_iou": 0.17578125, "loss_num": 0.0218505859375, "loss_xval": 0.4609375, "num_input_tokens_seen": 238820604, "step": 3809 }, { "epoch": 12.678868552412645, "grad_norm": 13.994856834411621, "learning_rate": 5e-06, "loss": 0.6111, "num_input_tokens_seen": 238882552, "step": 3810 }, { "epoch": 12.678868552412645, "loss": 0.5727636814117432, "loss_ce": 9.726418284117244e-06, "loss_iou": 0.2177734375, "loss_num": 0.0274658203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 238882552, "step": 3810 }, { "epoch": 12.682196339434276, "grad_norm": 20.062076568603516, "learning_rate": 5e-06, "loss": 0.4873, "num_input_tokens_seen": 238945356, "step": 3811 }, { "epoch": 12.682196339434276, "loss": 0.7474430203437805, "loss_ce": 0.0003726930299308151, "loss_iou": 0.28125, "loss_num": 0.036376953125, "loss_xval": 0.74609375, "num_input_tokens_seen": 238945356, "step": 3811 }, { "epoch": 12.685524126455906, "grad_norm": 20.39287567138672, "learning_rate": 5e-06, "loss": 0.461, "num_input_tokens_seen": 239007120, "step": 3812 }, { "epoch": 12.685524126455906, "loss": 0.5751957893371582, "loss_ce": 4.406022071634652e-07, "loss_iou": 0.23828125, "loss_num": 0.01953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 239007120, "step": 3812 }, { "epoch": 12.688851913477537, "grad_norm": 27.50640106201172, "learning_rate": 5e-06, "loss": 0.7723, "num_input_tokens_seen": 239071208, "step": 3813 }, { "epoch": 12.688851913477537, "loss": 0.6471766829490662, "loss_ce": 0.00020399727509357035, "loss_iou": 0.25, "loss_num": 0.029052734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 239071208, "step": 3813 }, { "epoch": 12.692179700499167, "grad_norm": 19.17807960510254, "learning_rate": 5e-06, "loss": 0.5349, "num_input_tokens_seen": 239134272, "step": 3814 }, { "epoch": 12.692179700499167, "loss": 0.579480767250061, "loss_ce": 1.2987229638383724e-05, "loss_iou": 0.228515625, "loss_num": 0.0244140625, "loss_xval": 0.578125, "num_input_tokens_seen": 239134272, "step": 3814 }, { "epoch": 12.695507487520798, "grad_norm": 17.38690185546875, "learning_rate": 5e-06, "loss": 0.5072, "num_input_tokens_seen": 239193928, "step": 3815 }, { "epoch": 12.695507487520798, "loss": 0.5798991918563843, "loss_ce": 4.155655005888548e-06, "loss_iou": 0.2099609375, "loss_num": 0.03173828125, "loss_xval": 0.578125, "num_input_tokens_seen": 239193928, "step": 3815 }, { "epoch": 12.698835274542429, "grad_norm": 12.091056823730469, "learning_rate": 5e-06, "loss": 0.5343, "num_input_tokens_seen": 239258716, "step": 3816 }, { "epoch": 12.698835274542429, "loss": 0.45080995559692383, "loss_ce": 4.32412207374e-06, "loss_iou": 0.20703125, "loss_num": 0.00738525390625, "loss_xval": 0.451171875, "num_input_tokens_seen": 239258716, "step": 3816 }, { "epoch": 12.70216306156406, "grad_norm": 23.431682586669922, "learning_rate": 5e-06, "loss": 0.4843, "num_input_tokens_seen": 239320820, "step": 3817 }, { "epoch": 12.70216306156406, "loss": 0.6337898969650269, "loss_ce": 8.405846756431856e-07, "loss_iou": 0.267578125, "loss_num": 0.0194091796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 239320820, "step": 3817 }, { "epoch": 12.70549084858569, "grad_norm": 17.075468063354492, "learning_rate": 5e-06, "loss": 0.452, "num_input_tokens_seen": 239383868, "step": 3818 }, { "epoch": 12.70549084858569, "loss": 0.5164220929145813, "loss_ce": 3.629437060226337e-06, "loss_iou": 0.2099609375, "loss_num": 0.01953125, "loss_xval": 0.515625, "num_input_tokens_seen": 239383868, "step": 3818 }, { "epoch": 12.70881863560732, "grad_norm": 30.3526668548584, "learning_rate": 5e-06, "loss": 0.3484, "num_input_tokens_seen": 239447500, "step": 3819 }, { "epoch": 12.70881863560732, "loss": 0.4055677056312561, "loss_ce": 0.0008435670752078295, "loss_iou": 0.1630859375, "loss_num": 0.015625, "loss_xval": 0.404296875, "num_input_tokens_seen": 239447500, "step": 3819 }, { "epoch": 12.712146422628951, "grad_norm": 18.42508316040039, "learning_rate": 5e-06, "loss": 0.45, "num_input_tokens_seen": 239510508, "step": 3820 }, { "epoch": 12.712146422628951, "loss": 0.48572102189064026, "loss_ce": 3.255951469327556e-06, "loss_iou": 0.1533203125, "loss_num": 0.035888671875, "loss_xval": 0.486328125, "num_input_tokens_seen": 239510508, "step": 3820 }, { "epoch": 12.715474209650582, "grad_norm": 8.017897605895996, "learning_rate": 5e-06, "loss": 0.4255, "num_input_tokens_seen": 239570856, "step": 3821 }, { "epoch": 12.715474209650582, "loss": 0.5509679913520813, "loss_ce": 3.624304554250557e-06, "loss_iou": 0.212890625, "loss_num": 0.0250244140625, "loss_xval": 0.55078125, "num_input_tokens_seen": 239570856, "step": 3821 }, { "epoch": 12.718801996672212, "grad_norm": 6.321202754974365, "learning_rate": 5e-06, "loss": 0.2928, "num_input_tokens_seen": 239631548, "step": 3822 }, { "epoch": 12.718801996672212, "loss": 0.34472784399986267, "loss_ce": 1.278046738661942e-06, "loss_iou": 0.1435546875, "loss_num": 0.01153564453125, "loss_xval": 0.34375, "num_input_tokens_seen": 239631548, "step": 3822 }, { "epoch": 12.722129783693843, "grad_norm": 6.653834819793701, "learning_rate": 5e-06, "loss": 0.3126, "num_input_tokens_seen": 239693380, "step": 3823 }, { "epoch": 12.722129783693843, "loss": 0.33227676153182983, "loss_ce": 1.3867442021364695e-06, "loss_iou": 0.103515625, "loss_num": 0.0250244140625, "loss_xval": 0.33203125, "num_input_tokens_seen": 239693380, "step": 3823 }, { "epoch": 12.725457570715474, "grad_norm": 17.387039184570312, "learning_rate": 5e-06, "loss": 0.452, "num_input_tokens_seen": 239756412, "step": 3824 }, { "epoch": 12.725457570715474, "loss": 0.5406571626663208, "loss_ce": 7.743967216811143e-06, "loss_iou": 0.1787109375, "loss_num": 0.036865234375, "loss_xval": 0.5390625, "num_input_tokens_seen": 239756412, "step": 3824 }, { "epoch": 12.728785357737104, "grad_norm": 20.04737091064453, "learning_rate": 5e-06, "loss": 0.4559, "num_input_tokens_seen": 239817248, "step": 3825 }, { "epoch": 12.728785357737104, "loss": 0.39850255846977234, "loss_ce": 4.037079634144902e-06, "loss_iou": 0.14453125, "loss_num": 0.0218505859375, "loss_xval": 0.3984375, "num_input_tokens_seen": 239817248, "step": 3825 }, { "epoch": 12.732113144758735, "grad_norm": 21.668739318847656, "learning_rate": 5e-06, "loss": 0.6037, "num_input_tokens_seen": 239880268, "step": 3826 }, { "epoch": 12.732113144758735, "loss": 0.5322365164756775, "loss_ce": 9.994382708100602e-06, "loss_iou": 0.1845703125, "loss_num": 0.032470703125, "loss_xval": 0.53125, "num_input_tokens_seen": 239880268, "step": 3826 }, { "epoch": 12.735440931780365, "grad_norm": 26.78547477722168, "learning_rate": 5e-06, "loss": 0.4666, "num_input_tokens_seen": 239943680, "step": 3827 }, { "epoch": 12.735440931780365, "loss": 0.2946785092353821, "loss_ce": 7.931328127597226e-07, "loss_iou": 0.1083984375, "loss_num": 0.015625, "loss_xval": 0.294921875, "num_input_tokens_seen": 239943680, "step": 3827 }, { "epoch": 12.738768718801996, "grad_norm": 11.519979476928711, "learning_rate": 5e-06, "loss": 0.3919, "num_input_tokens_seen": 240005936, "step": 3828 }, { "epoch": 12.738768718801996, "loss": 0.46631115674972534, "loss_ce": 2.574762675067177e-06, "loss_iou": 0.1728515625, "loss_num": 0.0240478515625, "loss_xval": 0.466796875, "num_input_tokens_seen": 240005936, "step": 3828 }, { "epoch": 12.742096505823627, "grad_norm": 7.346210956573486, "learning_rate": 5e-06, "loss": 0.2969, "num_input_tokens_seen": 240066028, "step": 3829 }, { "epoch": 12.742096505823627, "loss": 0.31023454666137695, "loss_ce": 0.0001454350131098181, "loss_iou": 0.1171875, "loss_num": 0.0152587890625, "loss_xval": 0.310546875, "num_input_tokens_seen": 240066028, "step": 3829 }, { "epoch": 12.745424292845257, "grad_norm": 15.198709487915039, "learning_rate": 5e-06, "loss": 0.5704, "num_input_tokens_seen": 240129784, "step": 3830 }, { "epoch": 12.745424292845257, "loss": 0.6631309986114502, "loss_ce": 4.5060121919959784e-05, "loss_iou": 0.275390625, "loss_num": 0.022705078125, "loss_xval": 0.6640625, "num_input_tokens_seen": 240129784, "step": 3830 }, { "epoch": 12.748752079866888, "grad_norm": 15.716217041015625, "learning_rate": 5e-06, "loss": 0.4406, "num_input_tokens_seen": 240192404, "step": 3831 }, { "epoch": 12.748752079866888, "loss": 0.49011409282684326, "loss_ce": 1.7788340755942045e-06, "loss_iou": 0.197265625, "loss_num": 0.0191650390625, "loss_xval": 0.490234375, "num_input_tokens_seen": 240192404, "step": 3831 }, { "epoch": 12.752079866888518, "grad_norm": 19.75092315673828, "learning_rate": 5e-06, "loss": 0.5395, "num_input_tokens_seen": 240255388, "step": 3832 }, { "epoch": 12.752079866888518, "loss": 0.2954116761684418, "loss_ce": 1.5173383189903689e-06, "loss_iou": 0.1083984375, "loss_num": 0.015625, "loss_xval": 0.294921875, "num_input_tokens_seen": 240255388, "step": 3832 }, { "epoch": 12.755407653910149, "grad_norm": 11.630691528320312, "learning_rate": 5e-06, "loss": 0.7171, "num_input_tokens_seen": 240318624, "step": 3833 }, { "epoch": 12.755407653910149, "loss": 0.8089807629585266, "loss_ce": 0.0001428161485819146, "loss_iou": 0.33203125, "loss_num": 0.0291748046875, "loss_xval": 0.80859375, "num_input_tokens_seen": 240318624, "step": 3833 }, { "epoch": 12.75873544093178, "grad_norm": 10.398391723632812, "learning_rate": 5e-06, "loss": 0.4633, "num_input_tokens_seen": 240382380, "step": 3834 }, { "epoch": 12.75873544093178, "loss": 0.41986215114593506, "loss_ce": 1.3003295862290543e-06, "loss_iou": 0.1748046875, "loss_num": 0.01385498046875, "loss_xval": 0.419921875, "num_input_tokens_seen": 240382380, "step": 3834 }, { "epoch": 12.76206322795341, "grad_norm": 21.886856079101562, "learning_rate": 5e-06, "loss": 0.4332, "num_input_tokens_seen": 240444760, "step": 3835 }, { "epoch": 12.76206322795341, "loss": 0.3947203755378723, "loss_ce": 6.026362825650722e-06, "loss_iou": 0.15234375, "loss_num": 0.01806640625, "loss_xval": 0.39453125, "num_input_tokens_seen": 240444760, "step": 3835 }, { "epoch": 12.765391014975041, "grad_norm": 13.669419288635254, "learning_rate": 5e-06, "loss": 0.6797, "num_input_tokens_seen": 240507800, "step": 3836 }, { "epoch": 12.765391014975041, "loss": 0.6113303303718567, "loss_ce": 2.1805503820360173e-06, "loss_iou": 0.20703125, "loss_num": 0.039794921875, "loss_xval": 0.609375, "num_input_tokens_seen": 240507800, "step": 3836 }, { "epoch": 12.768718801996672, "grad_norm": 12.902509689331055, "learning_rate": 5e-06, "loss": 0.4202, "num_input_tokens_seen": 240570052, "step": 3837 }, { "epoch": 12.768718801996672, "loss": 0.3364452123641968, "loss_ce": 4.142151738051325e-06, "loss_iou": 0.1162109375, "loss_num": 0.02099609375, "loss_xval": 0.3359375, "num_input_tokens_seen": 240570052, "step": 3837 }, { "epoch": 12.772046589018302, "grad_norm": 31.147409439086914, "learning_rate": 5e-06, "loss": 0.5129, "num_input_tokens_seen": 240634124, "step": 3838 }, { "epoch": 12.772046589018302, "loss": 0.5127073526382446, "loss_ce": 0.0001036196990753524, "loss_iou": 0.203125, "loss_num": 0.021484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 240634124, "step": 3838 }, { "epoch": 12.775374376039933, "grad_norm": 50.11201858520508, "learning_rate": 5e-06, "loss": 0.6592, "num_input_tokens_seen": 240697628, "step": 3839 }, { "epoch": 12.775374376039933, "loss": 0.649232029914856, "loss_ce": 1.0609346645651385e-06, "loss_iou": 0.27734375, "loss_num": 0.0184326171875, "loss_xval": 0.6484375, "num_input_tokens_seen": 240697628, "step": 3839 }, { "epoch": 12.778702163061563, "grad_norm": 31.447265625, "learning_rate": 5e-06, "loss": 0.416, "num_input_tokens_seen": 240758796, "step": 3840 }, { "epoch": 12.778702163061563, "loss": 0.41735947132110596, "loss_ce": 1.0453878758198698e-06, "loss_iou": 0.15625, "loss_num": 0.0211181640625, "loss_xval": 0.41796875, "num_input_tokens_seen": 240758796, "step": 3840 }, { "epoch": 12.782029950083194, "grad_norm": 13.733940124511719, "learning_rate": 5e-06, "loss": 0.383, "num_input_tokens_seen": 240820924, "step": 3841 }, { "epoch": 12.782029950083194, "loss": 0.41864126920700073, "loss_ce": 1.1026675110770157e-06, "loss_iou": 0.134765625, "loss_num": 0.0299072265625, "loss_xval": 0.41796875, "num_input_tokens_seen": 240820924, "step": 3841 }, { "epoch": 12.785357737104825, "grad_norm": 24.03006935119629, "learning_rate": 5e-06, "loss": 0.5102, "num_input_tokens_seen": 240885116, "step": 3842 }, { "epoch": 12.785357737104825, "loss": 0.6015112996101379, "loss_ce": 0.00019295622769277543, "loss_iou": 0.25390625, "loss_num": 0.0185546875, "loss_xval": 0.6015625, "num_input_tokens_seen": 240885116, "step": 3842 }, { "epoch": 12.788685524126455, "grad_norm": 10.521800994873047, "learning_rate": 5e-06, "loss": 0.3971, "num_input_tokens_seen": 240946632, "step": 3843 }, { "epoch": 12.788685524126455, "loss": 0.45868027210235596, "loss_ce": 1.0752210073405877e-06, "loss_iou": 0.1650390625, "loss_num": 0.02587890625, "loss_xval": 0.458984375, "num_input_tokens_seen": 240946632, "step": 3843 }, { "epoch": 12.792013311148086, "grad_norm": 6.218596458435059, "learning_rate": 5e-06, "loss": 0.345, "num_input_tokens_seen": 241010496, "step": 3844 }, { "epoch": 12.792013311148086, "loss": 0.5071977376937866, "loss_ce": 0.0004228082543704659, "loss_iou": 0.1787109375, "loss_num": 0.030029296875, "loss_xval": 0.5078125, "num_input_tokens_seen": 241010496, "step": 3844 }, { "epoch": 12.795341098169716, "grad_norm": 5.843862533569336, "learning_rate": 5e-06, "loss": 0.3172, "num_input_tokens_seen": 241072328, "step": 3845 }, { "epoch": 12.795341098169716, "loss": 0.2397482991218567, "loss_ce": 2.1982805264997296e-06, "loss_iou": 0.0849609375, "loss_num": 0.013916015625, "loss_xval": 0.240234375, "num_input_tokens_seen": 241072328, "step": 3845 }, { "epoch": 12.798668885191347, "grad_norm": 6.487051963806152, "learning_rate": 5e-06, "loss": 0.4047, "num_input_tokens_seen": 241134320, "step": 3846 }, { "epoch": 12.798668885191347, "loss": 0.31091445684432983, "loss_ce": 1.3584005955635803e-06, "loss_iou": 0.12890625, "loss_num": 0.0106201171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 241134320, "step": 3846 }, { "epoch": 12.801996672212978, "grad_norm": 8.040000915527344, "learning_rate": 5e-06, "loss": 0.4707, "num_input_tokens_seen": 241196376, "step": 3847 }, { "epoch": 12.801996672212978, "loss": 0.40832632780075073, "loss_ce": 1.1377724149497226e-06, "loss_iou": 0.1533203125, "loss_num": 0.020263671875, "loss_xval": 0.408203125, "num_input_tokens_seen": 241196376, "step": 3847 }, { "epoch": 12.805324459234608, "grad_norm": 11.682188034057617, "learning_rate": 5e-06, "loss": 0.4482, "num_input_tokens_seen": 241257848, "step": 3848 }, { "epoch": 12.805324459234608, "loss": 0.5454580783843994, "loss_ce": 0.00041412963764742017, "loss_iou": 0.2001953125, "loss_num": 0.02880859375, "loss_xval": 0.546875, "num_input_tokens_seen": 241257848, "step": 3848 }, { "epoch": 12.808652246256239, "grad_norm": 25.272829055786133, "learning_rate": 5e-06, "loss": 0.6293, "num_input_tokens_seen": 241320248, "step": 3849 }, { "epoch": 12.808652246256239, "loss": 0.5165159106254578, "loss_ce": 0.0004026125534437597, "loss_iou": 0.1796875, "loss_num": 0.03125, "loss_xval": 0.515625, "num_input_tokens_seen": 241320248, "step": 3849 }, { "epoch": 12.81198003327787, "grad_norm": 20.499597549438477, "learning_rate": 5e-06, "loss": 0.5185, "num_input_tokens_seen": 241384108, "step": 3850 }, { "epoch": 12.81198003327787, "loss": 0.5875718593597412, "loss_ce": 0.00041362509364262223, "loss_iou": 0.234375, "loss_num": 0.023681640625, "loss_xval": 0.5859375, "num_input_tokens_seen": 241384108, "step": 3850 }, { "epoch": 12.8153078202995, "grad_norm": 16.369136810302734, "learning_rate": 5e-06, "loss": 0.4376, "num_input_tokens_seen": 241447324, "step": 3851 }, { "epoch": 12.8153078202995, "loss": 0.4931468367576599, "loss_ce": 1.3292484254634473e-05, "loss_iou": 0.18359375, "loss_num": 0.025146484375, "loss_xval": 0.4921875, "num_input_tokens_seen": 241447324, "step": 3851 }, { "epoch": 12.81863560732113, "grad_norm": 27.638885498046875, "learning_rate": 5e-06, "loss": 0.4875, "num_input_tokens_seen": 241510724, "step": 3852 }, { "epoch": 12.81863560732113, "loss": 0.5139824748039246, "loss_ce": 5.44370277566486e-06, "loss_iou": 0.193359375, "loss_num": 0.0255126953125, "loss_xval": 0.515625, "num_input_tokens_seen": 241510724, "step": 3852 }, { "epoch": 12.821963394342761, "grad_norm": 20.068620681762695, "learning_rate": 5e-06, "loss": 0.4415, "num_input_tokens_seen": 241573524, "step": 3853 }, { "epoch": 12.821963394342761, "loss": 0.1808176338672638, "loss_ce": 9.819827937462833e-07, "loss_iou": 0.050537109375, "loss_num": 0.0159912109375, "loss_xval": 0.1806640625, "num_input_tokens_seen": 241573524, "step": 3853 }, { "epoch": 12.825291181364392, "grad_norm": 9.15927505493164, "learning_rate": 5e-06, "loss": 0.5424, "num_input_tokens_seen": 241635496, "step": 3854 }, { "epoch": 12.825291181364392, "loss": 0.35055625438690186, "loss_ce": 1.6117253835545853e-05, "loss_iou": 0.12109375, "loss_num": 0.021728515625, "loss_xval": 0.349609375, "num_input_tokens_seen": 241635496, "step": 3854 }, { "epoch": 12.828618968386023, "grad_norm": 15.048893928527832, "learning_rate": 5e-06, "loss": 0.663, "num_input_tokens_seen": 241698208, "step": 3855 }, { "epoch": 12.828618968386023, "loss": 0.6879159212112427, "loss_ce": 0.00035487598506733775, "loss_iou": 0.271484375, "loss_num": 0.02880859375, "loss_xval": 0.6875, "num_input_tokens_seen": 241698208, "step": 3855 }, { "epoch": 12.831946755407653, "grad_norm": 9.742718696594238, "learning_rate": 5e-06, "loss": 0.4153, "num_input_tokens_seen": 241761616, "step": 3856 }, { "epoch": 12.831946755407653, "loss": 0.5016161203384399, "loss_ce": 0.0005174983525648713, "loss_iou": 0.2021484375, "loss_num": 0.019287109375, "loss_xval": 0.5, "num_input_tokens_seen": 241761616, "step": 3856 }, { "epoch": 12.835274542429284, "grad_norm": 10.47567081451416, "learning_rate": 5e-06, "loss": 0.5017, "num_input_tokens_seen": 241822844, "step": 3857 }, { "epoch": 12.835274542429284, "loss": 0.4925834536552429, "loss_ce": 2.9739252568106167e-05, "loss_iou": 0.18359375, "loss_num": 0.02490234375, "loss_xval": 0.4921875, "num_input_tokens_seen": 241822844, "step": 3857 }, { "epoch": 12.838602329450914, "grad_norm": 18.42937660217285, "learning_rate": 5e-06, "loss": 0.4644, "num_input_tokens_seen": 241886728, "step": 3858 }, { "epoch": 12.838602329450914, "loss": 0.475235253572464, "loss_ce": 7.88668967288686e-06, "loss_iou": 0.166015625, "loss_num": 0.02880859375, "loss_xval": 0.474609375, "num_input_tokens_seen": 241886728, "step": 3858 }, { "epoch": 12.841930116472545, "grad_norm": 17.574005126953125, "learning_rate": 5e-06, "loss": 0.5416, "num_input_tokens_seen": 241949220, "step": 3859 }, { "epoch": 12.841930116472545, "loss": 0.49573346972465515, "loss_ce": 5.941336894466076e-06, "loss_iou": 0.1669921875, "loss_num": 0.031982421875, "loss_xval": 0.49609375, "num_input_tokens_seen": 241949220, "step": 3859 }, { "epoch": 12.845257903494176, "grad_norm": 13.06165885925293, "learning_rate": 5e-06, "loss": 0.7547, "num_input_tokens_seen": 242012024, "step": 3860 }, { "epoch": 12.845257903494176, "loss": 0.9995684623718262, "loss_ce": 0.0005450373864732683, "loss_iou": 0.39453125, "loss_num": 0.041748046875, "loss_xval": 1.0, "num_input_tokens_seen": 242012024, "step": 3860 }, { "epoch": 12.848585690515806, "grad_norm": 59.757747650146484, "learning_rate": 5e-06, "loss": 0.585, "num_input_tokens_seen": 242074232, "step": 3861 }, { "epoch": 12.848585690515806, "loss": 0.34702742099761963, "loss_ce": 1.2011768376396503e-05, "loss_iou": 0.08447265625, "loss_num": 0.035400390625, "loss_xval": 0.34765625, "num_input_tokens_seen": 242074232, "step": 3861 }, { "epoch": 12.851913477537437, "grad_norm": 13.67387580871582, "learning_rate": 5e-06, "loss": 0.5333, "num_input_tokens_seen": 242135296, "step": 3862 }, { "epoch": 12.851913477537437, "loss": 0.3868679702281952, "loss_ce": 2.7132631657877937e-05, "loss_iou": 0.1396484375, "loss_num": 0.021484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 242135296, "step": 3862 }, { "epoch": 12.855241264559067, "grad_norm": 23.882417678833008, "learning_rate": 5e-06, "loss": 0.4023, "num_input_tokens_seen": 242197880, "step": 3863 }, { "epoch": 12.855241264559067, "loss": 0.2842375636100769, "loss_ce": 5.785437679151073e-05, "loss_iou": 0.083984375, "loss_num": 0.023193359375, "loss_xval": 0.28515625, "num_input_tokens_seen": 242197880, "step": 3863 }, { "epoch": 12.858569051580698, "grad_norm": 7.190594673156738, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 242258840, "step": 3864 }, { "epoch": 12.858569051580698, "loss": 0.4286506772041321, "loss_ce": 7.786005085108627e-07, "loss_iou": 0.130859375, "loss_num": 0.033203125, "loss_xval": 0.427734375, "num_input_tokens_seen": 242258840, "step": 3864 }, { "epoch": 12.861896838602329, "grad_norm": 8.777844429016113, "learning_rate": 5e-06, "loss": 0.4706, "num_input_tokens_seen": 242322276, "step": 3865 }, { "epoch": 12.861896838602329, "loss": 0.45947328209877014, "loss_ce": 6.353134835990204e-07, "loss_iou": 0.19140625, "loss_num": 0.01531982421875, "loss_xval": 0.458984375, "num_input_tokens_seen": 242322276, "step": 3865 }, { "epoch": 12.86522462562396, "grad_norm": 17.874174118041992, "learning_rate": 5e-06, "loss": 0.5925, "num_input_tokens_seen": 242384344, "step": 3866 }, { "epoch": 12.86522462562396, "loss": 0.7237383723258972, "loss_ce": 4.452573193702847e-05, "loss_iou": 0.302734375, "loss_num": 0.0234375, "loss_xval": 0.72265625, "num_input_tokens_seen": 242384344, "step": 3866 }, { "epoch": 12.86855241264559, "grad_norm": 35.54883575439453, "learning_rate": 5e-06, "loss": 0.4848, "num_input_tokens_seen": 242446532, "step": 3867 }, { "epoch": 12.86855241264559, "loss": 0.4174686670303345, "loss_ce": 1.8724294932326302e-05, "loss_iou": 0.126953125, "loss_num": 0.032958984375, "loss_xval": 0.41796875, "num_input_tokens_seen": 242446532, "step": 3867 }, { "epoch": 12.87188019966722, "grad_norm": 49.13055419921875, "learning_rate": 5e-06, "loss": 0.5759, "num_input_tokens_seen": 242509928, "step": 3868 }, { "epoch": 12.87188019966722, "loss": 0.5128186941146851, "loss_ce": 1.337245748800342e-06, "loss_iou": 0.212890625, "loss_num": 0.0177001953125, "loss_xval": 0.51171875, "num_input_tokens_seen": 242509928, "step": 3868 }, { "epoch": 12.875207986688851, "grad_norm": 24.34088134765625, "learning_rate": 5e-06, "loss": 0.4929, "num_input_tokens_seen": 242573840, "step": 3869 }, { "epoch": 12.875207986688851, "loss": 0.6067729592323303, "loss_ce": 8.350206189788878e-05, "loss_iou": 0.21875, "loss_num": 0.0341796875, "loss_xval": 0.60546875, "num_input_tokens_seen": 242573840, "step": 3869 }, { "epoch": 12.878535773710482, "grad_norm": 16.218822479248047, "learning_rate": 5e-06, "loss": 0.4746, "num_input_tokens_seen": 242636412, "step": 3870 }, { "epoch": 12.878535773710482, "loss": 0.42644810676574707, "loss_ce": 2.5973338779294863e-05, "loss_iou": 0.1552734375, "loss_num": 0.023193359375, "loss_xval": 0.42578125, "num_input_tokens_seen": 242636412, "step": 3870 }, { "epoch": 12.881863560732112, "grad_norm": 12.35728931427002, "learning_rate": 5e-06, "loss": 0.5317, "num_input_tokens_seen": 242699820, "step": 3871 }, { "epoch": 12.881863560732112, "loss": 0.6263703107833862, "loss_ce": 0.00014964889851398766, "loss_iou": 0.263671875, "loss_num": 0.01953125, "loss_xval": 0.625, "num_input_tokens_seen": 242699820, "step": 3871 }, { "epoch": 12.885191347753743, "grad_norm": 12.223444938659668, "learning_rate": 5e-06, "loss": 0.5809, "num_input_tokens_seen": 242760544, "step": 3872 }, { "epoch": 12.885191347753743, "loss": 0.5957051515579224, "loss_ce": 2.0003899408038706e-06, "loss_iou": 0.2177734375, "loss_num": 0.031982421875, "loss_xval": 0.59375, "num_input_tokens_seen": 242760544, "step": 3872 }, { "epoch": 12.888519134775374, "grad_norm": 20.478775024414062, "learning_rate": 5e-06, "loss": 0.4596, "num_input_tokens_seen": 242823044, "step": 3873 }, { "epoch": 12.888519134775374, "loss": 0.434076189994812, "loss_ce": 2.4683296942384914e-05, "loss_iou": 0.171875, "loss_num": 0.01806640625, "loss_xval": 0.43359375, "num_input_tokens_seen": 242823044, "step": 3873 }, { "epoch": 12.891846921797004, "grad_norm": 9.702995300292969, "learning_rate": 5e-06, "loss": 0.6371, "num_input_tokens_seen": 242886728, "step": 3874 }, { "epoch": 12.891846921797004, "loss": 0.5846003293991089, "loss_ce": 5.639709343085997e-06, "loss_iou": 0.2158203125, "loss_num": 0.03076171875, "loss_xval": 0.5859375, "num_input_tokens_seen": 242886728, "step": 3874 }, { "epoch": 12.895174708818635, "grad_norm": 9.463374137878418, "learning_rate": 5e-06, "loss": 0.5235, "num_input_tokens_seen": 242948892, "step": 3875 }, { "epoch": 12.895174708818635, "loss": 0.3809531331062317, "loss_ce": 2.201334609708283e-06, "loss_iou": 0.150390625, "loss_num": 0.01611328125, "loss_xval": 0.380859375, "num_input_tokens_seen": 242948892, "step": 3875 }, { "epoch": 12.898502495840265, "grad_norm": 23.592641830444336, "learning_rate": 5e-06, "loss": 0.4752, "num_input_tokens_seen": 243010752, "step": 3876 }, { "epoch": 12.898502495840265, "loss": 0.4637471139431, "loss_ce": 1.9773785879806383e-06, "loss_iou": 0.16796875, "loss_num": 0.0255126953125, "loss_xval": 0.462890625, "num_input_tokens_seen": 243010752, "step": 3876 }, { "epoch": 12.901830282861896, "grad_norm": 16.6806583404541, "learning_rate": 5e-06, "loss": 0.4533, "num_input_tokens_seen": 243073728, "step": 3877 }, { "epoch": 12.901830282861896, "loss": 0.2863641381263733, "loss_ce": 2.435194346617209e-06, "loss_iou": 0.08740234375, "loss_num": 0.0223388671875, "loss_xval": 0.287109375, "num_input_tokens_seen": 243073728, "step": 3877 }, { "epoch": 12.905158069883527, "grad_norm": 23.699617385864258, "learning_rate": 5e-06, "loss": 0.4196, "num_input_tokens_seen": 243135952, "step": 3878 }, { "epoch": 12.905158069883527, "loss": 0.46462225914001465, "loss_ce": 5.317965042195283e-05, "loss_iou": 0.158203125, "loss_num": 0.02978515625, "loss_xval": 0.46484375, "num_input_tokens_seen": 243135952, "step": 3878 }, { "epoch": 12.908485856905157, "grad_norm": 24.5529842376709, "learning_rate": 5e-06, "loss": 0.6149, "num_input_tokens_seen": 243197236, "step": 3879 }, { "epoch": 12.908485856905157, "loss": 0.572481632232666, "loss_ce": 0.0008873940678313375, "loss_iou": 0.154296875, "loss_num": 0.052490234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 243197236, "step": 3879 }, { "epoch": 12.911813643926788, "grad_norm": 12.644448280334473, "learning_rate": 5e-06, "loss": 0.4853, "num_input_tokens_seen": 243260280, "step": 3880 }, { "epoch": 12.911813643926788, "loss": 0.614421010017395, "loss_ce": 4.113077375222929e-05, "loss_iou": 0.232421875, "loss_num": 0.0296630859375, "loss_xval": 0.61328125, "num_input_tokens_seen": 243260280, "step": 3880 }, { "epoch": 12.915141430948418, "grad_norm": 18.124326705932617, "learning_rate": 5e-06, "loss": 0.4629, "num_input_tokens_seen": 243324624, "step": 3881 }, { "epoch": 12.915141430948418, "loss": 0.4482647180557251, "loss_ce": 0.00014461397950071841, "loss_iou": 0.193359375, "loss_num": 0.0123291015625, "loss_xval": 0.447265625, "num_input_tokens_seen": 243324624, "step": 3881 }, { "epoch": 12.918469217970049, "grad_norm": 13.521050453186035, "learning_rate": 5e-06, "loss": 0.5191, "num_input_tokens_seen": 243387100, "step": 3882 }, { "epoch": 12.918469217970049, "loss": 0.5521883964538574, "loss_ce": 3.3404878649889724e-06, "loss_iou": 0.2109375, "loss_num": 0.0262451171875, "loss_xval": 0.55078125, "num_input_tokens_seen": 243387100, "step": 3882 }, { "epoch": 12.92179700499168, "grad_norm": 9.821001052856445, "learning_rate": 5e-06, "loss": 0.5966, "num_input_tokens_seen": 243450292, "step": 3883 }, { "epoch": 12.92179700499168, "loss": 0.4426358640193939, "loss_ce": 0.000863413093611598, "loss_iou": 0.1533203125, "loss_num": 0.0272216796875, "loss_xval": 0.44140625, "num_input_tokens_seen": 243450292, "step": 3883 }, { "epoch": 12.92512479201331, "grad_norm": 6.517783164978027, "learning_rate": 5e-06, "loss": 0.413, "num_input_tokens_seen": 243513892, "step": 3884 }, { "epoch": 12.92512479201331, "loss": 0.5594608783721924, "loss_ce": 1.2639008673431817e-05, "loss_iou": 0.228515625, "loss_num": 0.0206298828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 243513892, "step": 3884 }, { "epoch": 12.928452579034941, "grad_norm": 9.376770973205566, "learning_rate": 5e-06, "loss": 0.6276, "num_input_tokens_seen": 243576616, "step": 3885 }, { "epoch": 12.928452579034941, "loss": 0.8109264373779297, "loss_ce": 0.00013542332453653216, "loss_iou": 0.326171875, "loss_num": 0.031982421875, "loss_xval": 0.8125, "num_input_tokens_seen": 243576616, "step": 3885 }, { "epoch": 12.931780366056572, "grad_norm": 17.0826358795166, "learning_rate": 5e-06, "loss": 0.3245, "num_input_tokens_seen": 243640664, "step": 3886 }, { "epoch": 12.931780366056572, "loss": 0.1949598342180252, "loss_ce": 1.3537921404349618e-05, "loss_iou": 0.06640625, "loss_num": 0.01239013671875, "loss_xval": 0.1953125, "num_input_tokens_seen": 243640664, "step": 3886 }, { "epoch": 12.935108153078202, "grad_norm": 21.12853240966797, "learning_rate": 5e-06, "loss": 0.5557, "num_input_tokens_seen": 243703532, "step": 3887 }, { "epoch": 12.935108153078202, "loss": 0.4188096225261688, "loss_ce": 1.6302465155604295e-06, "loss_iou": 0.169921875, "loss_num": 0.015869140625, "loss_xval": 0.41796875, "num_input_tokens_seen": 243703532, "step": 3887 }, { "epoch": 12.938435940099833, "grad_norm": 15.669259071350098, "learning_rate": 5e-06, "loss": 0.37, "num_input_tokens_seen": 243766372, "step": 3888 }, { "epoch": 12.938435940099833, "loss": 0.4198063611984253, "loss_ce": 6.524903710669605e-06, "loss_iou": 0.154296875, "loss_num": 0.022216796875, "loss_xval": 0.419921875, "num_input_tokens_seen": 243766372, "step": 3888 }, { "epoch": 12.941763727121465, "grad_norm": 10.95678997039795, "learning_rate": 5e-06, "loss": 0.3326, "num_input_tokens_seen": 243828920, "step": 3889 }, { "epoch": 12.941763727121465, "loss": 0.30346885323524475, "loss_ce": 2.0517259144980926e-06, "loss_iou": 0.10107421875, "loss_num": 0.0201416015625, "loss_xval": 0.302734375, "num_input_tokens_seen": 243828920, "step": 3889 }, { "epoch": 12.945091514143094, "grad_norm": 12.914162635803223, "learning_rate": 5e-06, "loss": 0.4751, "num_input_tokens_seen": 243890488, "step": 3890 }, { "epoch": 12.945091514143094, "loss": 0.49070829153060913, "loss_ce": 4.669004920287989e-05, "loss_iou": 0.1796875, "loss_num": 0.0262451171875, "loss_xval": 0.490234375, "num_input_tokens_seen": 243890488, "step": 3890 }, { "epoch": 12.948419301164726, "grad_norm": 19.47538185119629, "learning_rate": 5e-06, "loss": 0.4529, "num_input_tokens_seen": 243952776, "step": 3891 }, { "epoch": 12.948419301164726, "loss": 0.23497374355793, "loss_ce": 0.0005377225461415946, "loss_iou": 0.06201171875, "loss_num": 0.0220947265625, "loss_xval": 0.234375, "num_input_tokens_seen": 243952776, "step": 3891 }, { "epoch": 12.951747088186355, "grad_norm": 35.087554931640625, "learning_rate": 5e-06, "loss": 0.5815, "num_input_tokens_seen": 244016720, "step": 3892 }, { "epoch": 12.951747088186355, "loss": 0.7304743528366089, "loss_ce": 5.598939424089622e-06, "loss_iou": 0.2734375, "loss_num": 0.036376953125, "loss_xval": 0.73046875, "num_input_tokens_seen": 244016720, "step": 3892 }, { "epoch": 12.955074875207988, "grad_norm": 28.222410202026367, "learning_rate": 5e-06, "loss": 0.4747, "num_input_tokens_seen": 244079060, "step": 3893 }, { "epoch": 12.955074875207988, "loss": 0.5615221858024597, "loss_ce": 0.00036497320979833603, "loss_iou": 0.2060546875, "loss_num": 0.02978515625, "loss_xval": 0.5625, "num_input_tokens_seen": 244079060, "step": 3893 }, { "epoch": 12.958402662229616, "grad_norm": 13.769379615783691, "learning_rate": 5e-06, "loss": 0.5504, "num_input_tokens_seen": 244143116, "step": 3894 }, { "epoch": 12.958402662229616, "loss": 0.5052711963653564, "loss_ce": 0.00011370135325705633, "loss_iou": 0.21875, "loss_num": 0.0135498046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 244143116, "step": 3894 }, { "epoch": 12.961730449251249, "grad_norm": 20.534709930419922, "learning_rate": 5e-06, "loss": 0.3114, "num_input_tokens_seen": 244205168, "step": 3895 }, { "epoch": 12.961730449251249, "loss": 0.22433580458164215, "loss_ce": 1.082078028957767e-06, "loss_iou": 0.06640625, "loss_num": 0.018310546875, "loss_xval": 0.224609375, "num_input_tokens_seen": 244205168, "step": 3895 }, { "epoch": 12.965058236272878, "grad_norm": 22.39807891845703, "learning_rate": 5e-06, "loss": 0.5488, "num_input_tokens_seen": 244268244, "step": 3896 }, { "epoch": 12.965058236272878, "loss": 0.6031724810600281, "loss_ce": 2.3082706320565194e-05, "loss_iou": 0.255859375, "loss_num": 0.0179443359375, "loss_xval": 0.6015625, "num_input_tokens_seen": 244268244, "step": 3896 }, { "epoch": 12.96838602329451, "grad_norm": 21.176443099975586, "learning_rate": 5e-06, "loss": 0.5601, "num_input_tokens_seen": 244328756, "step": 3897 }, { "epoch": 12.96838602329451, "loss": 0.47897663712501526, "loss_ce": 0.0001863425859482959, "loss_iou": 0.18359375, "loss_num": 0.022216796875, "loss_xval": 0.478515625, "num_input_tokens_seen": 244328756, "step": 3897 }, { "epoch": 12.971713810316139, "grad_norm": 11.698783874511719, "learning_rate": 5e-06, "loss": 0.4446, "num_input_tokens_seen": 244391168, "step": 3898 }, { "epoch": 12.971713810316139, "loss": 0.38678663969039917, "loss_ce": 6.835179647168843e-06, "loss_iou": 0.140625, "loss_num": 0.0211181640625, "loss_xval": 0.38671875, "num_input_tokens_seen": 244391168, "step": 3898 }, { "epoch": 12.975041597337771, "grad_norm": 17.386978149414062, "learning_rate": 5e-06, "loss": 0.5398, "num_input_tokens_seen": 244454268, "step": 3899 }, { "epoch": 12.975041597337771, "loss": 0.32611697912216187, "loss_ce": 6.114146799518494e-06, "loss_iou": 0.12353515625, "loss_num": 0.0157470703125, "loss_xval": 0.326171875, "num_input_tokens_seen": 244454268, "step": 3899 }, { "epoch": 12.9783693843594, "grad_norm": 16.915910720825195, "learning_rate": 5e-06, "loss": 0.4612, "num_input_tokens_seen": 244517032, "step": 3900 }, { "epoch": 12.9783693843594, "loss": 0.3920668959617615, "loss_ce": 0.00016014455468393862, "loss_iou": 0.14453125, "loss_num": 0.0206298828125, "loss_xval": 0.392578125, "num_input_tokens_seen": 244517032, "step": 3900 }, { "epoch": 12.981697171381033, "grad_norm": 19.576353073120117, "learning_rate": 5e-06, "loss": 0.4469, "num_input_tokens_seen": 244580060, "step": 3901 }, { "epoch": 12.981697171381033, "loss": 0.5471581220626831, "loss_ce": 0.00019159713701810688, "loss_iou": 0.224609375, "loss_num": 0.01953125, "loss_xval": 0.546875, "num_input_tokens_seen": 244580060, "step": 3901 }, { "epoch": 12.985024958402661, "grad_norm": 32.75544357299805, "learning_rate": 5e-06, "loss": 0.5476, "num_input_tokens_seen": 244642708, "step": 3902 }, { "epoch": 12.985024958402661, "loss": 0.4739565849304199, "loss_ce": 7.96155072748661e-05, "loss_iou": 0.1767578125, "loss_num": 0.0240478515625, "loss_xval": 0.474609375, "num_input_tokens_seen": 244642708, "step": 3902 }, { "epoch": 12.988352745424294, "grad_norm": 20.57472038269043, "learning_rate": 5e-06, "loss": 0.4764, "num_input_tokens_seen": 244703144, "step": 3903 }, { "epoch": 12.988352745424294, "loss": 0.4455580711364746, "loss_ce": 1.4273629176386748e-06, "loss_iou": 0.1796875, "loss_num": 0.0172119140625, "loss_xval": 0.4453125, "num_input_tokens_seen": 244703144, "step": 3903 }, { "epoch": 12.991680532445923, "grad_norm": 7.146425247192383, "learning_rate": 5e-06, "loss": 0.4013, "num_input_tokens_seen": 244766240, "step": 3904 }, { "epoch": 12.991680532445923, "loss": 0.44959622621536255, "loss_ce": 1.1299454854452051e-05, "loss_iou": 0.1455078125, "loss_num": 0.031982421875, "loss_xval": 0.44921875, "num_input_tokens_seen": 244766240, "step": 3904 }, { "epoch": 12.995008319467555, "grad_norm": 17.255502700805664, "learning_rate": 5e-06, "loss": 0.5696, "num_input_tokens_seen": 244828876, "step": 3905 }, { "epoch": 12.995008319467555, "loss": 0.4636303186416626, "loss_ce": 7.2486186581954826e-06, "loss_iou": 0.1689453125, "loss_num": 0.0250244140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 244828876, "step": 3905 }, { "epoch": 12.998336106489184, "grad_norm": 17.230485916137695, "learning_rate": 5e-06, "loss": 0.3981, "num_input_tokens_seen": 244891816, "step": 3906 }, { "epoch": 12.998336106489184, "loss": 0.42627081274986267, "loss_ce": 1.2942764442414045e-06, "loss_iou": 0.16015625, "loss_num": 0.0211181640625, "loss_xval": 0.42578125, "num_input_tokens_seen": 244891816, "step": 3906 }, { "epoch": 12.998336106489184, "loss": 0.43797436356544495, "loss_ce": 1.3628450687974691e-06, "loss_iou": 0.1533203125, "loss_num": 0.0262451171875, "loss_xval": 0.4375, "num_input_tokens_seen": 244922584, "step": 3906 }, { "epoch": 13.001663893510816, "grad_norm": 11.583531379699707, "learning_rate": 5e-06, "loss": 0.3932, "num_input_tokens_seen": 244954316, "step": 3907 }, { "epoch": 13.001663893510816, "loss": 0.3485146164894104, "loss_ce": 0.002262154594063759, "loss_iou": 0.1181640625, "loss_num": 0.0218505859375, "loss_xval": 0.345703125, "num_input_tokens_seen": 244954316, "step": 3907 }, { "epoch": 13.004991680532447, "grad_norm": 6.606874942779541, "learning_rate": 5e-06, "loss": 0.4081, "num_input_tokens_seen": 245018004, "step": 3908 }, { "epoch": 13.004991680532447, "loss": 0.3219027519226074, "loss_ce": 3.345622644701507e-06, "loss_iou": 0.1318359375, "loss_num": 0.01153564453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 245018004, "step": 3908 }, { "epoch": 13.008319467554077, "grad_norm": 14.633254051208496, "learning_rate": 5e-06, "loss": 0.4985, "num_input_tokens_seen": 245082360, "step": 3909 }, { "epoch": 13.008319467554077, "loss": 0.4757797122001648, "loss_ce": 1.0657757229637355e-05, "loss_iou": 0.19921875, "loss_num": 0.015625, "loss_xval": 0.4765625, "num_input_tokens_seen": 245082360, "step": 3909 }, { "epoch": 13.011647254575708, "grad_norm": 26.70598602294922, "learning_rate": 5e-06, "loss": 0.4798, "num_input_tokens_seen": 245145984, "step": 3910 }, { "epoch": 13.011647254575708, "loss": 0.5029640793800354, "loss_ce": 3.4380165743641555e-05, "loss_iou": 0.185546875, "loss_num": 0.026123046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 245145984, "step": 3910 }, { "epoch": 13.014975041597339, "grad_norm": 23.014808654785156, "learning_rate": 5e-06, "loss": 0.4504, "num_input_tokens_seen": 245207908, "step": 3911 }, { "epoch": 13.014975041597339, "loss": 0.33215630054473877, "loss_ce": 2.9613902370329015e-06, "loss_iou": 0.11767578125, "loss_num": 0.019287109375, "loss_xval": 0.33203125, "num_input_tokens_seen": 245207908, "step": 3911 }, { "epoch": 13.01830282861897, "grad_norm": 11.95120906829834, "learning_rate": 5e-06, "loss": 0.6268, "num_input_tokens_seen": 245269320, "step": 3912 }, { "epoch": 13.01830282861897, "loss": 0.5090344548225403, "loss_ce": 1.2384440424284548e-06, "loss_iou": 0.1904296875, "loss_num": 0.0255126953125, "loss_xval": 0.5078125, "num_input_tokens_seen": 245269320, "step": 3912 }, { "epoch": 13.0216306156406, "grad_norm": 9.519312858581543, "learning_rate": 5e-06, "loss": 0.376, "num_input_tokens_seen": 245331080, "step": 3913 }, { "epoch": 13.0216306156406, "loss": 0.32593345642089844, "loss_ce": 5.6922681324067526e-06, "loss_iou": 0.126953125, "loss_num": 0.01422119140625, "loss_xval": 0.326171875, "num_input_tokens_seen": 245331080, "step": 3913 }, { "epoch": 13.02495840266223, "grad_norm": 15.73210620880127, "learning_rate": 5e-06, "loss": 0.5047, "num_input_tokens_seen": 245393904, "step": 3914 }, { "epoch": 13.02495840266223, "loss": 0.6040130853652954, "loss_ce": 9.22907202038914e-06, "loss_iou": 0.232421875, "loss_num": 0.02783203125, "loss_xval": 0.60546875, "num_input_tokens_seen": 245393904, "step": 3914 }, { "epoch": 13.028286189683861, "grad_norm": 19.225013732910156, "learning_rate": 5e-06, "loss": 0.499, "num_input_tokens_seen": 245456908, "step": 3915 }, { "epoch": 13.028286189683861, "loss": 0.3258955776691437, "loss_ce": 1.3630517969431821e-05, "loss_iou": 0.12890625, "loss_num": 0.013671875, "loss_xval": 0.326171875, "num_input_tokens_seen": 245456908, "step": 3915 }, { "epoch": 13.031613976705492, "grad_norm": 27.990089416503906, "learning_rate": 5e-06, "loss": 0.5592, "num_input_tokens_seen": 245519320, "step": 3916 }, { "epoch": 13.031613976705492, "loss": 0.6405162811279297, "loss_ce": 1.3355858754948713e-05, "loss_iou": 0.23046875, "loss_num": 0.0361328125, "loss_xval": 0.640625, "num_input_tokens_seen": 245519320, "step": 3916 }, { "epoch": 13.034941763727122, "grad_norm": 23.98921775817871, "learning_rate": 5e-06, "loss": 0.6694, "num_input_tokens_seen": 245582640, "step": 3917 }, { "epoch": 13.034941763727122, "loss": 0.8093380928039551, "loss_ce": 0.00019503738440107554, "loss_iou": 0.30859375, "loss_num": 0.038818359375, "loss_xval": 0.80859375, "num_input_tokens_seen": 245582640, "step": 3917 }, { "epoch": 13.038269550748753, "grad_norm": 25.511415481567383, "learning_rate": 5e-06, "loss": 0.5017, "num_input_tokens_seen": 245645368, "step": 3918 }, { "epoch": 13.038269550748753, "loss": 0.4655778408050537, "loss_ce": 1.662037220739876e-06, "loss_iou": 0.2021484375, "loss_num": 0.01226806640625, "loss_xval": 0.46484375, "num_input_tokens_seen": 245645368, "step": 3918 }, { "epoch": 13.041597337770384, "grad_norm": 27.116680145263672, "learning_rate": 5e-06, "loss": 0.5733, "num_input_tokens_seen": 245708832, "step": 3919 }, { "epoch": 13.041597337770384, "loss": 0.5809359550476074, "loss_ce": 3.3253404581046198e-06, "loss_iou": 0.228515625, "loss_num": 0.025146484375, "loss_xval": 0.58203125, "num_input_tokens_seen": 245708832, "step": 3919 }, { "epoch": 13.044925124792014, "grad_norm": 20.32222557067871, "learning_rate": 5e-06, "loss": 0.2909, "num_input_tokens_seen": 245772124, "step": 3920 }, { "epoch": 13.044925124792014, "loss": 0.2705526649951935, "loss_ce": 4.485169120016508e-05, "loss_iou": 0.109375, "loss_num": 0.0103759765625, "loss_xval": 0.26953125, "num_input_tokens_seen": 245772124, "step": 3920 }, { "epoch": 13.048252911813645, "grad_norm": 25.511484146118164, "learning_rate": 5e-06, "loss": 0.5037, "num_input_tokens_seen": 245834152, "step": 3921 }, { "epoch": 13.048252911813645, "loss": 0.5819467902183533, "loss_ce": 0.00015967852959875017, "loss_iou": 0.21875, "loss_num": 0.0289306640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 245834152, "step": 3921 }, { "epoch": 13.051580698835275, "grad_norm": 21.587182998657227, "learning_rate": 5e-06, "loss": 0.4481, "num_input_tokens_seen": 245896272, "step": 3922 }, { "epoch": 13.051580698835275, "loss": 0.6657172441482544, "loss_ce": 6.8211434154363815e-06, "loss_iou": 0.26171875, "loss_num": 0.028564453125, "loss_xval": 0.6640625, "num_input_tokens_seen": 245896272, "step": 3922 }, { "epoch": 13.054908485856906, "grad_norm": 19.926738739013672, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 245959044, "step": 3923 }, { "epoch": 13.054908485856906, "loss": 0.5488715171813965, "loss_ce": 0.0003180096682626754, "loss_iou": 0.220703125, "loss_num": 0.021240234375, "loss_xval": 0.546875, "num_input_tokens_seen": 245959044, "step": 3923 }, { "epoch": 13.058236272878537, "grad_norm": 22.1927433013916, "learning_rate": 5e-06, "loss": 0.5842, "num_input_tokens_seen": 246021324, "step": 3924 }, { "epoch": 13.058236272878537, "loss": 0.7203502655029297, "loss_ce": 7.440536137437448e-05, "loss_iou": 0.29296875, "loss_num": 0.0264892578125, "loss_xval": 0.71875, "num_input_tokens_seen": 246021324, "step": 3924 }, { "epoch": 13.061564059900167, "grad_norm": 14.879684448242188, "learning_rate": 5e-06, "loss": 0.3883, "num_input_tokens_seen": 246082644, "step": 3925 }, { "epoch": 13.061564059900167, "loss": 0.202302947640419, "loss_ce": 1.923195441122516e-06, "loss_iou": 0.07421875, "loss_num": 0.0107421875, "loss_xval": 0.2021484375, "num_input_tokens_seen": 246082644, "step": 3925 }, { "epoch": 13.064891846921798, "grad_norm": 23.384220123291016, "learning_rate": 5e-06, "loss": 0.3208, "num_input_tokens_seen": 246144484, "step": 3926 }, { "epoch": 13.064891846921798, "loss": 0.372560054063797, "loss_ce": 1.4661437717222725e-06, "loss_iou": 0.1240234375, "loss_num": 0.0250244140625, "loss_xval": 0.373046875, "num_input_tokens_seen": 246144484, "step": 3926 }, { "epoch": 13.068219633943428, "grad_norm": 44.74557113647461, "learning_rate": 5e-06, "loss": 0.6887, "num_input_tokens_seen": 246208304, "step": 3927 }, { "epoch": 13.068219633943428, "loss": 0.7547788023948669, "loss_ce": 1.8108230506186374e-05, "loss_iou": 0.314453125, "loss_num": 0.02490234375, "loss_xval": 0.75390625, "num_input_tokens_seen": 246208304, "step": 3927 }, { "epoch": 13.071547420965059, "grad_norm": 35.67026138305664, "learning_rate": 5e-06, "loss": 0.5352, "num_input_tokens_seen": 246272476, "step": 3928 }, { "epoch": 13.071547420965059, "loss": 0.400394469499588, "loss_ce": 3.841433681373019e-06, "loss_iou": 0.169921875, "loss_num": 0.0123291015625, "loss_xval": 0.400390625, "num_input_tokens_seen": 246272476, "step": 3928 }, { "epoch": 13.07487520798669, "grad_norm": 17.048898696899414, "learning_rate": 5e-06, "loss": 0.4124, "num_input_tokens_seen": 246333092, "step": 3929 }, { "epoch": 13.07487520798669, "loss": 0.4265062212944031, "loss_ce": 0.0004960771184414625, "loss_iou": 0.1279296875, "loss_num": 0.0341796875, "loss_xval": 0.42578125, "num_input_tokens_seen": 246333092, "step": 3929 }, { "epoch": 13.07820299500832, "grad_norm": 6.856128215789795, "learning_rate": 5e-06, "loss": 0.4896, "num_input_tokens_seen": 246396304, "step": 3930 }, { "epoch": 13.07820299500832, "loss": 0.35479897260665894, "loss_ce": 1.5831495829843334e-06, "loss_iou": 0.14453125, "loss_num": 0.0133056640625, "loss_xval": 0.35546875, "num_input_tokens_seen": 246396304, "step": 3930 }, { "epoch": 13.081530782029951, "grad_norm": 20.62779998779297, "learning_rate": 5e-06, "loss": 0.5909, "num_input_tokens_seen": 246458836, "step": 3931 }, { "epoch": 13.081530782029951, "loss": 0.36812859773635864, "loss_ce": 5.2300096285762265e-05, "loss_iou": 0.1376953125, "loss_num": 0.0185546875, "loss_xval": 0.3671875, "num_input_tokens_seen": 246458836, "step": 3931 }, { "epoch": 13.084858569051582, "grad_norm": 26.433717727661133, "learning_rate": 5e-06, "loss": 0.4447, "num_input_tokens_seen": 246521620, "step": 3932 }, { "epoch": 13.084858569051582, "loss": 0.43731409311294556, "loss_ce": 0.00036344374530017376, "loss_iou": 0.18359375, "loss_num": 0.013671875, "loss_xval": 0.4375, "num_input_tokens_seen": 246521620, "step": 3932 }, { "epoch": 13.088186356073212, "grad_norm": 19.541790008544922, "learning_rate": 5e-06, "loss": 0.5147, "num_input_tokens_seen": 246584648, "step": 3933 }, { "epoch": 13.088186356073212, "loss": 0.45338475704193115, "loss_ce": 1.560973578307312e-05, "loss_iou": 0.177734375, "loss_num": 0.0194091796875, "loss_xval": 0.453125, "num_input_tokens_seen": 246584648, "step": 3933 }, { "epoch": 13.091514143094843, "grad_norm": 15.987112998962402, "learning_rate": 5e-06, "loss": 0.4363, "num_input_tokens_seen": 246645516, "step": 3934 }, { "epoch": 13.091514143094843, "loss": 0.3282424509525299, "loss_ce": 3.352684507262893e-05, "loss_iou": 0.0859375, "loss_num": 0.031494140625, "loss_xval": 0.328125, "num_input_tokens_seen": 246645516, "step": 3934 }, { "epoch": 13.094841930116473, "grad_norm": 12.495582580566406, "learning_rate": 5e-06, "loss": 0.3378, "num_input_tokens_seen": 246708264, "step": 3935 }, { "epoch": 13.094841930116473, "loss": 0.253867506980896, "loss_ce": 7.026177627267316e-06, "loss_iou": 0.0947265625, "loss_num": 0.0128173828125, "loss_xval": 0.25390625, "num_input_tokens_seen": 246708264, "step": 3935 }, { "epoch": 13.098169717138104, "grad_norm": 11.117823600769043, "learning_rate": 5e-06, "loss": 0.5555, "num_input_tokens_seen": 246771432, "step": 3936 }, { "epoch": 13.098169717138104, "loss": 0.48598712682724, "loss_ce": 0.0008796825422905385, "loss_iou": 0.2041015625, "loss_num": 0.01531982421875, "loss_xval": 0.484375, "num_input_tokens_seen": 246771432, "step": 3936 }, { "epoch": 13.101497504159735, "grad_norm": 14.354990005493164, "learning_rate": 5e-06, "loss": 0.4501, "num_input_tokens_seen": 246833456, "step": 3937 }, { "epoch": 13.101497504159735, "loss": 0.5306648015975952, "loss_ce": 2.512578430469148e-05, "loss_iou": 0.2099609375, "loss_num": 0.0220947265625, "loss_xval": 0.53125, "num_input_tokens_seen": 246833456, "step": 3937 }, { "epoch": 13.104825291181365, "grad_norm": 22.855907440185547, "learning_rate": 5e-06, "loss": 0.48, "num_input_tokens_seen": 246897244, "step": 3938 }, { "epoch": 13.104825291181365, "loss": 0.5190008878707886, "loss_ce": 7.997643115231767e-05, "loss_iou": 0.2197265625, "loss_num": 0.0157470703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 246897244, "step": 3938 }, { "epoch": 13.108153078202996, "grad_norm": 43.62656784057617, "learning_rate": 5e-06, "loss": 0.5481, "num_input_tokens_seen": 246961160, "step": 3939 }, { "epoch": 13.108153078202996, "loss": 0.4798605144023895, "loss_ce": 2.1040714273112826e-06, "loss_iou": 0.201171875, "loss_num": 0.015625, "loss_xval": 0.48046875, "num_input_tokens_seen": 246961160, "step": 3939 }, { "epoch": 13.111480865224626, "grad_norm": 32.8066291809082, "learning_rate": 5e-06, "loss": 0.3781, "num_input_tokens_seen": 247024756, "step": 3940 }, { "epoch": 13.111480865224626, "loss": 0.2658957839012146, "loss_ce": 0.000453901884611696, "loss_iou": 0.109375, "loss_num": 0.00927734375, "loss_xval": 0.265625, "num_input_tokens_seen": 247024756, "step": 3940 }, { "epoch": 13.114808652246257, "grad_norm": 22.72580337524414, "learning_rate": 5e-06, "loss": 0.4553, "num_input_tokens_seen": 247087492, "step": 3941 }, { "epoch": 13.114808652246257, "loss": 0.5884561538696289, "loss_ce": 9.296596772401244e-07, "loss_iou": 0.220703125, "loss_num": 0.0294189453125, "loss_xval": 0.58984375, "num_input_tokens_seen": 247087492, "step": 3941 }, { "epoch": 13.118136439267888, "grad_norm": 13.90997314453125, "learning_rate": 5e-06, "loss": 0.4496, "num_input_tokens_seen": 247150944, "step": 3942 }, { "epoch": 13.118136439267888, "loss": 0.3897212743759155, "loss_ce": 1.1818842722277623e-05, "loss_iou": 0.1474609375, "loss_num": 0.01904296875, "loss_xval": 0.390625, "num_input_tokens_seen": 247150944, "step": 3942 }, { "epoch": 13.121464226289518, "grad_norm": 11.13926887512207, "learning_rate": 5e-06, "loss": 0.2895, "num_input_tokens_seen": 247210624, "step": 3943 }, { "epoch": 13.121464226289518, "loss": 0.3165774345397949, "loss_ce": 3.3337917102471692e-06, "loss_iou": 0.09228515625, "loss_num": 0.0264892578125, "loss_xval": 0.31640625, "num_input_tokens_seen": 247210624, "step": 3943 }, { "epoch": 13.124792013311149, "grad_norm": 10.88071346282959, "learning_rate": 5e-06, "loss": 0.2291, "num_input_tokens_seen": 247272328, "step": 3944 }, { "epoch": 13.124792013311149, "loss": 0.25840896368026733, "loss_ce": 1.3861446177543257e-06, "loss_iou": 0.1005859375, "loss_num": 0.0113525390625, "loss_xval": 0.2578125, "num_input_tokens_seen": 247272328, "step": 3944 }, { "epoch": 13.12811980033278, "grad_norm": 16.53541374206543, "learning_rate": 5e-06, "loss": 0.3375, "num_input_tokens_seen": 247334680, "step": 3945 }, { "epoch": 13.12811980033278, "loss": 0.4982917010784149, "loss_ce": 6.973386916797608e-07, "loss_iou": 0.203125, "loss_num": 0.0185546875, "loss_xval": 0.498046875, "num_input_tokens_seen": 247334680, "step": 3945 }, { "epoch": 13.13144758735441, "grad_norm": 12.867178916931152, "learning_rate": 5e-06, "loss": 0.4137, "num_input_tokens_seen": 247397452, "step": 3946 }, { "epoch": 13.13144758735441, "loss": 0.35758674144744873, "loss_ce": 1.2275111657800153e-05, "loss_iou": 0.146484375, "loss_num": 0.0128173828125, "loss_xval": 0.357421875, "num_input_tokens_seen": 247397452, "step": 3946 }, { "epoch": 13.13477537437604, "grad_norm": 23.651187896728516, "learning_rate": 5e-06, "loss": 0.559, "num_input_tokens_seen": 247460736, "step": 3947 }, { "epoch": 13.13477537437604, "loss": 0.41764387488365173, "loss_ce": 1.8473581803846173e-05, "loss_iou": 0.158203125, "loss_num": 0.02001953125, "loss_xval": 0.41796875, "num_input_tokens_seen": 247460736, "step": 3947 }, { "epoch": 13.138103161397671, "grad_norm": 23.116016387939453, "learning_rate": 5e-06, "loss": 0.5575, "num_input_tokens_seen": 247523620, "step": 3948 }, { "epoch": 13.138103161397671, "loss": 0.5645247101783752, "loss_ce": 7.154881313908845e-05, "loss_iou": 0.21875, "loss_num": 0.025634765625, "loss_xval": 0.5625, "num_input_tokens_seen": 247523620, "step": 3948 }, { "epoch": 13.141430948419302, "grad_norm": 26.280858993530273, "learning_rate": 5e-06, "loss": 0.542, "num_input_tokens_seen": 247586272, "step": 3949 }, { "epoch": 13.141430948419302, "loss": 0.6468560695648193, "loss_ce": 5.456648523249896e-06, "loss_iou": 0.2412109375, "loss_num": 0.032958984375, "loss_xval": 0.6484375, "num_input_tokens_seen": 247586272, "step": 3949 }, { "epoch": 13.144758735440933, "grad_norm": 33.63874816894531, "learning_rate": 5e-06, "loss": 0.6239, "num_input_tokens_seen": 247650408, "step": 3950 }, { "epoch": 13.144758735440933, "loss": 0.6477120518684387, "loss_ce": 6.955501248739893e-06, "loss_iou": 0.2333984375, "loss_num": 0.0361328125, "loss_xval": 0.6484375, "num_input_tokens_seen": 247650408, "step": 3950 }, { "epoch": 13.148086522462563, "grad_norm": 33.4152717590332, "learning_rate": 5e-06, "loss": 0.5505, "num_input_tokens_seen": 247714072, "step": 3951 }, { "epoch": 13.148086522462563, "loss": 0.6738759875297546, "loss_ce": 0.00010890807607211173, "loss_iou": 0.26171875, "loss_num": 0.030029296875, "loss_xval": 0.671875, "num_input_tokens_seen": 247714072, "step": 3951 }, { "epoch": 13.151414309484194, "grad_norm": 17.09111976623535, "learning_rate": 5e-06, "loss": 0.5359, "num_input_tokens_seen": 247777628, "step": 3952 }, { "epoch": 13.151414309484194, "loss": 0.5676301121711731, "loss_ce": 3.15324768962455e-06, "loss_iou": 0.208984375, "loss_num": 0.0299072265625, "loss_xval": 0.56640625, "num_input_tokens_seen": 247777628, "step": 3952 }, { "epoch": 13.154742096505824, "grad_norm": 24.867462158203125, "learning_rate": 5e-06, "loss": 0.6465, "num_input_tokens_seen": 247840676, "step": 3953 }, { "epoch": 13.154742096505824, "loss": 0.7252205610275269, "loss_ce": 8.343219519701961e-07, "loss_iou": 0.259765625, "loss_num": 0.041259765625, "loss_xval": 0.7265625, "num_input_tokens_seen": 247840676, "step": 3953 }, { "epoch": 13.158069883527455, "grad_norm": 16.156150817871094, "learning_rate": 5e-06, "loss": 0.4081, "num_input_tokens_seen": 247901184, "step": 3954 }, { "epoch": 13.158069883527455, "loss": 0.22943337261676788, "loss_ce": 2.225777052444755e-06, "loss_iou": 0.07177734375, "loss_num": 0.0172119140625, "loss_xval": 0.2294921875, "num_input_tokens_seen": 247901184, "step": 3954 }, { "epoch": 13.161397670549086, "grad_norm": 8.741374015808105, "learning_rate": 5e-06, "loss": 0.5315, "num_input_tokens_seen": 247963140, "step": 3955 }, { "epoch": 13.161397670549086, "loss": 0.5163595080375671, "loss_ce": 2.1104287952766754e-06, "loss_iou": 0.193359375, "loss_num": 0.026123046875, "loss_xval": 0.515625, "num_input_tokens_seen": 247963140, "step": 3955 }, { "epoch": 13.164725457570716, "grad_norm": 7.948845863342285, "learning_rate": 5e-06, "loss": 0.5194, "num_input_tokens_seen": 248025880, "step": 3956 }, { "epoch": 13.164725457570716, "loss": 0.49402129650115967, "loss_ce": 2.7312839847581927e-06, "loss_iou": 0.201171875, "loss_num": 0.0184326171875, "loss_xval": 0.494140625, "num_input_tokens_seen": 248025880, "step": 3956 }, { "epoch": 13.168053244592347, "grad_norm": 6.40903902053833, "learning_rate": 5e-06, "loss": 0.3279, "num_input_tokens_seen": 248088272, "step": 3957 }, { "epoch": 13.168053244592347, "loss": 0.4793049693107605, "loss_ce": 0.00017900583043228835, "loss_iou": 0.177734375, "loss_num": 0.0247802734375, "loss_xval": 0.478515625, "num_input_tokens_seen": 248088272, "step": 3957 }, { "epoch": 13.171381031613977, "grad_norm": 16.162424087524414, "learning_rate": 5e-06, "loss": 0.6063, "num_input_tokens_seen": 248150684, "step": 3958 }, { "epoch": 13.171381031613977, "loss": 0.8040172457695007, "loss_ce": 1.1276438272034284e-06, "loss_iou": 0.326171875, "loss_num": 0.030029296875, "loss_xval": 0.8046875, "num_input_tokens_seen": 248150684, "step": 3958 }, { "epoch": 13.174708818635608, "grad_norm": 37.79758071899414, "learning_rate": 5e-06, "loss": 0.3801, "num_input_tokens_seen": 248214360, "step": 3959 }, { "epoch": 13.174708818635608, "loss": 0.3806212246417999, "loss_ce": 5.994595539959846e-06, "loss_iou": 0.15234375, "loss_num": 0.01544189453125, "loss_xval": 0.380859375, "num_input_tokens_seen": 248214360, "step": 3959 }, { "epoch": 13.178036605657239, "grad_norm": 23.977142333984375, "learning_rate": 5e-06, "loss": 0.6113, "num_input_tokens_seen": 248278512, "step": 3960 }, { "epoch": 13.178036605657239, "loss": 0.5216233730316162, "loss_ce": 1.6928719560382888e-05, "loss_iou": 0.181640625, "loss_num": 0.031494140625, "loss_xval": 0.5234375, "num_input_tokens_seen": 248278512, "step": 3960 }, { "epoch": 13.18136439267887, "grad_norm": 7.7176408767700195, "learning_rate": 5e-06, "loss": 0.2896, "num_input_tokens_seen": 248339496, "step": 3961 }, { "epoch": 13.18136439267887, "loss": 0.14673498272895813, "loss_ce": 6.465834303526208e-06, "loss_iou": 0.021728515625, "loss_num": 0.0206298828125, "loss_xval": 0.146484375, "num_input_tokens_seen": 248339496, "step": 3961 }, { "epoch": 13.1846921797005, "grad_norm": 9.193425178527832, "learning_rate": 5e-06, "loss": 0.2999, "num_input_tokens_seen": 248402720, "step": 3962 }, { "epoch": 13.1846921797005, "loss": 0.35376256704330444, "loss_ce": 2.7840073926199693e-06, "loss_iou": 0.1279296875, "loss_num": 0.0196533203125, "loss_xval": 0.353515625, "num_input_tokens_seen": 248402720, "step": 3962 }, { "epoch": 13.18801996672213, "grad_norm": 9.454279899597168, "learning_rate": 5e-06, "loss": 0.4408, "num_input_tokens_seen": 248465152, "step": 3963 }, { "epoch": 13.18801996672213, "loss": 0.44963961839675903, "loss_ce": 5.467256414704025e-05, "loss_iou": 0.1728515625, "loss_num": 0.02099609375, "loss_xval": 0.44921875, "num_input_tokens_seen": 248465152, "step": 3963 }, { "epoch": 13.191347753743761, "grad_norm": 12.704276084899902, "learning_rate": 5e-06, "loss": 0.3498, "num_input_tokens_seen": 248527828, "step": 3964 }, { "epoch": 13.191347753743761, "loss": 0.22354722023010254, "loss_ce": 5.97145162828383e-06, "loss_iou": 0.07373046875, "loss_num": 0.0152587890625, "loss_xval": 0.2236328125, "num_input_tokens_seen": 248527828, "step": 3964 }, { "epoch": 13.194675540765392, "grad_norm": 12.46349811553955, "learning_rate": 5e-06, "loss": 0.3903, "num_input_tokens_seen": 248590508, "step": 3965 }, { "epoch": 13.194675540765392, "loss": 0.46933677792549133, "loss_ce": 0.0002205812488682568, "loss_iou": 0.17578125, "loss_num": 0.0235595703125, "loss_xval": 0.46875, "num_input_tokens_seen": 248590508, "step": 3965 }, { "epoch": 13.198003327787022, "grad_norm": 22.677122116088867, "learning_rate": 5e-06, "loss": 0.5018, "num_input_tokens_seen": 248651592, "step": 3966 }, { "epoch": 13.198003327787022, "loss": 0.5930202007293701, "loss_ce": 2.645464974193601e-06, "loss_iou": 0.2041015625, "loss_num": 0.037109375, "loss_xval": 0.59375, "num_input_tokens_seen": 248651592, "step": 3966 }, { "epoch": 13.201331114808653, "grad_norm": 26.041736602783203, "learning_rate": 5e-06, "loss": 0.6773, "num_input_tokens_seen": 248715672, "step": 3967 }, { "epoch": 13.201331114808653, "loss": 0.675052285194397, "loss_ce": 3.4494532883400097e-06, "loss_iou": 0.259765625, "loss_num": 0.031494140625, "loss_xval": 0.67578125, "num_input_tokens_seen": 248715672, "step": 3967 }, { "epoch": 13.204658901830284, "grad_norm": 14.39686107635498, "learning_rate": 5e-06, "loss": 0.5454, "num_input_tokens_seen": 248779032, "step": 3968 }, { "epoch": 13.204658901830284, "loss": 0.5338184833526611, "loss_ce": 5.001632416679058e-06, "loss_iou": 0.21484375, "loss_num": 0.02099609375, "loss_xval": 0.53515625, "num_input_tokens_seen": 248779032, "step": 3968 }, { "epoch": 13.207986688851914, "grad_norm": 24.105783462524414, "learning_rate": 5e-06, "loss": 0.4011, "num_input_tokens_seen": 248840996, "step": 3969 }, { "epoch": 13.207986688851914, "loss": 0.28503525257110596, "loss_ce": 1.0836386081791716e-06, "loss_iou": 0.087890625, "loss_num": 0.0218505859375, "loss_xval": 0.28515625, "num_input_tokens_seen": 248840996, "step": 3969 }, { "epoch": 13.211314475873545, "grad_norm": 27.946157455444336, "learning_rate": 5e-06, "loss": 0.5789, "num_input_tokens_seen": 248905284, "step": 3970 }, { "epoch": 13.211314475873545, "loss": 0.648919939994812, "loss_ce": 0.0006654973258264363, "loss_iou": 0.283203125, "loss_num": 0.0166015625, "loss_xval": 0.6484375, "num_input_tokens_seen": 248905284, "step": 3970 }, { "epoch": 13.214642262895175, "grad_norm": 42.29182052612305, "learning_rate": 5e-06, "loss": 0.6088, "num_input_tokens_seen": 248968612, "step": 3971 }, { "epoch": 13.214642262895175, "loss": 0.6953861713409424, "loss_ce": 0.0001957314379978925, "loss_iou": 0.318359375, "loss_num": 0.0118408203125, "loss_xval": 0.6953125, "num_input_tokens_seen": 248968612, "step": 3971 }, { "epoch": 13.217970049916806, "grad_norm": 31.781221389770508, "learning_rate": 5e-06, "loss": 0.5198, "num_input_tokens_seen": 249031716, "step": 3972 }, { "epoch": 13.217970049916806, "loss": 0.5160942673683167, "loss_ce": 0.0004692665534093976, "loss_iou": 0.205078125, "loss_num": 0.0211181640625, "loss_xval": 0.515625, "num_input_tokens_seen": 249031716, "step": 3972 }, { "epoch": 13.221297836938437, "grad_norm": 10.687870025634766, "learning_rate": 5e-06, "loss": 0.5129, "num_input_tokens_seen": 249095652, "step": 3973 }, { "epoch": 13.221297836938437, "loss": 0.5044269561767578, "loss_ce": 1.961268026207108e-06, "loss_iou": 0.2265625, "loss_num": 0.01019287109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 249095652, "step": 3973 }, { "epoch": 13.224625623960067, "grad_norm": 14.738362312316895, "learning_rate": 5e-06, "loss": 0.3877, "num_input_tokens_seen": 249158032, "step": 3974 }, { "epoch": 13.224625623960067, "loss": 0.46472322940826416, "loss_ce": 1.568457832945569e-06, "loss_iou": 0.201171875, "loss_num": 0.0126953125, "loss_xval": 0.46484375, "num_input_tokens_seen": 249158032, "step": 3974 }, { "epoch": 13.227953410981698, "grad_norm": 17.230342864990234, "learning_rate": 5e-06, "loss": 0.5375, "num_input_tokens_seen": 249221632, "step": 3975 }, { "epoch": 13.227953410981698, "loss": 0.5975486636161804, "loss_ce": 1.4466144421021454e-05, "loss_iou": 0.2138671875, "loss_num": 0.0341796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 249221632, "step": 3975 }, { "epoch": 13.231281198003328, "grad_norm": 14.9638671875, "learning_rate": 5e-06, "loss": 0.5941, "num_input_tokens_seen": 249284696, "step": 3976 }, { "epoch": 13.231281198003328, "loss": 0.6124744415283203, "loss_ce": 0.00010873316205106676, "loss_iou": 0.25, "loss_num": 0.0224609375, "loss_xval": 0.61328125, "num_input_tokens_seen": 249284696, "step": 3976 }, { "epoch": 13.234608985024959, "grad_norm": 11.980072021484375, "learning_rate": 5e-06, "loss": 0.4784, "num_input_tokens_seen": 249346404, "step": 3977 }, { "epoch": 13.234608985024959, "loss": 0.7685558795928955, "loss_ce": 1.1471048537714523e-06, "loss_iou": 0.251953125, "loss_num": 0.05322265625, "loss_xval": 0.76953125, "num_input_tokens_seen": 249346404, "step": 3977 }, { "epoch": 13.23793677204659, "grad_norm": 7.391626358032227, "learning_rate": 5e-06, "loss": 0.5992, "num_input_tokens_seen": 249409252, "step": 3978 }, { "epoch": 13.23793677204659, "loss": 0.44826793670654297, "loss_ce": 2.575176040409133e-05, "loss_iou": 0.1845703125, "loss_num": 0.0159912109375, "loss_xval": 0.44921875, "num_input_tokens_seen": 249409252, "step": 3978 }, { "epoch": 13.24126455906822, "grad_norm": 11.159095764160156, "learning_rate": 5e-06, "loss": 0.4346, "num_input_tokens_seen": 249471880, "step": 3979 }, { "epoch": 13.24126455906822, "loss": 0.40003740787506104, "loss_ce": 0.00022662075934931636, "loss_iou": 0.1669921875, "loss_num": 0.01300048828125, "loss_xval": 0.400390625, "num_input_tokens_seen": 249471880, "step": 3979 }, { "epoch": 13.244592346089851, "grad_norm": 5.773029327392578, "learning_rate": 5e-06, "loss": 0.3627, "num_input_tokens_seen": 249533456, "step": 3980 }, { "epoch": 13.244592346089851, "loss": 0.5003679990768433, "loss_ce": 1.8101329715136671e-06, "loss_iou": 0.173828125, "loss_num": 0.03076171875, "loss_xval": 0.5, "num_input_tokens_seen": 249533456, "step": 3980 }, { "epoch": 13.247920133111482, "grad_norm": 7.904747486114502, "learning_rate": 5e-06, "loss": 0.4501, "num_input_tokens_seen": 249595772, "step": 3981 }, { "epoch": 13.247920133111482, "loss": 0.44471806287765503, "loss_ce": 1.5908364730421454e-05, "loss_iou": 0.1669921875, "loss_num": 0.02197265625, "loss_xval": 0.4453125, "num_input_tokens_seen": 249595772, "step": 3981 }, { "epoch": 13.251247920133112, "grad_norm": 9.016499519348145, "learning_rate": 5e-06, "loss": 0.5909, "num_input_tokens_seen": 249659528, "step": 3982 }, { "epoch": 13.251247920133112, "loss": 0.45109298825263977, "loss_ce": 4.3171221477678046e-05, "loss_iou": 0.1689453125, "loss_num": 0.022705078125, "loss_xval": 0.451171875, "num_input_tokens_seen": 249659528, "step": 3982 }, { "epoch": 13.254575707154743, "grad_norm": 9.991205215454102, "learning_rate": 5e-06, "loss": 0.5633, "num_input_tokens_seen": 249721644, "step": 3983 }, { "epoch": 13.254575707154743, "loss": 0.5603051781654358, "loss_ce": 2.430057975288946e-06, "loss_iou": 0.2265625, "loss_num": 0.0213623046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 249721644, "step": 3983 }, { "epoch": 13.257903494176373, "grad_norm": 9.797656059265137, "learning_rate": 5e-06, "loss": 0.4928, "num_input_tokens_seen": 249784464, "step": 3984 }, { "epoch": 13.257903494176373, "loss": 0.4885881543159485, "loss_ce": 1.7017564459820278e-06, "loss_iou": 0.169921875, "loss_num": 0.02978515625, "loss_xval": 0.48828125, "num_input_tokens_seen": 249784464, "step": 3984 }, { "epoch": 13.261231281198004, "grad_norm": 15.903599739074707, "learning_rate": 5e-06, "loss": 0.731, "num_input_tokens_seen": 249847604, "step": 3985 }, { "epoch": 13.261231281198004, "loss": 0.6174191236495972, "loss_ce": 0.00017058123194146901, "loss_iou": 0.236328125, "loss_num": 0.02880859375, "loss_xval": 0.6171875, "num_input_tokens_seen": 249847604, "step": 3985 }, { "epoch": 13.264559068219635, "grad_norm": 18.10671615600586, "learning_rate": 5e-06, "loss": 0.4184, "num_input_tokens_seen": 249910652, "step": 3986 }, { "epoch": 13.264559068219635, "loss": 0.34003227949142456, "loss_ce": 5.426510597317247e-06, "loss_iou": 0.130859375, "loss_num": 0.01556396484375, "loss_xval": 0.33984375, "num_input_tokens_seen": 249910652, "step": 3986 }, { "epoch": 13.267886855241265, "grad_norm": 12.721144676208496, "learning_rate": 5e-06, "loss": 0.4242, "num_input_tokens_seen": 249973320, "step": 3987 }, { "epoch": 13.267886855241265, "loss": 0.30755937099456787, "loss_ce": 3.235157009839895e-06, "loss_iou": 0.1044921875, "loss_num": 0.019775390625, "loss_xval": 0.306640625, "num_input_tokens_seen": 249973320, "step": 3987 }, { "epoch": 13.271214642262896, "grad_norm": 10.393861770629883, "learning_rate": 5e-06, "loss": 0.327, "num_input_tokens_seen": 250033636, "step": 3988 }, { "epoch": 13.271214642262896, "loss": 0.4330669939517975, "loss_ce": 2.2552976588485762e-05, "loss_iou": 0.1455078125, "loss_num": 0.0283203125, "loss_xval": 0.43359375, "num_input_tokens_seen": 250033636, "step": 3988 }, { "epoch": 13.274542429284526, "grad_norm": 11.215907096862793, "learning_rate": 5e-06, "loss": 0.2804, "num_input_tokens_seen": 250095220, "step": 3989 }, { "epoch": 13.274542429284526, "loss": 0.27912217378616333, "loss_ce": 8.397261808568146e-06, "loss_iou": 0.10693359375, "loss_num": 0.01300048828125, "loss_xval": 0.279296875, "num_input_tokens_seen": 250095220, "step": 3989 }, { "epoch": 13.277870216306157, "grad_norm": 10.812259674072266, "learning_rate": 5e-06, "loss": 0.3846, "num_input_tokens_seen": 250157336, "step": 3990 }, { "epoch": 13.277870216306157, "loss": 0.4688740074634552, "loss_ce": 1.9557674022507854e-06, "loss_iou": 0.1904296875, "loss_num": 0.0177001953125, "loss_xval": 0.46875, "num_input_tokens_seen": 250157336, "step": 3990 }, { "epoch": 13.281198003327788, "grad_norm": 6.895930767059326, "learning_rate": 5e-06, "loss": 0.5686, "num_input_tokens_seen": 250220784, "step": 3991 }, { "epoch": 13.281198003327788, "loss": 0.5455338954925537, "loss_ce": 1.6395167676819256e-06, "loss_iou": 0.1767578125, "loss_num": 0.038330078125, "loss_xval": 0.546875, "num_input_tokens_seen": 250220784, "step": 3991 }, { "epoch": 13.284525790349418, "grad_norm": 14.647802352905273, "learning_rate": 5e-06, "loss": 0.4482, "num_input_tokens_seen": 250283520, "step": 3992 }, { "epoch": 13.284525790349418, "loss": 0.5952843427658081, "loss_ce": 8.502679520461243e-06, "loss_iou": 0.2373046875, "loss_num": 0.0238037109375, "loss_xval": 0.59375, "num_input_tokens_seen": 250283520, "step": 3992 }, { "epoch": 13.287853577371049, "grad_norm": 11.032774925231934, "learning_rate": 5e-06, "loss": 0.5781, "num_input_tokens_seen": 250344492, "step": 3993 }, { "epoch": 13.287853577371049, "loss": 0.39966046810150146, "loss_ce": 2.2658762190985726e-06, "loss_iou": 0.1357421875, "loss_num": 0.0255126953125, "loss_xval": 0.400390625, "num_input_tokens_seen": 250344492, "step": 3993 }, { "epoch": 13.29118136439268, "grad_norm": 15.436237335205078, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 250408720, "step": 3994 }, { "epoch": 13.29118136439268, "loss": 0.5640706419944763, "loss_ce": 0.00010577555076451972, "loss_iou": 0.23828125, "loss_num": 0.017578125, "loss_xval": 0.5625, "num_input_tokens_seen": 250408720, "step": 3994 }, { "epoch": 13.29450915141431, "grad_norm": 17.280414581298828, "learning_rate": 5e-06, "loss": 0.4284, "num_input_tokens_seen": 250470236, "step": 3995 }, { "epoch": 13.29450915141431, "loss": 0.5623456239700317, "loss_ce": 0.0010663170833140612, "loss_iou": 0.205078125, "loss_num": 0.030029296875, "loss_xval": 0.5625, "num_input_tokens_seen": 250470236, "step": 3995 }, { "epoch": 13.29783693843594, "grad_norm": 32.528358459472656, "learning_rate": 5e-06, "loss": 0.7685, "num_input_tokens_seen": 250534984, "step": 3996 }, { "epoch": 13.29783693843594, "loss": 0.804225504398346, "loss_ce": 0.000148349572555162, "loss_iou": 0.30078125, "loss_num": 0.040771484375, "loss_xval": 0.8046875, "num_input_tokens_seen": 250534984, "step": 3996 }, { "epoch": 13.301164725457571, "grad_norm": 29.2388973236084, "learning_rate": 5e-06, "loss": 0.5732, "num_input_tokens_seen": 250597232, "step": 3997 }, { "epoch": 13.301164725457571, "loss": 0.7464748620986938, "loss_ce": 7.589724555145949e-05, "loss_iou": 0.283203125, "loss_num": 0.036376953125, "loss_xval": 0.74609375, "num_input_tokens_seen": 250597232, "step": 3997 }, { "epoch": 13.304492512479202, "grad_norm": 15.676419258117676, "learning_rate": 5e-06, "loss": 0.3571, "num_input_tokens_seen": 250658820, "step": 3998 }, { "epoch": 13.304492512479202, "loss": 0.38513320684432983, "loss_ce": 1.3798302234135917e-06, "loss_iou": 0.138671875, "loss_num": 0.021484375, "loss_xval": 0.384765625, "num_input_tokens_seen": 250658820, "step": 3998 }, { "epoch": 13.307820299500833, "grad_norm": 18.296266555786133, "learning_rate": 5e-06, "loss": 0.5668, "num_input_tokens_seen": 250721844, "step": 3999 }, { "epoch": 13.307820299500833, "loss": 0.49501362442970276, "loss_ce": 1.8492113667889498e-05, "loss_iou": 0.1806640625, "loss_num": 0.026611328125, "loss_xval": 0.494140625, "num_input_tokens_seen": 250721844, "step": 3999 }, { "epoch": 13.311148086522463, "grad_norm": 13.97290325164795, "learning_rate": 5e-06, "loss": 0.3843, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "eval_seeclick_CIoU": 0.0401131734251976, "eval_seeclick_GIoU": 0.037240875884890556, "eval_seeclick_IoU": 0.15966929495334625, "eval_seeclick_MAE_all": 0.17109407484531403, "eval_seeclick_MAE_h": 0.060333045199513435, "eval_seeclick_MAE_w": 0.132522851228714, "eval_seeclick_MAE_x_boxes": 0.20975444465875626, "eval_seeclick_MAE_y_boxes": 0.1880405694246292, "eval_seeclick_NUM_probability": 0.999980092048645, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 2.969235420227051, "eval_seeclick_loss_ce": 0.1648871749639511, "eval_seeclick_loss_iou": 0.974609375, "eval_seeclick_loss_num": 0.17327117919921875, "eval_seeclick_loss_xval": 2.8173828125, "eval_seeclick_runtime": 68.028, "eval_seeclick_samples_per_second": 0.691, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "eval_icons_CIoU": -0.08000272512435913, "eval_icons_GIoU": 0.010233924724161625, "eval_icons_IoU": 0.10820939019322395, "eval_icons_MAE_all": 0.20556750148534775, "eval_icons_MAE_h": 0.16237898170948029, "eval_icons_MAE_w": 0.23544737696647644, "eval_icons_MAE_x_boxes": 0.14878180995583534, "eval_icons_MAE_y_boxes": 0.0992947556078434, "eval_icons_NUM_probability": 0.9999783039093018, "eval_icons_inside_bbox": 0.2326388955116272, "eval_icons_loss": 2.957519769668579, "eval_icons_loss_ce": 6.551265505549964e-05, "eval_icons_loss_iou": 0.9755859375, "eval_icons_loss_num": 0.2015380859375, "eval_icons_loss_xval": 2.958984375, "eval_icons_runtime": 70.7328, "eval_icons_samples_per_second": 0.707, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "eval_screenspot_CIoU": 0.17064758141835532, "eval_screenspot_GIoU": 0.20539005597432455, "eval_screenspot_IoU": 0.2827802101771037, "eval_screenspot_MAE_all": 0.1173891747991244, "eval_screenspot_MAE_h": 0.0692595901588599, "eval_screenspot_MAE_w": 0.09308085466424625, "eval_screenspot_MAE_x_boxes": 0.16173570851484934, "eval_screenspot_MAE_y_boxes": 0.08932050069173177, "eval_screenspot_NUM_probability": 0.9999942382176717, "eval_screenspot_inside_bbox": 0.5258333285649618, "eval_screenspot_loss": 2.215608835220337, "eval_screenspot_loss_ce": 1.790876831364585e-05, "eval_screenspot_loss_iou": 0.8069661458333334, "eval_screenspot_loss_num": 0.12707010904947916, "eval_screenspot_loss_xval": 2.2506510416666665, "eval_screenspot_runtime": 123.5469, "eval_screenspot_samples_per_second": 0.72, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "eval_compot_CIoU": 0.13092739507555962, "eval_compot_GIoU": 0.17571666836738586, "eval_compot_IoU": 0.2661994695663452, "eval_compot_MAE_all": 0.1353999674320221, "eval_compot_MAE_h": 0.07347099855542183, "eval_compot_MAE_w": 0.17579501867294312, "eval_compot_MAE_x_boxes": 0.09459010884165764, "eval_compot_MAE_y_boxes": 0.10249979048967361, "eval_compot_NUM_probability": 0.9999965727329254, "eval_compot_inside_bbox": 0.4565972238779068, "eval_compot_loss": 2.3006114959716797, "eval_compot_loss_ce": 0.0029682923923246562, "eval_compot_loss_iou": 0.83203125, "eval_compot_loss_num": 0.14318466186523438, "eval_compot_loss_xval": 2.38037109375, "eval_compot_runtime": 74.9133, "eval_compot_samples_per_second": 0.667, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "eval_custom_ui_MAE_all": 0.06289888173341751, "eval_custom_ui_MAE_x": 0.0704539604485035, "eval_custom_ui_MAE_y": 0.05534380488097668, "eval_custom_ui_NUM_probability": 0.9999986588954926, "eval_custom_ui_loss": 0.29390913248062134, "eval_custom_ui_loss_ce": 3.023015551661956e-06, "eval_custom_ui_loss_num": 0.06036376953125, "eval_custom_ui_loss_xval": 0.302001953125, "eval_custom_ui_runtime": 49.8196, "eval_custom_ui_samples_per_second": 1.004, "eval_custom_ui_steps_per_second": 0.04, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.311148086522463, "loss": 0.3259308338165283, "loss_ce": 3.0994974622444715e-06, "loss_iou": 0.0, "loss_num": 0.06494140625, "loss_xval": 0.326171875, "num_input_tokens_seen": 250783556, "step": 4000 }, { "epoch": 13.314475873544094, "grad_norm": 7.303277492523193, "learning_rate": 5e-06, "loss": 0.3046, "num_input_tokens_seen": 250845428, "step": 4001 }, { "epoch": 13.314475873544094, "loss": 0.3303237557411194, "loss_ce": 1.5066576679600985e-06, "loss_iou": 0.11328125, "loss_num": 0.0208740234375, "loss_xval": 0.330078125, "num_input_tokens_seen": 250845428, "step": 4001 }, { "epoch": 13.317803660565724, "grad_norm": 13.108943939208984, "learning_rate": 5e-06, "loss": 0.5498, "num_input_tokens_seen": 250907516, "step": 4002 }, { "epoch": 13.317803660565724, "loss": 0.3902915120124817, "loss_ce": 2.237259877801989e-06, "loss_iou": 0.134765625, "loss_num": 0.0242919921875, "loss_xval": 0.390625, "num_input_tokens_seen": 250907516, "step": 4002 }, { "epoch": 13.321131447587355, "grad_norm": 29.334354400634766, "learning_rate": 5e-06, "loss": 0.4898, "num_input_tokens_seen": 250970000, "step": 4003 }, { "epoch": 13.321131447587355, "loss": 0.6739609241485596, "loss_ce": 1.0703003681555856e-05, "loss_iou": 0.23046875, "loss_num": 0.042724609375, "loss_xval": 0.67578125, "num_input_tokens_seen": 250970000, "step": 4003 }, { "epoch": 13.324459234608986, "grad_norm": 27.505264282226562, "learning_rate": 5e-06, "loss": 0.5407, "num_input_tokens_seen": 251032276, "step": 4004 }, { "epoch": 13.324459234608986, "loss": 0.5970475673675537, "loss_ce": 1.6480253179906867e-06, "loss_iou": 0.2431640625, "loss_num": 0.0220947265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 251032276, "step": 4004 }, { "epoch": 13.327787021630616, "grad_norm": 13.638843536376953, "learning_rate": 5e-06, "loss": 0.3929, "num_input_tokens_seen": 251095040, "step": 4005 }, { "epoch": 13.327787021630616, "loss": 0.438029944896698, "loss_ce": 1.1145155440317467e-05, "loss_iou": 0.189453125, "loss_num": 0.011962890625, "loss_xval": 0.4375, "num_input_tokens_seen": 251095040, "step": 4005 }, { "epoch": 13.331114808652247, "grad_norm": 8.901224136352539, "learning_rate": 5e-06, "loss": 0.4523, "num_input_tokens_seen": 251157960, "step": 4006 }, { "epoch": 13.331114808652247, "loss": 0.27401790022850037, "loss_ce": 5.518913894775324e-07, "loss_iou": 0.1064453125, "loss_num": 0.01220703125, "loss_xval": 0.2734375, "num_input_tokens_seen": 251157960, "step": 4006 }, { "epoch": 13.334442595673877, "grad_norm": 7.182138442993164, "learning_rate": 5e-06, "loss": 0.2289, "num_input_tokens_seen": 251220396, "step": 4007 }, { "epoch": 13.334442595673877, "loss": 0.19941219687461853, "loss_ce": 1.0344107067794539e-05, "loss_iou": 0.068359375, "loss_num": 0.01251220703125, "loss_xval": 0.19921875, "num_input_tokens_seen": 251220396, "step": 4007 }, { "epoch": 13.337770382695508, "grad_norm": 15.361597061157227, "learning_rate": 5e-06, "loss": 0.7501, "num_input_tokens_seen": 251283992, "step": 4008 }, { "epoch": 13.337770382695508, "loss": 0.7814378142356873, "loss_ce": 4.685370640800102e-06, "loss_iou": 0.28125, "loss_num": 0.044189453125, "loss_xval": 0.78125, "num_input_tokens_seen": 251283992, "step": 4008 }, { "epoch": 13.341098169717139, "grad_norm": 18.461496353149414, "learning_rate": 5e-06, "loss": 0.4305, "num_input_tokens_seen": 251345208, "step": 4009 }, { "epoch": 13.341098169717139, "loss": 0.3521742820739746, "loss_ce": 1.4629215456807287e-06, "loss_iou": 0.1005859375, "loss_num": 0.0302734375, "loss_xval": 0.3515625, "num_input_tokens_seen": 251345208, "step": 4009 }, { "epoch": 13.34442595673877, "grad_norm": 24.140893936157227, "learning_rate": 5e-06, "loss": 0.4843, "num_input_tokens_seen": 251407412, "step": 4010 }, { "epoch": 13.34442595673877, "loss": 0.5330855250358582, "loss_ce": 4.4636567508860026e-06, "loss_iou": 0.1884765625, "loss_num": 0.031005859375, "loss_xval": 0.53125, "num_input_tokens_seen": 251407412, "step": 4010 }, { "epoch": 13.3477537437604, "grad_norm": 8.456592559814453, "learning_rate": 5e-06, "loss": 0.2907, "num_input_tokens_seen": 251471104, "step": 4011 }, { "epoch": 13.3477537437604, "loss": 0.40100938081741333, "loss_ce": 8.392295967496466e-06, "loss_iou": 0.1708984375, "loss_num": 0.0118408203125, "loss_xval": 0.400390625, "num_input_tokens_seen": 251471104, "step": 4011 }, { "epoch": 13.35108153078203, "grad_norm": 14.956840515136719, "learning_rate": 5e-06, "loss": 0.3094, "num_input_tokens_seen": 251533532, "step": 4012 }, { "epoch": 13.35108153078203, "loss": 0.34113243222236633, "loss_ce": 6.9533502937702e-06, "loss_iou": 0.1103515625, "loss_num": 0.024169921875, "loss_xval": 0.341796875, "num_input_tokens_seen": 251533532, "step": 4012 }, { "epoch": 13.354409317803661, "grad_norm": 13.013337135314941, "learning_rate": 5e-06, "loss": 0.4644, "num_input_tokens_seen": 251597756, "step": 4013 }, { "epoch": 13.354409317803661, "loss": 0.2762458920478821, "loss_ce": 7.689019412282505e-07, "loss_iou": 0.08447265625, "loss_num": 0.021484375, "loss_xval": 0.275390625, "num_input_tokens_seen": 251597756, "step": 4013 }, { "epoch": 13.357737104825292, "grad_norm": 10.260213851928711, "learning_rate": 5e-06, "loss": 0.3413, "num_input_tokens_seen": 251659800, "step": 4014 }, { "epoch": 13.357737104825292, "loss": 0.2952476739883423, "loss_ce": 0.00017321942141279578, "loss_iou": 0.11083984375, "loss_num": 0.0146484375, "loss_xval": 0.294921875, "num_input_tokens_seen": 251659800, "step": 4014 }, { "epoch": 13.361064891846922, "grad_norm": 18.33027458190918, "learning_rate": 5e-06, "loss": 0.638, "num_input_tokens_seen": 251724292, "step": 4015 }, { "epoch": 13.361064891846922, "loss": 0.7707099318504333, "loss_ce": 0.0002021368418354541, "loss_iou": 0.29296875, "loss_num": 0.036865234375, "loss_xval": 0.76953125, "num_input_tokens_seen": 251724292, "step": 4015 }, { "epoch": 13.364392678868553, "grad_norm": 14.315872192382812, "learning_rate": 5e-06, "loss": 0.3762, "num_input_tokens_seen": 251787068, "step": 4016 }, { "epoch": 13.364392678868553, "loss": 0.32232898473739624, "loss_ce": 2.3340089683188125e-06, "loss_iou": 0.12109375, "loss_num": 0.0159912109375, "loss_xval": 0.322265625, "num_input_tokens_seen": 251787068, "step": 4016 }, { "epoch": 13.367720465890184, "grad_norm": 17.152469635009766, "learning_rate": 5e-06, "loss": 0.4269, "num_input_tokens_seen": 251850680, "step": 4017 }, { "epoch": 13.367720465890184, "loss": 0.39758336544036865, "loss_ce": 0.0016483075451105833, "loss_iou": 0.154296875, "loss_num": 0.017333984375, "loss_xval": 0.396484375, "num_input_tokens_seen": 251850680, "step": 4017 }, { "epoch": 13.371048252911814, "grad_norm": 24.009235382080078, "learning_rate": 5e-06, "loss": 0.3271, "num_input_tokens_seen": 251911608, "step": 4018 }, { "epoch": 13.371048252911814, "loss": 0.4100278615951538, "loss_ce": 0.0001157603255705908, "loss_iou": 0.1123046875, "loss_num": 0.037109375, "loss_xval": 0.41015625, "num_input_tokens_seen": 251911608, "step": 4018 }, { "epoch": 13.374376039933445, "grad_norm": 22.174949645996094, "learning_rate": 5e-06, "loss": 0.5098, "num_input_tokens_seen": 251975164, "step": 4019 }, { "epoch": 13.374376039933445, "loss": 0.6766976118087769, "loss_ce": 8.345499509232468e-07, "loss_iou": 0.28125, "loss_num": 0.02294921875, "loss_xval": 0.67578125, "num_input_tokens_seen": 251975164, "step": 4019 }, { "epoch": 13.377703826955075, "grad_norm": 24.108800888061523, "learning_rate": 5e-06, "loss": 0.4411, "num_input_tokens_seen": 252038096, "step": 4020 }, { "epoch": 13.377703826955075, "loss": 0.4536397159099579, "loss_ce": 0.000514713698066771, "loss_iou": 0.1865234375, "loss_num": 0.016357421875, "loss_xval": 0.453125, "num_input_tokens_seen": 252038096, "step": 4020 }, { "epoch": 13.381031613976706, "grad_norm": 11.7285737991333, "learning_rate": 5e-06, "loss": 0.4207, "num_input_tokens_seen": 252100052, "step": 4021 }, { "epoch": 13.381031613976706, "loss": 0.5114766359329224, "loss_ce": 2.014082383539062e-06, "loss_iou": 0.181640625, "loss_num": 0.0296630859375, "loss_xval": 0.51171875, "num_input_tokens_seen": 252100052, "step": 4021 }, { "epoch": 13.384359400998337, "grad_norm": 13.782764434814453, "learning_rate": 5e-06, "loss": 0.4676, "num_input_tokens_seen": 252162648, "step": 4022 }, { "epoch": 13.384359400998337, "loss": 0.23151850700378418, "loss_ce": 1.2149435860919766e-05, "loss_iou": 0.07763671875, "loss_num": 0.0152587890625, "loss_xval": 0.2314453125, "num_input_tokens_seen": 252162648, "step": 4022 }, { "epoch": 13.387687188019967, "grad_norm": 5.635368347167969, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 252225620, "step": 4023 }, { "epoch": 13.387687188019967, "loss": 0.26623642444610596, "loss_ce": 1.0709060234148637e-06, "loss_iou": 0.07177734375, "loss_num": 0.0245361328125, "loss_xval": 0.265625, "num_input_tokens_seen": 252225620, "step": 4023 }, { "epoch": 13.391014975041598, "grad_norm": 9.059968948364258, "learning_rate": 5e-06, "loss": 0.5101, "num_input_tokens_seen": 252288036, "step": 4024 }, { "epoch": 13.391014975041598, "loss": 0.545184314250946, "loss_ce": 7.933237066026777e-05, "loss_iou": 0.20703125, "loss_num": 0.026123046875, "loss_xval": 0.546875, "num_input_tokens_seen": 252288036, "step": 4024 }, { "epoch": 13.394342762063228, "grad_norm": 19.220178604125977, "learning_rate": 5e-06, "loss": 0.5524, "num_input_tokens_seen": 252351416, "step": 4025 }, { "epoch": 13.394342762063228, "loss": 0.4785241484642029, "loss_ce": 8.548906407668255e-06, "loss_iou": 0.1669921875, "loss_num": 0.0291748046875, "loss_xval": 0.478515625, "num_input_tokens_seen": 252351416, "step": 4025 }, { "epoch": 13.397670549084859, "grad_norm": 34.31489181518555, "learning_rate": 5e-06, "loss": 0.4825, "num_input_tokens_seen": 252414308, "step": 4026 }, { "epoch": 13.397670549084859, "loss": 0.40625107288360596, "loss_ce": 1.0760422810562886e-06, "loss_iou": 0.1396484375, "loss_num": 0.0255126953125, "loss_xval": 0.40625, "num_input_tokens_seen": 252414308, "step": 4026 }, { "epoch": 13.40099833610649, "grad_norm": 29.904218673706055, "learning_rate": 5e-06, "loss": 0.5231, "num_input_tokens_seen": 252477976, "step": 4027 }, { "epoch": 13.40099833610649, "loss": 0.3475455641746521, "loss_ce": 1.1389466635591816e-05, "loss_iou": 0.11962890625, "loss_num": 0.0216064453125, "loss_xval": 0.34765625, "num_input_tokens_seen": 252477976, "step": 4027 }, { "epoch": 13.40432612312812, "grad_norm": 20.333845138549805, "learning_rate": 5e-06, "loss": 0.4593, "num_input_tokens_seen": 252539276, "step": 4028 }, { "epoch": 13.40432612312812, "loss": 0.42329245805740356, "loss_ce": 1.3653409951075446e-05, "loss_iou": 0.1650390625, "loss_num": 0.018798828125, "loss_xval": 0.423828125, "num_input_tokens_seen": 252539276, "step": 4028 }, { "epoch": 13.407653910149751, "grad_norm": 22.580284118652344, "learning_rate": 5e-06, "loss": 0.3362, "num_input_tokens_seen": 252602700, "step": 4029 }, { "epoch": 13.407653910149751, "loss": 0.31532666087150574, "loss_ce": 1.90346017916454e-05, "loss_iou": 0.123046875, "loss_num": 0.013916015625, "loss_xval": 0.314453125, "num_input_tokens_seen": 252602700, "step": 4029 }, { "epoch": 13.410981697171382, "grad_norm": 44.397518157958984, "learning_rate": 5e-06, "loss": 0.6128, "num_input_tokens_seen": 252665620, "step": 4030 }, { "epoch": 13.410981697171382, "loss": 0.543948233127594, "loss_ce": 2.9438028832373675e-06, "loss_iou": 0.220703125, "loss_num": 0.0206298828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 252665620, "step": 4030 }, { "epoch": 13.414309484193012, "grad_norm": 26.23483657836914, "learning_rate": 5e-06, "loss": 0.6356, "num_input_tokens_seen": 252728200, "step": 4031 }, { "epoch": 13.414309484193012, "loss": 0.7006915807723999, "loss_ce": 7.931359505164437e-06, "loss_iou": 0.255859375, "loss_num": 0.0380859375, "loss_xval": 0.69921875, "num_input_tokens_seen": 252728200, "step": 4031 }, { "epoch": 13.417637271214643, "grad_norm": 7.548967361450195, "learning_rate": 5e-06, "loss": 0.4049, "num_input_tokens_seen": 252790840, "step": 4032 }, { "epoch": 13.417637271214643, "loss": 0.3702625334262848, "loss_ce": 2.328571463294793e-05, "loss_iou": 0.1533203125, "loss_num": 0.01275634765625, "loss_xval": 0.37109375, "num_input_tokens_seen": 252790840, "step": 4032 }, { "epoch": 13.420965058236273, "grad_norm": 12.885796546936035, "learning_rate": 5e-06, "loss": 0.3616, "num_input_tokens_seen": 252853164, "step": 4033 }, { "epoch": 13.420965058236273, "loss": 0.2990785837173462, "loss_ce": 6.737445801263675e-05, "loss_iou": 0.12353515625, "loss_num": 0.0103759765625, "loss_xval": 0.298828125, "num_input_tokens_seen": 252853164, "step": 4033 }, { "epoch": 13.424292845257904, "grad_norm": 17.131969451904297, "learning_rate": 5e-06, "loss": 0.6573, "num_input_tokens_seen": 252915688, "step": 4034 }, { "epoch": 13.424292845257904, "loss": 0.6782631874084473, "loss_ce": 0.0001015565067064017, "loss_iou": 0.287109375, "loss_num": 0.0208740234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 252915688, "step": 4034 }, { "epoch": 13.427620632279535, "grad_norm": 17.784334182739258, "learning_rate": 5e-06, "loss": 0.4824, "num_input_tokens_seen": 252979228, "step": 4035 }, { "epoch": 13.427620632279535, "loss": 0.3071547746658325, "loss_ce": 2.5833523977780715e-05, "loss_iou": 0.1318359375, "loss_num": 0.0087890625, "loss_xval": 0.306640625, "num_input_tokens_seen": 252979228, "step": 4035 }, { "epoch": 13.430948419301165, "grad_norm": 8.047124862670898, "learning_rate": 5e-06, "loss": 0.6477, "num_input_tokens_seen": 253041860, "step": 4036 }, { "epoch": 13.430948419301165, "loss": 0.41977089643478394, "loss_ce": 9.314579801866785e-05, "loss_iou": 0.1533203125, "loss_num": 0.022705078125, "loss_xval": 0.419921875, "num_input_tokens_seen": 253041860, "step": 4036 }, { "epoch": 13.434276206322796, "grad_norm": 13.153543472290039, "learning_rate": 5e-06, "loss": 0.5711, "num_input_tokens_seen": 253105352, "step": 4037 }, { "epoch": 13.434276206322796, "loss": 0.5682383179664612, "loss_ce": 1.0407838999526575e-06, "loss_iou": 0.2216796875, "loss_num": 0.025146484375, "loss_xval": 0.56640625, "num_input_tokens_seen": 253105352, "step": 4037 }, { "epoch": 13.437603993344426, "grad_norm": 25.596834182739258, "learning_rate": 5e-06, "loss": 0.6306, "num_input_tokens_seen": 253168244, "step": 4038 }, { "epoch": 13.437603993344426, "loss": 0.755189061164856, "loss_ce": 1.01950377029425e-06, "loss_iou": 0.30078125, "loss_num": 0.03076171875, "loss_xval": 0.75390625, "num_input_tokens_seen": 253168244, "step": 4038 }, { "epoch": 13.440931780366057, "grad_norm": 22.277341842651367, "learning_rate": 5e-06, "loss": 0.3317, "num_input_tokens_seen": 253231292, "step": 4039 }, { "epoch": 13.440931780366057, "loss": 0.3621840476989746, "loss_ce": 1.434115006304637e-06, "loss_iou": 0.1318359375, "loss_num": 0.02001953125, "loss_xval": 0.361328125, "num_input_tokens_seen": 253231292, "step": 4039 }, { "epoch": 13.444259567387688, "grad_norm": 27.06279945373535, "learning_rate": 5e-06, "loss": 0.3712, "num_input_tokens_seen": 253294532, "step": 4040 }, { "epoch": 13.444259567387688, "loss": 0.3563537895679474, "loss_ce": 3.054995249840431e-05, "loss_iou": 0.1376953125, "loss_num": 0.01611328125, "loss_xval": 0.35546875, "num_input_tokens_seen": 253294532, "step": 4040 }, { "epoch": 13.447587354409318, "grad_norm": 36.06754684448242, "learning_rate": 5e-06, "loss": 0.4637, "num_input_tokens_seen": 253356052, "step": 4041 }, { "epoch": 13.447587354409318, "loss": 0.4671350121498108, "loss_ce": 0.0001245077874045819, "loss_iou": 0.185546875, "loss_num": 0.019287109375, "loss_xval": 0.466796875, "num_input_tokens_seen": 253356052, "step": 4041 }, { "epoch": 13.450915141430949, "grad_norm": 37.05292892456055, "learning_rate": 5e-06, "loss": 0.6249, "num_input_tokens_seen": 253419316, "step": 4042 }, { "epoch": 13.450915141430949, "loss": 0.6953914165496826, "loss_ce": 0.0003229951544199139, "loss_iou": 0.287109375, "loss_num": 0.024169921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 253419316, "step": 4042 }, { "epoch": 13.45424292845258, "grad_norm": 22.668041229248047, "learning_rate": 5e-06, "loss": 0.3937, "num_input_tokens_seen": 253482744, "step": 4043 }, { "epoch": 13.45424292845258, "loss": 0.36609089374542236, "loss_ce": 2.0181798845442245e-06, "loss_iou": 0.12158203125, "loss_num": 0.024658203125, "loss_xval": 0.365234375, "num_input_tokens_seen": 253482744, "step": 4043 }, { "epoch": 13.45757071547421, "grad_norm": 5.961122989654541, "learning_rate": 5e-06, "loss": 0.3318, "num_input_tokens_seen": 253545252, "step": 4044 }, { "epoch": 13.45757071547421, "loss": 0.3604752719402313, "loss_ce": 1.646630153118167e-06, "loss_iou": 0.1337890625, "loss_num": 0.0186767578125, "loss_xval": 0.361328125, "num_input_tokens_seen": 253545252, "step": 4044 }, { "epoch": 13.46089850249584, "grad_norm": 5.519584655761719, "learning_rate": 5e-06, "loss": 0.6394, "num_input_tokens_seen": 253609512, "step": 4045 }, { "epoch": 13.46089850249584, "loss": 0.5491964817047119, "loss_ce": 2.1075002223369665e-06, "loss_iou": 0.1875, "loss_num": 0.034912109375, "loss_xval": 0.55078125, "num_input_tokens_seen": 253609512, "step": 4045 }, { "epoch": 13.464226289517471, "grad_norm": 7.78495979309082, "learning_rate": 5e-06, "loss": 0.5427, "num_input_tokens_seen": 253672240, "step": 4046 }, { "epoch": 13.464226289517471, "loss": 0.5413833856582642, "loss_ce": 1.5791622445249232e-06, "loss_iou": 0.1865234375, "loss_num": 0.03369140625, "loss_xval": 0.54296875, "num_input_tokens_seen": 253672240, "step": 4046 }, { "epoch": 13.467554076539102, "grad_norm": 13.018278121948242, "learning_rate": 5e-06, "loss": 0.526, "num_input_tokens_seen": 253735072, "step": 4047 }, { "epoch": 13.467554076539102, "loss": 0.6617438793182373, "loss_ce": 6.945905397515162e-07, "loss_iou": 0.2119140625, "loss_num": 0.047607421875, "loss_xval": 0.66015625, "num_input_tokens_seen": 253735072, "step": 4047 }, { "epoch": 13.470881863560733, "grad_norm": 6.639475345611572, "learning_rate": 5e-06, "loss": 0.3544, "num_input_tokens_seen": 253797940, "step": 4048 }, { "epoch": 13.470881863560733, "loss": 0.30625277757644653, "loss_ce": 0.003152174409478903, "loss_iou": 0.0908203125, "loss_num": 0.024169921875, "loss_xval": 0.302734375, "num_input_tokens_seen": 253797940, "step": 4048 }, { "epoch": 13.474209650582363, "grad_norm": 7.292903423309326, "learning_rate": 5e-06, "loss": 0.4455, "num_input_tokens_seen": 253860888, "step": 4049 }, { "epoch": 13.474209650582363, "loss": 0.47754013538360596, "loss_ce": 1.0975902569043683e-06, "loss_iou": 0.197265625, "loss_num": 0.0166015625, "loss_xval": 0.4765625, "num_input_tokens_seen": 253860888, "step": 4049 }, { "epoch": 13.477537437603994, "grad_norm": 9.464625358581543, "learning_rate": 5e-06, "loss": 0.3136, "num_input_tokens_seen": 253924424, "step": 4050 }, { "epoch": 13.477537437603994, "loss": 0.26420387625694275, "loss_ce": 4.3721320253098384e-05, "loss_iou": 0.0947265625, "loss_num": 0.014892578125, "loss_xval": 0.263671875, "num_input_tokens_seen": 253924424, "step": 4050 }, { "epoch": 13.480865224625624, "grad_norm": 13.43946361541748, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 253986336, "step": 4051 }, { "epoch": 13.480865224625624, "loss": 0.35017162561416626, "loss_ce": 1.2941072782268748e-05, "loss_iou": 0.1396484375, "loss_num": 0.01434326171875, "loss_xval": 0.349609375, "num_input_tokens_seen": 253986336, "step": 4051 }, { "epoch": 13.484193011647255, "grad_norm": 16.803226470947266, "learning_rate": 5e-06, "loss": 0.6699, "num_input_tokens_seen": 254049108, "step": 4052 }, { "epoch": 13.484193011647255, "loss": 0.8337728977203369, "loss_ce": 0.0010092302691191435, "loss_iou": 0.345703125, "loss_num": 0.0284423828125, "loss_xval": 0.83203125, "num_input_tokens_seen": 254049108, "step": 4052 }, { "epoch": 13.487520798668886, "grad_norm": 17.97068214416504, "learning_rate": 5e-06, "loss": 0.6167, "num_input_tokens_seen": 254111620, "step": 4053 }, { "epoch": 13.487520798668886, "loss": 0.5087605714797974, "loss_ce": 2.004602947636158e-06, "loss_iou": 0.1845703125, "loss_num": 0.0277099609375, "loss_xval": 0.5078125, "num_input_tokens_seen": 254111620, "step": 4053 }, { "epoch": 13.490848585690516, "grad_norm": 15.225116729736328, "learning_rate": 5e-06, "loss": 0.3236, "num_input_tokens_seen": 254173432, "step": 4054 }, { "epoch": 13.490848585690516, "loss": 0.42340269684791565, "loss_ce": 1.828260224101541e-06, "loss_iou": 0.1923828125, "loss_num": 0.00787353515625, "loss_xval": 0.423828125, "num_input_tokens_seen": 254173432, "step": 4054 }, { "epoch": 13.494176372712147, "grad_norm": 14.46038818359375, "learning_rate": 5e-06, "loss": 0.6412, "num_input_tokens_seen": 254236556, "step": 4055 }, { "epoch": 13.494176372712147, "loss": 0.3684091866016388, "loss_ce": 9.816506008064607e-07, "loss_iou": 0.1435546875, "loss_num": 0.016357421875, "loss_xval": 0.369140625, "num_input_tokens_seen": 254236556, "step": 4055 }, { "epoch": 13.497504159733777, "grad_norm": 16.94215965270996, "learning_rate": 5e-06, "loss": 0.3743, "num_input_tokens_seen": 254299480, "step": 4056 }, { "epoch": 13.497504159733777, "loss": 0.31707966327667236, "loss_ce": 2.0396473701111972e-06, "loss_iou": 0.1259765625, "loss_num": 0.01287841796875, "loss_xval": 0.31640625, "num_input_tokens_seen": 254299480, "step": 4056 }, { "epoch": 13.500831946755408, "grad_norm": 10.556267738342285, "learning_rate": 5e-06, "loss": 0.3634, "num_input_tokens_seen": 254361276, "step": 4057 }, { "epoch": 13.500831946755408, "loss": 0.352419376373291, "loss_ce": 2.37226140598068e-06, "loss_iou": 0.150390625, "loss_num": 0.01043701171875, "loss_xval": 0.3515625, "num_input_tokens_seen": 254361276, "step": 4057 }, { "epoch": 13.504159733777039, "grad_norm": 7.271184921264648, "learning_rate": 5e-06, "loss": 0.3177, "num_input_tokens_seen": 254422132, "step": 4058 }, { "epoch": 13.504159733777039, "loss": 0.4142470955848694, "loss_ce": 1.4738275240233634e-06, "loss_iou": 0.1630859375, "loss_num": 0.017578125, "loss_xval": 0.4140625, "num_input_tokens_seen": 254422132, "step": 4058 }, { "epoch": 13.50748752079867, "grad_norm": 15.827631950378418, "learning_rate": 5e-06, "loss": 0.3635, "num_input_tokens_seen": 254485516, "step": 4059 }, { "epoch": 13.50748752079867, "loss": 0.39009252190589905, "loss_ce": 0.0005966631579212844, "loss_iou": 0.1240234375, "loss_num": 0.0283203125, "loss_xval": 0.388671875, "num_input_tokens_seen": 254485516, "step": 4059 }, { "epoch": 13.5108153078203, "grad_norm": 15.556474685668945, "learning_rate": 5e-06, "loss": 0.3927, "num_input_tokens_seen": 254548020, "step": 4060 }, { "epoch": 13.5108153078203, "loss": 0.31174105405807495, "loss_ce": 3.976404514105525e-06, "loss_iou": 0.12109375, "loss_num": 0.01397705078125, "loss_xval": 0.3125, "num_input_tokens_seen": 254548020, "step": 4060 }, { "epoch": 13.51414309484193, "grad_norm": 12.011070251464844, "learning_rate": 5e-06, "loss": 0.4939, "num_input_tokens_seen": 254609448, "step": 4061 }, { "epoch": 13.51414309484193, "loss": 0.5745378732681274, "loss_ce": 7.491508586099371e-05, "loss_iou": 0.2001953125, "loss_num": 0.034912109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 254609448, "step": 4061 }, { "epoch": 13.517470881863561, "grad_norm": 11.355887413024902, "learning_rate": 5e-06, "loss": 0.2994, "num_input_tokens_seen": 254672532, "step": 4062 }, { "epoch": 13.517470881863561, "loss": 0.2952972650527954, "loss_ce": 9.166074960376136e-06, "loss_iou": 0.103515625, "loss_num": 0.0177001953125, "loss_xval": 0.294921875, "num_input_tokens_seen": 254672532, "step": 4062 }, { "epoch": 13.520798668885192, "grad_norm": 11.106226921081543, "learning_rate": 5e-06, "loss": 0.3802, "num_input_tokens_seen": 254733188, "step": 4063 }, { "epoch": 13.520798668885192, "loss": 0.42902201414108276, "loss_ce": 5.895336471439805e-06, "loss_iou": 0.1533203125, "loss_num": 0.0242919921875, "loss_xval": 0.4296875, "num_input_tokens_seen": 254733188, "step": 4063 }, { "epoch": 13.524126455906822, "grad_norm": 7.066707134246826, "learning_rate": 5e-06, "loss": 0.3908, "num_input_tokens_seen": 254795880, "step": 4064 }, { "epoch": 13.524126455906822, "loss": 0.5147523880004883, "loss_ce": 4.291576624382287e-05, "loss_iou": 0.1884765625, "loss_num": 0.027587890625, "loss_xval": 0.515625, "num_input_tokens_seen": 254795880, "step": 4064 }, { "epoch": 13.527454242928453, "grad_norm": 12.230327606201172, "learning_rate": 5e-06, "loss": 0.3661, "num_input_tokens_seen": 254857924, "step": 4065 }, { "epoch": 13.527454242928453, "loss": 0.5029102563858032, "loss_ce": 0.001201295992359519, "loss_iou": 0.212890625, "loss_num": 0.01495361328125, "loss_xval": 0.5, "num_input_tokens_seen": 254857924, "step": 4065 }, { "epoch": 13.530782029950084, "grad_norm": 32.40962600708008, "learning_rate": 5e-06, "loss": 0.3536, "num_input_tokens_seen": 254920624, "step": 4066 }, { "epoch": 13.530782029950084, "loss": 0.4018838405609131, "loss_ce": 2.8356282200547867e-05, "loss_iou": 0.11669921875, "loss_num": 0.03369140625, "loss_xval": 0.40234375, "num_input_tokens_seen": 254920624, "step": 4066 }, { "epoch": 13.534109816971714, "grad_norm": 30.057220458984375, "learning_rate": 5e-06, "loss": 0.6506, "num_input_tokens_seen": 254982240, "step": 4067 }, { "epoch": 13.534109816971714, "loss": 0.8581557273864746, "loss_ce": 1.4225306586013176e-06, "loss_iou": 0.35546875, "loss_num": 0.0294189453125, "loss_xval": 0.859375, "num_input_tokens_seen": 254982240, "step": 4067 }, { "epoch": 13.537437603993345, "grad_norm": 32.07613754272461, "learning_rate": 5e-06, "loss": 0.4002, "num_input_tokens_seen": 255045564, "step": 4068 }, { "epoch": 13.537437603993345, "loss": 0.5168465375900269, "loss_ce": 8.492093002132606e-07, "loss_iou": 0.2001953125, "loss_num": 0.0233154296875, "loss_xval": 0.515625, "num_input_tokens_seen": 255045564, "step": 4068 }, { "epoch": 13.540765391014975, "grad_norm": 28.112735748291016, "learning_rate": 5e-06, "loss": 0.6398, "num_input_tokens_seen": 255108468, "step": 4069 }, { "epoch": 13.540765391014975, "loss": 0.7946584820747375, "loss_ce": 0.00010284439485985786, "loss_iou": 0.3203125, "loss_num": 0.030517578125, "loss_xval": 0.79296875, "num_input_tokens_seen": 255108468, "step": 4069 }, { "epoch": 13.544093178036606, "grad_norm": 11.23192310333252, "learning_rate": 5e-06, "loss": 0.3115, "num_input_tokens_seen": 255170792, "step": 4070 }, { "epoch": 13.544093178036606, "loss": 0.40171176195144653, "loss_ce": 8.884642738848925e-06, "loss_iou": 0.16015625, "loss_num": 0.01611328125, "loss_xval": 0.40234375, "num_input_tokens_seen": 255170792, "step": 4070 }, { "epoch": 13.547420965058237, "grad_norm": 7.52577018737793, "learning_rate": 5e-06, "loss": 0.3355, "num_input_tokens_seen": 255233436, "step": 4071 }, { "epoch": 13.547420965058237, "loss": 0.361275851726532, "loss_ce": 0.0003139561740681529, "loss_iou": 0.123046875, "loss_num": 0.02294921875, "loss_xval": 0.361328125, "num_input_tokens_seen": 255233436, "step": 4071 }, { "epoch": 13.550748752079867, "grad_norm": 16.499244689941406, "learning_rate": 5e-06, "loss": 0.5558, "num_input_tokens_seen": 255296192, "step": 4072 }, { "epoch": 13.550748752079867, "loss": 0.5158827304840088, "loss_ce": 1.360809437755961e-05, "loss_iou": 0.193359375, "loss_num": 0.02587890625, "loss_xval": 0.515625, "num_input_tokens_seen": 255296192, "step": 4072 }, { "epoch": 13.554076539101498, "grad_norm": 5.548696041107178, "learning_rate": 5e-06, "loss": 0.5129, "num_input_tokens_seen": 255358612, "step": 4073 }, { "epoch": 13.554076539101498, "loss": 0.54181307554245, "loss_ce": 3.993906830146443e-06, "loss_iou": 0.2021484375, "loss_num": 0.027587890625, "loss_xval": 0.54296875, "num_input_tokens_seen": 255358612, "step": 4073 }, { "epoch": 13.557404326123129, "grad_norm": 7.650638103485107, "learning_rate": 5e-06, "loss": 0.5133, "num_input_tokens_seen": 255421084, "step": 4074 }, { "epoch": 13.557404326123129, "loss": 0.5935797691345215, "loss_ce": 1.2902387425128836e-05, "loss_iou": 0.2333984375, "loss_num": 0.0252685546875, "loss_xval": 0.59375, "num_input_tokens_seen": 255421084, "step": 4074 }, { "epoch": 13.56073211314476, "grad_norm": 18.650245666503906, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 255483152, "step": 4075 }, { "epoch": 13.56073211314476, "loss": 0.4408916234970093, "loss_ce": 4.190536856185645e-06, "loss_iou": 0.162109375, "loss_num": 0.0233154296875, "loss_xval": 0.44140625, "num_input_tokens_seen": 255483152, "step": 4075 }, { "epoch": 13.56405990016639, "grad_norm": 21.816991806030273, "learning_rate": 5e-06, "loss": 0.4768, "num_input_tokens_seen": 255544956, "step": 4076 }, { "epoch": 13.56405990016639, "loss": 0.6124305725097656, "loss_ce": 3.846075742330868e-06, "loss_iou": 0.2041015625, "loss_num": 0.040771484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 255544956, "step": 4076 }, { "epoch": 13.56738768718802, "grad_norm": 20.4606990814209, "learning_rate": 5e-06, "loss": 0.415, "num_input_tokens_seen": 255607028, "step": 4077 }, { "epoch": 13.56738768718802, "loss": 0.3276844322681427, "loss_ce": 1.9266774415882537e-06, "loss_iou": 0.115234375, "loss_num": 0.019287109375, "loss_xval": 0.328125, "num_input_tokens_seen": 255607028, "step": 4077 }, { "epoch": 13.570715474209651, "grad_norm": 19.24645233154297, "learning_rate": 5e-06, "loss": 0.4988, "num_input_tokens_seen": 255669608, "step": 4078 }, { "epoch": 13.570715474209651, "loss": 0.4898703694343567, "loss_ce": 2.1828057015227387e-06, "loss_iou": 0.201171875, "loss_num": 0.017578125, "loss_xval": 0.490234375, "num_input_tokens_seen": 255669608, "step": 4078 }, { "epoch": 13.574043261231282, "grad_norm": 9.036434173583984, "learning_rate": 5e-06, "loss": 0.2497, "num_input_tokens_seen": 255732824, "step": 4079 }, { "epoch": 13.574043261231282, "loss": 0.3042739927768707, "loss_ce": 4.4240390707273036e-05, "loss_iou": 0.1171875, "loss_num": 0.013916015625, "loss_xval": 0.3046875, "num_input_tokens_seen": 255732824, "step": 4079 }, { "epoch": 13.577371048252912, "grad_norm": 9.026924133300781, "learning_rate": 5e-06, "loss": 0.4032, "num_input_tokens_seen": 255796408, "step": 4080 }, { "epoch": 13.577371048252912, "loss": 0.34216946363449097, "loss_ce": 6.386504537658766e-06, "loss_iou": 0.1298828125, "loss_num": 0.0166015625, "loss_xval": 0.341796875, "num_input_tokens_seen": 255796408, "step": 4080 }, { "epoch": 13.580698835274543, "grad_norm": 13.957313537597656, "learning_rate": 5e-06, "loss": 0.6533, "num_input_tokens_seen": 255859664, "step": 4081 }, { "epoch": 13.580698835274543, "loss": 0.8101817965507507, "loss_ce": 0.001343924319371581, "loss_iou": 0.31640625, "loss_num": 0.03564453125, "loss_xval": 0.80859375, "num_input_tokens_seen": 255859664, "step": 4081 }, { "epoch": 13.584026622296173, "grad_norm": 33.54194641113281, "learning_rate": 5e-06, "loss": 0.6584, "num_input_tokens_seen": 255921164, "step": 4082 }, { "epoch": 13.584026622296173, "loss": 0.6405044794082642, "loss_ce": 1.5685135394960525e-06, "loss_iou": 0.2470703125, "loss_num": 0.0291748046875, "loss_xval": 0.640625, "num_input_tokens_seen": 255921164, "step": 4082 }, { "epoch": 13.587354409317804, "grad_norm": 40.17051696777344, "learning_rate": 5e-06, "loss": 0.5826, "num_input_tokens_seen": 255983948, "step": 4083 }, { "epoch": 13.587354409317804, "loss": 0.7437734603881836, "loss_ce": 0.00012106593931093812, "loss_iou": 0.322265625, "loss_num": 0.0198974609375, "loss_xval": 0.7421875, "num_input_tokens_seen": 255983948, "step": 4083 }, { "epoch": 13.590682196339435, "grad_norm": 49.89726638793945, "learning_rate": 5e-06, "loss": 0.6456, "num_input_tokens_seen": 256046668, "step": 4084 }, { "epoch": 13.590682196339435, "loss": 0.6706563830375671, "loss_ce": 2.1150349311938044e-06, "loss_iou": 0.26171875, "loss_num": 0.0289306640625, "loss_xval": 0.671875, "num_input_tokens_seen": 256046668, "step": 4084 }, { "epoch": 13.594009983361065, "grad_norm": 44.148807525634766, "learning_rate": 5e-06, "loss": 0.6885, "num_input_tokens_seen": 256109968, "step": 4085 }, { "epoch": 13.594009983361065, "loss": 0.6370869278907776, "loss_ce": 1.9723643163160887e-06, "loss_iou": 0.2734375, "loss_num": 0.0181884765625, "loss_xval": 0.63671875, "num_input_tokens_seen": 256109968, "step": 4085 }, { "epoch": 13.597337770382696, "grad_norm": 33.869049072265625, "learning_rate": 5e-06, "loss": 0.4865, "num_input_tokens_seen": 256172704, "step": 4086 }, { "epoch": 13.597337770382696, "loss": 0.45526349544525146, "loss_ce": 2.2786553017795086e-06, "loss_iou": 0.1826171875, "loss_num": 0.01806640625, "loss_xval": 0.455078125, "num_input_tokens_seen": 256172704, "step": 4086 }, { "epoch": 13.600665557404326, "grad_norm": 20.771800994873047, "learning_rate": 5e-06, "loss": 0.3901, "num_input_tokens_seen": 256234240, "step": 4087 }, { "epoch": 13.600665557404326, "loss": 0.20660483837127686, "loss_ce": 8.326026659233321e-07, "loss_iou": 0.064453125, "loss_num": 0.015625, "loss_xval": 0.20703125, "num_input_tokens_seen": 256234240, "step": 4087 }, { "epoch": 13.603993344425957, "grad_norm": 6.325231552124023, "learning_rate": 5e-06, "loss": 0.2817, "num_input_tokens_seen": 256295580, "step": 4088 }, { "epoch": 13.603993344425957, "loss": 0.1686728596687317, "loss_ce": 2.2049493964004796e-06, "loss_iou": 0.059326171875, "loss_num": 0.010009765625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 256295580, "step": 4088 }, { "epoch": 13.607321131447588, "grad_norm": 12.384597778320312, "learning_rate": 5e-06, "loss": 0.4224, "num_input_tokens_seen": 256358536, "step": 4089 }, { "epoch": 13.607321131447588, "loss": 0.47076863050460815, "loss_ce": 4.478586561162956e-06, "loss_iou": 0.1552734375, "loss_num": 0.031982421875, "loss_xval": 0.470703125, "num_input_tokens_seen": 256358536, "step": 4089 }, { "epoch": 13.610648918469218, "grad_norm": 6.748762607574463, "learning_rate": 5e-06, "loss": 0.4365, "num_input_tokens_seen": 256420276, "step": 4090 }, { "epoch": 13.610648918469218, "loss": 0.42396867275238037, "loss_ce": 1.8475595425115898e-05, "loss_iou": 0.1416015625, "loss_num": 0.028076171875, "loss_xval": 0.423828125, "num_input_tokens_seen": 256420276, "step": 4090 }, { "epoch": 13.613976705490849, "grad_norm": 10.375592231750488, "learning_rate": 5e-06, "loss": 0.4683, "num_input_tokens_seen": 256483680, "step": 4091 }, { "epoch": 13.613976705490849, "loss": 0.5501725673675537, "loss_ce": 1.6659645325489691e-06, "loss_iou": 0.1953125, "loss_num": 0.0322265625, "loss_xval": 0.55078125, "num_input_tokens_seen": 256483680, "step": 4091 }, { "epoch": 13.61730449251248, "grad_norm": 9.56699275970459, "learning_rate": 5e-06, "loss": 0.3283, "num_input_tokens_seen": 256546544, "step": 4092 }, { "epoch": 13.61730449251248, "loss": 0.33765095472335815, "loss_ce": 4.486355919652851e-06, "loss_iou": 0.10205078125, "loss_num": 0.026611328125, "loss_xval": 0.337890625, "num_input_tokens_seen": 256546544, "step": 4092 }, { "epoch": 13.62063227953411, "grad_norm": 15.511360168457031, "learning_rate": 5e-06, "loss": 0.4797, "num_input_tokens_seen": 256610504, "step": 4093 }, { "epoch": 13.62063227953411, "loss": 0.5859394669532776, "loss_ce": 1.9777321540459525e-06, "loss_iou": 0.25, "loss_num": 0.0172119140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 256610504, "step": 4093 }, { "epoch": 13.62396006655574, "grad_norm": 26.149227142333984, "learning_rate": 5e-06, "loss": 0.5141, "num_input_tokens_seen": 256674520, "step": 4094 }, { "epoch": 13.62396006655574, "loss": 0.6741971969604492, "loss_ce": 0.00012492875976022333, "loss_iou": 0.271484375, "loss_num": 0.0263671875, "loss_xval": 0.67578125, "num_input_tokens_seen": 256674520, "step": 4094 }, { "epoch": 13.627287853577371, "grad_norm": 26.208885192871094, "learning_rate": 5e-06, "loss": 0.4107, "num_input_tokens_seen": 256738356, "step": 4095 }, { "epoch": 13.627287853577371, "loss": 0.4801068902015686, "loss_ce": 0.00024848454631865025, "loss_iou": 0.19921875, "loss_num": 0.01611328125, "loss_xval": 0.48046875, "num_input_tokens_seen": 256738356, "step": 4095 }, { "epoch": 13.630615640599002, "grad_norm": 16.056440353393555, "learning_rate": 5e-06, "loss": 0.3583, "num_input_tokens_seen": 256800756, "step": 4096 }, { "epoch": 13.630615640599002, "loss": 0.263735294342041, "loss_ce": 2.380900696152821e-06, "loss_iou": 0.09912109375, "loss_num": 0.01300048828125, "loss_xval": 0.263671875, "num_input_tokens_seen": 256800756, "step": 4096 }, { "epoch": 13.633943427620633, "grad_norm": 24.105731964111328, "learning_rate": 5e-06, "loss": 0.4955, "num_input_tokens_seen": 256863732, "step": 4097 }, { "epoch": 13.633943427620633, "loss": 0.5454325675964355, "loss_ce": 2.2416239517042413e-05, "loss_iou": 0.1953125, "loss_num": 0.0311279296875, "loss_xval": 0.546875, "num_input_tokens_seen": 256863732, "step": 4097 }, { "epoch": 13.637271214642263, "grad_norm": 19.653297424316406, "learning_rate": 5e-06, "loss": 0.5181, "num_input_tokens_seen": 256927744, "step": 4098 }, { "epoch": 13.637271214642263, "loss": 0.589111328125, "loss_ce": 0.0001831356348702684, "loss_iou": 0.2236328125, "loss_num": 0.0281982421875, "loss_xval": 0.58984375, "num_input_tokens_seen": 256927744, "step": 4098 }, { "epoch": 13.640599001663894, "grad_norm": 6.855571746826172, "learning_rate": 5e-06, "loss": 0.3905, "num_input_tokens_seen": 256990688, "step": 4099 }, { "epoch": 13.640599001663894, "loss": 0.4964617192745209, "loss_ce": 1.750424416968599e-06, "loss_iou": 0.197265625, "loss_num": 0.020263671875, "loss_xval": 0.49609375, "num_input_tokens_seen": 256990688, "step": 4099 }, { "epoch": 13.643926788685524, "grad_norm": 15.335451126098633, "learning_rate": 5e-06, "loss": 0.571, "num_input_tokens_seen": 257053536, "step": 4100 }, { "epoch": 13.643926788685524, "loss": 0.6144665479660034, "loss_ce": 2.5595374609110877e-05, "loss_iou": 0.25390625, "loss_num": 0.021240234375, "loss_xval": 0.61328125, "num_input_tokens_seen": 257053536, "step": 4100 }, { "epoch": 13.647254575707155, "grad_norm": 21.927885055541992, "learning_rate": 5e-06, "loss": 0.3671, "num_input_tokens_seen": 257116848, "step": 4101 }, { "epoch": 13.647254575707155, "loss": 0.49841612577438354, "loss_ce": 3.074759206356248e-06, "loss_iou": 0.1806640625, "loss_num": 0.027587890625, "loss_xval": 0.498046875, "num_input_tokens_seen": 257116848, "step": 4101 }, { "epoch": 13.650582362728786, "grad_norm": 18.56333351135254, "learning_rate": 5e-06, "loss": 0.5217, "num_input_tokens_seen": 257180336, "step": 4102 }, { "epoch": 13.650582362728786, "loss": 0.3581867814064026, "loss_ce": 3.2467574783368036e-05, "loss_iou": 0.1416015625, "loss_num": 0.01513671875, "loss_xval": 0.357421875, "num_input_tokens_seen": 257180336, "step": 4102 }, { "epoch": 13.653910149750416, "grad_norm": 24.884056091308594, "learning_rate": 5e-06, "loss": 0.6415, "num_input_tokens_seen": 257243896, "step": 4103 }, { "epoch": 13.653910149750416, "loss": 0.7182716131210327, "loss_ce": 9.896423762256745e-06, "loss_iou": 0.26171875, "loss_num": 0.03857421875, "loss_xval": 0.71875, "num_input_tokens_seen": 257243896, "step": 4103 }, { "epoch": 13.657237936772047, "grad_norm": 29.729398727416992, "learning_rate": 5e-06, "loss": 0.7039, "num_input_tokens_seen": 257306132, "step": 4104 }, { "epoch": 13.657237936772047, "loss": 0.613772988319397, "loss_ce": 3.4185936783615034e-06, "loss_iou": 0.2314453125, "loss_num": 0.0302734375, "loss_xval": 0.61328125, "num_input_tokens_seen": 257306132, "step": 4104 }, { "epoch": 13.660565723793678, "grad_norm": 24.440303802490234, "learning_rate": 5e-06, "loss": 0.3599, "num_input_tokens_seen": 257367000, "step": 4105 }, { "epoch": 13.660565723793678, "loss": 0.3002835512161255, "loss_ce": 5.835092906636419e-06, "loss_iou": 0.09619140625, "loss_num": 0.0216064453125, "loss_xval": 0.30078125, "num_input_tokens_seen": 257367000, "step": 4105 }, { "epoch": 13.663893510815308, "grad_norm": 23.90325355529785, "learning_rate": 5e-06, "loss": 0.4659, "num_input_tokens_seen": 257430024, "step": 4106 }, { "epoch": 13.663893510815308, "loss": 0.4957571029663086, "loss_ce": 6.009064964018762e-05, "loss_iou": 0.2080078125, "loss_num": 0.0159912109375, "loss_xval": 0.49609375, "num_input_tokens_seen": 257430024, "step": 4106 }, { "epoch": 13.667221297836939, "grad_norm": 14.558021545410156, "learning_rate": 5e-06, "loss": 0.5998, "num_input_tokens_seen": 257491720, "step": 4107 }, { "epoch": 13.667221297836939, "loss": 0.7290202975273132, "loss_ce": 1.637609238969162e-05, "loss_iou": 0.248046875, "loss_num": 0.04638671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 257491720, "step": 4107 }, { "epoch": 13.67054908485857, "grad_norm": 6.063190460205078, "learning_rate": 5e-06, "loss": 0.3454, "num_input_tokens_seen": 257555060, "step": 4108 }, { "epoch": 13.67054908485857, "loss": 0.38465070724487305, "loss_ce": 7.144503797462676e-06, "loss_iou": 0.1494140625, "loss_num": 0.0172119140625, "loss_xval": 0.384765625, "num_input_tokens_seen": 257555060, "step": 4108 }, { "epoch": 13.6738768718802, "grad_norm": 8.691733360290527, "learning_rate": 5e-06, "loss": 0.5168, "num_input_tokens_seen": 257618432, "step": 4109 }, { "epoch": 13.6738768718802, "loss": 0.500863790512085, "loss_ce": 7.029424887150526e-05, "loss_iou": 0.18359375, "loss_num": 0.026611328125, "loss_xval": 0.5, "num_input_tokens_seen": 257618432, "step": 4109 }, { "epoch": 13.67720465890183, "grad_norm": 6.432375907897949, "learning_rate": 5e-06, "loss": 0.4398, "num_input_tokens_seen": 257682164, "step": 4110 }, { "epoch": 13.67720465890183, "loss": 0.47852322459220886, "loss_ce": 7.623445526405703e-06, "loss_iou": 0.17578125, "loss_num": 0.025390625, "loss_xval": 0.478515625, "num_input_tokens_seen": 257682164, "step": 4110 }, { "epoch": 13.680532445923461, "grad_norm": 9.346251487731934, "learning_rate": 5e-06, "loss": 0.5075, "num_input_tokens_seen": 257745188, "step": 4111 }, { "epoch": 13.680532445923461, "loss": 0.4828639030456543, "loss_ce": 1.475741282774834e-05, "loss_iou": 0.1728515625, "loss_num": 0.027099609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 257745188, "step": 4111 }, { "epoch": 13.683860232945092, "grad_norm": 10.870111465454102, "learning_rate": 5e-06, "loss": 0.4635, "num_input_tokens_seen": 257806500, "step": 4112 }, { "epoch": 13.683860232945092, "loss": 0.4433865547180176, "loss_ce": 2.7188678359379992e-05, "loss_iou": 0.1669921875, "loss_num": 0.021728515625, "loss_xval": 0.443359375, "num_input_tokens_seen": 257806500, "step": 4112 }, { "epoch": 13.687188019966722, "grad_norm": 7.393191337585449, "learning_rate": 5e-06, "loss": 0.296, "num_input_tokens_seen": 257868640, "step": 4113 }, { "epoch": 13.687188019966722, "loss": 0.32833385467529297, "loss_ce": 8.677194273332134e-05, "loss_iou": 0.0986328125, "loss_num": 0.0262451171875, "loss_xval": 0.328125, "num_input_tokens_seen": 257868640, "step": 4113 }, { "epoch": 13.690515806988353, "grad_norm": 19.714548110961914, "learning_rate": 5e-06, "loss": 0.5716, "num_input_tokens_seen": 257932776, "step": 4114 }, { "epoch": 13.690515806988353, "loss": 0.578864336013794, "loss_ce": 6.865636805741815e-06, "loss_iou": 0.240234375, "loss_num": 0.01953125, "loss_xval": 0.578125, "num_input_tokens_seen": 257932776, "step": 4114 }, { "epoch": 13.693843594009984, "grad_norm": 19.521366119384766, "learning_rate": 5e-06, "loss": 0.4515, "num_input_tokens_seen": 257996268, "step": 4115 }, { "epoch": 13.693843594009984, "loss": 0.395265132188797, "loss_ce": 1.4590141290682368e-06, "loss_iou": 0.1591796875, "loss_num": 0.01531982421875, "loss_xval": 0.39453125, "num_input_tokens_seen": 257996268, "step": 4115 }, { "epoch": 13.697171381031614, "grad_norm": 21.068723678588867, "learning_rate": 5e-06, "loss": 0.3765, "num_input_tokens_seen": 258059508, "step": 4116 }, { "epoch": 13.697171381031614, "loss": 0.4218769967556, "loss_ce": 1.992436182263191e-06, "loss_iou": 0.1708984375, "loss_num": 0.0159912109375, "loss_xval": 0.421875, "num_input_tokens_seen": 258059508, "step": 4116 }, { "epoch": 13.700499168053245, "grad_norm": 12.399932861328125, "learning_rate": 5e-06, "loss": 0.3777, "num_input_tokens_seen": 258123020, "step": 4117 }, { "epoch": 13.700499168053245, "loss": 0.38121166825294495, "loss_ce": 4.710209759650752e-05, "loss_iou": 0.15234375, "loss_num": 0.01513671875, "loss_xval": 0.380859375, "num_input_tokens_seen": 258123020, "step": 4117 }, { "epoch": 13.703826955074875, "grad_norm": 8.13824462890625, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 258185248, "step": 4118 }, { "epoch": 13.703826955074875, "loss": 0.4743243455886841, "loss_ce": 2.015084828599356e-05, "loss_iou": 0.173828125, "loss_num": 0.0257568359375, "loss_xval": 0.474609375, "num_input_tokens_seen": 258185248, "step": 4118 }, { "epoch": 13.707154742096506, "grad_norm": 15.745996475219727, "learning_rate": 5e-06, "loss": 0.6362, "num_input_tokens_seen": 258249184, "step": 4119 }, { "epoch": 13.707154742096506, "loss": 0.552534818649292, "loss_ce": 4.4617001549340785e-05, "loss_iou": 0.2255859375, "loss_num": 0.0201416015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 258249184, "step": 4119 }, { "epoch": 13.710482529118137, "grad_norm": 13.630387306213379, "learning_rate": 5e-06, "loss": 0.5091, "num_input_tokens_seen": 258312516, "step": 4120 }, { "epoch": 13.710482529118137, "loss": 0.5033610463142395, "loss_ce": 4.092620656592771e-06, "loss_iou": 0.18359375, "loss_num": 0.027099609375, "loss_xval": 0.50390625, "num_input_tokens_seen": 258312516, "step": 4120 }, { "epoch": 13.713810316139767, "grad_norm": 32.22291564941406, "learning_rate": 5e-06, "loss": 0.4899, "num_input_tokens_seen": 258375704, "step": 4121 }, { "epoch": 13.713810316139767, "loss": 0.46344617009162903, "loss_ce": 6.2311119108926505e-06, "loss_iou": 0.1748046875, "loss_num": 0.02294921875, "loss_xval": 0.462890625, "num_input_tokens_seen": 258375704, "step": 4121 }, { "epoch": 13.717138103161398, "grad_norm": 85.24789428710938, "learning_rate": 5e-06, "loss": 0.5754, "num_input_tokens_seen": 258440296, "step": 4122 }, { "epoch": 13.717138103161398, "loss": 0.5267801880836487, "loss_ce": 0.0001688729680608958, "loss_iou": 0.220703125, "loss_num": 0.01708984375, "loss_xval": 0.52734375, "num_input_tokens_seen": 258440296, "step": 4122 }, { "epoch": 13.720465890183029, "grad_norm": 27.215478897094727, "learning_rate": 5e-06, "loss": 0.5289, "num_input_tokens_seen": 258504484, "step": 4123 }, { "epoch": 13.720465890183029, "loss": 0.5699639320373535, "loss_ce": 1.7621881852392107e-05, "loss_iou": 0.2294921875, "loss_num": 0.0224609375, "loss_xval": 0.5703125, "num_input_tokens_seen": 258504484, "step": 4123 }, { "epoch": 13.72379367720466, "grad_norm": 19.103492736816406, "learning_rate": 5e-06, "loss": 0.555, "num_input_tokens_seen": 258566924, "step": 4124 }, { "epoch": 13.72379367720466, "loss": 0.6002248525619507, "loss_ce": 5.09880919707939e-06, "loss_iou": 0.234375, "loss_num": 0.0260009765625, "loss_xval": 0.6015625, "num_input_tokens_seen": 258566924, "step": 4124 }, { "epoch": 13.72712146422629, "grad_norm": 8.503244400024414, "learning_rate": 5e-06, "loss": 0.563, "num_input_tokens_seen": 258629900, "step": 4125 }, { "epoch": 13.72712146422629, "loss": 0.5037890672683716, "loss_ce": 4.879424068349181e-06, "loss_iou": 0.208984375, "loss_num": 0.01708984375, "loss_xval": 0.50390625, "num_input_tokens_seen": 258629900, "step": 4125 }, { "epoch": 13.73044925124792, "grad_norm": 8.70824909210205, "learning_rate": 5e-06, "loss": 0.456, "num_input_tokens_seen": 258692724, "step": 4126 }, { "epoch": 13.73044925124792, "loss": 0.6002252101898193, "loss_ce": 5.526888344320469e-06, "loss_iou": 0.23046875, "loss_num": 0.02783203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 258692724, "step": 4126 }, { "epoch": 13.733777038269551, "grad_norm": 14.168745040893555, "learning_rate": 5e-06, "loss": 0.5027, "num_input_tokens_seen": 258754944, "step": 4127 }, { "epoch": 13.733777038269551, "loss": 0.4983527362346649, "loss_ce": 6.855873948552471e-07, "loss_iou": 0.1796875, "loss_num": 0.0277099609375, "loss_xval": 0.498046875, "num_input_tokens_seen": 258754944, "step": 4127 }, { "epoch": 13.737104825291182, "grad_norm": 15.298704147338867, "learning_rate": 5e-06, "loss": 0.6115, "num_input_tokens_seen": 258816592, "step": 4128 }, { "epoch": 13.737104825291182, "loss": 0.7449043989181519, "loss_ce": 8.112772320600925e-07, "loss_iou": 0.28125, "loss_num": 0.03662109375, "loss_xval": 0.74609375, "num_input_tokens_seen": 258816592, "step": 4128 }, { "epoch": 13.740432612312812, "grad_norm": 28.55160903930664, "learning_rate": 5e-06, "loss": 0.4143, "num_input_tokens_seen": 258878900, "step": 4129 }, { "epoch": 13.740432612312812, "loss": 0.3935388922691345, "loss_ce": 1.4733976968273055e-05, "loss_iou": 0.0986328125, "loss_num": 0.039306640625, "loss_xval": 0.392578125, "num_input_tokens_seen": 258878900, "step": 4129 }, { "epoch": 13.743760399334443, "grad_norm": 37.17100143432617, "learning_rate": 5e-06, "loss": 0.5985, "num_input_tokens_seen": 258942572, "step": 4130 }, { "epoch": 13.743760399334443, "loss": 0.3404959440231323, "loss_ce": 1.1328882465022616e-05, "loss_iou": 0.1484375, "loss_num": 0.00860595703125, "loss_xval": 0.33984375, "num_input_tokens_seen": 258942572, "step": 4130 }, { "epoch": 13.747088186356073, "grad_norm": 28.381826400756836, "learning_rate": 5e-06, "loss": 0.7216, "num_input_tokens_seen": 259004192, "step": 4131 }, { "epoch": 13.747088186356073, "loss": 0.8831803798675537, "loss_ce": 1.6599676655459916e-06, "loss_iou": 0.34375, "loss_num": 0.038818359375, "loss_xval": 0.8828125, "num_input_tokens_seen": 259004192, "step": 4131 }, { "epoch": 13.750415973377704, "grad_norm": 18.126379013061523, "learning_rate": 5e-06, "loss": 0.4222, "num_input_tokens_seen": 259066504, "step": 4132 }, { "epoch": 13.750415973377704, "loss": 0.42548638582229614, "loss_ce": 1.0291502803738695e-05, "loss_iou": 0.1376953125, "loss_num": 0.0299072265625, "loss_xval": 0.42578125, "num_input_tokens_seen": 259066504, "step": 4132 }, { "epoch": 13.753743760399335, "grad_norm": 17.986480712890625, "learning_rate": 5e-06, "loss": 0.576, "num_input_tokens_seen": 259129596, "step": 4133 }, { "epoch": 13.753743760399335, "loss": 0.7285751104354858, "loss_ce": 0.0011581132421270013, "loss_iou": 0.27734375, "loss_num": 0.034423828125, "loss_xval": 0.7265625, "num_input_tokens_seen": 259129596, "step": 4133 }, { "epoch": 13.757071547420965, "grad_norm": 20.216264724731445, "learning_rate": 5e-06, "loss": 0.4167, "num_input_tokens_seen": 259192896, "step": 4134 }, { "epoch": 13.757071547420965, "loss": 0.4848650097846985, "loss_ce": 1.7008343320412678e-06, "loss_iou": 0.154296875, "loss_num": 0.03515625, "loss_xval": 0.484375, "num_input_tokens_seen": 259192896, "step": 4134 }, { "epoch": 13.760399334442596, "grad_norm": 23.975759506225586, "learning_rate": 5e-06, "loss": 0.4091, "num_input_tokens_seen": 259256208, "step": 4135 }, { "epoch": 13.760399334442596, "loss": 0.35009893774986267, "loss_ce": 1.2538218925328692e-06, "loss_iou": 0.12890625, "loss_num": 0.0184326171875, "loss_xval": 0.349609375, "num_input_tokens_seen": 259256208, "step": 4135 }, { "epoch": 13.763727121464226, "grad_norm": 26.60926628112793, "learning_rate": 5e-06, "loss": 0.372, "num_input_tokens_seen": 259319164, "step": 4136 }, { "epoch": 13.763727121464226, "loss": 0.38587069511413574, "loss_ce": 6.453223249991424e-06, "loss_iou": 0.1474609375, "loss_num": 0.01806640625, "loss_xval": 0.38671875, "num_input_tokens_seen": 259319164, "step": 4136 }, { "epoch": 13.767054908485857, "grad_norm": 24.609804153442383, "learning_rate": 5e-06, "loss": 0.4815, "num_input_tokens_seen": 259381388, "step": 4137 }, { "epoch": 13.767054908485857, "loss": 0.46875232458114624, "loss_ce": 2.3318236799241276e-06, "loss_iou": 0.1806640625, "loss_num": 0.021484375, "loss_xval": 0.46875, "num_input_tokens_seen": 259381388, "step": 4137 }, { "epoch": 13.770382695507488, "grad_norm": 13.758931159973145, "learning_rate": 5e-06, "loss": 0.3749, "num_input_tokens_seen": 259444460, "step": 4138 }, { "epoch": 13.770382695507488, "loss": 0.29412978887557983, "loss_ce": 1.383949438604759e-06, "loss_iou": 0.09619140625, "loss_num": 0.020263671875, "loss_xval": 0.294921875, "num_input_tokens_seen": 259444460, "step": 4138 }, { "epoch": 13.773710482529118, "grad_norm": 14.464776039123535, "learning_rate": 5e-06, "loss": 0.6532, "num_input_tokens_seen": 259508968, "step": 4139 }, { "epoch": 13.773710482529118, "loss": 0.6091318130493164, "loss_ce": 9.1477681962715e-07, "loss_iou": 0.2421875, "loss_num": 0.024658203125, "loss_xval": 0.609375, "num_input_tokens_seen": 259508968, "step": 4139 }, { "epoch": 13.777038269550749, "grad_norm": 10.780937194824219, "learning_rate": 5e-06, "loss": 0.483, "num_input_tokens_seen": 259572512, "step": 4140 }, { "epoch": 13.777038269550749, "loss": 0.5982675552368164, "loss_ce": 9.841752444117446e-07, "loss_iou": 0.220703125, "loss_num": 0.03173828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 259572512, "step": 4140 }, { "epoch": 13.78036605657238, "grad_norm": 16.601978302001953, "learning_rate": 5e-06, "loss": 0.4752, "num_input_tokens_seen": 259634616, "step": 4141 }, { "epoch": 13.78036605657238, "loss": 0.5981664657592773, "loss_ce": 0.00014399025531020015, "loss_iou": 0.2119140625, "loss_num": 0.03515625, "loss_xval": 0.59765625, "num_input_tokens_seen": 259634616, "step": 4141 }, { "epoch": 13.78369384359401, "grad_norm": 20.26970672607422, "learning_rate": 5e-06, "loss": 0.5202, "num_input_tokens_seen": 259697940, "step": 4142 }, { "epoch": 13.78369384359401, "loss": 0.42225173115730286, "loss_ce": 1.0517720511415973e-05, "loss_iou": 0.185546875, "loss_num": 0.0101318359375, "loss_xval": 0.421875, "num_input_tokens_seen": 259697940, "step": 4142 }, { "epoch": 13.78702163061564, "grad_norm": 19.267318725585938, "learning_rate": 5e-06, "loss": 0.5561, "num_input_tokens_seen": 259761032, "step": 4143 }, { "epoch": 13.78702163061564, "loss": 0.6392995119094849, "loss_ce": 0.0005971319042146206, "loss_iou": 0.2314453125, "loss_num": 0.03515625, "loss_xval": 0.640625, "num_input_tokens_seen": 259761032, "step": 4143 }, { "epoch": 13.790349417637271, "grad_norm": 17.45833396911621, "learning_rate": 5e-06, "loss": 0.398, "num_input_tokens_seen": 259823232, "step": 4144 }, { "epoch": 13.790349417637271, "loss": 0.275579035282135, "loss_ce": 5.31115119883907e-06, "loss_iou": 0.1083984375, "loss_num": 0.01171875, "loss_xval": 0.275390625, "num_input_tokens_seen": 259823232, "step": 4144 }, { "epoch": 13.793677204658902, "grad_norm": 37.33243179321289, "learning_rate": 5e-06, "loss": 0.5921, "num_input_tokens_seen": 259886452, "step": 4145 }, { "epoch": 13.793677204658902, "loss": 0.47204649448394775, "loss_ce": 5.850189950251661e-07, "loss_iou": 0.1767578125, "loss_num": 0.0238037109375, "loss_xval": 0.47265625, "num_input_tokens_seen": 259886452, "step": 4145 }, { "epoch": 13.797004991680533, "grad_norm": 32.37745666503906, "learning_rate": 5e-06, "loss": 0.4031, "num_input_tokens_seen": 259948904, "step": 4146 }, { "epoch": 13.797004991680533, "loss": 0.4423835277557373, "loss_ce": 7.260487109306268e-07, "loss_iou": 0.185546875, "loss_num": 0.0142822265625, "loss_xval": 0.44140625, "num_input_tokens_seen": 259948904, "step": 4146 }, { "epoch": 13.800332778702163, "grad_norm": 18.94452476501465, "learning_rate": 5e-06, "loss": 0.4573, "num_input_tokens_seen": 260012356, "step": 4147 }, { "epoch": 13.800332778702163, "loss": 0.46966734528541565, "loss_ce": 1.8332575564272702e-06, "loss_iou": 0.205078125, "loss_num": 0.01190185546875, "loss_xval": 0.46875, "num_input_tokens_seen": 260012356, "step": 4147 }, { "epoch": 13.803660565723794, "grad_norm": 12.458765029907227, "learning_rate": 5e-06, "loss": 0.3854, "num_input_tokens_seen": 260076292, "step": 4148 }, { "epoch": 13.803660565723794, "loss": 0.5068869590759277, "loss_ce": 5.1017697842326015e-05, "loss_iou": 0.216796875, "loss_num": 0.0146484375, "loss_xval": 0.5078125, "num_input_tokens_seen": 260076292, "step": 4148 }, { "epoch": 13.806988352745424, "grad_norm": 17.685161590576172, "learning_rate": 5e-06, "loss": 0.2988, "num_input_tokens_seen": 260137976, "step": 4149 }, { "epoch": 13.806988352745424, "loss": 0.3355743885040283, "loss_ce": 3.10162567984662e-06, "loss_iou": 0.1259765625, "loss_num": 0.0167236328125, "loss_xval": 0.3359375, "num_input_tokens_seen": 260137976, "step": 4149 }, { "epoch": 13.810316139767055, "grad_norm": 25.503149032592773, "learning_rate": 5e-06, "loss": 0.5004, "num_input_tokens_seen": 260201396, "step": 4150 }, { "epoch": 13.810316139767055, "loss": 0.5567536950111389, "loss_ce": 0.00011304580402793363, "loss_iou": 0.22265625, "loss_num": 0.0223388671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 260201396, "step": 4150 }, { "epoch": 13.813643926788686, "grad_norm": 40.2658576965332, "learning_rate": 5e-06, "loss": 0.4138, "num_input_tokens_seen": 260264396, "step": 4151 }, { "epoch": 13.813643926788686, "loss": 0.2669002413749695, "loss_ce": 5.452123878058046e-05, "loss_iou": 0.09130859375, "loss_num": 0.016845703125, "loss_xval": 0.267578125, "num_input_tokens_seen": 260264396, "step": 4151 }, { "epoch": 13.816971713810316, "grad_norm": 54.08342361450195, "learning_rate": 5e-06, "loss": 0.5941, "num_input_tokens_seen": 260327836, "step": 4152 }, { "epoch": 13.816971713810316, "loss": 0.6112378835678101, "loss_ce": 1.3049984772806056e-06, "loss_iou": 0.234375, "loss_num": 0.0286865234375, "loss_xval": 0.609375, "num_input_tokens_seen": 260327836, "step": 4152 }, { "epoch": 13.820299500831947, "grad_norm": 36.5843620300293, "learning_rate": 5e-06, "loss": 0.364, "num_input_tokens_seen": 260389608, "step": 4153 }, { "epoch": 13.820299500831947, "loss": 0.3364275395870209, "loss_ce": 1.7697987004794413e-06, "loss_iou": 0.12451171875, "loss_num": 0.017578125, "loss_xval": 0.3359375, "num_input_tokens_seen": 260389608, "step": 4153 }, { "epoch": 13.823627287853578, "grad_norm": 11.26008415222168, "learning_rate": 5e-06, "loss": 0.6221, "num_input_tokens_seen": 260451840, "step": 4154 }, { "epoch": 13.823627287853578, "loss": 0.4192509651184082, "loss_ce": 4.819352170670754e-07, "loss_iou": 0.166015625, "loss_num": 0.0174560546875, "loss_xval": 0.419921875, "num_input_tokens_seen": 260451840, "step": 4154 }, { "epoch": 13.826955074875208, "grad_norm": 27.00114631652832, "learning_rate": 5e-06, "loss": 0.3773, "num_input_tokens_seen": 260512916, "step": 4155 }, { "epoch": 13.826955074875208, "loss": 0.46809375286102295, "loss_ce": 0.0007170668104663491, "loss_iou": 0.19140625, "loss_num": 0.0167236328125, "loss_xval": 0.466796875, "num_input_tokens_seen": 260512916, "step": 4155 }, { "epoch": 13.830282861896839, "grad_norm": 18.49332046508789, "learning_rate": 5e-06, "loss": 0.4637, "num_input_tokens_seen": 260576624, "step": 4156 }, { "epoch": 13.830282861896839, "loss": 0.37524574995040894, "loss_ce": 1.618271767256374e-06, "loss_iou": 0.1435546875, "loss_num": 0.017822265625, "loss_xval": 0.375, "num_input_tokens_seen": 260576624, "step": 4156 }, { "epoch": 13.83361064891847, "grad_norm": 9.001463890075684, "learning_rate": 5e-06, "loss": 0.424, "num_input_tokens_seen": 260639764, "step": 4157 }, { "epoch": 13.83361064891847, "loss": 0.5342144966125488, "loss_ce": 3.4764929296215996e-05, "loss_iou": 0.2060546875, "loss_num": 0.0245361328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 260639764, "step": 4157 }, { "epoch": 13.8369384359401, "grad_norm": 39.20524978637695, "learning_rate": 5e-06, "loss": 0.3862, "num_input_tokens_seen": 260703392, "step": 4158 }, { "epoch": 13.8369384359401, "loss": 0.41677916049957275, "loss_ce": 5.98414771957323e-07, "loss_iou": 0.1435546875, "loss_num": 0.026123046875, "loss_xval": 0.416015625, "num_input_tokens_seen": 260703392, "step": 4158 }, { "epoch": 13.84026622296173, "grad_norm": 21.14249610900879, "learning_rate": 5e-06, "loss": 0.4462, "num_input_tokens_seen": 260766632, "step": 4159 }, { "epoch": 13.84026622296173, "loss": 0.44205182790756226, "loss_ce": 0.00015729958249721676, "loss_iou": 0.177734375, "loss_num": 0.0172119140625, "loss_xval": 0.44140625, "num_input_tokens_seen": 260766632, "step": 4159 }, { "epoch": 13.843594009983361, "grad_norm": 11.38596248626709, "learning_rate": 5e-06, "loss": 0.3551, "num_input_tokens_seen": 260829384, "step": 4160 }, { "epoch": 13.843594009983361, "loss": 0.3455911874771118, "loss_ce": 1.0130413102160674e-05, "loss_iou": 0.109375, "loss_num": 0.0252685546875, "loss_xval": 0.345703125, "num_input_tokens_seen": 260829384, "step": 4160 }, { "epoch": 13.846921797004992, "grad_norm": 14.194250106811523, "learning_rate": 5e-06, "loss": 0.4558, "num_input_tokens_seen": 260892692, "step": 4161 }, { "epoch": 13.846921797004992, "loss": 0.6096424460411072, "loss_ce": 2.33037062571384e-05, "loss_iou": 0.25390625, "loss_num": 0.019775390625, "loss_xval": 0.609375, "num_input_tokens_seen": 260892692, "step": 4161 }, { "epoch": 13.850249584026622, "grad_norm": 8.901708602905273, "learning_rate": 5e-06, "loss": 0.5242, "num_input_tokens_seen": 260956004, "step": 4162 }, { "epoch": 13.850249584026622, "loss": 0.5508407950401306, "loss_ce": 0.0013412985717877746, "loss_iou": 0.185546875, "loss_num": 0.03564453125, "loss_xval": 0.55078125, "num_input_tokens_seen": 260956004, "step": 4162 }, { "epoch": 13.853577371048253, "grad_norm": 9.50367259979248, "learning_rate": 5e-06, "loss": 0.4539, "num_input_tokens_seen": 261019180, "step": 4163 }, { "epoch": 13.853577371048253, "loss": 0.44399166107177734, "loss_ce": 0.0001439961779396981, "loss_iou": 0.1865234375, "loss_num": 0.01422119140625, "loss_xval": 0.443359375, "num_input_tokens_seen": 261019180, "step": 4163 }, { "epoch": 13.856905158069884, "grad_norm": 13.04059886932373, "learning_rate": 5e-06, "loss": 0.5548, "num_input_tokens_seen": 261082696, "step": 4164 }, { "epoch": 13.856905158069884, "loss": 0.5068386197090149, "loss_ce": 2.6628422347130254e-06, "loss_iou": 0.1982421875, "loss_num": 0.022216796875, "loss_xval": 0.5078125, "num_input_tokens_seen": 261082696, "step": 4164 }, { "epoch": 13.860232945091514, "grad_norm": 20.135576248168945, "learning_rate": 5e-06, "loss": 0.5038, "num_input_tokens_seen": 261144784, "step": 4165 }, { "epoch": 13.860232945091514, "loss": 0.4175124764442444, "loss_ce": 1.4839307596048457e-06, "loss_iou": 0.1767578125, "loss_num": 0.0125732421875, "loss_xval": 0.41796875, "num_input_tokens_seen": 261144784, "step": 4165 }, { "epoch": 13.863560732113145, "grad_norm": 15.200905799865723, "learning_rate": 5e-06, "loss": 0.3569, "num_input_tokens_seen": 261207380, "step": 4166 }, { "epoch": 13.863560732113145, "loss": 0.40321385860443115, "loss_ce": 1.5573752534692176e-05, "loss_iou": 0.1591796875, "loss_num": 0.01708984375, "loss_xval": 0.40234375, "num_input_tokens_seen": 261207380, "step": 4166 }, { "epoch": 13.866888519134775, "grad_norm": 15.928007125854492, "learning_rate": 5e-06, "loss": 0.3808, "num_input_tokens_seen": 261269096, "step": 4167 }, { "epoch": 13.866888519134775, "loss": 0.47064271569252014, "loss_ce": 6.498075890704058e-07, "loss_iou": 0.1875, "loss_num": 0.01904296875, "loss_xval": 0.470703125, "num_input_tokens_seen": 261269096, "step": 4167 }, { "epoch": 13.870216306156406, "grad_norm": 7.179503917694092, "learning_rate": 5e-06, "loss": 0.4821, "num_input_tokens_seen": 261330172, "step": 4168 }, { "epoch": 13.870216306156406, "loss": 0.5345505475997925, "loss_ce": 4.625876954378327e-06, "loss_iou": 0.185546875, "loss_num": 0.032470703125, "loss_xval": 0.53515625, "num_input_tokens_seen": 261330172, "step": 4168 }, { "epoch": 13.873544093178037, "grad_norm": 10.414982795715332, "learning_rate": 5e-06, "loss": 0.517, "num_input_tokens_seen": 261392488, "step": 4169 }, { "epoch": 13.873544093178037, "loss": 0.4621618986129761, "loss_ce": 3.6924211599398404e-06, "loss_iou": 0.16015625, "loss_num": 0.0283203125, "loss_xval": 0.462890625, "num_input_tokens_seen": 261392488, "step": 4169 }, { "epoch": 13.876871880199667, "grad_norm": 13.206289291381836, "learning_rate": 5e-06, "loss": 0.4807, "num_input_tokens_seen": 261454804, "step": 4170 }, { "epoch": 13.876871880199667, "loss": 0.6068129539489746, "loss_ce": 1.3770036275673192e-06, "loss_iou": 0.23046875, "loss_num": 0.0291748046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 261454804, "step": 4170 }, { "epoch": 13.880199667221298, "grad_norm": 17.491151809692383, "learning_rate": 5e-06, "loss": 0.4421, "num_input_tokens_seen": 261517908, "step": 4171 }, { "epoch": 13.880199667221298, "loss": 0.4669951796531677, "loss_ce": 1.5229624295898248e-05, "loss_iou": 0.1875, "loss_num": 0.0185546875, "loss_xval": 0.466796875, "num_input_tokens_seen": 261517908, "step": 4171 }, { "epoch": 13.883527454242929, "grad_norm": 8.45610237121582, "learning_rate": 5e-06, "loss": 0.589, "num_input_tokens_seen": 261581388, "step": 4172 }, { "epoch": 13.883527454242929, "loss": 0.5972967147827148, "loss_ce": 6.636100806645118e-06, "loss_iou": 0.228515625, "loss_num": 0.028076171875, "loss_xval": 0.59765625, "num_input_tokens_seen": 261581388, "step": 4172 }, { "epoch": 13.88685524126456, "grad_norm": 11.545846939086914, "learning_rate": 5e-06, "loss": 0.3578, "num_input_tokens_seen": 261643080, "step": 4173 }, { "epoch": 13.88685524126456, "loss": 0.31348562240600586, "loss_ce": 9.092224900086876e-06, "loss_iou": 0.09423828125, "loss_num": 0.025146484375, "loss_xval": 0.3125, "num_input_tokens_seen": 261643080, "step": 4173 }, { "epoch": 13.89018302828619, "grad_norm": 29.71828842163086, "learning_rate": 5e-06, "loss": 0.5903, "num_input_tokens_seen": 261706924, "step": 4174 }, { "epoch": 13.89018302828619, "loss": 0.6342206597328186, "loss_ce": 4.3492768782016356e-06, "loss_iou": 0.263671875, "loss_num": 0.021484375, "loss_xval": 0.6328125, "num_input_tokens_seen": 261706924, "step": 4174 }, { "epoch": 13.89351081530782, "grad_norm": 28.362262725830078, "learning_rate": 5e-06, "loss": 0.4706, "num_input_tokens_seen": 261769552, "step": 4175 }, { "epoch": 13.89351081530782, "loss": 0.4404001235961914, "loss_ce": 9.261647164748865e-07, "loss_iou": 0.181640625, "loss_num": 0.015380859375, "loss_xval": 0.439453125, "num_input_tokens_seen": 261769552, "step": 4175 }, { "epoch": 13.896838602329451, "grad_norm": 19.71221160888672, "learning_rate": 5e-06, "loss": 0.6048, "num_input_tokens_seen": 261834004, "step": 4176 }, { "epoch": 13.896838602329451, "loss": 0.29052841663360596, "loss_ce": 1.0931541964964708e-06, "loss_iou": 0.1005859375, "loss_num": 0.017822265625, "loss_xval": 0.291015625, "num_input_tokens_seen": 261834004, "step": 4176 }, { "epoch": 13.900166389351082, "grad_norm": 14.413411140441895, "learning_rate": 5e-06, "loss": 0.416, "num_input_tokens_seen": 261896792, "step": 4177 }, { "epoch": 13.900166389351082, "loss": 0.35779571533203125, "loss_ce": 7.63965908845421e-06, "loss_iou": 0.1318359375, "loss_num": 0.018798828125, "loss_xval": 0.357421875, "num_input_tokens_seen": 261896792, "step": 4177 }, { "epoch": 13.903494176372712, "grad_norm": 7.369339942932129, "learning_rate": 5e-06, "loss": 0.6636, "num_input_tokens_seen": 261959184, "step": 4178 }, { "epoch": 13.903494176372712, "loss": 0.7493350505828857, "loss_ce": 0.0015323145780712366, "loss_iou": 0.279296875, "loss_num": 0.03759765625, "loss_xval": 0.74609375, "num_input_tokens_seen": 261959184, "step": 4178 }, { "epoch": 13.906821963394343, "grad_norm": 11.74126148223877, "learning_rate": 5e-06, "loss": 0.5504, "num_input_tokens_seen": 262021780, "step": 4179 }, { "epoch": 13.906821963394343, "loss": 0.5264617800712585, "loss_ce": 0.001559424097649753, "loss_iou": 0.181640625, "loss_num": 0.0322265625, "loss_xval": 0.5234375, "num_input_tokens_seen": 262021780, "step": 4179 }, { "epoch": 13.910149750415973, "grad_norm": 20.901283264160156, "learning_rate": 5e-06, "loss": 0.4908, "num_input_tokens_seen": 262085140, "step": 4180 }, { "epoch": 13.910149750415973, "loss": 0.303406298160553, "loss_ce": 5.220409775574808e-07, "loss_iou": 0.1240234375, "loss_num": 0.0111083984375, "loss_xval": 0.302734375, "num_input_tokens_seen": 262085140, "step": 4180 }, { "epoch": 13.913477537437604, "grad_norm": 24.705482482910156, "learning_rate": 5e-06, "loss": 0.4245, "num_input_tokens_seen": 262147624, "step": 4181 }, { "epoch": 13.913477537437604, "loss": 0.4911850094795227, "loss_ce": 0.00013427484373096377, "loss_iou": 0.1796875, "loss_num": 0.0262451171875, "loss_xval": 0.490234375, "num_input_tokens_seen": 262147624, "step": 4181 }, { "epoch": 13.916805324459235, "grad_norm": 10.806558609008789, "learning_rate": 5e-06, "loss": 0.4776, "num_input_tokens_seen": 262211152, "step": 4182 }, { "epoch": 13.916805324459235, "loss": 0.5557447671890259, "loss_ce": 0.00011119159171357751, "loss_iou": 0.2490234375, "loss_num": 0.01165771484375, "loss_xval": 0.5546875, "num_input_tokens_seen": 262211152, "step": 4182 }, { "epoch": 13.920133111480865, "grad_norm": 7.101350784301758, "learning_rate": 5e-06, "loss": 0.2665, "num_input_tokens_seen": 262273556, "step": 4183 }, { "epoch": 13.920133111480865, "loss": 0.33929675817489624, "loss_ce": 2.317368398507824e-06, "loss_iou": 0.12353515625, "loss_num": 0.0184326171875, "loss_xval": 0.33984375, "num_input_tokens_seen": 262273556, "step": 4183 }, { "epoch": 13.923460898502496, "grad_norm": 16.265722274780273, "learning_rate": 5e-06, "loss": 0.5496, "num_input_tokens_seen": 262337092, "step": 4184 }, { "epoch": 13.923460898502496, "loss": 0.7234882712364197, "loss_ce": 8.04023784439778e-06, "loss_iou": 0.294921875, "loss_num": 0.026611328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 262337092, "step": 4184 }, { "epoch": 13.926788685524127, "grad_norm": 17.4716854095459, "learning_rate": 5e-06, "loss": 0.5137, "num_input_tokens_seen": 262401932, "step": 4185 }, { "epoch": 13.926788685524127, "loss": 0.4861641526222229, "loss_ce": 0.0011788180563598871, "loss_iou": 0.1689453125, "loss_num": 0.0294189453125, "loss_xval": 0.484375, "num_input_tokens_seen": 262401932, "step": 4185 }, { "epoch": 13.930116472545757, "grad_norm": 31.211177825927734, "learning_rate": 5e-06, "loss": 0.454, "num_input_tokens_seen": 262464116, "step": 4186 }, { "epoch": 13.930116472545757, "loss": 0.4623167812824249, "loss_ce": 5.983335995551897e-06, "loss_iou": 0.197265625, "loss_num": 0.013671875, "loss_xval": 0.462890625, "num_input_tokens_seen": 262464116, "step": 4186 }, { "epoch": 13.933444259567388, "grad_norm": 36.0714225769043, "learning_rate": 5e-06, "loss": 0.4549, "num_input_tokens_seen": 262525688, "step": 4187 }, { "epoch": 13.933444259567388, "loss": 0.3329482674598694, "loss_ce": 1.4729369013366522e-06, "loss_iou": 0.1083984375, "loss_num": 0.023193359375, "loss_xval": 0.33203125, "num_input_tokens_seen": 262525688, "step": 4187 }, { "epoch": 13.936772046589018, "grad_norm": 24.010040283203125, "learning_rate": 5e-06, "loss": 0.5828, "num_input_tokens_seen": 262588424, "step": 4188 }, { "epoch": 13.936772046589018, "loss": 0.5183777809143066, "loss_ce": 6.156031304271892e-06, "loss_iou": 0.2138671875, "loss_num": 0.0179443359375, "loss_xval": 0.51953125, "num_input_tokens_seen": 262588424, "step": 4188 }, { "epoch": 13.940099833610649, "grad_norm": 8.100127220153809, "learning_rate": 5e-06, "loss": 0.3211, "num_input_tokens_seen": 262650544, "step": 4189 }, { "epoch": 13.940099833610649, "loss": 0.42334598302841187, "loss_ce": 6.135181592981098e-06, "loss_iou": 0.16015625, "loss_num": 0.020751953125, "loss_xval": 0.423828125, "num_input_tokens_seen": 262650544, "step": 4189 }, { "epoch": 13.94342762063228, "grad_norm": 11.229194641113281, "learning_rate": 5e-06, "loss": 0.3143, "num_input_tokens_seen": 262711868, "step": 4190 }, { "epoch": 13.94342762063228, "loss": 0.3173248767852783, "loss_ce": 3.1096797101781704e-06, "loss_iou": 0.115234375, "loss_num": 0.017333984375, "loss_xval": 0.31640625, "num_input_tokens_seen": 262711868, "step": 4190 }, { "epoch": 13.94675540765391, "grad_norm": 10.681403160095215, "learning_rate": 5e-06, "loss": 0.3796, "num_input_tokens_seen": 262774156, "step": 4191 }, { "epoch": 13.94675540765391, "loss": 0.5111153721809387, "loss_ce": 0.0004952649469487369, "loss_iou": 0.1884765625, "loss_num": 0.0264892578125, "loss_xval": 0.51171875, "num_input_tokens_seen": 262774156, "step": 4191 }, { "epoch": 13.95008319467554, "grad_norm": 26.87320327758789, "learning_rate": 5e-06, "loss": 0.3641, "num_input_tokens_seen": 262837480, "step": 4192 }, { "epoch": 13.95008319467554, "loss": 0.3777617812156677, "loss_ce": 1.5182764400378801e-05, "loss_iou": 0.115234375, "loss_num": 0.029541015625, "loss_xval": 0.376953125, "num_input_tokens_seen": 262837480, "step": 4192 }, { "epoch": 13.953410981697171, "grad_norm": 32.866668701171875, "learning_rate": 5e-06, "loss": 0.5139, "num_input_tokens_seen": 262900612, "step": 4193 }, { "epoch": 13.953410981697171, "loss": 0.4974377751350403, "loss_ce": 1.263889430447307e-06, "loss_iou": 0.2021484375, "loss_num": 0.018798828125, "loss_xval": 0.498046875, "num_input_tokens_seen": 262900612, "step": 4193 }, { "epoch": 13.956738768718802, "grad_norm": 34.96686553955078, "learning_rate": 5e-06, "loss": 0.4507, "num_input_tokens_seen": 262964064, "step": 4194 }, { "epoch": 13.956738768718802, "loss": 0.3602965474128723, "loss_ce": 6.012279300193768e-06, "loss_iou": 0.1474609375, "loss_num": 0.0133056640625, "loss_xval": 0.359375, "num_input_tokens_seen": 262964064, "step": 4194 }, { "epoch": 13.960066555740433, "grad_norm": 28.281095504760742, "learning_rate": 5e-06, "loss": 0.5331, "num_input_tokens_seen": 263027472, "step": 4195 }, { "epoch": 13.960066555740433, "loss": 0.5915960669517517, "loss_ce": 0.0013860954204574227, "loss_iou": 0.234375, "loss_num": 0.024169921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 263027472, "step": 4195 }, { "epoch": 13.963394342762063, "grad_norm": 28.499845504760742, "learning_rate": 5e-06, "loss": 0.5232, "num_input_tokens_seen": 263090776, "step": 4196 }, { "epoch": 13.963394342762063, "loss": 0.5542546510696411, "loss_ce": 0.00017747058882378042, "loss_iou": 0.228515625, "loss_num": 0.0191650390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 263090776, "step": 4196 }, { "epoch": 13.966722129783694, "grad_norm": 17.477785110473633, "learning_rate": 5e-06, "loss": 0.5421, "num_input_tokens_seen": 263153384, "step": 4197 }, { "epoch": 13.966722129783694, "loss": 0.598820149898529, "loss_ce": 4.2306674004066736e-06, "loss_iou": 0.205078125, "loss_num": 0.037841796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 263153384, "step": 4197 }, { "epoch": 13.970049916805324, "grad_norm": 10.232625961303711, "learning_rate": 5e-06, "loss": 0.4846, "num_input_tokens_seen": 263216040, "step": 4198 }, { "epoch": 13.970049916805324, "loss": 0.5003185272216797, "loss_ce": 1.3361682249524165e-05, "loss_iou": 0.1845703125, "loss_num": 0.0260009765625, "loss_xval": 0.5, "num_input_tokens_seen": 263216040, "step": 4198 }, { "epoch": 13.973377703826955, "grad_norm": 13.773412704467773, "learning_rate": 5e-06, "loss": 0.321, "num_input_tokens_seen": 263279476, "step": 4199 }, { "epoch": 13.973377703826955, "loss": 0.2312287539243698, "loss_ce": 2.759552444331348e-05, "loss_iou": 0.1005859375, "loss_num": 0.005950927734375, "loss_xval": 0.2314453125, "num_input_tokens_seen": 263279476, "step": 4199 }, { "epoch": 13.976705490848586, "grad_norm": 8.4588623046875, "learning_rate": 5e-06, "loss": 0.3861, "num_input_tokens_seen": 263342088, "step": 4200 }, { "epoch": 13.976705490848586, "loss": 0.4108920693397522, "loss_ce": 3.390924348423141e-06, "loss_iou": 0.1337890625, "loss_num": 0.0286865234375, "loss_xval": 0.41015625, "num_input_tokens_seen": 263342088, "step": 4200 }, { "epoch": 13.980033277870216, "grad_norm": 12.790979385375977, "learning_rate": 5e-06, "loss": 0.3106, "num_input_tokens_seen": 263402364, "step": 4201 }, { "epoch": 13.980033277870216, "loss": 0.2775770127773285, "loss_ce": 0.0001111864039557986, "loss_iou": 0.1044921875, "loss_num": 0.0137939453125, "loss_xval": 0.27734375, "num_input_tokens_seen": 263402364, "step": 4201 }, { "epoch": 13.983361064891847, "grad_norm": 8.041509628295898, "learning_rate": 5e-06, "loss": 0.419, "num_input_tokens_seen": 263465016, "step": 4202 }, { "epoch": 13.983361064891847, "loss": 0.34456050395965576, "loss_ce": 1.7064625353668816e-05, "loss_iou": 0.1083984375, "loss_num": 0.0255126953125, "loss_xval": 0.34375, "num_input_tokens_seen": 263465016, "step": 4202 }, { "epoch": 13.986688851913478, "grad_norm": 7.935578346252441, "learning_rate": 5e-06, "loss": 0.6199, "num_input_tokens_seen": 263528692, "step": 4203 }, { "epoch": 13.986688851913478, "loss": 0.3575460612773895, "loss_ce": 2.101908876284142e-06, "loss_iou": 0.138671875, "loss_num": 0.015869140625, "loss_xval": 0.357421875, "num_input_tokens_seen": 263528692, "step": 4203 }, { "epoch": 13.990016638935108, "grad_norm": 8.420280456542969, "learning_rate": 5e-06, "loss": 0.5649, "num_input_tokens_seen": 263591600, "step": 4204 }, { "epoch": 13.990016638935108, "loss": 0.7557424306869507, "loss_ce": 5.16831414643093e-06, "loss_iou": 0.25390625, "loss_num": 0.049560546875, "loss_xval": 0.75390625, "num_input_tokens_seen": 263591600, "step": 4204 }, { "epoch": 13.993344425956739, "grad_norm": 11.127486228942871, "learning_rate": 5e-06, "loss": 0.3589, "num_input_tokens_seen": 263653052, "step": 4205 }, { "epoch": 13.993344425956739, "loss": 0.23529121279716492, "loss_ce": 6.687557743134676e-07, "loss_iou": 0.08447265625, "loss_num": 0.01324462890625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 263653052, "step": 4205 }, { "epoch": 13.99667221297837, "grad_norm": 6.877384662628174, "learning_rate": 5e-06, "loss": 0.2468, "num_input_tokens_seen": 263715488, "step": 4206 }, { "epoch": 13.99667221297837, "loss": 0.1862880140542984, "loss_ce": 8.720118785277009e-06, "loss_iou": 0.058837890625, "loss_num": 0.01373291015625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 263715488, "step": 4206 }, { "epoch": 14.0, "grad_norm": 16.73561668395996, "learning_rate": 5e-06, "loss": 0.5233, "num_input_tokens_seen": 263778524, "step": 4207 }, { "epoch": 14.0, "loss": 0.6190521717071533, "loss_ce": 0.0020172731019556522, "loss_iou": 0.2109375, "loss_num": 0.0390625, "loss_xval": 0.6171875, "num_input_tokens_seen": 263778524, "step": 4207 }, { "epoch": 14.00332778702163, "grad_norm": 24.709754943847656, "learning_rate": 5e-06, "loss": 0.6205, "num_input_tokens_seen": 263843296, "step": 4208 }, { "epoch": 14.00332778702163, "loss": 0.8068971633911133, "loss_ce": 1.240019264514558e-05, "loss_iou": 0.32421875, "loss_num": 0.03173828125, "loss_xval": 0.80859375, "num_input_tokens_seen": 263843296, "step": 4208 }, { "epoch": 14.006655574043261, "grad_norm": 33.059547424316406, "learning_rate": 5e-06, "loss": 0.6025, "num_input_tokens_seen": 263906736, "step": 4209 }, { "epoch": 14.006655574043261, "loss": 0.9326975345611572, "loss_ce": 0.00032456862390972674, "loss_iou": 0.388671875, "loss_num": 0.031005859375, "loss_xval": 0.93359375, "num_input_tokens_seen": 263906736, "step": 4209 }, { "epoch": 14.009983361064892, "grad_norm": 32.75065231323242, "learning_rate": 5e-06, "loss": 0.5479, "num_input_tokens_seen": 263970312, "step": 4210 }, { "epoch": 14.009983361064892, "loss": 0.3469250202178955, "loss_ce": 1.2192776921438053e-06, "loss_iou": 0.146484375, "loss_num": 0.01080322265625, "loss_xval": 0.34765625, "num_input_tokens_seen": 263970312, "step": 4210 }, { "epoch": 14.013311148086522, "grad_norm": 23.350500106811523, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 264033376, "step": 4211 }, { "epoch": 14.013311148086522, "loss": 0.5439468622207642, "loss_ce": 1.5427989410454757e-06, "loss_iou": 0.2255859375, "loss_num": 0.018798828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 264033376, "step": 4211 }, { "epoch": 14.016638935108153, "grad_norm": 12.35250473022461, "learning_rate": 5e-06, "loss": 0.468, "num_input_tokens_seen": 264095252, "step": 4212 }, { "epoch": 14.016638935108153, "loss": 0.5150014758110046, "loss_ce": 2.107987256749766e-06, "loss_iou": 0.181640625, "loss_num": 0.030517578125, "loss_xval": 0.515625, "num_input_tokens_seen": 264095252, "step": 4212 }, { "epoch": 14.019966722129784, "grad_norm": 8.719674110412598, "learning_rate": 5e-06, "loss": 0.4808, "num_input_tokens_seen": 264158244, "step": 4213 }, { "epoch": 14.019966722129784, "loss": 0.4314887225627899, "loss_ce": 6.956252036616206e-07, "loss_iou": 0.169921875, "loss_num": 0.018310546875, "loss_xval": 0.431640625, "num_input_tokens_seen": 264158244, "step": 4213 }, { "epoch": 14.023294509151414, "grad_norm": 10.30602741241455, "learning_rate": 5e-06, "loss": 0.4796, "num_input_tokens_seen": 264221424, "step": 4214 }, { "epoch": 14.023294509151414, "loss": 0.2733812928199768, "loss_ce": 4.8424158194393385e-06, "loss_iou": 0.11474609375, "loss_num": 0.0087890625, "loss_xval": 0.2734375, "num_input_tokens_seen": 264221424, "step": 4214 }, { "epoch": 14.026622296173045, "grad_norm": 12.00129508972168, "learning_rate": 5e-06, "loss": 0.6749, "num_input_tokens_seen": 264285228, "step": 4215 }, { "epoch": 14.026622296173045, "loss": 0.8930212259292603, "loss_ce": 7.692494546063244e-05, "loss_iou": 0.361328125, "loss_num": 0.0341796875, "loss_xval": 0.89453125, "num_input_tokens_seen": 264285228, "step": 4215 }, { "epoch": 14.029950083194676, "grad_norm": 18.773229598999023, "learning_rate": 5e-06, "loss": 0.6201, "num_input_tokens_seen": 264348928, "step": 4216 }, { "epoch": 14.029950083194676, "loss": 0.5148939490318298, "loss_ce": 1.3984314364279271e-06, "loss_iou": 0.2109375, "loss_num": 0.0186767578125, "loss_xval": 0.515625, "num_input_tokens_seen": 264348928, "step": 4216 }, { "epoch": 14.033277870216306, "grad_norm": 16.370933532714844, "learning_rate": 5e-06, "loss": 0.354, "num_input_tokens_seen": 264410668, "step": 4217 }, { "epoch": 14.033277870216306, "loss": 0.5559133291244507, "loss_ce": 5.100707767269341e-06, "loss_iou": 0.2158203125, "loss_num": 0.0250244140625, "loss_xval": 0.5546875, "num_input_tokens_seen": 264410668, "step": 4217 }, { "epoch": 14.036605657237937, "grad_norm": 11.080531120300293, "learning_rate": 5e-06, "loss": 0.4856, "num_input_tokens_seen": 264472420, "step": 4218 }, { "epoch": 14.036605657237937, "loss": 0.49585413932800293, "loss_ce": 4.51941741630435e-06, "loss_iou": 0.1337890625, "loss_num": 0.0458984375, "loss_xval": 0.49609375, "num_input_tokens_seen": 264472420, "step": 4218 }, { "epoch": 14.039933444259567, "grad_norm": 8.685437202453613, "learning_rate": 5e-06, "loss": 0.3208, "num_input_tokens_seen": 264532360, "step": 4219 }, { "epoch": 14.039933444259567, "loss": 0.25086328387260437, "loss_ce": 8.789220373728313e-06, "loss_iou": 0.03857421875, "loss_num": 0.03466796875, "loss_xval": 0.25, "num_input_tokens_seen": 264532360, "step": 4219 }, { "epoch": 14.043261231281198, "grad_norm": 6.877601146697998, "learning_rate": 5e-06, "loss": 0.3583, "num_input_tokens_seen": 264594000, "step": 4220 }, { "epoch": 14.043261231281198, "loss": 0.5131865739822388, "loss_ce": 2.999113803525688e-06, "loss_iou": 0.171875, "loss_num": 0.0341796875, "loss_xval": 0.51171875, "num_input_tokens_seen": 264594000, "step": 4220 }, { "epoch": 14.046589018302829, "grad_norm": 7.007058620452881, "learning_rate": 5e-06, "loss": 0.4533, "num_input_tokens_seen": 264657284, "step": 4221 }, { "epoch": 14.046589018302829, "loss": 0.46757376194000244, "loss_ce": 0.0005327538819983602, "loss_iou": 0.1630859375, "loss_num": 0.028076171875, "loss_xval": 0.466796875, "num_input_tokens_seen": 264657284, "step": 4221 }, { "epoch": 14.04991680532446, "grad_norm": 9.986302375793457, "learning_rate": 5e-06, "loss": 0.2004, "num_input_tokens_seen": 264718944, "step": 4222 }, { "epoch": 14.04991680532446, "loss": 0.10154412686824799, "loss_ce": 1.2151211194577627e-05, "loss_iou": 0.0, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 264718944, "step": 4222 }, { "epoch": 14.05324459234609, "grad_norm": 27.668672561645508, "learning_rate": 5e-06, "loss": 0.5234, "num_input_tokens_seen": 264781576, "step": 4223 }, { "epoch": 14.05324459234609, "loss": 0.5112311840057373, "loss_ce": 7.389016900560819e-07, "loss_iou": 0.130859375, "loss_num": 0.0498046875, "loss_xval": 0.51171875, "num_input_tokens_seen": 264781576, "step": 4223 }, { "epoch": 14.05657237936772, "grad_norm": 26.976900100708008, "learning_rate": 5e-06, "loss": 0.5078, "num_input_tokens_seen": 264844020, "step": 4224 }, { "epoch": 14.05657237936772, "loss": 0.48487383127212524, "loss_ce": 0.00031572417356073856, "loss_iou": 0.1982421875, "loss_num": 0.0174560546875, "loss_xval": 0.484375, "num_input_tokens_seen": 264844020, "step": 4224 }, { "epoch": 14.059900166389351, "grad_norm": 9.445661544799805, "learning_rate": 5e-06, "loss": 0.4203, "num_input_tokens_seen": 264907704, "step": 4225 }, { "epoch": 14.059900166389351, "loss": 0.5549349784851074, "loss_ce": 3.4020904422504827e-06, "loss_iou": 0.212890625, "loss_num": 0.02587890625, "loss_xval": 0.5546875, "num_input_tokens_seen": 264907704, "step": 4225 }, { "epoch": 14.063227953410982, "grad_norm": 10.994095802307129, "learning_rate": 5e-06, "loss": 0.4855, "num_input_tokens_seen": 264971360, "step": 4226 }, { "epoch": 14.063227953410982, "loss": 0.46079879999160767, "loss_ce": 7.494293822674081e-05, "loss_iou": 0.1845703125, "loss_num": 0.0185546875, "loss_xval": 0.4609375, "num_input_tokens_seen": 264971360, "step": 4226 }, { "epoch": 14.066555740432612, "grad_norm": 10.911681175231934, "learning_rate": 5e-06, "loss": 0.5948, "num_input_tokens_seen": 265034152, "step": 4227 }, { "epoch": 14.066555740432612, "loss": 0.6547344923019409, "loss_ce": 1.0327545169275254e-05, "loss_iou": 0.265625, "loss_num": 0.0242919921875, "loss_xval": 0.65625, "num_input_tokens_seen": 265034152, "step": 4227 }, { "epoch": 14.069883527454243, "grad_norm": 12.401256561279297, "learning_rate": 5e-06, "loss": 0.354, "num_input_tokens_seen": 265097048, "step": 4228 }, { "epoch": 14.069883527454243, "loss": 0.35523876547813416, "loss_ce": 1.4149353773973417e-05, "loss_iou": 0.125, "loss_num": 0.02099609375, "loss_xval": 0.35546875, "num_input_tokens_seen": 265097048, "step": 4228 }, { "epoch": 14.073211314475873, "grad_norm": 11.03134822845459, "learning_rate": 5e-06, "loss": 0.2794, "num_input_tokens_seen": 265159648, "step": 4229 }, { "epoch": 14.073211314475873, "loss": 0.24896475672721863, "loss_ce": 2.3595209768245695e-06, "loss_iou": 0.0888671875, "loss_num": 0.01434326171875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 265159648, "step": 4229 }, { "epoch": 14.076539101497504, "grad_norm": 20.58761215209961, "learning_rate": 5e-06, "loss": 0.4458, "num_input_tokens_seen": 265221008, "step": 4230 }, { "epoch": 14.076539101497504, "loss": 0.39816364645957947, "loss_ce": 8.192342306756473e-07, "loss_iou": 0.146484375, "loss_num": 0.02099609375, "loss_xval": 0.3984375, "num_input_tokens_seen": 265221008, "step": 4230 }, { "epoch": 14.079866888519135, "grad_norm": 22.323287963867188, "learning_rate": 5e-06, "loss": 0.3919, "num_input_tokens_seen": 265283076, "step": 4231 }, { "epoch": 14.079866888519135, "loss": 0.4545992612838745, "loss_ce": 9.405711352883372e-06, "loss_iou": 0.1533203125, "loss_num": 0.029541015625, "loss_xval": 0.455078125, "num_input_tokens_seen": 265283076, "step": 4231 }, { "epoch": 14.083194675540765, "grad_norm": 20.8972225189209, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 265345524, "step": 4232 }, { "epoch": 14.083194675540765, "loss": 0.37396568059921265, "loss_ce": 0.0005525969318114221, "loss_iou": 0.138671875, "loss_num": 0.0191650390625, "loss_xval": 0.373046875, "num_input_tokens_seen": 265345524, "step": 4232 }, { "epoch": 14.086522462562396, "grad_norm": 16.077070236206055, "learning_rate": 5e-06, "loss": 0.5641, "num_input_tokens_seen": 265408980, "step": 4233 }, { "epoch": 14.086522462562396, "loss": 0.5711417198181152, "loss_ce": 0.00031043574563227594, "loss_iou": 0.2255859375, "loss_num": 0.02392578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 265408980, "step": 4233 }, { "epoch": 14.089850249584027, "grad_norm": 10.54637622833252, "learning_rate": 5e-06, "loss": 0.6363, "num_input_tokens_seen": 265473780, "step": 4234 }, { "epoch": 14.089850249584027, "loss": 0.5799759030342102, "loss_ce": 1.984233495022636e-05, "loss_iou": 0.2275390625, "loss_num": 0.0250244140625, "loss_xval": 0.578125, "num_input_tokens_seen": 265473780, "step": 4234 }, { "epoch": 14.093178036605657, "grad_norm": 14.868681907653809, "learning_rate": 5e-06, "loss": 0.3504, "num_input_tokens_seen": 265536552, "step": 4235 }, { "epoch": 14.093178036605657, "loss": 0.35083112120628357, "loss_ce": 1.0285350526828552e-06, "loss_iou": 0.1328125, "loss_num": 0.0172119140625, "loss_xval": 0.3515625, "num_input_tokens_seen": 265536552, "step": 4235 }, { "epoch": 14.096505823627288, "grad_norm": 11.45353889465332, "learning_rate": 5e-06, "loss": 0.3727, "num_input_tokens_seen": 265597116, "step": 4236 }, { "epoch": 14.096505823627288, "loss": 0.4339621067047119, "loss_ce": 2.1542894046433503e-06, "loss_iou": 0.169921875, "loss_num": 0.018798828125, "loss_xval": 0.43359375, "num_input_tokens_seen": 265597116, "step": 4236 }, { "epoch": 14.099833610648918, "grad_norm": 21.45042610168457, "learning_rate": 5e-06, "loss": 0.4282, "num_input_tokens_seen": 265658828, "step": 4237 }, { "epoch": 14.099833610648918, "loss": 0.4377025365829468, "loss_ce": 1.942342714755796e-05, "loss_iou": 0.150390625, "loss_num": 0.02734375, "loss_xval": 0.4375, "num_input_tokens_seen": 265658828, "step": 4237 }, { "epoch": 14.103161397670549, "grad_norm": 31.983409881591797, "learning_rate": 5e-06, "loss": 0.5398, "num_input_tokens_seen": 265722956, "step": 4238 }, { "epoch": 14.103161397670549, "loss": 0.4728257954120636, "loss_ce": 1.672292455623392e-06, "loss_iou": 0.208984375, "loss_num": 0.01092529296875, "loss_xval": 0.47265625, "num_input_tokens_seen": 265722956, "step": 4238 }, { "epoch": 14.10648918469218, "grad_norm": 28.246620178222656, "learning_rate": 5e-06, "loss": 0.5219, "num_input_tokens_seen": 265783988, "step": 4239 }, { "epoch": 14.10648918469218, "loss": 0.5360109210014343, "loss_ce": 0.00012224675447214395, "loss_iou": 0.2119140625, "loss_num": 0.0223388671875, "loss_xval": 0.53515625, "num_input_tokens_seen": 265783988, "step": 4239 }, { "epoch": 14.10981697171381, "grad_norm": 10.215570449829102, "learning_rate": 5e-06, "loss": 0.6677, "num_input_tokens_seen": 265846452, "step": 4240 }, { "epoch": 14.10981697171381, "loss": 0.5485853552818298, "loss_ce": 1.3829572935719625e-06, "loss_iou": 0.181640625, "loss_num": 0.037109375, "loss_xval": 0.546875, "num_input_tokens_seen": 265846452, "step": 4240 }, { "epoch": 14.11314475873544, "grad_norm": 10.147332191467285, "learning_rate": 5e-06, "loss": 0.2881, "num_input_tokens_seen": 265909492, "step": 4241 }, { "epoch": 14.11314475873544, "loss": 0.21753013134002686, "loss_ce": 8.391707524424419e-07, "loss_iou": 0.08837890625, "loss_num": 0.00811767578125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 265909492, "step": 4241 }, { "epoch": 14.116472545757071, "grad_norm": 13.217887878417969, "learning_rate": 5e-06, "loss": 0.5335, "num_input_tokens_seen": 265971956, "step": 4242 }, { "epoch": 14.116472545757071, "loss": 0.7165735960006714, "loss_ce": 2.0856530682067387e-05, "loss_iou": 0.294921875, "loss_num": 0.025146484375, "loss_xval": 0.71484375, "num_input_tokens_seen": 265971956, "step": 4242 }, { "epoch": 14.119800332778702, "grad_norm": 12.270796775817871, "learning_rate": 5e-06, "loss": 0.2689, "num_input_tokens_seen": 266033744, "step": 4243 }, { "epoch": 14.119800332778702, "loss": 0.2116919457912445, "loss_ce": 6.758520612493157e-06, "loss_iou": 0.05908203125, "loss_num": 0.018798828125, "loss_xval": 0.2119140625, "num_input_tokens_seen": 266033744, "step": 4243 }, { "epoch": 14.123128119800333, "grad_norm": 12.684266090393066, "learning_rate": 5e-06, "loss": 0.2786, "num_input_tokens_seen": 266095756, "step": 4244 }, { "epoch": 14.123128119800333, "loss": 0.33108121156692505, "loss_ce": 0.00013332246453501284, "loss_iou": 0.126953125, "loss_num": 0.01544189453125, "loss_xval": 0.330078125, "num_input_tokens_seen": 266095756, "step": 4244 }, { "epoch": 14.126455906821963, "grad_norm": 19.511606216430664, "learning_rate": 5e-06, "loss": 0.6107, "num_input_tokens_seen": 266158660, "step": 4245 }, { "epoch": 14.126455906821963, "loss": 0.42749086022377014, "loss_ce": 6.273273811530089e-07, "loss_iou": 0.16796875, "loss_num": 0.018310546875, "loss_xval": 0.427734375, "num_input_tokens_seen": 266158660, "step": 4245 }, { "epoch": 14.129783693843594, "grad_norm": 36.27968215942383, "learning_rate": 5e-06, "loss": 0.4893, "num_input_tokens_seen": 266222608, "step": 4246 }, { "epoch": 14.129783693843594, "loss": 0.42727941274642944, "loss_ce": 2.809280658766511e-06, "loss_iou": 0.17578125, "loss_num": 0.01531982421875, "loss_xval": 0.427734375, "num_input_tokens_seen": 266222608, "step": 4246 }, { "epoch": 14.133111480865225, "grad_norm": 36.3953857421875, "learning_rate": 5e-06, "loss": 0.5749, "num_input_tokens_seen": 266286120, "step": 4247 }, { "epoch": 14.133111480865225, "loss": 0.7857969403266907, "loss_ce": 3.032176027772948e-05, "loss_iou": 0.296875, "loss_num": 0.038330078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 266286120, "step": 4247 }, { "epoch": 14.136439267886855, "grad_norm": 17.046031951904297, "learning_rate": 5e-06, "loss": 0.3489, "num_input_tokens_seen": 266348384, "step": 4248 }, { "epoch": 14.136439267886855, "loss": 0.32184073328971863, "loss_ce": 2.3490119929192588e-06, "loss_iou": 0.10205078125, "loss_num": 0.023681640625, "loss_xval": 0.322265625, "num_input_tokens_seen": 266348384, "step": 4248 }, { "epoch": 14.139767054908486, "grad_norm": 6.386213302612305, "learning_rate": 5e-06, "loss": 0.1974, "num_input_tokens_seen": 266409936, "step": 4249 }, { "epoch": 14.139767054908486, "loss": 0.16226781904697418, "loss_ce": 3.637740155681968e-05, "loss_iou": 0.052734375, "loss_num": 0.01141357421875, "loss_xval": 0.162109375, "num_input_tokens_seen": 266409936, "step": 4249 }, { "epoch": 14.143094841930116, "grad_norm": 41.56739044189453, "learning_rate": 5e-06, "loss": 0.6161, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "eval_seeclick_CIoU": 0.051826974377036095, "eval_seeclick_GIoU": 0.049425460398197174, "eval_seeclick_IoU": 0.1689988225698471, "eval_seeclick_MAE_all": 0.16940681636333466, "eval_seeclick_MAE_h": 0.0676682498306036, "eval_seeclick_MAE_w": 0.13247355446219444, "eval_seeclick_MAE_x_boxes": 0.2064315527677536, "eval_seeclick_MAE_y_boxes": 0.1793569028377533, "eval_seeclick_NUM_probability": 0.9999780654907227, "eval_seeclick_inside_bbox": 0.17812500149011612, "eval_seeclick_loss": 2.94836688041687, "eval_seeclick_loss_ce": 0.16913575679063797, "eval_seeclick_loss_iou": 0.964599609375, "eval_seeclick_loss_num": 0.17236328125, "eval_seeclick_loss_xval": 2.7919921875, "eval_seeclick_runtime": 68.2774, "eval_seeclick_samples_per_second": 0.688, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "eval_icons_CIoU": -0.06432507000863552, "eval_icons_GIoU": 0.033729610266163945, "eval_icons_IoU": 0.11656715720891953, "eval_icons_MAE_all": 0.20603451132774353, "eval_icons_MAE_h": 0.18511297553777695, "eval_icons_MAE_w": 0.21292082965373993, "eval_icons_MAE_x_boxes": 0.15068383887410164, "eval_icons_MAE_y_boxes": 0.09764442220330238, "eval_icons_NUM_probability": 0.9999719560146332, "eval_icons_inside_bbox": 0.1927083358168602, "eval_icons_loss": 2.9100120067596436, "eval_icons_loss_ce": 1.0390334864496253e-05, "eval_icons_loss_iou": 0.962890625, "eval_icons_loss_num": 0.2005615234375, "eval_icons_loss_xval": 2.9296875, "eval_icons_runtime": 65.8423, "eval_icons_samples_per_second": 0.759, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "eval_screenspot_CIoU": 0.18533208966255188, "eval_screenspot_GIoU": 0.22139165302117667, "eval_screenspot_IoU": 0.2944800357023875, "eval_screenspot_MAE_all": 0.11593271295229594, "eval_screenspot_MAE_h": 0.06746742750207584, "eval_screenspot_MAE_w": 0.09266744181513786, "eval_screenspot_MAE_x_boxes": 0.15928281843662262, "eval_screenspot_MAE_y_boxes": 0.08583711832761765, "eval_screenspot_NUM_probability": 0.9999937216440836, "eval_screenspot_inside_bbox": 0.5391666690508524, "eval_screenspot_loss": 2.1833672523498535, "eval_screenspot_loss_ce": 7.505412941100076e-05, "eval_screenspot_loss_iou": 0.79296875, "eval_screenspot_loss_num": 0.1253662109375, "eval_screenspot_loss_xval": 2.2132161458333335, "eval_screenspot_runtime": 115.6025, "eval_screenspot_samples_per_second": 0.77, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "eval_compot_CIoU": 0.1561531126499176, "eval_compot_GIoU": 0.1972309574484825, "eval_compot_IoU": 0.2777993083000183, "eval_compot_MAE_all": 0.1359853371977806, "eval_compot_MAE_h": 0.07194142043590546, "eval_compot_MAE_w": 0.1582878902554512, "eval_compot_MAE_x_boxes": 0.1068047434091568, "eval_compot_MAE_y_boxes": 0.10375743359327316, "eval_compot_NUM_probability": 0.9999968707561493, "eval_compot_inside_bbox": 0.4288194477558136, "eval_compot_loss": 2.272066354751587, "eval_compot_loss_ce": 0.004369709407910705, "eval_compot_loss_iou": 0.8214111328125, "eval_compot_loss_num": 0.1413726806640625, "eval_compot_loss_xval": 2.351318359375, "eval_compot_runtime": 68.6249, "eval_compot_samples_per_second": 0.729, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "eval_custom_ui_MAE_all": 0.05974045768380165, "eval_custom_ui_MAE_x": 0.06753784045577049, "eval_custom_ui_MAE_y": 0.05194307118654251, "eval_custom_ui_NUM_probability": 0.9999988079071045, "eval_custom_ui_loss": 0.2787171006202698, "eval_custom_ui_loss_ce": 5.105196350996266e-06, "eval_custom_ui_loss_num": 0.0579986572265625, "eval_custom_ui_loss_xval": 0.289886474609375, "eval_custom_ui_runtime": 60.0822, "eval_custom_ui_samples_per_second": 0.832, "eval_custom_ui_steps_per_second": 0.033, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.143094841930116, "loss": 0.3181847929954529, "loss_ce": 8.54061363497749e-06, "loss_iou": 0.0, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 266474280, "step": 4250 }, { "epoch": 14.146422628951747, "grad_norm": 34.60765838623047, "learning_rate": 5e-06, "loss": 0.4669, "num_input_tokens_seen": 266536180, "step": 4251 }, { "epoch": 14.146422628951747, "loss": 0.5726351737976074, "loss_ce": 3.333069798827637e-06, "loss_iou": 0.2392578125, "loss_num": 0.01904296875, "loss_xval": 0.57421875, "num_input_tokens_seen": 266536180, "step": 4251 }, { "epoch": 14.149750415973378, "grad_norm": 15.131364822387695, "learning_rate": 5e-06, "loss": 0.4423, "num_input_tokens_seen": 266599084, "step": 4252 }, { "epoch": 14.149750415973378, "loss": 0.2912493348121643, "loss_ce": 0.00017265568021684885, "loss_iou": 0.111328125, "loss_num": 0.01361083984375, "loss_xval": 0.291015625, "num_input_tokens_seen": 266599084, "step": 4252 }, { "epoch": 14.153078202995008, "grad_norm": 16.598709106445312, "learning_rate": 5e-06, "loss": 0.3988, "num_input_tokens_seen": 266662132, "step": 4253 }, { "epoch": 14.153078202995008, "loss": 0.5135213732719421, "loss_ce": 2.081095317407744e-06, "loss_iou": 0.1845703125, "loss_num": 0.029052734375, "loss_xval": 0.51171875, "num_input_tokens_seen": 266662132, "step": 4253 }, { "epoch": 14.156405990016639, "grad_norm": 12.78003215789795, "learning_rate": 5e-06, "loss": 0.3415, "num_input_tokens_seen": 266724224, "step": 4254 }, { "epoch": 14.156405990016639, "loss": 0.3434455990791321, "loss_ce": 7.951226166369452e-07, "loss_iou": 0.111328125, "loss_num": 0.024169921875, "loss_xval": 0.34375, "num_input_tokens_seen": 266724224, "step": 4254 }, { "epoch": 14.15973377703827, "grad_norm": 7.278827667236328, "learning_rate": 5e-06, "loss": 0.3706, "num_input_tokens_seen": 266785692, "step": 4255 }, { "epoch": 14.15973377703827, "loss": 0.5507819652557373, "loss_ce": 7.228003369164071e-07, "loss_iou": 0.189453125, "loss_num": 0.034423828125, "loss_xval": 0.55078125, "num_input_tokens_seen": 266785692, "step": 4255 }, { "epoch": 14.1630615640599, "grad_norm": 14.342928886413574, "learning_rate": 5e-06, "loss": 0.4644, "num_input_tokens_seen": 266849360, "step": 4256 }, { "epoch": 14.1630615640599, "loss": 0.5681145191192627, "loss_ce": 0.00018238616758026183, "loss_iou": 0.2158203125, "loss_num": 0.0272216796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 266849360, "step": 4256 }, { "epoch": 14.16638935108153, "grad_norm": 20.4388484954834, "learning_rate": 5e-06, "loss": 0.3695, "num_input_tokens_seen": 266912080, "step": 4257 }, { "epoch": 14.16638935108153, "loss": 0.401680052280426, "loss_ce": 0.00019079650519415736, "loss_iou": 0.16015625, "loss_num": 0.0162353515625, "loss_xval": 0.40234375, "num_input_tokens_seen": 266912080, "step": 4257 }, { "epoch": 14.169717138103161, "grad_norm": 10.064804077148438, "learning_rate": 5e-06, "loss": 0.5358, "num_input_tokens_seen": 266973824, "step": 4258 }, { "epoch": 14.169717138103161, "loss": 0.46729201078414917, "loss_ce": 6.829801350249909e-06, "loss_iou": 0.171875, "loss_num": 0.0247802734375, "loss_xval": 0.466796875, "num_input_tokens_seen": 266973824, "step": 4258 }, { "epoch": 14.173044925124792, "grad_norm": 14.501526832580566, "learning_rate": 5e-06, "loss": 0.4793, "num_input_tokens_seen": 267037120, "step": 4259 }, { "epoch": 14.173044925124792, "loss": 0.4069889783859253, "loss_ce": 6.542967639688868e-06, "loss_iou": 0.1689453125, "loss_num": 0.013671875, "loss_xval": 0.40625, "num_input_tokens_seen": 267037120, "step": 4259 }, { "epoch": 14.176372712146422, "grad_norm": 10.236066818237305, "learning_rate": 5e-06, "loss": 0.5246, "num_input_tokens_seen": 267099828, "step": 4260 }, { "epoch": 14.176372712146422, "loss": 0.4776042103767395, "loss_ce": 4.100692422071006e-06, "loss_iou": 0.1552734375, "loss_num": 0.033447265625, "loss_xval": 0.478515625, "num_input_tokens_seen": 267099828, "step": 4260 }, { "epoch": 14.179700499168053, "grad_norm": 16.903764724731445, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 267161672, "step": 4261 }, { "epoch": 14.179700499168053, "loss": 0.3657248914241791, "loss_ce": 2.2228025500226067e-06, "loss_iou": 0.142578125, "loss_num": 0.0162353515625, "loss_xval": 0.365234375, "num_input_tokens_seen": 267161672, "step": 4261 }, { "epoch": 14.183028286189684, "grad_norm": 7.318026065826416, "learning_rate": 5e-06, "loss": 0.2763, "num_input_tokens_seen": 267222204, "step": 4262 }, { "epoch": 14.183028286189684, "loss": 0.2193167507648468, "loss_ce": 2.1728094452555524e-06, "loss_iou": 0.050048828125, "loss_num": 0.0238037109375, "loss_xval": 0.2197265625, "num_input_tokens_seen": 267222204, "step": 4262 }, { "epoch": 14.186356073211314, "grad_norm": 10.09364128112793, "learning_rate": 5e-06, "loss": 0.5257, "num_input_tokens_seen": 267285032, "step": 4263 }, { "epoch": 14.186356073211314, "loss": 0.6088875532150269, "loss_ce": 8.638288591100718e-07, "loss_iou": 0.244140625, "loss_num": 0.0240478515625, "loss_xval": 0.609375, "num_input_tokens_seen": 267285032, "step": 4263 }, { "epoch": 14.189683860232945, "grad_norm": 9.433971405029297, "learning_rate": 5e-06, "loss": 0.4081, "num_input_tokens_seen": 267348296, "step": 4264 }, { "epoch": 14.189683860232945, "loss": 0.3823610246181488, "loss_ce": 0.00015888795314822346, "loss_iou": 0.154296875, "loss_num": 0.01470947265625, "loss_xval": 0.3828125, "num_input_tokens_seen": 267348296, "step": 4264 }, { "epoch": 14.193011647254576, "grad_norm": 8.587752342224121, "learning_rate": 5e-06, "loss": 0.3721, "num_input_tokens_seen": 267411852, "step": 4265 }, { "epoch": 14.193011647254576, "loss": 0.3267361521720886, "loss_ce": 7.598894444527104e-05, "loss_iou": 0.12353515625, "loss_num": 0.0159912109375, "loss_xval": 0.326171875, "num_input_tokens_seen": 267411852, "step": 4265 }, { "epoch": 14.196339434276206, "grad_norm": 26.713397979736328, "learning_rate": 5e-06, "loss": 0.4474, "num_input_tokens_seen": 267474312, "step": 4266 }, { "epoch": 14.196339434276206, "loss": 0.4087551236152649, "loss_ce": 3.321061740280129e-05, "loss_iou": 0.158203125, "loss_num": 0.0184326171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 267474312, "step": 4266 }, { "epoch": 14.199667221297837, "grad_norm": 10.554451942443848, "learning_rate": 5e-06, "loss": 0.5322, "num_input_tokens_seen": 267537156, "step": 4267 }, { "epoch": 14.199667221297837, "loss": 0.5902738571166992, "loss_ce": 2.893686087190872e-06, "loss_iou": 0.24609375, "loss_num": 0.0196533203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 267537156, "step": 4267 }, { "epoch": 14.202995008319467, "grad_norm": 10.555356979370117, "learning_rate": 5e-06, "loss": 0.3748, "num_input_tokens_seen": 267599480, "step": 4268 }, { "epoch": 14.202995008319467, "loss": 0.2803092896938324, "loss_ce": 5.331567081157118e-06, "loss_iou": 0.1025390625, "loss_num": 0.01507568359375, "loss_xval": 0.28125, "num_input_tokens_seen": 267599480, "step": 4268 }, { "epoch": 14.206322795341098, "grad_norm": 14.089458465576172, "learning_rate": 5e-06, "loss": 0.2841, "num_input_tokens_seen": 267663904, "step": 4269 }, { "epoch": 14.206322795341098, "loss": 0.3330128788948059, "loss_ce": 5.0802877922251355e-06, "loss_iou": 0.1416015625, "loss_num": 0.00994873046875, "loss_xval": 0.33203125, "num_input_tokens_seen": 267663904, "step": 4269 }, { "epoch": 14.209650582362729, "grad_norm": 17.4295597076416, "learning_rate": 5e-06, "loss": 0.5175, "num_input_tokens_seen": 267726804, "step": 4270 }, { "epoch": 14.209650582362729, "loss": 0.678717851638794, "loss_ce": 6.796203524572775e-05, "loss_iou": 0.259765625, "loss_num": 0.031982421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 267726804, "step": 4270 }, { "epoch": 14.21297836938436, "grad_norm": 8.857832908630371, "learning_rate": 5e-06, "loss": 0.2713, "num_input_tokens_seen": 267788300, "step": 4271 }, { "epoch": 14.21297836938436, "loss": 0.3435678482055664, "loss_ce": 9.792005357667222e-07, "loss_iou": 0.11865234375, "loss_num": 0.021240234375, "loss_xval": 0.34375, "num_input_tokens_seen": 267788300, "step": 4271 }, { "epoch": 14.21630615640599, "grad_norm": 11.300460815429688, "learning_rate": 5e-06, "loss": 0.5051, "num_input_tokens_seen": 267850584, "step": 4272 }, { "epoch": 14.21630615640599, "loss": 0.4365869164466858, "loss_ce": 2.4323435354745016e-06, "loss_iou": 0.1630859375, "loss_num": 0.02197265625, "loss_xval": 0.4375, "num_input_tokens_seen": 267850584, "step": 4272 }, { "epoch": 14.21963394342762, "grad_norm": 11.689573287963867, "learning_rate": 5e-06, "loss": 0.4197, "num_input_tokens_seen": 267913428, "step": 4273 }, { "epoch": 14.21963394342762, "loss": 0.3986843526363373, "loss_ce": 2.712849436647957e-06, "loss_iou": 0.1630859375, "loss_num": 0.0145263671875, "loss_xval": 0.3984375, "num_input_tokens_seen": 267913428, "step": 4273 }, { "epoch": 14.222961730449251, "grad_norm": 14.501953125, "learning_rate": 5e-06, "loss": 0.5554, "num_input_tokens_seen": 267976348, "step": 4274 }, { "epoch": 14.222961730449251, "loss": 0.6787856817245483, "loss_ce": 0.0001358065492240712, "loss_iou": 0.25390625, "loss_num": 0.0341796875, "loss_xval": 0.6796875, "num_input_tokens_seen": 267976348, "step": 4274 }, { "epoch": 14.226289517470882, "grad_norm": 18.62929344177246, "learning_rate": 5e-06, "loss": 0.4234, "num_input_tokens_seen": 268039568, "step": 4275 }, { "epoch": 14.226289517470882, "loss": 0.42836618423461914, "loss_ce": 2.1457155526150018e-05, "loss_iou": 0.150390625, "loss_num": 0.0255126953125, "loss_xval": 0.427734375, "num_input_tokens_seen": 268039568, "step": 4275 }, { "epoch": 14.229617304492512, "grad_norm": 10.173242568969727, "learning_rate": 5e-06, "loss": 0.5836, "num_input_tokens_seen": 268100376, "step": 4276 }, { "epoch": 14.229617304492512, "loss": 0.4477580785751343, "loss_ce": 4.178941708232742e-06, "loss_iou": 0.16015625, "loss_num": 0.025634765625, "loss_xval": 0.447265625, "num_input_tokens_seen": 268100376, "step": 4276 }, { "epoch": 14.232945091514143, "grad_norm": 35.1286506652832, "learning_rate": 5e-06, "loss": 0.5794, "num_input_tokens_seen": 268164384, "step": 4277 }, { "epoch": 14.232945091514143, "loss": 0.6462433934211731, "loss_ce": 3.1450672395294532e-06, "loss_iou": 0.279296875, "loss_num": 0.0179443359375, "loss_xval": 0.64453125, "num_input_tokens_seen": 268164384, "step": 4277 }, { "epoch": 14.236272878535774, "grad_norm": 45.21717834472656, "learning_rate": 5e-06, "loss": 0.4743, "num_input_tokens_seen": 268227388, "step": 4278 }, { "epoch": 14.236272878535774, "loss": 0.4095679223537445, "loss_ce": 8.306093513965607e-05, "loss_iou": 0.166015625, "loss_num": 0.0155029296875, "loss_xval": 0.41015625, "num_input_tokens_seen": 268227388, "step": 4278 }, { "epoch": 14.239600665557404, "grad_norm": 4.204310894012451, "learning_rate": 5e-06, "loss": 0.3594, "num_input_tokens_seen": 268289612, "step": 4279 }, { "epoch": 14.239600665557404, "loss": 0.3114811182022095, "loss_ce": 7.976328197401017e-05, "loss_iou": 0.13671875, "loss_num": 0.00750732421875, "loss_xval": 0.310546875, "num_input_tokens_seen": 268289612, "step": 4279 }, { "epoch": 14.242928452579035, "grad_norm": 19.79909324645996, "learning_rate": 5e-06, "loss": 0.5383, "num_input_tokens_seen": 268353496, "step": 4280 }, { "epoch": 14.242928452579035, "loss": 0.6062660217285156, "loss_ce": 3.7736274407507153e-06, "loss_iou": 0.2119140625, "loss_num": 0.03662109375, "loss_xval": 0.60546875, "num_input_tokens_seen": 268353496, "step": 4280 }, { "epoch": 14.246256239600665, "grad_norm": 16.437400817871094, "learning_rate": 5e-06, "loss": 0.3336, "num_input_tokens_seen": 268415744, "step": 4281 }, { "epoch": 14.246256239600665, "loss": 0.17508158087730408, "loss_ce": 2.2480169263872085e-06, "loss_iou": 0.06494140625, "loss_num": 0.00909423828125, "loss_xval": 0.1748046875, "num_input_tokens_seen": 268415744, "step": 4281 }, { "epoch": 14.249584026622296, "grad_norm": 15.501030921936035, "learning_rate": 5e-06, "loss": 0.573, "num_input_tokens_seen": 268478032, "step": 4282 }, { "epoch": 14.249584026622296, "loss": 0.5497473478317261, "loss_ce": 3.7193485695752315e-06, "loss_iou": 0.2060546875, "loss_num": 0.0274658203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 268478032, "step": 4282 }, { "epoch": 14.252911813643927, "grad_norm": 13.873480796813965, "learning_rate": 5e-06, "loss": 0.5149, "num_input_tokens_seen": 268541556, "step": 4283 }, { "epoch": 14.252911813643927, "loss": 0.6522301435470581, "loss_ce": 8.457856893073767e-06, "loss_iou": 0.2451171875, "loss_num": 0.0322265625, "loss_xval": 0.65234375, "num_input_tokens_seen": 268541556, "step": 4283 }, { "epoch": 14.256239600665557, "grad_norm": 16.162372589111328, "learning_rate": 5e-06, "loss": 0.669, "num_input_tokens_seen": 268603268, "step": 4284 }, { "epoch": 14.256239600665557, "loss": 0.8242664933204651, "loss_ce": 4.7739224100951105e-05, "loss_iou": 0.357421875, "loss_num": 0.022216796875, "loss_xval": 0.82421875, "num_input_tokens_seen": 268603268, "step": 4284 }, { "epoch": 14.259567387687188, "grad_norm": 8.816900253295898, "learning_rate": 5e-06, "loss": 0.5204, "num_input_tokens_seen": 268666136, "step": 4285 }, { "epoch": 14.259567387687188, "loss": 0.645542323589325, "loss_ce": 3.9695446503174026e-06, "loss_iou": 0.2431640625, "loss_num": 0.03173828125, "loss_xval": 0.64453125, "num_input_tokens_seen": 268666136, "step": 4285 }, { "epoch": 14.262895174708818, "grad_norm": 9.553703308105469, "learning_rate": 5e-06, "loss": 0.4535, "num_input_tokens_seen": 268728520, "step": 4286 }, { "epoch": 14.262895174708818, "loss": 0.5980539321899414, "loss_ce": 9.089757213587291e-07, "loss_iou": 0.2314453125, "loss_num": 0.0269775390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 268728520, "step": 4286 }, { "epoch": 14.266222961730449, "grad_norm": 6.037521839141846, "learning_rate": 5e-06, "loss": 0.4282, "num_input_tokens_seen": 268792056, "step": 4287 }, { "epoch": 14.266222961730449, "loss": 0.43354877829551697, "loss_ce": 1.60637737280922e-05, "loss_iou": 0.173828125, "loss_num": 0.0172119140625, "loss_xval": 0.43359375, "num_input_tokens_seen": 268792056, "step": 4287 }, { "epoch": 14.26955074875208, "grad_norm": 7.058043956756592, "learning_rate": 5e-06, "loss": 0.306, "num_input_tokens_seen": 268852964, "step": 4288 }, { "epoch": 14.26955074875208, "loss": 0.28668296337127686, "loss_ce": 8.232196364588162e-07, "loss_iou": 0.095703125, "loss_num": 0.01904296875, "loss_xval": 0.287109375, "num_input_tokens_seen": 268852964, "step": 4288 }, { "epoch": 14.27287853577371, "grad_norm": 10.528111457824707, "learning_rate": 5e-06, "loss": 0.3513, "num_input_tokens_seen": 268911628, "step": 4289 }, { "epoch": 14.27287853577371, "loss": 0.47415250539779663, "loss_ce": 8.72016414632526e-07, "loss_iou": 0.1572265625, "loss_num": 0.03173828125, "loss_xval": 0.474609375, "num_input_tokens_seen": 268911628, "step": 4289 }, { "epoch": 14.27620632279534, "grad_norm": 14.233771324157715, "learning_rate": 5e-06, "loss": 0.4729, "num_input_tokens_seen": 268973288, "step": 4290 }, { "epoch": 14.27620632279534, "loss": 0.4445437490940094, "loss_ce": 8.574700041208416e-05, "loss_iou": 0.10205078125, "loss_num": 0.0478515625, "loss_xval": 0.4453125, "num_input_tokens_seen": 268973288, "step": 4290 }, { "epoch": 14.279534109816971, "grad_norm": 21.9337100982666, "learning_rate": 5e-06, "loss": 0.8238, "num_input_tokens_seen": 269038064, "step": 4291 }, { "epoch": 14.279534109816971, "loss": 0.8322770595550537, "loss_ce": 1.6371311630791752e-06, "loss_iou": 0.33203125, "loss_num": 0.033935546875, "loss_xval": 0.83203125, "num_input_tokens_seen": 269038064, "step": 4291 }, { "epoch": 14.282861896838602, "grad_norm": 24.452438354492188, "learning_rate": 5e-06, "loss": 0.5223, "num_input_tokens_seen": 269101648, "step": 4292 }, { "epoch": 14.282861896838602, "loss": 0.5797796249389648, "loss_ce": 6.6315037656750064e-06, "loss_iou": 0.263671875, "loss_num": 0.01043701171875, "loss_xval": 0.578125, "num_input_tokens_seen": 269101648, "step": 4292 }, { "epoch": 14.286189683860233, "grad_norm": 18.27286720275879, "learning_rate": 5e-06, "loss": 0.3837, "num_input_tokens_seen": 269164148, "step": 4293 }, { "epoch": 14.286189683860233, "loss": 0.3799145519733429, "loss_ce": 1.230005182151217e-06, "loss_iou": 0.125, "loss_num": 0.0260009765625, "loss_xval": 0.380859375, "num_input_tokens_seen": 269164148, "step": 4293 }, { "epoch": 14.289517470881863, "grad_norm": 12.44662094116211, "learning_rate": 5e-06, "loss": 0.356, "num_input_tokens_seen": 269226696, "step": 4294 }, { "epoch": 14.289517470881863, "loss": 0.43231910467147827, "loss_ce": 6.815307278884575e-05, "loss_iou": 0.15234375, "loss_num": 0.025390625, "loss_xval": 0.431640625, "num_input_tokens_seen": 269226696, "step": 4294 }, { "epoch": 14.292845257903494, "grad_norm": 8.356432914733887, "learning_rate": 5e-06, "loss": 0.4214, "num_input_tokens_seen": 269289900, "step": 4295 }, { "epoch": 14.292845257903494, "loss": 0.31555211544036865, "loss_ce": 3.611850445395248e-07, "loss_iou": 0.11572265625, "loss_num": 0.0166015625, "loss_xval": 0.31640625, "num_input_tokens_seen": 269289900, "step": 4295 }, { "epoch": 14.296173044925125, "grad_norm": 13.320980072021484, "learning_rate": 5e-06, "loss": 0.4508, "num_input_tokens_seen": 269352504, "step": 4296 }, { "epoch": 14.296173044925125, "loss": 0.3837094306945801, "loss_ce": 0.0001645092124817893, "loss_iou": 0.1396484375, "loss_num": 0.02099609375, "loss_xval": 0.3828125, "num_input_tokens_seen": 269352504, "step": 4296 }, { "epoch": 14.299500831946755, "grad_norm": 11.710000038146973, "learning_rate": 5e-06, "loss": 0.5339, "num_input_tokens_seen": 269415896, "step": 4297 }, { "epoch": 14.299500831946755, "loss": 0.46252861618995667, "loss_ce": 4.185033048997866e-06, "loss_iou": 0.1806640625, "loss_num": 0.020263671875, "loss_xval": 0.462890625, "num_input_tokens_seen": 269415896, "step": 4297 }, { "epoch": 14.302828618968386, "grad_norm": 15.155484199523926, "learning_rate": 5e-06, "loss": 0.401, "num_input_tokens_seen": 269476956, "step": 4298 }, { "epoch": 14.302828618968386, "loss": 0.27058494091033936, "loss_ce": 8.586406465838081e-07, "loss_iou": 0.0986328125, "loss_num": 0.01470947265625, "loss_xval": 0.271484375, "num_input_tokens_seen": 269476956, "step": 4298 }, { "epoch": 14.306156405990016, "grad_norm": 24.98201560974121, "learning_rate": 5e-06, "loss": 0.4238, "num_input_tokens_seen": 269538996, "step": 4299 }, { "epoch": 14.306156405990016, "loss": 0.5072081089019775, "loss_ce": 5.939763468632009e-06, "loss_iou": 0.1904296875, "loss_num": 0.0252685546875, "loss_xval": 0.5078125, "num_input_tokens_seen": 269538996, "step": 4299 }, { "epoch": 14.309484193011647, "grad_norm": 38.86208724975586, "learning_rate": 5e-06, "loss": 0.7459, "num_input_tokens_seen": 269601704, "step": 4300 }, { "epoch": 14.309484193011647, "loss": 0.6598401069641113, "loss_ce": 5.0067712436430156e-05, "loss_iou": 0.21875, "loss_num": 0.04443359375, "loss_xval": 0.66015625, "num_input_tokens_seen": 269601704, "step": 4300 }, { "epoch": 14.312811980033278, "grad_norm": 41.21592330932617, "learning_rate": 5e-06, "loss": 0.5614, "num_input_tokens_seen": 269666216, "step": 4301 }, { "epoch": 14.312811980033278, "loss": 0.5220980644226074, "loss_ce": 3.2939888114924543e-06, "loss_iou": 0.205078125, "loss_num": 0.022216796875, "loss_xval": 0.5234375, "num_input_tokens_seen": 269666216, "step": 4301 }, { "epoch": 14.316139767054908, "grad_norm": 18.85580062866211, "learning_rate": 5e-06, "loss": 0.4879, "num_input_tokens_seen": 269729572, "step": 4302 }, { "epoch": 14.316139767054908, "loss": 0.4489876627922058, "loss_ce": 0.0001351458631688729, "loss_iou": 0.1669921875, "loss_num": 0.0230712890625, "loss_xval": 0.44921875, "num_input_tokens_seen": 269729572, "step": 4302 }, { "epoch": 14.319467554076539, "grad_norm": 13.772705078125, "learning_rate": 5e-06, "loss": 0.6602, "num_input_tokens_seen": 269794688, "step": 4303 }, { "epoch": 14.319467554076539, "loss": 0.9682779908180237, "loss_ce": 0.0002603687171358615, "loss_iou": 0.353515625, "loss_num": 0.052001953125, "loss_xval": 0.96875, "num_input_tokens_seen": 269794688, "step": 4303 }, { "epoch": 14.32279534109817, "grad_norm": 10.44072437286377, "learning_rate": 5e-06, "loss": 0.3123, "num_input_tokens_seen": 269857356, "step": 4304 }, { "epoch": 14.32279534109817, "loss": 0.36814019083976746, "loss_ce": 0.00034235467319376767, "loss_iou": 0.125, "loss_num": 0.0235595703125, "loss_xval": 0.3671875, "num_input_tokens_seen": 269857356, "step": 4304 }, { "epoch": 14.3261231281198, "grad_norm": 6.513827800750732, "learning_rate": 5e-06, "loss": 0.3784, "num_input_tokens_seen": 269918656, "step": 4305 }, { "epoch": 14.3261231281198, "loss": 0.41894668340682983, "loss_ce": 1.3617434433399467e-06, "loss_iou": 0.166015625, "loss_num": 0.0174560546875, "loss_xval": 0.41796875, "num_input_tokens_seen": 269918656, "step": 4305 }, { "epoch": 14.32945091514143, "grad_norm": 17.246156692504883, "learning_rate": 5e-06, "loss": 0.3734, "num_input_tokens_seen": 269981740, "step": 4306 }, { "epoch": 14.32945091514143, "loss": 0.4971316456794739, "loss_ce": 3.2061871024779975e-07, "loss_iou": 0.19921875, "loss_num": 0.0196533203125, "loss_xval": 0.498046875, "num_input_tokens_seen": 269981740, "step": 4306 }, { "epoch": 14.332778702163061, "grad_norm": 22.388166427612305, "learning_rate": 5e-06, "loss": 0.4132, "num_input_tokens_seen": 270045180, "step": 4307 }, { "epoch": 14.332778702163061, "loss": 0.45631858706474304, "loss_ce": 1.975946724996902e-05, "loss_iou": 0.16015625, "loss_num": 0.02734375, "loss_xval": 0.45703125, "num_input_tokens_seen": 270045180, "step": 4307 }, { "epoch": 14.336106489184692, "grad_norm": 9.42569637298584, "learning_rate": 5e-06, "loss": 0.4818, "num_input_tokens_seen": 270107896, "step": 4308 }, { "epoch": 14.336106489184692, "loss": 0.2971554398536682, "loss_ce": 5.783334472653223e-06, "loss_iou": 0.1181640625, "loss_num": 0.0120849609375, "loss_xval": 0.296875, "num_input_tokens_seen": 270107896, "step": 4308 }, { "epoch": 14.339434276206322, "grad_norm": 29.495290756225586, "learning_rate": 5e-06, "loss": 0.6574, "num_input_tokens_seen": 270171364, "step": 4309 }, { "epoch": 14.339434276206322, "loss": 0.43904536962509155, "loss_ce": 0.0004467529652174562, "loss_iou": 0.1787109375, "loss_num": 0.0162353515625, "loss_xval": 0.439453125, "num_input_tokens_seen": 270171364, "step": 4309 }, { "epoch": 14.342762063227953, "grad_norm": 26.4663028717041, "learning_rate": 5e-06, "loss": 0.7072, "num_input_tokens_seen": 270234436, "step": 4310 }, { "epoch": 14.342762063227953, "loss": 0.5456563234329224, "loss_ce": 2.0000588847324252e-06, "loss_iou": 0.2080078125, "loss_num": 0.02587890625, "loss_xval": 0.546875, "num_input_tokens_seen": 270234436, "step": 4310 }, { "epoch": 14.346089850249584, "grad_norm": 21.397348403930664, "learning_rate": 5e-06, "loss": 0.5992, "num_input_tokens_seen": 270296004, "step": 4311 }, { "epoch": 14.346089850249584, "loss": 0.3887980282306671, "loss_ce": 0.00018717760394793004, "loss_iou": 0.154296875, "loss_num": 0.01611328125, "loss_xval": 0.388671875, "num_input_tokens_seen": 270296004, "step": 4311 }, { "epoch": 14.349417637271214, "grad_norm": 21.364896774291992, "learning_rate": 5e-06, "loss": 0.5202, "num_input_tokens_seen": 270358196, "step": 4312 }, { "epoch": 14.349417637271214, "loss": 0.43103623390197754, "loss_ce": 5.973474799247924e-06, "loss_iou": 0.1650390625, "loss_num": 0.020263671875, "loss_xval": 0.431640625, "num_input_tokens_seen": 270358196, "step": 4312 }, { "epoch": 14.352745424292845, "grad_norm": 13.638509750366211, "learning_rate": 5e-06, "loss": 0.4814, "num_input_tokens_seen": 270421328, "step": 4313 }, { "epoch": 14.352745424292845, "loss": 0.4574071168899536, "loss_ce": 9.681136361905374e-06, "loss_iou": 0.162109375, "loss_num": 0.026611328125, "loss_xval": 0.45703125, "num_input_tokens_seen": 270421328, "step": 4313 }, { "epoch": 14.356073211314476, "grad_norm": 14.205890655517578, "learning_rate": 5e-06, "loss": 0.4553, "num_input_tokens_seen": 270484716, "step": 4314 }, { "epoch": 14.356073211314476, "loss": 0.4262295365333557, "loss_ce": 0.00014308006211649626, "loss_iou": 0.1796875, "loss_num": 0.01336669921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 270484716, "step": 4314 }, { "epoch": 14.359400998336106, "grad_norm": 21.462020874023438, "learning_rate": 5e-06, "loss": 0.4135, "num_input_tokens_seen": 270547884, "step": 4315 }, { "epoch": 14.359400998336106, "loss": 0.4087067246437073, "loss_ce": 1.5309187801904045e-05, "loss_iou": 0.158203125, "loss_num": 0.0184326171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 270547884, "step": 4315 }, { "epoch": 14.362728785357737, "grad_norm": 9.136557579040527, "learning_rate": 5e-06, "loss": 0.6716, "num_input_tokens_seen": 270610984, "step": 4316 }, { "epoch": 14.362728785357737, "loss": 0.7414735555648804, "loss_ce": 1.8515791452955455e-05, "loss_iou": 0.291015625, "loss_num": 0.031494140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 270610984, "step": 4316 }, { "epoch": 14.366056572379367, "grad_norm": 9.093321800231934, "learning_rate": 5e-06, "loss": 0.417, "num_input_tokens_seen": 270673424, "step": 4317 }, { "epoch": 14.366056572379367, "loss": 0.43697217106819153, "loss_ce": 0.0004487437545321882, "loss_iou": 0.1337890625, "loss_num": 0.033935546875, "loss_xval": 0.4375, "num_input_tokens_seen": 270673424, "step": 4317 }, { "epoch": 14.369384359400998, "grad_norm": 17.667577743530273, "learning_rate": 5e-06, "loss": 0.4706, "num_input_tokens_seen": 270735404, "step": 4318 }, { "epoch": 14.369384359400998, "loss": 0.4727807641029358, "loss_ce": 2.4393443709413987e-06, "loss_iou": 0.1650390625, "loss_num": 0.0284423828125, "loss_xval": 0.47265625, "num_input_tokens_seen": 270735404, "step": 4318 }, { "epoch": 14.372712146422629, "grad_norm": 10.240601539611816, "learning_rate": 5e-06, "loss": 0.3681, "num_input_tokens_seen": 270797904, "step": 4319 }, { "epoch": 14.372712146422629, "loss": 0.462789386510849, "loss_ce": 2.085066444124095e-05, "loss_iou": 0.181640625, "loss_num": 0.019775390625, "loss_xval": 0.462890625, "num_input_tokens_seen": 270797904, "step": 4319 }, { "epoch": 14.37603993344426, "grad_norm": 22.783924102783203, "learning_rate": 5e-06, "loss": 0.5834, "num_input_tokens_seen": 270859700, "step": 4320 }, { "epoch": 14.37603993344426, "loss": 0.3510909974575043, "loss_ce": 1.678466287557967e-05, "loss_iou": 0.10107421875, "loss_num": 0.02978515625, "loss_xval": 0.3515625, "num_input_tokens_seen": 270859700, "step": 4320 }, { "epoch": 14.37936772046589, "grad_norm": 29.118526458740234, "learning_rate": 5e-06, "loss": 0.5382, "num_input_tokens_seen": 270923544, "step": 4321 }, { "epoch": 14.37936772046589, "loss": 0.45869719982147217, "loss_ce": 1.8010738131124526e-05, "loss_iou": 0.181640625, "loss_num": 0.0191650390625, "loss_xval": 0.458984375, "num_input_tokens_seen": 270923544, "step": 4321 }, { "epoch": 14.38269550748752, "grad_norm": 29.42204475402832, "learning_rate": 5e-06, "loss": 0.5446, "num_input_tokens_seen": 270984668, "step": 4322 }, { "epoch": 14.38269550748752, "loss": 0.6400872468948364, "loss_ce": 7.261104474309832e-05, "loss_iou": 0.2578125, "loss_num": 0.024658203125, "loss_xval": 0.640625, "num_input_tokens_seen": 270984668, "step": 4322 }, { "epoch": 14.386023294509151, "grad_norm": 41.173866271972656, "learning_rate": 5e-06, "loss": 0.5833, "num_input_tokens_seen": 271047724, "step": 4323 }, { "epoch": 14.386023294509151, "loss": 0.5174872875213623, "loss_ce": 7.039316187729128e-07, "loss_iou": 0.224609375, "loss_num": 0.01361083984375, "loss_xval": 0.515625, "num_input_tokens_seen": 271047724, "step": 4323 }, { "epoch": 14.389351081530782, "grad_norm": 50.958621978759766, "learning_rate": 5e-06, "loss": 0.5623, "num_input_tokens_seen": 271110320, "step": 4324 }, { "epoch": 14.389351081530782, "loss": 0.7524846792221069, "loss_ce": 4.325476038502529e-05, "loss_iou": 0.275390625, "loss_num": 0.0400390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 271110320, "step": 4324 }, { "epoch": 14.392678868552412, "grad_norm": 41.847206115722656, "learning_rate": 5e-06, "loss": 0.5509, "num_input_tokens_seen": 271172504, "step": 4325 }, { "epoch": 14.392678868552412, "loss": 0.41565021872520447, "loss_ce": 8.042817967179872e-07, "loss_iou": 0.171875, "loss_num": 0.01446533203125, "loss_xval": 0.416015625, "num_input_tokens_seen": 271172504, "step": 4325 }, { "epoch": 14.396006655574043, "grad_norm": 18.551034927368164, "learning_rate": 5e-06, "loss": 0.4744, "num_input_tokens_seen": 271234596, "step": 4326 }, { "epoch": 14.396006655574043, "loss": 0.49939092993736267, "loss_ce": 1.2767928865287104e-06, "loss_iou": 0.1982421875, "loss_num": 0.0206298828125, "loss_xval": 0.5, "num_input_tokens_seen": 271234596, "step": 4326 }, { "epoch": 14.399334442595674, "grad_norm": 22.15739631652832, "learning_rate": 5e-06, "loss": 0.2915, "num_input_tokens_seen": 271298164, "step": 4327 }, { "epoch": 14.399334442595674, "loss": 0.3210485875606537, "loss_ce": 3.6780625123356003e-06, "loss_iou": 0.134765625, "loss_num": 0.0103759765625, "loss_xval": 0.3203125, "num_input_tokens_seen": 271298164, "step": 4327 }, { "epoch": 14.402662229617304, "grad_norm": 23.303667068481445, "learning_rate": 5e-06, "loss": 0.3749, "num_input_tokens_seen": 271360008, "step": 4328 }, { "epoch": 14.402662229617304, "loss": 0.27130264043807983, "loss_ce": 1.3549370123655535e-06, "loss_iou": 0.09912109375, "loss_num": 0.01458740234375, "loss_xval": 0.271484375, "num_input_tokens_seen": 271360008, "step": 4328 }, { "epoch": 14.405990016638935, "grad_norm": 6.598902702331543, "learning_rate": 5e-06, "loss": 0.3223, "num_input_tokens_seen": 271422892, "step": 4329 }, { "epoch": 14.405990016638935, "loss": 0.26006022095680237, "loss_ce": 3.5205834137741476e-05, "loss_iou": 0.080078125, "loss_num": 0.0198974609375, "loss_xval": 0.259765625, "num_input_tokens_seen": 271422892, "step": 4329 }, { "epoch": 14.409317803660565, "grad_norm": 18.97261619567871, "learning_rate": 5e-06, "loss": 0.4596, "num_input_tokens_seen": 271486188, "step": 4330 }, { "epoch": 14.409317803660565, "loss": 0.5011312365531921, "loss_ce": 0.0003682943352032453, "loss_iou": 0.201171875, "loss_num": 0.01953125, "loss_xval": 0.5, "num_input_tokens_seen": 271486188, "step": 4330 }, { "epoch": 14.412645590682196, "grad_norm": 21.229341506958008, "learning_rate": 5e-06, "loss": 0.6726, "num_input_tokens_seen": 271549248, "step": 4331 }, { "epoch": 14.412645590682196, "loss": 0.5169740319252014, "loss_ce": 6.25379607299692e-06, "loss_iou": 0.220703125, "loss_num": 0.01513671875, "loss_xval": 0.515625, "num_input_tokens_seen": 271549248, "step": 4331 }, { "epoch": 14.415973377703827, "grad_norm": 11.489543914794922, "learning_rate": 5e-06, "loss": 0.3613, "num_input_tokens_seen": 271609704, "step": 4332 }, { "epoch": 14.415973377703827, "loss": 0.4329024851322174, "loss_ce": 4.116824857192114e-05, "loss_iou": 0.1484375, "loss_num": 0.027099609375, "loss_xval": 0.43359375, "num_input_tokens_seen": 271609704, "step": 4332 }, { "epoch": 14.419301164725457, "grad_norm": 15.188780784606934, "learning_rate": 5e-06, "loss": 0.4236, "num_input_tokens_seen": 271673188, "step": 4333 }, { "epoch": 14.419301164725457, "loss": 0.5161935091018677, "loss_ce": 3.9074907363101374e-06, "loss_iou": 0.212890625, "loss_num": 0.017822265625, "loss_xval": 0.515625, "num_input_tokens_seen": 271673188, "step": 4333 }, { "epoch": 14.422628951747088, "grad_norm": 8.45056438446045, "learning_rate": 5e-06, "loss": 0.472, "num_input_tokens_seen": 271735412, "step": 4334 }, { "epoch": 14.422628951747088, "loss": 0.4090895652770996, "loss_ce": 1.4199109727996984e-06, "loss_iou": 0.142578125, "loss_num": 0.0247802734375, "loss_xval": 0.408203125, "num_input_tokens_seen": 271735412, "step": 4334 }, { "epoch": 14.425956738768718, "grad_norm": 8.602866172790527, "learning_rate": 5e-06, "loss": 0.5601, "num_input_tokens_seen": 271797492, "step": 4335 }, { "epoch": 14.425956738768718, "loss": 0.47649964690208435, "loss_ce": 5.9204856370342895e-05, "loss_iou": 0.2041015625, "loss_num": 0.0137939453125, "loss_xval": 0.4765625, "num_input_tokens_seen": 271797492, "step": 4335 }, { "epoch": 14.429284525790349, "grad_norm": 30.725181579589844, "learning_rate": 5e-06, "loss": 0.6086, "num_input_tokens_seen": 271861472, "step": 4336 }, { "epoch": 14.429284525790349, "loss": 0.5170307755470276, "loss_ce": 1.9848894226015545e-06, "loss_iou": 0.20703125, "loss_num": 0.0206298828125, "loss_xval": 0.515625, "num_input_tokens_seen": 271861472, "step": 4336 }, { "epoch": 14.43261231281198, "grad_norm": 32.68539047241211, "learning_rate": 5e-06, "loss": 0.3584, "num_input_tokens_seen": 271923896, "step": 4337 }, { "epoch": 14.43261231281198, "loss": 0.3881847560405731, "loss_ce": 1.1807126156782033e-06, "loss_iou": 0.1513671875, "loss_num": 0.0172119140625, "loss_xval": 0.388671875, "num_input_tokens_seen": 271923896, "step": 4337 }, { "epoch": 14.43594009983361, "grad_norm": 14.838808059692383, "learning_rate": 5e-06, "loss": 0.4595, "num_input_tokens_seen": 271986508, "step": 4338 }, { "epoch": 14.43594009983361, "loss": 0.47802940011024475, "loss_ce": 2.072071310976753e-06, "loss_iou": 0.212890625, "loss_num": 0.010498046875, "loss_xval": 0.478515625, "num_input_tokens_seen": 271986508, "step": 4338 }, { "epoch": 14.43926788685524, "grad_norm": 6.997244834899902, "learning_rate": 5e-06, "loss": 0.3978, "num_input_tokens_seen": 272050356, "step": 4339 }, { "epoch": 14.43926788685524, "loss": 0.31567680835723877, "loss_ce": 2.978351403726265e-06, "loss_iou": 0.1220703125, "loss_num": 0.0142822265625, "loss_xval": 0.31640625, "num_input_tokens_seen": 272050356, "step": 4339 }, { "epoch": 14.442595673876871, "grad_norm": 5.997839450836182, "learning_rate": 5e-06, "loss": 0.4032, "num_input_tokens_seen": 272114048, "step": 4340 }, { "epoch": 14.442595673876871, "loss": 0.3639224171638489, "loss_ce": 3.083857518504374e-05, "loss_iou": 0.1650390625, "loss_num": 0.0069580078125, "loss_xval": 0.36328125, "num_input_tokens_seen": 272114048, "step": 4340 }, { "epoch": 14.445923460898502, "grad_norm": 10.688399314880371, "learning_rate": 5e-06, "loss": 0.3718, "num_input_tokens_seen": 272176000, "step": 4341 }, { "epoch": 14.445923460898502, "loss": 0.361581027507782, "loss_ce": 0.0003444594913162291, "loss_iou": 0.150390625, "loss_num": 0.01220703125, "loss_xval": 0.361328125, "num_input_tokens_seen": 272176000, "step": 4341 }, { "epoch": 14.449251247920133, "grad_norm": 9.768308639526367, "learning_rate": 5e-06, "loss": 0.4968, "num_input_tokens_seen": 272239012, "step": 4342 }, { "epoch": 14.449251247920133, "loss": 0.2733427882194519, "loss_ce": 0.00027151257381774485, "loss_iou": 0.1083984375, "loss_num": 0.01129150390625, "loss_xval": 0.2734375, "num_input_tokens_seen": 272239012, "step": 4342 }, { "epoch": 14.452579034941763, "grad_norm": 11.229026794433594, "learning_rate": 5e-06, "loss": 0.5758, "num_input_tokens_seen": 272302700, "step": 4343 }, { "epoch": 14.452579034941763, "loss": 0.6046149730682373, "loss_ce": 7.196315436885925e-07, "loss_iou": 0.22265625, "loss_num": 0.031982421875, "loss_xval": 0.60546875, "num_input_tokens_seen": 272302700, "step": 4343 }, { "epoch": 14.455906821963394, "grad_norm": 15.154906272888184, "learning_rate": 5e-06, "loss": 0.3665, "num_input_tokens_seen": 272365876, "step": 4344 }, { "epoch": 14.455906821963394, "loss": 0.37094151973724365, "loss_ce": 3.0889288609614596e-05, "loss_iou": 0.1328125, "loss_num": 0.021240234375, "loss_xval": 0.37109375, "num_input_tokens_seen": 272365876, "step": 4344 }, { "epoch": 14.459234608985025, "grad_norm": 12.826123237609863, "learning_rate": 5e-06, "loss": 0.284, "num_input_tokens_seen": 272427404, "step": 4345 }, { "epoch": 14.459234608985025, "loss": 0.21820959448814392, "loss_ce": 0.00010046892566606402, "loss_iou": 0.06591796875, "loss_num": 0.0172119140625, "loss_xval": 0.2177734375, "num_input_tokens_seen": 272427404, "step": 4345 }, { "epoch": 14.462562396006655, "grad_norm": 9.485841751098633, "learning_rate": 5e-06, "loss": 0.2653, "num_input_tokens_seen": 272488872, "step": 4346 }, { "epoch": 14.462562396006655, "loss": 0.10568425804376602, "loss_ce": 1.886727432065527e-06, "loss_iou": 0.014404296875, "loss_num": 0.01531982421875, "loss_xval": 0.10546875, "num_input_tokens_seen": 272488872, "step": 4346 }, { "epoch": 14.465890183028286, "grad_norm": 9.939403533935547, "learning_rate": 5e-06, "loss": 0.4595, "num_input_tokens_seen": 272552184, "step": 4347 }, { "epoch": 14.465890183028286, "loss": 0.4805942177772522, "loss_ce": 3.418450432945974e-06, "loss_iou": 0.154296875, "loss_num": 0.0341796875, "loss_xval": 0.48046875, "num_input_tokens_seen": 272552184, "step": 4347 }, { "epoch": 14.469217970049916, "grad_norm": 19.85611915588379, "learning_rate": 5e-06, "loss": 0.5594, "num_input_tokens_seen": 272615556, "step": 4348 }, { "epoch": 14.469217970049916, "loss": 0.7963935136795044, "loss_ce": 6.8299832491902635e-06, "loss_iou": 0.28515625, "loss_num": 0.04541015625, "loss_xval": 0.796875, "num_input_tokens_seen": 272615556, "step": 4348 }, { "epoch": 14.472545757071547, "grad_norm": 8.625822067260742, "learning_rate": 5e-06, "loss": 0.4593, "num_input_tokens_seen": 272678460, "step": 4349 }, { "epoch": 14.472545757071547, "loss": 0.314820259809494, "loss_ce": 9.382747521158308e-07, "loss_iou": 0.130859375, "loss_num": 0.0107421875, "loss_xval": 0.314453125, "num_input_tokens_seen": 272678460, "step": 4349 }, { "epoch": 14.475873544093178, "grad_norm": 15.050008773803711, "learning_rate": 5e-06, "loss": 0.3757, "num_input_tokens_seen": 272741176, "step": 4350 }, { "epoch": 14.475873544093178, "loss": 0.28978803753852844, "loss_ce": 8.341184184246231e-06, "loss_iou": 0.09912109375, "loss_num": 0.018310546875, "loss_xval": 0.2890625, "num_input_tokens_seen": 272741176, "step": 4350 }, { "epoch": 14.479201331114808, "grad_norm": 16.437021255493164, "learning_rate": 5e-06, "loss": 0.3589, "num_input_tokens_seen": 272802612, "step": 4351 }, { "epoch": 14.479201331114808, "loss": 0.48044174909591675, "loss_ce": 3.554901013558265e-06, "loss_iou": 0.181640625, "loss_num": 0.0233154296875, "loss_xval": 0.48046875, "num_input_tokens_seen": 272802612, "step": 4351 }, { "epoch": 14.482529118136439, "grad_norm": 9.587303161621094, "learning_rate": 5e-06, "loss": 0.5375, "num_input_tokens_seen": 272866252, "step": 4352 }, { "epoch": 14.482529118136439, "loss": 0.2768338918685913, "loss_ce": 3.9485781599069014e-05, "loss_iou": 0.11279296875, "loss_num": 0.01019287109375, "loss_xval": 0.27734375, "num_input_tokens_seen": 272866252, "step": 4352 }, { "epoch": 14.48585690515807, "grad_norm": 22.50981330871582, "learning_rate": 5e-06, "loss": 0.5442, "num_input_tokens_seen": 272928464, "step": 4353 }, { "epoch": 14.48585690515807, "loss": 0.48803824186325073, "loss_ce": 1.1729946436389582e-06, "loss_iou": 0.171875, "loss_num": 0.0289306640625, "loss_xval": 0.48828125, "num_input_tokens_seen": 272928464, "step": 4353 }, { "epoch": 14.4891846921797, "grad_norm": 37.163238525390625, "learning_rate": 5e-06, "loss": 0.6032, "num_input_tokens_seen": 272991424, "step": 4354 }, { "epoch": 14.4891846921797, "loss": 0.31952035427093506, "loss_ce": 1.3033056802669307e-06, "loss_iou": 0.0966796875, "loss_num": 0.025146484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 272991424, "step": 4354 }, { "epoch": 14.49251247920133, "grad_norm": 33.00027847290039, "learning_rate": 5e-06, "loss": 0.766, "num_input_tokens_seen": 273055964, "step": 4355 }, { "epoch": 14.49251247920133, "loss": 0.9118112325668335, "loss_ce": 0.0006784539436921477, "loss_iou": 0.373046875, "loss_num": 0.032958984375, "loss_xval": 0.91015625, "num_input_tokens_seen": 273055964, "step": 4355 }, { "epoch": 14.495840266222961, "grad_norm": 43.3911247253418, "learning_rate": 5e-06, "loss": 0.4298, "num_input_tokens_seen": 273118000, "step": 4356 }, { "epoch": 14.495840266222961, "loss": 0.3955764174461365, "loss_ce": 7.564320185338147e-06, "loss_iou": 0.1435546875, "loss_num": 0.021728515625, "loss_xval": 0.396484375, "num_input_tokens_seen": 273118000, "step": 4356 }, { "epoch": 14.499168053244592, "grad_norm": 33.27085494995117, "learning_rate": 5e-06, "loss": 0.3616, "num_input_tokens_seen": 273180772, "step": 4357 }, { "epoch": 14.499168053244592, "loss": 0.4363442659378052, "loss_ce": 3.9165465750556905e-06, "loss_iou": 0.1923828125, "loss_num": 0.01019287109375, "loss_xval": 0.435546875, "num_input_tokens_seen": 273180772, "step": 4357 }, { "epoch": 14.502495840266223, "grad_norm": 9.424775123596191, "learning_rate": 5e-06, "loss": 0.391, "num_input_tokens_seen": 273243676, "step": 4358 }, { "epoch": 14.502495840266223, "loss": 0.45331889390945435, "loss_ce": 0.0018113128608092666, "loss_iou": 0.1796875, "loss_num": 0.0181884765625, "loss_xval": 0.451171875, "num_input_tokens_seen": 273243676, "step": 4358 }, { "epoch": 14.505823627287853, "grad_norm": 5.0066423416137695, "learning_rate": 5e-06, "loss": 0.5892, "num_input_tokens_seen": 273307944, "step": 4359 }, { "epoch": 14.505823627287853, "loss": 0.5397607088088989, "loss_ce": 5.736624734709039e-05, "loss_iou": 0.197265625, "loss_num": 0.029052734375, "loss_xval": 0.5390625, "num_input_tokens_seen": 273307944, "step": 4359 }, { "epoch": 14.509151414309484, "grad_norm": 10.695034980773926, "learning_rate": 5e-06, "loss": 0.478, "num_input_tokens_seen": 273371848, "step": 4360 }, { "epoch": 14.509151414309484, "loss": 0.36120718717575073, "loss_ce": 1.1105450994364219e-06, "loss_iou": 0.1318359375, "loss_num": 0.0196533203125, "loss_xval": 0.361328125, "num_input_tokens_seen": 273371848, "step": 4360 }, { "epoch": 14.512479201331114, "grad_norm": 9.637301445007324, "learning_rate": 5e-06, "loss": 0.378, "num_input_tokens_seen": 273434568, "step": 4361 }, { "epoch": 14.512479201331114, "loss": 0.5061667561531067, "loss_ce": 2.245663608846371e-06, "loss_iou": 0.19921875, "loss_num": 0.021240234375, "loss_xval": 0.5078125, "num_input_tokens_seen": 273434568, "step": 4361 }, { "epoch": 14.515806988352745, "grad_norm": 8.061670303344727, "learning_rate": 5e-06, "loss": 0.4395, "num_input_tokens_seen": 273497388, "step": 4362 }, { "epoch": 14.515806988352745, "loss": 0.4327434003353119, "loss_ce": 4.148275365878362e-06, "loss_iou": 0.17578125, "loss_num": 0.0162353515625, "loss_xval": 0.43359375, "num_input_tokens_seen": 273497388, "step": 4362 }, { "epoch": 14.519134775374376, "grad_norm": 10.086694717407227, "learning_rate": 5e-06, "loss": 0.4946, "num_input_tokens_seen": 273559452, "step": 4363 }, { "epoch": 14.519134775374376, "loss": 0.6074405908584595, "loss_ce": 1.8725337213254534e-05, "loss_iou": 0.259765625, "loss_num": 0.0174560546875, "loss_xval": 0.609375, "num_input_tokens_seen": 273559452, "step": 4363 }, { "epoch": 14.522462562396006, "grad_norm": 11.902276992797852, "learning_rate": 5e-06, "loss": 0.3594, "num_input_tokens_seen": 273622616, "step": 4364 }, { "epoch": 14.522462562396006, "loss": 0.2928107976913452, "loss_ce": 0.00014723424101248384, "loss_iou": 0.1005859375, "loss_num": 0.018310546875, "loss_xval": 0.29296875, "num_input_tokens_seen": 273622616, "step": 4364 }, { "epoch": 14.525790349417637, "grad_norm": 6.261537075042725, "learning_rate": 5e-06, "loss": 0.5077, "num_input_tokens_seen": 273685212, "step": 4365 }, { "epoch": 14.525790349417637, "loss": 0.5952982902526855, "loss_ce": 2.24269533646293e-05, "loss_iou": 0.2158203125, "loss_num": 0.032958984375, "loss_xval": 0.59375, "num_input_tokens_seen": 273685212, "step": 4365 }, { "epoch": 14.529118136439267, "grad_norm": 7.1007280349731445, "learning_rate": 5e-06, "loss": 0.4281, "num_input_tokens_seen": 273746964, "step": 4366 }, { "epoch": 14.529118136439267, "loss": 0.3156786262989044, "loss_ce": 4.7886323955026455e-06, "loss_iou": 0.11865234375, "loss_num": 0.0157470703125, "loss_xval": 0.31640625, "num_input_tokens_seen": 273746964, "step": 4366 }, { "epoch": 14.532445923460898, "grad_norm": 7.422349452972412, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 273809060, "step": 4367 }, { "epoch": 14.532445923460898, "loss": 0.4381692409515381, "loss_ce": 0.0012338121887296438, "loss_iou": 0.169921875, "loss_num": 0.019287109375, "loss_xval": 0.4375, "num_input_tokens_seen": 273809060, "step": 4367 }, { "epoch": 14.535773710482529, "grad_norm": 11.709922790527344, "learning_rate": 5e-06, "loss": 0.312, "num_input_tokens_seen": 273871576, "step": 4368 }, { "epoch": 14.535773710482529, "loss": 0.2994101047515869, "loss_ce": 2.146141014236491e-06, "loss_iou": 0.10791015625, "loss_num": 0.016845703125, "loss_xval": 0.298828125, "num_input_tokens_seen": 273871576, "step": 4368 }, { "epoch": 14.53910149750416, "grad_norm": 6.6025214195251465, "learning_rate": 5e-06, "loss": 0.3386, "num_input_tokens_seen": 273933160, "step": 4369 }, { "epoch": 14.53910149750416, "loss": 0.4512343406677246, "loss_ce": 1.4504918226521113e-06, "loss_iou": 0.1904296875, "loss_num": 0.013916015625, "loss_xval": 0.451171875, "num_input_tokens_seen": 273933160, "step": 4369 }, { "epoch": 14.54242928452579, "grad_norm": 14.495214462280273, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 273996636, "step": 4370 }, { "epoch": 14.54242928452579, "loss": 0.5918031930923462, "loss_ce": 6.2615404203825165e-06, "loss_iou": 0.220703125, "loss_num": 0.0299072265625, "loss_xval": 0.59375, "num_input_tokens_seen": 273996636, "step": 4370 }, { "epoch": 14.54575707154742, "grad_norm": 12.173548698425293, "learning_rate": 5e-06, "loss": 0.2506, "num_input_tokens_seen": 274057724, "step": 4371 }, { "epoch": 14.54575707154742, "loss": 0.16007372736930847, "loss_ce": 1.4072350040805759e-06, "loss_iou": 0.0615234375, "loss_num": 0.007415771484375, "loss_xval": 0.16015625, "num_input_tokens_seen": 274057724, "step": 4371 }, { "epoch": 14.549084858569051, "grad_norm": 18.637659072875977, "learning_rate": 5e-06, "loss": 0.443, "num_input_tokens_seen": 274121520, "step": 4372 }, { "epoch": 14.549084858569051, "loss": 0.49097123742103577, "loss_ce": 4.4580738176591694e-06, "loss_iou": 0.19140625, "loss_num": 0.021484375, "loss_xval": 0.490234375, "num_input_tokens_seen": 274121520, "step": 4372 }, { "epoch": 14.552412645590682, "grad_norm": 38.073978424072266, "learning_rate": 5e-06, "loss": 0.3426, "num_input_tokens_seen": 274184488, "step": 4373 }, { "epoch": 14.552412645590682, "loss": 0.3245964050292969, "loss_ce": 1.1441736205597408e-05, "loss_iou": 0.12109375, "loss_num": 0.0166015625, "loss_xval": 0.32421875, "num_input_tokens_seen": 274184488, "step": 4373 }, { "epoch": 14.555740432612312, "grad_norm": 42.6907958984375, "learning_rate": 5e-06, "loss": 0.6015, "num_input_tokens_seen": 274247636, "step": 4374 }, { "epoch": 14.555740432612312, "loss": 0.4598396420478821, "loss_ce": 7.756156037430628e-07, "loss_iou": 0.19921875, "loss_num": 0.012451171875, "loss_xval": 0.458984375, "num_input_tokens_seen": 274247636, "step": 4374 }, { "epoch": 14.559068219633943, "grad_norm": 19.633350372314453, "learning_rate": 5e-06, "loss": 0.3232, "num_input_tokens_seen": 274308388, "step": 4375 }, { "epoch": 14.559068219633943, "loss": 0.27501180768013, "loss_ce": 2.658601715666009e-06, "loss_iou": 0.107421875, "loss_num": 0.01202392578125, "loss_xval": 0.275390625, "num_input_tokens_seen": 274308388, "step": 4375 }, { "epoch": 14.562396006655574, "grad_norm": 13.447009086608887, "learning_rate": 5e-06, "loss": 0.4277, "num_input_tokens_seen": 274370184, "step": 4376 }, { "epoch": 14.562396006655574, "loss": 0.1970936357975006, "loss_ce": 1.1112778338429052e-05, "loss_iou": 0.04443359375, "loss_num": 0.021484375, "loss_xval": 0.197265625, "num_input_tokens_seen": 274370184, "step": 4376 }, { "epoch": 14.565723793677204, "grad_norm": 10.919513702392578, "learning_rate": 5e-06, "loss": 0.5617, "num_input_tokens_seen": 274434368, "step": 4377 }, { "epoch": 14.565723793677204, "loss": 0.33081164956092834, "loss_ce": 1.1043581480407738e-06, "loss_iou": 0.1376953125, "loss_num": 0.01123046875, "loss_xval": 0.330078125, "num_input_tokens_seen": 274434368, "step": 4377 }, { "epoch": 14.569051580698835, "grad_norm": 10.040543556213379, "learning_rate": 5e-06, "loss": 0.2827, "num_input_tokens_seen": 274494472, "step": 4378 }, { "epoch": 14.569051580698835, "loss": 0.2492895871400833, "loss_ce": 2.2004356651450507e-05, "loss_iou": 0.078125, "loss_num": 0.0186767578125, "loss_xval": 0.2490234375, "num_input_tokens_seen": 274494472, "step": 4378 }, { "epoch": 14.572379367720465, "grad_norm": 27.83384895324707, "learning_rate": 5e-06, "loss": 0.4136, "num_input_tokens_seen": 274556860, "step": 4379 }, { "epoch": 14.572379367720465, "loss": 0.41305702924728394, "loss_ce": 1.6203562154259998e-06, "loss_iou": 0.177734375, "loss_num": 0.01129150390625, "loss_xval": 0.412109375, "num_input_tokens_seen": 274556860, "step": 4379 }, { "epoch": 14.575707154742096, "grad_norm": 22.169631958007812, "learning_rate": 5e-06, "loss": 0.4914, "num_input_tokens_seen": 274619900, "step": 4380 }, { "epoch": 14.575707154742096, "loss": 0.2503684461116791, "loss_ce": 2.2439278382080374e-06, "loss_iou": 0.09814453125, "loss_num": 0.01092529296875, "loss_xval": 0.25, "num_input_tokens_seen": 274619900, "step": 4380 }, { "epoch": 14.579034941763727, "grad_norm": 8.153644561767578, "learning_rate": 5e-06, "loss": 0.3185, "num_input_tokens_seen": 274682680, "step": 4381 }, { "epoch": 14.579034941763727, "loss": 0.2596004605293274, "loss_ce": 1.7941856640391052e-05, "loss_iou": 0.0849609375, "loss_num": 0.0179443359375, "loss_xval": 0.259765625, "num_input_tokens_seen": 274682680, "step": 4381 }, { "epoch": 14.582362728785357, "grad_norm": 16.57394027709961, "learning_rate": 5e-06, "loss": 0.603, "num_input_tokens_seen": 274744412, "step": 4382 }, { "epoch": 14.582362728785357, "loss": 0.5609146356582642, "loss_ce": 1.5434380884471466e-06, "loss_iou": 0.2314453125, "loss_num": 0.01953125, "loss_xval": 0.5625, "num_input_tokens_seen": 274744412, "step": 4382 }, { "epoch": 14.585690515806988, "grad_norm": 21.537227630615234, "learning_rate": 5e-06, "loss": 0.5298, "num_input_tokens_seen": 274805616, "step": 4383 }, { "epoch": 14.585690515806988, "loss": 0.4543471932411194, "loss_ce": 1.514305608907307e-06, "loss_iou": 0.1689453125, "loss_num": 0.0233154296875, "loss_xval": 0.455078125, "num_input_tokens_seen": 274805616, "step": 4383 }, { "epoch": 14.589018302828618, "grad_norm": 7.3515424728393555, "learning_rate": 5e-06, "loss": 0.7048, "num_input_tokens_seen": 274868936, "step": 4384 }, { "epoch": 14.589018302828618, "loss": 0.48816046118736267, "loss_ce": 1.2784499858753406e-06, "loss_iou": 0.1767578125, "loss_num": 0.027099609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 274868936, "step": 4384 }, { "epoch": 14.592346089850249, "grad_norm": 14.305977821350098, "learning_rate": 5e-06, "loss": 0.3762, "num_input_tokens_seen": 274930836, "step": 4385 }, { "epoch": 14.592346089850249, "loss": 0.5598316788673401, "loss_ce": 1.97461963580281e-06, "loss_iou": 0.19140625, "loss_num": 0.03564453125, "loss_xval": 0.55859375, "num_input_tokens_seen": 274930836, "step": 4385 }, { "epoch": 14.59567387687188, "grad_norm": 21.381214141845703, "learning_rate": 5e-06, "loss": 0.397, "num_input_tokens_seen": 274993880, "step": 4386 }, { "epoch": 14.59567387687188, "loss": 0.45130422711372375, "loss_ce": 1.0301153452019207e-05, "loss_iou": 0.1708984375, "loss_num": 0.021728515625, "loss_xval": 0.451171875, "num_input_tokens_seen": 274993880, "step": 4386 }, { "epoch": 14.59900166389351, "grad_norm": 18.444046020507812, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 275056964, "step": 4387 }, { "epoch": 14.59900166389351, "loss": 0.4404313862323761, "loss_ce": 1.6857790114954696e-06, "loss_iou": 0.189453125, "loss_num": 0.01220703125, "loss_xval": 0.44140625, "num_input_tokens_seen": 275056964, "step": 4387 }, { "epoch": 14.602329450915141, "grad_norm": 6.5251007080078125, "learning_rate": 5e-06, "loss": 0.2722, "num_input_tokens_seen": 275120568, "step": 4388 }, { "epoch": 14.602329450915141, "loss": 0.3375011384487152, "loss_ce": 3.7757785321446136e-05, "loss_iou": 0.1162109375, "loss_num": 0.02099609375, "loss_xval": 0.337890625, "num_input_tokens_seen": 275120568, "step": 4388 }, { "epoch": 14.605657237936772, "grad_norm": 27.045808792114258, "learning_rate": 5e-06, "loss": 0.3273, "num_input_tokens_seen": 275183840, "step": 4389 }, { "epoch": 14.605657237936772, "loss": 0.3381637632846832, "loss_ce": 2.897229205700569e-05, "loss_iou": 0.1455078125, "loss_num": 0.00958251953125, "loss_xval": 0.337890625, "num_input_tokens_seen": 275183840, "step": 4389 }, { "epoch": 14.608985024958402, "grad_norm": 54.29413986206055, "learning_rate": 5e-06, "loss": 0.6357, "num_input_tokens_seen": 275247028, "step": 4390 }, { "epoch": 14.608985024958402, "loss": 0.6120638847351074, "loss_ce": 3.3430821986257797e-06, "loss_iou": 0.2451171875, "loss_num": 0.024658203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 275247028, "step": 4390 }, { "epoch": 14.612312811980033, "grad_norm": 38.098731994628906, "learning_rate": 5e-06, "loss": 0.5057, "num_input_tokens_seen": 275309580, "step": 4391 }, { "epoch": 14.612312811980033, "loss": 0.5858173966407776, "loss_ce": 1.967508524103323e-06, "loss_iou": 0.2265625, "loss_num": 0.0263671875, "loss_xval": 0.5859375, "num_input_tokens_seen": 275309580, "step": 4391 }, { "epoch": 14.615640599001663, "grad_norm": 20.856224060058594, "learning_rate": 5e-06, "loss": 0.5195, "num_input_tokens_seen": 275371848, "step": 4392 }, { "epoch": 14.615640599001663, "loss": 0.7561754584312439, "loss_ce": 1.0906262104981579e-05, "loss_iou": 0.29296875, "loss_num": 0.034423828125, "loss_xval": 0.7578125, "num_input_tokens_seen": 275371848, "step": 4392 }, { "epoch": 14.618968386023294, "grad_norm": 27.64605140686035, "learning_rate": 5e-06, "loss": 0.4327, "num_input_tokens_seen": 275433892, "step": 4393 }, { "epoch": 14.618968386023294, "loss": 0.35073214769363403, "loss_ce": 2.4142282200045884e-05, "loss_iou": 0.1416015625, "loss_num": 0.013427734375, "loss_xval": 0.3515625, "num_input_tokens_seen": 275433892, "step": 4393 }, { "epoch": 14.622296173044925, "grad_norm": 27.821861267089844, "learning_rate": 5e-06, "loss": 0.4986, "num_input_tokens_seen": 275497624, "step": 4394 }, { "epoch": 14.622296173044925, "loss": 0.42669743299484253, "loss_ce": 6.818207793912734e-07, "loss_iou": 0.181640625, "loss_num": 0.0126953125, "loss_xval": 0.42578125, "num_input_tokens_seen": 275497624, "step": 4394 }, { "epoch": 14.625623960066555, "grad_norm": 20.461109161376953, "learning_rate": 5e-06, "loss": 0.4985, "num_input_tokens_seen": 275561224, "step": 4395 }, { "epoch": 14.625623960066555, "loss": 0.40393775701522827, "loss_ce": 0.00012915796833112836, "loss_iou": 0.1689453125, "loss_num": 0.01318359375, "loss_xval": 0.404296875, "num_input_tokens_seen": 275561224, "step": 4395 }, { "epoch": 14.628951747088186, "grad_norm": 16.011869430541992, "learning_rate": 5e-06, "loss": 0.4856, "num_input_tokens_seen": 275622972, "step": 4396 }, { "epoch": 14.628951747088186, "loss": 0.46608757972717285, "loss_ce": 2.3106400476535782e-05, "loss_iou": 0.1865234375, "loss_num": 0.0185546875, "loss_xval": 0.466796875, "num_input_tokens_seen": 275622972, "step": 4396 }, { "epoch": 14.632279534109816, "grad_norm": 21.659160614013672, "learning_rate": 5e-06, "loss": 0.4345, "num_input_tokens_seen": 275685624, "step": 4397 }, { "epoch": 14.632279534109816, "loss": 0.4100455343723297, "loss_ce": 1.1364645615685731e-05, "loss_iou": 0.177734375, "loss_num": 0.0106201171875, "loss_xval": 0.41015625, "num_input_tokens_seen": 275685624, "step": 4397 }, { "epoch": 14.635607321131447, "grad_norm": 17.635883331298828, "learning_rate": 5e-06, "loss": 0.5399, "num_input_tokens_seen": 275747776, "step": 4398 }, { "epoch": 14.635607321131447, "loss": 0.4549591541290283, "loss_ce": 3.1043612125358777e-06, "loss_iou": 0.1455078125, "loss_num": 0.032958984375, "loss_xval": 0.455078125, "num_input_tokens_seen": 275747776, "step": 4398 }, { "epoch": 14.638935108153078, "grad_norm": 14.505583763122559, "learning_rate": 5e-06, "loss": 0.534, "num_input_tokens_seen": 275812144, "step": 4399 }, { "epoch": 14.638935108153078, "loss": 0.40833133459091187, "loss_ce": 6.136205229267944e-06, "loss_iou": 0.150390625, "loss_num": 0.021484375, "loss_xval": 0.408203125, "num_input_tokens_seen": 275812144, "step": 4399 }, { "epoch": 14.642262895174708, "grad_norm": 8.947648048400879, "learning_rate": 5e-06, "loss": 0.2846, "num_input_tokens_seen": 275873740, "step": 4400 }, { "epoch": 14.642262895174708, "loss": 0.2559526860713959, "loss_ce": 1.7654678003964364e-06, "loss_iou": 0.07275390625, "loss_num": 0.0220947265625, "loss_xval": 0.255859375, "num_input_tokens_seen": 275873740, "step": 4400 }, { "epoch": 14.645590682196339, "grad_norm": 18.310165405273438, "learning_rate": 5e-06, "loss": 0.5489, "num_input_tokens_seen": 275936608, "step": 4401 }, { "epoch": 14.645590682196339, "loss": 0.5800797343254089, "loss_ce": 1.6049342548285495e-06, "loss_iou": 0.24609375, "loss_num": 0.0177001953125, "loss_xval": 0.578125, "num_input_tokens_seen": 275936608, "step": 4401 }, { "epoch": 14.64891846921797, "grad_norm": 14.345108032226562, "learning_rate": 5e-06, "loss": 0.4478, "num_input_tokens_seen": 275997984, "step": 4402 }, { "epoch": 14.64891846921797, "loss": 0.32785969972610474, "loss_ce": 9.379607035953086e-06, "loss_iou": 0.0673828125, "loss_num": 0.03857421875, "loss_xval": 0.328125, "num_input_tokens_seen": 275997984, "step": 4402 }, { "epoch": 14.6522462562396, "grad_norm": 22.79925537109375, "learning_rate": 5e-06, "loss": 0.5843, "num_input_tokens_seen": 276062036, "step": 4403 }, { "epoch": 14.6522462562396, "loss": 0.6886192560195923, "loss_ce": 2.0628940546885133e-05, "loss_iou": 0.291015625, "loss_num": 0.021484375, "loss_xval": 0.6875, "num_input_tokens_seen": 276062036, "step": 4403 }, { "epoch": 14.65557404326123, "grad_norm": 32.61845779418945, "learning_rate": 5e-06, "loss": 0.4152, "num_input_tokens_seen": 276124108, "step": 4404 }, { "epoch": 14.65557404326123, "loss": 0.49342429637908936, "loss_ce": 1.609901119081769e-05, "loss_iou": 0.1923828125, "loss_num": 0.0216064453125, "loss_xval": 0.494140625, "num_input_tokens_seen": 276124108, "step": 4404 }, { "epoch": 14.658901830282861, "grad_norm": 23.477413177490234, "learning_rate": 5e-06, "loss": 0.3663, "num_input_tokens_seen": 276188052, "step": 4405 }, { "epoch": 14.658901830282861, "loss": 0.3542737662792206, "loss_ce": 2.570570541138295e-05, "loss_iou": 0.142578125, "loss_num": 0.01397705078125, "loss_xval": 0.353515625, "num_input_tokens_seen": 276188052, "step": 4405 }, { "epoch": 14.662229617304492, "grad_norm": 15.846478462219238, "learning_rate": 5e-06, "loss": 0.4701, "num_input_tokens_seen": 276251228, "step": 4406 }, { "epoch": 14.662229617304492, "loss": 0.3865387439727783, "loss_ce": 3.0987166610429995e-06, "loss_iou": 0.158203125, "loss_num": 0.01397705078125, "loss_xval": 0.38671875, "num_input_tokens_seen": 276251228, "step": 4406 }, { "epoch": 14.665557404326123, "grad_norm": 14.246919631958008, "learning_rate": 5e-06, "loss": 0.4592, "num_input_tokens_seen": 276314220, "step": 4407 }, { "epoch": 14.665557404326123, "loss": 0.3571954667568207, "loss_ce": 7.877952884882689e-05, "loss_iou": 0.14453125, "loss_num": 0.0137939453125, "loss_xval": 0.357421875, "num_input_tokens_seen": 276314220, "step": 4407 }, { "epoch": 14.668885191347753, "grad_norm": 7.656552314758301, "learning_rate": 5e-06, "loss": 0.4491, "num_input_tokens_seen": 276377712, "step": 4408 }, { "epoch": 14.668885191347753, "loss": 0.4771760404109955, "loss_ce": 3.168236389683443e-06, "loss_iou": 0.16796875, "loss_num": 0.0284423828125, "loss_xval": 0.4765625, "num_input_tokens_seen": 276377712, "step": 4408 }, { "epoch": 14.672212978369384, "grad_norm": 7.5447282791137695, "learning_rate": 5e-06, "loss": 0.2366, "num_input_tokens_seen": 276439124, "step": 4409 }, { "epoch": 14.672212978369384, "loss": 0.24718277156352997, "loss_ce": 5.655683253280586e-06, "loss_iou": 0.08154296875, "loss_num": 0.016845703125, "loss_xval": 0.2470703125, "num_input_tokens_seen": 276439124, "step": 4409 }, { "epoch": 14.675540765391014, "grad_norm": 9.480350494384766, "learning_rate": 5e-06, "loss": 0.3632, "num_input_tokens_seen": 276502176, "step": 4410 }, { "epoch": 14.675540765391014, "loss": 0.42162182927131653, "loss_ce": 5.2011651860084385e-05, "loss_iou": 0.1552734375, "loss_num": 0.0220947265625, "loss_xval": 0.421875, "num_input_tokens_seen": 276502176, "step": 4410 }, { "epoch": 14.678868552412645, "grad_norm": 21.061677932739258, "learning_rate": 5e-06, "loss": 0.6245, "num_input_tokens_seen": 276566000, "step": 4411 }, { "epoch": 14.678868552412645, "loss": 0.64813232421875, "loss_ce": 6.105707871029153e-05, "loss_iou": 0.25, "loss_num": 0.029296875, "loss_xval": 0.6484375, "num_input_tokens_seen": 276566000, "step": 4411 }, { "epoch": 14.682196339434276, "grad_norm": 33.53836441040039, "learning_rate": 5e-06, "loss": 0.6278, "num_input_tokens_seen": 276629836, "step": 4412 }, { "epoch": 14.682196339434276, "loss": 0.6157275438308716, "loss_ce": 4.924499080516398e-06, "loss_iou": 0.244140625, "loss_num": 0.025634765625, "loss_xval": 0.6171875, "num_input_tokens_seen": 276629836, "step": 4412 }, { "epoch": 14.685524126455906, "grad_norm": 30.444976806640625, "learning_rate": 5e-06, "loss": 0.5946, "num_input_tokens_seen": 276692708, "step": 4413 }, { "epoch": 14.685524126455906, "loss": 0.6987326145172119, "loss_ce": 2.1382231807365315e-06, "loss_iou": 0.26953125, "loss_num": 0.031982421875, "loss_xval": 0.69921875, "num_input_tokens_seen": 276692708, "step": 4413 }, { "epoch": 14.688851913477537, "grad_norm": 26.877092361450195, "learning_rate": 5e-06, "loss": 0.4967, "num_input_tokens_seen": 276755200, "step": 4414 }, { "epoch": 14.688851913477537, "loss": 0.5265551209449768, "loss_ce": 1.243820042873267e-05, "loss_iou": 0.17578125, "loss_num": 0.03466796875, "loss_xval": 0.52734375, "num_input_tokens_seen": 276755200, "step": 4414 }, { "epoch": 14.692179700499167, "grad_norm": 6.068382740020752, "learning_rate": 5e-06, "loss": 0.2737, "num_input_tokens_seen": 276817072, "step": 4415 }, { "epoch": 14.692179700499167, "loss": 0.1558845192193985, "loss_ce": 7.25134270851413e-07, "loss_iou": 0.047119140625, "loss_num": 0.01239013671875, "loss_xval": 0.15625, "num_input_tokens_seen": 276817072, "step": 4415 }, { "epoch": 14.695507487520798, "grad_norm": 14.823811531066895, "learning_rate": 5e-06, "loss": 0.4902, "num_input_tokens_seen": 276878868, "step": 4416 }, { "epoch": 14.695507487520798, "loss": 0.6711479425430298, "loss_ce": 5.41061490366701e-06, "loss_iou": 0.25390625, "loss_num": 0.032958984375, "loss_xval": 0.671875, "num_input_tokens_seen": 276878868, "step": 4416 }, { "epoch": 14.698835274542429, "grad_norm": 7.799158573150635, "learning_rate": 5e-06, "loss": 0.2248, "num_input_tokens_seen": 276941472, "step": 4417 }, { "epoch": 14.698835274542429, "loss": 0.11747036874294281, "loss_ce": 8.216852620535064e-06, "loss_iou": 0.0286865234375, "loss_num": 0.01202392578125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 276941472, "step": 4417 }, { "epoch": 14.70216306156406, "grad_norm": 10.297386169433594, "learning_rate": 5e-06, "loss": 0.4004, "num_input_tokens_seen": 277003636, "step": 4418 }, { "epoch": 14.70216306156406, "loss": 0.5107603073120117, "loss_ce": 1.8109509255737066e-05, "loss_iou": 0.1923828125, "loss_num": 0.025146484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 277003636, "step": 4418 }, { "epoch": 14.70549084858569, "grad_norm": 8.654190063476562, "learning_rate": 5e-06, "loss": 0.4347, "num_input_tokens_seen": 277065768, "step": 4419 }, { "epoch": 14.70549084858569, "loss": 0.4387019872665405, "loss_ce": 0.0001643894356675446, "loss_iou": 0.1298828125, "loss_num": 0.03564453125, "loss_xval": 0.439453125, "num_input_tokens_seen": 277065768, "step": 4419 }, { "epoch": 14.70881863560732, "grad_norm": 10.706563949584961, "learning_rate": 5e-06, "loss": 0.5598, "num_input_tokens_seen": 277128892, "step": 4420 }, { "epoch": 14.70881863560732, "loss": 0.729148805141449, "loss_ce": 2.2828700821264647e-05, "loss_iou": 0.287109375, "loss_num": 0.0303955078125, "loss_xval": 0.73046875, "num_input_tokens_seen": 277128892, "step": 4420 }, { "epoch": 14.712146422628951, "grad_norm": 17.38576316833496, "learning_rate": 5e-06, "loss": 0.6229, "num_input_tokens_seen": 277192232, "step": 4421 }, { "epoch": 14.712146422628951, "loss": 0.7527759075164795, "loss_ce": 0.00015142618212848902, "loss_iou": 0.296875, "loss_num": 0.0322265625, "loss_xval": 0.75390625, "num_input_tokens_seen": 277192232, "step": 4421 }, { "epoch": 14.715474209650582, "grad_norm": 28.71639633178711, "learning_rate": 5e-06, "loss": 0.5754, "num_input_tokens_seen": 277255280, "step": 4422 }, { "epoch": 14.715474209650582, "loss": 0.5349102020263672, "loss_ce": 5.9154870541533455e-05, "loss_iou": 0.224609375, "loss_num": 0.0169677734375, "loss_xval": 0.53515625, "num_input_tokens_seen": 277255280, "step": 4422 }, { "epoch": 14.718801996672212, "grad_norm": 24.136993408203125, "learning_rate": 5e-06, "loss": 0.5413, "num_input_tokens_seen": 277317316, "step": 4423 }, { "epoch": 14.718801996672212, "loss": 0.5849724411964417, "loss_ce": 1.149810486822389e-05, "loss_iou": 0.234375, "loss_num": 0.0233154296875, "loss_xval": 0.5859375, "num_input_tokens_seen": 277317316, "step": 4423 }, { "epoch": 14.722129783693843, "grad_norm": 15.024176597595215, "learning_rate": 5e-06, "loss": 0.3382, "num_input_tokens_seen": 277379284, "step": 4424 }, { "epoch": 14.722129783693843, "loss": 0.32514023780822754, "loss_ce": 5.972583039692836e-06, "loss_iou": 0.119140625, "loss_num": 0.0172119140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 277379284, "step": 4424 }, { "epoch": 14.725457570715474, "grad_norm": 28.920265197753906, "learning_rate": 5e-06, "loss": 0.4844, "num_input_tokens_seen": 277442384, "step": 4425 }, { "epoch": 14.725457570715474, "loss": 0.4024553894996643, "loss_ce": 2.0090061298105866e-05, "loss_iou": 0.158203125, "loss_num": 0.01708984375, "loss_xval": 0.40234375, "num_input_tokens_seen": 277442384, "step": 4425 }, { "epoch": 14.728785357737104, "grad_norm": 31.71909523010254, "learning_rate": 5e-06, "loss": 0.6409, "num_input_tokens_seen": 277504344, "step": 4426 }, { "epoch": 14.728785357737104, "loss": 0.5945351719856262, "loss_ce": 6.974718417041004e-06, "loss_iou": 0.2265625, "loss_num": 0.0279541015625, "loss_xval": 0.59375, "num_input_tokens_seen": 277504344, "step": 4426 }, { "epoch": 14.732113144758735, "grad_norm": 39.308589935302734, "learning_rate": 5e-06, "loss": 0.4094, "num_input_tokens_seen": 277568064, "step": 4427 }, { "epoch": 14.732113144758735, "loss": 0.41041240096092224, "loss_ce": 0.0001340901362709701, "loss_iou": 0.17578125, "loss_num": 0.0115966796875, "loss_xval": 0.41015625, "num_input_tokens_seen": 277568064, "step": 4427 }, { "epoch": 14.735440931780365, "grad_norm": 22.519731521606445, "learning_rate": 5e-06, "loss": 0.475, "num_input_tokens_seen": 277628828, "step": 4428 }, { "epoch": 14.735440931780365, "loss": 0.5147721767425537, "loss_ce": 1.6893890233404818e-06, "loss_iou": 0.2138671875, "loss_num": 0.017578125, "loss_xval": 0.515625, "num_input_tokens_seen": 277628828, "step": 4428 }, { "epoch": 14.738768718801996, "grad_norm": 23.009490966796875, "learning_rate": 5e-06, "loss": 0.5463, "num_input_tokens_seen": 277692656, "step": 4429 }, { "epoch": 14.738768718801996, "loss": 0.5579595565795898, "loss_ce": 9.819919796427712e-05, "loss_iou": 0.21875, "loss_num": 0.0238037109375, "loss_xval": 0.55859375, "num_input_tokens_seen": 277692656, "step": 4429 }, { "epoch": 14.742096505823627, "grad_norm": 30.171539306640625, "learning_rate": 5e-06, "loss": 0.3379, "num_input_tokens_seen": 277754852, "step": 4430 }, { "epoch": 14.742096505823627, "loss": 0.4420204758644104, "loss_ce": 3.839280907413922e-06, "loss_iou": 0.193359375, "loss_num": 0.0108642578125, "loss_xval": 0.44140625, "num_input_tokens_seen": 277754852, "step": 4430 }, { "epoch": 14.745424292845257, "grad_norm": 44.78130340576172, "learning_rate": 5e-06, "loss": 0.5618, "num_input_tokens_seen": 277817468, "step": 4431 }, { "epoch": 14.745424292845257, "loss": 0.6276364326477051, "loss_ce": 1.1919742064492311e-05, "loss_iou": 0.27734375, "loss_num": 0.01513671875, "loss_xval": 0.62890625, "num_input_tokens_seen": 277817468, "step": 4431 }, { "epoch": 14.748752079866888, "grad_norm": 34.91379928588867, "learning_rate": 5e-06, "loss": 0.4505, "num_input_tokens_seen": 277879216, "step": 4432 }, { "epoch": 14.748752079866888, "loss": 0.5357301235198975, "loss_ce": 2.4553337425459176e-05, "loss_iou": 0.1884765625, "loss_num": 0.031494140625, "loss_xval": 0.53515625, "num_input_tokens_seen": 277879216, "step": 4432 }, { "epoch": 14.752079866888518, "grad_norm": 9.309161186218262, "learning_rate": 5e-06, "loss": 0.4075, "num_input_tokens_seen": 277941364, "step": 4433 }, { "epoch": 14.752079866888518, "loss": 0.23468106985092163, "loss_ce": 8.997348004413652e-07, "loss_iou": 0.07421875, "loss_num": 0.017333984375, "loss_xval": 0.234375, "num_input_tokens_seen": 277941364, "step": 4433 }, { "epoch": 14.755407653910149, "grad_norm": 8.480807304382324, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 278003920, "step": 4434 }, { "epoch": 14.755407653910149, "loss": 0.3415541350841522, "loss_ce": 1.4121749245532556e-06, "loss_iou": 0.1416015625, "loss_num": 0.011474609375, "loss_xval": 0.341796875, "num_input_tokens_seen": 278003920, "step": 4434 }, { "epoch": 14.75873544093178, "grad_norm": 15.962180137634277, "learning_rate": 5e-06, "loss": 0.3963, "num_input_tokens_seen": 278063008, "step": 4435 }, { "epoch": 14.75873544093178, "loss": 0.45007866621017456, "loss_ce": 5.388137196860043e-06, "loss_iou": 0.173828125, "loss_num": 0.0205078125, "loss_xval": 0.44921875, "num_input_tokens_seen": 278063008, "step": 4435 }, { "epoch": 14.76206322795341, "grad_norm": 10.526843070983887, "learning_rate": 5e-06, "loss": 0.52, "num_input_tokens_seen": 278126176, "step": 4436 }, { "epoch": 14.76206322795341, "loss": 0.5874671936035156, "loss_ce": 6.48185086902231e-05, "loss_iou": 0.2109375, "loss_num": 0.03271484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 278126176, "step": 4436 }, { "epoch": 14.765391014975041, "grad_norm": 9.148394584655762, "learning_rate": 5e-06, "loss": 0.36, "num_input_tokens_seen": 278189620, "step": 4437 }, { "epoch": 14.765391014975041, "loss": 0.3359537124633789, "loss_ce": 1.621810042706784e-05, "loss_iou": 0.1318359375, "loss_num": 0.01446533203125, "loss_xval": 0.3359375, "num_input_tokens_seen": 278189620, "step": 4437 }, { "epoch": 14.768718801996672, "grad_norm": 18.56692123413086, "learning_rate": 5e-06, "loss": 0.3699, "num_input_tokens_seen": 278249448, "step": 4438 }, { "epoch": 14.768718801996672, "loss": 0.23059141635894775, "loss_ce": 5.951853268015839e-07, "loss_iou": 0.0634765625, "loss_num": 0.020751953125, "loss_xval": 0.23046875, "num_input_tokens_seen": 278249448, "step": 4438 }, { "epoch": 14.772046589018302, "grad_norm": 26.267658233642578, "learning_rate": 5e-06, "loss": 0.5424, "num_input_tokens_seen": 278311992, "step": 4439 }, { "epoch": 14.772046589018302, "loss": 0.47318291664123535, "loss_ce": 9.942356700776145e-05, "loss_iou": 0.1533203125, "loss_num": 0.032958984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 278311992, "step": 4439 }, { "epoch": 14.775374376039933, "grad_norm": 14.36841106414795, "learning_rate": 5e-06, "loss": 0.4677, "num_input_tokens_seen": 278374804, "step": 4440 }, { "epoch": 14.775374376039933, "loss": 0.4836646318435669, "loss_ce": 2.205314376624301e-05, "loss_iou": 0.2001953125, "loss_num": 0.016845703125, "loss_xval": 0.484375, "num_input_tokens_seen": 278374804, "step": 4440 }, { "epoch": 14.778702163061563, "grad_norm": 16.45238494873047, "learning_rate": 5e-06, "loss": 0.6429, "num_input_tokens_seen": 278438688, "step": 4441 }, { "epoch": 14.778702163061563, "loss": 0.6975128650665283, "loss_ce": 3.105322775809327e-06, "loss_iou": 0.271484375, "loss_num": 0.0311279296875, "loss_xval": 0.69921875, "num_input_tokens_seen": 278438688, "step": 4441 }, { "epoch": 14.782029950083194, "grad_norm": 12.60893726348877, "learning_rate": 5e-06, "loss": 0.5818, "num_input_tokens_seen": 278502664, "step": 4442 }, { "epoch": 14.782029950083194, "loss": 0.49869731068611145, "loss_ce": 9.583160135662183e-06, "loss_iou": 0.20703125, "loss_num": 0.0167236328125, "loss_xval": 0.498046875, "num_input_tokens_seen": 278502664, "step": 4442 }, { "epoch": 14.785357737104825, "grad_norm": 8.64749526977539, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 278565140, "step": 4443 }, { "epoch": 14.785357737104825, "loss": 0.244967982172966, "loss_ce": 3.3734470434865216e-06, "loss_iou": 0.09326171875, "loss_num": 0.01171875, "loss_xval": 0.2451171875, "num_input_tokens_seen": 278565140, "step": 4443 }, { "epoch": 14.788685524126455, "grad_norm": 14.802993774414062, "learning_rate": 5e-06, "loss": 0.4007, "num_input_tokens_seen": 278628356, "step": 4444 }, { "epoch": 14.788685524126455, "loss": 0.27382606267929077, "loss_ce": 0.00020543081336654723, "loss_iou": 0.11376953125, "loss_num": 0.00927734375, "loss_xval": 0.2734375, "num_input_tokens_seen": 278628356, "step": 4444 }, { "epoch": 14.792013311148086, "grad_norm": 17.379623413085938, "learning_rate": 5e-06, "loss": 0.3752, "num_input_tokens_seen": 278691400, "step": 4445 }, { "epoch": 14.792013311148086, "loss": 0.5064117908477783, "loss_ce": 3.073038669754169e-06, "loss_iou": 0.1953125, "loss_num": 0.0234375, "loss_xval": 0.5078125, "num_input_tokens_seen": 278691400, "step": 4445 }, { "epoch": 14.795341098169716, "grad_norm": 26.701868057250977, "learning_rate": 5e-06, "loss": 0.5498, "num_input_tokens_seen": 278755420, "step": 4446 }, { "epoch": 14.795341098169716, "loss": 0.5086500644683838, "loss_ce": 4.4124783016741276e-05, "loss_iou": 0.20703125, "loss_num": 0.018798828125, "loss_xval": 0.5078125, "num_input_tokens_seen": 278755420, "step": 4446 }, { "epoch": 14.798668885191347, "grad_norm": 15.733376502990723, "learning_rate": 5e-06, "loss": 0.5602, "num_input_tokens_seen": 278819448, "step": 4447 }, { "epoch": 14.798668885191347, "loss": 0.5236829519271851, "loss_ce": 1.3360909179027658e-06, "loss_iou": 0.2197265625, "loss_num": 0.016845703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 278819448, "step": 4447 }, { "epoch": 14.801996672212978, "grad_norm": 7.581531047821045, "learning_rate": 5e-06, "loss": 0.5232, "num_input_tokens_seen": 278883084, "step": 4448 }, { "epoch": 14.801996672212978, "loss": 0.5674829483032227, "loss_ce": 3.905688936356455e-05, "loss_iou": 0.2314453125, "loss_num": 0.0208740234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 278883084, "step": 4448 }, { "epoch": 14.805324459234608, "grad_norm": 11.931282043457031, "learning_rate": 5e-06, "loss": 0.4639, "num_input_tokens_seen": 278945852, "step": 4449 }, { "epoch": 14.805324459234608, "loss": 0.49023866653442383, "loss_ce": 4.282980626157951e-06, "loss_iou": 0.1875, "loss_num": 0.0230712890625, "loss_xval": 0.490234375, "num_input_tokens_seen": 278945852, "step": 4449 }, { "epoch": 14.808652246256239, "grad_norm": 7.969456672668457, "learning_rate": 5e-06, "loss": 0.383, "num_input_tokens_seen": 279009164, "step": 4450 }, { "epoch": 14.808652246256239, "loss": 0.30286091566085815, "loss_ce": 4.479904418985825e-06, "loss_iou": 0.10498046875, "loss_num": 0.0184326171875, "loss_xval": 0.302734375, "num_input_tokens_seen": 279009164, "step": 4450 }, { "epoch": 14.81198003327787, "grad_norm": 24.813203811645508, "learning_rate": 5e-06, "loss": 0.5349, "num_input_tokens_seen": 279071760, "step": 4451 }, { "epoch": 14.81198003327787, "loss": 0.5273119211196899, "loss_ce": 0.0014635181287303567, "loss_iou": 0.1748046875, "loss_num": 0.035400390625, "loss_xval": 0.52734375, "num_input_tokens_seen": 279071760, "step": 4451 }, { "epoch": 14.8153078202995, "grad_norm": 37.699092864990234, "learning_rate": 5e-06, "loss": 0.4135, "num_input_tokens_seen": 279135796, "step": 4452 }, { "epoch": 14.8153078202995, "loss": 0.24855653941631317, "loss_ce": 2.1371581169660203e-05, "loss_iou": 0.095703125, "loss_num": 0.0113525390625, "loss_xval": 0.248046875, "num_input_tokens_seen": 279135796, "step": 4452 }, { "epoch": 14.81863560732113, "grad_norm": 27.721654891967773, "learning_rate": 5e-06, "loss": 0.2946, "num_input_tokens_seen": 279199160, "step": 4453 }, { "epoch": 14.81863560732113, "loss": 0.23317363858222961, "loss_ce": 8.03867878858e-05, "loss_iou": 0.0966796875, "loss_num": 0.0079345703125, "loss_xval": 0.2333984375, "num_input_tokens_seen": 279199160, "step": 4453 }, { "epoch": 14.821963394342761, "grad_norm": 26.123289108276367, "learning_rate": 5e-06, "loss": 0.498, "num_input_tokens_seen": 279260500, "step": 4454 }, { "epoch": 14.821963394342761, "loss": 0.6156148910522461, "loss_ce": 1.4323609320854302e-05, "loss_iou": 0.2392578125, "loss_num": 0.0277099609375, "loss_xval": 0.6171875, "num_input_tokens_seen": 279260500, "step": 4454 }, { "epoch": 14.825291181364392, "grad_norm": 21.67037010192871, "learning_rate": 5e-06, "loss": 0.7057, "num_input_tokens_seen": 279324216, "step": 4455 }, { "epoch": 14.825291181364392, "loss": 0.8286161422729492, "loss_ce": 2.8265999389986973e-06, "loss_iou": 0.345703125, "loss_num": 0.02783203125, "loss_xval": 0.828125, "num_input_tokens_seen": 279324216, "step": 4455 }, { "epoch": 14.828618968386023, "grad_norm": 28.85512924194336, "learning_rate": 5e-06, "loss": 0.5738, "num_input_tokens_seen": 279388200, "step": 4456 }, { "epoch": 14.828618968386023, "loss": 0.7161811590194702, "loss_ce": 2.511378625058569e-05, "loss_iou": 0.322265625, "loss_num": 0.01397705078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 279388200, "step": 4456 }, { "epoch": 14.831946755407653, "grad_norm": 10.29651165008545, "learning_rate": 5e-06, "loss": 0.2336, "num_input_tokens_seen": 279450784, "step": 4457 }, { "epoch": 14.831946755407653, "loss": 0.1559162586927414, "loss_ce": 1.9549534044926986e-06, "loss_iou": 0.051025390625, "loss_num": 0.01080322265625, "loss_xval": 0.15625, "num_input_tokens_seen": 279450784, "step": 4457 }, { "epoch": 14.835274542429284, "grad_norm": 13.085043907165527, "learning_rate": 5e-06, "loss": 0.2891, "num_input_tokens_seen": 279513096, "step": 4458 }, { "epoch": 14.835274542429284, "loss": 0.2287449836730957, "loss_ce": 4.7377227474498795e-07, "loss_iou": 0.091796875, "loss_num": 0.00897216796875, "loss_xval": 0.228515625, "num_input_tokens_seen": 279513096, "step": 4458 }, { "epoch": 14.838602329450914, "grad_norm": 16.66327667236328, "learning_rate": 5e-06, "loss": 0.521, "num_input_tokens_seen": 279574896, "step": 4459 }, { "epoch": 14.838602329450914, "loss": 0.22445186972618103, "loss_ce": 2.5592735255486332e-05, "loss_iou": 0.046142578125, "loss_num": 0.0264892578125, "loss_xval": 0.224609375, "num_input_tokens_seen": 279574896, "step": 4459 }, { "epoch": 14.841930116472545, "grad_norm": 5.633624076843262, "learning_rate": 5e-06, "loss": 0.5256, "num_input_tokens_seen": 279638248, "step": 4460 }, { "epoch": 14.841930116472545, "loss": 0.4275827407836914, "loss_ce": 9.43988311519206e-07, "loss_iou": 0.1669921875, "loss_num": 0.018798828125, "loss_xval": 0.427734375, "num_input_tokens_seen": 279638248, "step": 4460 }, { "epoch": 14.845257903494176, "grad_norm": 8.100128173828125, "learning_rate": 5e-06, "loss": 0.3138, "num_input_tokens_seen": 279701288, "step": 4461 }, { "epoch": 14.845257903494176, "loss": 0.323005735874176, "loss_ce": 7.708879820711445e-06, "loss_iou": 0.11083984375, "loss_num": 0.020263671875, "loss_xval": 0.322265625, "num_input_tokens_seen": 279701288, "step": 4461 }, { "epoch": 14.848585690515806, "grad_norm": 13.352751731872559, "learning_rate": 5e-06, "loss": 0.4209, "num_input_tokens_seen": 279763860, "step": 4462 }, { "epoch": 14.848585690515806, "loss": 0.36523938179016113, "loss_ce": 5.0072867452399805e-06, "loss_iou": 0.1240234375, "loss_num": 0.0233154296875, "loss_xval": 0.365234375, "num_input_tokens_seen": 279763860, "step": 4462 }, { "epoch": 14.851913477537437, "grad_norm": 29.394184112548828, "learning_rate": 5e-06, "loss": 0.3657, "num_input_tokens_seen": 279826496, "step": 4463 }, { "epoch": 14.851913477537437, "loss": 0.3645968735218048, "loss_ce": 3.366386408742983e-06, "loss_iou": 0.11865234375, "loss_num": 0.025390625, "loss_xval": 0.365234375, "num_input_tokens_seen": 279826496, "step": 4463 }, { "epoch": 14.855241264559067, "grad_norm": 22.59187889099121, "learning_rate": 5e-06, "loss": 0.4032, "num_input_tokens_seen": 279889000, "step": 4464 }, { "epoch": 14.855241264559067, "loss": 0.47487422823905945, "loss_ce": 2.0717605366371572e-05, "loss_iou": 0.185546875, "loss_num": 0.020751953125, "loss_xval": 0.474609375, "num_input_tokens_seen": 279889000, "step": 4464 }, { "epoch": 14.858569051580698, "grad_norm": 25.635038375854492, "learning_rate": 5e-06, "loss": 0.4788, "num_input_tokens_seen": 279951128, "step": 4465 }, { "epoch": 14.858569051580698, "loss": 0.5038115978240967, "loss_ce": 2.744383891695179e-05, "loss_iou": 0.18359375, "loss_num": 0.027099609375, "loss_xval": 0.50390625, "num_input_tokens_seen": 279951128, "step": 4465 }, { "epoch": 14.861896838602329, "grad_norm": 20.46647834777832, "learning_rate": 5e-06, "loss": 0.5165, "num_input_tokens_seen": 280013992, "step": 4466 }, { "epoch": 14.861896838602329, "loss": 0.6349215507507324, "loss_ce": 3.385189120308496e-05, "loss_iou": 0.24609375, "loss_num": 0.0286865234375, "loss_xval": 0.63671875, "num_input_tokens_seen": 280013992, "step": 4466 }, { "epoch": 14.86522462562396, "grad_norm": 11.763768196105957, "learning_rate": 5e-06, "loss": 0.4119, "num_input_tokens_seen": 280078504, "step": 4467 }, { "epoch": 14.86522462562396, "loss": 0.4125998616218567, "loss_ce": 2.187295649491716e-06, "loss_iou": 0.1748046875, "loss_num": 0.0126953125, "loss_xval": 0.412109375, "num_input_tokens_seen": 280078504, "step": 4467 }, { "epoch": 14.86855241264559, "grad_norm": 30.445154190063477, "learning_rate": 5e-06, "loss": 0.5485, "num_input_tokens_seen": 280141336, "step": 4468 }, { "epoch": 14.86855241264559, "loss": 0.7160234451293945, "loss_ce": 4.822464234166546e-06, "loss_iou": 0.287109375, "loss_num": 0.0281982421875, "loss_xval": 0.71484375, "num_input_tokens_seen": 280141336, "step": 4468 }, { "epoch": 14.87188019966722, "grad_norm": 13.947284698486328, "learning_rate": 5e-06, "loss": 0.4744, "num_input_tokens_seen": 280203412, "step": 4469 }, { "epoch": 14.87188019966722, "loss": 0.4786407947540283, "loss_ce": 3.1246786420524586e-06, "loss_iou": 0.1806640625, "loss_num": 0.0234375, "loss_xval": 0.478515625, "num_input_tokens_seen": 280203412, "step": 4469 }, { "epoch": 14.875207986688851, "grad_norm": 9.001945495605469, "learning_rate": 5e-06, "loss": 0.5097, "num_input_tokens_seen": 280266296, "step": 4470 }, { "epoch": 14.875207986688851, "loss": 0.5129414796829224, "loss_ce": 2.0731129097839585e-06, "loss_iou": 0.19140625, "loss_num": 0.026123046875, "loss_xval": 0.51171875, "num_input_tokens_seen": 280266296, "step": 4470 }, { "epoch": 14.878535773710482, "grad_norm": 7.69210147857666, "learning_rate": 5e-06, "loss": 0.3863, "num_input_tokens_seen": 280329096, "step": 4471 }, { "epoch": 14.878535773710482, "loss": 0.43335092067718506, "loss_ce": 1.3127880720276153e-06, "loss_iou": 0.1748046875, "loss_num": 0.0169677734375, "loss_xval": 0.43359375, "num_input_tokens_seen": 280329096, "step": 4471 }, { "epoch": 14.881863560732112, "grad_norm": 9.412554740905762, "learning_rate": 5e-06, "loss": 0.3559, "num_input_tokens_seen": 280392024, "step": 4472 }, { "epoch": 14.881863560732112, "loss": 0.3131720721721649, "loss_ce": 6.942979098312207e-07, "loss_iou": 0.111328125, "loss_num": 0.0181884765625, "loss_xval": 0.3125, "num_input_tokens_seen": 280392024, "step": 4472 }, { "epoch": 14.885191347753743, "grad_norm": 30.54673957824707, "learning_rate": 5e-06, "loss": 0.4949, "num_input_tokens_seen": 280455452, "step": 4473 }, { "epoch": 14.885191347753743, "loss": 0.5597951412200928, "loss_ce": 4.170662214164622e-05, "loss_iou": 0.2431640625, "loss_num": 0.0146484375, "loss_xval": 0.55859375, "num_input_tokens_seen": 280455452, "step": 4473 }, { "epoch": 14.888519134775374, "grad_norm": 37.20708084106445, "learning_rate": 5e-06, "loss": 0.5499, "num_input_tokens_seen": 280518584, "step": 4474 }, { "epoch": 14.888519134775374, "loss": 0.5750881433486938, "loss_ce": 1.4925844880053774e-05, "loss_iou": 0.208984375, "loss_num": 0.0311279296875, "loss_xval": 0.57421875, "num_input_tokens_seen": 280518584, "step": 4474 }, { "epoch": 14.891846921797004, "grad_norm": 19.713266372680664, "learning_rate": 5e-06, "loss": 0.4796, "num_input_tokens_seen": 280582328, "step": 4475 }, { "epoch": 14.891846921797004, "loss": 0.6166227459907532, "loss_ce": 0.0004423097416292876, "loss_iou": 0.255859375, "loss_num": 0.0208740234375, "loss_xval": 0.6171875, "num_input_tokens_seen": 280582328, "step": 4475 }, { "epoch": 14.895174708818635, "grad_norm": 25.01416015625, "learning_rate": 5e-06, "loss": 0.3421, "num_input_tokens_seen": 280644636, "step": 4476 }, { "epoch": 14.895174708818635, "loss": 0.35487452149391174, "loss_ce": 8.770434192229004e-07, "loss_iou": 0.1552734375, "loss_num": 0.0089111328125, "loss_xval": 0.35546875, "num_input_tokens_seen": 280644636, "step": 4476 }, { "epoch": 14.898502495840265, "grad_norm": 26.86385154724121, "learning_rate": 5e-06, "loss": 0.5049, "num_input_tokens_seen": 280707296, "step": 4477 }, { "epoch": 14.898502495840265, "loss": 0.7200193405151367, "loss_ce": 4.0977880416903645e-05, "loss_iou": 0.2890625, "loss_num": 0.0279541015625, "loss_xval": 0.71875, "num_input_tokens_seen": 280707296, "step": 4477 }, { "epoch": 14.901830282861896, "grad_norm": 16.752559661865234, "learning_rate": 5e-06, "loss": 0.5299, "num_input_tokens_seen": 280770956, "step": 4478 }, { "epoch": 14.901830282861896, "loss": 0.6673858165740967, "loss_ce": 0.00014949802425689995, "loss_iou": 0.271484375, "loss_num": 0.0250244140625, "loss_xval": 0.66796875, "num_input_tokens_seen": 280770956, "step": 4478 }, { "epoch": 14.905158069883527, "grad_norm": 6.045605182647705, "learning_rate": 5e-06, "loss": 0.6017, "num_input_tokens_seen": 280834536, "step": 4479 }, { "epoch": 14.905158069883527, "loss": 0.8004218339920044, "loss_ce": 0.00015938753494992852, "loss_iou": 0.31640625, "loss_num": 0.033935546875, "loss_xval": 0.80078125, "num_input_tokens_seen": 280834536, "step": 4479 }, { "epoch": 14.908485856905157, "grad_norm": 16.799558639526367, "learning_rate": 5e-06, "loss": 0.5373, "num_input_tokens_seen": 280897388, "step": 4480 }, { "epoch": 14.908485856905157, "loss": 0.6131317019462585, "loss_ce": 0.00036925289896316826, "loss_iou": 0.21875, "loss_num": 0.03515625, "loss_xval": 0.61328125, "num_input_tokens_seen": 280897388, "step": 4480 }, { "epoch": 14.911813643926788, "grad_norm": 10.636488914489746, "learning_rate": 5e-06, "loss": 0.45, "num_input_tokens_seen": 280960228, "step": 4481 }, { "epoch": 14.911813643926788, "loss": 0.21637842059135437, "loss_ce": 1.160664055532834e-06, "loss_iou": 0.07421875, "loss_num": 0.01348876953125, "loss_xval": 0.216796875, "num_input_tokens_seen": 280960228, "step": 4481 }, { "epoch": 14.915141430948418, "grad_norm": 9.380715370178223, "learning_rate": 5e-06, "loss": 0.4889, "num_input_tokens_seen": 281022248, "step": 4482 }, { "epoch": 14.915141430948418, "loss": 0.3765498399734497, "loss_ce": 2.3956525183166377e-05, "loss_iou": 0.130859375, "loss_num": 0.0230712890625, "loss_xval": 0.376953125, "num_input_tokens_seen": 281022248, "step": 4482 }, { "epoch": 14.918469217970049, "grad_norm": 6.782865047454834, "learning_rate": 5e-06, "loss": 0.4616, "num_input_tokens_seen": 281084164, "step": 4483 }, { "epoch": 14.918469217970049, "loss": 0.2147846519947052, "loss_ce": 1.937228716997197e-06, "loss_iou": 0.059326171875, "loss_num": 0.0191650390625, "loss_xval": 0.21484375, "num_input_tokens_seen": 281084164, "step": 4483 }, { "epoch": 14.92179700499168, "grad_norm": 9.148962020874023, "learning_rate": 5e-06, "loss": 0.4143, "num_input_tokens_seen": 281146120, "step": 4484 }, { "epoch": 14.92179700499168, "loss": 0.4603465795516968, "loss_ce": 0.0002025184512604028, "loss_iou": 0.1630859375, "loss_num": 0.0267333984375, "loss_xval": 0.4609375, "num_input_tokens_seen": 281146120, "step": 4484 }, { "epoch": 14.92512479201331, "grad_norm": 11.567804336547852, "learning_rate": 5e-06, "loss": 0.2942, "num_input_tokens_seen": 281208952, "step": 4485 }, { "epoch": 14.92512479201331, "loss": 0.3358781337738037, "loss_ce": 1.6806482108222554e-06, "loss_iou": 0.1357421875, "loss_num": 0.0128173828125, "loss_xval": 0.3359375, "num_input_tokens_seen": 281208952, "step": 4485 }, { "epoch": 14.928452579034941, "grad_norm": 20.582063674926758, "learning_rate": 5e-06, "loss": 0.4819, "num_input_tokens_seen": 281272280, "step": 4486 }, { "epoch": 14.928452579034941, "loss": 0.6651656031608582, "loss_ce": 0.0007979340152814984, "loss_iou": 0.255859375, "loss_num": 0.0301513671875, "loss_xval": 0.6640625, "num_input_tokens_seen": 281272280, "step": 4486 }, { "epoch": 14.931780366056572, "grad_norm": 19.08031463623047, "learning_rate": 5e-06, "loss": 0.4472, "num_input_tokens_seen": 281336548, "step": 4487 }, { "epoch": 14.931780366056572, "loss": 0.42031657695770264, "loss_ce": 2.8497062885435298e-05, "loss_iou": 0.173828125, "loss_num": 0.01446533203125, "loss_xval": 0.419921875, "num_input_tokens_seen": 281336548, "step": 4487 }, { "epoch": 14.935108153078202, "grad_norm": 14.542515754699707, "learning_rate": 5e-06, "loss": 0.3669, "num_input_tokens_seen": 281399908, "step": 4488 }, { "epoch": 14.935108153078202, "loss": 0.3366711735725403, "loss_ce": 1.24955158753437e-06, "loss_iou": 0.1337890625, "loss_num": 0.01397705078125, "loss_xval": 0.3359375, "num_input_tokens_seen": 281399908, "step": 4488 }, { "epoch": 14.938435940099833, "grad_norm": 11.501893997192383, "learning_rate": 5e-06, "loss": 0.3964, "num_input_tokens_seen": 281460976, "step": 4489 }, { "epoch": 14.938435940099833, "loss": 0.2835092544555664, "loss_ce": 9.399119562658598e-07, "loss_iou": 0.09765625, "loss_num": 0.017822265625, "loss_xval": 0.283203125, "num_input_tokens_seen": 281460976, "step": 4489 }, { "epoch": 14.941763727121465, "grad_norm": 12.96999454498291, "learning_rate": 5e-06, "loss": 0.5421, "num_input_tokens_seen": 281524928, "step": 4490 }, { "epoch": 14.941763727121465, "loss": 0.5098999738693237, "loss_ce": 0.0003784933651331812, "loss_iou": 0.1923828125, "loss_num": 0.025146484375, "loss_xval": 0.5078125, "num_input_tokens_seen": 281524928, "step": 4490 }, { "epoch": 14.945091514143094, "grad_norm": 8.574435234069824, "learning_rate": 5e-06, "loss": 0.3955, "num_input_tokens_seen": 281587064, "step": 4491 }, { "epoch": 14.945091514143094, "loss": 0.3652832806110382, "loss_ce": 4.890588388661854e-05, "loss_iou": 0.1337890625, "loss_num": 0.0196533203125, "loss_xval": 0.365234375, "num_input_tokens_seen": 281587064, "step": 4491 }, { "epoch": 14.948419301164726, "grad_norm": 9.557830810546875, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 281650304, "step": 4492 }, { "epoch": 14.948419301164726, "loss": 0.5292978286743164, "loss_ce": 9.43827842547762e-07, "loss_iou": 0.20703125, "loss_num": 0.0230712890625, "loss_xval": 0.53125, "num_input_tokens_seen": 281650304, "step": 4492 }, { "epoch": 14.951747088186355, "grad_norm": 11.43794059753418, "learning_rate": 5e-06, "loss": 0.3131, "num_input_tokens_seen": 281712328, "step": 4493 }, { "epoch": 14.951747088186355, "loss": 0.29992860555648804, "loss_ce": 1.8577394484964316e-06, "loss_iou": 0.1181640625, "loss_num": 0.0126953125, "loss_xval": 0.30078125, "num_input_tokens_seen": 281712328, "step": 4493 }, { "epoch": 14.955074875207988, "grad_norm": 14.445018768310547, "learning_rate": 5e-06, "loss": 0.5451, "num_input_tokens_seen": 281776656, "step": 4494 }, { "epoch": 14.955074875207988, "loss": 0.6211732625961304, "loss_ce": 0.0016664352733641863, "loss_iou": 0.232421875, "loss_num": 0.031005859375, "loss_xval": 0.62109375, "num_input_tokens_seen": 281776656, "step": 4494 }, { "epoch": 14.958402662229616, "grad_norm": 19.493574142456055, "learning_rate": 5e-06, "loss": 0.3767, "num_input_tokens_seen": 281839052, "step": 4495 }, { "epoch": 14.958402662229616, "loss": 0.431080162525177, "loss_ce": 4.988308864994906e-05, "loss_iou": 0.1513671875, "loss_num": 0.0257568359375, "loss_xval": 0.431640625, "num_input_tokens_seen": 281839052, "step": 4495 }, { "epoch": 14.961730449251249, "grad_norm": 12.31751537322998, "learning_rate": 5e-06, "loss": 0.297, "num_input_tokens_seen": 281901836, "step": 4496 }, { "epoch": 14.961730449251249, "loss": 0.23285368084907532, "loss_ce": 4.5519141167460475e-06, "loss_iou": 0.07861328125, "loss_num": 0.01513671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 281901836, "step": 4496 }, { "epoch": 14.965058236272878, "grad_norm": 21.429702758789062, "learning_rate": 5e-06, "loss": 0.5748, "num_input_tokens_seen": 281964904, "step": 4497 }, { "epoch": 14.965058236272878, "loss": 0.6006070375442505, "loss_ce": 0.00014316457964014262, "loss_iou": 0.22265625, "loss_num": 0.03076171875, "loss_xval": 0.6015625, "num_input_tokens_seen": 281964904, "step": 4497 }, { "epoch": 14.96838602329451, "grad_norm": 51.37761688232422, "learning_rate": 5e-06, "loss": 0.5471, "num_input_tokens_seen": 282028356, "step": 4498 }, { "epoch": 14.96838602329451, "loss": 0.6128571033477783, "loss_ce": 3.1024255804368295e-06, "loss_iou": 0.228515625, "loss_num": 0.031005859375, "loss_xval": 0.61328125, "num_input_tokens_seen": 282028356, "step": 4498 }, { "epoch": 14.971713810316139, "grad_norm": 34.49252700805664, "learning_rate": 5e-06, "loss": 0.3966, "num_input_tokens_seen": 282090628, "step": 4499 }, { "epoch": 14.971713810316139, "loss": 0.28802645206451416, "loss_ce": 1.5645086932636332e-06, "loss_iou": 0.08984375, "loss_num": 0.021728515625, "loss_xval": 0.287109375, "num_input_tokens_seen": 282090628, "step": 4499 }, { "epoch": 14.975041597337771, "grad_norm": 14.890804290771484, "learning_rate": 5e-06, "loss": 0.3662, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "eval_seeclick_CIoU": 0.03235625382512808, "eval_seeclick_GIoU": 0.02637081453576684, "eval_seeclick_IoU": 0.15843632072210312, "eval_seeclick_MAE_all": 0.1699148416519165, "eval_seeclick_MAE_h": 0.07641784101724625, "eval_seeclick_MAE_w": 0.13416019454598427, "eval_seeclick_MAE_x_boxes": 0.20355188101530075, "eval_seeclick_MAE_y_boxes": 0.18394551426172256, "eval_seeclick_NUM_probability": 0.9999759495258331, "eval_seeclick_inside_bbox": 0.17812500149011612, "eval_seeclick_loss": 3.0007259845733643, "eval_seeclick_loss_ce": 0.17055770754814148, "eval_seeclick_loss_iou": 0.988525390625, "eval_seeclick_loss_num": 0.17266082763671875, "eval_seeclick_loss_xval": 2.8408203125, "eval_seeclick_runtime": 70.2645, "eval_seeclick_samples_per_second": 0.669, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "eval_icons_CIoU": -0.06467249430716038, "eval_icons_GIoU": 0.025919748237356544, "eval_icons_IoU": 0.11227575689554214, "eval_icons_MAE_all": 0.2064221203327179, "eval_icons_MAE_h": 0.18448376655578613, "eval_icons_MAE_w": 0.2180299609899521, "eval_icons_MAE_x_boxes": 0.14449850469827652, "eval_icons_MAE_y_boxes": 0.09101571515202522, "eval_icons_NUM_probability": 0.9999731183052063, "eval_icons_inside_bbox": 0.2326388955116272, "eval_icons_loss": 2.9256293773651123, "eval_icons_loss_ce": 3.98132169721066e-06, "eval_icons_loss_iou": 0.969482421875, "eval_icons_loss_num": 0.2015380859375, "eval_icons_loss_xval": 2.947265625, "eval_icons_runtime": 67.0074, "eval_icons_samples_per_second": 0.746, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "eval_screenspot_CIoU": 0.1849653646349907, "eval_screenspot_GIoU": 0.21696599821249643, "eval_screenspot_IoU": 0.2947640319665273, "eval_screenspot_MAE_all": 0.1130404199163119, "eval_screenspot_MAE_h": 0.06495961919426918, "eval_screenspot_MAE_w": 0.09426060194770496, "eval_screenspot_MAE_x_boxes": 0.15774365266164145, "eval_screenspot_MAE_y_boxes": 0.08488077918688457, "eval_screenspot_NUM_probability": 0.999993364016215, "eval_screenspot_inside_bbox": 0.5362499952316284, "eval_screenspot_loss": 2.176417350769043, "eval_screenspot_loss_ce": 0.0001420898904598289, "eval_screenspot_loss_iou": 0.7991536458333334, "eval_screenspot_loss_num": 0.12213897705078125, "eval_screenspot_loss_xval": 2.2086588541666665, "eval_screenspot_runtime": 142.8388, "eval_screenspot_samples_per_second": 0.623, "eval_screenspot_steps_per_second": 0.021, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "eval_compot_CIoU": 0.14449793100357056, "eval_compot_GIoU": 0.1965753138065338, "eval_compot_IoU": 0.27343665063381195, "eval_compot_MAE_all": 0.13349328935146332, "eval_compot_MAE_h": 0.06379309482872486, "eval_compot_MAE_w": 0.1592893972992897, "eval_compot_MAE_x_boxes": 0.11272940412163734, "eval_compot_MAE_y_boxes": 0.09537710249423981, "eval_compot_NUM_probability": 0.9999967217445374, "eval_compot_inside_bbox": 0.4288194477558136, "eval_compot_loss": 2.285511016845703, "eval_compot_loss_ce": 0.006790464511141181, "eval_compot_loss_iou": 0.8294677734375, "eval_compot_loss_num": 0.1446514129638672, "eval_compot_loss_xval": 2.3818359375, "eval_compot_runtime": 69.4727, "eval_compot_samples_per_second": 0.72, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "eval_custom_ui_MAE_all": 0.06071800924837589, "eval_custom_ui_MAE_x": 0.07170315459370613, "eval_custom_ui_MAE_y": 0.049732865765690804, "eval_custom_ui_NUM_probability": 0.999998927116394, "eval_custom_ui_loss": 0.2859807312488556, "eval_custom_ui_loss_ce": 4.0202264699473744e-06, "eval_custom_ui_loss_num": 0.05950164794921875, "eval_custom_ui_loss_xval": 0.297271728515625, "eval_custom_ui_runtime": 51.821, "eval_custom_ui_samples_per_second": 0.965, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.975041597337771, "loss": 0.3192797899246216, "loss_ce": 4.8813553803483956e-06, "loss_iou": 0.0, "loss_num": 0.06396484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 282154196, "step": 4500 }, { "epoch": 14.9783693843594, "grad_norm": 17.14983367919922, "learning_rate": 5e-06, "loss": 0.6438, "num_input_tokens_seen": 282215872, "step": 4501 }, { "epoch": 14.9783693843594, "loss": 0.8416829705238342, "loss_ce": 8.144384082697798e-06, "loss_iou": 0.330078125, "loss_num": 0.036376953125, "loss_xval": 0.83984375, "num_input_tokens_seen": 282215872, "step": 4501 }, { "epoch": 14.981697171381033, "grad_norm": 22.990419387817383, "learning_rate": 5e-06, "loss": 0.4413, "num_input_tokens_seen": 282278264, "step": 4502 }, { "epoch": 14.981697171381033, "loss": 0.5105607509613037, "loss_ce": 1.6913658100747853e-06, "loss_iou": 0.224609375, "loss_num": 0.01220703125, "loss_xval": 0.51171875, "num_input_tokens_seen": 282278264, "step": 4502 }, { "epoch": 14.985024958402661, "grad_norm": 24.565719604492188, "learning_rate": 5e-06, "loss": 0.5421, "num_input_tokens_seen": 282342004, "step": 4503 }, { "epoch": 14.985024958402661, "loss": 0.6494170427322388, "loss_ce": 2.9905220344517147e-06, "loss_iou": 0.240234375, "loss_num": 0.033935546875, "loss_xval": 0.6484375, "num_input_tokens_seen": 282342004, "step": 4503 }, { "epoch": 14.988352745424294, "grad_norm": 19.464162826538086, "learning_rate": 5e-06, "loss": 0.4888, "num_input_tokens_seen": 282405328, "step": 4504 }, { "epoch": 14.988352745424294, "loss": 0.43432751297950745, "loss_ce": 1.3494840231942362e-06, "loss_iou": 0.1572265625, "loss_num": 0.0240478515625, "loss_xval": 0.43359375, "num_input_tokens_seen": 282405328, "step": 4504 }, { "epoch": 14.991680532445923, "grad_norm": 24.3284854888916, "learning_rate": 5e-06, "loss": 0.577, "num_input_tokens_seen": 282468128, "step": 4505 }, { "epoch": 14.991680532445923, "loss": 0.6341432929039001, "loss_ce": 0.00011007361899828538, "loss_iou": 0.248046875, "loss_num": 0.027587890625, "loss_xval": 0.6328125, "num_input_tokens_seen": 282468128, "step": 4505 }, { "epoch": 14.995008319467555, "grad_norm": 31.346296310424805, "learning_rate": 5e-06, "loss": 0.3843, "num_input_tokens_seen": 282531312, "step": 4506 }, { "epoch": 14.995008319467555, "loss": 0.39844048023223877, "loss_ce": 2.9513146273529856e-06, "loss_iou": 0.1650390625, "loss_num": 0.013671875, "loss_xval": 0.3984375, "num_input_tokens_seen": 282531312, "step": 4506 }, { "epoch": 14.998336106489184, "grad_norm": 20.354524612426758, "learning_rate": 5e-06, "loss": 0.4163, "num_input_tokens_seen": 282593760, "step": 4507 }, { "epoch": 14.998336106489184, "loss": 0.43822795152664185, "loss_ce": 0.00023967158631421626, "loss_iou": 0.1865234375, "loss_num": 0.012939453125, "loss_xval": 0.4375, "num_input_tokens_seen": 282593760, "step": 4507 }, { "epoch": 14.998336106489184, "loss": 0.31915396451950073, "loss_ce": 1.1151848866575165e-06, "loss_iou": 0.1181640625, "loss_num": 0.016357421875, "loss_xval": 0.318359375, "num_input_tokens_seen": 282625820, "step": 4507 }, { "epoch": 15.001663893510816, "grad_norm": 15.755663871765137, "learning_rate": 5e-06, "loss": 0.5682, "num_input_tokens_seen": 282657424, "step": 4508 }, { "epoch": 15.001663893510816, "loss": 0.817203938961029, "loss_ce": 0.0004314788384363055, "loss_iou": 0.310546875, "loss_num": 0.038818359375, "loss_xval": 0.81640625, "num_input_tokens_seen": 282657424, "step": 4508 }, { "epoch": 15.004991680532447, "grad_norm": 9.151430130004883, "learning_rate": 5e-06, "loss": 0.3262, "num_input_tokens_seen": 282717016, "step": 4509 }, { "epoch": 15.004991680532447, "loss": 0.5042070150375366, "loss_ce": 0.00030078738927841187, "loss_iou": 0.16015625, "loss_num": 0.036865234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 282717016, "step": 4509 }, { "epoch": 15.008319467554077, "grad_norm": 20.894622802734375, "learning_rate": 5e-06, "loss": 0.3707, "num_input_tokens_seen": 282780400, "step": 4510 }, { "epoch": 15.008319467554077, "loss": 0.37513697147369385, "loss_ce": 1.488865655119298e-05, "loss_iou": 0.1591796875, "loss_num": 0.01123046875, "loss_xval": 0.375, "num_input_tokens_seen": 282780400, "step": 4510 }, { "epoch": 15.011647254575708, "grad_norm": 23.898847579956055, "learning_rate": 5e-06, "loss": 0.4852, "num_input_tokens_seen": 282842624, "step": 4511 }, { "epoch": 15.011647254575708, "loss": 0.15289412438869476, "loss_ce": 1.0641608696460025e-06, "loss_iou": 0.0194091796875, "loss_num": 0.0228271484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 282842624, "step": 4511 }, { "epoch": 15.014975041597339, "grad_norm": 8.803077697753906, "learning_rate": 5e-06, "loss": 0.2779, "num_input_tokens_seen": 282905812, "step": 4512 }, { "epoch": 15.014975041597339, "loss": 0.34826934337615967, "loss_ce": 2.731310814851895e-06, "loss_iou": 0.1474609375, "loss_num": 0.01080322265625, "loss_xval": 0.34765625, "num_input_tokens_seen": 282905812, "step": 4512 }, { "epoch": 15.01830282861897, "grad_norm": 5.480289459228516, "learning_rate": 5e-06, "loss": 0.2758, "num_input_tokens_seen": 282967908, "step": 4513 }, { "epoch": 15.01830282861897, "loss": 0.3067801296710968, "loss_ce": 1.7434782421332784e-05, "loss_iou": 0.09814453125, "loss_num": 0.0220947265625, "loss_xval": 0.306640625, "num_input_tokens_seen": 282967908, "step": 4513 }, { "epoch": 15.0216306156406, "grad_norm": 7.246910572052002, "learning_rate": 5e-06, "loss": 0.2879, "num_input_tokens_seen": 283029488, "step": 4514 }, { "epoch": 15.0216306156406, "loss": 0.10633578896522522, "loss_ce": 5.831682574353181e-05, "loss_iou": 0.0, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 283029488, "step": 4514 }, { "epoch": 15.02495840266223, "grad_norm": 5.654145240783691, "learning_rate": 5e-06, "loss": 0.2117, "num_input_tokens_seen": 283090404, "step": 4515 }, { "epoch": 15.02495840266223, "loss": 0.2553728222846985, "loss_ce": 0.0001238000695593655, "loss_iou": 0.09130859375, "loss_num": 0.0145263671875, "loss_xval": 0.255859375, "num_input_tokens_seen": 283090404, "step": 4515 }, { "epoch": 15.028286189683861, "grad_norm": 8.420428276062012, "learning_rate": 5e-06, "loss": 0.4935, "num_input_tokens_seen": 283153136, "step": 4516 }, { "epoch": 15.028286189683861, "loss": 0.5417757630348206, "loss_ce": 0.00014981582353357226, "loss_iou": 0.2119140625, "loss_num": 0.0235595703125, "loss_xval": 0.54296875, "num_input_tokens_seen": 283153136, "step": 4516 }, { "epoch": 15.031613976705492, "grad_norm": 24.439529418945312, "learning_rate": 5e-06, "loss": 0.2618, "num_input_tokens_seen": 283214260, "step": 4517 }, { "epoch": 15.031613976705492, "loss": 0.24921171367168427, "loss_ce": 5.189937837712932e-06, "loss_iou": 0.0869140625, "loss_num": 0.01513671875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 283214260, "step": 4517 }, { "epoch": 15.034941763727122, "grad_norm": 29.037302017211914, "learning_rate": 5e-06, "loss": 0.4502, "num_input_tokens_seen": 283274360, "step": 4518 }, { "epoch": 15.034941763727122, "loss": 0.5615622997283936, "loss_ce": 8.371015610464383e-06, "loss_iou": 0.208984375, "loss_num": 0.028564453125, "loss_xval": 0.5625, "num_input_tokens_seen": 283274360, "step": 4518 }, { "epoch": 15.038269550748753, "grad_norm": 9.413277626037598, "learning_rate": 5e-06, "loss": 0.313, "num_input_tokens_seen": 283337336, "step": 4519 }, { "epoch": 15.038269550748753, "loss": 0.4017363488674164, "loss_ce": 2.925994749602978e-06, "loss_iou": 0.15625, "loss_num": 0.017822265625, "loss_xval": 0.40234375, "num_input_tokens_seen": 283337336, "step": 4519 }, { "epoch": 15.041597337770384, "grad_norm": 7.919177532196045, "learning_rate": 5e-06, "loss": 0.4492, "num_input_tokens_seen": 283398852, "step": 4520 }, { "epoch": 15.041597337770384, "loss": 0.6654120683670044, "loss_ce": 6.753840807505185e-06, "loss_iou": 0.263671875, "loss_num": 0.0274658203125, "loss_xval": 0.6640625, "num_input_tokens_seen": 283398852, "step": 4520 }, { "epoch": 15.044925124792014, "grad_norm": 11.14826488494873, "learning_rate": 5e-06, "loss": 0.5801, "num_input_tokens_seen": 283462184, "step": 4521 }, { "epoch": 15.044925124792014, "loss": 0.7620192766189575, "loss_ce": 5.63853609492071e-05, "loss_iou": 0.248046875, "loss_num": 0.052978515625, "loss_xval": 0.76171875, "num_input_tokens_seen": 283462184, "step": 4521 }, { "epoch": 15.048252911813645, "grad_norm": 28.402976989746094, "learning_rate": 5e-06, "loss": 0.448, "num_input_tokens_seen": 283523720, "step": 4522 }, { "epoch": 15.048252911813645, "loss": 0.5054929852485657, "loss_ce": 6.08654590905644e-05, "loss_iou": 0.166015625, "loss_num": 0.034423828125, "loss_xval": 0.50390625, "num_input_tokens_seen": 283523720, "step": 4522 }, { "epoch": 15.051580698835275, "grad_norm": 22.465238571166992, "learning_rate": 5e-06, "loss": 0.3992, "num_input_tokens_seen": 283587012, "step": 4523 }, { "epoch": 15.051580698835275, "loss": 0.48255202174186707, "loss_ce": 8.075374353211373e-06, "loss_iou": 0.1748046875, "loss_num": 0.0263671875, "loss_xval": 0.482421875, "num_input_tokens_seen": 283587012, "step": 4523 }, { "epoch": 15.054908485856906, "grad_norm": 36.806114196777344, "learning_rate": 5e-06, "loss": 0.4997, "num_input_tokens_seen": 283650172, "step": 4524 }, { "epoch": 15.054908485856906, "loss": 0.40368789434432983, "loss_ce": 1.3662903484146227e-06, "loss_iou": 0.177734375, "loss_num": 0.009521484375, "loss_xval": 0.404296875, "num_input_tokens_seen": 283650172, "step": 4524 }, { "epoch": 15.058236272878537, "grad_norm": 21.20890235900879, "learning_rate": 5e-06, "loss": 0.3122, "num_input_tokens_seen": 283712852, "step": 4525 }, { "epoch": 15.058236272878537, "loss": 0.2825118899345398, "loss_ce": 1.0664391083992086e-05, "loss_iou": 0.0966796875, "loss_num": 0.017822265625, "loss_xval": 0.283203125, "num_input_tokens_seen": 283712852, "step": 4525 }, { "epoch": 15.061564059900167, "grad_norm": 12.640700340270996, "learning_rate": 5e-06, "loss": 0.4009, "num_input_tokens_seen": 283776780, "step": 4526 }, { "epoch": 15.061564059900167, "loss": 0.5130629539489746, "loss_ce": 1.3749064464718685e-06, "loss_iou": 0.185546875, "loss_num": 0.0283203125, "loss_xval": 0.51171875, "num_input_tokens_seen": 283776780, "step": 4526 }, { "epoch": 15.064891846921798, "grad_norm": 33.79093933105469, "learning_rate": 5e-06, "loss": 0.7106, "num_input_tokens_seen": 283840116, "step": 4527 }, { "epoch": 15.064891846921798, "loss": 0.8033466339111328, "loss_ce": 1.8924212099591387e-06, "loss_iou": 0.2890625, "loss_num": 0.044921875, "loss_xval": 0.8046875, "num_input_tokens_seen": 283840116, "step": 4527 }, { "epoch": 15.068219633943428, "grad_norm": 26.03872299194336, "learning_rate": 5e-06, "loss": 0.4043, "num_input_tokens_seen": 283900388, "step": 4528 }, { "epoch": 15.068219633943428, "loss": 0.4832174777984619, "loss_ce": 6.316081271506846e-05, "loss_iou": 0.1865234375, "loss_num": 0.0218505859375, "loss_xval": 0.482421875, "num_input_tokens_seen": 283900388, "step": 4528 }, { "epoch": 15.071547420965059, "grad_norm": 13.02631950378418, "learning_rate": 5e-06, "loss": 0.4724, "num_input_tokens_seen": 283964548, "step": 4529 }, { "epoch": 15.071547420965059, "loss": 0.47656458616256714, "loss_ce": 2.0863162717432715e-06, "loss_iou": 0.2021484375, "loss_num": 0.01446533203125, "loss_xval": 0.4765625, "num_input_tokens_seen": 283964548, "step": 4529 }, { "epoch": 15.07487520798669, "grad_norm": 7.6748127937316895, "learning_rate": 5e-06, "loss": 0.3099, "num_input_tokens_seen": 284026868, "step": 4530 }, { "epoch": 15.07487520798669, "loss": 0.28612756729125977, "loss_ce": 1.003130182652967e-05, "loss_iou": 0.1044921875, "loss_num": 0.01544189453125, "loss_xval": 0.28515625, "num_input_tokens_seen": 284026868, "step": 4530 }, { "epoch": 15.07820299500832, "grad_norm": 15.339949607849121, "learning_rate": 5e-06, "loss": 0.7569, "num_input_tokens_seen": 284090348, "step": 4531 }, { "epoch": 15.07820299500832, "loss": 0.6089690923690796, "loss_ce": 5.1832434110110626e-05, "loss_iou": 0.224609375, "loss_num": 0.03173828125, "loss_xval": 0.609375, "num_input_tokens_seen": 284090348, "step": 4531 }, { "epoch": 15.081530782029951, "grad_norm": 15.856538772583008, "learning_rate": 5e-06, "loss": 0.3353, "num_input_tokens_seen": 284153912, "step": 4532 }, { "epoch": 15.081530782029951, "loss": 0.24116523563861847, "loss_ce": 1.5342546248575673e-05, "loss_iou": 0.0830078125, "loss_num": 0.0150146484375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 284153912, "step": 4532 }, { "epoch": 15.084858569051582, "grad_norm": 6.219130516052246, "learning_rate": 5e-06, "loss": 0.3069, "num_input_tokens_seen": 284216672, "step": 4533 }, { "epoch": 15.084858569051582, "loss": 0.28206634521484375, "loss_ce": 3.8141326513141394e-05, "loss_iou": 0.10546875, "loss_num": 0.01422119140625, "loss_xval": 0.28125, "num_input_tokens_seen": 284216672, "step": 4533 }, { "epoch": 15.088186356073212, "grad_norm": 16.686302185058594, "learning_rate": 5e-06, "loss": 0.3727, "num_input_tokens_seen": 284279400, "step": 4534 }, { "epoch": 15.088186356073212, "loss": 0.32844820618629456, "loss_ce": 1.801156031433493e-05, "loss_iou": 0.1015625, "loss_num": 0.02490234375, "loss_xval": 0.328125, "num_input_tokens_seen": 284279400, "step": 4534 }, { "epoch": 15.091514143094843, "grad_norm": 11.285345077514648, "learning_rate": 5e-06, "loss": 0.2984, "num_input_tokens_seen": 284341536, "step": 4535 }, { "epoch": 15.091514143094843, "loss": 0.4008106589317322, "loss_ce": 8.029032869671937e-06, "loss_iou": 0.162109375, "loss_num": 0.0155029296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 284341536, "step": 4535 }, { "epoch": 15.094841930116473, "grad_norm": 16.094900131225586, "learning_rate": 5e-06, "loss": 0.4288, "num_input_tokens_seen": 284404472, "step": 4536 }, { "epoch": 15.094841930116473, "loss": 0.21255797147750854, "loss_ce": 3.0418764254136477e-06, "loss_iou": 0.07470703125, "loss_num": 0.0125732421875, "loss_xval": 0.212890625, "num_input_tokens_seen": 284404472, "step": 4536 }, { "epoch": 15.098169717138104, "grad_norm": 13.242379188537598, "learning_rate": 5e-06, "loss": 0.2867, "num_input_tokens_seen": 284467628, "step": 4537 }, { "epoch": 15.098169717138104, "loss": 0.32240328192710876, "loss_ce": 1.5586500012432225e-05, "loss_iou": 0.1396484375, "loss_num": 0.008544921875, "loss_xval": 0.322265625, "num_input_tokens_seen": 284467628, "step": 4537 }, { "epoch": 15.101497504159735, "grad_norm": 11.262799263000488, "learning_rate": 5e-06, "loss": 0.4499, "num_input_tokens_seen": 284531564, "step": 4538 }, { "epoch": 15.101497504159735, "loss": 0.5313245058059692, "loss_ce": 0.00010501892393222079, "loss_iou": 0.2158203125, "loss_num": 0.0198974609375, "loss_xval": 0.53125, "num_input_tokens_seen": 284531564, "step": 4538 }, { "epoch": 15.104825291181365, "grad_norm": 14.02185344696045, "learning_rate": 5e-06, "loss": 0.3167, "num_input_tokens_seen": 284593640, "step": 4539 }, { "epoch": 15.104825291181365, "loss": 0.22341516613960266, "loss_ce": 2.6483901820029132e-05, "loss_iou": 0.0361328125, "loss_num": 0.0301513671875, "loss_xval": 0.2236328125, "num_input_tokens_seen": 284593640, "step": 4539 }, { "epoch": 15.108153078202996, "grad_norm": 9.372699737548828, "learning_rate": 5e-06, "loss": 0.3831, "num_input_tokens_seen": 284654448, "step": 4540 }, { "epoch": 15.108153078202996, "loss": 0.3266764283180237, "loss_ce": 0.00026041705859825015, "loss_iou": 0.11865234375, "loss_num": 0.017822265625, "loss_xval": 0.326171875, "num_input_tokens_seen": 284654448, "step": 4540 }, { "epoch": 15.111480865224626, "grad_norm": 17.441776275634766, "learning_rate": 5e-06, "loss": 0.2144, "num_input_tokens_seen": 284716292, "step": 4541 }, { "epoch": 15.111480865224626, "loss": 0.2144482135772705, "loss_ce": 1.1984059256064938e-06, "loss_iou": 0.09130859375, "loss_num": 0.006317138671875, "loss_xval": 0.21484375, "num_input_tokens_seen": 284716292, "step": 4541 }, { "epoch": 15.114808652246257, "grad_norm": 10.226694107055664, "learning_rate": 5e-06, "loss": 0.4041, "num_input_tokens_seen": 284778888, "step": 4542 }, { "epoch": 15.114808652246257, "loss": 0.26408231258392334, "loss_ce": 1.3703298463951796e-05, "loss_iou": 0.09375, "loss_num": 0.015380859375, "loss_xval": 0.263671875, "num_input_tokens_seen": 284778888, "step": 4542 }, { "epoch": 15.118136439267888, "grad_norm": 18.160917282104492, "learning_rate": 5e-06, "loss": 0.3229, "num_input_tokens_seen": 284841764, "step": 4543 }, { "epoch": 15.118136439267888, "loss": 0.3366100788116455, "loss_ce": 1.1903340464414214e-06, "loss_iou": 0.11572265625, "loss_num": 0.02099609375, "loss_xval": 0.3359375, "num_input_tokens_seen": 284841764, "step": 4543 }, { "epoch": 15.121464226289518, "grad_norm": 33.385250091552734, "learning_rate": 5e-06, "loss": 0.5194, "num_input_tokens_seen": 284905844, "step": 4544 }, { "epoch": 15.121464226289518, "loss": 0.48951256275177, "loss_ce": 1.0592473699944094e-05, "loss_iou": 0.1806640625, "loss_num": 0.025634765625, "loss_xval": 0.490234375, "num_input_tokens_seen": 284905844, "step": 4544 }, { "epoch": 15.124792013311149, "grad_norm": 29.7326602935791, "learning_rate": 5e-06, "loss": 0.5962, "num_input_tokens_seen": 284969908, "step": 4545 }, { "epoch": 15.124792013311149, "loss": 0.48989659547805786, "loss_ce": 0.0005396233173087239, "loss_iou": 0.2080078125, "loss_num": 0.01458740234375, "loss_xval": 0.490234375, "num_input_tokens_seen": 284969908, "step": 4545 }, { "epoch": 15.12811980033278, "grad_norm": 22.88463592529297, "learning_rate": 5e-06, "loss": 0.5209, "num_input_tokens_seen": 285032444, "step": 4546 }, { "epoch": 15.12811980033278, "loss": 0.5555436015129089, "loss_ce": 1.6017852431104984e-06, "loss_iou": 0.2060546875, "loss_num": 0.0286865234375, "loss_xval": 0.5546875, "num_input_tokens_seen": 285032444, "step": 4546 }, { "epoch": 15.13144758735441, "grad_norm": 10.139613151550293, "learning_rate": 5e-06, "loss": 0.2395, "num_input_tokens_seen": 285094980, "step": 4547 }, { "epoch": 15.13144758735441, "loss": 0.29333555698394775, "loss_ce": 6.136144747870276e-07, "loss_iou": 0.11376953125, "loss_num": 0.01318359375, "loss_xval": 0.29296875, "num_input_tokens_seen": 285094980, "step": 4547 }, { "epoch": 15.13477537437604, "grad_norm": 12.15960693359375, "learning_rate": 5e-06, "loss": 0.4285, "num_input_tokens_seen": 285158256, "step": 4548 }, { "epoch": 15.13477537437604, "loss": 0.5205085873603821, "loss_ce": 7.673770596738905e-07, "loss_iou": 0.2109375, "loss_num": 0.01953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 285158256, "step": 4548 }, { "epoch": 15.138103161397671, "grad_norm": 8.482779502868652, "learning_rate": 5e-06, "loss": 0.4127, "num_input_tokens_seen": 285220004, "step": 4549 }, { "epoch": 15.138103161397671, "loss": 0.6713677644729614, "loss_ce": 4.209015241940506e-05, "loss_iou": 0.255859375, "loss_num": 0.031982421875, "loss_xval": 0.671875, "num_input_tokens_seen": 285220004, "step": 4549 }, { "epoch": 15.141430948419302, "grad_norm": 6.794999599456787, "learning_rate": 5e-06, "loss": 0.4347, "num_input_tokens_seen": 285283172, "step": 4550 }, { "epoch": 15.141430948419302, "loss": 0.42046335339546204, "loss_ce": 0.0001752876560203731, "loss_iou": 0.1630859375, "loss_num": 0.018798828125, "loss_xval": 0.419921875, "num_input_tokens_seen": 285283172, "step": 4550 }, { "epoch": 15.144758735440933, "grad_norm": 28.342369079589844, "learning_rate": 5e-06, "loss": 0.5406, "num_input_tokens_seen": 285346184, "step": 4551 }, { "epoch": 15.144758735440933, "loss": 0.37398064136505127, "loss_ce": 1.8226397514808923e-05, "loss_iou": 0.1640625, "loss_num": 0.00909423828125, "loss_xval": 0.373046875, "num_input_tokens_seen": 285346184, "step": 4551 }, { "epoch": 15.148086522462563, "grad_norm": 24.744304656982422, "learning_rate": 5e-06, "loss": 0.4127, "num_input_tokens_seen": 285408372, "step": 4552 }, { "epoch": 15.148086522462563, "loss": 0.33801349997520447, "loss_ce": 7.991276333996211e-07, "loss_iou": 0.1103515625, "loss_num": 0.0235595703125, "loss_xval": 0.337890625, "num_input_tokens_seen": 285408372, "step": 4552 }, { "epoch": 15.151414309484194, "grad_norm": 12.03886890411377, "learning_rate": 5e-06, "loss": 0.431, "num_input_tokens_seen": 285472108, "step": 4553 }, { "epoch": 15.151414309484194, "loss": 0.5327252745628357, "loss_ce": 1.043549673340749e-05, "loss_iou": 0.22265625, "loss_num": 0.0172119140625, "loss_xval": 0.53125, "num_input_tokens_seen": 285472108, "step": 4553 }, { "epoch": 15.154742096505824, "grad_norm": 12.338217735290527, "learning_rate": 5e-06, "loss": 0.3377, "num_input_tokens_seen": 285532520, "step": 4554 }, { "epoch": 15.154742096505824, "loss": 0.3454596698284149, "loss_ce": 6.80201310387929e-07, "loss_iou": 0.1416015625, "loss_num": 0.012451171875, "loss_xval": 0.345703125, "num_input_tokens_seen": 285532520, "step": 4554 }, { "epoch": 15.158069883527455, "grad_norm": 7.005237579345703, "learning_rate": 5e-06, "loss": 0.4338, "num_input_tokens_seen": 285596048, "step": 4555 }, { "epoch": 15.158069883527455, "loss": 0.5343725085258484, "loss_ce": 9.71332974586403e-06, "loss_iou": 0.197265625, "loss_num": 0.028076171875, "loss_xval": 0.53515625, "num_input_tokens_seen": 285596048, "step": 4555 }, { "epoch": 15.161397670549086, "grad_norm": 9.00033950805664, "learning_rate": 5e-06, "loss": 0.3518, "num_input_tokens_seen": 285659488, "step": 4556 }, { "epoch": 15.161397670549086, "loss": 0.44570887088775635, "loss_ce": 3.0155184504110366e-05, "loss_iou": 0.1923828125, "loss_num": 0.01220703125, "loss_xval": 0.4453125, "num_input_tokens_seen": 285659488, "step": 4556 }, { "epoch": 15.164725457570716, "grad_norm": 12.702378273010254, "learning_rate": 5e-06, "loss": 0.4881, "num_input_tokens_seen": 285721996, "step": 4557 }, { "epoch": 15.164725457570716, "loss": 0.7256221771240234, "loss_ce": 3.630161882028915e-05, "loss_iou": 0.26171875, "loss_num": 0.04052734375, "loss_xval": 0.7265625, "num_input_tokens_seen": 285721996, "step": 4557 }, { "epoch": 15.168053244592347, "grad_norm": 24.704486846923828, "learning_rate": 5e-06, "loss": 0.4462, "num_input_tokens_seen": 285785140, "step": 4558 }, { "epoch": 15.168053244592347, "loss": 0.5114542841911316, "loss_ce": 7.122370880097151e-05, "loss_iou": 0.193359375, "loss_num": 0.02490234375, "loss_xval": 0.51171875, "num_input_tokens_seen": 285785140, "step": 4558 }, { "epoch": 15.171381031613977, "grad_norm": 13.976387023925781, "learning_rate": 5e-06, "loss": 0.3236, "num_input_tokens_seen": 285844688, "step": 4559 }, { "epoch": 15.171381031613977, "loss": 0.294984370470047, "loss_ce": 1.4425860399569501e-06, "loss_iou": 0.09375, "loss_num": 0.0216064453125, "loss_xval": 0.294921875, "num_input_tokens_seen": 285844688, "step": 4559 }, { "epoch": 15.174708818635608, "grad_norm": 9.882658004760742, "learning_rate": 5e-06, "loss": 0.365, "num_input_tokens_seen": 285906288, "step": 4560 }, { "epoch": 15.174708818635608, "loss": 0.43947362899780273, "loss_ce": 0.00014258497685659677, "loss_iou": 0.17578125, "loss_num": 0.0177001953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 285906288, "step": 4560 }, { "epoch": 15.178036605657239, "grad_norm": 17.674585342407227, "learning_rate": 5e-06, "loss": 0.2923, "num_input_tokens_seen": 285967416, "step": 4561 }, { "epoch": 15.178036605657239, "loss": 0.3049788177013397, "loss_ce": 1.3988612863613525e-06, "loss_iou": 0.138671875, "loss_num": 0.005645751953125, "loss_xval": 0.3046875, "num_input_tokens_seen": 285967416, "step": 4561 }, { "epoch": 15.18136439267887, "grad_norm": 10.234919548034668, "learning_rate": 5e-06, "loss": 0.2471, "num_input_tokens_seen": 286029076, "step": 4562 }, { "epoch": 15.18136439267887, "loss": 0.18289315700531006, "loss_ce": 1.2965507494300255e-06, "loss_iou": 0.06640625, "loss_num": 0.00994873046875, "loss_xval": 0.1826171875, "num_input_tokens_seen": 286029076, "step": 4562 }, { "epoch": 15.1846921797005, "grad_norm": 7.7204790115356445, "learning_rate": 5e-06, "loss": 0.4037, "num_input_tokens_seen": 286091544, "step": 4563 }, { "epoch": 15.1846921797005, "loss": 0.36609381437301636, "loss_ce": 4.951091796101537e-06, "loss_iou": 0.1181640625, "loss_num": 0.0260009765625, "loss_xval": 0.365234375, "num_input_tokens_seen": 286091544, "step": 4563 }, { "epoch": 15.18801996672213, "grad_norm": 10.760502815246582, "learning_rate": 5e-06, "loss": 0.2281, "num_input_tokens_seen": 286153896, "step": 4564 }, { "epoch": 15.18801996672213, "loss": 0.2563520073890686, "loss_ce": 4.349764822109137e-06, "loss_iou": 0.09765625, "loss_num": 0.01220703125, "loss_xval": 0.255859375, "num_input_tokens_seen": 286153896, "step": 4564 }, { "epoch": 15.191347753743761, "grad_norm": 8.041827201843262, "learning_rate": 5e-06, "loss": 0.6029, "num_input_tokens_seen": 286216736, "step": 4565 }, { "epoch": 15.191347753743761, "loss": 0.6424098014831543, "loss_ce": 1.4736376215296332e-05, "loss_iou": 0.25390625, "loss_num": 0.026611328125, "loss_xval": 0.640625, "num_input_tokens_seen": 286216736, "step": 4565 }, { "epoch": 15.194675540765392, "grad_norm": 6.81574010848999, "learning_rate": 5e-06, "loss": 0.415, "num_input_tokens_seen": 286280880, "step": 4566 }, { "epoch": 15.194675540765392, "loss": 0.502405047416687, "loss_ce": 3.99081181967631e-05, "loss_iou": 0.185546875, "loss_num": 0.0262451171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 286280880, "step": 4566 }, { "epoch": 15.198003327787022, "grad_norm": 10.979909896850586, "learning_rate": 5e-06, "loss": 0.4362, "num_input_tokens_seen": 286343660, "step": 4567 }, { "epoch": 15.198003327787022, "loss": 0.31217294931411743, "loss_ce": 8.648222319607157e-06, "loss_iou": 0.12890625, "loss_num": 0.0108642578125, "loss_xval": 0.3125, "num_input_tokens_seen": 286343660, "step": 4567 }, { "epoch": 15.201331114808653, "grad_norm": 25.473012924194336, "learning_rate": 5e-06, "loss": 0.6032, "num_input_tokens_seen": 286406920, "step": 4568 }, { "epoch": 15.201331114808653, "loss": 0.7210706472396851, "loss_ce": 1.2828079434257234e-06, "loss_iou": 0.2734375, "loss_num": 0.034423828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 286406920, "step": 4568 }, { "epoch": 15.204658901830284, "grad_norm": 25.169084548950195, "learning_rate": 5e-06, "loss": 0.4736, "num_input_tokens_seen": 286469944, "step": 4569 }, { "epoch": 15.204658901830284, "loss": 0.4140671491622925, "loss_ce": 4.637690381059656e-06, "loss_iou": 0.1572265625, "loss_num": 0.0201416015625, "loss_xval": 0.4140625, "num_input_tokens_seen": 286469944, "step": 4569 }, { "epoch": 15.207986688851914, "grad_norm": 23.21827507019043, "learning_rate": 5e-06, "loss": 0.6208, "num_input_tokens_seen": 286531864, "step": 4570 }, { "epoch": 15.207986688851914, "loss": 0.6872669458389282, "loss_ce": 1.1077624549216125e-05, "loss_iou": 0.255859375, "loss_num": 0.035400390625, "loss_xval": 0.6875, "num_input_tokens_seen": 286531864, "step": 4570 }, { "epoch": 15.211314475873545, "grad_norm": 9.739476203918457, "learning_rate": 5e-06, "loss": 0.6115, "num_input_tokens_seen": 286594336, "step": 4571 }, { "epoch": 15.211314475873545, "loss": 0.5388875603675842, "loss_ce": 8.167289706761949e-06, "loss_iou": 0.205078125, "loss_num": 0.02587890625, "loss_xval": 0.5390625, "num_input_tokens_seen": 286594336, "step": 4571 }, { "epoch": 15.214642262895175, "grad_norm": 12.47042179107666, "learning_rate": 5e-06, "loss": 0.5033, "num_input_tokens_seen": 286657900, "step": 4572 }, { "epoch": 15.214642262895175, "loss": 0.4539285898208618, "loss_ce": 1.0152009053854272e-05, "loss_iou": 0.177734375, "loss_num": 0.0198974609375, "loss_xval": 0.453125, "num_input_tokens_seen": 286657900, "step": 4572 }, { "epoch": 15.217970049916806, "grad_norm": 18.811687469482422, "learning_rate": 5e-06, "loss": 0.661, "num_input_tokens_seen": 286720736, "step": 4573 }, { "epoch": 15.217970049916806, "loss": 0.4756474494934082, "loss_ce": 4.662843480218726e-07, "loss_iou": 0.19921875, "loss_num": 0.015380859375, "loss_xval": 0.4765625, "num_input_tokens_seen": 286720736, "step": 4573 }, { "epoch": 15.221297836938437, "grad_norm": 40.87036895751953, "learning_rate": 5e-06, "loss": 0.4662, "num_input_tokens_seen": 286784132, "step": 4574 }, { "epoch": 15.221297836938437, "loss": 0.5576193332672119, "loss_ce": 2.1610467229038477e-06, "loss_iou": 0.2294921875, "loss_num": 0.0198974609375, "loss_xval": 0.55859375, "num_input_tokens_seen": 286784132, "step": 4574 }, { "epoch": 15.224625623960067, "grad_norm": 44.1612663269043, "learning_rate": 5e-06, "loss": 0.4256, "num_input_tokens_seen": 286846640, "step": 4575 }, { "epoch": 15.224625623960067, "loss": 0.6409971714019775, "loss_ce": 5.956973836873658e-06, "loss_iou": 0.24609375, "loss_num": 0.029541015625, "loss_xval": 0.640625, "num_input_tokens_seen": 286846640, "step": 4575 }, { "epoch": 15.227953410981698, "grad_norm": 6.324528694152832, "learning_rate": 5e-06, "loss": 0.4236, "num_input_tokens_seen": 286909976, "step": 4576 }, { "epoch": 15.227953410981698, "loss": 0.3673129975795746, "loss_ce": 3.4280403724551434e-06, "loss_iou": 0.10205078125, "loss_num": 0.032470703125, "loss_xval": 0.3671875, "num_input_tokens_seen": 286909976, "step": 4576 }, { "epoch": 15.231281198003328, "grad_norm": 6.926846981048584, "learning_rate": 5e-06, "loss": 0.4235, "num_input_tokens_seen": 286974396, "step": 4577 }, { "epoch": 15.231281198003328, "loss": 0.37427520751953125, "loss_ce": 7.645970981684513e-06, "loss_iou": 0.15625, "loss_num": 0.012451171875, "loss_xval": 0.375, "num_input_tokens_seen": 286974396, "step": 4577 }, { "epoch": 15.234608985024959, "grad_norm": 11.48361587524414, "learning_rate": 5e-06, "loss": 0.4104, "num_input_tokens_seen": 287035580, "step": 4578 }, { "epoch": 15.234608985024959, "loss": 0.567388117313385, "loss_ce": 5.273603164823726e-06, "loss_iou": 0.19140625, "loss_num": 0.036865234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 287035580, "step": 4578 }, { "epoch": 15.23793677204659, "grad_norm": 12.638822555541992, "learning_rate": 5e-06, "loss": 0.3301, "num_input_tokens_seen": 287097224, "step": 4579 }, { "epoch": 15.23793677204659, "loss": 0.3312477767467499, "loss_ce": 4.051247742609121e-05, "loss_iou": 0.1318359375, "loss_num": 0.0135498046875, "loss_xval": 0.33203125, "num_input_tokens_seen": 287097224, "step": 4579 }, { "epoch": 15.24126455906822, "grad_norm": 16.294843673706055, "learning_rate": 5e-06, "loss": 0.2935, "num_input_tokens_seen": 287160048, "step": 4580 }, { "epoch": 15.24126455906822, "loss": 0.3338048756122589, "loss_ce": 3.621429868871928e-06, "loss_iou": 0.1064453125, "loss_num": 0.0240478515625, "loss_xval": 0.333984375, "num_input_tokens_seen": 287160048, "step": 4580 }, { "epoch": 15.244592346089851, "grad_norm": 12.813994407653809, "learning_rate": 5e-06, "loss": 0.605, "num_input_tokens_seen": 287223552, "step": 4581 }, { "epoch": 15.244592346089851, "loss": 0.718090295791626, "loss_ce": 0.00016428239177912474, "loss_iou": 0.265625, "loss_num": 0.037353515625, "loss_xval": 0.71875, "num_input_tokens_seen": 287223552, "step": 4581 }, { "epoch": 15.247920133111482, "grad_norm": 29.59439468383789, "learning_rate": 5e-06, "loss": 0.5247, "num_input_tokens_seen": 287287624, "step": 4582 }, { "epoch": 15.247920133111482, "loss": 0.4014034569263458, "loss_ce": 3.628609192674048e-05, "loss_iou": 0.1494140625, "loss_num": 0.0203857421875, "loss_xval": 0.40234375, "num_input_tokens_seen": 287287624, "step": 4582 }, { "epoch": 15.251247920133112, "grad_norm": 20.960721969604492, "learning_rate": 5e-06, "loss": 0.4338, "num_input_tokens_seen": 287349892, "step": 4583 }, { "epoch": 15.251247920133112, "loss": 0.5334494113922119, "loss_ce": 2.1709736302000238e-06, "loss_iou": 0.2158203125, "loss_num": 0.020263671875, "loss_xval": 0.53515625, "num_input_tokens_seen": 287349892, "step": 4583 }, { "epoch": 15.254575707154743, "grad_norm": 8.018237113952637, "learning_rate": 5e-06, "loss": 0.6146, "num_input_tokens_seen": 287412540, "step": 4584 }, { "epoch": 15.254575707154743, "loss": 0.7259538173675537, "loss_ce": 1.703712882772379e-06, "loss_iou": 0.25, "loss_num": 0.04541015625, "loss_xval": 0.7265625, "num_input_tokens_seen": 287412540, "step": 4584 }, { "epoch": 15.257903494176373, "grad_norm": 10.782513618469238, "learning_rate": 5e-06, "loss": 0.45, "num_input_tokens_seen": 287476364, "step": 4585 }, { "epoch": 15.257903494176373, "loss": 0.6118171215057373, "loss_ce": 6.971446850911889e-07, "loss_iou": 0.2412109375, "loss_num": 0.0260009765625, "loss_xval": 0.61328125, "num_input_tokens_seen": 287476364, "step": 4585 }, { "epoch": 15.261231281198004, "grad_norm": 19.826793670654297, "learning_rate": 5e-06, "loss": 0.3808, "num_input_tokens_seen": 287538324, "step": 4586 }, { "epoch": 15.261231281198004, "loss": 0.4390872120857239, "loss_ce": 2.7774095201493765e-07, "loss_iou": 0.1787109375, "loss_num": 0.0162353515625, "loss_xval": 0.439453125, "num_input_tokens_seen": 287538324, "step": 4586 }, { "epoch": 15.264559068219635, "grad_norm": 29.970834732055664, "learning_rate": 5e-06, "loss": 0.4925, "num_input_tokens_seen": 287600404, "step": 4587 }, { "epoch": 15.264559068219635, "loss": 0.4899302124977112, "loss_ce": 1.0101649650096078e-06, "loss_iou": 0.2119140625, "loss_num": 0.012939453125, "loss_xval": 0.490234375, "num_input_tokens_seen": 287600404, "step": 4587 }, { "epoch": 15.267886855241265, "grad_norm": 36.896034240722656, "learning_rate": 5e-06, "loss": 0.5945, "num_input_tokens_seen": 287662964, "step": 4588 }, { "epoch": 15.267886855241265, "loss": 0.45202961564064026, "loss_ce": 3.2555840334680397e-06, "loss_iou": 0.1904296875, "loss_num": 0.0142822265625, "loss_xval": 0.451171875, "num_input_tokens_seen": 287662964, "step": 4588 }, { "epoch": 15.271214642262896, "grad_norm": 27.655914306640625, "learning_rate": 5e-06, "loss": 0.384, "num_input_tokens_seen": 287724348, "step": 4589 }, { "epoch": 15.271214642262896, "loss": 0.39073967933654785, "loss_ce": 7.8329494499485e-06, "loss_iou": 0.15234375, "loss_num": 0.01708984375, "loss_xval": 0.390625, "num_input_tokens_seen": 287724348, "step": 4589 }, { "epoch": 15.274542429284526, "grad_norm": 10.863263130187988, "learning_rate": 5e-06, "loss": 0.3973, "num_input_tokens_seen": 287787420, "step": 4590 }, { "epoch": 15.274542429284526, "loss": 0.45471590757369995, "loss_ce": 4.02724981540814e-06, "loss_iou": 0.17578125, "loss_num": 0.0206298828125, "loss_xval": 0.455078125, "num_input_tokens_seen": 287787420, "step": 4590 }, { "epoch": 15.277870216306157, "grad_norm": 16.76195526123047, "learning_rate": 5e-06, "loss": 0.2762, "num_input_tokens_seen": 287849736, "step": 4591 }, { "epoch": 15.277870216306157, "loss": 0.24811138212680817, "loss_ce": 3.4672048059292138e-06, "loss_iou": 0.09765625, "loss_num": 0.01055908203125, "loss_xval": 0.248046875, "num_input_tokens_seen": 287849736, "step": 4591 }, { "epoch": 15.281198003327788, "grad_norm": 19.441368103027344, "learning_rate": 5e-06, "loss": 0.4755, "num_input_tokens_seen": 287911800, "step": 4592 }, { "epoch": 15.281198003327788, "loss": 0.5964096784591675, "loss_ce": 5.0389571697451174e-05, "loss_iou": 0.2421875, "loss_num": 0.0224609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 287911800, "step": 4592 }, { "epoch": 15.284525790349418, "grad_norm": 35.086360931396484, "learning_rate": 5e-06, "loss": 0.5531, "num_input_tokens_seen": 287975604, "step": 4593 }, { "epoch": 15.284525790349418, "loss": 0.6106084585189819, "loss_ce": 1.2825578778574709e-05, "loss_iou": 0.2373046875, "loss_num": 0.0272216796875, "loss_xval": 0.609375, "num_input_tokens_seen": 287975604, "step": 4593 }, { "epoch": 15.287853577371049, "grad_norm": 30.03073501586914, "learning_rate": 5e-06, "loss": 0.4573, "num_input_tokens_seen": 288038520, "step": 4594 }, { "epoch": 15.287853577371049, "loss": 0.44952452182769775, "loss_ce": 5.955536721558019e-07, "loss_iou": 0.1650390625, "loss_num": 0.0238037109375, "loss_xval": 0.44921875, "num_input_tokens_seen": 288038520, "step": 4594 }, { "epoch": 15.29118136439268, "grad_norm": 11.778630256652832, "learning_rate": 5e-06, "loss": 0.4043, "num_input_tokens_seen": 288101132, "step": 4595 }, { "epoch": 15.29118136439268, "loss": 0.3439713418483734, "loss_ce": 3.8228587072808295e-05, "loss_iou": 0.14453125, "loss_num": 0.01104736328125, "loss_xval": 0.34375, "num_input_tokens_seen": 288101132, "step": 4595 }, { "epoch": 15.29450915141431, "grad_norm": 6.6887383460998535, "learning_rate": 5e-06, "loss": 0.3754, "num_input_tokens_seen": 288163064, "step": 4596 }, { "epoch": 15.29450915141431, "loss": 0.4716283679008484, "loss_ce": 9.703298928798176e-06, "loss_iou": 0.19140625, "loss_num": 0.017578125, "loss_xval": 0.470703125, "num_input_tokens_seen": 288163064, "step": 4596 }, { "epoch": 15.29783693843594, "grad_norm": 10.128789901733398, "learning_rate": 5e-06, "loss": 0.4303, "num_input_tokens_seen": 288223456, "step": 4597 }, { "epoch": 15.29783693843594, "loss": 0.4277987480163574, "loss_ce": 3.3163933039759286e-06, "loss_iou": 0.1630859375, "loss_num": 0.020263671875, "loss_xval": 0.427734375, "num_input_tokens_seen": 288223456, "step": 4597 }, { "epoch": 15.301164725457571, "grad_norm": 36.889862060546875, "learning_rate": 5e-06, "loss": 0.5961, "num_input_tokens_seen": 288287316, "step": 4598 }, { "epoch": 15.301164725457571, "loss": 0.6839616894721985, "loss_ce": 1.7246998140763026e-06, "loss_iou": 0.314453125, "loss_num": 0.0107421875, "loss_xval": 0.68359375, "num_input_tokens_seen": 288287316, "step": 4598 }, { "epoch": 15.304492512479202, "grad_norm": 42.05767059326172, "learning_rate": 5e-06, "loss": 0.3537, "num_input_tokens_seen": 288350820, "step": 4599 }, { "epoch": 15.304492512479202, "loss": 0.328248530626297, "loss_ce": 1.4536104799844907e-06, "loss_iou": 0.1376953125, "loss_num": 0.01043701171875, "loss_xval": 0.328125, "num_input_tokens_seen": 288350820, "step": 4599 }, { "epoch": 15.307820299500833, "grad_norm": 26.433490753173828, "learning_rate": 5e-06, "loss": 0.5146, "num_input_tokens_seen": 288412720, "step": 4600 }, { "epoch": 15.307820299500833, "loss": 0.7761029005050659, "loss_ce": 0.00016293970111291856, "loss_iou": 0.3125, "loss_num": 0.0302734375, "loss_xval": 0.77734375, "num_input_tokens_seen": 288412720, "step": 4600 }, { "epoch": 15.311148086522463, "grad_norm": 18.623001098632812, "learning_rate": 5e-06, "loss": 0.6127, "num_input_tokens_seen": 288476468, "step": 4601 }, { "epoch": 15.311148086522463, "loss": 0.3966079652309418, "loss_ce": 1.5320449620048748e-06, "loss_iou": 0.138671875, "loss_num": 0.023681640625, "loss_xval": 0.396484375, "num_input_tokens_seen": 288476468, "step": 4601 }, { "epoch": 15.314475873544094, "grad_norm": 37.62676239013672, "learning_rate": 5e-06, "loss": 0.3742, "num_input_tokens_seen": 288540204, "step": 4602 }, { "epoch": 15.314475873544094, "loss": 0.46582838892936707, "loss_ce": 8.06908337835921e-06, "loss_iou": 0.2021484375, "loss_num": 0.0123291015625, "loss_xval": 0.46484375, "num_input_tokens_seen": 288540204, "step": 4602 }, { "epoch": 15.317803660565724, "grad_norm": 26.76841163635254, "learning_rate": 5e-06, "loss": 0.4617, "num_input_tokens_seen": 288602620, "step": 4603 }, { "epoch": 15.317803660565724, "loss": 0.3240070939064026, "loss_ce": 1.984919435926713e-06, "loss_iou": 0.1318359375, "loss_num": 0.01202392578125, "loss_xval": 0.32421875, "num_input_tokens_seen": 288602620, "step": 4603 }, { "epoch": 15.321131447587355, "grad_norm": 19.452714920043945, "learning_rate": 5e-06, "loss": 0.4767, "num_input_tokens_seen": 288666568, "step": 4604 }, { "epoch": 15.321131447587355, "loss": 0.5289028286933899, "loss_ce": 9.421651338925585e-05, "loss_iou": 0.197265625, "loss_num": 0.02685546875, "loss_xval": 0.52734375, "num_input_tokens_seen": 288666568, "step": 4604 }, { "epoch": 15.324459234608986, "grad_norm": 12.614535331726074, "learning_rate": 5e-06, "loss": 0.4844, "num_input_tokens_seen": 288728444, "step": 4605 }, { "epoch": 15.324459234608986, "loss": 0.6412378549575806, "loss_ce": 2.5138222099485574e-06, "loss_iou": 0.2060546875, "loss_num": 0.0458984375, "loss_xval": 0.640625, "num_input_tokens_seen": 288728444, "step": 4605 }, { "epoch": 15.327787021630616, "grad_norm": 7.2156782150268555, "learning_rate": 5e-06, "loss": 0.3189, "num_input_tokens_seen": 288790068, "step": 4606 }, { "epoch": 15.327787021630616, "loss": 0.28562918305397034, "loss_ce": 1.5179290130618028e-05, "loss_iou": 0.09375, "loss_num": 0.0196533203125, "loss_xval": 0.28515625, "num_input_tokens_seen": 288790068, "step": 4606 }, { "epoch": 15.331114808652247, "grad_norm": 15.795019149780273, "learning_rate": 5e-06, "loss": 0.4467, "num_input_tokens_seen": 288853948, "step": 4607 }, { "epoch": 15.331114808652247, "loss": 0.6253030300140381, "loss_ce": 0.0001809417299227789, "loss_iou": 0.26171875, "loss_num": 0.020263671875, "loss_xval": 0.625, "num_input_tokens_seen": 288853948, "step": 4607 }, { "epoch": 15.334442595673877, "grad_norm": 12.783130645751953, "learning_rate": 5e-06, "loss": 0.3343, "num_input_tokens_seen": 288916568, "step": 4608 }, { "epoch": 15.334442595673877, "loss": 0.4430909752845764, "loss_ce": 6.279766694206046e-06, "loss_iou": 0.1796875, "loss_num": 0.016845703125, "loss_xval": 0.443359375, "num_input_tokens_seen": 288916568, "step": 4608 }, { "epoch": 15.337770382695508, "grad_norm": 12.411849975585938, "learning_rate": 5e-06, "loss": 0.3628, "num_input_tokens_seen": 288977744, "step": 4609 }, { "epoch": 15.337770382695508, "loss": 0.38202184438705444, "loss_ce": 2.786483946692897e-06, "loss_iou": 0.14453125, "loss_num": 0.0186767578125, "loss_xval": 0.3828125, "num_input_tokens_seen": 288977744, "step": 4609 }, { "epoch": 15.341098169717139, "grad_norm": 12.559247970581055, "learning_rate": 5e-06, "loss": 0.3631, "num_input_tokens_seen": 289040408, "step": 4610 }, { "epoch": 15.341098169717139, "loss": 0.5894789695739746, "loss_ce": 1.4297517054728814e-06, "loss_iou": 0.1982421875, "loss_num": 0.038818359375, "loss_xval": 0.58984375, "num_input_tokens_seen": 289040408, "step": 4610 }, { "epoch": 15.34442595673877, "grad_norm": 17.385177612304688, "learning_rate": 5e-06, "loss": 0.3718, "num_input_tokens_seen": 289103188, "step": 4611 }, { "epoch": 15.34442595673877, "loss": 0.46666979789733887, "loss_ce": 0.00011705526412697509, "loss_iou": 0.193359375, "loss_num": 0.0162353515625, "loss_xval": 0.466796875, "num_input_tokens_seen": 289103188, "step": 4611 }, { "epoch": 15.3477537437604, "grad_norm": 19.83721160888672, "learning_rate": 5e-06, "loss": 0.8099, "num_input_tokens_seen": 289167968, "step": 4612 }, { "epoch": 15.3477537437604, "loss": 0.8700894117355347, "loss_ce": 1.8008306142291985e-05, "loss_iou": 0.353515625, "loss_num": 0.033203125, "loss_xval": 0.87109375, "num_input_tokens_seen": 289167968, "step": 4612 }, { "epoch": 15.35108153078203, "grad_norm": 43.896507263183594, "learning_rate": 5e-06, "loss": 0.6318, "num_input_tokens_seen": 289230968, "step": 4613 }, { "epoch": 15.35108153078203, "loss": 0.5656895637512207, "loss_ce": 7.675975211896002e-05, "loss_iou": 0.25, "loss_num": 0.01287841796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 289230968, "step": 4613 }, { "epoch": 15.354409317803661, "grad_norm": 31.197540283203125, "learning_rate": 5e-06, "loss": 0.4348, "num_input_tokens_seen": 289292140, "step": 4614 }, { "epoch": 15.354409317803661, "loss": 0.5615403652191162, "loss_ce": 1.692141086095944e-05, "loss_iou": 0.2021484375, "loss_num": 0.031494140625, "loss_xval": 0.5625, "num_input_tokens_seen": 289292140, "step": 4614 }, { "epoch": 15.357737104825292, "grad_norm": 12.9256010055542, "learning_rate": 5e-06, "loss": 0.4538, "num_input_tokens_seen": 289356292, "step": 4615 }, { "epoch": 15.357737104825292, "loss": 0.4623045027256012, "loss_ce": 0.00014629126235377043, "loss_iou": 0.177734375, "loss_num": 0.0213623046875, "loss_xval": 0.462890625, "num_input_tokens_seen": 289356292, "step": 4615 }, { "epoch": 15.361064891846922, "grad_norm": 16.504594802856445, "learning_rate": 5e-06, "loss": 0.5287, "num_input_tokens_seen": 289419444, "step": 4616 }, { "epoch": 15.361064891846922, "loss": 0.4014597535133362, "loss_ce": 9.943902341547073e-07, "loss_iou": 0.1416015625, "loss_num": 0.0234375, "loss_xval": 0.40234375, "num_input_tokens_seen": 289419444, "step": 4616 }, { "epoch": 15.364392678868553, "grad_norm": 8.734028816223145, "learning_rate": 5e-06, "loss": 0.3844, "num_input_tokens_seen": 289481940, "step": 4617 }, { "epoch": 15.364392678868553, "loss": 0.46374666690826416, "loss_ce": 1.56751571012137e-06, "loss_iou": 0.1708984375, "loss_num": 0.0244140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 289481940, "step": 4617 }, { "epoch": 15.367720465890184, "grad_norm": 15.086784362792969, "learning_rate": 5e-06, "loss": 0.3161, "num_input_tokens_seen": 289544268, "step": 4618 }, { "epoch": 15.367720465890184, "loss": 0.3362296521663666, "loss_ce": 1.7501370166428387e-05, "loss_iou": 0.109375, "loss_num": 0.0235595703125, "loss_xval": 0.3359375, "num_input_tokens_seen": 289544268, "step": 4618 }, { "epoch": 15.371048252911814, "grad_norm": 13.450602531433105, "learning_rate": 5e-06, "loss": 0.4903, "num_input_tokens_seen": 289607020, "step": 4619 }, { "epoch": 15.371048252911814, "loss": 0.4376518130302429, "loss_ce": 2.9758171876892447e-05, "loss_iou": 0.158203125, "loss_num": 0.0244140625, "loss_xval": 0.4375, "num_input_tokens_seen": 289607020, "step": 4619 }, { "epoch": 15.374376039933445, "grad_norm": 28.727689743041992, "learning_rate": 5e-06, "loss": 0.495, "num_input_tokens_seen": 289670812, "step": 4620 }, { "epoch": 15.374376039933445, "loss": 0.4261817932128906, "loss_ce": 1.9077177057624795e-05, "loss_iou": 0.1806640625, "loss_num": 0.0130615234375, "loss_xval": 0.42578125, "num_input_tokens_seen": 289670812, "step": 4620 }, { "epoch": 15.377703826955075, "grad_norm": 30.502914428710938, "learning_rate": 5e-06, "loss": 0.4167, "num_input_tokens_seen": 289733116, "step": 4621 }, { "epoch": 15.377703826955075, "loss": 0.3542577624320984, "loss_ce": 0.0016576657071709633, "loss_iou": 0.15625, "loss_num": 0.008056640625, "loss_xval": 0.353515625, "num_input_tokens_seen": 289733116, "step": 4621 }, { "epoch": 15.381031613976706, "grad_norm": 15.835978507995605, "learning_rate": 5e-06, "loss": 0.3184, "num_input_tokens_seen": 289795036, "step": 4622 }, { "epoch": 15.381031613976706, "loss": 0.2711339592933655, "loss_ce": 1.5810588593012653e-05, "loss_iou": 0.09033203125, "loss_num": 0.0181884765625, "loss_xval": 0.271484375, "num_input_tokens_seen": 289795036, "step": 4622 }, { "epoch": 15.384359400998337, "grad_norm": 13.582571029663086, "learning_rate": 5e-06, "loss": 0.4352, "num_input_tokens_seen": 289859264, "step": 4623 }, { "epoch": 15.384359400998337, "loss": 0.5640891790390015, "loss_ce": 2.2631013507634634e-06, "loss_iou": 0.236328125, "loss_num": 0.018310546875, "loss_xval": 0.5625, "num_input_tokens_seen": 289859264, "step": 4623 }, { "epoch": 15.387687188019967, "grad_norm": 8.066040992736816, "learning_rate": 5e-06, "loss": 0.377, "num_input_tokens_seen": 289921860, "step": 4624 }, { "epoch": 15.387687188019967, "loss": 0.22323819994926453, "loss_ce": 2.118132670148043e-06, "loss_iou": 0.07568359375, "loss_num": 0.01434326171875, "loss_xval": 0.2236328125, "num_input_tokens_seen": 289921860, "step": 4624 }, { "epoch": 15.391014975041598, "grad_norm": 27.713502883911133, "learning_rate": 5e-06, "loss": 0.4759, "num_input_tokens_seen": 289984852, "step": 4625 }, { "epoch": 15.391014975041598, "loss": 0.3892848491668701, "loss_ce": 2.6411412363813724e-06, "loss_iou": 0.15625, "loss_num": 0.01531982421875, "loss_xval": 0.388671875, "num_input_tokens_seen": 289984852, "step": 4625 }, { "epoch": 15.394342762063228, "grad_norm": 25.582962036132812, "learning_rate": 5e-06, "loss": 0.3856, "num_input_tokens_seen": 290048500, "step": 4626 }, { "epoch": 15.394342762063228, "loss": 0.31919753551483154, "loss_ce": 0.0006550746038556099, "loss_iou": 0.11279296875, "loss_num": 0.0186767578125, "loss_xval": 0.318359375, "num_input_tokens_seen": 290048500, "step": 4626 }, { "epoch": 15.397670549084859, "grad_norm": 37.981136322021484, "learning_rate": 5e-06, "loss": 0.352, "num_input_tokens_seen": 290109388, "step": 4627 }, { "epoch": 15.397670549084859, "loss": 0.13711994886398315, "loss_ce": 9.603313083061948e-05, "loss_iou": 0.0308837890625, "loss_num": 0.01507568359375, "loss_xval": 0.13671875, "num_input_tokens_seen": 290109388, "step": 4627 }, { "epoch": 15.40099833610649, "grad_norm": 36.5746955871582, "learning_rate": 5e-06, "loss": 0.503, "num_input_tokens_seen": 290170480, "step": 4628 }, { "epoch": 15.40099833610649, "loss": 0.6241487264633179, "loss_ce": 3.2677617127774283e-06, "loss_iou": 0.2197265625, "loss_num": 0.03662109375, "loss_xval": 0.625, "num_input_tokens_seen": 290170480, "step": 4628 }, { "epoch": 15.40432612312812, "grad_norm": 37.635841369628906, "learning_rate": 5e-06, "loss": 0.5871, "num_input_tokens_seen": 290231700, "step": 4629 }, { "epoch": 15.40432612312812, "loss": 0.6444717049598694, "loss_ce": 1.414128064425313e-06, "loss_iou": 0.25390625, "loss_num": 0.027587890625, "loss_xval": 0.64453125, "num_input_tokens_seen": 290231700, "step": 4629 }, { "epoch": 15.407653910149751, "grad_norm": 23.63850975036621, "learning_rate": 5e-06, "loss": 0.5051, "num_input_tokens_seen": 290293920, "step": 4630 }, { "epoch": 15.407653910149751, "loss": 0.2697855234146118, "loss_ce": 1.014315421343781e-05, "loss_iou": 0.08447265625, "loss_num": 0.0201416015625, "loss_xval": 0.26953125, "num_input_tokens_seen": 290293920, "step": 4630 }, { "epoch": 15.410981697171382, "grad_norm": 10.474287986755371, "learning_rate": 5e-06, "loss": 0.2837, "num_input_tokens_seen": 290356648, "step": 4631 }, { "epoch": 15.410981697171382, "loss": 0.21837802231311798, "loss_ce": 0.0016650703037157655, "loss_iou": 0.0791015625, "loss_num": 0.01177978515625, "loss_xval": 0.216796875, "num_input_tokens_seen": 290356648, "step": 4631 }, { "epoch": 15.414309484193012, "grad_norm": 13.374776840209961, "learning_rate": 5e-06, "loss": 0.5202, "num_input_tokens_seen": 290419536, "step": 4632 }, { "epoch": 15.414309484193012, "loss": 0.5755019187927246, "loss_ce": 1.4609720437874785e-06, "loss_iou": 0.2333984375, "loss_num": 0.0216064453125, "loss_xval": 0.57421875, "num_input_tokens_seen": 290419536, "step": 4632 }, { "epoch": 15.417637271214643, "grad_norm": 25.12147331237793, "learning_rate": 5e-06, "loss": 0.5613, "num_input_tokens_seen": 290481304, "step": 4633 }, { "epoch": 15.417637271214643, "loss": 0.605491042137146, "loss_ce": 2.230720201623626e-05, "loss_iou": 0.2353515625, "loss_num": 0.0272216796875, "loss_xval": 0.60546875, "num_input_tokens_seen": 290481304, "step": 4633 }, { "epoch": 15.420965058236273, "grad_norm": 16.244218826293945, "learning_rate": 5e-06, "loss": 0.4364, "num_input_tokens_seen": 290544564, "step": 4634 }, { "epoch": 15.420965058236273, "loss": 0.49995505809783936, "loss_ce": 1.6102505469461903e-05, "loss_iou": 0.2060546875, "loss_num": 0.017333984375, "loss_xval": 0.5, "num_input_tokens_seen": 290544564, "step": 4634 }, { "epoch": 15.424292845257904, "grad_norm": 8.300942420959473, "learning_rate": 5e-06, "loss": 0.3271, "num_input_tokens_seen": 290607428, "step": 4635 }, { "epoch": 15.424292845257904, "loss": 0.2690449357032776, "loss_ce": 1.9749297734961146e-06, "loss_iou": 0.07470703125, "loss_num": 0.02392578125, "loss_xval": 0.26953125, "num_input_tokens_seen": 290607428, "step": 4635 }, { "epoch": 15.427620632279535, "grad_norm": 9.840921401977539, "learning_rate": 5e-06, "loss": 0.5164, "num_input_tokens_seen": 290670752, "step": 4636 }, { "epoch": 15.427620632279535, "loss": 0.6389344334602356, "loss_ce": 1.8439701307215728e-05, "loss_iou": 0.2578125, "loss_num": 0.0244140625, "loss_xval": 0.640625, "num_input_tokens_seen": 290670752, "step": 4636 }, { "epoch": 15.430948419301165, "grad_norm": 24.048887252807617, "learning_rate": 5e-06, "loss": 0.4372, "num_input_tokens_seen": 290733032, "step": 4637 }, { "epoch": 15.430948419301165, "loss": 0.23984988033771515, "loss_ce": 0.0001648107572691515, "loss_iou": 0.08203125, "loss_num": 0.0150146484375, "loss_xval": 0.2392578125, "num_input_tokens_seen": 290733032, "step": 4637 }, { "epoch": 15.434276206322796, "grad_norm": 13.509739875793457, "learning_rate": 5e-06, "loss": 0.5625, "num_input_tokens_seen": 290795056, "step": 4638 }, { "epoch": 15.434276206322796, "loss": 0.7454675436019897, "loss_ce": 1.4647911484644283e-05, "loss_iou": 0.2412109375, "loss_num": 0.05224609375, "loss_xval": 0.74609375, "num_input_tokens_seen": 290795056, "step": 4638 }, { "epoch": 15.437603993344426, "grad_norm": 120.78231811523438, "learning_rate": 5e-06, "loss": 0.3602, "num_input_tokens_seen": 290858036, "step": 4639 }, { "epoch": 15.437603993344426, "loss": 0.22827580571174622, "loss_ce": 4.320701918913983e-06, "loss_iou": 0.09130859375, "loss_num": 0.00921630859375, "loss_xval": 0.228515625, "num_input_tokens_seen": 290858036, "step": 4639 }, { "epoch": 15.440931780366057, "grad_norm": 8.476497650146484, "learning_rate": 5e-06, "loss": 0.3813, "num_input_tokens_seen": 290919600, "step": 4640 }, { "epoch": 15.440931780366057, "loss": 0.3580329120159149, "loss_ce": 6.767580771338544e-07, "loss_iou": 0.125, "loss_num": 0.0216064453125, "loss_xval": 0.357421875, "num_input_tokens_seen": 290919600, "step": 4640 }, { "epoch": 15.444259567387688, "grad_norm": 13.13843059539795, "learning_rate": 5e-06, "loss": 0.4958, "num_input_tokens_seen": 290982612, "step": 4641 }, { "epoch": 15.444259567387688, "loss": 0.4605555534362793, "loss_ce": 4.5281944039743394e-05, "loss_iou": 0.2041015625, "loss_num": 0.01043701171875, "loss_xval": 0.4609375, "num_input_tokens_seen": 290982612, "step": 4641 }, { "epoch": 15.447587354409318, "grad_norm": 16.051185607910156, "learning_rate": 5e-06, "loss": 0.3923, "num_input_tokens_seen": 291045148, "step": 4642 }, { "epoch": 15.447587354409318, "loss": 0.35351788997650146, "loss_ce": 2.2722381345374743e-06, "loss_iou": 0.146484375, "loss_num": 0.0120849609375, "loss_xval": 0.353515625, "num_input_tokens_seen": 291045148, "step": 4642 }, { "epoch": 15.450915141430949, "grad_norm": 22.843399047851562, "learning_rate": 5e-06, "loss": 0.489, "num_input_tokens_seen": 291107684, "step": 4643 }, { "epoch": 15.450915141430949, "loss": 0.4906079173088074, "loss_ce": 6.834689702372998e-05, "loss_iou": 0.20703125, "loss_num": 0.01531982421875, "loss_xval": 0.490234375, "num_input_tokens_seen": 291107684, "step": 4643 }, { "epoch": 15.45424292845258, "grad_norm": 16.05107879638672, "learning_rate": 5e-06, "loss": 0.4752, "num_input_tokens_seen": 291170564, "step": 4644 }, { "epoch": 15.45424292845258, "loss": 0.4323166608810425, "loss_ce": 4.682584858528571e-06, "loss_iou": 0.166015625, "loss_num": 0.02001953125, "loss_xval": 0.431640625, "num_input_tokens_seen": 291170564, "step": 4644 }, { "epoch": 15.45757071547421, "grad_norm": 17.070064544677734, "learning_rate": 5e-06, "loss": 0.3012, "num_input_tokens_seen": 291234376, "step": 4645 }, { "epoch": 15.45757071547421, "loss": 0.27023571729660034, "loss_ce": 2.5835679480223916e-06, "loss_iou": 0.10986328125, "loss_num": 0.01019287109375, "loss_xval": 0.26953125, "num_input_tokens_seen": 291234376, "step": 4645 }, { "epoch": 15.46089850249584, "grad_norm": 30.759979248046875, "learning_rate": 5e-06, "loss": 0.5338, "num_input_tokens_seen": 291298304, "step": 4646 }, { "epoch": 15.46089850249584, "loss": 0.40026944875717163, "loss_ce": 9.010752819449408e-07, "loss_iou": 0.162109375, "loss_num": 0.01507568359375, "loss_xval": 0.400390625, "num_input_tokens_seen": 291298304, "step": 4646 }, { "epoch": 15.464226289517471, "grad_norm": 22.734485626220703, "learning_rate": 5e-06, "loss": 0.5006, "num_input_tokens_seen": 291360852, "step": 4647 }, { "epoch": 15.464226289517471, "loss": 0.5762366056442261, "loss_ce": 3.667661985673476e-06, "loss_iou": 0.236328125, "loss_num": 0.0208740234375, "loss_xval": 0.578125, "num_input_tokens_seen": 291360852, "step": 4647 }, { "epoch": 15.467554076539102, "grad_norm": 10.2783842086792, "learning_rate": 5e-06, "loss": 0.5197, "num_input_tokens_seen": 291423636, "step": 4648 }, { "epoch": 15.467554076539102, "loss": 0.539190948009491, "loss_ce": 6.375766133714933e-06, "loss_iou": 0.2138671875, "loss_num": 0.02197265625, "loss_xval": 0.5390625, "num_input_tokens_seen": 291423636, "step": 4648 }, { "epoch": 15.470881863560733, "grad_norm": 7.026730537414551, "learning_rate": 5e-06, "loss": 0.3549, "num_input_tokens_seen": 291485064, "step": 4649 }, { "epoch": 15.470881863560733, "loss": 0.2523987293243408, "loss_ce": 1.836389856180176e-05, "loss_iou": 0.09033203125, "loss_num": 0.014404296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 291485064, "step": 4649 }, { "epoch": 15.474209650582363, "grad_norm": 11.29751968383789, "learning_rate": 5e-06, "loss": 0.4732, "num_input_tokens_seen": 291549680, "step": 4650 }, { "epoch": 15.474209650582363, "loss": 0.6521248817443848, "loss_ce": 0.0001473398006055504, "loss_iou": 0.26953125, "loss_num": 0.02294921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 291549680, "step": 4650 }, { "epoch": 15.477537437603994, "grad_norm": 9.901208877563477, "learning_rate": 5e-06, "loss": 0.4707, "num_input_tokens_seen": 291611556, "step": 4651 }, { "epoch": 15.477537437603994, "loss": 0.30945318937301636, "loss_ce": 4.947806246491382e-06, "loss_iou": 0.10400390625, "loss_num": 0.0201416015625, "loss_xval": 0.30859375, "num_input_tokens_seen": 291611556, "step": 4651 }, { "epoch": 15.480865224625624, "grad_norm": 11.128103256225586, "learning_rate": 5e-06, "loss": 0.4991, "num_input_tokens_seen": 291672192, "step": 4652 }, { "epoch": 15.480865224625624, "loss": 0.7348965406417847, "loss_ce": 2.7707405934052076e-06, "loss_iou": 0.26171875, "loss_num": 0.0419921875, "loss_xval": 0.734375, "num_input_tokens_seen": 291672192, "step": 4652 }, { "epoch": 15.484193011647255, "grad_norm": 11.256495475769043, "learning_rate": 5e-06, "loss": 0.3788, "num_input_tokens_seen": 291734404, "step": 4653 }, { "epoch": 15.484193011647255, "loss": 0.43951499462127686, "loss_ce": 8.395798545279831e-07, "loss_iou": 0.193359375, "loss_num": 0.0107421875, "loss_xval": 0.439453125, "num_input_tokens_seen": 291734404, "step": 4653 }, { "epoch": 15.487520798668886, "grad_norm": 18.110658645629883, "learning_rate": 5e-06, "loss": 0.3118, "num_input_tokens_seen": 291796388, "step": 4654 }, { "epoch": 15.487520798668886, "loss": 0.4263976514339447, "loss_ce": 6.0581342040677555e-06, "loss_iou": 0.1630859375, "loss_num": 0.0198974609375, "loss_xval": 0.42578125, "num_input_tokens_seen": 291796388, "step": 4654 }, { "epoch": 15.490848585690516, "grad_norm": 16.36403465270996, "learning_rate": 5e-06, "loss": 0.6452, "num_input_tokens_seen": 291858824, "step": 4655 }, { "epoch": 15.490848585690516, "loss": 0.6425802111625671, "loss_ce": 2.082681021420285e-06, "loss_iou": 0.2373046875, "loss_num": 0.033447265625, "loss_xval": 0.640625, "num_input_tokens_seen": 291858824, "step": 4655 }, { "epoch": 15.494176372712147, "grad_norm": 18.44325828552246, "learning_rate": 5e-06, "loss": 0.5304, "num_input_tokens_seen": 291920504, "step": 4656 }, { "epoch": 15.494176372712147, "loss": 0.5397971868515015, "loss_ce": 2.223588580818614e-06, "loss_iou": 0.2216796875, "loss_num": 0.01904296875, "loss_xval": 0.5390625, "num_input_tokens_seen": 291920504, "step": 4656 }, { "epoch": 15.497504159733777, "grad_norm": 31.121217727661133, "learning_rate": 5e-06, "loss": 0.3109, "num_input_tokens_seen": 291982612, "step": 4657 }, { "epoch": 15.497504159733777, "loss": 0.2522483766078949, "loss_ce": 2.0594816305674613e-05, "loss_iou": 0.091796875, "loss_num": 0.01385498046875, "loss_xval": 0.251953125, "num_input_tokens_seen": 291982612, "step": 4657 }, { "epoch": 15.500831946755408, "grad_norm": 14.447250366210938, "learning_rate": 5e-06, "loss": 0.433, "num_input_tokens_seen": 292046248, "step": 4658 }, { "epoch": 15.500831946755408, "loss": 0.5285735130310059, "loss_ce": 9.058602699951734e-06, "loss_iou": 0.2255859375, "loss_num": 0.0155029296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 292046248, "step": 4658 }, { "epoch": 15.504159733777039, "grad_norm": 18.706165313720703, "learning_rate": 5e-06, "loss": 0.3466, "num_input_tokens_seen": 292108036, "step": 4659 }, { "epoch": 15.504159733777039, "loss": 0.3569354712963104, "loss_ce": 1.8732782791630598e-06, "loss_iou": 0.154296875, "loss_num": 0.0096435546875, "loss_xval": 0.357421875, "num_input_tokens_seen": 292108036, "step": 4659 }, { "epoch": 15.50748752079867, "grad_norm": 24.29754066467285, "learning_rate": 5e-06, "loss": 0.4414, "num_input_tokens_seen": 292172288, "step": 4660 }, { "epoch": 15.50748752079867, "loss": 0.5383032560348511, "loss_ce": 9.530741954222322e-05, "loss_iou": 0.2216796875, "loss_num": 0.0189208984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 292172288, "step": 4660 }, { "epoch": 15.5108153078203, "grad_norm": 23.17440414428711, "learning_rate": 5e-06, "loss": 0.4773, "num_input_tokens_seen": 292236520, "step": 4661 }, { "epoch": 15.5108153078203, "loss": 0.4831855893135071, "loss_ce": 7.659693324058026e-07, "loss_iou": 0.212890625, "loss_num": 0.011474609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 292236520, "step": 4661 }, { "epoch": 15.51414309484193, "grad_norm": 21.153987884521484, "learning_rate": 5e-06, "loss": 0.4943, "num_input_tokens_seen": 292299764, "step": 4662 }, { "epoch": 15.51414309484193, "loss": 0.48871326446533203, "loss_ce": 4.77489174954826e-06, "loss_iou": 0.1748046875, "loss_num": 0.02783203125, "loss_xval": 0.48828125, "num_input_tokens_seen": 292299764, "step": 4662 }, { "epoch": 15.517470881863561, "grad_norm": 25.25882911682129, "learning_rate": 5e-06, "loss": 0.442, "num_input_tokens_seen": 292363792, "step": 4663 }, { "epoch": 15.517470881863561, "loss": 0.38969293236732483, "loss_ce": 4.448069375939667e-05, "loss_iou": 0.16796875, "loss_num": 0.0106201171875, "loss_xval": 0.390625, "num_input_tokens_seen": 292363792, "step": 4663 }, { "epoch": 15.520798668885192, "grad_norm": 12.109362602233887, "learning_rate": 5e-06, "loss": 0.4236, "num_input_tokens_seen": 292426324, "step": 4664 }, { "epoch": 15.520798668885192, "loss": 0.31924548745155334, "loss_ce": 1.1044210168620339e-06, "loss_iou": 0.08935546875, "loss_num": 0.028076171875, "loss_xval": 0.318359375, "num_input_tokens_seen": 292426324, "step": 4664 }, { "epoch": 15.524126455906822, "grad_norm": 7.233288764953613, "learning_rate": 5e-06, "loss": 0.5755, "num_input_tokens_seen": 292487428, "step": 4665 }, { "epoch": 15.524126455906822, "loss": 0.4920737147331238, "loss_ce": 8.302112291858066e-06, "loss_iou": 0.1826171875, "loss_num": 0.0252685546875, "loss_xval": 0.4921875, "num_input_tokens_seen": 292487428, "step": 4665 }, { "epoch": 15.527454242928453, "grad_norm": 27.67567253112793, "learning_rate": 5e-06, "loss": 0.4066, "num_input_tokens_seen": 292549936, "step": 4666 }, { "epoch": 15.527454242928453, "loss": 0.30762696266174316, "loss_ce": 2.1426521925604902e-06, "loss_iou": 0.12060546875, "loss_num": 0.0133056640625, "loss_xval": 0.30859375, "num_input_tokens_seen": 292549936, "step": 4666 }, { "epoch": 15.530782029950084, "grad_norm": 17.628591537475586, "learning_rate": 5e-06, "loss": 0.4467, "num_input_tokens_seen": 292612468, "step": 4667 }, { "epoch": 15.530782029950084, "loss": 0.3725593090057373, "loss_ce": 7.078766088852717e-07, "loss_iou": 0.123046875, "loss_num": 0.0255126953125, "loss_xval": 0.373046875, "num_input_tokens_seen": 292612468, "step": 4667 }, { "epoch": 15.534109816971714, "grad_norm": 10.085298538208008, "learning_rate": 5e-06, "loss": 0.3655, "num_input_tokens_seen": 292673416, "step": 4668 }, { "epoch": 15.534109816971714, "loss": 0.5480970740318298, "loss_ce": 1.3468522865878185e-06, "loss_iou": 0.1953125, "loss_num": 0.031494140625, "loss_xval": 0.546875, "num_input_tokens_seen": 292673416, "step": 4668 }, { "epoch": 15.537437603993345, "grad_norm": 8.916891098022461, "learning_rate": 5e-06, "loss": 0.5524, "num_input_tokens_seen": 292737356, "step": 4669 }, { "epoch": 15.537437603993345, "loss": 0.6737887263298035, "loss_ce": 5.978413173579611e-05, "loss_iou": 0.26171875, "loss_num": 0.02978515625, "loss_xval": 0.671875, "num_input_tokens_seen": 292737356, "step": 4669 }, { "epoch": 15.540765391014975, "grad_norm": 7.334640979766846, "learning_rate": 5e-06, "loss": 0.3445, "num_input_tokens_seen": 292800216, "step": 4670 }, { "epoch": 15.540765391014975, "loss": 0.42114517092704773, "loss_ce": 2.593459612398874e-06, "loss_iou": 0.1904296875, "loss_num": 0.00811767578125, "loss_xval": 0.421875, "num_input_tokens_seen": 292800216, "step": 4670 }, { "epoch": 15.544093178036606, "grad_norm": 24.948543548583984, "learning_rate": 5e-06, "loss": 0.6475, "num_input_tokens_seen": 292864388, "step": 4671 }, { "epoch": 15.544093178036606, "loss": 0.6406274437904358, "loss_ce": 2.4240971470135264e-06, "loss_iou": 0.26953125, "loss_num": 0.019775390625, "loss_xval": 0.640625, "num_input_tokens_seen": 292864388, "step": 4671 }, { "epoch": 15.547420965058237, "grad_norm": 7.732693195343018, "learning_rate": 5e-06, "loss": 0.3702, "num_input_tokens_seen": 292925116, "step": 4672 }, { "epoch": 15.547420965058237, "loss": 0.36349642276763916, "loss_ce": 1.5466713421119493e-06, "loss_iou": 0.1259765625, "loss_num": 0.0224609375, "loss_xval": 0.36328125, "num_input_tokens_seen": 292925116, "step": 4672 }, { "epoch": 15.550748752079867, "grad_norm": 11.683064460754395, "learning_rate": 5e-06, "loss": 0.3193, "num_input_tokens_seen": 292988136, "step": 4673 }, { "epoch": 15.550748752079867, "loss": 0.2990120053291321, "loss_ce": 7.89676903423242e-07, "loss_iou": 0.11962890625, "loss_num": 0.0118408203125, "loss_xval": 0.298828125, "num_input_tokens_seen": 292988136, "step": 4673 }, { "epoch": 15.554076539101498, "grad_norm": 21.177091598510742, "learning_rate": 5e-06, "loss": 0.6736, "num_input_tokens_seen": 293052188, "step": 4674 }, { "epoch": 15.554076539101498, "loss": 0.7539273500442505, "loss_ce": 2.105686508002691e-05, "loss_iou": 0.32421875, "loss_num": 0.0211181640625, "loss_xval": 0.75390625, "num_input_tokens_seen": 293052188, "step": 4674 }, { "epoch": 15.557404326123129, "grad_norm": 16.56670570373535, "learning_rate": 5e-06, "loss": 0.4601, "num_input_tokens_seen": 293115164, "step": 4675 }, { "epoch": 15.557404326123129, "loss": 0.41116729378700256, "loss_ce": 0.0001870664127636701, "loss_iou": 0.138671875, "loss_num": 0.0267333984375, "loss_xval": 0.41015625, "num_input_tokens_seen": 293115164, "step": 4675 }, { "epoch": 15.56073211314476, "grad_norm": 8.687105178833008, "learning_rate": 5e-06, "loss": 0.451, "num_input_tokens_seen": 293177188, "step": 4676 }, { "epoch": 15.56073211314476, "loss": 0.5467841625213623, "loss_ce": 0.0002143154852092266, "loss_iou": 0.2119140625, "loss_num": 0.0244140625, "loss_xval": 0.546875, "num_input_tokens_seen": 293177188, "step": 4676 }, { "epoch": 15.56405990016639, "grad_norm": 7.787600994110107, "learning_rate": 5e-06, "loss": 0.3921, "num_input_tokens_seen": 293239680, "step": 4677 }, { "epoch": 15.56405990016639, "loss": 0.46110308170318604, "loss_ce": 1.299084397032857e-05, "loss_iou": 0.1787109375, "loss_num": 0.0208740234375, "loss_xval": 0.4609375, "num_input_tokens_seen": 293239680, "step": 4677 }, { "epoch": 15.56738768718802, "grad_norm": 33.44221878051758, "learning_rate": 5e-06, "loss": 0.4551, "num_input_tokens_seen": 293302376, "step": 4678 }, { "epoch": 15.56738768718802, "loss": 0.5790234804153442, "loss_ce": 4.396074655232951e-05, "loss_iou": 0.228515625, "loss_num": 0.02392578125, "loss_xval": 0.578125, "num_input_tokens_seen": 293302376, "step": 4678 }, { "epoch": 15.570715474209651, "grad_norm": 32.514495849609375, "learning_rate": 5e-06, "loss": 0.5665, "num_input_tokens_seen": 293365336, "step": 4679 }, { "epoch": 15.570715474209651, "loss": 0.6757228374481201, "loss_ce": 2.5795102374104317e-06, "loss_iou": 0.267578125, "loss_num": 0.0284423828125, "loss_xval": 0.67578125, "num_input_tokens_seen": 293365336, "step": 4679 }, { "epoch": 15.574043261231282, "grad_norm": 10.580674171447754, "learning_rate": 5e-06, "loss": 0.4878, "num_input_tokens_seen": 293427248, "step": 4680 }, { "epoch": 15.574043261231282, "loss": 0.37825414538383484, "loss_ce": 0.00011085709411418065, "loss_iou": 0.11962890625, "loss_num": 0.0277099609375, "loss_xval": 0.37890625, "num_input_tokens_seen": 293427248, "step": 4680 }, { "epoch": 15.577371048252912, "grad_norm": 11.98954963684082, "learning_rate": 5e-06, "loss": 0.4911, "num_input_tokens_seen": 293490880, "step": 4681 }, { "epoch": 15.577371048252912, "loss": 0.5353068113327026, "loss_ce": 1.3255478734208737e-05, "loss_iou": 0.2109375, "loss_num": 0.0225830078125, "loss_xval": 0.53515625, "num_input_tokens_seen": 293490880, "step": 4681 }, { "epoch": 15.580698835274543, "grad_norm": 8.103081703186035, "learning_rate": 5e-06, "loss": 0.4906, "num_input_tokens_seen": 293553488, "step": 4682 }, { "epoch": 15.580698835274543, "loss": 0.4344519376754761, "loss_ce": 3.725479018612532e-06, "loss_iou": 0.15234375, "loss_num": 0.0257568359375, "loss_xval": 0.43359375, "num_input_tokens_seen": 293553488, "step": 4682 }, { "epoch": 15.584026622296173, "grad_norm": 9.111823081970215, "learning_rate": 5e-06, "loss": 0.3214, "num_input_tokens_seen": 293615196, "step": 4683 }, { "epoch": 15.584026622296173, "loss": 0.3397538959980011, "loss_ce": 9.32622206164524e-05, "loss_iou": 0.1142578125, "loss_num": 0.022216796875, "loss_xval": 0.33984375, "num_input_tokens_seen": 293615196, "step": 4683 }, { "epoch": 15.587354409317804, "grad_norm": 9.743192672729492, "learning_rate": 5e-06, "loss": 0.3851, "num_input_tokens_seen": 293677044, "step": 4684 }, { "epoch": 15.587354409317804, "loss": 0.5540828704833984, "loss_ce": 6.675184704363346e-05, "loss_iou": 0.2294921875, "loss_num": 0.0189208984375, "loss_xval": 0.5546875, "num_input_tokens_seen": 293677044, "step": 4684 }, { "epoch": 15.590682196339435, "grad_norm": 15.232601165771484, "learning_rate": 5e-06, "loss": 0.2771, "num_input_tokens_seen": 293739384, "step": 4685 }, { "epoch": 15.590682196339435, "loss": 0.3478104770183563, "loss_ce": 1.6310265209540376e-06, "loss_iou": 0.13671875, "loss_num": 0.0147705078125, "loss_xval": 0.34765625, "num_input_tokens_seen": 293739384, "step": 4685 }, { "epoch": 15.594009983361065, "grad_norm": 30.944406509399414, "learning_rate": 5e-06, "loss": 0.4162, "num_input_tokens_seen": 293801600, "step": 4686 }, { "epoch": 15.594009983361065, "loss": 0.6131649017333984, "loss_ce": 5.646686531690648e-06, "loss_iou": 0.236328125, "loss_num": 0.028076171875, "loss_xval": 0.61328125, "num_input_tokens_seen": 293801600, "step": 4686 }, { "epoch": 15.597337770382696, "grad_norm": 26.7600040435791, "learning_rate": 5e-06, "loss": 0.58, "num_input_tokens_seen": 293865056, "step": 4687 }, { "epoch": 15.597337770382696, "loss": 0.5910053253173828, "loss_ce": 1.925290007420699e-06, "loss_iou": 0.228515625, "loss_num": 0.0269775390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 293865056, "step": 4687 }, { "epoch": 15.600665557404326, "grad_norm": 6.127683162689209, "learning_rate": 5e-06, "loss": 0.2954, "num_input_tokens_seen": 293925368, "step": 4688 }, { "epoch": 15.600665557404326, "loss": 0.3646259605884552, "loss_ce": 1.957647782546701e-06, "loss_iou": 0.125, "loss_num": 0.0228271484375, "loss_xval": 0.365234375, "num_input_tokens_seen": 293925368, "step": 4688 }, { "epoch": 15.603993344425957, "grad_norm": 5.455382823944092, "learning_rate": 5e-06, "loss": 0.2074, "num_input_tokens_seen": 293987860, "step": 4689 }, { "epoch": 15.603993344425957, "loss": 0.17163345217704773, "loss_ce": 2.5969579837692436e-06, "loss_iou": 0.058349609375, "loss_num": 0.010986328125, "loss_xval": 0.171875, "num_input_tokens_seen": 293987860, "step": 4689 }, { "epoch": 15.607321131447588, "grad_norm": 15.402108192443848, "learning_rate": 5e-06, "loss": 0.4745, "num_input_tokens_seen": 294051320, "step": 4690 }, { "epoch": 15.607321131447588, "loss": 0.5974264144897461, "loss_ce": 1.4284895769378636e-05, "loss_iou": 0.2294921875, "loss_num": 0.0279541015625, "loss_xval": 0.59765625, "num_input_tokens_seen": 294051320, "step": 4690 }, { "epoch": 15.610648918469218, "grad_norm": 27.00069808959961, "learning_rate": 5e-06, "loss": 0.443, "num_input_tokens_seen": 294115456, "step": 4691 }, { "epoch": 15.610648918469218, "loss": 0.6201558113098145, "loss_ce": 3.858868876704946e-05, "loss_iou": 0.251953125, "loss_num": 0.0230712890625, "loss_xval": 0.62109375, "num_input_tokens_seen": 294115456, "step": 4691 }, { "epoch": 15.613976705490849, "grad_norm": 24.46932601928711, "learning_rate": 5e-06, "loss": 0.3145, "num_input_tokens_seen": 294178296, "step": 4692 }, { "epoch": 15.613976705490849, "loss": 0.43750184774398804, "loss_ce": 1.8690425349632278e-06, "loss_iou": 0.16015625, "loss_num": 0.0233154296875, "loss_xval": 0.4375, "num_input_tokens_seen": 294178296, "step": 4692 }, { "epoch": 15.61730449251248, "grad_norm": 16.666397094726562, "learning_rate": 5e-06, "loss": 0.4289, "num_input_tokens_seen": 294240296, "step": 4693 }, { "epoch": 15.61730449251248, "loss": 0.4185827970504761, "loss_ce": 3.6777723835257348e-06, "loss_iou": 0.1572265625, "loss_num": 0.02099609375, "loss_xval": 0.41796875, "num_input_tokens_seen": 294240296, "step": 4693 }, { "epoch": 15.62063227953411, "grad_norm": 17.273990631103516, "learning_rate": 5e-06, "loss": 0.3711, "num_input_tokens_seen": 294304076, "step": 4694 }, { "epoch": 15.62063227953411, "loss": 0.32410770654678345, "loss_ce": 1.1026867468899582e-05, "loss_iou": 0.11328125, "loss_num": 0.0196533203125, "loss_xval": 0.32421875, "num_input_tokens_seen": 294304076, "step": 4694 }, { "epoch": 15.62396006655574, "grad_norm": 18.360397338867188, "learning_rate": 5e-06, "loss": 0.4959, "num_input_tokens_seen": 294367116, "step": 4695 }, { "epoch": 15.62396006655574, "loss": 0.4484930634498596, "loss_ce": 6.75817318551708e-06, "loss_iou": 0.18359375, "loss_num": 0.0159912109375, "loss_xval": 0.44921875, "num_input_tokens_seen": 294367116, "step": 4695 }, { "epoch": 15.627287853577371, "grad_norm": 24.195575714111328, "learning_rate": 5e-06, "loss": 0.3972, "num_input_tokens_seen": 294430372, "step": 4696 }, { "epoch": 15.627287853577371, "loss": 0.42067569494247437, "loss_ce": 2.141589175153058e-05, "loss_iou": 0.1533203125, "loss_num": 0.0225830078125, "loss_xval": 0.419921875, "num_input_tokens_seen": 294430372, "step": 4696 }, { "epoch": 15.630615640599002, "grad_norm": 30.780113220214844, "learning_rate": 5e-06, "loss": 0.3006, "num_input_tokens_seen": 294492604, "step": 4697 }, { "epoch": 15.630615640599002, "loss": 0.3325938582420349, "loss_ce": 4.380011523608118e-05, "loss_iou": 0.11376953125, "loss_num": 0.02099609375, "loss_xval": 0.33203125, "num_input_tokens_seen": 294492604, "step": 4697 }, { "epoch": 15.633943427620633, "grad_norm": 26.659025192260742, "learning_rate": 5e-06, "loss": 0.3397, "num_input_tokens_seen": 294556168, "step": 4698 }, { "epoch": 15.633943427620633, "loss": 0.41180652379989624, "loss_ce": 0.0006737185176461935, "loss_iou": 0.1806640625, "loss_num": 0.0101318359375, "loss_xval": 0.41015625, "num_input_tokens_seen": 294556168, "step": 4698 }, { "epoch": 15.637271214642263, "grad_norm": 21.95252799987793, "learning_rate": 5e-06, "loss": 0.3089, "num_input_tokens_seen": 294619664, "step": 4699 }, { "epoch": 15.637271214642263, "loss": 0.3047538697719574, "loss_ce": 5.344056717149215e-06, "loss_iou": 0.1015625, "loss_num": 0.0203857421875, "loss_xval": 0.3046875, "num_input_tokens_seen": 294619664, "step": 4699 }, { "epoch": 15.640599001663894, "grad_norm": 23.80196762084961, "learning_rate": 5e-06, "loss": 0.5456, "num_input_tokens_seen": 294682428, "step": 4700 }, { "epoch": 15.640599001663894, "loss": 0.5537309646606445, "loss_ce": 2.0025974663440138e-05, "loss_iou": 0.2333984375, "loss_num": 0.0174560546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 294682428, "step": 4700 }, { "epoch": 15.643926788685524, "grad_norm": 22.51323127746582, "learning_rate": 5e-06, "loss": 0.5695, "num_input_tokens_seen": 294746280, "step": 4701 }, { "epoch": 15.643926788685524, "loss": 0.603613018989563, "loss_ce": 0.0002194869302911684, "loss_iou": 0.2578125, "loss_num": 0.0179443359375, "loss_xval": 0.6015625, "num_input_tokens_seen": 294746280, "step": 4701 }, { "epoch": 15.647254575707155, "grad_norm": 24.66767692565918, "learning_rate": 5e-06, "loss": 0.4755, "num_input_tokens_seen": 294808800, "step": 4702 }, { "epoch": 15.647254575707155, "loss": 0.4858284592628479, "loss_ce": 1.9137458366458304e-05, "loss_iou": 0.212890625, "loss_num": 0.011962890625, "loss_xval": 0.486328125, "num_input_tokens_seen": 294808800, "step": 4702 }, { "epoch": 15.650582362728786, "grad_norm": 22.89531135559082, "learning_rate": 5e-06, "loss": 0.4389, "num_input_tokens_seen": 294872564, "step": 4703 }, { "epoch": 15.650582362728786, "loss": 0.47850683331489563, "loss_ce": 0.00023536001390311867, "loss_iou": 0.1982421875, "loss_num": 0.0162353515625, "loss_xval": 0.478515625, "num_input_tokens_seen": 294872564, "step": 4703 }, { "epoch": 15.653910149750416, "grad_norm": 23.247591018676758, "learning_rate": 5e-06, "loss": 0.4233, "num_input_tokens_seen": 294936212, "step": 4704 }, { "epoch": 15.653910149750416, "loss": 0.39453309774398804, "loss_ce": 1.8573462057247525e-06, "loss_iou": 0.1689453125, "loss_num": 0.01123046875, "loss_xval": 0.39453125, "num_input_tokens_seen": 294936212, "step": 4704 }, { "epoch": 15.657237936772047, "grad_norm": 18.120298385620117, "learning_rate": 5e-06, "loss": 0.5065, "num_input_tokens_seen": 294999060, "step": 4705 }, { "epoch": 15.657237936772047, "loss": 0.5571354627609253, "loss_ce": 6.5627482399577275e-06, "loss_iou": 0.2265625, "loss_num": 0.0206298828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 294999060, "step": 4705 }, { "epoch": 15.660565723793678, "grad_norm": 12.13640022277832, "learning_rate": 5e-06, "loss": 0.4816, "num_input_tokens_seen": 295062120, "step": 4706 }, { "epoch": 15.660565723793678, "loss": 0.578525960445404, "loss_ce": 3.4746593883028254e-05, "loss_iou": 0.2265625, "loss_num": 0.0252685546875, "loss_xval": 0.578125, "num_input_tokens_seen": 295062120, "step": 4706 }, { "epoch": 15.663893510815308, "grad_norm": 8.907550811767578, "learning_rate": 5e-06, "loss": 0.3469, "num_input_tokens_seen": 295124156, "step": 4707 }, { "epoch": 15.663893510815308, "loss": 0.1961851567029953, "loss_ce": 2.906122517742915e-06, "loss_iou": 0.07666015625, "loss_num": 0.00860595703125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 295124156, "step": 4707 }, { "epoch": 15.667221297836939, "grad_norm": 12.01865005493164, "learning_rate": 5e-06, "loss": 0.5699, "num_input_tokens_seen": 295187104, "step": 4708 }, { "epoch": 15.667221297836939, "loss": 0.29379087686538696, "loss_ce": 0.00033386453287675977, "loss_iou": 0.115234375, "loss_num": 0.01263427734375, "loss_xval": 0.29296875, "num_input_tokens_seen": 295187104, "step": 4708 }, { "epoch": 15.67054908485857, "grad_norm": 11.482641220092773, "learning_rate": 5e-06, "loss": 0.2323, "num_input_tokens_seen": 295248264, "step": 4709 }, { "epoch": 15.67054908485857, "loss": 0.28257083892822266, "loss_ce": 0.00022220669779926538, "loss_iou": 0.111328125, "loss_num": 0.011962890625, "loss_xval": 0.283203125, "num_input_tokens_seen": 295248264, "step": 4709 }, { "epoch": 15.6738768718802, "grad_norm": 31.124792098999023, "learning_rate": 5e-06, "loss": 0.3383, "num_input_tokens_seen": 295310460, "step": 4710 }, { "epoch": 15.6738768718802, "loss": 0.433901309967041, "loss_ce": 2.401079655101057e-06, "loss_iou": 0.1376953125, "loss_num": 0.031494140625, "loss_xval": 0.43359375, "num_input_tokens_seen": 295310460, "step": 4710 }, { "epoch": 15.67720465890183, "grad_norm": 8.12982177734375, "learning_rate": 5e-06, "loss": 0.489, "num_input_tokens_seen": 295372388, "step": 4711 }, { "epoch": 15.67720465890183, "loss": 0.40455490350723267, "loss_ce": 1.387465999869164e-05, "loss_iou": 0.166015625, "loss_num": 0.01446533203125, "loss_xval": 0.404296875, "num_input_tokens_seen": 295372388, "step": 4711 }, { "epoch": 15.680532445923461, "grad_norm": 15.505697250366211, "learning_rate": 5e-06, "loss": 0.4931, "num_input_tokens_seen": 295436156, "step": 4712 }, { "epoch": 15.680532445923461, "loss": 0.5461620092391968, "loss_ce": 0.00011098339746240526, "loss_iou": 0.220703125, "loss_num": 0.0208740234375, "loss_xval": 0.546875, "num_input_tokens_seen": 295436156, "step": 4712 }, { "epoch": 15.683860232945092, "grad_norm": 17.917030334472656, "learning_rate": 5e-06, "loss": 0.3724, "num_input_tokens_seen": 295498508, "step": 4713 }, { "epoch": 15.683860232945092, "loss": 0.4445043206214905, "loss_ce": 5.470140536090184e-07, "loss_iou": 0.185546875, "loss_num": 0.01458740234375, "loss_xval": 0.4453125, "num_input_tokens_seen": 295498508, "step": 4713 }, { "epoch": 15.687188019966722, "grad_norm": 5.930410385131836, "learning_rate": 5e-06, "loss": 0.2983, "num_input_tokens_seen": 295560208, "step": 4714 }, { "epoch": 15.687188019966722, "loss": 0.4585585594177246, "loss_ce": 0.00018455248209647834, "loss_iou": 0.1982421875, "loss_num": 0.012451171875, "loss_xval": 0.458984375, "num_input_tokens_seen": 295560208, "step": 4714 }, { "epoch": 15.690515806988353, "grad_norm": 20.885374069213867, "learning_rate": 5e-06, "loss": 0.5414, "num_input_tokens_seen": 295623648, "step": 4715 }, { "epoch": 15.690515806988353, "loss": 0.6073014140129089, "loss_ce": 1.5965588318067603e-06, "loss_iou": 0.22265625, "loss_num": 0.032470703125, "loss_xval": 0.60546875, "num_input_tokens_seen": 295623648, "step": 4715 }, { "epoch": 15.693843594009984, "grad_norm": 18.91248321533203, "learning_rate": 5e-06, "loss": 0.4476, "num_input_tokens_seen": 295685656, "step": 4716 }, { "epoch": 15.693843594009984, "loss": 0.15765415132045746, "loss_ce": 3.45290970926726e-07, "loss_iou": 0.040771484375, "loss_num": 0.0152587890625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 295685656, "step": 4716 }, { "epoch": 15.697171381031614, "grad_norm": 5.66803503036499, "learning_rate": 5e-06, "loss": 0.347, "num_input_tokens_seen": 295749352, "step": 4717 }, { "epoch": 15.697171381031614, "loss": 0.3472955524921417, "loss_ce": 5.505349690793082e-06, "loss_iou": 0.138671875, "loss_num": 0.01409912109375, "loss_xval": 0.34765625, "num_input_tokens_seen": 295749352, "step": 4717 }, { "epoch": 15.700499168053245, "grad_norm": 22.33350944519043, "learning_rate": 5e-06, "loss": 0.3897, "num_input_tokens_seen": 295813092, "step": 4718 }, { "epoch": 15.700499168053245, "loss": 0.40518391132354736, "loss_ce": 2.036331352428533e-06, "loss_iou": 0.1640625, "loss_num": 0.01544189453125, "loss_xval": 0.404296875, "num_input_tokens_seen": 295813092, "step": 4718 }, { "epoch": 15.703826955074875, "grad_norm": 10.96514892578125, "learning_rate": 5e-06, "loss": 0.4276, "num_input_tokens_seen": 295875132, "step": 4719 }, { "epoch": 15.703826955074875, "loss": 0.42096155881881714, "loss_ce": 2.105304929500562e-06, "loss_iou": 0.1455078125, "loss_num": 0.0260009765625, "loss_xval": 0.421875, "num_input_tokens_seen": 295875132, "step": 4719 }, { "epoch": 15.707154742096506, "grad_norm": 10.210363388061523, "learning_rate": 5e-06, "loss": 0.3418, "num_input_tokens_seen": 295937900, "step": 4720 }, { "epoch": 15.707154742096506, "loss": 0.40729397535324097, "loss_ce": 6.376930286933202e-06, "loss_iou": 0.125, "loss_num": 0.031494140625, "loss_xval": 0.408203125, "num_input_tokens_seen": 295937900, "step": 4720 }, { "epoch": 15.710482529118137, "grad_norm": 10.229593276977539, "learning_rate": 5e-06, "loss": 0.3862, "num_input_tokens_seen": 295999728, "step": 4721 }, { "epoch": 15.710482529118137, "loss": 0.30780917406082153, "loss_ce": 0.0003750808828044683, "loss_iou": 0.1142578125, "loss_num": 0.015869140625, "loss_xval": 0.306640625, "num_input_tokens_seen": 295999728, "step": 4721 }, { "epoch": 15.713810316139767, "grad_norm": 8.67190170288086, "learning_rate": 5e-06, "loss": 0.5099, "num_input_tokens_seen": 296062380, "step": 4722 }, { "epoch": 15.713810316139767, "loss": 0.49365371465682983, "loss_ce": 1.3536918004319887e-06, "loss_iou": 0.203125, "loss_num": 0.0174560546875, "loss_xval": 0.494140625, "num_input_tokens_seen": 296062380, "step": 4722 }, { "epoch": 15.717138103161398, "grad_norm": 12.108484268188477, "learning_rate": 5e-06, "loss": 0.4608, "num_input_tokens_seen": 296126068, "step": 4723 }, { "epoch": 15.717138103161398, "loss": 0.2628180682659149, "loss_ce": 6.783048434044758e-07, "loss_iou": 0.0966796875, "loss_num": 0.0137939453125, "loss_xval": 0.263671875, "num_input_tokens_seen": 296126068, "step": 4723 }, { "epoch": 15.720465890183029, "grad_norm": 29.011367797851562, "learning_rate": 5e-06, "loss": 0.6355, "num_input_tokens_seen": 296189140, "step": 4724 }, { "epoch": 15.720465890183029, "loss": 0.4321460723876953, "loss_ce": 1.7174723325297236e-05, "loss_iou": 0.1767578125, "loss_num": 0.0157470703125, "loss_xval": 0.431640625, "num_input_tokens_seen": 296189140, "step": 4724 }, { "epoch": 15.72379367720466, "grad_norm": 16.084138870239258, "learning_rate": 5e-06, "loss": 0.419, "num_input_tokens_seen": 296253028, "step": 4725 }, { "epoch": 15.72379367720466, "loss": 0.47571322321891785, "loss_ce": 5.216907084104605e-06, "loss_iou": 0.193359375, "loss_num": 0.017822265625, "loss_xval": 0.4765625, "num_input_tokens_seen": 296253028, "step": 4725 }, { "epoch": 15.72712146422629, "grad_norm": 22.678302764892578, "learning_rate": 5e-06, "loss": 0.4113, "num_input_tokens_seen": 296315276, "step": 4726 }, { "epoch": 15.72712146422629, "loss": 0.4160776436328888, "loss_ce": 1.0070656344396411e-06, "loss_iou": 0.126953125, "loss_num": 0.032470703125, "loss_xval": 0.416015625, "num_input_tokens_seen": 296315276, "step": 4726 }, { "epoch": 15.73044925124792, "grad_norm": 26.09799575805664, "learning_rate": 5e-06, "loss": 0.5222, "num_input_tokens_seen": 296378168, "step": 4727 }, { "epoch": 15.73044925124792, "loss": 0.6222028732299805, "loss_ce": 1.0520398063817993e-05, "loss_iou": 0.1943359375, "loss_num": 0.046875, "loss_xval": 0.62109375, "num_input_tokens_seen": 296378168, "step": 4727 }, { "epoch": 15.733777038269551, "grad_norm": 13.492410659790039, "learning_rate": 5e-06, "loss": 0.4865, "num_input_tokens_seen": 296437764, "step": 4728 }, { "epoch": 15.733777038269551, "loss": 0.5535293817520142, "loss_ce": 1.5146810028454638e-06, "loss_iou": 0.2109375, "loss_num": 0.0262451171875, "loss_xval": 0.5546875, "num_input_tokens_seen": 296437764, "step": 4728 }, { "epoch": 15.737104825291182, "grad_norm": 10.783493041992188, "learning_rate": 5e-06, "loss": 0.5151, "num_input_tokens_seen": 296501068, "step": 4729 }, { "epoch": 15.737104825291182, "loss": 0.48302161693573, "loss_ce": 0.00023353857977781445, "loss_iou": 0.19921875, "loss_num": 0.0166015625, "loss_xval": 0.482421875, "num_input_tokens_seen": 296501068, "step": 4729 }, { "epoch": 15.740432612312812, "grad_norm": 9.47224235534668, "learning_rate": 5e-06, "loss": 0.3375, "num_input_tokens_seen": 296563200, "step": 4730 }, { "epoch": 15.740432612312812, "loss": 0.28687459230422974, "loss_ce": 9.370046427648049e-06, "loss_iou": 0.1142578125, "loss_num": 0.01165771484375, "loss_xval": 0.287109375, "num_input_tokens_seen": 296563200, "step": 4730 }, { "epoch": 15.743760399334443, "grad_norm": 9.544328689575195, "learning_rate": 5e-06, "loss": 0.3789, "num_input_tokens_seen": 296625352, "step": 4731 }, { "epoch": 15.743760399334443, "loss": 0.499088317155838, "loss_ce": 3.840708359348355e-06, "loss_iou": 0.1845703125, "loss_num": 0.0260009765625, "loss_xval": 0.5, "num_input_tokens_seen": 296625352, "step": 4731 }, { "epoch": 15.747088186356073, "grad_norm": 14.19858455657959, "learning_rate": 5e-06, "loss": 0.3286, "num_input_tokens_seen": 296687404, "step": 4732 }, { "epoch": 15.747088186356073, "loss": 0.3892841339111328, "loss_ce": 1.893824673970812e-06, "loss_iou": 0.1572265625, "loss_num": 0.01513671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 296687404, "step": 4732 }, { "epoch": 15.750415973377704, "grad_norm": 10.497945785522461, "learning_rate": 5e-06, "loss": 0.3926, "num_input_tokens_seen": 296747336, "step": 4733 }, { "epoch": 15.750415973377704, "loss": 0.2391367256641388, "loss_ce": 9.799084637052147e-07, "loss_iou": 0.06640625, "loss_num": 0.021240234375, "loss_xval": 0.2392578125, "num_input_tokens_seen": 296747336, "step": 4733 }, { "epoch": 15.753743760399335, "grad_norm": 11.708738327026367, "learning_rate": 5e-06, "loss": 0.3105, "num_input_tokens_seen": 296808956, "step": 4734 }, { "epoch": 15.753743760399335, "loss": 0.269550621509552, "loss_ce": 4.106642336410005e-06, "loss_iou": 0.080078125, "loss_num": 0.0218505859375, "loss_xval": 0.26953125, "num_input_tokens_seen": 296808956, "step": 4734 }, { "epoch": 15.757071547420965, "grad_norm": 30.85634994506836, "learning_rate": 5e-06, "loss": 0.5716, "num_input_tokens_seen": 296870056, "step": 4735 }, { "epoch": 15.757071547420965, "loss": 0.7861958146095276, "loss_ce": 1.949959141711588e-06, "loss_iou": 0.328125, "loss_num": 0.026123046875, "loss_xval": 0.78515625, "num_input_tokens_seen": 296870056, "step": 4735 }, { "epoch": 15.760399334442596, "grad_norm": 36.776126861572266, "learning_rate": 5e-06, "loss": 0.5211, "num_input_tokens_seen": 296932428, "step": 4736 }, { "epoch": 15.760399334442596, "loss": 0.3291773200035095, "loss_ce": 1.469330436520977e-05, "loss_iou": 0.12109375, "loss_num": 0.0174560546875, "loss_xval": 0.330078125, "num_input_tokens_seen": 296932428, "step": 4736 }, { "epoch": 15.763727121464226, "grad_norm": 17.54241180419922, "learning_rate": 5e-06, "loss": 0.4095, "num_input_tokens_seen": 296995144, "step": 4737 }, { "epoch": 15.763727121464226, "loss": 0.5247822403907776, "loss_ce": 1.9379835975996684e-06, "loss_iou": 0.208984375, "loss_num": 0.021240234375, "loss_xval": 0.5234375, "num_input_tokens_seen": 296995144, "step": 4737 }, { "epoch": 15.767054908485857, "grad_norm": 12.120732307434082, "learning_rate": 5e-06, "loss": 0.304, "num_input_tokens_seen": 297057340, "step": 4738 }, { "epoch": 15.767054908485857, "loss": 0.25024908781051636, "loss_ce": 0.0009204863454215229, "loss_iou": 0.0869140625, "loss_num": 0.01507568359375, "loss_xval": 0.2490234375, "num_input_tokens_seen": 297057340, "step": 4738 }, { "epoch": 15.770382695507488, "grad_norm": 15.528348922729492, "learning_rate": 5e-06, "loss": 0.4694, "num_input_tokens_seen": 297119860, "step": 4739 }, { "epoch": 15.770382695507488, "loss": 0.5127941370010376, "loss_ce": 0.00017517601372674108, "loss_iou": 0.1962890625, "loss_num": 0.0240478515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 297119860, "step": 4739 }, { "epoch": 15.773710482529118, "grad_norm": 8.232756614685059, "learning_rate": 5e-06, "loss": 0.4906, "num_input_tokens_seen": 297181920, "step": 4740 }, { "epoch": 15.773710482529118, "loss": 0.4018896520137787, "loss_ce": 3.6670896861323854e-06, "loss_iou": 0.125, "loss_num": 0.0303955078125, "loss_xval": 0.40234375, "num_input_tokens_seen": 297181920, "step": 4740 }, { "epoch": 15.777038269550749, "grad_norm": 8.439435005187988, "learning_rate": 5e-06, "loss": 0.4634, "num_input_tokens_seen": 297246108, "step": 4741 }, { "epoch": 15.777038269550749, "loss": 0.6185979843139648, "loss_ce": 6.745264727214817e-06, "loss_iou": 0.228515625, "loss_num": 0.0322265625, "loss_xval": 0.6171875, "num_input_tokens_seen": 297246108, "step": 4741 }, { "epoch": 15.78036605657238, "grad_norm": 11.313188552856445, "learning_rate": 5e-06, "loss": 0.4861, "num_input_tokens_seen": 297309604, "step": 4742 }, { "epoch": 15.78036605657238, "loss": 0.5053737163543701, "loss_ce": 2.6103632535523502e-06, "loss_iou": 0.1748046875, "loss_num": 0.03125, "loss_xval": 0.50390625, "num_input_tokens_seen": 297309604, "step": 4742 }, { "epoch": 15.78369384359401, "grad_norm": 12.101418495178223, "learning_rate": 5e-06, "loss": 0.3073, "num_input_tokens_seen": 297371900, "step": 4743 }, { "epoch": 15.78369384359401, "loss": 0.4256041646003723, "loss_ce": 6.041940196155338e-06, "loss_iou": 0.173828125, "loss_num": 0.01544189453125, "loss_xval": 0.42578125, "num_input_tokens_seen": 297371900, "step": 4743 }, { "epoch": 15.78702163061564, "grad_norm": 45.269935607910156, "learning_rate": 5e-06, "loss": 0.4493, "num_input_tokens_seen": 297435248, "step": 4744 }, { "epoch": 15.78702163061564, "loss": 0.22462235391139984, "loss_ce": 1.2979136954527348e-05, "loss_iou": 0.08984375, "loss_num": 0.00909423828125, "loss_xval": 0.224609375, "num_input_tokens_seen": 297435248, "step": 4744 }, { "epoch": 15.790349417637271, "grad_norm": 30.277002334594727, "learning_rate": 5e-06, "loss": 0.6647, "num_input_tokens_seen": 297496976, "step": 4745 }, { "epoch": 15.790349417637271, "loss": 0.6623324155807495, "loss_ce": 0.00010093052696902305, "loss_iou": 0.25390625, "loss_num": 0.0303955078125, "loss_xval": 0.6640625, "num_input_tokens_seen": 297496976, "step": 4745 }, { "epoch": 15.793677204658902, "grad_norm": 22.721879959106445, "learning_rate": 5e-06, "loss": 0.5173, "num_input_tokens_seen": 297559656, "step": 4746 }, { "epoch": 15.793677204658902, "loss": 0.4824249744415283, "loss_ce": 3.1232555102178594e-06, "loss_iou": 0.1796875, "loss_num": 0.024658203125, "loss_xval": 0.482421875, "num_input_tokens_seen": 297559656, "step": 4746 }, { "epoch": 15.797004991680533, "grad_norm": 20.993858337402344, "learning_rate": 5e-06, "loss": 0.4215, "num_input_tokens_seen": 297623652, "step": 4747 }, { "epoch": 15.797004991680533, "loss": 0.45875439047813416, "loss_ce": 1.4163408195599914e-05, "loss_iou": 0.173828125, "loss_num": 0.0223388671875, "loss_xval": 0.458984375, "num_input_tokens_seen": 297623652, "step": 4747 }, { "epoch": 15.800332778702163, "grad_norm": 8.629959106445312, "learning_rate": 5e-06, "loss": 0.3653, "num_input_tokens_seen": 297688316, "step": 4748 }, { "epoch": 15.800332778702163, "loss": 0.3150976896286011, "loss_ce": 3.4197095374111086e-05, "loss_iou": 0.12109375, "loss_num": 0.0145263671875, "loss_xval": 0.314453125, "num_input_tokens_seen": 297688316, "step": 4748 }, { "epoch": 15.803660565723794, "grad_norm": 8.310785293579102, "learning_rate": 5e-06, "loss": 0.2525, "num_input_tokens_seen": 297748548, "step": 4749 }, { "epoch": 15.803660565723794, "loss": 0.27449002861976624, "loss_ce": 1.4921854017302394e-05, "loss_iou": 0.09765625, "loss_num": 0.015869140625, "loss_xval": 0.275390625, "num_input_tokens_seen": 297748548, "step": 4749 }, { "epoch": 15.806988352745424, "grad_norm": 8.423526763916016, "learning_rate": 5e-06, "loss": 0.4103, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "eval_seeclick_CIoU": 0.029157564043998718, "eval_seeclick_GIoU": 0.019403559621423483, "eval_seeclick_IoU": 0.1561538502573967, "eval_seeclick_MAE_all": 0.17077035456895828, "eval_seeclick_MAE_h": 0.0766241941601038, "eval_seeclick_MAE_w": 0.13744798675179482, "eval_seeclick_MAE_x_boxes": 0.21058619022369385, "eval_seeclick_MAE_y_boxes": 0.18302666395902634, "eval_seeclick_NUM_probability": 0.9999736249446869, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 3.021742105484009, "eval_seeclick_loss_ce": 0.1734461784362793, "eval_seeclick_loss_iou": 0.994140625, "eval_seeclick_loss_num": 0.17120361328125, "eval_seeclick_loss_xval": 2.84326171875, "eval_seeclick_runtime": 71.3614, "eval_seeclick_samples_per_second": 0.659, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "eval_icons_CIoU": -0.07303258031606674, "eval_icons_GIoU": 0.03454606328159571, "eval_icons_IoU": 0.11482841148972511, "eval_icons_MAE_all": 0.21323162317276, "eval_icons_MAE_h": 0.1937904879450798, "eval_icons_MAE_w": 0.22310296446084976, "eval_icons_MAE_x_boxes": 0.14678749442100525, "eval_icons_MAE_y_boxes": 0.09761923551559448, "eval_icons_NUM_probability": 0.999968409538269, "eval_icons_inside_bbox": 0.2170138955116272, "eval_icons_loss": 2.9575164318084717, "eval_icons_loss_ce": 1.3783185067950399e-05, "eval_icons_loss_iou": 0.96630859375, "eval_icons_loss_num": 0.2103271484375, "eval_icons_loss_xval": 2.984375, "eval_icons_runtime": 66.2396, "eval_icons_samples_per_second": 0.755, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "eval_screenspot_CIoU": 0.1823714723189672, "eval_screenspot_GIoU": 0.21866750220457712, "eval_screenspot_IoU": 0.2934034764766693, "eval_screenspot_MAE_all": 0.11385433127482732, "eval_screenspot_MAE_h": 0.06112374613682429, "eval_screenspot_MAE_w": 0.09801691025495529, "eval_screenspot_MAE_x_boxes": 0.16084632774194083, "eval_screenspot_MAE_y_boxes": 0.08713458354274432, "eval_screenspot_NUM_probability": 0.9999935428301493, "eval_screenspot_inside_bbox": 0.5362499952316284, "eval_screenspot_loss": 2.177732229232788, "eval_screenspot_loss_ce": 5.9218421104863715e-05, "eval_screenspot_loss_iou": 0.7975260416666666, "eval_screenspot_loss_num": 0.12339019775390625, "eval_screenspot_loss_xval": 2.212890625, "eval_screenspot_runtime": 118.8103, "eval_screenspot_samples_per_second": 0.749, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "eval_compot_CIoU": 0.15605434775352478, "eval_compot_GIoU": 0.2116793915629387, "eval_compot_IoU": 0.2855927497148514, "eval_compot_MAE_all": 0.1273484006524086, "eval_compot_MAE_h": 0.05273274565115571, "eval_compot_MAE_w": 0.1391521915793419, "eval_compot_MAE_x_boxes": 0.11774072423577309, "eval_compot_MAE_y_boxes": 0.0949038527905941, "eval_compot_NUM_probability": 0.9999969005584717, "eval_compot_inside_bbox": 0.4131944477558136, "eval_compot_loss": 2.2026281356811523, "eval_compot_loss_ce": 0.004983726888895035, "eval_compot_loss_iou": 0.8070068359375, "eval_compot_loss_num": 0.13776206970214844, "eval_compot_loss_xval": 2.3046875, "eval_compot_runtime": 69.1633, "eval_compot_samples_per_second": 0.723, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "eval_custom_ui_MAE_all": 0.05910385213792324, "eval_custom_ui_MAE_x": 0.07061638124287128, "eval_custom_ui_MAE_y": 0.04759131371974945, "eval_custom_ui_NUM_probability": 0.9999956488609314, "eval_custom_ui_loss": 0.28215038776397705, "eval_custom_ui_loss_ce": 1.9253887444392603e-06, "eval_custom_ui_loss_num": 0.05829620361328125, "eval_custom_ui_loss_xval": 0.291717529296875, "eval_custom_ui_runtime": 51.4895, "eval_custom_ui_samples_per_second": 0.971, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.806988352745424, "loss": 0.30218684673309326, "loss_ce": 1.7972091654883116e-06, "loss_iou": 0.0, "loss_num": 0.060302734375, "loss_xval": 0.302734375, "num_input_tokens_seen": 297812500, "step": 4750 }, { "epoch": 15.810316139767055, "grad_norm": 13.493416786193848, "learning_rate": 5e-06, "loss": 0.3888, "num_input_tokens_seen": 297875588, "step": 4751 }, { "epoch": 15.810316139767055, "loss": 0.5007399320602417, "loss_ce": 7.469850515917642e-06, "loss_iou": 0.1875, "loss_num": 0.0250244140625, "loss_xval": 0.5, "num_input_tokens_seen": 297875588, "step": 4751 }, { "epoch": 15.813643926788686, "grad_norm": 30.26608657836914, "learning_rate": 5e-06, "loss": 0.5622, "num_input_tokens_seen": 297937584, "step": 4752 }, { "epoch": 15.813643926788686, "loss": 0.6644657850265503, "loss_ce": 3.7109555705683306e-05, "loss_iou": 0.26953125, "loss_num": 0.025390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 297937584, "step": 4752 }, { "epoch": 15.816971713810316, "grad_norm": 32.582984924316406, "learning_rate": 5e-06, "loss": 0.4699, "num_input_tokens_seen": 297999880, "step": 4753 }, { "epoch": 15.816971713810316, "loss": 0.4011867046356201, "loss_ce": 2.6088632694154512e-06, "loss_iou": 0.1494140625, "loss_num": 0.0203857421875, "loss_xval": 0.400390625, "num_input_tokens_seen": 297999880, "step": 4753 }, { "epoch": 15.820299500831947, "grad_norm": 16.306838989257812, "learning_rate": 5e-06, "loss": 0.3611, "num_input_tokens_seen": 298062580, "step": 4754 }, { "epoch": 15.820299500831947, "loss": 0.38684284687042236, "loss_ce": 2.0209588456054917e-06, "loss_iou": 0.134765625, "loss_num": 0.023681640625, "loss_xval": 0.38671875, "num_input_tokens_seen": 298062580, "step": 4754 }, { "epoch": 15.823627287853578, "grad_norm": 10.133922576904297, "learning_rate": 5e-06, "loss": 0.3538, "num_input_tokens_seen": 298125896, "step": 4755 }, { "epoch": 15.823627287853578, "loss": 0.32715877890586853, "loss_ce": 1.0347936040489003e-05, "loss_iou": 0.130859375, "loss_num": 0.0128173828125, "loss_xval": 0.328125, "num_input_tokens_seen": 298125896, "step": 4755 }, { "epoch": 15.826955074875208, "grad_norm": 15.947661399841309, "learning_rate": 5e-06, "loss": 0.5258, "num_input_tokens_seen": 298189832, "step": 4756 }, { "epoch": 15.826955074875208, "loss": 0.5393199920654297, "loss_ce": 0.0005321474163793027, "loss_iou": 0.2119140625, "loss_num": 0.02294921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 298189832, "step": 4756 }, { "epoch": 15.830282861896839, "grad_norm": 18.17816734313965, "learning_rate": 5e-06, "loss": 0.5522, "num_input_tokens_seen": 298252916, "step": 4757 }, { "epoch": 15.830282861896839, "loss": 0.6662337779998779, "loss_ce": 9.61166515480727e-05, "loss_iou": 0.265625, "loss_num": 0.02734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 298252916, "step": 4757 }, { "epoch": 15.83361064891847, "grad_norm": 7.972437858581543, "learning_rate": 5e-06, "loss": 0.424, "num_input_tokens_seen": 298316432, "step": 4758 }, { "epoch": 15.83361064891847, "loss": 0.43964362144470215, "loss_ce": 7.411298156512203e-06, "loss_iou": 0.1689453125, "loss_num": 0.0203857421875, "loss_xval": 0.439453125, "num_input_tokens_seen": 298316432, "step": 4758 }, { "epoch": 15.8369384359401, "grad_norm": 19.960220336914062, "learning_rate": 5e-06, "loss": 0.5728, "num_input_tokens_seen": 298378360, "step": 4759 }, { "epoch": 15.8369384359401, "loss": 0.28930747509002686, "loss_ce": 8.18900673493772e-07, "loss_iou": 0.09912109375, "loss_num": 0.0181884765625, "loss_xval": 0.2890625, "num_input_tokens_seen": 298378360, "step": 4759 }, { "epoch": 15.84026622296173, "grad_norm": 11.772397994995117, "learning_rate": 5e-06, "loss": 0.3372, "num_input_tokens_seen": 298439676, "step": 4760 }, { "epoch": 15.84026622296173, "loss": 0.32910609245300293, "loss_ce": 4.541122507362161e-06, "loss_iou": 0.1298828125, "loss_num": 0.01409912109375, "loss_xval": 0.328125, "num_input_tokens_seen": 298439676, "step": 4760 }, { "epoch": 15.843594009983361, "grad_norm": 11.405454635620117, "learning_rate": 5e-06, "loss": 0.4108, "num_input_tokens_seen": 298502492, "step": 4761 }, { "epoch": 15.843594009983361, "loss": 0.37378400564193726, "loss_ce": 4.734482445201138e-06, "loss_iou": 0.134765625, "loss_num": 0.0211181640625, "loss_xval": 0.373046875, "num_input_tokens_seen": 298502492, "step": 4761 }, { "epoch": 15.846921797004992, "grad_norm": 21.32131576538086, "learning_rate": 5e-06, "loss": 0.4197, "num_input_tokens_seen": 298565124, "step": 4762 }, { "epoch": 15.846921797004992, "loss": 0.520630955696106, "loss_ce": 1.0382105983808287e-06, "loss_iou": 0.21484375, "loss_num": 0.0181884765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 298565124, "step": 4762 }, { "epoch": 15.850249584026622, "grad_norm": 15.677260398864746, "learning_rate": 5e-06, "loss": 0.5122, "num_input_tokens_seen": 298628108, "step": 4763 }, { "epoch": 15.850249584026622, "loss": 0.6070635318756104, "loss_ce": 7.857217497075908e-06, "loss_iou": 0.25390625, "loss_num": 0.0201416015625, "loss_xval": 0.60546875, "num_input_tokens_seen": 298628108, "step": 4763 }, { "epoch": 15.853577371048253, "grad_norm": 16.31813621520996, "learning_rate": 5e-06, "loss": 0.382, "num_input_tokens_seen": 298691040, "step": 4764 }, { "epoch": 15.853577371048253, "loss": 0.25398021936416626, "loss_ce": 3.581076816772111e-05, "loss_iou": 0.09375, "loss_num": 0.01336669921875, "loss_xval": 0.25390625, "num_input_tokens_seen": 298691040, "step": 4764 }, { "epoch": 15.856905158069884, "grad_norm": 33.07413864135742, "learning_rate": 5e-06, "loss": 0.6849, "num_input_tokens_seen": 298754892, "step": 4765 }, { "epoch": 15.856905158069884, "loss": 0.7363287806510925, "loss_ce": 6.399845915439073e-07, "loss_iou": 0.302734375, "loss_num": 0.02587890625, "loss_xval": 0.734375, "num_input_tokens_seen": 298754892, "step": 4765 }, { "epoch": 15.860232945091514, "grad_norm": 13.758151054382324, "learning_rate": 5e-06, "loss": 0.4058, "num_input_tokens_seen": 298816772, "step": 4766 }, { "epoch": 15.860232945091514, "loss": 0.3761114776134491, "loss_ce": 4.3341169657651335e-05, "loss_iou": 0.142578125, "loss_num": 0.01806640625, "loss_xval": 0.376953125, "num_input_tokens_seen": 298816772, "step": 4766 }, { "epoch": 15.863560732113145, "grad_norm": 26.256446838378906, "learning_rate": 5e-06, "loss": 0.5079, "num_input_tokens_seen": 298880296, "step": 4767 }, { "epoch": 15.863560732113145, "loss": 0.6721336841583252, "loss_ce": 1.452376636734698e-05, "loss_iou": 0.259765625, "loss_num": 0.0303955078125, "loss_xval": 0.671875, "num_input_tokens_seen": 298880296, "step": 4767 }, { "epoch": 15.866888519134775, "grad_norm": 34.92399215698242, "learning_rate": 5e-06, "loss": 0.56, "num_input_tokens_seen": 298944272, "step": 4768 }, { "epoch": 15.866888519134775, "loss": 0.5298492908477783, "loss_ce": 3.099083642155165e-06, "loss_iou": 0.201171875, "loss_num": 0.0257568359375, "loss_xval": 0.53125, "num_input_tokens_seen": 298944272, "step": 4768 }, { "epoch": 15.870216306156406, "grad_norm": 15.535185813903809, "learning_rate": 5e-06, "loss": 0.5007, "num_input_tokens_seen": 299007016, "step": 4769 }, { "epoch": 15.870216306156406, "loss": 0.38805001974105835, "loss_ce": 3.7551228615484433e-06, "loss_iou": 0.1318359375, "loss_num": 0.0247802734375, "loss_xval": 0.388671875, "num_input_tokens_seen": 299007016, "step": 4769 }, { "epoch": 15.873544093178037, "grad_norm": 7.525320529937744, "learning_rate": 5e-06, "loss": 0.4439, "num_input_tokens_seen": 299069552, "step": 4770 }, { "epoch": 15.873544093178037, "loss": 0.326937198638916, "loss_ce": 2.3895197500678478e-06, "loss_iou": 0.1376953125, "loss_num": 0.01025390625, "loss_xval": 0.326171875, "num_input_tokens_seen": 299069552, "step": 4770 }, { "epoch": 15.876871880199667, "grad_norm": 11.298612594604492, "learning_rate": 5e-06, "loss": 0.3978, "num_input_tokens_seen": 299131076, "step": 4771 }, { "epoch": 15.876871880199667, "loss": 0.39529556035995483, "loss_ce": 1.379846025884035e-06, "loss_iou": 0.1650390625, "loss_num": 0.01318359375, "loss_xval": 0.39453125, "num_input_tokens_seen": 299131076, "step": 4771 }, { "epoch": 15.880199667221298, "grad_norm": 17.09076690673828, "learning_rate": 5e-06, "loss": 0.5313, "num_input_tokens_seen": 299193060, "step": 4772 }, { "epoch": 15.880199667221298, "loss": 0.4308010935783386, "loss_ce": 1.4929051758372225e-05, "loss_iou": 0.1708984375, "loss_num": 0.0177001953125, "loss_xval": 0.431640625, "num_input_tokens_seen": 299193060, "step": 4772 }, { "epoch": 15.883527454242929, "grad_norm": 19.20327377319336, "learning_rate": 5e-06, "loss": 0.3515, "num_input_tokens_seen": 299253244, "step": 4773 }, { "epoch": 15.883527454242929, "loss": 0.3533228933811188, "loss_ce": 2.0886864149360918e-05, "loss_iou": 0.1259765625, "loss_num": 0.0201416015625, "loss_xval": 0.353515625, "num_input_tokens_seen": 299253244, "step": 4773 }, { "epoch": 15.88685524126456, "grad_norm": 10.072816848754883, "learning_rate": 5e-06, "loss": 0.4568, "num_input_tokens_seen": 299314304, "step": 4774 }, { "epoch": 15.88685524126456, "loss": 0.5325936675071716, "loss_ce": 9.055698910742649e-07, "loss_iou": 0.1767578125, "loss_num": 0.035888671875, "loss_xval": 0.53125, "num_input_tokens_seen": 299314304, "step": 4774 }, { "epoch": 15.89018302828619, "grad_norm": 7.633636474609375, "learning_rate": 5e-06, "loss": 0.3762, "num_input_tokens_seen": 299374084, "step": 4775 }, { "epoch": 15.89018302828619, "loss": 0.2807336449623108, "loss_ce": 2.433651843603002e-06, "loss_iou": 0.11083984375, "loss_num": 0.01190185546875, "loss_xval": 0.28125, "num_input_tokens_seen": 299374084, "step": 4775 }, { "epoch": 15.89351081530782, "grad_norm": 9.626849174499512, "learning_rate": 5e-06, "loss": 0.3251, "num_input_tokens_seen": 299437044, "step": 4776 }, { "epoch": 15.89351081530782, "loss": 0.3862648010253906, "loss_ce": 3.835079041891731e-06, "loss_iou": 0.1494140625, "loss_num": 0.017333984375, "loss_xval": 0.38671875, "num_input_tokens_seen": 299437044, "step": 4776 }, { "epoch": 15.896838602329451, "grad_norm": 11.070808410644531, "learning_rate": 5e-06, "loss": 0.3112, "num_input_tokens_seen": 299499828, "step": 4777 }, { "epoch": 15.896838602329451, "loss": 0.2581849694252014, "loss_ce": 6.274092356761685e-06, "loss_iou": 0.08154296875, "loss_num": 0.0189208984375, "loss_xval": 0.2578125, "num_input_tokens_seen": 299499828, "step": 4777 }, { "epoch": 15.900166389351082, "grad_norm": 17.853702545166016, "learning_rate": 5e-06, "loss": 0.5035, "num_input_tokens_seen": 299563748, "step": 4778 }, { "epoch": 15.900166389351082, "loss": 0.539259135723114, "loss_ce": 1.3533438504964579e-05, "loss_iou": 0.220703125, "loss_num": 0.01953125, "loss_xval": 0.5390625, "num_input_tokens_seen": 299563748, "step": 4778 }, { "epoch": 15.903494176372712, "grad_norm": 17.606773376464844, "learning_rate": 5e-06, "loss": 0.4877, "num_input_tokens_seen": 299627088, "step": 4779 }, { "epoch": 15.903494176372712, "loss": 0.5357086658477783, "loss_ce": 3.1169802241493016e-06, "loss_iou": 0.201171875, "loss_num": 0.0264892578125, "loss_xval": 0.53515625, "num_input_tokens_seen": 299627088, "step": 4779 }, { "epoch": 15.906821963394343, "grad_norm": 15.162994384765625, "learning_rate": 5e-06, "loss": 0.4949, "num_input_tokens_seen": 299689228, "step": 4780 }, { "epoch": 15.906821963394343, "loss": 0.6636998653411865, "loss_ce": 3.6236349387763767e-06, "loss_iou": 0.25390625, "loss_num": 0.031005859375, "loss_xval": 0.6640625, "num_input_tokens_seen": 299689228, "step": 4780 }, { "epoch": 15.910149750415973, "grad_norm": 17.111719131469727, "learning_rate": 5e-06, "loss": 0.3661, "num_input_tokens_seen": 299751680, "step": 4781 }, { "epoch": 15.910149750415973, "loss": 0.23205924034118652, "loss_ce": 3.5839966585626826e-06, "loss_iou": 0.08544921875, "loss_num": 0.0123291015625, "loss_xval": 0.232421875, "num_input_tokens_seen": 299751680, "step": 4781 }, { "epoch": 15.913477537437604, "grad_norm": 22.672941207885742, "learning_rate": 5e-06, "loss": 0.3683, "num_input_tokens_seen": 299812496, "step": 4782 }, { "epoch": 15.913477537437604, "loss": 0.33441516757011414, "loss_ce": 3.530915819283109e-06, "loss_iou": 0.138671875, "loss_num": 0.0113525390625, "loss_xval": 0.333984375, "num_input_tokens_seen": 299812496, "step": 4782 }, { "epoch": 15.916805324459235, "grad_norm": 14.570302963256836, "learning_rate": 5e-06, "loss": 0.5428, "num_input_tokens_seen": 299875512, "step": 4783 }, { "epoch": 15.916805324459235, "loss": 0.5495611429214478, "loss_ce": 6.015169446982327e-07, "loss_iou": 0.216796875, "loss_num": 0.0233154296875, "loss_xval": 0.55078125, "num_input_tokens_seen": 299875512, "step": 4783 }, { "epoch": 15.920133111480865, "grad_norm": 5.836132049560547, "learning_rate": 5e-06, "loss": 0.4204, "num_input_tokens_seen": 299939496, "step": 4784 }, { "epoch": 15.920133111480865, "loss": 0.35419654846191406, "loss_ce": 9.537381629343145e-06, "loss_iou": 0.13671875, "loss_num": 0.01611328125, "loss_xval": 0.353515625, "num_input_tokens_seen": 299939496, "step": 4784 }, { "epoch": 15.923460898502496, "grad_norm": 14.691683769226074, "learning_rate": 5e-06, "loss": 0.3482, "num_input_tokens_seen": 300001044, "step": 4785 }, { "epoch": 15.923460898502496, "loss": 0.368061900138855, "loss_ce": 8.09211705927737e-05, "loss_iou": 0.13671875, "loss_num": 0.018798828125, "loss_xval": 0.3671875, "num_input_tokens_seen": 300001044, "step": 4785 }, { "epoch": 15.926788685524127, "grad_norm": 8.65625, "learning_rate": 5e-06, "loss": 0.3908, "num_input_tokens_seen": 300062584, "step": 4786 }, { "epoch": 15.926788685524127, "loss": 0.2227770984172821, "loss_ce": 2.92981494567357e-05, "loss_iou": 0.06884765625, "loss_num": 0.01708984375, "loss_xval": 0.22265625, "num_input_tokens_seen": 300062584, "step": 4786 }, { "epoch": 15.930116472545757, "grad_norm": 27.730018615722656, "learning_rate": 5e-06, "loss": 0.4714, "num_input_tokens_seen": 300125504, "step": 4787 }, { "epoch": 15.930116472545757, "loss": 0.4107685983181, "loss_ce": 1.9823869479296263e-06, "loss_iou": 0.1533203125, "loss_num": 0.0206298828125, "loss_xval": 0.41015625, "num_input_tokens_seen": 300125504, "step": 4787 }, { "epoch": 15.933444259567388, "grad_norm": 24.062456130981445, "learning_rate": 5e-06, "loss": 0.2937, "num_input_tokens_seen": 300188252, "step": 4788 }, { "epoch": 15.933444259567388, "loss": 0.281619668006897, "loss_ce": 0.00012553544365800917, "loss_iou": 0.1240234375, "loss_num": 0.006683349609375, "loss_xval": 0.28125, "num_input_tokens_seen": 300188252, "step": 4788 }, { "epoch": 15.936772046589018, "grad_norm": 27.887496948242188, "learning_rate": 5e-06, "loss": 0.4253, "num_input_tokens_seen": 300249768, "step": 4789 }, { "epoch": 15.936772046589018, "loss": 0.34498029947280884, "loss_ce": 0.00013167360157240182, "loss_iou": 0.123046875, "loss_num": 0.0198974609375, "loss_xval": 0.345703125, "num_input_tokens_seen": 300249768, "step": 4789 }, { "epoch": 15.940099833610649, "grad_norm": 7.689976692199707, "learning_rate": 5e-06, "loss": 0.2539, "num_input_tokens_seen": 300312444, "step": 4790 }, { "epoch": 15.940099833610649, "loss": 0.36432161927223206, "loss_ce": 2.7806931939267088e-06, "loss_iou": 0.142578125, "loss_num": 0.0157470703125, "loss_xval": 0.365234375, "num_input_tokens_seen": 300312444, "step": 4790 }, { "epoch": 15.94342762063228, "grad_norm": 13.532173156738281, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 300375344, "step": 4791 }, { "epoch": 15.94342762063228, "loss": 0.547744631767273, "loss_ce": 1.5153582353377715e-05, "loss_iou": 0.216796875, "loss_num": 0.022705078125, "loss_xval": 0.546875, "num_input_tokens_seen": 300375344, "step": 4791 }, { "epoch": 15.94675540765391, "grad_norm": 9.621451377868652, "learning_rate": 5e-06, "loss": 0.3187, "num_input_tokens_seen": 300438592, "step": 4792 }, { "epoch": 15.94675540765391, "loss": 0.40632033348083496, "loss_ce": 3.9837184885982424e-05, "loss_iou": 0.1728515625, "loss_num": 0.01214599609375, "loss_xval": 0.40625, "num_input_tokens_seen": 300438592, "step": 4792 }, { "epoch": 15.95008319467554, "grad_norm": 9.186487197875977, "learning_rate": 5e-06, "loss": 0.3811, "num_input_tokens_seen": 300501216, "step": 4793 }, { "epoch": 15.95008319467554, "loss": 0.4092121720314026, "loss_ce": 1.9812296159216203e-06, "loss_iou": 0.12060546875, "loss_num": 0.03369140625, "loss_xval": 0.41015625, "num_input_tokens_seen": 300501216, "step": 4793 }, { "epoch": 15.953410981697171, "grad_norm": 23.390169143676758, "learning_rate": 5e-06, "loss": 0.4024, "num_input_tokens_seen": 300564104, "step": 4794 }, { "epoch": 15.953410981697171, "loss": 0.33813679218292236, "loss_ce": 2.0322636373748537e-06, "loss_iou": 0.140625, "loss_num": 0.01153564453125, "loss_xval": 0.337890625, "num_input_tokens_seen": 300564104, "step": 4794 }, { "epoch": 15.956738768718802, "grad_norm": 25.926279067993164, "learning_rate": 5e-06, "loss": 0.4229, "num_input_tokens_seen": 300626888, "step": 4795 }, { "epoch": 15.956738768718802, "loss": 0.3641880750656128, "loss_ce": 8.283957868115976e-05, "loss_iou": 0.1337890625, "loss_num": 0.0191650390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 300626888, "step": 4795 }, { "epoch": 15.960066555740433, "grad_norm": 22.5251522064209, "learning_rate": 5e-06, "loss": 0.3913, "num_input_tokens_seen": 300690292, "step": 4796 }, { "epoch": 15.960066555740433, "loss": 0.2619059681892395, "loss_ce": 4.102630555280484e-06, "loss_iou": 0.1044921875, "loss_num": 0.0107421875, "loss_xval": 0.26171875, "num_input_tokens_seen": 300690292, "step": 4796 }, { "epoch": 15.963394342762063, "grad_norm": 29.58542251586914, "learning_rate": 5e-06, "loss": 0.4428, "num_input_tokens_seen": 300752632, "step": 4797 }, { "epoch": 15.963394342762063, "loss": 0.2602841854095459, "loss_ce": 4.552450991468504e-05, "loss_iou": 0.111328125, "loss_num": 0.00750732421875, "loss_xval": 0.259765625, "num_input_tokens_seen": 300752632, "step": 4797 }, { "epoch": 15.966722129783694, "grad_norm": 35.483455657958984, "learning_rate": 5e-06, "loss": 0.3985, "num_input_tokens_seen": 300815812, "step": 4798 }, { "epoch": 15.966722129783694, "loss": 0.37629637122154236, "loss_ce": 1.4635284060204867e-05, "loss_iou": 0.1455078125, "loss_num": 0.01708984375, "loss_xval": 0.376953125, "num_input_tokens_seen": 300815812, "step": 4798 }, { "epoch": 15.970049916805324, "grad_norm": 34.72282028198242, "learning_rate": 5e-06, "loss": 0.6371, "num_input_tokens_seen": 300879460, "step": 4799 }, { "epoch": 15.970049916805324, "loss": 0.7236055731773376, "loss_ce": 0.00018637048196978867, "loss_iou": 0.306640625, "loss_num": 0.022216796875, "loss_xval": 0.72265625, "num_input_tokens_seen": 300879460, "step": 4799 }, { "epoch": 15.973377703826955, "grad_norm": 12.291335105895996, "learning_rate": 5e-06, "loss": 0.1823, "num_input_tokens_seen": 300941964, "step": 4800 }, { "epoch": 15.973377703826955, "loss": 0.1423085629940033, "loss_ce": 5.10969994138577e-06, "loss_iou": 0.0439453125, "loss_num": 0.01092529296875, "loss_xval": 0.142578125, "num_input_tokens_seen": 300941964, "step": 4800 }, { "epoch": 15.976705490848586, "grad_norm": 33.23076248168945, "learning_rate": 5e-06, "loss": 0.4986, "num_input_tokens_seen": 301004848, "step": 4801 }, { "epoch": 15.976705490848586, "loss": 0.2839101552963257, "loss_ce": 5.131345460540615e-06, "loss_iou": 0.11083984375, "loss_num": 0.01239013671875, "loss_xval": 0.283203125, "num_input_tokens_seen": 301004848, "step": 4801 }, { "epoch": 15.980033277870216, "grad_norm": 48.836063385009766, "learning_rate": 5e-06, "loss": 0.7891, "num_input_tokens_seen": 301068700, "step": 4802 }, { "epoch": 15.980033277870216, "loss": 0.5706245303153992, "loss_ce": 6.788315658923239e-05, "loss_iou": 0.26171875, "loss_num": 0.00933837890625, "loss_xval": 0.5703125, "num_input_tokens_seen": 301068700, "step": 4802 }, { "epoch": 15.983361064891847, "grad_norm": 46.2684326171875, "learning_rate": 5e-06, "loss": 0.5392, "num_input_tokens_seen": 301133156, "step": 4803 }, { "epoch": 15.983361064891847, "loss": 0.5094614028930664, "loss_ce": 9.54913730311091e-07, "loss_iou": 0.216796875, "loss_num": 0.01507568359375, "loss_xval": 0.5078125, "num_input_tokens_seen": 301133156, "step": 4803 }, { "epoch": 15.986688851913478, "grad_norm": 18.406099319458008, "learning_rate": 5e-06, "loss": 0.3287, "num_input_tokens_seen": 301194644, "step": 4804 }, { "epoch": 15.986688851913478, "loss": 0.341677725315094, "loss_ce": 6.396362732630223e-05, "loss_iou": 0.140625, "loss_num": 0.011962890625, "loss_xval": 0.341796875, "num_input_tokens_seen": 301194644, "step": 4804 }, { "epoch": 15.990016638935108, "grad_norm": 13.786677360534668, "learning_rate": 5e-06, "loss": 0.5442, "num_input_tokens_seen": 301257668, "step": 4805 }, { "epoch": 15.990016638935108, "loss": 0.6868314743041992, "loss_ce": 2.820802365022246e-06, "loss_iou": 0.28125, "loss_num": 0.02490234375, "loss_xval": 0.6875, "num_input_tokens_seen": 301257668, "step": 4805 }, { "epoch": 15.993344425956739, "grad_norm": 8.8687105178833, "learning_rate": 5e-06, "loss": 0.4441, "num_input_tokens_seen": 301320716, "step": 4806 }, { "epoch": 15.993344425956739, "loss": 0.6389800310134888, "loss_ce": 3.0224828151403926e-06, "loss_iou": 0.208984375, "loss_num": 0.0439453125, "loss_xval": 0.640625, "num_input_tokens_seen": 301320716, "step": 4806 }, { "epoch": 15.99667221297837, "grad_norm": 7.907088756561279, "learning_rate": 5e-06, "loss": 0.4, "num_input_tokens_seen": 301382976, "step": 4807 }, { "epoch": 15.99667221297837, "loss": 0.32703855633735657, "loss_ce": 0.00022580279619432986, "loss_iou": 0.1279296875, "loss_num": 0.01422119140625, "loss_xval": 0.326171875, "num_input_tokens_seen": 301382976, "step": 4807 }, { "epoch": 16.0, "grad_norm": 18.15125274658203, "learning_rate": 5e-06, "loss": 0.5016, "num_input_tokens_seen": 301446556, "step": 4808 }, { "epoch": 16.0, "loss": 0.4648454189300537, "loss_ce": 1.6781875729066087e-06, "loss_iou": 0.205078125, "loss_num": 0.01092529296875, "loss_xval": 0.46484375, "num_input_tokens_seen": 301446556, "step": 4808 }, { "epoch": 16.003327787021632, "grad_norm": 8.42670726776123, "learning_rate": 5e-06, "loss": 0.4687, "num_input_tokens_seen": 301507892, "step": 4809 }, { "epoch": 16.003327787021632, "loss": 0.5129404067993164, "loss_ce": 9.940384870787966e-07, "loss_iou": 0.1953125, "loss_num": 0.024658203125, "loss_xval": 0.51171875, "num_input_tokens_seen": 301507892, "step": 4809 }, { "epoch": 16.00665557404326, "grad_norm": 43.091896057128906, "learning_rate": 5e-06, "loss": 0.5069, "num_input_tokens_seen": 301570108, "step": 4810 }, { "epoch": 16.00665557404326, "loss": 0.6032954454421997, "loss_ce": 2.3967604647623375e-05, "loss_iou": 0.2470703125, "loss_num": 0.021728515625, "loss_xval": 0.6015625, "num_input_tokens_seen": 301570108, "step": 4810 }, { "epoch": 16.009983361064894, "grad_norm": 9.502971649169922, "learning_rate": 5e-06, "loss": 0.4074, "num_input_tokens_seen": 301631244, "step": 4811 }, { "epoch": 16.009983361064894, "loss": 0.6056583523750305, "loss_ce": 6.496436526504112e-06, "loss_iou": 0.2041015625, "loss_num": 0.039306640625, "loss_xval": 0.60546875, "num_input_tokens_seen": 301631244, "step": 4811 }, { "epoch": 16.013311148086522, "grad_norm": 25.711753845214844, "learning_rate": 5e-06, "loss": 0.3733, "num_input_tokens_seen": 301691732, "step": 4812 }, { "epoch": 16.013311148086522, "loss": 0.23706457018852234, "loss_ce": 4.018440449726768e-06, "loss_iou": 0.07861328125, "loss_num": 0.0159912109375, "loss_xval": 0.2373046875, "num_input_tokens_seen": 301691732, "step": 4812 }, { "epoch": 16.016638935108155, "grad_norm": 49.10768127441406, "learning_rate": 5e-06, "loss": 0.5141, "num_input_tokens_seen": 301754100, "step": 4813 }, { "epoch": 16.016638935108155, "loss": 0.4765652120113373, "loss_ce": 2.7297664928482845e-06, "loss_iou": 0.1982421875, "loss_num": 0.01611328125, "loss_xval": 0.4765625, "num_input_tokens_seen": 301754100, "step": 4813 }, { "epoch": 16.019966722129784, "grad_norm": 17.371301651000977, "learning_rate": 5e-06, "loss": 0.4089, "num_input_tokens_seen": 301816872, "step": 4814 }, { "epoch": 16.019966722129784, "loss": 0.2864515781402588, "loss_ce": 1.3607609616883565e-05, "loss_iou": 0.095703125, "loss_num": 0.0189208984375, "loss_xval": 0.287109375, "num_input_tokens_seen": 301816872, "step": 4814 }, { "epoch": 16.023294509151416, "grad_norm": 12.2205171585083, "learning_rate": 5e-06, "loss": 0.4592, "num_input_tokens_seen": 301881660, "step": 4815 }, { "epoch": 16.023294509151416, "loss": 0.36954712867736816, "loss_ce": 9.73195164988283e-06, "loss_iou": 0.1376953125, "loss_num": 0.018798828125, "loss_xval": 0.369140625, "num_input_tokens_seen": 301881660, "step": 4815 }, { "epoch": 16.026622296173045, "grad_norm": 6.74208402633667, "learning_rate": 5e-06, "loss": 0.2425, "num_input_tokens_seen": 301944440, "step": 4816 }, { "epoch": 16.026622296173045, "loss": 0.2684488296508789, "loss_ce": 1.6234704162343405e-05, "loss_iou": 0.10595703125, "loss_num": 0.0113525390625, "loss_xval": 0.267578125, "num_input_tokens_seen": 301944440, "step": 4816 }, { "epoch": 16.029950083194677, "grad_norm": 27.58441162109375, "learning_rate": 5e-06, "loss": 0.3927, "num_input_tokens_seen": 302008352, "step": 4817 }, { "epoch": 16.029950083194677, "loss": 0.3455365300178528, "loss_ce": 1.6541471268283203e-05, "loss_iou": 0.1494140625, "loss_num": 0.00921630859375, "loss_xval": 0.345703125, "num_input_tokens_seen": 302008352, "step": 4817 }, { "epoch": 16.033277870216306, "grad_norm": 34.301021575927734, "learning_rate": 5e-06, "loss": 0.6441, "num_input_tokens_seen": 302072692, "step": 4818 }, { "epoch": 16.033277870216306, "loss": 0.6693199276924133, "loss_ce": 8.408219400735106e-06, "loss_iou": 0.267578125, "loss_num": 0.0267333984375, "loss_xval": 0.66796875, "num_input_tokens_seen": 302072692, "step": 4818 }, { "epoch": 16.03660565723794, "grad_norm": 13.916579246520996, "learning_rate": 5e-06, "loss": 0.4059, "num_input_tokens_seen": 302135276, "step": 4819 }, { "epoch": 16.03660565723794, "loss": 0.3438444137573242, "loss_ce": 2.8444032977859024e-06, "loss_iou": 0.13671875, "loss_num": 0.01409912109375, "loss_xval": 0.34375, "num_input_tokens_seen": 302135276, "step": 4819 }, { "epoch": 16.039933444259567, "grad_norm": 13.604660987854004, "learning_rate": 5e-06, "loss": 0.5948, "num_input_tokens_seen": 302198792, "step": 4820 }, { "epoch": 16.039933444259567, "loss": 0.5420131683349609, "loss_ce": 2.1013695004512556e-05, "loss_iou": 0.2275390625, "loss_num": 0.0172119140625, "loss_xval": 0.54296875, "num_input_tokens_seen": 302198792, "step": 4820 }, { "epoch": 16.0432612312812, "grad_norm": 29.939849853515625, "learning_rate": 5e-06, "loss": 0.3201, "num_input_tokens_seen": 302260860, "step": 4821 }, { "epoch": 16.0432612312812, "loss": 0.28564614057540894, "loss_ce": 1.5978671399352606e-06, "loss_iou": 0.11572265625, "loss_num": 0.0107421875, "loss_xval": 0.28515625, "num_input_tokens_seen": 302260860, "step": 4821 }, { "epoch": 16.04658901830283, "grad_norm": 30.0074462890625, "learning_rate": 5e-06, "loss": 0.3657, "num_input_tokens_seen": 302322248, "step": 4822 }, { "epoch": 16.04658901830283, "loss": 0.5222200155258179, "loss_ce": 3.2333848594134906e-06, "loss_iou": 0.201171875, "loss_num": 0.024169921875, "loss_xval": 0.5234375, "num_input_tokens_seen": 302322248, "step": 4822 }, { "epoch": 16.04991680532446, "grad_norm": 27.492033004760742, "learning_rate": 5e-06, "loss": 0.5616, "num_input_tokens_seen": 302385156, "step": 4823 }, { "epoch": 16.04991680532446, "loss": 0.4811334013938904, "loss_ce": 5.4296160669764504e-05, "loss_iou": 0.2080078125, "loss_num": 0.012939453125, "loss_xval": 0.48046875, "num_input_tokens_seen": 302385156, "step": 4823 }, { "epoch": 16.05324459234609, "grad_norm": 59.0771598815918, "learning_rate": 5e-06, "loss": 0.4968, "num_input_tokens_seen": 302448552, "step": 4824 }, { "epoch": 16.05324459234609, "loss": 0.43676936626434326, "loss_ce": 1.772319365045405e-06, "loss_iou": 0.1904296875, "loss_num": 0.01129150390625, "loss_xval": 0.4375, "num_input_tokens_seen": 302448552, "step": 4824 }, { "epoch": 16.056572379367722, "grad_norm": 126.53820037841797, "learning_rate": 5e-06, "loss": 0.4831, "num_input_tokens_seen": 302510868, "step": 4825 }, { "epoch": 16.056572379367722, "loss": 0.38464558124542236, "loss_ce": 2.0366433091112413e-06, "loss_iou": 0.1748046875, "loss_num": 0.006866455078125, "loss_xval": 0.384765625, "num_input_tokens_seen": 302510868, "step": 4825 }, { "epoch": 16.05990016638935, "grad_norm": 21.684185028076172, "learning_rate": 5e-06, "loss": 0.3337, "num_input_tokens_seen": 302574480, "step": 4826 }, { "epoch": 16.05990016638935, "loss": 0.28558510541915894, "loss_ce": 1.6239166598097654e-06, "loss_iou": 0.11767578125, "loss_num": 0.01007080078125, "loss_xval": 0.28515625, "num_input_tokens_seen": 302574480, "step": 4826 }, { "epoch": 16.063227953410983, "grad_norm": 7.7894673347473145, "learning_rate": 5e-06, "loss": 0.404, "num_input_tokens_seen": 302637104, "step": 4827 }, { "epoch": 16.063227953410983, "loss": 0.4346933662891388, "loss_ce": 9.60074430622626e-07, "loss_iou": 0.1552734375, "loss_num": 0.0247802734375, "loss_xval": 0.435546875, "num_input_tokens_seen": 302637104, "step": 4827 }, { "epoch": 16.066555740432612, "grad_norm": 8.091959953308105, "learning_rate": 5e-06, "loss": 0.5402, "num_input_tokens_seen": 302701312, "step": 4828 }, { "epoch": 16.066555740432612, "loss": 0.650759220123291, "loss_ce": 2.386314235991449e-06, "loss_iou": 0.2255859375, "loss_num": 0.0400390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 302701312, "step": 4828 }, { "epoch": 16.069883527454245, "grad_norm": 41.34107971191406, "learning_rate": 5e-06, "loss": 0.3567, "num_input_tokens_seen": 302762396, "step": 4829 }, { "epoch": 16.069883527454245, "loss": 0.27753007411956787, "loss_ce": 3.2230648230324732e-06, "loss_iou": 0.11083984375, "loss_num": 0.0111083984375, "loss_xval": 0.27734375, "num_input_tokens_seen": 302762396, "step": 4829 }, { "epoch": 16.073211314475873, "grad_norm": 16.6628475189209, "learning_rate": 5e-06, "loss": 0.3888, "num_input_tokens_seen": 302825808, "step": 4830 }, { "epoch": 16.073211314475873, "loss": 0.44990766048431396, "loss_ce": 7.853315764805302e-05, "loss_iou": 0.1748046875, "loss_num": 0.0198974609375, "loss_xval": 0.44921875, "num_input_tokens_seen": 302825808, "step": 4830 }, { "epoch": 16.076539101497506, "grad_norm": 11.578617095947266, "learning_rate": 5e-06, "loss": 0.3134, "num_input_tokens_seen": 302886932, "step": 4831 }, { "epoch": 16.076539101497506, "loss": 0.4570501446723938, "loss_ce": 1.8890777937485836e-05, "loss_iou": 0.15234375, "loss_num": 0.0303955078125, "loss_xval": 0.45703125, "num_input_tokens_seen": 302886932, "step": 4831 }, { "epoch": 16.079866888519135, "grad_norm": 11.925562858581543, "learning_rate": 5e-06, "loss": 0.2148, "num_input_tokens_seen": 302948004, "step": 4832 }, { "epoch": 16.079866888519135, "loss": 0.27160823345184326, "loss_ce": 1.793953742890153e-06, "loss_iou": 0.10009765625, "loss_num": 0.01422119140625, "loss_xval": 0.271484375, "num_input_tokens_seen": 302948004, "step": 4832 }, { "epoch": 16.083194675540767, "grad_norm": 13.874751091003418, "learning_rate": 5e-06, "loss": 0.439, "num_input_tokens_seen": 303011844, "step": 4833 }, { "epoch": 16.083194675540767, "loss": 0.19918853044509888, "loss_ce": 2.8634485715883784e-07, "loss_iou": 0.07568359375, "loss_num": 0.00946044921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 303011844, "step": 4833 }, { "epoch": 16.086522462562396, "grad_norm": 12.068015098571777, "learning_rate": 5e-06, "loss": 0.379, "num_input_tokens_seen": 303074192, "step": 4834 }, { "epoch": 16.086522462562396, "loss": 0.40227049589157104, "loss_ce": 0.0004913402372039855, "loss_iou": 0.1640625, "loss_num": 0.014892578125, "loss_xval": 0.40234375, "num_input_tokens_seen": 303074192, "step": 4834 }, { "epoch": 16.08985024958403, "grad_norm": 23.273401260375977, "learning_rate": 5e-06, "loss": 0.5091, "num_input_tokens_seen": 303136264, "step": 4835 }, { "epoch": 16.08985024958403, "loss": 0.5609976053237915, "loss_ce": 8.455901843262836e-05, "loss_iou": 0.2470703125, "loss_num": 0.01348876953125, "loss_xval": 0.5625, "num_input_tokens_seen": 303136264, "step": 4835 }, { "epoch": 16.093178036605657, "grad_norm": 10.237247467041016, "learning_rate": 5e-06, "loss": 0.5334, "num_input_tokens_seen": 303199044, "step": 4836 }, { "epoch": 16.093178036605657, "loss": 0.5614808797836304, "loss_ce": 1.850568878580816e-05, "loss_iou": 0.2041015625, "loss_num": 0.0306396484375, "loss_xval": 0.5625, "num_input_tokens_seen": 303199044, "step": 4836 }, { "epoch": 16.09650582362729, "grad_norm": 7.215312957763672, "learning_rate": 5e-06, "loss": 0.3679, "num_input_tokens_seen": 303261408, "step": 4837 }, { "epoch": 16.09650582362729, "loss": 0.37996718287467957, "loss_ce": 1.5711608284618706e-05, "loss_iou": 0.146484375, "loss_num": 0.0172119140625, "loss_xval": 0.380859375, "num_input_tokens_seen": 303261408, "step": 4837 }, { "epoch": 16.09983361064892, "grad_norm": 7.931913375854492, "learning_rate": 5e-06, "loss": 0.3374, "num_input_tokens_seen": 303324596, "step": 4838 }, { "epoch": 16.09983361064892, "loss": 0.18182438611984253, "loss_ce": 6.516185635518923e-07, "loss_iou": 0.060546875, "loss_num": 0.01214599609375, "loss_xval": 0.181640625, "num_input_tokens_seen": 303324596, "step": 4838 }, { "epoch": 16.10316139767055, "grad_norm": 19.080562591552734, "learning_rate": 5e-06, "loss": 0.4272, "num_input_tokens_seen": 303387780, "step": 4839 }, { "epoch": 16.10316139767055, "loss": 0.4360779821872711, "loss_ce": 4.282082954887301e-05, "loss_iou": 0.1435546875, "loss_num": 0.0296630859375, "loss_xval": 0.435546875, "num_input_tokens_seen": 303387780, "step": 4839 }, { "epoch": 16.10648918469218, "grad_norm": 17.77251625061035, "learning_rate": 5e-06, "loss": 0.3136, "num_input_tokens_seen": 303450520, "step": 4840 }, { "epoch": 16.10648918469218, "loss": 0.3346882462501526, "loss_ce": 1.953280161615112e-06, "loss_iou": 0.142578125, "loss_num": 0.01007080078125, "loss_xval": 0.333984375, "num_input_tokens_seen": 303450520, "step": 4840 }, { "epoch": 16.109816971713812, "grad_norm": 14.779105186462402, "learning_rate": 5e-06, "loss": 0.4901, "num_input_tokens_seen": 303513216, "step": 4841 }, { "epoch": 16.109816971713812, "loss": 0.4610653221607208, "loss_ce": 5.7432835092186e-06, "loss_iou": 0.173828125, "loss_num": 0.022705078125, "loss_xval": 0.4609375, "num_input_tokens_seen": 303513216, "step": 4841 }, { "epoch": 16.11314475873544, "grad_norm": 16.365137100219727, "learning_rate": 5e-06, "loss": 0.2172, "num_input_tokens_seen": 303574464, "step": 4842 }, { "epoch": 16.11314475873544, "loss": 0.23443886637687683, "loss_ce": 2.8228253086126642e-06, "loss_iou": 0.0859375, "loss_num": 0.0125732421875, "loss_xval": 0.234375, "num_input_tokens_seen": 303574464, "step": 4842 }, { "epoch": 16.116472545757073, "grad_norm": 10.045598983764648, "learning_rate": 5e-06, "loss": 0.6362, "num_input_tokens_seen": 303638456, "step": 4843 }, { "epoch": 16.116472545757073, "loss": 0.608643651008606, "loss_ce": 1.1237096941840719e-06, "loss_iou": 0.240234375, "loss_num": 0.025390625, "loss_xval": 0.609375, "num_input_tokens_seen": 303638456, "step": 4843 }, { "epoch": 16.119800332778702, "grad_norm": 12.887083053588867, "learning_rate": 5e-06, "loss": 0.5243, "num_input_tokens_seen": 303701708, "step": 4844 }, { "epoch": 16.119800332778702, "loss": 0.5805249214172363, "loss_ce": 1.9519318811944686e-05, "loss_iou": 0.2470703125, "loss_num": 0.01708984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 303701708, "step": 4844 }, { "epoch": 16.123128119800334, "grad_norm": 7.2284255027771, "learning_rate": 5e-06, "loss": 0.4053, "num_input_tokens_seen": 303763332, "step": 4845 }, { "epoch": 16.123128119800334, "loss": 0.41949576139450073, "loss_ce": 1.1035701845685253e-06, "loss_iou": 0.1728515625, "loss_num": 0.0147705078125, "loss_xval": 0.419921875, "num_input_tokens_seen": 303763332, "step": 4845 }, { "epoch": 16.126455906821963, "grad_norm": 3.993077516555786, "learning_rate": 5e-06, "loss": 0.3912, "num_input_tokens_seen": 303825548, "step": 4846 }, { "epoch": 16.126455906821963, "loss": 0.33309727907180786, "loss_ce": 2.8439108064048924e-05, "loss_iou": 0.0908203125, "loss_num": 0.0303955078125, "loss_xval": 0.333984375, "num_input_tokens_seen": 303825548, "step": 4846 }, { "epoch": 16.129783693843596, "grad_norm": 8.444158554077148, "learning_rate": 5e-06, "loss": 0.3984, "num_input_tokens_seen": 303887472, "step": 4847 }, { "epoch": 16.129783693843596, "loss": 0.33978432416915894, "loss_ce": 1.5819991858734284e-06, "loss_iou": 0.11376953125, "loss_num": 0.0225830078125, "loss_xval": 0.33984375, "num_input_tokens_seen": 303887472, "step": 4847 }, { "epoch": 16.133111480865225, "grad_norm": 7.106865406036377, "learning_rate": 5e-06, "loss": 0.3177, "num_input_tokens_seen": 303951280, "step": 4848 }, { "epoch": 16.133111480865225, "loss": 0.41374582052230835, "loss_ce": 3.758098046091618e-06, "loss_iou": 0.1474609375, "loss_num": 0.0238037109375, "loss_xval": 0.4140625, "num_input_tokens_seen": 303951280, "step": 4848 }, { "epoch": 16.136439267886857, "grad_norm": 9.42135238647461, "learning_rate": 5e-06, "loss": 0.4286, "num_input_tokens_seen": 304015092, "step": 4849 }, { "epoch": 16.136439267886857, "loss": 0.5943233966827393, "loss_ce": 8.806272489891853e-06, "loss_iou": 0.2470703125, "loss_num": 0.0201416015625, "loss_xval": 0.59375, "num_input_tokens_seen": 304015092, "step": 4849 }, { "epoch": 16.139767054908486, "grad_norm": 10.157776832580566, "learning_rate": 5e-06, "loss": 0.3742, "num_input_tokens_seen": 304077720, "step": 4850 }, { "epoch": 16.139767054908486, "loss": 0.2648923397064209, "loss_ce": 0.0009153068531304598, "loss_iou": 0.1103515625, "loss_num": 0.0086669921875, "loss_xval": 0.263671875, "num_input_tokens_seen": 304077720, "step": 4850 }, { "epoch": 16.143094841930118, "grad_norm": 17.328977584838867, "learning_rate": 5e-06, "loss": 0.3862, "num_input_tokens_seen": 304140140, "step": 4851 }, { "epoch": 16.143094841930118, "loss": 0.33593815565109253, "loss_ce": 6.578991360584041e-07, "loss_iou": 0.099609375, "loss_num": 0.02734375, "loss_xval": 0.3359375, "num_input_tokens_seen": 304140140, "step": 4851 }, { "epoch": 16.146422628951747, "grad_norm": 25.313716888427734, "learning_rate": 5e-06, "loss": 0.38, "num_input_tokens_seen": 304203324, "step": 4852 }, { "epoch": 16.146422628951747, "loss": 0.3386386036872864, "loss_ce": 1.5546158465440385e-05, "loss_iou": 0.1474609375, "loss_num": 0.0089111328125, "loss_xval": 0.337890625, "num_input_tokens_seen": 304203324, "step": 4852 }, { "epoch": 16.14975041597338, "grad_norm": 7.924054145812988, "learning_rate": 5e-06, "loss": 0.4024, "num_input_tokens_seen": 304266552, "step": 4853 }, { "epoch": 16.14975041597338, "loss": 0.2880919575691223, "loss_ce": 6.030526037648087e-06, "loss_iou": 0.09130859375, "loss_num": 0.0211181640625, "loss_xval": 0.2890625, "num_input_tokens_seen": 304266552, "step": 4853 }, { "epoch": 16.153078202995008, "grad_norm": 27.84309959411621, "learning_rate": 5e-06, "loss": 0.5477, "num_input_tokens_seen": 304331280, "step": 4854 }, { "epoch": 16.153078202995008, "loss": 0.22311542928218842, "loss_ce": 1.412610117768054e-06, "loss_iou": 0.0830078125, "loss_num": 0.011474609375, "loss_xval": 0.22265625, "num_input_tokens_seen": 304331280, "step": 4854 }, { "epoch": 16.15640599001664, "grad_norm": 12.021773338317871, "learning_rate": 5e-06, "loss": 0.3119, "num_input_tokens_seen": 304390548, "step": 4855 }, { "epoch": 16.15640599001664, "loss": 0.3418014645576477, "loss_ce": 4.5868528104620054e-06, "loss_iou": 0.115234375, "loss_num": 0.022216796875, "loss_xval": 0.341796875, "num_input_tokens_seen": 304390548, "step": 4855 }, { "epoch": 16.15973377703827, "grad_norm": 11.2310152053833, "learning_rate": 5e-06, "loss": 0.3512, "num_input_tokens_seen": 304453492, "step": 4856 }, { "epoch": 16.15973377703827, "loss": 0.26310038566589355, "loss_ce": 7.293854196177563e-07, "loss_iou": 0.083984375, "loss_num": 0.0189208984375, "loss_xval": 0.263671875, "num_input_tokens_seen": 304453492, "step": 4856 }, { "epoch": 16.163061564059902, "grad_norm": 9.42270565032959, "learning_rate": 5e-06, "loss": 0.5779, "num_input_tokens_seen": 304515768, "step": 4857 }, { "epoch": 16.163061564059902, "loss": 0.3105275332927704, "loss_ce": 1.1164208444824908e-05, "loss_iou": 0.12890625, "loss_num": 0.0103759765625, "loss_xval": 0.310546875, "num_input_tokens_seen": 304515768, "step": 4857 }, { "epoch": 16.16638935108153, "grad_norm": 13.496119499206543, "learning_rate": 5e-06, "loss": 0.4743, "num_input_tokens_seen": 304578356, "step": 4858 }, { "epoch": 16.16638935108153, "loss": 0.43860238790512085, "loss_ce": 3.772623813347309e-06, "loss_iou": 0.185546875, "loss_num": 0.0133056640625, "loss_xval": 0.439453125, "num_input_tokens_seen": 304578356, "step": 4858 }, { "epoch": 16.169717138103163, "grad_norm": 25.716384887695312, "learning_rate": 5e-06, "loss": 0.4644, "num_input_tokens_seen": 304642500, "step": 4859 }, { "epoch": 16.169717138103163, "loss": 0.30579447746276855, "loss_ce": 8.367518603336066e-06, "loss_iou": 0.109375, "loss_num": 0.017333984375, "loss_xval": 0.306640625, "num_input_tokens_seen": 304642500, "step": 4859 }, { "epoch": 16.173044925124792, "grad_norm": 29.028871536254883, "learning_rate": 5e-06, "loss": 0.4735, "num_input_tokens_seen": 304705552, "step": 4860 }, { "epoch": 16.173044925124792, "loss": 0.5444364547729492, "loss_ce": 2.8287545319471974e-06, "loss_iou": 0.22265625, "loss_num": 0.019775390625, "loss_xval": 0.54296875, "num_input_tokens_seen": 304705552, "step": 4860 }, { "epoch": 16.176372712146424, "grad_norm": 17.373964309692383, "learning_rate": 5e-06, "loss": 0.4945, "num_input_tokens_seen": 304768748, "step": 4861 }, { "epoch": 16.176372712146424, "loss": 0.46118319034576416, "loss_ce": 1.538376409371267e-06, "loss_iou": 0.1875, "loss_num": 0.0169677734375, "loss_xval": 0.4609375, "num_input_tokens_seen": 304768748, "step": 4861 }, { "epoch": 16.179700499168053, "grad_norm": 12.395729064941406, "learning_rate": 5e-06, "loss": 0.4889, "num_input_tokens_seen": 304832952, "step": 4862 }, { "epoch": 16.179700499168053, "loss": 0.27212733030319214, "loss_ce": 2.095394847856369e-06, "loss_iou": 0.1162109375, "loss_num": 0.00787353515625, "loss_xval": 0.271484375, "num_input_tokens_seen": 304832952, "step": 4862 }, { "epoch": 16.183028286189685, "grad_norm": 19.713157653808594, "learning_rate": 5e-06, "loss": 0.5572, "num_input_tokens_seen": 304895532, "step": 4863 }, { "epoch": 16.183028286189685, "loss": 0.7522943019866943, "loss_ce": 0.0023400457575917244, "loss_iou": 0.265625, "loss_num": 0.04345703125, "loss_xval": 0.75, "num_input_tokens_seen": 304895532, "step": 4863 }, { "epoch": 16.186356073211314, "grad_norm": 18.055173873901367, "learning_rate": 5e-06, "loss": 0.3367, "num_input_tokens_seen": 304957884, "step": 4864 }, { "epoch": 16.186356073211314, "loss": 0.36585232615470886, "loss_ce": 7.583828846691176e-06, "loss_iou": 0.140625, "loss_num": 0.0169677734375, "loss_xval": 0.365234375, "num_input_tokens_seen": 304957884, "step": 4864 }, { "epoch": 16.189683860232947, "grad_norm": 10.873960494995117, "learning_rate": 5e-06, "loss": 0.6391, "num_input_tokens_seen": 305022152, "step": 4865 }, { "epoch": 16.189683860232947, "loss": 0.6864206790924072, "loss_ce": 1.9348677597008646e-05, "loss_iou": 0.265625, "loss_num": 0.03125, "loss_xval": 0.6875, "num_input_tokens_seen": 305022152, "step": 4865 }, { "epoch": 16.193011647254576, "grad_norm": 22.791175842285156, "learning_rate": 5e-06, "loss": 0.5506, "num_input_tokens_seen": 305084124, "step": 4866 }, { "epoch": 16.193011647254576, "loss": 0.30334559082984924, "loss_ce": 0.00012294482439756393, "loss_iou": 0.0869140625, "loss_num": 0.0257568359375, "loss_xval": 0.302734375, "num_input_tokens_seen": 305084124, "step": 4866 }, { "epoch": 16.196339434276208, "grad_norm": 10.133084297180176, "learning_rate": 5e-06, "loss": 0.3925, "num_input_tokens_seen": 305145228, "step": 4867 }, { "epoch": 16.196339434276208, "loss": 0.39616522192955017, "loss_ce": 4.7053268644958735e-05, "loss_iou": 0.1591796875, "loss_num": 0.015869140625, "loss_xval": 0.396484375, "num_input_tokens_seen": 305145228, "step": 4867 }, { "epoch": 16.199667221297837, "grad_norm": 8.101190567016602, "learning_rate": 5e-06, "loss": 0.355, "num_input_tokens_seen": 305207296, "step": 4868 }, { "epoch": 16.199667221297837, "loss": 0.29192787408828735, "loss_ce": 4.249732955940999e-05, "loss_iou": 0.1005859375, "loss_num": 0.0181884765625, "loss_xval": 0.291015625, "num_input_tokens_seen": 305207296, "step": 4868 }, { "epoch": 16.20299500831947, "grad_norm": 6.911988258361816, "learning_rate": 5e-06, "loss": 0.4078, "num_input_tokens_seen": 305270800, "step": 4869 }, { "epoch": 16.20299500831947, "loss": 0.5087323188781738, "loss_ce": 4.321118467487395e-06, "loss_iou": 0.181640625, "loss_num": 0.029052734375, "loss_xval": 0.5078125, "num_input_tokens_seen": 305270800, "step": 4869 }, { "epoch": 16.206322795341098, "grad_norm": 8.452603340148926, "learning_rate": 5e-06, "loss": 0.5145, "num_input_tokens_seen": 305334072, "step": 4870 }, { "epoch": 16.206322795341098, "loss": 0.38513338565826416, "loss_ce": 1.5612561128364177e-06, "loss_iou": 0.15625, "loss_num": 0.0145263671875, "loss_xval": 0.384765625, "num_input_tokens_seen": 305334072, "step": 4870 }, { "epoch": 16.20965058236273, "grad_norm": 17.144786834716797, "learning_rate": 5e-06, "loss": 0.396, "num_input_tokens_seen": 305396052, "step": 4871 }, { "epoch": 16.20965058236273, "loss": 0.5034955739974976, "loss_ce": 0.00019965390674769878, "loss_iou": 0.1787109375, "loss_num": 0.0294189453125, "loss_xval": 0.50390625, "num_input_tokens_seen": 305396052, "step": 4871 }, { "epoch": 16.21297836938436, "grad_norm": 24.434106826782227, "learning_rate": 5e-06, "loss": 0.4108, "num_input_tokens_seen": 305458288, "step": 4872 }, { "epoch": 16.21297836938436, "loss": 0.4148821532726288, "loss_ce": 1.0919718988589011e-05, "loss_iou": 0.1220703125, "loss_num": 0.0341796875, "loss_xval": 0.4140625, "num_input_tokens_seen": 305458288, "step": 4872 }, { "epoch": 16.21630615640599, "grad_norm": 8.116002082824707, "learning_rate": 5e-06, "loss": 0.3417, "num_input_tokens_seen": 305520968, "step": 4873 }, { "epoch": 16.21630615640599, "loss": 0.3354037404060364, "loss_ce": 1.5570709365420043e-05, "loss_iou": 0.11962890625, "loss_num": 0.0191650390625, "loss_xval": 0.3359375, "num_input_tokens_seen": 305520968, "step": 4873 }, { "epoch": 16.21963394342762, "grad_norm": 13.308221817016602, "learning_rate": 5e-06, "loss": 0.5018, "num_input_tokens_seen": 305584860, "step": 4874 }, { "epoch": 16.21963394342762, "loss": 0.4989635944366455, "loss_ce": 1.1758359050872969e-06, "loss_iou": 0.205078125, "loss_num": 0.017822265625, "loss_xval": 0.498046875, "num_input_tokens_seen": 305584860, "step": 4874 }, { "epoch": 16.222961730449253, "grad_norm": 8.270187377929688, "learning_rate": 5e-06, "loss": 0.4799, "num_input_tokens_seen": 305647488, "step": 4875 }, { "epoch": 16.222961730449253, "loss": 0.7540961503982544, "loss_ce": 6.8088788793829735e-06, "loss_iou": 0.318359375, "loss_num": 0.023681640625, "loss_xval": 0.75390625, "num_input_tokens_seen": 305647488, "step": 4875 }, { "epoch": 16.22628951747088, "grad_norm": 14.966150283813477, "learning_rate": 5e-06, "loss": 0.3806, "num_input_tokens_seen": 305710928, "step": 4876 }, { "epoch": 16.22628951747088, "loss": 0.19610771536827087, "loss_ce": 1.749798343553266e-06, "loss_iou": 0.08056640625, "loss_num": 0.0069580078125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 305710928, "step": 4876 }, { "epoch": 16.229617304492514, "grad_norm": 24.01275634765625, "learning_rate": 5e-06, "loss": 0.3611, "num_input_tokens_seen": 305772992, "step": 4877 }, { "epoch": 16.229617304492514, "loss": 0.45277678966522217, "loss_ce": 1.8004180674324743e-05, "loss_iou": 0.162109375, "loss_num": 0.025634765625, "loss_xval": 0.453125, "num_input_tokens_seen": 305772992, "step": 4877 }, { "epoch": 16.232945091514143, "grad_norm": 33.316532135009766, "learning_rate": 5e-06, "loss": 0.5577, "num_input_tokens_seen": 305834644, "step": 4878 }, { "epoch": 16.232945091514143, "loss": 0.42297399044036865, "loss_ce": 3.790529206071369e-07, "loss_iou": 0.1708984375, "loss_num": 0.016357421875, "loss_xval": 0.423828125, "num_input_tokens_seen": 305834644, "step": 4878 }, { "epoch": 16.236272878535775, "grad_norm": 24.25692367553711, "learning_rate": 5e-06, "loss": 0.4123, "num_input_tokens_seen": 305897532, "step": 4879 }, { "epoch": 16.236272878535775, "loss": 0.28772902488708496, "loss_ce": 9.286228305427358e-06, "loss_iou": 0.0966796875, "loss_num": 0.018798828125, "loss_xval": 0.287109375, "num_input_tokens_seen": 305897532, "step": 4879 }, { "epoch": 16.239600665557404, "grad_norm": 23.29959487915039, "learning_rate": 5e-06, "loss": 0.4618, "num_input_tokens_seen": 305960420, "step": 4880 }, { "epoch": 16.239600665557404, "loss": 0.47124648094177246, "loss_ce": 2.455654612276703e-05, "loss_iou": 0.177734375, "loss_num": 0.0233154296875, "loss_xval": 0.470703125, "num_input_tokens_seen": 305960420, "step": 4880 }, { "epoch": 16.242928452579037, "grad_norm": 17.10602569580078, "learning_rate": 5e-06, "loss": 0.4798, "num_input_tokens_seen": 306022696, "step": 4881 }, { "epoch": 16.242928452579037, "loss": 0.43232861161231995, "loss_ce": 1.657123357290402e-05, "loss_iou": 0.16015625, "loss_num": 0.0223388671875, "loss_xval": 0.431640625, "num_input_tokens_seen": 306022696, "step": 4881 }, { "epoch": 16.246256239600665, "grad_norm": 16.64977264404297, "learning_rate": 5e-06, "loss": 0.5109, "num_input_tokens_seen": 306086304, "step": 4882 }, { "epoch": 16.246256239600665, "loss": 0.2727978825569153, "loss_ce": 1.2626492207346018e-06, "loss_iou": 0.11572265625, "loss_num": 0.00823974609375, "loss_xval": 0.2734375, "num_input_tokens_seen": 306086304, "step": 4882 }, { "epoch": 16.249584026622298, "grad_norm": 26.09014129638672, "learning_rate": 5e-06, "loss": 0.3814, "num_input_tokens_seen": 306148700, "step": 4883 }, { "epoch": 16.249584026622298, "loss": 0.5097953677177429, "loss_ce": 2.9724928026553243e-05, "loss_iou": 0.203125, "loss_num": 0.0208740234375, "loss_xval": 0.5078125, "num_input_tokens_seen": 306148700, "step": 4883 }, { "epoch": 16.252911813643927, "grad_norm": 17.777420043945312, "learning_rate": 5e-06, "loss": 0.4744, "num_input_tokens_seen": 306211888, "step": 4884 }, { "epoch": 16.252911813643927, "loss": 0.5439973473548889, "loss_ce": 0.00011305816587992013, "loss_iou": 0.2021484375, "loss_num": 0.028076171875, "loss_xval": 0.54296875, "num_input_tokens_seen": 306211888, "step": 4884 }, { "epoch": 16.25623960066556, "grad_norm": 13.379783630371094, "learning_rate": 5e-06, "loss": 0.2503, "num_input_tokens_seen": 306272220, "step": 4885 }, { "epoch": 16.25623960066556, "loss": 0.18719631433486938, "loss_ce": 1.4877690546200029e-06, "loss_iou": 0.0240478515625, "loss_num": 0.02783203125, "loss_xval": 0.1875, "num_input_tokens_seen": 306272220, "step": 4885 }, { "epoch": 16.259567387687188, "grad_norm": 9.123466491699219, "learning_rate": 5e-06, "loss": 0.4433, "num_input_tokens_seen": 306335624, "step": 4886 }, { "epoch": 16.259567387687188, "loss": 0.5020350217819214, "loss_ce": 2.0861059965682216e-05, "loss_iou": 0.1943359375, "loss_num": 0.0225830078125, "loss_xval": 0.50390625, "num_input_tokens_seen": 306335624, "step": 4886 }, { "epoch": 16.26289517470882, "grad_norm": 15.70402717590332, "learning_rate": 5e-06, "loss": 0.437, "num_input_tokens_seen": 306398196, "step": 4887 }, { "epoch": 16.26289517470882, "loss": 0.4726576805114746, "loss_ce": 6.244487303774804e-05, "loss_iou": 0.1845703125, "loss_num": 0.0206298828125, "loss_xval": 0.47265625, "num_input_tokens_seen": 306398196, "step": 4887 }, { "epoch": 16.26622296173045, "grad_norm": 20.089038848876953, "learning_rate": 5e-06, "loss": 0.4619, "num_input_tokens_seen": 306461696, "step": 4888 }, { "epoch": 16.26622296173045, "loss": 0.6386775374412537, "loss_ce": 5.664374384650728e-06, "loss_iou": 0.271484375, "loss_num": 0.0194091796875, "loss_xval": 0.640625, "num_input_tokens_seen": 306461696, "step": 4888 }, { "epoch": 16.26955074875208, "grad_norm": 19.234546661376953, "learning_rate": 5e-06, "loss": 0.4306, "num_input_tokens_seen": 306524124, "step": 4889 }, { "epoch": 16.26955074875208, "loss": 0.4967082440853119, "loss_ce": 4.136863481107866e-06, "loss_iou": 0.1796875, "loss_num": 0.0272216796875, "loss_xval": 0.49609375, "num_input_tokens_seen": 306524124, "step": 4889 }, { "epoch": 16.27287853577371, "grad_norm": 18.52012062072754, "learning_rate": 5e-06, "loss": 0.5242, "num_input_tokens_seen": 306587660, "step": 4890 }, { "epoch": 16.27287853577371, "loss": 0.37415599822998047, "loss_ce": 7.151266618166119e-05, "loss_iou": 0.1640625, "loss_num": 0.00927734375, "loss_xval": 0.375, "num_input_tokens_seen": 306587660, "step": 4890 }, { "epoch": 16.276206322795343, "grad_norm": 13.580092430114746, "learning_rate": 5e-06, "loss": 0.3873, "num_input_tokens_seen": 306651156, "step": 4891 }, { "epoch": 16.276206322795343, "loss": 0.5803594589233398, "loss_ce": 3.713154364959337e-05, "loss_iou": 0.2490234375, "loss_num": 0.0166015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 306651156, "step": 4891 }, { "epoch": 16.27953410981697, "grad_norm": 14.301302909851074, "learning_rate": 5e-06, "loss": 0.5579, "num_input_tokens_seen": 306714212, "step": 4892 }, { "epoch": 16.27953410981697, "loss": 0.5685683488845825, "loss_ce": 0.0003005475737154484, "loss_iou": 0.2216796875, "loss_num": 0.025146484375, "loss_xval": 0.56640625, "num_input_tokens_seen": 306714212, "step": 4892 }, { "epoch": 16.282861896838604, "grad_norm": 15.95986557006836, "learning_rate": 5e-06, "loss": 0.2841, "num_input_tokens_seen": 306776456, "step": 4893 }, { "epoch": 16.282861896838604, "loss": 0.24560591578483582, "loss_ce": 4.449675543582998e-07, "loss_iou": 0.058837890625, "loss_num": 0.0255126953125, "loss_xval": 0.24609375, "num_input_tokens_seen": 306776456, "step": 4893 }, { "epoch": 16.286189683860233, "grad_norm": 26.135953903198242, "learning_rate": 5e-06, "loss": 0.3075, "num_input_tokens_seen": 306838484, "step": 4894 }, { "epoch": 16.286189683860233, "loss": 0.21160957217216492, "loss_ce": 6.727460686306586e-07, "loss_iou": 0.07421875, "loss_num": 0.01263427734375, "loss_xval": 0.2119140625, "num_input_tokens_seen": 306838484, "step": 4894 }, { "epoch": 16.289517470881865, "grad_norm": 15.105377197265625, "learning_rate": 5e-06, "loss": 0.3567, "num_input_tokens_seen": 306899876, "step": 4895 }, { "epoch": 16.289517470881865, "loss": 0.31976422667503357, "loss_ce": 1.0447831755300285e-06, "loss_iou": 0.10498046875, "loss_num": 0.02197265625, "loss_xval": 0.3203125, "num_input_tokens_seen": 306899876, "step": 4895 }, { "epoch": 16.292845257903494, "grad_norm": 10.212084770202637, "learning_rate": 5e-06, "loss": 0.4677, "num_input_tokens_seen": 306963296, "step": 4896 }, { "epoch": 16.292845257903494, "loss": 0.5847079157829285, "loss_ce": 0.0001284279569517821, "loss_iou": 0.232421875, "loss_num": 0.02392578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 306963296, "step": 4896 }, { "epoch": 16.296173044925126, "grad_norm": 9.551775932312012, "learning_rate": 5e-06, "loss": 0.3126, "num_input_tokens_seen": 307026072, "step": 4897 }, { "epoch": 16.296173044925126, "loss": 0.3960510492324829, "loss_ce": 0.00014647850184701383, "loss_iou": 0.1650390625, "loss_num": 0.01312255859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 307026072, "step": 4897 }, { "epoch": 16.299500831946755, "grad_norm": 10.33194637298584, "learning_rate": 5e-06, "loss": 0.3973, "num_input_tokens_seen": 307088496, "step": 4898 }, { "epoch": 16.299500831946755, "loss": 0.2841193675994873, "loss_ce": 7.190412816271419e-07, "loss_iou": 0.11865234375, "loss_num": 0.0093994140625, "loss_xval": 0.283203125, "num_input_tokens_seen": 307088496, "step": 4898 }, { "epoch": 16.302828618968388, "grad_norm": 8.78176498413086, "learning_rate": 5e-06, "loss": 0.1947, "num_input_tokens_seen": 307148896, "step": 4899 }, { "epoch": 16.302828618968388, "loss": 0.26458966732025146, "loss_ce": 2.281011120430776e-06, "loss_iou": 0.09912109375, "loss_num": 0.0133056640625, "loss_xval": 0.263671875, "num_input_tokens_seen": 307148896, "step": 4899 }, { "epoch": 16.306156405990016, "grad_norm": 9.728446006774902, "learning_rate": 5e-06, "loss": 0.3861, "num_input_tokens_seen": 307211248, "step": 4900 }, { "epoch": 16.306156405990016, "loss": 0.4023445248603821, "loss_ce": 7.851533609937178e-07, "loss_iou": 0.1357421875, "loss_num": 0.0262451171875, "loss_xval": 0.40234375, "num_input_tokens_seen": 307211248, "step": 4900 }, { "epoch": 16.30948419301165, "grad_norm": 10.937362670898438, "learning_rate": 5e-06, "loss": 0.3666, "num_input_tokens_seen": 307274960, "step": 4901 }, { "epoch": 16.30948419301165, "loss": 0.20538443326950073, "loss_ce": 1.1219755151614663e-06, "loss_iou": 0.0849609375, "loss_num": 0.007171630859375, "loss_xval": 0.205078125, "num_input_tokens_seen": 307274960, "step": 4901 }, { "epoch": 16.312811980033278, "grad_norm": 13.141266822814941, "learning_rate": 5e-06, "loss": 0.4952, "num_input_tokens_seen": 307337356, "step": 4902 }, { "epoch": 16.312811980033278, "loss": 0.44073569774627686, "loss_ce": 8.159510116456659e-07, "loss_iou": 0.1787109375, "loss_num": 0.016845703125, "loss_xval": 0.44140625, "num_input_tokens_seen": 307337356, "step": 4902 }, { "epoch": 16.31613976705491, "grad_norm": 9.893586158752441, "learning_rate": 5e-06, "loss": 0.5549, "num_input_tokens_seen": 307400792, "step": 4903 }, { "epoch": 16.31613976705491, "loss": 0.6345223784446716, "loss_ce": 0.00012292650353629142, "loss_iou": 0.265625, "loss_num": 0.020751953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 307400792, "step": 4903 }, { "epoch": 16.31946755407654, "grad_norm": 11.358869552612305, "learning_rate": 5e-06, "loss": 0.4304, "num_input_tokens_seen": 307462884, "step": 4904 }, { "epoch": 16.31946755407654, "loss": 0.4456911087036133, "loss_ce": 1.239144603459863e-05, "loss_iou": 0.1455078125, "loss_num": 0.0308837890625, "loss_xval": 0.4453125, "num_input_tokens_seen": 307462884, "step": 4904 }, { "epoch": 16.32279534109817, "grad_norm": 10.938610076904297, "learning_rate": 5e-06, "loss": 0.5991, "num_input_tokens_seen": 307525212, "step": 4905 }, { "epoch": 16.32279534109817, "loss": 0.8769686222076416, "loss_ce": 1.5497882486670278e-05, "loss_iou": 0.3671875, "loss_num": 0.0283203125, "loss_xval": 0.875, "num_input_tokens_seen": 307525212, "step": 4905 }, { "epoch": 16.3261231281198, "grad_norm": 10.622302055358887, "learning_rate": 5e-06, "loss": 0.4543, "num_input_tokens_seen": 307586832, "step": 4906 }, { "epoch": 16.3261231281198, "loss": 0.5378426313400269, "loss_ce": 7.840746434339962e-07, "loss_iou": 0.2099609375, "loss_num": 0.023681640625, "loss_xval": 0.5390625, "num_input_tokens_seen": 307586832, "step": 4906 }, { "epoch": 16.329450915141432, "grad_norm": 6.566868305206299, "learning_rate": 5e-06, "loss": 0.2329, "num_input_tokens_seen": 307648572, "step": 4907 }, { "epoch": 16.329450915141432, "loss": 0.3143320679664612, "loss_ce": 9.840609891398344e-07, "loss_iou": 0.10498046875, "loss_num": 0.0208740234375, "loss_xval": 0.314453125, "num_input_tokens_seen": 307648572, "step": 4907 }, { "epoch": 16.33277870216306, "grad_norm": 9.611928939819336, "learning_rate": 5e-06, "loss": 0.5551, "num_input_tokens_seen": 307712068, "step": 4908 }, { "epoch": 16.33277870216306, "loss": 0.5305556058883667, "loss_ce": 3.797787576331757e-05, "loss_iou": 0.19921875, "loss_num": 0.026611328125, "loss_xval": 0.53125, "num_input_tokens_seen": 307712068, "step": 4908 }, { "epoch": 16.336106489184694, "grad_norm": 9.073822021484375, "learning_rate": 5e-06, "loss": 0.2886, "num_input_tokens_seen": 307774900, "step": 4909 }, { "epoch": 16.336106489184694, "loss": 0.16954058408737183, "loss_ce": 1.8994467154698214e-07, "loss_iou": 0.060546875, "loss_num": 0.00970458984375, "loss_xval": 0.169921875, "num_input_tokens_seen": 307774900, "step": 4909 }, { "epoch": 16.339434276206322, "grad_norm": 5.28695011138916, "learning_rate": 5e-06, "loss": 0.3597, "num_input_tokens_seen": 307837740, "step": 4910 }, { "epoch": 16.339434276206322, "loss": 0.2454839050769806, "loss_ce": 5.026973894928233e-07, "loss_iou": 0.0888671875, "loss_num": 0.01361083984375, "loss_xval": 0.2451171875, "num_input_tokens_seen": 307837740, "step": 4910 }, { "epoch": 16.342762063227955, "grad_norm": 14.765652656555176, "learning_rate": 5e-06, "loss": 0.5319, "num_input_tokens_seen": 307900276, "step": 4911 }, { "epoch": 16.342762063227955, "loss": 0.5173636674880981, "loss_ce": 6.01724095758982e-05, "loss_iou": 0.197265625, "loss_num": 0.0242919921875, "loss_xval": 0.515625, "num_input_tokens_seen": 307900276, "step": 4911 }, { "epoch": 16.346089850249584, "grad_norm": 17.64401626586914, "learning_rate": 5e-06, "loss": 0.2578, "num_input_tokens_seen": 307962044, "step": 4912 }, { "epoch": 16.346089850249584, "loss": 0.2822299003601074, "loss_ce": 3.328410912217805e-06, "loss_iou": 0.111328125, "loss_num": 0.01202392578125, "loss_xval": 0.28125, "num_input_tokens_seen": 307962044, "step": 4912 }, { "epoch": 16.349417637271216, "grad_norm": 11.987199783325195, "learning_rate": 5e-06, "loss": 0.5219, "num_input_tokens_seen": 308021548, "step": 4913 }, { "epoch": 16.349417637271216, "loss": 0.3801892101764679, "loss_ce": 1.2110283478250494e-06, "loss_iou": 0.134765625, "loss_num": 0.02197265625, "loss_xval": 0.380859375, "num_input_tokens_seen": 308021548, "step": 4913 }, { "epoch": 16.352745424292845, "grad_norm": 6.4812846183776855, "learning_rate": 5e-06, "loss": 0.4421, "num_input_tokens_seen": 308084856, "step": 4914 }, { "epoch": 16.352745424292845, "loss": 0.5305217504501343, "loss_ce": 4.167085080553079e-06, "loss_iou": 0.2119140625, "loss_num": 0.0211181640625, "loss_xval": 0.53125, "num_input_tokens_seen": 308084856, "step": 4914 }, { "epoch": 16.356073211314477, "grad_norm": 15.234848022460938, "learning_rate": 5e-06, "loss": 0.4059, "num_input_tokens_seen": 308148064, "step": 4915 }, { "epoch": 16.356073211314477, "loss": 0.5068372488021851, "loss_ce": 1.2786423440047656e-06, "loss_iou": 0.212890625, "loss_num": 0.0162353515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 308148064, "step": 4915 }, { "epoch": 16.359400998336106, "grad_norm": 17.03172492980957, "learning_rate": 5e-06, "loss": 0.5335, "num_input_tokens_seen": 308211024, "step": 4916 }, { "epoch": 16.359400998336106, "loss": 0.6744104623794556, "loss_ce": 2.4612120341771515e-06, "loss_iou": 0.248046875, "loss_num": 0.03564453125, "loss_xval": 0.67578125, "num_input_tokens_seen": 308211024, "step": 4916 }, { "epoch": 16.36272878535774, "grad_norm": 29.312807083129883, "learning_rate": 5e-06, "loss": 0.6679, "num_input_tokens_seen": 308273824, "step": 4917 }, { "epoch": 16.36272878535774, "loss": 0.8730499148368835, "loss_ce": 3.037328951904783e-06, "loss_iou": 0.30859375, "loss_num": 0.051513671875, "loss_xval": 0.875, "num_input_tokens_seen": 308273824, "step": 4917 }, { "epoch": 16.366056572379367, "grad_norm": 42.690406799316406, "learning_rate": 5e-06, "loss": 0.4844, "num_input_tokens_seen": 308337196, "step": 4918 }, { "epoch": 16.366056572379367, "loss": 0.462211012840271, "loss_ce": 5.280949699226767e-05, "loss_iou": 0.1904296875, "loss_num": 0.0162353515625, "loss_xval": 0.462890625, "num_input_tokens_seen": 308337196, "step": 4918 }, { "epoch": 16.369384359401, "grad_norm": 32.862449645996094, "learning_rate": 5e-06, "loss": 0.4552, "num_input_tokens_seen": 308400772, "step": 4919 }, { "epoch": 16.369384359401, "loss": 0.44054001569747925, "loss_ce": 3.4996039630641462e-06, "loss_iou": 0.162109375, "loss_num": 0.023193359375, "loss_xval": 0.44140625, "num_input_tokens_seen": 308400772, "step": 4919 }, { "epoch": 16.37271214642263, "grad_norm": 22.0275821685791, "learning_rate": 5e-06, "loss": 0.3123, "num_input_tokens_seen": 308463232, "step": 4920 }, { "epoch": 16.37271214642263, "loss": 0.28589069843292236, "loss_ce": 2.0459413008211413e-06, "loss_iou": 0.0986328125, "loss_num": 0.0177001953125, "loss_xval": 0.28515625, "num_input_tokens_seen": 308463232, "step": 4920 }, { "epoch": 16.37603993344426, "grad_norm": 24.504661560058594, "learning_rate": 5e-06, "loss": 0.3559, "num_input_tokens_seen": 308525952, "step": 4921 }, { "epoch": 16.37603993344426, "loss": 0.33511584997177124, "loss_ce": 2.3252789560501697e-06, "loss_iou": 0.1435546875, "loss_num": 0.00946044921875, "loss_xval": 0.3359375, "num_input_tokens_seen": 308525952, "step": 4921 }, { "epoch": 16.37936772046589, "grad_norm": 27.3111572265625, "learning_rate": 5e-06, "loss": 0.2956, "num_input_tokens_seen": 308588344, "step": 4922 }, { "epoch": 16.37936772046589, "loss": 0.34844252467155457, "loss_ce": 2.331726864213124e-05, "loss_iou": 0.119140625, "loss_num": 0.02197265625, "loss_xval": 0.34765625, "num_input_tokens_seen": 308588344, "step": 4922 }, { "epoch": 16.382695507487522, "grad_norm": 18.394134521484375, "learning_rate": 5e-06, "loss": 0.4746, "num_input_tokens_seen": 308650436, "step": 4923 }, { "epoch": 16.382695507487522, "loss": 0.4384009838104248, "loss_ce": 4.651583731174469e-05, "loss_iou": 0.16015625, "loss_num": 0.0234375, "loss_xval": 0.4375, "num_input_tokens_seen": 308650436, "step": 4923 }, { "epoch": 16.38602329450915, "grad_norm": 20.301830291748047, "learning_rate": 5e-06, "loss": 0.2498, "num_input_tokens_seen": 308710664, "step": 4924 }, { "epoch": 16.38602329450915, "loss": 0.253784716129303, "loss_ce": 5.251578159004566e-07, "loss_iou": 0.09375, "loss_num": 0.013427734375, "loss_xval": 0.25390625, "num_input_tokens_seen": 308710664, "step": 4924 }, { "epoch": 16.389351081530783, "grad_norm": 27.4018497467041, "learning_rate": 5e-06, "loss": 0.4493, "num_input_tokens_seen": 308772832, "step": 4925 }, { "epoch": 16.389351081530783, "loss": 0.4037785530090332, "loss_ce": 4.701940099494095e-07, "loss_iou": 0.15234375, "loss_num": 0.01953125, "loss_xval": 0.404296875, "num_input_tokens_seen": 308772832, "step": 4925 }, { "epoch": 16.392678868552412, "grad_norm": 16.764009475708008, "learning_rate": 5e-06, "loss": 0.2778, "num_input_tokens_seen": 308835872, "step": 4926 }, { "epoch": 16.392678868552412, "loss": 0.29823583364486694, "loss_ce": 1.8060400179820135e-05, "loss_iou": 0.1142578125, "loss_num": 0.0140380859375, "loss_xval": 0.298828125, "num_input_tokens_seen": 308835872, "step": 4926 }, { "epoch": 16.396006655574045, "grad_norm": 23.849868774414062, "learning_rate": 5e-06, "loss": 0.3058, "num_input_tokens_seen": 308897960, "step": 4927 }, { "epoch": 16.396006655574045, "loss": 0.32544225454330444, "loss_ce": 2.794754436763469e-06, "loss_iou": 0.12255859375, "loss_num": 0.0159912109375, "loss_xval": 0.326171875, "num_input_tokens_seen": 308897960, "step": 4927 }, { "epoch": 16.399334442595674, "grad_norm": 47.7640266418457, "learning_rate": 5e-06, "loss": 0.4758, "num_input_tokens_seen": 308961212, "step": 4928 }, { "epoch": 16.399334442595674, "loss": 0.4368082880973816, "loss_ce": 2.5444309358135797e-05, "loss_iou": 0.181640625, "loss_num": 0.01495361328125, "loss_xval": 0.4375, "num_input_tokens_seen": 308961212, "step": 4928 }, { "epoch": 16.402662229617306, "grad_norm": 37.054588317871094, "learning_rate": 5e-06, "loss": 0.3648, "num_input_tokens_seen": 309024604, "step": 4929 }, { "epoch": 16.402662229617306, "loss": 0.3173222541809082, "loss_ce": 5.03426861087064e-07, "loss_iou": 0.1298828125, "loss_num": 0.011474609375, "loss_xval": 0.31640625, "num_input_tokens_seen": 309024604, "step": 4929 }, { "epoch": 16.405990016638935, "grad_norm": 14.625770568847656, "learning_rate": 5e-06, "loss": 0.2842, "num_input_tokens_seen": 309086668, "step": 4930 }, { "epoch": 16.405990016638935, "loss": 0.18432694673538208, "loss_ce": 7.782286388646753e-07, "loss_iou": 0.052001953125, "loss_num": 0.0159912109375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 309086668, "step": 4930 }, { "epoch": 16.409317803660567, "grad_norm": 12.250617980957031, "learning_rate": 5e-06, "loss": 0.5959, "num_input_tokens_seen": 309149616, "step": 4931 }, { "epoch": 16.409317803660567, "loss": 0.7911394834518433, "loss_ce": 1.7684582189758657e-06, "loss_iou": 0.322265625, "loss_num": 0.0294189453125, "loss_xval": 0.79296875, "num_input_tokens_seen": 309149616, "step": 4931 }, { "epoch": 16.412645590682196, "grad_norm": 22.93888282775879, "learning_rate": 5e-06, "loss": 0.5525, "num_input_tokens_seen": 309213384, "step": 4932 }, { "epoch": 16.412645590682196, "loss": 0.6308876276016235, "loss_ce": 2.8249758543097414e-05, "loss_iou": 0.244140625, "loss_num": 0.0283203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 309213384, "step": 4932 }, { "epoch": 16.41597337770383, "grad_norm": 11.591294288635254, "learning_rate": 5e-06, "loss": 0.5504, "num_input_tokens_seen": 309276268, "step": 4933 }, { "epoch": 16.41597337770383, "loss": 0.43976110219955444, "loss_ce": 9.434013190912083e-05, "loss_iou": 0.1708984375, "loss_num": 0.0194091796875, "loss_xval": 0.439453125, "num_input_tokens_seen": 309276268, "step": 4933 }, { "epoch": 16.419301164725457, "grad_norm": 7.211434364318848, "learning_rate": 5e-06, "loss": 0.4094, "num_input_tokens_seen": 309339276, "step": 4934 }, { "epoch": 16.419301164725457, "loss": 0.4802263379096985, "loss_ce": 1.7186832792503992e-06, "loss_iou": 0.1826171875, "loss_num": 0.0228271484375, "loss_xval": 0.48046875, "num_input_tokens_seen": 309339276, "step": 4934 }, { "epoch": 16.42262895174709, "grad_norm": 10.637443542480469, "learning_rate": 5e-06, "loss": 0.478, "num_input_tokens_seen": 309402664, "step": 4935 }, { "epoch": 16.42262895174709, "loss": 0.38129448890686035, "loss_ce": 7.85636711952975e-06, "loss_iou": 0.1611328125, "loss_num": 0.01165771484375, "loss_xval": 0.380859375, "num_input_tokens_seen": 309402664, "step": 4935 }, { "epoch": 16.42595673876872, "grad_norm": 11.823324203491211, "learning_rate": 5e-06, "loss": 0.3916, "num_input_tokens_seen": 309465220, "step": 4936 }, { "epoch": 16.42595673876872, "loss": 0.4942111670970917, "loss_ce": 0.0001925947581185028, "loss_iou": 0.1904296875, "loss_num": 0.0225830078125, "loss_xval": 0.494140625, "num_input_tokens_seen": 309465220, "step": 4936 }, { "epoch": 16.42928452579035, "grad_norm": 14.343124389648438, "learning_rate": 5e-06, "loss": 0.5094, "num_input_tokens_seen": 309527876, "step": 4937 }, { "epoch": 16.42928452579035, "loss": 0.46443480253219604, "loss_ce": 1.8276426999364048e-05, "loss_iou": 0.1982421875, "loss_num": 0.01373291015625, "loss_xval": 0.46484375, "num_input_tokens_seen": 309527876, "step": 4937 }, { "epoch": 16.43261231281198, "grad_norm": 9.0996675491333, "learning_rate": 5e-06, "loss": 0.4049, "num_input_tokens_seen": 309591316, "step": 4938 }, { "epoch": 16.43261231281198, "loss": 0.431171178817749, "loss_ce": 1.882840479083825e-05, "loss_iou": 0.1708984375, "loss_num": 0.017822265625, "loss_xval": 0.431640625, "num_input_tokens_seen": 309591316, "step": 4938 }, { "epoch": 16.435940099833612, "grad_norm": 6.325038909912109, "learning_rate": 5e-06, "loss": 0.372, "num_input_tokens_seen": 309653312, "step": 4939 }, { "epoch": 16.435940099833612, "loss": 0.385164737701416, "loss_ce": 2.3963350486155832e-06, "loss_iou": 0.1533203125, "loss_num": 0.015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 309653312, "step": 4939 }, { "epoch": 16.43926788685524, "grad_norm": 10.093846321105957, "learning_rate": 5e-06, "loss": 0.3897, "num_input_tokens_seen": 309715524, "step": 4940 }, { "epoch": 16.43926788685524, "loss": 0.46825897693634033, "loss_ce": 0.00011933179484913126, "loss_iou": 0.150390625, "loss_num": 0.033447265625, "loss_xval": 0.46875, "num_input_tokens_seen": 309715524, "step": 4940 }, { "epoch": 16.442595673876873, "grad_norm": 6.695464134216309, "learning_rate": 5e-06, "loss": 0.3854, "num_input_tokens_seen": 309779048, "step": 4941 }, { "epoch": 16.442595673876873, "loss": 0.4695547819137573, "loss_ce": 1.1319741133775096e-05, "loss_iou": 0.1376953125, "loss_num": 0.038818359375, "loss_xval": 0.46875, "num_input_tokens_seen": 309779048, "step": 4941 }, { "epoch": 16.445923460898502, "grad_norm": 10.266615867614746, "learning_rate": 5e-06, "loss": 0.3111, "num_input_tokens_seen": 309841708, "step": 4942 }, { "epoch": 16.445923460898502, "loss": 0.2656271159648895, "loss_ce": 2.11994711207808e-06, "loss_iou": 0.0791015625, "loss_num": 0.021484375, "loss_xval": 0.265625, "num_input_tokens_seen": 309841708, "step": 4942 }, { "epoch": 16.449251247920134, "grad_norm": 11.741121292114258, "learning_rate": 5e-06, "loss": 0.4631, "num_input_tokens_seen": 309903384, "step": 4943 }, { "epoch": 16.449251247920134, "loss": 0.34558209776878357, "loss_ce": 1.0483091728019645e-06, "loss_iou": 0.1328125, "loss_num": 0.015869140625, "loss_xval": 0.345703125, "num_input_tokens_seen": 309903384, "step": 4943 }, { "epoch": 16.452579034941763, "grad_norm": 13.096162796020508, "learning_rate": 5e-06, "loss": 0.3147, "num_input_tokens_seen": 309966024, "step": 4944 }, { "epoch": 16.452579034941763, "loss": 0.11008091270923615, "loss_ce": 4.008365067420527e-06, "loss_iou": 0.028564453125, "loss_num": 0.0106201171875, "loss_xval": 0.10986328125, "num_input_tokens_seen": 309966024, "step": 4944 }, { "epoch": 16.455906821963396, "grad_norm": 6.71261739730835, "learning_rate": 5e-06, "loss": 0.3517, "num_input_tokens_seen": 310029428, "step": 4945 }, { "epoch": 16.455906821963396, "loss": 0.35623228549957275, "loss_ce": 5.725996743422002e-07, "loss_iou": 0.140625, "loss_num": 0.015380859375, "loss_xval": 0.35546875, "num_input_tokens_seen": 310029428, "step": 4945 }, { "epoch": 16.459234608985025, "grad_norm": 13.75836181640625, "learning_rate": 5e-06, "loss": 0.5117, "num_input_tokens_seen": 310092780, "step": 4946 }, { "epoch": 16.459234608985025, "loss": 0.46106043457984924, "loss_ce": 8.713469696886023e-07, "loss_iou": 0.173828125, "loss_num": 0.0228271484375, "loss_xval": 0.4609375, "num_input_tokens_seen": 310092780, "step": 4946 }, { "epoch": 16.462562396006657, "grad_norm": 11.103243827819824, "learning_rate": 5e-06, "loss": 0.4111, "num_input_tokens_seen": 310156008, "step": 4947 }, { "epoch": 16.462562396006657, "loss": 0.29390591382980347, "loss_ce": 2.162625787605066e-05, "loss_iou": 0.1220703125, "loss_num": 0.0098876953125, "loss_xval": 0.29296875, "num_input_tokens_seen": 310156008, "step": 4947 }, { "epoch": 16.465890183028286, "grad_norm": 7.927892684936523, "learning_rate": 5e-06, "loss": 0.2756, "num_input_tokens_seen": 310219268, "step": 4948 }, { "epoch": 16.465890183028286, "loss": 0.2543972432613373, "loss_ce": 2.697315949262702e-06, "loss_iou": 0.0869140625, "loss_num": 0.01611328125, "loss_xval": 0.25390625, "num_input_tokens_seen": 310219268, "step": 4948 }, { "epoch": 16.469217970049918, "grad_norm": 11.7318115234375, "learning_rate": 5e-06, "loss": 0.6599, "num_input_tokens_seen": 310283584, "step": 4949 }, { "epoch": 16.469217970049918, "loss": 0.6114543676376343, "loss_ce": 4.1370194594492204e-06, "loss_iou": 0.2314453125, "loss_num": 0.0299072265625, "loss_xval": 0.61328125, "num_input_tokens_seen": 310283584, "step": 4949 }, { "epoch": 16.472545757071547, "grad_norm": 22.844865798950195, "learning_rate": 5e-06, "loss": 0.5449, "num_input_tokens_seen": 310347836, "step": 4950 }, { "epoch": 16.472545757071547, "loss": 0.43733346462249756, "loss_ce": 1.655750020290725e-05, "loss_iou": 0.154296875, "loss_num": 0.0257568359375, "loss_xval": 0.4375, "num_input_tokens_seen": 310347836, "step": 4950 }, { "epoch": 16.47587354409318, "grad_norm": 31.668752670288086, "learning_rate": 5e-06, "loss": 0.4717, "num_input_tokens_seen": 310411456, "step": 4951 }, { "epoch": 16.47587354409318, "loss": 0.37655627727508545, "loss_ce": 3.0428500394918956e-05, "loss_iou": 0.1455078125, "loss_num": 0.01708984375, "loss_xval": 0.376953125, "num_input_tokens_seen": 310411456, "step": 4951 }, { "epoch": 16.47920133111481, "grad_norm": 16.400373458862305, "learning_rate": 5e-06, "loss": 0.3591, "num_input_tokens_seen": 310473408, "step": 4952 }, { "epoch": 16.47920133111481, "loss": 0.5170972347259521, "loss_ce": 7.378452210105024e-06, "loss_iou": 0.189453125, "loss_num": 0.0277099609375, "loss_xval": 0.515625, "num_input_tokens_seen": 310473408, "step": 4952 }, { "epoch": 16.48252911813644, "grad_norm": 12.862483024597168, "learning_rate": 5e-06, "loss": 0.4473, "num_input_tokens_seen": 310537008, "step": 4953 }, { "epoch": 16.48252911813644, "loss": 0.3717074990272522, "loss_ce": 3.3749442991393153e-06, "loss_iou": 0.1484375, "loss_num": 0.01507568359375, "loss_xval": 0.37109375, "num_input_tokens_seen": 310537008, "step": 4953 }, { "epoch": 16.48585690515807, "grad_norm": 10.427308082580566, "learning_rate": 5e-06, "loss": 0.6574, "num_input_tokens_seen": 310599220, "step": 4954 }, { "epoch": 16.48585690515807, "loss": 0.6867694854736328, "loss_ce": 1.8592161268315976e-06, "loss_iou": 0.283203125, "loss_num": 0.02392578125, "loss_xval": 0.6875, "num_input_tokens_seen": 310599220, "step": 4954 }, { "epoch": 16.489184692179702, "grad_norm": 13.14699935913086, "learning_rate": 5e-06, "loss": 0.377, "num_input_tokens_seen": 310663196, "step": 4955 }, { "epoch": 16.489184692179702, "loss": 0.43243607878685, "loss_ce": 2.000070026042522e-06, "loss_iou": 0.162109375, "loss_num": 0.0213623046875, "loss_xval": 0.431640625, "num_input_tokens_seen": 310663196, "step": 4955 }, { "epoch": 16.49251247920133, "grad_norm": 16.37906837463379, "learning_rate": 5e-06, "loss": 0.2992, "num_input_tokens_seen": 310726168, "step": 4956 }, { "epoch": 16.49251247920133, "loss": 0.365605890750885, "loss_ce": 5.3119274525670335e-06, "loss_iou": 0.1474609375, "loss_num": 0.01422119140625, "loss_xval": 0.365234375, "num_input_tokens_seen": 310726168, "step": 4956 }, { "epoch": 16.495840266222963, "grad_norm": 19.44974708557129, "learning_rate": 5e-06, "loss": 0.5106, "num_input_tokens_seen": 310788124, "step": 4957 }, { "epoch": 16.495840266222963, "loss": 0.6124309301376343, "loss_ce": 4.189013907307526e-06, "loss_iou": 0.2236328125, "loss_num": 0.033203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 310788124, "step": 4957 }, { "epoch": 16.499168053244592, "grad_norm": 28.515043258666992, "learning_rate": 5e-06, "loss": 0.5064, "num_input_tokens_seen": 310850068, "step": 4958 }, { "epoch": 16.499168053244592, "loss": 0.5006445050239563, "loss_ce": 3.6366263884701766e-06, "loss_iou": 0.224609375, "loss_num": 0.01031494140625, "loss_xval": 0.5, "num_input_tokens_seen": 310850068, "step": 4958 }, { "epoch": 16.502495840266224, "grad_norm": 17.810993194580078, "learning_rate": 5e-06, "loss": 0.3578, "num_input_tokens_seen": 310912212, "step": 4959 }, { "epoch": 16.502495840266224, "loss": 0.3567514717578888, "loss_ce": 9.721508149596048e-07, "loss_iou": 0.1416015625, "loss_num": 0.01470947265625, "loss_xval": 0.357421875, "num_input_tokens_seen": 310912212, "step": 4959 }, { "epoch": 16.505823627287853, "grad_norm": 10.026216506958008, "learning_rate": 5e-06, "loss": 0.3492, "num_input_tokens_seen": 310975048, "step": 4960 }, { "epoch": 16.505823627287853, "loss": 0.3395887017250061, "loss_ce": 0.0001416804443579167, "loss_iou": 0.0947265625, "loss_num": 0.0299072265625, "loss_xval": 0.33984375, "num_input_tokens_seen": 310975048, "step": 4960 }, { "epoch": 16.509151414309486, "grad_norm": 11.031352996826172, "learning_rate": 5e-06, "loss": 0.3881, "num_input_tokens_seen": 311038176, "step": 4961 }, { "epoch": 16.509151414309486, "loss": 0.38459235429763794, "loss_ce": 0.00019295158563181758, "loss_iou": 0.1708984375, "loss_num": 0.008544921875, "loss_xval": 0.384765625, "num_input_tokens_seen": 311038176, "step": 4961 }, { "epoch": 16.512479201331114, "grad_norm": 22.805341720581055, "learning_rate": 5e-06, "loss": 0.4193, "num_input_tokens_seen": 311102152, "step": 4962 }, { "epoch": 16.512479201331114, "loss": 0.4752238392829895, "loss_ce": 4.120585799682885e-06, "loss_iou": 0.203125, "loss_num": 0.013916015625, "loss_xval": 0.474609375, "num_input_tokens_seen": 311102152, "step": 4962 }, { "epoch": 16.515806988352747, "grad_norm": 11.185400009155273, "learning_rate": 5e-06, "loss": 0.4584, "num_input_tokens_seen": 311164792, "step": 4963 }, { "epoch": 16.515806988352747, "loss": 0.34228888154029846, "loss_ce": 3.721700977621367e-06, "loss_iou": 0.138671875, "loss_num": 0.012939453125, "loss_xval": 0.341796875, "num_input_tokens_seen": 311164792, "step": 4963 }, { "epoch": 16.519134775374376, "grad_norm": 16.005067825317383, "learning_rate": 5e-06, "loss": 0.3245, "num_input_tokens_seen": 311226168, "step": 4964 }, { "epoch": 16.519134775374376, "loss": 0.20989447832107544, "loss_ce": 4.034296580357477e-05, "loss_iou": 0.06884765625, "loss_num": 0.01434326171875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 311226168, "step": 4964 }, { "epoch": 16.522462562396008, "grad_norm": 17.25465965270996, "learning_rate": 5e-06, "loss": 0.5316, "num_input_tokens_seen": 311289840, "step": 4965 }, { "epoch": 16.522462562396008, "loss": 0.6938572525978088, "loss_ce": 9.602686077414546e-06, "loss_iou": 0.28125, "loss_num": 0.0262451171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 311289840, "step": 4965 }, { "epoch": 16.525790349417637, "grad_norm": 25.929059982299805, "learning_rate": 5e-06, "loss": 0.5843, "num_input_tokens_seen": 311353560, "step": 4966 }, { "epoch": 16.525790349417637, "loss": 0.6132894158363342, "loss_ce": 8.154859642672818e-06, "loss_iou": 0.271484375, "loss_num": 0.01416015625, "loss_xval": 0.61328125, "num_input_tokens_seen": 311353560, "step": 4966 }, { "epoch": 16.52911813643927, "grad_norm": 20.15894889831543, "learning_rate": 5e-06, "loss": 0.3937, "num_input_tokens_seen": 311414896, "step": 4967 }, { "epoch": 16.52911813643927, "loss": 0.36024951934814453, "loss_ce": 8.104537118924782e-05, "loss_iou": 0.11962890625, "loss_num": 0.024169921875, "loss_xval": 0.359375, "num_input_tokens_seen": 311414896, "step": 4967 }, { "epoch": 16.532445923460898, "grad_norm": 8.18518352508545, "learning_rate": 5e-06, "loss": 0.4022, "num_input_tokens_seen": 311477592, "step": 4968 }, { "epoch": 16.532445923460898, "loss": 0.4112290143966675, "loss_ce": 4.659532351070084e-06, "loss_iou": 0.1640625, "loss_num": 0.0169677734375, "loss_xval": 0.412109375, "num_input_tokens_seen": 311477592, "step": 4968 }, { "epoch": 16.53577371048253, "grad_norm": 23.530302047729492, "learning_rate": 5e-06, "loss": 0.3951, "num_input_tokens_seen": 311541120, "step": 4969 }, { "epoch": 16.53577371048253, "loss": 0.36642515659332275, "loss_ce": 5.797046469524503e-07, "loss_iou": 0.1328125, "loss_num": 0.0198974609375, "loss_xval": 0.3671875, "num_input_tokens_seen": 311541120, "step": 4969 }, { "epoch": 16.53910149750416, "grad_norm": 22.3094482421875, "learning_rate": 5e-06, "loss": 0.6521, "num_input_tokens_seen": 311604532, "step": 4970 }, { "epoch": 16.53910149750416, "loss": 0.6688069105148315, "loss_ce": 4.465419624466449e-05, "loss_iou": 0.267578125, "loss_num": 0.0272216796875, "loss_xval": 0.66796875, "num_input_tokens_seen": 311604532, "step": 4970 }, { "epoch": 16.54242928452579, "grad_norm": 9.251094818115234, "learning_rate": 5e-06, "loss": 0.3893, "num_input_tokens_seen": 311665608, "step": 4971 }, { "epoch": 16.54242928452579, "loss": 0.46435678005218506, "loss_ce": 1.2745031199301593e-06, "loss_iou": 0.12353515625, "loss_num": 0.04345703125, "loss_xval": 0.46484375, "num_input_tokens_seen": 311665608, "step": 4971 }, { "epoch": 16.54575707154742, "grad_norm": 5.179887294769287, "learning_rate": 5e-06, "loss": 0.3198, "num_input_tokens_seen": 311728976, "step": 4972 }, { "epoch": 16.54575707154742, "loss": 0.30967074632644653, "loss_ce": 0.0002225120842922479, "loss_iou": 0.12451171875, "loss_num": 0.0120849609375, "loss_xval": 0.30859375, "num_input_tokens_seen": 311728976, "step": 4972 }, { "epoch": 16.549084858569053, "grad_norm": 19.619653701782227, "learning_rate": 5e-06, "loss": 0.4875, "num_input_tokens_seen": 311790184, "step": 4973 }, { "epoch": 16.549084858569053, "loss": 0.5253918766975403, "loss_ce": 1.2416394383762963e-06, "loss_iou": 0.1943359375, "loss_num": 0.027587890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 311790184, "step": 4973 }, { "epoch": 16.55241264559068, "grad_norm": 39.967594146728516, "learning_rate": 5e-06, "loss": 0.4981, "num_input_tokens_seen": 311853572, "step": 4974 }, { "epoch": 16.55241264559068, "loss": 0.5467686653137207, "loss_ce": 0.0005040451651439071, "loss_iou": 0.203125, "loss_num": 0.0279541015625, "loss_xval": 0.546875, "num_input_tokens_seen": 311853572, "step": 4974 }, { "epoch": 16.555740432612314, "grad_norm": 22.71688461303711, "learning_rate": 5e-06, "loss": 0.3043, "num_input_tokens_seen": 311914164, "step": 4975 }, { "epoch": 16.555740432612314, "loss": 0.2399921715259552, "loss_ce": 1.9387309748708503e-06, "loss_iou": 0.0849609375, "loss_num": 0.0140380859375, "loss_xval": 0.240234375, "num_input_tokens_seen": 311914164, "step": 4975 }, { "epoch": 16.559068219633943, "grad_norm": 13.516968727111816, "learning_rate": 5e-06, "loss": 0.4114, "num_input_tokens_seen": 311976932, "step": 4976 }, { "epoch": 16.559068219633943, "loss": 0.40502989292144775, "loss_ce": 5.808487912872806e-07, "loss_iou": 0.1357421875, "loss_num": 0.0269775390625, "loss_xval": 0.404296875, "num_input_tokens_seen": 311976932, "step": 4976 }, { "epoch": 16.562396006655575, "grad_norm": 12.372068405151367, "learning_rate": 5e-06, "loss": 0.4939, "num_input_tokens_seen": 312039092, "step": 4977 }, { "epoch": 16.562396006655575, "loss": 0.4198647439479828, "loss_ce": 3.916086370736593e-06, "loss_iou": 0.1650390625, "loss_num": 0.017822265625, "loss_xval": 0.419921875, "num_input_tokens_seen": 312039092, "step": 4977 }, { "epoch": 16.565723793677204, "grad_norm": 10.49933910369873, "learning_rate": 5e-06, "loss": 0.3635, "num_input_tokens_seen": 312101784, "step": 4978 }, { "epoch": 16.565723793677204, "loss": 0.26342856884002686, "loss_ce": 8.364448262909718e-07, "loss_iou": 0.10498046875, "loss_num": 0.01080322265625, "loss_xval": 0.263671875, "num_input_tokens_seen": 312101784, "step": 4978 }, { "epoch": 16.569051580698837, "grad_norm": 26.182994842529297, "learning_rate": 5e-06, "loss": 0.4201, "num_input_tokens_seen": 312164544, "step": 4979 }, { "epoch": 16.569051580698837, "loss": 0.4318108558654785, "loss_ce": 1.7637699784245342e-05, "loss_iou": 0.16796875, "loss_num": 0.0194091796875, "loss_xval": 0.431640625, "num_input_tokens_seen": 312164544, "step": 4979 }, { "epoch": 16.572379367720465, "grad_norm": 29.364587783813477, "learning_rate": 5e-06, "loss": 0.4248, "num_input_tokens_seen": 312226524, "step": 4980 }, { "epoch": 16.572379367720465, "loss": 0.6415823698043823, "loss_ce": 0.0006521659670397639, "loss_iou": 0.251953125, "loss_num": 0.027587890625, "loss_xval": 0.640625, "num_input_tokens_seen": 312226524, "step": 4980 }, { "epoch": 16.575707154742098, "grad_norm": 32.679012298583984, "learning_rate": 5e-06, "loss": 0.3235, "num_input_tokens_seen": 312289272, "step": 4981 }, { "epoch": 16.575707154742098, "loss": 0.362305223941803, "loss_ce": 5.031780005992914e-07, "loss_iou": 0.130859375, "loss_num": 0.020263671875, "loss_xval": 0.36328125, "num_input_tokens_seen": 312289272, "step": 4981 }, { "epoch": 16.579034941763727, "grad_norm": 18.634206771850586, "learning_rate": 5e-06, "loss": 0.2611, "num_input_tokens_seen": 312351552, "step": 4982 }, { "epoch": 16.579034941763727, "loss": 0.35717880725860596, "loss_ce": 1.0544198403295013e-06, "loss_iou": 0.138671875, "loss_num": 0.0157470703125, "loss_xval": 0.357421875, "num_input_tokens_seen": 312351552, "step": 4982 }, { "epoch": 16.58236272878536, "grad_norm": 11.672806739807129, "learning_rate": 5e-06, "loss": 0.3157, "num_input_tokens_seen": 312413712, "step": 4983 }, { "epoch": 16.58236272878536, "loss": 0.47302526235580444, "loss_ce": 2.8200620363350026e-06, "loss_iou": 0.189453125, "loss_num": 0.0189208984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 312413712, "step": 4983 }, { "epoch": 16.585690515806988, "grad_norm": 11.998357772827148, "learning_rate": 5e-06, "loss": 0.4314, "num_input_tokens_seen": 312476392, "step": 4984 }, { "epoch": 16.585690515806988, "loss": 0.4823042154312134, "loss_ce": 4.41465954281739e-06, "loss_iou": 0.1953125, "loss_num": 0.0184326171875, "loss_xval": 0.482421875, "num_input_tokens_seen": 312476392, "step": 4984 }, { "epoch": 16.58901830282862, "grad_norm": 12.036153793334961, "learning_rate": 5e-06, "loss": 0.3416, "num_input_tokens_seen": 312538436, "step": 4985 }, { "epoch": 16.58901830282862, "loss": 0.2793281078338623, "loss_ce": 7.175350447141682e-07, "loss_iou": 0.11328125, "loss_num": 0.010498046875, "loss_xval": 0.279296875, "num_input_tokens_seen": 312538436, "step": 4985 }, { "epoch": 16.59234608985025, "grad_norm": 14.686861038208008, "learning_rate": 5e-06, "loss": 0.551, "num_input_tokens_seen": 312600704, "step": 4986 }, { "epoch": 16.59234608985025, "loss": 0.48767217993736267, "loss_ce": 1.3103118590152008e-06, "loss_iou": 0.1865234375, "loss_num": 0.02294921875, "loss_xval": 0.48828125, "num_input_tokens_seen": 312600704, "step": 4986 }, { "epoch": 16.59567387687188, "grad_norm": 9.161335945129395, "learning_rate": 5e-06, "loss": 0.3805, "num_input_tokens_seen": 312664528, "step": 4987 }, { "epoch": 16.59567387687188, "loss": 0.5250262022018433, "loss_ce": 1.7934410152520286e-06, "loss_iou": 0.2158203125, "loss_num": 0.0185546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 312664528, "step": 4987 }, { "epoch": 16.59900166389351, "grad_norm": 18.328439712524414, "learning_rate": 5e-06, "loss": 0.3362, "num_input_tokens_seen": 312726832, "step": 4988 }, { "epoch": 16.59900166389351, "loss": 0.40926748514175415, "loss_ce": 0.0004845533985644579, "loss_iou": 0.15234375, "loss_num": 0.020751953125, "loss_xval": 0.408203125, "num_input_tokens_seen": 312726832, "step": 4988 }, { "epoch": 16.602329450915143, "grad_norm": 24.588214874267578, "learning_rate": 5e-06, "loss": 0.4885, "num_input_tokens_seen": 312789052, "step": 4989 }, { "epoch": 16.602329450915143, "loss": 0.6253730058670044, "loss_ce": 6.853470949863549e-06, "loss_iou": 0.244140625, "loss_num": 0.0272216796875, "loss_xval": 0.625, "num_input_tokens_seen": 312789052, "step": 4989 }, { "epoch": 16.60565723793677, "grad_norm": 10.360082626342773, "learning_rate": 5e-06, "loss": 0.3182, "num_input_tokens_seen": 312850988, "step": 4990 }, { "epoch": 16.60565723793677, "loss": 0.20324815809726715, "loss_ce": 1.0983375204887125e-06, "loss_iou": 0.0693359375, "loss_num": 0.01287841796875, "loss_xval": 0.203125, "num_input_tokens_seen": 312850988, "step": 4990 }, { "epoch": 16.608985024958404, "grad_norm": 10.7132568359375, "learning_rate": 5e-06, "loss": 0.4319, "num_input_tokens_seen": 312914424, "step": 4991 }, { "epoch": 16.608985024958404, "loss": 0.6558927297592163, "loss_ce": 8.924497706175316e-06, "loss_iou": 0.259765625, "loss_num": 0.0269775390625, "loss_xval": 0.65625, "num_input_tokens_seen": 312914424, "step": 4991 }, { "epoch": 16.612312811980033, "grad_norm": 12.940841674804688, "learning_rate": 5e-06, "loss": 0.3482, "num_input_tokens_seen": 312977840, "step": 4992 }, { "epoch": 16.612312811980033, "loss": 0.23686686158180237, "loss_ce": 0.00023355678422376513, "loss_iou": 0.10205078125, "loss_num": 0.006591796875, "loss_xval": 0.236328125, "num_input_tokens_seen": 312977840, "step": 4992 }, { "epoch": 16.615640599001665, "grad_norm": 17.48478126525879, "learning_rate": 5e-06, "loss": 0.6238, "num_input_tokens_seen": 313042116, "step": 4993 }, { "epoch": 16.615640599001665, "loss": 0.4768557846546173, "loss_ce": 0.00017119261610787362, "loss_iou": 0.177734375, "loss_num": 0.024169921875, "loss_xval": 0.4765625, "num_input_tokens_seen": 313042116, "step": 4993 }, { "epoch": 16.618968386023294, "grad_norm": 15.372246742248535, "learning_rate": 5e-06, "loss": 0.3061, "num_input_tokens_seen": 313104100, "step": 4994 }, { "epoch": 16.618968386023294, "loss": 0.2280379682779312, "loss_ce": 1.0631564691720996e-05, "loss_iou": 0.05517578125, "loss_num": 0.0234375, "loss_xval": 0.228515625, "num_input_tokens_seen": 313104100, "step": 4994 }, { "epoch": 16.622296173044926, "grad_norm": 25.093416213989258, "learning_rate": 5e-06, "loss": 0.6067, "num_input_tokens_seen": 313165224, "step": 4995 }, { "epoch": 16.622296173044926, "loss": 0.8271059989929199, "loss_ce": 0.00014064906281419098, "loss_iou": 0.349609375, "loss_num": 0.026123046875, "loss_xval": 0.828125, "num_input_tokens_seen": 313165224, "step": 4995 }, { "epoch": 16.625623960066555, "grad_norm": 17.714303970336914, "learning_rate": 5e-06, "loss": 0.3122, "num_input_tokens_seen": 313227108, "step": 4996 }, { "epoch": 16.625623960066555, "loss": 0.16760316491127014, "loss_ce": 6.359328494909278e-07, "loss_iou": 0.07080078125, "loss_num": 0.005126953125, "loss_xval": 0.16796875, "num_input_tokens_seen": 313227108, "step": 4996 }, { "epoch": 16.628951747088188, "grad_norm": 9.074315071105957, "learning_rate": 5e-06, "loss": 0.5259, "num_input_tokens_seen": 313289988, "step": 4997 }, { "epoch": 16.628951747088188, "loss": 0.5195464491844177, "loss_ce": 1.5184286894509569e-05, "loss_iou": 0.2080078125, "loss_num": 0.020751953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 313289988, "step": 4997 }, { "epoch": 16.632279534109816, "grad_norm": 17.16736602783203, "learning_rate": 5e-06, "loss": 0.2823, "num_input_tokens_seen": 313353620, "step": 4998 }, { "epoch": 16.632279534109816, "loss": 0.274596631526947, "loss_ce": 0.0007318888092413545, "loss_iou": 0.0927734375, "loss_num": 0.0174560546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 313353620, "step": 4998 }, { "epoch": 16.63560732113145, "grad_norm": 26.9007511138916, "learning_rate": 5e-06, "loss": 0.4362, "num_input_tokens_seen": 313414900, "step": 4999 }, { "epoch": 16.63560732113145, "loss": 0.34899967908859253, "loss_ce": 6.602958819712512e-07, "loss_iou": 0.138671875, "loss_num": 0.0142822265625, "loss_xval": 0.349609375, "num_input_tokens_seen": 313414900, "step": 4999 }, { "epoch": 16.638935108153078, "grad_norm": 31.89777183532715, "learning_rate": 5e-06, "loss": 0.5472, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "eval_seeclick_CIoU": 0.02830713428556919, "eval_seeclick_GIoU": 0.020999638363718987, "eval_seeclick_IoU": 0.15893124788999557, "eval_seeclick_MAE_all": 0.17243807762861252, "eval_seeclick_MAE_h": 0.07612233608961105, "eval_seeclick_MAE_w": 0.13957122713327408, "eval_seeclick_MAE_x_boxes": 0.21974767744541168, "eval_seeclick_MAE_y_boxes": 0.18458770215511322, "eval_seeclick_NUM_probability": 0.9999722540378571, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 3.0314717292785645, "eval_seeclick_loss_ce": 0.17047031968832016, "eval_seeclick_loss_iou": 0.99267578125, "eval_seeclick_loss_num": 0.1734161376953125, "eval_seeclick_loss_xval": 2.8515625, "eval_seeclick_runtime": 68.1619, "eval_seeclick_samples_per_second": 0.69, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "eval_icons_CIoU": -0.05177699029445648, "eval_icons_GIoU": 0.03931999392807484, "eval_icons_IoU": 0.12919080257415771, "eval_icons_MAE_all": 0.20360295474529266, "eval_icons_MAE_h": 0.18753288686275482, "eval_icons_MAE_w": 0.2042771801352501, "eval_icons_MAE_x_boxes": 0.14949437975883484, "eval_icons_MAE_y_boxes": 0.09772763028740883, "eval_icons_NUM_probability": 0.9999867677688599, "eval_icons_inside_bbox": 0.2482638955116272, "eval_icons_loss": 2.88375186920166, "eval_icons_loss_ce": 3.189411017956445e-06, "eval_icons_loss_iou": 0.95751953125, "eval_icons_loss_num": 0.1954345703125, "eval_icons_loss_xval": 2.8916015625, "eval_icons_runtime": 68.0475, "eval_icons_samples_per_second": 0.735, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "eval_screenspot_CIoU": 0.1837989166378975, "eval_screenspot_GIoU": 0.21952173113822937, "eval_screenspot_IoU": 0.29577693343162537, "eval_screenspot_MAE_all": 0.11365679403146108, "eval_screenspot_MAE_h": 0.059598115583260856, "eval_screenspot_MAE_w": 0.10226693252722423, "eval_screenspot_MAE_x_boxes": 0.15545955300331116, "eval_screenspot_MAE_y_boxes": 0.08469116936127345, "eval_screenspot_NUM_probability": 0.9999937812487284, "eval_screenspot_inside_bbox": 0.5362499952316284, "eval_screenspot_loss": 2.1683664321899414, "eval_screenspot_loss_ce": 2.5486670741277826e-06, "eval_screenspot_loss_iou": 0.7928873697916666, "eval_screenspot_loss_num": 0.12240091959635417, "eval_screenspot_loss_xval": 2.197265625, "eval_screenspot_runtime": 120.1412, "eval_screenspot_samples_per_second": 0.741, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "eval_compot_CIoU": 0.15593845397233963, "eval_compot_GIoU": 0.20458263903856277, "eval_compot_IoU": 0.28670844435691833, "eval_compot_MAE_all": 0.13251586258411407, "eval_compot_MAE_h": 0.0566236712038517, "eval_compot_MAE_w": 0.13641006499528885, "eval_compot_MAE_x_boxes": 0.11726570874452591, "eval_compot_MAE_y_boxes": 0.11105619370937347, "eval_compot_NUM_probability": 0.9999969601631165, "eval_compot_inside_bbox": 0.4288194477558136, "eval_compot_loss": 2.2314887046813965, "eval_compot_loss_ce": 0.004827011609449983, "eval_compot_loss_iou": 0.8121337890625, "eval_compot_loss_num": 0.1391773223876953, "eval_compot_loss_xval": 2.321533203125, "eval_compot_runtime": 82.343, "eval_compot_samples_per_second": 0.607, "eval_compot_steps_per_second": 0.024, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "eval_custom_ui_MAE_all": 0.06302358210086823, "eval_custom_ui_MAE_x": 0.07297132536768913, "eval_custom_ui_MAE_y": 0.05307583510875702, "eval_custom_ui_NUM_probability": 0.9999828338623047, "eval_custom_ui_loss": 0.30746403336524963, "eval_custom_ui_loss_ce": 3.626446073212719e-06, "eval_custom_ui_loss_num": 0.06580352783203125, "eval_custom_ui_loss_xval": 0.329132080078125, "eval_custom_ui_runtime": 50.9777, "eval_custom_ui_samples_per_second": 0.981, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.638935108153078, "loss": 0.33978667855262756, "loss_ce": 3.968244527641218e-06, "loss_iou": 0.0, "loss_num": 0.06787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 313478688, "step": 5000 }, { "epoch": 16.64226289517471, "grad_norm": 32.079524993896484, "learning_rate": 5e-06, "loss": 0.4953, "num_input_tokens_seen": 313542332, "step": 5001 }, { "epoch": 16.64226289517471, "loss": 0.6835360527038574, "loss_ce": 3.3299866117886268e-06, "loss_iou": 0.2177734375, "loss_num": 0.04931640625, "loss_xval": 0.68359375, "num_input_tokens_seen": 313542332, "step": 5001 }, { "epoch": 16.64559068219634, "grad_norm": 15.428890228271484, "learning_rate": 5e-06, "loss": 0.2837, "num_input_tokens_seen": 313604996, "step": 5002 }, { "epoch": 16.64559068219634, "loss": 0.17718634009361267, "loss_ce": 1.290544446419517e-06, "loss_iou": 0.0556640625, "loss_num": 0.01312255859375, "loss_xval": 0.1767578125, "num_input_tokens_seen": 313604996, "step": 5002 }, { "epoch": 16.64891846921797, "grad_norm": 14.169041633605957, "learning_rate": 5e-06, "loss": 0.379, "num_input_tokens_seen": 313668524, "step": 5003 }, { "epoch": 16.64891846921797, "loss": 0.35833999514579773, "loss_ce": 2.5971717150241602e-06, "loss_iou": 0.1650390625, "loss_num": 0.005828857421875, "loss_xval": 0.357421875, "num_input_tokens_seen": 313668524, "step": 5003 }, { "epoch": 16.6522462562396, "grad_norm": 25.88524627685547, "learning_rate": 5e-06, "loss": 0.4209, "num_input_tokens_seen": 313732100, "step": 5004 }, { "epoch": 16.6522462562396, "loss": 0.24226824939250946, "loss_ce": 4.464210178412031e-06, "loss_iou": 0.10498046875, "loss_num": 0.00653076171875, "loss_xval": 0.2421875, "num_input_tokens_seen": 313732100, "step": 5004 }, { "epoch": 16.655574043261232, "grad_norm": 32.619991302490234, "learning_rate": 5e-06, "loss": 0.4792, "num_input_tokens_seen": 313793356, "step": 5005 }, { "epoch": 16.655574043261232, "loss": 0.6654059290885925, "loss_ce": 6.540151389344828e-07, "loss_iou": 0.27734375, "loss_num": 0.022216796875, "loss_xval": 0.6640625, "num_input_tokens_seen": 313793356, "step": 5005 }, { "epoch": 16.65890183028286, "grad_norm": 13.63675594329834, "learning_rate": 5e-06, "loss": 0.393, "num_input_tokens_seen": 313856228, "step": 5006 }, { "epoch": 16.65890183028286, "loss": 0.39711901545524597, "loss_ce": 0.00020741194020956755, "loss_iou": 0.1630859375, "loss_num": 0.0140380859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 313856228, "step": 5006 }, { "epoch": 16.662229617304494, "grad_norm": 24.376102447509766, "learning_rate": 5e-06, "loss": 0.2986, "num_input_tokens_seen": 313919224, "step": 5007 }, { "epoch": 16.662229617304494, "loss": 0.25225120782852173, "loss_ce": 5.3933235903969035e-05, "loss_iou": 0.087890625, "loss_num": 0.01519775390625, "loss_xval": 0.251953125, "num_input_tokens_seen": 313919224, "step": 5007 }, { "epoch": 16.665557404326123, "grad_norm": 27.04013442993164, "learning_rate": 5e-06, "loss": 0.3882, "num_input_tokens_seen": 313982440, "step": 5008 }, { "epoch": 16.665557404326123, "loss": 0.3581560254096985, "loss_ce": 1.742377207847312e-06, "loss_iou": 0.119140625, "loss_num": 0.02392578125, "loss_xval": 0.357421875, "num_input_tokens_seen": 313982440, "step": 5008 }, { "epoch": 16.668885191347755, "grad_norm": 19.8651180267334, "learning_rate": 5e-06, "loss": 0.4524, "num_input_tokens_seen": 314044952, "step": 5009 }, { "epoch": 16.668885191347755, "loss": 0.5079027414321899, "loss_ce": 6.2878912103769835e-06, "loss_iou": 0.177734375, "loss_num": 0.0308837890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 314044952, "step": 5009 }, { "epoch": 16.672212978369384, "grad_norm": 12.929264068603516, "learning_rate": 5e-06, "loss": 0.3091, "num_input_tokens_seen": 314107300, "step": 5010 }, { "epoch": 16.672212978369384, "loss": 0.21107791364192963, "loss_ce": 1.8340191672905348e-05, "loss_iou": 0.0634765625, "loss_num": 0.016845703125, "loss_xval": 0.2109375, "num_input_tokens_seen": 314107300, "step": 5010 }, { "epoch": 16.675540765391016, "grad_norm": 9.746620178222656, "learning_rate": 5e-06, "loss": 0.402, "num_input_tokens_seen": 314168992, "step": 5011 }, { "epoch": 16.675540765391016, "loss": 0.47126543521881104, "loss_ce": 5.874764610780403e-05, "loss_iou": 0.19140625, "loss_num": 0.0177001953125, "loss_xval": 0.470703125, "num_input_tokens_seen": 314168992, "step": 5011 }, { "epoch": 16.678868552412645, "grad_norm": 19.025373458862305, "learning_rate": 5e-06, "loss": 0.2993, "num_input_tokens_seen": 314230752, "step": 5012 }, { "epoch": 16.678868552412645, "loss": 0.18319770693778992, "loss_ce": 6.753708703399752e-07, "loss_iou": 0.07275390625, "loss_num": 0.00750732421875, "loss_xval": 0.18359375, "num_input_tokens_seen": 314230752, "step": 5012 }, { "epoch": 16.682196339434277, "grad_norm": 26.148460388183594, "learning_rate": 5e-06, "loss": 0.3702, "num_input_tokens_seen": 314293940, "step": 5013 }, { "epoch": 16.682196339434277, "loss": 0.4550282955169678, "loss_ce": 1.1211166565772146e-05, "loss_iou": 0.1640625, "loss_num": 0.0252685546875, "loss_xval": 0.455078125, "num_input_tokens_seen": 314293940, "step": 5013 }, { "epoch": 16.685524126455906, "grad_norm": 20.81414794921875, "learning_rate": 5e-06, "loss": 0.4489, "num_input_tokens_seen": 314356624, "step": 5014 }, { "epoch": 16.685524126455906, "loss": 0.4763984978199005, "loss_ce": 1.9101906218566e-05, "loss_iou": 0.1904296875, "loss_num": 0.01904296875, "loss_xval": 0.4765625, "num_input_tokens_seen": 314356624, "step": 5014 }, { "epoch": 16.68885191347754, "grad_norm": 11.852943420410156, "learning_rate": 5e-06, "loss": 0.3227, "num_input_tokens_seen": 314417668, "step": 5015 }, { "epoch": 16.68885191347754, "loss": 0.3219063878059387, "loss_ce": 6.97731729815132e-06, "loss_iou": 0.07568359375, "loss_num": 0.0341796875, "loss_xval": 0.322265625, "num_input_tokens_seen": 314417668, "step": 5015 }, { "epoch": 16.692179700499167, "grad_norm": 14.870262145996094, "learning_rate": 5e-06, "loss": 0.3562, "num_input_tokens_seen": 314479616, "step": 5016 }, { "epoch": 16.692179700499167, "loss": 0.43139946460723877, "loss_ce": 3.0087453524174634e-06, "loss_iou": 0.1572265625, "loss_num": 0.023193359375, "loss_xval": 0.431640625, "num_input_tokens_seen": 314479616, "step": 5016 }, { "epoch": 16.6955074875208, "grad_norm": 21.225414276123047, "learning_rate": 5e-06, "loss": 0.2847, "num_input_tokens_seen": 314542436, "step": 5017 }, { "epoch": 16.6955074875208, "loss": 0.282992959022522, "loss_ce": 3.4774086543620797e-06, "loss_iou": 0.111328125, "loss_num": 0.01202392578125, "loss_xval": 0.283203125, "num_input_tokens_seen": 314542436, "step": 5017 }, { "epoch": 16.69883527454243, "grad_norm": 15.46623420715332, "learning_rate": 5e-06, "loss": 0.3794, "num_input_tokens_seen": 314607444, "step": 5018 }, { "epoch": 16.69883527454243, "loss": 0.39709967374801636, "loss_ce": 4.954154519509757e-06, "loss_iou": 0.1630859375, "loss_num": 0.0140380859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 314607444, "step": 5018 }, { "epoch": 16.70216306156406, "grad_norm": 19.546884536743164, "learning_rate": 5e-06, "loss": 0.402, "num_input_tokens_seen": 314668788, "step": 5019 }, { "epoch": 16.70216306156406, "loss": 0.5482344031333923, "loss_ce": 1.664040428295266e-05, "loss_iou": 0.2216796875, "loss_num": 0.02099609375, "loss_xval": 0.546875, "num_input_tokens_seen": 314668788, "step": 5019 }, { "epoch": 16.70549084858569, "grad_norm": 36.54631423950195, "learning_rate": 5e-06, "loss": 0.4481, "num_input_tokens_seen": 314731572, "step": 5020 }, { "epoch": 16.70549084858569, "loss": 0.4740627408027649, "loss_ce": 2.671343281690497e-06, "loss_iou": 0.185546875, "loss_num": 0.0208740234375, "loss_xval": 0.474609375, "num_input_tokens_seen": 314731572, "step": 5020 }, { "epoch": 16.708818635607322, "grad_norm": 42.402774810791016, "learning_rate": 5e-06, "loss": 0.4298, "num_input_tokens_seen": 314794700, "step": 5021 }, { "epoch": 16.708818635607322, "loss": 0.41231268644332886, "loss_ce": 2.0190494979033247e-05, "loss_iou": 0.1572265625, "loss_num": 0.019775390625, "loss_xval": 0.412109375, "num_input_tokens_seen": 314794700, "step": 5021 }, { "epoch": 16.71214642262895, "grad_norm": 34.73294448852539, "learning_rate": 5e-06, "loss": 0.56, "num_input_tokens_seen": 314857000, "step": 5022 }, { "epoch": 16.71214642262895, "loss": 0.5307775139808655, "loss_ce": 5.481184643940651e-07, "loss_iou": 0.1865234375, "loss_num": 0.03173828125, "loss_xval": 0.53125, "num_input_tokens_seen": 314857000, "step": 5022 }, { "epoch": 16.715474209650584, "grad_norm": 13.745397567749023, "learning_rate": 5e-06, "loss": 0.3991, "num_input_tokens_seen": 314920876, "step": 5023 }, { "epoch": 16.715474209650584, "loss": 0.4141952693462372, "loss_ce": 1.0703301995818038e-05, "loss_iou": 0.1728515625, "loss_num": 0.0135498046875, "loss_xval": 0.4140625, "num_input_tokens_seen": 314920876, "step": 5023 }, { "epoch": 16.718801996672212, "grad_norm": 6.488992214202881, "learning_rate": 5e-06, "loss": 0.6294, "num_input_tokens_seen": 314984476, "step": 5024 }, { "epoch": 16.718801996672212, "loss": 0.7451195120811462, "loss_ce": 2.3194559162220685e-06, "loss_iou": 0.279296875, "loss_num": 0.03759765625, "loss_xval": 0.74609375, "num_input_tokens_seen": 314984476, "step": 5024 }, { "epoch": 16.722129783693845, "grad_norm": 16.58009910583496, "learning_rate": 5e-06, "loss": 0.4717, "num_input_tokens_seen": 315047032, "step": 5025 }, { "epoch": 16.722129783693845, "loss": 0.6290913224220276, "loss_ce": 2.008898718486307e-06, "loss_iou": 0.2373046875, "loss_num": 0.0308837890625, "loss_xval": 0.62890625, "num_input_tokens_seen": 315047032, "step": 5025 }, { "epoch": 16.725457570715474, "grad_norm": 22.99786376953125, "learning_rate": 5e-06, "loss": 0.319, "num_input_tokens_seen": 315110900, "step": 5026 }, { "epoch": 16.725457570715474, "loss": 0.3277013599872589, "loss_ce": 3.6041085422766628e-06, "loss_iou": 0.130859375, "loss_num": 0.01324462890625, "loss_xval": 0.328125, "num_input_tokens_seen": 315110900, "step": 5026 }, { "epoch": 16.728785357737106, "grad_norm": 20.15001678466797, "learning_rate": 5e-06, "loss": 0.4079, "num_input_tokens_seen": 315173612, "step": 5027 }, { "epoch": 16.728785357737106, "loss": 0.42270511388778687, "loss_ce": 6.13494148637983e-06, "loss_iou": 0.1591796875, "loss_num": 0.0208740234375, "loss_xval": 0.421875, "num_input_tokens_seen": 315173612, "step": 5027 }, { "epoch": 16.732113144758735, "grad_norm": 8.83884334564209, "learning_rate": 5e-06, "loss": 0.3815, "num_input_tokens_seen": 315235888, "step": 5028 }, { "epoch": 16.732113144758735, "loss": 0.2643865942955017, "loss_ce": 1.2812060958822258e-05, "loss_iou": 0.10546875, "loss_num": 0.01080322265625, "loss_xval": 0.263671875, "num_input_tokens_seen": 315235888, "step": 5028 }, { "epoch": 16.735440931780367, "grad_norm": 15.518686294555664, "learning_rate": 5e-06, "loss": 0.4655, "num_input_tokens_seen": 315298596, "step": 5029 }, { "epoch": 16.735440931780367, "loss": 0.6790398955345154, "loss_ce": 8.481062832288444e-05, "loss_iou": 0.296875, "loss_num": 0.01708984375, "loss_xval": 0.6796875, "num_input_tokens_seen": 315298596, "step": 5029 }, { "epoch": 16.738768718801996, "grad_norm": 19.313453674316406, "learning_rate": 5e-06, "loss": 0.3777, "num_input_tokens_seen": 315362028, "step": 5030 }, { "epoch": 16.738768718801996, "loss": 0.3462321162223816, "loss_ce": 0.00016277383838314563, "loss_iou": 0.1259765625, "loss_num": 0.018798828125, "loss_xval": 0.345703125, "num_input_tokens_seen": 315362028, "step": 5030 }, { "epoch": 16.74209650582363, "grad_norm": 8.317488670349121, "learning_rate": 5e-06, "loss": 0.3795, "num_input_tokens_seen": 315425416, "step": 5031 }, { "epoch": 16.74209650582363, "loss": 0.5087979435920715, "loss_ce": 8.905039067030884e-06, "loss_iou": 0.1513671875, "loss_num": 0.041259765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 315425416, "step": 5031 }, { "epoch": 16.745424292845257, "grad_norm": 19.25815200805664, "learning_rate": 5e-06, "loss": 0.4133, "num_input_tokens_seen": 315487428, "step": 5032 }, { "epoch": 16.745424292845257, "loss": 0.5538036227226257, "loss_ce": 1.1377978808013722e-06, "loss_iou": 0.220703125, "loss_num": 0.0225830078125, "loss_xval": 0.5546875, "num_input_tokens_seen": 315487428, "step": 5032 }, { "epoch": 16.74875207986689, "grad_norm": 26.3471736907959, "learning_rate": 5e-06, "loss": 0.4989, "num_input_tokens_seen": 315552308, "step": 5033 }, { "epoch": 16.74875207986689, "loss": 0.5181921720504761, "loss_ce": 3.6616770557884593e-06, "loss_iou": 0.189453125, "loss_num": 0.0279541015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 315552308, "step": 5033 }, { "epoch": 16.75207986688852, "grad_norm": 31.208017349243164, "learning_rate": 5e-06, "loss": 0.5288, "num_input_tokens_seen": 315615312, "step": 5034 }, { "epoch": 16.75207986688852, "loss": 0.2901027798652649, "loss_ce": 2.6794550649356097e-06, "loss_iou": 0.10986328125, "loss_num": 0.01409912109375, "loss_xval": 0.291015625, "num_input_tokens_seen": 315615312, "step": 5034 }, { "epoch": 16.75540765391015, "grad_norm": 29.548921585083008, "learning_rate": 5e-06, "loss": 0.447, "num_input_tokens_seen": 315678888, "step": 5035 }, { "epoch": 16.75540765391015, "loss": 0.5289835929870605, "loss_ce": 2.2365078621078283e-05, "loss_iou": 0.1787109375, "loss_num": 0.034423828125, "loss_xval": 0.52734375, "num_input_tokens_seen": 315678888, "step": 5035 }, { "epoch": 16.75873544093178, "grad_norm": 16.981233596801758, "learning_rate": 5e-06, "loss": 0.6378, "num_input_tokens_seen": 315742244, "step": 5036 }, { "epoch": 16.75873544093178, "loss": 0.8281264305114746, "loss_ce": 1.3974961348139914e-06, "loss_iou": 0.369140625, "loss_num": 0.0179443359375, "loss_xval": 0.828125, "num_input_tokens_seen": 315742244, "step": 5036 }, { "epoch": 16.762063227953412, "grad_norm": 13.10698127746582, "learning_rate": 5e-06, "loss": 0.4371, "num_input_tokens_seen": 315804732, "step": 5037 }, { "epoch": 16.762063227953412, "loss": 0.4552640914916992, "loss_ce": 2.9011225706199184e-06, "loss_iou": 0.1884765625, "loss_num": 0.0157470703125, "loss_xval": 0.455078125, "num_input_tokens_seen": 315804732, "step": 5037 }, { "epoch": 16.76539101497504, "grad_norm": 12.732739448547363, "learning_rate": 5e-06, "loss": 0.4615, "num_input_tokens_seen": 315868188, "step": 5038 }, { "epoch": 16.76539101497504, "loss": 0.49885129928588867, "loss_ce": 7.201985863503069e-05, "loss_iou": 0.2138671875, "loss_num": 0.013916015625, "loss_xval": 0.498046875, "num_input_tokens_seen": 315868188, "step": 5038 }, { "epoch": 16.768718801996673, "grad_norm": 10.573822021484375, "learning_rate": 5e-06, "loss": 0.4301, "num_input_tokens_seen": 315931184, "step": 5039 }, { "epoch": 16.768718801996673, "loss": 0.3702448308467865, "loss_ce": 5.562958904192783e-06, "loss_iou": 0.134765625, "loss_num": 0.020263671875, "loss_xval": 0.37109375, "num_input_tokens_seen": 315931184, "step": 5039 }, { "epoch": 16.772046589018302, "grad_norm": 14.926505088806152, "learning_rate": 5e-06, "loss": 0.3254, "num_input_tokens_seen": 315991320, "step": 5040 }, { "epoch": 16.772046589018302, "loss": 0.14404457807540894, "loss_ce": 1.6074636732810177e-06, "loss_iou": 0.03466796875, "loss_num": 0.014892578125, "loss_xval": 0.14453125, "num_input_tokens_seen": 315991320, "step": 5040 }, { "epoch": 16.775374376039935, "grad_norm": 15.543330192565918, "learning_rate": 5e-06, "loss": 0.4757, "num_input_tokens_seen": 316053616, "step": 5041 }, { "epoch": 16.775374376039935, "loss": 0.40027129650115967, "loss_ce": 2.7214709916734137e-06, "loss_iou": 0.1513671875, "loss_num": 0.019287109375, "loss_xval": 0.400390625, "num_input_tokens_seen": 316053616, "step": 5041 }, { "epoch": 16.778702163061563, "grad_norm": 12.8128080368042, "learning_rate": 5e-06, "loss": 0.4566, "num_input_tokens_seen": 316115500, "step": 5042 }, { "epoch": 16.778702163061563, "loss": 0.4747527241706848, "loss_ce": 0.00029591715428978205, "loss_iou": 0.15625, "loss_num": 0.032470703125, "loss_xval": 0.474609375, "num_input_tokens_seen": 316115500, "step": 5042 }, { "epoch": 16.782029950083196, "grad_norm": 11.391507148742676, "learning_rate": 5e-06, "loss": 0.5963, "num_input_tokens_seen": 316178820, "step": 5043 }, { "epoch": 16.782029950083196, "loss": 0.6606248021125793, "loss_ce": 3.173353888996644e-06, "loss_iou": 0.2421875, "loss_num": 0.035400390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 316178820, "step": 5043 }, { "epoch": 16.785357737104825, "grad_norm": 21.360944747924805, "learning_rate": 5e-06, "loss": 0.5124, "num_input_tokens_seen": 316244164, "step": 5044 }, { "epoch": 16.785357737104825, "loss": 0.5521945357322693, "loss_ce": 9.470118129684124e-06, "loss_iou": 0.240234375, "loss_num": 0.01446533203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 316244164, "step": 5044 }, { "epoch": 16.788685524126457, "grad_norm": 14.643407821655273, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 316307912, "step": 5045 }, { "epoch": 16.788685524126457, "loss": 0.4442158341407776, "loss_ce": 1.9497638277243823e-06, "loss_iou": 0.193359375, "loss_num": 0.011474609375, "loss_xval": 0.443359375, "num_input_tokens_seen": 316307912, "step": 5045 }, { "epoch": 16.792013311148086, "grad_norm": 12.345368385314941, "learning_rate": 5e-06, "loss": 0.433, "num_input_tokens_seen": 316370516, "step": 5046 }, { "epoch": 16.792013311148086, "loss": 0.47009506821632385, "loss_ce": 2.3221205083245877e-06, "loss_iou": 0.1962890625, "loss_num": 0.01556396484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 316370516, "step": 5046 }, { "epoch": 16.795341098169718, "grad_norm": 21.087615966796875, "learning_rate": 5e-06, "loss": 0.4925, "num_input_tokens_seen": 316433988, "step": 5047 }, { "epoch": 16.795341098169718, "loss": 0.4339691698551178, "loss_ce": 9.201915418088902e-06, "loss_iou": 0.1904296875, "loss_num": 0.010498046875, "loss_xval": 0.43359375, "num_input_tokens_seen": 316433988, "step": 5047 }, { "epoch": 16.798668885191347, "grad_norm": 16.45331382751465, "learning_rate": 5e-06, "loss": 0.4955, "num_input_tokens_seen": 316496816, "step": 5048 }, { "epoch": 16.798668885191347, "loss": 0.6534876823425293, "loss_ce": 0.00016733873053453863, "loss_iou": 0.265625, "loss_num": 0.0242919921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 316496816, "step": 5048 }, { "epoch": 16.80199667221298, "grad_norm": 8.829753875732422, "learning_rate": 5e-06, "loss": 0.498, "num_input_tokens_seen": 316561632, "step": 5049 }, { "epoch": 16.80199667221298, "loss": 0.6179838180541992, "loss_ce": 2.8556701181514654e-06, "loss_iou": 0.2431640625, "loss_num": 0.026611328125, "loss_xval": 0.6171875, "num_input_tokens_seen": 316561632, "step": 5049 }, { "epoch": 16.80532445923461, "grad_norm": 12.5568208694458, "learning_rate": 5e-06, "loss": 0.3082, "num_input_tokens_seen": 316624408, "step": 5050 }, { "epoch": 16.80532445923461, "loss": 0.41135743260383606, "loss_ce": 1.0983736501657404e-05, "loss_iou": 0.13671875, "loss_num": 0.0277099609375, "loss_xval": 0.412109375, "num_input_tokens_seen": 316624408, "step": 5050 }, { "epoch": 16.80865224625624, "grad_norm": 20.98607635498047, "learning_rate": 5e-06, "loss": 0.3119, "num_input_tokens_seen": 316685996, "step": 5051 }, { "epoch": 16.80865224625624, "loss": 0.265148401260376, "loss_ce": 1.1676021131279413e-05, "loss_iou": 0.08447265625, "loss_num": 0.0191650390625, "loss_xval": 0.265625, "num_input_tokens_seen": 316685996, "step": 5051 }, { "epoch": 16.81198003327787, "grad_norm": 26.60582733154297, "learning_rate": 5e-06, "loss": 0.3772, "num_input_tokens_seen": 316748636, "step": 5052 }, { "epoch": 16.81198003327787, "loss": 0.442505419254303, "loss_ce": 5.252080654827296e-07, "loss_iou": 0.185546875, "loss_num": 0.0145263671875, "loss_xval": 0.443359375, "num_input_tokens_seen": 316748636, "step": 5052 }, { "epoch": 16.815307820299502, "grad_norm": 8.968958854675293, "learning_rate": 5e-06, "loss": 0.3975, "num_input_tokens_seen": 316811220, "step": 5053 }, { "epoch": 16.815307820299502, "loss": 0.40772414207458496, "loss_ce": 9.290616617363412e-06, "loss_iou": 0.169921875, "loss_num": 0.0135498046875, "loss_xval": 0.408203125, "num_input_tokens_seen": 316811220, "step": 5053 }, { "epoch": 16.81863560732113, "grad_norm": 19.621347427368164, "learning_rate": 5e-06, "loss": 0.3774, "num_input_tokens_seen": 316874264, "step": 5054 }, { "epoch": 16.81863560732113, "loss": 0.44516462087631226, "loss_ce": 4.7092153181438334e-06, "loss_iou": 0.1689453125, "loss_num": 0.021484375, "loss_xval": 0.4453125, "num_input_tokens_seen": 316874264, "step": 5054 }, { "epoch": 16.821963394342763, "grad_norm": 11.128510475158691, "learning_rate": 5e-06, "loss": 0.4724, "num_input_tokens_seen": 316935940, "step": 5055 }, { "epoch": 16.821963394342763, "loss": 0.5814238786697388, "loss_ce": 2.9667494345630985e-06, "loss_iou": 0.2109375, "loss_num": 0.03173828125, "loss_xval": 0.58203125, "num_input_tokens_seen": 316935940, "step": 5055 }, { "epoch": 16.825291181364392, "grad_norm": 10.54517936706543, "learning_rate": 5e-06, "loss": 0.5134, "num_input_tokens_seen": 317000080, "step": 5056 }, { "epoch": 16.825291181364392, "loss": 0.5356488227844238, "loss_ce": 4.30842464993475e-06, "loss_iou": 0.2109375, "loss_num": 0.022705078125, "loss_xval": 0.53515625, "num_input_tokens_seen": 317000080, "step": 5056 }, { "epoch": 16.828618968386024, "grad_norm": 11.150111198425293, "learning_rate": 5e-06, "loss": 0.618, "num_input_tokens_seen": 317062320, "step": 5057 }, { "epoch": 16.828618968386024, "loss": 0.5900943279266357, "loss_ce": 0.00031161715742200613, "loss_iou": 0.212890625, "loss_num": 0.03271484375, "loss_xval": 0.58984375, "num_input_tokens_seen": 317062320, "step": 5057 }, { "epoch": 16.831946755407653, "grad_norm": 11.091609954833984, "learning_rate": 5e-06, "loss": 0.2379, "num_input_tokens_seen": 317123472, "step": 5058 }, { "epoch": 16.831946755407653, "loss": 0.10960519313812256, "loss_ce": 1.3028889043198433e-06, "loss_iou": 0.03857421875, "loss_num": 0.006439208984375, "loss_xval": 0.109375, "num_input_tokens_seen": 317123472, "step": 5058 }, { "epoch": 16.835274542429286, "grad_norm": 5.053739547729492, "learning_rate": 5e-06, "loss": 0.2027, "num_input_tokens_seen": 317185864, "step": 5059 }, { "epoch": 16.835274542429286, "loss": 0.21734336018562317, "loss_ce": 2.7680238417815417e-05, "loss_iou": 0.078125, "loss_num": 0.01220703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 317185864, "step": 5059 }, { "epoch": 16.838602329450914, "grad_norm": 7.757769584655762, "learning_rate": 5e-06, "loss": 0.4576, "num_input_tokens_seen": 317249196, "step": 5060 }, { "epoch": 16.838602329450914, "loss": 0.6498568654060364, "loss_ce": 7.662778079975396e-05, "loss_iou": 0.26171875, "loss_num": 0.025390625, "loss_xval": 0.6484375, "num_input_tokens_seen": 317249196, "step": 5060 }, { "epoch": 16.841930116472547, "grad_norm": 9.910187721252441, "learning_rate": 5e-06, "loss": 0.3959, "num_input_tokens_seen": 317312380, "step": 5061 }, { "epoch": 16.841930116472547, "loss": 0.5866134166717529, "loss_ce": 4.483577868086286e-06, "loss_iou": 0.2373046875, "loss_num": 0.022216796875, "loss_xval": 0.5859375, "num_input_tokens_seen": 317312380, "step": 5061 }, { "epoch": 16.845257903494176, "grad_norm": 13.377242088317871, "learning_rate": 5e-06, "loss": 0.3253, "num_input_tokens_seen": 317375708, "step": 5062 }, { "epoch": 16.845257903494176, "loss": 0.3022777736186981, "loss_ce": 1.157368842541473e-06, "loss_iou": 0.115234375, "loss_num": 0.0142822265625, "loss_xval": 0.302734375, "num_input_tokens_seen": 317375708, "step": 5062 }, { "epoch": 16.848585690515808, "grad_norm": 6.63076114654541, "learning_rate": 5e-06, "loss": 0.2943, "num_input_tokens_seen": 317438224, "step": 5063 }, { "epoch": 16.848585690515808, "loss": 0.33844149112701416, "loss_ce": 1.5286268535419367e-06, "loss_iou": 0.11279296875, "loss_num": 0.0225830078125, "loss_xval": 0.337890625, "num_input_tokens_seen": 317438224, "step": 5063 }, { "epoch": 16.851913477537437, "grad_norm": 7.847648620605469, "learning_rate": 5e-06, "loss": 0.2779, "num_input_tokens_seen": 317500724, "step": 5064 }, { "epoch": 16.851913477537437, "loss": 0.18200847506523132, "loss_ce": 1.6388397625632933e-06, "loss_iou": 0.07373046875, "loss_num": 0.0069580078125, "loss_xval": 0.181640625, "num_input_tokens_seen": 317500724, "step": 5064 }, { "epoch": 16.85524126455907, "grad_norm": 12.910172462463379, "learning_rate": 5e-06, "loss": 0.4466, "num_input_tokens_seen": 317563448, "step": 5065 }, { "epoch": 16.85524126455907, "loss": 0.4621002674102783, "loss_ce": 6.415096868295223e-05, "loss_iou": 0.1943359375, "loss_num": 0.01483154296875, "loss_xval": 0.462890625, "num_input_tokens_seen": 317563448, "step": 5065 }, { "epoch": 16.858569051580698, "grad_norm": 9.921592712402344, "learning_rate": 5e-06, "loss": 0.5281, "num_input_tokens_seen": 317625304, "step": 5066 }, { "epoch": 16.858569051580698, "loss": 0.4685381054878235, "loss_ce": 1.7151654674307792e-06, "loss_iou": 0.1708984375, "loss_num": 0.025390625, "loss_xval": 0.46875, "num_input_tokens_seen": 317625304, "step": 5066 }, { "epoch": 16.86189683860233, "grad_norm": 26.979633331298828, "learning_rate": 5e-06, "loss": 0.5336, "num_input_tokens_seen": 317686596, "step": 5067 }, { "epoch": 16.86189683860233, "loss": 0.7336688041687012, "loss_ce": 2.6260469894623384e-05, "loss_iou": 0.267578125, "loss_num": 0.040283203125, "loss_xval": 0.734375, "num_input_tokens_seen": 317686596, "step": 5067 }, { "epoch": 16.86522462562396, "grad_norm": 29.847963333129883, "learning_rate": 5e-06, "loss": 0.5556, "num_input_tokens_seen": 317748684, "step": 5068 }, { "epoch": 16.86522462562396, "loss": 0.6225067973136902, "loss_ce": 9.236873665940948e-06, "loss_iou": 0.2314453125, "loss_num": 0.031982421875, "loss_xval": 0.62109375, "num_input_tokens_seen": 317748684, "step": 5068 }, { "epoch": 16.86855241264559, "grad_norm": 14.324700355529785, "learning_rate": 5e-06, "loss": 0.3991, "num_input_tokens_seen": 317810232, "step": 5069 }, { "epoch": 16.86855241264559, "loss": 0.4136817455291748, "loss_ce": 0.001328252605162561, "loss_iou": 0.138671875, "loss_num": 0.027099609375, "loss_xval": 0.412109375, "num_input_tokens_seen": 317810232, "step": 5069 }, { "epoch": 16.87188019966722, "grad_norm": 24.095375061035156, "learning_rate": 5e-06, "loss": 0.346, "num_input_tokens_seen": 317872996, "step": 5070 }, { "epoch": 16.87188019966722, "loss": 0.37754350900650024, "loss_ce": 0.00010208695312030613, "loss_iou": 0.1572265625, "loss_num": 0.0126953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 317872996, "step": 5070 }, { "epoch": 16.875207986688853, "grad_norm": 32.71965026855469, "learning_rate": 5e-06, "loss": 0.6028, "num_input_tokens_seen": 317936612, "step": 5071 }, { "epoch": 16.875207986688853, "loss": 0.5731841325759888, "loss_ce": 3.0118449103611056e-06, "loss_iou": 0.24609375, "loss_num": 0.01611328125, "loss_xval": 0.57421875, "num_input_tokens_seen": 317936612, "step": 5071 }, { "epoch": 16.87853577371048, "grad_norm": 31.1330623626709, "learning_rate": 5e-06, "loss": 0.4659, "num_input_tokens_seen": 318000580, "step": 5072 }, { "epoch": 16.87853577371048, "loss": 0.45862269401550293, "loss_ce": 4.538579560176004e-06, "loss_iou": 0.1875, "loss_num": 0.0166015625, "loss_xval": 0.458984375, "num_input_tokens_seen": 318000580, "step": 5072 }, { "epoch": 16.881863560732114, "grad_norm": 26.794038772583008, "learning_rate": 5e-06, "loss": 0.435, "num_input_tokens_seen": 318064096, "step": 5073 }, { "epoch": 16.881863560732114, "loss": 0.4962232708930969, "loss_ce": 7.439582077495288e-06, "loss_iou": 0.205078125, "loss_num": 0.0174560546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 318064096, "step": 5073 }, { "epoch": 16.885191347753743, "grad_norm": 21.084381103515625, "learning_rate": 5e-06, "loss": 0.5437, "num_input_tokens_seen": 318126492, "step": 5074 }, { "epoch": 16.885191347753743, "loss": 0.694566547870636, "loss_ce": 0.00012380893167573959, "loss_iou": 0.28125, "loss_num": 0.0264892578125, "loss_xval": 0.6953125, "num_input_tokens_seen": 318126492, "step": 5074 }, { "epoch": 16.888519134775375, "grad_norm": 15.39794635772705, "learning_rate": 5e-06, "loss": 0.4822, "num_input_tokens_seen": 318189092, "step": 5075 }, { "epoch": 16.888519134775375, "loss": 0.33399438858032227, "loss_ce": 0.00010156478674616665, "loss_iou": 0.1181640625, "loss_num": 0.0194091796875, "loss_xval": 0.333984375, "num_input_tokens_seen": 318189092, "step": 5075 }, { "epoch": 16.891846921797004, "grad_norm": 13.833385467529297, "learning_rate": 5e-06, "loss": 0.4825, "num_input_tokens_seen": 318252464, "step": 5076 }, { "epoch": 16.891846921797004, "loss": 0.5982081890106201, "loss_ce": 2.5954714146791957e-06, "loss_iou": 0.2392578125, "loss_num": 0.02392578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 318252464, "step": 5076 }, { "epoch": 16.895174708818637, "grad_norm": 23.187480926513672, "learning_rate": 5e-06, "loss": 0.5513, "num_input_tokens_seen": 318317068, "step": 5077 }, { "epoch": 16.895174708818637, "loss": 0.45669615268707275, "loss_ce": 6.142371944406477e-07, "loss_iou": 0.1826171875, "loss_num": 0.018310546875, "loss_xval": 0.45703125, "num_input_tokens_seen": 318317068, "step": 5077 }, { "epoch": 16.898502495840265, "grad_norm": 29.199356079101562, "learning_rate": 5e-06, "loss": 0.3489, "num_input_tokens_seen": 318379372, "step": 5078 }, { "epoch": 16.898502495840265, "loss": 0.2814384698867798, "loss_ce": 5.353930646379013e-06, "loss_iou": 0.11767578125, "loss_num": 0.00921630859375, "loss_xval": 0.28125, "num_input_tokens_seen": 318379372, "step": 5078 }, { "epoch": 16.901830282861898, "grad_norm": 28.987497329711914, "learning_rate": 5e-06, "loss": 0.5656, "num_input_tokens_seen": 318442012, "step": 5079 }, { "epoch": 16.901830282861898, "loss": 0.6140251755714417, "loss_ce": 1.1484194146760274e-05, "loss_iou": 0.2490234375, "loss_num": 0.0233154296875, "loss_xval": 0.61328125, "num_input_tokens_seen": 318442012, "step": 5079 }, { "epoch": 16.905158069883527, "grad_norm": 38.05876922607422, "learning_rate": 5e-06, "loss": 0.6211, "num_input_tokens_seen": 318506080, "step": 5080 }, { "epoch": 16.905158069883527, "loss": 0.6705164909362793, "loss_ce": 0.0002283572976011783, "loss_iou": 0.244140625, "loss_num": 0.036376953125, "loss_xval": 0.671875, "num_input_tokens_seen": 318506080, "step": 5080 }, { "epoch": 16.90848585690516, "grad_norm": 29.811935424804688, "learning_rate": 5e-06, "loss": 0.5178, "num_input_tokens_seen": 318569400, "step": 5081 }, { "epoch": 16.90848585690516, "loss": 0.3827049732208252, "loss_ce": 1.4559400369762443e-05, "loss_iou": 0.1650390625, "loss_num": 0.0106201171875, "loss_xval": 0.3828125, "num_input_tokens_seen": 318569400, "step": 5081 }, { "epoch": 16.911813643926788, "grad_norm": 17.066083908081055, "learning_rate": 5e-06, "loss": 0.4938, "num_input_tokens_seen": 318630932, "step": 5082 }, { "epoch": 16.911813643926788, "loss": 0.6418784856796265, "loss_ce": 2.270670847792644e-06, "loss_iou": 0.240234375, "loss_num": 0.0322265625, "loss_xval": 0.640625, "num_input_tokens_seen": 318630932, "step": 5082 }, { "epoch": 16.91514143094842, "grad_norm": 16.98098373413086, "learning_rate": 5e-06, "loss": 0.6125, "num_input_tokens_seen": 318694180, "step": 5083 }, { "epoch": 16.91514143094842, "loss": 0.5544571876525879, "loss_ce": 1.3885305634175893e-05, "loss_iou": 0.2119140625, "loss_num": 0.0263671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 318694180, "step": 5083 }, { "epoch": 16.91846921797005, "grad_norm": 13.040465354919434, "learning_rate": 5e-06, "loss": 0.4311, "num_input_tokens_seen": 318757192, "step": 5084 }, { "epoch": 16.91846921797005, "loss": 0.21241861581802368, "loss_ce": 8.644400622870307e-06, "loss_iou": 0.06982421875, "loss_num": 0.01458740234375, "loss_xval": 0.212890625, "num_input_tokens_seen": 318757192, "step": 5084 }, { "epoch": 16.92179700499168, "grad_norm": 17.672988891601562, "learning_rate": 5e-06, "loss": 0.4283, "num_input_tokens_seen": 318821492, "step": 5085 }, { "epoch": 16.92179700499168, "loss": 0.31433629989624023, "loss_ce": 0.0006155857117846608, "loss_iou": 0.130859375, "loss_num": 0.010498046875, "loss_xval": 0.314453125, "num_input_tokens_seen": 318821492, "step": 5085 }, { "epoch": 16.92512479201331, "grad_norm": 16.022737503051758, "learning_rate": 5e-06, "loss": 0.5712, "num_input_tokens_seen": 318883860, "step": 5086 }, { "epoch": 16.92512479201331, "loss": 0.6485652327537537, "loss_ce": 5.6488224799977615e-06, "loss_iou": 0.26953125, "loss_num": 0.02197265625, "loss_xval": 0.6484375, "num_input_tokens_seen": 318883860, "step": 5086 }, { "epoch": 16.928452579034943, "grad_norm": 8.996587753295898, "learning_rate": 5e-06, "loss": 0.3733, "num_input_tokens_seen": 318947756, "step": 5087 }, { "epoch": 16.928452579034943, "loss": 0.44994843006134033, "loss_ce": 0.0007907192339189351, "loss_iou": 0.16015625, "loss_num": 0.025634765625, "loss_xval": 0.44921875, "num_input_tokens_seen": 318947756, "step": 5087 }, { "epoch": 16.93178036605657, "grad_norm": 11.822614669799805, "learning_rate": 5e-06, "loss": 0.4483, "num_input_tokens_seen": 319011416, "step": 5088 }, { "epoch": 16.93178036605657, "loss": 0.4624040722846985, "loss_ce": 1.7164836663141614e-06, "loss_iou": 0.2109375, "loss_num": 0.0081787109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 319011416, "step": 5088 }, { "epoch": 16.935108153078204, "grad_norm": 19.263988494873047, "learning_rate": 5e-06, "loss": 0.4245, "num_input_tokens_seen": 319075388, "step": 5089 }, { "epoch": 16.935108153078204, "loss": 0.4455789625644684, "loss_ce": 0.00026647336198948324, "loss_iou": 0.1865234375, "loss_num": 0.01458740234375, "loss_xval": 0.4453125, "num_input_tokens_seen": 319075388, "step": 5089 }, { "epoch": 16.938435940099833, "grad_norm": 13.498922348022461, "learning_rate": 5e-06, "loss": 0.4971, "num_input_tokens_seen": 319138144, "step": 5090 }, { "epoch": 16.938435940099833, "loss": 0.6989818811416626, "loss_ce": 7.215990081022028e-06, "loss_iou": 0.296875, "loss_num": 0.0213623046875, "loss_xval": 0.69921875, "num_input_tokens_seen": 319138144, "step": 5090 }, { "epoch": 16.941763727121465, "grad_norm": 24.668867111206055, "learning_rate": 5e-06, "loss": 0.4266, "num_input_tokens_seen": 319201316, "step": 5091 }, { "epoch": 16.941763727121465, "loss": 0.4971323013305664, "loss_ce": 9.452905374018883e-07, "loss_iou": 0.1923828125, "loss_num": 0.0224609375, "loss_xval": 0.498046875, "num_input_tokens_seen": 319201316, "step": 5091 }, { "epoch": 16.945091514143094, "grad_norm": 16.501380920410156, "learning_rate": 5e-06, "loss": 0.6192, "num_input_tokens_seen": 319265092, "step": 5092 }, { "epoch": 16.945091514143094, "loss": 0.7240622639656067, "loss_ce": 2.189935457863612e-06, "loss_iou": 0.251953125, "loss_num": 0.043701171875, "loss_xval": 0.72265625, "num_input_tokens_seen": 319265092, "step": 5092 }, { "epoch": 16.948419301164726, "grad_norm": 12.060791969299316, "learning_rate": 5e-06, "loss": 0.5237, "num_input_tokens_seen": 319328632, "step": 5093 }, { "epoch": 16.948419301164726, "loss": 0.547258198261261, "loss_ce": 4.7492321755271405e-05, "loss_iou": 0.2216796875, "loss_num": 0.020751953125, "loss_xval": 0.546875, "num_input_tokens_seen": 319328632, "step": 5093 }, { "epoch": 16.951747088186355, "grad_norm": 11.193984031677246, "learning_rate": 5e-06, "loss": 0.5244, "num_input_tokens_seen": 319391844, "step": 5094 }, { "epoch": 16.951747088186355, "loss": 0.4047863781452179, "loss_ce": 1.2198825061204843e-06, "loss_iou": 0.166015625, "loss_num": 0.01446533203125, "loss_xval": 0.404296875, "num_input_tokens_seen": 319391844, "step": 5094 }, { "epoch": 16.955074875207988, "grad_norm": 14.553125381469727, "learning_rate": 5e-06, "loss": 0.4209, "num_input_tokens_seen": 319453960, "step": 5095 }, { "epoch": 16.955074875207988, "loss": 0.340437114238739, "loss_ce": 0.00010508012201171368, "loss_iou": 0.1279296875, "loss_num": 0.016845703125, "loss_xval": 0.33984375, "num_input_tokens_seen": 319453960, "step": 5095 }, { "epoch": 16.958402662229616, "grad_norm": 15.725777626037598, "learning_rate": 5e-06, "loss": 0.41, "num_input_tokens_seen": 319517508, "step": 5096 }, { "epoch": 16.958402662229616, "loss": 0.42077821493148804, "loss_ce": 1.8626632254381548e-06, "loss_iou": 0.1640625, "loss_num": 0.0184326171875, "loss_xval": 0.419921875, "num_input_tokens_seen": 319517508, "step": 5096 }, { "epoch": 16.96173044925125, "grad_norm": 9.664607048034668, "learning_rate": 5e-06, "loss": 0.3848, "num_input_tokens_seen": 319580704, "step": 5097 }, { "epoch": 16.96173044925125, "loss": 0.5629299879074097, "loss_ce": 2.7663857053994434e-06, "loss_iou": 0.20703125, "loss_num": 0.030029296875, "loss_xval": 0.5625, "num_input_tokens_seen": 319580704, "step": 5097 }, { "epoch": 16.965058236272878, "grad_norm": 19.464641571044922, "learning_rate": 5e-06, "loss": 0.3979, "num_input_tokens_seen": 319643000, "step": 5098 }, { "epoch": 16.965058236272878, "loss": 0.31817716360092163, "loss_ce": 8.960626018961193e-07, "loss_iou": 0.10888671875, "loss_num": 0.0201416015625, "loss_xval": 0.318359375, "num_input_tokens_seen": 319643000, "step": 5098 }, { "epoch": 16.96838602329451, "grad_norm": 5.090531349182129, "learning_rate": 5e-06, "loss": 0.3519, "num_input_tokens_seen": 319704036, "step": 5099 }, { "epoch": 16.96838602329451, "loss": 0.346929132938385, "loss_ce": 5.293208232615143e-06, "loss_iou": 0.11962890625, "loss_num": 0.0216064453125, "loss_xval": 0.34765625, "num_input_tokens_seen": 319704036, "step": 5099 }, { "epoch": 16.97171381031614, "grad_norm": 16.754226684570312, "learning_rate": 5e-06, "loss": 0.4059, "num_input_tokens_seen": 319767124, "step": 5100 }, { "epoch": 16.97171381031614, "loss": 0.42603200674057007, "loss_ce": 6.6378879637341015e-06, "loss_iou": 0.1982421875, "loss_num": 0.005859375, "loss_xval": 0.42578125, "num_input_tokens_seen": 319767124, "step": 5100 }, { "epoch": 16.97504159733777, "grad_norm": 22.907211303710938, "learning_rate": 5e-06, "loss": 0.2939, "num_input_tokens_seen": 319829088, "step": 5101 }, { "epoch": 16.97504159733777, "loss": 0.302868515253067, "loss_ce": 0.00010362408647779375, "loss_iou": 0.1181640625, "loss_num": 0.01318359375, "loss_xval": 0.302734375, "num_input_tokens_seen": 319829088, "step": 5101 }, { "epoch": 16.9783693843594, "grad_norm": 29.858924865722656, "learning_rate": 5e-06, "loss": 0.3786, "num_input_tokens_seen": 319891908, "step": 5102 }, { "epoch": 16.9783693843594, "loss": 0.2766736149787903, "loss_ce": 1.2508776308095548e-06, "loss_iou": 0.12158203125, "loss_num": 0.006591796875, "loss_xval": 0.27734375, "num_input_tokens_seen": 319891908, "step": 5102 }, { "epoch": 16.981697171381033, "grad_norm": 31.153779983520508, "learning_rate": 5e-06, "loss": 0.4604, "num_input_tokens_seen": 319955212, "step": 5103 }, { "epoch": 16.981697171381033, "loss": 0.4822595715522766, "loss_ce": 8.182087913155556e-05, "loss_iou": 0.2109375, "loss_num": 0.01214599609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 319955212, "step": 5103 }, { "epoch": 16.98502495840266, "grad_norm": 14.015069961547852, "learning_rate": 5e-06, "loss": 0.2882, "num_input_tokens_seen": 320017052, "step": 5104 }, { "epoch": 16.98502495840266, "loss": 0.2390035092830658, "loss_ce": 5.08812336192932e-05, "loss_iou": 0.0849609375, "loss_num": 0.013916015625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 320017052, "step": 5104 }, { "epoch": 16.988352745424294, "grad_norm": 5.590756893157959, "learning_rate": 5e-06, "loss": 0.3739, "num_input_tokens_seen": 320079416, "step": 5105 }, { "epoch": 16.988352745424294, "loss": 0.43383854627609253, "loss_ce": 6.785451773794193e-07, "loss_iou": 0.1904296875, "loss_num": 0.0103759765625, "loss_xval": 0.43359375, "num_input_tokens_seen": 320079416, "step": 5105 }, { "epoch": 16.991680532445923, "grad_norm": 13.280501365661621, "learning_rate": 5e-06, "loss": 0.6556, "num_input_tokens_seen": 320141948, "step": 5106 }, { "epoch": 16.991680532445923, "loss": 0.36006417870521545, "loss_ce": 2.501781636965461e-06, "loss_iou": 0.1328125, "loss_num": 0.0189208984375, "loss_xval": 0.359375, "num_input_tokens_seen": 320141948, "step": 5106 }, { "epoch": 16.995008319467555, "grad_norm": 12.277535438537598, "learning_rate": 5e-06, "loss": 0.3127, "num_input_tokens_seen": 320202700, "step": 5107 }, { "epoch": 16.995008319467555, "loss": 0.31158632040023804, "loss_ce": 1.854608626672416e-06, "loss_iou": 0.107421875, "loss_num": 0.019287109375, "loss_xval": 0.3125, "num_input_tokens_seen": 320202700, "step": 5107 }, { "epoch": 16.998336106489184, "grad_norm": 12.22753620147705, "learning_rate": 5e-06, "loss": 0.4595, "num_input_tokens_seen": 320264808, "step": 5108 }, { "epoch": 16.998336106489184, "loss": 0.6317216157913208, "loss_ce": 6.881642912048846e-05, "loss_iou": 0.2314453125, "loss_num": 0.03369140625, "loss_xval": 0.6328125, "num_input_tokens_seen": 320264808, "step": 5108 }, { "epoch": 16.998336106489184, "loss": 0.7482919692993164, "loss_ce": 9.341197255707812e-07, "loss_iou": 0.32421875, "loss_num": 0.0198974609375, "loss_xval": 0.75, "num_input_tokens_seen": 320297292, "step": 5108 }, { "epoch": 17.001663893510816, "grad_norm": 35.28773880004883, "learning_rate": 5e-06, "loss": 0.6195, "num_input_tokens_seen": 320328828, "step": 5109 }, { "epoch": 17.001663893510816, "loss": 0.49076032638549805, "loss_ce": 3.76667121599894e-05, "loss_iou": 0.2119140625, "loss_num": 0.013427734375, "loss_xval": 0.490234375, "num_input_tokens_seen": 320328828, "step": 5109 }, { "epoch": 17.004991680532445, "grad_norm": 30.385940551757812, "learning_rate": 5e-06, "loss": 0.424, "num_input_tokens_seen": 320392572, "step": 5110 }, { "epoch": 17.004991680532445, "loss": 0.47406095266342163, "loss_ce": 8.731013281249034e-07, "loss_iou": 0.189453125, "loss_num": 0.019287109375, "loss_xval": 0.474609375, "num_input_tokens_seen": 320392572, "step": 5110 }, { "epoch": 17.008319467554077, "grad_norm": 14.180205345153809, "learning_rate": 5e-06, "loss": 0.4067, "num_input_tokens_seen": 320456224, "step": 5111 }, { "epoch": 17.008319467554077, "loss": 0.3323986828327179, "loss_ce": 1.2300909020268591e-06, "loss_iou": 0.12890625, "loss_num": 0.01495361328125, "loss_xval": 0.33203125, "num_input_tokens_seen": 320456224, "step": 5111 }, { "epoch": 17.011647254575706, "grad_norm": 8.385810852050781, "learning_rate": 5e-06, "loss": 0.4002, "num_input_tokens_seen": 320518656, "step": 5112 }, { "epoch": 17.011647254575706, "loss": 0.39331644773483276, "loss_ce": 5.943959877185989e-06, "loss_iou": 0.1611328125, "loss_num": 0.0142822265625, "loss_xval": 0.392578125, "num_input_tokens_seen": 320518656, "step": 5112 }, { "epoch": 17.01497504159734, "grad_norm": 8.225004196166992, "learning_rate": 5e-06, "loss": 0.3598, "num_input_tokens_seen": 320581084, "step": 5113 }, { "epoch": 17.01497504159734, "loss": 0.4478176534175873, "loss_ce": 2.6892680580203887e-06, "loss_iou": 0.1689453125, "loss_num": 0.02197265625, "loss_xval": 0.447265625, "num_input_tokens_seen": 320581084, "step": 5113 }, { "epoch": 17.018302828618967, "grad_norm": 13.003045082092285, "learning_rate": 5e-06, "loss": 0.5298, "num_input_tokens_seen": 320643524, "step": 5114 }, { "epoch": 17.018302828618967, "loss": 0.6001008749008179, "loss_ce": 3.1982326618162915e-06, "loss_iou": 0.2109375, "loss_num": 0.03564453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 320643524, "step": 5114 }, { "epoch": 17.0216306156406, "grad_norm": 11.840007781982422, "learning_rate": 5e-06, "loss": 0.2684, "num_input_tokens_seen": 320704792, "step": 5115 }, { "epoch": 17.0216306156406, "loss": 0.3780221939086914, "loss_ce": 9.676632544142194e-07, "loss_iou": 0.1259765625, "loss_num": 0.0250244140625, "loss_xval": 0.37890625, "num_input_tokens_seen": 320704792, "step": 5115 }, { "epoch": 17.02495840266223, "grad_norm": 13.155138969421387, "learning_rate": 5e-06, "loss": 0.3705, "num_input_tokens_seen": 320767932, "step": 5116 }, { "epoch": 17.02495840266223, "loss": 0.38569802045822144, "loss_ce": 1.6313421156155528e-06, "loss_iou": 0.134765625, "loss_num": 0.0233154296875, "loss_xval": 0.384765625, "num_input_tokens_seen": 320767932, "step": 5116 }, { "epoch": 17.02828618968386, "grad_norm": 22.98318862915039, "learning_rate": 5e-06, "loss": 0.382, "num_input_tokens_seen": 320831056, "step": 5117 }, { "epoch": 17.02828618968386, "loss": 0.3217865228652954, "loss_ce": 1.5405510112032061e-06, "loss_iou": 0.0849609375, "loss_num": 0.0302734375, "loss_xval": 0.322265625, "num_input_tokens_seen": 320831056, "step": 5117 }, { "epoch": 17.03161397670549, "grad_norm": 31.015750885009766, "learning_rate": 5e-06, "loss": 0.6176, "num_input_tokens_seen": 320894616, "step": 5118 }, { "epoch": 17.03161397670549, "loss": 0.5064710974693298, "loss_ce": 1.3945900718681514e-06, "loss_iou": 0.2119140625, "loss_num": 0.0166015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 320894616, "step": 5118 }, { "epoch": 17.034941763727122, "grad_norm": 7.190418243408203, "learning_rate": 5e-06, "loss": 0.5372, "num_input_tokens_seen": 320957552, "step": 5119 }, { "epoch": 17.034941763727122, "loss": 0.6015630960464478, "loss_ce": 6.058559165467159e-07, "loss_iou": 0.23828125, "loss_num": 0.0250244140625, "loss_xval": 0.6015625, "num_input_tokens_seen": 320957552, "step": 5119 }, { "epoch": 17.03826955074875, "grad_norm": 28.742368698120117, "learning_rate": 5e-06, "loss": 0.4823, "num_input_tokens_seen": 321019264, "step": 5120 }, { "epoch": 17.03826955074875, "loss": 0.5066538453102112, "loss_ce": 1.0329912356610294e-06, "loss_iou": 0.19921875, "loss_num": 0.0216064453125, "loss_xval": 0.5078125, "num_input_tokens_seen": 321019264, "step": 5120 }, { "epoch": 17.041597337770384, "grad_norm": 23.984315872192383, "learning_rate": 5e-06, "loss": 0.4554, "num_input_tokens_seen": 321082248, "step": 5121 }, { "epoch": 17.041597337770384, "loss": 0.4375009536743164, "loss_ce": 9.55037876337883e-07, "loss_iou": 0.18359375, "loss_num": 0.01409912109375, "loss_xval": 0.4375, "num_input_tokens_seen": 321082248, "step": 5121 }, { "epoch": 17.044925124792012, "grad_norm": 22.162673950195312, "learning_rate": 5e-06, "loss": 0.5406, "num_input_tokens_seen": 321145432, "step": 5122 }, { "epoch": 17.044925124792012, "loss": 0.5961493253707886, "loss_ce": 1.8904862372437492e-05, "loss_iou": 0.2470703125, "loss_num": 0.0205078125, "loss_xval": 0.59765625, "num_input_tokens_seen": 321145432, "step": 5122 }, { "epoch": 17.048252911813645, "grad_norm": 13.66738224029541, "learning_rate": 5e-06, "loss": 0.4256, "num_input_tokens_seen": 321207328, "step": 5123 }, { "epoch": 17.048252911813645, "loss": 0.32758665084838867, "loss_ce": 1.0987179848598316e-05, "loss_iou": 0.1181640625, "loss_num": 0.0181884765625, "loss_xval": 0.328125, "num_input_tokens_seen": 321207328, "step": 5123 }, { "epoch": 17.051580698835274, "grad_norm": 13.080214500427246, "learning_rate": 5e-06, "loss": 0.354, "num_input_tokens_seen": 321270272, "step": 5124 }, { "epoch": 17.051580698835274, "loss": 0.5221828818321228, "loss_ce": 0.0006985021173022687, "loss_iou": 0.22265625, "loss_num": 0.01513671875, "loss_xval": 0.5234375, "num_input_tokens_seen": 321270272, "step": 5124 }, { "epoch": 17.054908485856906, "grad_norm": 9.809718132019043, "learning_rate": 5e-06, "loss": 0.4173, "num_input_tokens_seen": 321333624, "step": 5125 }, { "epoch": 17.054908485856906, "loss": 0.25287026166915894, "loss_ce": 1.6082245792858885e-06, "loss_iou": 0.10791015625, "loss_num": 0.0074462890625, "loss_xval": 0.251953125, "num_input_tokens_seen": 321333624, "step": 5125 }, { "epoch": 17.058236272878535, "grad_norm": 10.323770523071289, "learning_rate": 5e-06, "loss": 0.3449, "num_input_tokens_seen": 321396476, "step": 5126 }, { "epoch": 17.058236272878535, "loss": 0.25017058849334717, "loss_ce": 2.744267476373352e-06, "loss_iou": 0.10400390625, "loss_num": 0.00848388671875, "loss_xval": 0.25, "num_input_tokens_seen": 321396476, "step": 5126 }, { "epoch": 17.061564059900167, "grad_norm": 13.253432273864746, "learning_rate": 5e-06, "loss": 0.3247, "num_input_tokens_seen": 321458552, "step": 5127 }, { "epoch": 17.061564059900167, "loss": 0.2516024112701416, "loss_ce": 1.551098466734402e-05, "loss_iou": 0.0986328125, "loss_num": 0.0108642578125, "loss_xval": 0.251953125, "num_input_tokens_seen": 321458552, "step": 5127 }, { "epoch": 17.064891846921796, "grad_norm": 13.741621017456055, "learning_rate": 5e-06, "loss": 0.2967, "num_input_tokens_seen": 321521340, "step": 5128 }, { "epoch": 17.064891846921796, "loss": 0.4693618416786194, "loss_ce": 1.4708641629113117e-06, "loss_iou": 0.1904296875, "loss_num": 0.017822265625, "loss_xval": 0.46875, "num_input_tokens_seen": 321521340, "step": 5128 }, { "epoch": 17.06821963394343, "grad_norm": 5.921903133392334, "learning_rate": 5e-06, "loss": 0.6427, "num_input_tokens_seen": 321583980, "step": 5129 }, { "epoch": 17.06821963394343, "loss": 0.5147785544395447, "loss_ce": 8.056022124947049e-06, "loss_iou": 0.1953125, "loss_num": 0.02490234375, "loss_xval": 0.515625, "num_input_tokens_seen": 321583980, "step": 5129 }, { "epoch": 17.071547420965057, "grad_norm": 10.264476776123047, "learning_rate": 5e-06, "loss": 0.3122, "num_input_tokens_seen": 321644012, "step": 5130 }, { "epoch": 17.071547420965057, "loss": 0.1961430460214615, "loss_ce": 6.574641702172812e-06, "loss_iou": 0.0693359375, "loss_num": 0.01141357421875, "loss_xval": 0.1962890625, "num_input_tokens_seen": 321644012, "step": 5130 }, { "epoch": 17.07487520798669, "grad_norm": 24.237855911254883, "learning_rate": 5e-06, "loss": 0.5205, "num_input_tokens_seen": 321707352, "step": 5131 }, { "epoch": 17.07487520798669, "loss": 0.4172380566596985, "loss_ce": 1.7275418713325053e-06, "loss_iou": 0.1904296875, "loss_num": 0.00738525390625, "loss_xval": 0.41796875, "num_input_tokens_seen": 321707352, "step": 5131 }, { "epoch": 17.07820299500832, "grad_norm": 30.848896026611328, "learning_rate": 5e-06, "loss": 0.548, "num_input_tokens_seen": 321770324, "step": 5132 }, { "epoch": 17.07820299500832, "loss": 0.3760085105895996, "loss_ce": 1.4380366337718442e-06, "loss_iou": 0.150390625, "loss_num": 0.01519775390625, "loss_xval": 0.376953125, "num_input_tokens_seen": 321770324, "step": 5132 }, { "epoch": 17.08153078202995, "grad_norm": 7.806191444396973, "learning_rate": 5e-06, "loss": 0.4436, "num_input_tokens_seen": 321833380, "step": 5133 }, { "epoch": 17.08153078202995, "loss": 0.3707963228225708, "loss_ce": 7.770120646455325e-06, "loss_iou": 0.1630859375, "loss_num": 0.009033203125, "loss_xval": 0.37109375, "num_input_tokens_seen": 321833380, "step": 5133 }, { "epoch": 17.08485856905158, "grad_norm": 9.16798210144043, "learning_rate": 5e-06, "loss": 0.4501, "num_input_tokens_seen": 321895912, "step": 5134 }, { "epoch": 17.08485856905158, "loss": 0.5765398144721985, "loss_ce": 1.7368868157063844e-06, "loss_iou": 0.228515625, "loss_num": 0.02392578125, "loss_xval": 0.578125, "num_input_tokens_seen": 321895912, "step": 5134 }, { "epoch": 17.088186356073212, "grad_norm": 6.992307662963867, "learning_rate": 5e-06, "loss": 0.295, "num_input_tokens_seen": 321958184, "step": 5135 }, { "epoch": 17.088186356073212, "loss": 0.3067125678062439, "loss_ce": 4.1437509935349226e-05, "loss_iou": 0.1083984375, "loss_num": 0.017822265625, "loss_xval": 0.306640625, "num_input_tokens_seen": 321958184, "step": 5135 }, { "epoch": 17.09151414309484, "grad_norm": 23.75404167175293, "learning_rate": 5e-06, "loss": 0.4364, "num_input_tokens_seen": 322021636, "step": 5136 }, { "epoch": 17.09151414309484, "loss": 0.4159616231918335, "loss_ce": 7.058309165586252e-06, "loss_iou": 0.189453125, "loss_num": 0.007415771484375, "loss_xval": 0.416015625, "num_input_tokens_seen": 322021636, "step": 5136 }, { "epoch": 17.094841930116473, "grad_norm": 28.893320083618164, "learning_rate": 5e-06, "loss": 0.3031, "num_input_tokens_seen": 322084500, "step": 5137 }, { "epoch": 17.094841930116473, "loss": 0.4141853451728821, "loss_ce": 7.790636686877406e-07, "loss_iou": 0.1728515625, "loss_num": 0.013671875, "loss_xval": 0.4140625, "num_input_tokens_seen": 322084500, "step": 5137 }, { "epoch": 17.098169717138102, "grad_norm": 21.08132553100586, "learning_rate": 5e-06, "loss": 0.3929, "num_input_tokens_seen": 322145808, "step": 5138 }, { "epoch": 17.098169717138102, "loss": 0.3905068039894104, "loss_ce": 3.8747230064473115e-06, "loss_iou": 0.1357421875, "loss_num": 0.023681640625, "loss_xval": 0.390625, "num_input_tokens_seen": 322145808, "step": 5138 }, { "epoch": 17.101497504159735, "grad_norm": 20.970327377319336, "learning_rate": 5e-06, "loss": 0.4935, "num_input_tokens_seen": 322209324, "step": 5139 }, { "epoch": 17.101497504159735, "loss": 0.3792762756347656, "loss_ce": 3.8013756693544565e-06, "loss_iou": 0.1123046875, "loss_num": 0.031005859375, "loss_xval": 0.37890625, "num_input_tokens_seen": 322209324, "step": 5139 }, { "epoch": 17.104825291181363, "grad_norm": 15.921540260314941, "learning_rate": 5e-06, "loss": 0.4677, "num_input_tokens_seen": 322272048, "step": 5140 }, { "epoch": 17.104825291181363, "loss": 0.2816477417945862, "loss_ce": 1.0284268228133442e-06, "loss_iou": 0.10595703125, "loss_num": 0.01385498046875, "loss_xval": 0.28125, "num_input_tokens_seen": 322272048, "step": 5140 }, { "epoch": 17.108153078202996, "grad_norm": 6.733375549316406, "learning_rate": 5e-06, "loss": 0.485, "num_input_tokens_seen": 322335332, "step": 5141 }, { "epoch": 17.108153078202996, "loss": 0.46838587522506714, "loss_ce": 2.07367020266247e-06, "loss_iou": 0.19140625, "loss_num": 0.0172119140625, "loss_xval": 0.46875, "num_input_tokens_seen": 322335332, "step": 5141 }, { "epoch": 17.111480865224625, "grad_norm": 57.457359313964844, "learning_rate": 5e-06, "loss": 0.473, "num_input_tokens_seen": 322396528, "step": 5142 }, { "epoch": 17.111480865224625, "loss": 0.613532304763794, "loss_ce": 6.944704182387795e-06, "loss_iou": 0.248046875, "loss_num": 0.0235595703125, "loss_xval": 0.61328125, "num_input_tokens_seen": 322396528, "step": 5142 }, { "epoch": 17.114808652246257, "grad_norm": 26.291250228881836, "learning_rate": 5e-06, "loss": 0.5332, "num_input_tokens_seen": 322461652, "step": 5143 }, { "epoch": 17.114808652246257, "loss": 0.3930387496948242, "loss_ce": 9.441882866667584e-05, "loss_iou": 0.1630859375, "loss_num": 0.0133056640625, "loss_xval": 0.392578125, "num_input_tokens_seen": 322461652, "step": 5143 }, { "epoch": 17.118136439267886, "grad_norm": 24.185014724731445, "learning_rate": 5e-06, "loss": 0.5087, "num_input_tokens_seen": 322524200, "step": 5144 }, { "epoch": 17.118136439267886, "loss": 0.6146265268325806, "loss_ce": 2.539848082960816e-06, "loss_iou": 0.2333984375, "loss_num": 0.02978515625, "loss_xval": 0.61328125, "num_input_tokens_seen": 322524200, "step": 5144 }, { "epoch": 17.12146422628952, "grad_norm": 16.371007919311523, "learning_rate": 5e-06, "loss": 0.4536, "num_input_tokens_seen": 322587244, "step": 5145 }, { "epoch": 17.12146422628952, "loss": 0.4623546600341797, "loss_ce": 1.337716184934834e-05, "loss_iou": 0.17578125, "loss_num": 0.0223388671875, "loss_xval": 0.462890625, "num_input_tokens_seen": 322587244, "step": 5145 }, { "epoch": 17.124792013311147, "grad_norm": 18.825742721557617, "learning_rate": 5e-06, "loss": 0.3721, "num_input_tokens_seen": 322650516, "step": 5146 }, { "epoch": 17.124792013311147, "loss": 0.4929826259613037, "loss_ce": 1.6733145002945093e-06, "loss_iou": 0.19921875, "loss_num": 0.0191650390625, "loss_xval": 0.4921875, "num_input_tokens_seen": 322650516, "step": 5146 }, { "epoch": 17.12811980033278, "grad_norm": 11.467921257019043, "learning_rate": 5e-06, "loss": 0.2604, "num_input_tokens_seen": 322712960, "step": 5147 }, { "epoch": 17.12811980033278, "loss": 0.27170053124427795, "loss_ce": 2.5457816263951827e-06, "loss_iou": 0.1181640625, "loss_num": 0.00701904296875, "loss_xval": 0.271484375, "num_input_tokens_seen": 322712960, "step": 5147 }, { "epoch": 17.13144758735441, "grad_norm": 9.905827522277832, "learning_rate": 5e-06, "loss": 0.3151, "num_input_tokens_seen": 322776660, "step": 5148 }, { "epoch": 17.13144758735441, "loss": 0.2962043285369873, "loss_ce": 7.167435569499503e-07, "loss_iou": 0.1357421875, "loss_num": 0.00482177734375, "loss_xval": 0.296875, "num_input_tokens_seen": 322776660, "step": 5148 }, { "epoch": 17.13477537437604, "grad_norm": 29.79977035522461, "learning_rate": 5e-06, "loss": 0.4287, "num_input_tokens_seen": 322839956, "step": 5149 }, { "epoch": 17.13477537437604, "loss": 0.48560163378715515, "loss_ce": 5.907671038585249e-06, "loss_iou": 0.2060546875, "loss_num": 0.0147705078125, "loss_xval": 0.486328125, "num_input_tokens_seen": 322839956, "step": 5149 }, { "epoch": 17.13810316139767, "grad_norm": 27.57647132873535, "learning_rate": 5e-06, "loss": 0.3799, "num_input_tokens_seen": 322901724, "step": 5150 }, { "epoch": 17.13810316139767, "loss": 0.36727994680404663, "loss_ce": 8.9029884975389e-07, "loss_iou": 0.1376953125, "loss_num": 0.0184326171875, "loss_xval": 0.3671875, "num_input_tokens_seen": 322901724, "step": 5150 }, { "epoch": 17.141430948419302, "grad_norm": 5.96712064743042, "learning_rate": 5e-06, "loss": 0.4911, "num_input_tokens_seen": 322965352, "step": 5151 }, { "epoch": 17.141430948419302, "loss": 0.6883897185325623, "loss_ce": 0.0007065673707984388, "loss_iou": 0.275390625, "loss_num": 0.0274658203125, "loss_xval": 0.6875, "num_input_tokens_seen": 322965352, "step": 5151 }, { "epoch": 17.14475873544093, "grad_norm": 8.100406646728516, "learning_rate": 5e-06, "loss": 0.3996, "num_input_tokens_seen": 323027104, "step": 5152 }, { "epoch": 17.14475873544093, "loss": 0.3391129970550537, "loss_ce": 1.663368493609596e-06, "loss_iou": 0.12109375, "loss_num": 0.0194091796875, "loss_xval": 0.33984375, "num_input_tokens_seen": 323027104, "step": 5152 }, { "epoch": 17.148086522462563, "grad_norm": 12.480849266052246, "learning_rate": 5e-06, "loss": 0.5348, "num_input_tokens_seen": 323090712, "step": 5153 }, { "epoch": 17.148086522462563, "loss": 0.6685832738876343, "loss_ce": 4.190861545794178e-06, "loss_iou": 0.28125, "loss_num": 0.0211181640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 323090712, "step": 5153 }, { "epoch": 17.151414309484192, "grad_norm": 10.188639640808105, "learning_rate": 5e-06, "loss": 0.3705, "num_input_tokens_seen": 323154532, "step": 5154 }, { "epoch": 17.151414309484192, "loss": 0.2332768440246582, "loss_ce": 4.863447315983649e-07, "loss_iou": 0.0966796875, "loss_num": 0.00799560546875, "loss_xval": 0.2333984375, "num_input_tokens_seen": 323154532, "step": 5154 }, { "epoch": 17.154742096505824, "grad_norm": 13.716565132141113, "learning_rate": 5e-06, "loss": 0.5795, "num_input_tokens_seen": 323217852, "step": 5155 }, { "epoch": 17.154742096505824, "loss": 0.6058973670005798, "loss_ce": 1.3912961094320053e-06, "loss_iou": 0.23828125, "loss_num": 0.0257568359375, "loss_xval": 0.60546875, "num_input_tokens_seen": 323217852, "step": 5155 }, { "epoch": 17.158069883527453, "grad_norm": 20.95253562927246, "learning_rate": 5e-06, "loss": 0.5769, "num_input_tokens_seen": 323282292, "step": 5156 }, { "epoch": 17.158069883527453, "loss": 0.6691977977752686, "loss_ce": 8.367552254640032e-06, "loss_iou": 0.26171875, "loss_num": 0.029052734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 323282292, "step": 5156 }, { "epoch": 17.161397670549086, "grad_norm": 17.44427490234375, "learning_rate": 5e-06, "loss": 0.3037, "num_input_tokens_seen": 323343512, "step": 5157 }, { "epoch": 17.161397670549086, "loss": 0.4254181385040283, "loss_ce": 3.0920646167942323e-06, "loss_iou": 0.1689453125, "loss_num": 0.017578125, "loss_xval": 0.42578125, "num_input_tokens_seen": 323343512, "step": 5157 }, { "epoch": 17.164725457570714, "grad_norm": 14.21406364440918, "learning_rate": 5e-06, "loss": 0.4895, "num_input_tokens_seen": 323407048, "step": 5158 }, { "epoch": 17.164725457570714, "loss": 0.5142525434494019, "loss_ce": 8.469987733406015e-07, "loss_iou": 0.1982421875, "loss_num": 0.0234375, "loss_xval": 0.515625, "num_input_tokens_seen": 323407048, "step": 5158 }, { "epoch": 17.168053244592347, "grad_norm": 27.292280197143555, "learning_rate": 5e-06, "loss": 0.4666, "num_input_tokens_seen": 323470508, "step": 5159 }, { "epoch": 17.168053244592347, "loss": 0.439392626285553, "loss_ce": 5.444484258987359e-07, "loss_iou": 0.1865234375, "loss_num": 0.01348876953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 323470508, "step": 5159 }, { "epoch": 17.171381031613976, "grad_norm": 29.259719848632812, "learning_rate": 5e-06, "loss": 0.3028, "num_input_tokens_seen": 323532592, "step": 5160 }, { "epoch": 17.171381031613976, "loss": 0.22906561195850372, "loss_ce": 6.718904614899657e-07, "loss_iou": 0.0966796875, "loss_num": 0.007080078125, "loss_xval": 0.2294921875, "num_input_tokens_seen": 323532592, "step": 5160 }, { "epoch": 17.174708818635608, "grad_norm": 19.25179100036621, "learning_rate": 5e-06, "loss": 0.588, "num_input_tokens_seen": 323596112, "step": 5161 }, { "epoch": 17.174708818635608, "loss": 0.713747501373291, "loss_ce": 2.410487923043547e-06, "loss_iou": 0.2890625, "loss_num": 0.02734375, "loss_xval": 0.71484375, "num_input_tokens_seen": 323596112, "step": 5161 }, { "epoch": 17.178036605657237, "grad_norm": 11.479451179504395, "learning_rate": 5e-06, "loss": 0.328, "num_input_tokens_seen": 323657696, "step": 5162 }, { "epoch": 17.178036605657237, "loss": 0.27490508556365967, "loss_ce": 2.741277285167598e-06, "loss_iou": 0.10888671875, "loss_num": 0.0113525390625, "loss_xval": 0.275390625, "num_input_tokens_seen": 323657696, "step": 5162 }, { "epoch": 17.18136439267887, "grad_norm": 27.213930130004883, "learning_rate": 5e-06, "loss": 0.5404, "num_input_tokens_seen": 323719580, "step": 5163 }, { "epoch": 17.18136439267887, "loss": 0.41266000270843506, "loss_ce": 1.3297247960508685e-06, "loss_iou": 0.1279296875, "loss_num": 0.03125, "loss_xval": 0.412109375, "num_input_tokens_seen": 323719580, "step": 5163 }, { "epoch": 17.184692179700498, "grad_norm": 25.2376708984375, "learning_rate": 5e-06, "loss": 0.2264, "num_input_tokens_seen": 323782476, "step": 5164 }, { "epoch": 17.184692179700498, "loss": 0.30896052718162537, "loss_ce": 5.838946890435182e-07, "loss_iou": 0.1240234375, "loss_num": 0.01220703125, "loss_xval": 0.30859375, "num_input_tokens_seen": 323782476, "step": 5164 }, { "epoch": 17.18801996672213, "grad_norm": 9.4364595413208, "learning_rate": 5e-06, "loss": 0.2444, "num_input_tokens_seen": 323843864, "step": 5165 }, { "epoch": 17.18801996672213, "loss": 0.1926586627960205, "loss_ce": 1.1914538617929793e-06, "loss_iou": 0.0830078125, "loss_num": 0.00531005859375, "loss_xval": 0.1923828125, "num_input_tokens_seen": 323843864, "step": 5165 }, { "epoch": 17.19134775374376, "grad_norm": 20.40345573425293, "learning_rate": 5e-06, "loss": 0.4467, "num_input_tokens_seen": 323907008, "step": 5166 }, { "epoch": 17.19134775374376, "loss": 0.4491657614707947, "loss_ce": 8.030867320485413e-06, "loss_iou": 0.1796875, "loss_num": 0.0181884765625, "loss_xval": 0.44921875, "num_input_tokens_seen": 323907008, "step": 5166 }, { "epoch": 17.19467554076539, "grad_norm": 14.743691444396973, "learning_rate": 5e-06, "loss": 0.548, "num_input_tokens_seen": 323970656, "step": 5167 }, { "epoch": 17.19467554076539, "loss": 0.5529096126556396, "loss_ce": 0.00017519619723316282, "loss_iou": 0.21875, "loss_num": 0.023193359375, "loss_xval": 0.5546875, "num_input_tokens_seen": 323970656, "step": 5167 }, { "epoch": 17.19800332778702, "grad_norm": 16.974348068237305, "learning_rate": 5e-06, "loss": 0.3241, "num_input_tokens_seen": 324034280, "step": 5168 }, { "epoch": 17.19800332778702, "loss": 0.32000836730003357, "loss_ce": 1.0363638693888788e-06, "loss_iou": 0.1328125, "loss_num": 0.010986328125, "loss_xval": 0.3203125, "num_input_tokens_seen": 324034280, "step": 5168 }, { "epoch": 17.201331114808653, "grad_norm": 37.42784118652344, "learning_rate": 5e-06, "loss": 0.5322, "num_input_tokens_seen": 324098040, "step": 5169 }, { "epoch": 17.201331114808653, "loss": 0.6588148474693298, "loss_ce": 1.3632095487992046e-06, "loss_iou": 0.2373046875, "loss_num": 0.037109375, "loss_xval": 0.66015625, "num_input_tokens_seen": 324098040, "step": 5169 }, { "epoch": 17.204658901830282, "grad_norm": 38.66635513305664, "learning_rate": 5e-06, "loss": 0.807, "num_input_tokens_seen": 324161316, "step": 5170 }, { "epoch": 17.204658901830282, "loss": 0.8529070615768433, "loss_ce": 1.7419765754311811e-06, "loss_iou": 0.33203125, "loss_num": 0.037841796875, "loss_xval": 0.8515625, "num_input_tokens_seen": 324161316, "step": 5170 }, { "epoch": 17.207986688851914, "grad_norm": 21.087589263916016, "learning_rate": 5e-06, "loss": 0.3863, "num_input_tokens_seen": 324223296, "step": 5171 }, { "epoch": 17.207986688851914, "loss": 0.24130715429782867, "loss_ce": 4.661086222768063e-06, "loss_iou": 0.09130859375, "loss_num": 0.0118408203125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 324223296, "step": 5171 }, { "epoch": 17.211314475873543, "grad_norm": 18.43759536743164, "learning_rate": 5e-06, "loss": 0.3766, "num_input_tokens_seen": 324285984, "step": 5172 }, { "epoch": 17.211314475873543, "loss": 0.34704458713531494, "loss_ce": 5.970542042632587e-05, "loss_iou": 0.146484375, "loss_num": 0.01080322265625, "loss_xval": 0.34765625, "num_input_tokens_seen": 324285984, "step": 5172 }, { "epoch": 17.214642262895175, "grad_norm": 20.0036563873291, "learning_rate": 5e-06, "loss": 0.4528, "num_input_tokens_seen": 324349244, "step": 5173 }, { "epoch": 17.214642262895175, "loss": 0.5139172077178955, "loss_ce": 1.1917998108401662e-06, "loss_iou": 0.197265625, "loss_num": 0.023681640625, "loss_xval": 0.515625, "num_input_tokens_seen": 324349244, "step": 5173 }, { "epoch": 17.217970049916804, "grad_norm": 18.522794723510742, "learning_rate": 5e-06, "loss": 0.4642, "num_input_tokens_seen": 324411424, "step": 5174 }, { "epoch": 17.217970049916804, "loss": 0.571465253829956, "loss_ce": 5.41253830306232e-05, "loss_iou": 0.236328125, "loss_num": 0.019775390625, "loss_xval": 0.5703125, "num_input_tokens_seen": 324411424, "step": 5174 }, { "epoch": 17.221297836938437, "grad_norm": 9.47696304321289, "learning_rate": 5e-06, "loss": 0.3447, "num_input_tokens_seen": 324474232, "step": 5175 }, { "epoch": 17.221297836938437, "loss": 0.36497700214385986, "loss_ce": 2.048560418188572e-06, "loss_iou": 0.1416015625, "loss_num": 0.0164794921875, "loss_xval": 0.365234375, "num_input_tokens_seen": 324474232, "step": 5175 }, { "epoch": 17.224625623960065, "grad_norm": 17.35675048828125, "learning_rate": 5e-06, "loss": 0.4915, "num_input_tokens_seen": 324536752, "step": 5176 }, { "epoch": 17.224625623960065, "loss": 0.5123029947280884, "loss_ce": 3.4941433113999665e-05, "loss_iou": 0.1630859375, "loss_num": 0.037109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 324536752, "step": 5176 }, { "epoch": 17.227953410981698, "grad_norm": 25.020675659179688, "learning_rate": 5e-06, "loss": 0.3548, "num_input_tokens_seen": 324599056, "step": 5177 }, { "epoch": 17.227953410981698, "loss": 0.25482386350631714, "loss_ce": 2.095846411975799e-06, "loss_iou": 0.08203125, "loss_num": 0.0181884765625, "loss_xval": 0.25390625, "num_input_tokens_seen": 324599056, "step": 5177 }, { "epoch": 17.231281198003327, "grad_norm": 24.63246726989746, "learning_rate": 5e-06, "loss": 0.5919, "num_input_tokens_seen": 324661472, "step": 5178 }, { "epoch": 17.231281198003327, "loss": 0.7328352332115173, "loss_ce": 1.3640529914482613e-06, "loss_iou": 0.23046875, "loss_num": 0.053955078125, "loss_xval": 0.734375, "num_input_tokens_seen": 324661472, "step": 5178 }, { "epoch": 17.23460898502496, "grad_norm": 27.784902572631836, "learning_rate": 5e-06, "loss": 0.5064, "num_input_tokens_seen": 324723988, "step": 5179 }, { "epoch": 17.23460898502496, "loss": 0.3922172486782074, "loss_ce": 5.340738425729796e-06, "loss_iou": 0.162109375, "loss_num": 0.013671875, "loss_xval": 0.392578125, "num_input_tokens_seen": 324723988, "step": 5179 }, { "epoch": 17.237936772046588, "grad_norm": 26.43584442138672, "learning_rate": 5e-06, "loss": 0.3874, "num_input_tokens_seen": 324787220, "step": 5180 }, { "epoch": 17.237936772046588, "loss": 0.5053203105926514, "loss_ce": 1.0288039447914343e-05, "loss_iou": 0.189453125, "loss_num": 0.025146484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 324787220, "step": 5180 }, { "epoch": 17.24126455906822, "grad_norm": 25.62998390197754, "learning_rate": 5e-06, "loss": 0.5717, "num_input_tokens_seen": 324850704, "step": 5181 }, { "epoch": 17.24126455906822, "loss": 0.7143615484237671, "loss_ce": 6.028941243130248e-06, "loss_iou": 0.287109375, "loss_num": 0.0279541015625, "loss_xval": 0.71484375, "num_input_tokens_seen": 324850704, "step": 5181 }, { "epoch": 17.24459234608985, "grad_norm": 19.89073944091797, "learning_rate": 5e-06, "loss": 0.5089, "num_input_tokens_seen": 324914320, "step": 5182 }, { "epoch": 17.24459234608985, "loss": 0.591728925704956, "loss_ce": 0.00029822898795828223, "loss_iou": 0.2412109375, "loss_num": 0.02197265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 324914320, "step": 5182 }, { "epoch": 17.24792013311148, "grad_norm": 10.303661346435547, "learning_rate": 5e-06, "loss": 0.6833, "num_input_tokens_seen": 324977060, "step": 5183 }, { "epoch": 17.24792013311148, "loss": 0.7089880704879761, "loss_ce": 6.468694482464343e-05, "loss_iou": 0.27734375, "loss_num": 0.0311279296875, "loss_xval": 0.70703125, "num_input_tokens_seen": 324977060, "step": 5183 }, { "epoch": 17.25124792013311, "grad_norm": 9.2535400390625, "learning_rate": 5e-06, "loss": 0.3369, "num_input_tokens_seen": 325040308, "step": 5184 }, { "epoch": 17.25124792013311, "loss": 0.23749621212482452, "loss_ce": 8.421797247137874e-06, "loss_iou": 0.0791015625, "loss_num": 0.015869140625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 325040308, "step": 5184 }, { "epoch": 17.254575707154743, "grad_norm": 11.946436882019043, "learning_rate": 5e-06, "loss": 0.3446, "num_input_tokens_seen": 325103320, "step": 5185 }, { "epoch": 17.254575707154743, "loss": 0.3717045187950134, "loss_ce": 4.0961788272397825e-07, "loss_iou": 0.1435546875, "loss_num": 0.016845703125, "loss_xval": 0.37109375, "num_input_tokens_seen": 325103320, "step": 5185 }, { "epoch": 17.25790349417637, "grad_norm": 10.761635780334473, "learning_rate": 5e-06, "loss": 0.3641, "num_input_tokens_seen": 325166656, "step": 5186 }, { "epoch": 17.25790349417637, "loss": 0.4559341073036194, "loss_ce": 1.4777978094571154e-06, "loss_iou": 0.1767578125, "loss_num": 0.020263671875, "loss_xval": 0.455078125, "num_input_tokens_seen": 325166656, "step": 5186 }, { "epoch": 17.261231281198004, "grad_norm": 44.027671813964844, "learning_rate": 5e-06, "loss": 0.5306, "num_input_tokens_seen": 325230136, "step": 5187 }, { "epoch": 17.261231281198004, "loss": 0.4698207378387451, "loss_ce": 2.622027068355237e-06, "loss_iou": 0.189453125, "loss_num": 0.01806640625, "loss_xval": 0.470703125, "num_input_tokens_seen": 325230136, "step": 5187 }, { "epoch": 17.264559068219633, "grad_norm": 39.574344635009766, "learning_rate": 5e-06, "loss": 0.6061, "num_input_tokens_seen": 325294244, "step": 5188 }, { "epoch": 17.264559068219633, "loss": 0.6720069646835327, "loss_ce": 9.8758300737245e-06, "loss_iou": 0.2578125, "loss_num": 0.03173828125, "loss_xval": 0.671875, "num_input_tokens_seen": 325294244, "step": 5188 }, { "epoch": 17.267886855241265, "grad_norm": 6.557857990264893, "learning_rate": 5e-06, "loss": 0.2871, "num_input_tokens_seen": 325356128, "step": 5189 }, { "epoch": 17.267886855241265, "loss": 0.3260357975959778, "loss_ce": 1.2738872783302213e-06, "loss_iou": 0.138671875, "loss_num": 0.00958251953125, "loss_xval": 0.326171875, "num_input_tokens_seen": 325356128, "step": 5189 }, { "epoch": 17.271214642262894, "grad_norm": 14.88914966583252, "learning_rate": 5e-06, "loss": 0.5999, "num_input_tokens_seen": 325420596, "step": 5190 }, { "epoch": 17.271214642262894, "loss": 0.7992908954620361, "loss_ce": 5.005302227800712e-06, "loss_iou": 0.3359375, "loss_num": 0.025634765625, "loss_xval": 0.80078125, "num_input_tokens_seen": 325420596, "step": 5190 }, { "epoch": 17.274542429284526, "grad_norm": 22.650285720825195, "learning_rate": 5e-06, "loss": 0.3697, "num_input_tokens_seen": 325481916, "step": 5191 }, { "epoch": 17.274542429284526, "loss": 0.4372412860393524, "loss_ce": 6.840519972683978e-07, "loss_iou": 0.1806640625, "loss_num": 0.01495361328125, "loss_xval": 0.4375, "num_input_tokens_seen": 325481916, "step": 5191 }, { "epoch": 17.277870216306155, "grad_norm": 37.88779830932617, "learning_rate": 5e-06, "loss": 0.3855, "num_input_tokens_seen": 325543548, "step": 5192 }, { "epoch": 17.277870216306155, "loss": 0.4340234100818634, "loss_ce": 2.3930642782943323e-06, "loss_iou": 0.193359375, "loss_num": 0.00958251953125, "loss_xval": 0.43359375, "num_input_tokens_seen": 325543548, "step": 5192 }, { "epoch": 17.281198003327788, "grad_norm": 27.242406845092773, "learning_rate": 5e-06, "loss": 0.4705, "num_input_tokens_seen": 325606072, "step": 5193 }, { "epoch": 17.281198003327788, "loss": 0.5710461735725403, "loss_ce": 1.2617913398571545e-06, "loss_iou": 0.224609375, "loss_num": 0.024658203125, "loss_xval": 0.5703125, "num_input_tokens_seen": 325606072, "step": 5193 }, { "epoch": 17.284525790349416, "grad_norm": 13.523588180541992, "learning_rate": 5e-06, "loss": 0.3943, "num_input_tokens_seen": 325668224, "step": 5194 }, { "epoch": 17.284525790349416, "loss": 0.43756628036499023, "loss_ce": 5.259684712655144e-06, "loss_iou": 0.15234375, "loss_num": 0.0263671875, "loss_xval": 0.4375, "num_input_tokens_seen": 325668224, "step": 5194 }, { "epoch": 17.28785357737105, "grad_norm": 14.60245132446289, "learning_rate": 5e-06, "loss": 0.4542, "num_input_tokens_seen": 325730732, "step": 5195 }, { "epoch": 17.28785357737105, "loss": 0.5477582216262817, "loss_ce": 2.8749827833962627e-05, "loss_iou": 0.2109375, "loss_num": 0.025390625, "loss_xval": 0.546875, "num_input_tokens_seen": 325730732, "step": 5195 }, { "epoch": 17.291181364392678, "grad_norm": 35.69125747680664, "learning_rate": 5e-06, "loss": 0.5228, "num_input_tokens_seen": 325794832, "step": 5196 }, { "epoch": 17.291181364392678, "loss": 0.5046468377113342, "loss_ce": 8.183243153325748e-06, "loss_iou": 0.220703125, "loss_num": 0.01239013671875, "loss_xval": 0.50390625, "num_input_tokens_seen": 325794832, "step": 5196 }, { "epoch": 17.29450915141431, "grad_norm": 29.476451873779297, "learning_rate": 5e-06, "loss": 0.4475, "num_input_tokens_seen": 325857688, "step": 5197 }, { "epoch": 17.29450915141431, "loss": 0.49475350975990295, "loss_ce": 2.529899575165473e-06, "loss_iou": 0.16015625, "loss_num": 0.034912109375, "loss_xval": 0.494140625, "num_input_tokens_seen": 325857688, "step": 5197 }, { "epoch": 17.29783693843594, "grad_norm": 17.20514678955078, "learning_rate": 5e-06, "loss": 0.5406, "num_input_tokens_seen": 325920700, "step": 5198 }, { "epoch": 17.29783693843594, "loss": 0.5989240407943726, "loss_ce": 1.2945635035066516e-06, "loss_iou": 0.255859375, "loss_num": 0.017578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 325920700, "step": 5198 }, { "epoch": 17.30116472545757, "grad_norm": 9.681623458862305, "learning_rate": 5e-06, "loss": 0.3502, "num_input_tokens_seen": 325982584, "step": 5199 }, { "epoch": 17.30116472545757, "loss": 0.4028504490852356, "loss_ce": 1.844116923166439e-05, "loss_iou": 0.1533203125, "loss_num": 0.01953125, "loss_xval": 0.40234375, "num_input_tokens_seen": 325982584, "step": 5199 }, { "epoch": 17.3044925124792, "grad_norm": 25.481966018676758, "learning_rate": 5e-06, "loss": 0.6041, "num_input_tokens_seen": 326047200, "step": 5200 }, { "epoch": 17.3044925124792, "loss": 0.5847219228744507, "loss_ce": 5.121863978274632e-06, "loss_iou": 0.220703125, "loss_num": 0.0284423828125, "loss_xval": 0.5859375, "num_input_tokens_seen": 326047200, "step": 5200 }, { "epoch": 17.307820299500833, "grad_norm": 10.774380683898926, "learning_rate": 5e-06, "loss": 0.4487, "num_input_tokens_seen": 326109580, "step": 5201 }, { "epoch": 17.307820299500833, "loss": 0.4340221881866455, "loss_ce": 1.174371391243767e-06, "loss_iou": 0.1767578125, "loss_num": 0.0162353515625, "loss_xval": 0.43359375, "num_input_tokens_seen": 326109580, "step": 5201 }, { "epoch": 17.31114808652246, "grad_norm": 15.889208793640137, "learning_rate": 5e-06, "loss": 0.2616, "num_input_tokens_seen": 326172208, "step": 5202 }, { "epoch": 17.31114808652246, "loss": 0.26352378726005554, "loss_ce": 4.49306526206783e-06, "loss_iou": 0.09375, "loss_num": 0.01513671875, "loss_xval": 0.263671875, "num_input_tokens_seen": 326172208, "step": 5202 }, { "epoch": 17.314475873544094, "grad_norm": 11.727590560913086, "learning_rate": 5e-06, "loss": 0.5721, "num_input_tokens_seen": 326234840, "step": 5203 }, { "epoch": 17.314475873544094, "loss": 0.5645520687103271, "loss_ce": 7.37255049898522e-06, "loss_iou": 0.22265625, "loss_num": 0.0235595703125, "loss_xval": 0.56640625, "num_input_tokens_seen": 326234840, "step": 5203 }, { "epoch": 17.317803660565723, "grad_norm": 12.65495491027832, "learning_rate": 5e-06, "loss": 0.3676, "num_input_tokens_seen": 326298132, "step": 5204 }, { "epoch": 17.317803660565723, "loss": 0.4500996470451355, "loss_ce": 2.64057052845601e-05, "loss_iou": 0.1748046875, "loss_num": 0.02001953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 326298132, "step": 5204 }, { "epoch": 17.321131447587355, "grad_norm": 18.60923194885254, "learning_rate": 5e-06, "loss": 0.3995, "num_input_tokens_seen": 326361780, "step": 5205 }, { "epoch": 17.321131447587355, "loss": 0.3598640263080597, "loss_ce": 7.448423957612249e-07, "loss_iou": 0.14453125, "loss_num": 0.01416015625, "loss_xval": 0.359375, "num_input_tokens_seen": 326361780, "step": 5205 }, { "epoch": 17.324459234608984, "grad_norm": 16.6984920501709, "learning_rate": 5e-06, "loss": 0.5879, "num_input_tokens_seen": 326423720, "step": 5206 }, { "epoch": 17.324459234608984, "loss": 0.42267343401908875, "loss_ce": 4.985188297723653e-06, "loss_iou": 0.15234375, "loss_num": 0.023681640625, "loss_xval": 0.421875, "num_input_tokens_seen": 326423720, "step": 5206 }, { "epoch": 17.327787021630616, "grad_norm": 5.96605920791626, "learning_rate": 5e-06, "loss": 0.3708, "num_input_tokens_seen": 326486548, "step": 5207 }, { "epoch": 17.327787021630616, "loss": 0.48236173391342163, "loss_ce": 9.014665920403786e-07, "loss_iou": 0.1953125, "loss_num": 0.018310546875, "loss_xval": 0.482421875, "num_input_tokens_seen": 326486548, "step": 5207 }, { "epoch": 17.331114808652245, "grad_norm": 104.482421875, "learning_rate": 5e-06, "loss": 0.4656, "num_input_tokens_seen": 326550164, "step": 5208 }, { "epoch": 17.331114808652245, "loss": 0.5733043551445007, "loss_ce": 1.1461233953014016e-06, "loss_iou": 0.25, "loss_num": 0.01434326171875, "loss_xval": 0.57421875, "num_input_tokens_seen": 326550164, "step": 5208 }, { "epoch": 17.334442595673877, "grad_norm": 19.933574676513672, "learning_rate": 5e-06, "loss": 0.449, "num_input_tokens_seen": 326612540, "step": 5209 }, { "epoch": 17.334442595673877, "loss": 0.34643685817718506, "loss_ce": 1.3167152701498708e-06, "loss_iou": 0.09375, "loss_num": 0.031982421875, "loss_xval": 0.345703125, "num_input_tokens_seen": 326612540, "step": 5209 }, { "epoch": 17.337770382695506, "grad_norm": 6.201373100280762, "learning_rate": 5e-06, "loss": 0.3152, "num_input_tokens_seen": 326675304, "step": 5210 }, { "epoch": 17.337770382695506, "loss": 0.23529183864593506, "loss_ce": 1.3113946124576614e-06, "loss_iou": 0.087890625, "loss_num": 0.0118408203125, "loss_xval": 0.2353515625, "num_input_tokens_seen": 326675304, "step": 5210 }, { "epoch": 17.34109816971714, "grad_norm": 13.553428649902344, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 326736624, "step": 5211 }, { "epoch": 17.34109816971714, "loss": 0.3413120210170746, "loss_ce": 3.4406061786285136e-06, "loss_iou": 0.10986328125, "loss_num": 0.0242919921875, "loss_xval": 0.341796875, "num_input_tokens_seen": 326736624, "step": 5211 }, { "epoch": 17.344425956738768, "grad_norm": 22.718942642211914, "learning_rate": 5e-06, "loss": 0.2409, "num_input_tokens_seen": 326797916, "step": 5212 }, { "epoch": 17.344425956738768, "loss": 0.3258276879787445, "loss_ce": 2.2014708520146087e-05, "loss_iou": 0.123046875, "loss_num": 0.015869140625, "loss_xval": 0.326171875, "num_input_tokens_seen": 326797916, "step": 5212 }, { "epoch": 17.3477537437604, "grad_norm": 18.45446014404297, "learning_rate": 5e-06, "loss": 0.4517, "num_input_tokens_seen": 326861748, "step": 5213 }, { "epoch": 17.3477537437604, "loss": 0.39288610219955444, "loss_ce": 6.38228448224254e-05, "loss_iou": 0.154296875, "loss_num": 0.016845703125, "loss_xval": 0.392578125, "num_input_tokens_seen": 326861748, "step": 5213 }, { "epoch": 17.35108153078203, "grad_norm": 10.030011177062988, "learning_rate": 5e-06, "loss": 0.3181, "num_input_tokens_seen": 326923836, "step": 5214 }, { "epoch": 17.35108153078203, "loss": 0.26803725957870483, "loss_ce": 1.3745527667197166e-06, "loss_iou": 0.061279296875, "loss_num": 0.029052734375, "loss_xval": 0.267578125, "num_input_tokens_seen": 326923836, "step": 5214 }, { "epoch": 17.35440931780366, "grad_norm": 12.139204978942871, "learning_rate": 5e-06, "loss": 0.4638, "num_input_tokens_seen": 326986012, "step": 5215 }, { "epoch": 17.35440931780366, "loss": 0.431166410446167, "loss_ce": 0.00019716336100827903, "loss_iou": 0.1689453125, "loss_num": 0.018798828125, "loss_xval": 0.431640625, "num_input_tokens_seen": 326986012, "step": 5215 }, { "epoch": 17.35773710482529, "grad_norm": 14.406109809875488, "learning_rate": 5e-06, "loss": 0.4071, "num_input_tokens_seen": 327049160, "step": 5216 }, { "epoch": 17.35773710482529, "loss": 0.32003533840179443, "loss_ce": 8.90472874743864e-05, "loss_iou": 0.142578125, "loss_num": 0.006805419921875, "loss_xval": 0.3203125, "num_input_tokens_seen": 327049160, "step": 5216 }, { "epoch": 17.361064891846922, "grad_norm": 8.158205032348633, "learning_rate": 5e-06, "loss": 0.3735, "num_input_tokens_seen": 327110940, "step": 5217 }, { "epoch": 17.361064891846922, "loss": 0.3191857933998108, "loss_ce": 2.47031948674703e-06, "loss_iou": 0.12109375, "loss_num": 0.01531982421875, "loss_xval": 0.318359375, "num_input_tokens_seen": 327110940, "step": 5217 }, { "epoch": 17.36439267886855, "grad_norm": 11.254800796508789, "learning_rate": 5e-06, "loss": 0.574, "num_input_tokens_seen": 327174924, "step": 5218 }, { "epoch": 17.36439267886855, "loss": 0.7076123952865601, "loss_ce": 7.763196481391788e-05, "loss_iou": 0.271484375, "loss_num": 0.033203125, "loss_xval": 0.70703125, "num_input_tokens_seen": 327174924, "step": 5218 }, { "epoch": 17.367720465890184, "grad_norm": 10.545318603515625, "learning_rate": 5e-06, "loss": 0.299, "num_input_tokens_seen": 327238380, "step": 5219 }, { "epoch": 17.367720465890184, "loss": 0.3509540557861328, "loss_ce": 1.8900502709584543e-06, "loss_iou": 0.138671875, "loss_num": 0.0147705078125, "loss_xval": 0.3515625, "num_input_tokens_seen": 327238380, "step": 5219 }, { "epoch": 17.371048252911812, "grad_norm": 23.697633743286133, "learning_rate": 5e-06, "loss": 0.2951, "num_input_tokens_seen": 327299800, "step": 5220 }, { "epoch": 17.371048252911812, "loss": 0.3206194043159485, "loss_ce": 1.7619478285268997e-06, "loss_iou": 0.125, "loss_num": 0.0140380859375, "loss_xval": 0.3203125, "num_input_tokens_seen": 327299800, "step": 5220 }, { "epoch": 17.374376039933445, "grad_norm": 29.627988815307617, "learning_rate": 5e-06, "loss": 0.5429, "num_input_tokens_seen": 327362356, "step": 5221 }, { "epoch": 17.374376039933445, "loss": 0.7048365473747253, "loss_ce": 2.582338083811919e-06, "loss_iou": 0.287109375, "loss_num": 0.0257568359375, "loss_xval": 0.703125, "num_input_tokens_seen": 327362356, "step": 5221 }, { "epoch": 17.377703826955074, "grad_norm": 18.653696060180664, "learning_rate": 5e-06, "loss": 0.342, "num_input_tokens_seen": 327424844, "step": 5222 }, { "epoch": 17.377703826955074, "loss": 0.36563313007354736, "loss_ce": 2.0424090507731307e-06, "loss_iou": 0.158203125, "loss_num": 0.00982666015625, "loss_xval": 0.365234375, "num_input_tokens_seen": 327424844, "step": 5222 }, { "epoch": 17.381031613976706, "grad_norm": 15.347935676574707, "learning_rate": 5e-06, "loss": 0.3931, "num_input_tokens_seen": 327486532, "step": 5223 }, { "epoch": 17.381031613976706, "loss": 0.5020913481712341, "loss_ce": 1.615176552149933e-05, "loss_iou": 0.2021484375, "loss_num": 0.019775390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 327486532, "step": 5223 }, { "epoch": 17.384359400998335, "grad_norm": 33.03142166137695, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 327550780, "step": 5224 }, { "epoch": 17.384359400998335, "loss": 0.44361385703086853, "loss_ce": 4.086824264959432e-05, "loss_iou": 0.19921875, "loss_num": 0.00897216796875, "loss_xval": 0.443359375, "num_input_tokens_seen": 327550780, "step": 5224 }, { "epoch": 17.387687188019967, "grad_norm": 28.59859275817871, "learning_rate": 5e-06, "loss": 0.2934, "num_input_tokens_seen": 327611700, "step": 5225 }, { "epoch": 17.387687188019967, "loss": 0.26367270946502686, "loss_ce": 8.448604944533145e-07, "loss_iou": 0.1025390625, "loss_num": 0.01165771484375, "loss_xval": 0.263671875, "num_input_tokens_seen": 327611700, "step": 5225 }, { "epoch": 17.391014975041596, "grad_norm": 18.408966064453125, "learning_rate": 5e-06, "loss": 0.5677, "num_input_tokens_seen": 327674056, "step": 5226 }, { "epoch": 17.391014975041596, "loss": 0.6223187446594238, "loss_ce": 4.2711212699941825e-06, "loss_iou": 0.2216796875, "loss_num": 0.03564453125, "loss_xval": 0.62109375, "num_input_tokens_seen": 327674056, "step": 5226 }, { "epoch": 17.39434276206323, "grad_norm": 18.160627365112305, "learning_rate": 5e-06, "loss": 0.5598, "num_input_tokens_seen": 327737820, "step": 5227 }, { "epoch": 17.39434276206323, "loss": 0.36633822321891785, "loss_ce": 5.206645710131852e-06, "loss_iou": 0.1611328125, "loss_num": 0.0087890625, "loss_xval": 0.3671875, "num_input_tokens_seen": 327737820, "step": 5227 }, { "epoch": 17.397670549084857, "grad_norm": 10.695967674255371, "learning_rate": 5e-06, "loss": 0.5151, "num_input_tokens_seen": 327800840, "step": 5228 }, { "epoch": 17.397670549084857, "loss": 0.5350536108016968, "loss_ce": 1.944965697475709e-05, "loss_iou": 0.2021484375, "loss_num": 0.0260009765625, "loss_xval": 0.53515625, "num_input_tokens_seen": 327800840, "step": 5228 }, { "epoch": 17.40099833610649, "grad_norm": 17.61517906188965, "learning_rate": 5e-06, "loss": 0.4444, "num_input_tokens_seen": 327862156, "step": 5229 }, { "epoch": 17.40099833610649, "loss": 0.45108920335769653, "loss_ce": 6.993546412559226e-05, "loss_iou": 0.19140625, "loss_num": 0.0135498046875, "loss_xval": 0.451171875, "num_input_tokens_seen": 327862156, "step": 5229 }, { "epoch": 17.40432612312812, "grad_norm": 22.116912841796875, "learning_rate": 5e-06, "loss": 0.4255, "num_input_tokens_seen": 327923820, "step": 5230 }, { "epoch": 17.40432612312812, "loss": 0.45622318983078003, "loss_ce": 6.59118995827157e-07, "loss_iou": 0.1796875, "loss_num": 0.0191650390625, "loss_xval": 0.45703125, "num_input_tokens_seen": 327923820, "step": 5230 }, { "epoch": 17.40765391014975, "grad_norm": 29.182451248168945, "learning_rate": 5e-06, "loss": 0.4424, "num_input_tokens_seen": 327987936, "step": 5231 }, { "epoch": 17.40765391014975, "loss": 0.2847602069377899, "loss_ce": 6.938332717254525e-07, "loss_iou": 0.1171875, "loss_num": 0.01007080078125, "loss_xval": 0.28515625, "num_input_tokens_seen": 327987936, "step": 5231 }, { "epoch": 17.41098169717138, "grad_norm": 14.087891578674316, "learning_rate": 5e-06, "loss": 0.313, "num_input_tokens_seen": 328048688, "step": 5232 }, { "epoch": 17.41098169717138, "loss": 0.3832436501979828, "loss_ce": 3.8996558942017145e-06, "loss_iou": 0.150390625, "loss_num": 0.0167236328125, "loss_xval": 0.3828125, "num_input_tokens_seen": 328048688, "step": 5232 }, { "epoch": 17.414309484193012, "grad_norm": 19.87963104248047, "learning_rate": 5e-06, "loss": 0.3057, "num_input_tokens_seen": 328111708, "step": 5233 }, { "epoch": 17.414309484193012, "loss": 0.2922998070716858, "loss_ce": 2.4642340576974675e-06, "loss_iou": 0.10595703125, "loss_num": 0.0159912109375, "loss_xval": 0.29296875, "num_input_tokens_seen": 328111708, "step": 5233 }, { "epoch": 17.41763727121464, "grad_norm": 31.209617614746094, "learning_rate": 5e-06, "loss": 0.3606, "num_input_tokens_seen": 328174460, "step": 5234 }, { "epoch": 17.41763727121464, "loss": 0.29617369174957275, "loss_ce": 5.883057383471169e-07, "loss_iou": 0.1181640625, "loss_num": 0.011962890625, "loss_xval": 0.296875, "num_input_tokens_seen": 328174460, "step": 5234 }, { "epoch": 17.420965058236273, "grad_norm": 48.550926208496094, "learning_rate": 5e-06, "loss": 0.527, "num_input_tokens_seen": 328236756, "step": 5235 }, { "epoch": 17.420965058236273, "loss": 0.3076418340206146, "loss_ce": 9.386756573803723e-06, "loss_iou": 0.13671875, "loss_num": 0.006622314453125, "loss_xval": 0.30859375, "num_input_tokens_seen": 328236756, "step": 5235 }, { "epoch": 17.424292845257902, "grad_norm": 22.760982513427734, "learning_rate": 5e-06, "loss": 0.4162, "num_input_tokens_seen": 328300000, "step": 5236 }, { "epoch": 17.424292845257902, "loss": 0.2834879755973816, "loss_ce": 1.019607952912338e-05, "loss_iou": 0.12890625, "loss_num": 0.00531005859375, "loss_xval": 0.283203125, "num_input_tokens_seen": 328300000, "step": 5236 }, { "epoch": 17.427620632279535, "grad_norm": 17.64752960205078, "learning_rate": 5e-06, "loss": 0.5974, "num_input_tokens_seen": 328361968, "step": 5237 }, { "epoch": 17.427620632279535, "loss": 0.6206694841384888, "loss_ce": 3.006249926329474e-06, "loss_iou": 0.248046875, "loss_num": 0.02490234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 328361968, "step": 5237 }, { "epoch": 17.430948419301163, "grad_norm": 20.09499168395996, "learning_rate": 5e-06, "loss": 0.4241, "num_input_tokens_seen": 328423800, "step": 5238 }, { "epoch": 17.430948419301163, "loss": 0.42549842596054077, "loss_ce": 2.2342770535033196e-05, "loss_iou": 0.181640625, "loss_num": 0.01263427734375, "loss_xval": 0.42578125, "num_input_tokens_seen": 328423800, "step": 5238 }, { "epoch": 17.434276206322796, "grad_norm": 21.52111053466797, "learning_rate": 5e-06, "loss": 0.3196, "num_input_tokens_seen": 328487020, "step": 5239 }, { "epoch": 17.434276206322796, "loss": 0.24713219702243805, "loss_ce": 8.486185834044591e-07, "loss_iou": 0.08544921875, "loss_num": 0.01513671875, "loss_xval": 0.2470703125, "num_input_tokens_seen": 328487020, "step": 5239 }, { "epoch": 17.437603993344425, "grad_norm": 28.47797966003418, "learning_rate": 5e-06, "loss": 0.6061, "num_input_tokens_seen": 328548472, "step": 5240 }, { "epoch": 17.437603993344425, "loss": 0.7801520824432373, "loss_ce": 6.675172699033283e-07, "loss_iou": 0.29296875, "loss_num": 0.038818359375, "loss_xval": 0.78125, "num_input_tokens_seen": 328548472, "step": 5240 }, { "epoch": 17.440931780366057, "grad_norm": 35.67027282714844, "learning_rate": 5e-06, "loss": 0.3899, "num_input_tokens_seen": 328610736, "step": 5241 }, { "epoch": 17.440931780366057, "loss": 0.38324031233787537, "loss_ce": 5.538452683140349e-07, "loss_iou": 0.1572265625, "loss_num": 0.0137939453125, "loss_xval": 0.3828125, "num_input_tokens_seen": 328610736, "step": 5241 }, { "epoch": 17.444259567387686, "grad_norm": 16.331022262573242, "learning_rate": 5e-06, "loss": 0.5282, "num_input_tokens_seen": 328672164, "step": 5242 }, { "epoch": 17.444259567387686, "loss": 0.4294658899307251, "loss_ce": 2.254640821774956e-05, "loss_iou": 0.1806640625, "loss_num": 0.0137939453125, "loss_xval": 0.4296875, "num_input_tokens_seen": 328672164, "step": 5242 }, { "epoch": 17.44758735440932, "grad_norm": 19.6800537109375, "learning_rate": 5e-06, "loss": 0.5694, "num_input_tokens_seen": 328735580, "step": 5243 }, { "epoch": 17.44758735440932, "loss": 0.6951924562454224, "loss_ce": 2.0336892703198828e-06, "loss_iou": 0.22265625, "loss_num": 0.050048828125, "loss_xval": 0.6953125, "num_input_tokens_seen": 328735580, "step": 5243 }, { "epoch": 17.450915141430947, "grad_norm": 17.769723892211914, "learning_rate": 5e-06, "loss": 0.5106, "num_input_tokens_seen": 328798884, "step": 5244 }, { "epoch": 17.450915141430947, "loss": 0.3205583393573761, "loss_ce": 1.6770322872616816e-06, "loss_iou": 0.126953125, "loss_num": 0.0135498046875, "loss_xval": 0.3203125, "num_input_tokens_seen": 328798884, "step": 5244 }, { "epoch": 17.45424292845258, "grad_norm": 13.198070526123047, "learning_rate": 5e-06, "loss": 0.5123, "num_input_tokens_seen": 328860424, "step": 5245 }, { "epoch": 17.45424292845258, "loss": 0.39673519134521484, "loss_ce": 6.692878287140047e-06, "loss_iou": 0.15625, "loss_num": 0.0169677734375, "loss_xval": 0.396484375, "num_input_tokens_seen": 328860424, "step": 5245 }, { "epoch": 17.45757071547421, "grad_norm": 23.805025100708008, "learning_rate": 5e-06, "loss": 0.5771, "num_input_tokens_seen": 328924416, "step": 5246 }, { "epoch": 17.45757071547421, "loss": 0.6622447967529297, "loss_ce": 1.3328874956641812e-05, "loss_iou": 0.2373046875, "loss_num": 0.03759765625, "loss_xval": 0.6640625, "num_input_tokens_seen": 328924416, "step": 5246 }, { "epoch": 17.46089850249584, "grad_norm": 15.694345474243164, "learning_rate": 5e-06, "loss": 0.2907, "num_input_tokens_seen": 328984876, "step": 5247 }, { "epoch": 17.46089850249584, "loss": 0.16622993350028992, "loss_ce": 0.0001227513130288571, "loss_iou": 0.0634765625, "loss_num": 0.007781982421875, "loss_xval": 0.166015625, "num_input_tokens_seen": 328984876, "step": 5247 }, { "epoch": 17.46422628951747, "grad_norm": 20.317230224609375, "learning_rate": 5e-06, "loss": 0.4473, "num_input_tokens_seen": 329048956, "step": 5248 }, { "epoch": 17.46422628951747, "loss": 0.40811365842819214, "loss_ce": 2.0847255655098706e-06, "loss_iou": 0.1484375, "loss_num": 0.022216796875, "loss_xval": 0.408203125, "num_input_tokens_seen": 329048956, "step": 5248 }, { "epoch": 17.467554076539102, "grad_norm": 16.11625862121582, "learning_rate": 5e-06, "loss": 0.3765, "num_input_tokens_seen": 329112488, "step": 5249 }, { "epoch": 17.467554076539102, "loss": 0.3850210905075073, "loss_ce": 1.1313621143926866e-05, "loss_iou": 0.1455078125, "loss_num": 0.01904296875, "loss_xval": 0.384765625, "num_input_tokens_seen": 329112488, "step": 5249 }, { "epoch": 17.47088186356073, "grad_norm": 25.6210994720459, "learning_rate": 5e-06, "loss": 0.5148, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "eval_seeclick_CIoU": 0.030929828993976116, "eval_seeclick_GIoU": 0.02613657945767045, "eval_seeclick_IoU": 0.1626487672328949, "eval_seeclick_MAE_all": 0.17657573521137238, "eval_seeclick_MAE_h": 0.07398315146565437, "eval_seeclick_MAE_w": 0.14084036648273468, "eval_seeclick_MAE_x_boxes": 0.21346064656972885, "eval_seeclick_MAE_y_boxes": 0.19054647535085678, "eval_seeclick_NUM_probability": 0.9999781250953674, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 3.0528504848480225, "eval_seeclick_loss_ce": 0.1729920133948326, "eval_seeclick_loss_iou": 0.9951171875, "eval_seeclick_loss_num": 0.177215576171875, "eval_seeclick_loss_xval": 2.875, "eval_seeclick_runtime": 66.9388, "eval_seeclick_samples_per_second": 0.702, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "eval_icons_CIoU": -0.059963636100292206, "eval_icons_GIoU": 0.026690708473324776, "eval_icons_IoU": 0.11738575994968414, "eval_icons_MAE_all": 0.19122713804244995, "eval_icons_MAE_h": 0.17263149470090866, "eval_icons_MAE_w": 0.19689606130123138, "eval_icons_MAE_x_boxes": 0.14205481112003326, "eval_icons_MAE_y_boxes": 0.09236417338252068, "eval_icons_NUM_probability": 0.9999946653842926, "eval_icons_inside_bbox": 0.1805555559694767, "eval_icons_loss": 2.8350014686584473, "eval_icons_loss_ce": 1.7977297375182388e-06, "eval_icons_loss_iou": 0.96533203125, "eval_icons_loss_num": 0.18316650390625, "eval_icons_loss_xval": 2.84765625, "eval_icons_runtime": 70.3132, "eval_icons_samples_per_second": 0.711, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "eval_screenspot_CIoU": 0.1838892251253128, "eval_screenspot_GIoU": 0.2160656750202179, "eval_screenspot_IoU": 0.2960560421148936, "eval_screenspot_MAE_all": 0.1129742090900739, "eval_screenspot_MAE_h": 0.05915581559141477, "eval_screenspot_MAE_w": 0.09793054560820262, "eval_screenspot_MAE_x_boxes": 0.15827348828315735, "eval_screenspot_MAE_y_boxes": 0.08573991805315018, "eval_screenspot_NUM_probability": 0.9999958078066508, "eval_screenspot_inside_bbox": 0.512500007947286, "eval_screenspot_loss": 2.1688921451568604, "eval_screenspot_loss_ce": 1.6430532620385445e-06, "eval_screenspot_loss_iou": 0.79541015625, "eval_screenspot_loss_num": 0.12130228678385417, "eval_screenspot_loss_xval": 2.1964518229166665, "eval_screenspot_runtime": 121.9763, "eval_screenspot_samples_per_second": 0.73, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "eval_compot_CIoU": 0.15451280400156975, "eval_compot_GIoU": 0.20240101218223572, "eval_compot_IoU": 0.28323571383953094, "eval_compot_MAE_all": 0.13637571036815643, "eval_compot_MAE_h": 0.059580570086836815, "eval_compot_MAE_w": 0.1351865977048874, "eval_compot_MAE_x_boxes": 0.12425283342599869, "eval_compot_MAE_y_boxes": 0.11449575796723366, "eval_compot_NUM_probability": 0.9999974966049194, "eval_compot_inside_bbox": 0.4131944477558136, "eval_compot_loss": 2.250957727432251, "eval_compot_loss_ce": 0.006611439632251859, "eval_compot_loss_iou": 0.7991943359375, "eval_compot_loss_num": 0.13860321044921875, "eval_compot_loss_xval": 2.29296875, "eval_compot_runtime": 67.3786, "eval_compot_samples_per_second": 0.742, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "eval_custom_ui_MAE_all": 0.06264181807637215, "eval_custom_ui_MAE_x": 0.07278245873749256, "eval_custom_ui_MAE_y": 0.05250117555260658, "eval_custom_ui_NUM_probability": 0.9999988675117493, "eval_custom_ui_loss": 0.30785277485847473, "eval_custom_ui_loss_ce": 1.1389308838261059e-06, "eval_custom_ui_loss_num": 0.0662994384765625, "eval_custom_ui_loss_xval": 0.331451416015625, "eval_custom_ui_runtime": 52.1851, "eval_custom_ui_samples_per_second": 0.958, "eval_custom_ui_steps_per_second": 0.038, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.47088186356073, "loss": 0.338441401720047, "loss_ce": 1.4570041457773186e-06, "loss_iou": 0.0, "loss_num": 0.06787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 329175328, "step": 5250 }, { "epoch": 17.474209650582363, "grad_norm": 31.477426528930664, "learning_rate": 5e-06, "loss": 0.4692, "num_input_tokens_seen": 329238980, "step": 5251 }, { "epoch": 17.474209650582363, "loss": 0.48669737577438354, "loss_ce": 3.070897946599871e-06, "loss_iou": 0.1904296875, "loss_num": 0.0211181640625, "loss_xval": 0.486328125, "num_input_tokens_seen": 329238980, "step": 5251 }, { "epoch": 17.477537437603992, "grad_norm": 25.192201614379883, "learning_rate": 5e-06, "loss": 0.1954, "num_input_tokens_seen": 329300628, "step": 5252 }, { "epoch": 17.477537437603992, "loss": 0.15831124782562256, "loss_ce": 1.3129338185535744e-06, "loss_iou": 0.05908203125, "loss_num": 0.008056640625, "loss_xval": 0.158203125, "num_input_tokens_seen": 329300628, "step": 5252 }, { "epoch": 17.480865224625624, "grad_norm": 21.297687530517578, "learning_rate": 5e-06, "loss": 0.4229, "num_input_tokens_seen": 329361544, "step": 5253 }, { "epoch": 17.480865224625624, "loss": 0.5285671353340149, "loss_ce": 2.7203998342884006e-06, "loss_iou": 0.232421875, "loss_num": 0.01263427734375, "loss_xval": 0.52734375, "num_input_tokens_seen": 329361544, "step": 5253 }, { "epoch": 17.484193011647253, "grad_norm": 10.639823913574219, "learning_rate": 5e-06, "loss": 0.3314, "num_input_tokens_seen": 329421204, "step": 5254 }, { "epoch": 17.484193011647253, "loss": 0.342874139547348, "loss_ce": 9.161248271993827e-06, "loss_iou": 0.1416015625, "loss_num": 0.011962890625, "loss_xval": 0.34375, "num_input_tokens_seen": 329421204, "step": 5254 }, { "epoch": 17.487520798668886, "grad_norm": 9.069801330566406, "learning_rate": 5e-06, "loss": 0.5121, "num_input_tokens_seen": 329482840, "step": 5255 }, { "epoch": 17.487520798668886, "loss": 0.44409555196762085, "loss_ce": 3.7763952605018858e-06, "loss_iou": 0.146484375, "loss_num": 0.0303955078125, "loss_xval": 0.443359375, "num_input_tokens_seen": 329482840, "step": 5255 }, { "epoch": 17.490848585690514, "grad_norm": 9.98929214477539, "learning_rate": 5e-06, "loss": 0.3865, "num_input_tokens_seen": 329545996, "step": 5256 }, { "epoch": 17.490848585690514, "loss": 0.26636114716529846, "loss_ce": 3.7222971513983794e-06, "loss_iou": 0.12060546875, "loss_num": 0.004974365234375, "loss_xval": 0.265625, "num_input_tokens_seen": 329545996, "step": 5256 }, { "epoch": 17.494176372712147, "grad_norm": 16.09063720703125, "learning_rate": 5e-06, "loss": 0.4877, "num_input_tokens_seen": 329608944, "step": 5257 }, { "epoch": 17.494176372712147, "loss": 0.38176143169403076, "loss_ce": 1.785800009201921e-06, "loss_iou": 0.1513671875, "loss_num": 0.0157470703125, "loss_xval": 0.380859375, "num_input_tokens_seen": 329608944, "step": 5257 }, { "epoch": 17.497504159733776, "grad_norm": 7.165801525115967, "learning_rate": 5e-06, "loss": 0.3304, "num_input_tokens_seen": 329671072, "step": 5258 }, { "epoch": 17.497504159733776, "loss": 0.5116599202156067, "loss_ce": 2.188636017308454e-06, "loss_iou": 0.1982421875, "loss_num": 0.0230712890625, "loss_xval": 0.51171875, "num_input_tokens_seen": 329671072, "step": 5258 }, { "epoch": 17.500831946755408, "grad_norm": 6.728553295135498, "learning_rate": 5e-06, "loss": 0.2274, "num_input_tokens_seen": 329733124, "step": 5259 }, { "epoch": 17.500831946755408, "loss": 0.2466435730457306, "loss_ce": 4.940803819408757e-07, "loss_iou": 0.1044921875, "loss_num": 0.007598876953125, "loss_xval": 0.2470703125, "num_input_tokens_seen": 329733124, "step": 5259 }, { "epoch": 17.504159733777037, "grad_norm": 11.854421615600586, "learning_rate": 5e-06, "loss": 0.3562, "num_input_tokens_seen": 329795344, "step": 5260 }, { "epoch": 17.504159733777037, "loss": 0.20152872800827026, "loss_ce": 2.1168723833397962e-05, "loss_iou": 0.04345703125, "loss_num": 0.02294921875, "loss_xval": 0.201171875, "num_input_tokens_seen": 329795344, "step": 5260 }, { "epoch": 17.50748752079867, "grad_norm": 12.918737411499023, "learning_rate": 5e-06, "loss": 0.4014, "num_input_tokens_seen": 329856896, "step": 5261 }, { "epoch": 17.50748752079867, "loss": 0.4470846354961395, "loss_ce": 2.1309551812009886e-06, "loss_iou": 0.169921875, "loss_num": 0.0216064453125, "loss_xval": 0.447265625, "num_input_tokens_seen": 329856896, "step": 5261 }, { "epoch": 17.510815307820298, "grad_norm": 5.695866584777832, "learning_rate": 5e-06, "loss": 0.3439, "num_input_tokens_seen": 329919904, "step": 5262 }, { "epoch": 17.510815307820298, "loss": 0.34613120555877686, "loss_ce": 8.237145721068373e-07, "loss_iou": 0.146484375, "loss_num": 0.01055908203125, "loss_xval": 0.345703125, "num_input_tokens_seen": 329919904, "step": 5262 }, { "epoch": 17.51414309484193, "grad_norm": 5.647043228149414, "learning_rate": 5e-06, "loss": 0.3277, "num_input_tokens_seen": 329982356, "step": 5263 }, { "epoch": 17.51414309484193, "loss": 0.19061362743377686, "loss_ce": 8.382686473851209e-07, "loss_iou": 0.05615234375, "loss_num": 0.0157470703125, "loss_xval": 0.1904296875, "num_input_tokens_seen": 329982356, "step": 5263 }, { "epoch": 17.51747088186356, "grad_norm": 14.886981964111328, "learning_rate": 5e-06, "loss": 0.3902, "num_input_tokens_seen": 330046448, "step": 5264 }, { "epoch": 17.51747088186356, "loss": 0.39064306020736694, "loss_ce": 1.8057364286505617e-05, "loss_iou": 0.169921875, "loss_num": 0.0101318359375, "loss_xval": 0.390625, "num_input_tokens_seen": 330046448, "step": 5264 }, { "epoch": 17.52079866888519, "grad_norm": 14.154172897338867, "learning_rate": 5e-06, "loss": 0.3252, "num_input_tokens_seen": 330109480, "step": 5265 }, { "epoch": 17.52079866888519, "loss": 0.37559065222740173, "loss_ce": 1.0811608262883965e-05, "loss_iou": 0.140625, "loss_num": 0.0186767578125, "loss_xval": 0.375, "num_input_tokens_seen": 330109480, "step": 5265 }, { "epoch": 17.52412645590682, "grad_norm": 19.094663619995117, "learning_rate": 5e-06, "loss": 0.4607, "num_input_tokens_seen": 330172228, "step": 5266 }, { "epoch": 17.52412645590682, "loss": 0.3225419521331787, "loss_ce": 1.6610296142971492e-06, "loss_iou": 0.12109375, "loss_num": 0.01611328125, "loss_xval": 0.322265625, "num_input_tokens_seen": 330172228, "step": 5266 }, { "epoch": 17.527454242928453, "grad_norm": 12.401460647583008, "learning_rate": 5e-06, "loss": 0.4148, "num_input_tokens_seen": 330234656, "step": 5267 }, { "epoch": 17.527454242928453, "loss": 0.5582373142242432, "loss_ce": 9.755009159562178e-06, "loss_iou": 0.216796875, "loss_num": 0.024658203125, "loss_xval": 0.55859375, "num_input_tokens_seen": 330234656, "step": 5267 }, { "epoch": 17.530782029950082, "grad_norm": 10.930788040161133, "learning_rate": 5e-06, "loss": 0.3401, "num_input_tokens_seen": 330295164, "step": 5268 }, { "epoch": 17.530782029950082, "loss": 0.3559882342815399, "loss_ce": 6.772862093384902e-07, "loss_iou": 0.1337890625, "loss_num": 0.017822265625, "loss_xval": 0.35546875, "num_input_tokens_seen": 330295164, "step": 5268 }, { "epoch": 17.534109816971714, "grad_norm": 16.232711791992188, "learning_rate": 5e-06, "loss": 0.4858, "num_input_tokens_seen": 330358832, "step": 5269 }, { "epoch": 17.534109816971714, "loss": 0.6108428835868835, "loss_ce": 3.001377535838401e-06, "loss_iou": 0.2333984375, "loss_num": 0.029052734375, "loss_xval": 0.609375, "num_input_tokens_seen": 330358832, "step": 5269 }, { "epoch": 17.537437603993343, "grad_norm": 11.100198745727539, "learning_rate": 5e-06, "loss": 0.4039, "num_input_tokens_seen": 330419280, "step": 5270 }, { "epoch": 17.537437603993343, "loss": 0.2066662311553955, "loss_ce": 1.2016295158900903e-06, "loss_iou": 0.072265625, "loss_num": 0.01239013671875, "loss_xval": 0.20703125, "num_input_tokens_seen": 330419280, "step": 5270 }, { "epoch": 17.540765391014975, "grad_norm": 10.327312469482422, "learning_rate": 5e-06, "loss": 0.4428, "num_input_tokens_seen": 330481640, "step": 5271 }, { "epoch": 17.540765391014975, "loss": 0.4257820248603821, "loss_ce": 7.712733918197046e-07, "loss_iou": 0.1689453125, "loss_num": 0.017822265625, "loss_xval": 0.42578125, "num_input_tokens_seen": 330481640, "step": 5271 }, { "epoch": 17.544093178036604, "grad_norm": 13.230838775634766, "learning_rate": 5e-06, "loss": 0.4374, "num_input_tokens_seen": 330544080, "step": 5272 }, { "epoch": 17.544093178036604, "loss": 0.6838397979736328, "loss_ce": 1.9084850464423653e-06, "loss_iou": 0.25390625, "loss_num": 0.035400390625, "loss_xval": 0.68359375, "num_input_tokens_seen": 330544080, "step": 5272 }, { "epoch": 17.547420965058237, "grad_norm": 11.635852813720703, "learning_rate": 5e-06, "loss": 0.4052, "num_input_tokens_seen": 330605656, "step": 5273 }, { "epoch": 17.547420965058237, "loss": 0.3304617404937744, "loss_ce": 2.1421833480417263e-06, "loss_iou": 0.1376953125, "loss_num": 0.01092529296875, "loss_xval": 0.330078125, "num_input_tokens_seen": 330605656, "step": 5273 }, { "epoch": 17.550748752079866, "grad_norm": 19.466041564941406, "learning_rate": 5e-06, "loss": 0.7676, "num_input_tokens_seen": 330668264, "step": 5274 }, { "epoch": 17.550748752079866, "loss": 0.5778310298919678, "loss_ce": 1.123331458074972e-05, "loss_iou": 0.1728515625, "loss_num": 0.046630859375, "loss_xval": 0.578125, "num_input_tokens_seen": 330668264, "step": 5274 }, { "epoch": 17.554076539101498, "grad_norm": 20.100914001464844, "learning_rate": 5e-06, "loss": 0.4422, "num_input_tokens_seen": 330732564, "step": 5275 }, { "epoch": 17.554076539101498, "loss": 0.45712706446647644, "loss_ce": 4.265655661583878e-06, "loss_iou": 0.1806640625, "loss_num": 0.0189208984375, "loss_xval": 0.45703125, "num_input_tokens_seen": 330732564, "step": 5275 }, { "epoch": 17.557404326123127, "grad_norm": 14.495187759399414, "learning_rate": 5e-06, "loss": 0.3767, "num_input_tokens_seen": 330795240, "step": 5276 }, { "epoch": 17.557404326123127, "loss": 0.4863285422325134, "loss_ce": 4.384355065667478e-07, "loss_iou": 0.2177734375, "loss_num": 0.01025390625, "loss_xval": 0.486328125, "num_input_tokens_seen": 330795240, "step": 5276 }, { "epoch": 17.56073211314476, "grad_norm": 21.667510986328125, "learning_rate": 5e-06, "loss": 0.2431, "num_input_tokens_seen": 330856636, "step": 5277 }, { "epoch": 17.56073211314476, "loss": 0.2904106676578522, "loss_ce": 5.3906160246697254e-06, "loss_iou": 0.11181640625, "loss_num": 0.0133056640625, "loss_xval": 0.291015625, "num_input_tokens_seen": 330856636, "step": 5277 }, { "epoch": 17.564059900166388, "grad_norm": 42.383331298828125, "learning_rate": 5e-06, "loss": 0.333, "num_input_tokens_seen": 330920036, "step": 5278 }, { "epoch": 17.564059900166388, "loss": 0.3577752709388733, "loss_ce": 1.7708247469272465e-05, "loss_iou": 0.146484375, "loss_num": 0.01300048828125, "loss_xval": 0.357421875, "num_input_tokens_seen": 330920036, "step": 5278 }, { "epoch": 17.56738768718802, "grad_norm": 32.12909698486328, "learning_rate": 5e-06, "loss": 0.3149, "num_input_tokens_seen": 330982732, "step": 5279 }, { "epoch": 17.56738768718802, "loss": 0.28942933678627014, "loss_ce": 6.214621066646941e-07, "loss_iou": 0.119140625, "loss_num": 0.01025390625, "loss_xval": 0.2890625, "num_input_tokens_seen": 330982732, "step": 5279 }, { "epoch": 17.57071547420965, "grad_norm": 7.586840629577637, "learning_rate": 5e-06, "loss": 0.2694, "num_input_tokens_seen": 331045684, "step": 5280 }, { "epoch": 17.57071547420965, "loss": 0.2616608738899231, "loss_ce": 3.137478870485211e-06, "loss_iou": 0.07373046875, "loss_num": 0.022705078125, "loss_xval": 0.26171875, "num_input_tokens_seen": 331045684, "step": 5280 }, { "epoch": 17.57404326123128, "grad_norm": 8.01975154876709, "learning_rate": 5e-06, "loss": 0.278, "num_input_tokens_seen": 331108324, "step": 5281 }, { "epoch": 17.57404326123128, "loss": 0.4222525954246521, "loss_ce": 1.139631694968557e-05, "loss_iou": 0.177734375, "loss_num": 0.01348876953125, "loss_xval": 0.421875, "num_input_tokens_seen": 331108324, "step": 5281 }, { "epoch": 17.57737104825291, "grad_norm": 8.302288055419922, "learning_rate": 5e-06, "loss": 0.3083, "num_input_tokens_seen": 331171748, "step": 5282 }, { "epoch": 17.57737104825291, "loss": 0.3791511654853821, "loss_ce": 7.642487958037236e-07, "loss_iou": 0.1611328125, "loss_num": 0.01123046875, "loss_xval": 0.37890625, "num_input_tokens_seen": 331171748, "step": 5282 }, { "epoch": 17.580698835274543, "grad_norm": 6.035703659057617, "learning_rate": 5e-06, "loss": 0.4113, "num_input_tokens_seen": 331233912, "step": 5283 }, { "epoch": 17.580698835274543, "loss": 0.5278955101966858, "loss_ce": 2.443299081278383e-06, "loss_iou": 0.1865234375, "loss_num": 0.0311279296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 331233912, "step": 5283 }, { "epoch": 17.58402662229617, "grad_norm": 7.694578647613525, "learning_rate": 5e-06, "loss": 0.2898, "num_input_tokens_seen": 331295600, "step": 5284 }, { "epoch": 17.58402662229617, "loss": 0.27841225266456604, "loss_ce": 3.838820248347474e-07, "loss_iou": 0.109375, "loss_num": 0.01202392578125, "loss_xval": 0.279296875, "num_input_tokens_seen": 331295600, "step": 5284 }, { "epoch": 17.587354409317804, "grad_norm": 17.660247802734375, "learning_rate": 5e-06, "loss": 0.5633, "num_input_tokens_seen": 331359188, "step": 5285 }, { "epoch": 17.587354409317804, "loss": 0.5249656438827515, "loss_ce": 2.269544211230823e-06, "loss_iou": 0.2314453125, "loss_num": 0.0123291015625, "loss_xval": 0.5234375, "num_input_tokens_seen": 331359188, "step": 5285 }, { "epoch": 17.590682196339433, "grad_norm": 34.12169647216797, "learning_rate": 5e-06, "loss": 0.3546, "num_input_tokens_seen": 331421976, "step": 5286 }, { "epoch": 17.590682196339433, "loss": 0.3382572531700134, "loss_ce": 4.1160114960803185e-07, "loss_iou": 0.1484375, "loss_num": 0.0081787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 331421976, "step": 5286 }, { "epoch": 17.594009983361065, "grad_norm": 22.37397003173828, "learning_rate": 5e-06, "loss": 0.3547, "num_input_tokens_seen": 331483168, "step": 5287 }, { "epoch": 17.594009983361065, "loss": 0.2778843641281128, "loss_ce": 6.5392132455599494e-06, "loss_iou": 0.0947265625, "loss_num": 0.0177001953125, "loss_xval": 0.27734375, "num_input_tokens_seen": 331483168, "step": 5287 }, { "epoch": 17.597337770382694, "grad_norm": 7.617621898651123, "learning_rate": 5e-06, "loss": 0.3673, "num_input_tokens_seen": 331546384, "step": 5288 }, { "epoch": 17.597337770382694, "loss": 0.4523930847644806, "loss_ce": 5.073708848613023e-07, "loss_iou": 0.1845703125, "loss_num": 0.0167236328125, "loss_xval": 0.453125, "num_input_tokens_seen": 331546384, "step": 5288 }, { "epoch": 17.600665557404326, "grad_norm": 11.872401237487793, "learning_rate": 5e-06, "loss": 0.4173, "num_input_tokens_seen": 331608536, "step": 5289 }, { "epoch": 17.600665557404326, "loss": 0.3616967797279358, "loss_ce": 2.4464600301143946e-06, "loss_iou": 0.11865234375, "loss_num": 0.02490234375, "loss_xval": 0.361328125, "num_input_tokens_seen": 331608536, "step": 5289 }, { "epoch": 17.603993344425955, "grad_norm": 19.341806411743164, "learning_rate": 5e-06, "loss": 0.4153, "num_input_tokens_seen": 331671636, "step": 5290 }, { "epoch": 17.603993344425955, "loss": 0.502427875995636, "loss_ce": 1.7464276425016578e-06, "loss_iou": 0.1904296875, "loss_num": 0.024658203125, "loss_xval": 0.50390625, "num_input_tokens_seen": 331671636, "step": 5290 }, { "epoch": 17.607321131447588, "grad_norm": 17.761201858520508, "learning_rate": 5e-06, "loss": 0.3299, "num_input_tokens_seen": 331733652, "step": 5291 }, { "epoch": 17.607321131447588, "loss": 0.401697039604187, "loss_ce": 8.570589852752164e-05, "loss_iou": 0.16796875, "loss_num": 0.01312255859375, "loss_xval": 0.40234375, "num_input_tokens_seen": 331733652, "step": 5291 }, { "epoch": 17.610648918469217, "grad_norm": 10.124748229980469, "learning_rate": 5e-06, "loss": 0.3261, "num_input_tokens_seen": 331796360, "step": 5292 }, { "epoch": 17.610648918469217, "loss": 0.30896133184432983, "loss_ce": 1.3499139868144994e-06, "loss_iou": 0.1337890625, "loss_num": 0.00830078125, "loss_xval": 0.30859375, "num_input_tokens_seen": 331796360, "step": 5292 }, { "epoch": 17.61397670549085, "grad_norm": 20.44258689880371, "learning_rate": 5e-06, "loss": 0.5172, "num_input_tokens_seen": 331860192, "step": 5293 }, { "epoch": 17.61397670549085, "loss": 0.5376042723655701, "loss_ce": 6.610748641833197e-06, "loss_iou": 0.228515625, "loss_num": 0.0162353515625, "loss_xval": 0.5390625, "num_input_tokens_seen": 331860192, "step": 5293 }, { "epoch": 17.617304492512478, "grad_norm": 33.17156982421875, "learning_rate": 5e-06, "loss": 0.5484, "num_input_tokens_seen": 331920880, "step": 5294 }, { "epoch": 17.617304492512478, "loss": 0.490540087223053, "loss_ce": 5.168482744011271e-07, "loss_iou": 0.193359375, "loss_num": 0.020751953125, "loss_xval": 0.490234375, "num_input_tokens_seen": 331920880, "step": 5294 }, { "epoch": 17.62063227953411, "grad_norm": 16.824939727783203, "learning_rate": 5e-06, "loss": 0.2427, "num_input_tokens_seen": 331981860, "step": 5295 }, { "epoch": 17.62063227953411, "loss": 0.26757845282554626, "loss_ce": 3.249513724767894e-07, "loss_iou": 0.06396484375, "loss_num": 0.02783203125, "loss_xval": 0.267578125, "num_input_tokens_seen": 331981860, "step": 5295 }, { "epoch": 17.62396006655574, "grad_norm": 19.437049865722656, "learning_rate": 5e-06, "loss": 0.2989, "num_input_tokens_seen": 332045008, "step": 5296 }, { "epoch": 17.62396006655574, "loss": 0.30886310338974, "loss_ce": 2.5222716431017034e-05, "loss_iou": 0.12890625, "loss_num": 0.01019287109375, "loss_xval": 0.30859375, "num_input_tokens_seen": 332045008, "step": 5296 }, { "epoch": 17.62728785357737, "grad_norm": 27.12031364440918, "learning_rate": 5e-06, "loss": 0.4634, "num_input_tokens_seen": 332108220, "step": 5297 }, { "epoch": 17.62728785357737, "loss": 0.42444372177124023, "loss_ce": 5.242943188932259e-06, "loss_iou": 0.1533203125, "loss_num": 0.023681640625, "loss_xval": 0.423828125, "num_input_tokens_seen": 332108220, "step": 5297 }, { "epoch": 17.630615640599, "grad_norm": 45.56828308105469, "learning_rate": 5e-06, "loss": 0.3905, "num_input_tokens_seen": 332170044, "step": 5298 }, { "epoch": 17.630615640599, "loss": 0.42396318912506104, "loss_ce": 1.3000623766856734e-05, "loss_iou": 0.1669921875, "loss_num": 0.0179443359375, "loss_xval": 0.423828125, "num_input_tokens_seen": 332170044, "step": 5298 }, { "epoch": 17.633943427620633, "grad_norm": 38.446285247802734, "learning_rate": 5e-06, "loss": 0.588, "num_input_tokens_seen": 332234188, "step": 5299 }, { "epoch": 17.633943427620633, "loss": 0.5958882570266724, "loss_ce": 2.076071041301475e-06, "loss_iou": 0.20703125, "loss_num": 0.036376953125, "loss_xval": 0.59765625, "num_input_tokens_seen": 332234188, "step": 5299 }, { "epoch": 17.63727121464226, "grad_norm": 19.331212997436523, "learning_rate": 5e-06, "loss": 0.4953, "num_input_tokens_seen": 332297896, "step": 5300 }, { "epoch": 17.63727121464226, "loss": 0.38538190722465515, "loss_ce": 5.923404387431219e-06, "loss_iou": 0.1455078125, "loss_num": 0.0191650390625, "loss_xval": 0.384765625, "num_input_tokens_seen": 332297896, "step": 5300 }, { "epoch": 17.640599001663894, "grad_norm": 17.312007904052734, "learning_rate": 5e-06, "loss": 0.3068, "num_input_tokens_seen": 332360688, "step": 5301 }, { "epoch": 17.640599001663894, "loss": 0.35753148794174194, "loss_ce": 0.00014011492021381855, "loss_iou": 0.1630859375, "loss_num": 0.006317138671875, "loss_xval": 0.357421875, "num_input_tokens_seen": 332360688, "step": 5301 }, { "epoch": 17.643926788685523, "grad_norm": 26.432811737060547, "learning_rate": 5e-06, "loss": 0.3954, "num_input_tokens_seen": 332423752, "step": 5302 }, { "epoch": 17.643926788685523, "loss": 0.39663761854171753, "loss_ce": 6.712714366585715e-07, "loss_iou": 0.158203125, "loss_num": 0.016357421875, "loss_xval": 0.396484375, "num_input_tokens_seen": 332423752, "step": 5302 }, { "epoch": 17.647254575707155, "grad_norm": 30.25397300720215, "learning_rate": 5e-06, "loss": 0.5328, "num_input_tokens_seen": 332485696, "step": 5303 }, { "epoch": 17.647254575707155, "loss": 0.5841077566146851, "loss_ce": 1.3595189329862478e-06, "loss_iou": 0.22265625, "loss_num": 0.0279541015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 332485696, "step": 5303 }, { "epoch": 17.650582362728784, "grad_norm": 20.170061111450195, "learning_rate": 5e-06, "loss": 0.3491, "num_input_tokens_seen": 332546528, "step": 5304 }, { "epoch": 17.650582362728784, "loss": 0.39139771461486816, "loss_ce": 9.773540114110801e-06, "loss_iou": 0.1455078125, "loss_num": 0.02001953125, "loss_xval": 0.390625, "num_input_tokens_seen": 332546528, "step": 5304 }, { "epoch": 17.653910149750416, "grad_norm": 12.675455093383789, "learning_rate": 5e-06, "loss": 0.4639, "num_input_tokens_seen": 332608672, "step": 5305 }, { "epoch": 17.653910149750416, "loss": 0.4059004783630371, "loss_ce": 7.774672121740878e-05, "loss_iou": 0.13671875, "loss_num": 0.0263671875, "loss_xval": 0.40625, "num_input_tokens_seen": 332608672, "step": 5305 }, { "epoch": 17.657237936772045, "grad_norm": 11.11660099029541, "learning_rate": 5e-06, "loss": 0.3637, "num_input_tokens_seen": 332671352, "step": 5306 }, { "epoch": 17.657237936772045, "loss": 0.16351479291915894, "loss_ce": 1.599166466803581e-06, "loss_iou": 0.052734375, "loss_num": 0.01165771484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 332671352, "step": 5306 }, { "epoch": 17.660565723793678, "grad_norm": 16.75368309020996, "learning_rate": 5e-06, "loss": 0.3702, "num_input_tokens_seen": 332733944, "step": 5307 }, { "epoch": 17.660565723793678, "loss": 0.34350714087486267, "loss_ce": 1.2871657872892683e-06, "loss_iou": 0.126953125, "loss_num": 0.017822265625, "loss_xval": 0.34375, "num_input_tokens_seen": 332733944, "step": 5307 }, { "epoch": 17.663893510815306, "grad_norm": 17.033275604248047, "learning_rate": 5e-06, "loss": 0.4318, "num_input_tokens_seen": 332797028, "step": 5308 }, { "epoch": 17.663893510815306, "loss": 0.43557971715927124, "loss_ce": 2.3153929760155734e-06, "loss_iou": 0.1591796875, "loss_num": 0.0235595703125, "loss_xval": 0.435546875, "num_input_tokens_seen": 332797028, "step": 5308 }, { "epoch": 17.66722129783694, "grad_norm": 14.972832679748535, "learning_rate": 5e-06, "loss": 0.4346, "num_input_tokens_seen": 332858124, "step": 5309 }, { "epoch": 17.66722129783694, "loss": 0.3933129608631134, "loss_ce": 2.40400254369888e-06, "loss_iou": 0.166015625, "loss_num": 0.01202392578125, "loss_xval": 0.392578125, "num_input_tokens_seen": 332858124, "step": 5309 }, { "epoch": 17.670549084858568, "grad_norm": 7.417253017425537, "learning_rate": 5e-06, "loss": 0.4371, "num_input_tokens_seen": 332921696, "step": 5310 }, { "epoch": 17.670549084858568, "loss": 0.38572490215301514, "loss_ce": 1.3217246305430308e-05, "loss_iou": 0.1572265625, "loss_num": 0.01416015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 332921696, "step": 5310 }, { "epoch": 17.6738768718802, "grad_norm": 23.128347396850586, "learning_rate": 5e-06, "loss": 0.5775, "num_input_tokens_seen": 332986392, "step": 5311 }, { "epoch": 17.6738768718802, "loss": 0.7786376476287842, "loss_ce": 1.213587438542163e-05, "loss_iou": 0.291015625, "loss_num": 0.03955078125, "loss_xval": 0.77734375, "num_input_tokens_seen": 332986392, "step": 5311 }, { "epoch": 17.67720465890183, "grad_norm": 32.51768493652344, "learning_rate": 5e-06, "loss": 0.4363, "num_input_tokens_seen": 333049916, "step": 5312 }, { "epoch": 17.67720465890183, "loss": 0.39642542600631714, "loss_ce": 2.0679635781561956e-06, "loss_iou": 0.1640625, "loss_num": 0.013671875, "loss_xval": 0.396484375, "num_input_tokens_seen": 333049916, "step": 5312 }, { "epoch": 17.68053244592346, "grad_norm": 11.17968463897705, "learning_rate": 5e-06, "loss": 0.2036, "num_input_tokens_seen": 333110800, "step": 5313 }, { "epoch": 17.68053244592346, "loss": 0.21682780981063843, "loss_ce": 4.248755658409209e-07, "loss_iou": 0.08447265625, "loss_num": 0.00958251953125, "loss_xval": 0.216796875, "num_input_tokens_seen": 333110800, "step": 5313 }, { "epoch": 17.68386023294509, "grad_norm": 14.425881385803223, "learning_rate": 5e-06, "loss": 0.3459, "num_input_tokens_seen": 333174612, "step": 5314 }, { "epoch": 17.68386023294509, "loss": 0.28391849994659424, "loss_ce": 5.847073225595523e-06, "loss_iou": 0.12109375, "loss_num": 0.0084228515625, "loss_xval": 0.283203125, "num_input_tokens_seen": 333174612, "step": 5314 }, { "epoch": 17.687188019966722, "grad_norm": 17.689912796020508, "learning_rate": 5e-06, "loss": 0.3156, "num_input_tokens_seen": 333237684, "step": 5315 }, { "epoch": 17.687188019966722, "loss": 0.375000536441803, "loss_ce": 5.666283300342911e-07, "loss_iou": 0.1455078125, "loss_num": 0.0169677734375, "loss_xval": 0.375, "num_input_tokens_seen": 333237684, "step": 5315 }, { "epoch": 17.69051580698835, "grad_norm": 17.45903778076172, "learning_rate": 5e-06, "loss": 0.4561, "num_input_tokens_seen": 333300660, "step": 5316 }, { "epoch": 17.69051580698835, "loss": 0.6413619518280029, "loss_ce": 4.509327936830232e-06, "loss_iou": 0.2490234375, "loss_num": 0.02880859375, "loss_xval": 0.640625, "num_input_tokens_seen": 333300660, "step": 5316 }, { "epoch": 17.693843594009984, "grad_norm": 11.16242504119873, "learning_rate": 5e-06, "loss": 0.2914, "num_input_tokens_seen": 333363096, "step": 5317 }, { "epoch": 17.693843594009984, "loss": 0.40280288457870483, "loss_ce": 1.3538433449866716e-06, "loss_iou": 0.158203125, "loss_num": 0.017333984375, "loss_xval": 0.40234375, "num_input_tokens_seen": 333363096, "step": 5317 }, { "epoch": 17.697171381031612, "grad_norm": 17.606063842773438, "learning_rate": 5e-06, "loss": 0.4739, "num_input_tokens_seen": 333427448, "step": 5318 }, { "epoch": 17.697171381031612, "loss": 0.4507002830505371, "loss_ce": 1.440791265849839e-06, "loss_iou": 0.18359375, "loss_num": 0.016845703125, "loss_xval": 0.451171875, "num_input_tokens_seen": 333427448, "step": 5318 }, { "epoch": 17.700499168053245, "grad_norm": 11.00228214263916, "learning_rate": 5e-06, "loss": 0.2345, "num_input_tokens_seen": 333488816, "step": 5319 }, { "epoch": 17.700499168053245, "loss": 0.28705042600631714, "loss_ce": 2.0861896246060496e-06, "loss_iou": 0.119140625, "loss_num": 0.009765625, "loss_xval": 0.287109375, "num_input_tokens_seen": 333488816, "step": 5319 }, { "epoch": 17.703826955074874, "grad_norm": 6.5794758796691895, "learning_rate": 5e-06, "loss": 0.3584, "num_input_tokens_seen": 333551708, "step": 5320 }, { "epoch": 17.703826955074874, "loss": 0.2900440990924835, "loss_ce": 2.0304050849517807e-05, "loss_iou": 0.1220703125, "loss_num": 0.00927734375, "loss_xval": 0.2890625, "num_input_tokens_seen": 333551708, "step": 5320 }, { "epoch": 17.707154742096506, "grad_norm": 7.110891342163086, "learning_rate": 5e-06, "loss": 0.2883, "num_input_tokens_seen": 333613084, "step": 5321 }, { "epoch": 17.707154742096506, "loss": 0.2425680160522461, "loss_ce": 2.8645936254179105e-06, "loss_iou": 0.099609375, "loss_num": 0.0087890625, "loss_xval": 0.2421875, "num_input_tokens_seen": 333613084, "step": 5321 }, { "epoch": 17.710482529118135, "grad_norm": 14.595794677734375, "learning_rate": 5e-06, "loss": 0.3347, "num_input_tokens_seen": 333674572, "step": 5322 }, { "epoch": 17.710482529118135, "loss": 0.3421866297721863, "loss_ce": 6.481022296611627e-07, "loss_iou": 0.12255859375, "loss_num": 0.01953125, "loss_xval": 0.341796875, "num_input_tokens_seen": 333674572, "step": 5322 }, { "epoch": 17.713810316139767, "grad_norm": 23.656570434570312, "learning_rate": 5e-06, "loss": 0.5967, "num_input_tokens_seen": 333739312, "step": 5323 }, { "epoch": 17.713810316139767, "loss": 0.6785900592803955, "loss_ce": 3.167345130350441e-05, "loss_iou": 0.283203125, "loss_num": 0.022216796875, "loss_xval": 0.6796875, "num_input_tokens_seen": 333739312, "step": 5323 }, { "epoch": 17.717138103161396, "grad_norm": 32.982120513916016, "learning_rate": 5e-06, "loss": 0.433, "num_input_tokens_seen": 333803720, "step": 5324 }, { "epoch": 17.717138103161396, "loss": 0.5317450165748596, "loss_ce": 6.750545253453311e-06, "loss_iou": 0.2060546875, "loss_num": 0.02392578125, "loss_xval": 0.53125, "num_input_tokens_seen": 333803720, "step": 5324 }, { "epoch": 17.72046589018303, "grad_norm": 94.19048309326172, "learning_rate": 5e-06, "loss": 0.5787, "num_input_tokens_seen": 333865156, "step": 5325 }, { "epoch": 17.72046589018303, "loss": 0.6348800659179688, "loss_ce": 5.345925092115067e-05, "loss_iou": 0.275390625, "loss_num": 0.016845703125, "loss_xval": 0.63671875, "num_input_tokens_seen": 333865156, "step": 5325 }, { "epoch": 17.723793677204657, "grad_norm": 15.181970596313477, "learning_rate": 5e-06, "loss": 0.3929, "num_input_tokens_seen": 333928808, "step": 5326 }, { "epoch": 17.723793677204657, "loss": 0.37253230810165405, "loss_ce": 0.0002788899000734091, "loss_iou": 0.1435546875, "loss_num": 0.0169677734375, "loss_xval": 0.373046875, "num_input_tokens_seen": 333928808, "step": 5326 }, { "epoch": 17.72712146422629, "grad_norm": 9.142189025878906, "learning_rate": 5e-06, "loss": 0.2813, "num_input_tokens_seen": 333991392, "step": 5327 }, { "epoch": 17.72712146422629, "loss": 0.25348085165023804, "loss_ce": 1.8454650216881419e-06, "loss_iou": 0.09375, "loss_num": 0.01318359375, "loss_xval": 0.25390625, "num_input_tokens_seen": 333991392, "step": 5327 }, { "epoch": 17.73044925124792, "grad_norm": 10.77643871307373, "learning_rate": 5e-06, "loss": 0.2113, "num_input_tokens_seen": 334053048, "step": 5328 }, { "epoch": 17.73044925124792, "loss": 0.24159583449363708, "loss_ce": 1.8687223928282037e-05, "loss_iou": 0.076171875, "loss_num": 0.017822265625, "loss_xval": 0.2412109375, "num_input_tokens_seen": 334053048, "step": 5328 }, { "epoch": 17.73377703826955, "grad_norm": 24.067537307739258, "learning_rate": 5e-06, "loss": 0.3823, "num_input_tokens_seen": 334115312, "step": 5329 }, { "epoch": 17.73377703826955, "loss": 0.44934821128845215, "loss_ce": 7.378421287285164e-06, "loss_iou": 0.197265625, "loss_num": 0.010986328125, "loss_xval": 0.44921875, "num_input_tokens_seen": 334115312, "step": 5329 }, { "epoch": 17.73710482529118, "grad_norm": 26.02249526977539, "learning_rate": 5e-06, "loss": 0.2889, "num_input_tokens_seen": 334176792, "step": 5330 }, { "epoch": 17.73710482529118, "loss": 0.3388080894947052, "loss_ce": 1.93946698345826e-06, "loss_iou": 0.11328125, "loss_num": 0.0225830078125, "loss_xval": 0.337890625, "num_input_tokens_seen": 334176792, "step": 5330 }, { "epoch": 17.740432612312812, "grad_norm": 14.982220649719238, "learning_rate": 5e-06, "loss": 0.5534, "num_input_tokens_seen": 334239948, "step": 5331 }, { "epoch": 17.740432612312812, "loss": 0.6946778297424316, "loss_ce": 0.0006165459053590894, "loss_iou": 0.2421875, "loss_num": 0.0419921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 334239948, "step": 5331 }, { "epoch": 17.74376039933444, "grad_norm": 49.8640251159668, "learning_rate": 5e-06, "loss": 0.6226, "num_input_tokens_seen": 334304400, "step": 5332 }, { "epoch": 17.74376039933444, "loss": 0.7350568175315857, "loss_ce": 1.0432573617435992e-05, "loss_iou": 0.294921875, "loss_num": 0.0294189453125, "loss_xval": 0.734375, "num_input_tokens_seen": 334304400, "step": 5332 }, { "epoch": 17.747088186356073, "grad_norm": 38.99806594848633, "learning_rate": 5e-06, "loss": 0.4787, "num_input_tokens_seen": 334367376, "step": 5333 }, { "epoch": 17.747088186356073, "loss": 0.6198762655258179, "loss_ce": 3.2677012313797604e-06, "loss_iou": 0.26953125, "loss_num": 0.0157470703125, "loss_xval": 0.62109375, "num_input_tokens_seen": 334367376, "step": 5333 }, { "epoch": 17.750415973377702, "grad_norm": 8.646446228027344, "learning_rate": 5e-06, "loss": 0.3285, "num_input_tokens_seen": 334428540, "step": 5334 }, { "epoch": 17.750415973377702, "loss": 0.45752018690109253, "loss_ce": 6.194659363245592e-07, "loss_iou": 0.17578125, "loss_num": 0.021484375, "loss_xval": 0.45703125, "num_input_tokens_seen": 334428540, "step": 5334 }, { "epoch": 17.753743760399335, "grad_norm": 10.227270126342773, "learning_rate": 5e-06, "loss": 0.292, "num_input_tokens_seen": 334491972, "step": 5335 }, { "epoch": 17.753743760399335, "loss": 0.23646587133407593, "loss_ce": 4.1740139522516984e-07, "loss_iou": 0.0908203125, "loss_num": 0.01092529296875, "loss_xval": 0.236328125, "num_input_tokens_seen": 334491972, "step": 5335 }, { "epoch": 17.757071547420963, "grad_norm": 10.90970516204834, "learning_rate": 5e-06, "loss": 0.3816, "num_input_tokens_seen": 334555224, "step": 5336 }, { "epoch": 17.757071547420963, "loss": 0.4739786982536316, "loss_ce": 2.5492524855508236e-06, "loss_iou": 0.2001953125, "loss_num": 0.01495361328125, "loss_xval": 0.474609375, "num_input_tokens_seen": 334555224, "step": 5336 }, { "epoch": 17.760399334442596, "grad_norm": 7.246659755706787, "learning_rate": 5e-06, "loss": 0.4107, "num_input_tokens_seen": 334618608, "step": 5337 }, { "epoch": 17.760399334442596, "loss": 0.5483567118644714, "loss_ce": 1.6883032003534026e-05, "loss_iou": 0.17578125, "loss_num": 0.039306640625, "loss_xval": 0.546875, "num_input_tokens_seen": 334618608, "step": 5337 }, { "epoch": 17.763727121464225, "grad_norm": 7.416783332824707, "learning_rate": 5e-06, "loss": 0.4803, "num_input_tokens_seen": 334680700, "step": 5338 }, { "epoch": 17.763727121464225, "loss": 0.4254899024963379, "loss_ce": 1.3838443010172341e-05, "loss_iou": 0.1640625, "loss_num": 0.0194091796875, "loss_xval": 0.42578125, "num_input_tokens_seen": 334680700, "step": 5338 }, { "epoch": 17.767054908485857, "grad_norm": 10.050816535949707, "learning_rate": 5e-06, "loss": 0.5137, "num_input_tokens_seen": 334744708, "step": 5339 }, { "epoch": 17.767054908485857, "loss": 0.5261867642402649, "loss_ce": 2.7126075110572856e-06, "loss_iou": 0.197265625, "loss_num": 0.0264892578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 334744708, "step": 5339 }, { "epoch": 17.770382695507486, "grad_norm": 7.158872604370117, "learning_rate": 5e-06, "loss": 0.3751, "num_input_tokens_seen": 334807456, "step": 5340 }, { "epoch": 17.770382695507486, "loss": 0.43493762612342834, "loss_ce": 1.1129470749438042e-06, "loss_iou": 0.15234375, "loss_num": 0.0262451171875, "loss_xval": 0.435546875, "num_input_tokens_seen": 334807456, "step": 5340 }, { "epoch": 17.77371048252912, "grad_norm": 14.168370246887207, "learning_rate": 5e-06, "loss": 0.2704, "num_input_tokens_seen": 334869352, "step": 5341 }, { "epoch": 17.77371048252912, "loss": 0.18738096952438354, "loss_ce": 3.0463706934824586e-06, "loss_iou": 0.0771484375, "loss_num": 0.006561279296875, "loss_xval": 0.1875, "num_input_tokens_seen": 334869352, "step": 5341 }, { "epoch": 17.777038269550747, "grad_norm": 18.882482528686523, "learning_rate": 5e-06, "loss": 0.6131, "num_input_tokens_seen": 334932824, "step": 5342 }, { "epoch": 17.777038269550747, "loss": 0.7097675800323486, "loss_ce": 0.00017281505279242992, "loss_iou": 0.279296875, "loss_num": 0.030517578125, "loss_xval": 0.7109375, "num_input_tokens_seen": 334932824, "step": 5342 }, { "epoch": 17.78036605657238, "grad_norm": 19.100543975830078, "learning_rate": 5e-06, "loss": 0.3642, "num_input_tokens_seen": 334995180, "step": 5343 }, { "epoch": 17.78036605657238, "loss": 0.24084694683551788, "loss_ce": 2.2298663679976016e-06, "loss_iou": 0.08203125, "loss_num": 0.015380859375, "loss_xval": 0.2412109375, "num_input_tokens_seen": 334995180, "step": 5343 }, { "epoch": 17.78369384359401, "grad_norm": 14.873527526855469, "learning_rate": 5e-06, "loss": 0.2264, "num_input_tokens_seen": 335058252, "step": 5344 }, { "epoch": 17.78369384359401, "loss": 0.24562406539916992, "loss_ce": 0.00026272470131516457, "loss_iou": 0.08642578125, "loss_num": 0.014404296875, "loss_xval": 0.2451171875, "num_input_tokens_seen": 335058252, "step": 5344 }, { "epoch": 17.78702163061564, "grad_norm": 18.555896759033203, "learning_rate": 5e-06, "loss": 0.574, "num_input_tokens_seen": 335120104, "step": 5345 }, { "epoch": 17.78702163061564, "loss": 0.4963062107563019, "loss_ce": 2.9342638299567625e-05, "loss_iou": 0.197265625, "loss_num": 0.0203857421875, "loss_xval": 0.49609375, "num_input_tokens_seen": 335120104, "step": 5345 }, { "epoch": 17.79034941763727, "grad_norm": 20.22998046875, "learning_rate": 5e-06, "loss": 0.2603, "num_input_tokens_seen": 335183216, "step": 5346 }, { "epoch": 17.79034941763727, "loss": 0.19821223616600037, "loss_ce": 4.635595905710943e-05, "loss_iou": 0.0693359375, "loss_num": 0.0118408203125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 335183216, "step": 5346 }, { "epoch": 17.793677204658902, "grad_norm": 9.11264705657959, "learning_rate": 5e-06, "loss": 0.3462, "num_input_tokens_seen": 335246832, "step": 5347 }, { "epoch": 17.793677204658902, "loss": 0.409608393907547, "loss_ce": 1.4541076325258473e-06, "loss_iou": 0.15234375, "loss_num": 0.0208740234375, "loss_xval": 0.41015625, "num_input_tokens_seen": 335246832, "step": 5347 }, { "epoch": 17.79700499168053, "grad_norm": 8.11495304107666, "learning_rate": 5e-06, "loss": 0.4151, "num_input_tokens_seen": 335309972, "step": 5348 }, { "epoch": 17.79700499168053, "loss": 0.36725348234176636, "loss_ce": 4.9607288019615225e-06, "loss_iou": 0.12255859375, "loss_num": 0.0244140625, "loss_xval": 0.3671875, "num_input_tokens_seen": 335309972, "step": 5348 }, { "epoch": 17.800332778702163, "grad_norm": 16.057376861572266, "learning_rate": 5e-06, "loss": 0.5487, "num_input_tokens_seen": 335373776, "step": 5349 }, { "epoch": 17.800332778702163, "loss": 0.4379904270172119, "loss_ce": 2.132631379936356e-06, "loss_iou": 0.1787109375, "loss_num": 0.0159912109375, "loss_xval": 0.4375, "num_input_tokens_seen": 335373776, "step": 5349 }, { "epoch": 17.803660565723792, "grad_norm": 24.641328811645508, "learning_rate": 5e-06, "loss": 0.6552, "num_input_tokens_seen": 335437228, "step": 5350 }, { "epoch": 17.803660565723792, "loss": 0.39831608533859253, "loss_ce": 1.5902773156994954e-05, "loss_iou": 0.1689453125, "loss_num": 0.01214599609375, "loss_xval": 0.3984375, "num_input_tokens_seen": 335437228, "step": 5350 }, { "epoch": 17.806988352745424, "grad_norm": 19.96274757385254, "learning_rate": 5e-06, "loss": 0.4345, "num_input_tokens_seen": 335501396, "step": 5351 }, { "epoch": 17.806988352745424, "loss": 0.5186774134635925, "loss_ce": 6.634536475758068e-07, "loss_iou": 0.1943359375, "loss_num": 0.0260009765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 335501396, "step": 5351 }, { "epoch": 17.810316139767053, "grad_norm": 14.468295097351074, "learning_rate": 5e-06, "loss": 0.3764, "num_input_tokens_seen": 335564688, "step": 5352 }, { "epoch": 17.810316139767053, "loss": 0.3832107484340668, "loss_ce": 1.5238699688779889e-06, "loss_iou": 0.1484375, "loss_num": 0.017333984375, "loss_xval": 0.3828125, "num_input_tokens_seen": 335564688, "step": 5352 }, { "epoch": 17.813643926788686, "grad_norm": 7.4173431396484375, "learning_rate": 5e-06, "loss": 0.3763, "num_input_tokens_seen": 335628532, "step": 5353 }, { "epoch": 17.813643926788686, "loss": 0.4405646324157715, "loss_ce": 1.2849308404838666e-05, "loss_iou": 0.1767578125, "loss_num": 0.017578125, "loss_xval": 0.44140625, "num_input_tokens_seen": 335628532, "step": 5353 }, { "epoch": 17.816971713810315, "grad_norm": 12.535286903381348, "learning_rate": 5e-06, "loss": 0.4193, "num_input_tokens_seen": 335692088, "step": 5354 }, { "epoch": 17.816971713810315, "loss": 0.4580380618572235, "loss_ce": 0.0011288924142718315, "loss_iou": 0.177734375, "loss_num": 0.0203857421875, "loss_xval": 0.45703125, "num_input_tokens_seen": 335692088, "step": 5354 }, { "epoch": 17.820299500831947, "grad_norm": 13.595169067382812, "learning_rate": 5e-06, "loss": 0.3689, "num_input_tokens_seen": 335753316, "step": 5355 }, { "epoch": 17.820299500831947, "loss": 0.312439888715744, "loss_ce": 9.237225526703696e-07, "loss_iou": 0.1337890625, "loss_num": 0.0089111328125, "loss_xval": 0.3125, "num_input_tokens_seen": 335753316, "step": 5355 }, { "epoch": 17.823627287853576, "grad_norm": 9.842429161071777, "learning_rate": 5e-06, "loss": 0.3574, "num_input_tokens_seen": 335816316, "step": 5356 }, { "epoch": 17.823627287853576, "loss": 0.3261151909828186, "loss_ce": 4.333839569881093e-06, "loss_iou": 0.1376953125, "loss_num": 0.0103759765625, "loss_xval": 0.326171875, "num_input_tokens_seen": 335816316, "step": 5356 }, { "epoch": 17.826955074875208, "grad_norm": 7.787083625793457, "learning_rate": 5e-06, "loss": 0.3666, "num_input_tokens_seen": 335880020, "step": 5357 }, { "epoch": 17.826955074875208, "loss": 0.38168683648109436, "loss_ce": 3.4003533073700964e-05, "loss_iou": 0.154296875, "loss_num": 0.0147705078125, "loss_xval": 0.380859375, "num_input_tokens_seen": 335880020, "step": 5357 }, { "epoch": 17.830282861896837, "grad_norm": 7.414761066436768, "learning_rate": 5e-06, "loss": 0.5358, "num_input_tokens_seen": 335943204, "step": 5358 }, { "epoch": 17.830282861896837, "loss": 0.4493866562843323, "loss_ce": 3.055854176636785e-05, "loss_iou": 0.2021484375, "loss_num": 0.00885009765625, "loss_xval": 0.44921875, "num_input_tokens_seen": 335943204, "step": 5358 }, { "epoch": 17.83361064891847, "grad_norm": 7.893875598907471, "learning_rate": 5e-06, "loss": 0.3443, "num_input_tokens_seen": 336004948, "step": 5359 }, { "epoch": 17.83361064891847, "loss": 0.22699040174484253, "loss_ce": 6.535787520078884e-07, "loss_iou": 0.0771484375, "loss_num": 0.01458740234375, "loss_xval": 0.2265625, "num_input_tokens_seen": 336004948, "step": 5359 }, { "epoch": 17.836938435940098, "grad_norm": 18.720176696777344, "learning_rate": 5e-06, "loss": 0.3964, "num_input_tokens_seen": 336068160, "step": 5360 }, { "epoch": 17.836938435940098, "loss": 0.4682309627532959, "loss_ce": 3.0264449378591962e-05, "loss_iou": 0.2001953125, "loss_num": 0.01373291015625, "loss_xval": 0.46875, "num_input_tokens_seen": 336068160, "step": 5360 }, { "epoch": 17.84026622296173, "grad_norm": 34.028079986572266, "learning_rate": 5e-06, "loss": 0.4237, "num_input_tokens_seen": 336130212, "step": 5361 }, { "epoch": 17.84026622296173, "loss": 0.19824674725532532, "loss_ce": 4.5565484469989315e-06, "loss_iou": 0.0546875, "loss_num": 0.017822265625, "loss_xval": 0.1982421875, "num_input_tokens_seen": 336130212, "step": 5361 }, { "epoch": 17.84359400998336, "grad_norm": 33.5516471862793, "learning_rate": 5e-06, "loss": 0.3272, "num_input_tokens_seen": 336191976, "step": 5362 }, { "epoch": 17.84359400998336, "loss": 0.2342531532049179, "loss_ce": 2.2861226511849964e-07, "loss_iou": 0.0810546875, "loss_num": 0.0145263671875, "loss_xval": 0.234375, "num_input_tokens_seen": 336191976, "step": 5362 }, { "epoch": 17.846921797004992, "grad_norm": 25.150859832763672, "learning_rate": 5e-06, "loss": 0.3523, "num_input_tokens_seen": 336255164, "step": 5363 }, { "epoch": 17.846921797004992, "loss": 0.3623494505882263, "loss_ce": 4.476790127228014e-05, "loss_iou": 0.13671875, "loss_num": 0.017578125, "loss_xval": 0.36328125, "num_input_tokens_seen": 336255164, "step": 5363 }, { "epoch": 17.85024958402662, "grad_norm": 20.229167938232422, "learning_rate": 5e-06, "loss": 0.5059, "num_input_tokens_seen": 336318556, "step": 5364 }, { "epoch": 17.85024958402662, "loss": 0.6027235984802246, "loss_ce": 1.4840534277027473e-06, "loss_iou": 0.26953125, "loss_num": 0.01300048828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 336318556, "step": 5364 }, { "epoch": 17.853577371048253, "grad_norm": 22.822858810424805, "learning_rate": 5e-06, "loss": 0.4056, "num_input_tokens_seen": 336380240, "step": 5365 }, { "epoch": 17.853577371048253, "loss": 0.39471590518951416, "loss_ce": 1.570269773765176e-06, "loss_iou": 0.162109375, "loss_num": 0.01422119140625, "loss_xval": 0.39453125, "num_input_tokens_seen": 336380240, "step": 5365 }, { "epoch": 17.856905158069882, "grad_norm": 34.65104675292969, "learning_rate": 5e-06, "loss": 0.4358, "num_input_tokens_seen": 336442604, "step": 5366 }, { "epoch": 17.856905158069882, "loss": 0.5243552923202515, "loss_ce": 2.2972612896410283e-06, "loss_iou": 0.1865234375, "loss_num": 0.030029296875, "loss_xval": 0.5234375, "num_input_tokens_seen": 336442604, "step": 5366 }, { "epoch": 17.860232945091514, "grad_norm": 42.788551330566406, "learning_rate": 5e-06, "loss": 0.6944, "num_input_tokens_seen": 336505336, "step": 5367 }, { "epoch": 17.860232945091514, "loss": 0.8044586181640625, "loss_ce": 1.5291627278202213e-05, "loss_iou": 0.322265625, "loss_num": 0.031982421875, "loss_xval": 0.8046875, "num_input_tokens_seen": 336505336, "step": 5367 }, { "epoch": 17.863560732113143, "grad_norm": 22.489181518554688, "learning_rate": 5e-06, "loss": 0.3564, "num_input_tokens_seen": 336567148, "step": 5368 }, { "epoch": 17.863560732113143, "loss": 0.20361420512199402, "loss_ce": 9.291418336943025e-07, "loss_iou": 0.08203125, "loss_num": 0.00787353515625, "loss_xval": 0.203125, "num_input_tokens_seen": 336567148, "step": 5368 }, { "epoch": 17.866888519134775, "grad_norm": 16.42172622680664, "learning_rate": 5e-06, "loss": 0.4694, "num_input_tokens_seen": 336629524, "step": 5369 }, { "epoch": 17.866888519134775, "loss": 0.29575005173683167, "loss_ce": 4.20617561758263e-06, "loss_iou": 0.115234375, "loss_num": 0.01300048828125, "loss_xval": 0.294921875, "num_input_tokens_seen": 336629524, "step": 5369 }, { "epoch": 17.870216306156404, "grad_norm": 11.983659744262695, "learning_rate": 5e-06, "loss": 0.3109, "num_input_tokens_seen": 336692068, "step": 5370 }, { "epoch": 17.870216306156404, "loss": 0.19799940288066864, "loss_ce": 1.3520598258764949e-06, "loss_iou": 0.046630859375, "loss_num": 0.0208740234375, "loss_xval": 0.1982421875, "num_input_tokens_seen": 336692068, "step": 5370 }, { "epoch": 17.873544093178037, "grad_norm": 14.453381538391113, "learning_rate": 5e-06, "loss": 0.3173, "num_input_tokens_seen": 336754104, "step": 5371 }, { "epoch": 17.873544093178037, "loss": 0.3012094795703888, "loss_ce": 9.827120948102674e-07, "loss_iou": 0.11767578125, "loss_num": 0.01312255859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 336754104, "step": 5371 }, { "epoch": 17.876871880199666, "grad_norm": 13.575965881347656, "learning_rate": 5e-06, "loss": 0.2718, "num_input_tokens_seen": 336816884, "step": 5372 }, { "epoch": 17.876871880199666, "loss": 0.3089492917060852, "loss_ce": 0.0008438241784460843, "loss_iou": 0.130859375, "loss_num": 0.009033203125, "loss_xval": 0.30859375, "num_input_tokens_seen": 336816884, "step": 5372 }, { "epoch": 17.880199667221298, "grad_norm": 5.390939235687256, "learning_rate": 5e-06, "loss": 0.5488, "num_input_tokens_seen": 336880884, "step": 5373 }, { "epoch": 17.880199667221298, "loss": 0.3700580298900604, "loss_ce": 1.8720033949648496e-06, "loss_iou": 0.1357421875, "loss_num": 0.0196533203125, "loss_xval": 0.369140625, "num_input_tokens_seen": 336880884, "step": 5373 }, { "epoch": 17.883527454242927, "grad_norm": 9.715229988098145, "learning_rate": 5e-06, "loss": 0.5674, "num_input_tokens_seen": 336944480, "step": 5374 }, { "epoch": 17.883527454242927, "loss": 0.6478289365768433, "loss_ce": 1.7713450688461307e-06, "loss_iou": 0.25390625, "loss_num": 0.0283203125, "loss_xval": 0.6484375, "num_input_tokens_seen": 336944480, "step": 5374 }, { "epoch": 17.88685524126456, "grad_norm": 11.618508338928223, "learning_rate": 5e-06, "loss": 0.2924, "num_input_tokens_seen": 337006800, "step": 5375 }, { "epoch": 17.88685524126456, "loss": 0.23864784836769104, "loss_ce": 0.0010684948647394776, "loss_iou": 0.07421875, "loss_num": 0.0179443359375, "loss_xval": 0.2373046875, "num_input_tokens_seen": 337006800, "step": 5375 }, { "epoch": 17.890183028286188, "grad_norm": 11.140654563903809, "learning_rate": 5e-06, "loss": 0.4208, "num_input_tokens_seen": 337069688, "step": 5376 }, { "epoch": 17.890183028286188, "loss": 0.5408432483673096, "loss_ce": 1.0740788638941012e-05, "loss_iou": 0.1982421875, "loss_num": 0.0286865234375, "loss_xval": 0.5390625, "num_input_tokens_seen": 337069688, "step": 5376 }, { "epoch": 17.89351081530782, "grad_norm": 11.59449577331543, "learning_rate": 5e-06, "loss": 0.6203, "num_input_tokens_seen": 337134196, "step": 5377 }, { "epoch": 17.89351081530782, "loss": 0.5124906301498413, "loss_ce": 0.00028356039547361434, "loss_iou": 0.2080078125, "loss_num": 0.0189208984375, "loss_xval": 0.51171875, "num_input_tokens_seen": 337134196, "step": 5377 }, { "epoch": 17.89683860232945, "grad_norm": 11.02955150604248, "learning_rate": 5e-06, "loss": 0.3952, "num_input_tokens_seen": 337197708, "step": 5378 }, { "epoch": 17.89683860232945, "loss": 0.576021134853363, "loss_ce": 0.00021547038340941072, "loss_iou": 0.251953125, "loss_num": 0.01409912109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 337197708, "step": 5378 }, { "epoch": 17.90016638935108, "grad_norm": 13.782137870788574, "learning_rate": 5e-06, "loss": 0.3632, "num_input_tokens_seen": 337261700, "step": 5379 }, { "epoch": 17.90016638935108, "loss": 0.4032832086086273, "loss_ce": 8.663482731208205e-06, "loss_iou": 0.1513671875, "loss_num": 0.0201416015625, "loss_xval": 0.40234375, "num_input_tokens_seen": 337261700, "step": 5379 }, { "epoch": 17.90349417637271, "grad_norm": 19.330106735229492, "learning_rate": 5e-06, "loss": 0.4548, "num_input_tokens_seen": 337325084, "step": 5380 }, { "epoch": 17.90349417637271, "loss": 0.6807912588119507, "loss_ce": 5.125790266902186e-06, "loss_iou": 0.296875, "loss_num": 0.017333984375, "loss_xval": 0.6796875, "num_input_tokens_seen": 337325084, "step": 5380 }, { "epoch": 17.906821963394343, "grad_norm": 16.803207397460938, "learning_rate": 5e-06, "loss": 0.5053, "num_input_tokens_seen": 337388024, "step": 5381 }, { "epoch": 17.906821963394343, "loss": 0.5285051465034485, "loss_ce": 1.6942926777119283e-06, "loss_iou": 0.2060546875, "loss_num": 0.0234375, "loss_xval": 0.52734375, "num_input_tokens_seen": 337388024, "step": 5381 }, { "epoch": 17.91014975041597, "grad_norm": 9.708841323852539, "learning_rate": 5e-06, "loss": 0.3945, "num_input_tokens_seen": 337451112, "step": 5382 }, { "epoch": 17.91014975041597, "loss": 0.43542611598968506, "loss_ce": 1.2952009456057567e-06, "loss_iou": 0.181640625, "loss_num": 0.01434326171875, "loss_xval": 0.435546875, "num_input_tokens_seen": 337451112, "step": 5382 }, { "epoch": 17.913477537437604, "grad_norm": 30.3945369720459, "learning_rate": 5e-06, "loss": 0.4928, "num_input_tokens_seen": 337515328, "step": 5383 }, { "epoch": 17.913477537437604, "loss": 0.4233555197715759, "loss_ce": 4.244223816840531e-07, "loss_iou": 0.193359375, "loss_num": 0.007537841796875, "loss_xval": 0.423828125, "num_input_tokens_seen": 337515328, "step": 5383 }, { "epoch": 17.916805324459233, "grad_norm": 13.274879455566406, "learning_rate": 5e-06, "loss": 0.2535, "num_input_tokens_seen": 337578104, "step": 5384 }, { "epoch": 17.916805324459233, "loss": 0.2349410057067871, "loss_ce": 1.4321019534691004e-06, "loss_iou": 0.09326171875, "loss_num": 0.00970458984375, "loss_xval": 0.2353515625, "num_input_tokens_seen": 337578104, "step": 5384 }, { "epoch": 17.920133111480865, "grad_norm": 7.916996955871582, "learning_rate": 5e-06, "loss": 0.4731, "num_input_tokens_seen": 337640680, "step": 5385 }, { "epoch": 17.920133111480865, "loss": 0.42816752195358276, "loss_ce": 5.8902041928377e-06, "loss_iou": 0.1552734375, "loss_num": 0.0235595703125, "loss_xval": 0.427734375, "num_input_tokens_seen": 337640680, "step": 5385 }, { "epoch": 17.923460898502494, "grad_norm": 10.041097640991211, "learning_rate": 5e-06, "loss": 0.3552, "num_input_tokens_seen": 337704396, "step": 5386 }, { "epoch": 17.923460898502494, "loss": 0.30513083934783936, "loss_ce": 3.137913881801069e-05, "loss_iou": 0.11279296875, "loss_num": 0.015869140625, "loss_xval": 0.3046875, "num_input_tokens_seen": 337704396, "step": 5386 }, { "epoch": 17.926788685524127, "grad_norm": 26.0644588470459, "learning_rate": 5e-06, "loss": 0.3652, "num_input_tokens_seen": 337767660, "step": 5387 }, { "epoch": 17.926788685524127, "loss": 0.4017954468727112, "loss_ce": 1.0150042726309039e-06, "loss_iou": 0.1533203125, "loss_num": 0.019287109375, "loss_xval": 0.40234375, "num_input_tokens_seen": 337767660, "step": 5387 }, { "epoch": 17.930116472545755, "grad_norm": 35.344749450683594, "learning_rate": 5e-06, "loss": 0.5611, "num_input_tokens_seen": 337829740, "step": 5388 }, { "epoch": 17.930116472545755, "loss": 0.6649793982505798, "loss_ce": 1.3888097782910336e-06, "loss_iou": 0.248046875, "loss_num": 0.033447265625, "loss_xval": 0.6640625, "num_input_tokens_seen": 337829740, "step": 5388 }, { "epoch": 17.933444259567388, "grad_norm": 34.35508728027344, "learning_rate": 5e-06, "loss": 0.4547, "num_input_tokens_seen": 337893176, "step": 5389 }, { "epoch": 17.933444259567388, "loss": 0.5389580726623535, "loss_ce": 1.763929867593106e-05, "loss_iou": 0.1845703125, "loss_num": 0.033935546875, "loss_xval": 0.5390625, "num_input_tokens_seen": 337893176, "step": 5389 }, { "epoch": 17.936772046589017, "grad_norm": 28.434650421142578, "learning_rate": 5e-06, "loss": 0.5972, "num_input_tokens_seen": 337956196, "step": 5390 }, { "epoch": 17.936772046589017, "loss": 0.6235976219177246, "loss_ce": 1.4578549780708272e-06, "loss_iou": 0.2470703125, "loss_num": 0.02587890625, "loss_xval": 0.625, "num_input_tokens_seen": 337956196, "step": 5390 }, { "epoch": 17.94009983361065, "grad_norm": 8.381388664245605, "learning_rate": 5e-06, "loss": 0.2891, "num_input_tokens_seen": 338016292, "step": 5391 }, { "epoch": 17.94009983361065, "loss": 0.41952353715896606, "loss_ce": 0.00027305277762934566, "loss_iou": 0.1552734375, "loss_num": 0.021728515625, "loss_xval": 0.419921875, "num_input_tokens_seen": 338016292, "step": 5391 }, { "epoch": 17.943427620632278, "grad_norm": 16.83711051940918, "learning_rate": 5e-06, "loss": 0.3978, "num_input_tokens_seen": 338079320, "step": 5392 }, { "epoch": 17.943427620632278, "loss": 0.48175084590911865, "loss_ce": 3.6255789837014163e-07, "loss_iou": 0.2001953125, "loss_num": 0.016357421875, "loss_xval": 0.482421875, "num_input_tokens_seen": 338079320, "step": 5392 }, { "epoch": 17.94675540765391, "grad_norm": 23.824277877807617, "learning_rate": 5e-06, "loss": 0.5579, "num_input_tokens_seen": 338142316, "step": 5393 }, { "epoch": 17.94675540765391, "loss": 0.36267152428627014, "loss_ce": 6.187516419231542e-07, "loss_iou": 0.150390625, "loss_num": 0.01226806640625, "loss_xval": 0.36328125, "num_input_tokens_seen": 338142316, "step": 5393 }, { "epoch": 17.950083194675543, "grad_norm": 24.305423736572266, "learning_rate": 5e-06, "loss": 0.3432, "num_input_tokens_seen": 338203400, "step": 5394 }, { "epoch": 17.950083194675543, "loss": 0.4729022681713104, "loss_ce": 1.8835532955563394e-06, "loss_iou": 0.1806640625, "loss_num": 0.0224609375, "loss_xval": 0.47265625, "num_input_tokens_seen": 338203400, "step": 5394 }, { "epoch": 17.95341098169717, "grad_norm": 23.456687927246094, "learning_rate": 5e-06, "loss": 0.5097, "num_input_tokens_seen": 338266720, "step": 5395 }, { "epoch": 17.95341098169717, "loss": 0.5360183715820312, "loss_ce": 2.2915211957297288e-05, "loss_iou": 0.2119140625, "loss_num": 0.0224609375, "loss_xval": 0.53515625, "num_input_tokens_seen": 338266720, "step": 5395 }, { "epoch": 17.9567387687188, "grad_norm": 36.72749328613281, "learning_rate": 5e-06, "loss": 0.5149, "num_input_tokens_seen": 338329568, "step": 5396 }, { "epoch": 17.9567387687188, "loss": 0.40326032042503357, "loss_ce": 1.04416187696188e-06, "loss_iou": 0.150390625, "loss_num": 0.020263671875, "loss_xval": 0.40234375, "num_input_tokens_seen": 338329568, "step": 5396 }, { "epoch": 17.960066555740433, "grad_norm": 30.16398048400879, "learning_rate": 5e-06, "loss": 0.5111, "num_input_tokens_seen": 338392444, "step": 5397 }, { "epoch": 17.960066555740433, "loss": 0.5509525537490845, "loss_ce": 0.0004764412879012525, "loss_iou": 0.23046875, "loss_num": 0.017578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 338392444, "step": 5397 }, { "epoch": 17.963394342762065, "grad_norm": 11.132864952087402, "learning_rate": 5e-06, "loss": 0.3842, "num_input_tokens_seen": 338455564, "step": 5398 }, { "epoch": 17.963394342762065, "loss": 0.42053279280662537, "loss_ce": 5.76566776544496e-07, "loss_iou": 0.1845703125, "loss_num": 0.01025390625, "loss_xval": 0.419921875, "num_input_tokens_seen": 338455564, "step": 5398 }, { "epoch": 17.966722129783694, "grad_norm": 22.961999893188477, "learning_rate": 5e-06, "loss": 0.3885, "num_input_tokens_seen": 338517928, "step": 5399 }, { "epoch": 17.966722129783694, "loss": 0.4182450473308563, "loss_ce": 1.6373105609090999e-06, "loss_iou": 0.1708984375, "loss_num": 0.01531982421875, "loss_xval": 0.41796875, "num_input_tokens_seen": 338517928, "step": 5399 }, { "epoch": 17.970049916805323, "grad_norm": 17.47234535217285, "learning_rate": 5e-06, "loss": 0.511, "num_input_tokens_seen": 338580912, "step": 5400 }, { "epoch": 17.970049916805323, "loss": 0.3515024781227112, "loss_ce": 1.027824168886582e-06, "loss_iou": 0.1435546875, "loss_num": 0.0128173828125, "loss_xval": 0.3515625, "num_input_tokens_seen": 338580912, "step": 5400 }, { "epoch": 17.973377703826955, "grad_norm": 13.996308326721191, "learning_rate": 5e-06, "loss": 0.3903, "num_input_tokens_seen": 338643432, "step": 5401 }, { "epoch": 17.973377703826955, "loss": 0.24267953634262085, "loss_ce": 3.7406073261081474e-06, "loss_iou": 0.09521484375, "loss_num": 0.0106201171875, "loss_xval": 0.2421875, "num_input_tokens_seen": 338643432, "step": 5401 }, { "epoch": 17.976705490848587, "grad_norm": 14.282442092895508, "learning_rate": 5e-06, "loss": 0.5145, "num_input_tokens_seen": 338706764, "step": 5402 }, { "epoch": 17.976705490848587, "loss": 0.479785680770874, "loss_ce": 3.587043465813622e-06, "loss_iou": 0.2177734375, "loss_num": 0.008544921875, "loss_xval": 0.48046875, "num_input_tokens_seen": 338706764, "step": 5402 }, { "epoch": 17.980033277870216, "grad_norm": 13.014655113220215, "learning_rate": 5e-06, "loss": 0.3711, "num_input_tokens_seen": 338770012, "step": 5403 }, { "epoch": 17.980033277870216, "loss": 0.36413806676864624, "loss_ce": 2.3382760900858557e-06, "loss_iou": 0.1552734375, "loss_num": 0.010498046875, "loss_xval": 0.36328125, "num_input_tokens_seen": 338770012, "step": 5403 }, { "epoch": 17.983361064891845, "grad_norm": 18.1613826751709, "learning_rate": 5e-06, "loss": 0.5312, "num_input_tokens_seen": 338833640, "step": 5404 }, { "epoch": 17.983361064891845, "loss": 0.5683600306510925, "loss_ce": 6.135842340881936e-07, "loss_iou": 0.2177734375, "loss_num": 0.0267333984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 338833640, "step": 5404 }, { "epoch": 17.986688851913478, "grad_norm": 27.096399307250977, "learning_rate": 5e-06, "loss": 0.5237, "num_input_tokens_seen": 338896604, "step": 5405 }, { "epoch": 17.986688851913478, "loss": 0.6815422177314758, "loss_ce": 0.0006034955731593072, "loss_iou": 0.3046875, "loss_num": 0.01434326171875, "loss_xval": 0.6796875, "num_input_tokens_seen": 338896604, "step": 5405 }, { "epoch": 17.99001663893511, "grad_norm": 29.58254623413086, "learning_rate": 5e-06, "loss": 0.3239, "num_input_tokens_seen": 338958456, "step": 5406 }, { "epoch": 17.99001663893511, "loss": 0.3387458324432373, "loss_ce": 7.127042636057013e-07, "loss_iou": 0.11962890625, "loss_num": 0.02001953125, "loss_xval": 0.337890625, "num_input_tokens_seen": 338958456, "step": 5406 }, { "epoch": 17.99334442595674, "grad_norm": 10.874626159667969, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 339022604, "step": 5407 }, { "epoch": 17.99334442595674, "loss": 0.254606157541275, "loss_ce": 0.00024212891003116965, "loss_iou": 0.10302734375, "loss_num": 0.00970458984375, "loss_xval": 0.25390625, "num_input_tokens_seen": 339022604, "step": 5407 }, { "epoch": 17.996672212978368, "grad_norm": 15.566896438598633, "learning_rate": 5e-06, "loss": 0.5076, "num_input_tokens_seen": 339083996, "step": 5408 }, { "epoch": 17.996672212978368, "loss": 0.5277363061904907, "loss_ce": 8.738585893297568e-05, "loss_iou": 0.2177734375, "loss_num": 0.0186767578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 339083996, "step": 5408 }, { "epoch": 18.0, "grad_norm": 22.76108169555664, "learning_rate": 5e-06, "loss": 0.3695, "num_input_tokens_seen": 339148312, "step": 5409 }, { "epoch": 18.0, "loss": 0.45166856050491333, "loss_ce": 8.413863724854309e-06, "loss_iou": 0.185546875, "loss_num": 0.01611328125, "loss_xval": 0.451171875, "num_input_tokens_seen": 339148312, "step": 5409 }, { "epoch": 18.003327787021632, "grad_norm": 15.386190414428711, "learning_rate": 5e-06, "loss": 0.413, "num_input_tokens_seen": 339210904, "step": 5410 }, { "epoch": 18.003327787021632, "loss": 0.30017367005348206, "loss_ce": 2.7874777970282594e-06, "loss_iou": 0.07275390625, "loss_num": 0.0311279296875, "loss_xval": 0.30078125, "num_input_tokens_seen": 339210904, "step": 5410 }, { "epoch": 18.00665557404326, "grad_norm": 14.442679405212402, "learning_rate": 5e-06, "loss": 0.3734, "num_input_tokens_seen": 339273232, "step": 5411 }, { "epoch": 18.00665557404326, "loss": 0.25814923644065857, "loss_ce": 1.0508967989153462e-06, "loss_iou": 0.11376953125, "loss_num": 0.006072998046875, "loss_xval": 0.2578125, "num_input_tokens_seen": 339273232, "step": 5411 }, { "epoch": 18.009983361064894, "grad_norm": 12.514415740966797, "learning_rate": 5e-06, "loss": 0.6859, "num_input_tokens_seen": 339337488, "step": 5412 }, { "epoch": 18.009983361064894, "loss": 0.49169978499412537, "loss_ce": 5.747925797550124e-07, "loss_iou": 0.2001953125, "loss_num": 0.0181884765625, "loss_xval": 0.4921875, "num_input_tokens_seen": 339337488, "step": 5412 }, { "epoch": 18.013311148086522, "grad_norm": 9.211650848388672, "learning_rate": 5e-06, "loss": 0.3778, "num_input_tokens_seen": 339401900, "step": 5413 }, { "epoch": 18.013311148086522, "loss": 0.366341769695282, "loss_ce": 8.752323992666788e-06, "loss_iou": 0.15625, "loss_num": 0.0107421875, "loss_xval": 0.3671875, "num_input_tokens_seen": 339401900, "step": 5413 }, { "epoch": 18.016638935108155, "grad_norm": 15.829276084899902, "learning_rate": 5e-06, "loss": 0.3675, "num_input_tokens_seen": 339464368, "step": 5414 }, { "epoch": 18.016638935108155, "loss": 0.5007839798927307, "loss_ce": 5.155572580406442e-05, "loss_iou": 0.220703125, "loss_num": 0.01177978515625, "loss_xval": 0.5, "num_input_tokens_seen": 339464368, "step": 5414 }, { "epoch": 18.019966722129784, "grad_norm": 19.95416259765625, "learning_rate": 5e-06, "loss": 0.4115, "num_input_tokens_seen": 339526592, "step": 5415 }, { "epoch": 18.019966722129784, "loss": 0.596193253993988, "loss_ce": 1.8116099909093464e-06, "loss_iou": 0.23828125, "loss_num": 0.023681640625, "loss_xval": 0.59765625, "num_input_tokens_seen": 339526592, "step": 5415 }, { "epoch": 18.023294509151416, "grad_norm": 9.609343528747559, "learning_rate": 5e-06, "loss": 0.4064, "num_input_tokens_seen": 339588448, "step": 5416 }, { "epoch": 18.023294509151416, "loss": 0.16342321038246155, "loss_ce": 1.5860118764976505e-06, "loss_iou": 0.044921875, "loss_num": 0.01470947265625, "loss_xval": 0.1630859375, "num_input_tokens_seen": 339588448, "step": 5416 }, { "epoch": 18.026622296173045, "grad_norm": 8.953709602355957, "learning_rate": 5e-06, "loss": 0.2985, "num_input_tokens_seen": 339650900, "step": 5417 }, { "epoch": 18.026622296173045, "loss": 0.1692514568567276, "loss_ce": 9.638953315516119e-07, "loss_iou": 0.03662109375, "loss_num": 0.0191650390625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 339650900, "step": 5417 }, { "epoch": 18.029950083194677, "grad_norm": 8.874521255493164, "learning_rate": 5e-06, "loss": 0.2615, "num_input_tokens_seen": 339712076, "step": 5418 }, { "epoch": 18.029950083194677, "loss": 0.24786600470542908, "loss_ce": 2.2257008822634816e-06, "loss_iou": 0.078125, "loss_num": 0.0184326171875, "loss_xval": 0.248046875, "num_input_tokens_seen": 339712076, "step": 5418 }, { "epoch": 18.033277870216306, "grad_norm": 14.130385398864746, "learning_rate": 5e-06, "loss": 0.2244, "num_input_tokens_seen": 339775316, "step": 5419 }, { "epoch": 18.033277870216306, "loss": 0.26636236906051636, "loss_ce": 4.9604086598264985e-06, "loss_iou": 0.1064453125, "loss_num": 0.0107421875, "loss_xval": 0.265625, "num_input_tokens_seen": 339775316, "step": 5419 }, { "epoch": 18.03660565723794, "grad_norm": 19.8447208404541, "learning_rate": 5e-06, "loss": 0.3015, "num_input_tokens_seen": 339838604, "step": 5420 }, { "epoch": 18.03660565723794, "loss": 0.29428088665008545, "loss_ce": 9.145594231085852e-05, "loss_iou": 0.08984375, "loss_num": 0.02294921875, "loss_xval": 0.294921875, "num_input_tokens_seen": 339838604, "step": 5420 }, { "epoch": 18.039933444259567, "grad_norm": 27.232099533081055, "learning_rate": 5e-06, "loss": 0.3723, "num_input_tokens_seen": 339901584, "step": 5421 }, { "epoch": 18.039933444259567, "loss": 0.5251671671867371, "loss_ce": 2.0668452634708956e-05, "loss_iou": 0.208984375, "loss_num": 0.021240234375, "loss_xval": 0.5234375, "num_input_tokens_seen": 339901584, "step": 5421 }, { "epoch": 18.0432612312812, "grad_norm": 30.8729305267334, "learning_rate": 5e-06, "loss": 0.4689, "num_input_tokens_seen": 339966424, "step": 5422 }, { "epoch": 18.0432612312812, "loss": 0.6121339201927185, "loss_ce": 1.2334679922787473e-05, "loss_iou": 0.2451171875, "loss_num": 0.0244140625, "loss_xval": 0.61328125, "num_input_tokens_seen": 339966424, "step": 5422 }, { "epoch": 18.04658901830283, "grad_norm": 21.17388153076172, "learning_rate": 5e-06, "loss": 0.3903, "num_input_tokens_seen": 340027720, "step": 5423 }, { "epoch": 18.04658901830283, "loss": 0.5253941416740417, "loss_ce": 3.538201326591661e-06, "loss_iou": 0.220703125, "loss_num": 0.016845703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 340027720, "step": 5423 }, { "epoch": 18.04991680532446, "grad_norm": 19.168548583984375, "learning_rate": 5e-06, "loss": 0.4602, "num_input_tokens_seen": 340091752, "step": 5424 }, { "epoch": 18.04991680532446, "loss": 0.5858178734779358, "loss_ce": 2.461823669364094e-06, "loss_iou": 0.23046875, "loss_num": 0.0247802734375, "loss_xval": 0.5859375, "num_input_tokens_seen": 340091752, "step": 5424 }, { "epoch": 18.05324459234609, "grad_norm": 27.735450744628906, "learning_rate": 5e-06, "loss": 0.3436, "num_input_tokens_seen": 340155392, "step": 5425 }, { "epoch": 18.05324459234609, "loss": 0.3546895384788513, "loss_ce": 1.4236087736207992e-05, "loss_iou": 0.1474609375, "loss_num": 0.011962890625, "loss_xval": 0.35546875, "num_input_tokens_seen": 340155392, "step": 5425 }, { "epoch": 18.056572379367722, "grad_norm": 17.246431350708008, "learning_rate": 5e-06, "loss": 0.2697, "num_input_tokens_seen": 340217968, "step": 5426 }, { "epoch": 18.056572379367722, "loss": 0.3028291165828705, "loss_ce": 3.201912022632314e-06, "loss_iou": 0.12353515625, "loss_num": 0.01129150390625, "loss_xval": 0.302734375, "num_input_tokens_seen": 340217968, "step": 5426 }, { "epoch": 18.05990016638935, "grad_norm": 13.052860260009766, "learning_rate": 5e-06, "loss": 0.427, "num_input_tokens_seen": 340281348, "step": 5427 }, { "epoch": 18.05990016638935, "loss": 0.4321293830871582, "loss_ce": 4.939511200063862e-07, "loss_iou": 0.1865234375, "loss_num": 0.0118408203125, "loss_xval": 0.431640625, "num_input_tokens_seen": 340281348, "step": 5427 }, { "epoch": 18.063227953410983, "grad_norm": 10.109258651733398, "learning_rate": 5e-06, "loss": 0.2392, "num_input_tokens_seen": 340342796, "step": 5428 }, { "epoch": 18.063227953410983, "loss": 0.20692498981952667, "loss_ce": 5.469596544571687e-07, "loss_iou": 0.07958984375, "loss_num": 0.009521484375, "loss_xval": 0.20703125, "num_input_tokens_seen": 340342796, "step": 5428 }, { "epoch": 18.066555740432612, "grad_norm": 16.500762939453125, "learning_rate": 5e-06, "loss": 0.4433, "num_input_tokens_seen": 340406524, "step": 5429 }, { "epoch": 18.066555740432612, "loss": 0.5131251811981201, "loss_ce": 2.6467728275747504e-06, "loss_iou": 0.20703125, "loss_num": 0.019775390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 340406524, "step": 5429 }, { "epoch": 18.069883527454245, "grad_norm": 18.17728614807129, "learning_rate": 5e-06, "loss": 0.4662, "num_input_tokens_seen": 340469052, "step": 5430 }, { "epoch": 18.069883527454245, "loss": 0.4101516902446747, "loss_ce": 0.0011551165953278542, "loss_iou": 0.1298828125, "loss_num": 0.0296630859375, "loss_xval": 0.408203125, "num_input_tokens_seen": 340469052, "step": 5430 }, { "epoch": 18.073211314475873, "grad_norm": 20.423635482788086, "learning_rate": 5e-06, "loss": 0.4052, "num_input_tokens_seen": 340531168, "step": 5431 }, { "epoch": 18.073211314475873, "loss": 0.36848753690719604, "loss_ce": 3.0169260298862355e-06, "loss_iou": 0.140625, "loss_num": 0.0172119140625, "loss_xval": 0.369140625, "num_input_tokens_seen": 340531168, "step": 5431 }, { "epoch": 18.076539101497506, "grad_norm": 56.764549255371094, "learning_rate": 5e-06, "loss": 0.5903, "num_input_tokens_seen": 340595088, "step": 5432 }, { "epoch": 18.076539101497506, "loss": 0.6541837453842163, "loss_ce": 8.911191798688378e-06, "loss_iou": 0.26953125, "loss_num": 0.0228271484375, "loss_xval": 0.65234375, "num_input_tokens_seen": 340595088, "step": 5432 }, { "epoch": 18.079866888519135, "grad_norm": 24.349660873413086, "learning_rate": 5e-06, "loss": 0.4181, "num_input_tokens_seen": 340657508, "step": 5433 }, { "epoch": 18.079866888519135, "loss": 0.48871803283691406, "loss_ce": 0.0005588473286479712, "loss_iou": 0.1591796875, "loss_num": 0.0341796875, "loss_xval": 0.48828125, "num_input_tokens_seen": 340657508, "step": 5433 }, { "epoch": 18.083194675540767, "grad_norm": 18.12993621826172, "learning_rate": 5e-06, "loss": 0.1827, "num_input_tokens_seen": 340719492, "step": 5434 }, { "epoch": 18.083194675540767, "loss": 0.16662748157978058, "loss_ce": 1.5116592066988233e-06, "loss_iou": 0.05712890625, "loss_num": 0.01055908203125, "loss_xval": 0.1669921875, "num_input_tokens_seen": 340719492, "step": 5434 }, { "epoch": 18.086522462562396, "grad_norm": 15.057820320129395, "learning_rate": 5e-06, "loss": 0.3063, "num_input_tokens_seen": 340780944, "step": 5435 }, { "epoch": 18.086522462562396, "loss": 0.3721622824668884, "loss_ce": 4.178194785708911e-07, "loss_iou": 0.162109375, "loss_num": 0.00946044921875, "loss_xval": 0.373046875, "num_input_tokens_seen": 340780944, "step": 5435 }, { "epoch": 18.08985024958403, "grad_norm": 14.718728065490723, "learning_rate": 5e-06, "loss": 0.3636, "num_input_tokens_seen": 340843808, "step": 5436 }, { "epoch": 18.08985024958403, "loss": 0.3516879677772522, "loss_ce": 3.421507244638633e-06, "loss_iou": 0.1435546875, "loss_num": 0.0128173828125, "loss_xval": 0.3515625, "num_input_tokens_seen": 340843808, "step": 5436 }, { "epoch": 18.093178036605657, "grad_norm": 7.003665447235107, "learning_rate": 5e-06, "loss": 0.3046, "num_input_tokens_seen": 340907564, "step": 5437 }, { "epoch": 18.093178036605657, "loss": 0.3795246183872223, "loss_ce": 7.985772754182108e-06, "loss_iou": 0.1591796875, "loss_num": 0.0123291015625, "loss_xval": 0.37890625, "num_input_tokens_seen": 340907564, "step": 5437 }, { "epoch": 18.09650582362729, "grad_norm": 14.53191089630127, "learning_rate": 5e-06, "loss": 0.3199, "num_input_tokens_seen": 340969496, "step": 5438 }, { "epoch": 18.09650582362729, "loss": 0.3720707297325134, "loss_ce": 4.171825480625557e-07, "loss_iou": 0.1357421875, "loss_num": 0.0198974609375, "loss_xval": 0.37109375, "num_input_tokens_seen": 340969496, "step": 5438 }, { "epoch": 18.09983361064892, "grad_norm": 19.767383575439453, "learning_rate": 5e-06, "loss": 0.2829, "num_input_tokens_seen": 341032876, "step": 5439 }, { "epoch": 18.09983361064892, "loss": 0.3050925135612488, "loss_ce": 2.3558925022371113e-05, "loss_iou": 0.12158203125, "loss_num": 0.0125732421875, "loss_xval": 0.3046875, "num_input_tokens_seen": 341032876, "step": 5439 }, { "epoch": 18.10316139767055, "grad_norm": 23.292461395263672, "learning_rate": 5e-06, "loss": 0.3069, "num_input_tokens_seen": 341095528, "step": 5440 }, { "epoch": 18.10316139767055, "loss": 0.37120378017425537, "loss_ce": 1.8500521036912687e-05, "loss_iou": 0.16796875, "loss_num": 0.006927490234375, "loss_xval": 0.37109375, "num_input_tokens_seen": 341095528, "step": 5440 }, { "epoch": 18.10648918469218, "grad_norm": 9.212130546569824, "learning_rate": 5e-06, "loss": 0.3723, "num_input_tokens_seen": 341158664, "step": 5441 }, { "epoch": 18.10648918469218, "loss": 0.49863043427467346, "loss_ce": 9.529839735478163e-05, "loss_iou": 0.1953125, "loss_num": 0.021484375, "loss_xval": 0.498046875, "num_input_tokens_seen": 341158664, "step": 5441 }, { "epoch": 18.109816971713812, "grad_norm": 18.02276611328125, "learning_rate": 5e-06, "loss": 0.3409, "num_input_tokens_seen": 341220420, "step": 5442 }, { "epoch": 18.109816971713812, "loss": 0.26643651723861694, "loss_ce": 2.7989876798528712e-06, "loss_iou": 0.08984375, "loss_num": 0.017333984375, "loss_xval": 0.265625, "num_input_tokens_seen": 341220420, "step": 5442 }, { "epoch": 18.11314475873544, "grad_norm": 21.585914611816406, "learning_rate": 5e-06, "loss": 0.4978, "num_input_tokens_seen": 341283520, "step": 5443 }, { "epoch": 18.11314475873544, "loss": 0.3291660249233246, "loss_ce": 3.4490847156121163e-06, "loss_iou": 0.134765625, "loss_num": 0.01177978515625, "loss_xval": 0.330078125, "num_input_tokens_seen": 341283520, "step": 5443 }, { "epoch": 18.116472545757073, "grad_norm": 15.376826286315918, "learning_rate": 5e-06, "loss": 0.4774, "num_input_tokens_seen": 341348056, "step": 5444 }, { "epoch": 18.116472545757073, "loss": 0.4542301595211029, "loss_ce": 6.543561539729126e-06, "loss_iou": 0.1865234375, "loss_num": 0.01611328125, "loss_xval": 0.455078125, "num_input_tokens_seen": 341348056, "step": 5444 }, { "epoch": 18.119800332778702, "grad_norm": 7.002047061920166, "learning_rate": 5e-06, "loss": 0.4095, "num_input_tokens_seen": 341408624, "step": 5445 }, { "epoch": 18.119800332778702, "loss": 0.5621348023414612, "loss_ce": 1.0341086635889951e-06, "loss_iou": 0.2265625, "loss_num": 0.0216064453125, "loss_xval": 0.5625, "num_input_tokens_seen": 341408624, "step": 5445 }, { "epoch": 18.123128119800334, "grad_norm": 67.28253936767578, "learning_rate": 5e-06, "loss": 0.4678, "num_input_tokens_seen": 341470896, "step": 5446 }, { "epoch": 18.123128119800334, "loss": 0.3884289860725403, "loss_ce": 1.2532320852187695e-06, "loss_iou": 0.146484375, "loss_num": 0.0191650390625, "loss_xval": 0.388671875, "num_input_tokens_seen": 341470896, "step": 5446 }, { "epoch": 18.126455906821963, "grad_norm": 19.37444305419922, "learning_rate": 5e-06, "loss": 0.5131, "num_input_tokens_seen": 341533940, "step": 5447 }, { "epoch": 18.126455906821963, "loss": 0.41699713468551636, "loss_ce": 4.947818979417207e-06, "loss_iou": 0.119140625, "loss_num": 0.03564453125, "loss_xval": 0.41796875, "num_input_tokens_seen": 341533940, "step": 5447 }, { "epoch": 18.129783693843596, "grad_norm": 23.817306518554688, "learning_rate": 5e-06, "loss": 0.648, "num_input_tokens_seen": 341596176, "step": 5448 }, { "epoch": 18.129783693843596, "loss": 0.7506726980209351, "loss_ce": 1.2877861763627152e-06, "loss_iou": 0.3046875, "loss_num": 0.02783203125, "loss_xval": 0.75, "num_input_tokens_seen": 341596176, "step": 5448 }, { "epoch": 18.133111480865225, "grad_norm": 7.3077802658081055, "learning_rate": 5e-06, "loss": 0.3323, "num_input_tokens_seen": 341659016, "step": 5449 }, { "epoch": 18.133111480865225, "loss": 0.2874765992164612, "loss_ce": 1.0228536666545551e-06, "loss_iou": 0.1123046875, "loss_num": 0.01263427734375, "loss_xval": 0.287109375, "num_input_tokens_seen": 341659016, "step": 5449 }, { "epoch": 18.136439267886857, "grad_norm": 8.808711051940918, "learning_rate": 5e-06, "loss": 0.5291, "num_input_tokens_seen": 341722804, "step": 5450 }, { "epoch": 18.136439267886857, "loss": 0.38067686557769775, "loss_ce": 6.055555559214554e-07, "loss_iou": 0.1572265625, "loss_num": 0.013427734375, "loss_xval": 0.380859375, "num_input_tokens_seen": 341722804, "step": 5450 }, { "epoch": 18.139767054908486, "grad_norm": 15.449837684631348, "learning_rate": 5e-06, "loss": 0.4809, "num_input_tokens_seen": 341786180, "step": 5451 }, { "epoch": 18.139767054908486, "loss": 0.4732706546783447, "loss_ce": 4.071293005836196e-06, "loss_iou": 0.1875, "loss_num": 0.0194091796875, "loss_xval": 0.47265625, "num_input_tokens_seen": 341786180, "step": 5451 }, { "epoch": 18.143094841930118, "grad_norm": 25.364944458007812, "learning_rate": 5e-06, "loss": 0.3172, "num_input_tokens_seen": 341848996, "step": 5452 }, { "epoch": 18.143094841930118, "loss": 0.30603092908859253, "loss_ce": 6.853993568256556e-07, "loss_iou": 0.138671875, "loss_num": 0.005584716796875, "loss_xval": 0.306640625, "num_input_tokens_seen": 341848996, "step": 5452 }, { "epoch": 18.146422628951747, "grad_norm": 14.5830078125, "learning_rate": 5e-06, "loss": 0.3227, "num_input_tokens_seen": 341910264, "step": 5453 }, { "epoch": 18.146422628951747, "loss": 0.5670175552368164, "loss_ce": 9.006421350932214e-07, "loss_iou": 0.2138671875, "loss_num": 0.0277099609375, "loss_xval": 0.56640625, "num_input_tokens_seen": 341910264, "step": 5453 }, { "epoch": 18.14975041597338, "grad_norm": 9.916094779968262, "learning_rate": 5e-06, "loss": 0.4228, "num_input_tokens_seen": 341973908, "step": 5454 }, { "epoch": 18.14975041597338, "loss": 0.5382665991783142, "loss_ce": 0.00015012291260063648, "loss_iou": 0.1904296875, "loss_num": 0.031494140625, "loss_xval": 0.5390625, "num_input_tokens_seen": 341973908, "step": 5454 }, { "epoch": 18.153078202995008, "grad_norm": 22.726438522338867, "learning_rate": 5e-06, "loss": 0.3941, "num_input_tokens_seen": 342036384, "step": 5455 }, { "epoch": 18.153078202995008, "loss": 0.4471204876899719, "loss_ce": 5.323095683706924e-05, "loss_iou": 0.177734375, "loss_num": 0.018310546875, "loss_xval": 0.447265625, "num_input_tokens_seen": 342036384, "step": 5455 }, { "epoch": 18.15640599001664, "grad_norm": 37.55530548095703, "learning_rate": 5e-06, "loss": 0.4336, "num_input_tokens_seen": 342098940, "step": 5456 }, { "epoch": 18.15640599001664, "loss": 0.37766602635383606, "loss_ce": 4.1488183342153206e-05, "loss_iou": 0.146484375, "loss_num": 0.0169677734375, "loss_xval": 0.376953125, "num_input_tokens_seen": 342098940, "step": 5456 }, { "epoch": 18.15973377703827, "grad_norm": 37.47508239746094, "learning_rate": 5e-06, "loss": 0.4553, "num_input_tokens_seen": 342162716, "step": 5457 }, { "epoch": 18.15973377703827, "loss": 0.33642709255218506, "loss_ce": 1.3104798881613533e-06, "loss_iou": 0.1513671875, "loss_num": 0.00677490234375, "loss_xval": 0.3359375, "num_input_tokens_seen": 342162716, "step": 5457 }, { "epoch": 18.163061564059902, "grad_norm": 21.16477394104004, "learning_rate": 5e-06, "loss": 0.3606, "num_input_tokens_seen": 342224360, "step": 5458 }, { "epoch": 18.163061564059902, "loss": 0.3486350178718567, "loss_ce": 2.183131073252298e-06, "loss_iou": 0.08154296875, "loss_num": 0.037109375, "loss_xval": 0.34765625, "num_input_tokens_seen": 342224360, "step": 5458 }, { "epoch": 18.16638935108153, "grad_norm": 5.912932395935059, "learning_rate": 5e-06, "loss": 0.2201, "num_input_tokens_seen": 342286304, "step": 5459 }, { "epoch": 18.16638935108153, "loss": 0.10694971680641174, "loss_ce": 8.687924264449975e-07, "loss_iou": 0.034423828125, "loss_num": 0.00762939453125, "loss_xval": 0.10693359375, "num_input_tokens_seen": 342286304, "step": 5459 }, { "epoch": 18.169717138103163, "grad_norm": 14.831888198852539, "learning_rate": 5e-06, "loss": 0.3933, "num_input_tokens_seen": 342349996, "step": 5460 }, { "epoch": 18.169717138103163, "loss": 0.4031672477722168, "loss_ce": 6.0539005062310025e-05, "loss_iou": 0.1748046875, "loss_num": 0.0106201171875, "loss_xval": 0.40234375, "num_input_tokens_seen": 342349996, "step": 5460 }, { "epoch": 18.173044925124792, "grad_norm": 13.658722877502441, "learning_rate": 5e-06, "loss": 0.4433, "num_input_tokens_seen": 342413112, "step": 5461 }, { "epoch": 18.173044925124792, "loss": 0.480488121509552, "loss_ce": 1.9339218852110207e-05, "loss_iou": 0.1875, "loss_num": 0.02099609375, "loss_xval": 0.48046875, "num_input_tokens_seen": 342413112, "step": 5461 }, { "epoch": 18.176372712146424, "grad_norm": 6.284977912902832, "learning_rate": 5e-06, "loss": 0.4141, "num_input_tokens_seen": 342473516, "step": 5462 }, { "epoch": 18.176372712146424, "loss": 0.6364538669586182, "loss_ce": 4.0279650420416147e-05, "loss_iou": 0.24609375, "loss_num": 0.028564453125, "loss_xval": 0.63671875, "num_input_tokens_seen": 342473516, "step": 5462 }, { "epoch": 18.179700499168053, "grad_norm": 9.871095657348633, "learning_rate": 5e-06, "loss": 0.3171, "num_input_tokens_seen": 342536336, "step": 5463 }, { "epoch": 18.179700499168053, "loss": 0.3509555757045746, "loss_ce": 3.42074395121017e-06, "loss_iou": 0.11376953125, "loss_num": 0.0245361328125, "loss_xval": 0.3515625, "num_input_tokens_seen": 342536336, "step": 5463 }, { "epoch": 18.183028286189685, "grad_norm": 11.916340827941895, "learning_rate": 5e-06, "loss": 0.3975, "num_input_tokens_seen": 342599256, "step": 5464 }, { "epoch": 18.183028286189685, "loss": 0.4188011884689331, "loss_ce": 8.4652092482429e-06, "loss_iou": 0.15234375, "loss_num": 0.0228271484375, "loss_xval": 0.41796875, "num_input_tokens_seen": 342599256, "step": 5464 }, { "epoch": 18.186356073211314, "grad_norm": 14.2304048538208, "learning_rate": 5e-06, "loss": 0.3151, "num_input_tokens_seen": 342662384, "step": 5465 }, { "epoch": 18.186356073211314, "loss": 0.46528133749961853, "loss_ce": 0.00013241247506812215, "loss_iou": 0.1953125, "loss_num": 0.01470947265625, "loss_xval": 0.46484375, "num_input_tokens_seen": 342662384, "step": 5465 }, { "epoch": 18.189683860232947, "grad_norm": 12.213899612426758, "learning_rate": 5e-06, "loss": 0.4228, "num_input_tokens_seen": 342724876, "step": 5466 }, { "epoch": 18.189683860232947, "loss": 0.4826676845550537, "loss_ce": 1.6810668057587463e-06, "loss_iou": 0.1845703125, "loss_num": 0.0225830078125, "loss_xval": 0.482421875, "num_input_tokens_seen": 342724876, "step": 5466 }, { "epoch": 18.193011647254576, "grad_norm": 10.118507385253906, "learning_rate": 5e-06, "loss": 0.2934, "num_input_tokens_seen": 342787440, "step": 5467 }, { "epoch": 18.193011647254576, "loss": 0.2851274013519287, "loss_ce": 3.21882835123688e-05, "loss_iou": 0.11328125, "loss_num": 0.01177978515625, "loss_xval": 0.28515625, "num_input_tokens_seen": 342787440, "step": 5467 }, { "epoch": 18.196339434276208, "grad_norm": 6.636274337768555, "learning_rate": 5e-06, "loss": 0.3101, "num_input_tokens_seen": 342850416, "step": 5468 }, { "epoch": 18.196339434276208, "loss": 0.39892706274986267, "loss_ce": 3.1779767596162856e-05, "loss_iou": 0.1513671875, "loss_num": 0.0191650390625, "loss_xval": 0.3984375, "num_input_tokens_seen": 342850416, "step": 5468 }, { "epoch": 18.199667221297837, "grad_norm": 37.55901336669922, "learning_rate": 5e-06, "loss": 0.4166, "num_input_tokens_seen": 342913816, "step": 5469 }, { "epoch": 18.199667221297837, "loss": 0.5284453630447388, "loss_ce": 3.0030664674995933e-06, "loss_iou": 0.2177734375, "loss_num": 0.0186767578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 342913816, "step": 5469 }, { "epoch": 18.20299500831947, "grad_norm": 46.97545623779297, "learning_rate": 5e-06, "loss": 0.5185, "num_input_tokens_seen": 342975500, "step": 5470 }, { "epoch": 18.20299500831947, "loss": 0.2882694602012634, "loss_ce": 4.1943198425542505e-07, "loss_iou": 0.11474609375, "loss_num": 0.01165771484375, "loss_xval": 0.2890625, "num_input_tokens_seen": 342975500, "step": 5470 }, { "epoch": 18.206322795341098, "grad_norm": 31.186826705932617, "learning_rate": 5e-06, "loss": 0.3268, "num_input_tokens_seen": 343038200, "step": 5471 }, { "epoch": 18.206322795341098, "loss": 0.3851022720336914, "loss_ce": 9.59956878432422e-07, "loss_iou": 0.17578125, "loss_num": 0.006866455078125, "loss_xval": 0.384765625, "num_input_tokens_seen": 343038200, "step": 5471 }, { "epoch": 18.20965058236273, "grad_norm": 15.08152961730957, "learning_rate": 5e-06, "loss": 0.2345, "num_input_tokens_seen": 343100764, "step": 5472 }, { "epoch": 18.20965058236273, "loss": 0.15391653776168823, "loss_ce": 1.1379215720808133e-06, "loss_iou": 0.059326171875, "loss_num": 0.006988525390625, "loss_xval": 0.154296875, "num_input_tokens_seen": 343100764, "step": 5472 }, { "epoch": 18.21297836938436, "grad_norm": 11.014090538024902, "learning_rate": 5e-06, "loss": 0.4641, "num_input_tokens_seen": 343162668, "step": 5473 }, { "epoch": 18.21297836938436, "loss": 0.5170321464538574, "loss_ce": 3.3877004170790315e-06, "loss_iou": 0.1875, "loss_num": 0.028564453125, "loss_xval": 0.515625, "num_input_tokens_seen": 343162668, "step": 5473 }, { "epoch": 18.21630615640599, "grad_norm": 8.167227745056152, "learning_rate": 5e-06, "loss": 0.38, "num_input_tokens_seen": 343224880, "step": 5474 }, { "epoch": 18.21630615640599, "loss": 0.23496848344802856, "loss_ce": 1.3648857930093072e-05, "loss_iou": 0.08349609375, "loss_num": 0.013671875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 343224880, "step": 5474 }, { "epoch": 18.21963394342762, "grad_norm": 12.69473648071289, "learning_rate": 5e-06, "loss": 0.3536, "num_input_tokens_seen": 343287372, "step": 5475 }, { "epoch": 18.21963394342762, "loss": 0.4609121084213257, "loss_ce": 5.1226438699814025e-06, "loss_iou": 0.189453125, "loss_num": 0.016357421875, "loss_xval": 0.4609375, "num_input_tokens_seen": 343287372, "step": 5475 }, { "epoch": 18.222961730449253, "grad_norm": 17.9837646484375, "learning_rate": 5e-06, "loss": 0.3522, "num_input_tokens_seen": 343351492, "step": 5476 }, { "epoch": 18.222961730449253, "loss": 0.39429575204849243, "loss_ce": 8.657049875182565e-06, "loss_iou": 0.1640625, "loss_num": 0.0133056640625, "loss_xval": 0.39453125, "num_input_tokens_seen": 343351492, "step": 5476 }, { "epoch": 18.22628951747088, "grad_norm": 24.108503341674805, "learning_rate": 5e-06, "loss": 0.4655, "num_input_tokens_seen": 343414860, "step": 5477 }, { "epoch": 18.22628951747088, "loss": 0.5391855835914612, "loss_ce": 9.935367870639311e-07, "loss_iou": 0.2333984375, "loss_num": 0.01446533203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 343414860, "step": 5477 }, { "epoch": 18.229617304492514, "grad_norm": 39.911067962646484, "learning_rate": 5e-06, "loss": 0.4165, "num_input_tokens_seen": 343478996, "step": 5478 }, { "epoch": 18.229617304492514, "loss": 0.4792485237121582, "loss_ce": 4.553737653623102e-07, "loss_iou": 0.203125, "loss_num": 0.0146484375, "loss_xval": 0.478515625, "num_input_tokens_seen": 343478996, "step": 5478 }, { "epoch": 18.232945091514143, "grad_norm": 42.68018341064453, "learning_rate": 5e-06, "loss": 0.4616, "num_input_tokens_seen": 343542688, "step": 5479 }, { "epoch": 18.232945091514143, "loss": 0.6440466046333313, "loss_ce": 3.636715746324626e-06, "loss_iou": 0.26953125, "loss_num": 0.0205078125, "loss_xval": 0.64453125, "num_input_tokens_seen": 343542688, "step": 5479 }, { "epoch": 18.236272878535775, "grad_norm": 20.39773941040039, "learning_rate": 5e-06, "loss": 0.3143, "num_input_tokens_seen": 343604652, "step": 5480 }, { "epoch": 18.236272878535775, "loss": 0.2996106743812561, "loss_ce": 1.9579845684347674e-05, "loss_iou": 0.11328125, "loss_num": 0.01470947265625, "loss_xval": 0.298828125, "num_input_tokens_seen": 343604652, "step": 5480 }, { "epoch": 18.239600665557404, "grad_norm": 5.570769309997559, "learning_rate": 5e-06, "loss": 0.2904, "num_input_tokens_seen": 343666752, "step": 5481 }, { "epoch": 18.239600665557404, "loss": 0.1734047830104828, "loss_ce": 3.907865902874619e-06, "loss_iou": 0.064453125, "loss_num": 0.009033203125, "loss_xval": 0.173828125, "num_input_tokens_seen": 343666752, "step": 5481 }, { "epoch": 18.242928452579037, "grad_norm": 12.973108291625977, "learning_rate": 5e-06, "loss": 0.4919, "num_input_tokens_seen": 343728656, "step": 5482 }, { "epoch": 18.242928452579037, "loss": 0.5229513645172119, "loss_ce": 2.1717128220188897e-06, "loss_iou": 0.1875, "loss_num": 0.0294189453125, "loss_xval": 0.5234375, "num_input_tokens_seen": 343728656, "step": 5482 }, { "epoch": 18.246256239600665, "grad_norm": 15.310547828674316, "learning_rate": 5e-06, "loss": 0.3509, "num_input_tokens_seen": 343791312, "step": 5483 }, { "epoch": 18.246256239600665, "loss": 0.3852550983428955, "loss_ce": 1.1939318937947974e-06, "loss_iou": 0.1572265625, "loss_num": 0.01422119140625, "loss_xval": 0.384765625, "num_input_tokens_seen": 343791312, "step": 5483 }, { "epoch": 18.249584026622298, "grad_norm": 12.41771125793457, "learning_rate": 5e-06, "loss": 0.3374, "num_input_tokens_seen": 343854972, "step": 5484 }, { "epoch": 18.249584026622298, "loss": 0.33533531427383423, "loss_ce": 8.177197742043063e-06, "loss_iou": 0.1435546875, "loss_num": 0.009765625, "loss_xval": 0.3359375, "num_input_tokens_seen": 343854972, "step": 5484 }, { "epoch": 18.252911813643927, "grad_norm": 10.137232780456543, "learning_rate": 5e-06, "loss": 0.3581, "num_input_tokens_seen": 343917740, "step": 5485 }, { "epoch": 18.252911813643927, "loss": 0.3938943147659302, "loss_ce": 0.00015653669834136963, "loss_iou": 0.1259765625, "loss_num": 0.0283203125, "loss_xval": 0.39453125, "num_input_tokens_seen": 343917740, "step": 5485 }, { "epoch": 18.25623960066556, "grad_norm": 12.834590911865234, "learning_rate": 5e-06, "loss": 0.479, "num_input_tokens_seen": 343981464, "step": 5486 }, { "epoch": 18.25623960066556, "loss": 0.40710484981536865, "loss_ce": 3.5293300015837303e-07, "loss_iou": 0.181640625, "loss_num": 0.0086669921875, "loss_xval": 0.40625, "num_input_tokens_seen": 343981464, "step": 5486 }, { "epoch": 18.259567387687188, "grad_norm": 9.777318000793457, "learning_rate": 5e-06, "loss": 0.3355, "num_input_tokens_seen": 344044100, "step": 5487 }, { "epoch": 18.259567387687188, "loss": 0.28845274448394775, "loss_ce": 5.94584150803712e-07, "loss_iou": 0.1259765625, "loss_num": 0.007110595703125, "loss_xval": 0.2890625, "num_input_tokens_seen": 344044100, "step": 5487 }, { "epoch": 18.26289517470882, "grad_norm": 6.28300142288208, "learning_rate": 5e-06, "loss": 0.2653, "num_input_tokens_seen": 344106988, "step": 5488 }, { "epoch": 18.26289517470882, "loss": 0.24820159375667572, "loss_ce": 2.13200746657094e-06, "loss_iou": 0.09130859375, "loss_num": 0.01312255859375, "loss_xval": 0.248046875, "num_input_tokens_seen": 344106988, "step": 5488 }, { "epoch": 18.26622296173045, "grad_norm": 18.498157501220703, "learning_rate": 5e-06, "loss": 0.5153, "num_input_tokens_seen": 344171812, "step": 5489 }, { "epoch": 18.26622296173045, "loss": 0.5307681560516357, "loss_ce": 6.43220892015961e-06, "loss_iou": 0.185546875, "loss_num": 0.031982421875, "loss_xval": 0.53125, "num_input_tokens_seen": 344171812, "step": 5489 }, { "epoch": 18.26955074875208, "grad_norm": 32.28453826904297, "learning_rate": 5e-06, "loss": 0.4362, "num_input_tokens_seen": 344234896, "step": 5490 }, { "epoch": 18.26955074875208, "loss": 0.3599873483181, "loss_ce": 1.986894858418964e-06, "loss_iou": 0.1396484375, "loss_num": 0.01611328125, "loss_xval": 0.359375, "num_input_tokens_seen": 344234896, "step": 5490 }, { "epoch": 18.27287853577371, "grad_norm": 19.478139877319336, "learning_rate": 5e-06, "loss": 0.2891, "num_input_tokens_seen": 344296928, "step": 5491 }, { "epoch": 18.27287853577371, "loss": 0.23884108662605286, "loss_ce": 4.4854044972453266e-05, "loss_iou": 0.09521484375, "loss_num": 0.00970458984375, "loss_xval": 0.2392578125, "num_input_tokens_seen": 344296928, "step": 5491 }, { "epoch": 18.276206322795343, "grad_norm": 26.559572219848633, "learning_rate": 5e-06, "loss": 0.2991, "num_input_tokens_seen": 344358956, "step": 5492 }, { "epoch": 18.276206322795343, "loss": 0.35277244448661804, "loss_ce": 1.976652129087597e-05, "loss_iou": 0.1494140625, "loss_num": 0.0107421875, "loss_xval": 0.353515625, "num_input_tokens_seen": 344358956, "step": 5492 }, { "epoch": 18.27953410981697, "grad_norm": 12.5383939743042, "learning_rate": 5e-06, "loss": 0.3281, "num_input_tokens_seen": 344422240, "step": 5493 }, { "epoch": 18.27953410981697, "loss": 0.2975860834121704, "loss_ce": 3.971199839725159e-05, "loss_iou": 0.1220703125, "loss_num": 0.0107421875, "loss_xval": 0.296875, "num_input_tokens_seen": 344422240, "step": 5493 }, { "epoch": 18.282861896838604, "grad_norm": 20.09958267211914, "learning_rate": 5e-06, "loss": 0.2617, "num_input_tokens_seen": 344484412, "step": 5494 }, { "epoch": 18.282861896838604, "loss": 0.2894911766052246, "loss_ce": 1.4192848993843654e-06, "loss_iou": 0.08935546875, "loss_num": 0.0220947265625, "loss_xval": 0.2890625, "num_input_tokens_seen": 344484412, "step": 5494 }, { "epoch": 18.286189683860233, "grad_norm": 13.707408905029297, "learning_rate": 5e-06, "loss": 0.4031, "num_input_tokens_seen": 344548260, "step": 5495 }, { "epoch": 18.286189683860233, "loss": 0.26120030879974365, "loss_ce": 6.13778320257552e-05, "loss_iou": 0.10986328125, "loss_num": 0.00823974609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 344548260, "step": 5495 }, { "epoch": 18.289517470881865, "grad_norm": 22.93804931640625, "learning_rate": 5e-06, "loss": 0.5048, "num_input_tokens_seen": 344610448, "step": 5496 }, { "epoch": 18.289517470881865, "loss": 0.5288937091827393, "loss_ce": 0.0011837404454126954, "loss_iou": 0.1796875, "loss_num": 0.03369140625, "loss_xval": 0.52734375, "num_input_tokens_seen": 344610448, "step": 5496 }, { "epoch": 18.292845257903494, "grad_norm": 20.32354164123535, "learning_rate": 5e-06, "loss": 0.4116, "num_input_tokens_seen": 344673360, "step": 5497 }, { "epoch": 18.292845257903494, "loss": 0.24908505380153656, "loss_ce": 5.795803872388205e-07, "loss_iou": 0.0869140625, "loss_num": 0.01513671875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 344673360, "step": 5497 }, { "epoch": 18.296173044925126, "grad_norm": 29.490699768066406, "learning_rate": 5e-06, "loss": 0.4723, "num_input_tokens_seen": 344737388, "step": 5498 }, { "epoch": 18.296173044925126, "loss": 0.5562209486961365, "loss_ce": 0.0003432741214055568, "loss_iou": 0.2412109375, "loss_num": 0.0146484375, "loss_xval": 0.5546875, "num_input_tokens_seen": 344737388, "step": 5498 }, { "epoch": 18.299500831946755, "grad_norm": 30.08915901184082, "learning_rate": 5e-06, "loss": 0.5078, "num_input_tokens_seen": 344799976, "step": 5499 }, { "epoch": 18.299500831946755, "loss": 0.5181916356086731, "loss_ce": 3.1662139008403756e-06, "loss_iou": 0.2060546875, "loss_num": 0.02099609375, "loss_xval": 0.51953125, "num_input_tokens_seen": 344799976, "step": 5499 }, { "epoch": 18.302828618968388, "grad_norm": 27.45698356628418, "learning_rate": 5e-06, "loss": 0.5355, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "eval_seeclick_CIoU": 0.03518588934093714, "eval_seeclick_GIoU": 0.026860845740884542, "eval_seeclick_IoU": 0.16593646258115768, "eval_seeclick_MAE_all": 0.1764061227440834, "eval_seeclick_MAE_h": 0.07204640097916126, "eval_seeclick_MAE_w": 0.13515771180391312, "eval_seeclick_MAE_x_boxes": 0.21954002231359482, "eval_seeclick_MAE_y_boxes": 0.18927235156297684, "eval_seeclick_NUM_probability": 0.9999709129333496, "eval_seeclick_inside_bbox": 0.17812500149011612, "eval_seeclick_loss": 3.0382297039031982, "eval_seeclick_loss_ce": 0.17308932542800903, "eval_seeclick_loss_iou": 0.98828125, "eval_seeclick_loss_num": 0.1771392822265625, "eval_seeclick_loss_xval": 2.86181640625, "eval_seeclick_runtime": 68.2911, "eval_seeclick_samples_per_second": 0.688, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "eval_icons_CIoU": -0.04412840120494366, "eval_icons_GIoU": 0.04237383417785168, "eval_icons_IoU": 0.12914633005857468, "eval_icons_MAE_all": 0.1975475177168846, "eval_icons_MAE_h": 0.17770272493362427, "eval_icons_MAE_w": 0.19729211926460266, "eval_icons_MAE_x_boxes": 0.15093794092535973, "eval_icons_MAE_y_boxes": 0.09561797231435776, "eval_icons_NUM_probability": 0.9999881684780121, "eval_icons_inside_bbox": 0.2326388955116272, "eval_icons_loss": 2.8356268405914307, "eval_icons_loss_ce": 2.100146275552106e-06, "eval_icons_loss_iou": 0.957763671875, "eval_icons_loss_num": 0.187591552734375, "eval_icons_loss_xval": 2.85400390625, "eval_icons_runtime": 67.995, "eval_icons_samples_per_second": 0.735, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "eval_screenspot_CIoU": 0.1715420534213384, "eval_screenspot_GIoU": 0.20623134076595306, "eval_screenspot_IoU": 0.2867853840192159, "eval_screenspot_MAE_all": 0.11568833390871684, "eval_screenspot_MAE_h": 0.05918771276871363, "eval_screenspot_MAE_w": 0.0995996097723643, "eval_screenspot_MAE_x_boxes": 0.16442308823267618, "eval_screenspot_MAE_y_boxes": 0.08821603159109752, "eval_screenspot_NUM_probability": 0.9999947945276896, "eval_screenspot_inside_bbox": 0.512500007947286, "eval_screenspot_loss": 2.205409288406372, "eval_screenspot_loss_ce": 1.9613360109360656e-06, "eval_screenspot_loss_iou": 0.806396484375, "eval_screenspot_loss_num": 0.1244354248046875, "eval_screenspot_loss_xval": 2.2347005208333335, "eval_screenspot_runtime": 132.7748, "eval_screenspot_samples_per_second": 0.67, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "eval_compot_CIoU": 0.1808655858039856, "eval_compot_GIoU": 0.22777745872735977, "eval_compot_IoU": 0.3092515766620636, "eval_compot_MAE_all": 0.12486755102872849, "eval_compot_MAE_h": 0.053604972548782825, "eval_compot_MAE_w": 0.1350414827466011, "eval_compot_MAE_x_boxes": 0.10287221893668175, "eval_compot_MAE_y_boxes": 0.11175283417105675, "eval_compot_NUM_probability": 0.9999971389770508, "eval_compot_inside_bbox": 0.4565972238779068, "eval_compot_loss": 2.1354284286499023, "eval_compot_loss_ce": 0.008879796136170626, "eval_compot_loss_iou": 0.7711181640625, "eval_compot_loss_num": 0.12864303588867188, "eval_compot_loss_xval": 2.185791015625, "eval_compot_runtime": 74.3231, "eval_compot_samples_per_second": 0.673, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "eval_custom_ui_MAE_all": 0.061347829177975655, "eval_custom_ui_MAE_x": 0.07210716791450977, "eval_custom_ui_MAE_y": 0.05058848485350609, "eval_custom_ui_NUM_probability": 0.9999988377094269, "eval_custom_ui_loss": 0.3041039705276489, "eval_custom_ui_loss_ce": 2.1143370645404502e-06, "eval_custom_ui_loss_num": 0.06542205810546875, "eval_custom_ui_loss_xval": 0.326934814453125, "eval_custom_ui_runtime": 51.3152, "eval_custom_ui_samples_per_second": 0.974, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.302828618968388, "loss": 0.3313624858856201, "loss_ce": 2.624964963615639e-06, "loss_iou": 0.0, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 344863212, "step": 5500 }, { "epoch": 18.306156405990016, "grad_norm": 26.72846221923828, "learning_rate": 5e-06, "loss": 0.3737, "num_input_tokens_seen": 344926160, "step": 5501 }, { "epoch": 18.306156405990016, "loss": 0.37475651502609253, "loss_ce": 6.261934117901546e-07, "loss_iou": 0.158203125, "loss_num": 0.0118408203125, "loss_xval": 0.375, "num_input_tokens_seen": 344926160, "step": 5501 }, { "epoch": 18.30948419301165, "grad_norm": 9.03872299194336, "learning_rate": 5e-06, "loss": 0.4143, "num_input_tokens_seen": 344989424, "step": 5502 }, { "epoch": 18.30948419301165, "loss": 0.4228369891643524, "loss_ce": 6.841813160463062e-07, "loss_iou": 0.138671875, "loss_num": 0.02880859375, "loss_xval": 0.421875, "num_input_tokens_seen": 344989424, "step": 5502 }, { "epoch": 18.312811980033278, "grad_norm": 18.037784576416016, "learning_rate": 5e-06, "loss": 0.3478, "num_input_tokens_seen": 345052336, "step": 5503 }, { "epoch": 18.312811980033278, "loss": 0.28082603216171265, "loss_ce": 3.2695111258362886e-06, "loss_iou": 0.07861328125, "loss_num": 0.0247802734375, "loss_xval": 0.28125, "num_input_tokens_seen": 345052336, "step": 5503 }, { "epoch": 18.31613976705491, "grad_norm": 21.866823196411133, "learning_rate": 5e-06, "loss": 0.3875, "num_input_tokens_seen": 345114844, "step": 5504 }, { "epoch": 18.31613976705491, "loss": 0.46460068225860596, "loss_ce": 1.0708563422667794e-06, "loss_iou": 0.1669921875, "loss_num": 0.026123046875, "loss_xval": 0.46484375, "num_input_tokens_seen": 345114844, "step": 5504 }, { "epoch": 18.31946755407654, "grad_norm": 23.623886108398438, "learning_rate": 5e-06, "loss": 0.2754, "num_input_tokens_seen": 345177468, "step": 5505 }, { "epoch": 18.31946755407654, "loss": 0.1738661229610443, "loss_ce": 7.471047865692526e-06, "loss_iou": 0.0703125, "loss_num": 0.006622314453125, "loss_xval": 0.173828125, "num_input_tokens_seen": 345177468, "step": 5505 }, { "epoch": 18.32279534109817, "grad_norm": 20.521316528320312, "learning_rate": 5e-06, "loss": 0.4439, "num_input_tokens_seen": 345241084, "step": 5506 }, { "epoch": 18.32279534109817, "loss": 0.34086132049560547, "loss_ce": 1.0463559192430694e-05, "loss_iou": 0.1318359375, "loss_num": 0.01544189453125, "loss_xval": 0.341796875, "num_input_tokens_seen": 345241084, "step": 5506 }, { "epoch": 18.3261231281198, "grad_norm": 23.646696090698242, "learning_rate": 5e-06, "loss": 0.4927, "num_input_tokens_seen": 345304672, "step": 5507 }, { "epoch": 18.3261231281198, "loss": 0.6569865942001343, "loss_ce": 4.187670128885657e-06, "loss_iou": 0.267578125, "loss_num": 0.024169921875, "loss_xval": 0.65625, "num_input_tokens_seen": 345304672, "step": 5507 }, { "epoch": 18.329450915141432, "grad_norm": 18.111787796020508, "learning_rate": 5e-06, "loss": 0.3595, "num_input_tokens_seen": 345366800, "step": 5508 }, { "epoch": 18.329450915141432, "loss": 0.3290126621723175, "loss_ce": 2.6516263460507616e-06, "loss_iou": 0.10107421875, "loss_num": 0.0255126953125, "loss_xval": 0.328125, "num_input_tokens_seen": 345366800, "step": 5508 }, { "epoch": 18.33277870216306, "grad_norm": 17.89556312561035, "learning_rate": 5e-06, "loss": 0.5157, "num_input_tokens_seen": 345428964, "step": 5509 }, { "epoch": 18.33277870216306, "loss": 0.46038949489593506, "loss_ce": 1.303486442338908e-06, "loss_iou": 0.181640625, "loss_num": 0.01953125, "loss_xval": 0.4609375, "num_input_tokens_seen": 345428964, "step": 5509 }, { "epoch": 18.336106489184694, "grad_norm": 16.810596466064453, "learning_rate": 5e-06, "loss": 0.4315, "num_input_tokens_seen": 345489580, "step": 5510 }, { "epoch": 18.336106489184694, "loss": 0.462390273809433, "loss_ce": 0.00023209235223475844, "loss_iou": 0.169921875, "loss_num": 0.0244140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 345489580, "step": 5510 }, { "epoch": 18.339434276206322, "grad_norm": 11.543886184692383, "learning_rate": 5e-06, "loss": 0.4122, "num_input_tokens_seen": 345553192, "step": 5511 }, { "epoch": 18.339434276206322, "loss": 0.33276665210723877, "loss_ce": 2.9518660085159354e-06, "loss_iou": 0.14453125, "loss_num": 0.00885009765625, "loss_xval": 0.33203125, "num_input_tokens_seen": 345553192, "step": 5511 }, { "epoch": 18.342762063227955, "grad_norm": 9.142887115478516, "learning_rate": 5e-06, "loss": 0.3285, "num_input_tokens_seen": 345614796, "step": 5512 }, { "epoch": 18.342762063227955, "loss": 0.2015305459499359, "loss_ce": 1.534561306471005e-05, "loss_iou": 0.07958984375, "loss_num": 0.00848388671875, "loss_xval": 0.201171875, "num_input_tokens_seen": 345614796, "step": 5512 }, { "epoch": 18.346089850249584, "grad_norm": 8.568805694580078, "learning_rate": 5e-06, "loss": 0.3443, "num_input_tokens_seen": 345678012, "step": 5513 }, { "epoch": 18.346089850249584, "loss": 0.24399013817310333, "loss_ce": 2.091670012305258e-06, "loss_iou": 0.0966796875, "loss_num": 0.01007080078125, "loss_xval": 0.244140625, "num_input_tokens_seen": 345678012, "step": 5513 }, { "epoch": 18.349417637271216, "grad_norm": 13.21142292022705, "learning_rate": 5e-06, "loss": 0.4904, "num_input_tokens_seen": 345741284, "step": 5514 }, { "epoch": 18.349417637271216, "loss": 0.43439817428588867, "loss_ce": 1.0965626643155701e-05, "loss_iou": 0.1591796875, "loss_num": 0.0235595703125, "loss_xval": 0.43359375, "num_input_tokens_seen": 345741284, "step": 5514 }, { "epoch": 18.352745424292845, "grad_norm": 7.597809314727783, "learning_rate": 5e-06, "loss": 0.3304, "num_input_tokens_seen": 345805060, "step": 5515 }, { "epoch": 18.352745424292845, "loss": 0.3986847400665283, "loss_ce": 3.1322108497988665e-06, "loss_iou": 0.1748046875, "loss_num": 0.00982666015625, "loss_xval": 0.3984375, "num_input_tokens_seen": 345805060, "step": 5515 }, { "epoch": 18.356073211314477, "grad_norm": 17.71733856201172, "learning_rate": 5e-06, "loss": 0.37, "num_input_tokens_seen": 345868124, "step": 5516 }, { "epoch": 18.356073211314477, "loss": 0.43420493602752686, "loss_ce": 8.294074973491661e-07, "loss_iou": 0.193359375, "loss_num": 0.0093994140625, "loss_xval": 0.43359375, "num_input_tokens_seen": 345868124, "step": 5516 }, { "epoch": 18.359400998336106, "grad_norm": 27.92255973815918, "learning_rate": 5e-06, "loss": 0.2776, "num_input_tokens_seen": 345929652, "step": 5517 }, { "epoch": 18.359400998336106, "loss": 0.3100917935371399, "loss_ce": 2.697492391234846e-06, "loss_iou": 0.130859375, "loss_num": 0.00946044921875, "loss_xval": 0.310546875, "num_input_tokens_seen": 345929652, "step": 5517 }, { "epoch": 18.36272878535774, "grad_norm": 30.181795120239258, "learning_rate": 5e-06, "loss": 0.3795, "num_input_tokens_seen": 345993168, "step": 5518 }, { "epoch": 18.36272878535774, "loss": 0.401428759098053, "loss_ce": 5.224245569479535e-07, "loss_iou": 0.16796875, "loss_num": 0.01324462890625, "loss_xval": 0.40234375, "num_input_tokens_seen": 345993168, "step": 5518 }, { "epoch": 18.366056572379367, "grad_norm": 24.42955207824707, "learning_rate": 5e-06, "loss": 0.3364, "num_input_tokens_seen": 346055132, "step": 5519 }, { "epoch": 18.366056572379367, "loss": 0.2799812853336334, "loss_ce": 1.595127173459332e-06, "loss_iou": 0.10986328125, "loss_num": 0.01202392578125, "loss_xval": 0.279296875, "num_input_tokens_seen": 346055132, "step": 5519 }, { "epoch": 18.369384359401, "grad_norm": 11.841567993164062, "learning_rate": 5e-06, "loss": 0.3093, "num_input_tokens_seen": 346117888, "step": 5520 }, { "epoch": 18.369384359401, "loss": 0.36430275440216064, "loss_ce": 0.0005332402070052922, "loss_iou": 0.16015625, "loss_num": 0.008544921875, "loss_xval": 0.36328125, "num_input_tokens_seen": 346117888, "step": 5520 }, { "epoch": 18.37271214642263, "grad_norm": 20.329172134399414, "learning_rate": 5e-06, "loss": 0.4471, "num_input_tokens_seen": 346181520, "step": 5521 }, { "epoch": 18.37271214642263, "loss": 0.5169693231582642, "loss_ce": 1.52543520925974e-06, "loss_iou": 0.1953125, "loss_num": 0.025146484375, "loss_xval": 0.515625, "num_input_tokens_seen": 346181520, "step": 5521 }, { "epoch": 18.37603993344426, "grad_norm": 27.423093795776367, "learning_rate": 5e-06, "loss": 0.4263, "num_input_tokens_seen": 346244200, "step": 5522 }, { "epoch": 18.37603993344426, "loss": 0.45849698781967163, "loss_ce": 9.217816341333673e-07, "loss_iou": 0.2080078125, "loss_num": 0.0086669921875, "loss_xval": 0.458984375, "num_input_tokens_seen": 346244200, "step": 5522 }, { "epoch": 18.37936772046589, "grad_norm": 13.957590103149414, "learning_rate": 5e-06, "loss": 0.2104, "num_input_tokens_seen": 346305704, "step": 5523 }, { "epoch": 18.37936772046589, "loss": 0.20104041695594788, "loss_ce": 2.114380731654819e-05, "loss_iou": 0.0634765625, "loss_num": 0.01483154296875, "loss_xval": 0.201171875, "num_input_tokens_seen": 346305704, "step": 5523 }, { "epoch": 18.382695507487522, "grad_norm": 11.03859806060791, "learning_rate": 5e-06, "loss": 0.3462, "num_input_tokens_seen": 346368716, "step": 5524 }, { "epoch": 18.382695507487522, "loss": 0.4783363342285156, "loss_ce": 3.7998306652298197e-06, "loss_iou": 0.2001953125, "loss_num": 0.0157470703125, "loss_xval": 0.478515625, "num_input_tokens_seen": 346368716, "step": 5524 }, { "epoch": 18.38602329450915, "grad_norm": 10.229231834411621, "learning_rate": 5e-06, "loss": 0.3107, "num_input_tokens_seen": 346431292, "step": 5525 }, { "epoch": 18.38602329450915, "loss": 0.3615732192993164, "loss_ce": 9.352305596621591e-07, "loss_iou": 0.1533203125, "loss_num": 0.0107421875, "loss_xval": 0.361328125, "num_input_tokens_seen": 346431292, "step": 5525 }, { "epoch": 18.389351081530783, "grad_norm": 17.94597625732422, "learning_rate": 5e-06, "loss": 0.409, "num_input_tokens_seen": 346493468, "step": 5526 }, { "epoch": 18.389351081530783, "loss": 0.4743676781654358, "loss_ce": 2.4166972707462264e-06, "loss_iou": 0.1796875, "loss_num": 0.0230712890625, "loss_xval": 0.474609375, "num_input_tokens_seen": 346493468, "step": 5526 }, { "epoch": 18.392678868552412, "grad_norm": 14.441884994506836, "learning_rate": 5e-06, "loss": 0.2789, "num_input_tokens_seen": 346556192, "step": 5527 }, { "epoch": 18.392678868552412, "loss": 0.20691634714603424, "loss_ce": 7.163409009081079e-06, "loss_iou": 0.0673828125, "loss_num": 0.01446533203125, "loss_xval": 0.20703125, "num_input_tokens_seen": 346556192, "step": 5527 }, { "epoch": 18.396006655574045, "grad_norm": 12.60000991821289, "learning_rate": 5e-06, "loss": 0.2525, "num_input_tokens_seen": 346618396, "step": 5528 }, { "epoch": 18.396006655574045, "loss": 0.16599300503730774, "loss_ce": 2.6395457553007873e-07, "loss_iou": 0.05712890625, "loss_num": 0.01043701171875, "loss_xval": 0.166015625, "num_input_tokens_seen": 346618396, "step": 5528 }, { "epoch": 18.399334442595674, "grad_norm": 7.361432075500488, "learning_rate": 5e-06, "loss": 0.3058, "num_input_tokens_seen": 346681616, "step": 5529 }, { "epoch": 18.399334442595674, "loss": 0.33061307668685913, "loss_ce": 8.687231911608251e-07, "loss_iou": 0.1376953125, "loss_num": 0.0108642578125, "loss_xval": 0.330078125, "num_input_tokens_seen": 346681616, "step": 5529 }, { "epoch": 18.402662229617306, "grad_norm": 13.962689399719238, "learning_rate": 5e-06, "loss": 0.3381, "num_input_tokens_seen": 346744768, "step": 5530 }, { "epoch": 18.402662229617306, "loss": 0.4394354820251465, "loss_ce": 1.2864636119047645e-05, "loss_iou": 0.16796875, "loss_num": 0.02099609375, "loss_xval": 0.439453125, "num_input_tokens_seen": 346744768, "step": 5530 }, { "epoch": 18.405990016638935, "grad_norm": 24.180009841918945, "learning_rate": 5e-06, "loss": 0.562, "num_input_tokens_seen": 346807280, "step": 5531 }, { "epoch": 18.405990016638935, "loss": 0.5550621747970581, "loss_ce": 8.421520760748535e-06, "loss_iou": 0.23828125, "loss_num": 0.0157470703125, "loss_xval": 0.5546875, "num_input_tokens_seen": 346807280, "step": 5531 }, { "epoch": 18.409317803660567, "grad_norm": 13.689428329467773, "learning_rate": 5e-06, "loss": 0.3581, "num_input_tokens_seen": 346870136, "step": 5532 }, { "epoch": 18.409317803660567, "loss": 0.4490976929664612, "loss_ce": 9.948801107384497e-07, "loss_iou": 0.1787109375, "loss_num": 0.0184326171875, "loss_xval": 0.44921875, "num_input_tokens_seen": 346870136, "step": 5532 }, { "epoch": 18.412645590682196, "grad_norm": 11.508358001708984, "learning_rate": 5e-06, "loss": 0.3354, "num_input_tokens_seen": 346931432, "step": 5533 }, { "epoch": 18.412645590682196, "loss": 0.34973374009132385, "loss_ce": 2.281664819747675e-06, "loss_iou": 0.06640625, "loss_num": 0.04345703125, "loss_xval": 0.349609375, "num_input_tokens_seen": 346931432, "step": 5533 }, { "epoch": 18.41597337770383, "grad_norm": 9.510260581970215, "learning_rate": 5e-06, "loss": 0.2742, "num_input_tokens_seen": 346993608, "step": 5534 }, { "epoch": 18.41597337770383, "loss": 0.2947089672088623, "loss_ce": 7.263397492351942e-07, "loss_iou": 0.09814453125, "loss_num": 0.0196533203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 346993608, "step": 5534 }, { "epoch": 18.419301164725457, "grad_norm": 19.137643814086914, "learning_rate": 5e-06, "loss": 0.5516, "num_input_tokens_seen": 347056292, "step": 5535 }, { "epoch": 18.419301164725457, "loss": 0.4709489345550537, "loss_ce": 1.6883419675650657e-06, "loss_iou": 0.185546875, "loss_num": 0.0201416015625, "loss_xval": 0.470703125, "num_input_tokens_seen": 347056292, "step": 5535 }, { "epoch": 18.42262895174709, "grad_norm": 11.220930099487305, "learning_rate": 5e-06, "loss": 0.2945, "num_input_tokens_seen": 347117560, "step": 5536 }, { "epoch": 18.42262895174709, "loss": 0.22153542935848236, "loss_ce": 8.329976481036283e-06, "loss_iou": 0.05029296875, "loss_num": 0.024169921875, "loss_xval": 0.2216796875, "num_input_tokens_seen": 347117560, "step": 5536 }, { "epoch": 18.42595673876872, "grad_norm": 10.725313186645508, "learning_rate": 5e-06, "loss": 0.4958, "num_input_tokens_seen": 347181120, "step": 5537 }, { "epoch": 18.42595673876872, "loss": 0.4653027057647705, "loss_ce": 1.2220914413774153e-06, "loss_iou": 0.1943359375, "loss_num": 0.015380859375, "loss_xval": 0.46484375, "num_input_tokens_seen": 347181120, "step": 5537 }, { "epoch": 18.42928452579035, "grad_norm": 14.85632038116455, "learning_rate": 5e-06, "loss": 0.5296, "num_input_tokens_seen": 347245448, "step": 5538 }, { "epoch": 18.42928452579035, "loss": 0.4889456033706665, "loss_ce": 0.00023709710512775928, "loss_iou": 0.169921875, "loss_num": 0.0296630859375, "loss_xval": 0.48828125, "num_input_tokens_seen": 347245448, "step": 5538 }, { "epoch": 18.43261231281198, "grad_norm": 9.224674224853516, "learning_rate": 5e-06, "loss": 0.5708, "num_input_tokens_seen": 347309560, "step": 5539 }, { "epoch": 18.43261231281198, "loss": 0.7034207582473755, "loss_ce": 0.00017366238171234727, "loss_iou": 0.265625, "loss_num": 0.03466796875, "loss_xval": 0.703125, "num_input_tokens_seen": 347309560, "step": 5539 }, { "epoch": 18.435940099833612, "grad_norm": 12.9473237991333, "learning_rate": 5e-06, "loss": 0.3167, "num_input_tokens_seen": 347372268, "step": 5540 }, { "epoch": 18.435940099833612, "loss": 0.2930043637752533, "loss_ce": 5.096860604680842e-06, "loss_iou": 0.1240234375, "loss_num": 0.00897216796875, "loss_xval": 0.29296875, "num_input_tokens_seen": 347372268, "step": 5540 }, { "epoch": 18.43926788685524, "grad_norm": 21.378751754760742, "learning_rate": 5e-06, "loss": 0.4784, "num_input_tokens_seen": 347433872, "step": 5541 }, { "epoch": 18.43926788685524, "loss": 0.4580668807029724, "loss_ce": 2.8560600185301155e-05, "loss_iou": 0.1884765625, "loss_num": 0.0162353515625, "loss_xval": 0.458984375, "num_input_tokens_seen": 347433872, "step": 5541 }, { "epoch": 18.442595673876873, "grad_norm": 35.76236343383789, "learning_rate": 5e-06, "loss": 0.5678, "num_input_tokens_seen": 347495232, "step": 5542 }, { "epoch": 18.442595673876873, "loss": 0.3316214978694916, "loss_ce": 2.2285084924078546e-06, "loss_iou": 0.142578125, "loss_num": 0.00921630859375, "loss_xval": 0.33203125, "num_input_tokens_seen": 347495232, "step": 5542 }, { "epoch": 18.445923460898502, "grad_norm": 28.071657180786133, "learning_rate": 5e-06, "loss": 0.3241, "num_input_tokens_seen": 347557676, "step": 5543 }, { "epoch": 18.445923460898502, "loss": 0.481825053691864, "loss_ce": 4.40482072008308e-05, "loss_iou": 0.2021484375, "loss_num": 0.015625, "loss_xval": 0.482421875, "num_input_tokens_seen": 347557676, "step": 5543 }, { "epoch": 18.449251247920134, "grad_norm": 16.118114471435547, "learning_rate": 5e-06, "loss": 0.5274, "num_input_tokens_seen": 347621440, "step": 5544 }, { "epoch": 18.449251247920134, "loss": 0.41620421409606934, "loss_ce": 5.4711545089958236e-06, "loss_iou": 0.1337890625, "loss_num": 0.029541015625, "loss_xval": 0.416015625, "num_input_tokens_seen": 347621440, "step": 5544 }, { "epoch": 18.452579034941763, "grad_norm": 33.412620544433594, "learning_rate": 5e-06, "loss": 0.4258, "num_input_tokens_seen": 347683200, "step": 5545 }, { "epoch": 18.452579034941763, "loss": 0.6038645505905151, "loss_ce": 1.3219351785664912e-05, "loss_iou": 0.27734375, "loss_num": 0.009765625, "loss_xval": 0.60546875, "num_input_tokens_seen": 347683200, "step": 5545 }, { "epoch": 18.455906821963396, "grad_norm": 44.68722152709961, "learning_rate": 5e-06, "loss": 0.5221, "num_input_tokens_seen": 347746480, "step": 5546 }, { "epoch": 18.455906821963396, "loss": 0.4553771913051605, "loss_ce": 1.2986164620087948e-05, "loss_iou": 0.2109375, "loss_num": 0.00653076171875, "loss_xval": 0.455078125, "num_input_tokens_seen": 347746480, "step": 5546 }, { "epoch": 18.459234608985025, "grad_norm": 46.10934066772461, "learning_rate": 5e-06, "loss": 0.5083, "num_input_tokens_seen": 347808256, "step": 5547 }, { "epoch": 18.459234608985025, "loss": 0.5584118962287903, "loss_ce": 1.2518738685685094e-06, "loss_iou": 0.2119140625, "loss_num": 0.0267333984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 347808256, "step": 5547 }, { "epoch": 18.462562396006657, "grad_norm": 31.117198944091797, "learning_rate": 5e-06, "loss": 0.4111, "num_input_tokens_seen": 347871032, "step": 5548 }, { "epoch": 18.462562396006657, "loss": 0.29767942428588867, "loss_ce": 2.622003921715077e-05, "loss_iou": 0.10791015625, "loss_num": 0.016357421875, "loss_xval": 0.296875, "num_input_tokens_seen": 347871032, "step": 5548 }, { "epoch": 18.465890183028286, "grad_norm": 16.99928092956543, "learning_rate": 5e-06, "loss": 0.7363, "num_input_tokens_seen": 347932592, "step": 5549 }, { "epoch": 18.465890183028286, "loss": 0.8063988089561462, "loss_ce": 2.3404859348374885e-06, "loss_iou": 0.33203125, "loss_num": 0.0281982421875, "loss_xval": 0.8046875, "num_input_tokens_seen": 347932592, "step": 5549 }, { "epoch": 18.469217970049918, "grad_norm": 26.0183048248291, "learning_rate": 5e-06, "loss": 0.4732, "num_input_tokens_seen": 347996088, "step": 5550 }, { "epoch": 18.469217970049918, "loss": 0.5692315697669983, "loss_ce": 1.7672493413556367e-05, "loss_iou": 0.2333984375, "loss_num": 0.0206298828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 347996088, "step": 5550 }, { "epoch": 18.472545757071547, "grad_norm": 28.497478485107422, "learning_rate": 5e-06, "loss": 0.3316, "num_input_tokens_seen": 348058724, "step": 5551 }, { "epoch": 18.472545757071547, "loss": 0.3348410427570343, "loss_ce": 2.174403562094085e-06, "loss_iou": 0.1357421875, "loss_num": 0.0125732421875, "loss_xval": 0.333984375, "num_input_tokens_seen": 348058724, "step": 5551 }, { "epoch": 18.47587354409318, "grad_norm": 25.238393783569336, "learning_rate": 5e-06, "loss": 0.4294, "num_input_tokens_seen": 348121444, "step": 5552 }, { "epoch": 18.47587354409318, "loss": 0.5301772356033325, "loss_ce": 2.5847784854704514e-05, "loss_iou": 0.2314453125, "loss_num": 0.01373291015625, "loss_xval": 0.53125, "num_input_tokens_seen": 348121444, "step": 5552 }, { "epoch": 18.47920133111481, "grad_norm": 10.871220588684082, "learning_rate": 5e-06, "loss": 0.2973, "num_input_tokens_seen": 348184072, "step": 5553 }, { "epoch": 18.47920133111481, "loss": 0.1681523472070694, "loss_ce": 4.963198421137349e-07, "loss_iou": 0.07080078125, "loss_num": 0.00531005859375, "loss_xval": 0.16796875, "num_input_tokens_seen": 348184072, "step": 5553 }, { "epoch": 18.48252911813644, "grad_norm": 11.171881675720215, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 348247188, "step": 5554 }, { "epoch": 18.48252911813644, "loss": 0.1974518895149231, "loss_ce": 3.1582876545144245e-06, "loss_iou": 0.07080078125, "loss_num": 0.0111083984375, "loss_xval": 0.197265625, "num_input_tokens_seen": 348247188, "step": 5554 }, { "epoch": 18.48585690515807, "grad_norm": 10.720335960388184, "learning_rate": 5e-06, "loss": 0.3587, "num_input_tokens_seen": 348309256, "step": 5555 }, { "epoch": 18.48585690515807, "loss": 0.3837082087993622, "loss_ce": 1.0709051821322646e-05, "loss_iou": 0.1533203125, "loss_num": 0.015380859375, "loss_xval": 0.3828125, "num_input_tokens_seen": 348309256, "step": 5555 }, { "epoch": 18.489184692179702, "grad_norm": 15.325542449951172, "learning_rate": 5e-06, "loss": 0.3332, "num_input_tokens_seen": 348372088, "step": 5556 }, { "epoch": 18.489184692179702, "loss": 0.44667190313339233, "loss_ce": 1.3715552995563485e-06, "loss_iou": 0.2001953125, "loss_num": 0.0091552734375, "loss_xval": 0.447265625, "num_input_tokens_seen": 348372088, "step": 5556 }, { "epoch": 18.49251247920133, "grad_norm": 14.836922645568848, "learning_rate": 5e-06, "loss": 0.2242, "num_input_tokens_seen": 348434040, "step": 5557 }, { "epoch": 18.49251247920133, "loss": 0.23954901099205017, "loss_ce": 1.3015151125728153e-06, "loss_iou": 0.0712890625, "loss_num": 0.0194091796875, "loss_xval": 0.2392578125, "num_input_tokens_seen": 348434040, "step": 5557 }, { "epoch": 18.495840266222963, "grad_norm": 24.850936889648438, "learning_rate": 5e-06, "loss": 0.5025, "num_input_tokens_seen": 348496820, "step": 5558 }, { "epoch": 18.495840266222963, "loss": 0.43555572628974915, "loss_ce": 8.836368579068221e-06, "loss_iou": 0.1650390625, "loss_num": 0.0213623046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 348496820, "step": 5558 }, { "epoch": 18.499168053244592, "grad_norm": 6.138052463531494, "learning_rate": 5e-06, "loss": 0.2643, "num_input_tokens_seen": 348558684, "step": 5559 }, { "epoch": 18.499168053244592, "loss": 0.4216102063655853, "loss_ce": 7.088651182129979e-05, "loss_iou": 0.177734375, "loss_num": 0.013427734375, "loss_xval": 0.421875, "num_input_tokens_seen": 348558684, "step": 5559 }, { "epoch": 18.502495840266224, "grad_norm": 15.44811725616455, "learning_rate": 5e-06, "loss": 0.4273, "num_input_tokens_seen": 348620840, "step": 5560 }, { "epoch": 18.502495840266224, "loss": 0.5039682388305664, "loss_ce": 9.285688520321855e-07, "loss_iou": 0.166015625, "loss_num": 0.034423828125, "loss_xval": 0.50390625, "num_input_tokens_seen": 348620840, "step": 5560 }, { "epoch": 18.505823627287853, "grad_norm": 20.572986602783203, "learning_rate": 5e-06, "loss": 0.2991, "num_input_tokens_seen": 348684100, "step": 5561 }, { "epoch": 18.505823627287853, "loss": 0.3750632107257843, "loss_ce": 2.180088586101192e-06, "loss_iou": 0.1591796875, "loss_num": 0.0113525390625, "loss_xval": 0.375, "num_input_tokens_seen": 348684100, "step": 5561 }, { "epoch": 18.509151414309486, "grad_norm": 17.84046173095703, "learning_rate": 5e-06, "loss": 0.4593, "num_input_tokens_seen": 348745028, "step": 5562 }, { "epoch": 18.509151414309486, "loss": 0.45439019799232483, "loss_ce": 2.921971827163361e-05, "loss_iou": 0.154296875, "loss_num": 0.029052734375, "loss_xval": 0.455078125, "num_input_tokens_seen": 348745028, "step": 5562 }, { "epoch": 18.512479201331114, "grad_norm": 13.433248519897461, "learning_rate": 5e-06, "loss": 0.3422, "num_input_tokens_seen": 348807472, "step": 5563 }, { "epoch": 18.512479201331114, "loss": 0.3449818193912506, "loss_ce": 7.214393554022536e-05, "loss_iou": 0.1162109375, "loss_num": 0.0224609375, "loss_xval": 0.345703125, "num_input_tokens_seen": 348807472, "step": 5563 }, { "epoch": 18.515806988352747, "grad_norm": 16.994911193847656, "learning_rate": 5e-06, "loss": 0.2597, "num_input_tokens_seen": 348869812, "step": 5564 }, { "epoch": 18.515806988352747, "loss": 0.3326728940010071, "loss_ce": 8.056933324951387e-07, "loss_iou": 0.138671875, "loss_num": 0.01104736328125, "loss_xval": 0.33203125, "num_input_tokens_seen": 348869812, "step": 5564 }, { "epoch": 18.519134775374376, "grad_norm": 15.691169738769531, "learning_rate": 5e-06, "loss": 0.3551, "num_input_tokens_seen": 348931580, "step": 5565 }, { "epoch": 18.519134775374376, "loss": 0.19986362755298615, "loss_ce": 4.021419044875074e-06, "loss_iou": 0.050537109375, "loss_num": 0.0198974609375, "loss_xval": 0.2001953125, "num_input_tokens_seen": 348931580, "step": 5565 }, { "epoch": 18.522462562396008, "grad_norm": 11.247476577758789, "learning_rate": 5e-06, "loss": 0.5838, "num_input_tokens_seen": 348996124, "step": 5566 }, { "epoch": 18.522462562396008, "loss": 0.5885138511657715, "loss_ce": 1.2878407687821891e-05, "loss_iou": 0.255859375, "loss_num": 0.01544189453125, "loss_xval": 0.58984375, "num_input_tokens_seen": 348996124, "step": 5566 }, { "epoch": 18.525790349417637, "grad_norm": 18.397886276245117, "learning_rate": 5e-06, "loss": 0.5481, "num_input_tokens_seen": 349060072, "step": 5567 }, { "epoch": 18.525790349417637, "loss": 0.5275163650512695, "loss_ce": 5.049658284406178e-05, "loss_iou": 0.205078125, "loss_num": 0.0234375, "loss_xval": 0.52734375, "num_input_tokens_seen": 349060072, "step": 5567 }, { "epoch": 18.52911813643927, "grad_norm": 23.270610809326172, "learning_rate": 5e-06, "loss": 0.4471, "num_input_tokens_seen": 349123168, "step": 5568 }, { "epoch": 18.52911813643927, "loss": 0.49240434169769287, "loss_ce": 3.177594862791011e-06, "loss_iou": 0.205078125, "loss_num": 0.0166015625, "loss_xval": 0.4921875, "num_input_tokens_seen": 349123168, "step": 5568 }, { "epoch": 18.532445923460898, "grad_norm": 13.78498363494873, "learning_rate": 5e-06, "loss": 0.4524, "num_input_tokens_seen": 349186788, "step": 5569 }, { "epoch": 18.532445923460898, "loss": 0.40601110458374023, "loss_ce": 5.2265418162278365e-06, "loss_iou": 0.1767578125, "loss_num": 0.0106201171875, "loss_xval": 0.40625, "num_input_tokens_seen": 349186788, "step": 5569 }, { "epoch": 18.53577371048253, "grad_norm": 10.13986873626709, "learning_rate": 5e-06, "loss": 0.4885, "num_input_tokens_seen": 349250448, "step": 5570 }, { "epoch": 18.53577371048253, "loss": 0.22659359872341156, "loss_ce": 5.828429152643366e-07, "loss_iou": 0.083984375, "loss_num": 0.01177978515625, "loss_xval": 0.2265625, "num_input_tokens_seen": 349250448, "step": 5570 }, { "epoch": 18.53910149750416, "grad_norm": 5.690506935119629, "learning_rate": 5e-06, "loss": 0.2159, "num_input_tokens_seen": 349310908, "step": 5571 }, { "epoch": 18.53910149750416, "loss": 0.14737384021282196, "loss_ce": 4.468105544219725e-06, "loss_iou": 0.047607421875, "loss_num": 0.0103759765625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 349310908, "step": 5571 }, { "epoch": 18.54242928452579, "grad_norm": 11.342318534851074, "learning_rate": 5e-06, "loss": 0.4268, "num_input_tokens_seen": 349374120, "step": 5572 }, { "epoch": 18.54242928452579, "loss": 0.4681861102581024, "loss_ce": 1.5927793356240727e-05, "loss_iou": 0.158203125, "loss_num": 0.0301513671875, "loss_xval": 0.46875, "num_input_tokens_seen": 349374120, "step": 5572 }, { "epoch": 18.54575707154742, "grad_norm": 9.444395065307617, "learning_rate": 5e-06, "loss": 0.4943, "num_input_tokens_seen": 349435232, "step": 5573 }, { "epoch": 18.54575707154742, "loss": 0.4381110668182373, "loss_ce": 7.087698463692504e-07, "loss_iou": 0.1865234375, "loss_num": 0.0128173828125, "loss_xval": 0.4375, "num_input_tokens_seen": 349435232, "step": 5573 }, { "epoch": 18.549084858569053, "grad_norm": 12.010346412658691, "learning_rate": 5e-06, "loss": 0.3124, "num_input_tokens_seen": 349496220, "step": 5574 }, { "epoch": 18.549084858569053, "loss": 0.2875996232032776, "loss_ce": 1.9511996924848063e-06, "loss_iou": 0.1044921875, "loss_num": 0.015625, "loss_xval": 0.287109375, "num_input_tokens_seen": 349496220, "step": 5574 }, { "epoch": 18.55241264559068, "grad_norm": 20.834653854370117, "learning_rate": 5e-06, "loss": 0.4191, "num_input_tokens_seen": 349558464, "step": 5575 }, { "epoch": 18.55241264559068, "loss": 0.42276453971862793, "loss_ce": 4.532247658062261e-06, "loss_iou": 0.1689453125, "loss_num": 0.0169677734375, "loss_xval": 0.421875, "num_input_tokens_seen": 349558464, "step": 5575 }, { "epoch": 18.555740432612314, "grad_norm": 7.650561809539795, "learning_rate": 5e-06, "loss": 0.2774, "num_input_tokens_seen": 349621856, "step": 5576 }, { "epoch": 18.555740432612314, "loss": 0.2674565613269806, "loss_ce": 4.825282644560502e-07, "loss_iou": 0.087890625, "loss_num": 0.0184326171875, "loss_xval": 0.267578125, "num_input_tokens_seen": 349621856, "step": 5576 }, { "epoch": 18.559068219633943, "grad_norm": 14.798502922058105, "learning_rate": 5e-06, "loss": 0.4927, "num_input_tokens_seen": 349685572, "step": 5577 }, { "epoch": 18.559068219633943, "loss": 0.5014656782150269, "loss_ce": 8.19028116438858e-07, "loss_iou": 0.197265625, "loss_num": 0.021240234375, "loss_xval": 0.5, "num_input_tokens_seen": 349685572, "step": 5577 }, { "epoch": 18.562396006655575, "grad_norm": 8.072050094604492, "learning_rate": 5e-06, "loss": 0.1635, "num_input_tokens_seen": 349747220, "step": 5578 }, { "epoch": 18.562396006655575, "loss": 0.14148294925689697, "loss_ce": 3.4511976991780102e-06, "loss_iou": 0.02978515625, "loss_num": 0.016357421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 349747220, "step": 5578 }, { "epoch": 18.565723793677204, "grad_norm": 11.108901023864746, "learning_rate": 5e-06, "loss": 0.5183, "num_input_tokens_seen": 349809844, "step": 5579 }, { "epoch": 18.565723793677204, "loss": 0.649659276008606, "loss_ce": 1.1162683222210035e-06, "loss_iou": 0.2431640625, "loss_num": 0.032470703125, "loss_xval": 0.6484375, "num_input_tokens_seen": 349809844, "step": 5579 }, { "epoch": 18.569051580698837, "grad_norm": 11.179746627807617, "learning_rate": 5e-06, "loss": 0.5776, "num_input_tokens_seen": 349872548, "step": 5580 }, { "epoch": 18.569051580698837, "loss": 0.47104018926620483, "loss_ce": 1.3626688541990006e-06, "loss_iou": 0.1689453125, "loss_num": 0.0264892578125, "loss_xval": 0.470703125, "num_input_tokens_seen": 349872548, "step": 5580 }, { "epoch": 18.572379367720465, "grad_norm": 8.340195655822754, "learning_rate": 5e-06, "loss": 0.3963, "num_input_tokens_seen": 349935192, "step": 5581 }, { "epoch": 18.572379367720465, "loss": 0.2894919812679291, "loss_ce": 2.2383571831596782e-06, "loss_iou": 0.12353515625, "loss_num": 0.00848388671875, "loss_xval": 0.2890625, "num_input_tokens_seen": 349935192, "step": 5581 }, { "epoch": 18.575707154742098, "grad_norm": 7.1681013107299805, "learning_rate": 5e-06, "loss": 0.329, "num_input_tokens_seen": 349997660, "step": 5582 }, { "epoch": 18.575707154742098, "loss": 0.31105536222457886, "loss_ce": 1.1238544175284915e-06, "loss_iou": 0.125, "loss_num": 0.01220703125, "loss_xval": 0.310546875, "num_input_tokens_seen": 349997660, "step": 5582 }, { "epoch": 18.579034941763727, "grad_norm": 7.913095474243164, "learning_rate": 5e-06, "loss": 0.3207, "num_input_tokens_seen": 350059904, "step": 5583 }, { "epoch": 18.579034941763727, "loss": 0.5158411264419556, "loss_ce": 3.3035033993655816e-05, "loss_iou": 0.1630859375, "loss_num": 0.0380859375, "loss_xval": 0.515625, "num_input_tokens_seen": 350059904, "step": 5583 }, { "epoch": 18.58236272878536, "grad_norm": 13.587084770202637, "learning_rate": 5e-06, "loss": 0.3624, "num_input_tokens_seen": 350122012, "step": 5584 }, { "epoch": 18.58236272878536, "loss": 0.3025527894496918, "loss_ce": 1.5008669151939102e-06, "loss_iou": 0.12158203125, "loss_num": 0.01190185546875, "loss_xval": 0.302734375, "num_input_tokens_seen": 350122012, "step": 5584 }, { "epoch": 18.585690515806988, "grad_norm": 18.821577072143555, "learning_rate": 5e-06, "loss": 0.6085, "num_input_tokens_seen": 350185108, "step": 5585 }, { "epoch": 18.585690515806988, "loss": 0.6613785028457642, "loss_ce": 1.5192140381259378e-06, "loss_iou": 0.2470703125, "loss_num": 0.03369140625, "loss_xval": 0.66015625, "num_input_tokens_seen": 350185108, "step": 5585 }, { "epoch": 18.58901830282862, "grad_norm": 15.707382202148438, "learning_rate": 5e-06, "loss": 0.3983, "num_input_tokens_seen": 350247380, "step": 5586 }, { "epoch": 18.58901830282862, "loss": 0.38879451155662537, "loss_ce": 5.956778750260128e-07, "loss_iou": 0.17578125, "loss_num": 0.007354736328125, "loss_xval": 0.388671875, "num_input_tokens_seen": 350247380, "step": 5586 }, { "epoch": 18.59234608985025, "grad_norm": 7.033620357513428, "learning_rate": 5e-06, "loss": 0.4624, "num_input_tokens_seen": 350311740, "step": 5587 }, { "epoch": 18.59234608985025, "loss": 0.6319189667701721, "loss_ce": 2.198468428105116e-05, "loss_iou": 0.251953125, "loss_num": 0.025390625, "loss_xval": 0.6328125, "num_input_tokens_seen": 350311740, "step": 5587 }, { "epoch": 18.59567387687188, "grad_norm": 10.885958671569824, "learning_rate": 5e-06, "loss": 0.4036, "num_input_tokens_seen": 350375500, "step": 5588 }, { "epoch": 18.59567387687188, "loss": 0.3501317799091339, "loss_ce": 3.6088920296606375e-06, "loss_iou": 0.14453125, "loss_num": 0.0123291015625, "loss_xval": 0.349609375, "num_input_tokens_seen": 350375500, "step": 5588 }, { "epoch": 18.59900166389351, "grad_norm": 27.671091079711914, "learning_rate": 5e-06, "loss": 0.3926, "num_input_tokens_seen": 350438428, "step": 5589 }, { "epoch": 18.59900166389351, "loss": 0.3843145966529846, "loss_ce": 0.000250883778790012, "loss_iou": 0.17578125, "loss_num": 0.006439208984375, "loss_xval": 0.384765625, "num_input_tokens_seen": 350438428, "step": 5589 }, { "epoch": 18.602329450915143, "grad_norm": 19.52801513671875, "learning_rate": 5e-06, "loss": 0.4702, "num_input_tokens_seen": 350501268, "step": 5590 }, { "epoch": 18.602329450915143, "loss": 0.6773688793182373, "loss_ce": 6.949703674763441e-07, "loss_iou": 0.26171875, "loss_num": 0.0308837890625, "loss_xval": 0.67578125, "num_input_tokens_seen": 350501268, "step": 5590 }, { "epoch": 18.60565723793677, "grad_norm": 22.647611618041992, "learning_rate": 5e-06, "loss": 0.4579, "num_input_tokens_seen": 350564388, "step": 5591 }, { "epoch": 18.60565723793677, "loss": 0.49109208583831787, "loss_ce": 3.2019538593885954e-06, "loss_iou": 0.1923828125, "loss_num": 0.0211181640625, "loss_xval": 0.490234375, "num_input_tokens_seen": 350564388, "step": 5591 }, { "epoch": 18.608985024958404, "grad_norm": 14.188813209533691, "learning_rate": 5e-06, "loss": 0.5198, "num_input_tokens_seen": 350628136, "step": 5592 }, { "epoch": 18.608985024958404, "loss": 0.3490002155303955, "loss_ce": 1.1905329984074342e-06, "loss_iou": 0.150390625, "loss_num": 0.009765625, "loss_xval": 0.349609375, "num_input_tokens_seen": 350628136, "step": 5592 }, { "epoch": 18.612312811980033, "grad_norm": 8.799603462219238, "learning_rate": 5e-06, "loss": 0.2303, "num_input_tokens_seen": 350691532, "step": 5593 }, { "epoch": 18.612312811980033, "loss": 0.19360819458961487, "loss_ce": 4.682283815782284e-06, "loss_iou": 0.0595703125, "loss_num": 0.01495361328125, "loss_xval": 0.193359375, "num_input_tokens_seen": 350691532, "step": 5593 }, { "epoch": 18.615640599001665, "grad_norm": 9.097368240356445, "learning_rate": 5e-06, "loss": 0.4342, "num_input_tokens_seen": 350754544, "step": 5594 }, { "epoch": 18.615640599001665, "loss": 0.5007075071334839, "loss_ce": 5.610660991806071e-06, "loss_iou": 0.1767578125, "loss_num": 0.0294189453125, "loss_xval": 0.5, "num_input_tokens_seen": 350754544, "step": 5594 }, { "epoch": 18.618968386023294, "grad_norm": 19.627962112426758, "learning_rate": 5e-06, "loss": 0.5023, "num_input_tokens_seen": 350817868, "step": 5595 }, { "epoch": 18.618968386023294, "loss": 0.4917067289352417, "loss_ce": 7.520266990468372e-06, "loss_iou": 0.1943359375, "loss_num": 0.020751953125, "loss_xval": 0.4921875, "num_input_tokens_seen": 350817868, "step": 5595 }, { "epoch": 18.622296173044926, "grad_norm": 32.742374420166016, "learning_rate": 5e-06, "loss": 0.6353, "num_input_tokens_seen": 350880920, "step": 5596 }, { "epoch": 18.622296173044926, "loss": 0.727784276008606, "loss_ce": 1.0783818424897618e-06, "loss_iou": 0.298828125, "loss_num": 0.02587890625, "loss_xval": 0.7265625, "num_input_tokens_seen": 350880920, "step": 5596 }, { "epoch": 18.625623960066555, "grad_norm": 38.80014419555664, "learning_rate": 5e-06, "loss": 0.449, "num_input_tokens_seen": 350944180, "step": 5597 }, { "epoch": 18.625623960066555, "loss": 0.4959874749183655, "loss_ce": 5.214116640672728e-07, "loss_iou": 0.171875, "loss_num": 0.030517578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 350944180, "step": 5597 }, { "epoch": 18.628951747088188, "grad_norm": 9.599136352539062, "learning_rate": 5e-06, "loss": 0.3765, "num_input_tokens_seen": 351006472, "step": 5598 }, { "epoch": 18.628951747088188, "loss": 0.16049076616764069, "loss_ce": 2.9339620596147142e-05, "loss_iou": 0.041015625, "loss_num": 0.0157470703125, "loss_xval": 0.16015625, "num_input_tokens_seen": 351006472, "step": 5598 }, { "epoch": 18.632279534109816, "grad_norm": 22.01912498474121, "learning_rate": 5e-06, "loss": 0.3949, "num_input_tokens_seen": 351070280, "step": 5599 }, { "epoch": 18.632279534109816, "loss": 0.3109750747680664, "loss_ce": 8.57556915434543e-06, "loss_iou": 0.1220703125, "loss_num": 0.013427734375, "loss_xval": 0.310546875, "num_input_tokens_seen": 351070280, "step": 5599 }, { "epoch": 18.63560732113145, "grad_norm": 29.554096221923828, "learning_rate": 5e-06, "loss": 0.4781, "num_input_tokens_seen": 351134388, "step": 5600 }, { "epoch": 18.63560732113145, "loss": 0.46423691511154175, "loss_ce": 3.527105263856356e-06, "loss_iou": 0.1865234375, "loss_num": 0.0181884765625, "loss_xval": 0.46484375, "num_input_tokens_seen": 351134388, "step": 5600 }, { "epoch": 18.638935108153078, "grad_norm": 27.227832794189453, "learning_rate": 5e-06, "loss": 0.388, "num_input_tokens_seen": 351198416, "step": 5601 }, { "epoch": 18.638935108153078, "loss": 0.1947723776102066, "loss_ce": 9.210181815433316e-06, "loss_iou": 0.078125, "loss_num": 0.00775146484375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 351198416, "step": 5601 }, { "epoch": 18.64226289517471, "grad_norm": 12.25460147857666, "learning_rate": 5e-06, "loss": 0.3544, "num_input_tokens_seen": 351260432, "step": 5602 }, { "epoch": 18.64226289517471, "loss": 0.4897712469100952, "loss_ce": 0.00020825771207455546, "loss_iou": 0.1845703125, "loss_num": 0.02392578125, "loss_xval": 0.490234375, "num_input_tokens_seen": 351260432, "step": 5602 }, { "epoch": 18.64559068219634, "grad_norm": 14.44179630279541, "learning_rate": 5e-06, "loss": 0.3454, "num_input_tokens_seen": 351323580, "step": 5603 }, { "epoch": 18.64559068219634, "loss": 0.4051832854747772, "loss_ce": 3.1896801374386996e-05, "loss_iou": 0.1708984375, "loss_num": 0.01275634765625, "loss_xval": 0.404296875, "num_input_tokens_seen": 351323580, "step": 5603 }, { "epoch": 18.64891846921797, "grad_norm": 12.444397926330566, "learning_rate": 5e-06, "loss": 0.3079, "num_input_tokens_seen": 351386308, "step": 5604 }, { "epoch": 18.64891846921797, "loss": 0.24631407856941223, "loss_ce": 6.705575742671499e-06, "loss_iou": 0.10400390625, "loss_num": 0.0078125, "loss_xval": 0.24609375, "num_input_tokens_seen": 351386308, "step": 5604 }, { "epoch": 18.6522462562396, "grad_norm": 12.556640625, "learning_rate": 5e-06, "loss": 0.427, "num_input_tokens_seen": 351450256, "step": 5605 }, { "epoch": 18.6522462562396, "loss": 0.29419028759002686, "loss_ce": 8.581557722209254e-07, "loss_iou": 0.10791015625, "loss_num": 0.0157470703125, "loss_xval": 0.294921875, "num_input_tokens_seen": 351450256, "step": 5605 }, { "epoch": 18.655574043261232, "grad_norm": 17.249414443969727, "learning_rate": 5e-06, "loss": 0.3924, "num_input_tokens_seen": 351513316, "step": 5606 }, { "epoch": 18.655574043261232, "loss": 0.49891990423202515, "loss_ce": 1.850280023063533e-05, "loss_iou": 0.2001953125, "loss_num": 0.0196533203125, "loss_xval": 0.498046875, "num_input_tokens_seen": 351513316, "step": 5606 }, { "epoch": 18.65890183028286, "grad_norm": 18.44329071044922, "learning_rate": 5e-06, "loss": 0.603, "num_input_tokens_seen": 351576944, "step": 5607 }, { "epoch": 18.65890183028286, "loss": 0.5684289932250977, "loss_ce": 8.572353181079961e-06, "loss_iou": 0.2294921875, "loss_num": 0.0218505859375, "loss_xval": 0.5703125, "num_input_tokens_seen": 351576944, "step": 5607 }, { "epoch": 18.662229617304494, "grad_norm": 23.525251388549805, "learning_rate": 5e-06, "loss": 0.5742, "num_input_tokens_seen": 351639492, "step": 5608 }, { "epoch": 18.662229617304494, "loss": 0.591702938079834, "loss_ce": 2.810281512211077e-05, "loss_iou": 0.2412109375, "loss_num": 0.021728515625, "loss_xval": 0.58984375, "num_input_tokens_seen": 351639492, "step": 5608 }, { "epoch": 18.665557404326123, "grad_norm": 21.073209762573242, "learning_rate": 5e-06, "loss": 0.3811, "num_input_tokens_seen": 351701848, "step": 5609 }, { "epoch": 18.665557404326123, "loss": 0.3351132869720459, "loss_ce": 3.0278069971245714e-05, "loss_iou": 0.1279296875, "loss_num": 0.015869140625, "loss_xval": 0.3359375, "num_input_tokens_seen": 351701848, "step": 5609 }, { "epoch": 18.668885191347755, "grad_norm": 26.454971313476562, "learning_rate": 5e-06, "loss": 0.3574, "num_input_tokens_seen": 351763768, "step": 5610 }, { "epoch": 18.668885191347755, "loss": 0.4842223525047302, "loss_ce": 6.098128869780339e-05, "loss_iou": 0.205078125, "loss_num": 0.0147705078125, "loss_xval": 0.484375, "num_input_tokens_seen": 351763768, "step": 5610 }, { "epoch": 18.672212978369384, "grad_norm": 32.06330871582031, "learning_rate": 5e-06, "loss": 0.4798, "num_input_tokens_seen": 351826228, "step": 5611 }, { "epoch": 18.672212978369384, "loss": 0.4647566080093384, "loss_ce": 4.412595899339067e-06, "loss_iou": 0.193359375, "loss_num": 0.01556396484375, "loss_xval": 0.46484375, "num_input_tokens_seen": 351826228, "step": 5611 }, { "epoch": 18.675540765391016, "grad_norm": 22.578079223632812, "learning_rate": 5e-06, "loss": 0.4559, "num_input_tokens_seen": 351889184, "step": 5612 }, { "epoch": 18.675540765391016, "loss": 0.3353290855884552, "loss_ce": 1.9333733689563815e-06, "loss_iou": 0.1337890625, "loss_num": 0.0137939453125, "loss_xval": 0.3359375, "num_input_tokens_seen": 351889184, "step": 5612 }, { "epoch": 18.678868552412645, "grad_norm": 6.046185493469238, "learning_rate": 5e-06, "loss": 0.345, "num_input_tokens_seen": 351953360, "step": 5613 }, { "epoch": 18.678868552412645, "loss": 0.260690838098526, "loss_ce": 9.686642442829907e-06, "loss_iou": 0.10595703125, "loss_num": 0.009765625, "loss_xval": 0.259765625, "num_input_tokens_seen": 351953360, "step": 5613 }, { "epoch": 18.682196339434277, "grad_norm": 7.825525283813477, "learning_rate": 5e-06, "loss": 0.4407, "num_input_tokens_seen": 352016760, "step": 5614 }, { "epoch": 18.682196339434277, "loss": 0.38446104526519775, "loss_ce": 5.876972863916308e-07, "loss_iou": 0.14453125, "loss_num": 0.0189208984375, "loss_xval": 0.384765625, "num_input_tokens_seen": 352016760, "step": 5614 }, { "epoch": 18.685524126455906, "grad_norm": 14.480791091918945, "learning_rate": 5e-06, "loss": 0.2965, "num_input_tokens_seen": 352079932, "step": 5615 }, { "epoch": 18.685524126455906, "loss": 0.302737295627594, "loss_ce": 2.9000375434407033e-06, "loss_iou": 0.119140625, "loss_num": 0.012939453125, "loss_xval": 0.302734375, "num_input_tokens_seen": 352079932, "step": 5615 }, { "epoch": 18.68885191347754, "grad_norm": 18.62698745727539, "learning_rate": 5e-06, "loss": 0.3269, "num_input_tokens_seen": 352142656, "step": 5616 }, { "epoch": 18.68885191347754, "loss": 0.22281013429164886, "loss_ce": 1.2898976819997188e-06, "loss_iou": 0.0791015625, "loss_num": 0.01287841796875, "loss_xval": 0.22265625, "num_input_tokens_seen": 352142656, "step": 5616 }, { "epoch": 18.692179700499167, "grad_norm": 27.154420852661133, "learning_rate": 5e-06, "loss": 0.4417, "num_input_tokens_seen": 352205808, "step": 5617 }, { "epoch": 18.692179700499167, "loss": 0.2591778039932251, "loss_ce": 2.2506115783471614e-05, "loss_iou": 0.0888671875, "loss_num": 0.0162353515625, "loss_xval": 0.259765625, "num_input_tokens_seen": 352205808, "step": 5617 }, { "epoch": 18.6955074875208, "grad_norm": 26.93094825744629, "learning_rate": 5e-06, "loss": 0.5039, "num_input_tokens_seen": 352268680, "step": 5618 }, { "epoch": 18.6955074875208, "loss": 0.3624342083930969, "loss_ce": 7.436040505126584e-06, "loss_iou": 0.15625, "loss_num": 0.00994873046875, "loss_xval": 0.36328125, "num_input_tokens_seen": 352268680, "step": 5618 }, { "epoch": 18.69883527454243, "grad_norm": 39.95256423950195, "learning_rate": 5e-06, "loss": 0.5137, "num_input_tokens_seen": 352332092, "step": 5619 }, { "epoch": 18.69883527454243, "loss": 0.5775212049484253, "loss_ce": 6.5581170929363e-06, "loss_iou": 0.259765625, "loss_num": 0.011962890625, "loss_xval": 0.578125, "num_input_tokens_seen": 352332092, "step": 5619 }, { "epoch": 18.70216306156406, "grad_norm": 24.8909969329834, "learning_rate": 5e-06, "loss": 0.4097, "num_input_tokens_seen": 352392856, "step": 5620 }, { "epoch": 18.70216306156406, "loss": 0.626725435256958, "loss_ce": 1.6464160580653697e-05, "loss_iou": 0.2578125, "loss_num": 0.0224609375, "loss_xval": 0.625, "num_input_tokens_seen": 352392856, "step": 5620 }, { "epoch": 18.70549084858569, "grad_norm": 14.03065013885498, "learning_rate": 5e-06, "loss": 0.467, "num_input_tokens_seen": 352456140, "step": 5621 }, { "epoch": 18.70549084858569, "loss": 0.5902581214904785, "loss_ce": 4.816760338144377e-05, "loss_iou": 0.26953125, "loss_num": 0.010498046875, "loss_xval": 0.58984375, "num_input_tokens_seen": 352456140, "step": 5621 }, { "epoch": 18.708818635607322, "grad_norm": 11.258647918701172, "learning_rate": 5e-06, "loss": 0.4455, "num_input_tokens_seen": 352518812, "step": 5622 }, { "epoch": 18.708818635607322, "loss": 0.28387027978897095, "loss_ce": 3.3906726457644254e-05, "loss_iou": 0.10107421875, "loss_num": 0.016357421875, "loss_xval": 0.283203125, "num_input_tokens_seen": 352518812, "step": 5622 }, { "epoch": 18.71214642262895, "grad_norm": 22.929365158081055, "learning_rate": 5e-06, "loss": 0.3992, "num_input_tokens_seen": 352582888, "step": 5623 }, { "epoch": 18.71214642262895, "loss": 0.3255631923675537, "loss_ce": 1.6611345472483663e-06, "loss_iou": 0.138671875, "loss_num": 0.00970458984375, "loss_xval": 0.326171875, "num_input_tokens_seen": 352582888, "step": 5623 }, { "epoch": 18.715474209650584, "grad_norm": 20.790664672851562, "learning_rate": 5e-06, "loss": 0.5544, "num_input_tokens_seen": 352646012, "step": 5624 }, { "epoch": 18.715474209650584, "loss": 0.5028705596923828, "loss_ce": 1.8839596123143565e-06, "loss_iou": 0.193359375, "loss_num": 0.0230712890625, "loss_xval": 0.50390625, "num_input_tokens_seen": 352646012, "step": 5624 }, { "epoch": 18.718801996672212, "grad_norm": 12.21467113494873, "learning_rate": 5e-06, "loss": 0.4511, "num_input_tokens_seen": 352710208, "step": 5625 }, { "epoch": 18.718801996672212, "loss": 0.4123923182487488, "loss_ce": 0.0008932743803597987, "loss_iou": 0.1845703125, "loss_num": 0.00860595703125, "loss_xval": 0.412109375, "num_input_tokens_seen": 352710208, "step": 5625 }, { "epoch": 18.722129783693845, "grad_norm": 10.056465148925781, "learning_rate": 5e-06, "loss": 0.3392, "num_input_tokens_seen": 352772480, "step": 5626 }, { "epoch": 18.722129783693845, "loss": 0.3786337971687317, "loss_ce": 2.1929386093688663e-06, "loss_iou": 0.1416015625, "loss_num": 0.0191650390625, "loss_xval": 0.37890625, "num_input_tokens_seen": 352772480, "step": 5626 }, { "epoch": 18.725457570715474, "grad_norm": 18.605592727661133, "learning_rate": 5e-06, "loss": 0.3867, "num_input_tokens_seen": 352835164, "step": 5627 }, { "epoch": 18.725457570715474, "loss": 0.3092097043991089, "loss_ce": 5.624004188575782e-06, "loss_iou": 0.0986328125, "loss_num": 0.0224609375, "loss_xval": 0.30859375, "num_input_tokens_seen": 352835164, "step": 5627 }, { "epoch": 18.728785357737106, "grad_norm": 6.2469635009765625, "learning_rate": 5e-06, "loss": 0.5335, "num_input_tokens_seen": 352897124, "step": 5628 }, { "epoch": 18.728785357737106, "loss": 0.5525235533714294, "loss_ce": 2.8073561679775594e-06, "loss_iou": 0.1728515625, "loss_num": 0.041259765625, "loss_xval": 0.55078125, "num_input_tokens_seen": 352897124, "step": 5628 }, { "epoch": 18.732113144758735, "grad_norm": 23.758710861206055, "learning_rate": 5e-06, "loss": 0.4959, "num_input_tokens_seen": 352960572, "step": 5629 }, { "epoch": 18.732113144758735, "loss": 0.3155803084373474, "loss_ce": 4.3819101847475395e-05, "loss_iou": 0.1103515625, "loss_num": 0.01904296875, "loss_xval": 0.31640625, "num_input_tokens_seen": 352960572, "step": 5629 }, { "epoch": 18.735440931780367, "grad_norm": 42.27604675292969, "learning_rate": 5e-06, "loss": 0.44, "num_input_tokens_seen": 353024036, "step": 5630 }, { "epoch": 18.735440931780367, "loss": 0.3967449963092804, "loss_ce": 1.648607212700881e-05, "loss_iou": 0.1640625, "loss_num": 0.01373291015625, "loss_xval": 0.396484375, "num_input_tokens_seen": 353024036, "step": 5630 }, { "epoch": 18.738768718801996, "grad_norm": 27.101104736328125, "learning_rate": 5e-06, "loss": 0.4565, "num_input_tokens_seen": 353087236, "step": 5631 }, { "epoch": 18.738768718801996, "loss": 0.36498329043388367, "loss_ce": 8.324248483404517e-06, "loss_iou": 0.1591796875, "loss_num": 0.00927734375, "loss_xval": 0.365234375, "num_input_tokens_seen": 353087236, "step": 5631 }, { "epoch": 18.74209650582363, "grad_norm": 8.08700180053711, "learning_rate": 5e-06, "loss": 0.4106, "num_input_tokens_seen": 353151212, "step": 5632 }, { "epoch": 18.74209650582363, "loss": 0.4566341042518616, "loss_ce": 3.0084385798545554e-05, "loss_iou": 0.16796875, "loss_num": 0.0240478515625, "loss_xval": 0.45703125, "num_input_tokens_seen": 353151212, "step": 5632 }, { "epoch": 18.745424292845257, "grad_norm": 16.01865577697754, "learning_rate": 5e-06, "loss": 0.3664, "num_input_tokens_seen": 353211924, "step": 5633 }, { "epoch": 18.745424292845257, "loss": 0.2836177349090576, "loss_ce": 2.6175703169428743e-06, "loss_iou": 0.1240234375, "loss_num": 0.007049560546875, "loss_xval": 0.283203125, "num_input_tokens_seen": 353211924, "step": 5633 }, { "epoch": 18.74875207986689, "grad_norm": 25.08660316467285, "learning_rate": 5e-06, "loss": 0.4796, "num_input_tokens_seen": 353275556, "step": 5634 }, { "epoch": 18.74875207986689, "loss": 0.46079182624816895, "loss_ce": 6.90844353812281e-06, "loss_iou": 0.1826171875, "loss_num": 0.01904296875, "loss_xval": 0.4609375, "num_input_tokens_seen": 353275556, "step": 5634 }, { "epoch": 18.75207986688852, "grad_norm": 18.30170440673828, "learning_rate": 5e-06, "loss": 0.4487, "num_input_tokens_seen": 353338372, "step": 5635 }, { "epoch": 18.75207986688852, "loss": 0.21750026941299438, "loss_ce": 1.4916488453309285e-06, "loss_iou": 0.09033203125, "loss_num": 0.007354736328125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 353338372, "step": 5635 }, { "epoch": 18.75540765391015, "grad_norm": 10.956268310546875, "learning_rate": 5e-06, "loss": 0.512, "num_input_tokens_seen": 353400244, "step": 5636 }, { "epoch": 18.75540765391015, "loss": 0.7302545309066772, "loss_ce": 0.0003961229231208563, "loss_iou": 0.25390625, "loss_num": 0.0439453125, "loss_xval": 0.73046875, "num_input_tokens_seen": 353400244, "step": 5636 }, { "epoch": 18.75873544093178, "grad_norm": 9.707232475280762, "learning_rate": 5e-06, "loss": 0.5158, "num_input_tokens_seen": 353463324, "step": 5637 }, { "epoch": 18.75873544093178, "loss": 0.5772751569747925, "loss_ce": 4.6953487071732525e-06, "loss_iou": 0.2314453125, "loss_num": 0.0225830078125, "loss_xval": 0.578125, "num_input_tokens_seen": 353463324, "step": 5637 }, { "epoch": 18.762063227953412, "grad_norm": 12.191112518310547, "learning_rate": 5e-06, "loss": 0.3103, "num_input_tokens_seen": 353525052, "step": 5638 }, { "epoch": 18.762063227953412, "loss": 0.2514813542366028, "loss_ce": 1.24222356134851e-06, "loss_iou": 0.1064453125, "loss_num": 0.007659912109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 353525052, "step": 5638 }, { "epoch": 18.76539101497504, "grad_norm": 12.408790588378906, "learning_rate": 5e-06, "loss": 0.4101, "num_input_tokens_seen": 353588940, "step": 5639 }, { "epoch": 18.76539101497504, "loss": 0.5841420888900757, "loss_ce": 5.138163032825105e-06, "loss_iou": 0.232421875, "loss_num": 0.02392578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 353588940, "step": 5639 }, { "epoch": 18.768718801996673, "grad_norm": 11.502753257751465, "learning_rate": 5e-06, "loss": 0.4636, "num_input_tokens_seen": 353649288, "step": 5640 }, { "epoch": 18.768718801996673, "loss": 0.3513220250606537, "loss_ce": 3.417109837755561e-05, "loss_iou": 0.142578125, "loss_num": 0.013427734375, "loss_xval": 0.3515625, "num_input_tokens_seen": 353649288, "step": 5640 }, { "epoch": 18.772046589018302, "grad_norm": 8.249232292175293, "learning_rate": 5e-06, "loss": 0.3664, "num_input_tokens_seen": 353712932, "step": 5641 }, { "epoch": 18.772046589018302, "loss": 0.33998894691467285, "loss_ce": 2.3147194951889105e-05, "loss_iou": 0.1337890625, "loss_num": 0.01434326171875, "loss_xval": 0.33984375, "num_input_tokens_seen": 353712932, "step": 5641 }, { "epoch": 18.775374376039935, "grad_norm": 7.84395170211792, "learning_rate": 5e-06, "loss": 0.3049, "num_input_tokens_seen": 353774304, "step": 5642 }, { "epoch": 18.775374376039935, "loss": 0.18353413045406342, "loss_ce": 1.4204041463017347e-06, "loss_iou": 0.0673828125, "loss_num": 0.009765625, "loss_xval": 0.18359375, "num_input_tokens_seen": 353774304, "step": 5642 }, { "epoch": 18.778702163061563, "grad_norm": 6.9226765632629395, "learning_rate": 5e-06, "loss": 0.3394, "num_input_tokens_seen": 353837096, "step": 5643 }, { "epoch": 18.778702163061563, "loss": 0.1627562940120697, "loss_ce": 0.000128116924315691, "loss_iou": 0.037109375, "loss_num": 0.0177001953125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 353837096, "step": 5643 }, { "epoch": 18.782029950083196, "grad_norm": 15.288829803466797, "learning_rate": 5e-06, "loss": 0.4234, "num_input_tokens_seen": 353900760, "step": 5644 }, { "epoch": 18.782029950083196, "loss": 0.573645293712616, "loss_ce": 6.742383993696421e-05, "loss_iou": 0.228515625, "loss_num": 0.0235595703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 353900760, "step": 5644 }, { "epoch": 18.785357737104825, "grad_norm": 21.362300872802734, "learning_rate": 5e-06, "loss": 0.624, "num_input_tokens_seen": 353964748, "step": 5645 }, { "epoch": 18.785357737104825, "loss": 0.7550060749053955, "loss_ce": 1.1792651548603317e-06, "loss_iou": 0.28515625, "loss_num": 0.037109375, "loss_xval": 0.75390625, "num_input_tokens_seen": 353964748, "step": 5645 }, { "epoch": 18.788685524126457, "grad_norm": 16.276596069335938, "learning_rate": 5e-06, "loss": 0.3684, "num_input_tokens_seen": 354027476, "step": 5646 }, { "epoch": 18.788685524126457, "loss": 0.4579707384109497, "loss_ce": 8.498592069372535e-05, "loss_iou": 0.177734375, "loss_num": 0.0206298828125, "loss_xval": 0.45703125, "num_input_tokens_seen": 354027476, "step": 5646 }, { "epoch": 18.792013311148086, "grad_norm": 13.469297409057617, "learning_rate": 5e-06, "loss": 0.4623, "num_input_tokens_seen": 354090640, "step": 5647 }, { "epoch": 18.792013311148086, "loss": 0.6670548319816589, "loss_ce": 1.6127440858326736e-06, "loss_iou": 0.255859375, "loss_num": 0.0311279296875, "loss_xval": 0.66796875, "num_input_tokens_seen": 354090640, "step": 5647 }, { "epoch": 18.795341098169718, "grad_norm": 10.03350830078125, "learning_rate": 5e-06, "loss": 0.4193, "num_input_tokens_seen": 354154416, "step": 5648 }, { "epoch": 18.795341098169718, "loss": 0.3852551579475403, "loss_ce": 1.2811258329747943e-06, "loss_iou": 0.158203125, "loss_num": 0.01373291015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 354154416, "step": 5648 }, { "epoch": 18.798668885191347, "grad_norm": 20.59154510498047, "learning_rate": 5e-06, "loss": 0.4401, "num_input_tokens_seen": 354218748, "step": 5649 }, { "epoch": 18.798668885191347, "loss": 0.5470030307769775, "loss_ce": 5.960474027233431e-06, "loss_iou": 0.2138671875, "loss_num": 0.023681640625, "loss_xval": 0.546875, "num_input_tokens_seen": 354218748, "step": 5649 }, { "epoch": 18.80199667221298, "grad_norm": 42.51165008544922, "learning_rate": 5e-06, "loss": 0.5154, "num_input_tokens_seen": 354278644, "step": 5650 }, { "epoch": 18.80199667221298, "loss": 0.37860167026519775, "loss_ce": 5.885693781237933e-07, "loss_iou": 0.1591796875, "loss_num": 0.0120849609375, "loss_xval": 0.37890625, "num_input_tokens_seen": 354278644, "step": 5650 }, { "epoch": 18.80532445923461, "grad_norm": 41.08174514770508, "learning_rate": 5e-06, "loss": 0.3907, "num_input_tokens_seen": 354342004, "step": 5651 }, { "epoch": 18.80532445923461, "loss": 0.5342715978622437, "loss_ce": 3.090496829827316e-05, "loss_iou": 0.2255859375, "loss_num": 0.0166015625, "loss_xval": 0.53515625, "num_input_tokens_seen": 354342004, "step": 5651 }, { "epoch": 18.80865224625624, "grad_norm": 26.635726928710938, "learning_rate": 5e-06, "loss": 0.5394, "num_input_tokens_seen": 354405572, "step": 5652 }, { "epoch": 18.80865224625624, "loss": 0.6073004007339478, "loss_ce": 5.740704409618047e-07, "loss_iou": 0.240234375, "loss_num": 0.025146484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 354405572, "step": 5652 }, { "epoch": 18.81198003327787, "grad_norm": 34.406681060791016, "learning_rate": 5e-06, "loss": 0.4765, "num_input_tokens_seen": 354468448, "step": 5653 }, { "epoch": 18.81198003327787, "loss": 0.7537856101989746, "loss_ce": 1.3946341823611874e-06, "loss_iou": 0.328125, "loss_num": 0.019775390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 354468448, "step": 5653 }, { "epoch": 18.815307820299502, "grad_norm": 34.6660270690918, "learning_rate": 5e-06, "loss": 0.5103, "num_input_tokens_seen": 354530788, "step": 5654 }, { "epoch": 18.815307820299502, "loss": 0.5197768211364746, "loss_ce": 1.4501486020890297e-06, "loss_iou": 0.205078125, "loss_num": 0.022216796875, "loss_xval": 0.51953125, "num_input_tokens_seen": 354530788, "step": 5654 }, { "epoch": 18.81863560732113, "grad_norm": 24.08639907836914, "learning_rate": 5e-06, "loss": 0.2593, "num_input_tokens_seen": 354592980, "step": 5655 }, { "epoch": 18.81863560732113, "loss": 0.2869068384170532, "loss_ce": 3.4676465929806e-06, "loss_iou": 0.1015625, "loss_num": 0.0167236328125, "loss_xval": 0.287109375, "num_input_tokens_seen": 354592980, "step": 5655 }, { "epoch": 18.821963394342763, "grad_norm": 25.6777400970459, "learning_rate": 5e-06, "loss": 0.5015, "num_input_tokens_seen": 354657452, "step": 5656 }, { "epoch": 18.821963394342763, "loss": 0.4650905430316925, "loss_ce": 2.6504355901124654e-06, "loss_iou": 0.185546875, "loss_num": 0.0189208984375, "loss_xval": 0.46484375, "num_input_tokens_seen": 354657452, "step": 5656 }, { "epoch": 18.825291181364392, "grad_norm": 33.1021842956543, "learning_rate": 5e-06, "loss": 0.4649, "num_input_tokens_seen": 354719296, "step": 5657 }, { "epoch": 18.825291181364392, "loss": 0.3153727650642395, "loss_ce": 4.092369636055082e-06, "loss_iou": 0.11474609375, "loss_num": 0.0172119140625, "loss_xval": 0.314453125, "num_input_tokens_seen": 354719296, "step": 5657 }, { "epoch": 18.828618968386024, "grad_norm": 43.49013900756836, "learning_rate": 5e-06, "loss": 0.3743, "num_input_tokens_seen": 354781976, "step": 5658 }, { "epoch": 18.828618968386024, "loss": 0.4322526752948761, "loss_ce": 1.722206434351392e-06, "loss_iou": 0.181640625, "loss_num": 0.0140380859375, "loss_xval": 0.431640625, "num_input_tokens_seen": 354781976, "step": 5658 }, { "epoch": 18.831946755407653, "grad_norm": 39.817588806152344, "learning_rate": 5e-06, "loss": 0.4045, "num_input_tokens_seen": 354844704, "step": 5659 }, { "epoch": 18.831946755407653, "loss": 0.3977794349193573, "loss_ce": 1.3330015462997835e-05, "loss_iou": 0.16015625, "loss_num": 0.015625, "loss_xval": 0.3984375, "num_input_tokens_seen": 354844704, "step": 5659 }, { "epoch": 18.835274542429286, "grad_norm": 20.517011642456055, "learning_rate": 5e-06, "loss": 0.3768, "num_input_tokens_seen": 354907812, "step": 5660 }, { "epoch": 18.835274542429286, "loss": 0.31662195920944214, "loss_ce": 2.0745283109135926e-06, "loss_iou": 0.1279296875, "loss_num": 0.0120849609375, "loss_xval": 0.31640625, "num_input_tokens_seen": 354907812, "step": 5660 }, { "epoch": 18.838602329450914, "grad_norm": 10.804510116577148, "learning_rate": 5e-06, "loss": 0.3339, "num_input_tokens_seen": 354969964, "step": 5661 }, { "epoch": 18.838602329450914, "loss": 0.33587783575057983, "loss_ce": 1.3933474747318542e-06, "loss_iou": 0.12890625, "loss_num": 0.0155029296875, "loss_xval": 0.3359375, "num_input_tokens_seen": 354969964, "step": 5661 }, { "epoch": 18.841930116472547, "grad_norm": 6.670055866241455, "learning_rate": 5e-06, "loss": 0.342, "num_input_tokens_seen": 355032056, "step": 5662 }, { "epoch": 18.841930116472547, "loss": 0.4395638108253479, "loss_ce": 0.002063829218968749, "loss_iou": 0.1708984375, "loss_num": 0.0194091796875, "loss_xval": 0.4375, "num_input_tokens_seen": 355032056, "step": 5662 }, { "epoch": 18.845257903494176, "grad_norm": 11.172454833984375, "learning_rate": 5e-06, "loss": 0.3551, "num_input_tokens_seen": 355094260, "step": 5663 }, { "epoch": 18.845257903494176, "loss": 0.4835568368434906, "loss_ce": 5.8103651099372655e-06, "loss_iou": 0.20703125, "loss_num": 0.01397705078125, "loss_xval": 0.484375, "num_input_tokens_seen": 355094260, "step": 5663 }, { "epoch": 18.848585690515808, "grad_norm": 22.370718002319336, "learning_rate": 5e-06, "loss": 0.5993, "num_input_tokens_seen": 355157996, "step": 5664 }, { "epoch": 18.848585690515808, "loss": 0.7472245097160339, "loss_ce": 0.00042887323070317507, "loss_iou": 0.29296875, "loss_num": 0.0322265625, "loss_xval": 0.74609375, "num_input_tokens_seen": 355157996, "step": 5664 }, { "epoch": 18.851913477537437, "grad_norm": 26.61648178100586, "learning_rate": 5e-06, "loss": 0.3973, "num_input_tokens_seen": 355221488, "step": 5665 }, { "epoch": 18.851913477537437, "loss": 0.4576159715652466, "loss_ce": 1.2494822840380948e-05, "loss_iou": 0.173828125, "loss_num": 0.0218505859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 355221488, "step": 5665 }, { "epoch": 18.85524126455907, "grad_norm": 20.728872299194336, "learning_rate": 5e-06, "loss": 0.5241, "num_input_tokens_seen": 355285348, "step": 5666 }, { "epoch": 18.85524126455907, "loss": 0.6306787729263306, "loss_ce": 2.5200290565408068e-06, "loss_iou": 0.265625, "loss_num": 0.0201416015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 355285348, "step": 5666 }, { "epoch": 18.858569051580698, "grad_norm": 13.963292121887207, "learning_rate": 5e-06, "loss": 0.52, "num_input_tokens_seen": 355347380, "step": 5667 }, { "epoch": 18.858569051580698, "loss": 0.5572341680526733, "loss_ce": 4.418698154040612e-05, "loss_iou": 0.2490234375, "loss_num": 0.0118408203125, "loss_xval": 0.55859375, "num_input_tokens_seen": 355347380, "step": 5667 }, { "epoch": 18.86189683860233, "grad_norm": 12.982034683227539, "learning_rate": 5e-06, "loss": 0.4071, "num_input_tokens_seen": 355410024, "step": 5668 }, { "epoch": 18.86189683860233, "loss": 0.4335640072822571, "loss_ce": 8.046811785789032e-07, "loss_iou": 0.16015625, "loss_num": 0.0225830078125, "loss_xval": 0.43359375, "num_input_tokens_seen": 355410024, "step": 5668 }, { "epoch": 18.86522462562396, "grad_norm": 15.830412864685059, "learning_rate": 5e-06, "loss": 0.2593, "num_input_tokens_seen": 355472864, "step": 5669 }, { "epoch": 18.86522462562396, "loss": 0.34772372245788574, "loss_ce": 6.433357157220598e-06, "loss_iou": 0.126953125, "loss_num": 0.01904296875, "loss_xval": 0.34765625, "num_input_tokens_seen": 355472864, "step": 5669 }, { "epoch": 18.86855241264559, "grad_norm": 13.285154342651367, "learning_rate": 5e-06, "loss": 0.2756, "num_input_tokens_seen": 355534816, "step": 5670 }, { "epoch": 18.86855241264559, "loss": 0.29675427079200745, "loss_ce": 1.345491796200804e-06, "loss_iou": 0.1201171875, "loss_num": 0.0113525390625, "loss_xval": 0.296875, "num_input_tokens_seen": 355534816, "step": 5670 }, { "epoch": 18.87188019966722, "grad_norm": 11.893289566040039, "learning_rate": 5e-06, "loss": 0.4302, "num_input_tokens_seen": 355597496, "step": 5671 }, { "epoch": 18.87188019966722, "loss": 0.4488983154296875, "loss_ce": 4.578319931169972e-05, "loss_iou": 0.15234375, "loss_num": 0.029052734375, "loss_xval": 0.44921875, "num_input_tokens_seen": 355597496, "step": 5671 }, { "epoch": 18.875207986688853, "grad_norm": 26.951053619384766, "learning_rate": 5e-06, "loss": 0.4006, "num_input_tokens_seen": 355660516, "step": 5672 }, { "epoch": 18.875207986688853, "loss": 0.1719406396150589, "loss_ce": 4.5995575419510715e-06, "loss_iou": 0.061279296875, "loss_num": 0.0098876953125, "loss_xval": 0.171875, "num_input_tokens_seen": 355660516, "step": 5672 }, { "epoch": 18.87853577371048, "grad_norm": 34.10044860839844, "learning_rate": 5e-06, "loss": 0.5366, "num_input_tokens_seen": 355723500, "step": 5673 }, { "epoch": 18.87853577371048, "loss": 0.6059850454330444, "loss_ce": 5.080741175333969e-06, "loss_iou": 0.26953125, "loss_num": 0.013671875, "loss_xval": 0.60546875, "num_input_tokens_seen": 355723500, "step": 5673 }, { "epoch": 18.881863560732114, "grad_norm": 37.443424224853516, "learning_rate": 5e-06, "loss": 0.4957, "num_input_tokens_seen": 355786948, "step": 5674 }, { "epoch": 18.881863560732114, "loss": 0.5842314958572388, "loss_ce": 3.018299594259588e-06, "loss_iou": 0.2578125, "loss_num": 0.01385498046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 355786948, "step": 5674 }, { "epoch": 18.885191347753743, "grad_norm": 34.88566970825195, "learning_rate": 5e-06, "loss": 0.5555, "num_input_tokens_seen": 355851192, "step": 5675 }, { "epoch": 18.885191347753743, "loss": 0.6592289209365845, "loss_ce": 1.8756742065306753e-05, "loss_iou": 0.248046875, "loss_num": 0.03271484375, "loss_xval": 0.66015625, "num_input_tokens_seen": 355851192, "step": 5675 }, { "epoch": 18.888519134775375, "grad_norm": 13.942546844482422, "learning_rate": 5e-06, "loss": 0.5059, "num_input_tokens_seen": 355912736, "step": 5676 }, { "epoch": 18.888519134775375, "loss": 0.6526669263839722, "loss_ce": 0.00020105016301386058, "loss_iou": 0.259765625, "loss_num": 0.0267333984375, "loss_xval": 0.65234375, "num_input_tokens_seen": 355912736, "step": 5676 }, { "epoch": 18.891846921797004, "grad_norm": 9.875178337097168, "learning_rate": 5e-06, "loss": 0.3741, "num_input_tokens_seen": 355975640, "step": 5677 }, { "epoch": 18.891846921797004, "loss": 0.5256689786911011, "loss_ce": 3.7330771647248184e-06, "loss_iou": 0.1787109375, "loss_num": 0.033447265625, "loss_xval": 0.52734375, "num_input_tokens_seen": 355975640, "step": 5677 }, { "epoch": 18.895174708818637, "grad_norm": 17.49053192138672, "learning_rate": 5e-06, "loss": 0.3632, "num_input_tokens_seen": 356038532, "step": 5678 }, { "epoch": 18.895174708818637, "loss": 0.5628975033760071, "loss_ce": 7.675934057260747e-07, "loss_iou": 0.21484375, "loss_num": 0.0267333984375, "loss_xval": 0.5625, "num_input_tokens_seen": 356038532, "step": 5678 }, { "epoch": 18.898502495840265, "grad_norm": 22.670581817626953, "learning_rate": 5e-06, "loss": 0.3762, "num_input_tokens_seen": 356100904, "step": 5679 }, { "epoch": 18.898502495840265, "loss": 0.5003665685653687, "loss_ce": 3.727400326170027e-07, "loss_iou": 0.201171875, "loss_num": 0.01953125, "loss_xval": 0.5, "num_input_tokens_seen": 356100904, "step": 5679 }, { "epoch": 18.901830282861898, "grad_norm": 12.720881462097168, "learning_rate": 5e-06, "loss": 0.2686, "num_input_tokens_seen": 356162980, "step": 5680 }, { "epoch": 18.901830282861898, "loss": 0.2590954303741455, "loss_ce": 1.201919985760469e-06, "loss_iou": 0.0966796875, "loss_num": 0.0130615234375, "loss_xval": 0.259765625, "num_input_tokens_seen": 356162980, "step": 5680 }, { "epoch": 18.905158069883527, "grad_norm": 12.644536972045898, "learning_rate": 5e-06, "loss": 0.474, "num_input_tokens_seen": 356226804, "step": 5681 }, { "epoch": 18.905158069883527, "loss": 0.362154483795166, "loss_ce": 2.4202636268455535e-06, "loss_iou": 0.1455078125, "loss_num": 0.01434326171875, "loss_xval": 0.361328125, "num_input_tokens_seen": 356226804, "step": 5681 }, { "epoch": 18.90848585690516, "grad_norm": 17.174190521240234, "learning_rate": 5e-06, "loss": 0.3772, "num_input_tokens_seen": 356289140, "step": 5682 }, { "epoch": 18.90848585690516, "loss": 0.14378488063812256, "loss_ce": 1.3156318345863838e-06, "loss_iou": 0.03759765625, "loss_num": 0.0137939453125, "loss_xval": 0.1435546875, "num_input_tokens_seen": 356289140, "step": 5682 }, { "epoch": 18.911813643926788, "grad_norm": 9.08018970489502, "learning_rate": 5e-06, "loss": 0.2413, "num_input_tokens_seen": 356349092, "step": 5683 }, { "epoch": 18.911813643926788, "loss": 0.31074583530426025, "loss_ce": 5.940332812315319e-07, "loss_iou": 0.0927734375, "loss_num": 0.025146484375, "loss_xval": 0.310546875, "num_input_tokens_seen": 356349092, "step": 5683 }, { "epoch": 18.91514143094842, "grad_norm": 21.541749954223633, "learning_rate": 5e-06, "loss": 0.493, "num_input_tokens_seen": 356412040, "step": 5684 }, { "epoch": 18.91514143094842, "loss": 0.49140453338623047, "loss_ce": 1.0505492355150636e-05, "loss_iou": 0.185546875, "loss_num": 0.02392578125, "loss_xval": 0.4921875, "num_input_tokens_seen": 356412040, "step": 5684 }, { "epoch": 18.91846921797005, "grad_norm": 38.045143127441406, "learning_rate": 5e-06, "loss": 0.3943, "num_input_tokens_seen": 356474872, "step": 5685 }, { "epoch": 18.91846921797005, "loss": 0.4421694278717041, "loss_ce": 2.2462344873019902e-07, "loss_iou": 0.1875, "loss_num": 0.0135498046875, "loss_xval": 0.44140625, "num_input_tokens_seen": 356474872, "step": 5685 }, { "epoch": 18.92179700499168, "grad_norm": 26.38801383972168, "learning_rate": 5e-06, "loss": 0.6019, "num_input_tokens_seen": 356538972, "step": 5686 }, { "epoch": 18.92179700499168, "loss": 0.6369650959968567, "loss_ce": 2.1958946945233038e-06, "loss_iou": 0.255859375, "loss_num": 0.025390625, "loss_xval": 0.63671875, "num_input_tokens_seen": 356538972, "step": 5686 }, { "epoch": 18.92512479201331, "grad_norm": 8.833430290222168, "learning_rate": 5e-06, "loss": 0.4497, "num_input_tokens_seen": 356602432, "step": 5687 }, { "epoch": 18.92512479201331, "loss": 0.46301424503326416, "loss_ce": 1.549904254716239e-06, "loss_iou": 0.1826171875, "loss_num": 0.0194091796875, "loss_xval": 0.462890625, "num_input_tokens_seen": 356602432, "step": 5687 }, { "epoch": 18.928452579034943, "grad_norm": 11.019174575805664, "learning_rate": 5e-06, "loss": 0.3896, "num_input_tokens_seen": 356663936, "step": 5688 }, { "epoch": 18.928452579034943, "loss": 0.27655652165412903, "loss_ce": 3.673434184747748e-05, "loss_iou": 0.103515625, "loss_num": 0.0137939453125, "loss_xval": 0.27734375, "num_input_tokens_seen": 356663936, "step": 5688 }, { "epoch": 18.93178036605657, "grad_norm": 12.883810043334961, "learning_rate": 5e-06, "loss": 0.336, "num_input_tokens_seen": 356727024, "step": 5689 }, { "epoch": 18.93178036605657, "loss": 0.4119422137737274, "loss_ce": 1.5976345821400173e-05, "loss_iou": 0.1640625, "loss_num": 0.0167236328125, "loss_xval": 0.412109375, "num_input_tokens_seen": 356727024, "step": 5689 }, { "epoch": 18.935108153078204, "grad_norm": 17.006458282470703, "learning_rate": 5e-06, "loss": 0.336, "num_input_tokens_seen": 356790684, "step": 5690 }, { "epoch": 18.935108153078204, "loss": 0.4597838521003723, "loss_ce": 6.045311238267459e-06, "loss_iou": 0.1904296875, "loss_num": 0.0159912109375, "loss_xval": 0.458984375, "num_input_tokens_seen": 356790684, "step": 5690 }, { "epoch": 18.938435940099833, "grad_norm": 20.036985397338867, "learning_rate": 5e-06, "loss": 0.4349, "num_input_tokens_seen": 356854856, "step": 5691 }, { "epoch": 18.938435940099833, "loss": 0.429932564496994, "loss_ce": 9.233714877154853e-07, "loss_iou": 0.1923828125, "loss_num": 0.0091552734375, "loss_xval": 0.4296875, "num_input_tokens_seen": 356854856, "step": 5691 }, { "epoch": 18.941763727121465, "grad_norm": 16.557138442993164, "learning_rate": 5e-06, "loss": 0.3554, "num_input_tokens_seen": 356915936, "step": 5692 }, { "epoch": 18.941763727121465, "loss": 0.2968156933784485, "loss_ce": 1.7070960893761367e-06, "loss_iou": 0.11962890625, "loss_num": 0.0115966796875, "loss_xval": 0.296875, "num_input_tokens_seen": 356915936, "step": 5692 }, { "epoch": 18.945091514143094, "grad_norm": 7.8654069900512695, "learning_rate": 5e-06, "loss": 0.2286, "num_input_tokens_seen": 356976880, "step": 5693 }, { "epoch": 18.945091514143094, "loss": 0.3419017195701599, "loss_ce": 1.328628059127368e-05, "loss_iou": 0.1357421875, "loss_num": 0.013916015625, "loss_xval": 0.341796875, "num_input_tokens_seen": 356976880, "step": 5693 }, { "epoch": 18.948419301164726, "grad_norm": 9.151227951049805, "learning_rate": 5e-06, "loss": 0.3135, "num_input_tokens_seen": 357037872, "step": 5694 }, { "epoch": 18.948419301164726, "loss": 0.3437187075614929, "loss_ce": 0.0018608259269967675, "loss_iou": 0.12158203125, "loss_num": 0.01953125, "loss_xval": 0.341796875, "num_input_tokens_seen": 357037872, "step": 5694 }, { "epoch": 18.951747088186355, "grad_norm": 8.116426467895508, "learning_rate": 5e-06, "loss": 0.5348, "num_input_tokens_seen": 357100412, "step": 5695 }, { "epoch": 18.951747088186355, "loss": 0.6317943334579468, "loss_ce": 1.9456019799690694e-05, "loss_iou": 0.22265625, "loss_num": 0.037353515625, "loss_xval": 0.6328125, "num_input_tokens_seen": 357100412, "step": 5695 }, { "epoch": 18.955074875207988, "grad_norm": 12.259775161743164, "learning_rate": 5e-06, "loss": 0.4027, "num_input_tokens_seen": 357162032, "step": 5696 }, { "epoch": 18.955074875207988, "loss": 0.4873685836791992, "loss_ce": 2.8803501663787756e-06, "loss_iou": 0.1953125, "loss_num": 0.0191650390625, "loss_xval": 0.48828125, "num_input_tokens_seen": 357162032, "step": 5696 }, { "epoch": 18.958402662229616, "grad_norm": 20.126291275024414, "learning_rate": 5e-06, "loss": 0.4698, "num_input_tokens_seen": 357225424, "step": 5697 }, { "epoch": 18.958402662229616, "loss": 0.4098522663116455, "loss_ce": 1.194264882542484e-06, "loss_iou": 0.169921875, "loss_num": 0.01409912109375, "loss_xval": 0.41015625, "num_input_tokens_seen": 357225424, "step": 5697 }, { "epoch": 18.96173044925125, "grad_norm": 10.422189712524414, "learning_rate": 5e-06, "loss": 0.5118, "num_input_tokens_seen": 357286328, "step": 5698 }, { "epoch": 18.96173044925125, "loss": 0.7488142251968384, "loss_ce": 4.45034856966231e-06, "loss_iou": 0.298828125, "loss_num": 0.0299072265625, "loss_xval": 0.75, "num_input_tokens_seen": 357286328, "step": 5698 }, { "epoch": 18.965058236272878, "grad_norm": 23.04779052734375, "learning_rate": 5e-06, "loss": 0.4049, "num_input_tokens_seen": 357348148, "step": 5699 }, { "epoch": 18.965058236272878, "loss": 0.3485792875289917, "loss_ce": 7.483812169084558e-06, "loss_iou": 0.140625, "loss_num": 0.01348876953125, "loss_xval": 0.34765625, "num_input_tokens_seen": 357348148, "step": 5699 }, { "epoch": 18.96838602329451, "grad_norm": 11.641287803649902, "learning_rate": 5e-06, "loss": 0.3852, "num_input_tokens_seen": 357409832, "step": 5700 }, { "epoch": 18.96838602329451, "loss": 0.3734199106693268, "loss_ce": 6.8196413849364035e-06, "loss_iou": 0.1455078125, "loss_num": 0.016357421875, "loss_xval": 0.373046875, "num_input_tokens_seen": 357409832, "step": 5700 }, { "epoch": 18.97171381031614, "grad_norm": 9.548869132995605, "learning_rate": 5e-06, "loss": 0.5294, "num_input_tokens_seen": 357473580, "step": 5701 }, { "epoch": 18.97171381031614, "loss": 0.4927990436553955, "loss_ce": 1.2116126981709385e-06, "loss_iou": 0.1865234375, "loss_num": 0.0238037109375, "loss_xval": 0.4921875, "num_input_tokens_seen": 357473580, "step": 5701 }, { "epoch": 18.97504159733777, "grad_norm": 14.999140739440918, "learning_rate": 5e-06, "loss": 0.3405, "num_input_tokens_seen": 357536708, "step": 5702 }, { "epoch": 18.97504159733777, "loss": 0.23233821988105774, "loss_ce": 1.1718047062458936e-05, "loss_iou": 0.08837890625, "loss_num": 0.0111083984375, "loss_xval": 0.232421875, "num_input_tokens_seen": 357536708, "step": 5702 }, { "epoch": 18.9783693843594, "grad_norm": 11.352274894714355, "learning_rate": 5e-06, "loss": 0.3875, "num_input_tokens_seen": 357599172, "step": 5703 }, { "epoch": 18.9783693843594, "loss": 0.5067195892333984, "loss_ce": 5.726295512431534e-06, "loss_iou": 0.19140625, "loss_num": 0.0244140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 357599172, "step": 5703 }, { "epoch": 18.981697171381033, "grad_norm": 8.41838550567627, "learning_rate": 5e-06, "loss": 0.4466, "num_input_tokens_seen": 357660960, "step": 5704 }, { "epoch": 18.981697171381033, "loss": 0.2967183589935303, "loss_ce": 3.5828725231112912e-06, "loss_iou": 0.12353515625, "loss_num": 0.00994873046875, "loss_xval": 0.296875, "num_input_tokens_seen": 357660960, "step": 5704 }, { "epoch": 18.98502495840266, "grad_norm": 6.609307289123535, "learning_rate": 5e-06, "loss": 0.4046, "num_input_tokens_seen": 357722572, "step": 5705 }, { "epoch": 18.98502495840266, "loss": 0.2364301085472107, "loss_ce": 0.00022405841446015984, "loss_iou": 0.08642578125, "loss_num": 0.01263427734375, "loss_xval": 0.236328125, "num_input_tokens_seen": 357722572, "step": 5705 }, { "epoch": 18.988352745424294, "grad_norm": 34.85097122192383, "learning_rate": 5e-06, "loss": 0.6604, "num_input_tokens_seen": 357786852, "step": 5706 }, { "epoch": 18.988352745424294, "loss": 0.541020393371582, "loss_ce": 4.730855380330468e-06, "loss_iou": 0.2353515625, "loss_num": 0.01397705078125, "loss_xval": 0.5390625, "num_input_tokens_seen": 357786852, "step": 5706 }, { "epoch": 18.991680532445923, "grad_norm": 30.39300537109375, "learning_rate": 5e-06, "loss": 0.3978, "num_input_tokens_seen": 357849668, "step": 5707 }, { "epoch": 18.991680532445923, "loss": 0.4481232166290283, "loss_ce": 3.1174781724985223e-06, "loss_iou": 0.185546875, "loss_num": 0.0152587890625, "loss_xval": 0.447265625, "num_input_tokens_seen": 357849668, "step": 5707 }, { "epoch": 18.995008319467555, "grad_norm": 17.361059188842773, "learning_rate": 5e-06, "loss": 0.5554, "num_input_tokens_seen": 357912972, "step": 5708 }, { "epoch": 18.995008319467555, "loss": 0.6340047717094421, "loss_ce": 2.0725492504425347e-06, "loss_iou": 0.267578125, "loss_num": 0.0198974609375, "loss_xval": 0.6328125, "num_input_tokens_seen": 357912972, "step": 5708 }, { "epoch": 18.998336106489184, "grad_norm": 6.243175029754639, "learning_rate": 5e-06, "loss": 0.1767, "num_input_tokens_seen": 357975152, "step": 5709 }, { "epoch": 18.998336106489184, "loss": 0.24340879917144775, "loss_ce": 5.897959454159718e-07, "loss_iou": 0.11279296875, "loss_num": 0.003509521484375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 357975152, "step": 5709 }, { "epoch": 18.998336106489184, "loss": 0.6780495047569275, "loss_ce": 9.938672519638203e-06, "loss_iou": 0.2060546875, "loss_num": 0.053466796875, "loss_xval": 0.6796875, "num_input_tokens_seen": 358006432, "step": 5709 }, { "epoch": 19.001663893510816, "grad_norm": 5.240334510803223, "learning_rate": 5e-06, "loss": 0.5527, "num_input_tokens_seen": 358037728, "step": 5710 }, { "epoch": 19.001663893510816, "loss": 0.4272826611995697, "loss_ce": 6.07239780947566e-06, "loss_iou": 0.1591796875, "loss_num": 0.0216064453125, "loss_xval": 0.427734375, "num_input_tokens_seen": 358037728, "step": 5710 }, { "epoch": 19.004991680532445, "grad_norm": 18.966068267822266, "learning_rate": 5e-06, "loss": 0.5664, "num_input_tokens_seen": 358103132, "step": 5711 }, { "epoch": 19.004991680532445, "loss": 0.5433380603790283, "loss_ce": 3.070006414418458e-06, "loss_iou": 0.2392578125, "loss_num": 0.0126953125, "loss_xval": 0.54296875, "num_input_tokens_seen": 358103132, "step": 5711 }, { "epoch": 19.008319467554077, "grad_norm": 17.45966339111328, "learning_rate": 5e-06, "loss": 0.4742, "num_input_tokens_seen": 358164312, "step": 5712 }, { "epoch": 19.008319467554077, "loss": 0.25702136754989624, "loss_ce": 2.312756578248809e-06, "loss_iou": 0.09716796875, "loss_num": 0.01263427734375, "loss_xval": 0.2578125, "num_input_tokens_seen": 358164312, "step": 5712 }, { "epoch": 19.011647254575706, "grad_norm": 22.978349685668945, "learning_rate": 5e-06, "loss": 0.5586, "num_input_tokens_seen": 358228592, "step": 5713 }, { "epoch": 19.011647254575706, "loss": 0.6045544743537903, "loss_ce": 1.2764319308189442e-06, "loss_iou": 0.255859375, "loss_num": 0.0184326171875, "loss_xval": 0.60546875, "num_input_tokens_seen": 358228592, "step": 5713 }, { "epoch": 19.01497504159734, "grad_norm": 27.894699096679688, "learning_rate": 5e-06, "loss": 0.3391, "num_input_tokens_seen": 358292200, "step": 5714 }, { "epoch": 19.01497504159734, "loss": 0.3563581109046936, "loss_ce": 3.48804060195107e-05, "loss_iou": 0.11767578125, "loss_num": 0.024169921875, "loss_xval": 0.35546875, "num_input_tokens_seen": 358292200, "step": 5714 }, { "epoch": 19.018302828618967, "grad_norm": 6.364406585693359, "learning_rate": 5e-06, "loss": 0.2793, "num_input_tokens_seen": 358353584, "step": 5715 }, { "epoch": 19.018302828618967, "loss": 0.439174085855484, "loss_ce": 8.716357842786238e-05, "loss_iou": 0.162109375, "loss_num": 0.02294921875, "loss_xval": 0.439453125, "num_input_tokens_seen": 358353584, "step": 5715 }, { "epoch": 19.0216306156406, "grad_norm": 13.074263572692871, "learning_rate": 5e-06, "loss": 0.2821, "num_input_tokens_seen": 358415276, "step": 5716 }, { "epoch": 19.0216306156406, "loss": 0.11203251779079437, "loss_ce": 2.4806608962535392e-06, "loss_iou": 0.0157470703125, "loss_num": 0.01611328125, "loss_xval": 0.11181640625, "num_input_tokens_seen": 358415276, "step": 5716 }, { "epoch": 19.02495840266223, "grad_norm": 15.119400978088379, "learning_rate": 5e-06, "loss": 0.3054, "num_input_tokens_seen": 358477860, "step": 5717 }, { "epoch": 19.02495840266223, "loss": 0.11290054023265839, "loss_ce": 7.658305776203633e-07, "loss_iou": 0.00927734375, "loss_num": 0.0189208984375, "loss_xval": 0.11279296875, "num_input_tokens_seen": 358477860, "step": 5717 }, { "epoch": 19.02828618968386, "grad_norm": 7.888031482696533, "learning_rate": 5e-06, "loss": 0.5117, "num_input_tokens_seen": 358541436, "step": 5718 }, { "epoch": 19.02828618968386, "loss": 0.46106159687042236, "loss_ce": 2.037940475929645e-06, "loss_iou": 0.1591796875, "loss_num": 0.028564453125, "loss_xval": 0.4609375, "num_input_tokens_seen": 358541436, "step": 5718 }, { "epoch": 19.03161397670549, "grad_norm": 7.860311031341553, "learning_rate": 5e-06, "loss": 0.1874, "num_input_tokens_seen": 358603408, "step": 5719 }, { "epoch": 19.03161397670549, "loss": 0.1376596987247467, "loss_ce": 1.0149932677450124e-05, "loss_iou": 0.040771484375, "loss_num": 0.01123046875, "loss_xval": 0.1376953125, "num_input_tokens_seen": 358603408, "step": 5719 }, { "epoch": 19.034941763727122, "grad_norm": 8.932470321655273, "learning_rate": 5e-06, "loss": 0.3107, "num_input_tokens_seen": 358665976, "step": 5720 }, { "epoch": 19.034941763727122, "loss": 0.3768472373485565, "loss_ce": 9.18388536774728e-07, "loss_iou": 0.15234375, "loss_num": 0.0146484375, "loss_xval": 0.376953125, "num_input_tokens_seen": 358665976, "step": 5720 }, { "epoch": 19.03826955074875, "grad_norm": 24.69854164123535, "learning_rate": 5e-06, "loss": 0.4189, "num_input_tokens_seen": 358728824, "step": 5721 }, { "epoch": 19.03826955074875, "loss": 0.4315212368965149, "loss_ce": 2.6902948775386903e-06, "loss_iou": 0.1689453125, "loss_num": 0.018798828125, "loss_xval": 0.431640625, "num_input_tokens_seen": 358728824, "step": 5721 }, { "epoch": 19.041597337770384, "grad_norm": 33.214317321777344, "learning_rate": 5e-06, "loss": 0.363, "num_input_tokens_seen": 358791268, "step": 5722 }, { "epoch": 19.041597337770384, "loss": 0.376343309879303, "loss_ce": 5.536356866286951e-07, "loss_iou": 0.154296875, "loss_num": 0.013671875, "loss_xval": 0.376953125, "num_input_tokens_seen": 358791268, "step": 5722 }, { "epoch": 19.044925124792012, "grad_norm": 18.454296112060547, "learning_rate": 5e-06, "loss": 0.3151, "num_input_tokens_seen": 358855496, "step": 5723 }, { "epoch": 19.044925124792012, "loss": 0.3607799708843231, "loss_ce": 1.1475307246655575e-06, "loss_iou": 0.1396484375, "loss_num": 0.016357421875, "loss_xval": 0.361328125, "num_input_tokens_seen": 358855496, "step": 5723 }, { "epoch": 19.048252911813645, "grad_norm": 8.4237699508667, "learning_rate": 5e-06, "loss": 0.3286, "num_input_tokens_seen": 358916788, "step": 5724 }, { "epoch": 19.048252911813645, "loss": 0.38098469376564026, "loss_ce": 3.2334469324268866e-06, "loss_iou": 0.1494140625, "loss_num": 0.0164794921875, "loss_xval": 0.380859375, "num_input_tokens_seen": 358916788, "step": 5724 }, { "epoch": 19.051580698835274, "grad_norm": 10.148604393005371, "learning_rate": 5e-06, "loss": 0.3909, "num_input_tokens_seen": 358978312, "step": 5725 }, { "epoch": 19.051580698835274, "loss": 0.391480416059494, "loss_ce": 9.153877726930659e-07, "loss_iou": 0.1474609375, "loss_num": 0.019287109375, "loss_xval": 0.390625, "num_input_tokens_seen": 358978312, "step": 5725 }, { "epoch": 19.054908485856906, "grad_norm": 20.75384521484375, "learning_rate": 5e-06, "loss": 0.4021, "num_input_tokens_seen": 359041292, "step": 5726 }, { "epoch": 19.054908485856906, "loss": 0.2913838326931, "loss_ce": 1.986657480301801e-06, "loss_iou": 0.09912109375, "loss_num": 0.0186767578125, "loss_xval": 0.291015625, "num_input_tokens_seen": 359041292, "step": 5726 }, { "epoch": 19.058236272878535, "grad_norm": 14.464926719665527, "learning_rate": 5e-06, "loss": 0.4331, "num_input_tokens_seen": 359102384, "step": 5727 }, { "epoch": 19.058236272878535, "loss": 0.5824011564254761, "loss_ce": 3.641067451098934e-06, "loss_iou": 0.2001953125, "loss_num": 0.036376953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 359102384, "step": 5727 }, { "epoch": 19.061564059900167, "grad_norm": 11.490264892578125, "learning_rate": 5e-06, "loss": 0.2533, "num_input_tokens_seen": 359162712, "step": 5728 }, { "epoch": 19.061564059900167, "loss": 0.2264125645160675, "loss_ce": 2.65820199274458e-06, "loss_iou": 0.09375, "loss_num": 0.0078125, "loss_xval": 0.2265625, "num_input_tokens_seen": 359162712, "step": 5728 }, { "epoch": 19.064891846921796, "grad_norm": 26.02570915222168, "learning_rate": 5e-06, "loss": 0.5397, "num_input_tokens_seen": 359227176, "step": 5729 }, { "epoch": 19.064891846921796, "loss": 0.5426126718521118, "loss_ce": 0.00013222053530626, "loss_iou": 0.216796875, "loss_num": 0.021484375, "loss_xval": 0.54296875, "num_input_tokens_seen": 359227176, "step": 5729 }, { "epoch": 19.06821963394343, "grad_norm": 45.79217529296875, "learning_rate": 5e-06, "loss": 0.6131, "num_input_tokens_seen": 359290224, "step": 5730 }, { "epoch": 19.06821963394343, "loss": 0.7342565059661865, "loss_ce": 3.5593945995060494e-06, "loss_iou": 0.279296875, "loss_num": 0.035400390625, "loss_xval": 0.734375, "num_input_tokens_seen": 359290224, "step": 5730 }, { "epoch": 19.071547420965057, "grad_norm": 37.357975006103516, "learning_rate": 5e-06, "loss": 0.432, "num_input_tokens_seen": 359352228, "step": 5731 }, { "epoch": 19.071547420965057, "loss": 0.29849377274513245, "loss_ce": 1.351922946923878e-06, "loss_iou": 0.1298828125, "loss_num": 0.00775146484375, "loss_xval": 0.298828125, "num_input_tokens_seen": 359352228, "step": 5731 }, { "epoch": 19.07487520798669, "grad_norm": 26.162935256958008, "learning_rate": 5e-06, "loss": 0.4038, "num_input_tokens_seen": 359414768, "step": 5732 }, { "epoch": 19.07487520798669, "loss": 0.5738543272018433, "loss_ce": 1.8075119214699953e-06, "loss_iou": 0.2314453125, "loss_num": 0.0223388671875, "loss_xval": 0.57421875, "num_input_tokens_seen": 359414768, "step": 5732 }, { "epoch": 19.07820299500832, "grad_norm": 23.950471878051758, "learning_rate": 5e-06, "loss": 0.6412, "num_input_tokens_seen": 359478196, "step": 5733 }, { "epoch": 19.07820299500832, "loss": 0.8745386004447937, "loss_ce": 2.6914716727333143e-05, "loss_iou": 0.33203125, "loss_num": 0.041748046875, "loss_xval": 0.875, "num_input_tokens_seen": 359478196, "step": 5733 }, { "epoch": 19.08153078202995, "grad_norm": 23.047740936279297, "learning_rate": 5e-06, "loss": 0.3587, "num_input_tokens_seen": 359541012, "step": 5734 }, { "epoch": 19.08153078202995, "loss": 0.22223825752735138, "loss_ce": 9.25375752558466e-06, "loss_iou": 0.09765625, "loss_num": 0.00531005859375, "loss_xval": 0.22265625, "num_input_tokens_seen": 359541012, "step": 5734 }, { "epoch": 19.08485856905158, "grad_norm": 35.272193908691406, "learning_rate": 5e-06, "loss": 0.5309, "num_input_tokens_seen": 359602032, "step": 5735 }, { "epoch": 19.08485856905158, "loss": 0.5121932029724121, "loss_ce": 1.4162766319714137e-06, "loss_iou": 0.1796875, "loss_num": 0.0303955078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 359602032, "step": 5735 }, { "epoch": 19.088186356073212, "grad_norm": 44.29369354248047, "learning_rate": 5e-06, "loss": 0.4061, "num_input_tokens_seen": 359664548, "step": 5736 }, { "epoch": 19.088186356073212, "loss": 0.5198377370834351, "loss_ce": 1.3283530506669194e-06, "loss_iou": 0.2216796875, "loss_num": 0.01513671875, "loss_xval": 0.51953125, "num_input_tokens_seen": 359664548, "step": 5736 }, { "epoch": 19.09151414309484, "grad_norm": 27.448806762695312, "learning_rate": 5e-06, "loss": 0.4158, "num_input_tokens_seen": 359724932, "step": 5737 }, { "epoch": 19.09151414309484, "loss": 0.5434444546699524, "loss_ce": 2.708947022256325e-06, "loss_iou": 0.224609375, "loss_num": 0.0189208984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 359724932, "step": 5737 }, { "epoch": 19.094841930116473, "grad_norm": 18.038333892822266, "learning_rate": 5e-06, "loss": 0.6164, "num_input_tokens_seen": 359787940, "step": 5738 }, { "epoch": 19.094841930116473, "loss": 0.6347171068191528, "loss_ce": 0.00019560917280614376, "loss_iou": 0.2333984375, "loss_num": 0.033447265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 359787940, "step": 5738 }, { "epoch": 19.098169717138102, "grad_norm": 38.778533935546875, "learning_rate": 5e-06, "loss": 0.5895, "num_input_tokens_seen": 359850856, "step": 5739 }, { "epoch": 19.098169717138102, "loss": 0.519899845123291, "loss_ce": 2.3861148292780854e-06, "loss_iou": 0.20703125, "loss_num": 0.021484375, "loss_xval": 0.51953125, "num_input_tokens_seen": 359850856, "step": 5739 }, { "epoch": 19.101497504159735, "grad_norm": 31.314083099365234, "learning_rate": 5e-06, "loss": 0.3002, "num_input_tokens_seen": 359913660, "step": 5740 }, { "epoch": 19.101497504159735, "loss": 0.3466048836708069, "loss_ce": 1.493211129854899e-06, "loss_iou": 0.15625, "loss_num": 0.00689697265625, "loss_xval": 0.345703125, "num_input_tokens_seen": 359913660, "step": 5740 }, { "epoch": 19.104825291181363, "grad_norm": 21.717979431152344, "learning_rate": 5e-06, "loss": 0.342, "num_input_tokens_seen": 359976812, "step": 5741 }, { "epoch": 19.104825291181363, "loss": 0.41565054655075073, "loss_ce": 1.1385855032131076e-06, "loss_iou": 0.169921875, "loss_num": 0.015380859375, "loss_xval": 0.416015625, "num_input_tokens_seen": 359976812, "step": 5741 }, { "epoch": 19.108153078202996, "grad_norm": 10.722843170166016, "learning_rate": 5e-06, "loss": 0.29, "num_input_tokens_seen": 360039520, "step": 5742 }, { "epoch": 19.108153078202996, "loss": 0.25708964467048645, "loss_ce": 9.56374697125284e-06, "loss_iou": 0.10888671875, "loss_num": 0.0079345703125, "loss_xval": 0.2578125, "num_input_tokens_seen": 360039520, "step": 5742 }, { "epoch": 19.111480865224625, "grad_norm": 11.734892845153809, "learning_rate": 5e-06, "loss": 0.3336, "num_input_tokens_seen": 360101700, "step": 5743 }, { "epoch": 19.111480865224625, "loss": 0.2288247048854828, "loss_ce": 3.8949660847720224e-06, "loss_iou": 0.09423828125, "loss_num": 0.00799560546875, "loss_xval": 0.228515625, "num_input_tokens_seen": 360101700, "step": 5743 }, { "epoch": 19.114808652246257, "grad_norm": 8.584782600402832, "learning_rate": 5e-06, "loss": 0.2986, "num_input_tokens_seen": 360163948, "step": 5744 }, { "epoch": 19.114808652246257, "loss": 0.3762063980102539, "loss_ce": 9.80694494501222e-07, "loss_iou": 0.1484375, "loss_num": 0.015869140625, "loss_xval": 0.376953125, "num_input_tokens_seen": 360163948, "step": 5744 }, { "epoch": 19.118136439267886, "grad_norm": 8.956785202026367, "learning_rate": 5e-06, "loss": 0.3314, "num_input_tokens_seen": 360227424, "step": 5745 }, { "epoch": 19.118136439267886, "loss": 0.40673956274986267, "loss_ce": 1.2850183566115447e-06, "loss_iou": 0.16796875, "loss_num": 0.01422119140625, "loss_xval": 0.40625, "num_input_tokens_seen": 360227424, "step": 5745 }, { "epoch": 19.12146422628952, "grad_norm": 8.09567642211914, "learning_rate": 5e-06, "loss": 0.3779, "num_input_tokens_seen": 360290708, "step": 5746 }, { "epoch": 19.12146422628952, "loss": 0.364996999502182, "loss_ce": 6.764024874428287e-06, "loss_iou": 0.142578125, "loss_num": 0.015869140625, "loss_xval": 0.365234375, "num_input_tokens_seen": 360290708, "step": 5746 }, { "epoch": 19.124792013311147, "grad_norm": 18.281705856323242, "learning_rate": 5e-06, "loss": 0.5131, "num_input_tokens_seen": 360352488, "step": 5747 }, { "epoch": 19.124792013311147, "loss": 0.6373128890991211, "loss_ce": 0.00010586583812255412, "loss_iou": 0.259765625, "loss_num": 0.0235595703125, "loss_xval": 0.63671875, "num_input_tokens_seen": 360352488, "step": 5747 }, { "epoch": 19.12811980033278, "grad_norm": 20.957256317138672, "learning_rate": 5e-06, "loss": 0.3968, "num_input_tokens_seen": 360415496, "step": 5748 }, { "epoch": 19.12811980033278, "loss": 0.3221473693847656, "loss_ce": 3.7937143133603968e-06, "loss_iou": 0.1337890625, "loss_num": 0.01092529296875, "loss_xval": 0.322265625, "num_input_tokens_seen": 360415496, "step": 5748 }, { "epoch": 19.13144758735441, "grad_norm": 16.726285934448242, "learning_rate": 5e-06, "loss": 0.3377, "num_input_tokens_seen": 360478100, "step": 5749 }, { "epoch": 19.13144758735441, "loss": 0.46082139015197754, "loss_ce": 5.9442991187097505e-06, "loss_iou": 0.203125, "loss_num": 0.01104736328125, "loss_xval": 0.4609375, "num_input_tokens_seen": 360478100, "step": 5749 }, { "epoch": 19.13477537437604, "grad_norm": 13.61998462677002, "learning_rate": 5e-06, "loss": 0.3414, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "eval_seeclick_CIoU": 0.03610678482800722, "eval_seeclick_GIoU": 0.03265850618481636, "eval_seeclick_IoU": 0.16868987679481506, "eval_seeclick_MAE_all": 0.17902852594852448, "eval_seeclick_MAE_h": 0.07244567573070526, "eval_seeclick_MAE_w": 0.136456198990345, "eval_seeclick_MAE_x_boxes": 0.22201430797576904, "eval_seeclick_MAE_y_boxes": 0.1927475929260254, "eval_seeclick_NUM_probability": 0.9999757707118988, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 3.044710874557495, "eval_seeclick_loss_ce": 0.1738397181034088, "eval_seeclick_loss_iou": 0.983642578125, "eval_seeclick_loss_num": 0.17969512939453125, "eval_seeclick_loss_xval": 2.86474609375, "eval_seeclick_runtime": 67.8933, "eval_seeclick_samples_per_second": 0.692, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "eval_icons_CIoU": -0.06788751110434532, "eval_icons_GIoU": 0.03123145503923297, "eval_icons_IoU": 0.1043703481554985, "eval_icons_MAE_all": 0.20244651287794113, "eval_icons_MAE_h": 0.1879909411072731, "eval_icons_MAE_w": 0.2065812423825264, "eval_icons_MAE_x_boxes": 0.13654616847634315, "eval_icons_MAE_y_boxes": 0.0996764525771141, "eval_icons_NUM_probability": 0.9999921023845673, "eval_icons_inside_bbox": 0.2170138955116272, "eval_icons_loss": 2.902501106262207, "eval_icons_loss_ce": 1.39309719315861e-06, "eval_icons_loss_iou": 0.972900390625, "eval_icons_loss_num": 0.19512939453125, "eval_icons_loss_xval": 2.923828125, "eval_icons_runtime": 70.4699, "eval_icons_samples_per_second": 0.71, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "eval_screenspot_CIoU": 0.17717889696359634, "eval_screenspot_GIoU": 0.2097932000954946, "eval_screenspot_IoU": 0.2869095951318741, "eval_screenspot_MAE_all": 0.11408769090970357, "eval_screenspot_MAE_h": 0.05867135773102442, "eval_screenspot_MAE_w": 0.10178381204605103, "eval_screenspot_MAE_x_boxes": 0.16015754640102386, "eval_screenspot_MAE_y_boxes": 0.08742884298165639, "eval_screenspot_NUM_probability": 0.9999958872795105, "eval_screenspot_inside_bbox": 0.49916666746139526, "eval_screenspot_loss": 2.198390007019043, "eval_screenspot_loss_ce": 4.9042945799252875e-06, "eval_screenspot_loss_iou": 0.8069661458333334, "eval_screenspot_loss_num": 0.122833251953125, "eval_screenspot_loss_xval": 2.228515625, "eval_screenspot_runtime": 125.1645, "eval_screenspot_samples_per_second": 0.711, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "eval_compot_CIoU": 0.18278945982456207, "eval_compot_GIoU": 0.2358373999595642, "eval_compot_IoU": 0.30810777842998505, "eval_compot_MAE_all": 0.12089479714632034, "eval_compot_MAE_h": 0.051339815370738506, "eval_compot_MAE_w": 0.11797875910997391, "eval_compot_MAE_x_boxes": 0.10792999714612961, "eval_compot_MAE_y_boxes": 0.10726717859506607, "eval_compot_NUM_probability": 0.9999971985816956, "eval_compot_inside_bbox": 0.4565972238779068, "eval_compot_loss": 2.086958408355713, "eval_compot_loss_ce": 0.011051815934479237, "eval_compot_loss_iou": 0.7628173828125, "eval_compot_loss_num": 0.12264823913574219, "eval_compot_loss_xval": 2.138916015625, "eval_compot_runtime": 71.561, "eval_compot_samples_per_second": 0.699, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "eval_custom_ui_MAE_all": 0.06072630546987057, "eval_custom_ui_MAE_x": 0.06924512796103954, "eval_custom_ui_MAE_y": 0.05220748484134674, "eval_custom_ui_NUM_probability": 0.9999986588954926, "eval_custom_ui_loss": 0.29937708377838135, "eval_custom_ui_loss_ce": 1.8556233953859191e-06, "eval_custom_ui_loss_num": 0.06459808349609375, "eval_custom_ui_loss_xval": 0.322998046875, "eval_custom_ui_runtime": 54.3141, "eval_custom_ui_samples_per_second": 0.921, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13477537437604, "loss": 0.330324649810791, "loss_ce": 2.3946715828060405e-06, "loss_iou": 0.0, "loss_num": 0.06591796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 360541892, "step": 5750 }, { "epoch": 19.13810316139767, "grad_norm": 17.230775833129883, "learning_rate": 5e-06, "loss": 0.3661, "num_input_tokens_seen": 360604196, "step": 5751 }, { "epoch": 19.13810316139767, "loss": 0.27673405408859253, "loss_ce": 6.743871381331701e-07, "loss_iou": 0.1123046875, "loss_num": 0.01043701171875, "loss_xval": 0.27734375, "num_input_tokens_seen": 360604196, "step": 5751 }, { "epoch": 19.141430948419302, "grad_norm": 29.13734245300293, "learning_rate": 5e-06, "loss": 0.3383, "num_input_tokens_seen": 360666180, "step": 5752 }, { "epoch": 19.141430948419302, "loss": 0.15927374362945557, "loss_ce": 2.504025360394735e-06, "loss_iou": 0.04931640625, "loss_num": 0.01214599609375, "loss_xval": 0.1591796875, "num_input_tokens_seen": 360666180, "step": 5752 }, { "epoch": 19.14475873544093, "grad_norm": 28.24761199951172, "learning_rate": 5e-06, "loss": 0.4276, "num_input_tokens_seen": 360730156, "step": 5753 }, { "epoch": 19.14475873544093, "loss": 0.44067686796188354, "loss_ce": 3.0637077088613296e-06, "loss_iou": 0.2021484375, "loss_num": 0.007232666015625, "loss_xval": 0.44140625, "num_input_tokens_seen": 360730156, "step": 5753 }, { "epoch": 19.148086522462563, "grad_norm": 10.493368148803711, "learning_rate": 5e-06, "loss": 0.4175, "num_input_tokens_seen": 360793536, "step": 5754 }, { "epoch": 19.148086522462563, "loss": 0.32365238666534424, "loss_ce": 1.3477936590788886e-05, "loss_iou": 0.1376953125, "loss_num": 0.009765625, "loss_xval": 0.32421875, "num_input_tokens_seen": 360793536, "step": 5754 }, { "epoch": 19.151414309484192, "grad_norm": 16.18376350402832, "learning_rate": 5e-06, "loss": 0.2575, "num_input_tokens_seen": 360855784, "step": 5755 }, { "epoch": 19.151414309484192, "loss": 0.2692881226539612, "loss_ce": 9.971977306122426e-07, "loss_iou": 0.091796875, "loss_num": 0.01708984375, "loss_xval": 0.26953125, "num_input_tokens_seen": 360855784, "step": 5755 }, { "epoch": 19.154742096505824, "grad_norm": 15.553600311279297, "learning_rate": 5e-06, "loss": 0.3984, "num_input_tokens_seen": 360919496, "step": 5756 }, { "epoch": 19.154742096505824, "loss": 0.5201568603515625, "loss_ce": 1.5281715604942292e-05, "loss_iou": 0.2236328125, "loss_num": 0.01458740234375, "loss_xval": 0.51953125, "num_input_tokens_seen": 360919496, "step": 5756 }, { "epoch": 19.158069883527453, "grad_norm": 18.75748634338379, "learning_rate": 5e-06, "loss": 0.3254, "num_input_tokens_seen": 360982836, "step": 5757 }, { "epoch": 19.158069883527453, "loss": 0.4099748432636261, "loss_ce": 1.71064516507613e-06, "loss_iou": 0.1748046875, "loss_num": 0.01220703125, "loss_xval": 0.41015625, "num_input_tokens_seen": 360982836, "step": 5757 }, { "epoch": 19.161397670549086, "grad_norm": 16.975292205810547, "learning_rate": 5e-06, "loss": 0.4097, "num_input_tokens_seen": 361046452, "step": 5758 }, { "epoch": 19.161397670549086, "loss": 0.32765626907348633, "loss_ce": 4.285811428417219e-06, "loss_iou": 0.138671875, "loss_num": 0.010009765625, "loss_xval": 0.328125, "num_input_tokens_seen": 361046452, "step": 5758 }, { "epoch": 19.164725457570714, "grad_norm": 6.522472381591797, "learning_rate": 5e-06, "loss": 0.237, "num_input_tokens_seen": 361109684, "step": 5759 }, { "epoch": 19.164725457570714, "loss": 0.18191561102867126, "loss_ce": 4.133732090849662e-06, "loss_iou": 0.07958984375, "loss_num": 0.004547119140625, "loss_xval": 0.181640625, "num_input_tokens_seen": 361109684, "step": 5759 }, { "epoch": 19.168053244592347, "grad_norm": 5.031409740447998, "learning_rate": 5e-06, "loss": 0.2806, "num_input_tokens_seen": 361173140, "step": 5760 }, { "epoch": 19.168053244592347, "loss": 0.3057266175746918, "loss_ce": 1.516313204774633e-06, "loss_iou": 0.126953125, "loss_num": 0.010498046875, "loss_xval": 0.306640625, "num_input_tokens_seen": 361173140, "step": 5760 }, { "epoch": 19.171381031613976, "grad_norm": 10.000347137451172, "learning_rate": 5e-06, "loss": 0.4518, "num_input_tokens_seen": 361235884, "step": 5761 }, { "epoch": 19.171381031613976, "loss": 0.4598432183265686, "loss_ce": 0.00023323006462305784, "loss_iou": 0.193359375, "loss_num": 0.0145263671875, "loss_xval": 0.458984375, "num_input_tokens_seen": 361235884, "step": 5761 }, { "epoch": 19.174708818635608, "grad_norm": 14.432680130004883, "learning_rate": 5e-06, "loss": 0.3894, "num_input_tokens_seen": 361298820, "step": 5762 }, { "epoch": 19.174708818635608, "loss": 0.5937849879264832, "loss_ce": 4.470577096071793e-06, "loss_iou": 0.240234375, "loss_num": 0.0225830078125, "loss_xval": 0.59375, "num_input_tokens_seen": 361298820, "step": 5762 }, { "epoch": 19.178036605657237, "grad_norm": 15.46665096282959, "learning_rate": 5e-06, "loss": 0.295, "num_input_tokens_seen": 361360368, "step": 5763 }, { "epoch": 19.178036605657237, "loss": 0.351278692483902, "loss_ce": 2.1360041500884108e-05, "loss_iou": 0.1357421875, "loss_num": 0.0159912109375, "loss_xval": 0.3515625, "num_input_tokens_seen": 361360368, "step": 5763 }, { "epoch": 19.18136439267887, "grad_norm": 13.876596450805664, "learning_rate": 5e-06, "loss": 0.5157, "num_input_tokens_seen": 361423776, "step": 5764 }, { "epoch": 19.18136439267887, "loss": 0.35550805926322937, "loss_ce": 1.159694988928095e-06, "loss_iou": 0.1435546875, "loss_num": 0.01373291015625, "loss_xval": 0.35546875, "num_input_tokens_seen": 361423776, "step": 5764 }, { "epoch": 19.184692179700498, "grad_norm": 12.173171043395996, "learning_rate": 5e-06, "loss": 0.4815, "num_input_tokens_seen": 361486472, "step": 5765 }, { "epoch": 19.184692179700498, "loss": 0.6153147220611572, "loss_ce": 0.0008127574110403657, "loss_iou": 0.26171875, "loss_num": 0.01806640625, "loss_xval": 0.61328125, "num_input_tokens_seen": 361486472, "step": 5765 }, { "epoch": 19.18801996672213, "grad_norm": 19.504899978637695, "learning_rate": 5e-06, "loss": 0.4287, "num_input_tokens_seen": 361548728, "step": 5766 }, { "epoch": 19.18801996672213, "loss": 0.2890220284461975, "loss_ce": 2.0580690033966675e-05, "loss_iou": 0.103515625, "loss_num": 0.016357421875, "loss_xval": 0.2890625, "num_input_tokens_seen": 361548728, "step": 5766 }, { "epoch": 19.19134775374376, "grad_norm": 25.68254280090332, "learning_rate": 5e-06, "loss": 0.4904, "num_input_tokens_seen": 361611404, "step": 5767 }, { "epoch": 19.19134775374376, "loss": 0.48536109924316406, "loss_ce": 0.00040628673741593957, "loss_iou": 0.2021484375, "loss_num": 0.01611328125, "loss_xval": 0.484375, "num_input_tokens_seen": 361611404, "step": 5767 }, { "epoch": 19.19467554076539, "grad_norm": 24.441654205322266, "learning_rate": 5e-06, "loss": 0.4797, "num_input_tokens_seen": 361676044, "step": 5768 }, { "epoch": 19.19467554076539, "loss": 0.48322874307632446, "loss_ce": 0.0008678835583850741, "loss_iou": 0.19921875, "loss_num": 0.0167236328125, "loss_xval": 0.482421875, "num_input_tokens_seen": 361676044, "step": 5768 }, { "epoch": 19.19800332778702, "grad_norm": 17.238096237182617, "learning_rate": 5e-06, "loss": 0.5038, "num_input_tokens_seen": 361737800, "step": 5769 }, { "epoch": 19.19800332778702, "loss": 0.5908232927322388, "loss_ce": 3.0065127702982863e-06, "loss_iou": 0.251953125, "loss_num": 0.0177001953125, "loss_xval": 0.58984375, "num_input_tokens_seen": 361737800, "step": 5769 }, { "epoch": 19.201331114808653, "grad_norm": 22.2685546875, "learning_rate": 5e-06, "loss": 0.3305, "num_input_tokens_seen": 361799040, "step": 5770 }, { "epoch": 19.201331114808653, "loss": 0.2284427285194397, "loss_ce": 3.4035922453767853e-06, "loss_iou": 0.06494140625, "loss_num": 0.019775390625, "loss_xval": 0.228515625, "num_input_tokens_seen": 361799040, "step": 5770 }, { "epoch": 19.204658901830282, "grad_norm": 27.80194664001465, "learning_rate": 5e-06, "loss": 0.3956, "num_input_tokens_seen": 361863312, "step": 5771 }, { "epoch": 19.204658901830282, "loss": 0.2998672127723694, "loss_ce": 1.47842729347758e-06, "loss_iou": 0.1357421875, "loss_num": 0.005462646484375, "loss_xval": 0.30078125, "num_input_tokens_seen": 361863312, "step": 5771 }, { "epoch": 19.207986688851914, "grad_norm": 28.411392211914062, "learning_rate": 5e-06, "loss": 0.3338, "num_input_tokens_seen": 361926044, "step": 5772 }, { "epoch": 19.207986688851914, "loss": 0.41784799098968506, "loss_ce": 1.2785471881215926e-06, "loss_iou": 0.1845703125, "loss_num": 0.00958251953125, "loss_xval": 0.41796875, "num_input_tokens_seen": 361926044, "step": 5772 }, { "epoch": 19.211314475873543, "grad_norm": 29.028545379638672, "learning_rate": 5e-06, "loss": 0.3134, "num_input_tokens_seen": 361988816, "step": 5773 }, { "epoch": 19.211314475873543, "loss": 0.3418128788471222, "loss_ce": 1.6012454580049962e-05, "loss_iou": 0.1376953125, "loss_num": 0.0130615234375, "loss_xval": 0.341796875, "num_input_tokens_seen": 361988816, "step": 5773 }, { "epoch": 19.214642262895175, "grad_norm": 8.79090404510498, "learning_rate": 5e-06, "loss": 0.2713, "num_input_tokens_seen": 362049628, "step": 5774 }, { "epoch": 19.214642262895175, "loss": 0.17331472039222717, "loss_ce": 5.379692993301433e-06, "loss_iou": 0.06591796875, "loss_num": 0.00830078125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 362049628, "step": 5774 }, { "epoch": 19.217970049916804, "grad_norm": 18.81977081298828, "learning_rate": 5e-06, "loss": 0.1808, "num_input_tokens_seen": 362111884, "step": 5775 }, { "epoch": 19.217970049916804, "loss": 0.17349669337272644, "loss_ce": 4.260384685039753e-06, "loss_iou": 0.038330078125, "loss_num": 0.0194091796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 362111884, "step": 5775 }, { "epoch": 19.221297836938437, "grad_norm": 23.984527587890625, "learning_rate": 5e-06, "loss": 0.3051, "num_input_tokens_seen": 362174868, "step": 5776 }, { "epoch": 19.221297836938437, "loss": 0.2823498547077179, "loss_ce": 1.2175714800832793e-06, "loss_iou": 0.10546875, "loss_num": 0.01434326171875, "loss_xval": 0.283203125, "num_input_tokens_seen": 362174868, "step": 5776 }, { "epoch": 19.224625623960065, "grad_norm": 25.726747512817383, "learning_rate": 5e-06, "loss": 0.294, "num_input_tokens_seen": 362237116, "step": 5777 }, { "epoch": 19.224625623960065, "loss": 0.3466828465461731, "loss_ce": 3.167328941344749e-06, "loss_iou": 0.14453125, "loss_num": 0.0115966796875, "loss_xval": 0.34765625, "num_input_tokens_seen": 362237116, "step": 5777 }, { "epoch": 19.227953410981698, "grad_norm": 24.43417739868164, "learning_rate": 5e-06, "loss": 0.2877, "num_input_tokens_seen": 362299220, "step": 5778 }, { "epoch": 19.227953410981698, "loss": 0.24722570180892944, "loss_ce": 2.7876287731487537e-06, "loss_iou": 0.10693359375, "loss_num": 0.006683349609375, "loss_xval": 0.2470703125, "num_input_tokens_seen": 362299220, "step": 5778 }, { "epoch": 19.231281198003327, "grad_norm": 17.497312545776367, "learning_rate": 5e-06, "loss": 0.2294, "num_input_tokens_seen": 362361960, "step": 5779 }, { "epoch": 19.231281198003327, "loss": 0.17807112634181976, "loss_ce": 1.050394530466292e-06, "loss_iou": 0.07275390625, "loss_num": 0.00653076171875, "loss_xval": 0.177734375, "num_input_tokens_seen": 362361960, "step": 5779 }, { "epoch": 19.23460898502496, "grad_norm": 20.1038761138916, "learning_rate": 5e-06, "loss": 0.4383, "num_input_tokens_seen": 362425436, "step": 5780 }, { "epoch": 19.23460898502496, "loss": 0.4912375807762146, "loss_ce": 2.66711267613573e-05, "loss_iou": 0.171875, "loss_num": 0.029296875, "loss_xval": 0.4921875, "num_input_tokens_seen": 362425436, "step": 5780 }, { "epoch": 19.237936772046588, "grad_norm": 8.994041442871094, "learning_rate": 5e-06, "loss": 0.3062, "num_input_tokens_seen": 362487348, "step": 5781 }, { "epoch": 19.237936772046588, "loss": 0.2640451490879059, "loss_ce": 7.057632046780782e-06, "loss_iou": 0.10595703125, "loss_num": 0.010498046875, "loss_xval": 0.263671875, "num_input_tokens_seen": 362487348, "step": 5781 }, { "epoch": 19.24126455906822, "grad_norm": 9.459311485290527, "learning_rate": 5e-06, "loss": 0.2886, "num_input_tokens_seen": 362550676, "step": 5782 }, { "epoch": 19.24126455906822, "loss": 0.25982752442359924, "loss_ce": 8.633951438241638e-07, "loss_iou": 0.0947265625, "loss_num": 0.01416015625, "loss_xval": 0.259765625, "num_input_tokens_seen": 362550676, "step": 5782 }, { "epoch": 19.24459234608985, "grad_norm": 19.533588409423828, "learning_rate": 5e-06, "loss": 0.3418, "num_input_tokens_seen": 362614232, "step": 5783 }, { "epoch": 19.24459234608985, "loss": 0.4127580523490906, "loss_ce": 3.8333924749167636e-05, "loss_iou": 0.1767578125, "loss_num": 0.01177978515625, "loss_xval": 0.412109375, "num_input_tokens_seen": 362614232, "step": 5783 }, { "epoch": 19.24792013311148, "grad_norm": 14.776411056518555, "learning_rate": 5e-06, "loss": 0.4009, "num_input_tokens_seen": 362676744, "step": 5784 }, { "epoch": 19.24792013311148, "loss": 0.5278033018112183, "loss_ce": 1.7722150005283765e-06, "loss_iou": 0.2021484375, "loss_num": 0.024658203125, "loss_xval": 0.52734375, "num_input_tokens_seen": 362676744, "step": 5784 }, { "epoch": 19.25124792013311, "grad_norm": 9.385489463806152, "learning_rate": 5e-06, "loss": 0.4097, "num_input_tokens_seen": 362739772, "step": 5785 }, { "epoch": 19.25124792013311, "loss": 0.2899426221847534, "loss_ce": 0.0010021983180195093, "loss_iou": 0.1083984375, "loss_num": 0.01446533203125, "loss_xval": 0.2890625, "num_input_tokens_seen": 362739772, "step": 5785 }, { "epoch": 19.254575707154743, "grad_norm": 11.433369636535645, "learning_rate": 5e-06, "loss": 0.2144, "num_input_tokens_seen": 362801436, "step": 5786 }, { "epoch": 19.254575707154743, "loss": 0.23974663019180298, "loss_ce": 5.255205905996263e-07, "loss_iou": 0.07666015625, "loss_num": 0.017333984375, "loss_xval": 0.240234375, "num_input_tokens_seen": 362801436, "step": 5786 }, { "epoch": 19.25790349417637, "grad_norm": 8.838174819946289, "learning_rate": 5e-06, "loss": 0.1891, "num_input_tokens_seen": 362862524, "step": 5787 }, { "epoch": 19.25790349417637, "loss": 0.19282004237174988, "loss_ce": 9.971090548788197e-06, "loss_iou": 0.0625, "loss_num": 0.01361083984375, "loss_xval": 0.1923828125, "num_input_tokens_seen": 362862524, "step": 5787 }, { "epoch": 19.261231281198004, "grad_norm": 18.493589401245117, "learning_rate": 5e-06, "loss": 0.3218, "num_input_tokens_seen": 362925492, "step": 5788 }, { "epoch": 19.261231281198004, "loss": 0.16806253790855408, "loss_ce": 2.2379613255907316e-06, "loss_iou": 0.034423828125, "loss_num": 0.019775390625, "loss_xval": 0.16796875, "num_input_tokens_seen": 362925492, "step": 5788 }, { "epoch": 19.264559068219633, "grad_norm": 5.976156711578369, "learning_rate": 5e-06, "loss": 0.3901, "num_input_tokens_seen": 362987516, "step": 5789 }, { "epoch": 19.264559068219633, "loss": 0.5645766854286194, "loss_ce": 1.485164716541476e-06, "loss_iou": 0.1494140625, "loss_num": 0.05322265625, "loss_xval": 0.56640625, "num_input_tokens_seen": 362987516, "step": 5789 }, { "epoch": 19.267886855241265, "grad_norm": 24.88056755065918, "learning_rate": 5e-06, "loss": 0.4913, "num_input_tokens_seen": 363051676, "step": 5790 }, { "epoch": 19.267886855241265, "loss": 0.6990680694580078, "loss_ce": 1.9670126221171813e-06, "loss_iou": 0.25, "loss_num": 0.039306640625, "loss_xval": 0.69921875, "num_input_tokens_seen": 363051676, "step": 5790 }, { "epoch": 19.271214642262894, "grad_norm": 12.52092456817627, "learning_rate": 5e-06, "loss": 0.2948, "num_input_tokens_seen": 363114416, "step": 5791 }, { "epoch": 19.271214642262894, "loss": 0.2552510201931, "loss_ce": 2.0007762486784486e-06, "loss_iou": 0.091796875, "loss_num": 0.0142822265625, "loss_xval": 0.255859375, "num_input_tokens_seen": 363114416, "step": 5791 }, { "epoch": 19.274542429284526, "grad_norm": 16.837209701538086, "learning_rate": 5e-06, "loss": 0.4284, "num_input_tokens_seen": 363178388, "step": 5792 }, { "epoch": 19.274542429284526, "loss": 0.41943779587745667, "loss_ce": 4.171481577941449e-06, "loss_iou": 0.1806640625, "loss_num": 0.01177978515625, "loss_xval": 0.419921875, "num_input_tokens_seen": 363178388, "step": 5792 }, { "epoch": 19.277870216306155, "grad_norm": 15.434374809265137, "learning_rate": 5e-06, "loss": 0.2523, "num_input_tokens_seen": 363242092, "step": 5793 }, { "epoch": 19.277870216306155, "loss": 0.22479704022407532, "loss_ce": 6.559790199389681e-05, "loss_iou": 0.08642578125, "loss_num": 0.0103759765625, "loss_xval": 0.224609375, "num_input_tokens_seen": 363242092, "step": 5793 }, { "epoch": 19.281198003327788, "grad_norm": 11.28855037689209, "learning_rate": 5e-06, "loss": 0.3088, "num_input_tokens_seen": 363305684, "step": 5794 }, { "epoch": 19.281198003327788, "loss": 0.4081139862537384, "loss_ce": 6.346203008433804e-05, "loss_iou": 0.154296875, "loss_num": 0.019775390625, "loss_xval": 0.408203125, "num_input_tokens_seen": 363305684, "step": 5794 }, { "epoch": 19.284525790349416, "grad_norm": 12.182991981506348, "learning_rate": 5e-06, "loss": 0.4156, "num_input_tokens_seen": 363368640, "step": 5795 }, { "epoch": 19.284525790349416, "loss": 0.47864019870758057, "loss_ce": 2.491826990080881e-06, "loss_iou": 0.185546875, "loss_num": 0.021484375, "loss_xval": 0.478515625, "num_input_tokens_seen": 363368640, "step": 5795 }, { "epoch": 19.28785357737105, "grad_norm": 9.739754676818848, "learning_rate": 5e-06, "loss": 0.3977, "num_input_tokens_seen": 363430832, "step": 5796 }, { "epoch": 19.28785357737105, "loss": 0.6247574090957642, "loss_ce": 1.524709546174563e-06, "loss_iou": 0.25390625, "loss_num": 0.0235595703125, "loss_xval": 0.625, "num_input_tokens_seen": 363430832, "step": 5796 }, { "epoch": 19.291181364392678, "grad_norm": 10.732812881469727, "learning_rate": 5e-06, "loss": 0.2386, "num_input_tokens_seen": 363491632, "step": 5797 }, { "epoch": 19.291181364392678, "loss": 0.20070713758468628, "loss_ce": 6.412780066966661e-07, "loss_iou": 0.07763671875, "loss_num": 0.00909423828125, "loss_xval": 0.201171875, "num_input_tokens_seen": 363491632, "step": 5797 }, { "epoch": 19.29450915141431, "grad_norm": 8.897384643554688, "learning_rate": 5e-06, "loss": 0.3037, "num_input_tokens_seen": 363555016, "step": 5798 }, { "epoch": 19.29450915141431, "loss": 0.2972446382045746, "loss_ce": 3.433880010561552e-06, "loss_iou": 0.12255859375, "loss_num": 0.0103759765625, "loss_xval": 0.296875, "num_input_tokens_seen": 363555016, "step": 5798 }, { "epoch": 19.29783693843594, "grad_norm": 27.037349700927734, "learning_rate": 5e-06, "loss": 0.6417, "num_input_tokens_seen": 363618712, "step": 5799 }, { "epoch": 19.29783693843594, "loss": 0.38586801290512085, "loss_ce": 3.775479626710876e-06, "loss_iou": 0.15625, "loss_num": 0.01470947265625, "loss_xval": 0.38671875, "num_input_tokens_seen": 363618712, "step": 5799 }, { "epoch": 19.30116472545757, "grad_norm": 57.920223236083984, "learning_rate": 5e-06, "loss": 0.5762, "num_input_tokens_seen": 363682156, "step": 5800 }, { "epoch": 19.30116472545757, "loss": 0.5510846376419067, "loss_ce": 5.923011485720053e-05, "loss_iou": 0.2265625, "loss_num": 0.0196533203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 363682156, "step": 5800 }, { "epoch": 19.3044925124792, "grad_norm": 33.823158264160156, "learning_rate": 5e-06, "loss": 0.5034, "num_input_tokens_seen": 363744960, "step": 5801 }, { "epoch": 19.3044925124792, "loss": 0.46339812874794006, "loss_ce": 1.9189712475053966e-05, "loss_iou": 0.2109375, "loss_num": 0.00830078125, "loss_xval": 0.462890625, "num_input_tokens_seen": 363744960, "step": 5801 }, { "epoch": 19.307820299500833, "grad_norm": 19.77855682373047, "learning_rate": 5e-06, "loss": 0.3969, "num_input_tokens_seen": 363808280, "step": 5802 }, { "epoch": 19.307820299500833, "loss": 0.32118022441864014, "loss_ce": 9.427475561096799e-06, "loss_iou": 0.12890625, "loss_num": 0.01251220703125, "loss_xval": 0.3203125, "num_input_tokens_seen": 363808280, "step": 5802 }, { "epoch": 19.31114808652246, "grad_norm": 42.78610610961914, "learning_rate": 5e-06, "loss": 0.5168, "num_input_tokens_seen": 363871608, "step": 5803 }, { "epoch": 19.31114808652246, "loss": 0.6616355180740356, "loss_ce": 1.4415451005334035e-05, "loss_iou": 0.279296875, "loss_num": 0.0203857421875, "loss_xval": 0.66015625, "num_input_tokens_seen": 363871608, "step": 5803 }, { "epoch": 19.314475873544094, "grad_norm": 17.949405670166016, "learning_rate": 5e-06, "loss": 0.3677, "num_input_tokens_seen": 363933868, "step": 5804 }, { "epoch": 19.314475873544094, "loss": 0.21090860664844513, "loss_ce": 1.6276903807010967e-06, "loss_iou": 0.0908203125, "loss_num": 0.005889892578125, "loss_xval": 0.2109375, "num_input_tokens_seen": 363933868, "step": 5804 }, { "epoch": 19.317803660565723, "grad_norm": 13.406156539916992, "learning_rate": 5e-06, "loss": 0.3896, "num_input_tokens_seen": 363995632, "step": 5805 }, { "epoch": 19.317803660565723, "loss": 0.4682052731513977, "loss_ce": 4.547987373371143e-06, "loss_iou": 0.1708984375, "loss_num": 0.0255126953125, "loss_xval": 0.46875, "num_input_tokens_seen": 363995632, "step": 5805 }, { "epoch": 19.321131447587355, "grad_norm": 10.47157096862793, "learning_rate": 5e-06, "loss": 0.4019, "num_input_tokens_seen": 364059772, "step": 5806 }, { "epoch": 19.321131447587355, "loss": 0.4822404384613037, "loss_ce": 1.6868352759047411e-06, "loss_iou": 0.19921875, "loss_num": 0.0166015625, "loss_xval": 0.482421875, "num_input_tokens_seen": 364059772, "step": 5806 }, { "epoch": 19.324459234608984, "grad_norm": 16.554712295532227, "learning_rate": 5e-06, "loss": 0.2959, "num_input_tokens_seen": 364123000, "step": 5807 }, { "epoch": 19.324459234608984, "loss": 0.3092105984687805, "loss_ce": 6.503700205939822e-06, "loss_iou": 0.12353515625, "loss_num": 0.012451171875, "loss_xval": 0.30859375, "num_input_tokens_seen": 364123000, "step": 5807 }, { "epoch": 19.327787021630616, "grad_norm": 33.7567253112793, "learning_rate": 5e-06, "loss": 0.626, "num_input_tokens_seen": 364187364, "step": 5808 }, { "epoch": 19.327787021630616, "loss": 0.5668984651565552, "loss_ce": 3.933971129299607e-06, "loss_iou": 0.2421875, "loss_num": 0.0164794921875, "loss_xval": 0.56640625, "num_input_tokens_seen": 364187364, "step": 5808 }, { "epoch": 19.331114808652245, "grad_norm": 48.38916015625, "learning_rate": 5e-06, "loss": 0.4464, "num_input_tokens_seen": 364251280, "step": 5809 }, { "epoch": 19.331114808652245, "loss": 0.4138216972351074, "loss_ce": 3.3159178656205768e-06, "loss_iou": 0.1640625, "loss_num": 0.0169677734375, "loss_xval": 0.4140625, "num_input_tokens_seen": 364251280, "step": 5809 }, { "epoch": 19.334442595673877, "grad_norm": 27.91644859313965, "learning_rate": 5e-06, "loss": 0.5203, "num_input_tokens_seen": 364313236, "step": 5810 }, { "epoch": 19.334442595673877, "loss": 0.5505406856536865, "loss_ce": 3.5915581975132227e-06, "loss_iou": 0.216796875, "loss_num": 0.0233154296875, "loss_xval": 0.55078125, "num_input_tokens_seen": 364313236, "step": 5810 }, { "epoch": 19.337770382695506, "grad_norm": 8.700141906738281, "learning_rate": 5e-06, "loss": 0.3987, "num_input_tokens_seen": 364377136, "step": 5811 }, { "epoch": 19.337770382695506, "loss": 0.5230450630187988, "loss_ce": 4.255221483617788e-06, "loss_iou": 0.2001953125, "loss_num": 0.024658203125, "loss_xval": 0.5234375, "num_input_tokens_seen": 364377136, "step": 5811 }, { "epoch": 19.34109816971714, "grad_norm": 11.115525245666504, "learning_rate": 5e-06, "loss": 0.3253, "num_input_tokens_seen": 364438756, "step": 5812 }, { "epoch": 19.34109816971714, "loss": 0.3786017596721649, "loss_ce": 6.946443136257585e-07, "loss_iou": 0.1494140625, "loss_num": 0.0157470703125, "loss_xval": 0.37890625, "num_input_tokens_seen": 364438756, "step": 5812 }, { "epoch": 19.344425956738768, "grad_norm": 14.47199821472168, "learning_rate": 5e-06, "loss": 0.4202, "num_input_tokens_seen": 364501608, "step": 5813 }, { "epoch": 19.344425956738768, "loss": 0.5194466710090637, "loss_ce": 6.966342880332377e-06, "loss_iou": 0.2265625, "loss_num": 0.01336669921875, "loss_xval": 0.51953125, "num_input_tokens_seen": 364501608, "step": 5813 }, { "epoch": 19.3477537437604, "grad_norm": 31.748388290405273, "learning_rate": 5e-06, "loss": 0.4153, "num_input_tokens_seen": 364564372, "step": 5814 }, { "epoch": 19.3477537437604, "loss": 0.3900330066680908, "loss_ce": 1.8342781913815998e-05, "loss_iou": 0.1572265625, "loss_num": 0.01507568359375, "loss_xval": 0.390625, "num_input_tokens_seen": 364564372, "step": 5814 }, { "epoch": 19.35108153078203, "grad_norm": 29.233572006225586, "learning_rate": 5e-06, "loss": 0.558, "num_input_tokens_seen": 364628088, "step": 5815 }, { "epoch": 19.35108153078203, "loss": 0.621483564376831, "loss_ce": 2.3564429284306243e-05, "loss_iou": 0.26953125, "loss_num": 0.016357421875, "loss_xval": 0.62109375, "num_input_tokens_seen": 364628088, "step": 5815 }, { "epoch": 19.35440931780366, "grad_norm": 9.622817039489746, "learning_rate": 5e-06, "loss": 0.4464, "num_input_tokens_seen": 364691932, "step": 5816 }, { "epoch": 19.35440931780366, "loss": 0.532842755317688, "loss_ce": 5.873728241567733e-06, "loss_iou": 0.2060546875, "loss_num": 0.0242919921875, "loss_xval": 0.53125, "num_input_tokens_seen": 364691932, "step": 5816 }, { "epoch": 19.35773710482529, "grad_norm": 8.217416763305664, "learning_rate": 5e-06, "loss": 0.3256, "num_input_tokens_seen": 364754440, "step": 5817 }, { "epoch": 19.35773710482529, "loss": 0.3154299855232239, "loss_ce": 3.1149539836405893e-07, "loss_iou": 0.09912109375, "loss_num": 0.0233154296875, "loss_xval": 0.31640625, "num_input_tokens_seen": 364754440, "step": 5817 }, { "epoch": 19.361064891846922, "grad_norm": 6.558896064758301, "learning_rate": 5e-06, "loss": 0.3882, "num_input_tokens_seen": 364818280, "step": 5818 }, { "epoch": 19.361064891846922, "loss": 0.36442190408706665, "loss_ce": 1.147581497207284e-05, "loss_iou": 0.1474609375, "loss_num": 0.01397705078125, "loss_xval": 0.365234375, "num_input_tokens_seen": 364818280, "step": 5818 }, { "epoch": 19.36439267886855, "grad_norm": 5.84474515914917, "learning_rate": 5e-06, "loss": 0.3485, "num_input_tokens_seen": 364879828, "step": 5819 }, { "epoch": 19.36439267886855, "loss": 0.4164433181285858, "loss_ce": 4.4786725084122736e-07, "loss_iou": 0.16015625, "loss_num": 0.019287109375, "loss_xval": 0.416015625, "num_input_tokens_seen": 364879828, "step": 5819 }, { "epoch": 19.367720465890184, "grad_norm": 12.576366424560547, "learning_rate": 5e-06, "loss": 0.2866, "num_input_tokens_seen": 364943344, "step": 5820 }, { "epoch": 19.367720465890184, "loss": 0.3364601731300354, "loss_ce": 3.4390399378025904e-05, "loss_iou": 0.125, "loss_num": 0.0172119140625, "loss_xval": 0.3359375, "num_input_tokens_seen": 364943344, "step": 5820 }, { "epoch": 19.371048252911812, "grad_norm": 14.606855392456055, "learning_rate": 5e-06, "loss": 0.2785, "num_input_tokens_seen": 365004516, "step": 5821 }, { "epoch": 19.371048252911812, "loss": 0.3271403908729553, "loss_ce": 5.2975196012994274e-05, "loss_iou": 0.130859375, "loss_num": 0.012939453125, "loss_xval": 0.326171875, "num_input_tokens_seen": 365004516, "step": 5821 }, { "epoch": 19.374376039933445, "grad_norm": 8.539703369140625, "learning_rate": 5e-06, "loss": 0.2452, "num_input_tokens_seen": 365066632, "step": 5822 }, { "epoch": 19.374376039933445, "loss": 0.2095956802368164, "loss_ce": 9.310685982200084e-07, "loss_iou": 0.0771484375, "loss_num": 0.01104736328125, "loss_xval": 0.2099609375, "num_input_tokens_seen": 365066632, "step": 5822 }, { "epoch": 19.377703826955074, "grad_norm": 12.823468208312988, "learning_rate": 5e-06, "loss": 0.4626, "num_input_tokens_seen": 365130500, "step": 5823 }, { "epoch": 19.377703826955074, "loss": 0.5097084045410156, "loss_ce": 3.811721398960799e-06, "loss_iou": 0.205078125, "loss_num": 0.0201416015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 365130500, "step": 5823 }, { "epoch": 19.381031613976706, "grad_norm": 20.121313095092773, "learning_rate": 5e-06, "loss": 0.3856, "num_input_tokens_seen": 365194504, "step": 5824 }, { "epoch": 19.381031613976706, "loss": 0.44836342334747314, "loss_ce": 6.0199621657375246e-05, "loss_iou": 0.1865234375, "loss_num": 0.01513671875, "loss_xval": 0.44921875, "num_input_tokens_seen": 365194504, "step": 5824 }, { "epoch": 19.384359400998335, "grad_norm": 17.09537124633789, "learning_rate": 5e-06, "loss": 0.3464, "num_input_tokens_seen": 365256264, "step": 5825 }, { "epoch": 19.384359400998335, "loss": 0.3011949062347412, "loss_ce": 1.685391680439352e-06, "loss_iou": 0.1171875, "loss_num": 0.0133056640625, "loss_xval": 0.30078125, "num_input_tokens_seen": 365256264, "step": 5825 }, { "epoch": 19.387687188019967, "grad_norm": 7.190653324127197, "learning_rate": 5e-06, "loss": 0.3137, "num_input_tokens_seen": 365319708, "step": 5826 }, { "epoch": 19.387687188019967, "loss": 0.27582529187202454, "loss_ce": 7.433928203681717e-06, "loss_iou": 0.103515625, "loss_num": 0.01373291015625, "loss_xval": 0.275390625, "num_input_tokens_seen": 365319708, "step": 5826 }, { "epoch": 19.391014975041596, "grad_norm": 8.297785758972168, "learning_rate": 5e-06, "loss": 0.3236, "num_input_tokens_seen": 365382884, "step": 5827 }, { "epoch": 19.391014975041596, "loss": 0.37164658308029175, "loss_ce": 3.5200159800297115e-06, "loss_iou": 0.1513671875, "loss_num": 0.01385498046875, "loss_xval": 0.37109375, "num_input_tokens_seen": 365382884, "step": 5827 }, { "epoch": 19.39434276206323, "grad_norm": 7.663806438446045, "learning_rate": 5e-06, "loss": 0.4806, "num_input_tokens_seen": 365447340, "step": 5828 }, { "epoch": 19.39434276206323, "loss": 0.446184903383255, "loss_ce": 2.6425427677168045e-06, "loss_iou": 0.201171875, "loss_num": 0.00860595703125, "loss_xval": 0.4453125, "num_input_tokens_seen": 365447340, "step": 5828 }, { "epoch": 19.397670549084857, "grad_norm": 10.256460189819336, "learning_rate": 5e-06, "loss": 0.3659, "num_input_tokens_seen": 365509128, "step": 5829 }, { "epoch": 19.397670549084857, "loss": 0.39413559436798096, "loss_ce": 1.0658703786248225e-06, "loss_iou": 0.1396484375, "loss_num": 0.0228271484375, "loss_xval": 0.39453125, "num_input_tokens_seen": 365509128, "step": 5829 }, { "epoch": 19.40099833610649, "grad_norm": 16.505489349365234, "learning_rate": 5e-06, "loss": 0.293, "num_input_tokens_seen": 365571516, "step": 5830 }, { "epoch": 19.40099833610649, "loss": 0.26002246141433716, "loss_ce": 1.2705461813311558e-05, "loss_iou": 0.09521484375, "loss_num": 0.013916015625, "loss_xval": 0.259765625, "num_input_tokens_seen": 365571516, "step": 5830 }, { "epoch": 19.40432612312812, "grad_norm": 26.976333618164062, "learning_rate": 5e-06, "loss": 0.3334, "num_input_tokens_seen": 365634680, "step": 5831 }, { "epoch": 19.40432612312812, "loss": 0.2666582465171814, "loss_ce": 5.6691995268920437e-05, "loss_iou": 0.1044921875, "loss_num": 0.01153564453125, "loss_xval": 0.265625, "num_input_tokens_seen": 365634680, "step": 5831 }, { "epoch": 19.40765391014975, "grad_norm": 21.522951126098633, "learning_rate": 5e-06, "loss": 0.2385, "num_input_tokens_seen": 365697724, "step": 5832 }, { "epoch": 19.40765391014975, "loss": 0.20993775129318237, "loss_ce": 2.2581720259040594e-05, "loss_iou": 0.06787109375, "loss_num": 0.0147705078125, "loss_xval": 0.2099609375, "num_input_tokens_seen": 365697724, "step": 5832 }, { "epoch": 19.41098169717138, "grad_norm": 7.605019569396973, "learning_rate": 5e-06, "loss": 0.444, "num_input_tokens_seen": 365761268, "step": 5833 }, { "epoch": 19.41098169717138, "loss": 0.5917951464653015, "loss_ce": 0.00015089116641320288, "loss_iou": 0.2392578125, "loss_num": 0.02294921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 365761268, "step": 5833 }, { "epoch": 19.414309484193012, "grad_norm": 9.798748016357422, "learning_rate": 5e-06, "loss": 0.5509, "num_input_tokens_seen": 365823144, "step": 5834 }, { "epoch": 19.414309484193012, "loss": 0.713309109210968, "loss_ce": 5.224740380072035e-05, "loss_iou": 0.298828125, "loss_num": 0.0235595703125, "loss_xval": 0.71484375, "num_input_tokens_seen": 365823144, "step": 5834 }, { "epoch": 19.41763727121464, "grad_norm": 9.390779495239258, "learning_rate": 5e-06, "loss": 0.3899, "num_input_tokens_seen": 365884312, "step": 5835 }, { "epoch": 19.41763727121464, "loss": 0.28592628240585327, "loss_ce": 7.099469257809687e-06, "loss_iou": 0.111328125, "loss_num": 0.0125732421875, "loss_xval": 0.28515625, "num_input_tokens_seen": 365884312, "step": 5835 }, { "epoch": 19.420965058236273, "grad_norm": 15.000127792358398, "learning_rate": 5e-06, "loss": 0.2995, "num_input_tokens_seen": 365948032, "step": 5836 }, { "epoch": 19.420965058236273, "loss": 0.23999162018299103, "loss_ce": 1.38097198032483e-06, "loss_iou": 0.10546875, "loss_num": 0.005767822265625, "loss_xval": 0.240234375, "num_input_tokens_seen": 365948032, "step": 5836 }, { "epoch": 19.424292845257902, "grad_norm": 16.655473709106445, "learning_rate": 5e-06, "loss": 0.3899, "num_input_tokens_seen": 366010324, "step": 5837 }, { "epoch": 19.424292845257902, "loss": 0.5630550384521484, "loss_ce": 5.7001270761247724e-06, "loss_iou": 0.2392578125, "loss_num": 0.0172119140625, "loss_xval": 0.5625, "num_input_tokens_seen": 366010324, "step": 5837 }, { "epoch": 19.427620632279535, "grad_norm": 17.025135040283203, "learning_rate": 5e-06, "loss": 0.6048, "num_input_tokens_seen": 366074300, "step": 5838 }, { "epoch": 19.427620632279535, "loss": 0.5779571533203125, "loss_ce": 1.5255169273586944e-05, "loss_iou": 0.2158203125, "loss_num": 0.029052734375, "loss_xval": 0.578125, "num_input_tokens_seen": 366074300, "step": 5838 }, { "epoch": 19.430948419301163, "grad_norm": 19.668804168701172, "learning_rate": 5e-06, "loss": 0.4646, "num_input_tokens_seen": 366136940, "step": 5839 }, { "epoch": 19.430948419301163, "loss": 0.5345484018325806, "loss_ce": 2.5570138859620783e-06, "loss_iou": 0.232421875, "loss_num": 0.01416015625, "loss_xval": 0.53515625, "num_input_tokens_seen": 366136940, "step": 5839 }, { "epoch": 19.434276206322796, "grad_norm": 12.102884292602539, "learning_rate": 5e-06, "loss": 0.5773, "num_input_tokens_seen": 366200564, "step": 5840 }, { "epoch": 19.434276206322796, "loss": 0.33008265495300293, "loss_ce": 4.529886609816458e-06, "loss_iou": 0.111328125, "loss_num": 0.0216064453125, "loss_xval": 0.330078125, "num_input_tokens_seen": 366200564, "step": 5840 }, { "epoch": 19.437603993344425, "grad_norm": 29.419416427612305, "learning_rate": 5e-06, "loss": 0.3171, "num_input_tokens_seen": 366263812, "step": 5841 }, { "epoch": 19.437603993344425, "loss": 0.29646703600883484, "loss_ce": 1.9299004634376615e-05, "loss_iou": 0.134765625, "loss_num": 0.005157470703125, "loss_xval": 0.296875, "num_input_tokens_seen": 366263812, "step": 5841 }, { "epoch": 19.440931780366057, "grad_norm": 22.4404239654541, "learning_rate": 5e-06, "loss": 0.4, "num_input_tokens_seen": 366326936, "step": 5842 }, { "epoch": 19.440931780366057, "loss": 0.3291936218738556, "loss_ce": 5.013928898733866e-07, "loss_iou": 0.138671875, "loss_num": 0.01031494140625, "loss_xval": 0.330078125, "num_input_tokens_seen": 366326936, "step": 5842 }, { "epoch": 19.444259567387686, "grad_norm": 27.681734085083008, "learning_rate": 5e-06, "loss": 0.3098, "num_input_tokens_seen": 366389872, "step": 5843 }, { "epoch": 19.444259567387686, "loss": 0.26135411858558655, "loss_ce": 1.5681861214034143e-06, "loss_iou": 0.1123046875, "loss_num": 0.007293701171875, "loss_xval": 0.26171875, "num_input_tokens_seen": 366389872, "step": 5843 }, { "epoch": 19.44758735440932, "grad_norm": 41.293724060058594, "learning_rate": 5e-06, "loss": 0.7013, "num_input_tokens_seen": 366453956, "step": 5844 }, { "epoch": 19.44758735440932, "loss": 0.7082908153533936, "loss_ce": 3.887120692525059e-05, "loss_iou": 0.287109375, "loss_num": 0.027099609375, "loss_xval": 0.70703125, "num_input_tokens_seen": 366453956, "step": 5844 }, { "epoch": 19.450915141430947, "grad_norm": 31.500871658325195, "learning_rate": 5e-06, "loss": 0.3174, "num_input_tokens_seen": 366517012, "step": 5845 }, { "epoch": 19.450915141430947, "loss": 0.2765207886695862, "loss_ce": 1.0100148983838153e-06, "loss_iou": 0.08740234375, "loss_num": 0.0203857421875, "loss_xval": 0.27734375, "num_input_tokens_seen": 366517012, "step": 5845 }, { "epoch": 19.45424292845258, "grad_norm": 30.547277450561523, "learning_rate": 5e-06, "loss": 0.3455, "num_input_tokens_seen": 366579260, "step": 5846 }, { "epoch": 19.45424292845258, "loss": 0.2156999558210373, "loss_ce": 1.701734731796023e-06, "loss_iou": 0.0830078125, "loss_num": 0.00982666015625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 366579260, "step": 5846 }, { "epoch": 19.45757071547421, "grad_norm": 12.524526596069336, "learning_rate": 5e-06, "loss": 0.383, "num_input_tokens_seen": 366641556, "step": 5847 }, { "epoch": 19.45757071547421, "loss": 0.516453742980957, "loss_ce": 4.753105713461991e-06, "loss_iou": 0.1796875, "loss_num": 0.031494140625, "loss_xval": 0.515625, "num_input_tokens_seen": 366641556, "step": 5847 }, { "epoch": 19.46089850249584, "grad_norm": 6.129978656768799, "learning_rate": 5e-06, "loss": 0.2657, "num_input_tokens_seen": 366704200, "step": 5848 }, { "epoch": 19.46089850249584, "loss": 0.3480243384838104, "loss_ce": 1.8688956515688915e-06, "loss_iou": 0.13671875, "loss_num": 0.01519775390625, "loss_xval": 0.34765625, "num_input_tokens_seen": 366704200, "step": 5848 }, { "epoch": 19.46422628951747, "grad_norm": 16.35210418701172, "learning_rate": 5e-06, "loss": 0.2861, "num_input_tokens_seen": 366766360, "step": 5849 }, { "epoch": 19.46422628951747, "loss": 0.31010663509368896, "loss_ce": 1.7557782484800555e-05, "loss_iou": 0.1328125, "loss_num": 0.00885009765625, "loss_xval": 0.310546875, "num_input_tokens_seen": 366766360, "step": 5849 }, { "epoch": 19.467554076539102, "grad_norm": 21.861595153808594, "learning_rate": 5e-06, "loss": 0.4437, "num_input_tokens_seen": 366827216, "step": 5850 }, { "epoch": 19.467554076539102, "loss": 0.706147313117981, "loss_ce": 1.0032540558313485e-06, "loss_iou": 0.296875, "loss_num": 0.0225830078125, "loss_xval": 0.70703125, "num_input_tokens_seen": 366827216, "step": 5850 }, { "epoch": 19.47088186356073, "grad_norm": 11.112997055053711, "learning_rate": 5e-06, "loss": 0.2962, "num_input_tokens_seen": 366887400, "step": 5851 }, { "epoch": 19.47088186356073, "loss": 0.46234673261642456, "loss_ce": 5.449906893773004e-06, "loss_iou": 0.19140625, "loss_num": 0.015869140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 366887400, "step": 5851 }, { "epoch": 19.474209650582363, "grad_norm": 12.641119003295898, "learning_rate": 5e-06, "loss": 0.4018, "num_input_tokens_seen": 366951072, "step": 5852 }, { "epoch": 19.474209650582363, "loss": 0.5067300796508789, "loss_ce": 9.525571158519597e-07, "loss_iou": 0.2021484375, "loss_num": 0.0201416015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 366951072, "step": 5852 }, { "epoch": 19.477537437603992, "grad_norm": 8.661499977111816, "learning_rate": 5e-06, "loss": 0.3186, "num_input_tokens_seen": 367012864, "step": 5853 }, { "epoch": 19.477537437603992, "loss": 0.23419520258903503, "loss_ce": 3.3159849408548325e-06, "loss_iou": 0.059326171875, "loss_num": 0.0230712890625, "loss_xval": 0.234375, "num_input_tokens_seen": 367012864, "step": 5853 }, { "epoch": 19.480865224625624, "grad_norm": 21.728059768676758, "learning_rate": 5e-06, "loss": 0.5561, "num_input_tokens_seen": 367076012, "step": 5854 }, { "epoch": 19.480865224625624, "loss": 0.22853673994541168, "loss_ce": 2.111804496962577e-05, "loss_iou": 0.091796875, "loss_num": 0.009033203125, "loss_xval": 0.228515625, "num_input_tokens_seen": 367076012, "step": 5854 }, { "epoch": 19.484193011647253, "grad_norm": 23.092449188232422, "learning_rate": 5e-06, "loss": 0.5521, "num_input_tokens_seen": 367139236, "step": 5855 }, { "epoch": 19.484193011647253, "loss": 0.5239290595054626, "loss_ce": 3.2874934277060675e-06, "loss_iou": 0.2236328125, "loss_num": 0.01531982421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 367139236, "step": 5855 }, { "epoch": 19.487520798668886, "grad_norm": 18.63672637939453, "learning_rate": 5e-06, "loss": 0.4187, "num_input_tokens_seen": 367203020, "step": 5856 }, { "epoch": 19.487520798668886, "loss": 0.4250797629356384, "loss_ce": 3.094414569204673e-05, "loss_iou": 0.1748046875, "loss_num": 0.0152587890625, "loss_xval": 0.42578125, "num_input_tokens_seen": 367203020, "step": 5856 }, { "epoch": 19.490848585690514, "grad_norm": 23.819515228271484, "learning_rate": 5e-06, "loss": 0.4858, "num_input_tokens_seen": 367266144, "step": 5857 }, { "epoch": 19.490848585690514, "loss": 0.3919772207736969, "loss_ce": 9.463009519095067e-06, "loss_iou": 0.1640625, "loss_num": 0.01275634765625, "loss_xval": 0.392578125, "num_input_tokens_seen": 367266144, "step": 5857 }, { "epoch": 19.494176372712147, "grad_norm": 10.644865036010742, "learning_rate": 5e-06, "loss": 0.1675, "num_input_tokens_seen": 367328256, "step": 5858 }, { "epoch": 19.494176372712147, "loss": 0.17276164889335632, "loss_ce": 1.632603925827425e-06, "loss_iou": 0.072265625, "loss_num": 0.005584716796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 367328256, "step": 5858 }, { "epoch": 19.497504159733776, "grad_norm": 12.285213470458984, "learning_rate": 5e-06, "loss": 0.5249, "num_input_tokens_seen": 367390796, "step": 5859 }, { "epoch": 19.497504159733776, "loss": 0.6346442103385925, "loss_ce": 6.711900368827628e-07, "loss_iou": 0.265625, "loss_num": 0.0208740234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 367390796, "step": 5859 }, { "epoch": 19.500831946755408, "grad_norm": 21.01544189453125, "learning_rate": 5e-06, "loss": 0.3142, "num_input_tokens_seen": 367453384, "step": 5860 }, { "epoch": 19.500831946755408, "loss": 0.3626912534236908, "loss_ce": 2.0358789697638713e-05, "loss_iou": 0.1376953125, "loss_num": 0.0174560546875, "loss_xval": 0.36328125, "num_input_tokens_seen": 367453384, "step": 5860 }, { "epoch": 19.504159733777037, "grad_norm": 13.016902923583984, "learning_rate": 5e-06, "loss": 0.1788, "num_input_tokens_seen": 367515096, "step": 5861 }, { "epoch": 19.504159733777037, "loss": 0.1817135512828827, "loss_ce": 1.1907430234714411e-05, "loss_iou": 0.0654296875, "loss_num": 0.0101318359375, "loss_xval": 0.181640625, "num_input_tokens_seen": 367515096, "step": 5861 }, { "epoch": 19.50748752079867, "grad_norm": 17.632205963134766, "learning_rate": 5e-06, "loss": 0.3863, "num_input_tokens_seen": 367576920, "step": 5862 }, { "epoch": 19.50748752079867, "loss": 0.32443487644195557, "loss_ce": 2.5148824533971492e-06, "loss_iou": 0.10693359375, "loss_num": 0.0220947265625, "loss_xval": 0.32421875, "num_input_tokens_seen": 367576920, "step": 5862 }, { "epoch": 19.510815307820298, "grad_norm": 25.25273323059082, "learning_rate": 5e-06, "loss": 0.2201, "num_input_tokens_seen": 367639316, "step": 5863 }, { "epoch": 19.510815307820298, "loss": 0.245370015501976, "loss_ce": 8.689402420714032e-06, "loss_iou": 0.099609375, "loss_num": 0.00933837890625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 367639316, "step": 5863 }, { "epoch": 19.51414309484193, "grad_norm": 12.871767044067383, "learning_rate": 5e-06, "loss": 0.5572, "num_input_tokens_seen": 367702672, "step": 5864 }, { "epoch": 19.51414309484193, "loss": 0.4605337381362915, "loss_ce": 3.876067785313353e-05, "loss_iou": 0.1923828125, "loss_num": 0.01519775390625, "loss_xval": 0.4609375, "num_input_tokens_seen": 367702672, "step": 5864 }, { "epoch": 19.51747088186356, "grad_norm": 10.875319480895996, "learning_rate": 5e-06, "loss": 0.3446, "num_input_tokens_seen": 367766480, "step": 5865 }, { "epoch": 19.51747088186356, "loss": 0.3630073666572571, "loss_ce": 7.886501407483593e-07, "loss_iou": 0.1513671875, "loss_num": 0.0123291015625, "loss_xval": 0.36328125, "num_input_tokens_seen": 367766480, "step": 5865 }, { "epoch": 19.52079866888519, "grad_norm": 6.542229175567627, "learning_rate": 5e-06, "loss": 0.3421, "num_input_tokens_seen": 367826596, "step": 5866 }, { "epoch": 19.52079866888519, "loss": 0.3028593361377716, "loss_ce": 2.8954966637684265e-06, "loss_iou": 0.10009765625, "loss_num": 0.0205078125, "loss_xval": 0.302734375, "num_input_tokens_seen": 367826596, "step": 5866 }, { "epoch": 19.52412645590682, "grad_norm": 12.811712265014648, "learning_rate": 5e-06, "loss": 0.2732, "num_input_tokens_seen": 367889028, "step": 5867 }, { "epoch": 19.52412645590682, "loss": 0.3497050106525421, "loss_ce": 1.1702142728609033e-05, "loss_iou": 0.1552734375, "loss_num": 0.007537841796875, "loss_xval": 0.349609375, "num_input_tokens_seen": 367889028, "step": 5867 }, { "epoch": 19.527454242928453, "grad_norm": 12.36987590789795, "learning_rate": 5e-06, "loss": 0.4815, "num_input_tokens_seen": 367952844, "step": 5868 }, { "epoch": 19.527454242928453, "loss": 0.3032872676849365, "loss_ce": 3.5581015254138038e-06, "loss_iou": 0.08154296875, "loss_num": 0.028076171875, "loss_xval": 0.302734375, "num_input_tokens_seen": 367952844, "step": 5868 }, { "epoch": 19.530782029950082, "grad_norm": 22.222442626953125, "learning_rate": 5e-06, "loss": 0.3451, "num_input_tokens_seen": 368015632, "step": 5869 }, { "epoch": 19.530782029950082, "loss": 0.4083269238471985, "loss_ce": 1.7131364984379616e-06, "loss_iou": 0.1689453125, "loss_num": 0.0140380859375, "loss_xval": 0.408203125, "num_input_tokens_seen": 368015632, "step": 5869 }, { "epoch": 19.534109816971714, "grad_norm": 19.85041046142578, "learning_rate": 5e-06, "loss": 0.5269, "num_input_tokens_seen": 368077420, "step": 5870 }, { "epoch": 19.534109816971714, "loss": 0.330158531665802, "loss_ce": 1.9387105567147955e-05, "loss_iou": 0.1328125, "loss_num": 0.01318359375, "loss_xval": 0.330078125, "num_input_tokens_seen": 368077420, "step": 5870 }, { "epoch": 19.537437603993343, "grad_norm": 22.229217529296875, "learning_rate": 5e-06, "loss": 0.3548, "num_input_tokens_seen": 368139928, "step": 5871 }, { "epoch": 19.537437603993343, "loss": 0.3044484853744507, "loss_ce": 5.098898327560164e-06, "loss_iou": 0.12890625, "loss_num": 0.0093994140625, "loss_xval": 0.3046875, "num_input_tokens_seen": 368139928, "step": 5871 }, { "epoch": 19.540765391014975, "grad_norm": 22.703882217407227, "learning_rate": 5e-06, "loss": 0.2464, "num_input_tokens_seen": 368203092, "step": 5872 }, { "epoch": 19.540765391014975, "loss": 0.2776426374912262, "loss_ce": 2.4222956199082546e-05, "loss_iou": 0.1181640625, "loss_num": 0.0081787109375, "loss_xval": 0.27734375, "num_input_tokens_seen": 368203092, "step": 5872 }, { "epoch": 19.544093178036604, "grad_norm": 9.183845520019531, "learning_rate": 5e-06, "loss": 0.3767, "num_input_tokens_seen": 368265928, "step": 5873 }, { "epoch": 19.544093178036604, "loss": 0.3688361644744873, "loss_ce": 7.236087640194455e-07, "loss_iou": 0.134765625, "loss_num": 0.0198974609375, "loss_xval": 0.369140625, "num_input_tokens_seen": 368265928, "step": 5873 }, { "epoch": 19.547420965058237, "grad_norm": 9.214341163635254, "learning_rate": 5e-06, "loss": 0.384, "num_input_tokens_seen": 368329620, "step": 5874 }, { "epoch": 19.547420965058237, "loss": 0.43671101331710815, "loss_ce": 4.482586518861353e-06, "loss_iou": 0.171875, "loss_num": 0.0184326171875, "loss_xval": 0.4375, "num_input_tokens_seen": 368329620, "step": 5874 }, { "epoch": 19.550748752079866, "grad_norm": 11.524943351745605, "learning_rate": 5e-06, "loss": 0.4214, "num_input_tokens_seen": 368393080, "step": 5875 }, { "epoch": 19.550748752079866, "loss": 0.3929394483566284, "loss_ce": 2.7661021704261657e-06, "loss_iou": 0.138671875, "loss_num": 0.0230712890625, "loss_xval": 0.392578125, "num_input_tokens_seen": 368393080, "step": 5875 }, { "epoch": 19.554076539101498, "grad_norm": 36.75761413574219, "learning_rate": 5e-06, "loss": 0.5661, "num_input_tokens_seen": 368456216, "step": 5876 }, { "epoch": 19.554076539101498, "loss": 0.44666266441345215, "loss_ce": 7.410369107674342e-06, "loss_iou": 0.197265625, "loss_num": 0.01043701171875, "loss_xval": 0.447265625, "num_input_tokens_seen": 368456216, "step": 5876 }, { "epoch": 19.557404326123127, "grad_norm": 41.78353500366211, "learning_rate": 5e-06, "loss": 0.6801, "num_input_tokens_seen": 368520052, "step": 5877 }, { "epoch": 19.557404326123127, "loss": 0.7200350165367126, "loss_ce": 3.2945902148640016e-06, "loss_iou": 0.29296875, "loss_num": 0.0269775390625, "loss_xval": 0.71875, "num_input_tokens_seen": 368520052, "step": 5877 }, { "epoch": 19.56073211314476, "grad_norm": 13.753697395324707, "learning_rate": 5e-06, "loss": 0.2424, "num_input_tokens_seen": 368582108, "step": 5878 }, { "epoch": 19.56073211314476, "loss": 0.1632409393787384, "loss_ce": 2.4228143047366757e-06, "loss_iou": 0.048095703125, "loss_num": 0.01348876953125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 368582108, "step": 5878 }, { "epoch": 19.564059900166388, "grad_norm": 6.4659833908081055, "learning_rate": 5e-06, "loss": 0.2889, "num_input_tokens_seen": 368644488, "step": 5879 }, { "epoch": 19.564059900166388, "loss": 0.25180643796920776, "loss_ce": 5.918171154917218e-06, "loss_iou": 0.08837890625, "loss_num": 0.0150146484375, "loss_xval": 0.251953125, "num_input_tokens_seen": 368644488, "step": 5879 }, { "epoch": 19.56738768718802, "grad_norm": 10.375506401062012, "learning_rate": 5e-06, "loss": 0.345, "num_input_tokens_seen": 368706520, "step": 5880 }, { "epoch": 19.56738768718802, "loss": 0.4817516505718231, "loss_ce": 1.1859348205689457e-06, "loss_iou": 0.1826171875, "loss_num": 0.0230712890625, "loss_xval": 0.482421875, "num_input_tokens_seen": 368706520, "step": 5880 }, { "epoch": 19.57071547420965, "grad_norm": 13.2022123336792, "learning_rate": 5e-06, "loss": 0.2747, "num_input_tokens_seen": 368769708, "step": 5881 }, { "epoch": 19.57071547420965, "loss": 0.28756803274154663, "loss_ce": 3.14185926981736e-05, "loss_iou": 0.123046875, "loss_num": 0.00836181640625, "loss_xval": 0.287109375, "num_input_tokens_seen": 368769708, "step": 5881 }, { "epoch": 19.57404326123128, "grad_norm": 9.16064167022705, "learning_rate": 5e-06, "loss": 0.5196, "num_input_tokens_seen": 368833632, "step": 5882 }, { "epoch": 19.57404326123128, "loss": 0.592692494392395, "loss_ce": 4.108422945137136e-05, "loss_iou": 0.2431640625, "loss_num": 0.021240234375, "loss_xval": 0.59375, "num_input_tokens_seen": 368833632, "step": 5882 }, { "epoch": 19.57737104825291, "grad_norm": 10.19306755065918, "learning_rate": 5e-06, "loss": 0.3372, "num_input_tokens_seen": 368897140, "step": 5883 }, { "epoch": 19.57737104825291, "loss": 0.4017440378665924, "loss_ce": 1.0640884283930063e-05, "loss_iou": 0.1669921875, "loss_num": 0.01348876953125, "loss_xval": 0.40234375, "num_input_tokens_seen": 368897140, "step": 5883 }, { "epoch": 19.580698835274543, "grad_norm": 8.137589454650879, "learning_rate": 5e-06, "loss": 0.4205, "num_input_tokens_seen": 368959080, "step": 5884 }, { "epoch": 19.580698835274543, "loss": 0.46522748470306396, "loss_ce": 2.259632765344577e-06, "loss_iou": 0.18359375, "loss_num": 0.0194091796875, "loss_xval": 0.46484375, "num_input_tokens_seen": 368959080, "step": 5884 }, { "epoch": 19.58402662229617, "grad_norm": 12.463781356811523, "learning_rate": 5e-06, "loss": 0.4387, "num_input_tokens_seen": 369021104, "step": 5885 }, { "epoch": 19.58402662229617, "loss": 0.5128183960914612, "loss_ce": 9.923420520863147e-07, "loss_iou": 0.181640625, "loss_num": 0.030029296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 369021104, "step": 5885 }, { "epoch": 19.587354409317804, "grad_norm": 17.206119537353516, "learning_rate": 5e-06, "loss": 0.4135, "num_input_tokens_seen": 369084560, "step": 5886 }, { "epoch": 19.587354409317804, "loss": 0.5510272979736328, "loss_ce": 1.900687948364066e-06, "loss_iou": 0.2373046875, "loss_num": 0.01519775390625, "loss_xval": 0.55078125, "num_input_tokens_seen": 369084560, "step": 5886 }, { "epoch": 19.590682196339433, "grad_norm": 5.980077743530273, "learning_rate": 5e-06, "loss": 0.2719, "num_input_tokens_seen": 369145836, "step": 5887 }, { "epoch": 19.590682196339433, "loss": 0.11426045000553131, "loss_ce": 2.6369700663053663e-06, "loss_iou": 0.040283203125, "loss_num": 0.0067138671875, "loss_xval": 0.1142578125, "num_input_tokens_seen": 369145836, "step": 5887 }, { "epoch": 19.594009983361065, "grad_norm": 12.059760093688965, "learning_rate": 5e-06, "loss": 0.3318, "num_input_tokens_seen": 369207648, "step": 5888 }, { "epoch": 19.594009983361065, "loss": 0.21116741001605988, "loss_ce": 1.0251752655676682e-06, "loss_iou": 0.06591796875, "loss_num": 0.015869140625, "loss_xval": 0.2109375, "num_input_tokens_seen": 369207648, "step": 5888 }, { "epoch": 19.597337770382694, "grad_norm": 4.961147308349609, "learning_rate": 5e-06, "loss": 0.3467, "num_input_tokens_seen": 369268776, "step": 5889 }, { "epoch": 19.597337770382694, "loss": 0.461248517036438, "loss_ce": 5.833483555761632e-06, "loss_iou": 0.181640625, "loss_num": 0.0196533203125, "loss_xval": 0.4609375, "num_input_tokens_seen": 369268776, "step": 5889 }, { "epoch": 19.600665557404326, "grad_norm": 16.997116088867188, "learning_rate": 5e-06, "loss": 0.3325, "num_input_tokens_seen": 369328136, "step": 5890 }, { "epoch": 19.600665557404326, "loss": 0.2960551977157593, "loss_ce": 4.15843669543392e-06, "loss_iou": 0.126953125, "loss_num": 0.00830078125, "loss_xval": 0.296875, "num_input_tokens_seen": 369328136, "step": 5890 }, { "epoch": 19.603993344425955, "grad_norm": 9.889593124389648, "learning_rate": 5e-06, "loss": 0.4044, "num_input_tokens_seen": 369390104, "step": 5891 }, { "epoch": 19.603993344425955, "loss": 0.19595429301261902, "loss_ce": 9.407038987774285e-07, "loss_iou": 0.0732421875, "loss_num": 0.0098876953125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 369390104, "step": 5891 }, { "epoch": 19.607321131447588, "grad_norm": 4.980090141296387, "learning_rate": 5e-06, "loss": 0.4388, "num_input_tokens_seen": 369451088, "step": 5892 }, { "epoch": 19.607321131447588, "loss": 0.3137844204902649, "loss_ce": 2.706497070903424e-06, "loss_iou": 0.10205078125, "loss_num": 0.02197265625, "loss_xval": 0.314453125, "num_input_tokens_seen": 369451088, "step": 5892 }, { "epoch": 19.610648918469217, "grad_norm": 15.000380516052246, "learning_rate": 5e-06, "loss": 0.371, "num_input_tokens_seen": 369514228, "step": 5893 }, { "epoch": 19.610648918469217, "loss": 0.23828741908073425, "loss_ce": 6.167806532175746e-06, "loss_iou": 0.09033203125, "loss_num": 0.01165771484375, "loss_xval": 0.23828125, "num_input_tokens_seen": 369514228, "step": 5893 }, { "epoch": 19.61397670549085, "grad_norm": 16.593271255493164, "learning_rate": 5e-06, "loss": 0.2716, "num_input_tokens_seen": 369576052, "step": 5894 }, { "epoch": 19.61397670549085, "loss": 0.18985190987586975, "loss_ce": 2.0488373593252618e-06, "loss_iou": 0.06396484375, "loss_num": 0.01239013671875, "loss_xval": 0.189453125, "num_input_tokens_seen": 369576052, "step": 5894 }, { "epoch": 19.617304492512478, "grad_norm": 10.398351669311523, "learning_rate": 5e-06, "loss": 0.3098, "num_input_tokens_seen": 369638412, "step": 5895 }, { "epoch": 19.617304492512478, "loss": 0.35066598653793335, "loss_ce": 3.7266433992044767e-06, "loss_iou": 0.11767578125, "loss_num": 0.023193359375, "loss_xval": 0.3515625, "num_input_tokens_seen": 369638412, "step": 5895 }, { "epoch": 19.62063227953411, "grad_norm": 11.109054565429688, "learning_rate": 5e-06, "loss": 0.291, "num_input_tokens_seen": 369699756, "step": 5896 }, { "epoch": 19.62063227953411, "loss": 0.31842371821403503, "loss_ce": 3.2895704862312414e-06, "loss_iou": 0.1328125, "loss_num": 0.0103759765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 369699756, "step": 5896 }, { "epoch": 19.62396006655574, "grad_norm": 9.202303886413574, "learning_rate": 5e-06, "loss": 0.2748, "num_input_tokens_seen": 369761336, "step": 5897 }, { "epoch": 19.62396006655574, "loss": 0.25128433108329773, "loss_ce": 2.59019498116686e-06, "loss_iou": 0.0966796875, "loss_num": 0.0115966796875, "loss_xval": 0.251953125, "num_input_tokens_seen": 369761336, "step": 5897 }, { "epoch": 19.62728785357737, "grad_norm": 9.155508995056152, "learning_rate": 5e-06, "loss": 0.3411, "num_input_tokens_seen": 369824932, "step": 5898 }, { "epoch": 19.62728785357737, "loss": 0.33496350049972534, "loss_ce": 2.5782064767554402e-06, "loss_iou": 0.140625, "loss_num": 0.0106201171875, "loss_xval": 0.3359375, "num_input_tokens_seen": 369824932, "step": 5898 }, { "epoch": 19.630615640599, "grad_norm": 10.645379066467285, "learning_rate": 5e-06, "loss": 0.2961, "num_input_tokens_seen": 369886260, "step": 5899 }, { "epoch": 19.630615640599, "loss": 0.30078214406967163, "loss_ce": 8.889469427231234e-07, "loss_iou": 0.1259765625, "loss_num": 0.009765625, "loss_xval": 0.30078125, "num_input_tokens_seen": 369886260, "step": 5899 }, { "epoch": 19.633943427620633, "grad_norm": 7.557188987731934, "learning_rate": 5e-06, "loss": 0.4039, "num_input_tokens_seen": 369949732, "step": 5900 }, { "epoch": 19.633943427620633, "loss": 0.44873958826065063, "loss_ce": 9.116889486904256e-06, "loss_iou": 0.1884765625, "loss_num": 0.0142822265625, "loss_xval": 0.44921875, "num_input_tokens_seen": 369949732, "step": 5900 }, { "epoch": 19.63727121464226, "grad_norm": 8.90225887298584, "learning_rate": 5e-06, "loss": 0.6325, "num_input_tokens_seen": 370013064, "step": 5901 }, { "epoch": 19.63727121464226, "loss": 0.6089731454849243, "loss_ce": 8.639832230983302e-05, "loss_iou": 0.23828125, "loss_num": 0.0264892578125, "loss_xval": 0.609375, "num_input_tokens_seen": 370013064, "step": 5901 }, { "epoch": 19.640599001663894, "grad_norm": 9.139486312866211, "learning_rate": 5e-06, "loss": 0.2549, "num_input_tokens_seen": 370074428, "step": 5902 }, { "epoch": 19.640599001663894, "loss": 0.1855103224515915, "loss_ce": 1.6000014966266463e-06, "loss_iou": 0.06689453125, "loss_num": 0.01043701171875, "loss_xval": 0.185546875, "num_input_tokens_seen": 370074428, "step": 5902 }, { "epoch": 19.643926788685523, "grad_norm": 14.735671997070312, "learning_rate": 5e-06, "loss": 0.3377, "num_input_tokens_seen": 370136008, "step": 5903 }, { "epoch": 19.643926788685523, "loss": 0.21945534646511078, "loss_ce": 3.4458446407370502e-06, "loss_iou": 0.09033203125, "loss_num": 0.00775146484375, "loss_xval": 0.2197265625, "num_input_tokens_seen": 370136008, "step": 5903 }, { "epoch": 19.647254575707155, "grad_norm": 8.187874794006348, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 370196776, "step": 5904 }, { "epoch": 19.647254575707155, "loss": 0.3496996760368347, "loss_ce": 1.4027702491148375e-05, "loss_iou": 0.1328125, "loss_num": 0.016845703125, "loss_xval": 0.349609375, "num_input_tokens_seen": 370196776, "step": 5904 }, { "epoch": 19.650582362728784, "grad_norm": 11.210838317871094, "learning_rate": 5e-06, "loss": 0.2452, "num_input_tokens_seen": 370257316, "step": 5905 }, { "epoch": 19.650582362728784, "loss": 0.18011580407619476, "loss_ce": 1.0598218977975193e-06, "loss_iou": 0.038818359375, "loss_num": 0.0205078125, "loss_xval": 0.1796875, "num_input_tokens_seen": 370257316, "step": 5905 }, { "epoch": 19.653910149750416, "grad_norm": 12.827980041503906, "learning_rate": 5e-06, "loss": 0.3222, "num_input_tokens_seen": 370319476, "step": 5906 }, { "epoch": 19.653910149750416, "loss": 0.38264840841293335, "loss_ce": 1.90073624253273e-05, "loss_iou": 0.1572265625, "loss_num": 0.01348876953125, "loss_xval": 0.3828125, "num_input_tokens_seen": 370319476, "step": 5906 }, { "epoch": 19.657237936772045, "grad_norm": 23.43220329284668, "learning_rate": 5e-06, "loss": 0.4535, "num_input_tokens_seen": 370383152, "step": 5907 }, { "epoch": 19.657237936772045, "loss": 0.4935029149055481, "loss_ce": 3.180636895194766e-06, "loss_iou": 0.205078125, "loss_num": 0.0166015625, "loss_xval": 0.494140625, "num_input_tokens_seen": 370383152, "step": 5907 }, { "epoch": 19.660565723793678, "grad_norm": 29.96070098876953, "learning_rate": 5e-06, "loss": 0.4097, "num_input_tokens_seen": 370446144, "step": 5908 }, { "epoch": 19.660565723793678, "loss": 0.39920929074287415, "loss_ce": 3.9345479308394715e-05, "loss_iou": 0.1572265625, "loss_num": 0.016845703125, "loss_xval": 0.3984375, "num_input_tokens_seen": 370446144, "step": 5908 }, { "epoch": 19.663893510815306, "grad_norm": 23.908674240112305, "learning_rate": 5e-06, "loss": 0.2631, "num_input_tokens_seen": 370509360, "step": 5909 }, { "epoch": 19.663893510815306, "loss": 0.1918981820344925, "loss_ce": 3.6488481782726012e-06, "loss_iou": 0.08251953125, "loss_num": 0.00543212890625, "loss_xval": 0.19140625, "num_input_tokens_seen": 370509360, "step": 5909 }, { "epoch": 19.66722129783694, "grad_norm": 19.288692474365234, "learning_rate": 5e-06, "loss": 0.5346, "num_input_tokens_seen": 370573180, "step": 5910 }, { "epoch": 19.66722129783694, "loss": 0.4524044990539551, "loss_ce": 1.1917869414901361e-05, "loss_iou": 0.19140625, "loss_num": 0.01361083984375, "loss_xval": 0.453125, "num_input_tokens_seen": 370573180, "step": 5910 }, { "epoch": 19.670549084858568, "grad_norm": 20.829362869262695, "learning_rate": 5e-06, "loss": 0.3465, "num_input_tokens_seen": 370633904, "step": 5911 }, { "epoch": 19.670549084858568, "loss": 0.24633842706680298, "loss_ce": 5.540117626878782e-07, "loss_iou": 0.09619140625, "loss_num": 0.0107421875, "loss_xval": 0.24609375, "num_input_tokens_seen": 370633904, "step": 5911 }, { "epoch": 19.6738768718802, "grad_norm": 18.53961753845215, "learning_rate": 5e-06, "loss": 0.4724, "num_input_tokens_seen": 370697904, "step": 5912 }, { "epoch": 19.6738768718802, "loss": 0.5493853092193604, "loss_ce": 7.817367986717727e-06, "loss_iou": 0.2421875, "loss_num": 0.01275634765625, "loss_xval": 0.55078125, "num_input_tokens_seen": 370697904, "step": 5912 }, { "epoch": 19.67720465890183, "grad_norm": 16.545536041259766, "learning_rate": 5e-06, "loss": 0.3002, "num_input_tokens_seen": 370760092, "step": 5913 }, { "epoch": 19.67720465890183, "loss": 0.19696442782878876, "loss_ce": 3.981086592830252e-06, "loss_iou": 0.06591796875, "loss_num": 0.0130615234375, "loss_xval": 0.197265625, "num_input_tokens_seen": 370760092, "step": 5913 }, { "epoch": 19.68053244592346, "grad_norm": 14.003129005432129, "learning_rate": 5e-06, "loss": 0.3458, "num_input_tokens_seen": 370823956, "step": 5914 }, { "epoch": 19.68053244592346, "loss": 0.34246939420700073, "loss_ce": 1.1317659982523764e-06, "loss_iou": 0.12255859375, "loss_num": 0.01953125, "loss_xval": 0.341796875, "num_input_tokens_seen": 370823956, "step": 5914 }, { "epoch": 19.68386023294509, "grad_norm": 25.23017120361328, "learning_rate": 5e-06, "loss": 0.5747, "num_input_tokens_seen": 370887896, "step": 5915 }, { "epoch": 19.68386023294509, "loss": 0.6999775171279907, "loss_ce": 2.633345138747245e-05, "loss_iou": 0.29296875, "loss_num": 0.02294921875, "loss_xval": 0.69921875, "num_input_tokens_seen": 370887896, "step": 5915 }, { "epoch": 19.687188019966722, "grad_norm": 22.869110107421875, "learning_rate": 5e-06, "loss": 0.4789, "num_input_tokens_seen": 370950912, "step": 5916 }, { "epoch": 19.687188019966722, "loss": 0.39465558528900146, "loss_ce": 2.246822987217456e-06, "loss_iou": 0.14453125, "loss_num": 0.02099609375, "loss_xval": 0.39453125, "num_input_tokens_seen": 370950912, "step": 5916 }, { "epoch": 19.69051580698835, "grad_norm": 31.51764678955078, "learning_rate": 5e-06, "loss": 0.4242, "num_input_tokens_seen": 371014112, "step": 5917 }, { "epoch": 19.69051580698835, "loss": 0.23566146194934845, "loss_ce": 4.721330697066151e-06, "loss_iou": 0.06396484375, "loss_num": 0.0216064453125, "loss_xval": 0.2353515625, "num_input_tokens_seen": 371014112, "step": 5917 }, { "epoch": 19.693843594009984, "grad_norm": 27.77056884765625, "learning_rate": 5e-06, "loss": 0.5642, "num_input_tokens_seen": 371077280, "step": 5918 }, { "epoch": 19.693843594009984, "loss": 0.46821898221969604, "loss_ce": 3.032220092791249e-06, "loss_iou": 0.19921875, "loss_num": 0.01416015625, "loss_xval": 0.46875, "num_input_tokens_seen": 371077280, "step": 5918 }, { "epoch": 19.697171381031612, "grad_norm": 12.897278785705566, "learning_rate": 5e-06, "loss": 0.4098, "num_input_tokens_seen": 371140324, "step": 5919 }, { "epoch": 19.697171381031612, "loss": 0.38376474380493164, "loss_ce": 2.1494068278116174e-05, "loss_iou": 0.1455078125, "loss_num": 0.0184326171875, "loss_xval": 0.3828125, "num_input_tokens_seen": 371140324, "step": 5919 }, { "epoch": 19.700499168053245, "grad_norm": 9.10488510131836, "learning_rate": 5e-06, "loss": 0.3156, "num_input_tokens_seen": 371202480, "step": 5920 }, { "epoch": 19.700499168053245, "loss": 0.3419235050678253, "loss_ce": 4.566487405099906e-06, "loss_iou": 0.11328125, "loss_num": 0.023193359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 371202480, "step": 5920 }, { "epoch": 19.703826955074874, "grad_norm": 11.955238342285156, "learning_rate": 5e-06, "loss": 0.4895, "num_input_tokens_seen": 371266008, "step": 5921 }, { "epoch": 19.703826955074874, "loss": 0.29211732745170593, "loss_ce": 3.067489615204977e-06, "loss_iou": 0.10400390625, "loss_num": 0.016845703125, "loss_xval": 0.29296875, "num_input_tokens_seen": 371266008, "step": 5921 }, { "epoch": 19.707154742096506, "grad_norm": 18.220699310302734, "learning_rate": 5e-06, "loss": 0.4185, "num_input_tokens_seen": 371327956, "step": 5922 }, { "epoch": 19.707154742096506, "loss": 0.40213125944137573, "loss_ce": 1.1375857411621837e-06, "loss_iou": 0.1572265625, "loss_num": 0.0174560546875, "loss_xval": 0.40234375, "num_input_tokens_seen": 371327956, "step": 5922 }, { "epoch": 19.710482529118135, "grad_norm": 20.46641731262207, "learning_rate": 5e-06, "loss": 0.3357, "num_input_tokens_seen": 371392492, "step": 5923 }, { "epoch": 19.710482529118135, "loss": 0.3248467445373535, "loss_ce": 1.7646023479755968e-05, "loss_iou": 0.1416015625, "loss_num": 0.00823974609375, "loss_xval": 0.32421875, "num_input_tokens_seen": 371392492, "step": 5923 }, { "epoch": 19.713810316139767, "grad_norm": 22.49366569519043, "learning_rate": 5e-06, "loss": 0.3314, "num_input_tokens_seen": 371454992, "step": 5924 }, { "epoch": 19.713810316139767, "loss": 0.2566157877445221, "loss_ce": 8.502608397975564e-05, "loss_iou": 0.0751953125, "loss_num": 0.021240234375, "loss_xval": 0.255859375, "num_input_tokens_seen": 371454992, "step": 5924 }, { "epoch": 19.717138103161396, "grad_norm": 26.722013473510742, "learning_rate": 5e-06, "loss": 0.372, "num_input_tokens_seen": 371517720, "step": 5925 }, { "epoch": 19.717138103161396, "loss": 0.23073247075080872, "loss_ce": 4.332392563810572e-06, "loss_iou": 0.08935546875, "loss_num": 0.0103759765625, "loss_xval": 0.23046875, "num_input_tokens_seen": 371517720, "step": 5925 }, { "epoch": 19.72046589018303, "grad_norm": 11.328043937683105, "learning_rate": 5e-06, "loss": 0.1799, "num_input_tokens_seen": 371578216, "step": 5926 }, { "epoch": 19.72046589018303, "loss": 0.19108664989471436, "loss_ce": 8.446861556876684e-07, "loss_iou": 0.052490234375, "loss_num": 0.0172119140625, "loss_xval": 0.19140625, "num_input_tokens_seen": 371578216, "step": 5926 }, { "epoch": 19.723793677204657, "grad_norm": 7.239537239074707, "learning_rate": 5e-06, "loss": 0.3715, "num_input_tokens_seen": 371641812, "step": 5927 }, { "epoch": 19.723793677204657, "loss": 0.3882461488246918, "loss_ce": 1.5402808912767796e-06, "loss_iou": 0.138671875, "loss_num": 0.0220947265625, "loss_xval": 0.388671875, "num_input_tokens_seen": 371641812, "step": 5927 }, { "epoch": 19.72712146422629, "grad_norm": 25.44999885559082, "learning_rate": 5e-06, "loss": 0.4902, "num_input_tokens_seen": 371703044, "step": 5928 }, { "epoch": 19.72712146422629, "loss": 0.535707414150238, "loss_ce": 1.8510525023884838e-06, "loss_iou": 0.1943359375, "loss_num": 0.0294189453125, "loss_xval": 0.53515625, "num_input_tokens_seen": 371703044, "step": 5928 }, { "epoch": 19.73044925124792, "grad_norm": 25.158164978027344, "learning_rate": 5e-06, "loss": 0.6151, "num_input_tokens_seen": 371766024, "step": 5929 }, { "epoch": 19.73044925124792, "loss": 0.4189460277557373, "loss_ce": 7.045072152322973e-07, "loss_iou": 0.193359375, "loss_num": 0.00634765625, "loss_xval": 0.41796875, "num_input_tokens_seen": 371766024, "step": 5929 }, { "epoch": 19.73377703826955, "grad_norm": 18.8247013092041, "learning_rate": 5e-06, "loss": 0.269, "num_input_tokens_seen": 371827244, "step": 5930 }, { "epoch": 19.73377703826955, "loss": 0.2759079337120056, "loss_ce": 2.900517029047478e-05, "loss_iou": 0.0966796875, "loss_num": 0.0164794921875, "loss_xval": 0.275390625, "num_input_tokens_seen": 371827244, "step": 5930 }, { "epoch": 19.73710482529118, "grad_norm": 16.635046005249023, "learning_rate": 5e-06, "loss": 0.5811, "num_input_tokens_seen": 371889416, "step": 5931 }, { "epoch": 19.73710482529118, "loss": 0.5860613584518433, "loss_ce": 1.7457325611758279e-06, "loss_iou": 0.2021484375, "loss_num": 0.036376953125, "loss_xval": 0.5859375, "num_input_tokens_seen": 371889416, "step": 5931 }, { "epoch": 19.740432612312812, "grad_norm": 6.465653419494629, "learning_rate": 5e-06, "loss": 0.3192, "num_input_tokens_seen": 371953432, "step": 5932 }, { "epoch": 19.740432612312812, "loss": 0.26098713278770447, "loss_ce": 8.124804935505381e-07, "loss_iou": 0.10498046875, "loss_num": 0.01031494140625, "loss_xval": 0.26171875, "num_input_tokens_seen": 371953432, "step": 5932 }, { "epoch": 19.74376039933444, "grad_norm": 22.068090438842773, "learning_rate": 5e-06, "loss": 0.5213, "num_input_tokens_seen": 372016540, "step": 5933 }, { "epoch": 19.74376039933444, "loss": 0.5245996713638306, "loss_ce": 0.000490751990582794, "loss_iou": 0.19921875, "loss_num": 0.0252685546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 372016540, "step": 5933 }, { "epoch": 19.747088186356073, "grad_norm": 25.27790641784668, "learning_rate": 5e-06, "loss": 0.2545, "num_input_tokens_seen": 372078048, "step": 5934 }, { "epoch": 19.747088186356073, "loss": 0.18672794103622437, "loss_ce": 6.139459401310887e-06, "loss_iou": 0.041748046875, "loss_num": 0.0206298828125, "loss_xval": 0.1865234375, "num_input_tokens_seen": 372078048, "step": 5934 }, { "epoch": 19.750415973377702, "grad_norm": 16.977632522583008, "learning_rate": 5e-06, "loss": 0.2914, "num_input_tokens_seen": 372140844, "step": 5935 }, { "epoch": 19.750415973377702, "loss": 0.2379767894744873, "loss_ce": 7.236853889480699e-07, "loss_iou": 0.10400390625, "loss_num": 0.006011962890625, "loss_xval": 0.23828125, "num_input_tokens_seen": 372140844, "step": 5935 }, { "epoch": 19.753743760399335, "grad_norm": 22.86380958557129, "learning_rate": 5e-06, "loss": 0.4081, "num_input_tokens_seen": 372202388, "step": 5936 }, { "epoch": 19.753743760399335, "loss": 0.4008225202560425, "loss_ce": 4.677450306189712e-06, "loss_iou": 0.1669921875, "loss_num": 0.01336669921875, "loss_xval": 0.400390625, "num_input_tokens_seen": 372202388, "step": 5936 }, { "epoch": 19.757071547420963, "grad_norm": 18.082735061645508, "learning_rate": 5e-06, "loss": 0.3726, "num_input_tokens_seen": 372266196, "step": 5937 }, { "epoch": 19.757071547420963, "loss": 0.21148937940597534, "loss_ce": 2.5547085442667594e-06, "loss_iou": 0.08837890625, "loss_num": 0.006988525390625, "loss_xval": 0.2119140625, "num_input_tokens_seen": 372266196, "step": 5937 }, { "epoch": 19.760399334442596, "grad_norm": 14.383731842041016, "learning_rate": 5e-06, "loss": 0.3834, "num_input_tokens_seen": 372328668, "step": 5938 }, { "epoch": 19.760399334442596, "loss": 0.4867069721221924, "loss_ce": 1.2668248018599115e-05, "loss_iou": 0.1845703125, "loss_num": 0.0234375, "loss_xval": 0.486328125, "num_input_tokens_seen": 372328668, "step": 5938 }, { "epoch": 19.763727121464225, "grad_norm": 22.49877166748047, "learning_rate": 5e-06, "loss": 0.3897, "num_input_tokens_seen": 372391160, "step": 5939 }, { "epoch": 19.763727121464225, "loss": 0.35784202814102173, "loss_ce": 2.3413063900079578e-05, "loss_iou": 0.14453125, "loss_num": 0.01373291015625, "loss_xval": 0.357421875, "num_input_tokens_seen": 372391160, "step": 5939 }, { "epoch": 19.767054908485857, "grad_norm": 36.88857650756836, "learning_rate": 5e-06, "loss": 0.5075, "num_input_tokens_seen": 372454640, "step": 5940 }, { "epoch": 19.767054908485857, "loss": 0.42846935987472534, "loss_ce": 2.569056732681929e-06, "loss_iou": 0.171875, "loss_num": 0.0169677734375, "loss_xval": 0.427734375, "num_input_tokens_seen": 372454640, "step": 5940 }, { "epoch": 19.770382695507486, "grad_norm": 35.45454406738281, "learning_rate": 5e-06, "loss": 0.4217, "num_input_tokens_seen": 372516216, "step": 5941 }, { "epoch": 19.770382695507486, "loss": 0.4205174446105957, "loss_ce": 4.76278898986493e-07, "loss_iou": 0.17578125, "loss_num": 0.01361083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 372516216, "step": 5941 }, { "epoch": 19.77371048252912, "grad_norm": 24.17701530456543, "learning_rate": 5e-06, "loss": 0.4306, "num_input_tokens_seen": 372578996, "step": 5942 }, { "epoch": 19.77371048252912, "loss": 0.48974907398223877, "loss_ce": 2.99182511298568e-06, "loss_iou": 0.1875, "loss_num": 0.0228271484375, "loss_xval": 0.490234375, "num_input_tokens_seen": 372578996, "step": 5942 }, { "epoch": 19.777038269550747, "grad_norm": 20.863853454589844, "learning_rate": 5e-06, "loss": 0.2707, "num_input_tokens_seen": 372642288, "step": 5943 }, { "epoch": 19.777038269550747, "loss": 0.19117888808250427, "loss_ce": 4.7300422011176124e-05, "loss_iou": 0.072265625, "loss_num": 0.00921630859375, "loss_xval": 0.19140625, "num_input_tokens_seen": 372642288, "step": 5943 }, { "epoch": 19.78036605657238, "grad_norm": 19.235515594482422, "learning_rate": 5e-06, "loss": 0.4549, "num_input_tokens_seen": 372705304, "step": 5944 }, { "epoch": 19.78036605657238, "loss": 0.38910040259361267, "loss_ce": 1.3030014542891877e-06, "loss_iou": 0.150390625, "loss_num": 0.017822265625, "loss_xval": 0.388671875, "num_input_tokens_seen": 372705304, "step": 5944 }, { "epoch": 19.78369384359401, "grad_norm": 6.131237983703613, "learning_rate": 5e-06, "loss": 0.3258, "num_input_tokens_seen": 372767600, "step": 5945 }, { "epoch": 19.78369384359401, "loss": 0.3748824894428253, "loss_ce": 4.561298283078941e-06, "loss_iou": 0.1494140625, "loss_num": 0.0155029296875, "loss_xval": 0.375, "num_input_tokens_seen": 372767600, "step": 5945 }, { "epoch": 19.78702163061564, "grad_norm": 11.316439628601074, "learning_rate": 5e-06, "loss": 0.4846, "num_input_tokens_seen": 372829692, "step": 5946 }, { "epoch": 19.78702163061564, "loss": 0.5333887934684753, "loss_ce": 2.5903270852722926e-06, "loss_iou": 0.201171875, "loss_num": 0.0263671875, "loss_xval": 0.53515625, "num_input_tokens_seen": 372829692, "step": 5946 }, { "epoch": 19.79034941763727, "grad_norm": 21.746673583984375, "learning_rate": 5e-06, "loss": 0.4334, "num_input_tokens_seen": 372891244, "step": 5947 }, { "epoch": 19.79034941763727, "loss": 0.6186529994010925, "loss_ce": 6.478428531409008e-07, "loss_iou": 0.234375, "loss_num": 0.030029296875, "loss_xval": 0.6171875, "num_input_tokens_seen": 372891244, "step": 5947 }, { "epoch": 19.793677204658902, "grad_norm": 24.490015029907227, "learning_rate": 5e-06, "loss": 0.4078, "num_input_tokens_seen": 372953900, "step": 5948 }, { "epoch": 19.793677204658902, "loss": 0.269096702337265, "loss_ce": 7.955777618917637e-06, "loss_iou": 0.08984375, "loss_num": 0.0179443359375, "loss_xval": 0.26953125, "num_input_tokens_seen": 372953900, "step": 5948 }, { "epoch": 19.79700499168053, "grad_norm": 15.142626762390137, "learning_rate": 5e-06, "loss": 0.4697, "num_input_tokens_seen": 373017140, "step": 5949 }, { "epoch": 19.79700499168053, "loss": 0.5433671474456787, "loss_ce": 1.675693056313321e-06, "loss_iou": 0.203125, "loss_num": 0.0274658203125, "loss_xval": 0.54296875, "num_input_tokens_seen": 373017140, "step": 5949 }, { "epoch": 19.800332778702163, "grad_norm": 7.135056018829346, "learning_rate": 5e-06, "loss": 0.314, "num_input_tokens_seen": 373080192, "step": 5950 }, { "epoch": 19.800332778702163, "loss": 0.27508652210235596, "loss_ce": 1.0843209565791767e-06, "loss_iou": 0.12255859375, "loss_num": 0.005950927734375, "loss_xval": 0.275390625, "num_input_tokens_seen": 373080192, "step": 5950 }, { "epoch": 19.803660565723792, "grad_norm": 15.171390533447266, "learning_rate": 5e-06, "loss": 0.5317, "num_input_tokens_seen": 373143980, "step": 5951 }, { "epoch": 19.803660565723792, "loss": 0.31012439727783203, "loss_ce": 4.769917268276913e-06, "loss_iou": 0.1328125, "loss_num": 0.0087890625, "loss_xval": 0.310546875, "num_input_tokens_seen": 373143980, "step": 5951 }, { "epoch": 19.806988352745424, "grad_norm": 29.55864906311035, "learning_rate": 5e-06, "loss": 0.4082, "num_input_tokens_seen": 373207272, "step": 5952 }, { "epoch": 19.806988352745424, "loss": 0.5051645040512085, "loss_ce": 3.755873694899492e-05, "loss_iou": 0.2333984375, "loss_num": 0.00787353515625, "loss_xval": 0.50390625, "num_input_tokens_seen": 373207272, "step": 5952 }, { "epoch": 19.810316139767053, "grad_norm": 25.09952163696289, "learning_rate": 5e-06, "loss": 0.3482, "num_input_tokens_seen": 373269668, "step": 5953 }, { "epoch": 19.810316139767053, "loss": 0.30331799387931824, "loss_ce": 3.791139761233353e-06, "loss_iou": 0.12255859375, "loss_num": 0.0115966796875, "loss_xval": 0.302734375, "num_input_tokens_seen": 373269668, "step": 5953 }, { "epoch": 19.813643926788686, "grad_norm": 22.357248306274414, "learning_rate": 5e-06, "loss": 0.4661, "num_input_tokens_seen": 373333168, "step": 5954 }, { "epoch": 19.813643926788686, "loss": 0.5185406804084778, "loss_ce": 1.2629104730876861e-06, "loss_iou": 0.220703125, "loss_num": 0.0152587890625, "loss_xval": 0.51953125, "num_input_tokens_seen": 373333168, "step": 5954 }, { "epoch": 19.816971713810315, "grad_norm": 11.984607696533203, "learning_rate": 5e-06, "loss": 0.2984, "num_input_tokens_seen": 373395084, "step": 5955 }, { "epoch": 19.816971713810315, "loss": 0.29993313550949097, "loss_ce": 6.385560027410975e-06, "loss_iou": 0.10009765625, "loss_num": 0.0198974609375, "loss_xval": 0.30078125, "num_input_tokens_seen": 373395084, "step": 5955 }, { "epoch": 19.820299500831947, "grad_norm": 7.05075216293335, "learning_rate": 5e-06, "loss": 0.3482, "num_input_tokens_seen": 373458896, "step": 5956 }, { "epoch": 19.820299500831947, "loss": 0.20480895042419434, "loss_ce": 5.482179403770715e-06, "loss_iou": 0.080078125, "loss_num": 0.00885009765625, "loss_xval": 0.205078125, "num_input_tokens_seen": 373458896, "step": 5956 }, { "epoch": 19.823627287853576, "grad_norm": 8.197088241577148, "learning_rate": 5e-06, "loss": 0.379, "num_input_tokens_seen": 373521992, "step": 5957 }, { "epoch": 19.823627287853576, "loss": 0.2347419708967209, "loss_ce": 7.751640396236326e-07, "loss_iou": 0.09521484375, "loss_num": 0.0089111328125, "loss_xval": 0.234375, "num_input_tokens_seen": 373521992, "step": 5957 }, { "epoch": 19.826955074875208, "grad_norm": 8.289430618286133, "learning_rate": 5e-06, "loss": 0.2652, "num_input_tokens_seen": 373584632, "step": 5958 }, { "epoch": 19.826955074875208, "loss": 0.37783241271972656, "loss_ce": 2.4774240955593996e-05, "loss_iou": 0.1103515625, "loss_num": 0.031494140625, "loss_xval": 0.376953125, "num_input_tokens_seen": 373584632, "step": 5958 }, { "epoch": 19.830282861896837, "grad_norm": 7.406334400177002, "learning_rate": 5e-06, "loss": 0.2689, "num_input_tokens_seen": 373645560, "step": 5959 }, { "epoch": 19.830282861896837, "loss": 0.20361432433128357, "loss_ce": 1.0377774515291094e-06, "loss_iou": 0.068359375, "loss_num": 0.013427734375, "loss_xval": 0.203125, "num_input_tokens_seen": 373645560, "step": 5959 }, { "epoch": 19.83361064891847, "grad_norm": 9.787936210632324, "learning_rate": 5e-06, "loss": 0.4751, "num_input_tokens_seen": 373709024, "step": 5960 }, { "epoch": 19.83361064891847, "loss": 0.5836377143859863, "loss_ce": 1.9546328985597938e-05, "loss_iou": 0.263671875, "loss_num": 0.0113525390625, "loss_xval": 0.58203125, "num_input_tokens_seen": 373709024, "step": 5960 }, { "epoch": 19.836938435940098, "grad_norm": 14.564325332641602, "learning_rate": 5e-06, "loss": 0.3869, "num_input_tokens_seen": 373771844, "step": 5961 }, { "epoch": 19.836938435940098, "loss": 0.347270667552948, "loss_ce": 0.00010269758058711886, "loss_iou": 0.134765625, "loss_num": 0.01556396484375, "loss_xval": 0.34765625, "num_input_tokens_seen": 373771844, "step": 5961 }, { "epoch": 19.84026622296173, "grad_norm": 8.838959693908691, "learning_rate": 5e-06, "loss": 0.2968, "num_input_tokens_seen": 373834736, "step": 5962 }, { "epoch": 19.84026622296173, "loss": 0.3352091610431671, "loss_ce": 4.0821983020578045e-06, "loss_iou": 0.1484375, "loss_num": 0.007781982421875, "loss_xval": 0.3359375, "num_input_tokens_seen": 373834736, "step": 5962 }, { "epoch": 19.84359400998336, "grad_norm": 12.448040008544922, "learning_rate": 5e-06, "loss": 0.3457, "num_input_tokens_seen": 373898052, "step": 5963 }, { "epoch": 19.84359400998336, "loss": 0.32984015345573425, "loss_ce": 6.164913429529406e-06, "loss_iou": 0.1357421875, "loss_num": 0.0115966796875, "loss_xval": 0.330078125, "num_input_tokens_seen": 373898052, "step": 5963 }, { "epoch": 19.846921797004992, "grad_norm": 13.118873596191406, "learning_rate": 5e-06, "loss": 0.2996, "num_input_tokens_seen": 373961152, "step": 5964 }, { "epoch": 19.846921797004992, "loss": 0.34421712160110474, "loss_ce": 3.988874959759414e-05, "loss_iou": 0.12158203125, "loss_num": 0.0201416015625, "loss_xval": 0.34375, "num_input_tokens_seen": 373961152, "step": 5964 }, { "epoch": 19.85024958402662, "grad_norm": 22.969758987426758, "learning_rate": 5e-06, "loss": 0.3468, "num_input_tokens_seen": 374023444, "step": 5965 }, { "epoch": 19.85024958402662, "loss": 0.3694780468940735, "loss_ce": 1.7195558257299126e-06, "loss_iou": 0.1572265625, "loss_num": 0.01092529296875, "loss_xval": 0.369140625, "num_input_tokens_seen": 374023444, "step": 5965 }, { "epoch": 19.853577371048253, "grad_norm": 8.422104835510254, "learning_rate": 5e-06, "loss": 0.2751, "num_input_tokens_seen": 374087624, "step": 5966 }, { "epoch": 19.853577371048253, "loss": 0.2878429591655731, "loss_ce": 1.164213699667016e-06, "loss_iou": 0.125, "loss_num": 0.00762939453125, "loss_xval": 0.287109375, "num_input_tokens_seen": 374087624, "step": 5966 }, { "epoch": 19.856905158069882, "grad_norm": 22.539274215698242, "learning_rate": 5e-06, "loss": 0.4816, "num_input_tokens_seen": 374150452, "step": 5967 }, { "epoch": 19.856905158069882, "loss": 0.37963956594467163, "loss_ce": 8.619382469987613e-07, "loss_iou": 0.1337890625, "loss_num": 0.0224609375, "loss_xval": 0.37890625, "num_input_tokens_seen": 374150452, "step": 5967 }, { "epoch": 19.860232945091514, "grad_norm": 61.71273422241211, "learning_rate": 5e-06, "loss": 0.6073, "num_input_tokens_seen": 374214052, "step": 5968 }, { "epoch": 19.860232945091514, "loss": 0.4788842499256134, "loss_ce": 2.434595899103442e-06, "loss_iou": 0.189453125, "loss_num": 0.0201416015625, "loss_xval": 0.478515625, "num_input_tokens_seen": 374214052, "step": 5968 }, { "epoch": 19.863560732113143, "grad_norm": 30.925201416015625, "learning_rate": 5e-06, "loss": 0.4976, "num_input_tokens_seen": 374277044, "step": 5969 }, { "epoch": 19.863560732113143, "loss": 0.5697177052497864, "loss_ce": 1.5558500308543444e-05, "loss_iou": 0.2255859375, "loss_num": 0.0235595703125, "loss_xval": 0.5703125, "num_input_tokens_seen": 374277044, "step": 5969 }, { "epoch": 19.866888519134775, "grad_norm": 18.76418113708496, "learning_rate": 5e-06, "loss": 0.3507, "num_input_tokens_seen": 374339264, "step": 5970 }, { "epoch": 19.866888519134775, "loss": 0.19775456190109253, "loss_ce": 6.606605893466622e-07, "loss_iou": 0.060546875, "loss_num": 0.01531982421875, "loss_xval": 0.197265625, "num_input_tokens_seen": 374339264, "step": 5970 }, { "epoch": 19.870216306156404, "grad_norm": 17.294164657592773, "learning_rate": 5e-06, "loss": 0.4859, "num_input_tokens_seen": 374402876, "step": 5971 }, { "epoch": 19.870216306156404, "loss": 0.4951794147491455, "loss_ce": 1.2200666787975933e-06, "loss_iou": 0.17578125, "loss_num": 0.028564453125, "loss_xval": 0.49609375, "num_input_tokens_seen": 374402876, "step": 5971 }, { "epoch": 19.873544093178037, "grad_norm": 34.1458625793457, "learning_rate": 5e-06, "loss": 0.3994, "num_input_tokens_seen": 374465808, "step": 5972 }, { "epoch": 19.873544093178037, "loss": 0.3089587688446045, "loss_ce": 6.442538960982347e-06, "loss_iou": 0.140625, "loss_num": 0.005615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 374465808, "step": 5972 }, { "epoch": 19.876871880199666, "grad_norm": 32.3641242980957, "learning_rate": 5e-06, "loss": 0.455, "num_input_tokens_seen": 374527220, "step": 5973 }, { "epoch": 19.876871880199666, "loss": 0.4494633674621582, "loss_ce": 4.770975010615075e-07, "loss_iou": 0.1767578125, "loss_num": 0.0191650390625, "loss_xval": 0.44921875, "num_input_tokens_seen": 374527220, "step": 5973 }, { "epoch": 19.880199667221298, "grad_norm": 22.437557220458984, "learning_rate": 5e-06, "loss": 0.2722, "num_input_tokens_seen": 374589932, "step": 5974 }, { "epoch": 19.880199667221298, "loss": 0.34793248772621155, "loss_ce": 1.5830215716050589e-06, "loss_iou": 0.123046875, "loss_num": 0.0203857421875, "loss_xval": 0.34765625, "num_input_tokens_seen": 374589932, "step": 5974 }, { "epoch": 19.883527454242927, "grad_norm": 22.716068267822266, "learning_rate": 5e-06, "loss": 0.4774, "num_input_tokens_seen": 374652192, "step": 5975 }, { "epoch": 19.883527454242927, "loss": 0.7832956314086914, "loss_ce": 1.0026637937698979e-06, "loss_iou": 0.3359375, "loss_num": 0.0225830078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 374652192, "step": 5975 }, { "epoch": 19.88685524126456, "grad_norm": 24.6044864654541, "learning_rate": 5e-06, "loss": 0.4505, "num_input_tokens_seen": 374715744, "step": 5976 }, { "epoch": 19.88685524126456, "loss": 0.4653342664241791, "loss_ce": 2.245453288196586e-06, "loss_iou": 0.1875, "loss_num": 0.017822265625, "loss_xval": 0.46484375, "num_input_tokens_seen": 374715744, "step": 5976 }, { "epoch": 19.890183028286188, "grad_norm": 30.469343185424805, "learning_rate": 5e-06, "loss": 0.3848, "num_input_tokens_seen": 374777412, "step": 5977 }, { "epoch": 19.890183028286188, "loss": 0.37696754932403564, "loss_ce": 1.4395376638276502e-05, "loss_iou": 0.1044921875, "loss_num": 0.03369140625, "loss_xval": 0.376953125, "num_input_tokens_seen": 374777412, "step": 5977 }, { "epoch": 19.89351081530782, "grad_norm": 20.659252166748047, "learning_rate": 5e-06, "loss": 0.3875, "num_input_tokens_seen": 374840528, "step": 5978 }, { "epoch": 19.89351081530782, "loss": 0.4006204605102539, "loss_ce": 0.00010775611735880375, "loss_iou": 0.16015625, "loss_num": 0.01611328125, "loss_xval": 0.400390625, "num_input_tokens_seen": 374840528, "step": 5978 }, { "epoch": 19.89683860232945, "grad_norm": 14.324573516845703, "learning_rate": 5e-06, "loss": 0.4283, "num_input_tokens_seen": 374902412, "step": 5979 }, { "epoch": 19.89683860232945, "loss": 0.42993268370628357, "loss_ce": 1.0289523970641312e-06, "loss_iou": 0.1484375, "loss_num": 0.0264892578125, "loss_xval": 0.4296875, "num_input_tokens_seen": 374902412, "step": 5979 }, { "epoch": 19.90016638935108, "grad_norm": 4.213141918182373, "learning_rate": 5e-06, "loss": 0.3984, "num_input_tokens_seen": 374964060, "step": 5980 }, { "epoch": 19.90016638935108, "loss": 0.5317395329475403, "loss_ce": 1.2617530273928423e-06, "loss_iou": 0.216796875, "loss_num": 0.01953125, "loss_xval": 0.53125, "num_input_tokens_seen": 374964060, "step": 5980 }, { "epoch": 19.90349417637271, "grad_norm": 17.440887451171875, "learning_rate": 5e-06, "loss": 0.6189, "num_input_tokens_seen": 375026552, "step": 5981 }, { "epoch": 19.90349417637271, "loss": 0.2601938247680664, "loss_ce": 9.577391892889864e-07, "loss_iou": 0.1103515625, "loss_num": 0.0079345703125, "loss_xval": 0.259765625, "num_input_tokens_seen": 375026552, "step": 5981 }, { "epoch": 19.906821963394343, "grad_norm": 13.581063270568848, "learning_rate": 5e-06, "loss": 0.3128, "num_input_tokens_seen": 375088176, "step": 5982 }, { "epoch": 19.906821963394343, "loss": 0.21380716562271118, "loss_ce": 1.0130381724593462e-06, "loss_iou": 0.0869140625, "loss_num": 0.00799560546875, "loss_xval": 0.2138671875, "num_input_tokens_seen": 375088176, "step": 5982 }, { "epoch": 19.91014975041597, "grad_norm": 7.969045639038086, "learning_rate": 5e-06, "loss": 0.3259, "num_input_tokens_seen": 375149860, "step": 5983 }, { "epoch": 19.91014975041597, "loss": 0.37786954641342163, "loss_ce": 8.901351975509897e-07, "loss_iou": 0.1259765625, "loss_num": 0.025390625, "loss_xval": 0.376953125, "num_input_tokens_seen": 375149860, "step": 5983 }, { "epoch": 19.913477537437604, "grad_norm": 6.905434608459473, "learning_rate": 5e-06, "loss": 0.4417, "num_input_tokens_seen": 375212848, "step": 5984 }, { "epoch": 19.913477537437604, "loss": 0.4570320248603821, "loss_ce": 7.853493571019499e-07, "loss_iou": 0.1806640625, "loss_num": 0.01904296875, "loss_xval": 0.45703125, "num_input_tokens_seen": 375212848, "step": 5984 }, { "epoch": 19.916805324459233, "grad_norm": 13.408425331115723, "learning_rate": 5e-06, "loss": 0.3169, "num_input_tokens_seen": 375275640, "step": 5985 }, { "epoch": 19.916805324459233, "loss": 0.17029060423374176, "loss_ce": 2.5121858016063925e-06, "loss_iou": 0.068359375, "loss_num": 0.006744384765625, "loss_xval": 0.169921875, "num_input_tokens_seen": 375275640, "step": 5985 }, { "epoch": 19.920133111480865, "grad_norm": 13.913800239562988, "learning_rate": 5e-06, "loss": 0.3568, "num_input_tokens_seen": 375336508, "step": 5986 }, { "epoch": 19.920133111480865, "loss": 0.34445545077323914, "loss_ce": 3.552397629391635e-06, "loss_iou": 0.1298828125, "loss_num": 0.0169677734375, "loss_xval": 0.34375, "num_input_tokens_seen": 375336508, "step": 5986 }, { "epoch": 19.923460898502494, "grad_norm": 21.173128128051758, "learning_rate": 5e-06, "loss": 0.4202, "num_input_tokens_seen": 375400432, "step": 5987 }, { "epoch": 19.923460898502494, "loss": 0.3517835736274719, "loss_ce": 3.797288081841543e-05, "loss_iou": 0.14453125, "loss_num": 0.01263427734375, "loss_xval": 0.3515625, "num_input_tokens_seen": 375400432, "step": 5987 }, { "epoch": 19.926788685524127, "grad_norm": 24.00170135498047, "learning_rate": 5e-06, "loss": 0.5436, "num_input_tokens_seen": 375463952, "step": 5988 }, { "epoch": 19.926788685524127, "loss": 0.6220543384552002, "loss_ce": 4.503105446929112e-05, "loss_iou": 0.23828125, "loss_num": 0.0291748046875, "loss_xval": 0.62109375, "num_input_tokens_seen": 375463952, "step": 5988 }, { "epoch": 19.930116472545755, "grad_norm": 27.327978134155273, "learning_rate": 5e-06, "loss": 0.4965, "num_input_tokens_seen": 375529084, "step": 5989 }, { "epoch": 19.930116472545755, "loss": 0.4692099392414093, "loss_ce": 2.1756202386313817e-06, "loss_iou": 0.1904296875, "loss_num": 0.0179443359375, "loss_xval": 0.46875, "num_input_tokens_seen": 375529084, "step": 5989 }, { "epoch": 19.933444259567388, "grad_norm": 17.114620208740234, "learning_rate": 5e-06, "loss": 0.3583, "num_input_tokens_seen": 375592236, "step": 5990 }, { "epoch": 19.933444259567388, "loss": 0.479952871799469, "loss_ce": 2.885647518269252e-06, "loss_iou": 0.193359375, "loss_num": 0.0186767578125, "loss_xval": 0.48046875, "num_input_tokens_seen": 375592236, "step": 5990 }, { "epoch": 19.936772046589017, "grad_norm": 10.506176948547363, "learning_rate": 5e-06, "loss": 0.2647, "num_input_tokens_seen": 375653888, "step": 5991 }, { "epoch": 19.936772046589017, "loss": 0.38800451159477234, "loss_ce": 6.504552584374323e-05, "loss_iou": 0.1474609375, "loss_num": 0.0186767578125, "loss_xval": 0.388671875, "num_input_tokens_seen": 375653888, "step": 5991 }, { "epoch": 19.94009983361065, "grad_norm": 20.492334365844727, "learning_rate": 5e-06, "loss": 0.6411, "num_input_tokens_seen": 375719168, "step": 5992 }, { "epoch": 19.94009983361065, "loss": 0.45410874485969543, "loss_ce": 7.179195108619751e-06, "loss_iou": 0.2099609375, "loss_num": 0.00689697265625, "loss_xval": 0.453125, "num_input_tokens_seen": 375719168, "step": 5992 }, { "epoch": 19.943427620632278, "grad_norm": 8.195962905883789, "learning_rate": 5e-06, "loss": 0.3312, "num_input_tokens_seen": 375781416, "step": 5993 }, { "epoch": 19.943427620632278, "loss": 0.41731274127960205, "loss_ce": 0.00105294247623533, "loss_iou": 0.1845703125, "loss_num": 0.00958251953125, "loss_xval": 0.416015625, "num_input_tokens_seen": 375781416, "step": 5993 }, { "epoch": 19.94675540765391, "grad_norm": 8.608583450317383, "learning_rate": 5e-06, "loss": 0.3602, "num_input_tokens_seen": 375844932, "step": 5994 }, { "epoch": 19.94675540765391, "loss": 0.29037630558013916, "loss_ce": 1.532734245301981e-06, "loss_iou": 0.130859375, "loss_num": 0.00592041015625, "loss_xval": 0.291015625, "num_input_tokens_seen": 375844932, "step": 5994 }, { "epoch": 19.950083194675543, "grad_norm": 11.777538299560547, "learning_rate": 5e-06, "loss": 0.374, "num_input_tokens_seen": 375908284, "step": 5995 }, { "epoch": 19.950083194675543, "loss": 0.374403715133667, "loss_ce": 1.4047313015908003e-05, "loss_iou": 0.1533203125, "loss_num": 0.01373291015625, "loss_xval": 0.375, "num_input_tokens_seen": 375908284, "step": 5995 }, { "epoch": 19.95341098169717, "grad_norm": 8.758488655090332, "learning_rate": 5e-06, "loss": 0.4968, "num_input_tokens_seen": 375971164, "step": 5996 }, { "epoch": 19.95341098169717, "loss": 0.5937236547470093, "loss_ce": 4.17258979723556e-06, "loss_iou": 0.2236328125, "loss_num": 0.0291748046875, "loss_xval": 0.59375, "num_input_tokens_seen": 375971164, "step": 5996 }, { "epoch": 19.9567387687188, "grad_norm": 4.934691905975342, "learning_rate": 5e-06, "loss": 0.2414, "num_input_tokens_seen": 376032444, "step": 5997 }, { "epoch": 19.9567387687188, "loss": 0.39141958951950073, "loss_ce": 1.1553570402611513e-06, "loss_iou": 0.138671875, "loss_num": 0.0228271484375, "loss_xval": 0.390625, "num_input_tokens_seen": 376032444, "step": 5997 }, { "epoch": 19.960066555740433, "grad_norm": 9.04544448852539, "learning_rate": 5e-06, "loss": 0.5219, "num_input_tokens_seen": 376096912, "step": 5998 }, { "epoch": 19.960066555740433, "loss": 0.43963688611984253, "loss_ce": 6.744470510966494e-07, "loss_iou": 0.171875, "loss_num": 0.019287109375, "loss_xval": 0.439453125, "num_input_tokens_seen": 376096912, "step": 5998 }, { "epoch": 19.963394342762065, "grad_norm": 11.848540306091309, "learning_rate": 5e-06, "loss": 0.3354, "num_input_tokens_seen": 376160016, "step": 5999 }, { "epoch": 19.963394342762065, "loss": 0.43603867292404175, "loss_ce": 3.5227121770731173e-06, "loss_iou": 0.158203125, "loss_num": 0.0238037109375, "loss_xval": 0.435546875, "num_input_tokens_seen": 376160016, "step": 5999 }, { "epoch": 19.966722129783694, "grad_norm": 5.845351696014404, "learning_rate": 5e-06, "loss": 0.3402, "num_input_tokens_seen": 376222572, "step": 6000 }, { "epoch": 19.966722129783694, "eval_seeclick_CIoU": 0.030241853557527065, "eval_seeclick_GIoU": 0.021268533542752266, "eval_seeclick_IoU": 0.16292262822389603, "eval_seeclick_MAE_all": 0.1780235320329666, "eval_seeclick_MAE_h": 0.0710251796990633, "eval_seeclick_MAE_w": 0.1398494578897953, "eval_seeclick_MAE_x_boxes": 0.21971195936203003, "eval_seeclick_MAE_y_boxes": 0.1913694217801094, "eval_seeclick_NUM_probability": 0.999967485666275, "eval_seeclick_inside_bbox": 0.16250000149011612, "eval_seeclick_loss": 3.0659663677215576, "eval_seeclick_loss_ce": 0.17268741875886917, "eval_seeclick_loss_iou": 0.996337890625, "eval_seeclick_loss_num": 0.17913818359375, "eval_seeclick_loss_xval": 2.888671875, "eval_seeclick_runtime": 67.4335, "eval_seeclick_samples_per_second": 0.697, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 376222572, "step": 6000 }, { "epoch": 19.966722129783694, "eval_icons_CIoU": -0.04913701303303242, "eval_icons_GIoU": 0.050909227691590786, "eval_icons_IoU": 0.1190880686044693, "eval_icons_MAE_all": 0.1912556067109108, "eval_icons_MAE_h": 0.15120510756969452, "eval_icons_MAE_w": 0.2189185842871666, "eval_icons_MAE_x_boxes": 0.13626738637685776, "eval_icons_MAE_y_boxes": 0.09074198454618454, "eval_icons_NUM_probability": 0.9999925494194031, "eval_icons_inside_bbox": 0.2204861119389534, "eval_icons_loss": 2.816251277923584, "eval_icons_loss_ce": 1.2755185707646888e-06, "eval_icons_loss_iou": 0.949462890625, "eval_icons_loss_num": 0.191680908203125, "eval_icons_loss_xval": 2.8564453125, "eval_icons_runtime": 74.71, "eval_icons_samples_per_second": 0.669, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 376222572, "step": 6000 }, { "epoch": 19.966722129783694, "eval_screenspot_CIoU": 0.17272637536128363, "eval_screenspot_GIoU": 0.20956078668435416, "eval_screenspot_IoU": 0.2871937155723572, "eval_screenspot_MAE_all": 0.1153217429916064, "eval_screenspot_MAE_h": 0.059355150908231735, "eval_screenspot_MAE_w": 0.10645230983694394, "eval_screenspot_MAE_x_boxes": 0.1564212366938591, "eval_screenspot_MAE_y_boxes": 0.09195773055156072, "eval_screenspot_NUM_probability": 0.99999471505483, "eval_screenspot_inside_bbox": 0.512500007947286, "eval_screenspot_loss": 2.1941778659820557, "eval_screenspot_loss_ce": 6.1881359177580935e-06, "eval_screenspot_loss_iou": 0.8024088541666666, "eval_screenspot_loss_num": 0.12380472819010417, "eval_screenspot_loss_xval": 2.2236328125, "eval_screenspot_runtime": 126.7716, "eval_screenspot_samples_per_second": 0.702, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 376222572, "step": 6000 }, { "epoch": 19.966722129783694, "eval_compot_CIoU": 0.18568577617406845, "eval_compot_GIoU": 0.24270299077033997, "eval_compot_IoU": 0.31596729159355164, "eval_compot_MAE_all": 0.12292724847793579, "eval_compot_MAE_h": 0.046858206391334534, "eval_compot_MAE_w": 0.13125669956207275, "eval_compot_MAE_x_boxes": 0.10688621178269386, "eval_compot_MAE_y_boxes": 0.11256765201687813, "eval_compot_NUM_probability": 0.9999963343143463, "eval_compot_inside_bbox": 0.4131944477558136, "eval_compot_loss": 2.1106820106506348, "eval_compot_loss_ce": 0.01103103207424283, "eval_compot_loss_iou": 0.7576904296875, "eval_compot_loss_num": 0.12609291076660156, "eval_compot_loss_xval": 2.146484375, "eval_compot_runtime": 72.2638, "eval_compot_samples_per_second": 0.692, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 376222572, "step": 6000 }, { "epoch": 19.966722129783694, "eval_custom_ui_MAE_all": 0.054079631343483925, "eval_custom_ui_MAE_x": 0.062035854905843735, "eval_custom_ui_MAE_y": 0.046123405918478966, "eval_custom_ui_NUM_probability": 0.9999988079071045, "eval_custom_ui_loss": 0.2610563635826111, "eval_custom_ui_loss_ce": 1.3799589169138926e-06, "eval_custom_ui_loss_num": 0.05348968505859375, "eval_custom_ui_loss_xval": 0.26715087890625, "eval_custom_ui_runtime": 50.4208, "eval_custom_ui_samples_per_second": 0.992, "eval_custom_ui_steps_per_second": 0.04, "num_input_tokens_seen": 376222572, "step": 6000 } ], "logging_steps": 1.0, "max_steps": 15000, "num_input_tokens_seen": 376222572, "num_train_epochs": 50, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.750258633291491e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }