diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.005490779608342691, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 3.3985, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 5.2123, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 3.8735, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 2.6185, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001997995991983968, + "loss": 4.2863, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001995991983967936, + "loss": 1.4706, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001993987975951904, + "loss": 3.6384, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019919839679358718, + "loss": 4.1227, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019919839679358718, + "loss": 6.3727, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019899799599198398, + "loss": 3.427, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019879759519038077, + "loss": 3.4727, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019859719438877757, + "loss": 5.3318, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019839679358717434, + "loss": 4.8612, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019839679358717434, + "loss": 2.9335, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019819639278557117, + "loss": 3.2481, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019799599198396794, + "loss": 4.3807, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019779559118236474, + "loss": 8.8975, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019759519038076154, + "loss": 3.0805, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019739478957915834, + "loss": 5.8723, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001971943887775551, + "loss": 4.6326, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019699398797595193, + "loss": 4.7934, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001967935871743487, + "loss": 3.7, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001965931863727455, + "loss": 1.9477, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001963927855711423, + "loss": 7.7452, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001961923847695391, + "loss": 3.4931, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019599198396793587, + "loss": 4.2015, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019579158316633267, + "loss": 3.0261, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019559118236472947, + "loss": 1.9957, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019539078156312627, + "loss": 1.795, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019519038076152304, + "loss": 1.9667, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019498997995991986, + "loss": 2.3988, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019478957915831664, + "loss": 3.0434, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019458917835671343, + "loss": 1.6954, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019438877755511023, + "loss": 3.5625, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019418837675350703, + "loss": 3.5388, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019418837675350703, + "loss": 3.8641, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001939879759519038, + "loss": 2.4463, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001937875751503006, + "loss": 1.5531, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001935871743486974, + "loss": 2.904, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001933867735470942, + "loss": 1.9943, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.000193186372745491, + "loss": 8.4639, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001929859719438878, + "loss": 2.1307, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019278557114228457, + "loss": 2.5124, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019258517034068137, + "loss": 4.3622, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019238476953907816, + "loss": 2.3385, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019218436873747496, + "loss": 2.2836, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019198396793587173, + "loss": 2.1174, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019178356713426856, + "loss": 2.8594, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019158316633266533, + "loss": 4.3321, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019138276553106213, + "loss": 2.3192, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019118236472945893, + "loss": 0.7275, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019098196392785573, + "loss": 3.198, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001907815631262525, + "loss": 3.2683, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001905811623246493, + "loss": 2.1871, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001903807615230461, + "loss": 1.5479, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001901803607214429, + "loss": 2.793, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018997995991983967, + "loss": 2.6259, + "step": 57 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001897795591182365, + "loss": 2.7578, + "step": 58 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018957915831663326, + "loss": 2.0832, + "step": 59 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018937875751503006, + "loss": 4.0156, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018917835671342686, + "loss": 0.0, + "step": 61 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018897795591182366, + "loss": 2.866, + "step": 62 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018877755511022046, + "loss": 2.8362, + "step": 63 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018857715430861726, + "loss": 2.138, + "step": 64 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018837675350701405, + "loss": 1.9499, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018817635270541083, + "loss": 13.8323, + "step": 66 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018797595190380762, + "loss": 2.2264, + "step": 67 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018777555110220442, + "loss": 3.8097, + "step": 68 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018757515030060122, + "loss": 3.3592, + "step": 69 + }, + { + "epoch": 0.0, + "learning_rate": 0.000187374749498998, + "loss": 2.6868, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018717434869739482, + "loss": 5.958, + "step": 71 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001869739478957916, + "loss": 1.1189, + "step": 72 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001867735470941884, + "loss": 2.9349, + "step": 73 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001865731462925852, + "loss": 4.0328, + "step": 74 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018637274549098199, + "loss": 1.6495, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018617234468937876, + "loss": 1.7545, + "step": 76 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018597194388777556, + "loss": 1.4433, + "step": 77 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018577154308617235, + "loss": 1.2002, + "step": 78 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018557114228456915, + "loss": 1.7638, + "step": 79 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018537074148296592, + "loss": 1.3996, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018517034068136275, + "loss": 2.099, + "step": 81 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018496993987975952, + "loss": 4.7314, + "step": 82 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018476953907815632, + "loss": 3.1169, + "step": 83 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018456913827655312, + "loss": 4.9392, + "step": 84 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018436873747494992, + "loss": 3.1455, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001841683366733467, + "loss": 0.765, + "step": 86 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018396793587174351, + "loss": 3.067, + "step": 87 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018376753507014029, + "loss": 1.3356, + "step": 88 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018356713426853708, + "loss": 4.5039, + "step": 89 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018336673346693388, + "loss": 2.6412, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018316633266533068, + "loss": 0.5048, + "step": 91 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018296593186372745, + "loss": 1.0112, + "step": 92 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018276553106212425, + "loss": 1.0227, + "step": 93 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018256513026052105, + "loss": 2.4534, + "step": 94 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018236472945891785, + "loss": 2.7282, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018216432865731462, + "loss": 3.8706, + "step": 96 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018196392785571145, + "loss": 4.2973, + "step": 97 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018176352705410822, + "loss": 2.874, + "step": 98 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018156312625250502, + "loss": 3.4857, + "step": 99 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018136272545090181, + "loss": 2.1494, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001811623246492986, + "loss": 3.3004, + "step": 101 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018096192384769538, + "loss": 0.0, + "step": 102 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001807615230460922, + "loss": 2.0755, + "step": 103 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018056112224448898, + "loss": 2.339, + "step": 104 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018036072144288578, + "loss": 3.2574, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018016032064128258, + "loss": 2.515, + "step": 106 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017995991983967938, + "loss": 2.4068, + "step": 107 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017975951903807615, + "loss": 2.46, + "step": 108 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017955911823647295, + "loss": 3.7409, + "step": 109 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017935871743486975, + "loss": 3.1791, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017915831663326654, + "loss": 2.4718, + "step": 111 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017895791583166331, + "loss": 4.7393, + "step": 112 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017875751503006014, + "loss": 1.1858, + "step": 113 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001785571142284569, + "loss": 1.3025, + "step": 114 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001783567134268537, + "loss": 2.6866, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001781563126252505, + "loss": 1.636, + "step": 116 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001779559118236473, + "loss": 3.4094, + "step": 117 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001777555110220441, + "loss": 2.6015, + "step": 118 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017755511022044088, + "loss": 2.0884, + "step": 119 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001773547094188377, + "loss": 0.0, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017715430861723447, + "loss": 0.4421, + "step": 121 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017695390781563127, + "loss": 2.1163, + "step": 122 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017675350701402807, + "loss": 4.7788, + "step": 123 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017655310621242487, + "loss": 3.7254, + "step": 124 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017635270541082164, + "loss": 6.1688, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017615230460921847, + "loss": 2.2818, + "step": 126 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017595190380761524, + "loss": 2.9417, + "step": 127 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017575150300601204, + "loss": 0.0, + "step": 128 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017555110220440884, + "loss": 2.0813, + "step": 129 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017535070140280563, + "loss": 0.0, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001751503006012024, + "loss": 2.4927, + "step": 131 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001749498997995992, + "loss": 5.145, + "step": 132 + }, + { + "epoch": 0.0, + "learning_rate": 0.000174749498997996, + "loss": 0.9084, + "step": 133 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001745490981963928, + "loss": 1.7761, + "step": 134 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017434869739478957, + "loss": 3.2621, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001741482965931864, + "loss": 3.3788, + "step": 136 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017394789579158317, + "loss": 2.54, + "step": 137 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017374749498997997, + "loss": 2.1147, + "step": 138 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017354709418837677, + "loss": 4.4466, + "step": 139 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017334669338677357, + "loss": 1.6063, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017314629258517034, + "loss": 2.522, + "step": 141 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017294589178356714, + "loss": 4.5326, + "step": 142 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017274549098196393, + "loss": 3.0165, + "step": 143 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017254509018036073, + "loss": 2.465, + "step": 144 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001723446893787575, + "loss": 0.914, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017214428857715433, + "loss": 3.3662, + "step": 146 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001719438877755511, + "loss": 4.086, + "step": 147 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001717434869739479, + "loss": 1.5515, + "step": 148 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001715430861723447, + "loss": 0.9641, + "step": 149 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001713426853707415, + "loss": 3.3465, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017114228456913827, + "loss": 2.2881, + "step": 151 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001709418837675351, + "loss": 2.9916, + "step": 152 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017074148296593187, + "loss": 7.9494, + "step": 153 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017054108216432866, + "loss": 1.7766, + "step": 154 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017034068136272546, + "loss": 3.882, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017014028056112226, + "loss": 1.26, + "step": 156 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016993987975951903, + "loss": 2.6855, + "step": 157 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016973947895791583, + "loss": 2.751, + "step": 158 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016953907815631263, + "loss": 4.4908, + "step": 159 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016933867735470943, + "loss": 4.7742, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001691382765531062, + "loss": 1.0128, + "step": 161 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016893787575150303, + "loss": 0.7312, + "step": 162 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001687374749498998, + "loss": 2.5514, + "step": 163 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001685370741482966, + "loss": 3.6529, + "step": 164 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001683366733466934, + "loss": 2.0932, + "step": 165 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001681362725450902, + "loss": 3.7294, + "step": 166 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016793587174348696, + "loss": 5.3816, + "step": 167 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001677354709418838, + "loss": 4.4184, + "step": 168 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016753507014028056, + "loss": 4.5827, + "step": 169 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016733466933867736, + "loss": 2.6326, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016713426853707416, + "loss": 2.4652, + "step": 171 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016693386773547096, + "loss": 1.7369, + "step": 172 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016673346693386773, + "loss": 2.3528, + "step": 173 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016653306613226453, + "loss": 2.4526, + "step": 174 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016653306613226453, + "loss": 1.8607, + "step": 175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016633266533066135, + "loss": 0.0, + "step": 176 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016613226452905812, + "loss": 4.1204, + "step": 177 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016593186372745492, + "loss": 2.2052, + "step": 178 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016573146292585172, + "loss": 1.4778, + "step": 179 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016553106212424852, + "loss": 3.0413, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001653306613226453, + "loss": 1.8595, + "step": 181 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001651302605210421, + "loss": 5.2543, + "step": 182 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001649298597194389, + "loss": 5.0545, + "step": 183 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001647294589178357, + "loss": 0.6023, + "step": 184 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016452905811623246, + "loss": 2.3329, + "step": 185 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016432865731462928, + "loss": 1.4628, + "step": 186 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016412825651302606, + "loss": 1.9968, + "step": 187 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016392785571142285, + "loss": 2.8452, + "step": 188 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016372745490981965, + "loss": 1.7714, + "step": 189 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016352705410821645, + "loss": 1.5021, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016332665330661322, + "loss": 1.4912, + "step": 191 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016312625250501005, + "loss": 0.3812, + "step": 192 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016292585170340682, + "loss": 5.2674, + "step": 193 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016272545090180362, + "loss": 0.8585, + "step": 194 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016252505010020042, + "loss": 0.5154, + "step": 195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016232464929859721, + "loss": 1.394, + "step": 196 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016212424849699399, + "loss": 1.8194, + "step": 197 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016192384769539078, + "loss": 2.0729, + "step": 198 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016172344689378758, + "loss": 3.8512, + "step": 199 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016152304609218438, + "loss": 2.1289, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016132264529058115, + "loss": 4.1014, + "step": 201 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016112224448897798, + "loss": 2.5436, + "step": 202 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016092184368737475, + "loss": 3.015, + "step": 203 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016072144288577155, + "loss": 0.463, + "step": 204 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016052104208416835, + "loss": 2.2814, + "step": 205 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016032064128256515, + "loss": 3.4467, + "step": 206 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016012024048096192, + "loss": 3.9723, + "step": 207 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015991983967935872, + "loss": 3.7387, + "step": 208 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015971943887775551, + "loss": 3.9553, + "step": 209 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001595190380761523, + "loss": 4.4628, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001593186372745491, + "loss": 2.0064, + "step": 211 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001591182364729459, + "loss": 2.5059, + "step": 212 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015891783567134268, + "loss": 2.3503, + "step": 213 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015871743486973948, + "loss": 2.9199, + "step": 214 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015851703406813628, + "loss": 2.8497, + "step": 215 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015831663326653308, + "loss": 1.9105, + "step": 216 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015811623246492985, + "loss": 0.0, + "step": 217 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015791583166332667, + "loss": 3.1217, + "step": 218 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015771543086172345, + "loss": 3.2305, + "step": 219 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015751503006012024, + "loss": 2.5012, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015731462925851704, + "loss": 2.7481, + "step": 221 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015711422845691384, + "loss": 2.8643, + "step": 222 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001569138276553106, + "loss": 0.0, + "step": 223 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001567134268537074, + "loss": 2.2062, + "step": 224 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001565130260521042, + "loss": 2.913, + "step": 225 + }, + { + "epoch": 0.0, + "learning_rate": 0.000156312625250501, + "loss": 7.0962, + "step": 226 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015611222444889778, + "loss": 3.1977, + "step": 227 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001559118236472946, + "loss": 0.4999, + "step": 228 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015571142284569138, + "loss": 1.128, + "step": 229 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015551102204408818, + "loss": 1.7394, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 0.000155310621242485, + "loss": 2.6111, + "step": 231 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015511022044088177, + "loss": 1.8315, + "step": 232 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015490981963927857, + "loss": 1.1312, + "step": 233 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015470941883767537, + "loss": 1.6202, + "step": 234 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015450901803607217, + "loss": 0.6677, + "step": 235 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015430861723446894, + "loss": 2.2805, + "step": 236 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015410821643286574, + "loss": 2.1234, + "step": 237 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015390781563126254, + "loss": 1.4029, + "step": 238 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015370741482965934, + "loss": 1.8568, + "step": 239 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001535070140280561, + "loss": 2.4828, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015330661322645293, + "loss": 2.0113, + "step": 241 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001531062124248497, + "loss": 0.8511, + "step": 242 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001529058116232465, + "loss": 1.4795, + "step": 243 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001527054108216433, + "loss": 3.0378, + "step": 244 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001525050100200401, + "loss": 1.9997, + "step": 245 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015230460921843687, + "loss": 1.7813, + "step": 246 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015210420841683367, + "loss": 0.0, + "step": 247 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015190380761523047, + "loss": 2.7432, + "step": 248 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015170340681362727, + "loss": 1.6375, + "step": 249 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015150300601202404, + "loss": 1.6442, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015130260521042086, + "loss": 3.8107, + "step": 251 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015110220440881764, + "loss": 2.3884, + "step": 252 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015090180360721443, + "loss": 1.256, + "step": 253 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015070140280561123, + "loss": 2.6774, + "step": 254 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015050100200400803, + "loss": 4.4725, + "step": 255 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001503006012024048, + "loss": 1.8188, + "step": 256 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015010020040080163, + "loss": 2.8957, + "step": 257 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001498997995991984, + "loss": 4.4914, + "step": 258 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001496993987975952, + "loss": 1.7564, + "step": 259 + }, + { + "epoch": 0.0, + "learning_rate": 0.000149498997995992, + "loss": 2.175, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001492985971943888, + "loss": 0.4855, + "step": 261 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014909819639278557, + "loss": 0.7024, + "step": 262 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014889779559118237, + "loss": 2.941, + "step": 263 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014869739478957916, + "loss": 2.8929, + "step": 264 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014849699398797596, + "loss": 4.9599, + "step": 265 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014829659318637273, + "loss": 3.2604, + "step": 266 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014809619238476956, + "loss": 3.0037, + "step": 267 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014789579158316633, + "loss": 2.6023, + "step": 268 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014769539078156313, + "loss": 0.0, + "step": 269 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014749498997995993, + "loss": 4.7965, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014729458917835673, + "loss": 0.8059, + "step": 271 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001470941883767535, + "loss": 0.7748, + "step": 272 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001468937875751503, + "loss": 2.2642, + "step": 273 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001466933867735471, + "loss": 2.9185, + "step": 274 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001464929859719439, + "loss": 2.4913, + "step": 275 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001462925851703407, + "loss": 3.3304, + "step": 276 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001460921843687375, + "loss": 1.5842, + "step": 277 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014589178356713426, + "loss": 2.2768, + "step": 278 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014569138276553106, + "loss": 2.129, + "step": 279 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014549098196392786, + "loss": 1.7883, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014529058116232466, + "loss": 2.8672, + "step": 281 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014509018036072143, + "loss": 1.6781, + "step": 282 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014488977955911825, + "loss": 2.321, + "step": 283 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014468937875751503, + "loss": 1.2013, + "step": 284 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014448897795591182, + "loss": 2.5649, + "step": 285 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014428857715430862, + "loss": 1.9453, + "step": 286 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014408817635270542, + "loss": 0.32, + "step": 287 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014388777555110222, + "loss": 3.1301, + "step": 288 + }, + { + "epoch": 0.0, + "learning_rate": 0.000143687374749499, + "loss": 0.4875, + "step": 289 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014348697394789582, + "loss": 2.9447, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001432865731462926, + "loss": 0.8736, + "step": 291 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001430861723446894, + "loss": 0.5231, + "step": 292 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014288577154308619, + "loss": 2.8954, + "step": 293 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014268537074148298, + "loss": 1.6437, + "step": 294 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014248496993987976, + "loss": 7.0932, + "step": 295 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014228456913827658, + "loss": 0.2355, + "step": 296 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014208416833667335, + "loss": 2.0368, + "step": 297 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014188376753507015, + "loss": 8.2179, + "step": 298 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014168336673346695, + "loss": 4.559, + "step": 299 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014148296593186375, + "loss": 2.3565, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014128256513026052, + "loss": 3.1562, + "step": 301 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014108216432865732, + "loss": 0.9561, + "step": 302 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014088176352705412, + "loss": 2.3135, + "step": 303 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014068136272545092, + "loss": 4.5708, + "step": 304 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001404809619238477, + "loss": 0.6331, + "step": 305 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001402805611222445, + "loss": 1.2219, + "step": 306 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014008016032064128, + "loss": 2.4206, + "step": 307 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013987975951903808, + "loss": 1.0911, + "step": 308 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013967935871743488, + "loss": 3.7003, + "step": 309 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013947895791583168, + "loss": 1.7502, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013927855711422845, + "loss": 1.6085, + "step": 311 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013907815631262525, + "loss": 0.0, + "step": 312 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013887775551102205, + "loss": 3.5848, + "step": 313 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013867735470941885, + "loss": 4.0407, + "step": 314 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013847695390781562, + "loss": 2.6365, + "step": 315 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013827655310621244, + "loss": 2.0709, + "step": 316 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013807615230460922, + "loss": 2.6688, + "step": 317 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013787575150300601, + "loss": 1.7493, + "step": 318 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001376753507014028, + "loss": 2.1693, + "step": 319 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001374749498997996, + "loss": 5.6072, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013727454909819638, + "loss": 2.1042, + "step": 321 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001370741482965932, + "loss": 0.7863, + "step": 322 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013687374749498998, + "loss": 2.1696, + "step": 323 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013667334669338678, + "loss": 2.7226, + "step": 324 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013647294589178358, + "loss": 2.0276, + "step": 325 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013627254509018038, + "loss": 2.3375, + "step": 326 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013607214428857715, + "loss": 1.1399, + "step": 327 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013587174348697395, + "loss": 1.5908, + "step": 328 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013567134268537074, + "loss": 2.9017, + "step": 329 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013547094188376754, + "loss": 2.0847, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013527054108216431, + "loss": 2.0466, + "step": 331 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013507014028056114, + "loss": 3.1415, + "step": 332 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001348697394789579, + "loss": 1.3235, + "step": 333 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001346693386773547, + "loss": 2.4393, + "step": 334 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001344689378757515, + "loss": 0.7366, + "step": 335 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001342685370741483, + "loss": 1.5512, + "step": 336 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013406813627254508, + "loss": 0.9189, + "step": 337 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001338677354709419, + "loss": 0.6407, + "step": 338 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013366733466933868, + "loss": 2.403, + "step": 339 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013346693386773547, + "loss": 1.9463, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013326653306613227, + "loss": 3.963, + "step": 341 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013306613226452907, + "loss": 1.1293, + "step": 342 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013286573146292587, + "loss": 2.3127, + "step": 343 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013266533066132264, + "loss": 1.5872, + "step": 344 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013246492985971947, + "loss": 2.3619, + "step": 345 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013226452905811624, + "loss": 2.153, + "step": 346 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013206412825651304, + "loss": 0.8323, + "step": 347 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013186372745490984, + "loss": 0.0, + "step": 348 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013166332665330663, + "loss": 2.4433, + "step": 349 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001314629258517034, + "loss": 2.5312, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001312625250501002, + "loss": 1.7814, + "step": 351 + }, + { + "epoch": 0.0, + "learning_rate": 0.000131062124248497, + "loss": 2.5383, + "step": 352 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001308617234468938, + "loss": 1.2516, + "step": 353 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013066132264529057, + "loss": 6.2541, + "step": 354 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001304609218436874, + "loss": 1.3022, + "step": 355 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013026052104208417, + "loss": 2.8096, + "step": 356 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013006012024048097, + "loss": 2.0949, + "step": 357 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012985971943887777, + "loss": 3.4624, + "step": 358 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012965931863727457, + "loss": 2.9193, + "step": 359 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012945891783567134, + "loss": 3.0689, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012925851703406816, + "loss": 2.0878, + "step": 361 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012905811623246493, + "loss": 2.718, + "step": 362 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012885771543086173, + "loss": 0.9596, + "step": 363 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012865731462925853, + "loss": 1.3671, + "step": 364 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012845691382765533, + "loss": 3.5474, + "step": 365 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001282565130260521, + "loss": 3.185, + "step": 366 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001280561122244489, + "loss": 1.195, + "step": 367 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001278557114228457, + "loss": 2.8428, + "step": 368 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001276553106212425, + "loss": 1.6981, + "step": 369 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012745490981963927, + "loss": 1.6565, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001272545090180361, + "loss": 3.5342, + "step": 371 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012705410821643286, + "loss": 1.9243, + "step": 372 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012685370741482966, + "loss": 2.4072, + "step": 373 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012665330661322646, + "loss": 2.0842, + "step": 374 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012645290581162326, + "loss": 1.2166, + "step": 375 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012625250501002003, + "loss": 0.0, + "step": 376 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012605210420841683, + "loss": 2.2686, + "step": 377 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012585170340681363, + "loss": 2.9105, + "step": 378 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012565130260521043, + "loss": 1.7398, + "step": 379 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001254509018036072, + "loss": 1.2882, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012525050100200402, + "loss": 4.7763, + "step": 381 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001250501002004008, + "loss": 1.5265, + "step": 382 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001248496993987976, + "loss": 1.154, + "step": 383 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001246492985971944, + "loss": 2.6431, + "step": 384 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001244488977955912, + "loss": 4.251, + "step": 385 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012424849699398796, + "loss": 1.8364, + "step": 386 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001240480961923848, + "loss": 1.7506, + "step": 387 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012384769539078156, + "loss": 1.3577, + "step": 388 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012364729458917836, + "loss": 5.6636, + "step": 389 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012344689378757516, + "loss": 4.8805, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012324649298597196, + "loss": 1.8715, + "step": 391 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012304609218436873, + "loss": 1.4509, + "step": 392 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012284569138276553, + "loss": 2.9912, + "step": 393 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012264529058116232, + "loss": 0.7702, + "step": 394 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012244488977955912, + "loss": 5.1749, + "step": 395 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001222444889779559, + "loss": 1.7929, + "step": 396 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001220440881763527, + "loss": 3.7588, + "step": 397 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012184368737474952, + "loss": 1.5476, + "step": 398 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001216432865731463, + "loss": 0.9982, + "step": 399 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001214428857715431, + "loss": 0.9221, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012124248496993989, + "loss": 1.7868, + "step": 401 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012104208416833669, + "loss": 1.0556, + "step": 402 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012084168336673347, + "loss": 3.1609, + "step": 403 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012064128256513027, + "loss": 2.0551, + "step": 404 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012044088176352705, + "loss": 2.2839, + "step": 405 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012024048096192387, + "loss": 2.6195, + "step": 406 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012004008016032065, + "loss": 2.4274, + "step": 407 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011983967935871745, + "loss": 3.065, + "step": 408 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011963927855711424, + "loss": 2.9908, + "step": 409 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011943887775551103, + "loss": 2.584, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011923847695390782, + "loss": 6.5406, + "step": 411 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011903807615230462, + "loss": 2.2507, + "step": 412 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001188376753507014, + "loss": 1.9925, + "step": 413 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011863727454909821, + "loss": 2.1832, + "step": 414 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011843687374749499, + "loss": 2.9761, + "step": 415 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001182364729458918, + "loss": 3.3134, + "step": 416 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011803607214428858, + "loss": 3.6563, + "step": 417 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011783567134268538, + "loss": 0.0, + "step": 418 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011763527054108217, + "loss": 2.8706, + "step": 419 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011743486973947896, + "loss": 1.4058, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011723446893787575, + "loss": 3.0397, + "step": 421 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011703406813627256, + "loss": 0.4962, + "step": 422 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011683366733466933, + "loss": 1.8918, + "step": 423 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011663326653306615, + "loss": 1.6106, + "step": 424 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011643286573146293, + "loss": 1.6681, + "step": 425 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011623246492985973, + "loss": 2.438, + "step": 426 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011603206412825651, + "loss": 1.8013, + "step": 427 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011583166332665331, + "loss": 1.6599, + "step": 428 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001156312625250501, + "loss": 2.4527, + "step": 429 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011543086172344691, + "loss": 2.6056, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011523046092184368, + "loss": 2.3085, + "step": 431 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011503006012024049, + "loss": 3.2212, + "step": 432 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011482965931863728, + "loss": 4.1987, + "step": 433 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011462925851703408, + "loss": 0.0, + "step": 434 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011442885771543086, + "loss": 2.9687, + "step": 435 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011422845691382766, + "loss": 2.8237, + "step": 436 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011402805611222445, + "loss": 1.7948, + "step": 437 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011382765531062126, + "loss": 0.863, + "step": 438 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011362725450901803, + "loss": 2.4803, + "step": 439 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011342685370741484, + "loss": 2.0552, + "step": 440 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011322645290581163, + "loss": 1.6845, + "step": 441 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011302605210420842, + "loss": 1.8881, + "step": 442 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011282565130260521, + "loss": 1.3237, + "step": 443 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011262525050100201, + "loss": 1.1419, + "step": 444 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011242484969939879, + "loss": 2.5045, + "step": 445 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011222444889779559, + "loss": 1.9609, + "step": 446 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011202404809619238, + "loss": 0.6873, + "step": 447 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011182364729458919, + "loss": 2.2377, + "step": 448 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011162324649298596, + "loss": 2.5531, + "step": 449 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011142284569138277, + "loss": 1.0334, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011122244488977956, + "loss": 1.8675, + "step": 451 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011102204408817636, + "loss": 1.9346, + "step": 452 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011082164328657317, + "loss": 3.4389, + "step": 453 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011062124248496994, + "loss": 2.9012, + "step": 454 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011042084168336675, + "loss": 1.0503, + "step": 455 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011022044088176354, + "loss": 2.3487, + "step": 456 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011002004008016033, + "loss": 1.5353, + "step": 457 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010981963927855712, + "loss": 2.6252, + "step": 458 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010961923847695392, + "loss": 1.8809, + "step": 459 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001094188376753507, + "loss": 0.2392, + "step": 460 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010921843687374752, + "loss": 2.9035, + "step": 461 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010901803607214429, + "loss": 1.3167, + "step": 462 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001088176352705411, + "loss": 1.9923, + "step": 463 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010861723446893788, + "loss": 2.0446, + "step": 464 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010841683366733468, + "loss": 4.3443, + "step": 465 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010821643286573147, + "loss": 2.5066, + "step": 466 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010801603206412827, + "loss": 2.0386, + "step": 467 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010781563126252505, + "loss": 1.5045, + "step": 468 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010761523046092185, + "loss": 2.1218, + "step": 469 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010741482965931863, + "loss": 3.0638, + "step": 470 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010721442885771545, + "loss": 2.0932, + "step": 471 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010701402805611223, + "loss": 1.5917, + "step": 472 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010681362725450903, + "loss": 1.4797, + "step": 473 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010661322645290582, + "loss": 2.1382, + "step": 474 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010641282565130261, + "loss": 3.9274, + "step": 475 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001062124248496994, + "loss": 2.8679, + "step": 476 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001060120240480962, + "loss": 2.3778, + "step": 477 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010581162324649298, + "loss": 1.6275, + "step": 478 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001056112224448898, + "loss": 2.3191, + "step": 479 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010541082164328657, + "loss": 2.7971, + "step": 480 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010521042084168338, + "loss": 1.9046, + "step": 481 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010501002004008016, + "loss": 1.7607, + "step": 482 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010480961923847696, + "loss": 2.2909, + "step": 483 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010460921843687375, + "loss": 1.6937, + "step": 484 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010440881763527055, + "loss": 2.3124, + "step": 485 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010420841683366733, + "loss": 1.0625, + "step": 486 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010400801603206414, + "loss": 1.8987, + "step": 487 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010380761523046091, + "loss": 0.7515, + "step": 488 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010360721442885773, + "loss": 1.4015, + "step": 489 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010340681362725451, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010320641282565131, + "loss": 2.2455, + "step": 491 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001030060120240481, + "loss": 1.5684, + "step": 492 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010280561122244489, + "loss": 0.4304, + "step": 493 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010260521042084168, + "loss": 1.1205, + "step": 494 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010240480961923849, + "loss": 5.8101, + "step": 495 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010220440881763526, + "loss": 2.3707, + "step": 496 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010200400801603207, + "loss": 4.4205, + "step": 497 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010180360721442886, + "loss": 1.9965, + "step": 498 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010160320641282566, + "loss": 2.1804, + "step": 499 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010140280561122244, + "loss": 2.7072, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010120240480961924, + "loss": 0.3631, + "step": 501 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010100200400801603, + "loss": 1.192, + "step": 502 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010080160320641284, + "loss": 1.4428, + "step": 503 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010060120240480961, + "loss": 1.4366, + "step": 504 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010040080160320642, + "loss": 2.4546, + "step": 505 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001002004008016032, + "loss": 1.6439, + "step": 506 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 1.9219, + "step": 507 + }, + { + "epoch": 0.0, + "learning_rate": 9.97995991983968e-05, + "loss": 1.5569, + "step": 508 + }, + { + "epoch": 0.0, + "learning_rate": 9.959919839679359e-05, + "loss": 3.5738, + "step": 509 + }, + { + "epoch": 0.0, + "learning_rate": 9.939879759519039e-05, + "loss": 6.3032, + "step": 510 + }, + { + "epoch": 0.0, + "learning_rate": 9.919839679358717e-05, + "loss": 0.0, + "step": 511 + }, + { + "epoch": 0.0, + "learning_rate": 9.899799599198397e-05, + "loss": 2.6412, + "step": 512 + }, + { + "epoch": 0.0, + "learning_rate": 9.879759519038077e-05, + "loss": 2.0273, + "step": 513 + }, + { + "epoch": 0.0, + "learning_rate": 9.859719438877755e-05, + "loss": 0.0, + "step": 514 + }, + { + "epoch": 0.0, + "learning_rate": 9.839679358717435e-05, + "loss": 1.99, + "step": 515 + }, + { + "epoch": 0.0, + "learning_rate": 9.819639278557115e-05, + "loss": 0.9392, + "step": 516 + }, + { + "epoch": 0.0, + "learning_rate": 9.799599198396794e-05, + "loss": 1.0164, + "step": 517 + }, + { + "epoch": 0.0, + "learning_rate": 9.779559118236473e-05, + "loss": 2.5598, + "step": 518 + }, + { + "epoch": 0.0, + "learning_rate": 9.759519038076152e-05, + "loss": 5.3794, + "step": 519 + }, + { + "epoch": 0.0, + "learning_rate": 9.739478957915832e-05, + "loss": 2.5614, + "step": 520 + }, + { + "epoch": 0.0, + "learning_rate": 9.719438877755512e-05, + "loss": 0.7221, + "step": 521 + }, + { + "epoch": 0.0, + "learning_rate": 9.69939879759519e-05, + "loss": 1.4231, + "step": 522 + }, + { + "epoch": 0.0, + "learning_rate": 9.67935871743487e-05, + "loss": 1.3288, + "step": 523 + }, + { + "epoch": 0.0, + "learning_rate": 9.65931863727455e-05, + "loss": 5.2266, + "step": 524 + }, + { + "epoch": 0.0, + "learning_rate": 9.639278557114228e-05, + "loss": 2.4147, + "step": 525 + }, + { + "epoch": 0.0, + "learning_rate": 9.619238476953908e-05, + "loss": 1.1246, + "step": 526 + }, + { + "epoch": 0.0, + "learning_rate": 9.599198396793587e-05, + "loss": 0.0, + "step": 527 + }, + { + "epoch": 0.0, + "learning_rate": 9.579158316633267e-05, + "loss": 4.951, + "step": 528 + }, + { + "epoch": 0.0, + "learning_rate": 9.559118236472946e-05, + "loss": 2.6102, + "step": 529 + }, + { + "epoch": 0.0, + "learning_rate": 9.539078156312625e-05, + "loss": 1.9989, + "step": 530 + }, + { + "epoch": 0.0, + "learning_rate": 9.519038076152305e-05, + "loss": 2.2009, + "step": 531 + }, + { + "epoch": 0.0, + "learning_rate": 9.498997995991983e-05, + "loss": 2.7023, + "step": 532 + }, + { + "epoch": 0.0, + "learning_rate": 9.478957915831663e-05, + "loss": 2.3955, + "step": 533 + }, + { + "epoch": 0.0, + "learning_rate": 9.458917835671343e-05, + "loss": 2.43, + "step": 534 + }, + { + "epoch": 0.0, + "learning_rate": 9.438877755511023e-05, + "loss": 1.8758, + "step": 535 + }, + { + "epoch": 0.0, + "learning_rate": 9.418837675350703e-05, + "loss": 1.45, + "step": 536 + }, + { + "epoch": 0.0, + "learning_rate": 9.398797595190381e-05, + "loss": 3.1603, + "step": 537 + }, + { + "epoch": 0.0, + "learning_rate": 9.378757515030061e-05, + "loss": 4.395, + "step": 538 + }, + { + "epoch": 0.0, + "learning_rate": 9.358717434869741e-05, + "loss": 1.6075, + "step": 539 + }, + { + "epoch": 0.0, + "learning_rate": 9.33867735470942e-05, + "loss": 0.9112, + "step": 540 + }, + { + "epoch": 0.0, + "learning_rate": 9.318637274549099e-05, + "loss": 3.9706, + "step": 541 + }, + { + "epoch": 0.0, + "learning_rate": 9.298597194388778e-05, + "loss": 1.1322, + "step": 542 + }, + { + "epoch": 0.0, + "learning_rate": 9.278557114228458e-05, + "loss": 4.9835, + "step": 543 + }, + { + "epoch": 0.0, + "learning_rate": 9.258517034068137e-05, + "loss": 2.3816, + "step": 544 + }, + { + "epoch": 0.0, + "learning_rate": 9.238476953907816e-05, + "loss": 1.7184, + "step": 545 + }, + { + "epoch": 0.0, + "learning_rate": 9.218436873747496e-05, + "loss": 1.9735, + "step": 546 + }, + { + "epoch": 0.0, + "learning_rate": 9.198396793587176e-05, + "loss": 2.2694, + "step": 547 + }, + { + "epoch": 0.0, + "learning_rate": 9.178356713426854e-05, + "loss": 2.1059, + "step": 548 + }, + { + "epoch": 0.0, + "learning_rate": 9.158316633266534e-05, + "loss": 1.2872, + "step": 549 + }, + { + "epoch": 0.0, + "learning_rate": 9.138276553106213e-05, + "loss": 1.6778, + "step": 550 + }, + { + "epoch": 0.0, + "learning_rate": 9.118236472945892e-05, + "loss": 3.3389, + "step": 551 + }, + { + "epoch": 0.0, + "learning_rate": 9.098196392785572e-05, + "loss": 1.7216, + "step": 552 + }, + { + "epoch": 0.0, + "learning_rate": 9.078156312625251e-05, + "loss": 1.596, + "step": 553 + }, + { + "epoch": 0.0, + "learning_rate": 9.05811623246493e-05, + "loss": 1.4005, + "step": 554 + }, + { + "epoch": 0.0, + "learning_rate": 9.03807615230461e-05, + "loss": 2.4382, + "step": 555 + }, + { + "epoch": 0.0, + "learning_rate": 9.018036072144289e-05, + "loss": 4.276, + "step": 556 + }, + { + "epoch": 0.0, + "learning_rate": 8.997995991983969e-05, + "loss": 2.6499, + "step": 557 + }, + { + "epoch": 0.0, + "learning_rate": 8.977955911823647e-05, + "loss": 3.9086, + "step": 558 + }, + { + "epoch": 0.0, + "learning_rate": 8.957915831663327e-05, + "loss": 3.3864, + "step": 559 + }, + { + "epoch": 0.0, + "learning_rate": 8.937875751503007e-05, + "loss": 7.2937, + "step": 560 + }, + { + "epoch": 0.0, + "learning_rate": 8.917835671342686e-05, + "loss": 1.8136, + "step": 561 + }, + { + "epoch": 0.0, + "learning_rate": 8.897795591182365e-05, + "loss": 2.7177, + "step": 562 + }, + { + "epoch": 0.0, + "learning_rate": 8.877755511022044e-05, + "loss": 3.3096, + "step": 563 + }, + { + "epoch": 0.0, + "learning_rate": 8.857715430861724e-05, + "loss": 3.5284, + "step": 564 + }, + { + "epoch": 0.0, + "learning_rate": 8.837675350701404e-05, + "loss": 1.853, + "step": 565 + }, + { + "epoch": 0.0, + "learning_rate": 8.817635270541082e-05, + "loss": 3.3489, + "step": 566 + }, + { + "epoch": 0.0, + "learning_rate": 8.797595190380762e-05, + "loss": 2.4069, + "step": 567 + }, + { + "epoch": 0.0, + "learning_rate": 8.777555110220442e-05, + "loss": 1.3353, + "step": 568 + }, + { + "epoch": 0.0, + "learning_rate": 8.75751503006012e-05, + "loss": 1.1441, + "step": 569 + }, + { + "epoch": 0.0, + "learning_rate": 8.7374749498998e-05, + "loss": 1.2785, + "step": 570 + }, + { + "epoch": 0.0, + "learning_rate": 8.717434869739479e-05, + "loss": 1.0496, + "step": 571 + }, + { + "epoch": 0.0, + "learning_rate": 8.697394789579159e-05, + "loss": 1.8968, + "step": 572 + }, + { + "epoch": 0.0, + "learning_rate": 8.677354709418838e-05, + "loss": 2.4824, + "step": 573 + }, + { + "epoch": 0.0, + "learning_rate": 8.657314629258517e-05, + "loss": 1.5688, + "step": 574 + }, + { + "epoch": 0.0, + "learning_rate": 8.637274549098197e-05, + "loss": 2.0568, + "step": 575 + }, + { + "epoch": 0.0, + "learning_rate": 8.617234468937875e-05, + "loss": 3.695, + "step": 576 + }, + { + "epoch": 0.0, + "learning_rate": 8.597194388777555e-05, + "loss": 1.44, + "step": 577 + }, + { + "epoch": 0.0, + "learning_rate": 8.577154308617235e-05, + "loss": 0.394, + "step": 578 + }, + { + "epoch": 0.0, + "learning_rate": 8.557114228456913e-05, + "loss": 3.4525, + "step": 579 + }, + { + "epoch": 0.0, + "learning_rate": 8.537074148296593e-05, + "loss": 2.219, + "step": 580 + }, + { + "epoch": 0.0, + "learning_rate": 8.517034068136273e-05, + "loss": 2.9851, + "step": 581 + }, + { + "epoch": 0.0, + "learning_rate": 8.496993987975952e-05, + "loss": 1.6133, + "step": 582 + }, + { + "epoch": 0.0, + "learning_rate": 8.476953907815631e-05, + "loss": 0.7803, + "step": 583 + }, + { + "epoch": 0.0, + "learning_rate": 8.45691382765531e-05, + "loss": 1.9138, + "step": 584 + }, + { + "epoch": 0.0, + "learning_rate": 8.43687374749499e-05, + "loss": 5.0276, + "step": 585 + }, + { + "epoch": 0.0, + "learning_rate": 8.41683366733467e-05, + "loss": 5.7651, + "step": 586 + }, + { + "epoch": 0.0, + "learning_rate": 8.396793587174348e-05, + "loss": 0.2584, + "step": 587 + }, + { + "epoch": 0.0, + "learning_rate": 8.376753507014028e-05, + "loss": 2.1676, + "step": 588 + }, + { + "epoch": 0.0, + "learning_rate": 8.356713426853708e-05, + "loss": 1.5227, + "step": 589 + }, + { + "epoch": 0.0, + "learning_rate": 8.336673346693386e-05, + "loss": 1.3924, + "step": 590 + }, + { + "epoch": 0.0, + "learning_rate": 8.316633266533068e-05, + "loss": 2.7769, + "step": 591 + }, + { + "epoch": 0.0, + "learning_rate": 8.296593186372746e-05, + "loss": 3.3983, + "step": 592 + }, + { + "epoch": 0.0, + "learning_rate": 8.276553106212426e-05, + "loss": 3.5176, + "step": 593 + }, + { + "epoch": 0.0, + "learning_rate": 8.256513026052104e-05, + "loss": 0.0, + "step": 594 + }, + { + "epoch": 0.0, + "learning_rate": 8.236472945891784e-05, + "loss": 1.3284, + "step": 595 + }, + { + "epoch": 0.0, + "learning_rate": 8.216432865731464e-05, + "loss": 2.8176, + "step": 596 + }, + { + "epoch": 0.0, + "learning_rate": 8.196392785571143e-05, + "loss": 1.4708, + "step": 597 + }, + { + "epoch": 0.0, + "learning_rate": 8.176352705410823e-05, + "loss": 2.7816, + "step": 598 + }, + { + "epoch": 0.0, + "learning_rate": 8.156312625250502e-05, + "loss": 1.8782, + "step": 599 + }, + { + "epoch": 0.0, + "learning_rate": 8.136272545090181e-05, + "loss": 1.5401, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 8.116232464929861e-05, + "loss": 1.4211, + "step": 601 + }, + { + "epoch": 0.0, + "learning_rate": 8.096192384769539e-05, + "loss": 0.7955, + "step": 602 + }, + { + "epoch": 0.0, + "learning_rate": 8.076152304609219e-05, + "loss": 2.4868, + "step": 603 + }, + { + "epoch": 0.0, + "learning_rate": 8.056112224448899e-05, + "loss": 0.8562, + "step": 604 + }, + { + "epoch": 0.0, + "learning_rate": 8.036072144288577e-05, + "loss": 2.3006, + "step": 605 + }, + { + "epoch": 0.0, + "learning_rate": 8.016032064128257e-05, + "loss": 1.9321, + "step": 606 + }, + { + "epoch": 0.0, + "learning_rate": 7.995991983967936e-05, + "loss": 2.6157, + "step": 607 + }, + { + "epoch": 0.0, + "learning_rate": 7.975951903807616e-05, + "loss": 2.0561, + "step": 608 + }, + { + "epoch": 0.0, + "learning_rate": 7.955911823647296e-05, + "loss": 3.1193, + "step": 609 + }, + { + "epoch": 0.0, + "learning_rate": 7.935871743486974e-05, + "loss": 1.6484, + "step": 610 + }, + { + "epoch": 0.0, + "learning_rate": 7.915831663326654e-05, + "loss": 1.5268, + "step": 611 + }, + { + "epoch": 0.0, + "learning_rate": 7.895791583166334e-05, + "loss": 0.0, + "step": 612 + }, + { + "epoch": 0.0, + "learning_rate": 7.875751503006012e-05, + "loss": 0.492, + "step": 613 + }, + { + "epoch": 0.0, + "learning_rate": 7.855711422845692e-05, + "loss": 4.4678, + "step": 614 + }, + { + "epoch": 0.0, + "learning_rate": 7.83567134268537e-05, + "loss": 1.9929, + "step": 615 + }, + { + "epoch": 0.0, + "learning_rate": 7.81563126252505e-05, + "loss": 0.7015, + "step": 616 + }, + { + "epoch": 0.0, + "learning_rate": 7.79559118236473e-05, + "loss": 1.5474, + "step": 617 + }, + { + "epoch": 0.0, + "learning_rate": 7.775551102204409e-05, + "loss": 1.8292, + "step": 618 + }, + { + "epoch": 0.0, + "learning_rate": 7.755511022044089e-05, + "loss": 2.8704, + "step": 619 + }, + { + "epoch": 0.0, + "learning_rate": 7.735470941883769e-05, + "loss": 2.0216, + "step": 620 + }, + { + "epoch": 0.0, + "learning_rate": 7.715430861723447e-05, + "loss": 2.8592, + "step": 621 + }, + { + "epoch": 0.0, + "learning_rate": 7.695390781563127e-05, + "loss": 2.3869, + "step": 622 + }, + { + "epoch": 0.0, + "learning_rate": 7.675350701402805e-05, + "loss": 3.5467, + "step": 623 + }, + { + "epoch": 0.0, + "learning_rate": 7.655310621242485e-05, + "loss": 0.785, + "step": 624 + }, + { + "epoch": 0.0, + "learning_rate": 7.635270541082165e-05, + "loss": 1.8183, + "step": 625 + }, + { + "epoch": 0.0, + "learning_rate": 7.615230460921844e-05, + "loss": 0.5593, + "step": 626 + }, + { + "epoch": 0.0, + "learning_rate": 7.595190380761523e-05, + "loss": 3.5061, + "step": 627 + }, + { + "epoch": 0.0, + "learning_rate": 7.575150300601202e-05, + "loss": 6.0013, + "step": 628 + }, + { + "epoch": 0.0, + "learning_rate": 7.555110220440882e-05, + "loss": 3.9287, + "step": 629 + }, + { + "epoch": 0.0, + "learning_rate": 7.535070140280562e-05, + "loss": 1.1422, + "step": 630 + }, + { + "epoch": 0.0, + "learning_rate": 7.51503006012024e-05, + "loss": 0.0, + "step": 631 + }, + { + "epoch": 0.0, + "learning_rate": 7.49498997995992e-05, + "loss": 2.8392, + "step": 632 + }, + { + "epoch": 0.0, + "learning_rate": 7.4749498997996e-05, + "loss": 0.0, + "step": 633 + }, + { + "epoch": 0.0, + "learning_rate": 7.454909819639278e-05, + "loss": 2.363, + "step": 634 + }, + { + "epoch": 0.0, + "learning_rate": 7.434869739478958e-05, + "loss": 1.4281, + "step": 635 + }, + { + "epoch": 0.0, + "learning_rate": 7.414829659318637e-05, + "loss": 1.4313, + "step": 636 + }, + { + "epoch": 0.0, + "learning_rate": 7.394789579158317e-05, + "loss": 2.8012, + "step": 637 + }, + { + "epoch": 0.0, + "learning_rate": 7.374749498997996e-05, + "loss": 2.175, + "step": 638 + }, + { + "epoch": 0.0, + "learning_rate": 7.354709418837675e-05, + "loss": 3.2143, + "step": 639 + }, + { + "epoch": 0.0, + "learning_rate": 7.334669338677355e-05, + "loss": 0.6305, + "step": 640 + }, + { + "epoch": 0.0, + "learning_rate": 7.314629258517035e-05, + "loss": 2.0879, + "step": 641 + }, + { + "epoch": 0.0, + "learning_rate": 7.294589178356713e-05, + "loss": 3.7997, + "step": 642 + }, + { + "epoch": 0.0, + "learning_rate": 7.274549098196393e-05, + "loss": 1.5878, + "step": 643 + }, + { + "epoch": 0.0, + "learning_rate": 7.254509018036071e-05, + "loss": 3.5472, + "step": 644 + }, + { + "epoch": 0.0, + "learning_rate": 7.234468937875751e-05, + "loss": 2.5241, + "step": 645 + }, + { + "epoch": 0.0, + "learning_rate": 7.214428857715431e-05, + "loss": 2.3167, + "step": 646 + }, + { + "epoch": 0.0, + "learning_rate": 7.194388777555111e-05, + "loss": 0.6522, + "step": 647 + }, + { + "epoch": 0.0, + "learning_rate": 7.174348697394791e-05, + "loss": 1.3162, + "step": 648 + }, + { + "epoch": 0.0, + "learning_rate": 7.15430861723447e-05, + "loss": 3.8872, + "step": 649 + }, + { + "epoch": 0.0, + "learning_rate": 7.134268537074149e-05, + "loss": 1.4756, + "step": 650 + }, + { + "epoch": 0.0, + "learning_rate": 7.114228456913829e-05, + "loss": 2.367, + "step": 651 + }, + { + "epoch": 0.0, + "learning_rate": 7.094188376753508e-05, + "loss": 1.5718, + "step": 652 + }, + { + "epoch": 0.0, + "learning_rate": 7.074148296593187e-05, + "loss": 2.8435, + "step": 653 + }, + { + "epoch": 0.0, + "learning_rate": 7.054108216432866e-05, + "loss": 2.7527, + "step": 654 + }, + { + "epoch": 0.0, + "learning_rate": 7.034068136272546e-05, + "loss": 4.4464, + "step": 655 + }, + { + "epoch": 0.0, + "learning_rate": 7.014028056112226e-05, + "loss": 1.5668, + "step": 656 + }, + { + "epoch": 0.0, + "learning_rate": 6.993987975951904e-05, + "loss": 2.2184, + "step": 657 + }, + { + "epoch": 0.0, + "learning_rate": 6.973947895791584e-05, + "loss": 1.2717, + "step": 658 + }, + { + "epoch": 0.0, + "learning_rate": 6.953907815631263e-05, + "loss": 3.3642, + "step": 659 + }, + { + "epoch": 0.0, + "learning_rate": 6.933867735470942e-05, + "loss": 0.6904, + "step": 660 + }, + { + "epoch": 0.0, + "learning_rate": 6.913827655310622e-05, + "loss": 0.0, + "step": 661 + }, + { + "epoch": 0.0, + "learning_rate": 6.893787575150301e-05, + "loss": 1.6057, + "step": 662 + }, + { + "epoch": 0.0, + "learning_rate": 6.87374749498998e-05, + "loss": 1.9233, + "step": 663 + }, + { + "epoch": 0.0, + "learning_rate": 6.85370741482966e-05, + "loss": 1.5996, + "step": 664 + }, + { + "epoch": 0.0, + "learning_rate": 6.833667334669339e-05, + "loss": 2.1426, + "step": 665 + }, + { + "epoch": 0.0, + "learning_rate": 6.813627254509019e-05, + "loss": 2.533, + "step": 666 + }, + { + "epoch": 0.0, + "learning_rate": 6.793587174348697e-05, + "loss": 2.3267, + "step": 667 + }, + { + "epoch": 0.0, + "learning_rate": 6.773547094188377e-05, + "loss": 4.0984, + "step": 668 + }, + { + "epoch": 0.0, + "learning_rate": 6.753507014028057e-05, + "loss": 0.9157, + "step": 669 + }, + { + "epoch": 0.0, + "learning_rate": 6.733466933867735e-05, + "loss": 2.0127, + "step": 670 + }, + { + "epoch": 0.0, + "learning_rate": 6.713426853707415e-05, + "loss": 1.7905, + "step": 671 + }, + { + "epoch": 0.0, + "learning_rate": 6.693386773547095e-05, + "loss": 0.6195, + "step": 672 + }, + { + "epoch": 0.0, + "learning_rate": 6.673346693386774e-05, + "loss": 0.9984, + "step": 673 + }, + { + "epoch": 0.0, + "learning_rate": 6.653306613226454e-05, + "loss": 2.7152, + "step": 674 + }, + { + "epoch": 0.0, + "learning_rate": 6.633266533066132e-05, + "loss": 2.2094, + "step": 675 + }, + { + "epoch": 0.0, + "learning_rate": 6.613226452905812e-05, + "loss": 2.0554, + "step": 676 + }, + { + "epoch": 0.0, + "learning_rate": 6.593186372745492e-05, + "loss": 3.2084, + "step": 677 + }, + { + "epoch": 0.0, + "learning_rate": 6.57314629258517e-05, + "loss": 1.3133, + "step": 678 + }, + { + "epoch": 0.0, + "learning_rate": 6.55310621242485e-05, + "loss": 3.631, + "step": 679 + }, + { + "epoch": 0.0, + "learning_rate": 6.533066132264529e-05, + "loss": 3.5943, + "step": 680 + }, + { + "epoch": 0.0, + "learning_rate": 6.513026052104208e-05, + "loss": 1.6873, + "step": 681 + }, + { + "epoch": 0.0, + "learning_rate": 6.492985971943888e-05, + "loss": 3.4164, + "step": 682 + }, + { + "epoch": 0.0, + "learning_rate": 6.472945891783567e-05, + "loss": 1.4744, + "step": 683 + }, + { + "epoch": 0.0, + "learning_rate": 6.452905811623247e-05, + "loss": 1.6474, + "step": 684 + }, + { + "epoch": 0.0, + "learning_rate": 6.432865731462927e-05, + "loss": 2.2888, + "step": 685 + }, + { + "epoch": 0.0, + "learning_rate": 6.412825651302605e-05, + "loss": 0.947, + "step": 686 + }, + { + "epoch": 0.0, + "learning_rate": 6.392785571142285e-05, + "loss": 1.4766, + "step": 687 + }, + { + "epoch": 0.0, + "learning_rate": 6.372745490981963e-05, + "loss": 3.1128, + "step": 688 + }, + { + "epoch": 0.0, + "learning_rate": 6.352705410821643e-05, + "loss": 2.7201, + "step": 689 + }, + { + "epoch": 0.0, + "learning_rate": 6.332665330661323e-05, + "loss": 1.0741, + "step": 690 + }, + { + "epoch": 0.0, + "learning_rate": 6.312625250501002e-05, + "loss": 1.5721, + "step": 691 + }, + { + "epoch": 0.0, + "learning_rate": 6.292585170340681e-05, + "loss": 4.1374, + "step": 692 + }, + { + "epoch": 0.0, + "learning_rate": 6.27254509018036e-05, + "loss": 0.0, + "step": 693 + }, + { + "epoch": 0.0, + "learning_rate": 6.25250501002004e-05, + "loss": 5.0251, + "step": 694 + }, + { + "epoch": 0.0, + "learning_rate": 6.23246492985972e-05, + "loss": 4.394, + "step": 695 + }, + { + "epoch": 0.0, + "learning_rate": 6.212424849699398e-05, + "loss": 2.5211, + "step": 696 + }, + { + "epoch": 0.0, + "learning_rate": 6.192384769539078e-05, + "loss": 0.6668, + "step": 697 + }, + { + "epoch": 0.0, + "learning_rate": 6.172344689378758e-05, + "loss": 3.7969, + "step": 698 + }, + { + "epoch": 0.0, + "learning_rate": 6.152304609218436e-05, + "loss": 2.1938, + "step": 699 + }, + { + "epoch": 0.0, + "learning_rate": 6.132264529058116e-05, + "loss": 2.3569, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 6.112224448897795e-05, + "loss": 2.2737, + "step": 701 + }, + { + "epoch": 0.0, + "learning_rate": 6.092184368737476e-05, + "loss": 2.2821, + "step": 702 + }, + { + "epoch": 0.0, + "learning_rate": 6.072144288577155e-05, + "loss": 2.2069, + "step": 703 + }, + { + "epoch": 0.0, + "learning_rate": 6.052104208416834e-05, + "loss": 2.4519, + "step": 704 + }, + { + "epoch": 0.0, + "learning_rate": 6.0320641282565135e-05, + "loss": 1.6573, + "step": 705 + }, + { + "epoch": 0.0, + "learning_rate": 6.012024048096193e-05, + "loss": 2.6574, + "step": 706 + }, + { + "epoch": 0.0, + "learning_rate": 5.9919839679358725e-05, + "loss": 3.376, + "step": 707 + }, + { + "epoch": 0.0, + "learning_rate": 5.971943887775552e-05, + "loss": 1.1742, + "step": 708 + }, + { + "epoch": 0.0, + "learning_rate": 5.951903807615231e-05, + "loss": 0.0, + "step": 709 + }, + { + "epoch": 0.0, + "learning_rate": 5.931863727454911e-05, + "loss": 1.5307, + "step": 710 + }, + { + "epoch": 0.0, + "learning_rate": 5.91182364729459e-05, + "loss": 2.9334, + "step": 711 + }, + { + "epoch": 0.0, + "learning_rate": 5.891783567134269e-05, + "loss": 0.0683, + "step": 712 + }, + { + "epoch": 0.0, + "learning_rate": 5.871743486973948e-05, + "loss": 1.3237, + "step": 713 + }, + { + "epoch": 0.0, + "learning_rate": 5.851703406813628e-05, + "loss": 0.8188, + "step": 714 + }, + { + "epoch": 0.0, + "learning_rate": 5.831663326653307e-05, + "loss": 5.0694, + "step": 715 + }, + { + "epoch": 0.0, + "learning_rate": 5.8116232464929865e-05, + "loss": 0.4038, + "step": 716 + }, + { + "epoch": 0.0, + "learning_rate": 5.7915831663326656e-05, + "loss": 1.5647, + "step": 717 + }, + { + "epoch": 0.0, + "learning_rate": 5.7715430861723455e-05, + "loss": 1.4984, + "step": 718 + }, + { + "epoch": 0.0, + "learning_rate": 5.7515030060120247e-05, + "loss": 4.388, + "step": 719 + }, + { + "epoch": 0.0, + "learning_rate": 5.731462925851704e-05, + "loss": 3.747, + "step": 720 + }, + { + "epoch": 0.0, + "learning_rate": 5.711422845691383e-05, + "loss": 3.0854, + "step": 721 + }, + { + "epoch": 0.0, + "learning_rate": 5.691382765531063e-05, + "loss": 1.4225, + "step": 722 + }, + { + "epoch": 0.0, + "learning_rate": 5.671342685370742e-05, + "loss": 1.3527, + "step": 723 + }, + { + "epoch": 0.0, + "learning_rate": 5.651302605210421e-05, + "loss": 1.6069, + "step": 724 + }, + { + "epoch": 0.0, + "learning_rate": 5.6312625250501004e-05, + "loss": 2.4684, + "step": 725 + }, + { + "epoch": 0.0, + "learning_rate": 5.6112224448897796e-05, + "loss": 3.3059, + "step": 726 + }, + { + "epoch": 0.0, + "learning_rate": 5.5911823647294594e-05, + "loss": 1.2761, + "step": 727 + }, + { + "epoch": 0.0, + "learning_rate": 5.5711422845691386e-05, + "loss": 4.7155, + "step": 728 + }, + { + "epoch": 0.0, + "learning_rate": 5.551102204408818e-05, + "loss": 0.8618, + "step": 729 + }, + { + "epoch": 0.0, + "learning_rate": 5.531062124248497e-05, + "loss": 0.1951, + "step": 730 + }, + { + "epoch": 0.0, + "learning_rate": 5.511022044088177e-05, + "loss": 0.8723, + "step": 731 + }, + { + "epoch": 0.0, + "learning_rate": 5.490981963927856e-05, + "loss": 2.6863, + "step": 732 + }, + { + "epoch": 0.0, + "learning_rate": 5.470941883767535e-05, + "loss": 6.9487, + "step": 733 + }, + { + "epoch": 0.0, + "learning_rate": 5.4509018036072143e-05, + "loss": 1.7146, + "step": 734 + }, + { + "epoch": 0.0, + "learning_rate": 5.430861723446894e-05, + "loss": 4.26, + "step": 735 + }, + { + "epoch": 0.0, + "learning_rate": 5.4108216432865734e-05, + "loss": 2.187, + "step": 736 + }, + { + "epoch": 0.0, + "learning_rate": 5.3907815631262526e-05, + "loss": 2.7236, + "step": 737 + }, + { + "epoch": 0.0, + "learning_rate": 5.370741482965932e-05, + "loss": 4.0348, + "step": 738 + }, + { + "epoch": 0.0, + "learning_rate": 5.3507014028056116e-05, + "loss": 1.4374, + "step": 739 + }, + { + "epoch": 0.0, + "learning_rate": 5.330661322645291e-05, + "loss": 1.0688, + "step": 740 + }, + { + "epoch": 0.0, + "learning_rate": 5.31062124248497e-05, + "loss": 2.0594, + "step": 741 + }, + { + "epoch": 0.0, + "learning_rate": 5.290581162324649e-05, + "loss": 0.8456, + "step": 742 + }, + { + "epoch": 0.0, + "learning_rate": 5.270541082164328e-05, + "loss": 0.913, + "step": 743 + }, + { + "epoch": 0.0, + "learning_rate": 5.250501002004008e-05, + "loss": 3.1388, + "step": 744 + }, + { + "epoch": 0.0, + "learning_rate": 5.230460921843687e-05, + "loss": 2.5727, + "step": 745 + }, + { + "epoch": 0.0, + "learning_rate": 5.2104208416833665e-05, + "loss": 1.4594, + "step": 746 + }, + { + "epoch": 0.0, + "learning_rate": 5.190380761523046e-05, + "loss": 3.9008, + "step": 747 + }, + { + "epoch": 0.0, + "learning_rate": 5.1703406813627255e-05, + "loss": 2.31, + "step": 748 + }, + { + "epoch": 0.0, + "learning_rate": 5.150300601202405e-05, + "loss": 1.7205, + "step": 749 + }, + { + "epoch": 0.0, + "learning_rate": 5.130260521042084e-05, + "loss": 2.2148, + "step": 750 + }, + { + "epoch": 0.0, + "learning_rate": 5.110220440881763e-05, + "loss": 3.1292, + "step": 751 + }, + { + "epoch": 0.0, + "learning_rate": 5.090180360721443e-05, + "loss": 2.3089, + "step": 752 + }, + { + "epoch": 0.0, + "learning_rate": 5.070140280561122e-05, + "loss": 1.6105, + "step": 753 + }, + { + "epoch": 0.0, + "learning_rate": 5.050100200400801e-05, + "loss": 3.2778, + "step": 754 + }, + { + "epoch": 0.0, + "learning_rate": 5.0300601202404805e-05, + "loss": 0.0, + "step": 755 + }, + { + "epoch": 0.0, + "learning_rate": 5.01002004008016e-05, + "loss": 1.9581, + "step": 756 + }, + { + "epoch": 0.0, + "learning_rate": 4.98997995991984e-05, + "loss": 1.2599, + "step": 757 + }, + { + "epoch": 0.0, + "learning_rate": 4.9699398797595193e-05, + "loss": 1.5371, + "step": 758 + }, + { + "epoch": 0.0, + "learning_rate": 4.9498997995991985e-05, + "loss": 2.8294, + "step": 759 + }, + { + "epoch": 0.0, + "learning_rate": 4.929859719438878e-05, + "loss": 2.2045, + "step": 760 + }, + { + "epoch": 0.0, + "learning_rate": 4.9098196392785576e-05, + "loss": 2.1054, + "step": 761 + }, + { + "epoch": 0.0, + "learning_rate": 4.889779559118237e-05, + "loss": 1.8911, + "step": 762 + }, + { + "epoch": 0.0, + "learning_rate": 4.869739478957916e-05, + "loss": 0.0, + "step": 763 + }, + { + "epoch": 0.0, + "learning_rate": 4.849699398797595e-05, + "loss": 1.362, + "step": 764 + }, + { + "epoch": 0.0, + "learning_rate": 4.829659318637275e-05, + "loss": 2.2082, + "step": 765 + }, + { + "epoch": 0.0, + "learning_rate": 4.809619238476954e-05, + "loss": 1.4097, + "step": 766 + }, + { + "epoch": 0.0, + "learning_rate": 4.789579158316633e-05, + "loss": 2.1663, + "step": 767 + }, + { + "epoch": 0.0, + "learning_rate": 4.7695390781563125e-05, + "loss": 1.7452, + "step": 768 + }, + { + "epoch": 0.0, + "learning_rate": 4.7494989979959916e-05, + "loss": 1.8244, + "step": 769 + }, + { + "epoch": 0.0, + "learning_rate": 4.7294589178356715e-05, + "loss": 4.3847, + "step": 770 + }, + { + "epoch": 0.0, + "learning_rate": 4.7094188376753514e-05, + "loss": 1.3235, + "step": 771 + }, + { + "epoch": 0.0, + "learning_rate": 4.6893787575150305e-05, + "loss": 1.6868, + "step": 772 + }, + { + "epoch": 0.0, + "learning_rate": 4.66933867735471e-05, + "loss": 0.4138, + "step": 773 + }, + { + "epoch": 0.0, + "learning_rate": 4.649298597194389e-05, + "loss": 0.1626, + "step": 774 + }, + { + "epoch": 0.0, + "learning_rate": 4.629258517034069e-05, + "loss": 1.6324, + "step": 775 + }, + { + "epoch": 0.0, + "learning_rate": 4.609218436873748e-05, + "loss": 1.064, + "step": 776 + }, + { + "epoch": 0.0, + "learning_rate": 4.589178356713427e-05, + "loss": 1.1306, + "step": 777 + }, + { + "epoch": 0.0, + "learning_rate": 4.569138276553106e-05, + "loss": 9.5001, + "step": 778 + }, + { + "epoch": 0.0, + "learning_rate": 4.549098196392786e-05, + "loss": 2.5549, + "step": 779 + }, + { + "epoch": 0.0, + "learning_rate": 4.529058116232465e-05, + "loss": 1.4219, + "step": 780 + }, + { + "epoch": 0.0, + "learning_rate": 4.5090180360721445e-05, + "loss": 0.9293, + "step": 781 + }, + { + "epoch": 0.0, + "learning_rate": 4.488977955911824e-05, + "loss": 2.9165, + "step": 782 + }, + { + "epoch": 0.0, + "learning_rate": 4.4689378757515035e-05, + "loss": 5.4542, + "step": 783 + }, + { + "epoch": 0.0, + "learning_rate": 4.448897795591183e-05, + "loss": 1.6391, + "step": 784 + }, + { + "epoch": 0.0, + "learning_rate": 4.428857715430862e-05, + "loss": 1.6708, + "step": 785 + }, + { + "epoch": 0.0, + "learning_rate": 4.408817635270541e-05, + "loss": 1.7247, + "step": 786 + }, + { + "epoch": 0.0, + "learning_rate": 4.388777555110221e-05, + "loss": 4.8019, + "step": 787 + }, + { + "epoch": 0.0, + "learning_rate": 4.3687374749499e-05, + "loss": 2.9598, + "step": 788 + }, + { + "epoch": 0.0, + "learning_rate": 4.348697394789579e-05, + "loss": 2.1231, + "step": 789 + }, + { + "epoch": 0.0, + "learning_rate": 4.3286573146292584e-05, + "loss": 2.9594, + "step": 790 + }, + { + "epoch": 0.0, + "learning_rate": 4.3086172344689376e-05, + "loss": 2.5611, + "step": 791 + }, + { + "epoch": 0.0, + "learning_rate": 4.2885771543086175e-05, + "loss": 4.429, + "step": 792 + }, + { + "epoch": 0.0, + "learning_rate": 4.2685370741482966e-05, + "loss": 4.2864, + "step": 793 + }, + { + "epoch": 0.0, + "learning_rate": 4.248496993987976e-05, + "loss": 6.1905, + "step": 794 + }, + { + "epoch": 0.0, + "learning_rate": 4.228456913827655e-05, + "loss": 5.4012, + "step": 795 + }, + { + "epoch": 0.0, + "learning_rate": 4.208416833667335e-05, + "loss": 2.7522, + "step": 796 + }, + { + "epoch": 0.0, + "learning_rate": 4.188376753507014e-05, + "loss": 1.9901, + "step": 797 + }, + { + "epoch": 0.0, + "learning_rate": 4.168336673346693e-05, + "loss": 1.7421, + "step": 798 + }, + { + "epoch": 0.0, + "learning_rate": 4.148296593186373e-05, + "loss": 0.7346, + "step": 799 + }, + { + "epoch": 0.0, + "learning_rate": 4.128256513026052e-05, + "loss": 0.4529, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 4.108216432865732e-05, + "loss": 1.7953, + "step": 801 + }, + { + "epoch": 0.0, + "learning_rate": 4.088176352705411e-05, + "loss": 3.1981, + "step": 802 + }, + { + "epoch": 0.0, + "learning_rate": 4.0681362725450904e-05, + "loss": 3.32, + "step": 803 + }, + { + "epoch": 0.0, + "learning_rate": 4.0480961923847696e-05, + "loss": 4.134, + "step": 804 + }, + { + "epoch": 0.0, + "learning_rate": 4.0280561122244495e-05, + "loss": 2.0281, + "step": 805 + }, + { + "epoch": 0.0, + "learning_rate": 4.0080160320641287e-05, + "loss": 1.8402, + "step": 806 + }, + { + "epoch": 0.0, + "learning_rate": 3.987975951903808e-05, + "loss": 3.2845, + "step": 807 + }, + { + "epoch": 0.0, + "learning_rate": 3.967935871743487e-05, + "loss": 1.6719, + "step": 808 + }, + { + "epoch": 0.0, + "learning_rate": 3.947895791583167e-05, + "loss": 0.0, + "step": 809 + }, + { + "epoch": 0.0, + "learning_rate": 3.927855711422846e-05, + "loss": 1.9383, + "step": 810 + }, + { + "epoch": 0.0, + "learning_rate": 3.907815631262525e-05, + "loss": 2.1896, + "step": 811 + }, + { + "epoch": 0.0, + "learning_rate": 3.8877755511022044e-05, + "loss": 1.1527, + "step": 812 + }, + { + "epoch": 0.0, + "learning_rate": 3.867735470941884e-05, + "loss": 5.3477, + "step": 813 + }, + { + "epoch": 0.0, + "learning_rate": 3.8476953907815634e-05, + "loss": 1.5018, + "step": 814 + }, + { + "epoch": 0.0, + "learning_rate": 3.8276553106212426e-05, + "loss": 1.241, + "step": 815 + }, + { + "epoch": 0.0, + "learning_rate": 3.807615230460922e-05, + "loss": 2.193, + "step": 816 + }, + { + "epoch": 0.0, + "learning_rate": 3.787575150300601e-05, + "loss": 2.021, + "step": 817 + }, + { + "epoch": 0.0, + "learning_rate": 3.767535070140281e-05, + "loss": 0.6531, + "step": 818 + }, + { + "epoch": 0.0, + "learning_rate": 3.74749498997996e-05, + "loss": 0.6331, + "step": 819 + }, + { + "epoch": 0.0, + "learning_rate": 3.727454909819639e-05, + "loss": 0.0, + "step": 820 + }, + { + "epoch": 0.0, + "learning_rate": 3.7074148296593183e-05, + "loss": 0.8708, + "step": 821 + }, + { + "epoch": 0.0, + "learning_rate": 3.687374749498998e-05, + "loss": 2.0937, + "step": 822 + }, + { + "epoch": 0.0, + "learning_rate": 3.6673346693386774e-05, + "loss": 2.5359, + "step": 823 + }, + { + "epoch": 0.0, + "learning_rate": 3.6472945891783566e-05, + "loss": 1.7331, + "step": 824 + }, + { + "epoch": 0.0, + "learning_rate": 3.627254509018036e-05, + "loss": 1.1601, + "step": 825 + }, + { + "epoch": 0.0, + "learning_rate": 3.6072144288577156e-05, + "loss": 1.8112, + "step": 826 + }, + { + "epoch": 0.0, + "learning_rate": 3.5871743486973954e-05, + "loss": 1.9119, + "step": 827 + }, + { + "epoch": 0.0, + "learning_rate": 3.5671342685370746e-05, + "loss": 2.1902, + "step": 828 + }, + { + "epoch": 0.0, + "learning_rate": 3.547094188376754e-05, + "loss": 0.5517, + "step": 829 + }, + { + "epoch": 0.0, + "learning_rate": 3.527054108216433e-05, + "loss": 0.4456, + "step": 830 + }, + { + "epoch": 0.0, + "learning_rate": 3.507014028056113e-05, + "loss": 5.7706, + "step": 831 + }, + { + "epoch": 0.0, + "learning_rate": 3.486973947895792e-05, + "loss": 1.3838, + "step": 832 + }, + { + "epoch": 0.0, + "learning_rate": 3.466933867735471e-05, + "loss": 1.9657, + "step": 833 + }, + { + "epoch": 0.0, + "learning_rate": 3.4468937875751504e-05, + "loss": 1.5892, + "step": 834 + }, + { + "epoch": 0.0, + "learning_rate": 3.42685370741483e-05, + "loss": 1.0192, + "step": 835 + }, + { + "epoch": 0.0, + "learning_rate": 3.4068136272545094e-05, + "loss": 1.8128, + "step": 836 + }, + { + "epoch": 0.0, + "learning_rate": 3.3867735470941886e-05, + "loss": 1.2665, + "step": 837 + }, + { + "epoch": 0.0, + "learning_rate": 3.366733466933868e-05, + "loss": 1.9121, + "step": 838 + }, + { + "epoch": 0.0, + "learning_rate": 3.3466933867735476e-05, + "loss": 1.7453, + "step": 839 + }, + { + "epoch": 0.0, + "learning_rate": 3.326653306613227e-05, + "loss": 3.9064, + "step": 840 + }, + { + "epoch": 0.0, + "learning_rate": 3.306613226452906e-05, + "loss": 0.9258, + "step": 841 + }, + { + "epoch": 0.0, + "learning_rate": 3.286573146292585e-05, + "loss": 2.3671, + "step": 842 + }, + { + "epoch": 0.0, + "learning_rate": 3.266533066132264e-05, + "loss": 1.9065, + "step": 843 + }, + { + "epoch": 0.0, + "learning_rate": 3.246492985971944e-05, + "loss": 1.869, + "step": 844 + }, + { + "epoch": 0.0, + "learning_rate": 3.2264529058116233e-05, + "loss": 0.7601, + "step": 845 + }, + { + "epoch": 0.0, + "learning_rate": 3.2064128256513025e-05, + "loss": 0.4456, + "step": 846 + }, + { + "epoch": 0.0, + "learning_rate": 3.186372745490982e-05, + "loss": 2.6767, + "step": 847 + }, + { + "epoch": 0.0, + "learning_rate": 3.1663326653306616e-05, + "loss": 0.7504, + "step": 848 + }, + { + "epoch": 0.0, + "learning_rate": 3.146292585170341e-05, + "loss": 5.5667, + "step": 849 + }, + { + "epoch": 0.0, + "learning_rate": 3.12625250501002e-05, + "loss": 3.0322, + "step": 850 + }, + { + "epoch": 0.0, + "learning_rate": 3.106212424849699e-05, + "loss": 1.73, + "step": 851 + }, + { + "epoch": 0.0, + "learning_rate": 3.086172344689379e-05, + "loss": 1.1359, + "step": 852 + }, + { + "epoch": 0.0, + "learning_rate": 3.066132264529058e-05, + "loss": 2.1959, + "step": 853 + }, + { + "epoch": 0.0, + "learning_rate": 3.046092184368738e-05, + "loss": 2.5073, + "step": 854 + }, + { + "epoch": 0.0, + "learning_rate": 3.026052104208417e-05, + "loss": 2.8468, + "step": 855 + }, + { + "epoch": 0.0, + "learning_rate": 3.0060120240480967e-05, + "loss": 1.3095, + "step": 856 + }, + { + "epoch": 0.0, + "learning_rate": 2.985971943887776e-05, + "loss": 3.6701, + "step": 857 + }, + { + "epoch": 0.0, + "learning_rate": 2.9659318637274554e-05, + "loss": 1.4558, + "step": 858 + }, + { + "epoch": 0.0, + "learning_rate": 2.9458917835671345e-05, + "loss": 2.5426, + "step": 859 + }, + { + "epoch": 0.0, + "learning_rate": 2.925851703406814e-05, + "loss": 2.4209, + "step": 860 + }, + { + "epoch": 0.0, + "learning_rate": 2.9058116232464932e-05, + "loss": 0.5248, + "step": 861 + }, + { + "epoch": 0.0, + "learning_rate": 2.8857715430861727e-05, + "loss": 3.4234, + "step": 862 + }, + { + "epoch": 0.0, + "learning_rate": 2.865731462925852e-05, + "loss": 2.9914, + "step": 863 + }, + { + "epoch": 0.0, + "learning_rate": 2.8456913827655314e-05, + "loss": 2.631, + "step": 864 + }, + { + "epoch": 0.0, + "learning_rate": 2.8256513026052106e-05, + "loss": 9.4896, + "step": 865 + }, + { + "epoch": 0.0, + "learning_rate": 2.8056112224448898e-05, + "loss": 1.3135, + "step": 866 + }, + { + "epoch": 0.0, + "learning_rate": 2.7855711422845693e-05, + "loss": 1.7307, + "step": 867 + }, + { + "epoch": 0.0, + "learning_rate": 2.7655310621242485e-05, + "loss": 6.0387, + "step": 868 + }, + { + "epoch": 0.0, + "learning_rate": 2.745490981963928e-05, + "loss": 3.1468, + "step": 869 + }, + { + "epoch": 0.0, + "learning_rate": 2.7254509018036072e-05, + "loss": 2.8258, + "step": 870 + }, + { + "epoch": 0.0, + "learning_rate": 2.7054108216432867e-05, + "loss": 2.0105, + "step": 871 + }, + { + "epoch": 0.0, + "learning_rate": 2.685370741482966e-05, + "loss": 1.774, + "step": 872 + }, + { + "epoch": 0.0, + "learning_rate": 2.6653306613226454e-05, + "loss": 2.0636, + "step": 873 + }, + { + "epoch": 0.0, + "learning_rate": 2.6452905811623246e-05, + "loss": 1.6851, + "step": 874 + }, + { + "epoch": 0.0, + "learning_rate": 2.625250501002004e-05, + "loss": 1.1166, + "step": 875 + }, + { + "epoch": 0.0, + "learning_rate": 2.6052104208416833e-05, + "loss": 1.4389, + "step": 876 + }, + { + "epoch": 0.0, + "learning_rate": 2.5851703406813628e-05, + "loss": 0.9514, + "step": 877 + }, + { + "epoch": 0.0, + "learning_rate": 2.565130260521042e-05, + "loss": 2.6993, + "step": 878 + }, + { + "epoch": 0.0, + "learning_rate": 2.5450901803607215e-05, + "loss": 2.7322, + "step": 879 + }, + { + "epoch": 0.0, + "learning_rate": 2.5250501002004006e-05, + "loss": 6.489, + "step": 880 + }, + { + "epoch": 0.0, + "learning_rate": 2.50501002004008e-05, + "loss": 0.8815, + "step": 881 + }, + { + "epoch": 0.0, + "learning_rate": 2.4849699398797597e-05, + "loss": 0.9959, + "step": 882 + }, + { + "epoch": 0.0, + "learning_rate": 2.464929859719439e-05, + "loss": 0.0, + "step": 883 + }, + { + "epoch": 0.0, + "learning_rate": 2.4448897795591184e-05, + "loss": 2.0596, + "step": 884 + }, + { + "epoch": 0.0, + "learning_rate": 2.4248496993987975e-05, + "loss": 2.9538, + "step": 885 + }, + { + "epoch": 0.0, + "learning_rate": 2.404809619238477e-05, + "loss": 0.7011, + "step": 886 + }, + { + "epoch": 0.0, + "learning_rate": 2.3847695390781562e-05, + "loss": 2.6931, + "step": 887 + }, + { + "epoch": 0.0, + "learning_rate": 2.3647294589178358e-05, + "loss": 3.6907, + "step": 888 + }, + { + "epoch": 0.0, + "learning_rate": 2.3446893787575153e-05, + "loss": 1.0606, + "step": 889 + }, + { + "epoch": 0.0, + "learning_rate": 2.3246492985971944e-05, + "loss": 1.1309, + "step": 890 + }, + { + "epoch": 0.0, + "learning_rate": 2.304609218436874e-05, + "loss": 2.0832, + "step": 891 + }, + { + "epoch": 0.0, + "learning_rate": 2.284569138276553e-05, + "loss": 1.9598, + "step": 892 + }, + { + "epoch": 0.0, + "learning_rate": 2.2645290581162327e-05, + "loss": 5.0853, + "step": 893 + }, + { + "epoch": 0.0, + "learning_rate": 2.244488977955912e-05, + "loss": 1.3508, + "step": 894 + }, + { + "epoch": 0.0, + "learning_rate": 2.2244488977955913e-05, + "loss": 3.2122, + "step": 895 + }, + { + "epoch": 0.0, + "learning_rate": 2.2044088176352705e-05, + "loss": 1.5134, + "step": 896 + }, + { + "epoch": 0.0, + "learning_rate": 2.18436873747495e-05, + "loss": 1.3606, + "step": 897 + }, + { + "epoch": 0.0, + "learning_rate": 2.1643286573146292e-05, + "loss": 0.9683, + "step": 898 + }, + { + "epoch": 0.0, + "learning_rate": 2.1442885771543087e-05, + "loss": 1.33, + "step": 899 + }, + { + "epoch": 0.0, + "learning_rate": 2.124248496993988e-05, + "loss": 3.1403, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 2.1042084168336674e-05, + "loss": 3.3669, + "step": 901 + }, + { + "epoch": 0.0, + "learning_rate": 2.0841683366733466e-05, + "loss": 0.0, + "step": 902 + }, + { + "epoch": 0.0, + "learning_rate": 2.064128256513026e-05, + "loss": 3.7623, + "step": 903 + }, + { + "epoch": 0.0, + "learning_rate": 2.0440881763527056e-05, + "loss": 0.1433, + "step": 904 + }, + { + "epoch": 0.0, + "learning_rate": 2.0240480961923848e-05, + "loss": 0.8717, + "step": 905 + }, + { + "epoch": 0.0, + "learning_rate": 2.0040080160320643e-05, + "loss": 2.4617, + "step": 906 + }, + { + "epoch": 0.0, + "learning_rate": 1.9839679358717435e-05, + "loss": 1.3359, + "step": 907 + }, + { + "epoch": 0.0, + "learning_rate": 1.963927855711423e-05, + "loss": 0.9416, + "step": 908 + }, + { + "epoch": 0.0, + "learning_rate": 1.9438877755511022e-05, + "loss": 0.6057, + "step": 909 + }, + { + "epoch": 0.0, + "learning_rate": 1.9238476953907817e-05, + "loss": 2.5017, + "step": 910 + }, + { + "epoch": 0.01, + "learning_rate": 1.903807615230461e-05, + "loss": 1.331, + "step": 911 + }, + { + "epoch": 0.01, + "learning_rate": 1.8837675350701404e-05, + "loss": 2.2103, + "step": 912 + }, + { + "epoch": 0.01, + "learning_rate": 1.8637274549098196e-05, + "loss": 1.4674, + "step": 913 + }, + { + "epoch": 0.01, + "learning_rate": 1.843687374749499e-05, + "loss": 3.1923, + "step": 914 + }, + { + "epoch": 0.01, + "learning_rate": 1.8236472945891783e-05, + "loss": 1.8469, + "step": 915 + }, + { + "epoch": 0.01, + "learning_rate": 1.8036072144288578e-05, + "loss": 2.0753, + "step": 916 + }, + { + "epoch": 0.01, + "learning_rate": 1.7835671342685373e-05, + "loss": 2.9633, + "step": 917 + }, + { + "epoch": 0.01, + "learning_rate": 1.7635270541082165e-05, + "loss": 0.2268, + "step": 918 + }, + { + "epoch": 0.01, + "learning_rate": 1.743486973947896e-05, + "loss": 2.7156, + "step": 919 + }, + { + "epoch": 0.01, + "learning_rate": 1.7234468937875752e-05, + "loss": 2.0701, + "step": 920 + }, + { + "epoch": 0.01, + "learning_rate": 1.7034068136272547e-05, + "loss": 1.3708, + "step": 921 + }, + { + "epoch": 0.01, + "learning_rate": 1.683366733466934e-05, + "loss": 1.6181, + "step": 922 + }, + { + "epoch": 0.01, + "learning_rate": 1.6633266533066134e-05, + "loss": 1.5673, + "step": 923 + }, + { + "epoch": 0.01, + "learning_rate": 1.6432865731462926e-05, + "loss": 0.7278, + "step": 924 + }, + { + "epoch": 0.01, + "learning_rate": 1.623246492985972e-05, + "loss": 1.1777, + "step": 925 + }, + { + "epoch": 0.01, + "learning_rate": 1.6032064128256513e-05, + "loss": 1.3947, + "step": 926 + }, + { + "epoch": 0.01, + "learning_rate": 1.5831663326653308e-05, + "loss": 1.3677, + "step": 927 + }, + { + "epoch": 0.01, + "learning_rate": 1.56312625250501e-05, + "loss": 0.8851, + "step": 928 + }, + { + "epoch": 0.01, + "learning_rate": 1.5430861723446895e-05, + "loss": 3.3576, + "step": 929 + }, + { + "epoch": 0.01, + "learning_rate": 1.523046092184369e-05, + "loss": 1.4984, + "step": 930 + }, + { + "epoch": 0.01, + "learning_rate": 1.5030060120240483e-05, + "loss": 0.8138, + "step": 931 + }, + { + "epoch": 0.01, + "learning_rate": 1.4829659318637277e-05, + "loss": 1.1379, + "step": 932 + }, + { + "epoch": 0.01, + "learning_rate": 1.462925851703407e-05, + "loss": 1.8896, + "step": 933 + }, + { + "epoch": 0.01, + "learning_rate": 1.4428857715430864e-05, + "loss": 2.0607, + "step": 934 + }, + { + "epoch": 0.01, + "learning_rate": 1.4228456913827657e-05, + "loss": 4.4332, + "step": 935 + }, + { + "epoch": 0.01, + "learning_rate": 1.4028056112224449e-05, + "loss": 0.0, + "step": 936 + }, + { + "epoch": 0.01, + "learning_rate": 1.3827655310621242e-05, + "loss": 2.1725, + "step": 937 + }, + { + "epoch": 0.01, + "learning_rate": 1.3627254509018036e-05, + "loss": 2.1489, + "step": 938 + }, + { + "epoch": 0.01, + "learning_rate": 1.342685370741483e-05, + "loss": 1.7468, + "step": 939 + }, + { + "epoch": 0.01, + "learning_rate": 1.3226452905811623e-05, + "loss": 1.3146, + "step": 940 + }, + { + "epoch": 0.01, + "learning_rate": 1.3026052104208416e-05, + "loss": 2.112, + "step": 941 + }, + { + "epoch": 0.01, + "learning_rate": 1.282565130260521e-05, + "loss": 0.0, + "step": 942 + }, + { + "epoch": 0.01, + "learning_rate": 1.2625250501002003e-05, + "loss": 1.8155, + "step": 943 + }, + { + "epoch": 0.01, + "learning_rate": 1.2424849699398798e-05, + "loss": 1.6048, + "step": 944 + }, + { + "epoch": 0.01, + "learning_rate": 1.2224448897795592e-05, + "loss": 0.2253, + "step": 945 + }, + { + "epoch": 0.01, + "learning_rate": 1.2024048096192385e-05, + "loss": 4.1294, + "step": 946 + }, + { + "epoch": 0.01, + "learning_rate": 1.1823647294589179e-05, + "loss": 2.5192, + "step": 947 + }, + { + "epoch": 0.01, + "learning_rate": 1.1623246492985972e-05, + "loss": 0.52, + "step": 948 + }, + { + "epoch": 0.01, + "learning_rate": 1.1422845691382766e-05, + "loss": 0.0, + "step": 949 + }, + { + "epoch": 0.01, + "learning_rate": 1.122244488977956e-05, + "loss": 8.9787, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 1.1022044088176353e-05, + "loss": 0.9179, + "step": 951 + }, + { + "epoch": 0.01, + "learning_rate": 1.0821643286573146e-05, + "loss": 1.3446, + "step": 952 + }, + { + "epoch": 0.01, + "learning_rate": 1.062124248496994e-05, + "loss": 2.1705, + "step": 953 + }, + { + "epoch": 0.01, + "learning_rate": 1.0420841683366733e-05, + "loss": 2.7145, + "step": 954 + }, + { + "epoch": 0.01, + "learning_rate": 1.0220440881763528e-05, + "loss": 2.4458, + "step": 955 + }, + { + "epoch": 0.01, + "learning_rate": 1.0020040080160322e-05, + "loss": 1.471, + "step": 956 + }, + { + "epoch": 0.01, + "learning_rate": 9.819639278557115e-06, + "loss": 0.0, + "step": 957 + }, + { + "epoch": 0.01, + "learning_rate": 9.619238476953909e-06, + "loss": 2.9764, + "step": 958 + }, + { + "epoch": 0.01, + "learning_rate": 9.418837675350702e-06, + "loss": 1.7468, + "step": 959 + }, + { + "epoch": 0.01, + "learning_rate": 9.218436873747496e-06, + "loss": 2.3666, + "step": 960 + }, + { + "epoch": 0.01, + "learning_rate": 9.018036072144289e-06, + "loss": 2.5105, + "step": 961 + }, + { + "epoch": 0.01, + "learning_rate": 8.817635270541082e-06, + "loss": 2.2997, + "step": 962 + }, + { + "epoch": 0.01, + "learning_rate": 8.617234468937876e-06, + "loss": 1.9821, + "step": 963 + }, + { + "epoch": 0.01, + "learning_rate": 8.41683366733467e-06, + "loss": 0.8583, + "step": 964 + }, + { + "epoch": 0.01, + "learning_rate": 8.216432865731463e-06, + "loss": 2.9195, + "step": 965 + }, + { + "epoch": 0.01, + "learning_rate": 8.016032064128256e-06, + "loss": 3.1603, + "step": 966 + }, + { + "epoch": 0.01, + "learning_rate": 7.81563126252505e-06, + "loss": 0.8383, + "step": 967 + }, + { + "epoch": 0.01, + "learning_rate": 7.615230460921845e-06, + "loss": 2.7378, + "step": 968 + }, + { + "epoch": 0.01, + "learning_rate": 7.414829659318638e-06, + "loss": 2.9844, + "step": 969 + }, + { + "epoch": 0.01, + "learning_rate": 7.214428857715432e-06, + "loss": 1.8668, + "step": 970 + }, + { + "epoch": 0.01, + "learning_rate": 7.0140280561122245e-06, + "loss": 1.6183, + "step": 971 + }, + { + "epoch": 0.01, + "learning_rate": 6.813627254509018e-06, + "loss": 4.8758, + "step": 972 + }, + { + "epoch": 0.01, + "learning_rate": 6.613226452905811e-06, + "loss": 2.247, + "step": 973 + }, + { + "epoch": 0.01, + "learning_rate": 6.412825651302605e-06, + "loss": 1.6818, + "step": 974 + }, + { + "epoch": 0.01, + "learning_rate": 6.212424849699399e-06, + "loss": 2.971, + "step": 975 + }, + { + "epoch": 0.01, + "learning_rate": 6.012024048096193e-06, + "loss": 0.0, + "step": 976 + }, + { + "epoch": 0.01, + "learning_rate": 5.811623246492986e-06, + "loss": 0.0, + "step": 977 + }, + { + "epoch": 0.01, + "learning_rate": 5.61122244488978e-06, + "loss": 1.3841, + "step": 978 + }, + { + "epoch": 0.01, + "learning_rate": 5.410821643286573e-06, + "loss": 1.3487, + "step": 979 + }, + { + "epoch": 0.01, + "learning_rate": 5.2104208416833665e-06, + "loss": 2.0792, + "step": 980 + }, + { + "epoch": 0.01, + "learning_rate": 5.010020040080161e-06, + "loss": 2.5128, + "step": 981 + }, + { + "epoch": 0.01, + "learning_rate": 4.809619238476954e-06, + "loss": 4.2402, + "step": 982 + }, + { + "epoch": 0.01, + "learning_rate": 4.609218436873748e-06, + "loss": 0.4601, + "step": 983 + }, + { + "epoch": 0.01, + "learning_rate": 4.408817635270541e-06, + "loss": 2.5609, + "step": 984 + }, + { + "epoch": 0.01, + "learning_rate": 4.208416833667335e-06, + "loss": 2.3339, + "step": 985 + }, + { + "epoch": 0.01, + "learning_rate": 4.008016032064128e-06, + "loss": 0.9409, + "step": 986 + }, + { + "epoch": 0.01, + "learning_rate": 3.8076152304609225e-06, + "loss": 5.0006, + "step": 987 + }, + { + "epoch": 0.01, + "learning_rate": 3.607214428857716e-06, + "loss": 3.4097, + "step": 988 + }, + { + "epoch": 0.01, + "learning_rate": 3.406813627254509e-06, + "loss": 2.0473, + "step": 989 + }, + { + "epoch": 0.01, + "learning_rate": 3.2064128256513024e-06, + "loss": 1.3606, + "step": 990 + }, + { + "epoch": 0.01, + "learning_rate": 3.0060120240480963e-06, + "loss": 1.5403, + "step": 991 + }, + { + "epoch": 0.01, + "learning_rate": 2.80561122244489e-06, + "loss": 1.9095, + "step": 992 + }, + { + "epoch": 0.01, + "learning_rate": 2.6052104208416833e-06, + "loss": 0.0, + "step": 993 + }, + { + "epoch": 0.01, + "learning_rate": 2.404809619238477e-06, + "loss": 2.7986, + "step": 994 + }, + { + "epoch": 0.01, + "learning_rate": 2.2044088176352706e-06, + "loss": 1.1042, + "step": 995 + }, + { + "epoch": 0.01, + "learning_rate": 2.004008016032064e-06, + "loss": 0.983, + "step": 996 + }, + { + "epoch": 0.01, + "learning_rate": 1.803607214428858e-06, + "loss": 2.6895, + "step": 997 + }, + { + "epoch": 0.01, + "learning_rate": 1.6032064128256512e-06, + "loss": 1.5531, + "step": 998 + }, + { + "epoch": 0.01, + "learning_rate": 1.402805611222445e-06, + "loss": 1.7102, + "step": 999 + }, + { + "epoch": 0.01, + "learning_rate": 1.2024048096192386e-06, + "loss": 6.9092, + "step": 1000 + } + ], + "logging_steps": 1, + "max_steps": 1000, + "num_train_epochs": 1, + "save_steps": 50, + "total_flos": 116009623166976.0, + "trial_name": null, + "trial_params": null +}