|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.992481203007518, |
|
"global_step": 330, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00011764705882352942, |
|
"loss": 5.1019, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00023529411764705883, |
|
"loss": 4.8202, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00035294117647058826, |
|
"loss": 4.4144, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00047058823529411766, |
|
"loss": 4.0763, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005882352941176471, |
|
"loss": 3.7584, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0007058823529411765, |
|
"loss": 3.583, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008235294117647058, |
|
"loss": 3.4246, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009411764705882353, |
|
"loss": 3.301, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0009999748146823375, |
|
"loss": 3.225, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0009997733473639876, |
|
"loss": 3.1705, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0009993704939095377, |
|
"loss": 3.0495, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0009987664166507748, |
|
"loss": 2.9806, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0009979613590036108, |
|
"loss": 2.9235, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0009969556453699965, |
|
"loss": 2.8419, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0009957496810072027, |
|
"loss": 2.7978, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0009943439518645192, |
|
"loss": 2.7061, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.585865020751953, |
|
"eval_runtime": 6.5169, |
|
"eval_samples_per_second": 68.13, |
|
"eval_steps_per_second": 17.033, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0009927390243874398, |
|
"loss": 2.9771, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0009909355452894098, |
|
"loss": 2.5679, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0009889342412912295, |
|
"loss": 2.5551, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0009867359188282193, |
|
"loss": 2.4842, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0009843414637252614, |
|
"loss": 2.4689, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0009817518408398536, |
|
"loss": 2.4216, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.000978968093673314, |
|
"loss": 2.4006, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0009759913439502981, |
|
"loss": 2.342, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0009728227911667932, |
|
"loss": 2.3004, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0009694637121067764, |
|
"loss": 2.2644, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0009659154603277282, |
|
"loss": 2.2406, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0009621794656152091, |
|
"loss": 2.2076, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0009582572334067213, |
|
"loss": 2.1834, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0009541503441850843, |
|
"loss": 2.1652, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0009498604528415731, |
|
"loss": 2.1353, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0009453892880090695, |
|
"loss": 2.1394, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.000940738651365503, |
|
"loss": 2.08, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.996474027633667, |
|
"eval_runtime": 6.333, |
|
"eval_samples_per_second": 70.109, |
|
"eval_steps_per_second": 17.527, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.000935910416907854, |
|
"loss": 2.2925, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0009309065301970192, |
|
"loss": 2.0167, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0009257290075738364, |
|
"loss": 1.9594, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0009203799353465918, |
|
"loss": 1.9508, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0009148614689503306, |
|
"loss": 1.9579, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0009091758320783139, |
|
"loss": 1.9166, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0009033253157859713, |
|
"loss": 1.8802, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0008973122775677078, |
|
"loss": 1.8642, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0008911391404069408, |
|
"loss": 1.8552, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0008848083917997462, |
|
"loss": 1.8637, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0008783225827525098, |
|
"loss": 1.852, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0008716843267539868, |
|
"loss": 1.7914, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0008648962987221837, |
|
"loss": 1.8048, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0008579612339264867, |
|
"loss": 1.7966, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0008508819268854713, |
|
"loss": 1.7871, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0008436612302408376, |
|
"loss": 1.7623, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.724814772605896, |
|
"eval_runtime": 7.0329, |
|
"eval_samples_per_second": 63.132, |
|
"eval_steps_per_second": 15.783, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0008363020536079239, |
|
"loss": 1.9929, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0008288073624032633, |
|
"loss": 1.7159, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0008211801766496537, |
|
"loss": 1.6946, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.000813423569759226, |
|
"loss": 1.6397, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0008055406672949956, |
|
"loss": 1.669, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0007975346457114034, |
|
"loss": 1.6531, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0007894087310743467, |
|
"loss": 1.6478, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.0007811661977612201, |
|
"loss": 1.6231, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0007728103671414887, |
|
"loss": 1.6478, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0007643446062383273, |
|
"loss": 1.6287, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0007557723263718596, |
|
"loss": 1.5939, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0007470969817845518, |
|
"loss": 1.6309, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.000738322068249308, |
|
"loss": 1.5665, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0007294511216608307, |
|
"loss": 1.5953, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0007204877166108151, |
|
"loss": 1.5987, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0007114354649475498, |
|
"loss": 1.5961, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0007022980143205046, |
|
"loss": 1.5408, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 1.5449421405792236, |
|
"eval_runtime": 7.1026, |
|
"eval_samples_per_second": 62.512, |
|
"eval_steps_per_second": 15.628, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0006930790467104916, |
|
"loss": 1.7394, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0006837822769459941, |
|
"loss": 1.5015, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.000674411451206257, |
|
"loss": 1.4962, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.0006649703455117458, |
|
"loss": 1.496, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0006554627642025807, |
|
"loss": 1.4703, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0006458925384055585, |
|
"loss": 1.474, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0006362635244903819, |
|
"loss": 1.4663, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0006265796025157153, |
|
"loss": 1.4556, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0006168446746656973, |
|
"loss": 1.4779, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0006070626636775348, |
|
"loss": 1.4687, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0005972375112608181, |
|
"loss": 1.4614, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.000587373176509189, |
|
"loss": 1.4615, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0005774736343050039, |
|
"loss": 1.4479, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0005675428737176367, |
|
"loss": 1.427, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.000557584896396062, |
|
"loss": 1.4327, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.0005476037149563726, |
|
"loss": 1.4147, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 1.4437452554702759, |
|
"eval_runtime": 6.9311, |
|
"eval_samples_per_second": 64.059, |
|
"eval_steps_per_second": 16.015, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0005376033513648743, |
|
"loss": 1.5806, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0005275878353174165, |
|
"loss": 1.3567, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0005175612026156045, |
|
"loss": 1.3639, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0005075274935405553, |
|
"loss": 1.3578, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0004974907512248451, |
|
"loss": 1.3787, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0004874550200233085, |
|
"loss": 1.3406, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0004774243438833481, |
|
"loss": 1.368, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00046740276471540364, |
|
"loss": 1.3549, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.00045739432076424515, |
|
"loss": 1.3655, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00044740304498174226, |
|
"loss": 1.35, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0004374329634017669, |
|
"loss": 1.3604, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.00042748809351788165, |
|
"loss": 1.3692, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0004175724426644724, |
|
"loss": 1.3231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00040769000640197205, |
|
"loss": 1.3361, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.00039784476690683085, |
|
"loss": 1.3391, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.0003880406913668777, |
|
"loss": 1.3259, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0003782817303827226, |
|
"loss": 1.3593, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 1.3767662048339844, |
|
"eval_runtime": 6.7366, |
|
"eval_samples_per_second": 65.908, |
|
"eval_steps_per_second": 16.477, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0003685718163758427, |
|
"loss": 1.4657, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0003589148620039941, |
|
"loss": 1.2816, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00034931475858458635, |
|
"loss": 1.2989, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0003397753745266571, |
|
"loss": 1.3021, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0003303005537720778, |
|
"loss": 1.2478, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00032089411424661863, |
|
"loss": 1.2827, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.0003115598463214956, |
|
"loss": 1.2458, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0003023015112860228, |
|
"loss": 1.2954, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00029312283983198097, |
|
"loss": 1.2782, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0002840275305503186, |
|
"loss": 1.2653, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00027501924844078535, |
|
"loss": 1.2701, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.0002661016234351018, |
|
"loss": 1.2862, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00025727824893426166, |
|
"loss": 1.277, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00024855268036055346, |
|
"loss": 1.2791, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00023992843372488355, |
|
"loss": 1.266, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00023140898420998424, |
|
"loss": 1.2703, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_loss": 1.3362174034118652, |
|
"eval_runtime": 6.9977, |
|
"eval_samples_per_second": 63.449, |
|
"eval_steps_per_second": 15.862, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0002229977647700707, |
|
"loss": 1.4282, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00021469816474751563, |
|
"loss": 1.2356, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00020651352850709653, |
|
"loss": 1.247, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00019844715408836789, |
|
"loss": 1.2564, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00019050229187669949, |
|
"loss": 1.2187, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00018268214329351796, |
|
"loss": 1.2388, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00017498985950627793, |
|
"loss": 1.2368, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00016742854015868347, |
|
"loss": 1.212, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00016000123212167155, |
|
"loss": 1.2377, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00015271092826566108, |
|
"loss": 1.2146, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0001455605662545592, |
|
"loss": 1.2209, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00013855302736201687, |
|
"loss": 1.2319, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00013169113531040461, |
|
"loss": 1.2271, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.00012497765513297976, |
|
"loss": 1.2021, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00011841529205970281, |
|
"loss": 1.2264, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.00011200669042715162, |
|
"loss": 1.2228, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00010575443261297229, |
|
"loss": 1.2528, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_loss": 1.3175491094589233, |
|
"eval_runtime": 6.7551, |
|
"eval_samples_per_second": 65.729, |
|
"eval_steps_per_second": 16.432, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.96610379952989e-05, |
|
"loss": 1.3448, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.37289619375562e-05, |
|
"loss": 1.1886, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 8.7960594799059e-05, |
|
"loss": 1.2062, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.235826097180565e-05, |
|
"loss": 1.2207, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 7.692421794385312e-05, |
|
"loss": 1.2095, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 7.166065538964955e-05, |
|
"loss": 1.1986, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 6.656969428769566e-05, |
|
"loss": 1.1962, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 6.165338606588517e-05, |
|
"loss": 1.2164, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 5.6913711774872144e-05, |
|
"loss": 1.1904, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 5.235258128979675e-05, |
|
"loss": 1.2172, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 4.797183254069176e-05, |
|
"loss": 1.2345, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 4.3773230771879005e-05, |
|
"loss": 1.1994, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 3.975846783065662e-05, |
|
"loss": 1.1963, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 3.5929161485559694e-05, |
|
"loss": 1.1995, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 3.2286854774472905e-05, |
|
"loss": 1.1779, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 2.883301538285582e-05, |
|
"loss": 1.1981, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 1.3090853691101074, |
|
"eval_runtime": 6.8294, |
|
"eval_samples_per_second": 65.013, |
|
"eval_steps_per_second": 16.253, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 2.5569035052332156e-05, |
|
"loss": 1.3461, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 2.2496229019879632e-05, |
|
"loss": 1.1812, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 1.9615835487849675e-05, |
|
"loss": 1.177, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 1.6929015125027312e-05, |
|
"loss": 1.1856, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.443685059893396e-05, |
|
"loss": 1.1984, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 1.2140346139561276e-05, |
|
"loss": 1.1711, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 1.0040427134711649e-05, |
|
"loss": 1.1905, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 8.137939757108525e-06, |
|
"loss": 1.1924, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 6.433650623427378e-06, |
|
"loss": 1.2033, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.928246485383147e-06, |
|
"loss": 1.2159, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 3.6223339530006004e-06, |
|
"loss": 1.1831, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 2.516439250177749e-06, |
|
"loss": 1.2011, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.611008002641412e-06, |
|
"loss": 1.1991, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 9.064050583800221e-07, |
|
"loss": 1.1786, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 4.029143406262259e-07, |
|
"loss": 1.1945, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1.0073873344895734e-07, |
|
"loss": 1.1972, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0, |
|
"loss": 1.2117, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_loss": 1.3089168071746826, |
|
"eval_runtime": 6.8863, |
|
"eval_samples_per_second": 64.476, |
|
"eval_steps_per_second": 16.119, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"step": 330, |
|
"total_flos": 1.1112428209176576e+16, |
|
"train_loss": 1.7149053530259566, |
|
"train_runtime": 770.4396, |
|
"train_samples_per_second": 110.534, |
|
"train_steps_per_second": 0.428 |
|
} |
|
], |
|
"max_steps": 330, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.1112428209176576e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|