|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004545454545454545, |
|
"grad_norm": 3.4694509506225586, |
|
"learning_rate": 0.0004992424242424243, |
|
"loss": 1.6877, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00909090909090909, |
|
"grad_norm": 2.844703435897827, |
|
"learning_rate": 0.0004984848484848485, |
|
"loss": 1.7528, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.013636363636363636, |
|
"grad_norm": 4.147863388061523, |
|
"learning_rate": 0.0004977272727272727, |
|
"loss": 2.6111, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01818181818181818, |
|
"grad_norm": 2.755852699279785, |
|
"learning_rate": 0.000496969696969697, |
|
"loss": 1.9464, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.022727272727272728, |
|
"grad_norm": 4.124767780303955, |
|
"learning_rate": 0.0004962121212121212, |
|
"loss": 2.2121, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02727272727272727, |
|
"grad_norm": 3.923773765563965, |
|
"learning_rate": 0.0004954545454545455, |
|
"loss": 2.6991, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.031818181818181815, |
|
"grad_norm": 4.66182279586792, |
|
"learning_rate": 0.0004946969696969697, |
|
"loss": 2.176, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03636363636363636, |
|
"grad_norm": 3.7830166816711426, |
|
"learning_rate": 0.000493939393939394, |
|
"loss": 2.7265, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04090909090909091, |
|
"grad_norm": 3.966615676879883, |
|
"learning_rate": 0.0004931818181818182, |
|
"loss": 3.1926, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 3.281916618347168, |
|
"learning_rate": 0.0004924242424242425, |
|
"loss": 2.1706, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.288072109222412, |
|
"learning_rate": 0.0004916666666666666, |
|
"loss": 3.1348, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05454545454545454, |
|
"grad_norm": 3.2815868854522705, |
|
"learning_rate": 0.0004909090909090909, |
|
"loss": 2.2711, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05909090909090909, |
|
"grad_norm": 2.75382924079895, |
|
"learning_rate": 0.0004901515151515152, |
|
"loss": 1.762, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06363636363636363, |
|
"grad_norm": 2.6767005920410156, |
|
"learning_rate": 0.0004893939393939393, |
|
"loss": 1.591, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06818181818181818, |
|
"grad_norm": 3.613719940185547, |
|
"learning_rate": 0.0004886363636363636, |
|
"loss": 2.1644, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07272727272727272, |
|
"grad_norm": 3.3433680534362793, |
|
"learning_rate": 0.00048787878787878784, |
|
"loss": 2.9727, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07727272727272727, |
|
"grad_norm": 3.7183644771575928, |
|
"learning_rate": 0.0004871212121212121, |
|
"loss": 1.8928, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08181818181818182, |
|
"grad_norm": 4.1484575271606445, |
|
"learning_rate": 0.0004863636363636364, |
|
"loss": 2.6002, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08636363636363636, |
|
"grad_norm": 3.281487464904785, |
|
"learning_rate": 0.0004856060606060606, |
|
"loss": 1.9074, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 3.0067665576934814, |
|
"learning_rate": 0.0004848484848484849, |
|
"loss": 2.0375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09545454545454546, |
|
"grad_norm": 2.8053739070892334, |
|
"learning_rate": 0.00048409090909090906, |
|
"loss": 1.7248, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.619422435760498, |
|
"learning_rate": 0.00048333333333333334, |
|
"loss": 2.6251, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10454545454545454, |
|
"grad_norm": 3.1278717517852783, |
|
"learning_rate": 0.0004825757575757576, |
|
"loss": 2.7065, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10909090909090909, |
|
"grad_norm": 2.723963975906372, |
|
"learning_rate": 0.00048181818181818184, |
|
"loss": 1.8838, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 2.6069819927215576, |
|
"learning_rate": 0.0004810606060606061, |
|
"loss": 1.9516, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11818181818181818, |
|
"grad_norm": 2.426720380783081, |
|
"learning_rate": 0.0004803030303030303, |
|
"loss": 2.1439, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12272727272727273, |
|
"grad_norm": 2.3633666038513184, |
|
"learning_rate": 0.00047954545454545456, |
|
"loss": 2.0113, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12727272727272726, |
|
"grad_norm": 2.988654136657715, |
|
"learning_rate": 0.0004787878787878788, |
|
"loss": 2.1691, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1318181818181818, |
|
"grad_norm": 2.713346481323242, |
|
"learning_rate": 0.00047803030303030306, |
|
"loss": 2.1206, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 2.9896864891052246, |
|
"learning_rate": 0.0004772727272727273, |
|
"loss": 2.139, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1409090909090909, |
|
"grad_norm": 2.7176098823547363, |
|
"learning_rate": 0.0004765151515151515, |
|
"loss": 2.2194, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 2.6730499267578125, |
|
"learning_rate": 0.0004757575757575758, |
|
"loss": 1.5875, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.921717643737793, |
|
"learning_rate": 0.000475, |
|
"loss": 1.9037, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.15454545454545454, |
|
"grad_norm": 2.800473690032959, |
|
"learning_rate": 0.0004742424242424243, |
|
"loss": 1.8628, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1590909090909091, |
|
"grad_norm": 2.7188827991485596, |
|
"learning_rate": 0.0004734848484848485, |
|
"loss": 2.1262, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.16363636363636364, |
|
"grad_norm": 2.794339895248413, |
|
"learning_rate": 0.0004727272727272727, |
|
"loss": 2.3508, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.16818181818181818, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0004727272727272727, |
|
"loss": 2.4565, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.17272727272727273, |
|
"grad_norm": 3.1815218925476074, |
|
"learning_rate": 0.000471969696969697, |
|
"loss": 2.2242, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.17727272727272728, |
|
"grad_norm": 3.4017906188964844, |
|
"learning_rate": 0.0004712121212121212, |
|
"loss": 2.1975, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 2.7533328533172607, |
|
"learning_rate": 0.00047045454545454544, |
|
"loss": 2.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18636363636363637, |
|
"grad_norm": 2.8896608352661133, |
|
"learning_rate": 0.0004696969696969697, |
|
"loss": 1.8716, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.19090909090909092, |
|
"grad_norm": 2.501896858215332, |
|
"learning_rate": 0.00046893939393939394, |
|
"loss": 2.3533, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.19545454545454546, |
|
"grad_norm": 2.2779133319854736, |
|
"learning_rate": 0.0004681818181818182, |
|
"loss": 1.8062, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.78344988822937, |
|
"learning_rate": 0.00046742424242424244, |
|
"loss": 2.4259, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.20454545454545456, |
|
"grad_norm": 2.230311393737793, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 1.8689, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.20909090909090908, |
|
"grad_norm": 3.4158501625061035, |
|
"learning_rate": 0.0004659090909090909, |
|
"loss": 2.3214, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.21363636363636362, |
|
"grad_norm": 2.355423927307129, |
|
"learning_rate": 0.00046515151515151516, |
|
"loss": 1.7059, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.21818181818181817, |
|
"grad_norm": 2.495224952697754, |
|
"learning_rate": 0.00046439393939393944, |
|
"loss": 1.3574, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.22272727272727272, |
|
"grad_norm": 2.829482078552246, |
|
"learning_rate": 0.00046363636363636366, |
|
"loss": 2.2364, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 2.390627861022949, |
|
"learning_rate": 0.0004628787878787879, |
|
"loss": 1.5226, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2318181818181818, |
|
"grad_norm": 2.2006781101226807, |
|
"learning_rate": 0.0004621212121212121, |
|
"loss": 1.6848, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.23636363636363636, |
|
"grad_norm": 2.737412452697754, |
|
"learning_rate": 0.0004613636363636364, |
|
"loss": 1.9071, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2409090909090909, |
|
"grad_norm": 3.4992029666900635, |
|
"learning_rate": 0.00046060606060606066, |
|
"loss": 2.0172, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.24545454545454545, |
|
"grad_norm": 3.196709394454956, |
|
"learning_rate": 0.0004598484848484848, |
|
"loss": 2.6357, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.9549436569213867, |
|
"learning_rate": 0.0004590909090909091, |
|
"loss": 2.9822, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2545454545454545, |
|
"grad_norm": 2.788527488708496, |
|
"learning_rate": 0.0004583333333333333, |
|
"loss": 2.3458, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2590909090909091, |
|
"grad_norm": 3.2539544105529785, |
|
"learning_rate": 0.0004575757575757576, |
|
"loss": 1.9336, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.2636363636363636, |
|
"grad_norm": 2.859744071960449, |
|
"learning_rate": 0.0004568181818181819, |
|
"loss": 2.4852, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2681818181818182, |
|
"grad_norm": 2.6832542419433594, |
|
"learning_rate": 0.00045606060606060605, |
|
"loss": 2.0347, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 4.593046188354492, |
|
"learning_rate": 0.0004553030303030303, |
|
"loss": 2.2415, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2772727272727273, |
|
"grad_norm": 3.3459599018096924, |
|
"learning_rate": 0.00045454545454545455, |
|
"loss": 2.2843, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.2818181818181818, |
|
"grad_norm": 2.1371498107910156, |
|
"learning_rate": 0.0004537878787878788, |
|
"loss": 1.9185, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2863636363636364, |
|
"grad_norm": 2.3603177070617676, |
|
"learning_rate": 0.000453030303030303, |
|
"loss": 1.9426, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 2.5435550212860107, |
|
"learning_rate": 0.00045227272727272727, |
|
"loss": 2.4551, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.29545454545454547, |
|
"grad_norm": 2.5501880645751953, |
|
"learning_rate": 0.00045151515151515154, |
|
"loss": 1.9113, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.8549928665161133, |
|
"learning_rate": 0.00045075757575757577, |
|
"loss": 2.2465, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.30454545454545456, |
|
"grad_norm": 2.396857976913452, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 2.1836, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3090909090909091, |
|
"grad_norm": 2.7043912410736084, |
|
"learning_rate": 0.0004492424242424242, |
|
"loss": 2.1715, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.31363636363636366, |
|
"grad_norm": 3.1579270362854004, |
|
"learning_rate": 0.0004484848484848485, |
|
"loss": 2.4971, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 2.3673815727233887, |
|
"learning_rate": 0.00044772727272727276, |
|
"loss": 1.7927, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32272727272727275, |
|
"grad_norm": 2.776143789291382, |
|
"learning_rate": 0.000446969696969697, |
|
"loss": 1.9065, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.32727272727272727, |
|
"grad_norm": 2.937574625015259, |
|
"learning_rate": 0.00044621212121212126, |
|
"loss": 1.6579, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.33181818181818185, |
|
"grad_norm": 2.937641143798828, |
|
"learning_rate": 0.00044545454545454543, |
|
"loss": 2.2259, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.33636363636363636, |
|
"grad_norm": 2.305844306945801, |
|
"learning_rate": 0.0004446969696969697, |
|
"loss": 2.0766, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3409090909090909, |
|
"grad_norm": 3.1322784423828125, |
|
"learning_rate": 0.000443939393939394, |
|
"loss": 2.3208, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.34545454545454546, |
|
"grad_norm": 2.7713890075683594, |
|
"learning_rate": 0.0004431818181818182, |
|
"loss": 2.4553, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.275108575820923, |
|
"learning_rate": 0.00044242424242424243, |
|
"loss": 1.4249, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.35454545454545455, |
|
"grad_norm": 2.502997875213623, |
|
"learning_rate": 0.00044166666666666665, |
|
"loss": 2.4343, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.35909090909090907, |
|
"grad_norm": 2.1204617023468018, |
|
"learning_rate": 0.00044090909090909093, |
|
"loss": 1.9248, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 2.9564898014068604, |
|
"learning_rate": 0.00044015151515151515, |
|
"loss": 1.6672, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36818181818181817, |
|
"grad_norm": 3.0879478454589844, |
|
"learning_rate": 0.0004393939393939394, |
|
"loss": 2.0625, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.37272727272727274, |
|
"grad_norm": 3.1532368659973145, |
|
"learning_rate": 0.00043863636363636365, |
|
"loss": 2.3648, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.37727272727272726, |
|
"grad_norm": 2.1850852966308594, |
|
"learning_rate": 0.00043787878787878787, |
|
"loss": 2.4397, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.38181818181818183, |
|
"grad_norm": 2.241144895553589, |
|
"learning_rate": 0.00043712121212121215, |
|
"loss": 2.5193, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.38636363636363635, |
|
"grad_norm": 3.6500165462493896, |
|
"learning_rate": 0.00043636363636363637, |
|
"loss": 2.1096, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.39090909090909093, |
|
"grad_norm": 2.0548837184906006, |
|
"learning_rate": 0.0004356060606060606, |
|
"loss": 2.359, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.39545454545454545, |
|
"grad_norm": 2.4512407779693604, |
|
"learning_rate": 0.00043484848484848487, |
|
"loss": 1.5353, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9420820474624634, |
|
"learning_rate": 0.0004340909090909091, |
|
"loss": 1.688, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.40454545454545454, |
|
"grad_norm": 1.845475673675537, |
|
"learning_rate": 0.00043333333333333337, |
|
"loss": 1.6642, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 2.2735133171081543, |
|
"learning_rate": 0.0004325757575757576, |
|
"loss": 1.89, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41363636363636364, |
|
"grad_norm": 2.203105926513672, |
|
"learning_rate": 0.0004318181818181818, |
|
"loss": 1.9556, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.41818181818181815, |
|
"grad_norm": 1.8748105764389038, |
|
"learning_rate": 0.00043106060606060603, |
|
"loss": 1.5452, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.42272727272727273, |
|
"grad_norm": 2.8958442211151123, |
|
"learning_rate": 0.0004303030303030303, |
|
"loss": 1.9343, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.42727272727272725, |
|
"grad_norm": 2.7512269020080566, |
|
"learning_rate": 0.0004295454545454546, |
|
"loss": 2.4008, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4318181818181818, |
|
"grad_norm": 2.748307228088379, |
|
"learning_rate": 0.00042878787878787876, |
|
"loss": 2.3614, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 3.7091145515441895, |
|
"learning_rate": 0.00042803030303030303, |
|
"loss": 1.5435, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4409090909090909, |
|
"grad_norm": 2.0227293968200684, |
|
"learning_rate": 0.00042727272727272726, |
|
"loss": 1.5536, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.44545454545454544, |
|
"grad_norm": 1.868477702140808, |
|
"learning_rate": 0.00042651515151515153, |
|
"loss": 2.0019, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.2410340309143066, |
|
"learning_rate": 0.0004257575757575758, |
|
"loss": 2.0278, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 2.4206206798553467, |
|
"learning_rate": 0.000425, |
|
"loss": 2.6757, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4590909090909091, |
|
"grad_norm": 2.6481056213378906, |
|
"learning_rate": 0.00042424242424242425, |
|
"loss": 2.004, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4636363636363636, |
|
"grad_norm": 2.493495225906372, |
|
"learning_rate": 0.0004234848484848485, |
|
"loss": 2.2102, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4681818181818182, |
|
"grad_norm": 2.588595390319824, |
|
"learning_rate": 0.00042272727272727275, |
|
"loss": 2.3133, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4727272727272727, |
|
"grad_norm": 2.185718536376953, |
|
"learning_rate": 0.00042196969696969703, |
|
"loss": 2.5506, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4772727272727273, |
|
"grad_norm": 2.155470132827759, |
|
"learning_rate": 0.0004212121212121212, |
|
"loss": 2.2074, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4818181818181818, |
|
"grad_norm": 2.518435001373291, |
|
"learning_rate": 0.0004204545454545455, |
|
"loss": 1.8589, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4863636363636364, |
|
"grad_norm": 2.5512635707855225, |
|
"learning_rate": 0.0004196969696969697, |
|
"loss": 1.9953, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4909090909090909, |
|
"grad_norm": 2.238809108734131, |
|
"learning_rate": 0.00041893939393939397, |
|
"loss": 2.2441, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.4954545454545455, |
|
"grad_norm": 1.8442784547805786, |
|
"learning_rate": 0.00041818181818181814, |
|
"loss": 1.8682, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.4844954013824463, |
|
"learning_rate": 0.0004174242424242424, |
|
"loss": 1.9522, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5045454545454545, |
|
"grad_norm": 1.9704878330230713, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 1.4167, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.509090909090909, |
|
"grad_norm": 2.2447972297668457, |
|
"learning_rate": 0.0004159090909090909, |
|
"loss": 1.7897, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5136363636363637, |
|
"grad_norm": 2.530410051345825, |
|
"learning_rate": 0.0004151515151515152, |
|
"loss": 2.5473, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5181818181818182, |
|
"grad_norm": 2.450526714324951, |
|
"learning_rate": 0.00041439393939393936, |
|
"loss": 2.0436, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5227272727272727, |
|
"grad_norm": 2.4212632179260254, |
|
"learning_rate": 0.00041363636363636364, |
|
"loss": 2.1118, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5272727272727272, |
|
"grad_norm": 1.9820351600646973, |
|
"learning_rate": 0.0004128787878787879, |
|
"loss": 1.9614, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5318181818181819, |
|
"grad_norm": 2.467961549758911, |
|
"learning_rate": 0.00041212121212121214, |
|
"loss": 1.9572, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5363636363636364, |
|
"grad_norm": 2.2693068981170654, |
|
"learning_rate": 0.00041136363636363636, |
|
"loss": 1.9033, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5409090909090909, |
|
"grad_norm": 2.299119710922241, |
|
"learning_rate": 0.0004106060606060606, |
|
"loss": 1.9848, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 2.0330560207366943, |
|
"learning_rate": 0.00040984848484848486, |
|
"loss": 1.3706, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.197603702545166, |
|
"learning_rate": 0.00040909090909090913, |
|
"loss": 2.1687, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5545454545454546, |
|
"grad_norm": 2.7206549644470215, |
|
"learning_rate": 0.00040833333333333336, |
|
"loss": 2.3247, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5590909090909091, |
|
"grad_norm": 2.882654905319214, |
|
"learning_rate": 0.0004075757575757576, |
|
"loss": 1.6946, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5636363636363636, |
|
"grad_norm": 2.3815231323242188, |
|
"learning_rate": 0.0004068181818181818, |
|
"loss": 1.862, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 2.4142932891845703, |
|
"learning_rate": 0.0004060606060606061, |
|
"loss": 2.0066, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5727272727272728, |
|
"grad_norm": 2.6641104221343994, |
|
"learning_rate": 0.0004053030303030303, |
|
"loss": 1.9456, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5772727272727273, |
|
"grad_norm": 2.983633518218994, |
|
"learning_rate": 0.0004045454545454546, |
|
"loss": 2.0049, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 2.1993696689605713, |
|
"learning_rate": 0.0004037878787878788, |
|
"loss": 1.6259, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5863636363636363, |
|
"grad_norm": 3.0398480892181396, |
|
"learning_rate": 0.000403030303030303, |
|
"loss": 1.6535, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 2.298558235168457, |
|
"learning_rate": 0.0004022727272727273, |
|
"loss": 1.8693, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5954545454545455, |
|
"grad_norm": 2.5126214027404785, |
|
"learning_rate": 0.0004015151515151515, |
|
"loss": 2.1854, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.0419557094573975, |
|
"learning_rate": 0.00040075757575757574, |
|
"loss": 1.5857, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6045454545454545, |
|
"grad_norm": 2.4304699897766113, |
|
"learning_rate": 0.0004, |
|
"loss": 2.347, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6090909090909091, |
|
"grad_norm": 3.098036050796509, |
|
"learning_rate": 0.00039924242424242424, |
|
"loss": 2.9748, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6136363636363636, |
|
"grad_norm": 2.4403679370880127, |
|
"learning_rate": 0.0003984848484848485, |
|
"loss": 1.9342, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6181818181818182, |
|
"grad_norm": 2.832394599914551, |
|
"learning_rate": 0.00039772727272727274, |
|
"loss": 2.132, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6227272727272727, |
|
"grad_norm": 2.601243019104004, |
|
"learning_rate": 0.00039696969696969696, |
|
"loss": 2.3074, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6272727272727273, |
|
"grad_norm": 2.2306132316589355, |
|
"learning_rate": 0.00039621212121212124, |
|
"loss": 1.6065, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6318181818181818, |
|
"grad_norm": 2.393157720565796, |
|
"learning_rate": 0.00039545454545454546, |
|
"loss": 1.7411, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 2.174208164215088, |
|
"learning_rate": 0.00039469696969696974, |
|
"loss": 1.3876, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6409090909090909, |
|
"grad_norm": 2.3376457691192627, |
|
"learning_rate": 0.0003939393939393939, |
|
"loss": 2.3752, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6454545454545455, |
|
"grad_norm": 2.141479969024658, |
|
"learning_rate": 0.0003931818181818182, |
|
"loss": 1.3948, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.4302890300750732, |
|
"learning_rate": 0.0003924242424242424, |
|
"loss": 2.5493, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6545454545454545, |
|
"grad_norm": 1.9080986976623535, |
|
"learning_rate": 0.0003916666666666667, |
|
"loss": 1.4652, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6590909090909091, |
|
"grad_norm": 2.438420057296753, |
|
"learning_rate": 0.00039090909090909096, |
|
"loss": 2.0606, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6636363636363637, |
|
"grad_norm": 2.028294563293457, |
|
"learning_rate": 0.0003901515151515151, |
|
"loss": 1.8798, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6681818181818182, |
|
"grad_norm": 2.4235315322875977, |
|
"learning_rate": 0.0003893939393939394, |
|
"loss": 1.8855, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6727272727272727, |
|
"grad_norm": 2.430391788482666, |
|
"learning_rate": 0.0003886363636363636, |
|
"loss": 1.7753, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6772727272727272, |
|
"grad_norm": 2.053199052810669, |
|
"learning_rate": 0.0003878787878787879, |
|
"loss": 2.1466, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 2.067093849182129, |
|
"learning_rate": 0.0003871212121212122, |
|
"loss": 1.7715, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6863636363636364, |
|
"grad_norm": 2.047165632247925, |
|
"learning_rate": 0.00038636363636363635, |
|
"loss": 1.8703, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6909090909090909, |
|
"grad_norm": 2.567540168762207, |
|
"learning_rate": 0.0003856060606060606, |
|
"loss": 1.7973, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6954545454545454, |
|
"grad_norm": 2.5282599925994873, |
|
"learning_rate": 0.00038484848484848485, |
|
"loss": 2.422, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.0428948402404785, |
|
"learning_rate": 0.0003840909090909091, |
|
"loss": 1.5709, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7045454545454546, |
|
"grad_norm": 2.032672166824341, |
|
"learning_rate": 0.00038333333333333334, |
|
"loss": 1.8584, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7090909090909091, |
|
"grad_norm": 2.4448535442352295, |
|
"learning_rate": 0.00038257575757575757, |
|
"loss": 2.069, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7136363636363636, |
|
"grad_norm": 1.6503087282180786, |
|
"learning_rate": 0.00038181818181818184, |
|
"loss": 1.5194, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7181818181818181, |
|
"grad_norm": 2.5853593349456787, |
|
"learning_rate": 0.00038106060606060607, |
|
"loss": 2.4603, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7227272727272728, |
|
"grad_norm": 2.353992223739624, |
|
"learning_rate": 0.00038030303030303034, |
|
"loss": 1.4417, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 2.382633686065674, |
|
"learning_rate": 0.0003795454545454545, |
|
"loss": 1.9239, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7318181818181818, |
|
"grad_norm": 2.7274303436279297, |
|
"learning_rate": 0.0003787878787878788, |
|
"loss": 2.1116, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7363636363636363, |
|
"grad_norm": 2.0137476921081543, |
|
"learning_rate": 0.00037803030303030306, |
|
"loss": 1.5707, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.740909090909091, |
|
"grad_norm": 1.977155089378357, |
|
"learning_rate": 0.0003772727272727273, |
|
"loss": 1.4972, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7454545454545455, |
|
"grad_norm": 2.5506880283355713, |
|
"learning_rate": 0.0003765151515151515, |
|
"loss": 2.4583, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.2664661407470703, |
|
"learning_rate": 0.00037575757575757573, |
|
"loss": 2.1239, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7545454545454545, |
|
"grad_norm": 2.393831968307495, |
|
"learning_rate": 0.000375, |
|
"loss": 2.2104, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.759090909090909, |
|
"grad_norm": 2.358670711517334, |
|
"learning_rate": 0.0003742424242424243, |
|
"loss": 1.7545, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7636363636363637, |
|
"grad_norm": 2.0985164642333984, |
|
"learning_rate": 0.0003734848484848485, |
|
"loss": 1.7337, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7681818181818182, |
|
"grad_norm": 1.711176872253418, |
|
"learning_rate": 0.00037272727272727273, |
|
"loss": 1.3195, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 2.20684814453125, |
|
"learning_rate": 0.00037196969696969695, |
|
"loss": 1.7633, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7772727272727272, |
|
"grad_norm": 2.0569570064544678, |
|
"learning_rate": 0.00037121212121212123, |
|
"loss": 1.8354, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7818181818181819, |
|
"grad_norm": 2.4895520210266113, |
|
"learning_rate": 0.0003704545454545455, |
|
"loss": 2.6706, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7863636363636364, |
|
"grad_norm": 2.3134992122650146, |
|
"learning_rate": 0.00036969696969696967, |
|
"loss": 2.077, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7909090909090909, |
|
"grad_norm": 1.876047968864441, |
|
"learning_rate": 0.00036893939393939395, |
|
"loss": 1.5816, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7954545454545454, |
|
"grad_norm": 2.301314353942871, |
|
"learning_rate": 0.00036818181818181817, |
|
"loss": 2.9433, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.4783785343170166, |
|
"learning_rate": 0.00036742424242424245, |
|
"loss": 2.3913, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8045454545454546, |
|
"grad_norm": 2.3966879844665527, |
|
"learning_rate": 0.00036666666666666667, |
|
"loss": 2.6103, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8090909090909091, |
|
"grad_norm": 2.1050190925598145, |
|
"learning_rate": 0.0003659090909090909, |
|
"loss": 1.6801, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8136363636363636, |
|
"grad_norm": 2.3336639404296875, |
|
"learning_rate": 0.00036515151515151517, |
|
"loss": 1.936, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 2.4867429733276367, |
|
"learning_rate": 0.0003643939393939394, |
|
"loss": 2.0285, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8227272727272728, |
|
"grad_norm": 1.9529379606246948, |
|
"learning_rate": 0.00036363636363636367, |
|
"loss": 1.5503, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8272727272727273, |
|
"grad_norm": 2.095381736755371, |
|
"learning_rate": 0.00036287878787878784, |
|
"loss": 2.5626, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8318181818181818, |
|
"grad_norm": 2.156099319458008, |
|
"learning_rate": 0.0003621212121212121, |
|
"loss": 1.8788, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8363636363636363, |
|
"grad_norm": 1.8726741075515747, |
|
"learning_rate": 0.0003613636363636364, |
|
"loss": 1.6386, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8409090909090909, |
|
"grad_norm": 2.6056482791900635, |
|
"learning_rate": 0.0003606060606060606, |
|
"loss": 1.7965, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8454545454545455, |
|
"grad_norm": 2.65775728225708, |
|
"learning_rate": 0.0003598484848484849, |
|
"loss": 1.775, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.9309563636779785, |
|
"learning_rate": 0.00035909090909090906, |
|
"loss": 1.7575, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8545454545454545, |
|
"grad_norm": 1.874107003211975, |
|
"learning_rate": 0.00035833333333333333, |
|
"loss": 1.4686, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8590909090909091, |
|
"grad_norm": 2.125084400177002, |
|
"learning_rate": 0.0003575757575757576, |
|
"loss": 1.3713, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 2.016660690307617, |
|
"learning_rate": 0.00035681818181818183, |
|
"loss": 1.6914, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8681818181818182, |
|
"grad_norm": 2.8699893951416016, |
|
"learning_rate": 0.0003560606060606061, |
|
"loss": 1.6524, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 2.2474772930145264, |
|
"learning_rate": 0.0003553030303030303, |
|
"loss": 1.7333, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8772727272727273, |
|
"grad_norm": 2.6996030807495117, |
|
"learning_rate": 0.00035454545454545455, |
|
"loss": 1.5828, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8818181818181818, |
|
"grad_norm": 2.196274757385254, |
|
"learning_rate": 0.0003537878787878788, |
|
"loss": 1.6058, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8863636363636364, |
|
"grad_norm": 2.4350290298461914, |
|
"learning_rate": 0.00035303030303030305, |
|
"loss": 2.0724, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8909090909090909, |
|
"grad_norm": 2.047480821609497, |
|
"learning_rate": 0.0003522727272727273, |
|
"loss": 1.7112, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8954545454545455, |
|
"grad_norm": 2.4547033309936523, |
|
"learning_rate": 0.0003515151515151515, |
|
"loss": 1.7747, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.9125847816467285, |
|
"learning_rate": 0.0003507575757575758, |
|
"loss": 2.0878, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9045454545454545, |
|
"grad_norm": 2.168196678161621, |
|
"learning_rate": 0.00035, |
|
"loss": 1.5592, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 2.364847183227539, |
|
"learning_rate": 0.0003492424242424243, |
|
"loss": 2.0301, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9136363636363637, |
|
"grad_norm": 2.743267059326172, |
|
"learning_rate": 0.0003484848484848485, |
|
"loss": 1.8784, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.9181818181818182, |
|
"grad_norm": 2.2784361839294434, |
|
"learning_rate": 0.0003477272727272727, |
|
"loss": 1.5936, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9227272727272727, |
|
"grad_norm": 2.875943422317505, |
|
"learning_rate": 0.000346969696969697, |
|
"loss": 1.9961, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9272727272727272, |
|
"grad_norm": 2.0056071281433105, |
|
"learning_rate": 0.0003462121212121212, |
|
"loss": 1.8795, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9318181818181818, |
|
"grad_norm": 2.3547298908233643, |
|
"learning_rate": 0.00034545454545454544, |
|
"loss": 2.1429, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.9363636363636364, |
|
"grad_norm": 2.7082138061523438, |
|
"learning_rate": 0.0003446969696969697, |
|
"loss": 1.7504, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.9409090909090909, |
|
"grad_norm": 2.335139751434326, |
|
"learning_rate": 0.00034393939393939394, |
|
"loss": 1.9774, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9454545454545454, |
|
"grad_norm": 2.3550143241882324, |
|
"learning_rate": 0.0003431818181818182, |
|
"loss": 1.8602, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.622682809829712, |
|
"learning_rate": 0.00034242424242424244, |
|
"loss": 2.2283, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 1.7282129526138306, |
|
"learning_rate": 0.00034166666666666666, |
|
"loss": 1.6025, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9590909090909091, |
|
"grad_norm": 2.8252415657043457, |
|
"learning_rate": 0.0003409090909090909, |
|
"loss": 1.7649, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9636363636363636, |
|
"grad_norm": 2.146219491958618, |
|
"learning_rate": 0.00034015151515151516, |
|
"loss": 1.9742, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9681818181818181, |
|
"grad_norm": 2.124577045440674, |
|
"learning_rate": 0.00033939393939393943, |
|
"loss": 1.7412, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9727272727272728, |
|
"grad_norm": 1.7649497985839844, |
|
"learning_rate": 0.00033863636363636366, |
|
"loss": 1.2667, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9772727272727273, |
|
"grad_norm": 2.375659465789795, |
|
"learning_rate": 0.0003378787878787879, |
|
"loss": 1.7142, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9818181818181818, |
|
"grad_norm": 1.9995368719100952, |
|
"learning_rate": 0.0003371212121212121, |
|
"loss": 1.4613, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9863636363636363, |
|
"grad_norm": 2.2640981674194336, |
|
"learning_rate": 0.0003363636363636364, |
|
"loss": 1.9474, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.990909090909091, |
|
"grad_norm": 2.1680893898010254, |
|
"learning_rate": 0.00033560606060606066, |
|
"loss": 2.5352, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9954545454545455, |
|
"grad_norm": 2.3969366550445557, |
|
"learning_rate": 0.0003348484848484848, |
|
"loss": 1.9012, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.3696913719177246, |
|
"learning_rate": 0.0003340909090909091, |
|
"loss": 2.2928, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.891, |
|
"eval_gen_len": 41.9182, |
|
"eval_loss": 1.8093845844268799, |
|
"eval_precision": 0.8891, |
|
"eval_recall": 0.8931, |
|
"eval_rouge1": 0.466, |
|
"eval_rouge2": 0.2146, |
|
"eval_rougeL": 0.3912, |
|
"eval_rougeLsum": 0.4301, |
|
"eval_runtime": 25.1921, |
|
"eval_samples_per_second": 4.366, |
|
"eval_steps_per_second": 0.556, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0045454545454546, |
|
"grad_norm": 1.7403843402862549, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 1.7294, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.009090909090909, |
|
"grad_norm": 1.5273452997207642, |
|
"learning_rate": 0.0003325757575757576, |
|
"loss": 1.3279, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.0136363636363637, |
|
"grad_norm": 1.427538275718689, |
|
"learning_rate": 0.0003318181818181819, |
|
"loss": 0.9647, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.018181818181818, |
|
"grad_norm": 1.5605067014694214, |
|
"learning_rate": 0.00033106060606060604, |
|
"loss": 1.3178, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.0227272727272727, |
|
"grad_norm": 1.6737922430038452, |
|
"learning_rate": 0.0003303030303030303, |
|
"loss": 1.403, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0272727272727273, |
|
"grad_norm": 2.3249313831329346, |
|
"learning_rate": 0.00032954545454545454, |
|
"loss": 1.4907, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.0318181818181817, |
|
"grad_norm": 1.9939628839492798, |
|
"learning_rate": 0.0003287878787878788, |
|
"loss": 1.795, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.0363636363636364, |
|
"grad_norm": 2.482421398162842, |
|
"learning_rate": 0.000328030303030303, |
|
"loss": 1.7309, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.040909090909091, |
|
"grad_norm": 1.6090010404586792, |
|
"learning_rate": 0.00032727272727272726, |
|
"loss": 1.0976, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 1.5481090545654297, |
|
"learning_rate": 0.00032651515151515154, |
|
"loss": 1.1785, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 2.0420186519622803, |
|
"learning_rate": 0.00032575757575757576, |
|
"loss": 1.1853, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.0545454545454545, |
|
"grad_norm": 2.5020453929901123, |
|
"learning_rate": 0.00032500000000000004, |
|
"loss": 1.8213, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.059090909090909, |
|
"grad_norm": 1.5312837362289429, |
|
"learning_rate": 0.0003242424242424242, |
|
"loss": 1.016, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.0636363636363637, |
|
"grad_norm": 2.062110185623169, |
|
"learning_rate": 0.0003234848484848485, |
|
"loss": 1.2245, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.0681818181818181, |
|
"grad_norm": 1.7521977424621582, |
|
"learning_rate": 0.00032272727272727276, |
|
"loss": 1.4904, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.0727272727272728, |
|
"grad_norm": 1.431998372077942, |
|
"learning_rate": 0.000321969696969697, |
|
"loss": 1.2364, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.0772727272727272, |
|
"grad_norm": 1.8957371711730957, |
|
"learning_rate": 0.00032121212121212126, |
|
"loss": 1.9241, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0818181818181818, |
|
"grad_norm": 1.6720540523529053, |
|
"learning_rate": 0.00032045454545454543, |
|
"loss": 1.0261, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0863636363636364, |
|
"grad_norm": 1.8503271341323853, |
|
"learning_rate": 0.0003196969696969697, |
|
"loss": 1.6694, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 2.055772066116333, |
|
"learning_rate": 0.000318939393939394, |
|
"loss": 1.6855, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0954545454545455, |
|
"grad_norm": 4.034445285797119, |
|
"learning_rate": 0.0003181818181818182, |
|
"loss": 1.8592, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.6941239833831787, |
|
"learning_rate": 0.0003174242424242424, |
|
"loss": 1.1528, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.1045454545454545, |
|
"grad_norm": 1.8258893489837646, |
|
"learning_rate": 0.00031666666666666665, |
|
"loss": 1.1762, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.1090909090909091, |
|
"grad_norm": 2.721888303756714, |
|
"learning_rate": 0.0003159090909090909, |
|
"loss": 1.5523, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.1136363636363635, |
|
"grad_norm": 2.35798978805542, |
|
"learning_rate": 0.00031515151515151515, |
|
"loss": 1.7533, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.1181818181818182, |
|
"grad_norm": 2.089695453643799, |
|
"learning_rate": 0.0003143939393939394, |
|
"loss": 1.4344, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.1227272727272728, |
|
"grad_norm": 2.0275492668151855, |
|
"learning_rate": 0.00031363636363636365, |
|
"loss": 1.5359, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.1272727272727272, |
|
"grad_norm": 2.290893077850342, |
|
"learning_rate": 0.00031287878787878787, |
|
"loss": 1.8292, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.1318181818181818, |
|
"grad_norm": 2.3136603832244873, |
|
"learning_rate": 0.00031212121212121214, |
|
"loss": 1.6828, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 2.1181459426879883, |
|
"learning_rate": 0.00031136363636363637, |
|
"loss": 1.1531, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1409090909090909, |
|
"grad_norm": 1.9240480661392212, |
|
"learning_rate": 0.0003106060606060606, |
|
"loss": 1.2515, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.1454545454545455, |
|
"grad_norm": 2.522502899169922, |
|
"learning_rate": 0.00030984848484848487, |
|
"loss": 1.4942, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.5959993600845337, |
|
"learning_rate": 0.0003090909090909091, |
|
"loss": 1.1412, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.1545454545454545, |
|
"grad_norm": 1.740268588066101, |
|
"learning_rate": 0.00030833333333333337, |
|
"loss": 1.7009, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.1590909090909092, |
|
"grad_norm": 2.1791181564331055, |
|
"learning_rate": 0.0003075757575757576, |
|
"loss": 1.4727, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.1636363636363636, |
|
"grad_norm": 2.2325475215911865, |
|
"learning_rate": 0.0003068181818181818, |
|
"loss": 1.4379, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.1681818181818182, |
|
"grad_norm": 2.206281900405884, |
|
"learning_rate": 0.00030606060606060603, |
|
"loss": 1.5069, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.1727272727272728, |
|
"grad_norm": 2.6821632385253906, |
|
"learning_rate": 0.0003053030303030303, |
|
"loss": 1.7888, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.1772727272727272, |
|
"grad_norm": 2.567087173461914, |
|
"learning_rate": 0.0003045454545454546, |
|
"loss": 1.7501, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 2.131887674331665, |
|
"learning_rate": 0.00030378787878787875, |
|
"loss": 1.3294, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.1863636363636363, |
|
"grad_norm": 1.5638073682785034, |
|
"learning_rate": 0.00030303030303030303, |
|
"loss": 1.3679, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.190909090909091, |
|
"grad_norm": 2.208799362182617, |
|
"learning_rate": 0.00030227272727272725, |
|
"loss": 1.2585, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.1954545454545455, |
|
"grad_norm": 2.379058599472046, |
|
"learning_rate": 0.00030151515151515153, |
|
"loss": 1.4692, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.2137858867645264, |
|
"learning_rate": 0.0003007575757575758, |
|
"loss": 1.5648, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.2045454545454546, |
|
"grad_norm": 1.94793701171875, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2642, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.209090909090909, |
|
"grad_norm": 2.8321635723114014, |
|
"learning_rate": 0.00029924242424242425, |
|
"loss": 1.4013, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.2136363636363636, |
|
"grad_norm": 2.6073920726776123, |
|
"learning_rate": 0.00029848484848484847, |
|
"loss": 1.6666, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.2181818181818183, |
|
"grad_norm": 1.6753661632537842, |
|
"learning_rate": 0.00029772727272727275, |
|
"loss": 1.2472, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.2227272727272727, |
|
"grad_norm": 2.296635866165161, |
|
"learning_rate": 0.000296969696969697, |
|
"loss": 1.8099, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 2.8359079360961914, |
|
"learning_rate": 0.0002962121212121212, |
|
"loss": 1.5522, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.231818181818182, |
|
"grad_norm": 2.3741962909698486, |
|
"learning_rate": 0.00029545454545454547, |
|
"loss": 1.5737, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.2363636363636363, |
|
"grad_norm": 1.9859591722488403, |
|
"learning_rate": 0.0002946969696969697, |
|
"loss": 1.5659, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.240909090909091, |
|
"grad_norm": 1.9343222379684448, |
|
"learning_rate": 0.00029393939393939397, |
|
"loss": 1.1204, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.2454545454545454, |
|
"grad_norm": 1.6376460790634155, |
|
"learning_rate": 0.00029318181818181814, |
|
"loss": 0.8886, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.9865474700927734, |
|
"learning_rate": 0.0002924242424242424, |
|
"loss": 1.5425, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.2545454545454544, |
|
"grad_norm": 2.1017825603485107, |
|
"learning_rate": 0.0002916666666666667, |
|
"loss": 1.1125, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.259090909090909, |
|
"grad_norm": 2.349350690841675, |
|
"learning_rate": 0.0002909090909090909, |
|
"loss": 1.6496, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.2636363636363637, |
|
"grad_norm": 1.8741109371185303, |
|
"learning_rate": 0.0002901515151515152, |
|
"loss": 1.3404, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.268181818181818, |
|
"grad_norm": 2.2605037689208984, |
|
"learning_rate": 0.00028939393939393936, |
|
"loss": 1.5495, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 2.0851080417633057, |
|
"learning_rate": 0.00028863636363636363, |
|
"loss": 1.501, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2772727272727273, |
|
"grad_norm": 2.2092325687408447, |
|
"learning_rate": 0.0002878787878787879, |
|
"loss": 1.5655, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.2818181818181817, |
|
"grad_norm": 2.343780755996704, |
|
"learning_rate": 0.00028712121212121213, |
|
"loss": 1.4229, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.2863636363636364, |
|
"grad_norm": 1.684411883354187, |
|
"learning_rate": 0.00028636363636363636, |
|
"loss": 1.4132, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.290909090909091, |
|
"grad_norm": 2.034984827041626, |
|
"learning_rate": 0.0002856060606060606, |
|
"loss": 1.1224, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.2954545454545454, |
|
"grad_norm": 1.9973840713500977, |
|
"learning_rate": 0.00028484848484848485, |
|
"loss": 1.4387, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.2674074172973633, |
|
"learning_rate": 0.00028409090909090913, |
|
"loss": 1.6697, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.3045454545454547, |
|
"grad_norm": 1.596279501914978, |
|
"learning_rate": 0.00028333333333333335, |
|
"loss": 1.0433, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.309090909090909, |
|
"grad_norm": 1.874055027961731, |
|
"learning_rate": 0.0002825757575757576, |
|
"loss": 1.291, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.3136363636363637, |
|
"grad_norm": 2.2551302909851074, |
|
"learning_rate": 0.0002818181818181818, |
|
"loss": 1.3771, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 2.502380847930908, |
|
"learning_rate": 0.0002810606060606061, |
|
"loss": 1.853, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3227272727272728, |
|
"grad_norm": 1.9750282764434814, |
|
"learning_rate": 0.0002803030303030303, |
|
"loss": 1.4369, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.3272727272727272, |
|
"grad_norm": 2.4181363582611084, |
|
"learning_rate": 0.0002795454545454546, |
|
"loss": 1.3565, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.3318181818181818, |
|
"grad_norm": 2.0823867321014404, |
|
"learning_rate": 0.0002787878787878788, |
|
"loss": 1.4589, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.3363636363636364, |
|
"grad_norm": 2.147993326187134, |
|
"learning_rate": 0.000278030303030303, |
|
"loss": 1.3616, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3409090909090908, |
|
"grad_norm": 3.184967517852783, |
|
"learning_rate": 0.0002772727272727273, |
|
"loss": 1.8248, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.3454545454545455, |
|
"grad_norm": 2.3890221118927, |
|
"learning_rate": 0.0002765151515151515, |
|
"loss": 1.4599, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.9724668264389038, |
|
"learning_rate": 0.00027575757575757574, |
|
"loss": 1.2248, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.3545454545454545, |
|
"grad_norm": 2.1539180278778076, |
|
"learning_rate": 0.000275, |
|
"loss": 1.1712, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.3590909090909091, |
|
"grad_norm": 2.107490062713623, |
|
"learning_rate": 0.00027424242424242424, |
|
"loss": 1.3786, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 2.052065372467041, |
|
"learning_rate": 0.0002734848484848485, |
|
"loss": 1.2121, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3681818181818182, |
|
"grad_norm": 2.310871124267578, |
|
"learning_rate": 0.00027272727272727274, |
|
"loss": 1.4206, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.3727272727272728, |
|
"grad_norm": 2.1283962726593018, |
|
"learning_rate": 0.00027196969696969696, |
|
"loss": 1.8294, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.3772727272727272, |
|
"grad_norm": 1.676561951637268, |
|
"learning_rate": 0.00027121212121212124, |
|
"loss": 0.9432, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.3818181818181818, |
|
"grad_norm": 2.4148755073547363, |
|
"learning_rate": 0.00027045454545454546, |
|
"loss": 1.8412, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.3863636363636362, |
|
"grad_norm": 1.668143391609192, |
|
"learning_rate": 0.00026969696969696974, |
|
"loss": 0.9952, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.3909090909090909, |
|
"grad_norm": 2.411818504333496, |
|
"learning_rate": 0.0002689393939393939, |
|
"loss": 1.657, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.3954545454545455, |
|
"grad_norm": 2.2723898887634277, |
|
"learning_rate": 0.0002681818181818182, |
|
"loss": 1.6628, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.8225884437561035, |
|
"learning_rate": 0.0002674242424242424, |
|
"loss": 1.3039, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.4045454545454545, |
|
"grad_norm": 1.860181450843811, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 1.4974, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 2.22611927986145, |
|
"learning_rate": 0.00026590909090909096, |
|
"loss": 1.3242, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.4136363636363636, |
|
"grad_norm": 2.4301326274871826, |
|
"learning_rate": 0.0002651515151515151, |
|
"loss": 1.4631, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.4181818181818182, |
|
"grad_norm": 2.2716891765594482, |
|
"learning_rate": 0.0002643939393939394, |
|
"loss": 1.4076, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.4227272727272728, |
|
"grad_norm": 1.8279646635055542, |
|
"learning_rate": 0.0002636363636363636, |
|
"loss": 1.1232, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.4272727272727272, |
|
"grad_norm": 1.9681382179260254, |
|
"learning_rate": 0.0002628787878787879, |
|
"loss": 1.0339, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.4318181818181819, |
|
"grad_norm": 2.138864278793335, |
|
"learning_rate": 0.0002621212121212122, |
|
"loss": 1.4739, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.4363636363636363, |
|
"grad_norm": 1.997995376586914, |
|
"learning_rate": 0.00026136363636363634, |
|
"loss": 1.6025, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.440909090909091, |
|
"grad_norm": 2.493382215499878, |
|
"learning_rate": 0.0002606060606060606, |
|
"loss": 1.9215, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.4454545454545453, |
|
"grad_norm": 2.0182077884674072, |
|
"learning_rate": 0.00025984848484848484, |
|
"loss": 0.8709, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.0383524894714355, |
|
"learning_rate": 0.0002590909090909091, |
|
"loss": 1.1791, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 2.159406900405884, |
|
"learning_rate": 0.00025833333333333334, |
|
"loss": 1.896, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.459090909090909, |
|
"grad_norm": 2.154700756072998, |
|
"learning_rate": 0.00025757575757575756, |
|
"loss": 1.4738, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.4636363636363636, |
|
"grad_norm": 2.071272134780884, |
|
"learning_rate": 0.00025681818181818184, |
|
"loss": 1.4454, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.4681818181818183, |
|
"grad_norm": 2.1091556549072266, |
|
"learning_rate": 0.00025606060606060606, |
|
"loss": 1.2386, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.4727272727272727, |
|
"grad_norm": 1.8080791234970093, |
|
"learning_rate": 0.00025530303030303034, |
|
"loss": 0.9288, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.4772727272727273, |
|
"grad_norm": 2.170426607131958, |
|
"learning_rate": 0.0002545454545454545, |
|
"loss": 1.6025, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.481818181818182, |
|
"grad_norm": 2.9302620887756348, |
|
"learning_rate": 0.0002537878787878788, |
|
"loss": 2.0105, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.4863636363636363, |
|
"grad_norm": 2.4640023708343506, |
|
"learning_rate": 0.00025303030303030306, |
|
"loss": 1.5101, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.490909090909091, |
|
"grad_norm": 1.6185390949249268, |
|
"learning_rate": 0.0002522727272727273, |
|
"loss": 0.9489, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.4954545454545456, |
|
"grad_norm": 1.4413659572601318, |
|
"learning_rate": 0.0002515151515151515, |
|
"loss": 0.8982, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.884208083152771, |
|
"learning_rate": 0.00025075757575757573, |
|
"loss": 1.2771, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.5045454545454544, |
|
"grad_norm": 1.9692877531051636, |
|
"learning_rate": 0.00025, |
|
"loss": 1.6345, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.509090909090909, |
|
"grad_norm": 2.3343496322631836, |
|
"learning_rate": 0.00024924242424242423, |
|
"loss": 1.6179, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.5136363636363637, |
|
"grad_norm": 2.2422614097595215, |
|
"learning_rate": 0.0002484848484848485, |
|
"loss": 1.3785, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.518181818181818, |
|
"grad_norm": 2.130425214767456, |
|
"learning_rate": 0.0002477272727272727, |
|
"loss": 1.6261, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.5227272727272727, |
|
"grad_norm": 2.13519287109375, |
|
"learning_rate": 0.000246969696969697, |
|
"loss": 1.4136, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.5272727272727273, |
|
"grad_norm": 1.268389344215393, |
|
"learning_rate": 0.0002462121212121212, |
|
"loss": 0.9329, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.5318181818181817, |
|
"grad_norm": 2.094594955444336, |
|
"learning_rate": 0.00024545454545454545, |
|
"loss": 1.3465, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.5363636363636364, |
|
"grad_norm": 2.156473159790039, |
|
"learning_rate": 0.00024469696969696967, |
|
"loss": 1.2741, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.540909090909091, |
|
"grad_norm": 1.898336410522461, |
|
"learning_rate": 0.00024393939393939392, |
|
"loss": 1.059, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 1.8859503269195557, |
|
"learning_rate": 0.0002431818181818182, |
|
"loss": 1.4959, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.060011863708496, |
|
"learning_rate": 0.00024242424242424245, |
|
"loss": 1.3644, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.5545454545454547, |
|
"grad_norm": 2.3816416263580322, |
|
"learning_rate": 0.00024166666666666667, |
|
"loss": 1.4375, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.559090909090909, |
|
"grad_norm": 3.5078084468841553, |
|
"learning_rate": 0.00024090909090909092, |
|
"loss": 1.422, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.5636363636363635, |
|
"grad_norm": 2.221052885055542, |
|
"learning_rate": 0.00024015151515151514, |
|
"loss": 1.3024, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.5681818181818183, |
|
"grad_norm": 2.4711906909942627, |
|
"learning_rate": 0.0002393939393939394, |
|
"loss": 1.4838, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.5727272727272728, |
|
"grad_norm": 1.9803431034088135, |
|
"learning_rate": 0.00023863636363636364, |
|
"loss": 1.1055, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.5772727272727272, |
|
"grad_norm": 2.3196969032287598, |
|
"learning_rate": 0.0002378787878787879, |
|
"loss": 1.8282, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.5818181818181818, |
|
"grad_norm": 2.094829797744751, |
|
"learning_rate": 0.00023712121212121214, |
|
"loss": 1.2518, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.5863636363636364, |
|
"grad_norm": 2.0144240856170654, |
|
"learning_rate": 0.00023636363636363636, |
|
"loss": 1.4561, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 1.8540327548980713, |
|
"learning_rate": 0.0002356060606060606, |
|
"loss": 1.3567, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5954545454545455, |
|
"grad_norm": 2.6601638793945312, |
|
"learning_rate": 0.00023484848484848486, |
|
"loss": 1.7279, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.5020227432250977, |
|
"learning_rate": 0.0002340909090909091, |
|
"loss": 1.7535, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.6045454545454545, |
|
"grad_norm": 2.0830986499786377, |
|
"learning_rate": 0.00023333333333333333, |
|
"loss": 1.2985, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.6090909090909091, |
|
"grad_norm": 1.8451659679412842, |
|
"learning_rate": 0.00023257575757575758, |
|
"loss": 1.0175, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.6136363636363638, |
|
"grad_norm": 2.271484375, |
|
"learning_rate": 0.00023181818181818183, |
|
"loss": 1.4269, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.6181818181818182, |
|
"grad_norm": 4.305004596710205, |
|
"learning_rate": 0.00023106060606060605, |
|
"loss": 1.3775, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.6227272727272726, |
|
"grad_norm": 1.8406462669372559, |
|
"learning_rate": 0.00023030303030303033, |
|
"loss": 1.2472, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.6272727272727274, |
|
"grad_norm": 2.6075923442840576, |
|
"learning_rate": 0.00022954545454545455, |
|
"loss": 1.3993, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.6318181818181818, |
|
"grad_norm": 2.324907064437866, |
|
"learning_rate": 0.0002287878787878788, |
|
"loss": 1.3076, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 1.954463005065918, |
|
"learning_rate": 0.00022803030303030302, |
|
"loss": 1.4135, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6409090909090909, |
|
"grad_norm": 1.8350000381469727, |
|
"learning_rate": 0.00022727272727272727, |
|
"loss": 1.2618, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.6454545454545455, |
|
"grad_norm": 2.2729427814483643, |
|
"learning_rate": 0.0002265151515151515, |
|
"loss": 1.2483, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.7203510999679565, |
|
"learning_rate": 0.00022575757575757577, |
|
"loss": 0.9189, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.6545454545454545, |
|
"grad_norm": 2.2685649394989014, |
|
"learning_rate": 0.00022500000000000002, |
|
"loss": 1.4897, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.6590909090909092, |
|
"grad_norm": 2.502887010574341, |
|
"learning_rate": 0.00022424242424242424, |
|
"loss": 1.315, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.6636363636363636, |
|
"grad_norm": 1.8945139646530151, |
|
"learning_rate": 0.0002234848484848485, |
|
"loss": 1.1686, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.6681818181818182, |
|
"grad_norm": 2.207409620285034, |
|
"learning_rate": 0.00022272727272727272, |
|
"loss": 1.9896, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.6727272727272728, |
|
"grad_norm": 2.028022527694702, |
|
"learning_rate": 0.000221969696969697, |
|
"loss": 1.5135, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.6772727272727272, |
|
"grad_norm": 2.403134822845459, |
|
"learning_rate": 0.00022121212121212121, |
|
"loss": 1.4201, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 2.022304058074951, |
|
"learning_rate": 0.00022045454545454546, |
|
"loss": 1.672, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.6863636363636365, |
|
"grad_norm": 2.3457093238830566, |
|
"learning_rate": 0.0002196969696969697, |
|
"loss": 1.4657, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.690909090909091, |
|
"grad_norm": 2.0904908180236816, |
|
"learning_rate": 0.00021893939393939394, |
|
"loss": 1.5864, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.6954545454545453, |
|
"grad_norm": 1.5914294719696045, |
|
"learning_rate": 0.00021818181818181818, |
|
"loss": 1.2828, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.8519418239593506, |
|
"learning_rate": 0.00021742424242424243, |
|
"loss": 1.5195, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.7045454545454546, |
|
"grad_norm": 2.2076525688171387, |
|
"learning_rate": 0.00021666666666666668, |
|
"loss": 1.247, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.709090909090909, |
|
"grad_norm": 1.8584811687469482, |
|
"learning_rate": 0.0002159090909090909, |
|
"loss": 1.6614, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.7136363636363636, |
|
"grad_norm": 1.9943331480026245, |
|
"learning_rate": 0.00021515151515151516, |
|
"loss": 1.2409, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.7181818181818183, |
|
"grad_norm": 1.5293704271316528, |
|
"learning_rate": 0.00021439393939393938, |
|
"loss": 1.1563, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.7227272727272727, |
|
"grad_norm": 2.835527181625366, |
|
"learning_rate": 0.00021363636363636363, |
|
"loss": 1.3795, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 2.491914987564087, |
|
"learning_rate": 0.0002128787878787879, |
|
"loss": 1.3017, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.731818181818182, |
|
"grad_norm": 3.2870216369628906, |
|
"learning_rate": 0.00021212121212121213, |
|
"loss": 1.1947, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.7363636363636363, |
|
"grad_norm": 2.1510424613952637, |
|
"learning_rate": 0.00021136363636363638, |
|
"loss": 1.7048, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.740909090909091, |
|
"grad_norm": 2.2372002601623535, |
|
"learning_rate": 0.0002106060606060606, |
|
"loss": 1.1627, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.7454545454545456, |
|
"grad_norm": 2.217090368270874, |
|
"learning_rate": 0.00020984848484848485, |
|
"loss": 1.8424, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.8897899389266968, |
|
"learning_rate": 0.00020909090909090907, |
|
"loss": 1.3006, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.7545454545454544, |
|
"grad_norm": 1.7469165325164795, |
|
"learning_rate": 0.00020833333333333335, |
|
"loss": 1.496, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.759090909090909, |
|
"grad_norm": 1.8982353210449219, |
|
"learning_rate": 0.0002075757575757576, |
|
"loss": 1.2682, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.7636363636363637, |
|
"grad_norm": 2.0795273780822754, |
|
"learning_rate": 0.00020681818181818182, |
|
"loss": 2.2314, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.768181818181818, |
|
"grad_norm": 1.6244702339172363, |
|
"learning_rate": 0.00020606060606060607, |
|
"loss": 0.9585, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 2.3025052547454834, |
|
"learning_rate": 0.0002053030303030303, |
|
"loss": 1.6639, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.7772727272727273, |
|
"grad_norm": 2.418119192123413, |
|
"learning_rate": 0.00020454545454545457, |
|
"loss": 1.8274, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.7818181818181817, |
|
"grad_norm": 1.70542573928833, |
|
"learning_rate": 0.0002037878787878788, |
|
"loss": 1.3115, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.7863636363636364, |
|
"grad_norm": 1.9681594371795654, |
|
"learning_rate": 0.00020303030303030304, |
|
"loss": 0.973, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.790909090909091, |
|
"grad_norm": 1.856879711151123, |
|
"learning_rate": 0.0002022727272727273, |
|
"loss": 1.2958, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.7954545454545454, |
|
"grad_norm": 2.1984550952911377, |
|
"learning_rate": 0.0002015151515151515, |
|
"loss": 1.5643, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.6989824771881104, |
|
"learning_rate": 0.00020075757575757576, |
|
"loss": 1.4334, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.8045454545454547, |
|
"grad_norm": 2.3298850059509277, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5145, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.809090909090909, |
|
"grad_norm": 1.9490818977355957, |
|
"learning_rate": 0.00019924242424242426, |
|
"loss": 1.5346, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.8136363636363635, |
|
"grad_norm": 2.0060818195343018, |
|
"learning_rate": 0.00019848484848484848, |
|
"loss": 1.2493, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 2.1615042686462402, |
|
"learning_rate": 0.00019772727272727273, |
|
"loss": 1.8834, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.8227272727272728, |
|
"grad_norm": 2.4424972534179688, |
|
"learning_rate": 0.00019696969696969695, |
|
"loss": 1.6012, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.8272727272727272, |
|
"grad_norm": 1.83026921749115, |
|
"learning_rate": 0.0001962121212121212, |
|
"loss": 1.4308, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.8318181818181818, |
|
"grad_norm": 2.6793453693389893, |
|
"learning_rate": 0.00019545454545454548, |
|
"loss": 1.6356, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.8363636363636364, |
|
"grad_norm": 2.1211740970611572, |
|
"learning_rate": 0.0001946969696969697, |
|
"loss": 1.0859, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.8409090909090908, |
|
"grad_norm": 1.6658772230148315, |
|
"learning_rate": 0.00019393939393939395, |
|
"loss": 1.3467, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.8454545454545455, |
|
"grad_norm": 1.7903032302856445, |
|
"learning_rate": 0.00019318181818181817, |
|
"loss": 1.0118, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.7358275651931763, |
|
"learning_rate": 0.00019242424242424242, |
|
"loss": 1.4404, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.8545454545454545, |
|
"grad_norm": 2.0745861530303955, |
|
"learning_rate": 0.00019166666666666667, |
|
"loss": 1.3409, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.8590909090909091, |
|
"grad_norm": 2.1365160942077637, |
|
"learning_rate": 0.00019090909090909092, |
|
"loss": 1.4658, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 2.040371894836426, |
|
"learning_rate": 0.00019015151515151517, |
|
"loss": 1.3165, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8681818181818182, |
|
"grad_norm": 1.9335429668426514, |
|
"learning_rate": 0.0001893939393939394, |
|
"loss": 1.5063, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.8727272727272726, |
|
"grad_norm": 2.0025243759155273, |
|
"learning_rate": 0.00018863636363636364, |
|
"loss": 1.2584, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.8772727272727274, |
|
"grad_norm": 1.7558890581130981, |
|
"learning_rate": 0.00018787878787878787, |
|
"loss": 1.0937, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.8818181818181818, |
|
"grad_norm": 1.886003851890564, |
|
"learning_rate": 0.00018712121212121214, |
|
"loss": 1.3969, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.8863636363636362, |
|
"grad_norm": 2.6999497413635254, |
|
"learning_rate": 0.00018636363636363636, |
|
"loss": 1.1411, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.8909090909090909, |
|
"grad_norm": 1.923196792602539, |
|
"learning_rate": 0.00018560606060606061, |
|
"loss": 1.6597, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.8954545454545455, |
|
"grad_norm": 2.261504650115967, |
|
"learning_rate": 0.00018484848484848484, |
|
"loss": 1.738, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.9908875226974487, |
|
"learning_rate": 0.00018409090909090909, |
|
"loss": 1.1275, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.9045454545454545, |
|
"grad_norm": 2.348240852355957, |
|
"learning_rate": 0.00018333333333333334, |
|
"loss": 1.5368, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 3.1023001670837402, |
|
"learning_rate": 0.00018257575757575758, |
|
"loss": 1.1828, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.9136363636363636, |
|
"grad_norm": 2.2243757247924805, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 1.4783, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.9181818181818182, |
|
"grad_norm": 1.824317216873169, |
|
"learning_rate": 0.00018106060606060606, |
|
"loss": 1.178, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.9227272727272728, |
|
"grad_norm": 2.1963822841644287, |
|
"learning_rate": 0.0001803030303030303, |
|
"loss": 1.5811, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.9272727272727272, |
|
"grad_norm": 1.8573740720748901, |
|
"learning_rate": 0.00017954545454545453, |
|
"loss": 1.2361, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.9318181818181817, |
|
"grad_norm": 2.223315715789795, |
|
"learning_rate": 0.0001787878787878788, |
|
"loss": 1.3236, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.9363636363636365, |
|
"grad_norm": 2.0890495777130127, |
|
"learning_rate": 0.00017803030303030305, |
|
"loss": 1.4358, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.940909090909091, |
|
"grad_norm": 1.8097957372665405, |
|
"learning_rate": 0.00017727272727272728, |
|
"loss": 1.0634, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.9454545454545453, |
|
"grad_norm": 2.378750801086426, |
|
"learning_rate": 0.00017651515151515153, |
|
"loss": 1.8565, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.7777960300445557, |
|
"learning_rate": 0.00017575757575757575, |
|
"loss": 1.5755, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 2.1310245990753174, |
|
"learning_rate": 0.000175, |
|
"loss": 1.4119, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.959090909090909, |
|
"grad_norm": 2.506479024887085, |
|
"learning_rate": 0.00017424242424242425, |
|
"loss": 1.5025, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.9636363636363636, |
|
"grad_norm": 2.091891288757324, |
|
"learning_rate": 0.0001734848484848485, |
|
"loss": 1.383, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.9681818181818183, |
|
"grad_norm": 1.7450625896453857, |
|
"learning_rate": 0.00017272727272727272, |
|
"loss": 1.4122, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.9727272727272727, |
|
"grad_norm": 1.7834117412567139, |
|
"learning_rate": 0.00017196969696969697, |
|
"loss": 1.0932, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.9772727272727273, |
|
"grad_norm": 1.6854950189590454, |
|
"learning_rate": 0.00017121212121212122, |
|
"loss": 0.9985, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.981818181818182, |
|
"grad_norm": 1.4569097757339478, |
|
"learning_rate": 0.00017045454545454544, |
|
"loss": 1.1354, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.9863636363636363, |
|
"grad_norm": 2.3083584308624268, |
|
"learning_rate": 0.00016969696969696972, |
|
"loss": 1.4856, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.990909090909091, |
|
"grad_norm": 2.1282572746276855, |
|
"learning_rate": 0.00016893939393939394, |
|
"loss": 1.942, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.9954545454545456, |
|
"grad_norm": 2.199822187423706, |
|
"learning_rate": 0.0001681818181818182, |
|
"loss": 1.4891, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.0030460357666016, |
|
"learning_rate": 0.0001674242424242424, |
|
"loss": 1.2939, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8928, |
|
"eval_gen_len": 42.0, |
|
"eval_loss": 1.7928985357284546, |
|
"eval_precision": 0.8914, |
|
"eval_recall": 0.8944, |
|
"eval_rouge1": 0.4605, |
|
"eval_rouge2": 0.2125, |
|
"eval_rougeL": 0.3897, |
|
"eval_rougeLsum": 0.4259, |
|
"eval_runtime": 25.1108, |
|
"eval_samples_per_second": 4.381, |
|
"eval_steps_per_second": 0.558, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.0045454545454544, |
|
"grad_norm": 1.4425781965255737, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.9057, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.0090909090909093, |
|
"grad_norm": 1.579765796661377, |
|
"learning_rate": 0.00016590909090909094, |
|
"loss": 0.7069, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.0136363636363637, |
|
"grad_norm": 1.8639825582504272, |
|
"learning_rate": 0.00016515151515151516, |
|
"loss": 1.1531, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.018181818181818, |
|
"grad_norm": 1.4890676736831665, |
|
"learning_rate": 0.0001643939393939394, |
|
"loss": 0.8112, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.022727272727273, |
|
"grad_norm": 1.7381901741027832, |
|
"learning_rate": 0.00016363636363636363, |
|
"loss": 1.2108, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.0272727272727273, |
|
"grad_norm": 1.6125924587249756, |
|
"learning_rate": 0.00016287878787878788, |
|
"loss": 1.0529, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.0318181818181817, |
|
"grad_norm": 1.8624428510665894, |
|
"learning_rate": 0.0001621212121212121, |
|
"loss": 1.006, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.036363636363636, |
|
"grad_norm": 1.719439148902893, |
|
"learning_rate": 0.00016136363636363638, |
|
"loss": 1.0881, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.040909090909091, |
|
"grad_norm": 2.446216106414795, |
|
"learning_rate": 0.00016060606060606063, |
|
"loss": 1.2399, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 1.703517198562622, |
|
"learning_rate": 0.00015984848484848485, |
|
"loss": 0.931, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 1.780228853225708, |
|
"learning_rate": 0.0001590909090909091, |
|
"loss": 0.9769, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.0545454545454547, |
|
"grad_norm": 2.015679121017456, |
|
"learning_rate": 0.00015833333333333332, |
|
"loss": 1.6044, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.059090909090909, |
|
"grad_norm": 2.084481716156006, |
|
"learning_rate": 0.00015757575757575757, |
|
"loss": 0.9933, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.0636363636363635, |
|
"grad_norm": 2.3098299503326416, |
|
"learning_rate": 0.00015681818181818182, |
|
"loss": 0.9405, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.0681818181818183, |
|
"grad_norm": 1.8041385412216187, |
|
"learning_rate": 0.00015606060606060607, |
|
"loss": 1.1748, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.0727272727272728, |
|
"grad_norm": 1.693158745765686, |
|
"learning_rate": 0.0001553030303030303, |
|
"loss": 0.9358, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.077272727272727, |
|
"grad_norm": 1.5484883785247803, |
|
"learning_rate": 0.00015454545454545454, |
|
"loss": 1.0664, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.081818181818182, |
|
"grad_norm": 1.4313092231750488, |
|
"learning_rate": 0.0001537878787878788, |
|
"loss": 0.6624, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.0863636363636364, |
|
"grad_norm": 2.218092679977417, |
|
"learning_rate": 0.00015303030303030302, |
|
"loss": 0.9856, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 2.030869960784912, |
|
"learning_rate": 0.0001522727272727273, |
|
"loss": 1.143, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.0954545454545457, |
|
"grad_norm": 2.190603017807007, |
|
"learning_rate": 0.00015151515151515152, |
|
"loss": 1.077, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.3030821084976196, |
|
"learning_rate": 0.00015075757575757576, |
|
"loss": 0.6711, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.1045454545454545, |
|
"grad_norm": 1.8678494691848755, |
|
"learning_rate": 0.00015, |
|
"loss": 1.0674, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 2.109090909090909, |
|
"grad_norm": 1.407085657119751, |
|
"learning_rate": 0.00014924242424242424, |
|
"loss": 0.7024, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.1136363636363638, |
|
"grad_norm": 2.004911422729492, |
|
"learning_rate": 0.0001484848484848485, |
|
"loss": 0.795, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.118181818181818, |
|
"grad_norm": 2.25128173828125, |
|
"learning_rate": 0.00014772727272727274, |
|
"loss": 1.2232, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.1227272727272726, |
|
"grad_norm": 1.960771918296814, |
|
"learning_rate": 0.00014696969696969698, |
|
"loss": 1.0019, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.1272727272727274, |
|
"grad_norm": 1.9563887119293213, |
|
"learning_rate": 0.0001462121212121212, |
|
"loss": 0.9798, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.131818181818182, |
|
"grad_norm": 1.687361240386963, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.755, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.1363636363636362, |
|
"grad_norm": 2.191286325454712, |
|
"learning_rate": 0.00014469696969696968, |
|
"loss": 1.0018, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.140909090909091, |
|
"grad_norm": 2.046880006790161, |
|
"learning_rate": 0.00014393939393939396, |
|
"loss": 1.2281, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.1454545454545455, |
|
"grad_norm": 2.4996211528778076, |
|
"learning_rate": 0.00014318181818181818, |
|
"loss": 1.0795, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.8937994241714478, |
|
"learning_rate": 0.00014242424242424243, |
|
"loss": 1.0556, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.1545454545454543, |
|
"grad_norm": 2.250491142272949, |
|
"learning_rate": 0.00014166666666666668, |
|
"loss": 0.8816, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.159090909090909, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.00014166666666666668, |
|
"loss": 0.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.1636363636363636, |
|
"grad_norm": 2.231706142425537, |
|
"learning_rate": 0.0001409090909090909, |
|
"loss": 1.2344, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.168181818181818, |
|
"grad_norm": 2.2170498371124268, |
|
"learning_rate": 0.00014015151515151515, |
|
"loss": 1.4409, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.172727272727273, |
|
"grad_norm": 2.3106095790863037, |
|
"learning_rate": 0.0001393939393939394, |
|
"loss": 0.9081, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.1772727272727272, |
|
"grad_norm": 1.9665738344192505, |
|
"learning_rate": 0.00013863636363636365, |
|
"loss": 1.3029, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 2.321331739425659, |
|
"learning_rate": 0.00013787878787878787, |
|
"loss": 1.4714, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.1863636363636365, |
|
"grad_norm": 2.0038533210754395, |
|
"learning_rate": 0.00013712121212121212, |
|
"loss": 1.0879, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.190909090909091, |
|
"grad_norm": 1.6077767610549927, |
|
"learning_rate": 0.00013636363636363637, |
|
"loss": 0.6456, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.1954545454545453, |
|
"grad_norm": 1.5018125772476196, |
|
"learning_rate": 0.00013560606060606062, |
|
"loss": 0.6937, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 1.5473967790603638, |
|
"learning_rate": 0.00013484848484848487, |
|
"loss": 0.6191, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.2045454545454546, |
|
"grad_norm": 2.5554354190826416, |
|
"learning_rate": 0.0001340909090909091, |
|
"loss": 1.4345, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.209090909090909, |
|
"grad_norm": 2.1666858196258545, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 1.4182, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.213636363636364, |
|
"grad_norm": 2.2915759086608887, |
|
"learning_rate": 0.00013257575757575756, |
|
"loss": 0.8752, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.2181818181818183, |
|
"grad_norm": 2.24314546585083, |
|
"learning_rate": 0.0001318181818181818, |
|
"loss": 1.3214, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.2227272727272727, |
|
"grad_norm": 2.269216537475586, |
|
"learning_rate": 0.0001310606060606061, |
|
"loss": 0.9968, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.227272727272727, |
|
"grad_norm": 2.3108322620391846, |
|
"learning_rate": 0.0001303030303030303, |
|
"loss": 0.9695, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.231818181818182, |
|
"grad_norm": 2.3146250247955322, |
|
"learning_rate": 0.00012954545454545456, |
|
"loss": 1.4007, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.2363636363636363, |
|
"grad_norm": 1.9747002124786377, |
|
"learning_rate": 0.00012878787878787878, |
|
"loss": 0.8876, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.2409090909090907, |
|
"grad_norm": 2.0410826206207275, |
|
"learning_rate": 0.00012803030303030303, |
|
"loss": 0.9588, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.2454545454545456, |
|
"grad_norm": 2.2743778228759766, |
|
"learning_rate": 0.00012727272727272725, |
|
"loss": 1.2062, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 2.272749662399292, |
|
"learning_rate": 0.00012651515151515153, |
|
"loss": 0.975, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.2545454545454544, |
|
"grad_norm": 2.297175884246826, |
|
"learning_rate": 0.00012575757575757575, |
|
"loss": 1.0806, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.2590909090909093, |
|
"grad_norm": 2.2274718284606934, |
|
"learning_rate": 0.000125, |
|
"loss": 0.9391, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.2636363636363637, |
|
"grad_norm": 2.4175453186035156, |
|
"learning_rate": 0.00012424242424242425, |
|
"loss": 1.2736, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.268181818181818, |
|
"grad_norm": 1.7530089616775513, |
|
"learning_rate": 0.0001234848484848485, |
|
"loss": 1.1917, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 2.598747730255127, |
|
"learning_rate": 0.00012272727272727272, |
|
"loss": 1.5901, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.2772727272727273, |
|
"grad_norm": 2.0590171813964844, |
|
"learning_rate": 0.00012196969696969696, |
|
"loss": 1.0049, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.2818181818181817, |
|
"grad_norm": 1.6530457735061646, |
|
"learning_rate": 0.00012121212121212122, |
|
"loss": 0.6991, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.286363636363636, |
|
"grad_norm": 1.4000625610351562, |
|
"learning_rate": 0.00012045454545454546, |
|
"loss": 0.7258, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.290909090909091, |
|
"grad_norm": 3.4282798767089844, |
|
"learning_rate": 0.0001196969696969697, |
|
"loss": 0.7331, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.2954545454545454, |
|
"grad_norm": 2.0328640937805176, |
|
"learning_rate": 0.00011893939393939394, |
|
"loss": 1.0245, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 2.639125347137451, |
|
"learning_rate": 0.00011818181818181818, |
|
"loss": 0.939, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.3045454545454547, |
|
"grad_norm": 2.069645643234253, |
|
"learning_rate": 0.00011742424242424243, |
|
"loss": 1.186, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.309090909090909, |
|
"grad_norm": 2.103675603866577, |
|
"learning_rate": 0.00011666666666666667, |
|
"loss": 1.0986, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.3136363636363635, |
|
"grad_norm": 2.022813320159912, |
|
"learning_rate": 0.00011590909090909091, |
|
"loss": 1.1106, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.3181818181818183, |
|
"grad_norm": 2.1240341663360596, |
|
"learning_rate": 0.00011515151515151516, |
|
"loss": 0.9754, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.3227272727272728, |
|
"grad_norm": 2.11362361907959, |
|
"learning_rate": 0.0001143939393939394, |
|
"loss": 1.243, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.327272727272727, |
|
"grad_norm": 1.9033676385879517, |
|
"learning_rate": 0.00011363636363636364, |
|
"loss": 0.7314, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.331818181818182, |
|
"grad_norm": 2.7902817726135254, |
|
"learning_rate": 0.00011287878787878789, |
|
"loss": 1.2161, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.3363636363636364, |
|
"grad_norm": 2.1139214038848877, |
|
"learning_rate": 0.00011212121212121212, |
|
"loss": 1.4216, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.340909090909091, |
|
"grad_norm": 2.2380800247192383, |
|
"learning_rate": 0.00011136363636363636, |
|
"loss": 1.0319, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.3454545454545457, |
|
"grad_norm": 1.9591755867004395, |
|
"learning_rate": 0.00011060606060606061, |
|
"loss": 0.7923, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 2.6767358779907227, |
|
"learning_rate": 0.00010984848484848486, |
|
"loss": 0.9721, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.3545454545454545, |
|
"grad_norm": 2.350008487701416, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 1.1793, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.359090909090909, |
|
"grad_norm": 2.0240652561187744, |
|
"learning_rate": 0.00010833333333333334, |
|
"loss": 1.1184, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 2.058748960494995, |
|
"learning_rate": 0.00010757575757575758, |
|
"loss": 1.1886, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.368181818181818, |
|
"grad_norm": 1.7921950817108154, |
|
"learning_rate": 0.00010681818181818181, |
|
"loss": 0.8511, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.3727272727272726, |
|
"grad_norm": 2.027445077896118, |
|
"learning_rate": 0.00010606060606060606, |
|
"loss": 0.8641, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.3772727272727274, |
|
"grad_norm": 1.8156445026397705, |
|
"learning_rate": 0.0001053030303030303, |
|
"loss": 0.8234, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.381818181818182, |
|
"grad_norm": 2.3511455059051514, |
|
"learning_rate": 0.00010454545454545454, |
|
"loss": 1.048, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.3863636363636362, |
|
"grad_norm": 1.489744782447815, |
|
"learning_rate": 0.0001037878787878788, |
|
"loss": 0.4886, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.390909090909091, |
|
"grad_norm": 2.0359721183776855, |
|
"learning_rate": 0.00010303030303030303, |
|
"loss": 1.0011, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.3954545454545455, |
|
"grad_norm": 2.8290212154388428, |
|
"learning_rate": 0.00010227272727272728, |
|
"loss": 1.4443, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.991904616355896, |
|
"learning_rate": 0.00010151515151515152, |
|
"loss": 0.9877, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.4045454545454543, |
|
"grad_norm": 1.8174313306808472, |
|
"learning_rate": 0.00010075757575757576, |
|
"loss": 0.9048, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.409090909090909, |
|
"grad_norm": 1.66022527217865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9039, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.4136363636363636, |
|
"grad_norm": 1.6025142669677734, |
|
"learning_rate": 9.924242424242424e-05, |
|
"loss": 0.8169, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.418181818181818, |
|
"grad_norm": 1.871733546257019, |
|
"learning_rate": 9.848484848484848e-05, |
|
"loss": 1.039, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.422727272727273, |
|
"grad_norm": 2.35320782661438, |
|
"learning_rate": 9.772727272727274e-05, |
|
"loss": 1.3449, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.4272727272727272, |
|
"grad_norm": 1.9311728477478027, |
|
"learning_rate": 9.696969696969698e-05, |
|
"loss": 1.0332, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.4318181818181817, |
|
"grad_norm": 1.6838319301605225, |
|
"learning_rate": 9.621212121212121e-05, |
|
"loss": 0.6631, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.4363636363636365, |
|
"grad_norm": 1.9957849979400635, |
|
"learning_rate": 9.545454545454546e-05, |
|
"loss": 1.0397, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.440909090909091, |
|
"grad_norm": 2.338730573654175, |
|
"learning_rate": 9.46969696969697e-05, |
|
"loss": 1.1155, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.4454545454545453, |
|
"grad_norm": 2.0578792095184326, |
|
"learning_rate": 9.393939393939393e-05, |
|
"loss": 1.0634, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 2.0512609481811523, |
|
"learning_rate": 9.318181818181818e-05, |
|
"loss": 0.9052, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 2.2808845043182373, |
|
"learning_rate": 9.242424242424242e-05, |
|
"loss": 1.2479, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.459090909090909, |
|
"grad_norm": 1.7963327169418335, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.8655, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.463636363636364, |
|
"grad_norm": 2.378777265548706, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 1.1019, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.4681818181818183, |
|
"grad_norm": 1.7346596717834473, |
|
"learning_rate": 9.015151515151515e-05, |
|
"loss": 0.6478, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.4727272727272727, |
|
"grad_norm": 1.8121107816696167, |
|
"learning_rate": 8.93939393939394e-05, |
|
"loss": 0.9549, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.4772727272727275, |
|
"grad_norm": 1.9102083444595337, |
|
"learning_rate": 8.863636363636364e-05, |
|
"loss": 0.9103, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.481818181818182, |
|
"grad_norm": 2.3148677349090576, |
|
"learning_rate": 8.787878787878787e-05, |
|
"loss": 1.1075, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.4863636363636363, |
|
"grad_norm": 2.3098530769348145, |
|
"learning_rate": 8.712121212121212e-05, |
|
"loss": 1.0885, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.4909090909090907, |
|
"grad_norm": 2.061582565307617, |
|
"learning_rate": 8.636363636363636e-05, |
|
"loss": 0.7894, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.4954545454545456, |
|
"grad_norm": 2.3829803466796875, |
|
"learning_rate": 8.560606060606061e-05, |
|
"loss": 1.2397, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 2.111055612564087, |
|
"learning_rate": 8.484848484848486e-05, |
|
"loss": 1.0463, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.5045454545454544, |
|
"grad_norm": 1.883468508720398, |
|
"learning_rate": 8.40909090909091e-05, |
|
"loss": 0.9837, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.509090909090909, |
|
"grad_norm": 1.8480087518692017, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.897, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.5136363636363637, |
|
"grad_norm": 1.9513871669769287, |
|
"learning_rate": 8.257575757575758e-05, |
|
"loss": 0.9668, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.518181818181818, |
|
"grad_norm": 1.5687415599822998, |
|
"learning_rate": 8.181818181818182e-05, |
|
"loss": 0.8729, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.5227272727272725, |
|
"grad_norm": 1.959887981414795, |
|
"learning_rate": 8.106060606060605e-05, |
|
"loss": 0.9612, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.5272727272727273, |
|
"grad_norm": 2.1609091758728027, |
|
"learning_rate": 8.030303030303031e-05, |
|
"loss": 1.133, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.5318181818181817, |
|
"grad_norm": 2.534611225128174, |
|
"learning_rate": 7.954545454545455e-05, |
|
"loss": 1.3566, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.536363636363636, |
|
"grad_norm": 2.731877088546753, |
|
"learning_rate": 7.878787878787879e-05, |
|
"loss": 1.1991, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.540909090909091, |
|
"grad_norm": 2.1953938007354736, |
|
"learning_rate": 7.803030303030304e-05, |
|
"loss": 0.932, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 2.260007381439209, |
|
"learning_rate": 7.727272727272727e-05, |
|
"loss": 1.0682, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 2.9932045936584473, |
|
"learning_rate": 7.651515151515151e-05, |
|
"loss": 1.2489, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.5545454545454547, |
|
"grad_norm": 2.4135005474090576, |
|
"learning_rate": 7.575757575757576e-05, |
|
"loss": 0.7289, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.559090909090909, |
|
"grad_norm": 2.2235300540924072, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.7027, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.5636363636363635, |
|
"grad_norm": 2.6621127128601074, |
|
"learning_rate": 7.424242424242426e-05, |
|
"loss": 1.2601, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.5681818181818183, |
|
"grad_norm": 2.574686050415039, |
|
"learning_rate": 7.348484848484849e-05, |
|
"loss": 1.1076, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.5727272727272728, |
|
"grad_norm": 2.416339874267578, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.9473, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.577272727272727, |
|
"grad_norm": 1.7082793712615967, |
|
"learning_rate": 7.196969696969698e-05, |
|
"loss": 0.7671, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.581818181818182, |
|
"grad_norm": 2.220196008682251, |
|
"learning_rate": 7.121212121212121e-05, |
|
"loss": 1.1754, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.5863636363636364, |
|
"grad_norm": 2.26267409324646, |
|
"learning_rate": 7.045454545454545e-05, |
|
"loss": 1.4229, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.590909090909091, |
|
"grad_norm": 1.7881556749343872, |
|
"learning_rate": 6.96969696969697e-05, |
|
"loss": 0.8333, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.5954545454545457, |
|
"grad_norm": 2.156179904937744, |
|
"learning_rate": 6.893939393939393e-05, |
|
"loss": 1.1788, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.6093627214431763, |
|
"learning_rate": 6.818181818181818e-05, |
|
"loss": 0.6442, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.6045454545454545, |
|
"grad_norm": 1.984737753868103, |
|
"learning_rate": 6.742424242424243e-05, |
|
"loss": 0.9969, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.6090909090909093, |
|
"grad_norm": 1.958917498588562, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.8534, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.6136363636363638, |
|
"grad_norm": 3.060192346572876, |
|
"learning_rate": 6.59090909090909e-05, |
|
"loss": 1.4748, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.618181818181818, |
|
"grad_norm": 1.9157240390777588, |
|
"learning_rate": 6.515151515151516e-05, |
|
"loss": 0.8512, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.6227272727272726, |
|
"grad_norm": 1.6468448638916016, |
|
"learning_rate": 6.439393939393939e-05, |
|
"loss": 0.7536, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.6272727272727274, |
|
"grad_norm": 1.9406344890594482, |
|
"learning_rate": 6.363636363636363e-05, |
|
"loss": 0.9798, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.631818181818182, |
|
"grad_norm": 1.8992547988891602, |
|
"learning_rate": 6.287878787878788e-05, |
|
"loss": 1.1394, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 1.8168598413467407, |
|
"learning_rate": 6.212121212121213e-05, |
|
"loss": 0.8848, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.6409090909090907, |
|
"grad_norm": 2.8009986877441406, |
|
"learning_rate": 6.136363636363636e-05, |
|
"loss": 1.1817, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.6454545454545455, |
|
"grad_norm": 1.8650470972061157, |
|
"learning_rate": 6.060606060606061e-05, |
|
"loss": 0.9148, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 2.132161855697632, |
|
"learning_rate": 5.984848484848485e-05, |
|
"loss": 1.0103, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.6545454545454543, |
|
"grad_norm": 2.488576650619507, |
|
"learning_rate": 5.909090909090909e-05, |
|
"loss": 1.5804, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.659090909090909, |
|
"grad_norm": 1.7953377962112427, |
|
"learning_rate": 5.833333333333333e-05, |
|
"loss": 0.8419, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.6636363636363636, |
|
"grad_norm": 2.563900947570801, |
|
"learning_rate": 5.757575757575758e-05, |
|
"loss": 1.1122, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.668181818181818, |
|
"grad_norm": 2.112504243850708, |
|
"learning_rate": 5.681818181818182e-05, |
|
"loss": 0.8345, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.672727272727273, |
|
"grad_norm": 2.874629020690918, |
|
"learning_rate": 5.606060606060606e-05, |
|
"loss": 1.257, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.6772727272727272, |
|
"grad_norm": 2.3965139389038086, |
|
"learning_rate": 5.5303030303030304e-05, |
|
"loss": 1.4174, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.6818181818181817, |
|
"grad_norm": 2.149787425994873, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 1.0162, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.6863636363636365, |
|
"grad_norm": 3.67689847946167, |
|
"learning_rate": 5.378787878787879e-05, |
|
"loss": 0.9925, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.690909090909091, |
|
"grad_norm": 2.144545316696167, |
|
"learning_rate": 5.303030303030303e-05, |
|
"loss": 1.2257, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.6954545454545453, |
|
"grad_norm": 1.9149094820022583, |
|
"learning_rate": 5.227272727272727e-05, |
|
"loss": 0.7236, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 2.803966999053955, |
|
"learning_rate": 5.151515151515152e-05, |
|
"loss": 1.1317, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.7045454545454546, |
|
"grad_norm": 2.1107089519500732, |
|
"learning_rate": 5.075757575757576e-05, |
|
"loss": 0.9265, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.709090909090909, |
|
"grad_norm": 2.037118911743164, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6859, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.713636363636364, |
|
"grad_norm": 2.310952663421631, |
|
"learning_rate": 4.924242424242424e-05, |
|
"loss": 0.98, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.7181818181818183, |
|
"grad_norm": 1.9925788640975952, |
|
"learning_rate": 4.848484848484849e-05, |
|
"loss": 0.8919, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.7227272727272727, |
|
"grad_norm": 2.466705083847046, |
|
"learning_rate": 4.772727272727273e-05, |
|
"loss": 1.3115, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 2.8948001861572266, |
|
"learning_rate": 4.6969696969696966e-05, |
|
"loss": 1.4843, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.731818181818182, |
|
"grad_norm": 1.8009178638458252, |
|
"learning_rate": 4.621212121212121e-05, |
|
"loss": 0.8387, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.7363636363636363, |
|
"grad_norm": 1.7695908546447754, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.6376, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.740909090909091, |
|
"grad_norm": 2.255938768386841, |
|
"learning_rate": 4.46969696969697e-05, |
|
"loss": 0.9277, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.7454545454545456, |
|
"grad_norm": 2.6216013431549072, |
|
"learning_rate": 4.393939393939394e-05, |
|
"loss": 0.8539, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 2.33111310005188, |
|
"learning_rate": 4.318181818181818e-05, |
|
"loss": 0.766, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.7545454545454544, |
|
"grad_norm": 2.256770610809326, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 0.9658, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.759090909090909, |
|
"grad_norm": 2.4762847423553467, |
|
"learning_rate": 4.1666666666666665e-05, |
|
"loss": 0.8902, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.7636363636363637, |
|
"grad_norm": 1.8913813829421997, |
|
"learning_rate": 4.090909090909091e-05, |
|
"loss": 0.6938, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.768181818181818, |
|
"grad_norm": 1.928743839263916, |
|
"learning_rate": 4.015151515151516e-05, |
|
"loss": 0.637, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.7727272727272725, |
|
"grad_norm": 3.3095438480377197, |
|
"learning_rate": 3.939393939393939e-05, |
|
"loss": 0.9913, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.7772727272727273, |
|
"grad_norm": 2.611701011657715, |
|
"learning_rate": 3.8636363636363636e-05, |
|
"loss": 1.1573, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.7818181818181817, |
|
"grad_norm": 2.022073745727539, |
|
"learning_rate": 3.787878787878788e-05, |
|
"loss": 0.9537, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.786363636363636, |
|
"grad_norm": 2.9264447689056396, |
|
"learning_rate": 3.712121212121213e-05, |
|
"loss": 1.4692, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.790909090909091, |
|
"grad_norm": 2.22469425201416, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 1.0733, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.7954545454545454, |
|
"grad_norm": 2.8329367637634277, |
|
"learning_rate": 3.560606060606061e-05, |
|
"loss": 1.5153, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.8949047327041626, |
|
"learning_rate": 3.484848484848485e-05, |
|
"loss": 0.6316, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.8045454545454547, |
|
"grad_norm": 2.597440242767334, |
|
"learning_rate": 3.409090909090909e-05, |
|
"loss": 1.6049, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.809090909090909, |
|
"grad_norm": 2.0482330322265625, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.0083, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.8136363636363635, |
|
"grad_norm": 1.7359944581985474, |
|
"learning_rate": 3.257575757575758e-05, |
|
"loss": 0.9097, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 1.9825539588928223, |
|
"learning_rate": 3.1818181818181814e-05, |
|
"loss": 0.7107, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.8227272727272728, |
|
"grad_norm": 1.7564197778701782, |
|
"learning_rate": 3.106060606060606e-05, |
|
"loss": 0.9089, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.827272727272727, |
|
"grad_norm": 2.735137939453125, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 1.4978, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.831818181818182, |
|
"grad_norm": 2.702873706817627, |
|
"learning_rate": 2.9545454545454545e-05, |
|
"loss": 1.2556, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.8363636363636364, |
|
"grad_norm": 1.9755101203918457, |
|
"learning_rate": 2.878787878787879e-05, |
|
"loss": 0.8022, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.840909090909091, |
|
"grad_norm": 2.0104050636291504, |
|
"learning_rate": 2.803030303030303e-05, |
|
"loss": 0.8993, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.8454545454545457, |
|
"grad_norm": 2.915588855743408, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 1.7374, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 2.5364370346069336, |
|
"learning_rate": 2.6515151515151516e-05, |
|
"loss": 0.7967, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.8545454545454545, |
|
"grad_norm": 2.731673002243042, |
|
"learning_rate": 2.575757575757576e-05, |
|
"loss": 1.5847, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.8590909090909093, |
|
"grad_norm": 2.1468403339385986, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0101, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.8636363636363638, |
|
"grad_norm": 1.8162040710449219, |
|
"learning_rate": 2.4242424242424244e-05, |
|
"loss": 0.6688, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.868181818181818, |
|
"grad_norm": 2.285930871963501, |
|
"learning_rate": 2.3484848484848483e-05, |
|
"loss": 1.1906, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.8727272727272726, |
|
"grad_norm": 2.0448861122131348, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.8868, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.8772727272727274, |
|
"grad_norm": 2.98807692527771, |
|
"learning_rate": 2.196969696969697e-05, |
|
"loss": 1.1679, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.881818181818182, |
|
"grad_norm": 1.9618700742721558, |
|
"learning_rate": 2.1212121212121215e-05, |
|
"loss": 1.0659, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.8863636363636362, |
|
"grad_norm": 2.200741767883301, |
|
"learning_rate": 2.0454545454545454e-05, |
|
"loss": 1.0582, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.8909090909090907, |
|
"grad_norm": 2.1953506469726562, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 1.4356, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.8954545454545455, |
|
"grad_norm": 2.1912357807159424, |
|
"learning_rate": 1.893939393939394e-05, |
|
"loss": 1.0532, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 2.125601053237915, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 1.458, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.9045454545454543, |
|
"grad_norm": 1.9430787563323975, |
|
"learning_rate": 1.7424242424242425e-05, |
|
"loss": 0.8313, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 1.7255802154541016, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.7894, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.9136363636363636, |
|
"grad_norm": 2.259798526763916, |
|
"learning_rate": 1.5909090909090907e-05, |
|
"loss": 1.0942, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.918181818181818, |
|
"grad_norm": 2.4443533420562744, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 1.5392, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.922727272727273, |
|
"grad_norm": 2.437310218811035, |
|
"learning_rate": 1.4393939393939396e-05, |
|
"loss": 0.9475, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.9272727272727272, |
|
"grad_norm": 2.1248443126678467, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.6917, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.9318181818181817, |
|
"grad_norm": 2.0161659717559814, |
|
"learning_rate": 1.287878787878788e-05, |
|
"loss": 1.0465, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.9363636363636365, |
|
"grad_norm": 1.825695514678955, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.8001, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.940909090909091, |
|
"grad_norm": 2.58219575881958, |
|
"learning_rate": 1.1363636363636365e-05, |
|
"loss": 0.9649, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.9454545454545453, |
|
"grad_norm": 1.9554407596588135, |
|
"learning_rate": 1.0606060606060607e-05, |
|
"loss": 1.1447, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 2.89900541305542, |
|
"learning_rate": 9.848484848484848e-06, |
|
"loss": 0.9461, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 1.8475868701934814, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.1863, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.959090909090909, |
|
"grad_norm": 2.264302968978882, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.9459, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.963636363636364, |
|
"grad_norm": 2.157198190689087, |
|
"learning_rate": 7.5757575757575764e-06, |
|
"loss": 1.4461, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.9681818181818183, |
|
"grad_norm": 2.3027210235595703, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 0.8407, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.9727272727272727, |
|
"grad_norm": 1.786800503730774, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 0.7051, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.9772727272727275, |
|
"grad_norm": 2.4173872470855713, |
|
"learning_rate": 5.303030303030304e-06, |
|
"loss": 1.0276, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.981818181818182, |
|
"grad_norm": 3.755701780319214, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.9572, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.9863636363636363, |
|
"grad_norm": 2.0097804069519043, |
|
"learning_rate": 3.7878787878787882e-06, |
|
"loss": 0.9736, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.990909090909091, |
|
"grad_norm": 1.773881196975708, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 0.8404, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.9954545454545456, |
|
"grad_norm": 2.139065980911255, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"loss": 1.1301, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8846311569213867, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"loss": 0.7227, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8952, |
|
"eval_gen_len": 41.8455, |
|
"eval_loss": 1.8697103261947632, |
|
"eval_precision": 0.8934, |
|
"eval_recall": 0.8971, |
|
"eval_rouge1": 0.4709, |
|
"eval_rouge2": 0.2223, |
|
"eval_rougeL": 0.3999, |
|
"eval_rougeLsum": 0.4391, |
|
"eval_runtime": 25.3276, |
|
"eval_samples_per_second": 4.343, |
|
"eval_steps_per_second": 0.553, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 660, |
|
"total_flos": 2484005840363520.0, |
|
"train_loss": 1.4827006761774872, |
|
"train_runtime": 507.3071, |
|
"train_samples_per_second": 5.198, |
|
"train_steps_per_second": 1.301 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 660, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2484005840363520.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|