|
{ |
|
"best_metric": 4.786604404449463, |
|
"best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/superlative-quantifier/lstm/3/checkpoints/checkpoint-76320", |
|
"epoch": 0.025000606015738065, |
|
"eval_steps": 10, |
|
"global_step": 76320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999998362119627e-05, |
|
"loss": 10.8181, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999161405248948e-05, |
|
"loss": 7.554, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.998322810497896e-05, |
|
"loss": 7.0541, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997484215746844e-05, |
|
"loss": 6.9869, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996645620995792e-05, |
|
"loss": 6.9413, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99580702624474e-05, |
|
"loss": 6.8806, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994968431493688e-05, |
|
"loss": 6.7151, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994129836742636e-05, |
|
"loss": 6.609, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.993292879871958e-05, |
|
"loss": 6.521, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.992454285120906e-05, |
|
"loss": 6.4483, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.991617328250227e-05, |
|
"loss": 6.386, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.990778733499175e-05, |
|
"loss": 6.3152, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.989940138748123e-05, |
|
"loss": 6.2486, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.989101543997071e-05, |
|
"loss": 6.189, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.988262949246018e-05, |
|
"loss": 6.1376, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9874259923753396e-05, |
|
"loss": 6.0919, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9865873976242876e-05, |
|
"loss": 6.047, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9857488028732356e-05, |
|
"loss": 6.0107, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.984910208122183e-05, |
|
"loss": 5.9639, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9840716133711316e-05, |
|
"loss": 5.9378, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9832330186200796e-05, |
|
"loss": 5.9037, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9823944238690276e-05, |
|
"loss": 5.8591, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9815558291179756e-05, |
|
"loss": 5.8305, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9807172343669236e-05, |
|
"loss": 5.7959, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9798786396158716e-05, |
|
"loss": 5.7762, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9790400448648195e-05, |
|
"loss": 5.7448, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9782014501137675e-05, |
|
"loss": 5.7162, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9773628553627155e-05, |
|
"loss": 5.6881, |
|
"step": 13824 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9765242606116635e-05, |
|
"loss": 5.6819, |
|
"step": 14336 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9756856658606115e-05, |
|
"loss": 5.643, |
|
"step": 14848 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9748470711095595e-05, |
|
"loss": 5.6307, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9740101142388804e-05, |
|
"loss": 5.6222, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9731715194878284e-05, |
|
"loss": 5.5913, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9723329247367764e-05, |
|
"loss": 5.5736, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9714943299857244e-05, |
|
"loss": 5.5687, |
|
"step": 17408 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9706557352346724e-05, |
|
"loss": 5.5461, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9698171404836204e-05, |
|
"loss": 5.5371, |
|
"step": 18432 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.968980183612942e-05, |
|
"loss": 5.4986, |
|
"step": 18944 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.96814158886189e-05, |
|
"loss": 5.4921, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.967302994110837e-05, |
|
"loss": 5.4639, |
|
"step": 19968 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.966466037240159e-05, |
|
"loss": 5.4617, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.965627442489107e-05, |
|
"loss": 5.4486, |
|
"step": 20992 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.964788847738054e-05, |
|
"loss": 5.443, |
|
"step": 21504 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.963950252987002e-05, |
|
"loss": 5.4127, |
|
"step": 22016 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.96311165823595e-05, |
|
"loss": 5.411, |
|
"step": 22528 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.962273063484898e-05, |
|
"loss": 5.3966, |
|
"step": 23040 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.961434468733847e-05, |
|
"loss": 5.3982, |
|
"step": 23552 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.960595873982795e-05, |
|
"loss": 5.3902, |
|
"step": 24064 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.959757279231743e-05, |
|
"loss": 5.362, |
|
"step": 24576 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958920322361064e-05, |
|
"loss": 5.346, |
|
"step": 25088 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958081727610012e-05, |
|
"loss": 5.3559, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.95724313285896e-05, |
|
"loss": 5.3494, |
|
"step": 26112 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.956404538107908e-05, |
|
"loss": 5.3248, |
|
"step": 26624 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.955567581237229e-05, |
|
"loss": 5.3156, |
|
"step": 27136 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9547306243665496e-05, |
|
"loss": 5.3127, |
|
"step": 27648 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9538920296154976e-05, |
|
"loss": 5.2933, |
|
"step": 28160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9530534348644456e-05, |
|
"loss": 5.3016, |
|
"step": 28672 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9522148401133936e-05, |
|
"loss": 5.268, |
|
"step": 29184 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.951376245362342e-05, |
|
"loss": 5.2718, |
|
"step": 29696 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.95053765061129e-05, |
|
"loss": 5.2606, |
|
"step": 30208 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.949700693740611e-05, |
|
"loss": 5.2606, |
|
"step": 30720 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.948862098989559e-05, |
|
"loss": 5.2366, |
|
"step": 31232 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.948023504238507e-05, |
|
"loss": 5.2443, |
|
"step": 31744 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.947184909487455e-05, |
|
"loss": 5.2165, |
|
"step": 32256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.946346314736403e-05, |
|
"loss": 5.2226, |
|
"step": 32768 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.945509357865724e-05, |
|
"loss": 5.2228, |
|
"step": 33280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.944670763114672e-05, |
|
"loss": 5.215, |
|
"step": 33792 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.94383216836362e-05, |
|
"loss": 5.194, |
|
"step": 34304 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.942993573612568e-05, |
|
"loss": 5.178, |
|
"step": 34816 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.942154978861516e-05, |
|
"loss": 5.1707, |
|
"step": 35328 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.941316384110464e-05, |
|
"loss": 5.1857, |
|
"step": 35840 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.940477789359412e-05, |
|
"loss": 5.175, |
|
"step": 36352 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.93963919460836e-05, |
|
"loss": 5.1593, |
|
"step": 36864 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.938800599857309e-05, |
|
"loss": 5.1699, |
|
"step": 37376 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9379636429866296e-05, |
|
"loss": 5.1678, |
|
"step": 37888 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9371250482355776e-05, |
|
"loss": 5.153, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9362864534845256e-05, |
|
"loss": 5.141, |
|
"step": 38912 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.935447858733473e-05, |
|
"loss": 5.1305, |
|
"step": 39424 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9346109018627945e-05, |
|
"loss": 5.128, |
|
"step": 39936 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9337723071117425e-05, |
|
"loss": 5.1128, |
|
"step": 40448 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9329337123606905e-05, |
|
"loss": 5.1165, |
|
"step": 40960 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9320967554900114e-05, |
|
"loss": 5.1039, |
|
"step": 41472 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9312581607389594e-05, |
|
"loss": 5.1159, |
|
"step": 41984 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9304195659879074e-05, |
|
"loss": 5.1033, |
|
"step": 42496 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9295809712368554e-05, |
|
"loss": 5.0802, |
|
"step": 43008 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9287423764858034e-05, |
|
"loss": 5.0832, |
|
"step": 43520 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9279037817347514e-05, |
|
"loss": 5.0818, |
|
"step": 44032 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9270651869836994e-05, |
|
"loss": 5.0803, |
|
"step": 44544 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9262265922326474e-05, |
|
"loss": 5.0756, |
|
"step": 45056 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9253879974815954e-05, |
|
"loss": 5.057, |
|
"step": 45568 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.924551040610916e-05, |
|
"loss": 5.0545, |
|
"step": 46080 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.923712445859864e-05, |
|
"loss": 5.052, |
|
"step": 46592 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.922873851108812e-05, |
|
"loss": 5.0378, |
|
"step": 47104 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.92203525635776e-05, |
|
"loss": 5.0278, |
|
"step": 47616 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.921196661606708e-05, |
|
"loss": 5.0278, |
|
"step": 48128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.920358066855656e-05, |
|
"loss": 5.0352, |
|
"step": 48640 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.919519472104604e-05, |
|
"loss": 5.0228, |
|
"step": 49152 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.918680877353552e-05, |
|
"loss": 5.0122, |
|
"step": 49664 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.917843920482874e-05, |
|
"loss": 5.003, |
|
"step": 50176 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.917005325731822e-05, |
|
"loss": 5.0043, |
|
"step": 50688 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.916168368861143e-05, |
|
"loss": 5.0084, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.915329774110091e-05, |
|
"loss": 4.9993, |
|
"step": 51712 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.914491179359039e-05, |
|
"loss": 4.9931, |
|
"step": 52224 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.913652584607987e-05, |
|
"loss": 4.9805, |
|
"step": 52736 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.912813989856935e-05, |
|
"loss": 4.9902, |
|
"step": 53248 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.911975395105883e-05, |
|
"loss": 4.9681, |
|
"step": 53760 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9111384382352036e-05, |
|
"loss": 4.9724, |
|
"step": 54272 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9103014813645245e-05, |
|
"loss": 4.965, |
|
"step": 54784 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.909462886613473e-05, |
|
"loss": 4.9575, |
|
"step": 55296 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.908624291862421e-05, |
|
"loss": 4.9525, |
|
"step": 55808 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.907785697111369e-05, |
|
"loss": 4.9576, |
|
"step": 56320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.906947102360317e-05, |
|
"loss": 4.9438, |
|
"step": 56832 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.906108507609265e-05, |
|
"loss": 4.9418, |
|
"step": 57344 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.905269912858213e-05, |
|
"loss": 4.9395, |
|
"step": 57856 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.904431318107161e-05, |
|
"loss": 4.9347, |
|
"step": 58368 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.903592723356109e-05, |
|
"loss": 4.938, |
|
"step": 58880 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9027541286050565e-05, |
|
"loss": 4.9293, |
|
"step": 59392 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.901917171734378e-05, |
|
"loss": 4.9274, |
|
"step": 59904 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.901078576983326e-05, |
|
"loss": 4.9239, |
|
"step": 60416 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.900239982232274e-05, |
|
"loss": 4.9175, |
|
"step": 60928 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8994013874812214e-05, |
|
"loss": 4.9161, |
|
"step": 61440 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.89856279273017e-05, |
|
"loss": 4.9034, |
|
"step": 61952 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.897724197979118e-05, |
|
"loss": 4.9007, |
|
"step": 62464 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.896885603228066e-05, |
|
"loss": 4.8986, |
|
"step": 62976 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.896047008477014e-05, |
|
"loss": 4.8971, |
|
"step": 63488 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.895210051606335e-05, |
|
"loss": 4.8903, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.894371456855283e-05, |
|
"loss": 4.8877, |
|
"step": 64512 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.893532862104231e-05, |
|
"loss": 4.8865, |
|
"step": 65024 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.892695905233552e-05, |
|
"loss": 4.8742, |
|
"step": 65536 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8918573104825e-05, |
|
"loss": 4.883, |
|
"step": 66048 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.891018715731448e-05, |
|
"loss": 4.8813, |
|
"step": 66560 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.890180120980396e-05, |
|
"loss": 4.8616, |
|
"step": 67072 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.889341526229344e-05, |
|
"loss": 4.8506, |
|
"step": 67584 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.888502931478292e-05, |
|
"loss": 4.8572, |
|
"step": 68096 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.88766433672724e-05, |
|
"loss": 4.8643, |
|
"step": 68608 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.886825741976188e-05, |
|
"loss": 4.8714, |
|
"step": 69120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8859887851055094e-05, |
|
"loss": 4.8537, |
|
"step": 69632 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8851501903544574e-05, |
|
"loss": 4.8524, |
|
"step": 70144 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8843115956034054e-05, |
|
"loss": 4.8537, |
|
"step": 70656 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8834730008523534e-05, |
|
"loss": 4.8516, |
|
"step": 71168 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.882636043981674e-05, |
|
"loss": 4.8381, |
|
"step": 71680 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.881797449230622e-05, |
|
"loss": 4.8321, |
|
"step": 72192 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.88095885447957e-05, |
|
"loss": 4.8387, |
|
"step": 72704 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.880120259728518e-05, |
|
"loss": 4.829, |
|
"step": 73216 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.879283302857839e-05, |
|
"loss": 4.8262, |
|
"step": 73728 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.878444708106787e-05, |
|
"loss": 4.8299, |
|
"step": 74240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.877606113355735e-05, |
|
"loss": 4.8144, |
|
"step": 74752 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.876769156485057e-05, |
|
"loss": 4.8243, |
|
"step": 75264 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.875930561734005e-05, |
|
"loss": 4.8211, |
|
"step": 75776 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.875091966982953e-05, |
|
"loss": 4.7985, |
|
"step": 76288 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 4.786604404449463, |
|
"eval_runtime": 295.0564, |
|
"eval_samples_per_second": 1293.281, |
|
"eval_steps_per_second": 40.416, |
|
"step": 76320 |
|
} |
|
], |
|
"logging_steps": 512, |
|
"max_steps": 3052726, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 10, |
|
"total_flos": 3.0262213707177744e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|