|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 10000, |
|
"global_step": 140000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.99862042482382e-05, |
|
"loss": 1.1884, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9971981823741495e-05, |
|
"loss": 1.0269, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.995775939924479e-05, |
|
"loss": 0.9881, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.994353697474809e-05, |
|
"loss": 0.9348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9929314550251384e-05, |
|
"loss": 0.9291, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.991509212575468e-05, |
|
"loss": 0.8964, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9900869701257974e-05, |
|
"loss": 0.8579, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9886647276761272e-05, |
|
"loss": 0.8678, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9872424852264567e-05, |
|
"loss": 0.8506, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9858202427767862e-05, |
|
"loss": 0.8268, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.984398000327116e-05, |
|
"loss": 0.8056, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9829757578774455e-05, |
|
"loss": 0.7876, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.981553515427775e-05, |
|
"loss": 0.7956, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.980131272978105e-05, |
|
"loss": 0.7904, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9787090305284344e-05, |
|
"loss": 0.7707, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.977286788078764e-05, |
|
"loss": 0.7652, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9758645456290934e-05, |
|
"loss": 0.7798, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9744423031794232e-05, |
|
"loss": 0.7454, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9730200607297527e-05, |
|
"loss": 0.7506, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9715978182800822e-05, |
|
"loss": 0.727, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.970175575830412e-05, |
|
"loss": 0.714, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9687533333807415e-05, |
|
"loss": 0.7132, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9673310909310714e-05, |
|
"loss": 0.7088, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.965908848481401e-05, |
|
"loss": 0.7266, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9644866060317303e-05, |
|
"loss": 0.7031, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9630643635820602e-05, |
|
"loss": 0.6948, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9616421211323893e-05, |
|
"loss": 0.7048, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9602198786827192e-05, |
|
"loss": 0.699, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9587976362330487e-05, |
|
"loss": 0.679, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9573753937833785e-05, |
|
"loss": 0.6847, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.955953151333708e-05, |
|
"loss": 0.6781, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9545309088840375e-05, |
|
"loss": 0.6803, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9531086664343673e-05, |
|
"loss": 0.6594, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.951686423984697e-05, |
|
"loss": 0.6729, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9502641815350263e-05, |
|
"loss": 0.664, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9488419390853562e-05, |
|
"loss": 0.6611, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9474196966356857e-05, |
|
"loss": 0.6497, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.945997454186015e-05, |
|
"loss": 0.65, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9445752117363447e-05, |
|
"loss": 0.647, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9431529692866745e-05, |
|
"loss": 0.6528, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.941730726837004e-05, |
|
"loss": 0.6432, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9403084843873335e-05, |
|
"loss": 0.6291, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9388862419376633e-05, |
|
"loss": 0.6388, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.937463999487993e-05, |
|
"loss": 0.6261, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9360417570383227e-05, |
|
"loss": 0.642, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.934619514588652e-05, |
|
"loss": 0.6399, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9331972721389817e-05, |
|
"loss": 0.6213, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.931775029689311e-05, |
|
"loss": 0.6163, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9303527872396407e-05, |
|
"loss": 0.6167, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9289305447899705e-05, |
|
"loss": 0.6127, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.6764795780181885, |
|
"eval_runtime": 34.8863, |
|
"eval_samples_per_second": 143.323, |
|
"eval_steps_per_second": 1.147, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9275083023403e-05, |
|
"loss": 0.5963, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9260860598906298e-05, |
|
"loss": 0.6034, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9246638174409593e-05, |
|
"loss": 0.5944, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9232415749912888e-05, |
|
"loss": 0.5951, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9218193325416187e-05, |
|
"loss": 0.6055, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.920397090091948e-05, |
|
"loss": 0.6052, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9189748476422777e-05, |
|
"loss": 0.5907, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9175526051926075e-05, |
|
"loss": 0.5933, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.916130362742937e-05, |
|
"loss": 0.6002, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9147081202932665e-05, |
|
"loss": 0.5985, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.913285877843596e-05, |
|
"loss": 0.5761, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9118636353939258e-05, |
|
"loss": 0.5749, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9104413929442553e-05, |
|
"loss": 0.5855, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9090191504945848e-05, |
|
"loss": 0.5724, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9075969080449146e-05, |
|
"loss": 0.5856, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.906174665595244e-05, |
|
"loss": 0.5843, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.904752423145574e-05, |
|
"loss": 0.5624, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9033301806959035e-05, |
|
"loss": 0.5784, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.901907938246233e-05, |
|
"loss": 0.5589, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9004856957965625e-05, |
|
"loss": 0.5732, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.899063453346892e-05, |
|
"loss": 0.5529, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8976412108972218e-05, |
|
"loss": 0.5648, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8962189684475513e-05, |
|
"loss": 0.5535, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.894796725997881e-05, |
|
"loss": 0.5527, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8933744835482106e-05, |
|
"loss": 0.5478, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.89195224109854e-05, |
|
"loss": 0.5419, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.89052999864887e-05, |
|
"loss": 0.5596, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8891077561991995e-05, |
|
"loss": 0.5444, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.887685513749529e-05, |
|
"loss": 0.5526, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8862632712998585e-05, |
|
"loss": 0.5343, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8848410288501883e-05, |
|
"loss": 0.5327, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8834187864005178e-05, |
|
"loss": 0.5289, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8819965439508473e-05, |
|
"loss": 0.5467, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.880574301501177e-05, |
|
"loss": 0.5357, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8791520590515066e-05, |
|
"loss": 0.5317, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.877729816601836e-05, |
|
"loss": 0.5326, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.876307574152166e-05, |
|
"loss": 0.5335, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8748853317024955e-05, |
|
"loss": 0.5364, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8734630892528253e-05, |
|
"loss": 0.5295, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8720408468031548e-05, |
|
"loss": 0.53, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8706186043534843e-05, |
|
"loss": 0.5325, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8691963619038138e-05, |
|
"loss": 0.5196, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8677741194541433e-05, |
|
"loss": 0.5206, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.866351877004473e-05, |
|
"loss": 0.5231, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8649296345548026e-05, |
|
"loss": 0.5134, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8635073921051325e-05, |
|
"loss": 0.5113, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.862085149655462e-05, |
|
"loss": 0.5147, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8606629072057914e-05, |
|
"loss": 0.5255, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8592406647561213e-05, |
|
"loss": 0.5106, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8578184223064508e-05, |
|
"loss": 0.5083, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 0.5610331296920776, |
|
"eval_runtime": 34.9828, |
|
"eval_samples_per_second": 142.927, |
|
"eval_steps_per_second": 1.143, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8563961798567803e-05, |
|
"loss": 0.5158, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8549739374071098e-05, |
|
"loss": 0.5055, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8535516949574396e-05, |
|
"loss": 0.5006, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.852129452507769e-05, |
|
"loss": 0.507, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8507072100580986e-05, |
|
"loss": 0.4954, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8492849676084284e-05, |
|
"loss": 0.5027, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.847862725158758e-05, |
|
"loss": 0.4875, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8464404827090874e-05, |
|
"loss": 0.4911, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8450182402594173e-05, |
|
"loss": 0.483, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8435959978097468e-05, |
|
"loss": 0.4891, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8421737553600766e-05, |
|
"loss": 0.4911, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8407515129104058e-05, |
|
"loss": 0.4742, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8393292704607356e-05, |
|
"loss": 0.4959, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.837907028011065e-05, |
|
"loss": 0.4959, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8364847855613946e-05, |
|
"loss": 0.4798, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8350625431117244e-05, |
|
"loss": 0.4927, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.833640300662054e-05, |
|
"loss": 0.4824, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8322180582123838e-05, |
|
"loss": 0.4811, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8307958157627133e-05, |
|
"loss": 0.4781, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8293735733130428e-05, |
|
"loss": 0.4705, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8279513308633726e-05, |
|
"loss": 0.4633, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.826529088413702e-05, |
|
"loss": 0.4809, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8251068459640316e-05, |
|
"loss": 0.4801, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.823684603514361e-05, |
|
"loss": 0.4813, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.822262361064691e-05, |
|
"loss": 0.4682, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8208401186150204e-05, |
|
"loss": 0.4745, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.81941787616535e-05, |
|
"loss": 0.4631, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8179956337156798e-05, |
|
"loss": 0.4733, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8165733912660093e-05, |
|
"loss": 0.4687, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8151511488163388e-05, |
|
"loss": 0.4685, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8137289063666686e-05, |
|
"loss": 0.4638, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.812306663916998e-05, |
|
"loss": 0.4482, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.810884421467328e-05, |
|
"loss": 0.4452, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.809462179017657e-05, |
|
"loss": 0.4553, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.808039936567987e-05, |
|
"loss": 0.4498, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8066176941183164e-05, |
|
"loss": 0.4468, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.805195451668646e-05, |
|
"loss": 0.4466, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8037732092189757e-05, |
|
"loss": 0.4465, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8023509667693052e-05, |
|
"loss": 0.4426, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.800928724319635e-05, |
|
"loss": 0.4466, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7995064818699646e-05, |
|
"loss": 0.4444, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.798084239420294e-05, |
|
"loss": 0.4445, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.796661996970624e-05, |
|
"loss": 0.4395, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.795239754520953e-05, |
|
"loss": 0.4425, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.793817512071283e-05, |
|
"loss": 0.4334, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7923952696216124e-05, |
|
"loss": 0.4411, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7909730271719422e-05, |
|
"loss": 0.4481, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.7895507847222717e-05, |
|
"loss": 0.431, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.7881285422726012e-05, |
|
"loss": 0.451, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.786706299822931e-05, |
|
"loss": 0.4406, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.47009751200675964, |
|
"eval_runtime": 35.0436, |
|
"eval_samples_per_second": 142.68, |
|
"eval_steps_per_second": 1.141, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.7852840573732606e-05, |
|
"loss": 0.4358, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.78386181492359e-05, |
|
"loss": 0.4333, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.78243957247392e-05, |
|
"loss": 0.4349, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7810173300242494e-05, |
|
"loss": 0.4317, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.779595087574579e-05, |
|
"loss": 0.4248, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7781728451249084e-05, |
|
"loss": 0.433, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7767506026752382e-05, |
|
"loss": 0.4275, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7753283602255677e-05, |
|
"loss": 0.4299, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7739061177758972e-05, |
|
"loss": 0.4294, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.772483875326227e-05, |
|
"loss": 0.416, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7710616328765566e-05, |
|
"loss": 0.4146, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.7696393904268864e-05, |
|
"loss": 0.4212, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.768217147977216e-05, |
|
"loss": 0.4201, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.7667949055275454e-05, |
|
"loss": 0.4147, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.7653726630778752e-05, |
|
"loss": 0.421, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.7639504206282044e-05, |
|
"loss": 0.4089, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7625281781785342e-05, |
|
"loss": 0.4118, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7611059357288637e-05, |
|
"loss": 0.4112, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7596836932791936e-05, |
|
"loss": 0.408, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.758261450829523e-05, |
|
"loss": 0.4174, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7568392083798525e-05, |
|
"loss": 0.4072, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7554169659301824e-05, |
|
"loss": 0.4136, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.753994723480512e-05, |
|
"loss": 0.395, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7525724810308414e-05, |
|
"loss": 0.4179, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7511502385811712e-05, |
|
"loss": 0.4104, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7497279961315007e-05, |
|
"loss": 0.404, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7483057536818302e-05, |
|
"loss": 0.4011, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7468835112321597e-05, |
|
"loss": 0.3986, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.7454612687824895e-05, |
|
"loss": 0.399, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.744039026332819e-05, |
|
"loss": 0.3954, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7426167838831485e-05, |
|
"loss": 0.4122, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7411945414334784e-05, |
|
"loss": 0.3826, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.739772298983808e-05, |
|
"loss": 0.3874, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7383500565341377e-05, |
|
"loss": 0.3815, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7369278140844672e-05, |
|
"loss": 0.3863, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7355055716347967e-05, |
|
"loss": 0.3827, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7340833291851262e-05, |
|
"loss": 0.3944, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7326610867354557e-05, |
|
"loss": 0.392, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7312388442857855e-05, |
|
"loss": 0.3948, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.729816601836115e-05, |
|
"loss": 0.382, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.728394359386445e-05, |
|
"loss": 0.3803, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7269721169367744e-05, |
|
"loss": 0.3795, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.725549874487104e-05, |
|
"loss": 0.383, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7241276320374337e-05, |
|
"loss": 0.3865, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7227053895877632e-05, |
|
"loss": 0.3755, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7212831471380927e-05, |
|
"loss": 0.3783, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7198609046884225e-05, |
|
"loss": 0.3822, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.718438662238752e-05, |
|
"loss": 0.375, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7170164197890815e-05, |
|
"loss": 0.3823, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.715594177339411e-05, |
|
"loss": 0.3847, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.4110700190067291, |
|
"eval_runtime": 35.0314, |
|
"eval_samples_per_second": 142.729, |
|
"eval_steps_per_second": 1.142, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.714171934889741e-05, |
|
"loss": 0.3818, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7127496924400704e-05, |
|
"loss": 0.3802, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7113274499904e-05, |
|
"loss": 0.3726, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7099052075407297e-05, |
|
"loss": 0.3822, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7084829650910592e-05, |
|
"loss": 0.363, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.707060722641389e-05, |
|
"loss": 0.3604, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7056384801917185e-05, |
|
"loss": 0.3665, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.704216237742048e-05, |
|
"loss": 0.3575, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7027939952923775e-05, |
|
"loss": 0.3655, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.701371752842707e-05, |
|
"loss": 0.3718, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.699949510393037e-05, |
|
"loss": 0.3667, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6985272679433663e-05, |
|
"loss": 0.3511, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6971050254936962e-05, |
|
"loss": 0.3644, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6956827830440257e-05, |
|
"loss": 0.3638, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6942605405943552e-05, |
|
"loss": 0.3637, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.692838298144685e-05, |
|
"loss": 0.3625, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6914160556950145e-05, |
|
"loss": 0.3501, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.689993813245344e-05, |
|
"loss": 0.3604, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6885715707956735e-05, |
|
"loss": 0.3615, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6871493283460033e-05, |
|
"loss": 0.3558, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.685727085896333e-05, |
|
"loss": 0.3644, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6843048434466623e-05, |
|
"loss": 0.3564, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.682882600996992e-05, |
|
"loss": 0.35, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6814603585473217e-05, |
|
"loss": 0.3559, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.680038116097651e-05, |
|
"loss": 0.3585, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.678615873647981e-05, |
|
"loss": 0.3544, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6771936311983105e-05, |
|
"loss": 0.3459, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6757713887486403e-05, |
|
"loss": 0.3521, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6743491462989698e-05, |
|
"loss": 0.3431, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6729269038492993e-05, |
|
"loss": 0.337, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6715046613996288e-05, |
|
"loss": 0.3376, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6700824189499583e-05, |
|
"loss": 0.3345, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.668660176500288e-05, |
|
"loss": 0.3327, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.6672379340506177e-05, |
|
"loss": 0.3431, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.6658156916009475e-05, |
|
"loss": 0.3375, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.664393449151277e-05, |
|
"loss": 0.3369, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6629712067016065e-05, |
|
"loss": 0.3397, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6615489642519363e-05, |
|
"loss": 0.338, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6601267218022658e-05, |
|
"loss": 0.3311, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6587044793525953e-05, |
|
"loss": 0.3369, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6572822369029248e-05, |
|
"loss": 0.3351, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6558599944532547e-05, |
|
"loss": 0.3428, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.654437752003584e-05, |
|
"loss": 0.3324, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6530155095539136e-05, |
|
"loss": 0.3273, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6515932671042435e-05, |
|
"loss": 0.3262, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.650171024654573e-05, |
|
"loss": 0.3343, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6487487822049025e-05, |
|
"loss": 0.3269, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6473265397552323e-05, |
|
"loss": 0.336, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6459042973055618e-05, |
|
"loss": 0.3316, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6444820548558916e-05, |
|
"loss": 0.3257, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.36412838101387024, |
|
"eval_runtime": 34.9354, |
|
"eval_samples_per_second": 143.121, |
|
"eval_steps_per_second": 1.145, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6430598124062208e-05, |
|
"loss": 0.3235, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6416375699565506e-05, |
|
"loss": 0.3294, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.64021532750688e-05, |
|
"loss": 0.3268, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6387930850572096e-05, |
|
"loss": 0.3162, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6373708426075395e-05, |
|
"loss": 0.3199, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.635948600157869e-05, |
|
"loss": 0.3168, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6345263577081988e-05, |
|
"loss": 0.3209, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6331041152585283e-05, |
|
"loss": 0.3097, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6316818728088578e-05, |
|
"loss": 0.3137, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6302596303591876e-05, |
|
"loss": 0.3095, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.628837387909517e-05, |
|
"loss": 0.3205, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6274151454598466e-05, |
|
"loss": 0.3141, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.625992903010176e-05, |
|
"loss": 0.3177, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.624570660560506e-05, |
|
"loss": 0.3198, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6231484181108355e-05, |
|
"loss": 0.3187, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.621726175661165e-05, |
|
"loss": 0.3208, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6203039332114948e-05, |
|
"loss": 0.3152, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6188816907618243e-05, |
|
"loss": 0.3194, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6174594483121538e-05, |
|
"loss": 0.3199, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6160372058624836e-05, |
|
"loss": 0.3181, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.614614963412813e-05, |
|
"loss": 0.3185, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6131927209631426e-05, |
|
"loss": 0.3178, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.611770478513472e-05, |
|
"loss": 0.3056, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.610348236063802e-05, |
|
"loss": 0.2985, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6089259936141315e-05, |
|
"loss": 0.3099, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.607503751164461e-05, |
|
"loss": 0.3004, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6060815087147908e-05, |
|
"loss": 0.2997, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6046592662651203e-05, |
|
"loss": 0.3124, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.60323702381545e-05, |
|
"loss": 0.3008, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6018147813657796e-05, |
|
"loss": 0.3086, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.600392538916109e-05, |
|
"loss": 0.3028, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.598970296466439e-05, |
|
"loss": 0.3035, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.597548054016768e-05, |
|
"loss": 0.2954, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.596125811567098e-05, |
|
"loss": 0.2794, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5947035691174274e-05, |
|
"loss": 0.3009, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5932813266677573e-05, |
|
"loss": 0.2945, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5918590842180868e-05, |
|
"loss": 0.3038, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5904368417684163e-05, |
|
"loss": 0.2877, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.589014599318746e-05, |
|
"loss": 0.2928, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5875923568690756e-05, |
|
"loss": 0.298, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.586170114419405e-05, |
|
"loss": 0.2985, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.584747871969735e-05, |
|
"loss": 0.2868, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5833256295200644e-05, |
|
"loss": 0.2935, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.581903387070394e-05, |
|
"loss": 0.2924, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5804811446207234e-05, |
|
"loss": 0.2911, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5790589021710533e-05, |
|
"loss": 0.2947, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5776366597213828e-05, |
|
"loss": 0.2872, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5762144172717123e-05, |
|
"loss": 0.2782, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.574792174822042e-05, |
|
"loss": 0.2877, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5733699323723716e-05, |
|
"loss": 0.2874, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.3142920732498169, |
|
"eval_runtime": 34.9962, |
|
"eval_samples_per_second": 142.872, |
|
"eval_steps_per_second": 1.143, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5719476899227014e-05, |
|
"loss": 0.2884, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.570525447473031e-05, |
|
"loss": 0.2756, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5691032050233604e-05, |
|
"loss": 0.2847, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.56768096257369e-05, |
|
"loss": 0.2742, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5662587201240194e-05, |
|
"loss": 0.2818, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5648364776743493e-05, |
|
"loss": 0.2804, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5634142352246788e-05, |
|
"loss": 0.2729, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5619919927750086e-05, |
|
"loss": 0.2771, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.560569750325338e-05, |
|
"loss": 0.2863, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5591475078756676e-05, |
|
"loss": 0.2791, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5577252654259974e-05, |
|
"loss": 0.28, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.556303022976327e-05, |
|
"loss": 0.2742, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5548807805266564e-05, |
|
"loss": 0.264, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5534585380769863e-05, |
|
"loss": 0.2747, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5520362956273158e-05, |
|
"loss": 0.2704, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5506140531776452e-05, |
|
"loss": 0.2846, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5491918107279747e-05, |
|
"loss": 0.2753, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5477695682783046e-05, |
|
"loss": 0.2654, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.546347325828634e-05, |
|
"loss": 0.2666, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5449250833789636e-05, |
|
"loss": 0.2661, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5435028409292934e-05, |
|
"loss": 0.2738, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.542080598479623e-05, |
|
"loss": 0.2766, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5406583560299527e-05, |
|
"loss": 0.2744, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5392361135802822e-05, |
|
"loss": 0.2701, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5378138711306117e-05, |
|
"loss": 0.2772, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5363916286809412e-05, |
|
"loss": 0.2788, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5349693862312707e-05, |
|
"loss": 0.2747, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5335471437816006e-05, |
|
"loss": 0.2543, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.53212490133193e-05, |
|
"loss": 0.2654, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.53070265888226e-05, |
|
"loss": 0.2593, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5292804164325894e-05, |
|
"loss": 0.2631, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.527858173982919e-05, |
|
"loss": 0.2654, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5264359315332487e-05, |
|
"loss": 0.2684, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5250136890835782e-05, |
|
"loss": 0.2575, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5235914466339079e-05, |
|
"loss": 0.2711, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5221692041842372e-05, |
|
"loss": 0.2589, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5207469617345669e-05, |
|
"loss": 0.2659, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5193247192848966e-05, |
|
"loss": 0.2513, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5179024768352262e-05, |
|
"loss": 0.2681, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5164802343855557e-05, |
|
"loss": 0.2528, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5150579919358854e-05, |
|
"loss": 0.251, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.513635749486215e-05, |
|
"loss": 0.2487, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5122135070365447e-05, |
|
"loss": 0.2498, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5107912645868742e-05, |
|
"loss": 0.2506, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5093690221372039e-05, |
|
"loss": 0.2549, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5079467796875336e-05, |
|
"loss": 0.2482, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5065245372378629e-05, |
|
"loss": 0.256, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5051022947881926e-05, |
|
"loss": 0.2503, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5036800523385222e-05, |
|
"loss": 0.2569, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5022578098888519e-05, |
|
"loss": 0.2558, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.2726137340068817, |
|
"eval_runtime": 35.0016, |
|
"eval_samples_per_second": 142.85, |
|
"eval_steps_per_second": 1.143, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5008355674391814e-05, |
|
"loss": 0.244, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.499413324989511e-05, |
|
"loss": 0.2551, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4979910825398407e-05, |
|
"loss": 0.2396, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4965688400901704e-05, |
|
"loss": 0.2579, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4951465976404999e-05, |
|
"loss": 0.2444, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4937243551908295e-05, |
|
"loss": 0.239, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4923021127411592e-05, |
|
"loss": 0.2381, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4908798702914885e-05, |
|
"loss": 0.2578, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4894576278418182e-05, |
|
"loss": 0.2467, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4880353853921479e-05, |
|
"loss": 0.238, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4866131429424775e-05, |
|
"loss": 0.2441, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.485190900492807e-05, |
|
"loss": 0.2445, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4837686580431367e-05, |
|
"loss": 0.2464, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4823464155934664e-05, |
|
"loss": 0.2414, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.480924173143796e-05, |
|
"loss": 0.2462, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4795019306941255e-05, |
|
"loss": 0.2411, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4780796882444552e-05, |
|
"loss": 0.2378, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4766574457947847e-05, |
|
"loss": 0.2298, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4752352033451142e-05, |
|
"loss": 0.2382, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4738129608954439e-05, |
|
"loss": 0.235, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4723907184457735e-05, |
|
"loss": 0.2321, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4709684759961032e-05, |
|
"loss": 0.229, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.4695462335464327e-05, |
|
"loss": 0.2411, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.4681239910967624e-05, |
|
"loss": 0.2269, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.466701748647092e-05, |
|
"loss": 0.2357, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.4652795061974217e-05, |
|
"loss": 0.2324, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.4638572637477514e-05, |
|
"loss": 0.2306, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4624350212980809e-05, |
|
"loss": 0.2382, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4610127788484104e-05, |
|
"loss": 0.2418, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4595905363987399e-05, |
|
"loss": 0.2416, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4581682939490695e-05, |
|
"loss": 0.2287, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4567460514993992e-05, |
|
"loss": 0.2429, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4553238090497289e-05, |
|
"loss": 0.2481, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4539015666000584e-05, |
|
"loss": 0.2217, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.452479324150388e-05, |
|
"loss": 0.229, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4510570817007177e-05, |
|
"loss": 0.2338, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4496348392510474e-05, |
|
"loss": 0.2241, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.448212596801377e-05, |
|
"loss": 0.2144, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4467903543517065e-05, |
|
"loss": 0.2331, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.445368111902036e-05, |
|
"loss": 0.2293, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4439458694523655e-05, |
|
"loss": 0.2323, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4425236270026952e-05, |
|
"loss": 0.218, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4411013845530248e-05, |
|
"loss": 0.2286, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4396791421033545e-05, |
|
"loss": 0.2325, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.438256899653684e-05, |
|
"loss": 0.2188, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4368346572040137e-05, |
|
"loss": 0.2322, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4354124147543433e-05, |
|
"loss": 0.2219, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.433990172304673e-05, |
|
"loss": 0.2218, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4325679298550027e-05, |
|
"loss": 0.2253, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.431145687405332e-05, |
|
"loss": 0.2188, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.24339932203292847, |
|
"eval_runtime": 35.0062, |
|
"eval_samples_per_second": 142.832, |
|
"eval_steps_per_second": 1.143, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4297234449556617e-05, |
|
"loss": 0.2144, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4283012025059912e-05, |
|
"loss": 0.2282, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4268789600563208e-05, |
|
"loss": 0.2155, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4254567176066505e-05, |
|
"loss": 0.2153, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4240344751569802e-05, |
|
"loss": 0.225, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4226122327073097e-05, |
|
"loss": 0.2132, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4211899902576393e-05, |
|
"loss": 0.2241, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.419767747807969e-05, |
|
"loss": 0.2194, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4183455053582987e-05, |
|
"loss": 0.2102, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4169232629086283e-05, |
|
"loss": 0.2138, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4155010204589577e-05, |
|
"loss": 0.2193, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4140787780092873e-05, |
|
"loss": 0.2218, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4126565355596168e-05, |
|
"loss": 0.2193, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4112342931099465e-05, |
|
"loss": 0.2218, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4098120506602762e-05, |
|
"loss": 0.2172, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4083898082106058e-05, |
|
"loss": 0.2143, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4069675657609353e-05, |
|
"loss": 0.2181, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.405545323311265e-05, |
|
"loss": 0.2078, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4041230808615947e-05, |
|
"loss": 0.2102, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4027008384119243e-05, |
|
"loss": 0.2017, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.401278595962254e-05, |
|
"loss": 0.209, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3998563535125833e-05, |
|
"loss": 0.2065, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.398434111062913e-05, |
|
"loss": 0.2024, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3970118686132425e-05, |
|
"loss": 0.1976, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3955896261635721e-05, |
|
"loss": 0.2041, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3941673837139018e-05, |
|
"loss": 0.1998, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3927451412642315e-05, |
|
"loss": 0.2026, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.391322898814561e-05, |
|
"loss": 0.2031, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3899006563648906e-05, |
|
"loss": 0.2043, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3884784139152203e-05, |
|
"loss": 0.2098, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.38705617146555e-05, |
|
"loss": 0.206, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3856339290158793e-05, |
|
"loss": 0.1995, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.384211686566209e-05, |
|
"loss": 0.2029, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3827894441165386e-05, |
|
"loss": 0.2054, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3813672016668681e-05, |
|
"loss": 0.2027, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3799449592171978e-05, |
|
"loss": 0.196, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3785227167675275e-05, |
|
"loss": 0.1984, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3771004743178571e-05, |
|
"loss": 0.2033, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3756782318681866e-05, |
|
"loss": 0.2019, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3742559894185163e-05, |
|
"loss": 0.1965, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.372833746968846e-05, |
|
"loss": 0.1878, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3714115045191756e-05, |
|
"loss": 0.2072, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.369989262069505e-05, |
|
"loss": 0.204, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3685670196198346e-05, |
|
"loss": 0.1969, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3671447771701643e-05, |
|
"loss": 0.1908, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3657225347204938e-05, |
|
"loss": 0.1937, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3643002922708235e-05, |
|
"loss": 0.1987, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3628780498211531e-05, |
|
"loss": 0.1921, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3614558073714828e-05, |
|
"loss": 0.1967, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3600335649218125e-05, |
|
"loss": 0.1963, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.21258682012557983, |
|
"eval_runtime": 34.9419, |
|
"eval_samples_per_second": 143.095, |
|
"eval_steps_per_second": 1.145, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.358611322472142e-05, |
|
"loss": 0.1905, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3571890800224716e-05, |
|
"loss": 0.1997, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3557668375728013e-05, |
|
"loss": 0.192, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3543445951231306e-05, |
|
"loss": 0.1973, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3529223526734603e-05, |
|
"loss": 0.1883, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.35150011022379e-05, |
|
"loss": 0.2007, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3500778677741195e-05, |
|
"loss": 0.1926, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3486556253244491e-05, |
|
"loss": 0.194, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3472333828747788e-05, |
|
"loss": 0.1946, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3458111404251085e-05, |
|
"loss": 0.186, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3443888979754381e-05, |
|
"loss": 0.1922, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3429666555257676e-05, |
|
"loss": 0.1912, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3415444130760973e-05, |
|
"loss": 0.1919, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3401221706264266e-05, |
|
"loss": 0.1975, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3386999281767563e-05, |
|
"loss": 0.1902, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.337277685727086e-05, |
|
"loss": 0.1858, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3358554432774156e-05, |
|
"loss": 0.1969, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3344332008277451e-05, |
|
"loss": 0.1741, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3330109583780748e-05, |
|
"loss": 0.1724, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3315887159284044e-05, |
|
"loss": 0.1066, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3301664734787341e-05, |
|
"loss": 0.1048, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3287442310290638e-05, |
|
"loss": 0.1075, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3273219885793933e-05, |
|
"loss": 0.1024, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.325899746129723e-05, |
|
"loss": 0.1054, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3244775036800523e-05, |
|
"loss": 0.1056, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.323055261230382e-05, |
|
"loss": 0.1004, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3216330187807116e-05, |
|
"loss": 0.1074, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3202107763310413e-05, |
|
"loss": 0.1028, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3187885338813708e-05, |
|
"loss": 0.1092, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3173662914317004e-05, |
|
"loss": 0.1033, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3159440489820301e-05, |
|
"loss": 0.1025, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3145218065323598e-05, |
|
"loss": 0.1017, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3130995640826894e-05, |
|
"loss": 0.1091, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.311677321633019e-05, |
|
"loss": 0.1054, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3102550791833486e-05, |
|
"loss": 0.1033, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.308832836733678e-05, |
|
"loss": 0.1082, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.3074105942840076e-05, |
|
"loss": 0.1045, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.3059883518343373e-05, |
|
"loss": 0.1083, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.304566109384667e-05, |
|
"loss": 0.1067, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.3031438669349964e-05, |
|
"loss": 0.1047, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.3017216244853261e-05, |
|
"loss": 0.1038, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.3002993820356558e-05, |
|
"loss": 0.1016, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2988771395859854e-05, |
|
"loss": 0.1045, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2974548971363151e-05, |
|
"loss": 0.1081, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2960326546866446e-05, |
|
"loss": 0.111, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.294610412236974e-05, |
|
"loss": 0.1044, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2931881697873036e-05, |
|
"loss": 0.1084, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2917659273376332e-05, |
|
"loss": 0.106, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2903436848879629e-05, |
|
"loss": 0.1094, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.2889214424382926e-05, |
|
"loss": 0.1154, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.205108642578125, |
|
"eval_runtime": 34.2834, |
|
"eval_samples_per_second": 145.843, |
|
"eval_steps_per_second": 1.167, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.287499199988622e-05, |
|
"loss": 0.0996, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.2860769575389517e-05, |
|
"loss": 0.1065, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.2846547150892814e-05, |
|
"loss": 0.1057, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.283232472639611e-05, |
|
"loss": 0.0993, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.2818102301899407e-05, |
|
"loss": 0.1095, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.2803879877402702e-05, |
|
"loss": 0.1098, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.2789657452905997e-05, |
|
"loss": 0.1148, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.2775435028409292e-05, |
|
"loss": 0.1088, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2761212603912589e-05, |
|
"loss": 0.0996, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2746990179415886e-05, |
|
"loss": 0.1093, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2732767754919182e-05, |
|
"loss": 0.103, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2718545330422477e-05, |
|
"loss": 0.1029, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2704322905925774e-05, |
|
"loss": 0.1012, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.269010048142907e-05, |
|
"loss": 0.1077, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.2675878056932367e-05, |
|
"loss": 0.1115, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.2661655632435664e-05, |
|
"loss": 0.0979, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.2647433207938959e-05, |
|
"loss": 0.1038, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.2633210783442254e-05, |
|
"loss": 0.1101, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.2618988358945549e-05, |
|
"loss": 0.1114, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.2604765934448846e-05, |
|
"loss": 0.1026, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.2590543509952142e-05, |
|
"loss": 0.0995, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.2576321085455439e-05, |
|
"loss": 0.1126, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2562098660958734e-05, |
|
"loss": 0.1111, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.254787623646203e-05, |
|
"loss": 0.1033, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2533653811965327e-05, |
|
"loss": 0.1048, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2519431387468624e-05, |
|
"loss": 0.1, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.250520896297192e-05, |
|
"loss": 0.1056, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2490986538475214e-05, |
|
"loss": 0.1104, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.247676411397851e-05, |
|
"loss": 0.1109, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2462541689481806e-05, |
|
"loss": 0.1053, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2448319264985102e-05, |
|
"loss": 0.0941, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2434096840488399e-05, |
|
"loss": 0.1011, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2419874415991696e-05, |
|
"loss": 0.1049, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2405651991494992e-05, |
|
"loss": 0.106, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2391429566998287e-05, |
|
"loss": 0.1047, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.2377207142501584e-05, |
|
"loss": 0.1015, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.236298471800488e-05, |
|
"loss": 0.1021, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2348762293508177e-05, |
|
"loss": 0.1016, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.233453986901147e-05, |
|
"loss": 0.0967, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2320317444514767e-05, |
|
"loss": 0.1061, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2306095020018062e-05, |
|
"loss": 0.0994, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2291872595521359e-05, |
|
"loss": 0.0998, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2277650171024655e-05, |
|
"loss": 0.1068, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2263427746527952e-05, |
|
"loss": 0.1015, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2249205322031249e-05, |
|
"loss": 0.1051, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2234982897534544e-05, |
|
"loss": 0.1101, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.222076047303784e-05, |
|
"loss": 0.1021, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2206538048541137e-05, |
|
"loss": 0.1047, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2192315624044434e-05, |
|
"loss": 0.0966, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2178093199547727e-05, |
|
"loss": 0.1028, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.19255822896957397, |
|
"eval_runtime": 34.1404, |
|
"eval_samples_per_second": 146.454, |
|
"eval_steps_per_second": 1.172, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2163870775051024e-05, |
|
"loss": 0.1049, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2149648350554319e-05, |
|
"loss": 0.0977, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2135425926057615e-05, |
|
"loss": 0.1011, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2121203501560912e-05, |
|
"loss": 0.1003, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.2106981077064209e-05, |
|
"loss": 0.1109, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2092758652567505e-05, |
|
"loss": 0.1085, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.20785362280708e-05, |
|
"loss": 0.1081, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2064313803574097e-05, |
|
"loss": 0.1012, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2050091379077394e-05, |
|
"loss": 0.1019, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2035868954580687e-05, |
|
"loss": 0.1036, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.2021646530083984e-05, |
|
"loss": 0.1055, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.200742410558728e-05, |
|
"loss": 0.1105, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1993201681090575e-05, |
|
"loss": 0.1006, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.1978979256593872e-05, |
|
"loss": 0.0995, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1964756832097169e-05, |
|
"loss": 0.1044, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1950534407600465e-05, |
|
"loss": 0.1021, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1936311983103762e-05, |
|
"loss": 0.1049, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1922089558607057e-05, |
|
"loss": 0.1102, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1907867134110354e-05, |
|
"loss": 0.1017, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.189364470961365e-05, |
|
"loss": 0.0983, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1879422285116943e-05, |
|
"loss": 0.1003, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.186519986062024e-05, |
|
"loss": 0.1, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1850977436123537e-05, |
|
"loss": 0.109, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1836755011626832e-05, |
|
"loss": 0.0958, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1822532587130128e-05, |
|
"loss": 0.1057, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1808310162633425e-05, |
|
"loss": 0.1067, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1794087738136722e-05, |
|
"loss": 0.1108, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1779865313640018e-05, |
|
"loss": 0.1089, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1765642889143313e-05, |
|
"loss": 0.1035, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.175142046464661e-05, |
|
"loss": 0.1025, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1737198040149907e-05, |
|
"loss": 0.1002, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.17229756156532e-05, |
|
"loss": 0.1018, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1708753191156497e-05, |
|
"loss": 0.108, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1694530766659793e-05, |
|
"loss": 0.0987, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1680308342163088e-05, |
|
"loss": 0.1099, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1666085917666385e-05, |
|
"loss": 0.1024, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1651863493169682e-05, |
|
"loss": 0.0957, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1637641068672978e-05, |
|
"loss": 0.1044, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1623418644176275e-05, |
|
"loss": 0.1007, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.160919621967957e-05, |
|
"loss": 0.1042, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1594973795182867e-05, |
|
"loss": 0.1059, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.158075137068616e-05, |
|
"loss": 0.1014, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1566528946189457e-05, |
|
"loss": 0.1, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1552306521692753e-05, |
|
"loss": 0.0947, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.153808409719605e-05, |
|
"loss": 0.1025, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1523861672699345e-05, |
|
"loss": 0.1032, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1509639248202642e-05, |
|
"loss": 0.1048, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1495416823705938e-05, |
|
"loss": 0.0976, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1481194399209235e-05, |
|
"loss": 0.1036, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1466971974712532e-05, |
|
"loss": 0.1097, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.18599912524223328, |
|
"eval_runtime": 34.2806, |
|
"eval_samples_per_second": 145.855, |
|
"eval_steps_per_second": 1.167, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1452749550215827e-05, |
|
"loss": 0.1038, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1438527125719123e-05, |
|
"loss": 0.1013, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1424304701222416e-05, |
|
"loss": 0.098, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1410082276725713e-05, |
|
"loss": 0.1002, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.139585985222901e-05, |
|
"loss": 0.105, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1381637427732307e-05, |
|
"loss": 0.1002, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1367415003235601e-05, |
|
"loss": 0.0973, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1353192578738898e-05, |
|
"loss": 0.1038, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1338970154242195e-05, |
|
"loss": 0.0989, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1324747729745491e-05, |
|
"loss": 0.1096, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1310525305248788e-05, |
|
"loss": 0.0869, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1296302880752083e-05, |
|
"loss": 0.1003, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.128208045625538e-05, |
|
"loss": 0.0945, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1267858031758673e-05, |
|
"loss": 0.1004, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.125363560726197e-05, |
|
"loss": 0.0984, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1239413182765266e-05, |
|
"loss": 0.098, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1225190758268563e-05, |
|
"loss": 0.0982, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.121096833377186e-05, |
|
"loss": 0.0988, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1196745909275155e-05, |
|
"loss": 0.0999, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1182523484778451e-05, |
|
"loss": 0.1028, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1168301060281748e-05, |
|
"loss": 0.0966, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1154078635785045e-05, |
|
"loss": 0.0881, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.113985621128834e-05, |
|
"loss": 0.1008, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1125633786791635e-05, |
|
"loss": 0.1029, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.111141136229493e-05, |
|
"loss": 0.0981, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.1097188937798226e-05, |
|
"loss": 0.093, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.1082966513301523e-05, |
|
"loss": 0.1009, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.106874408880482e-05, |
|
"loss": 0.0988, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.1054521664308116e-05, |
|
"loss": 0.0947, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.1040299239811411e-05, |
|
"loss": 0.1021, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1026076815314708e-05, |
|
"loss": 0.0948, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1011854390818005e-05, |
|
"loss": 0.0934, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0997631966321301e-05, |
|
"loss": 0.0919, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0983409541824596e-05, |
|
"loss": 0.0959, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0969187117327891e-05, |
|
"loss": 0.0964, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0954964692831186e-05, |
|
"loss": 0.1008, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0940742268334483e-05, |
|
"loss": 0.1005, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.092651984383778e-05, |
|
"loss": 0.0891, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0912297419341076e-05, |
|
"loss": 0.0962, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0898074994844373e-05, |
|
"loss": 0.0891, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0883852570347668e-05, |
|
"loss": 0.0946, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0869630145850965e-05, |
|
"loss": 0.0977, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0855407721354261e-05, |
|
"loss": 0.0999, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0841185296857558e-05, |
|
"loss": 0.1032, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0826962872360853e-05, |
|
"loss": 0.087, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0812740447864148e-05, |
|
"loss": 0.0976, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0798518023367443e-05, |
|
"loss": 0.1002, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.078429559887074e-05, |
|
"loss": 0.0991, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0770073174374036e-05, |
|
"loss": 0.0928, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0755850749877333e-05, |
|
"loss": 0.0966, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.074162832538063e-05, |
|
"loss": 0.0916, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0727405900883924e-05, |
|
"loss": 0.0984, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0713183476387221e-05, |
|
"loss": 0.1021, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0698961051890518e-05, |
|
"loss": 0.092, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0684738627393814e-05, |
|
"loss": 0.0997, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0670516202897108e-05, |
|
"loss": 0.0862, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0656293778400404e-05, |
|
"loss": 0.091, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.06420713539037e-05, |
|
"loss": 0.0945, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0627848929406996e-05, |
|
"loss": 0.089, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0613626504910293e-05, |
|
"loss": 0.0943, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.059940408041359e-05, |
|
"loss": 0.0919, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0585181655916886e-05, |
|
"loss": 0.0947, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0570959231420181e-05, |
|
"loss": 0.0915, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0556736806923478e-05, |
|
"loss": 0.1016, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0542514382426774e-05, |
|
"loss": 0.0953, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0528291957930071e-05, |
|
"loss": 0.0953, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0514069533433364e-05, |
|
"loss": 0.0943, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0499847108936661e-05, |
|
"loss": 0.0983, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0485624684439956e-05, |
|
"loss": 0.0929, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0471402259943253e-05, |
|
"loss": 0.0983, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.045717983544655e-05, |
|
"loss": 0.0928, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0442957410949846e-05, |
|
"loss": 0.0887, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0428734986453143e-05, |
|
"loss": 0.0927, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0414512561956438e-05, |
|
"loss": 0.0957, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0400290137459734e-05, |
|
"loss": 0.0905, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0386067712963031e-05, |
|
"loss": 0.0943, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0371845288466328e-05, |
|
"loss": 0.093, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.035762286396962e-05, |
|
"loss": 0.0922, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0343400439472917e-05, |
|
"loss": 0.095, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0329178014976212e-05, |
|
"loss": 0.0904, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0314955590479509e-05, |
|
"loss": 0.0981, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0300733165982806e-05, |
|
"loss": 0.0973, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0286510741486102e-05, |
|
"loss": 0.0891, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0272288316989399e-05, |
|
"loss": 0.0942, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0258065892492694e-05, |
|
"loss": 0.0869, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.024384346799599e-05, |
|
"loss": 0.1023, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0229621043499287e-05, |
|
"loss": 0.1025, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.021539861900258e-05, |
|
"loss": 0.0878, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0201176194505877e-05, |
|
"loss": 0.0921, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0186953770009174e-05, |
|
"loss": 0.09, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0172731345512469e-05, |
|
"loss": 0.0925, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0158508921015766e-05, |
|
"loss": 0.0873, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0144286496519062e-05, |
|
"loss": 0.089, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0130064072022359e-05, |
|
"loss": 0.092, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.0115841647525656e-05, |
|
"loss": 0.0985, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.010161922302895e-05, |
|
"loss": 0.0895, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0087396798532247e-05, |
|
"loss": 0.0845, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0073174374035544e-05, |
|
"loss": 0.0905, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0058951949538837e-05, |
|
"loss": 0.0901, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0044729525042134e-05, |
|
"loss": 0.0955, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.16799671947956085, |
|
"eval_runtime": 34.201, |
|
"eval_samples_per_second": 146.194, |
|
"eval_steps_per_second": 1.17, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.003050710054543e-05, |
|
"loss": 0.0864, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0016284676048727e-05, |
|
"loss": 0.0888, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0002062251552022e-05, |
|
"loss": 0.0898, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.987839827055319e-06, |
|
"loss": 0.0887, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.973617402558616e-06, |
|
"loss": 0.0928, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.959394978061912e-06, |
|
"loss": 0.0897, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.945172553565207e-06, |
|
"loss": 0.0889, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.930950129068502e-06, |
|
"loss": 0.0922, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.916727704571799e-06, |
|
"loss": 0.0969, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.902505280075096e-06, |
|
"loss": 0.0905, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.888282855578392e-06, |
|
"loss": 0.0882, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.874060431081687e-06, |
|
"loss": 0.0953, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.859838006584984e-06, |
|
"loss": 0.0907, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.845615582088279e-06, |
|
"loss": 0.0932, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.831393157591576e-06, |
|
"loss": 0.0937, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.817170733094872e-06, |
|
"loss": 0.0932, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.802948308598169e-06, |
|
"loss": 0.0891, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.788725884101464e-06, |
|
"loss": 0.0924, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.774503459604759e-06, |
|
"loss": 0.0882, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.760281035108055e-06, |
|
"loss": 0.0901, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.746058610611352e-06, |
|
"loss": 0.0899, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.731836186114649e-06, |
|
"loss": 0.0897, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.717613761617944e-06, |
|
"loss": 0.0957, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.70339133712124e-06, |
|
"loss": 0.0909, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.689168912624535e-06, |
|
"loss": 0.0907, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.674946488127832e-06, |
|
"loss": 0.0873, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.660724063631129e-06, |
|
"loss": 0.093, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.646501639134424e-06, |
|
"loss": 0.0951, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.63227921463772e-06, |
|
"loss": 0.0905, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.618056790141015e-06, |
|
"loss": 0.0884, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.603834365644312e-06, |
|
"loss": 0.0898, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.589611941147609e-06, |
|
"loss": 0.0863, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.575389516650905e-06, |
|
"loss": 0.0922, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.5611670921542e-06, |
|
"loss": 0.0836, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.546944667657497e-06, |
|
"loss": 0.0892, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.532722243160792e-06, |
|
"loss": 0.0907, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.518499818664089e-06, |
|
"loss": 0.0852, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.504277394167385e-06, |
|
"loss": 0.0877, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.49005496967068e-06, |
|
"loss": 0.0912, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.475832545173977e-06, |
|
"loss": 0.0874, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.461610120677272e-06, |
|
"loss": 0.0844, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.447387696180569e-06, |
|
"loss": 0.084, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.433165271683865e-06, |
|
"loss": 0.0834, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.41894284718716e-06, |
|
"loss": 0.0897, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.404720422690457e-06, |
|
"loss": 0.0994, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.390497998193754e-06, |
|
"loss": 0.0936, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.376275573697049e-06, |
|
"loss": 0.0931, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.362053149200345e-06, |
|
"loss": 0.0835, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.347830724703642e-06, |
|
"loss": 0.0933, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.333608300206937e-06, |
|
"loss": 0.0936, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.319385875710234e-06, |
|
"loss": 0.0892, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.305163451213528e-06, |
|
"loss": 0.0857, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.290941026716825e-06, |
|
"loss": 0.0857, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.276718602220122e-06, |
|
"loss": 0.0806, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.262496177723417e-06, |
|
"loss": 0.0908, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.248273753226713e-06, |
|
"loss": 0.0869, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.23405132873001e-06, |
|
"loss": 0.0747, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.219828904233305e-06, |
|
"loss": 0.0844, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.205606479736602e-06, |
|
"loss": 0.0815, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.191384055239897e-06, |
|
"loss": 0.0819, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.177161630743193e-06, |
|
"loss": 0.0849, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.16293920624649e-06, |
|
"loss": 0.0864, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.148716781749785e-06, |
|
"loss": 0.0922, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.134494357253082e-06, |
|
"loss": 0.0853, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.120271932756378e-06, |
|
"loss": 0.0849, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.106049508259673e-06, |
|
"loss": 0.0857, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.09182708376297e-06, |
|
"loss": 0.0821, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.077604659266267e-06, |
|
"loss": 0.0887, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.063382234769562e-06, |
|
"loss": 0.0864, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.049159810272858e-06, |
|
"loss": 0.0858, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.034937385776153e-06, |
|
"loss": 0.0892, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.02071496127945e-06, |
|
"loss": 0.0804, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.006492536782747e-06, |
|
"loss": 0.0833, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.992270112286042e-06, |
|
"loss": 0.0843, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.978047687789338e-06, |
|
"loss": 0.0869, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.963825263292633e-06, |
|
"loss": 0.0907, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.94960283879593e-06, |
|
"loss": 0.0844, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.935380414299227e-06, |
|
"loss": 0.0795, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.921157989802523e-06, |
|
"loss": 0.0873, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.906935565305818e-06, |
|
"loss": 0.0829, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.892713140809115e-06, |
|
"loss": 0.0814, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.87849071631241e-06, |
|
"loss": 0.0844, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.864268291815707e-06, |
|
"loss": 0.0848, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.850045867319003e-06, |
|
"loss": 0.0911, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.835823442822298e-06, |
|
"loss": 0.0842, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.821601018325595e-06, |
|
"loss": 0.079, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.80737859382889e-06, |
|
"loss": 0.0835, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.793156169332186e-06, |
|
"loss": 0.0871, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.778933744835483e-06, |
|
"loss": 0.0809, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.76471132033878e-06, |
|
"loss": 0.0906, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.750488895842075e-06, |
|
"loss": 0.0836, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.73626647134537e-06, |
|
"loss": 0.0768, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.722044046848666e-06, |
|
"loss": 0.0844, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.707821622351963e-06, |
|
"loss": 0.0848, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.69359919785526e-06, |
|
"loss": 0.0862, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.679376773358555e-06, |
|
"loss": 0.0778, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.665154348861851e-06, |
|
"loss": 0.0813, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.650931924365146e-06, |
|
"loss": 0.0874, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.636709499868443e-06, |
|
"loss": 0.0772, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.62248707537174e-06, |
|
"loss": 0.0801, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.14860820770263672, |
|
"eval_runtime": 34.2128, |
|
"eval_samples_per_second": 146.144, |
|
"eval_steps_per_second": 1.169, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.608264650875036e-06, |
|
"loss": 0.087, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.594042226378331e-06, |
|
"loss": 0.0758, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.579819801881626e-06, |
|
"loss": 0.0855, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.565597377384923e-06, |
|
"loss": 0.0834, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.55137495288822e-06, |
|
"loss": 0.0846, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.537152528391516e-06, |
|
"loss": 0.079, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.522930103894811e-06, |
|
"loss": 0.0838, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.508707679398108e-06, |
|
"loss": 0.0868, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.494485254901403e-06, |
|
"loss": 0.0923, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.4802628304047e-06, |
|
"loss": 0.0851, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.466040405907996e-06, |
|
"loss": 0.0839, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.451817981411293e-06, |
|
"loss": 0.0832, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.437595556914588e-06, |
|
"loss": 0.0794, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.423373132417883e-06, |
|
"loss": 0.0772, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.40915070792118e-06, |
|
"loss": 0.0801, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.394928283424476e-06, |
|
"loss": 0.0806, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.380705858927773e-06, |
|
"loss": 0.0799, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.366483434431068e-06, |
|
"loss": 0.0823, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.352261009934365e-06, |
|
"loss": 0.0781, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.33803858543766e-06, |
|
"loss": 0.0872, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.323816160940956e-06, |
|
"loss": 0.0776, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.309593736444253e-06, |
|
"loss": 0.0801, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.29537131194755e-06, |
|
"loss": 0.0869, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.281148887450845e-06, |
|
"loss": 0.0837, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.26692646295414e-06, |
|
"loss": 0.0871, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.252704038457436e-06, |
|
"loss": 0.0787, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.238481613960733e-06, |
|
"loss": 0.0811, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.22425918946403e-06, |
|
"loss": 0.0779, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.210036764967324e-06, |
|
"loss": 0.0781, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.195814340470621e-06, |
|
"loss": 0.0797, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.181591915973916e-06, |
|
"loss": 0.0873, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.167369491477213e-06, |
|
"loss": 0.0769, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.15314706698051e-06, |
|
"loss": 0.0859, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.138924642483806e-06, |
|
"loss": 0.0745, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.124702217987101e-06, |
|
"loss": 0.0789, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.110479793490396e-06, |
|
"loss": 0.091, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.096257368993693e-06, |
|
"loss": 0.0758, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.08203494449699e-06, |
|
"loss": 0.0815, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.067812520000286e-06, |
|
"loss": 0.0852, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.053590095503581e-06, |
|
"loss": 0.0742, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.039367671006878e-06, |
|
"loss": 0.0806, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.025145246510173e-06, |
|
"loss": 0.0836, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.01092282201347e-06, |
|
"loss": 0.0771, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.996700397516766e-06, |
|
"loss": 0.0745, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.982477973020063e-06, |
|
"loss": 0.0795, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.968255548523358e-06, |
|
"loss": 0.0784, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.954033124026653e-06, |
|
"loss": 0.0762, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.93981069952995e-06, |
|
"loss": 0.0822, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.925588275033246e-06, |
|
"loss": 0.0775, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.911365850536543e-06, |
|
"loss": 0.0784, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.897143426039838e-06, |
|
"loss": 0.0823, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.882921001543134e-06, |
|
"loss": 0.077, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.86869857704643e-06, |
|
"loss": 0.0737, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.854476152549726e-06, |
|
"loss": 0.0769, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.840253728053023e-06, |
|
"loss": 0.0783, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.826031303556318e-06, |
|
"loss": 0.0817, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.811808879059614e-06, |
|
"loss": 0.0759, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.79758645456291e-06, |
|
"loss": 0.0772, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.783364030066206e-06, |
|
"loss": 0.0792, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.769141605569503e-06, |
|
"loss": 0.0738, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.7549191810728e-06, |
|
"loss": 0.0694, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.740696756576094e-06, |
|
"loss": 0.0825, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.72647433207939e-06, |
|
"loss": 0.0809, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.712251907582686e-06, |
|
"loss": 0.0751, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.698029483085982e-06, |
|
"loss": 0.0775, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.683807058589279e-06, |
|
"loss": 0.0771, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.669584634092574e-06, |
|
"loss": 0.0777, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.65536220959587e-06, |
|
"loss": 0.0773, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.641139785099166e-06, |
|
"loss": 0.0749, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.626917360602462e-06, |
|
"loss": 0.0773, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.612694936105759e-06, |
|
"loss": 0.0859, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.598472511609054e-06, |
|
"loss": 0.0798, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.584250087112351e-06, |
|
"loss": 0.0755, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.5700276626156465e-06, |
|
"loss": 0.0808, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.555805238118943e-06, |
|
"loss": 0.0725, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.541582813622239e-06, |
|
"loss": 0.0812, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.527360389125536e-06, |
|
"loss": 0.0794, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.513137964628831e-06, |
|
"loss": 0.0692, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.4989155401321265e-06, |
|
"loss": 0.0773, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.484693115635423e-06, |
|
"loss": 0.0695, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.470470691138719e-06, |
|
"loss": 0.0853, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.456248266642016e-06, |
|
"loss": 0.0759, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.442025842145311e-06, |
|
"loss": 0.0731, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.427803417648607e-06, |
|
"loss": 0.0776, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.413580993151903e-06, |
|
"loss": 0.0799, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.3993585686552e-06, |
|
"loss": 0.083, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.385136144158496e-06, |
|
"loss": 0.0833, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.3709137196617906e-06, |
|
"loss": 0.072, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.356691295165087e-06, |
|
"loss": 0.0755, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.342468870668383e-06, |
|
"loss": 0.0788, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.32824644617168e-06, |
|
"loss": 0.0782, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.3140240216749755e-06, |
|
"loss": 0.0697, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.299801597178272e-06, |
|
"loss": 0.0764, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.285579172681567e-06, |
|
"loss": 0.0711, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.271356748184864e-06, |
|
"loss": 0.0691, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.25713432368816e-06, |
|
"loss": 0.0708, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.242911899191456e-06, |
|
"loss": 0.076, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.228689474694752e-06, |
|
"loss": 0.0721, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.214467050198047e-06, |
|
"loss": 0.0758, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.200244625701344e-06, |
|
"loss": 0.0762, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.1308322250843048, |
|
"eval_runtime": 34.2018, |
|
"eval_samples_per_second": 146.191, |
|
"eval_steps_per_second": 1.17, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.18602220120464e-06, |
|
"loss": 0.0774, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.171799776707936e-06, |
|
"loss": 0.0753, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.157577352211232e-06, |
|
"loss": 0.0731, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.143354927714528e-06, |
|
"loss": 0.0793, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.129132503217824e-06, |
|
"loss": 0.0665, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.11491007872112e-06, |
|
"loss": 0.0732, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.100687654224416e-06, |
|
"loss": 0.0768, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.086465229727713e-06, |
|
"loss": 0.0716, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.072242805231009e-06, |
|
"loss": 0.0715, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.058020380734304e-06, |
|
"loss": 0.0748, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.0437979562376e-06, |
|
"loss": 0.0672, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.029575531740896e-06, |
|
"loss": 0.0775, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.015353107244193e-06, |
|
"loss": 0.0696, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.001130682747489e-06, |
|
"loss": 0.0688, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.9869082582507845e-06, |
|
"loss": 0.067, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.97268583375408e-06, |
|
"loss": 0.079, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.958463409257377e-06, |
|
"loss": 0.0729, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.944240984760673e-06, |
|
"loss": 0.0701, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.9300185602639695e-06, |
|
"loss": 0.0699, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.9157961357672645e-06, |
|
"loss": 0.0743, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.90157371127056e-06, |
|
"loss": 0.0657, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.887351286773857e-06, |
|
"loss": 0.071, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.873128862277153e-06, |
|
"loss": 0.0685, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.8589064377804494e-06, |
|
"loss": 0.079, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.844684013283745e-06, |
|
"loss": 0.0709, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.830461588787041e-06, |
|
"loss": 0.0693, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.816239164290337e-06, |
|
"loss": 0.0703, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.8020167397936336e-06, |
|
"loss": 0.0698, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.787794315296929e-06, |
|
"loss": 0.0727, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.773571890800226e-06, |
|
"loss": 0.0641, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.759349466303521e-06, |
|
"loss": 0.0765, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.745127041806817e-06, |
|
"loss": 0.0653, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.7309046173101135e-06, |
|
"loss": 0.0687, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.716682192813409e-06, |
|
"loss": 0.0759, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.702459768316706e-06, |
|
"loss": 0.0688, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.688237343820001e-06, |
|
"loss": 0.0673, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.674014919323298e-06, |
|
"loss": 0.0666, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.6597924948265935e-06, |
|
"loss": 0.0588, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.64557007032989e-06, |
|
"loss": 0.0303, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.631347645833186e-06, |
|
"loss": 0.0387, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.617125221336483e-06, |
|
"loss": 0.0344, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.602902796839778e-06, |
|
"loss": 0.0354, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.588680372343073e-06, |
|
"loss": 0.0306, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.57445794784637e-06, |
|
"loss": 0.0331, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.560235523349666e-06, |
|
"loss": 0.0325, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.5460130988529626e-06, |
|
"loss": 0.0319, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.5317906743562575e-06, |
|
"loss": 0.0337, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.517568249859554e-06, |
|
"loss": 0.0338, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.50334582536285e-06, |
|
"loss": 0.0362, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.489123400866147e-06, |
|
"loss": 0.0363, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.4749009763694425e-06, |
|
"loss": 0.0354, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.4606785518727375e-06, |
|
"loss": 0.0275, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.446456127376034e-06, |
|
"loss": 0.0328, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.43223370287933e-06, |
|
"loss": 0.0353, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.418011278382627e-06, |
|
"loss": 0.0342, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.4037888538859225e-06, |
|
"loss": 0.0341, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.389566429389219e-06, |
|
"loss": 0.0317, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.375344004892514e-06, |
|
"loss": 0.033, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.361121580395811e-06, |
|
"loss": 0.0334, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.346899155899107e-06, |
|
"loss": 0.0361, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.332676731402403e-06, |
|
"loss": 0.0319, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.318454306905699e-06, |
|
"loss": 0.0347, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.304231882408994e-06, |
|
"loss": 0.0333, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.290009457912291e-06, |
|
"loss": 0.037, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.2757870334155865e-06, |
|
"loss": 0.0343, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.261564608918883e-06, |
|
"loss": 0.0334, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.247342184422179e-06, |
|
"loss": 0.0312, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.233119759925475e-06, |
|
"loss": 0.0315, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.218897335428771e-06, |
|
"loss": 0.0268, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.204674910932067e-06, |
|
"loss": 0.0263, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.190452486435363e-06, |
|
"loss": 0.0303, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.17623006193866e-06, |
|
"loss": 0.0345, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.162007637441956e-06, |
|
"loss": 0.0358, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.147785212945251e-06, |
|
"loss": 0.0369, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.133562788448547e-06, |
|
"loss": 0.0392, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.119340363951843e-06, |
|
"loss": 0.0319, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.10511793945514e-06, |
|
"loss": 0.0372, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.090895514958436e-06, |
|
"loss": 0.0317, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.076673090461731e-06, |
|
"loss": 0.0321, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.062450665965027e-06, |
|
"loss": 0.0369, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.048228241468324e-06, |
|
"loss": 0.0332, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.03400581697162e-06, |
|
"loss": 0.0306, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.019783392474916e-06, |
|
"loss": 0.0337, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.005560967978211e-06, |
|
"loss": 0.0332, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 5.991338543481507e-06, |
|
"loss": 0.0337, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 5.977116118984804e-06, |
|
"loss": 0.0296, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.9628936944881e-06, |
|
"loss": 0.0341, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.948671269991396e-06, |
|
"loss": 0.0287, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.934448845494692e-06, |
|
"loss": 0.0313, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.920226420997988e-06, |
|
"loss": 0.0335, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.906003996501284e-06, |
|
"loss": 0.0327, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.8917815720045805e-06, |
|
"loss": 0.0363, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.877559147507876e-06, |
|
"loss": 0.0339, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.863336723011173e-06, |
|
"loss": 0.0282, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.849114298514468e-06, |
|
"loss": 0.0339, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.834891874017764e-06, |
|
"loss": 0.0297, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.82066944952106e-06, |
|
"loss": 0.0287, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.806447025024356e-06, |
|
"loss": 0.0321, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.792224600527653e-06, |
|
"loss": 0.0304, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.778002176030948e-06, |
|
"loss": 0.0351, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.13596704602241516, |
|
"eval_runtime": 34.4302, |
|
"eval_samples_per_second": 145.221, |
|
"eval_steps_per_second": 1.162, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.7637797515342445e-06, |
|
"loss": 0.0314, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.74955732703754e-06, |
|
"loss": 0.033, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.735334902540837e-06, |
|
"loss": 0.0358, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.721112478044133e-06, |
|
"loss": 0.0345, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.7068900535474295e-06, |
|
"loss": 0.0353, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.6926676290507245e-06, |
|
"loss": 0.0347, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.67844520455402e-06, |
|
"loss": 0.0301, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.664222780057317e-06, |
|
"loss": 0.0337, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.650000355560613e-06, |
|
"loss": 0.0317, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.6357779310639095e-06, |
|
"loss": 0.0309, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.6215555065672044e-06, |
|
"loss": 0.0307, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.607333082070501e-06, |
|
"loss": 0.0321, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.593110657573797e-06, |
|
"loss": 0.0326, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.578888233077094e-06, |
|
"loss": 0.0286, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.564665808580389e-06, |
|
"loss": 0.0345, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.550443384083684e-06, |
|
"loss": 0.0285, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.536220959586981e-06, |
|
"loss": 0.0332, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.521998535090277e-06, |
|
"loss": 0.0293, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.5077761105935736e-06, |
|
"loss": 0.0347, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.493553686096869e-06, |
|
"loss": 0.0353, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.479331261600166e-06, |
|
"loss": 0.0297, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.465108837103461e-06, |
|
"loss": 0.0377, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.450886412606758e-06, |
|
"loss": 0.0387, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.4366639881100535e-06, |
|
"loss": 0.0255, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.42244156361335e-06, |
|
"loss": 0.027, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.408219139116646e-06, |
|
"loss": 0.0325, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.393996714619941e-06, |
|
"loss": 0.0302, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.379774290123238e-06, |
|
"loss": 0.0358, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.3655518656265335e-06, |
|
"loss": 0.031, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.35132944112983e-06, |
|
"loss": 0.0322, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.337107016633126e-06, |
|
"loss": 0.0321, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.322884592136422e-06, |
|
"loss": 0.0327, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.308662167639718e-06, |
|
"loss": 0.0348, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.294439743143014e-06, |
|
"loss": 0.0339, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.28021731864631e-06, |
|
"loss": 0.0319, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.265994894149607e-06, |
|
"loss": 0.0315, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2517724696529026e-06, |
|
"loss": 0.0319, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2375500451561975e-06, |
|
"loss": 0.0305, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.223327620659494e-06, |
|
"loss": 0.0294, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.20910519616279e-06, |
|
"loss": 0.033, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.194882771666087e-06, |
|
"loss": 0.0303, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.1806603471693825e-06, |
|
"loss": 0.0353, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.166437922672678e-06, |
|
"loss": 0.0307, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.152215498175974e-06, |
|
"loss": 0.0321, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.137993073679271e-06, |
|
"loss": 0.0328, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.123770649182567e-06, |
|
"loss": 0.0357, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.109548224685863e-06, |
|
"loss": 0.0302, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.095325800189158e-06, |
|
"loss": 0.0349, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.081103375692454e-06, |
|
"loss": 0.0295, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.066880951195751e-06, |
|
"loss": 0.0276, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.052658526699047e-06, |
|
"loss": 0.0328, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.038436102202343e-06, |
|
"loss": 0.0372, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.024213677705639e-06, |
|
"loss": 0.035, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.009991253208935e-06, |
|
"loss": 0.0294, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.995768828712231e-06, |
|
"loss": 0.0345, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.981546404215527e-06, |
|
"loss": 0.0373, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.967323979718823e-06, |
|
"loss": 0.0305, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.953101555222119e-06, |
|
"loss": 0.0323, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.938879130725415e-06, |
|
"loss": 0.0275, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.924656706228711e-06, |
|
"loss": 0.0323, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.910434281732007e-06, |
|
"loss": 0.0342, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.896211857235303e-06, |
|
"loss": 0.0315, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.881989432738599e-06, |
|
"loss": 0.0286, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.867767008241896e-06, |
|
"loss": 0.0296, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.8535445837451915e-06, |
|
"loss": 0.0416, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.839322159248487e-06, |
|
"loss": 0.0329, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.825099734751784e-06, |
|
"loss": 0.0301, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.810877310255079e-06, |
|
"loss": 0.0327, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.796654885758376e-06, |
|
"loss": 0.032, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.782432461261671e-06, |
|
"loss": 0.0292, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.768210036764967e-06, |
|
"loss": 0.0327, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.753987612268264e-06, |
|
"loss": 0.0288, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.73976518777156e-06, |
|
"loss": 0.0323, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.7255427632748555e-06, |
|
"loss": 0.0328, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.711320338778152e-06, |
|
"loss": 0.032, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.697097914281448e-06, |
|
"loss": 0.0321, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.682875489784744e-06, |
|
"loss": 0.0355, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.6686530652880405e-06, |
|
"loss": 0.0314, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.6544306407913355e-06, |
|
"loss": 0.0329, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.640208216294632e-06, |
|
"loss": 0.0322, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.625985791797928e-06, |
|
"loss": 0.0306, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.611763367301224e-06, |
|
"loss": 0.0362, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.5975409428045205e-06, |
|
"loss": 0.0304, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.583318518307816e-06, |
|
"loss": 0.028, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.569096093811112e-06, |
|
"loss": 0.0309, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.554873669314409e-06, |
|
"loss": 0.0294, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.540651244817705e-06, |
|
"loss": 0.0337, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.526428820321e-06, |
|
"loss": 0.0268, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.512206395824297e-06, |
|
"loss": 0.0291, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.497983971327592e-06, |
|
"loss": 0.0335, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.483761546830889e-06, |
|
"loss": 0.0321, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.4695391223341845e-06, |
|
"loss": 0.0277, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.45531669783748e-06, |
|
"loss": 0.0316, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.441094273340777e-06, |
|
"loss": 0.033, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.426871848844073e-06, |
|
"loss": 0.0337, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.412649424347369e-06, |
|
"loss": 0.0315, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.398426999850665e-06, |
|
"loss": 0.0301, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.384204575353961e-06, |
|
"loss": 0.0265, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.369982150857257e-06, |
|
"loss": 0.0282, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.355759726360553e-06, |
|
"loss": 0.0347, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 0.13324007391929626, |
|
"eval_runtime": 34.3057, |
|
"eval_samples_per_second": 145.748, |
|
"eval_steps_per_second": 1.166, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.341537301863849e-06, |
|
"loss": 0.032, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.327314877367145e-06, |
|
"loss": 0.0281, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.313092452870441e-06, |
|
"loss": 0.0327, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.298870028373737e-06, |
|
"loss": 0.0325, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.284647603877034e-06, |
|
"loss": 0.0283, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.270425179380329e-06, |
|
"loss": 0.0346, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.256202754883625e-06, |
|
"loss": 0.036, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.241980330386921e-06, |
|
"loss": 0.0275, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.227757905890218e-06, |
|
"loss": 0.0329, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.2135354813935135e-06, |
|
"loss": 0.0332, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.199313056896809e-06, |
|
"loss": 0.0315, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.185090632400105e-06, |
|
"loss": 0.0302, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.170868207903402e-06, |
|
"loss": 0.0308, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.156645783406698e-06, |
|
"loss": 0.0311, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.1424233589099935e-06, |
|
"loss": 0.0295, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.128200934413289e-06, |
|
"loss": 0.037, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.113978509916586e-06, |
|
"loss": 0.0348, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.099756085419882e-06, |
|
"loss": 0.0331, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.085533660923178e-06, |
|
"loss": 0.0295, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.071311236426474e-06, |
|
"loss": 0.0311, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.05708881192977e-06, |
|
"loss": 0.0316, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.042866387433066e-06, |
|
"loss": 0.0292, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.028643962936362e-06, |
|
"loss": 0.0261, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.014421538439658e-06, |
|
"loss": 0.03, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.000199113942954e-06, |
|
"loss": 0.0324, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.98597668944625e-06, |
|
"loss": 0.0288, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.971754264949546e-06, |
|
"loss": 0.0274, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.9575318404528426e-06, |
|
"loss": 0.0288, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.943309415956138e-06, |
|
"loss": 0.0308, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.929086991459434e-06, |
|
"loss": 0.0284, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.914864566962731e-06, |
|
"loss": 0.0245, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.900642142466026e-06, |
|
"loss": 0.026, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.8864197179693225e-06, |
|
"loss": 0.0326, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.872197293472618e-06, |
|
"loss": 0.0321, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.857974868975914e-06, |
|
"loss": 0.0351, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.843752444479211e-06, |
|
"loss": 0.0362, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.829530019982507e-06, |
|
"loss": 0.0338, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.8153075954858025e-06, |
|
"loss": 0.032, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.8010851709890987e-06, |
|
"loss": 0.031, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.7868627464923945e-06, |
|
"loss": 0.0326, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.7726403219956908e-06, |
|
"loss": 0.0355, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.758417897498987e-06, |
|
"loss": 0.0285, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.744195473002283e-06, |
|
"loss": 0.0346, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.729973048505579e-06, |
|
"loss": 0.0369, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.7157506240088753e-06, |
|
"loss": 0.0324, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.701528199512171e-06, |
|
"loss": 0.0317, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6873057750154674e-06, |
|
"loss": 0.0289, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6730833505187628e-06, |
|
"loss": 0.0312, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.658860926022059e-06, |
|
"loss": 0.0322, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6446385015253557e-06, |
|
"loss": 0.0266, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.630416077028651e-06, |
|
"loss": 0.0289, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.6161936525319473e-06, |
|
"loss": 0.036, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.6019712280352436e-06, |
|
"loss": 0.028, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.5877488035385394e-06, |
|
"loss": 0.029, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.5735263790418356e-06, |
|
"loss": 0.0314, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5593039545451315e-06, |
|
"loss": 0.0227, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5450815300484277e-06, |
|
"loss": 0.0317, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.530859105551724e-06, |
|
"loss": 0.0324, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5166366810550193e-06, |
|
"loss": 0.0309, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5024142565583156e-06, |
|
"loss": 0.0287, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4881918320616123e-06, |
|
"loss": 0.027, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4739694075649077e-06, |
|
"loss": 0.0331, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.459746983068204e-06, |
|
"loss": 0.0307, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4455245585714997e-06, |
|
"loss": 0.0286, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.431302134074796e-06, |
|
"loss": 0.0332, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.417079709578092e-06, |
|
"loss": 0.0281, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.402857285081388e-06, |
|
"loss": 0.0323, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.3886348605846843e-06, |
|
"loss": 0.0303, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.3744124360879805e-06, |
|
"loss": 0.0336, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.360190011591276e-06, |
|
"loss": 0.0333, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3459675870945726e-06, |
|
"loss": 0.0283, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.331745162597868e-06, |
|
"loss": 0.0312, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3175227381011642e-06, |
|
"loss": 0.0319, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3033003136044605e-06, |
|
"loss": 0.0281, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2890778891077563e-06, |
|
"loss": 0.0331, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2748554646110525e-06, |
|
"loss": 0.0348, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2606330401143488e-06, |
|
"loss": 0.0245, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2464106156176446e-06, |
|
"loss": 0.0279, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.232188191120941e-06, |
|
"loss": 0.0285, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.2179657666242362e-06, |
|
"loss": 0.0288, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.2037433421275325e-06, |
|
"loss": 0.0293, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.189520917630829e-06, |
|
"loss": 0.0296, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.1752984931341245e-06, |
|
"loss": 0.0303, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.1610760686374208e-06, |
|
"loss": 0.0309, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.146853644140717e-06, |
|
"loss": 0.0348, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.132631219644013e-06, |
|
"loss": 0.0299, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.118408795147309e-06, |
|
"loss": 0.0318, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.104186370650605e-06, |
|
"loss": 0.0266, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.089963946153901e-06, |
|
"loss": 0.0294, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.0757415216571974e-06, |
|
"loss": 0.0281, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.061519097160493e-06, |
|
"loss": 0.0251, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.0472966726637895e-06, |
|
"loss": 0.0288, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.0330742481670857e-06, |
|
"loss": 0.0313, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.018851823670381e-06, |
|
"loss": 0.0295, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.0046293991736774e-06, |
|
"loss": 0.0334, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.990406974676973e-06, |
|
"loss": 0.0243, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9761845501802694e-06, |
|
"loss": 0.0335, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9619621256835657e-06, |
|
"loss": 0.0327, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9477397011868615e-06, |
|
"loss": 0.0304, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9335172766901577e-06, |
|
"loss": 0.0302, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 0.13005784153938293, |
|
"eval_runtime": 34.1867, |
|
"eval_samples_per_second": 146.256, |
|
"eval_steps_per_second": 1.17, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.919294852193454e-06, |
|
"loss": 0.0274, |
|
"step": 120100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9050724276967494e-06, |
|
"loss": 0.0289, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.890850003200046e-06, |
|
"loss": 0.0267, |
|
"step": 120300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8766275787033414e-06, |
|
"loss": 0.0307, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8624051542066377e-06, |
|
"loss": 0.0268, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.848182729709934e-06, |
|
"loss": 0.0314, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8339603052132297e-06, |
|
"loss": 0.0306, |
|
"step": 120700 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.819737880716526e-06, |
|
"loss": 0.03, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8055154562198222e-06, |
|
"loss": 0.0276, |
|
"step": 120900 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.791293031723118e-06, |
|
"loss": 0.0315, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.7770706072264143e-06, |
|
"loss": 0.0345, |
|
"step": 121100 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7628481827297097e-06, |
|
"loss": 0.03, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7486257582330064e-06, |
|
"loss": 0.0265, |
|
"step": 121300 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7344033337363026e-06, |
|
"loss": 0.0317, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.720180909239598e-06, |
|
"loss": 0.0362, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7059584847428942e-06, |
|
"loss": 0.0293, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6917360602461905e-06, |
|
"loss": 0.0262, |
|
"step": 121700 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6775136357494863e-06, |
|
"loss": 0.0306, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6632912112527826e-06, |
|
"loss": 0.0288, |
|
"step": 121900 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6490687867560784e-06, |
|
"loss": 0.0309, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6348463622593746e-06, |
|
"loss": 0.0259, |
|
"step": 122100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.620623937762671e-06, |
|
"loss": 0.0288, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.6064015132659663e-06, |
|
"loss": 0.0359, |
|
"step": 122300 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.592179088769263e-06, |
|
"loss": 0.0277, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.577956664272559e-06, |
|
"loss": 0.0314, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.5637342397758546e-06, |
|
"loss": 0.0277, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.549511815279151e-06, |
|
"loss": 0.0348, |
|
"step": 122700 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.5352893907824466e-06, |
|
"loss": 0.0295, |
|
"step": 122800 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.521066966285743e-06, |
|
"loss": 0.0325, |
|
"step": 122900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.506844541789039e-06, |
|
"loss": 0.032, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.492622117292335e-06, |
|
"loss": 0.028, |
|
"step": 123100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.478399692795631e-06, |
|
"loss": 0.0292, |
|
"step": 123200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.464177268298927e-06, |
|
"loss": 0.0348, |
|
"step": 123300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.4499548438022232e-06, |
|
"loss": 0.0271, |
|
"step": 123400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.4357324193055195e-06, |
|
"loss": 0.0285, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.4215099948088153e-06, |
|
"loss": 0.0278, |
|
"step": 123600 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.407287570312111e-06, |
|
"loss": 0.0298, |
|
"step": 123700 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.3930651458154074e-06, |
|
"loss": 0.0298, |
|
"step": 123800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.3788427213187036e-06, |
|
"loss": 0.0309, |
|
"step": 123900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3646202968219994e-06, |
|
"loss": 0.0299, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3503978723252953e-06, |
|
"loss": 0.0266, |
|
"step": 124100 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3361754478285915e-06, |
|
"loss": 0.0249, |
|
"step": 124200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3219530233318877e-06, |
|
"loss": 0.0286, |
|
"step": 124300 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3077305988351836e-06, |
|
"loss": 0.0262, |
|
"step": 124400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.29350817433848e-06, |
|
"loss": 0.0276, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2792857498417756e-06, |
|
"loss": 0.028, |
|
"step": 124600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.265063325345072e-06, |
|
"loss": 0.0287, |
|
"step": 124700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2508409008483677e-06, |
|
"loss": 0.0313, |
|
"step": 124800 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.236618476351664e-06, |
|
"loss": 0.0281, |
|
"step": 124900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.2223960518549598e-06, |
|
"loss": 0.0229, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.208173627358256e-06, |
|
"loss": 0.027, |
|
"step": 125100 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.193951202861552e-06, |
|
"loss": 0.0279, |
|
"step": 125200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.179728778364848e-06, |
|
"loss": 0.0298, |
|
"step": 125300 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.165506353868144e-06, |
|
"loss": 0.0295, |
|
"step": 125400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.15128392937144e-06, |
|
"loss": 0.0223, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.1370615048747364e-06, |
|
"loss": 0.0298, |
|
"step": 125600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.122839080378032e-06, |
|
"loss": 0.0322, |
|
"step": 125700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.108616655881328e-06, |
|
"loss": 0.0282, |
|
"step": 125800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0943942313846243e-06, |
|
"loss": 0.0296, |
|
"step": 125900 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0801718068879205e-06, |
|
"loss": 0.0258, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0659493823912163e-06, |
|
"loss": 0.0277, |
|
"step": 126100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.051726957894512e-06, |
|
"loss": 0.0285, |
|
"step": 126200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0375045333978084e-06, |
|
"loss": 0.0314, |
|
"step": 126300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.0232821089011046e-06, |
|
"loss": 0.0312, |
|
"step": 126400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.0090596844044005e-06, |
|
"loss": 0.0285, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.9948372599076967e-06, |
|
"loss": 0.0291, |
|
"step": 126600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.980614835410993e-06, |
|
"loss": 0.0253, |
|
"step": 126700 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.9663924109142888e-06, |
|
"loss": 0.0242, |
|
"step": 126800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.9521699864175846e-06, |
|
"loss": 0.0316, |
|
"step": 126900 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.937947561920881e-06, |
|
"loss": 0.0305, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.923725137424177e-06, |
|
"loss": 0.0244, |
|
"step": 127100 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.909502712927473e-06, |
|
"loss": 0.0263, |
|
"step": 127200 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.895280288430769e-06, |
|
"loss": 0.0281, |
|
"step": 127300 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.881057863934065e-06, |
|
"loss": 0.027, |
|
"step": 127400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.8668354394373612e-06, |
|
"loss": 0.0265, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.852613014940657e-06, |
|
"loss": 0.0302, |
|
"step": 127600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.838390590443953e-06, |
|
"loss": 0.0273, |
|
"step": 127700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.824168165947249e-06, |
|
"loss": 0.0243, |
|
"step": 127800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.8099457414505453e-06, |
|
"loss": 0.0246, |
|
"step": 127900 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.7957233169538414e-06, |
|
"loss": 0.0246, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.7815008924571372e-06, |
|
"loss": 0.0307, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.7672784679604332e-06, |
|
"loss": 0.0255, |
|
"step": 128200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7530560434637295e-06, |
|
"loss": 0.0231, |
|
"step": 128300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7388336189670255e-06, |
|
"loss": 0.0261, |
|
"step": 128400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7246111944703215e-06, |
|
"loss": 0.0277, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7103887699736173e-06, |
|
"loss": 0.0289, |
|
"step": 128600 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.6961663454769136e-06, |
|
"loss": 0.0272, |
|
"step": 128700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.6819439209802096e-06, |
|
"loss": 0.0304, |
|
"step": 128800 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.6677214964835057e-06, |
|
"loss": 0.0253, |
|
"step": 128900 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.6534990719868017e-06, |
|
"loss": 0.0289, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.639276647490098e-06, |
|
"loss": 0.0287, |
|
"step": 129100 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.6250542229933938e-06, |
|
"loss": 0.0302, |
|
"step": 129200 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.6108317984966898e-06, |
|
"loss": 0.0227, |
|
"step": 129300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.5966093739999858e-06, |
|
"loss": 0.0302, |
|
"step": 129400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.582386949503282e-06, |
|
"loss": 0.0242, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.568164525006578e-06, |
|
"loss": 0.0281, |
|
"step": 129600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.553942100509874e-06, |
|
"loss": 0.032, |
|
"step": 129700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.53971967601317e-06, |
|
"loss": 0.0251, |
|
"step": 129800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.5254972515164662e-06, |
|
"loss": 0.0262, |
|
"step": 129900 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.5112748270197622e-06, |
|
"loss": 0.029, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.12821832299232483, |
|
"eval_runtime": 34.334, |
|
"eval_samples_per_second": 145.628, |
|
"eval_steps_per_second": 1.165, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.4970524025230583e-06, |
|
"loss": 0.0269, |
|
"step": 130100 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.482829978026354e-06, |
|
"loss": 0.0303, |
|
"step": 130200 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.4686075535296503e-06, |
|
"loss": 0.0309, |
|
"step": 130300 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.4543851290329464e-06, |
|
"loss": 0.0262, |
|
"step": 130400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.4401627045362424e-06, |
|
"loss": 0.0252, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4259402800395384e-06, |
|
"loss": 0.024, |
|
"step": 130600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4117178555428347e-06, |
|
"loss": 0.0292, |
|
"step": 130700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.3974954310461305e-06, |
|
"loss": 0.0264, |
|
"step": 130800 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.3832730065494265e-06, |
|
"loss": 0.0245, |
|
"step": 130900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.3690505820527225e-06, |
|
"loss": 0.0281, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3548281575560188e-06, |
|
"loss": 0.0302, |
|
"step": 131100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3406057330593148e-06, |
|
"loss": 0.0278, |
|
"step": 131200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3263833085626106e-06, |
|
"loss": 0.0277, |
|
"step": 131300 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3121608840659067e-06, |
|
"loss": 0.0252, |
|
"step": 131400 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.297938459569203e-06, |
|
"loss": 0.027, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.283716035072499e-06, |
|
"loss": 0.0276, |
|
"step": 131600 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.269493610575795e-06, |
|
"loss": 0.0234, |
|
"step": 131700 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2552711860790908e-06, |
|
"loss": 0.0285, |
|
"step": 131800 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.241048761582387e-06, |
|
"loss": 0.0298, |
|
"step": 131900 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.226826337085683e-06, |
|
"loss": 0.027, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.2126039125889791e-06, |
|
"loss": 0.0309, |
|
"step": 132100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.1983814880922751e-06, |
|
"loss": 0.0257, |
|
"step": 132200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.1841590635955712e-06, |
|
"loss": 0.0263, |
|
"step": 132300 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.1699366390988672e-06, |
|
"loss": 0.024, |
|
"step": 132400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1557142146021632e-06, |
|
"loss": 0.0313, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1414917901054593e-06, |
|
"loss": 0.0214, |
|
"step": 132600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1272693656087553e-06, |
|
"loss": 0.027, |
|
"step": 132700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1130469411120516e-06, |
|
"loss": 0.0286, |
|
"step": 132800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0988245166153474e-06, |
|
"loss": 0.0242, |
|
"step": 132900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0846020921186436e-06, |
|
"loss": 0.0314, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0703796676219394e-06, |
|
"loss": 0.0247, |
|
"step": 133100 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0561572431252357e-06, |
|
"loss": 0.0281, |
|
"step": 133200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0419348186285317e-06, |
|
"loss": 0.0214, |
|
"step": 133300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.0277123941318277e-06, |
|
"loss": 0.0313, |
|
"step": 133400 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.0134899696351238e-06, |
|
"loss": 0.0291, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.992675451384198e-07, |
|
"loss": 0.0268, |
|
"step": 133600 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.850451206417158e-07, |
|
"loss": 0.0285, |
|
"step": 133700 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.708226961450119e-07, |
|
"loss": 0.0266, |
|
"step": 133800 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.56600271648308e-07, |
|
"loss": 0.0266, |
|
"step": 133900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.42377847151604e-07, |
|
"loss": 0.0226, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.281554226549e-07, |
|
"loss": 0.0273, |
|
"step": 134100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.139329981581961e-07, |
|
"loss": 0.0278, |
|
"step": 134200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.99710573661492e-07, |
|
"loss": 0.031, |
|
"step": 134300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.854881491647882e-07, |
|
"loss": 0.0274, |
|
"step": 134400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.712657246680842e-07, |
|
"loss": 0.0245, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.570433001713803e-07, |
|
"loss": 0.0264, |
|
"step": 134600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.428208756746763e-07, |
|
"loss": 0.0314, |
|
"step": 134700 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.285984511779724e-07, |
|
"loss": 0.0283, |
|
"step": 134800 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.143760266812683e-07, |
|
"loss": 0.0281, |
|
"step": 134900 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.001536021845645e-07, |
|
"loss": 0.0278, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.859311776878604e-07, |
|
"loss": 0.0334, |
|
"step": 135100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.717087531911565e-07, |
|
"loss": 0.0235, |
|
"step": 135200 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.574863286944526e-07, |
|
"loss": 0.0337, |
|
"step": 135300 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.432639041977487e-07, |
|
"loss": 0.0221, |
|
"step": 135400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.290414797010446e-07, |
|
"loss": 0.0293, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.148190552043408e-07, |
|
"loss": 0.0227, |
|
"step": 135600 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.005966307076367e-07, |
|
"loss": 0.0248, |
|
"step": 135700 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.863742062109328e-07, |
|
"loss": 0.0291, |
|
"step": 135800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.721517817142289e-07, |
|
"loss": 0.0261, |
|
"step": 135900 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.579293572175249e-07, |
|
"loss": 0.029, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.437069327208209e-07, |
|
"loss": 0.0266, |
|
"step": 136100 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 6.294845082241171e-07, |
|
"loss": 0.0277, |
|
"step": 136200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 6.152620837274131e-07, |
|
"loss": 0.0231, |
|
"step": 136300 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 6.010396592307091e-07, |
|
"loss": 0.0284, |
|
"step": 136400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.868172347340052e-07, |
|
"loss": 0.0269, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.725948102373012e-07, |
|
"loss": 0.0254, |
|
"step": 136600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.583723857405972e-07, |
|
"loss": 0.0267, |
|
"step": 136700 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.441499612438933e-07, |
|
"loss": 0.0254, |
|
"step": 136800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.299275367471893e-07, |
|
"loss": 0.0271, |
|
"step": 136900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.157051122504854e-07, |
|
"loss": 0.0281, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.014826877537815e-07, |
|
"loss": 0.0326, |
|
"step": 137100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.872602632570775e-07, |
|
"loss": 0.0275, |
|
"step": 137200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.7303783876037353e-07, |
|
"loss": 0.0312, |
|
"step": 137300 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.5881541426366957e-07, |
|
"loss": 0.0265, |
|
"step": 137400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.445929897669656e-07, |
|
"loss": 0.0282, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.303705652702617e-07, |
|
"loss": 0.0279, |
|
"step": 137600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.161481407735577e-07, |
|
"loss": 0.0283, |
|
"step": 137700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.0192571627685375e-07, |
|
"loss": 0.0271, |
|
"step": 137800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.877032917801498e-07, |
|
"loss": 0.0297, |
|
"step": 137900 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.7348086728344587e-07, |
|
"loss": 0.0288, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.592584427867419e-07, |
|
"loss": 0.0259, |
|
"step": 138100 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.4503601829003793e-07, |
|
"loss": 0.0293, |
|
"step": 138200 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.3081359379333396e-07, |
|
"loss": 0.0228, |
|
"step": 138300 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.1659116929663005e-07, |
|
"loss": 0.0234, |
|
"step": 138400 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.023687447999261e-07, |
|
"loss": 0.0228, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.881463203032221e-07, |
|
"loss": 0.0196, |
|
"step": 138600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.7392389580651815e-07, |
|
"loss": 0.0264, |
|
"step": 138700 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.5970147130981423e-07, |
|
"loss": 0.029, |
|
"step": 138800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.4547904681311026e-07, |
|
"loss": 0.0236, |
|
"step": 138900 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.312566223164063e-07, |
|
"loss": 0.0216, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.1703419781970235e-07, |
|
"loss": 0.0275, |
|
"step": 139100 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.0281177332299839e-07, |
|
"loss": 0.0243, |
|
"step": 139200 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8858934882629444e-07, |
|
"loss": 0.027, |
|
"step": 139300 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.7436692432959048e-07, |
|
"loss": 0.033, |
|
"step": 139400 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.6014449983288654e-07, |
|
"loss": 0.0267, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.4592207533618257e-07, |
|
"loss": 0.0283, |
|
"step": 139600 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.3169965083947863e-07, |
|
"loss": 0.0244, |
|
"step": 139700 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.1747722634277467e-07, |
|
"loss": 0.03, |
|
"step": 139800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.0325480184607072e-07, |
|
"loss": 0.0277, |
|
"step": 139900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.903237734936676e-08, |
|
"loss": 0.03, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 0.1257346123456955, |
|
"eval_runtime": 34.2432, |
|
"eval_samples_per_second": 146.014, |
|
"eval_steps_per_second": 1.168, |
|
"step": 140000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 140625, |
|
"num_train_epochs": 3, |
|
"save_steps": 10000, |
|
"total_flos": 3822663594147840.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|