{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 232677,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.01, "learning_rate": 4.989255491518285e-05, "loss": 2.956, "step": 500},
    {"epoch": 0.01, "learning_rate": 4.97851098303657e-05, "loss": 2.4622, "step": 1000},
    {"epoch": 0.02, "learning_rate": 4.967766474554855e-05, "loss": 2.3321, "step": 1500},
    {"epoch": 0.03, "learning_rate": 4.9570219660731405e-05, "loss": 2.2234, "step": 2000},
    {"epoch": 0.03, "learning_rate": 4.946277457591425e-05, "loss": 2.156, "step": 2500},
    {"epoch": 0.04, "learning_rate": 4.93553294910971e-05, "loss": 2.1099, "step": 3000},
    {"epoch": 0.05, "learning_rate": 4.924788440627995e-05, "loss": 2.0528, "step": 3500},
    {"epoch": 0.05, "learning_rate": 4.91404393214628e-05, "loss": 2.0336, "step": 4000},
    {"epoch": 0.06, "learning_rate": 4.903299423664565e-05, "loss": 1.9938, "step": 4500},
    {"epoch": 0.06, "learning_rate": 4.8925549151828506e-05, "loss": 1.9726, "step": 5000},
    {"epoch": 0.07, "learning_rate": 4.8818104067011355e-05, "loss": 1.9286, "step": 5500},
    {"epoch": 0.08, "learning_rate": 4.8710658982194204e-05, "loss": 1.9026, "step": 6000},
    {"epoch": 0.08, "learning_rate": 4.860321389737705e-05, "loss": 1.9147, "step": 6500},
    {"epoch": 0.09, "learning_rate": 4.84957688125599e-05, "loss": 1.8774, "step": 7000},
    {"epoch": 0.1, "learning_rate": 4.838832372774275e-05, "loss": 1.8614, "step": 7500},
    {"epoch": 0.1, "learning_rate": 4.82808786429256e-05, "loss": 1.8656, "step": 8000},
    {"epoch": 0.11, "learning_rate": 4.8173433558108456e-05, "loss": 1.8337, "step": 8500},
    {"epoch": 0.12, "learning_rate": 4.80659884732913e-05, "loss": 1.8322, "step": 9000},
    {"epoch": 0.12, "learning_rate": 4.7958543388474154e-05, "loss": 1.7918, "step": 9500},
    {"epoch": 0.13, "learning_rate": 4.7851098303657e-05, "loss": 1.7813, "step": 10000},
    {"epoch": 0.14, "learning_rate": 4.774365321883985e-05, "loss": 1.7649, "step": 10500},
    {"epoch": 0.14, "learning_rate": 4.76362081340227e-05, "loss": 1.7451, "step": 11000},
    {"epoch": 0.15, "learning_rate": 4.752876304920556e-05, "loss": 1.7723, "step": 11500},
    {"epoch": 0.15, "learning_rate": 4.74213179643884e-05, "loss": 1.7512, "step": 12000},
    {"epoch": 0.16, "learning_rate": 4.7313872879571255e-05, "loss": 1.728, "step": 12500},
    {"epoch": 0.17, "learning_rate": 4.7206427794754104e-05, "loss": 1.728, "step": 13000},
    {"epoch": 0.17, "learning_rate": 4.709898270993695e-05, "loss": 1.7277, "step": 13500},
    {"epoch": 0.18, "learning_rate": 4.69915376251198e-05, "loss": 1.7009, "step": 14000},
    {"epoch": 0.19, "learning_rate": 4.688409254030265e-05, "loss": 1.6931, "step": 14500},
    {"epoch": 0.19, "learning_rate": 4.677664745548551e-05, "loss": 1.734, "step": 15000},
    {"epoch": 0.2, "learning_rate": 4.666920237066835e-05, "loss": 1.6718, "step": 15500},
    {"epoch": 0.21, "learning_rate": 4.6561757285851205e-05, "loss": 1.6895, "step": 16000},
    {"epoch": 0.21, "learning_rate": 4.6454312201034054e-05, "loss": 1.7088, "step": 16500},
    {"epoch": 0.22, "learning_rate": 4.63468671162169e-05, "loss": 1.6694, "step": 17000},
    {"epoch": 0.23, "learning_rate": 4.623942203139975e-05, "loss": 1.6648, "step": 17500},
    {"epoch": 0.23, "learning_rate": 4.613197694658261e-05, "loss": 1.6718, "step": 18000},
    {"epoch": 0.24, "learning_rate": 4.602453186176545e-05, "loss": 1.6606, "step": 18500},
    {"epoch": 0.24, "learning_rate": 4.5917086776948306e-05, "loss": 1.6807, "step": 19000},
    {"epoch": 0.25, "learning_rate": 4.5809641692131155e-05, "loss": 1.6352, "step": 19500},
    {"epoch": 0.26, "learning_rate": 4.5702196607314004e-05, "loss": 1.6454, "step": 20000},
    {"epoch": 0.26, "learning_rate": 4.559475152249685e-05, "loss": 1.6509, "step": 20500},
    {"epoch": 0.27, "learning_rate": 4.548730643767971e-05, "loss": 1.6083, "step": 21000},
    {"epoch": 0.28, "learning_rate": 4.537986135286255e-05, "loss": 1.6177, "step": 21500},
    {"epoch": 0.28, "learning_rate": 4.527241626804541e-05, "loss": 1.6097, "step": 22000},
    {"epoch": 0.29, "learning_rate": 4.5164971183228256e-05, "loss": 1.6025, "step": 22500},
    {"epoch": 0.3, "learning_rate": 4.5057526098411105e-05, "loss": 1.6162, "step": 23000},
    {"epoch": 0.3, "learning_rate": 4.4950081013593954e-05, "loss": 1.6092, "step": 23500},
    {"epoch": 0.31, "learning_rate": 4.48426359287768e-05, "loss": 1.592, "step": 24000},
    {"epoch": 0.32, "learning_rate": 4.473519084395966e-05, "loss": 1.5961, "step": 24500},
    {"epoch": 0.32, "learning_rate": 4.46277457591425e-05, "loss": 1.5711, "step": 25000},
    {"epoch": 0.33, "learning_rate": 4.4520300674325357e-05, "loss": 1.5789, "step": 25500},
    {"epoch": 0.34, "learning_rate": 4.4412855589508206e-05, "loss": 1.5774, "step": 26000},
    {"epoch": 0.34, "learning_rate": 4.4305410504691055e-05, "loss": 1.5854, "step": 26500},
    {"epoch": 0.35, "learning_rate": 4.4197965419873904e-05, "loss": 1.5462, "step": 27000},
    {"epoch": 0.35, "learning_rate": 4.409052033505676e-05, "loss": 1.5591, "step": 27500},
    {"epoch": 0.36, "learning_rate": 4.39830752502396e-05, "loss": 1.5775, "step": 28000},
    {"epoch": 0.37, "learning_rate": 4.387563016542246e-05, "loss": 1.542, "step": 28500},
    {"epoch": 0.37, "learning_rate": 4.3768185080605306e-05, "loss": 1.5408, "step": 29000},
    {"epoch": 0.38, "learning_rate": 4.3660739995788155e-05, "loss": 1.5484, "step": 29500},
    {"epoch": 0.39, "learning_rate": 4.3553294910971004e-05, "loss": 1.5625, "step": 30000},
    {"epoch": 0.39, "learning_rate": 4.3445849826153853e-05, "loss": 1.5276, "step": 30500},
    {"epoch": 0.4, "learning_rate": 4.33384047413367e-05, "loss": 1.5524, "step": 31000},
    {"epoch": 0.41, "learning_rate": 4.323095965651955e-05, "loss": 1.5458, "step": 31500},
    {"epoch": 0.41, "learning_rate": 4.312351457170241e-05, "loss": 1.541, "step": 32000},
    {"epoch": 0.42, "learning_rate": 4.3016069486885256e-05, "loss": 1.5313, "step": 32500},
    {"epoch": 0.43, "learning_rate": 4.2908624402068105e-05, "loss": 1.5507, "step": 33000},
    {"epoch": 0.43, "learning_rate": 4.2801179317250954e-05, "loss": 1.5189, "step": 33500},
    {"epoch": 0.44, "learning_rate": 4.269373423243381e-05, "loss": 1.5292, "step": 34000},
    {"epoch": 0.44, "learning_rate": 4.258628914761665e-05, "loss": 1.5109, "step": 34500},
    {"epoch": 0.45, "learning_rate": 4.247884406279951e-05, "loss": 1.4981, "step": 35000},
    {"epoch": 0.46, "learning_rate": 4.237139897798236e-05, "loss": 1.5229, "step": 35500},
    {"epoch": 0.46, "learning_rate": 4.2263953893165206e-05, "loss": 1.496, "step": 36000},
    {"epoch": 0.47, "learning_rate": 4.2156508808348055e-05, "loss": 1.4987, "step": 36500},
    {"epoch": 0.48, "learning_rate": 4.2049063723530904e-05, "loss": 1.5046, "step": 37000},
    {"epoch": 0.48, "learning_rate": 4.194161863871375e-05, "loss": 1.5233, "step": 37500},
    {"epoch": 0.49, "learning_rate": 4.18341735538966e-05, "loss": 1.4931, "step": 38000},
    {"epoch": 0.5, "learning_rate": 4.172672846907946e-05, "loss": 1.5, "step": 38500},
    {"epoch": 0.5, "learning_rate": 4.16192833842623e-05, "loss": 1.5132, "step": 39000},
    {"epoch": 0.51, "learning_rate": 4.1511838299445156e-05, "loss": 1.4723, "step": 39500},
    {"epoch": 0.52, "learning_rate": 4.1404393214628005e-05, "loss": 1.5036, "step": 40000},
    {"epoch": 0.52, "learning_rate": 4.1296948129810854e-05, "loss": 1.4907, "step": 40500},
    {"epoch": 0.53, "learning_rate": 4.11895030449937e-05, "loss": 1.4806, "step": 41000},
    {"epoch": 0.54, "learning_rate": 4.108205796017656e-05, "loss": 1.4775, "step": 41500},
    {"epoch": 0.54, "learning_rate": 4.097461287535941e-05, "loss": 1.466, "step": 42000},
    {"epoch": 0.55, "learning_rate": 4.086716779054226e-05, "loss": 1.4855, "step": 42500},
    {"epoch": 0.55, "learning_rate": 4.0759722705725106e-05, "loss": 1.4749, "step": 43000},
    {"epoch": 0.56, "learning_rate": 4.0652277620907955e-05, "loss": 1.4617, "step": 43500},
    {"epoch": 0.57, "learning_rate": 4.0544832536090804e-05, "loss": 1.4674, "step": 44000},
    {"epoch": 0.57, "learning_rate": 4.043738745127365e-05, "loss": 1.437, "step": 44500},
    {"epoch": 0.58, "learning_rate": 4.032994236645651e-05, "loss": 1.4632, "step": 45000},
    {"epoch": 0.59, "learning_rate": 4.022249728163935e-05, "loss": 1.4692, "step": 45500},
    {"epoch": 0.59, "learning_rate": 4.011505219682221e-05, "loss": 1.4655, "step": 46000},
    {"epoch": 0.6, "learning_rate": 4.0007607112005056e-05, "loss": 1.4469, "step": 46500},
    {"epoch": 0.61, "learning_rate": 3.9900162027187905e-05, "loss": 1.4606, "step": 47000},
    {"epoch": 0.61, "learning_rate": 3.9792716942370754e-05, "loss": 1.4746, "step": 47500},
    {"epoch": 0.62, "learning_rate": 3.968527185755361e-05, "loss": 1.4479, "step": 48000},
    {"epoch": 0.63, "learning_rate": 3.957782677273645e-05, "loss": 1.4187, "step": 48500},
    {"epoch": 0.63, "learning_rate": 3.947038168791931e-05, "loss": 1.4497, "step": 49000},
    {"epoch": 0.64, "learning_rate": 3.936293660310216e-05, "loss": 1.4174, "step": 49500},
    {"epoch": 0.64, "learning_rate": 3.9255491518285006e-05, "loss": 1.4391, "step": 50000},
    {"epoch": 0.65, "learning_rate": 3.9148046433467855e-05, "loss": 1.4355, "step": 50500},
    {"epoch": 0.66, "learning_rate": 3.9040601348650704e-05, "loss": 1.441, "step": 51000},
    {"epoch": 0.66, "learning_rate": 3.893315626383356e-05, "loss": 1.4265, "step": 51500},
    {"epoch": 0.67, "learning_rate": 3.88257111790164e-05, "loss": 1.4499, "step": 52000},
    {"epoch": 0.68, "learning_rate": 3.871826609419926e-05, "loss": 1.4564, "step": 52500},
    {"epoch": 0.68, "learning_rate": 3.861082100938211e-05, "loss": 1.4362, "step": 53000},
    {"epoch": 0.69, "learning_rate": 3.8503375924564956e-05, "loss": 1.4338, "step": 53500},
    {"epoch": 0.7, "learning_rate": 3.8395930839747805e-05, "loss": 1.4099, "step": 54000},
    {"epoch": 0.7, "learning_rate": 3.828848575493066e-05, "loss": 1.4213, "step": 54500},
    {"epoch": 0.71, "learning_rate": 3.81810406701135e-05, "loss": 1.4142, "step": 55000},
    {"epoch": 0.72, "learning_rate": 3.807359558529636e-05, "loss": 1.411, "step": 55500},
    {"epoch": 0.72, "learning_rate": 3.796615050047921e-05, "loss": 1.4071, "step": 56000},
    {"epoch": 0.73, "learning_rate": 3.785870541566206e-05, "loss": 1.4536, "step": 56500},
    {"epoch": 0.73, "learning_rate": 3.7751260330844906e-05, "loss": 1.4205, "step": 57000},
    {"epoch": 0.74, "learning_rate": 3.764381524602776e-05, "loss": 1.4275, "step": 57500},
    {"epoch": 0.75, "learning_rate": 3.7536370161210604e-05, "loss": 1.4142, "step": 58000},
    {"epoch": 0.75, "learning_rate": 3.742892507639346e-05, "loss": 1.4007, "step": 58500},
    {"epoch": 0.76, "learning_rate": 3.732147999157631e-05, "loss": 1.3993, "step": 59000},
    {"epoch": 0.77, "learning_rate": 3.721403490675916e-05, "loss": 1.3797, "step": 59500},
    {"epoch": 0.77, "learning_rate": 3.7106589821942007e-05, "loss": 1.4012, "step": 60000},
    {"epoch": 0.78, "learning_rate": 3.6999144737124856e-05, "loss": 1.4133, "step": 60500},
    {"epoch": 0.79, "learning_rate": 3.689169965230771e-05, "loss": 1.4276, "step": 61000},
    {"epoch": 0.79, "learning_rate": 3.6784254567490554e-05, "loss": 1.4062, "step": 61500},
    {"epoch": 0.8, "learning_rate": 3.667680948267341e-05, "loss": 1.3683, "step": 62000},
    {"epoch": 0.81, "learning_rate": 3.656936439785626e-05, "loss": 1.3942, "step": 62500},
    {"epoch": 0.81, "learning_rate": 3.646191931303911e-05, "loss": 1.3861, "step": 63000},
    {"epoch": 0.82, "learning_rate": 3.6354474228221956e-05, "loss": 1.4005, "step": 63500},
    {"epoch": 0.83, "learning_rate": 3.624702914340481e-05, "loss": 1.3732, "step": 64000},
    {"epoch": 0.83, "learning_rate": 3.6139584058587654e-05, "loss": 1.4009, "step": 64500},
    {"epoch": 0.84, "learning_rate": 3.603213897377051e-05, "loss": 1.3785, "step": 65000},
    {"epoch": 0.84, "learning_rate": 3.592469388895336e-05, "loss": 1.3883, "step": 65500},
    {"epoch": 0.85, "learning_rate": 3.581724880413621e-05, "loss": 1.351, "step": 66000},
    {"epoch": 0.86, "learning_rate": 3.570980371931906e-05, "loss": 1.3579, "step": 66500},
    {"epoch": 0.86, "learning_rate": 3.5602358634501906e-05, "loss": 1.4154, "step": 67000},
    {"epoch": 0.87, "learning_rate": 3.5494913549684755e-05, "loss": 1.3607, "step": 67500},
    {"epoch": 0.88, "learning_rate": 3.5387468464867604e-05, "loss": 1.3604, "step": 68000},
    {"epoch": 0.88, "learning_rate": 3.528002338005046e-05, "loss": 1.3624, "step": 68500},
    {"epoch": 0.89, "learning_rate": 3.517257829523331e-05, "loss": 1.3805, "step": 69000},
    {"epoch": 0.9, "learning_rate": 3.506513321041616e-05, "loss": 1.3659, "step": 69500},
    {"epoch": 0.9, "learning_rate": 3.495768812559901e-05, "loss": 1.3777, "step": 70000},
    {"epoch": 0.91, "learning_rate": 3.485024304078186e-05, "loss": 1.3516, "step": 70500},
    {"epoch": 0.92, "learning_rate": 3.4742797955964705e-05, "loss": 1.3718, "step": 71000},
    {"epoch": 0.92, "learning_rate": 3.463535287114756e-05, "loss": 1.37, "step": 71500},
    {"epoch": 0.93, "learning_rate": 3.452790778633041e-05, "loss": 1.3502, "step": 72000},
    {"epoch": 0.93, "learning_rate": 3.442046270151326e-05, "loss": 1.3591, "step": 72500},
    {"epoch": 0.94, "learning_rate": 3.431301761669611e-05, "loss": 1.3483, "step": 73000},
    {"epoch": 0.95, "learning_rate": 3.420557253187896e-05, "loss": 1.3578, "step": 73500},
    {"epoch": 0.95, "learning_rate": 3.4098127447061806e-05, "loss": 1.3465, "step": 74000},
    {"epoch": 0.96, "learning_rate": 3.3990682362244655e-05, "loss": 1.3694, "step": 74500},
    {"epoch": 0.97, "learning_rate": 3.388323727742751e-05, "loss": 1.3631, "step": 75000},
    {"epoch": 0.97, "learning_rate": 3.377579219261035e-05, "loss": 1.3675, "step": 75500},
    {"epoch": 0.98, "learning_rate": 3.366834710779321e-05, "loss": 1.3631, "step": 76000},
    {"epoch": 0.99, "learning_rate": 3.356090202297606e-05, "loss": 1.3432, "step": 76500},
    {"epoch": 0.99, "learning_rate": 3.3453456938158914e-05, "loss": 1.3441, "step": 77000},
    {"epoch": 1.0, "learning_rate": 3.3346011853341756e-05, "loss": 1.3543, "step": 77500},
    {"epoch": 1.01, "learning_rate": 3.323856676852461e-05, "loss": 1.2648, "step": 78000},
    {"epoch": 1.01, "learning_rate": 3.313112168370746e-05, "loss": 1.2416, "step": 78500},
    {"epoch": 1.02, "learning_rate": 3.302367659889031e-05, "loss": 1.2426, "step": 79000},
    {"epoch": 1.03, "learning_rate": 3.291623151407316e-05, "loss": 1.2268, "step": 79500},
    {"epoch": 1.03, "learning_rate": 3.280878642925601e-05, "loss": 1.2366, "step": 80000},
    {"epoch": 1.04, "learning_rate": 3.270134134443886e-05, "loss": 1.2198, "step": 80500},
    {"epoch": 1.04, "learning_rate": 3.2593896259621706e-05, "loss": 1.2172, "step": 81000},
    {"epoch": 1.05, "learning_rate": 3.248645117480456e-05, "loss": 1.226, "step": 81500},
    {"epoch": 1.06, "learning_rate": 3.2379006089987404e-05, "loss": 1.2192, "step": 82000},
    {"epoch": 1.06, "learning_rate": 3.227156100517026e-05, "loss": 1.2385, "step": 82500},
    {"epoch": 1.07, "learning_rate": 3.216411592035311e-05, "loss": 1.2127, "step": 83000},
    {"epoch": 1.08, "learning_rate": 3.205667083553596e-05, "loss": 1.234, "step": 83500},
    {"epoch": 1.08, "learning_rate": 3.194922575071881e-05, "loss": 1.2267, "step": 84000},
    {"epoch": 1.09, "learning_rate": 3.184178066590166e-05, "loss": 1.2302, "step": 84500},
    {"epoch": 1.1, "learning_rate": 3.1734335581084505e-05, "loss": 1.2387, "step": 85000},
    {"epoch": 1.1, "learning_rate": 3.162689049626736e-05, "loss": 1.243, "step": 85500},
    {"epoch": 1.11, "learning_rate": 3.151944541145021e-05, "loss": 1.2139, "step": 86000},
    {"epoch": 1.12, "learning_rate": 3.141200032663306e-05, "loss": 1.2217, "step": 86500},
    {"epoch": 1.12, "learning_rate": 3.130455524181591e-05, "loss": 1.2122, "step": 87000},
    {"epoch": 1.13, "learning_rate": 3.119711015699876e-05, "loss": 1.2253, "step": 87500},
    {"epoch": 1.13, "learning_rate": 3.108966507218161e-05, "loss": 1.2172, "step": 88000},
    {"epoch": 1.14, "learning_rate": 3.0982219987364455e-05, "loss": 1.2296, "step": 88500},
    {"epoch": 1.15, "learning_rate": 3.087477490254731e-05, "loss": 1.2216, "step": 89000},
    {"epoch": 1.15, "learning_rate": 3.076732981773016e-05, "loss": 1.2205, "step": 89500},
    {"epoch": 1.16, "learning_rate": 3.065988473291301e-05, "loss": 1.2127, "step": 90000},
    {"epoch": 1.17, "learning_rate": 3.055243964809586e-05, "loss": 1.2324, "step": 90500},
    {"epoch": 1.17, "learning_rate": 3.044499456327871e-05, "loss": 1.2124, "step": 91000},
    {"epoch": 1.18, "learning_rate": 3.033754947846156e-05, "loss": 1.2364, "step": 91500},
    {"epoch": 1.19, "learning_rate": 3.0230104393644408e-05, "loss": 1.2111, "step": 92000},
    {"epoch": 1.19, "learning_rate": 3.012265930882726e-05, "loss": 1.2082, "step": 92500},
    {"epoch": 1.2, "learning_rate": 3.0015214224010106e-05, "loss": 1.2292, "step": 93000},
    {"epoch": 1.21, "learning_rate": 2.990776913919296e-05, "loss": 1.2236, "step": 93500},
    {"epoch": 1.21, "learning_rate": 2.980032405437581e-05, "loss": 1.2178, "step": 94000},
    {"epoch": 1.22, "learning_rate": 2.9692878969558657e-05, "loss": 1.2165, "step": 94500},
    {"epoch": 1.22, "learning_rate": 2.958543388474151e-05, "loss": 1.21, "step": 95000},
    {"epoch": 1.23, "learning_rate": 2.947798879992436e-05, "loss": 1.2239, "step": 95500},
    {"epoch": 1.24, "learning_rate": 2.9370543715107214e-05, "loss": 1.2094, "step": 96000},
    {"epoch": 1.24, "learning_rate": 2.926309863029006e-05, "loss": 1.1993, "step": 96500},
    {"epoch": 1.25, "learning_rate": 2.9155653545472912e-05, "loss": 1.2143, "step": 97000},
    {"epoch": 1.26, "learning_rate": 2.9048208460655764e-05, "loss": 1.2092, "step": 97500},
    {"epoch": 1.26, "learning_rate": 2.894076337583861e-05, "loss": 1.2128, "step": 98000},
    {"epoch": 1.27, "learning_rate": 2.8833318291021462e-05, "loss": 1.2339, "step": 98500},
    {"epoch": 1.28, "learning_rate": 2.872587320620431e-05, "loss": 1.2002, "step": 99000},
    {"epoch": 1.28, "learning_rate": 2.8618428121387157e-05, "loss": 1.2068, "step": 99500},
    {"epoch": 1.29, "learning_rate": 2.851098303657001e-05, "loss": 1.2026, "step": 100000},
    {"epoch": 1.3, "learning_rate": 2.840353795175286e-05, "loss": 1.2186, "step": 100500},
    {"epoch": 1.3, "learning_rate": 2.8296092866935707e-05, "loss": 1.2251, "step": 101000},
    {"epoch": 1.31, "learning_rate": 2.818864778211856e-05, "loss": 1.2054, "step": 101500},
    {"epoch": 1.32, "learning_rate": 2.8081202697301412e-05, "loss": 1.2136, "step": 102000},
    {"epoch": 1.32, "learning_rate": 2.7973757612484258e-05, "loss": 1.2064, "step": 102500},
    {"epoch": 1.33, "learning_rate": 2.786631252766711e-05, "loss": 1.1948, "step": 103000},
    {"epoch": 1.33, "learning_rate": 2.7758867442849963e-05, "loss": 1.1918, "step": 103500},
    {"epoch": 1.34, "learning_rate": 2.7651422358032808e-05, "loss": 1.2137, "step": 104000},
    {"epoch": 1.35, "learning_rate": 2.754397727321566e-05, "loss": 1.1977, "step": 104500},
    {"epoch": 1.35, "learning_rate": 2.7436532188398513e-05, "loss": 1.2128, "step": 105000},
    {"epoch": 1.36, "learning_rate": 2.7329087103581362e-05, "loss": 1.1802, "step": 105500},
    {"epoch": 1.37, "learning_rate": 2.722164201876421e-05, "loss": 1.1854, "step": 106000},
    {"epoch": 1.37, "learning_rate": 2.711419693394706e-05, "loss": 1.1809, "step": 106500},
    {"epoch": 1.38, "learning_rate": 2.7006751849129912e-05, "loss": 1.1923, "step": 107000},
    {"epoch": 1.39, "learning_rate": 2.6899306764312758e-05, "loss": 1.1918, "step": 107500},
    {"epoch": 1.39, "learning_rate": 2.679186167949561e-05, "loss": 1.1987, "step": 108000},
    {"epoch": 1.4, "learning_rate": 2.6684416594678463e-05, "loss": 1.1786, "step": 108500},
    {"epoch": 1.41, "learning_rate": 2.657697150986131e-05, "loss": 1.179, "step": 109000},
    {"epoch": 1.41, "learning_rate": 2.646952642504416e-05, "loss": 1.194, "step": 109500},
    {"epoch": 1.42, "learning_rate": 2.6362081340227013e-05, "loss": 1.1954, "step": 110000},
    {"epoch": 1.42, "learning_rate": 2.625463625540986e-05, "loss": 1.1996, "step": 110500},
    {"epoch": 1.43, "learning_rate": 2.614719117059271e-05, "loss": 1.2026, "step": 111000},
    {"epoch": 1.44, "learning_rate": 2.6039746085775564e-05, "loss": 1.1892, "step": 111500},
    {"epoch": 1.44, "learning_rate": 2.593230100095841e-05, "loss": 1.1968, "step": 112000},
    {"epoch": 1.45, "learning_rate": 2.5824855916141262e-05, "loss": 1.1719, "step": 112500},
    {"epoch": 1.46, "learning_rate": 2.571741083132411e-05, "loss": 1.1855, "step": 113000},
    {"epoch": 1.46, "learning_rate": 2.5609965746506963e-05, "loss": 1.1762, "step": 113500},
    {"epoch": 1.47, "learning_rate": 2.550252066168981e-05, "loss": 1.187, "step": 114000},
    {"epoch": 1.48, "learning_rate": 2.539507557687266e-05, "loss": 1.1892, "step": 114500},
    {"epoch": 1.48, "learning_rate": 2.5287630492055514e-05, "loss": 1.2085, "step": 115000},
    {"epoch": 1.49, "learning_rate": 2.518018540723836e-05, "loss": 1.181, "step": 115500},
    {"epoch": 1.5, "learning_rate": 2.5072740322421212e-05, "loss": 1.1654, "step": 116000},
    {"epoch": 1.5, "learning_rate": 2.496529523760406e-05, "loss": 1.1677, "step": 116500},
    {"epoch": 1.51, "learning_rate": 2.4857850152786913e-05, "loss": 1.2037, "step": 117000},
    {"epoch": 1.51, "learning_rate": 2.4750405067969762e-05, "loss": 1.1787, "step": 117500},
    {"epoch": 1.52, "learning_rate": 2.464295998315261e-05, "loss": 1.179, "step": 118000},
    {"epoch": 1.53, "learning_rate": 2.4535514898335464e-05, "loss": 1.2019, "step": 118500},
    {"epoch": 1.53, "learning_rate": 2.4428069813518313e-05, "loss": 1.1787, "step": 119000},
    {"epoch": 1.54, "learning_rate": 2.432062472870116e-05, "loss": 1.1677, "step": 119500},
    {"epoch": 1.55, "learning_rate": 2.4213179643884014e-05, "loss": 1.1919, "step": 120000},
    {"epoch": 1.55, "learning_rate": 2.4105734559066863e-05, "loss": 1.1859, "step": 120500},
    {"epoch": 1.56, "learning_rate": 2.3998289474249712e-05, "loss": 1.1597, "step": 121000},
    {"epoch": 1.57, "learning_rate": 2.389084438943256e-05, "loss": 1.1806, "step": 121500},
    {"epoch": 1.57, "learning_rate": 2.378339930461541e-05, "loss": 1.1694, "step": 122000},
    {"epoch": 1.58, "learning_rate": 2.3675954219798263e-05, "loss": 1.1896, "step": 122500},
    {"epoch": 1.59, "learning_rate": 2.356850913498111e-05, "loss": 1.1897, "step": 123000},
    {"epoch": 1.59, "learning_rate": 2.346106405016396e-05, "loss": 1.1796, "step": 123500},
    {"epoch": 1.6, "learning_rate": 2.3353618965346813e-05, "loss": 1.1745, "step": 124000},
    {"epoch": 1.61, "learning_rate": 2.3246173880529662e-05, "loss": 1.1828, "step": 124500},
    {"epoch": 1.61, "learning_rate": 2.3138728795712514e-05, "loss": 1.1875, "step": 125000},
    {"epoch": 1.62, "learning_rate": 2.3031283710895363e-05, "loss": 1.1716, "step": 125500},
    {"epoch": 1.62, "learning_rate": 2.2923838626078212e-05, "loss": 1.1768, "step": 126000},
    {"epoch": 1.63, "learning_rate": 2.2816393541261065e-05, "loss": 1.1821, "step": 126500},
    {"epoch": 1.64, "learning_rate": 2.2708948456443914e-05, "loss": 1.1672, "step": 127000},
    {"epoch": 1.64, "learning_rate": 2.2601503371626763e-05, "loss": 1.173, "step": 127500},
    {"epoch": 1.65, "learning_rate": 2.2494058286809612e-05, "loss": 1.1751, "step": 128000},
    {"epoch": 1.66, "learning_rate": 2.238661320199246e-05, "loss": 1.1967, "step": 128500},
    {"epoch": 1.66, "learning_rate": 2.227916811717531e-05, "loss": 1.1695, "step": 129000},
    {"epoch": 1.67, "learning_rate": 2.2171723032358162e-05, "loss": 1.179, "step": 129500},
    {"epoch": 1.68, "learning_rate": 2.206427794754101e-05, "loss": 1.1793, "step": 130000},
    {"epoch": 1.68, "learning_rate": 2.1956832862723864e-05, "loss": 1.1579, "step": 130500},
    {"epoch": 1.69, "learning_rate": 2.1849387777906713e-05, "loss": 1.1466, "step": 131000},
    {"epoch": 1.7, "learning_rate": 2.1741942693089562e-05, "loss": 1.208, "step": 131500},
    {"epoch": 1.7, "learning_rate": 2.1634497608272414e-05, "loss": 1.1592, "step": 132000},
    {"epoch": 1.71, "learning_rate": 2.1527052523455263e-05, "loss": 1.1558, "step": 132500},
    {"epoch": 1.71, "learning_rate": 2.1419607438638112e-05, "loss": 1.158, "step": 133000},
    {"epoch": 1.72, "learning_rate": 2.1312162353820965e-05, "loss": 1.1566, "step": 133500},
    {"epoch": 1.73, "learning_rate": 2.1204717269003814e-05, "loss": 1.1722, "step": 134000},
    {"epoch": 1.73, "learning_rate": 2.1097272184186666e-05, "loss": 1.1612, "step": 134500},
    {"epoch": 1.74, "learning_rate": 2.0989827099369515e-05, "loss": 1.1706, "step": 135000},
    {"epoch": 1.75, "learning_rate": 2.0882382014552364e-05, "loss": 1.1746, "step": 135500},
    {"epoch": 1.75, "learning_rate": 2.0774936929735213e-05, "loss": 1.1568, "step": 136000},
    {"epoch": 1.76, "learning_rate": 2.0667491844918062e-05, "loss": 1.1564, "step": 136500},
    {"epoch": 1.77, "learning_rate": 2.056004676010091e-05, "loss": 1.1566, "step": 137000},
    {"epoch": 1.77, "learning_rate": 2.0452601675283764e-05, "loss": 1.1684, "step": 137500},
    {"epoch": 1.78, "learning_rate": 2.0345156590466613e-05, "loss": 1.166, "step": 138000},
    {"epoch": 1.79, "learning_rate": 2.0237711505649465e-05, "loss": 1.1439, "step": 138500},
    {"epoch": 1.79, "learning_rate": 2.0130266420832314e-05, "loss": 1.1535, "step": 139000},
    {"epoch": 1.8, "learning_rate": 2.0022821336015163e-05, "loss": 1.1571, "step": 139500},
    {"epoch": 1.81, "learning_rate": 1.9915376251198015e-05, "loss": 1.1573, "step": 140000},
    {"epoch": 1.81, "learning_rate": 1.9807931166380864e-05, "loss": 1.1682, "step": 140500},
    {"epoch": 1.82, "learning_rate": 1.9700486081563713e-05, "loss": 1.1506, "step": 141000},
    {"epoch": 1.82, "learning_rate": 1.9593040996746566e-05, "loss": 1.1513, "step": 141500},
    {"epoch": 1.83, "learning_rate": 1.9485595911929415e-05, "loss": 1.17, "step": 142000},
    {"epoch": 1.84, "learning_rate": 1.9378150827112264e-05, "loss": 1.1538, "step": 142500},
    {"epoch": 1.84, "learning_rate": 1.9270705742295113e-05, "loss": 1.1461, "step": 143000},
    {"epoch": 1.85, "learning_rate": 1.9163260657477962e-05, "loss": 1.1353, "step": 143500},
    {"epoch": 1.86, "learning_rate": 1.9055815572660814e-05, "loss": 1.1523, "step": 144000},
    {"epoch": 1.86, "learning_rate": 1.8948370487843663e-05, "loss": 1.1547, "step": 144500},
    {"epoch": 1.87, "learning_rate": 1.8840925403026512e-05, "loss": 1.1092, "step": 145000},
    {"epoch": 1.88, "learning_rate": 1.8733480318209365e-05, "loss": 1.1662, "step": 145500},
    {"epoch": 1.88, "learning_rate": 1.8626035233392214e-05, "loss": 1.1539, "step": 146000},
    {"epoch": 1.89, "learning_rate": 1.8518590148575063e-05, "loss": 1.1571, "step": 146500},
    {"epoch": 1.9, "learning_rate": 1.8411145063757915e-05, "loss": 1.1228, "step": 147000},
    {"epoch": 1.9, "learning_rate": 1.8303699978940764e-05, "loss": 1.1421, "step": 147500},
    {"epoch": 1.91, "learning_rate": 1.8196254894123617e-05, "loss": 1.1603, "step": 148000},
    {"epoch": 1.91, "learning_rate": 1.8088809809306466e-05, "loss": 1.1051, "step": 148500},
    {"epoch": 1.92, "learning_rate": 1.7981364724489315e-05, "loss": 1.1423, "step": 149000},
    {"epoch": 1.93, "learning_rate": 1.7873919639672164e-05, "loss": 1.1548, "step": 149500},
    {"epoch": 1.93, "learning_rate": 1.7766474554855013e-05, "loss": 1.1424, "step": 150000},
    {"epoch": 1.94, "learning_rate": 1.7659029470037862e-05, "loss": 1.1435, "step": 150500},
    {"epoch": 1.95, "learning_rate": 1.7551584385220714e-05, "loss": 1.1592, "step": 151000},
    {"epoch": 1.95, "learning_rate": 1.7444139300403563e-05, "loss": 1.1364, "step": 151500},
    {"epoch": 1.96, "learning_rate": 1.7336694215586416e-05, "loss": 1.1412, "step": 152000},
    {"epoch": 1.97, "learning_rate": 1.7229249130769265e-05, "loss": 1.1428, "step": 152500},
    {"epoch": 1.97, "learning_rate": 1.7121804045952114e-05, "loss": 1.131, "step": 153000},
    {"epoch": 1.98, "learning_rate": 1.7014358961134966e-05, "loss": 1.1438, "step": 153500},
    {"epoch": 1.99, "learning_rate": 1.6906913876317815e-05, "loss": 1.117, "step": 154000},
    {"epoch": 1.99, "learning_rate": 1.6799468791500664e-05, "loss": 1.1552, "step": 154500},
    {"epoch": 2.0, "learning_rate": 1.6692023706683516e-05, "loss": 1.1362, "step": 155000},
    {"epoch": 2.0, "learning_rate": 1.6584578621866365e-05, "loss": 1.0437, "step": 155500},
    {"epoch": 2.01, "learning_rate": 1.6477133537049214e-05, "loss": 0.9948, "step": 156000},
    {"epoch": 2.02, "learning_rate": 1.6369688452232067e-05, "loss": 1.0094, "step": 156500},
    {"epoch": 2.02, "learning_rate": 1.6262243367414916e-05, "loss": 1.0137, "step": 157000},
    {"epoch": 2.03, "learning_rate": 1.6154798282597765e-05, "loss": 1.0197, "step": 157500},
    {"epoch": 2.04, "learning_rate": 1.6047353197780614e-05, "loss": 1.0113, "step": 158000},
    {"epoch": 2.04, "learning_rate": 1.5939908112963463e-05, "loss": 1.0123, "step": 158500},
    {"epoch": 2.05, "learning_rate": 1.5832463028146315e-05, "loss": 1.018, "step": 159000},
    {"epoch": 2.06, "learning_rate": 1.5725017943329164e-05, "loss": 1.0256, "step": 159500},
    {"epoch": 2.06, "learning_rate": 1.5617572858512013e-05, "loss": 0.9943, "step": 160000},
    {"epoch": 2.07, "learning_rate": 1.5510127773694866e-05, "loss": 1.0117, "step": 160500},
    {"epoch": 2.08, "learning_rate": 1.5402682688877715e-05, "loss": 1.0127, "step": 161000},
    {"epoch": 2.08, "learning_rate": 1.5295237604060567e-05, "loss": 1.012, "step": 161500},
    {"epoch": 2.09, "learning_rate": 1.5187792519243416e-05, "loss": 0.9863, "step": 162000},
    {"epoch": 2.1, "learning_rate": 1.5080347434426265e-05, "loss": 1.0253, "step": 162500},
    {"epoch": 2.1, "learning_rate": 1.4972902349609116e-05, "loss": 1.0143, "step": 163000},
    {"epoch": 2.11, "learning_rate": 1.4865457264791965e-05, "loss": 0.9931, "step": 163500},
    {"epoch": 2.11, "learning_rate": 1.4758012179974814e-05, "loss": 1.0074, "step": 164000},
    {"epoch": 2.12, "learning_rate": 1.4650567095157666e-05, "loss": 0.9954, "step": 164500},
    {"epoch": 2.13, "learning_rate": 1.4543122010340515e-05, "loss": 1.0081, "step": 165000},
    {"epoch": 2.13, "learning_rate": 1.4435676925523368e-05, "loss": 1.0072, "step": 165500},
    {"epoch": 2.14, "learning_rate": 1.4328231840706217e-05, "loss": 1.0019, "step": 166000},
    {"epoch": 2.15, "learning_rate": 1.4220786755889066e-05, "loss": 1.0164, "step": 166500},
    {"epoch": 2.15, "learning_rate": 1.4113341671071917e-05, "loss": 1.0449, "step": 167000},
    {"epoch": 2.16, "learning_rate": 1.4005896586254766e-05, "loss": 0.9966, "step": 167500},
    {"epoch": 2.17, "learning_rate": 1.3898451501437615e-05, "loss": 1.0105, "step": 168000},
    {"epoch": 2.17, "learning_rate": 1.3791006416620467e-05, "loss": 1.0485, "step": 168500},
    {"epoch": 2.18, "learning_rate": 1.3683561331803316e-05, "loss": 1.0047, "step": 169000},
    {"epoch": 2.19, "learning_rate": 1.3576116246986165e-05, "loss": 1.0103, "step": 169500},
    {"epoch": 2.19, "learning_rate": 1.3468671162169016e-05, "loss": 1.0032, "step": 170000},
    {"epoch": 2.2, "learning_rate": 1.3361226077351865e-05, "loss": 1.0013, "step": 170500},
    {"epoch": 2.2, "learning_rate": 1.3253780992534717e-05, "loss": 1.0028, "step": 171000},
    {"epoch": 2.21, "learning_rate": 1.3146335907717566e-05, "loss": 1.0134, "step": 171500},
    {"epoch": 2.22, "learning_rate": 1.3038890822900415e-05, "loss": 1.036, "step": 172000},
    {"epoch": 2.22, "learning_rate": 1.2931445738083268e-05, "loss": 1.0139, "step": 172500},
    {"epoch": 2.23, "learning_rate": 1.2824000653266117e-05, "loss": 0.9908, "step": 173000},
    {"epoch": 2.24, "learning_rate": 1.2716555568448966e-05, "loss": 0.9896, "step": 173500},
    {"epoch": 2.24, "learning_rate": 1.2609110483631816e-05, "loss": 0.9982, "step": 174000},
    {"epoch": 2.25, "learning_rate": 1.2501665398814665e-05, "loss": 1.0115, "step": 174500},
    {"epoch": 2.26, "learning_rate": 1.2394220313997516e-05, "loss": 0.9939, "step": 175000},
    {"epoch": 2.26, "learning_rate": 1.2286775229180367e-05, "loss": 0.9957, "step": 175500},
    {"epoch": 2.27, "learning_rate": 1.2179330144363218e-05, "loss": 1.0009, "step": 176000},
    {"epoch": 2.28, "learning_rate": 1.2071885059546067e-05, "loss": 1.0102, "step": 176500},
    {"epoch": 2.28, "learning_rate": 1.1964439974728917e-05, "loss": 1.0058, "step": 177000},
    {"epoch": 2.29, "learning_rate": 1.1856994889911766e-05, "loss": 0.9999, "step": 177500},
    {"epoch": 2.3, "learning_rate": 1.1749549805094617e-05, "loss": 1.0141, "step": 178000},
    {"epoch": 2.3, "learning_rate": 1.1642104720277466e-05, "loss": 1.0004, "step": 178500},
    {"epoch": 2.31, "learning_rate": 1.1534659635460317e-05, "loss": 0.9727, "step": 179000},
    {"epoch": 2.31, "learning_rate": 1.1427214550643167e-05, "loss": 0.9989, "step": 179500},
    {"epoch": 2.32, "learning_rate": 1.1319769465826018e-05, "loss": 0.9968, "step": 180000},
    {"epoch": 2.33, "learning_rate": 1.1212324381008867e-05, "loss": 1.0138, "step": 180500},
    {"epoch": 2.33, "learning_rate": 1.1104879296191716e-05, "loss": 1.0079, "step": 181000},
    {"epoch": 2.34, "learning_rate": 1.0997434211374567e-05, "loss": 0.9866, "step": 181500},
    {"epoch": 2.35, "learning_rate": 1.0889989126557416e-05, "loss": 1.0007, "step": 182000},
    {"epoch": 2.35, "learning_rate": 1.0782544041740267e-05, "loss": 1.004, "step": 182500},
    {"epoch": 2.36, "learning_rate": 1.0675098956923117e-05, "loss": 0.9913, "step": 183000},
    {"epoch": 2.37, "learning_rate": 1.0567653872105968e-05, "loss": 1.0002, "step": 183500},
    {"epoch": 2.37, "learning_rate": 1.0460208787288817e-05, "loss": 1.0069, "step": 184000},
    {"epoch": 2.38, "learning_rate": 1.0352763702471668e-05, "loss": 0.9947, "step": 184500},
    {"epoch": 2.39, "learning_rate": 1.0245318617654517e-05, "loss": 0.9987, "step": 185000},
    {"epoch": 2.39, "learning_rate": 1.0137873532837368e-05, "loss": 0.9895, "step": 185500},
    {"epoch": 2.4, "learning_rate": 1.0030428448020217e-05, "loss": 0.9888, "step": 186000},
    {"epoch": 2.4, "learning_rate": 9.922983363203067e-06, "loss": 0.9905, "step": 186500},
    {"epoch": 2.41, "learning_rate": 9.815538278385918e-06, "loss": 0.9872, "step": 187000},
    {"epoch": 2.42, "learning_rate": 9.708093193568769e-06, "loss": 0.9855, "step": 187500},
    {"epoch": 2.42, "learning_rate": 9.600648108751618e-06, "loss": 1.0014, "step": 188000},
    {"epoch": 2.43, "learning_rate": 9.493203023934467e-06, "loss": 0.9987, "step": 188500},
    {"epoch": 2.44, "learning_rate": 9.385757939117317e-06, "loss": 0.9986, "step": 189000},
    {"epoch": 2.44, "learning_rate": 9.278312854300168e-06, "loss": 1.0028, "step": 189500},
    {"epoch": 2.45, "learning_rate": 9.170867769483017e-06, "loss": 0.9882, "step": 190000},
    {"epoch": 2.46, "learning_rate": 9.063422684665868e-06, "loss": 0.983, "step": 190500},
    {"epoch": 2.46, "learning_rate": 8.955977599848719e-06, "loss": 0.984, "step": 191000},
    {"epoch": 2.47, "learning_rate": 8.848532515031568e-06, "loss": 1.0114, "step": 191500},
    {"epoch": 2.48, "learning_rate": 8.741087430214417e-06, "loss": 0.9921, "step": 192000},
    {"epoch": 2.48, "learning_rate": 8.633642345397267e-06, "loss": 0.9994, "step": 192500},
    {"epoch": 2.49, "learning_rate": 8.526197260580118e-06, "loss": 0.9947, "step": 193000},
    {"epoch": 2.49, "learning_rate": 8.418752175762967e-06, "loss": 0.9933, "step": 193500},
    {"epoch": 2.5, "learning_rate": 8.311307090945818e-06, "loss": 0.985, "step": 194000},
    {"epoch": 2.51, "learning_rate": 8.203862006128668e-06, "loss": 0.9863, "step": 194500},
    {"epoch": 2.51, "learning_rate": 8.09641692131152e-06, "loss": 0.9924, "step": 195000},
    {"epoch": 2.52, "learning_rate": 7.988971836494368e-06, "loss": 0.9814, "step": 195500},
    {"epoch": 2.53, "learning_rate": 7.881526751677217e-06, "loss": 0.9837, "step": 196000},
    {"epoch": 2.53, "learning_rate": 7.774081666860068e-06, "loss": 0.9849, "step": 196500},
    {"epoch": 2.54, "learning_rate": 7.666636582042919e-06, "loss": 1.0009, "step": 197000},
    {"epoch": 2.55, "learning_rate": 7.559191497225768e-06, "loss": 1.0074, "step": 197500},
    {"epoch": 2.55, "learning_rate": 7.451746412408618e-06, "loss": 0.9983, "step": 198000},
    {"epoch": 2.56, "learning_rate": 7.344301327591468e-06, "loss": 1.0009, "step": 198500},
    {"epoch": 2.57, "learning_rate": 7.236856242774319e-06, "loss": 0.9858, "step": 199000},
    {"epoch": 2.57, "learning_rate": 7.129411157957168e-06, "loss": 0.9908, "step": 199500},
    {"epoch": 2.58, "learning_rate": 7.021966073140019e-06, "loss": 0.973, "step": 200000},
    {"epoch": 2.59, "learning_rate": 6.9145209883228686e-06, "loss": 0.98, "step": 200500},
    {"epoch": 2.59, "learning_rate": 6.807075903505719e-06, "loss": 0.9783, "step": 201000},
    {"epoch": 2.6, "learning_rate": 6.699630818688568e-06, "loss": 0.9697, "step": 201500},
    {"epoch": 2.6, "learning_rate": 6.592185733871418e-06, "loss": 0.9787, "step": 202000},
    {"epoch": 2.61, "learning_rate": 6.484740649054269e-06, "loss": 0.9739, "step": 202500},
    {"epoch": 2.62, "learning_rate": 6.37729556423712e-06, "loss": 0.9966, "step": 203000},
    {"epoch": 2.62, "learning_rate": 6.269850479419969e-06, "loss": 0.9894, "step": 203500},
    {"epoch": 2.63, "learning_rate": 6.1624053946028185e-06, "loss": 0.965, "step": 204000},
    {"epoch": 2.64, "learning_rate": 6.054960309785669e-06, "loss": 0.9836, "step": 204500},
    {"epoch": 2.64, "learning_rate": 5.947515224968519e-06, "loss": 0.9728, "step": 205000},
    {"epoch": 2.65, "learning_rate": 5.840070140151369e-06, "loss": 0.977, "step": 205500},
    {"epoch": 2.66, "learning_rate": 5.732625055334219e-06, "loss": 0.9655, "step": 206000},
    {"epoch": 2.66, "learning_rate": 5.625179970517069e-06, "loss": 0.9802, "step": 206500},
    {"epoch": 2.67, "learning_rate": 5.517734885699919e-06, "loss": 0.9992, "step": 207000},
    {"epoch": 2.68, "learning_rate": 5.410289800882768e-06, "loss": 0.9975, "step": 207500},
    {"epoch": 2.68, "learning_rate": 5.302844716065619e-06, "loss": 0.9721, "step": 208000},
    {"epoch": 2.69, "learning_rate": 5.195399631248469e-06, "loss": 0.9695, "step": 208500},
    {"epoch": 2.69, "learning_rate": 5.08795454643132e-06, "loss": 0.9832, "step": 209000},
    {"epoch": 2.7, "learning_rate": 4.980509461614169e-06, "loss": 0.9873, "step": 209500},
    {"epoch": 2.71, "learning_rate": 4.873064376797019e-06, "loss": 0.9886, "step": 210000},
    {"epoch": 2.71, "learning_rate": 4.765619291979869e-06, "loss": 0.9753, "step": 210500},
    {"epoch": 2.72, "learning_rate": 4.658174207162719e-06, "loss": 0.9785, "step": 211000},
    {"epoch": 2.73, "learning_rate": 4.550729122345569e-06, "loss": 0.9704, "step": 211500},
    {"epoch": 2.73, "learning_rate": 4.44328403752842e-06, "loss": 0.9855, "step": 212000},
    {"epoch": 2.74, "learning_rate": 4.3358389527112696e-06, "loss": 0.9773, "step": 212500},
    {"epoch": 2.75, "learning_rate": 4.2283938678941194e-06, "loss": 0.9894, "step": 213000},
    {"epoch": 2.75, "learning_rate": 4.120948783076969e-06, "loss": 0.9701, "step": 213500},
    {"epoch": 2.76, "learning_rate": 4.01350369825982e-06, "loss": 0.9865, "step": 214000},
    {"epoch": 2.77, "learning_rate": 3.90605861344267e-06, "loss": 0.9934, "step": 214500},
    {"epoch": 2.77, "learning_rate": 3.7986135286255197e-06, "loss": 0.9765, "step": 215000},
    {"epoch": 2.78, "learning_rate": 3.6911684438083696e-06, "loss": 0.9633, "step": 215500},
    {"epoch": 2.78, "learning_rate": 3.58372335899122e-06, "loss": 0.9625, "step": 216000},
    {"epoch": 2.79, "learning_rate": 3.4762782741740698e-06, "loss": 0.9676, "step": 216500},
    {"epoch": 2.8, "learning_rate": 3.36883318935692e-06, "loss": 0.9642, "step": 217000},
    {"epoch": 2.8, "learning_rate": 3.26138810453977e-06, "loss": 0.9923, "step": 217500},
    {"epoch": 2.81, "learning_rate": 3.15394301972262e-06, "loss": 0.9641, "step": 218000},
    {"epoch": 2.82, "learning_rate": 3.04649793490547e-06, "loss": 0.9727, "step": 218500},
    {"epoch": 2.82, "learning_rate": 2.93905285008832e-06, "loss": 0.9868, "step": 219000},
    {"epoch": 2.83, "learning_rate": 2.8316077652711702e-06, "loss": 0.9504, "step": 219500},
    {"epoch": 2.84, "learning_rate": 2.72416268045402e-06, "loss": 0.9811, "step": 220000},
    {"epoch": 2.84, "learning_rate": 2.6167175956368704e-06, "loss": 0.9813, "step": 220500},
    {"epoch": 2.85, "learning_rate": 2.5092725108197202e-06, "loss": 0.9687, "step": 221000},
    {"epoch": 2.86, "learning_rate": 2.40182742600257e-06, "loss": 0.9673, "step": 221500},
    {"epoch": 2.86, "learning_rate": 2.2943823411854204e-06, "loss": 0.9653, "step": 222000},
    {"epoch": 2.87, "learning_rate": 2.1869372563682703e-06, "loss": 0.9479, "step": 222500},
    {"epoch": 2.88, "learning_rate": 2.0794921715511205e-06, "loss": 0.9802, "step": 223000},
    {"epoch": 2.88, "learning_rate": 1.9720470867339704e-06, "loss": 0.9624, "step": 223500},
    {"epoch": 2.89, "learning_rate": 1.8646020019168205e-06, "loss": 0.9822, "step": 224000},
    {"epoch": 2.89, "learning_rate": 1.7571569170996706e-06, "loss": 0.9747, "step": 224500},
    {"epoch": 2.9, "learning_rate": 1.6497118322825206e-06, "loss": 0.9614, "step": 225000},
    {"epoch": 2.91, "learning_rate": 1.5422667474653705e-06, "loss": 0.9729, "step": 225500},
    {"epoch": 2.91, "learning_rate": 1.4348216626482206e-06, "loss": 0.9679, "step": 226000},
    {"epoch": 2.92, "learning_rate": 1.3273765778310707e-06, "loss": 0.9551, "step": 226500},
    {"epoch": 2.93, "learning_rate": 1.2199314930139207e-06, "loss": 0.9762, "step": 227000},
    {"epoch": 2.93, "learning_rate": 1.1124864081967708e-06, "loss": 0.9737, "step": 227500},
    {"epoch": 2.94, "learning_rate": 1.0050413233796209e-06, "loss": 0.9718, "step": 228000},
    {"epoch": 2.95, "learning_rate": 8.975962385624709e-07, "loss": 0.9666, "step": 228500},
    {"epoch": 2.95, "learning_rate": 7.901511537453207e-07, "loss": 0.9854, "step": 229000},
    {"epoch": 2.96, "learning_rate": 6.827060689281709e-07, "loss": 0.9535, "step": 229500},
    {"epoch": 2.97, "learning_rate": 5.752609841110209e-07, "loss": 0.9665, "step": 230000},
    {"epoch": 2.97, "learning_rate": 4.678158992938709e-07, "loss": 0.9463, "step": 230500},
    {"epoch": 2.98, "learning_rate": 3.60370814476721e-07, "loss": 0.9664, "step": 231000},
    {"epoch": 2.98, "learning_rate": 2.52925729659571e-07, "loss": 0.9573, "step": 231500},
    {"epoch": 2.99, "learning_rate": 1.4548064484242105e-07, "loss": 0.95, "step": 232000},
    {"epoch": 3.0, "learning_rate": 3.8035560025271085e-08, "loss": 0.9636, "step": 232500},
    {
      "epoch": 3.0,
      "step": 232677,
      "total_flos": 3.713762606980301e+17,
      "train_loss": 1.2468314008385242,
      "train_runtime": 62368.4442,
      "train_samples_per_second": 37.307,
      "train_steps_per_second": 3.731
    }
  ],
  "max_steps": 232677,
  "num_train_epochs": 3,
  "total_flos": 3.713762606980301e+17,
  "trial_name": null,
  "trial_params": null
}