|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 102820, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001, |
|
"loss": 2.9981, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.7217285633087158, |
|
"eval_runtime": 1.4105, |
|
"eval_samples_per_second": 708.956, |
|
"eval_steps_per_second": 22.687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.95113369820172e-05, |
|
"loss": 0.3596, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.683592677116394, |
|
"eval_runtime": 1.425, |
|
"eval_samples_per_second": 701.743, |
|
"eval_steps_per_second": 22.456, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.90226739640344e-05, |
|
"loss": 0.3481, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.6578707695007324, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.301, |
|
"eval_steps_per_second": 22.474, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.85340109460516e-05, |
|
"loss": 0.3381, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.6479542255401611, |
|
"eval_runtime": 1.42, |
|
"eval_samples_per_second": 704.242, |
|
"eval_steps_per_second": 22.536, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.80453479280688e-05, |
|
"loss": 0.3289, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.6387728452682495, |
|
"eval_runtime": 1.4329, |
|
"eval_samples_per_second": 697.89, |
|
"eval_steps_per_second": 22.332, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.7556684910086e-05, |
|
"loss": 0.3279, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.6298181414604187, |
|
"eval_runtime": 1.4061, |
|
"eval_samples_per_second": 711.206, |
|
"eval_steps_per_second": 22.759, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.706802189210322e-05, |
|
"loss": 0.3217, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.62165367603302, |
|
"eval_runtime": 1.421, |
|
"eval_samples_per_second": 703.748, |
|
"eval_steps_per_second": 22.52, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.65793588741204e-05, |
|
"loss": 0.3165, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.616775393486023, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 702.031, |
|
"eval_steps_per_second": 22.465, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.609069585613761e-05, |
|
"loss": 0.3188, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.6099900007247925, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.489, |
|
"eval_steps_per_second": 22.544, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.560203283815481e-05, |
|
"loss": 0.3022, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 0.6031085252761841, |
|
"eval_runtime": 1.4165, |
|
"eval_samples_per_second": 705.968, |
|
"eval_steps_per_second": 22.591, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.511336982017201e-05, |
|
"loss": 0.3167, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.5986317992210388, |
|
"eval_runtime": 1.413, |
|
"eval_samples_per_second": 707.704, |
|
"eval_steps_per_second": 22.647, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.462470680218921e-05, |
|
"loss": 0.3123, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.594712495803833, |
|
"eval_runtime": 1.4076, |
|
"eval_samples_per_second": 710.451, |
|
"eval_steps_per_second": 22.734, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.413604378420641e-05, |
|
"loss": 0.3102, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.589926540851593, |
|
"eval_runtime": 1.4215, |
|
"eval_samples_per_second": 703.5, |
|
"eval_steps_per_second": 22.512, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.364738076622361e-05, |
|
"loss": 0.3029, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.5852935910224915, |
|
"eval_runtime": 1.4145, |
|
"eval_samples_per_second": 706.961, |
|
"eval_steps_per_second": 22.623, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.315871774824082e-05, |
|
"loss": 0.2999, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.5810648798942566, |
|
"eval_runtime": 1.4374, |
|
"eval_samples_per_second": 695.724, |
|
"eval_steps_per_second": 22.263, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.267005473025801e-05, |
|
"loss": 0.2898, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.5774234533309937, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 703.256, |
|
"eval_steps_per_second": 22.504, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.218139171227522e-05, |
|
"loss": 0.2924, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.5741690993309021, |
|
"eval_runtime": 1.4369, |
|
"eval_samples_per_second": 695.963, |
|
"eval_steps_per_second": 22.271, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.169272869429242e-05, |
|
"loss": 0.2965, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.5683675408363342, |
|
"eval_runtime": 1.4175, |
|
"eval_samples_per_second": 705.474, |
|
"eval_steps_per_second": 22.575, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.120406567630962e-05, |
|
"loss": 0.3003, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.5660465359687805, |
|
"eval_runtime": 1.4225, |
|
"eval_samples_per_second": 703.011, |
|
"eval_steps_per_second": 22.496, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.071540265832682e-05, |
|
"loss": 0.2877, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.5636941194534302, |
|
"eval_runtime": 1.4249, |
|
"eval_samples_per_second": 701.786, |
|
"eval_steps_per_second": 22.457, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.022673964034402e-05, |
|
"loss": 0.28, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.5614505410194397, |
|
"eval_runtime": 1.418, |
|
"eval_samples_per_second": 705.226, |
|
"eval_steps_per_second": 22.567, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.973807662236122e-05, |
|
"loss": 0.2596, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.560372531414032, |
|
"eval_runtime": 1.419, |
|
"eval_samples_per_second": 704.735, |
|
"eval_steps_per_second": 22.552, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.924941360437843e-05, |
|
"loss": 0.2629, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.5570399165153503, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 703.254, |
|
"eval_steps_per_second": 22.504, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.876075058639562e-05, |
|
"loss": 0.2588, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.5555282831192017, |
|
"eval_runtime": 1.4115, |
|
"eval_samples_per_second": 708.456, |
|
"eval_steps_per_second": 22.671, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.827208756841283e-05, |
|
"loss": 0.2623, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.5514973402023315, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.486, |
|
"eval_steps_per_second": 22.544, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.778342455043003e-05, |
|
"loss": 0.2553, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.5486002564430237, |
|
"eval_runtime": 1.42, |
|
"eval_samples_per_second": 704.24, |
|
"eval_steps_per_second": 22.536, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.729476153244723e-05, |
|
"loss": 0.262, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.5457084774971008, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 707.481, |
|
"eval_steps_per_second": 22.639, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.680609851446443e-05, |
|
"loss": 0.2613, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 0.5417377948760986, |
|
"eval_runtime": 1.4156, |
|
"eval_samples_per_second": 706.429, |
|
"eval_steps_per_second": 22.606, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.631743549648163e-05, |
|
"loss": 0.2679, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.5402753949165344, |
|
"eval_runtime": 1.4211, |
|
"eval_samples_per_second": 703.665, |
|
"eval_steps_per_second": 22.517, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.582877247849883e-05, |
|
"loss": 0.2537, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 0.5380659699440002, |
|
"eval_runtime": 1.4234, |
|
"eval_samples_per_second": 702.521, |
|
"eval_steps_per_second": 22.481, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.534010946051603e-05, |
|
"loss": 0.2502, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.532864511013031, |
|
"eval_runtime": 1.4508, |
|
"eval_samples_per_second": 689.292, |
|
"eval_steps_per_second": 22.057, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.485144644253323e-05, |
|
"loss": 0.2594, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.5308486223220825, |
|
"eval_runtime": 1.4175, |
|
"eval_samples_per_second": 705.473, |
|
"eval_steps_per_second": 22.575, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.436278342455043e-05, |
|
"loss": 0.2495, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.5297316312789917, |
|
"eval_runtime": 1.416, |
|
"eval_samples_per_second": 706.219, |
|
"eval_steps_per_second": 22.599, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.387412040656764e-05, |
|
"loss": 0.2499, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 0.5281020402908325, |
|
"eval_runtime": 1.4056, |
|
"eval_samples_per_second": 711.457, |
|
"eval_steps_per_second": 22.767, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.338545738858483e-05, |
|
"loss": 0.2578, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.5247856378555298, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 707.459, |
|
"eval_steps_per_second": 22.639, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.289679437060204e-05, |
|
"loss": 0.2497, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.5230081677436829, |
|
"eval_runtime": 1.4438, |
|
"eval_samples_per_second": 692.611, |
|
"eval_steps_per_second": 22.164, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.240813135261924e-05, |
|
"loss": 0.2565, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.5200880765914917, |
|
"eval_runtime": 1.4398, |
|
"eval_samples_per_second": 694.522, |
|
"eval_steps_per_second": 22.225, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.191946833463644e-05, |
|
"loss": 0.2523, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 0.5170234441757202, |
|
"eval_runtime": 1.4299, |
|
"eval_samples_per_second": 699.349, |
|
"eval_steps_per_second": 22.379, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.143080531665364e-05, |
|
"loss": 0.2501, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.5144360065460205, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.275, |
|
"eval_steps_per_second": 22.473, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.094214229867084e-05, |
|
"loss": 0.2488, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.5127125382423401, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 702.027, |
|
"eval_steps_per_second": 22.465, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.045347928068804e-05, |
|
"loss": 0.2465, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.511444091796875, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 703.257, |
|
"eval_steps_per_second": 22.504, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 7.996481626270525e-05, |
|
"loss": 0.2282, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.512248158454895, |
|
"eval_runtime": 1.419, |
|
"eval_samples_per_second": 704.734, |
|
"eval_steps_per_second": 22.551, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 7.947615324472244e-05, |
|
"loss": 0.2251, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.5102840065956116, |
|
"eval_runtime": 1.414, |
|
"eval_samples_per_second": 707.209, |
|
"eval_steps_per_second": 22.631, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 7.898749022673965e-05, |
|
"loss": 0.2172, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.5100817680358887, |
|
"eval_runtime": 1.4279, |
|
"eval_samples_per_second": 700.321, |
|
"eval_steps_per_second": 22.41, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 7.849882720875685e-05, |
|
"loss": 0.2143, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 0.509198784828186, |
|
"eval_runtime": 1.4398, |
|
"eval_samples_per_second": 694.524, |
|
"eval_steps_per_second": 22.225, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.801016419077405e-05, |
|
"loss": 0.2215, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.5054255127906799, |
|
"eval_runtime": 1.4289, |
|
"eval_samples_per_second": 699.834, |
|
"eval_steps_per_second": 22.395, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.752150117279125e-05, |
|
"loss": 0.2175, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.5042800307273865, |
|
"eval_runtime": 1.4234, |
|
"eval_samples_per_second": 702.521, |
|
"eval_steps_per_second": 22.481, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.703283815480845e-05, |
|
"loss": 0.2181, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.5006986260414124, |
|
"eval_runtime": 1.419, |
|
"eval_samples_per_second": 704.732, |
|
"eval_steps_per_second": 22.551, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.654417513682565e-05, |
|
"loss": 0.2229, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 0.49812304973602295, |
|
"eval_runtime": 1.4369, |
|
"eval_samples_per_second": 695.962, |
|
"eval_steps_per_second": 22.271, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.605551211884286e-05, |
|
"loss": 0.2103, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 0.49662986397743225, |
|
"eval_runtime": 1.4066, |
|
"eval_samples_per_second": 710.955, |
|
"eval_steps_per_second": 22.751, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.556684910086005e-05, |
|
"loss": 0.2195, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 0.4949464499950409, |
|
"eval_runtime": 1.4145, |
|
"eval_samples_per_second": 706.961, |
|
"eval_steps_per_second": 22.623, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.507818608287726e-05, |
|
"loss": 0.2197, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.49265730381011963, |
|
"eval_runtime": 1.4441, |
|
"eval_samples_per_second": 692.484, |
|
"eval_steps_per_second": 22.159, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.458952306489444e-05, |
|
"loss": 0.2163, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 0.4933662414550781, |
|
"eval_runtime": 1.4196, |
|
"eval_samples_per_second": 704.41, |
|
"eval_steps_per_second": 22.541, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.410086004691166e-05, |
|
"loss": 0.2203, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.4912818670272827, |
|
"eval_runtime": 1.4225, |
|
"eval_samples_per_second": 703.011, |
|
"eval_steps_per_second": 22.496, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.361219702892886e-05, |
|
"loss": 0.2131, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.49019622802734375, |
|
"eval_runtime": 1.4165, |
|
"eval_samples_per_second": 705.97, |
|
"eval_steps_per_second": 22.591, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 7.312353401094606e-05, |
|
"loss": 0.2192, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.48748642206192017, |
|
"eval_runtime": 1.419, |
|
"eval_samples_per_second": 704.732, |
|
"eval_steps_per_second": 22.551, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.263487099296326e-05, |
|
"loss": 0.216, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.4867847263813019, |
|
"eval_runtime": 1.4078, |
|
"eval_samples_per_second": 710.341, |
|
"eval_steps_per_second": 22.731, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 7.214620797498047e-05, |
|
"loss": 0.2151, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 0.4847819209098816, |
|
"eval_runtime": 1.4175, |
|
"eval_samples_per_second": 705.473, |
|
"eval_steps_per_second": 22.575, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 7.165754495699765e-05, |
|
"loss": 0.2134, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 0.48309269547462463, |
|
"eval_runtime": 1.4264, |
|
"eval_samples_per_second": 701.053, |
|
"eval_steps_per_second": 22.434, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.116888193901487e-05, |
|
"loss": 0.215, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 0.4808345437049866, |
|
"eval_runtime": 1.4354, |
|
"eval_samples_per_second": 696.688, |
|
"eval_steps_per_second": 22.294, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 7.068021892103205e-05, |
|
"loss": 0.2149, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 0.48003917932510376, |
|
"eval_runtime": 1.4294, |
|
"eval_samples_per_second": 699.594, |
|
"eval_steps_per_second": 22.387, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.019155590304925e-05, |
|
"loss": 0.2081, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 0.4828941524028778, |
|
"eval_runtime": 1.4264, |
|
"eval_samples_per_second": 701.051, |
|
"eval_steps_per_second": 22.434, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 6.970289288506647e-05, |
|
"loss": 0.1851, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 0.48561614751815796, |
|
"eval_runtime": 1.4155, |
|
"eval_samples_per_second": 706.463, |
|
"eval_steps_per_second": 22.607, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 6.921422986708365e-05, |
|
"loss": 0.1888, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 0.48478779196739197, |
|
"eval_runtime": 1.4155, |
|
"eval_samples_per_second": 706.465, |
|
"eval_steps_per_second": 22.607, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 6.872556684910086e-05, |
|
"loss": 0.1916, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.4795476198196411, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.273, |
|
"eval_steps_per_second": 22.473, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 6.823690383111806e-05, |
|
"loss": 0.1932, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 0.47898271679878235, |
|
"eval_runtime": 1.42, |
|
"eval_samples_per_second": 704.241, |
|
"eval_steps_per_second": 22.536, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.774824081313526e-05, |
|
"loss": 0.1882, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 0.48221901059150696, |
|
"eval_runtime": 1.4234, |
|
"eval_samples_per_second": 702.521, |
|
"eval_steps_per_second": 22.481, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.725957779515246e-05, |
|
"loss": 0.1845, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_loss": 0.479130357503891, |
|
"eval_runtime": 1.4215, |
|
"eval_samples_per_second": 703.503, |
|
"eval_steps_per_second": 22.512, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.677091477716966e-05, |
|
"loss": 0.1895, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_loss": 0.4773789644241333, |
|
"eval_runtime": 1.4294, |
|
"eval_samples_per_second": 699.592, |
|
"eval_steps_per_second": 22.387, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.628225175918686e-05, |
|
"loss": 0.1909, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 0.4763247072696686, |
|
"eval_runtime": 1.409, |
|
"eval_samples_per_second": 709.702, |
|
"eval_steps_per_second": 22.71, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.579358874120408e-05, |
|
"loss": 0.1841, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_loss": 0.4759540259838104, |
|
"eval_runtime": 1.421, |
|
"eval_samples_per_second": 703.747, |
|
"eval_steps_per_second": 22.52, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.530492572322126e-05, |
|
"loss": 0.1882, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.4739590585231781, |
|
"eval_runtime": 1.417, |
|
"eval_samples_per_second": 705.724, |
|
"eval_steps_per_second": 22.583, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.481626270523847e-05, |
|
"loss": 0.1902, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 0.47059980034828186, |
|
"eval_runtime": 1.4215, |
|
"eval_samples_per_second": 703.504, |
|
"eval_steps_per_second": 22.512, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 6.432759968725567e-05, |
|
"loss": 0.1924, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 0.46917036175727844, |
|
"eval_runtime": 1.4388, |
|
"eval_samples_per_second": 695.005, |
|
"eval_steps_per_second": 22.24, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 6.383893666927287e-05, |
|
"loss": 0.1845, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.46856725215911865, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 707.458, |
|
"eval_steps_per_second": 22.639, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 6.335027365129007e-05, |
|
"loss": 0.1892, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.46638262271881104, |
|
"eval_runtime": 1.4632, |
|
"eval_samples_per_second": 683.445, |
|
"eval_steps_per_second": 21.87, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.286161063330727e-05, |
|
"loss": 0.1849, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 0.46737831830978394, |
|
"eval_runtime": 1.4319, |
|
"eval_samples_per_second": 698.377, |
|
"eval_steps_per_second": 22.348, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 6.237294761532447e-05, |
|
"loss": 0.1883, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_loss": 0.46423232555389404, |
|
"eval_runtime": 1.4249, |
|
"eval_samples_per_second": 701.787, |
|
"eval_steps_per_second": 22.457, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.188428459734168e-05, |
|
"loss": 0.1821, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.4651487171649933, |
|
"eval_runtime": 1.4135, |
|
"eval_samples_per_second": 707.454, |
|
"eval_steps_per_second": 22.639, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.139562157935887e-05, |
|
"loss": 0.1905, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 0.462035208940506, |
|
"eval_runtime": 1.417, |
|
"eval_samples_per_second": 705.722, |
|
"eval_steps_per_second": 22.583, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 6.090695856137608e-05, |
|
"loss": 0.185, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 0.4627071022987366, |
|
"eval_runtime": 1.4179, |
|
"eval_samples_per_second": 705.261, |
|
"eval_steps_per_second": 22.568, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 6.0418295543393276e-05, |
|
"loss": 0.19, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 0.4600967466831207, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 703.255, |
|
"eval_steps_per_second": 22.504, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.992963252541048e-05, |
|
"loss": 0.1734, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 0.46835413575172424, |
|
"eval_runtime": 1.4105, |
|
"eval_samples_per_second": 708.959, |
|
"eval_steps_per_second": 22.687, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 5.944096950742768e-05, |
|
"loss": 0.1665, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 0.4674428403377533, |
|
"eval_runtime": 1.4224, |
|
"eval_samples_per_second": 703.013, |
|
"eval_steps_per_second": 22.496, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 5.895230648944489e-05, |
|
"loss": 0.1621, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 0.46939995884895325, |
|
"eval_runtime": 1.418, |
|
"eval_samples_per_second": 705.227, |
|
"eval_steps_per_second": 22.567, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.846364347146208e-05, |
|
"loss": 0.1633, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 0.4672936499118805, |
|
"eval_runtime": 1.4229, |
|
"eval_samples_per_second": 702.769, |
|
"eval_steps_per_second": 22.489, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 5.797498045347929e-05, |
|
"loss": 0.1612, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 0.4673324525356293, |
|
"eval_runtime": 1.42, |
|
"eval_samples_per_second": 704.241, |
|
"eval_steps_per_second": 22.536, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 5.7486317435496486e-05, |
|
"loss": 0.1644, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 0.4646117091178894, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 702.031, |
|
"eval_steps_per_second": 22.465, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.699765441751369e-05, |
|
"loss": 0.1655, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.46449655294418335, |
|
"eval_runtime": 1.4359, |
|
"eval_samples_per_second": 696.446, |
|
"eval_steps_per_second": 22.286, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 5.6508991399530885e-05, |
|
"loss": 0.1627, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 0.4624975025653839, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 702.032, |
|
"eval_steps_per_second": 22.465, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 5.6020328381548085e-05, |
|
"loss": 0.1675, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 0.46210145950317383, |
|
"eval_runtime": 1.4274, |
|
"eval_samples_per_second": 700.566, |
|
"eval_steps_per_second": 22.418, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 5.553166536356529e-05, |
|
"loss": 0.1648, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_loss": 0.4633449614048004, |
|
"eval_runtime": 1.4487, |
|
"eval_samples_per_second": 690.281, |
|
"eval_steps_per_second": 22.089, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 5.5043002345582483e-05, |
|
"loss": 0.1691, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_loss": 0.4609707295894623, |
|
"eval_runtime": 1.4403, |
|
"eval_samples_per_second": 694.283, |
|
"eval_steps_per_second": 22.217, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 5.455433932759969e-05, |
|
"loss": 0.1642, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 0.46095407009124756, |
|
"eval_runtime": 1.4319, |
|
"eval_samples_per_second": 698.378, |
|
"eval_steps_per_second": 22.348, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 5.406567630961689e-05, |
|
"loss": 0.1666, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"eval_loss": 0.46066999435424805, |
|
"eval_runtime": 1.4264, |
|
"eval_samples_per_second": 701.053, |
|
"eval_steps_per_second": 22.434, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.3577013291634095e-05, |
|
"loss": 0.167, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 0.45587822794914246, |
|
"eval_runtime": 1.4344, |
|
"eval_samples_per_second": 697.169, |
|
"eval_steps_per_second": 22.309, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.308835027365129e-05, |
|
"loss": 0.1691, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 0.45380640029907227, |
|
"eval_runtime": 1.4344, |
|
"eval_samples_per_second": 697.166, |
|
"eval_steps_per_second": 22.309, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 5.2599687255668494e-05, |
|
"loss": 0.1674, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 0.45414891839027405, |
|
"eval_runtime": 1.4319, |
|
"eval_samples_per_second": 698.38, |
|
"eval_steps_per_second": 22.348, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 5.2111024237685694e-05, |
|
"loss": 0.1613, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 0.45553380250930786, |
|
"eval_runtime": 1.4458, |
|
"eval_samples_per_second": 691.661, |
|
"eval_steps_per_second": 22.133, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.16223612197029e-05, |
|
"loss": 0.1613, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 0.4527079463005066, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.487, |
|
"eval_steps_per_second": 22.544, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 5.113369820172009e-05, |
|
"loss": 0.1639, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"eval_loss": 0.44933873414993286, |
|
"eval_runtime": 1.416, |
|
"eval_samples_per_second": 706.217, |
|
"eval_steps_per_second": 22.599, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 5.06450351837373e-05, |
|
"loss": 0.1685, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 0.44989633560180664, |
|
"eval_runtime": 1.4289, |
|
"eval_samples_per_second": 699.837, |
|
"eval_steps_per_second": 22.395, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 5.01563721657545e-05, |
|
"loss": 0.1629, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 0.45488646626472473, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.276, |
|
"eval_steps_per_second": 22.473, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 4.9667709147771705e-05, |
|
"loss": 0.1484, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_loss": 0.4580441415309906, |
|
"eval_runtime": 1.416, |
|
"eval_samples_per_second": 706.223, |
|
"eval_steps_per_second": 22.599, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.9179046129788904e-05, |
|
"loss": 0.1468, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_loss": 0.4577222168445587, |
|
"eval_runtime": 1.4304, |
|
"eval_samples_per_second": 699.106, |
|
"eval_steps_per_second": 22.371, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 4.86903831118061e-05, |
|
"loss": 0.147, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 0.4562654197216034, |
|
"eval_runtime": 1.4284, |
|
"eval_samples_per_second": 700.079, |
|
"eval_steps_per_second": 22.403, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.8201720093823296e-05, |
|
"loss": 0.1486, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 0.4565419852733612, |
|
"eval_runtime": 1.4264, |
|
"eval_samples_per_second": 701.053, |
|
"eval_steps_per_second": 22.434, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 4.77130570758405e-05, |
|
"loss": 0.1461, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 0.45437780022621155, |
|
"eval_runtime": 1.4279, |
|
"eval_samples_per_second": 700.322, |
|
"eval_steps_per_second": 22.41, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 4.72243940578577e-05, |
|
"loss": 0.1435, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 0.4550324082374573, |
|
"eval_runtime": 1.4289, |
|
"eval_samples_per_second": 699.835, |
|
"eval_steps_per_second": 22.395, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.67357310398749e-05, |
|
"loss": 0.1463, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 0.4553817808628082, |
|
"eval_runtime": 1.4304, |
|
"eval_samples_per_second": 699.106, |
|
"eval_steps_per_second": 22.371, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.62470680218921e-05, |
|
"loss": 0.1495, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 0.45491766929626465, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.489, |
|
"eval_steps_per_second": 22.544, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 4.575840500390931e-05, |
|
"loss": 0.143, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_loss": 0.45461103320121765, |
|
"eval_runtime": 1.4225, |
|
"eval_samples_per_second": 703.01, |
|
"eval_steps_per_second": 22.496, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 4.5269741985926506e-05, |
|
"loss": 0.1473, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 0.4515800178050995, |
|
"eval_runtime": 1.4373, |
|
"eval_samples_per_second": 695.726, |
|
"eval_steps_per_second": 22.263, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 4.4781078967943706e-05, |
|
"loss": 0.1481, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 0.45015862584114075, |
|
"eval_runtime": 1.4259, |
|
"eval_samples_per_second": 701.296, |
|
"eval_steps_per_second": 22.441, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.4292415949960905e-05, |
|
"loss": 0.1494, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 0.4483198821544647, |
|
"eval_runtime": 1.4359, |
|
"eval_samples_per_second": 696.444, |
|
"eval_steps_per_second": 22.286, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 4.380375293197811e-05, |
|
"loss": 0.1413, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_loss": 0.4498542249202728, |
|
"eval_runtime": 1.4473, |
|
"eval_samples_per_second": 690.949, |
|
"eval_steps_per_second": 22.11, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 4.331508991399531e-05, |
|
"loss": 0.1498, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 0.447781503200531, |
|
"eval_runtime": 1.4225, |
|
"eval_samples_per_second": 703.012, |
|
"eval_steps_per_second": 22.496, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 4.282642689601251e-05, |
|
"loss": 0.146, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 0.4458942115306854, |
|
"eval_runtime": 1.413, |
|
"eval_samples_per_second": 707.707, |
|
"eval_steps_per_second": 22.647, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 4.233776387802971e-05, |
|
"loss": 0.1455, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 0.44468608498573303, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.274, |
|
"eval_steps_per_second": 22.473, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 4.1849100860046916e-05, |
|
"loss": 0.1439, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 0.446806401014328, |
|
"eval_runtime": 1.4224, |
|
"eval_samples_per_second": 703.012, |
|
"eval_steps_per_second": 22.496, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 4.1360437842064116e-05, |
|
"loss": 0.1472, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 0.44735094904899597, |
|
"eval_runtime": 1.4315, |
|
"eval_samples_per_second": 698.575, |
|
"eval_steps_per_second": 22.354, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 4.0871774824081315e-05, |
|
"loss": 0.1481, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 0.4440445303916931, |
|
"eval_runtime": 1.4374, |
|
"eval_samples_per_second": 695.724, |
|
"eval_steps_per_second": 22.263, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 4.0383111806098515e-05, |
|
"loss": 0.1462, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_loss": 0.44478389620780945, |
|
"eval_runtime": 1.4258, |
|
"eval_samples_per_second": 701.358, |
|
"eval_steps_per_second": 22.443, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.989444878811572e-05, |
|
"loss": 0.1335, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 0.450600266456604, |
|
"eval_runtime": 1.4249, |
|
"eval_samples_per_second": 701.787, |
|
"eval_steps_per_second": 22.457, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.940578577013292e-05, |
|
"loss": 0.1279, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 0.45286017656326294, |
|
"eval_runtime": 1.4175, |
|
"eval_samples_per_second": 705.475, |
|
"eval_steps_per_second": 22.575, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 3.891712275215012e-05, |
|
"loss": 0.1318, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 0.4531707763671875, |
|
"eval_runtime": 1.4314, |
|
"eval_samples_per_second": 698.621, |
|
"eval_steps_per_second": 22.356, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 3.842845973416732e-05, |
|
"loss": 0.1297, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 0.4539336562156677, |
|
"eval_runtime": 1.422, |
|
"eval_samples_per_second": 703.255, |
|
"eval_steps_per_second": 22.504, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.7939796716184525e-05, |
|
"loss": 0.1314, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 0.4507242441177368, |
|
"eval_runtime": 1.4354, |
|
"eval_samples_per_second": 696.689, |
|
"eval_steps_per_second": 22.294, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.7451133698201725e-05, |
|
"loss": 0.1295, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 0.45251962542533875, |
|
"eval_runtime": 1.4215, |
|
"eval_samples_per_second": 703.503, |
|
"eval_steps_per_second": 22.512, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 3.6962470680218924e-05, |
|
"loss": 0.1311, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 0.45232245326042175, |
|
"eval_runtime": 1.4364, |
|
"eval_samples_per_second": 696.202, |
|
"eval_steps_per_second": 22.278, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 3.6473807662236124e-05, |
|
"loss": 0.1303, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 0.45104601979255676, |
|
"eval_runtime": 1.421, |
|
"eval_samples_per_second": 703.749, |
|
"eval_steps_per_second": 22.52, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.598514464425333e-05, |
|
"loss": 0.1289, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 0.44871556758880615, |
|
"eval_runtime": 1.4274, |
|
"eval_samples_per_second": 700.563, |
|
"eval_steps_per_second": 22.418, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.549648162627053e-05, |
|
"loss": 0.1375, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 0.4471152126789093, |
|
"eval_runtime": 1.4185, |
|
"eval_samples_per_second": 704.978, |
|
"eval_steps_per_second": 22.559, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.500781860828773e-05, |
|
"loss": 0.1295, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 0.44929370284080505, |
|
"eval_runtime": 1.414, |
|
"eval_samples_per_second": 707.208, |
|
"eval_steps_per_second": 22.631, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.451915559030492e-05, |
|
"loss": 0.1291, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 0.44738081097602844, |
|
"eval_runtime": 1.4095, |
|
"eval_samples_per_second": 709.453, |
|
"eval_steps_per_second": 22.703, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 3.403049257232213e-05, |
|
"loss": 0.1297, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"eval_loss": 0.4483153820037842, |
|
"eval_runtime": 1.4314, |
|
"eval_samples_per_second": 698.622, |
|
"eval_steps_per_second": 22.356, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.354182955433933e-05, |
|
"loss": 0.1354, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"eval_loss": 0.4452635943889618, |
|
"eval_runtime": 1.4074, |
|
"eval_samples_per_second": 710.532, |
|
"eval_steps_per_second": 22.737, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 3.305316653635653e-05, |
|
"loss": 0.1316, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_loss": 0.4459252953529358, |
|
"eval_runtime": 1.4149, |
|
"eval_samples_per_second": 706.773, |
|
"eval_steps_per_second": 22.617, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.2564503518373726e-05, |
|
"loss": 0.1303, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 0.4454708397388458, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.485, |
|
"eval_steps_per_second": 22.544, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.207584050039093e-05, |
|
"loss": 0.1352, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"eval_loss": 0.4453655481338501, |
|
"eval_runtime": 1.4036, |
|
"eval_samples_per_second": 712.466, |
|
"eval_steps_per_second": 22.799, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 3.158717748240813e-05, |
|
"loss": 0.1278, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_loss": 0.44641512632369995, |
|
"eval_runtime": 1.4284, |
|
"eval_samples_per_second": 700.077, |
|
"eval_steps_per_second": 22.402, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 3.109851446442533e-05, |
|
"loss": 0.127, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 0.446814626455307, |
|
"eval_runtime": 1.4239, |
|
"eval_samples_per_second": 702.277, |
|
"eval_steps_per_second": 22.473, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.060985144644253e-05, |
|
"loss": 0.1337, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_loss": 0.44566431641578674, |
|
"eval_runtime": 1.421, |
|
"eval_samples_per_second": 703.749, |
|
"eval_steps_per_second": 22.52, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.0121188428459734e-05, |
|
"loss": 0.1322, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.4453600347042084, |
|
"eval_runtime": 1.3961, |
|
"eval_samples_per_second": 716.266, |
|
"eval_steps_per_second": 22.921, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 2.9632525410476936e-05, |
|
"loss": 0.1171, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.45082348585128784, |
|
"eval_runtime": 1.4021, |
|
"eval_samples_per_second": 713.223, |
|
"eval_steps_per_second": 22.823, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 2.9143862392494136e-05, |
|
"loss": 0.1201, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_loss": 0.45133039355278015, |
|
"eval_runtime": 1.4284, |
|
"eval_samples_per_second": 700.08, |
|
"eval_steps_per_second": 22.403, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 2.865519937451134e-05, |
|
"loss": 0.1119, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_loss": 0.4528238773345947, |
|
"eval_runtime": 1.4066, |
|
"eval_samples_per_second": 710.952, |
|
"eval_steps_per_second": 22.75, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 2.8166536356528538e-05, |
|
"loss": 0.1178, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.4517793655395508, |
|
"eval_runtime": 1.4279, |
|
"eval_samples_per_second": 700.319, |
|
"eval_steps_per_second": 22.41, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 2.767787333854574e-05, |
|
"loss": 0.1172, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 0.45097029209136963, |
|
"eval_runtime": 1.4105, |
|
"eval_samples_per_second": 708.955, |
|
"eval_steps_per_second": 22.687, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 2.718921032056294e-05, |
|
"loss": 0.1229, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_loss": 0.4481058418750763, |
|
"eval_runtime": 1.4051, |
|
"eval_samples_per_second": 711.694, |
|
"eval_steps_per_second": 22.774, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 2.6700547302580143e-05, |
|
"loss": 0.12, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 0.4482279419898987, |
|
"eval_runtime": 1.411, |
|
"eval_samples_per_second": 708.702, |
|
"eval_steps_per_second": 22.678, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 2.6211884284597343e-05, |
|
"loss": 0.1158, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_loss": 0.4505749046802521, |
|
"eval_runtime": 1.4021, |
|
"eval_samples_per_second": 713.221, |
|
"eval_steps_per_second": 22.823, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 2.5723221266614546e-05, |
|
"loss": 0.1212, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 0.4481782913208008, |
|
"eval_runtime": 1.4165, |
|
"eval_samples_per_second": 705.97, |
|
"eval_steps_per_second": 22.591, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 2.5234558248631745e-05, |
|
"loss": 0.1189, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_loss": 0.44906875491142273, |
|
"eval_runtime": 1.4185, |
|
"eval_samples_per_second": 704.978, |
|
"eval_steps_per_second": 22.559, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 2.4745895230648948e-05, |
|
"loss": 0.1225, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 0.4473673701286316, |
|
"eval_runtime": 1.408, |
|
"eval_samples_per_second": 710.203, |
|
"eval_steps_per_second": 22.727, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 2.4257232212666147e-05, |
|
"loss": 0.1206, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_loss": 0.4478332996368408, |
|
"eval_runtime": 1.4299, |
|
"eval_samples_per_second": 699.349, |
|
"eval_steps_per_second": 22.379, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 2.376856919468335e-05, |
|
"loss": 0.1205, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 0.4450225234031677, |
|
"eval_runtime": 1.4139, |
|
"eval_samples_per_second": 707.288, |
|
"eval_steps_per_second": 22.633, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 2.3279906176700546e-05, |
|
"loss": 0.1237, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 0.44548895955085754, |
|
"eval_runtime": 1.418, |
|
"eval_samples_per_second": 705.229, |
|
"eval_steps_per_second": 22.567, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 2.279124315871775e-05, |
|
"loss": 0.1211, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"eval_loss": 0.4440496861934662, |
|
"eval_runtime": 1.4428, |
|
"eval_samples_per_second": 693.091, |
|
"eval_steps_per_second": 22.179, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 2.230258014073495e-05, |
|
"loss": 0.1167, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 0.44403979182243347, |
|
"eval_runtime": 1.415, |
|
"eval_samples_per_second": 706.709, |
|
"eval_steps_per_second": 22.615, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 2.181391712275215e-05, |
|
"loss": 0.1195, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 0.44386279582977295, |
|
"eval_runtime": 1.411, |
|
"eval_samples_per_second": 708.704, |
|
"eval_steps_per_second": 22.679, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 2.132525410476935e-05, |
|
"loss": 0.1236, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 0.4440469443798065, |
|
"eval_runtime": 1.413, |
|
"eval_samples_per_second": 707.702, |
|
"eval_steps_per_second": 22.646, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 2.0836591086786554e-05, |
|
"loss": 0.1196, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_loss": 0.4431215822696686, |
|
"eval_runtime": 1.4071, |
|
"eval_samples_per_second": 710.704, |
|
"eval_steps_per_second": 22.743, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 2.0347928068803753e-05, |
|
"loss": 0.1154, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 0.44494298100471497, |
|
"eval_runtime": 1.3991, |
|
"eval_samples_per_second": 714.74, |
|
"eval_steps_per_second": 22.872, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.9859265050820956e-05, |
|
"loss": 0.1146, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 0.44438567757606506, |
|
"eval_runtime": 1.4136, |
|
"eval_samples_per_second": 707.391, |
|
"eval_steps_per_second": 22.637, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 1.9370602032838155e-05, |
|
"loss": 0.111, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"eval_loss": 0.4510573744773865, |
|
"eval_runtime": 1.4011, |
|
"eval_samples_per_second": 713.727, |
|
"eval_steps_per_second": 22.839, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 1.8881939014855358e-05, |
|
"loss": 0.1107, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 0.45002079010009766, |
|
"eval_runtime": 1.4195, |
|
"eval_samples_per_second": 704.487, |
|
"eval_steps_per_second": 22.544, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 1.8393275996872558e-05, |
|
"loss": 0.1069, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"eval_loss": 0.4500637352466583, |
|
"eval_runtime": 1.4026, |
|
"eval_samples_per_second": 712.972, |
|
"eval_steps_per_second": 22.815, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 1.790461297888976e-05, |
|
"loss": 0.1091, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 0.450139582157135, |
|
"eval_runtime": 1.423, |
|
"eval_samples_per_second": 702.764, |
|
"eval_steps_per_second": 22.488, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 1.741594996090696e-05, |
|
"loss": 0.1107, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"eval_loss": 0.4503149390220642, |
|
"eval_runtime": 1.407, |
|
"eval_samples_per_second": 710.707, |
|
"eval_steps_per_second": 22.743, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 1.6927286942924163e-05, |
|
"loss": 0.11, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 0.4499202370643616, |
|
"eval_runtime": 1.3956, |
|
"eval_samples_per_second": 716.522, |
|
"eval_steps_per_second": 22.929, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 1.6438623924941362e-05, |
|
"loss": 0.1084, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 0.4492938220500946, |
|
"eval_runtime": 1.4061, |
|
"eval_samples_per_second": 711.208, |
|
"eval_steps_per_second": 22.759, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 1.5949960906958562e-05, |
|
"loss": 0.1142, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 0.449333518743515, |
|
"eval_runtime": 1.417, |
|
"eval_samples_per_second": 705.721, |
|
"eval_steps_per_second": 22.583, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 1.546129788897576e-05, |
|
"loss": 0.1091, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_loss": 0.4506095051765442, |
|
"eval_runtime": 1.412, |
|
"eval_samples_per_second": 708.206, |
|
"eval_steps_per_second": 22.663, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 1.4972634870992962e-05, |
|
"loss": 0.1072, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"eval_loss": 0.44918569922447205, |
|
"eval_runtime": 1.4021, |
|
"eval_samples_per_second": 713.221, |
|
"eval_steps_per_second": 22.823, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 1.4483971853010164e-05, |
|
"loss": 0.1128, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 0.44889140129089355, |
|
"eval_runtime": 1.414, |
|
"eval_samples_per_second": 707.21, |
|
"eval_steps_per_second": 22.631, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 1.3995308835027365e-05, |
|
"loss": 0.1123, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 0.44764241576194763, |
|
"eval_runtime": 1.4076, |
|
"eval_samples_per_second": 710.451, |
|
"eval_steps_per_second": 22.734, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 1.3506645817044566e-05, |
|
"loss": 0.1093, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 0.44862595200538635, |
|
"eval_runtime": 1.3991, |
|
"eval_samples_per_second": 714.742, |
|
"eval_steps_per_second": 22.872, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 1.3017982799061767e-05, |
|
"loss": 0.1111, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_loss": 0.4482482969760895, |
|
"eval_runtime": 1.3981, |
|
"eval_samples_per_second": 715.242, |
|
"eval_steps_per_second": 22.888, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 1.2529319781078968e-05, |
|
"loss": 0.1086, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_loss": 0.44752514362335205, |
|
"eval_runtime": 1.4036, |
|
"eval_samples_per_second": 712.46, |
|
"eval_steps_per_second": 22.799, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 1.204065676309617e-05, |
|
"loss": 0.11, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.44697925448417664, |
|
"eval_runtime": 1.401, |
|
"eval_samples_per_second": 713.773, |
|
"eval_steps_per_second": 22.841, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 1.155199374511337e-05, |
|
"loss": 0.1118, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 0.4479255974292755, |
|
"eval_runtime": 1.4107, |
|
"eval_samples_per_second": 708.879, |
|
"eval_steps_per_second": 22.684, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 1.1063330727130572e-05, |
|
"loss": 0.1078, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_loss": 0.4473107159137726, |
|
"eval_runtime": 1.4011, |
|
"eval_samples_per_second": 713.727, |
|
"eval_steps_per_second": 22.839, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1.0574667709147771e-05, |
|
"loss": 0.1083, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_loss": 0.44750386476516724, |
|
"eval_runtime": 1.4056, |
|
"eval_samples_per_second": 711.457, |
|
"eval_steps_per_second": 22.767, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.0086004691164972e-05, |
|
"loss": 0.1127, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.4473421573638916, |
|
"eval_runtime": 1.4175, |
|
"eval_samples_per_second": 705.475, |
|
"eval_steps_per_second": 22.575, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 9.597341673182173e-06, |
|
"loss": 0.1034, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"eval_loss": 0.4490604102611542, |
|
"eval_runtime": 1.4334, |
|
"eval_samples_per_second": 697.652, |
|
"eval_steps_per_second": 22.325, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 9.108678655199375e-06, |
|
"loss": 0.1022, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 0.45033252239227295, |
|
"eval_runtime": 1.4284, |
|
"eval_samples_per_second": 700.078, |
|
"eval_steps_per_second": 22.403, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 8.620015637216576e-06, |
|
"loss": 0.1048, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_loss": 0.45117974281311035, |
|
"eval_runtime": 1.412, |
|
"eval_samples_per_second": 708.205, |
|
"eval_steps_per_second": 22.663, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.131352619233777e-06, |
|
"loss": 0.1018, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"eval_loss": 0.45118698477745056, |
|
"eval_runtime": 1.421, |
|
"eval_samples_per_second": 703.749, |
|
"eval_steps_per_second": 22.52, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 7.642689601250978e-06, |
|
"loss": 0.1064, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"eval_loss": 0.4503132402896881, |
|
"eval_runtime": 1.4151, |
|
"eval_samples_per_second": 706.64, |
|
"eval_steps_per_second": 22.612, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 7.154026583268178e-06, |
|
"loss": 0.1055, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"eval_loss": 0.4488275647163391, |
|
"eval_runtime": 1.417, |
|
"eval_samples_per_second": 705.722, |
|
"eval_steps_per_second": 22.583, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.665363565285379e-06, |
|
"loss": 0.1125, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 0.44890880584716797, |
|
"eval_runtime": 1.4006, |
|
"eval_samples_per_second": 713.983, |
|
"eval_steps_per_second": 22.847, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 6.1767005473025806e-06, |
|
"loss": 0.1002, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 0.44921454787254333, |
|
"eval_runtime": 1.4134, |
|
"eval_samples_per_second": 707.5, |
|
"eval_steps_per_second": 22.64, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 5.688037529319782e-06, |
|
"loss": 0.1043, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 0.4495786130428314, |
|
"eval_runtime": 1.408, |
|
"eval_samples_per_second": 710.206, |
|
"eval_steps_per_second": 22.727, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.199374511336982e-06, |
|
"loss": 0.102, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"eval_loss": 0.44972699880599976, |
|
"eval_runtime": 1.4141, |
|
"eval_samples_per_second": 707.139, |
|
"eval_steps_per_second": 22.628, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 4.710711493354183e-06, |
|
"loss": 0.1059, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 0.44945281744003296, |
|
"eval_runtime": 1.4001, |
|
"eval_samples_per_second": 714.235, |
|
"eval_steps_per_second": 22.856, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 4.222048475371384e-06, |
|
"loss": 0.1012, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"eval_loss": 0.44921252131462097, |
|
"eval_runtime": 1.4051, |
|
"eval_samples_per_second": 711.709, |
|
"eval_steps_per_second": 22.775, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 3.733385457388585e-06, |
|
"loss": 0.1066, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 0.44877171516418457, |
|
"eval_runtime": 1.42, |
|
"eval_samples_per_second": 704.241, |
|
"eval_steps_per_second": 22.536, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 3.244722439405786e-06, |
|
"loss": 0.1044, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_loss": 0.4489387273788452, |
|
"eval_runtime": 1.4031, |
|
"eval_samples_per_second": 712.712, |
|
"eval_steps_per_second": 22.807, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.756059421422987e-06, |
|
"loss": 0.1049, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 0.4488721191883087, |
|
"eval_runtime": 1.4041, |
|
"eval_samples_per_second": 712.211, |
|
"eval_steps_per_second": 22.791, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.2673964034401876e-06, |
|
"loss": 0.1038, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 0.44900763034820557, |
|
"eval_runtime": 1.4001, |
|
"eval_samples_per_second": 714.232, |
|
"eval_steps_per_second": 22.855, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 1.7787333854573888e-06, |
|
"loss": 0.1057, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"eval_loss": 0.44878125190734863, |
|
"eval_runtime": 1.409, |
|
"eval_samples_per_second": 709.704, |
|
"eval_steps_per_second": 22.711, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 1.2900703674745897e-06, |
|
"loss": 0.1035, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 0.448639452457428, |
|
"eval_runtime": 1.4105, |
|
"eval_samples_per_second": 708.955, |
|
"eval_steps_per_second": 22.687, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 8.014073494917906e-07, |
|
"loss": 0.1058, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_loss": 0.4486231207847595, |
|
"eval_runtime": 1.3971, |
|
"eval_samples_per_second": 715.758, |
|
"eval_steps_per_second": 22.904, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 3.1274433150899144e-07, |
|
"loss": 0.1013, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 0.44872137904167175, |
|
"eval_runtime": 1.4115, |
|
"eval_samples_per_second": 708.454, |
|
"eval_steps_per_second": 22.671, |
|
"step": 102500 |
|
} |
|
], |
|
"max_steps": 102820, |
|
"num_train_epochs": 10, |
|
"total_flos": 6.16302255744e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|