distill-whisper-th-medium / trainer_state.json
tensorops's picture
update model
e756107
raw
history blame contribute delete
No virus
48.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8229775327133569,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.0453611334320685e-06,
"loss": 0.2826,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 6.229195710491767e-06,
"loss": 0.2538,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 6.903829450223392e-06,
"loss": 0.2949,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 7.377725845391017e-06,
"loss": 0.3298,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.743343231239583e-06,
"loss": 0.2927,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 8.041073861170494e-06,
"loss": 0.3015,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 8.292222957399574e-06,
"loss": 0.2914,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 8.509413541357755e-06,
"loss": 0.3043,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 8.700744577655557e-06,
"loss": 0.3396,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 8.871723942761204e-06,
"loss": 0.2985,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 9.026267958246849e-06,
"loss": 0.2895,
"step": 275
},
{
"epoch": 0.02,
"learning_rate": 9.16726106663399e-06,
"loss": 0.2732,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.296889251455016e-06,
"loss": 0.3234,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 9.416848797368692e-06,
"loss": 0.3023,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 9.528482449516371e-06,
"loss": 0.3097,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.632871309784314e-06,
"loss": 0.3526,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.73089868785391e-06,
"loss": 0.3205,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 9.823295589572114e-06,
"loss": 0.3256,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.910673836465484e-06,
"loss": 0.3189,
"step": 475
},
{
"epoch": 0.04,
"learning_rate": 9.993550644973805e-06,
"loss": 0.2663,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.976842105263158e-06,
"loss": 0.2888,
"step": 525
},
{
"epoch": 0.05,
"learning_rate": 9.950526315789475e-06,
"loss": 0.2944,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.92421052631579e-06,
"loss": 0.3057,
"step": 575
},
{
"epoch": 0.05,
"learning_rate": 9.897894736842107e-06,
"loss": 0.2682,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.871578947368422e-06,
"loss": 0.3085,
"step": 625
},
{
"epoch": 0.05,
"learning_rate": 9.845263157894738e-06,
"loss": 0.2925,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 9.818947368421053e-06,
"loss": 0.2887,
"step": 675
},
{
"epoch": 0.06,
"learning_rate": 9.79263157894737e-06,
"loss": 0.3216,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.766315789473685e-06,
"loss": 0.2931,
"step": 725
},
{
"epoch": 0.06,
"learning_rate": 9.74e-06,
"loss": 0.2842,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.713684210526317e-06,
"loss": 0.3191,
"step": 775
},
{
"epoch": 0.07,
"learning_rate": 9.687368421052632e-06,
"loss": 0.2843,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.661052631578948e-06,
"loss": 0.2623,
"step": 825
},
{
"epoch": 0.07,
"learning_rate": 9.634736842105265e-06,
"loss": 0.2664,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.60842105263158e-06,
"loss": 0.2963,
"step": 875
},
{
"epoch": 0.07,
"learning_rate": 9.582105263157897e-06,
"loss": 0.2996,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 9.555789473684211e-06,
"loss": 0.304,
"step": 925
},
{
"epoch": 0.08,
"learning_rate": 9.529473684210528e-06,
"loss": 0.2876,
"step": 950
},
{
"epoch": 0.08,
"learning_rate": 9.503157894736843e-06,
"loss": 0.3041,
"step": 975
},
{
"epoch": 0.08,
"learning_rate": 9.476842105263158e-06,
"loss": 0.3216,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.452631578947368e-06,
"loss": 0.298,
"step": 1025
},
{
"epoch": 0.09,
"learning_rate": 9.426315789473685e-06,
"loss": 0.3062,
"step": 1050
},
{
"epoch": 0.09,
"learning_rate": 9.4e-06,
"loss": 0.251,
"step": 1075
},
{
"epoch": 0.09,
"learning_rate": 9.373684210526316e-06,
"loss": 0.2677,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 9.347368421052633e-06,
"loss": 0.2588,
"step": 1125
},
{
"epoch": 0.09,
"learning_rate": 9.321052631578948e-06,
"loss": 0.2658,
"step": 1150
},
{
"epoch": 0.1,
"learning_rate": 9.294736842105265e-06,
"loss": 0.2841,
"step": 1175
},
{
"epoch": 0.1,
"learning_rate": 9.26842105263158e-06,
"loss": 0.2903,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 9.242105263157896e-06,
"loss": 0.2688,
"step": 1225
},
{
"epoch": 0.1,
"learning_rate": 9.215789473684211e-06,
"loss": 0.2839,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.189473684210526e-06,
"loss": 0.2926,
"step": 1275
},
{
"epoch": 0.11,
"learning_rate": 9.163157894736843e-06,
"loss": 0.3417,
"step": 1300
},
{
"epoch": 0.11,
"learning_rate": 9.136842105263158e-06,
"loss": 0.2811,
"step": 1325
},
{
"epoch": 0.11,
"learning_rate": 9.110526315789475e-06,
"loss": 0.292,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 9.08421052631579e-06,
"loss": 0.2721,
"step": 1375
},
{
"epoch": 0.12,
"learning_rate": 9.057894736842106e-06,
"loss": 0.2875,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 9.031578947368423e-06,
"loss": 0.2684,
"step": 1425
},
{
"epoch": 0.12,
"learning_rate": 9.005263157894738e-06,
"loss": 0.2691,
"step": 1450
},
{
"epoch": 0.12,
"learning_rate": 8.978947368421055e-06,
"loss": 0.2733,
"step": 1475
},
{
"epoch": 0.12,
"learning_rate": 8.95263157894737e-06,
"loss": 0.2866,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 8.926315789473685e-06,
"loss": 0.3006,
"step": 1525
},
{
"epoch": 0.13,
"learning_rate": 8.900000000000001e-06,
"loss": 0.2611,
"step": 1550
},
{
"epoch": 0.13,
"learning_rate": 8.873684210526316e-06,
"loss": 0.2726,
"step": 1575
},
{
"epoch": 0.13,
"learning_rate": 8.847368421052633e-06,
"loss": 0.2812,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 8.821052631578948e-06,
"loss": 0.2998,
"step": 1625
},
{
"epoch": 0.14,
"learning_rate": 8.794736842105264e-06,
"loss": 0.2951,
"step": 1650
},
{
"epoch": 0.14,
"learning_rate": 8.76842105263158e-06,
"loss": 0.2693,
"step": 1675
},
{
"epoch": 0.14,
"learning_rate": 8.742105263157894e-06,
"loss": 0.2616,
"step": 1700
},
{
"epoch": 0.14,
"learning_rate": 8.715789473684211e-06,
"loss": 0.2723,
"step": 1725
},
{
"epoch": 0.14,
"learning_rate": 8.689473684210526e-06,
"loss": 0.2738,
"step": 1750
},
{
"epoch": 0.15,
"learning_rate": 8.663157894736843e-06,
"loss": 0.2789,
"step": 1775
},
{
"epoch": 0.15,
"learning_rate": 8.63684210526316e-06,
"loss": 0.2665,
"step": 1800
},
{
"epoch": 0.15,
"learning_rate": 8.610526315789474e-06,
"loss": 0.2552,
"step": 1825
},
{
"epoch": 0.15,
"learning_rate": 8.584210526315791e-06,
"loss": 0.2662,
"step": 1850
},
{
"epoch": 0.15,
"learning_rate": 8.557894736842106e-06,
"loss": 0.3013,
"step": 1875
},
{
"epoch": 0.16,
"learning_rate": 8.531578947368423e-06,
"loss": 0.2706,
"step": 1900
},
{
"epoch": 0.16,
"learning_rate": 8.505263157894738e-06,
"loss": 0.2687,
"step": 1925
},
{
"epoch": 0.16,
"learning_rate": 8.478947368421053e-06,
"loss": 0.2866,
"step": 1950
},
{
"epoch": 0.16,
"learning_rate": 8.45263157894737e-06,
"loss": 0.2674,
"step": 1975
},
{
"epoch": 0.16,
"learning_rate": 8.426315789473684e-06,
"loss": 0.2701,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 8.402105263157896e-06,
"loss": 0.2691,
"step": 2025
},
{
"epoch": 0.17,
"learning_rate": 8.375789473684211e-06,
"loss": 0.277,
"step": 2050
},
{
"epoch": 0.17,
"learning_rate": 8.349473684210528e-06,
"loss": 0.2976,
"step": 2075
},
{
"epoch": 0.17,
"learning_rate": 8.323157894736843e-06,
"loss": 0.2672,
"step": 2100
},
{
"epoch": 0.17,
"learning_rate": 8.29684210526316e-06,
"loss": 0.2755,
"step": 2125
},
{
"epoch": 0.18,
"learning_rate": 8.270526315789474e-06,
"loss": 0.254,
"step": 2150
},
{
"epoch": 0.18,
"learning_rate": 8.244210526315791e-06,
"loss": 0.2853,
"step": 2175
},
{
"epoch": 0.18,
"learning_rate": 8.217894736842106e-06,
"loss": 0.2965,
"step": 2200
},
{
"epoch": 0.18,
"learning_rate": 8.19157894736842e-06,
"loss": 0.2498,
"step": 2225
},
{
"epoch": 0.19,
"learning_rate": 8.165263157894737e-06,
"loss": 0.2461,
"step": 2250
},
{
"epoch": 0.19,
"learning_rate": 8.138947368421052e-06,
"loss": 0.2716,
"step": 2275
},
{
"epoch": 0.19,
"learning_rate": 8.112631578947369e-06,
"loss": 0.2376,
"step": 2300
},
{
"epoch": 0.19,
"learning_rate": 8.086315789473684e-06,
"loss": 0.2556,
"step": 2325
},
{
"epoch": 0.19,
"learning_rate": 8.06e-06,
"loss": 0.2181,
"step": 2350
},
{
"epoch": 0.2,
"learning_rate": 8.033684210526317e-06,
"loss": 0.235,
"step": 2375
},
{
"epoch": 0.2,
"learning_rate": 8.007368421052632e-06,
"loss": 0.2953,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 7.981052631578949e-06,
"loss": 0.2603,
"step": 2425
},
{
"epoch": 0.2,
"learning_rate": 7.954736842105264e-06,
"loss": 0.2882,
"step": 2450
},
{
"epoch": 0.2,
"learning_rate": 7.928421052631579e-06,
"loss": 0.2531,
"step": 2475
},
{
"epoch": 0.21,
"learning_rate": 7.902105263157896e-06,
"loss": 0.269,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 7.87578947368421e-06,
"loss": 0.2562,
"step": 2525
},
{
"epoch": 0.21,
"learning_rate": 7.849473684210527e-06,
"loss": 0.2944,
"step": 2550
},
{
"epoch": 0.21,
"learning_rate": 7.823157894736842e-06,
"loss": 0.2774,
"step": 2575
},
{
"epoch": 0.21,
"learning_rate": 7.796842105263159e-06,
"loss": 0.2485,
"step": 2600
},
{
"epoch": 0.22,
"learning_rate": 7.770526315789474e-06,
"loss": 0.2928,
"step": 2625
},
{
"epoch": 0.22,
"learning_rate": 7.744210526315789e-06,
"loss": 0.2669,
"step": 2650
},
{
"epoch": 0.22,
"learning_rate": 7.717894736842107e-06,
"loss": 0.2786,
"step": 2675
},
{
"epoch": 0.22,
"learning_rate": 7.691578947368422e-06,
"loss": 0.254,
"step": 2700
},
{
"epoch": 0.22,
"learning_rate": 7.665263157894737e-06,
"loss": 0.2564,
"step": 2725
},
{
"epoch": 0.23,
"learning_rate": 7.638947368421054e-06,
"loss": 0.2158,
"step": 2750
},
{
"epoch": 0.23,
"learning_rate": 7.61263157894737e-06,
"loss": 0.2322,
"step": 2775
},
{
"epoch": 0.23,
"learning_rate": 7.586315789473685e-06,
"loss": 0.2353,
"step": 2800
},
{
"epoch": 0.23,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.2678,
"step": 2825
},
{
"epoch": 0.23,
"learning_rate": 7.533684210526316e-06,
"loss": 0.2437,
"step": 2850
},
{
"epoch": 0.24,
"learning_rate": 7.507368421052632e-06,
"loss": 0.2687,
"step": 2875
},
{
"epoch": 0.24,
"learning_rate": 7.481052631578948e-06,
"loss": 0.2554,
"step": 2900
},
{
"epoch": 0.24,
"learning_rate": 7.454736842105264e-06,
"loss": 0.2387,
"step": 2925
},
{
"epoch": 0.24,
"learning_rate": 7.4284210526315796e-06,
"loss": 0.2348,
"step": 2950
},
{
"epoch": 0.24,
"learning_rate": 7.4021052631578945e-06,
"loss": 0.2736,
"step": 2975
},
{
"epoch": 0.25,
"learning_rate": 7.37578947368421e-06,
"loss": 0.2568,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 7.351578947368422e-06,
"loss": 0.2451,
"step": 3025
},
{
"epoch": 0.25,
"learning_rate": 7.325263157894738e-06,
"loss": 0.2792,
"step": 3050
},
{
"epoch": 0.25,
"learning_rate": 7.298947368421053e-06,
"loss": 0.2817,
"step": 3075
},
{
"epoch": 0.26,
"learning_rate": 7.272631578947369e-06,
"loss": 0.2562,
"step": 3100
},
{
"epoch": 0.26,
"learning_rate": 7.2463157894736845e-06,
"loss": 0.2131,
"step": 3125
},
{
"epoch": 0.26,
"learning_rate": 7.22e-06,
"loss": 0.2523,
"step": 3150
},
{
"epoch": 0.26,
"learning_rate": 7.193684210526316e-06,
"loss": 0.2391,
"step": 3175
},
{
"epoch": 0.26,
"learning_rate": 7.167368421052632e-06,
"loss": 0.2337,
"step": 3200
},
{
"epoch": 0.27,
"learning_rate": 7.141052631578948e-06,
"loss": 0.2807,
"step": 3225
},
{
"epoch": 0.27,
"learning_rate": 7.1147368421052645e-06,
"loss": 0.2541,
"step": 3250
},
{
"epoch": 0.27,
"learning_rate": 7.08842105263158e-06,
"loss": 0.2365,
"step": 3275
},
{
"epoch": 0.27,
"learning_rate": 7.062105263157896e-06,
"loss": 0.2566,
"step": 3300
},
{
"epoch": 0.27,
"learning_rate": 7.035789473684211e-06,
"loss": 0.2538,
"step": 3325
},
{
"epoch": 0.28,
"learning_rate": 7.009473684210527e-06,
"loss": 0.2496,
"step": 3350
},
{
"epoch": 0.28,
"learning_rate": 6.983157894736843e-06,
"loss": 0.2757,
"step": 3375
},
{
"epoch": 0.28,
"learning_rate": 6.9568421052631585e-06,
"loss": 0.2499,
"step": 3400
},
{
"epoch": 0.28,
"learning_rate": 6.930526315789474e-06,
"loss": 0.2308,
"step": 3425
},
{
"epoch": 0.28,
"learning_rate": 6.90421052631579e-06,
"loss": 0.2389,
"step": 3450
},
{
"epoch": 0.29,
"learning_rate": 6.877894736842106e-06,
"loss": 0.2328,
"step": 3475
},
{
"epoch": 0.29,
"learning_rate": 6.851578947368421e-06,
"loss": 0.2319,
"step": 3500
},
{
"epoch": 0.29,
"learning_rate": 6.825263157894737e-06,
"loss": 0.2441,
"step": 3525
},
{
"epoch": 0.29,
"learning_rate": 6.798947368421053e-06,
"loss": 0.2423,
"step": 3550
},
{
"epoch": 0.29,
"learning_rate": 6.772631578947368e-06,
"loss": 0.2347,
"step": 3575
},
{
"epoch": 0.3,
"learning_rate": 6.746315789473685e-06,
"loss": 0.2621,
"step": 3600
},
{
"epoch": 0.3,
"learning_rate": 6.720000000000001e-06,
"loss": 0.2517,
"step": 3625
},
{
"epoch": 0.3,
"learning_rate": 6.693684210526317e-06,
"loss": 0.2801,
"step": 3650
},
{
"epoch": 0.3,
"learning_rate": 6.6673684210526325e-06,
"loss": 0.2527,
"step": 3675
},
{
"epoch": 0.3,
"learning_rate": 6.641052631578948e-06,
"loss": 0.2362,
"step": 3700
},
{
"epoch": 0.31,
"learning_rate": 6.614736842105264e-06,
"loss": 0.2583,
"step": 3725
},
{
"epoch": 0.31,
"learning_rate": 6.588421052631579e-06,
"loss": 0.2538,
"step": 3750
},
{
"epoch": 0.31,
"learning_rate": 6.562105263157895e-06,
"loss": 0.2502,
"step": 3775
},
{
"epoch": 0.31,
"learning_rate": 6.535789473684211e-06,
"loss": 0.2476,
"step": 3800
},
{
"epoch": 0.31,
"learning_rate": 6.509473684210527e-06,
"loss": 0.2307,
"step": 3825
},
{
"epoch": 0.32,
"learning_rate": 6.483157894736842e-06,
"loss": 0.2396,
"step": 3850
},
{
"epoch": 0.32,
"learning_rate": 6.456842105263158e-06,
"loss": 0.2271,
"step": 3875
},
{
"epoch": 0.32,
"learning_rate": 6.430526315789474e-06,
"loss": 0.243,
"step": 3900
},
{
"epoch": 0.32,
"learning_rate": 6.404210526315791e-06,
"loss": 0.2468,
"step": 3925
},
{
"epoch": 0.33,
"learning_rate": 6.3778947368421065e-06,
"loss": 0.2442,
"step": 3950
},
{
"epoch": 0.33,
"learning_rate": 6.351578947368422e-06,
"loss": 0.2055,
"step": 3975
},
{
"epoch": 0.33,
"learning_rate": 6.325263157894737e-06,
"loss": 0.2169,
"step": 4000
},
{
"epoch": 0.33,
"learning_rate": 6.301052631578947e-06,
"loss": 0.2578,
"step": 4025
},
{
"epoch": 0.33,
"learning_rate": 6.274736842105263e-06,
"loss": 0.232,
"step": 4050
},
{
"epoch": 0.34,
"learning_rate": 6.248421052631579e-06,
"loss": 0.2115,
"step": 4075
},
{
"epoch": 0.34,
"learning_rate": 6.222105263157895e-06,
"loss": 0.2627,
"step": 4100
},
{
"epoch": 0.34,
"learning_rate": 6.195789473684211e-06,
"loss": 0.2435,
"step": 4125
},
{
"epoch": 0.34,
"learning_rate": 6.1694736842105265e-06,
"loss": 0.2651,
"step": 4150
},
{
"epoch": 0.34,
"learning_rate": 6.143157894736843e-06,
"loss": 0.2516,
"step": 4175
},
{
"epoch": 0.35,
"learning_rate": 6.116842105263159e-06,
"loss": 0.2383,
"step": 4200
},
{
"epoch": 0.35,
"learning_rate": 6.090526315789475e-06,
"loss": 0.2199,
"step": 4225
},
{
"epoch": 0.35,
"learning_rate": 6.0642105263157906e-06,
"loss": 0.2273,
"step": 4250
},
{
"epoch": 0.35,
"learning_rate": 6.0378947368421055e-06,
"loss": 0.268,
"step": 4275
},
{
"epoch": 0.35,
"learning_rate": 6.011578947368421e-06,
"loss": 0.2738,
"step": 4300
},
{
"epoch": 0.36,
"learning_rate": 5.985263157894737e-06,
"loss": 0.2265,
"step": 4325
},
{
"epoch": 0.36,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.2412,
"step": 4350
},
{
"epoch": 0.36,
"learning_rate": 5.933684210526316e-06,
"loss": 0.2274,
"step": 4375
},
{
"epoch": 0.36,
"learning_rate": 5.907368421052631e-06,
"loss": 0.2482,
"step": 4400
},
{
"epoch": 0.36,
"learning_rate": 5.881052631578947e-06,
"loss": 0.2313,
"step": 4425
},
{
"epoch": 0.37,
"learning_rate": 5.854736842105264e-06,
"loss": 0.2243,
"step": 4450
},
{
"epoch": 0.37,
"learning_rate": 5.82842105263158e-06,
"loss": 0.2475,
"step": 4475
},
{
"epoch": 0.37,
"learning_rate": 5.8021052631578954e-06,
"loss": 0.2197,
"step": 4500
},
{
"epoch": 0.37,
"learning_rate": 5.775789473684211e-06,
"loss": 0.2484,
"step": 4525
},
{
"epoch": 0.37,
"learning_rate": 5.749473684210527e-06,
"loss": 0.2398,
"step": 4550
},
{
"epoch": 0.38,
"learning_rate": 5.723157894736843e-06,
"loss": 0.2228,
"step": 4575
},
{
"epoch": 0.38,
"learning_rate": 5.696842105263159e-06,
"loss": 0.2405,
"step": 4600
},
{
"epoch": 0.38,
"learning_rate": 5.670526315789474e-06,
"loss": 0.2356,
"step": 4625
},
{
"epoch": 0.38,
"learning_rate": 5.6442105263157895e-06,
"loss": 0.2354,
"step": 4650
},
{
"epoch": 0.38,
"learning_rate": 5.617894736842105e-06,
"loss": 0.2142,
"step": 4675
},
{
"epoch": 0.39,
"learning_rate": 5.591578947368421e-06,
"loss": 0.2492,
"step": 4700
},
{
"epoch": 0.39,
"learning_rate": 5.565263157894737e-06,
"loss": 0.2386,
"step": 4725
},
{
"epoch": 0.39,
"learning_rate": 5.538947368421053e-06,
"loss": 0.2357,
"step": 4750
},
{
"epoch": 0.39,
"learning_rate": 5.512631578947369e-06,
"loss": 0.2114,
"step": 4775
},
{
"epoch": 0.4,
"learning_rate": 5.486315789473685e-06,
"loss": 0.2387,
"step": 4800
},
{
"epoch": 0.4,
"learning_rate": 5.460000000000001e-06,
"loss": 0.2135,
"step": 4825
},
{
"epoch": 0.4,
"learning_rate": 5.433684210526317e-06,
"loss": 0.271,
"step": 4850
},
{
"epoch": 0.4,
"learning_rate": 5.407368421052632e-06,
"loss": 0.2306,
"step": 4875
},
{
"epoch": 0.4,
"learning_rate": 5.381052631578948e-06,
"loss": 0.24,
"step": 4900
},
{
"epoch": 0.41,
"learning_rate": 5.3547368421052635e-06,
"loss": 0.2306,
"step": 4925
},
{
"epoch": 0.41,
"learning_rate": 5.328421052631579e-06,
"loss": 0.253,
"step": 4950
},
{
"epoch": 0.41,
"learning_rate": 5.302105263157895e-06,
"loss": 0.2143,
"step": 4975
},
{
"epoch": 0.41,
"learning_rate": 5.275789473684211e-06,
"loss": 0.2534,
"step": 5000
},
{
"epoch": 0.41,
"learning_rate": 5.249473684210527e-06,
"loss": 0.287,
"step": 5025
},
{
"epoch": 0.42,
"learning_rate": 5.223157894736843e-06,
"loss": 0.232,
"step": 5050
},
{
"epoch": 0.42,
"learning_rate": 5.196842105263158e-06,
"loss": 0.2135,
"step": 5075
},
{
"epoch": 0.42,
"learning_rate": 5.170526315789473e-06,
"loss": 0.2544,
"step": 5100
},
{
"epoch": 0.42,
"learning_rate": 5.14421052631579e-06,
"loss": 0.2916,
"step": 5125
},
{
"epoch": 0.42,
"learning_rate": 5.117894736842106e-06,
"loss": 0.2373,
"step": 5150
},
{
"epoch": 0.43,
"learning_rate": 5.091578947368422e-06,
"loss": 0.2238,
"step": 5175
},
{
"epoch": 0.43,
"learning_rate": 5.0652631578947375e-06,
"loss": 0.2522,
"step": 5200
},
{
"epoch": 0.43,
"learning_rate": 5.038947368421053e-06,
"loss": 0.2523,
"step": 5225
},
{
"epoch": 0.43,
"learning_rate": 5.012631578947369e-06,
"loss": 0.2504,
"step": 5250
},
{
"epoch": 0.43,
"learning_rate": 4.986315789473685e-06,
"loss": 0.2398,
"step": 5275
},
{
"epoch": 0.44,
"learning_rate": 4.960000000000001e-06,
"loss": 0.2351,
"step": 5300
},
{
"epoch": 0.44,
"learning_rate": 4.933684210526316e-06,
"loss": 0.2461,
"step": 5325
},
{
"epoch": 0.44,
"learning_rate": 4.907368421052632e-06,
"loss": 0.225,
"step": 5350
},
{
"epoch": 0.44,
"learning_rate": 4.881052631578948e-06,
"loss": 0.219,
"step": 5375
},
{
"epoch": 0.44,
"learning_rate": 4.854736842105264e-06,
"loss": 0.2384,
"step": 5400
},
{
"epoch": 0.45,
"learning_rate": 4.828421052631579e-06,
"loss": 0.2348,
"step": 5425
},
{
"epoch": 0.45,
"learning_rate": 4.802105263157895e-06,
"loss": 0.2215,
"step": 5450
},
{
"epoch": 0.45,
"learning_rate": 4.775789473684211e-06,
"loss": 0.2413,
"step": 5475
},
{
"epoch": 0.45,
"learning_rate": 4.7494736842105265e-06,
"loss": 0.2252,
"step": 5500
},
{
"epoch": 0.45,
"learning_rate": 4.723157894736842e-06,
"loss": 0.2549,
"step": 5525
},
{
"epoch": 0.46,
"learning_rate": 4.696842105263158e-06,
"loss": 0.2257,
"step": 5550
},
{
"epoch": 0.46,
"learning_rate": 4.670526315789474e-06,
"loss": 0.2006,
"step": 5575
},
{
"epoch": 0.46,
"learning_rate": 4.64421052631579e-06,
"loss": 0.2441,
"step": 5600
},
{
"epoch": 0.46,
"learning_rate": 4.617894736842106e-06,
"loss": 0.2728,
"step": 5625
},
{
"epoch": 0.46,
"learning_rate": 4.591578947368421e-06,
"loss": 0.2353,
"step": 5650
},
{
"epoch": 0.47,
"learning_rate": 4.565263157894737e-06,
"loss": 0.2535,
"step": 5675
},
{
"epoch": 0.47,
"learning_rate": 4.538947368421053e-06,
"loss": 0.2475,
"step": 5700
},
{
"epoch": 0.47,
"learning_rate": 4.512631578947369e-06,
"loss": 0.2029,
"step": 5725
},
{
"epoch": 0.47,
"learning_rate": 4.486315789473685e-06,
"loss": 0.2016,
"step": 5750
},
{
"epoch": 0.48,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.2583,
"step": 5775
},
{
"epoch": 0.48,
"learning_rate": 4.433684210526316e-06,
"loss": 0.2271,
"step": 5800
},
{
"epoch": 0.48,
"learning_rate": 4.407368421052632e-06,
"loss": 0.2197,
"step": 5825
},
{
"epoch": 0.48,
"learning_rate": 4.381052631578948e-06,
"loss": 0.233,
"step": 5850
},
{
"epoch": 0.48,
"learning_rate": 4.354736842105263e-06,
"loss": 0.2042,
"step": 5875
},
{
"epoch": 0.49,
"learning_rate": 4.32842105263158e-06,
"loss": 0.2533,
"step": 5900
},
{
"epoch": 0.49,
"learning_rate": 4.302105263157895e-06,
"loss": 0.2425,
"step": 5925
},
{
"epoch": 0.49,
"learning_rate": 4.275789473684211e-06,
"loss": 0.241,
"step": 5950
},
{
"epoch": 0.49,
"learning_rate": 4.249473684210527e-06,
"loss": 0.2311,
"step": 5975
},
{
"epoch": 0.49,
"learning_rate": 4.223157894736842e-06,
"loss": 0.213,
"step": 6000
},
{
"epoch": 0.5,
"learning_rate": 4.196842105263158e-06,
"loss": 0.2157,
"step": 6025
},
{
"epoch": 0.5,
"learning_rate": 4.170526315789474e-06,
"loss": 0.2097,
"step": 6050
},
{
"epoch": 0.5,
"learning_rate": 4.14421052631579e-06,
"loss": 0.2059,
"step": 6075
},
{
"epoch": 0.5,
"learning_rate": 4.117894736842106e-06,
"loss": 0.2205,
"step": 6100
},
{
"epoch": 0.5,
"learning_rate": 4.091578947368421e-06,
"loss": 0.2134,
"step": 6125
},
{
"epoch": 0.51,
"learning_rate": 4.065263157894737e-06,
"loss": 0.2489,
"step": 6150
},
{
"epoch": 0.51,
"learning_rate": 4.038947368421053e-06,
"loss": 0.2418,
"step": 6175
},
{
"epoch": 0.51,
"learning_rate": 4.0126315789473686e-06,
"loss": 0.232,
"step": 6200
},
{
"epoch": 0.51,
"learning_rate": 3.986315789473684e-06,
"loss": 0.2258,
"step": 6225
},
{
"epoch": 0.51,
"learning_rate": 3.96e-06,
"loss": 0.2343,
"step": 6250
},
{
"epoch": 0.52,
"learning_rate": 3.933684210526316e-06,
"loss": 0.2288,
"step": 6275
},
{
"epoch": 0.52,
"learning_rate": 3.907368421052632e-06,
"loss": 0.2417,
"step": 6300
},
{
"epoch": 0.52,
"learning_rate": 3.881052631578948e-06,
"loss": 0.2252,
"step": 6325
},
{
"epoch": 0.52,
"learning_rate": 3.8568421052631585e-06,
"loss": 0.2039,
"step": 6350
},
{
"epoch": 0.52,
"learning_rate": 3.830526315789474e-06,
"loss": 0.2081,
"step": 6375
},
{
"epoch": 0.53,
"learning_rate": 3.8042105263157898e-06,
"loss": 0.2244,
"step": 6400
},
{
"epoch": 0.53,
"learning_rate": 3.7778947368421056e-06,
"loss": 0.2121,
"step": 6425
},
{
"epoch": 0.53,
"learning_rate": 3.751578947368421e-06,
"loss": 0.1981,
"step": 6450
},
{
"epoch": 0.53,
"learning_rate": 3.7252631578947372e-06,
"loss": 0.2209,
"step": 6475
},
{
"epoch": 0.53,
"learning_rate": 3.698947368421053e-06,
"loss": 0.1973,
"step": 6500
},
{
"epoch": 0.54,
"learning_rate": 3.672631578947369e-06,
"loss": 0.184,
"step": 6525
},
{
"epoch": 0.54,
"learning_rate": 3.6463157894736847e-06,
"loss": 0.2522,
"step": 6550
},
{
"epoch": 0.54,
"learning_rate": 3.62e-06,
"loss": 0.1923,
"step": 6575
},
{
"epoch": 0.54,
"learning_rate": 3.593684210526316e-06,
"loss": 0.2284,
"step": 6600
},
{
"epoch": 0.55,
"learning_rate": 3.567368421052632e-06,
"loss": 0.1941,
"step": 6625
},
{
"epoch": 0.55,
"learning_rate": 3.541052631578948e-06,
"loss": 0.2316,
"step": 6650
},
{
"epoch": 0.55,
"learning_rate": 3.5147368421052638e-06,
"loss": 0.2039,
"step": 6675
},
{
"epoch": 0.55,
"learning_rate": 3.488421052631579e-06,
"loss": 0.2123,
"step": 6700
},
{
"epoch": 0.55,
"learning_rate": 3.462105263157895e-06,
"loss": 0.2254,
"step": 6725
},
{
"epoch": 0.56,
"learning_rate": 3.435789473684211e-06,
"loss": 0.2041,
"step": 6750
},
{
"epoch": 0.56,
"learning_rate": 3.409473684210526e-06,
"loss": 0.1916,
"step": 6775
},
{
"epoch": 0.56,
"learning_rate": 3.3831578947368424e-06,
"loss": 0.1907,
"step": 6800
},
{
"epoch": 0.56,
"learning_rate": 3.3568421052631583e-06,
"loss": 0.2589,
"step": 6825
},
{
"epoch": 0.56,
"learning_rate": 3.330526315789474e-06,
"loss": 0.2016,
"step": 6850
},
{
"epoch": 0.57,
"learning_rate": 3.30421052631579e-06,
"loss": 0.2198,
"step": 6875
},
{
"epoch": 0.57,
"learning_rate": 3.2778947368421053e-06,
"loss": 0.1903,
"step": 6900
},
{
"epoch": 0.57,
"learning_rate": 3.251578947368421e-06,
"loss": 0.2355,
"step": 6925
},
{
"epoch": 0.57,
"learning_rate": 3.225263157894737e-06,
"loss": 0.2013,
"step": 6950
},
{
"epoch": 0.57,
"learning_rate": 3.198947368421053e-06,
"loss": 0.2083,
"step": 6975
},
{
"epoch": 0.58,
"learning_rate": 3.172631578947369e-06,
"loss": 0.2311,
"step": 7000
},
{
"epoch": 0.58,
"learning_rate": 3.1463157894736844e-06,
"loss": 0.2072,
"step": 7025
},
{
"epoch": 0.58,
"learning_rate": 3.12e-06,
"loss": 0.2302,
"step": 7050
},
{
"epoch": 0.58,
"learning_rate": 3.093684210526316e-06,
"loss": 0.2152,
"step": 7075
},
{
"epoch": 0.58,
"learning_rate": 3.067368421052632e-06,
"loss": 0.1985,
"step": 7100
},
{
"epoch": 0.59,
"learning_rate": 3.0410526315789472e-06,
"loss": 0.2066,
"step": 7125
},
{
"epoch": 0.59,
"learning_rate": 3.0147368421052635e-06,
"loss": 0.1944,
"step": 7150
},
{
"epoch": 0.59,
"learning_rate": 2.9884210526315793e-06,
"loss": 0.2183,
"step": 7175
},
{
"epoch": 0.59,
"learning_rate": 2.962105263157895e-06,
"loss": 0.1926,
"step": 7200
},
{
"epoch": 0.59,
"learning_rate": 2.935789473684211e-06,
"loss": 0.222,
"step": 7225
},
{
"epoch": 0.6,
"learning_rate": 2.9094736842105263e-06,
"loss": 0.2211,
"step": 7250
},
{
"epoch": 0.6,
"learning_rate": 2.883157894736842e-06,
"loss": 0.2201,
"step": 7275
},
{
"epoch": 0.6,
"learning_rate": 2.856842105263158e-06,
"loss": 0.2165,
"step": 7300
},
{
"epoch": 0.6,
"learning_rate": 2.830526315789474e-06,
"loss": 0.2177,
"step": 7325
},
{
"epoch": 0.6,
"learning_rate": 2.8063157894736842e-06,
"loss": 0.1929,
"step": 7350
},
{
"epoch": 0.61,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.1903,
"step": 7375
},
{
"epoch": 0.61,
"learning_rate": 2.7536842105263163e-06,
"loss": 0.2097,
"step": 7400
},
{
"epoch": 0.61,
"learning_rate": 2.7273684210526317e-06,
"loss": 0.2335,
"step": 7425
},
{
"epoch": 0.61,
"learning_rate": 2.7010526315789475e-06,
"loss": 0.2315,
"step": 7450
},
{
"epoch": 0.62,
"learning_rate": 2.6747368421052633e-06,
"loss": 0.2249,
"step": 7475
},
{
"epoch": 0.62,
"learning_rate": 2.648421052631579e-06,
"loss": 0.2397,
"step": 7500
},
{
"epoch": 0.62,
"learning_rate": 2.6221052631578946e-06,
"loss": 0.2241,
"step": 7525
},
{
"epoch": 0.62,
"learning_rate": 2.595789473684211e-06,
"loss": 0.1969,
"step": 7550
},
{
"epoch": 0.62,
"learning_rate": 2.5694736842105266e-06,
"loss": 0.2475,
"step": 7575
},
{
"epoch": 0.63,
"learning_rate": 2.5431578947368424e-06,
"loss": 0.2122,
"step": 7600
},
{
"epoch": 0.63,
"learning_rate": 2.5168421052631583e-06,
"loss": 0.1849,
"step": 7625
},
{
"epoch": 0.63,
"learning_rate": 2.4905263157894736e-06,
"loss": 0.2259,
"step": 7650
},
{
"epoch": 0.63,
"learning_rate": 2.46421052631579e-06,
"loss": 0.2062,
"step": 7675
},
{
"epoch": 0.63,
"learning_rate": 2.4378947368421053e-06,
"loss": 0.1968,
"step": 7700
},
{
"epoch": 0.64,
"learning_rate": 2.411578947368421e-06,
"loss": 0.2024,
"step": 7725
},
{
"epoch": 0.64,
"learning_rate": 2.3852631578947373e-06,
"loss": 0.2169,
"step": 7750
},
{
"epoch": 0.64,
"learning_rate": 2.3589473684210527e-06,
"loss": 0.2229,
"step": 7775
},
{
"epoch": 0.64,
"learning_rate": 2.3326315789473686e-06,
"loss": 0.2013,
"step": 7800
},
{
"epoch": 0.64,
"learning_rate": 2.3063157894736844e-06,
"loss": 0.2276,
"step": 7825
},
{
"epoch": 0.65,
"learning_rate": 2.28e-06,
"loss": 0.1745,
"step": 7850
},
{
"epoch": 0.65,
"learning_rate": 2.253684210526316e-06,
"loss": 0.2252,
"step": 7875
},
{
"epoch": 0.65,
"learning_rate": 2.227368421052632e-06,
"loss": 0.2135,
"step": 7900
},
{
"epoch": 0.65,
"learning_rate": 2.2010526315789476e-06,
"loss": 0.2181,
"step": 7925
},
{
"epoch": 0.65,
"learning_rate": 2.1747368421052635e-06,
"loss": 0.21,
"step": 7950
},
{
"epoch": 0.66,
"learning_rate": 2.148421052631579e-06,
"loss": 0.2151,
"step": 7975
},
{
"epoch": 0.66,
"learning_rate": 2.1221052631578947e-06,
"loss": 0.1712,
"step": 8000
},
{
"epoch": 0.66,
"learning_rate": 2.095789473684211e-06,
"loss": 0.2024,
"step": 8025
},
{
"epoch": 0.66,
"learning_rate": 2.0694736842105263e-06,
"loss": 0.2238,
"step": 8050
},
{
"epoch": 0.66,
"learning_rate": 2.043157894736842e-06,
"loss": 0.2428,
"step": 8075
},
{
"epoch": 0.67,
"learning_rate": 2.016842105263158e-06,
"loss": 0.22,
"step": 8100
},
{
"epoch": 0.67,
"learning_rate": 1.9905263157894738e-06,
"loss": 0.2441,
"step": 8125
},
{
"epoch": 0.67,
"learning_rate": 1.9642105263157896e-06,
"loss": 0.1981,
"step": 8150
},
{
"epoch": 0.67,
"learning_rate": 1.9378947368421054e-06,
"loss": 0.2208,
"step": 8175
},
{
"epoch": 0.67,
"learning_rate": 1.9115789473684212e-06,
"loss": 0.2001,
"step": 8200
},
{
"epoch": 0.68,
"learning_rate": 1.885263157894737e-06,
"loss": 0.2143,
"step": 8225
},
{
"epoch": 0.68,
"learning_rate": 1.8589473684210527e-06,
"loss": 0.2097,
"step": 8250
},
{
"epoch": 0.68,
"learning_rate": 1.8326315789473687e-06,
"loss": 0.2023,
"step": 8275
},
{
"epoch": 0.68,
"learning_rate": 1.8063157894736843e-06,
"loss": 0.2233,
"step": 8300
},
{
"epoch": 0.69,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.2349,
"step": 8325
},
{
"epoch": 0.69,
"learning_rate": 1.7557894736842108e-06,
"loss": 0.2292,
"step": 8350
},
{
"epoch": 0.69,
"learning_rate": 1.7294736842105264e-06,
"loss": 0.2374,
"step": 8375
},
{
"epoch": 0.69,
"learning_rate": 1.7031578947368422e-06,
"loss": 0.2459,
"step": 8400
},
{
"epoch": 0.69,
"learning_rate": 1.676842105263158e-06,
"loss": 0.2072,
"step": 8425
},
{
"epoch": 0.7,
"learning_rate": 1.6505263157894739e-06,
"loss": 0.2321,
"step": 8450
},
{
"epoch": 0.7,
"learning_rate": 1.6242105263157895e-06,
"loss": 0.207,
"step": 8475
},
{
"epoch": 0.7,
"learning_rate": 1.5978947368421055e-06,
"loss": 0.2005,
"step": 8500
},
{
"epoch": 0.7,
"learning_rate": 1.5715789473684213e-06,
"loss": 0.2216,
"step": 8525
},
{
"epoch": 0.7,
"learning_rate": 1.545263157894737e-06,
"loss": 0.1913,
"step": 8550
},
{
"epoch": 0.71,
"learning_rate": 1.518947368421053e-06,
"loss": 0.2017,
"step": 8575
},
{
"epoch": 0.71,
"learning_rate": 1.4926315789473686e-06,
"loss": 0.1724,
"step": 8600
},
{
"epoch": 0.71,
"learning_rate": 1.4663157894736844e-06,
"loss": 0.2071,
"step": 8625
},
{
"epoch": 0.71,
"learning_rate": 1.44e-06,
"loss": 0.2295,
"step": 8650
},
{
"epoch": 0.71,
"learning_rate": 1.413684210526316e-06,
"loss": 0.2117,
"step": 8675
},
{
"epoch": 0.72,
"learning_rate": 1.3873684210526316e-06,
"loss": 0.2116,
"step": 8700
},
{
"epoch": 0.72,
"learning_rate": 1.3610526315789474e-06,
"loss": 0.2154,
"step": 8725
},
{
"epoch": 0.72,
"learning_rate": 1.3347368421052635e-06,
"loss": 0.2274,
"step": 8750
},
{
"epoch": 0.72,
"learning_rate": 1.308421052631579e-06,
"loss": 0.1889,
"step": 8775
},
{
"epoch": 0.72,
"learning_rate": 1.2821052631578949e-06,
"loss": 0.1956,
"step": 8800
},
{
"epoch": 0.73,
"learning_rate": 1.2557894736842105e-06,
"loss": 0.2036,
"step": 8825
},
{
"epoch": 0.73,
"learning_rate": 1.2294736842105263e-06,
"loss": 0.2432,
"step": 8850
},
{
"epoch": 0.73,
"learning_rate": 1.2031578947368421e-06,
"loss": 0.2063,
"step": 8875
},
{
"epoch": 0.73,
"learning_rate": 1.176842105263158e-06,
"loss": 0.1829,
"step": 8900
},
{
"epoch": 0.73,
"learning_rate": 1.1505263157894738e-06,
"loss": 0.214,
"step": 8925
},
{
"epoch": 0.74,
"learning_rate": 1.1242105263157896e-06,
"loss": 0.2065,
"step": 8950
},
{
"epoch": 0.74,
"learning_rate": 1.0978947368421052e-06,
"loss": 0.1974,
"step": 8975
},
{
"epoch": 0.74,
"learning_rate": 1.0715789473684212e-06,
"loss": 0.2128,
"step": 9000
},
{
"epoch": 0.74,
"learning_rate": 1.045263157894737e-06,
"loss": 0.2462,
"step": 9025
},
{
"epoch": 0.74,
"learning_rate": 1.0189473684210527e-06,
"loss": 0.188,
"step": 9050
},
{
"epoch": 0.75,
"learning_rate": 9.936842105263158e-07,
"loss": 0.2401,
"step": 9075
},
{
"epoch": 0.75,
"learning_rate": 9.673684210526316e-07,
"loss": 0.1751,
"step": 9100
},
{
"epoch": 0.75,
"learning_rate": 9.410526315789474e-07,
"loss": 0.2274,
"step": 9125
},
{
"epoch": 0.75,
"learning_rate": 9.147368421052632e-07,
"loss": 0.205,
"step": 9150
},
{
"epoch": 0.76,
"learning_rate": 8.88421052631579e-07,
"loss": 0.2005,
"step": 9175
},
{
"epoch": 0.76,
"learning_rate": 8.621052631578948e-07,
"loss": 0.2095,
"step": 9200
},
{
"epoch": 0.76,
"learning_rate": 8.357894736842106e-07,
"loss": 0.2118,
"step": 9225
},
{
"epoch": 0.76,
"learning_rate": 8.094736842105263e-07,
"loss": 0.2223,
"step": 9250
},
{
"epoch": 0.76,
"learning_rate": 7.831578947368422e-07,
"loss": 0.1951,
"step": 9275
},
{
"epoch": 0.77,
"learning_rate": 7.56842105263158e-07,
"loss": 0.2063,
"step": 9300
},
{
"epoch": 0.77,
"learning_rate": 7.305263157894738e-07,
"loss": 0.2202,
"step": 9325
},
{
"epoch": 0.77,
"learning_rate": 7.042105263157896e-07,
"loss": 0.2056,
"step": 9350
},
{
"epoch": 0.77,
"learning_rate": 6.778947368421053e-07,
"loss": 0.2485,
"step": 9375
},
{
"epoch": 0.77,
"learning_rate": 6.515789473684211e-07,
"loss": 0.2224,
"step": 9400
},
{
"epoch": 0.78,
"learning_rate": 6.252631578947368e-07,
"loss": 0.2212,
"step": 9425
},
{
"epoch": 0.78,
"learning_rate": 5.989473684210526e-07,
"loss": 0.2204,
"step": 9450
},
{
"epoch": 0.78,
"learning_rate": 5.726315789473685e-07,
"loss": 0.2251,
"step": 9475
},
{
"epoch": 0.78,
"learning_rate": 5.463157894736843e-07,
"loss": 0.1869,
"step": 9500
},
{
"epoch": 0.78,
"learning_rate": 5.2e-07,
"loss": 0.1992,
"step": 9525
},
{
"epoch": 0.79,
"learning_rate": 4.936842105263158e-07,
"loss": 0.2253,
"step": 9550
},
{
"epoch": 0.79,
"learning_rate": 4.6736842105263163e-07,
"loss": 0.1934,
"step": 9575
},
{
"epoch": 0.79,
"learning_rate": 4.410526315789474e-07,
"loss": 0.2083,
"step": 9600
},
{
"epoch": 0.79,
"learning_rate": 4.1473684210526317e-07,
"loss": 0.1822,
"step": 9625
},
{
"epoch": 0.79,
"learning_rate": 3.88421052631579e-07,
"loss": 0.2022,
"step": 9650
},
{
"epoch": 0.8,
"learning_rate": 3.6210526315789475e-07,
"loss": 0.215,
"step": 9675
},
{
"epoch": 0.8,
"learning_rate": 3.3578947368421057e-07,
"loss": 0.2159,
"step": 9700
},
{
"epoch": 0.8,
"learning_rate": 3.0947368421052633e-07,
"loss": 0.2243,
"step": 9725
},
{
"epoch": 0.8,
"learning_rate": 2.8315789473684215e-07,
"loss": 0.2008,
"step": 9750
},
{
"epoch": 0.8,
"learning_rate": 2.568421052631579e-07,
"loss": 0.2249,
"step": 9775
},
{
"epoch": 0.81,
"learning_rate": 2.305263157894737e-07,
"loss": 0.2154,
"step": 9800
},
{
"epoch": 0.81,
"learning_rate": 2.042105263157895e-07,
"loss": 0.2355,
"step": 9825
},
{
"epoch": 0.81,
"learning_rate": 1.7789473684210527e-07,
"loss": 0.2024,
"step": 9850
},
{
"epoch": 0.81,
"learning_rate": 1.5157894736842106e-07,
"loss": 0.1999,
"step": 9875
},
{
"epoch": 0.81,
"learning_rate": 1.2526315789473685e-07,
"loss": 0.2139,
"step": 9900
},
{
"epoch": 0.82,
"learning_rate": 9.894736842105264e-08,
"loss": 0.1962,
"step": 9925
},
{
"epoch": 0.82,
"learning_rate": 7.263157894736842e-08,
"loss": 0.1945,
"step": 9950
},
{
"epoch": 0.82,
"learning_rate": 4.631578947368422e-08,
"loss": 0.2174,
"step": 9975
},
{
"epoch": 0.82,
"learning_rate": 2e-08,
"loss": 0.2085,
"step": 10000
},
{
"epoch": 0.82,
"step": 10000,
"total_flos": 8.590715932450488e+19,
"train_loss": 0.241443017578125,
"train_runtime": 25061.6522,
"train_samples_per_second": 6.384,
"train_steps_per_second": 0.399
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2000,
"total_flos": 8.590715932450488e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}