whisper-th-small / trainer_state.json
tensorops's picture
add model
1706e32
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8229775327133569,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.898977360288234e-06,
"loss": 0.7672,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 6.160712527409633e-06,
"loss": 0.6426,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 6.85912902234906e-06,
"loss": 0.6038,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 7.344547104469332e-06,
"loss": 0.6007,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.716963756434345e-06,
"loss": 0.5597,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 8.019180844200955e-06,
"loss": 0.5361,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 8.27351214279797e-06,
"loss": 0.52,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 8.49307723936858e-06,
"loss": 0.5044,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 8.686247975778677e-06,
"loss": 0.5196,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 8.858694625217149e-06,
"loss": 0.4631,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 9.014436199608479e-06,
"loss": 0.4645,
"step": 275
},
{
"epoch": 0.02,
"learning_rate": 9.156425255148058e-06,
"loss": 0.4442,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.28689473531776e-06,
"loss": 0.4742,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 9.407574351377137e-06,
"loss": 0.4525,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 9.519831289296397e-06,
"loss": 0.4513,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.624764935335318e-06,
"loss": 0.4655,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.723272550712454e-06,
"loss": 0.4471,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 9.816095971633122e-06,
"loss": 0.4505,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.90385555539545e-06,
"loss": 0.4212,
"step": 475
},
{
"epoch": 0.04,
"learning_rate": 9.987075336738768e-06,
"loss": 0.3854,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.978947368421053e-06,
"loss": 0.4108,
"step": 525
},
{
"epoch": 0.05,
"learning_rate": 9.95263157894737e-06,
"loss": 0.4051,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.926315789473685e-06,
"loss": 0.4071,
"step": 575
},
{
"epoch": 0.05,
"learning_rate": 9.9e-06,
"loss": 0.3635,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.873684210526317e-06,
"loss": 0.4016,
"step": 625
},
{
"epoch": 0.05,
"learning_rate": 9.847368421052632e-06,
"loss": 0.3839,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 9.821052631578948e-06,
"loss": 0.3805,
"step": 675
},
{
"epoch": 0.06,
"learning_rate": 9.794736842105263e-06,
"loss": 0.3978,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.76842105263158e-06,
"loss": 0.3772,
"step": 725
},
{
"epoch": 0.06,
"learning_rate": 9.742105263157897e-06,
"loss": 0.3692,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.715789473684212e-06,
"loss": 0.4012,
"step": 775
},
{
"epoch": 0.07,
"learning_rate": 9.689473684210527e-06,
"loss": 0.3688,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.663157894736843e-06,
"loss": 0.339,
"step": 825
},
{
"epoch": 0.07,
"learning_rate": 9.636842105263158e-06,
"loss": 0.3346,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.610526315789475e-06,
"loss": 0.3649,
"step": 875
},
{
"epoch": 0.07,
"learning_rate": 9.58421052631579e-06,
"loss": 0.3608,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 9.557894736842107e-06,
"loss": 0.3659,
"step": 925
},
{
"epoch": 0.08,
"learning_rate": 9.531578947368422e-06,
"loss": 0.3482,
"step": 950
},
{
"epoch": 0.08,
"learning_rate": 9.505263157894738e-06,
"loss": 0.3649,
"step": 975
},
{
"epoch": 0.08,
"learning_rate": 9.478947368421053e-06,
"loss": 0.3701,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.452631578947368e-06,
"loss": 0.3517,
"step": 1025
},
{
"epoch": 0.09,
"learning_rate": 9.426315789473685e-06,
"loss": 0.356,
"step": 1050
},
{
"epoch": 0.09,
"learning_rate": 9.4e-06,
"loss": 0.3206,
"step": 1075
},
{
"epoch": 0.09,
"learning_rate": 9.373684210526316e-06,
"loss": 0.3179,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 9.347368421052633e-06,
"loss": 0.315,
"step": 1125
},
{
"epoch": 0.09,
"learning_rate": 9.321052631578948e-06,
"loss": 0.3161,
"step": 1150
},
{
"epoch": 0.1,
"learning_rate": 9.294736842105265e-06,
"loss": 0.3327,
"step": 1175
},
{
"epoch": 0.1,
"learning_rate": 9.26842105263158e-06,
"loss": 0.3365,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 9.242105263157896e-06,
"loss": 0.3199,
"step": 1225
},
{
"epoch": 0.1,
"learning_rate": 9.215789473684211e-06,
"loss": 0.335,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.189473684210526e-06,
"loss": 0.3206,
"step": 1275
},
{
"epoch": 0.11,
"learning_rate": 9.163157894736843e-06,
"loss": 0.3714,
"step": 1300
},
{
"epoch": 0.11,
"learning_rate": 9.136842105263158e-06,
"loss": 0.3191,
"step": 1325
},
{
"epoch": 0.11,
"learning_rate": 9.110526315789475e-06,
"loss": 0.338,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 9.08421052631579e-06,
"loss": 0.3158,
"step": 1375
},
{
"epoch": 0.12,
"learning_rate": 9.057894736842106e-06,
"loss": 0.33,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 9.031578947368423e-06,
"loss": 0.3031,
"step": 1425
},
{
"epoch": 0.12,
"learning_rate": 9.005263157894738e-06,
"loss": 0.3086,
"step": 1450
},
{
"epoch": 0.12,
"learning_rate": 8.978947368421055e-06,
"loss": 0.3125,
"step": 1475
},
{
"epoch": 0.12,
"learning_rate": 8.95263157894737e-06,
"loss": 0.3229,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 8.926315789473685e-06,
"loss": 0.3319,
"step": 1525
},
{
"epoch": 0.13,
"learning_rate": 8.900000000000001e-06,
"loss": 0.3013,
"step": 1550
},
{
"epoch": 0.13,
"learning_rate": 8.873684210526316e-06,
"loss": 0.3065,
"step": 1575
},
{
"epoch": 0.13,
"learning_rate": 8.847368421052633e-06,
"loss": 0.3196,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 8.821052631578948e-06,
"loss": 0.3177,
"step": 1625
},
{
"epoch": 0.14,
"learning_rate": 8.794736842105264e-06,
"loss": 0.3362,
"step": 1650
},
{
"epoch": 0.14,
"learning_rate": 8.76842105263158e-06,
"loss": 0.2915,
"step": 1675
},
{
"epoch": 0.14,
"learning_rate": 8.742105263157894e-06,
"loss": 0.2931,
"step": 1700
},
{
"epoch": 0.14,
"learning_rate": 8.715789473684211e-06,
"loss": 0.3116,
"step": 1725
},
{
"epoch": 0.14,
"learning_rate": 8.689473684210526e-06,
"loss": 0.301,
"step": 1750
},
{
"epoch": 0.15,
"learning_rate": 8.663157894736843e-06,
"loss": 0.304,
"step": 1775
},
{
"epoch": 0.15,
"learning_rate": 8.63684210526316e-06,
"loss": 0.2975,
"step": 1800
},
{
"epoch": 0.15,
"learning_rate": 8.610526315789474e-06,
"loss": 0.2888,
"step": 1825
},
{
"epoch": 0.15,
"learning_rate": 8.584210526315791e-06,
"loss": 0.2922,
"step": 1850
},
{
"epoch": 0.15,
"learning_rate": 8.557894736842106e-06,
"loss": 0.3138,
"step": 1875
},
{
"epoch": 0.16,
"learning_rate": 8.531578947368423e-06,
"loss": 0.2941,
"step": 1900
},
{
"epoch": 0.16,
"learning_rate": 8.505263157894738e-06,
"loss": 0.291,
"step": 1925
},
{
"epoch": 0.16,
"learning_rate": 8.478947368421053e-06,
"loss": 0.3047,
"step": 1950
},
{
"epoch": 0.16,
"learning_rate": 8.45263157894737e-06,
"loss": 0.3012,
"step": 1975
},
{
"epoch": 0.16,
"learning_rate": 8.426315789473684e-06,
"loss": 0.2927,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 8.400000000000001e-06,
"loss": 0.2959,
"step": 2025
},
{
"epoch": 0.17,
"learning_rate": 8.373684210526316e-06,
"loss": 0.2956,
"step": 2050
},
{
"epoch": 0.17,
"learning_rate": 8.347368421052633e-06,
"loss": 0.3106,
"step": 2075
},
{
"epoch": 0.17,
"learning_rate": 8.32105263157895e-06,
"loss": 0.2887,
"step": 2100
},
{
"epoch": 0.17,
"learning_rate": 8.294736842105264e-06,
"loss": 0.2964,
"step": 2125
},
{
"epoch": 0.18,
"learning_rate": 8.26842105263158e-06,
"loss": 0.2827,
"step": 2150
},
{
"epoch": 0.18,
"learning_rate": 8.242105263157896e-06,
"loss": 0.2966,
"step": 2175
},
{
"epoch": 0.18,
"learning_rate": 8.21578947368421e-06,
"loss": 0.3052,
"step": 2200
},
{
"epoch": 0.18,
"learning_rate": 8.189473684210527e-06,
"loss": 0.2726,
"step": 2225
},
{
"epoch": 0.19,
"learning_rate": 8.163157894736842e-06,
"loss": 0.2703,
"step": 2250
},
{
"epoch": 0.19,
"learning_rate": 8.136842105263159e-06,
"loss": 0.2868,
"step": 2275
},
{
"epoch": 0.19,
"learning_rate": 8.110526315789474e-06,
"loss": 0.2641,
"step": 2300
},
{
"epoch": 0.19,
"learning_rate": 8.08421052631579e-06,
"loss": 0.2668,
"step": 2325
},
{
"epoch": 0.19,
"learning_rate": 8.057894736842106e-06,
"loss": 0.2481,
"step": 2350
},
{
"epoch": 0.2,
"learning_rate": 8.03157894736842e-06,
"loss": 0.26,
"step": 2375
},
{
"epoch": 0.2,
"learning_rate": 8.005263157894737e-06,
"loss": 0.3047,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 7.978947368421052e-06,
"loss": 0.2758,
"step": 2425
},
{
"epoch": 0.2,
"learning_rate": 7.952631578947369e-06,
"loss": 0.2962,
"step": 2450
},
{
"epoch": 0.2,
"learning_rate": 7.926315789473686e-06,
"loss": 0.2682,
"step": 2475
},
{
"epoch": 0.21,
"learning_rate": 7.9e-06,
"loss": 0.2833,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 7.873684210526317e-06,
"loss": 0.2741,
"step": 2525
},
{
"epoch": 0.21,
"learning_rate": 7.847368421052632e-06,
"loss": 0.3021,
"step": 2550
},
{
"epoch": 0.21,
"learning_rate": 7.821052631578949e-06,
"loss": 0.2806,
"step": 2575
},
{
"epoch": 0.21,
"learning_rate": 7.794736842105264e-06,
"loss": 0.2592,
"step": 2600
},
{
"epoch": 0.22,
"learning_rate": 7.768421052631579e-06,
"loss": 0.2953,
"step": 2625
},
{
"epoch": 0.22,
"learning_rate": 7.742105263157896e-06,
"loss": 0.2778,
"step": 2650
},
{
"epoch": 0.22,
"learning_rate": 7.71578947368421e-06,
"loss": 0.2854,
"step": 2675
},
{
"epoch": 0.22,
"learning_rate": 7.689473684210527e-06,
"loss": 0.2699,
"step": 2700
},
{
"epoch": 0.22,
"learning_rate": 7.663157894736842e-06,
"loss": 0.2754,
"step": 2725
},
{
"epoch": 0.23,
"learning_rate": 7.636842105263159e-06,
"loss": 0.2363,
"step": 2750
},
{
"epoch": 0.23,
"learning_rate": 7.610526315789474e-06,
"loss": 0.2489,
"step": 2775
},
{
"epoch": 0.23,
"learning_rate": 7.58421052631579e-06,
"loss": 0.2553,
"step": 2800
},
{
"epoch": 0.23,
"learning_rate": 7.557894736842106e-06,
"loss": 0.2782,
"step": 2825
},
{
"epoch": 0.23,
"learning_rate": 7.531578947368422e-06,
"loss": 0.2552,
"step": 2850
},
{
"epoch": 0.24,
"learning_rate": 7.505263157894738e-06,
"loss": 0.2726,
"step": 2875
},
{
"epoch": 0.24,
"learning_rate": 7.478947368421054e-06,
"loss": 0.2672,
"step": 2900
},
{
"epoch": 0.24,
"learning_rate": 7.4526315789473695e-06,
"loss": 0.2604,
"step": 2925
},
{
"epoch": 0.24,
"learning_rate": 7.4263157894736845e-06,
"loss": 0.2552,
"step": 2950
},
{
"epoch": 0.24,
"learning_rate": 7.4e-06,
"loss": 0.2747,
"step": 2975
},
{
"epoch": 0.25,
"learning_rate": 7.373684210526316e-06,
"loss": 0.2645,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 7.349473684210527e-06,
"loss": 0.2602,
"step": 3025
},
{
"epoch": 0.25,
"learning_rate": 7.323157894736843e-06,
"loss": 0.2799,
"step": 3050
},
{
"epoch": 0.25,
"learning_rate": 7.296842105263159e-06,
"loss": 0.2854,
"step": 3075
},
{
"epoch": 0.26,
"learning_rate": 7.2705263157894745e-06,
"loss": 0.263,
"step": 3100
},
{
"epoch": 0.26,
"learning_rate": 7.24421052631579e-06,
"loss": 0.2339,
"step": 3125
},
{
"epoch": 0.26,
"learning_rate": 7.217894736842106e-06,
"loss": 0.2597,
"step": 3150
},
{
"epoch": 0.26,
"learning_rate": 7.191578947368422e-06,
"loss": 0.2518,
"step": 3175
},
{
"epoch": 0.26,
"learning_rate": 7.165263157894738e-06,
"loss": 0.2513,
"step": 3200
},
{
"epoch": 0.27,
"learning_rate": 7.138947368421053e-06,
"loss": 0.2841,
"step": 3225
},
{
"epoch": 0.27,
"learning_rate": 7.1126315789473685e-06,
"loss": 0.2603,
"step": 3250
},
{
"epoch": 0.27,
"learning_rate": 7.086315789473684e-06,
"loss": 0.2507,
"step": 3275
},
{
"epoch": 0.27,
"learning_rate": 7.06e-06,
"loss": 0.2664,
"step": 3300
},
{
"epoch": 0.27,
"learning_rate": 7.033684210526316e-06,
"loss": 0.2569,
"step": 3325
},
{
"epoch": 0.28,
"learning_rate": 7.007368421052632e-06,
"loss": 0.2557,
"step": 3350
},
{
"epoch": 0.28,
"learning_rate": 6.9810526315789485e-06,
"loss": 0.2793,
"step": 3375
},
{
"epoch": 0.28,
"learning_rate": 6.954736842105264e-06,
"loss": 0.2613,
"step": 3400
},
{
"epoch": 0.28,
"learning_rate": 6.92842105263158e-06,
"loss": 0.2445,
"step": 3425
},
{
"epoch": 0.28,
"learning_rate": 6.902105263157896e-06,
"loss": 0.246,
"step": 3450
},
{
"epoch": 0.29,
"learning_rate": 6.875789473684211e-06,
"loss": 0.2433,
"step": 3475
},
{
"epoch": 0.29,
"learning_rate": 6.849473684210527e-06,
"loss": 0.2414,
"step": 3500
},
{
"epoch": 0.29,
"learning_rate": 6.8231578947368425e-06,
"loss": 0.2517,
"step": 3525
},
{
"epoch": 0.29,
"learning_rate": 6.796842105263158e-06,
"loss": 0.2485,
"step": 3550
},
{
"epoch": 0.29,
"learning_rate": 6.770526315789474e-06,
"loss": 0.2435,
"step": 3575
},
{
"epoch": 0.3,
"learning_rate": 6.74421052631579e-06,
"loss": 0.2639,
"step": 3600
},
{
"epoch": 0.3,
"learning_rate": 6.717894736842106e-06,
"loss": 0.2576,
"step": 3625
},
{
"epoch": 0.3,
"learning_rate": 6.691578947368421e-06,
"loss": 0.2748,
"step": 3650
},
{
"epoch": 0.3,
"learning_rate": 6.665263157894737e-06,
"loss": 0.2598,
"step": 3675
},
{
"epoch": 0.3,
"learning_rate": 6.638947368421054e-06,
"loss": 0.2483,
"step": 3700
},
{
"epoch": 0.31,
"learning_rate": 6.612631578947369e-06,
"loss": 0.2594,
"step": 3725
},
{
"epoch": 0.31,
"learning_rate": 6.586315789473685e-06,
"loss": 0.2531,
"step": 3750
},
{
"epoch": 0.31,
"learning_rate": 6.560000000000001e-06,
"loss": 0.2561,
"step": 3775
},
{
"epoch": 0.31,
"learning_rate": 6.5336842105263165e-06,
"loss": 0.2556,
"step": 3800
},
{
"epoch": 0.31,
"learning_rate": 6.507368421052632e-06,
"loss": 0.2339,
"step": 3825
},
{
"epoch": 0.32,
"learning_rate": 6.481052631578948e-06,
"loss": 0.2439,
"step": 3850
},
{
"epoch": 0.32,
"learning_rate": 6.454736842105264e-06,
"loss": 0.2423,
"step": 3875
},
{
"epoch": 0.32,
"learning_rate": 6.428421052631579e-06,
"loss": 0.2478,
"step": 3900
},
{
"epoch": 0.32,
"learning_rate": 6.402105263157895e-06,
"loss": 0.2521,
"step": 3925
},
{
"epoch": 0.33,
"learning_rate": 6.375789473684211e-06,
"loss": 0.253,
"step": 3950
},
{
"epoch": 0.33,
"learning_rate": 6.349473684210526e-06,
"loss": 0.2156,
"step": 3975
},
{
"epoch": 0.33,
"learning_rate": 6.323157894736842e-06,
"loss": 0.2237,
"step": 4000
},
{
"epoch": 0.33,
"learning_rate": 6.298947368421053e-06,
"loss": 0.2585,
"step": 4025
},
{
"epoch": 0.33,
"learning_rate": 6.272631578947369e-06,
"loss": 0.234,
"step": 4050
},
{
"epoch": 0.34,
"learning_rate": 6.246315789473685e-06,
"loss": 0.2213,
"step": 4075
},
{
"epoch": 0.34,
"learning_rate": 6.220000000000001e-06,
"loss": 0.2575,
"step": 4100
},
{
"epoch": 0.34,
"learning_rate": 6.193684210526316e-06,
"loss": 0.2551,
"step": 4125
},
{
"epoch": 0.34,
"learning_rate": 6.167368421052632e-06,
"loss": 0.2657,
"step": 4150
},
{
"epoch": 0.34,
"learning_rate": 6.141052631578947e-06,
"loss": 0.2549,
"step": 4175
},
{
"epoch": 0.35,
"learning_rate": 6.114736842105263e-06,
"loss": 0.2401,
"step": 4200
},
{
"epoch": 0.35,
"learning_rate": 6.088421052631579e-06,
"loss": 0.2324,
"step": 4225
},
{
"epoch": 0.35,
"learning_rate": 6.062105263157895e-06,
"loss": 0.2244,
"step": 4250
},
{
"epoch": 0.35,
"learning_rate": 6.035789473684211e-06,
"loss": 0.261,
"step": 4275
},
{
"epoch": 0.35,
"learning_rate": 6.009473684210527e-06,
"loss": 0.2734,
"step": 4300
},
{
"epoch": 0.36,
"learning_rate": 5.983157894736843e-06,
"loss": 0.234,
"step": 4325
},
{
"epoch": 0.36,
"learning_rate": 5.956842105263159e-06,
"loss": 0.2344,
"step": 4350
},
{
"epoch": 0.36,
"learning_rate": 5.930526315789475e-06,
"loss": 0.2361,
"step": 4375
},
{
"epoch": 0.36,
"learning_rate": 5.90421052631579e-06,
"loss": 0.2513,
"step": 4400
},
{
"epoch": 0.36,
"learning_rate": 5.877894736842105e-06,
"loss": 0.2383,
"step": 4425
},
{
"epoch": 0.37,
"learning_rate": 5.851578947368421e-06,
"loss": 0.2277,
"step": 4450
},
{
"epoch": 0.37,
"learning_rate": 5.825263157894737e-06,
"loss": 0.2483,
"step": 4475
},
{
"epoch": 0.37,
"learning_rate": 5.798947368421053e-06,
"loss": 0.2257,
"step": 4500
},
{
"epoch": 0.37,
"learning_rate": 5.772631578947369e-06,
"loss": 0.2494,
"step": 4525
},
{
"epoch": 0.37,
"learning_rate": 5.7463157894736845e-06,
"loss": 0.2394,
"step": 4550
},
{
"epoch": 0.38,
"learning_rate": 5.72e-06,
"loss": 0.2278,
"step": 4575
},
{
"epoch": 0.38,
"learning_rate": 5.693684210526316e-06,
"loss": 0.244,
"step": 4600
},
{
"epoch": 0.38,
"learning_rate": 5.667368421052633e-06,
"loss": 0.2342,
"step": 4625
},
{
"epoch": 0.38,
"learning_rate": 5.641052631578949e-06,
"loss": 0.2382,
"step": 4650
},
{
"epoch": 0.38,
"learning_rate": 5.6147368421052636e-06,
"loss": 0.2195,
"step": 4675
},
{
"epoch": 0.39,
"learning_rate": 5.588421052631579e-06,
"loss": 0.2508,
"step": 4700
},
{
"epoch": 0.39,
"learning_rate": 5.562105263157895e-06,
"loss": 0.2454,
"step": 4725
},
{
"epoch": 0.39,
"learning_rate": 5.535789473684211e-06,
"loss": 0.2348,
"step": 4750
},
{
"epoch": 0.39,
"learning_rate": 5.509473684210527e-06,
"loss": 0.2245,
"step": 4775
},
{
"epoch": 0.4,
"learning_rate": 5.483157894736843e-06,
"loss": 0.2439,
"step": 4800
},
{
"epoch": 0.4,
"learning_rate": 5.4568421052631585e-06,
"loss": 0.2179,
"step": 4825
},
{
"epoch": 0.4,
"learning_rate": 5.4305263157894734e-06,
"loss": 0.2581,
"step": 4850
},
{
"epoch": 0.4,
"learning_rate": 5.404210526315789e-06,
"loss": 0.2344,
"step": 4875
},
{
"epoch": 0.4,
"learning_rate": 5.377894736842105e-06,
"loss": 0.2453,
"step": 4900
},
{
"epoch": 0.41,
"learning_rate": 5.351578947368421e-06,
"loss": 0.2331,
"step": 4925
},
{
"epoch": 0.41,
"learning_rate": 5.3252631578947376e-06,
"loss": 0.2507,
"step": 4950
},
{
"epoch": 0.41,
"learning_rate": 5.298947368421053e-06,
"loss": 0.2219,
"step": 4975
},
{
"epoch": 0.41,
"learning_rate": 5.272631578947369e-06,
"loss": 0.2467,
"step": 5000
},
{
"epoch": 0.41,
"learning_rate": 5.248421052631579e-06,
"loss": 0.2757,
"step": 5025
},
{
"epoch": 0.42,
"learning_rate": 5.222105263157895e-06,
"loss": 0.229,
"step": 5050
},
{
"epoch": 0.42,
"learning_rate": 5.195789473684211e-06,
"loss": 0.2263,
"step": 5075
},
{
"epoch": 0.42,
"learning_rate": 5.169473684210527e-06,
"loss": 0.2482,
"step": 5100
},
{
"epoch": 0.42,
"learning_rate": 5.1431578947368425e-06,
"loss": 0.2807,
"step": 5125
},
{
"epoch": 0.42,
"learning_rate": 5.1168421052631575e-06,
"loss": 0.2472,
"step": 5150
},
{
"epoch": 0.43,
"learning_rate": 5.090526315789475e-06,
"loss": 0.2269,
"step": 5175
},
{
"epoch": 0.43,
"learning_rate": 5.06421052631579e-06,
"loss": 0.2438,
"step": 5200
},
{
"epoch": 0.43,
"learning_rate": 5.037894736842106e-06,
"loss": 0.2519,
"step": 5225
},
{
"epoch": 0.43,
"learning_rate": 5.011578947368422e-06,
"loss": 0.2469,
"step": 5250
},
{
"epoch": 0.43,
"learning_rate": 4.9852631578947374e-06,
"loss": 0.2423,
"step": 5275
},
{
"epoch": 0.44,
"learning_rate": 4.958947368421053e-06,
"loss": 0.2347,
"step": 5300
},
{
"epoch": 0.44,
"learning_rate": 4.932631578947369e-06,
"loss": 0.2456,
"step": 5325
},
{
"epoch": 0.44,
"learning_rate": 4.906315789473685e-06,
"loss": 0.2295,
"step": 5350
},
{
"epoch": 0.44,
"learning_rate": 4.880000000000001e-06,
"loss": 0.2164,
"step": 5375
},
{
"epoch": 0.44,
"learning_rate": 4.853684210526316e-06,
"loss": 0.2375,
"step": 5400
},
{
"epoch": 0.45,
"learning_rate": 4.8273684210526315e-06,
"loss": 0.2364,
"step": 5425
},
{
"epoch": 0.45,
"learning_rate": 4.801052631578948e-06,
"loss": 0.2226,
"step": 5450
},
{
"epoch": 0.45,
"learning_rate": 4.774736842105264e-06,
"loss": 0.2427,
"step": 5475
},
{
"epoch": 0.45,
"learning_rate": 4.748421052631579e-06,
"loss": 0.2342,
"step": 5500
},
{
"epoch": 0.45,
"learning_rate": 4.722105263157895e-06,
"loss": 0.2473,
"step": 5525
},
{
"epoch": 0.46,
"learning_rate": 4.695789473684211e-06,
"loss": 0.2298,
"step": 5550
},
{
"epoch": 0.46,
"learning_rate": 4.669473684210526e-06,
"loss": 0.2096,
"step": 5575
},
{
"epoch": 0.46,
"learning_rate": 4.643157894736843e-06,
"loss": 0.2454,
"step": 5600
},
{
"epoch": 0.46,
"learning_rate": 4.616842105263158e-06,
"loss": 0.2616,
"step": 5625
},
{
"epoch": 0.46,
"learning_rate": 4.590526315789474e-06,
"loss": 0.2317,
"step": 5650
},
{
"epoch": 0.47,
"learning_rate": 4.56421052631579e-06,
"loss": 0.2464,
"step": 5675
},
{
"epoch": 0.47,
"learning_rate": 4.5378947368421055e-06,
"loss": 0.2347,
"step": 5700
},
{
"epoch": 0.47,
"learning_rate": 4.511578947368421e-06,
"loss": 0.212,
"step": 5725
},
{
"epoch": 0.47,
"learning_rate": 4.485263157894737e-06,
"loss": 0.2099,
"step": 5750
},
{
"epoch": 0.48,
"learning_rate": 4.458947368421053e-06,
"loss": 0.2454,
"step": 5775
},
{
"epoch": 0.48,
"learning_rate": 4.432631578947369e-06,
"loss": 0.2292,
"step": 5800
},
{
"epoch": 0.48,
"learning_rate": 4.406315789473685e-06,
"loss": 0.2209,
"step": 5825
},
{
"epoch": 0.48,
"learning_rate": 4.38e-06,
"loss": 0.2332,
"step": 5850
},
{
"epoch": 0.48,
"learning_rate": 4.353684210526316e-06,
"loss": 0.2123,
"step": 5875
},
{
"epoch": 0.49,
"learning_rate": 4.327368421052632e-06,
"loss": 0.2417,
"step": 5900
},
{
"epoch": 0.49,
"learning_rate": 4.301052631578948e-06,
"loss": 0.2335,
"step": 5925
},
{
"epoch": 0.49,
"learning_rate": 4.274736842105264e-06,
"loss": 0.2406,
"step": 5950
},
{
"epoch": 0.49,
"learning_rate": 4.2484210526315795e-06,
"loss": 0.2321,
"step": 5975
},
{
"epoch": 0.49,
"learning_rate": 4.222105263157895e-06,
"loss": 0.22,
"step": 6000
},
{
"epoch": 0.5,
"learning_rate": 4.197894736842106e-06,
"loss": 0.2177,
"step": 6025
},
{
"epoch": 0.5,
"learning_rate": 4.171578947368421e-06,
"loss": 0.2122,
"step": 6050
},
{
"epoch": 0.5,
"learning_rate": 4.145263157894737e-06,
"loss": 0.2117,
"step": 6075
},
{
"epoch": 0.5,
"learning_rate": 4.118947368421053e-06,
"loss": 0.2166,
"step": 6100
},
{
"epoch": 0.5,
"learning_rate": 4.092631578947369e-06,
"loss": 0.2198,
"step": 6125
},
{
"epoch": 0.51,
"learning_rate": 4.0663157894736845e-06,
"loss": 0.2439,
"step": 6150
},
{
"epoch": 0.51,
"learning_rate": 4.04e-06,
"loss": 0.2462,
"step": 6175
},
{
"epoch": 0.51,
"learning_rate": 4.013684210526316e-06,
"loss": 0.2291,
"step": 6200
},
{
"epoch": 0.51,
"learning_rate": 3.987368421052632e-06,
"loss": 0.2311,
"step": 6225
},
{
"epoch": 0.51,
"learning_rate": 3.961052631578948e-06,
"loss": 0.2313,
"step": 6250
},
{
"epoch": 0.52,
"learning_rate": 3.9347368421052636e-06,
"loss": 0.2286,
"step": 6275
},
{
"epoch": 0.52,
"learning_rate": 3.908421052631579e-06,
"loss": 0.2425,
"step": 6300
},
{
"epoch": 0.52,
"learning_rate": 3.882105263157895e-06,
"loss": 0.2236,
"step": 6325
},
{
"epoch": 0.52,
"learning_rate": 3.855789473684211e-06,
"loss": 0.2064,
"step": 6350
},
{
"epoch": 0.52,
"learning_rate": 3.829473684210527e-06,
"loss": 0.2095,
"step": 6375
},
{
"epoch": 0.53,
"learning_rate": 3.8031578947368426e-06,
"loss": 0.2235,
"step": 6400
},
{
"epoch": 0.53,
"learning_rate": 3.776842105263158e-06,
"loss": 0.2176,
"step": 6425
},
{
"epoch": 0.53,
"learning_rate": 3.750526315789474e-06,
"loss": 0.2062,
"step": 6450
},
{
"epoch": 0.53,
"learning_rate": 3.7242105263157897e-06,
"loss": 0.2173,
"step": 6475
},
{
"epoch": 0.53,
"learning_rate": 3.6978947368421055e-06,
"loss": 0.201,
"step": 6500
},
{
"epoch": 0.54,
"learning_rate": 3.6715789473684217e-06,
"loss": 0.1935,
"step": 6525
},
{
"epoch": 0.54,
"learning_rate": 3.645263157894737e-06,
"loss": 0.2367,
"step": 6550
},
{
"epoch": 0.54,
"learning_rate": 3.618947368421053e-06,
"loss": 0.202,
"step": 6575
},
{
"epoch": 0.54,
"learning_rate": 3.5926315789473688e-06,
"loss": 0.2272,
"step": 6600
},
{
"epoch": 0.55,
"learning_rate": 3.5663157894736846e-06,
"loss": 0.1942,
"step": 6625
},
{
"epoch": 0.55,
"learning_rate": 3.54e-06,
"loss": 0.2303,
"step": 6650
},
{
"epoch": 0.55,
"learning_rate": 3.513684210526316e-06,
"loss": 0.2076,
"step": 6675
},
{
"epoch": 0.55,
"learning_rate": 3.487368421052632e-06,
"loss": 0.212,
"step": 6700
},
{
"epoch": 0.55,
"learning_rate": 3.461052631578948e-06,
"loss": 0.225,
"step": 6725
},
{
"epoch": 0.56,
"learning_rate": 3.4347368421052637e-06,
"loss": 0.2079,
"step": 6750
},
{
"epoch": 0.56,
"learning_rate": 3.408421052631579e-06,
"loss": 0.1967,
"step": 6775
},
{
"epoch": 0.56,
"learning_rate": 3.382105263157895e-06,
"loss": 0.1922,
"step": 6800
},
{
"epoch": 0.56,
"learning_rate": 3.3557894736842107e-06,
"loss": 0.2433,
"step": 6825
},
{
"epoch": 0.56,
"learning_rate": 3.329473684210527e-06,
"loss": 0.2058,
"step": 6850
},
{
"epoch": 0.57,
"learning_rate": 3.3031578947368424e-06,
"loss": 0.2173,
"step": 6875
},
{
"epoch": 0.57,
"learning_rate": 3.276842105263158e-06,
"loss": 0.1963,
"step": 6900
},
{
"epoch": 0.57,
"learning_rate": 3.250526315789474e-06,
"loss": 0.2353,
"step": 6925
},
{
"epoch": 0.57,
"learning_rate": 3.22421052631579e-06,
"loss": 0.2037,
"step": 6950
},
{
"epoch": 0.57,
"learning_rate": 3.197894736842105e-06,
"loss": 0.21,
"step": 6975
},
{
"epoch": 0.58,
"learning_rate": 3.171578947368421e-06,
"loss": 0.2268,
"step": 7000
},
{
"epoch": 0.58,
"learning_rate": 3.147368421052632e-06,
"loss": 0.2142,
"step": 7025
},
{
"epoch": 0.58,
"learning_rate": 3.1210526315789473e-06,
"loss": 0.2315,
"step": 7050
},
{
"epoch": 0.58,
"learning_rate": 3.094736842105263e-06,
"loss": 0.2164,
"step": 7075
},
{
"epoch": 0.58,
"learning_rate": 3.0684210526315794e-06,
"loss": 0.2052,
"step": 7100
},
{
"epoch": 0.59,
"learning_rate": 3.042105263157895e-06,
"loss": 0.2121,
"step": 7125
},
{
"epoch": 0.59,
"learning_rate": 3.015789473684211e-06,
"loss": 0.2037,
"step": 7150
},
{
"epoch": 0.59,
"learning_rate": 2.9894736842105264e-06,
"loss": 0.2217,
"step": 7175
},
{
"epoch": 0.59,
"learning_rate": 2.9631578947368422e-06,
"loss": 0.2051,
"step": 7200
},
{
"epoch": 0.59,
"learning_rate": 2.936842105263158e-06,
"loss": 0.2208,
"step": 7225
},
{
"epoch": 0.6,
"learning_rate": 2.9105263157894743e-06,
"loss": 0.2253,
"step": 7250
},
{
"epoch": 0.6,
"learning_rate": 2.88421052631579e-06,
"loss": 0.2211,
"step": 7275
},
{
"epoch": 0.6,
"learning_rate": 2.8578947368421055e-06,
"loss": 0.2129,
"step": 7300
},
{
"epoch": 0.6,
"learning_rate": 2.8315789473684213e-06,
"loss": 0.2176,
"step": 7325
},
{
"epoch": 0.6,
"learning_rate": 2.805263157894737e-06,
"loss": 0.1965,
"step": 7350
},
{
"epoch": 0.61,
"learning_rate": 2.7789473684210525e-06,
"loss": 0.192,
"step": 7375
},
{
"epoch": 0.61,
"learning_rate": 2.7526315789473683e-06,
"loss": 0.2126,
"step": 7400
},
{
"epoch": 0.61,
"learning_rate": 2.7263157894736846e-06,
"loss": 0.2365,
"step": 7425
},
{
"epoch": 0.61,
"learning_rate": 2.7000000000000004e-06,
"loss": 0.2306,
"step": 7450
},
{
"epoch": 0.62,
"learning_rate": 2.6736842105263162e-06,
"loss": 0.2193,
"step": 7475
},
{
"epoch": 0.62,
"learning_rate": 2.6473684210526316e-06,
"loss": 0.2388,
"step": 7500
},
{
"epoch": 0.62,
"learning_rate": 2.6210526315789474e-06,
"loss": 0.222,
"step": 7525
},
{
"epoch": 0.62,
"learning_rate": 2.5947368421052633e-06,
"loss": 0.2027,
"step": 7550
},
{
"epoch": 0.62,
"learning_rate": 2.568421052631579e-06,
"loss": 0.2414,
"step": 7575
},
{
"epoch": 0.63,
"learning_rate": 2.5421052631578953e-06,
"loss": 0.2122,
"step": 7600
},
{
"epoch": 0.63,
"learning_rate": 2.5157894736842107e-06,
"loss": 0.1938,
"step": 7625
},
{
"epoch": 0.63,
"learning_rate": 2.4894736842105265e-06,
"loss": 0.2222,
"step": 7650
},
{
"epoch": 0.63,
"learning_rate": 2.4631578947368424e-06,
"loss": 0.21,
"step": 7675
},
{
"epoch": 0.63,
"learning_rate": 2.436842105263158e-06,
"loss": 0.1995,
"step": 7700
},
{
"epoch": 0.64,
"learning_rate": 2.410526315789474e-06,
"loss": 0.1985,
"step": 7725
},
{
"epoch": 0.64,
"learning_rate": 2.38421052631579e-06,
"loss": 0.215,
"step": 7750
},
{
"epoch": 0.64,
"learning_rate": 2.357894736842105e-06,
"loss": 0.2197,
"step": 7775
},
{
"epoch": 0.64,
"learning_rate": 2.331578947368421e-06,
"loss": 0.209,
"step": 7800
},
{
"epoch": 0.64,
"learning_rate": 2.3052631578947373e-06,
"loss": 0.2226,
"step": 7825
},
{
"epoch": 0.65,
"learning_rate": 2.2789473684210527e-06,
"loss": 0.1847,
"step": 7850
},
{
"epoch": 0.65,
"learning_rate": 2.2526315789473685e-06,
"loss": 0.2253,
"step": 7875
},
{
"epoch": 0.65,
"learning_rate": 2.2263157894736843e-06,
"loss": 0.2128,
"step": 7900
},
{
"epoch": 0.65,
"learning_rate": 2.2e-06,
"loss": 0.2139,
"step": 7925
},
{
"epoch": 0.65,
"learning_rate": 2.173684210526316e-06,
"loss": 0.2059,
"step": 7950
},
{
"epoch": 0.66,
"learning_rate": 2.1473684210526317e-06,
"loss": 0.2081,
"step": 7975
},
{
"epoch": 0.66,
"learning_rate": 2.1210526315789476e-06,
"loss": 0.1793,
"step": 8000
},
{
"epoch": 0.66,
"learning_rate": 2.096842105263158e-06,
"loss": 0.203,
"step": 8025
},
{
"epoch": 0.66,
"learning_rate": 2.070526315789474e-06,
"loss": 0.2157,
"step": 8050
},
{
"epoch": 0.66,
"learning_rate": 2.0442105263157897e-06,
"loss": 0.2297,
"step": 8075
},
{
"epoch": 0.67,
"learning_rate": 2.0178947368421055e-06,
"loss": 0.223,
"step": 8100
},
{
"epoch": 0.67,
"learning_rate": 1.9915789473684213e-06,
"loss": 0.2374,
"step": 8125
},
{
"epoch": 0.67,
"learning_rate": 1.965263157894737e-06,
"loss": 0.2039,
"step": 8150
},
{
"epoch": 0.67,
"learning_rate": 1.938947368421053e-06,
"loss": 0.2209,
"step": 8175
},
{
"epoch": 0.67,
"learning_rate": 1.9126315789473683e-06,
"loss": 0.2024,
"step": 8200
},
{
"epoch": 0.68,
"learning_rate": 1.8863157894736844e-06,
"loss": 0.2127,
"step": 8225
},
{
"epoch": 0.68,
"learning_rate": 1.8600000000000002e-06,
"loss": 0.2103,
"step": 8250
},
{
"epoch": 0.68,
"learning_rate": 1.8336842105263158e-06,
"loss": 0.1989,
"step": 8275
},
{
"epoch": 0.68,
"learning_rate": 1.8073684210526318e-06,
"loss": 0.2192,
"step": 8300
},
{
"epoch": 0.69,
"learning_rate": 1.7810526315789474e-06,
"loss": 0.2317,
"step": 8325
},
{
"epoch": 0.69,
"learning_rate": 1.7547368421052633e-06,
"loss": 0.2245,
"step": 8350
},
{
"epoch": 0.69,
"learning_rate": 1.7284210526315793e-06,
"loss": 0.2216,
"step": 8375
},
{
"epoch": 0.69,
"learning_rate": 1.7021052631578949e-06,
"loss": 0.2392,
"step": 8400
},
{
"epoch": 0.69,
"learning_rate": 1.6757894736842107e-06,
"loss": 0.2088,
"step": 8425
},
{
"epoch": 0.7,
"learning_rate": 1.6494736842105263e-06,
"loss": 0.222,
"step": 8450
},
{
"epoch": 0.7,
"learning_rate": 1.6231578947368423e-06,
"loss": 0.207,
"step": 8475
},
{
"epoch": 0.7,
"learning_rate": 1.596842105263158e-06,
"loss": 0.203,
"step": 8500
},
{
"epoch": 0.7,
"learning_rate": 1.5705263157894738e-06,
"loss": 0.218,
"step": 8525
},
{
"epoch": 0.7,
"learning_rate": 1.5442105263157898e-06,
"loss": 0.1949,
"step": 8550
},
{
"epoch": 0.71,
"learning_rate": 1.5178947368421054e-06,
"loss": 0.2015,
"step": 8575
},
{
"epoch": 0.71,
"learning_rate": 1.4915789473684212e-06,
"loss": 0.1839,
"step": 8600
},
{
"epoch": 0.71,
"learning_rate": 1.4652631578947368e-06,
"loss": 0.2114,
"step": 8625
},
{
"epoch": 0.71,
"learning_rate": 1.4389473684210529e-06,
"loss": 0.2247,
"step": 8650
},
{
"epoch": 0.71,
"learning_rate": 1.4126315789473685e-06,
"loss": 0.2124,
"step": 8675
},
{
"epoch": 0.72,
"learning_rate": 1.3863157894736843e-06,
"loss": 0.2116,
"step": 8700
},
{
"epoch": 0.72,
"learning_rate": 1.3600000000000001e-06,
"loss": 0.2212,
"step": 8725
},
{
"epoch": 0.72,
"learning_rate": 1.333684210526316e-06,
"loss": 0.2214,
"step": 8750
},
{
"epoch": 0.72,
"learning_rate": 1.3073684210526315e-06,
"loss": 0.1966,
"step": 8775
},
{
"epoch": 0.72,
"learning_rate": 1.2810526315789476e-06,
"loss": 0.205,
"step": 8800
},
{
"epoch": 0.73,
"learning_rate": 1.2547368421052634e-06,
"loss": 0.2015,
"step": 8825
},
{
"epoch": 0.73,
"learning_rate": 1.228421052631579e-06,
"loss": 0.2347,
"step": 8850
},
{
"epoch": 0.73,
"learning_rate": 1.2021052631578948e-06,
"loss": 0.21,
"step": 8875
},
{
"epoch": 0.73,
"learning_rate": 1.1757894736842106e-06,
"loss": 0.1891,
"step": 8900
},
{
"epoch": 0.73,
"learning_rate": 1.1494736842105264e-06,
"loss": 0.2117,
"step": 8925
},
{
"epoch": 0.74,
"learning_rate": 1.1231578947368423e-06,
"loss": 0.2053,
"step": 8950
},
{
"epoch": 0.74,
"learning_rate": 1.0968421052631579e-06,
"loss": 0.1985,
"step": 8975
},
{
"epoch": 0.74,
"learning_rate": 1.070526315789474e-06,
"loss": 0.2126,
"step": 9000
},
{
"epoch": 0.74,
"learning_rate": 1.0463157894736844e-06,
"loss": 0.2417,
"step": 9025
},
{
"epoch": 0.74,
"learning_rate": 1.02e-06,
"loss": 0.1908,
"step": 9050
},
{
"epoch": 0.75,
"learning_rate": 9.936842105263158e-07,
"loss": 0.2168,
"step": 9075
},
{
"epoch": 0.75,
"learning_rate": 9.673684210526316e-07,
"loss": 0.1841,
"step": 9100
},
{
"epoch": 0.75,
"learning_rate": 9.410526315789474e-07,
"loss": 0.2246,
"step": 9125
},
{
"epoch": 0.75,
"learning_rate": 9.147368421052632e-07,
"loss": 0.2058,
"step": 9150
},
{
"epoch": 0.76,
"learning_rate": 8.88421052631579e-07,
"loss": 0.1996,
"step": 9175
},
{
"epoch": 0.76,
"learning_rate": 8.621052631578948e-07,
"loss": 0.211,
"step": 9200
},
{
"epoch": 0.76,
"learning_rate": 8.357894736842106e-07,
"loss": 0.2069,
"step": 9225
},
{
"epoch": 0.76,
"learning_rate": 8.094736842105263e-07,
"loss": 0.2186,
"step": 9250
},
{
"epoch": 0.76,
"learning_rate": 7.831578947368422e-07,
"loss": 0.1951,
"step": 9275
},
{
"epoch": 0.77,
"learning_rate": 7.56842105263158e-07,
"loss": 0.2041,
"step": 9300
},
{
"epoch": 0.77,
"learning_rate": 7.305263157894738e-07,
"loss": 0.2146,
"step": 9325
},
{
"epoch": 0.77,
"learning_rate": 7.042105263157896e-07,
"loss": 0.204,
"step": 9350
},
{
"epoch": 0.77,
"learning_rate": 6.778947368421053e-07,
"loss": 0.2424,
"step": 9375
},
{
"epoch": 0.77,
"learning_rate": 6.515789473684211e-07,
"loss": 0.2188,
"step": 9400
},
{
"epoch": 0.78,
"learning_rate": 6.252631578947368e-07,
"loss": 0.222,
"step": 9425
},
{
"epoch": 0.78,
"learning_rate": 5.989473684210526e-07,
"loss": 0.2161,
"step": 9450
},
{
"epoch": 0.78,
"learning_rate": 5.726315789473685e-07,
"loss": 0.219,
"step": 9475
},
{
"epoch": 0.78,
"learning_rate": 5.463157894736843e-07,
"loss": 0.1896,
"step": 9500
},
{
"epoch": 0.78,
"learning_rate": 5.2e-07,
"loss": 0.1996,
"step": 9525
},
{
"epoch": 0.79,
"learning_rate": 4.936842105263158e-07,
"loss": 0.2167,
"step": 9550
},
{
"epoch": 0.79,
"learning_rate": 4.6736842105263163e-07,
"loss": 0.1939,
"step": 9575
},
{
"epoch": 0.79,
"learning_rate": 4.410526315789474e-07,
"loss": 0.2089,
"step": 9600
},
{
"epoch": 0.79,
"learning_rate": 4.1473684210526317e-07,
"loss": 0.1889,
"step": 9625
},
{
"epoch": 0.79,
"learning_rate": 3.88421052631579e-07,
"loss": 0.2005,
"step": 9650
},
{
"epoch": 0.8,
"learning_rate": 3.6210526315789475e-07,
"loss": 0.2141,
"step": 9675
},
{
"epoch": 0.8,
"learning_rate": 3.3578947368421057e-07,
"loss": 0.2137,
"step": 9700
},
{
"epoch": 0.8,
"learning_rate": 3.0947368421052633e-07,
"loss": 0.2245,
"step": 9725
},
{
"epoch": 0.8,
"learning_rate": 2.8315789473684215e-07,
"loss": 0.2018,
"step": 9750
},
{
"epoch": 0.8,
"learning_rate": 2.568421052631579e-07,
"loss": 0.2241,
"step": 9775
},
{
"epoch": 0.81,
"learning_rate": 2.305263157894737e-07,
"loss": 0.216,
"step": 9800
},
{
"epoch": 0.81,
"learning_rate": 2.042105263157895e-07,
"loss": 0.2269,
"step": 9825
},
{
"epoch": 0.81,
"learning_rate": 1.7789473684210527e-07,
"loss": 0.209,
"step": 9850
},
{
"epoch": 0.81,
"learning_rate": 1.5157894736842106e-07,
"loss": 0.1998,
"step": 9875
},
{
"epoch": 0.81,
"learning_rate": 1.2526315789473685e-07,
"loss": 0.2129,
"step": 9900
},
{
"epoch": 0.82,
"learning_rate": 9.894736842105264e-08,
"loss": 0.1995,
"step": 9925
},
{
"epoch": 0.82,
"learning_rate": 7.263157894736842e-08,
"loss": 0.1983,
"step": 9950
},
{
"epoch": 0.82,
"learning_rate": 4.631578947368422e-08,
"loss": 0.2042,
"step": 9975
},
{
"epoch": 0.82,
"learning_rate": 2e-08,
"loss": 0.2059,
"step": 10000
},
{
"epoch": 0.82,
"step": 10000,
"total_flos": 4.617366439093862e+19,
"train_loss": 0.26186515502929686,
"train_runtime": 11850.3169,
"train_samples_per_second": 13.502,
"train_steps_per_second": 0.844
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2000,
"total_flos": 4.617366439093862e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}