|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8229775327133569, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.898977360288234e-06, |
|
"loss": 0.7672, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.160712527409633e-06, |
|
"loss": 0.6426, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.85912902234906e-06, |
|
"loss": 0.6038, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.344547104469332e-06, |
|
"loss": 0.6007, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.716963756434345e-06, |
|
"loss": 0.5597, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.019180844200955e-06, |
|
"loss": 0.5361, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.27351214279797e-06, |
|
"loss": 0.52, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.49307723936858e-06, |
|
"loss": 0.5044, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.686247975778677e-06, |
|
"loss": 0.5196, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.858694625217149e-06, |
|
"loss": 0.4631, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.014436199608479e-06, |
|
"loss": 0.4645, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.156425255148058e-06, |
|
"loss": 0.4442, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.28689473531776e-06, |
|
"loss": 0.4742, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.407574351377137e-06, |
|
"loss": 0.4525, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.519831289296397e-06, |
|
"loss": 0.4513, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.624764935335318e-06, |
|
"loss": 0.4655, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.723272550712454e-06, |
|
"loss": 0.4471, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.816095971633122e-06, |
|
"loss": 0.4505, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.90385555539545e-06, |
|
"loss": 0.4212, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.987075336738768e-06, |
|
"loss": 0.3854, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.978947368421053e-06, |
|
"loss": 0.4108, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.95263157894737e-06, |
|
"loss": 0.4051, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.926315789473685e-06, |
|
"loss": 0.4071, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.3635, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.873684210526317e-06, |
|
"loss": 0.4016, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.847368421052632e-06, |
|
"loss": 0.3839, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.821052631578948e-06, |
|
"loss": 0.3805, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.794736842105263e-06, |
|
"loss": 0.3978, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.76842105263158e-06, |
|
"loss": 0.3772, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.742105263157897e-06, |
|
"loss": 0.3692, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.715789473684212e-06, |
|
"loss": 0.4012, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.689473684210527e-06, |
|
"loss": 0.3688, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.663157894736843e-06, |
|
"loss": 0.339, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.636842105263158e-06, |
|
"loss": 0.3346, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.610526315789475e-06, |
|
"loss": 0.3649, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.58421052631579e-06, |
|
"loss": 0.3608, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.557894736842107e-06, |
|
"loss": 0.3659, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.531578947368422e-06, |
|
"loss": 0.3482, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.505263157894738e-06, |
|
"loss": 0.3649, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.478947368421053e-06, |
|
"loss": 0.3701, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.452631578947368e-06, |
|
"loss": 0.3517, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.426315789473685e-06, |
|
"loss": 0.356, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.3206, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.373684210526316e-06, |
|
"loss": 0.3179, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.347368421052633e-06, |
|
"loss": 0.315, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.321052631578948e-06, |
|
"loss": 0.3161, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.294736842105265e-06, |
|
"loss": 0.3327, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.26842105263158e-06, |
|
"loss": 0.3365, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.242105263157896e-06, |
|
"loss": 0.3199, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.215789473684211e-06, |
|
"loss": 0.335, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.189473684210526e-06, |
|
"loss": 0.3206, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.163157894736843e-06, |
|
"loss": 0.3714, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.136842105263158e-06, |
|
"loss": 0.3191, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.110526315789475e-06, |
|
"loss": 0.338, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.08421052631579e-06, |
|
"loss": 0.3158, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.057894736842106e-06, |
|
"loss": 0.33, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.031578947368423e-06, |
|
"loss": 0.3031, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.005263157894738e-06, |
|
"loss": 0.3086, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.978947368421055e-06, |
|
"loss": 0.3125, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.95263157894737e-06, |
|
"loss": 0.3229, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.926315789473685e-06, |
|
"loss": 0.3319, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.3013, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.873684210526316e-06, |
|
"loss": 0.3065, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.847368421052633e-06, |
|
"loss": 0.3196, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.821052631578948e-06, |
|
"loss": 0.3177, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.794736842105264e-06, |
|
"loss": 0.3362, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.76842105263158e-06, |
|
"loss": 0.2915, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.742105263157894e-06, |
|
"loss": 0.2931, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.715789473684211e-06, |
|
"loss": 0.3116, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.689473684210526e-06, |
|
"loss": 0.301, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.663157894736843e-06, |
|
"loss": 0.304, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.63684210526316e-06, |
|
"loss": 0.2975, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.610526315789474e-06, |
|
"loss": 0.2888, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.584210526315791e-06, |
|
"loss": 0.2922, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.557894736842106e-06, |
|
"loss": 0.3138, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.531578947368423e-06, |
|
"loss": 0.2941, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.505263157894738e-06, |
|
"loss": 0.291, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.478947368421053e-06, |
|
"loss": 0.3047, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.45263157894737e-06, |
|
"loss": 0.3012, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.426315789473684e-06, |
|
"loss": 0.2927, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.2959, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.373684210526316e-06, |
|
"loss": 0.2956, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.347368421052633e-06, |
|
"loss": 0.3106, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.32105263157895e-06, |
|
"loss": 0.2887, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.294736842105264e-06, |
|
"loss": 0.2964, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.26842105263158e-06, |
|
"loss": 0.2827, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.242105263157896e-06, |
|
"loss": 0.2966, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.21578947368421e-06, |
|
"loss": 0.3052, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.189473684210527e-06, |
|
"loss": 0.2726, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.163157894736842e-06, |
|
"loss": 0.2703, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.136842105263159e-06, |
|
"loss": 0.2868, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.110526315789474e-06, |
|
"loss": 0.2641, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.08421052631579e-06, |
|
"loss": 0.2668, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.057894736842106e-06, |
|
"loss": 0.2481, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.03157894736842e-06, |
|
"loss": 0.26, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.005263157894737e-06, |
|
"loss": 0.3047, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.978947368421052e-06, |
|
"loss": 0.2758, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.952631578947369e-06, |
|
"loss": 0.2962, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.926315789473686e-06, |
|
"loss": 0.2682, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.2833, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.873684210526317e-06, |
|
"loss": 0.2741, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.847368421052632e-06, |
|
"loss": 0.3021, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.821052631578949e-06, |
|
"loss": 0.2806, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.794736842105264e-06, |
|
"loss": 0.2592, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.768421052631579e-06, |
|
"loss": 0.2953, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.742105263157896e-06, |
|
"loss": 0.2778, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.71578947368421e-06, |
|
"loss": 0.2854, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.689473684210527e-06, |
|
"loss": 0.2699, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.663157894736842e-06, |
|
"loss": 0.2754, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.636842105263159e-06, |
|
"loss": 0.2363, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.610526315789474e-06, |
|
"loss": 0.2489, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.58421052631579e-06, |
|
"loss": 0.2553, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.557894736842106e-06, |
|
"loss": 0.2782, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.531578947368422e-06, |
|
"loss": 0.2552, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.505263157894738e-06, |
|
"loss": 0.2726, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.478947368421054e-06, |
|
"loss": 0.2672, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.4526315789473695e-06, |
|
"loss": 0.2604, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.4263157894736845e-06, |
|
"loss": 0.2552, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.2747, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.373684210526316e-06, |
|
"loss": 0.2645, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.349473684210527e-06, |
|
"loss": 0.2602, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.323157894736843e-06, |
|
"loss": 0.2799, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.296842105263159e-06, |
|
"loss": 0.2854, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.2705263157894745e-06, |
|
"loss": 0.263, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.24421052631579e-06, |
|
"loss": 0.2339, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.217894736842106e-06, |
|
"loss": 0.2597, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.191578947368422e-06, |
|
"loss": 0.2518, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.165263157894738e-06, |
|
"loss": 0.2513, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.138947368421053e-06, |
|
"loss": 0.2841, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.1126315789473685e-06, |
|
"loss": 0.2603, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.086315789473684e-06, |
|
"loss": 0.2507, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.06e-06, |
|
"loss": 0.2664, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.033684210526316e-06, |
|
"loss": 0.2569, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.007368421052632e-06, |
|
"loss": 0.2557, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.9810526315789485e-06, |
|
"loss": 0.2793, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.954736842105264e-06, |
|
"loss": 0.2613, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.92842105263158e-06, |
|
"loss": 0.2445, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.902105263157896e-06, |
|
"loss": 0.246, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.875789473684211e-06, |
|
"loss": 0.2433, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.849473684210527e-06, |
|
"loss": 0.2414, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.8231578947368425e-06, |
|
"loss": 0.2517, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.796842105263158e-06, |
|
"loss": 0.2485, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.770526315789474e-06, |
|
"loss": 0.2435, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.74421052631579e-06, |
|
"loss": 0.2639, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.717894736842106e-06, |
|
"loss": 0.2576, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.691578947368421e-06, |
|
"loss": 0.2748, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.665263157894737e-06, |
|
"loss": 0.2598, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.638947368421054e-06, |
|
"loss": 0.2483, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.612631578947369e-06, |
|
"loss": 0.2594, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.586315789473685e-06, |
|
"loss": 0.2531, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.560000000000001e-06, |
|
"loss": 0.2561, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.5336842105263165e-06, |
|
"loss": 0.2556, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.507368421052632e-06, |
|
"loss": 0.2339, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.481052631578948e-06, |
|
"loss": 0.2439, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.454736842105264e-06, |
|
"loss": 0.2423, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.428421052631579e-06, |
|
"loss": 0.2478, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.402105263157895e-06, |
|
"loss": 0.2521, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.375789473684211e-06, |
|
"loss": 0.253, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.349473684210526e-06, |
|
"loss": 0.2156, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.323157894736842e-06, |
|
"loss": 0.2237, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.298947368421053e-06, |
|
"loss": 0.2585, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.272631578947369e-06, |
|
"loss": 0.234, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.246315789473685e-06, |
|
"loss": 0.2213, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.220000000000001e-06, |
|
"loss": 0.2575, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.193684210526316e-06, |
|
"loss": 0.2551, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.167368421052632e-06, |
|
"loss": 0.2657, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.141052631578947e-06, |
|
"loss": 0.2549, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.114736842105263e-06, |
|
"loss": 0.2401, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.088421052631579e-06, |
|
"loss": 0.2324, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.062105263157895e-06, |
|
"loss": 0.2244, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.035789473684211e-06, |
|
"loss": 0.261, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.009473684210527e-06, |
|
"loss": 0.2734, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.983157894736843e-06, |
|
"loss": 0.234, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.956842105263159e-06, |
|
"loss": 0.2344, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.930526315789475e-06, |
|
"loss": 0.2361, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.90421052631579e-06, |
|
"loss": 0.2513, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.877894736842105e-06, |
|
"loss": 0.2383, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.851578947368421e-06, |
|
"loss": 0.2277, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.825263157894737e-06, |
|
"loss": 0.2483, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.798947368421053e-06, |
|
"loss": 0.2257, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.772631578947369e-06, |
|
"loss": 0.2494, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.7463157894736845e-06, |
|
"loss": 0.2394, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.72e-06, |
|
"loss": 0.2278, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.693684210526316e-06, |
|
"loss": 0.244, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.667368421052633e-06, |
|
"loss": 0.2342, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.641052631578949e-06, |
|
"loss": 0.2382, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.6147368421052636e-06, |
|
"loss": 0.2195, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.588421052631579e-06, |
|
"loss": 0.2508, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.562105263157895e-06, |
|
"loss": 0.2454, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.535789473684211e-06, |
|
"loss": 0.2348, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.509473684210527e-06, |
|
"loss": 0.2245, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.483157894736843e-06, |
|
"loss": 0.2439, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.4568421052631585e-06, |
|
"loss": 0.2179, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.4305263157894734e-06, |
|
"loss": 0.2581, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.404210526315789e-06, |
|
"loss": 0.2344, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.377894736842105e-06, |
|
"loss": 0.2453, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.351578947368421e-06, |
|
"loss": 0.2331, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.3252631578947376e-06, |
|
"loss": 0.2507, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.298947368421053e-06, |
|
"loss": 0.2219, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.272631578947369e-06, |
|
"loss": 0.2467, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.248421052631579e-06, |
|
"loss": 0.2757, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.222105263157895e-06, |
|
"loss": 0.229, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.195789473684211e-06, |
|
"loss": 0.2263, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.169473684210527e-06, |
|
"loss": 0.2482, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1431578947368425e-06, |
|
"loss": 0.2807, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1168421052631575e-06, |
|
"loss": 0.2472, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.090526315789475e-06, |
|
"loss": 0.2269, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.06421052631579e-06, |
|
"loss": 0.2438, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.037894736842106e-06, |
|
"loss": 0.2519, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.011578947368422e-06, |
|
"loss": 0.2469, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9852631578947374e-06, |
|
"loss": 0.2423, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.958947368421053e-06, |
|
"loss": 0.2347, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.932631578947369e-06, |
|
"loss": 0.2456, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.906315789473685e-06, |
|
"loss": 0.2295, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 0.2164, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.853684210526316e-06, |
|
"loss": 0.2375, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8273684210526315e-06, |
|
"loss": 0.2364, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.801052631578948e-06, |
|
"loss": 0.2226, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.774736842105264e-06, |
|
"loss": 0.2427, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.748421052631579e-06, |
|
"loss": 0.2342, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.722105263157895e-06, |
|
"loss": 0.2473, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.695789473684211e-06, |
|
"loss": 0.2298, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.669473684210526e-06, |
|
"loss": 0.2096, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.643157894736843e-06, |
|
"loss": 0.2454, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.616842105263158e-06, |
|
"loss": 0.2616, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.590526315789474e-06, |
|
"loss": 0.2317, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.56421052631579e-06, |
|
"loss": 0.2464, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.5378947368421055e-06, |
|
"loss": 0.2347, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.511578947368421e-06, |
|
"loss": 0.212, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.485263157894737e-06, |
|
"loss": 0.2099, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.458947368421053e-06, |
|
"loss": 0.2454, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.432631578947369e-06, |
|
"loss": 0.2292, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.406315789473685e-06, |
|
"loss": 0.2209, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.38e-06, |
|
"loss": 0.2332, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.353684210526316e-06, |
|
"loss": 0.2123, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.327368421052632e-06, |
|
"loss": 0.2417, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.301052631578948e-06, |
|
"loss": 0.2335, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.274736842105264e-06, |
|
"loss": 0.2406, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.2484210526315795e-06, |
|
"loss": 0.2321, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.222105263157895e-06, |
|
"loss": 0.22, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.197894736842106e-06, |
|
"loss": 0.2177, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.171578947368421e-06, |
|
"loss": 0.2122, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.145263157894737e-06, |
|
"loss": 0.2117, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.118947368421053e-06, |
|
"loss": 0.2166, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.092631578947369e-06, |
|
"loss": 0.2198, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.0663157894736845e-06, |
|
"loss": 0.2439, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.04e-06, |
|
"loss": 0.2462, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.013684210526316e-06, |
|
"loss": 0.2291, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.987368421052632e-06, |
|
"loss": 0.2311, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.961052631578948e-06, |
|
"loss": 0.2313, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.9347368421052636e-06, |
|
"loss": 0.2286, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.908421052631579e-06, |
|
"loss": 0.2425, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.882105263157895e-06, |
|
"loss": 0.2236, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.855789473684211e-06, |
|
"loss": 0.2064, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.829473684210527e-06, |
|
"loss": 0.2095, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.8031578947368426e-06, |
|
"loss": 0.2235, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.776842105263158e-06, |
|
"loss": 0.2176, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.750526315789474e-06, |
|
"loss": 0.2062, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.7242105263157897e-06, |
|
"loss": 0.2173, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6978947368421055e-06, |
|
"loss": 0.201, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.6715789473684217e-06, |
|
"loss": 0.1935, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.645263157894737e-06, |
|
"loss": 0.2367, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.618947368421053e-06, |
|
"loss": 0.202, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.5926315789473688e-06, |
|
"loss": 0.2272, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.5663157894736846e-06, |
|
"loss": 0.1942, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.54e-06, |
|
"loss": 0.2303, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.513684210526316e-06, |
|
"loss": 0.2076, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.487368421052632e-06, |
|
"loss": 0.212, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.461052631578948e-06, |
|
"loss": 0.225, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.4347368421052637e-06, |
|
"loss": 0.2079, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.408421052631579e-06, |
|
"loss": 0.1967, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.382105263157895e-06, |
|
"loss": 0.1922, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.3557894736842107e-06, |
|
"loss": 0.2433, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.329473684210527e-06, |
|
"loss": 0.2058, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.3031578947368424e-06, |
|
"loss": 0.2173, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.276842105263158e-06, |
|
"loss": 0.1963, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.250526315789474e-06, |
|
"loss": 0.2353, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.22421052631579e-06, |
|
"loss": 0.2037, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.197894736842105e-06, |
|
"loss": 0.21, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.171578947368421e-06, |
|
"loss": 0.2268, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.147368421052632e-06, |
|
"loss": 0.2142, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.1210526315789473e-06, |
|
"loss": 0.2315, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.094736842105263e-06, |
|
"loss": 0.2164, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.0684210526315794e-06, |
|
"loss": 0.2052, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.042105263157895e-06, |
|
"loss": 0.2121, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.015789473684211e-06, |
|
"loss": 0.2037, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.9894736842105264e-06, |
|
"loss": 0.2217, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.9631578947368422e-06, |
|
"loss": 0.2051, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.936842105263158e-06, |
|
"loss": 0.2208, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.9105263157894743e-06, |
|
"loss": 0.2253, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.88421052631579e-06, |
|
"loss": 0.2211, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8578947368421055e-06, |
|
"loss": 0.2129, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8315789473684213e-06, |
|
"loss": 0.2176, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.805263157894737e-06, |
|
"loss": 0.1965, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7789473684210525e-06, |
|
"loss": 0.192, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7526315789473683e-06, |
|
"loss": 0.2126, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7263157894736846e-06, |
|
"loss": 0.2365, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 0.2306, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.6736842105263162e-06, |
|
"loss": 0.2193, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.6473684210526316e-06, |
|
"loss": 0.2388, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.6210526315789474e-06, |
|
"loss": 0.222, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.5947368421052633e-06, |
|
"loss": 0.2027, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.568421052631579e-06, |
|
"loss": 0.2414, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.5421052631578953e-06, |
|
"loss": 0.2122, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.5157894736842107e-06, |
|
"loss": 0.1938, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.4894736842105265e-06, |
|
"loss": 0.2222, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.4631578947368424e-06, |
|
"loss": 0.21, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.436842105263158e-06, |
|
"loss": 0.1995, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.410526315789474e-06, |
|
"loss": 0.1985, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.38421052631579e-06, |
|
"loss": 0.215, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.357894736842105e-06, |
|
"loss": 0.2197, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.331578947368421e-06, |
|
"loss": 0.209, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3052631578947373e-06, |
|
"loss": 0.2226, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.2789473684210527e-06, |
|
"loss": 0.1847, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.2526315789473685e-06, |
|
"loss": 0.2253, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.2263157894736843e-06, |
|
"loss": 0.2128, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.2139, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.173684210526316e-06, |
|
"loss": 0.2059, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.1473684210526317e-06, |
|
"loss": 0.2081, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.1210526315789476e-06, |
|
"loss": 0.1793, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.096842105263158e-06, |
|
"loss": 0.203, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.070526315789474e-06, |
|
"loss": 0.2157, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0442105263157897e-06, |
|
"loss": 0.2297, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0178947368421055e-06, |
|
"loss": 0.223, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9915789473684213e-06, |
|
"loss": 0.2374, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.965263157894737e-06, |
|
"loss": 0.2039, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.938947368421053e-06, |
|
"loss": 0.2209, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9126315789473683e-06, |
|
"loss": 0.2024, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8863157894736844e-06, |
|
"loss": 0.2127, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8600000000000002e-06, |
|
"loss": 0.2103, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8336842105263158e-06, |
|
"loss": 0.1989, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8073684210526318e-06, |
|
"loss": 0.2192, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7810526315789474e-06, |
|
"loss": 0.2317, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7547368421052633e-06, |
|
"loss": 0.2245, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7284210526315793e-06, |
|
"loss": 0.2216, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7021052631578949e-06, |
|
"loss": 0.2392, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6757894736842107e-06, |
|
"loss": 0.2088, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6494736842105263e-06, |
|
"loss": 0.222, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6231578947368423e-06, |
|
"loss": 0.207, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.596842105263158e-06, |
|
"loss": 0.203, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5705263157894738e-06, |
|
"loss": 0.218, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5442105263157898e-06, |
|
"loss": 0.1949, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5178947368421054e-06, |
|
"loss": 0.2015, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4915789473684212e-06, |
|
"loss": 0.1839, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4652631578947368e-06, |
|
"loss": 0.2114, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4389473684210529e-06, |
|
"loss": 0.2247, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4126315789473685e-06, |
|
"loss": 0.2124, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3863157894736843e-06, |
|
"loss": 0.2116, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3600000000000001e-06, |
|
"loss": 0.2212, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.333684210526316e-06, |
|
"loss": 0.2214, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3073684210526315e-06, |
|
"loss": 0.1966, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2810526315789476e-06, |
|
"loss": 0.205, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2547368421052634e-06, |
|
"loss": 0.2015, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.228421052631579e-06, |
|
"loss": 0.2347, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2021052631578948e-06, |
|
"loss": 0.21, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.1757894736842106e-06, |
|
"loss": 0.1891, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.1494736842105264e-06, |
|
"loss": 0.2117, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.1231578947368423e-06, |
|
"loss": 0.2053, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.0968421052631579e-06, |
|
"loss": 0.1985, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.070526315789474e-06, |
|
"loss": 0.2126, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.0463157894736844e-06, |
|
"loss": 0.2417, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.02e-06, |
|
"loss": 0.1908, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.936842105263158e-07, |
|
"loss": 0.2168, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.673684210526316e-07, |
|
"loss": 0.1841, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.410526315789474e-07, |
|
"loss": 0.2246, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.147368421052632e-07, |
|
"loss": 0.2058, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.88421052631579e-07, |
|
"loss": 0.1996, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.621052631578948e-07, |
|
"loss": 0.211, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.357894736842106e-07, |
|
"loss": 0.2069, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.094736842105263e-07, |
|
"loss": 0.2186, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.831578947368422e-07, |
|
"loss": 0.1951, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.56842105263158e-07, |
|
"loss": 0.2041, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.305263157894738e-07, |
|
"loss": 0.2146, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.042105263157896e-07, |
|
"loss": 0.204, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.778947368421053e-07, |
|
"loss": 0.2424, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.515789473684211e-07, |
|
"loss": 0.2188, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.252631578947368e-07, |
|
"loss": 0.222, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5.989473684210526e-07, |
|
"loss": 0.2161, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5.726315789473685e-07, |
|
"loss": 0.219, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5.463157894736843e-07, |
|
"loss": 0.1896, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5.2e-07, |
|
"loss": 0.1996, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.936842105263158e-07, |
|
"loss": 0.2167, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.6736842105263163e-07, |
|
"loss": 0.1939, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.410526315789474e-07, |
|
"loss": 0.2089, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.1473684210526317e-07, |
|
"loss": 0.1889, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.88421052631579e-07, |
|
"loss": 0.2005, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6210526315789475e-07, |
|
"loss": 0.2141, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.3578947368421057e-07, |
|
"loss": 0.2137, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.0947368421052633e-07, |
|
"loss": 0.2245, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.8315789473684215e-07, |
|
"loss": 0.2018, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.568421052631579e-07, |
|
"loss": 0.2241, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.305263157894737e-07, |
|
"loss": 0.216, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.042105263157895e-07, |
|
"loss": 0.2269, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.7789473684210527e-07, |
|
"loss": 0.209, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5157894736842106e-07, |
|
"loss": 0.1998, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.2526315789473685e-07, |
|
"loss": 0.2129, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.894736842105264e-08, |
|
"loss": 0.1995, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.263157894736842e-08, |
|
"loss": 0.1983, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.631578947368422e-08, |
|
"loss": 0.2042, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2e-08, |
|
"loss": 0.2059, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"step": 10000, |
|
"total_flos": 4.617366439093862e+19, |
|
"train_loss": 0.26186515502929686, |
|
"train_runtime": 11850.3169, |
|
"train_samples_per_second": 13.502, |
|
"train_steps_per_second": 0.844 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 2000, |
|
"total_flos": 4.617366439093862e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|