{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.3137151865475565,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.006568575932737782, "grad_norm": 4.938570976257324, "learning_rate": 5.000000000000001e-07, "loss": 1.1658, "step": 25},
    {"epoch": 0.013137151865475564, "grad_norm": 3.4416959285736084, "learning_rate": 1.0000000000000002e-06, "loss": 0.9316, "step": 50},
    {"epoch": 0.01970572779821335, "grad_norm": 3.3691599369049072, "learning_rate": 1.5e-06, "loss": 0.753, "step": 75},
    {"epoch": 0.02627430373095113, "grad_norm": 3.2769742012023926, "learning_rate": 2.0000000000000003e-06, "loss": 0.6576, "step": 100},
    {"epoch": 0.032842879663688915, "grad_norm": 2.5112249851226807, "learning_rate": 2.5e-06, "loss": 0.6188, "step": 125},
    {"epoch": 0.0394114555964267, "grad_norm": 3.0561046600341797, "learning_rate": 3e-06, "loss": 0.6021, "step": 150},
    {"epoch": 0.045980031529164474, "grad_norm": 2.563472032546997, "learning_rate": 3.5e-06, "loss": 0.564, "step": 175},
    {"epoch": 0.05254860746190226, "grad_norm": 2.6831862926483154, "learning_rate": 4.000000000000001e-06, "loss": 0.5314, "step": 200},
    {"epoch": 0.05911718339464004, "grad_norm": 2.383794069290161, "learning_rate": 4.5e-06, "loss": 0.5376, "step": 225},
    {"epoch": 0.06568575932737783, "grad_norm": 2.8369593620300293, "learning_rate": 5e-06, "loss": 0.5404, "step": 250},
    {"epoch": 0.07225433526011561, "grad_norm": 2.641814708709717, "learning_rate": 5.500000000000001e-06, "loss": 0.5014, "step": 275},
    {"epoch": 0.0788229111928534, "grad_norm": 2.549522638320923, "learning_rate": 6e-06, "loss": 0.4978, "step": 300},
    {"epoch": 0.08539148712559118, "grad_norm": 2.770048141479492, "learning_rate": 6.5000000000000004e-06, "loss": 0.5135, "step": 325},
    {"epoch": 0.09196006305832895, "grad_norm": 3.170330286026001, "learning_rate": 7e-06, "loss": 0.504, "step": 350},
    {"epoch": 0.09852863899106673, "grad_norm": 2.712066650390625, "learning_rate": 7.500000000000001e-06, "loss": 0.451, "step": 375},
    {"epoch": 0.10509721492380451, "grad_norm": 2.3632166385650635, "learning_rate": 8.000000000000001e-06, "loss": 0.4533, "step": 400},
    {"epoch": 0.1116657908565423, "grad_norm": 2.728026866912842, "learning_rate": 8.5e-06, "loss": 0.4732, "step": 425},
    {"epoch": 0.11823436678928008, "grad_norm": 2.7299931049346924, "learning_rate": 9e-06, "loss": 0.4255, "step": 450},
    {"epoch": 0.12480294272201786, "grad_norm": 2.4892563819885254, "learning_rate": 9.5e-06, "loss": 0.4254, "step": 475},
    {"epoch": 0.13137151865475566, "grad_norm": 2.3553197383880615, "learning_rate": 1e-05, "loss": 0.4218, "step": 500},
    {"epoch": 0.13794009458749343, "grad_norm": 2.1275930404663086, "learning_rate": 9.944444444444445e-06, "loss": 0.4159, "step": 525},
    {"epoch": 0.14450867052023122, "grad_norm": 2.166019916534424, "learning_rate": 9.88888888888889e-06, "loss": 0.4051, "step": 550},
    {"epoch": 0.151077246452969, "grad_norm": 2.5494415760040283, "learning_rate": 9.833333333333333e-06, "loss": 0.399, "step": 575},
    {"epoch": 0.1576458223857068, "grad_norm": 2.482174873352051, "learning_rate": 9.777777777777779e-06, "loss": 0.4013, "step": 600},
    {"epoch": 0.16421439831844456, "grad_norm": 2.3230655193328857, "learning_rate": 9.722222222222223e-06, "loss": 0.3736, "step": 625},
    {"epoch": 0.17078297425118236, "grad_norm": 2.1272637844085693, "learning_rate": 9.666666666666667e-06, "loss": 0.3715, "step": 650},
    {"epoch": 0.17735155018392013, "grad_norm": 2.2144370079040527, "learning_rate": 9.611111111111112e-06, "loss": 0.389, "step": 675},
    {"epoch": 0.1839201261166579, "grad_norm": 2.156562566757202, "learning_rate": 9.555555555555556e-06, "loss": 0.3779, "step": 700},
    {"epoch": 0.1904887020493957, "grad_norm": 1.9418479204177856, "learning_rate": 9.5e-06, "loss": 0.3624, "step": 725},
    {"epoch": 0.19705727798213346, "grad_norm": 1.9420461654663086, "learning_rate": 9.444444444444445e-06, "loss": 0.3726, "step": 750},
    {"epoch": 0.20362585391487126, "grad_norm": 1.929376482963562, "learning_rate": 9.38888888888889e-06, "loss": 0.3501, "step": 775},
    {"epoch": 0.21019442984760903, "grad_norm": 2.257051467895508, "learning_rate": 9.333333333333334e-06, "loss": 0.3755, "step": 800},
    {"epoch": 0.21676300578034682, "grad_norm": 2.102417230606079, "learning_rate": 9.277777777777778e-06, "loss": 0.3671, "step": 825},
    {"epoch": 0.2233315817130846, "grad_norm": 2.096370220184326, "learning_rate": 9.222222222222224e-06, "loss": 0.3397, "step": 850},
    {"epoch": 0.2299001576458224, "grad_norm": 2.0005109310150146, "learning_rate": 9.166666666666666e-06, "loss": 0.3467, "step": 875},
    {"epoch": 0.23646873357856016, "grad_norm": 2.1975176334381104, "learning_rate": 9.111111111111112e-06, "loss": 0.3341, "step": 900},
    {"epoch": 0.24303730951129796, "grad_norm": 2.0950164794921875, "learning_rate": 9.055555555555556e-06, "loss": 0.3398, "step": 925},
    {"epoch": 0.24960588544403572, "grad_norm": 2.0137791633605957, "learning_rate": 9e-06, "loss": 0.3183, "step": 950},
    {"epoch": 0.2561744613767735, "grad_norm": 1.97981595993042, "learning_rate": 8.944444444444446e-06, "loss": 0.3653, "step": 975},
    {"epoch": 0.2627430373095113, "grad_norm": 1.9213696718215942, "learning_rate": 8.888888888888888e-06, "loss": 0.3429, "step": 1000},
    {"epoch": 0.2627430373095113, "eval_loss": 0.3295910656452179, "eval_runtime": 594.427, "eval_samples_per_second": 2.052, "eval_steps_per_second": 0.13, "eval_wer": 0.2299665865525529, "step": 1000},
    {"epoch": 0.26931161324224906, "grad_norm": 2.122868299484253, "learning_rate": 8.833333333333334e-06, "loss": 0.318, "step": 1025},
    {"epoch": 0.27588018917498686, "grad_norm": 2.259373426437378, "learning_rate": 8.777777777777778e-06, "loss": 0.3364, "step": 1050},
    {"epoch": 0.28244876510772465, "grad_norm": 1.978367567062378, "learning_rate": 8.722222222222224e-06, "loss": 0.3125, "step": 1075},
    {"epoch": 0.28901734104046245, "grad_norm": 1.8453904390335083, "learning_rate": 8.666666666666668e-06, "loss": 0.3009, "step": 1100},
    {"epoch": 0.2955859169732002, "grad_norm": 2.002053737640381, "learning_rate": 8.611111111111112e-06, "loss": 0.3087, "step": 1125},
    {"epoch": 0.302154492905938, "grad_norm": 2.080690860748291, "learning_rate": 8.555555555555556e-06, "loss": 0.3188, "step": 1150},
    {"epoch": 0.3087230688386758, "grad_norm": 1.6879727840423584, "learning_rate": 8.5e-06, "loss": 0.3043, "step": 1175},
    {"epoch": 0.3152916447714136, "grad_norm": 2.2736353874206543, "learning_rate": 8.444444444444446e-06, "loss": 0.3165, "step": 1200},
    {"epoch": 0.3218602207041513, "grad_norm": 1.8325968980789185, "learning_rate": 8.38888888888889e-06, "loss": 0.3236, "step": 1225},
    {"epoch": 0.3284287966368891, "grad_norm": 1.8235770463943481, "learning_rate": 8.333333333333334e-06, "loss": 0.3057, "step": 1250},
    {"epoch": 0.3349973725696269, "grad_norm": 2.09566068649292, "learning_rate": 8.277777777777778e-06, "loss": 0.2845, "step": 1275},
    {"epoch": 0.3415659485023647, "grad_norm": 1.9097365140914917, "learning_rate": 8.222222222222222e-06, "loss": 0.2833, "step": 1300},
    {"epoch": 0.34813452443510245, "grad_norm": 1.9709367752075195, "learning_rate": 8.166666666666668e-06, "loss": 0.294, "step": 1325},
    {"epoch": 0.35470310036784025, "grad_norm": 1.9940749406814575, "learning_rate": 8.111111111111112e-06, "loss": 0.2789, "step": 1350},
    {"epoch": 0.36127167630057805, "grad_norm": 2.122657299041748, "learning_rate": 8.055555555555557e-06, "loss": 0.3026, "step": 1375},
    {"epoch": 0.3678402522333158, "grad_norm": 1.742019772529602, "learning_rate": 8.000000000000001e-06, "loss": 0.2843, "step": 1400},
    {"epoch": 0.3744088281660536, "grad_norm": 1.8771026134490967, "learning_rate": 7.944444444444445e-06, "loss": 0.2937, "step": 1425},
    {"epoch": 0.3809774040987914, "grad_norm": 1.617311954498291, "learning_rate": 7.88888888888889e-06, "loss": 0.2749, "step": 1450},
    {"epoch": 0.3875459800315292, "grad_norm": 1.902928113937378, "learning_rate": 7.833333333333333e-06, "loss": 0.2929, "step": 1475},
    {"epoch": 0.3941145559642669, "grad_norm": 1.9772895574569702, "learning_rate": 7.77777777777778e-06, "loss": 0.2699, "step": 1500},
    {"epoch": 0.4006831318970047, "grad_norm": 1.7294281721115112, "learning_rate": 7.722222222222223e-06, "loss": 0.288, "step": 1525},
    {"epoch": 0.4072517078297425, "grad_norm": 1.6163759231567383, "learning_rate": 7.666666666666667e-06, "loss": 0.2919, "step": 1550},
    {"epoch": 0.4138202837624803, "grad_norm": 1.9823168516159058, "learning_rate": 7.611111111111111e-06, "loss": 0.2691, "step": 1575},
    {"epoch": 0.42038885969521805, "grad_norm": 2.085510730743408, "learning_rate": 7.555555555555556e-06, "loss": 0.2807, "step": 1600},
    {"epoch": 0.42695743562795585, "grad_norm": 2.1536219120025635, "learning_rate": 7.500000000000001e-06, "loss": 0.2977, "step": 1625},
    {"epoch": 0.43352601156069365, "grad_norm": 2.23018741607666, "learning_rate": 7.444444444444445e-06, "loss": 0.291, "step": 1650},
    {"epoch": 0.44009458749343144, "grad_norm": 1.8097656965255737, "learning_rate": 7.38888888888889e-06, "loss": 0.3125, "step": 1675},
    {"epoch": 0.4466631634261692, "grad_norm": 1.557788372039795, "learning_rate": 7.333333333333333e-06, "loss": 0.2743, "step": 1700},
    {"epoch": 0.453231739358907, "grad_norm": 1.677343726158142, "learning_rate": 7.277777777777778e-06, "loss": 0.2568, "step": 1725},
    {"epoch": 0.4598003152916448, "grad_norm": 1.7231301069259644, "learning_rate": 7.222222222222223e-06, "loss": 0.2701, "step": 1750},
    {"epoch": 0.4663688912243826, "grad_norm": 1.632379412651062, "learning_rate": 7.166666666666667e-06, "loss": 0.2795, "step": 1775},
    {"epoch": 0.4729374671571203, "grad_norm": 2.042989730834961, "learning_rate": 7.111111111111112e-06, "loss": 0.2714, "step": 1800},
    {"epoch": 0.4795060430898581, "grad_norm": 1.8419965505599976, "learning_rate": 7.055555555555557e-06, "loss": 0.2638, "step": 1825},
    {"epoch": 0.4860746190225959, "grad_norm": 1.7692322731018066, "learning_rate": 7e-06, "loss": 0.2631, "step": 1850},
    {"epoch": 0.4926431949553337, "grad_norm": 1.8727885484695435, "learning_rate": 6.944444444444445e-06, "loss": 0.2647, "step": 1875},
    {"epoch": 0.49921177088807145, "grad_norm": 1.5731993913650513, "learning_rate": 6.88888888888889e-06, "loss": 0.2765, "step": 1900},
    {"epoch": 0.5057803468208093, "grad_norm": 1.9374446868896484, "learning_rate": 6.833333333333334e-06, "loss": 0.2672, "step": 1925},
    {"epoch": 0.512348922753547, "grad_norm": 1.702355980873108, "learning_rate": 6.777777777777779e-06, "loss": 0.2574, "step": 1950},
    {"epoch": 0.5189174986862848, "grad_norm": 1.539027214050293, "learning_rate": 6.7222222222222235e-06, "loss": 0.2651, "step": 1975},
    {"epoch": 0.5254860746190226, "grad_norm": 1.8967093229293823, "learning_rate": 6.666666666666667e-06, "loss": 0.2636, "step": 2000},
    {"epoch": 0.5254860746190226, "eval_loss": 0.2615206837654114, "eval_runtime": 532.8353, "eval_samples_per_second": 2.29, "eval_steps_per_second": 0.145, "eval_wer": 0.1925669734451023, "step": 2000},
    {"epoch": 0.5320546505517604, "grad_norm": 1.688244342803955, "learning_rate": 6.6111111111111115e-06, "loss": 0.2683, "step": 2025},
    {"epoch": 0.5386232264844981, "grad_norm": 1.8095322847366333, "learning_rate": 6.555555555555556e-06, "loss": 0.2707, "step": 2050},
    {"epoch": 0.545191802417236, "grad_norm": 1.8945379257202148, "learning_rate": 6.5000000000000004e-06, "loss": 0.2535, "step": 2075},
    {"epoch": 0.5517603783499737, "grad_norm": 1.9910788536071777, "learning_rate": 6.444444444444445e-06, "loss": 0.2486, "step": 2100},
    {"epoch": 0.5583289542827115, "grad_norm": 1.3568848371505737, "learning_rate": 6.3888888888888885e-06, "loss": 0.2755, "step": 2125},
    {"epoch": 0.5648975302154493, "grad_norm": 1.7561497688293457, "learning_rate": 6.333333333333333e-06, "loss": 0.2679, "step": 2150},
    {"epoch": 0.571466106148187, "grad_norm": 1.6052385568618774, "learning_rate": 6.277777777777778e-06, "loss": 0.2497, "step": 2175},
    {"epoch": 0.5780346820809249, "grad_norm": 1.5882091522216797, "learning_rate": 6.222222222222223e-06, "loss": 0.2673, "step": 2200},
    {"epoch": 0.5846032580136626, "grad_norm": 1.7540193796157837, "learning_rate": 6.166666666666667e-06, "loss": 0.2335, "step": 2225},
    {"epoch": 0.5911718339464004, "grad_norm": 1.4882384538650513, "learning_rate": 6.111111111111112e-06, "loss": 0.2555, "step": 2250},
    {"epoch": 0.5977404098791382, "grad_norm": 2.00600266456604, "learning_rate": 6.055555555555555e-06, "loss": 0.2687, "step": 2275},
    {"epoch": 0.604308985811876, "grad_norm": 1.4347535371780396, "learning_rate": 6e-06, "loss": 0.2681, "step": 2300},
    {"epoch": 0.6108775617446137, "grad_norm": 1.8053362369537354, "learning_rate": 5.944444444444445e-06, "loss": 0.2568, "step": 2325},
    {"epoch": 0.6174461376773516, "grad_norm": 1.657359004020691, "learning_rate": 5.88888888888889e-06, "loss": 0.246, "step": 2350},
    {"epoch": 0.6240147136100893, "grad_norm": 1.6917238235473633, "learning_rate": 5.833333333333334e-06, "loss": 0.2333, "step": 2375},
    {"epoch": 0.6305832895428272, "grad_norm": 2.0534191131591797, "learning_rate": 5.777777777777778e-06, "loss": 0.2557, "step": 2400},
    {"epoch": 0.6371518654755649, "grad_norm": 1.6997853517532349, "learning_rate": 5.722222222222222e-06, "loss": 0.2888, "step": 2425},
    {"epoch": 0.6437204414083026, "grad_norm": 1.7183536291122437, "learning_rate": 5.666666666666667e-06, "loss": 0.2364, "step": 2450},
    {"epoch": 0.6502890173410405, "grad_norm": 1.6452535390853882, "learning_rate": 5.611111111111112e-06, "loss": 0.2395, "step": 2475},
    {"epoch": 0.6568575932737782, "grad_norm": 1.7632161378860474, "learning_rate": 5.555555555555557e-06, "loss": 0.2547, "step": 2500},
    {"epoch": 0.663426169206516, "grad_norm": 1.9056440591812134, "learning_rate": 5.500000000000001e-06, "loss": 0.249, "step": 2525},
    {"epoch": 0.6699947451392538, "grad_norm": 2.083576202392578, "learning_rate": 5.444444444444445e-06, "loss": 0.2596, "step": 2550},
    {"epoch": 0.6765633210719916, "grad_norm": 2.1211483478546143, "learning_rate": 5.388888888888889e-06, "loss": 0.2435, "step": 2575},
    {"epoch": 0.6831318970047294, "grad_norm": 1.599048376083374, "learning_rate": 5.333333333333334e-06, "loss": 0.2376, "step": 2600},
    {"epoch": 0.6897004729374672, "grad_norm": 1.6461100578308105, "learning_rate": 5.2777777777777785e-06, "loss": 0.2457, "step": 2625},
    {"epoch": 0.6962690488702049, "grad_norm": 1.660069227218628, "learning_rate": 5.2222222222222226e-06, "loss": 0.2354, "step": 2650},
    {"epoch": 0.7028376248029428, "grad_norm": 2.221201181411743, "learning_rate": 5.1666666666666675e-06, "loss": 0.2794, "step": 2675},
    {"epoch": 0.7094062007356805, "grad_norm": 2.4395995140075684, "learning_rate": 5.1111111111111115e-06, "loss": 0.2499, "step": 2700},
    {"epoch": 0.7159747766684182, "grad_norm": 1.4763926267623901, "learning_rate": 5.0555555555555555e-06, "loss": 0.2783, "step": 2725},
    {"epoch": 0.7225433526011561, "grad_norm": 1.6573847532272339, "learning_rate": 5e-06, "loss": 0.2349, "step": 2750},
    {"epoch": 0.7291119285338938, "grad_norm": 1.8241550922393799, "learning_rate": 4.944444444444445e-06, "loss": 0.2449, "step": 2775},
    {"epoch": 0.7356805044666316, "grad_norm": 1.6332080364227295, "learning_rate": 4.888888888888889e-06, "loss": 0.2413, "step": 2800},
    {"epoch": 0.7422490803993694, "grad_norm": 1.626541256904602, "learning_rate": 4.833333333333333e-06, "loss": 0.2499, "step": 2825},
    {"epoch": 0.7488176563321072, "grad_norm": 1.5557328462600708, "learning_rate": 4.777777777777778e-06, "loss": 0.2534, "step": 2850},
    {"epoch": 0.755386232264845, "grad_norm": 1.932283878326416, "learning_rate": 4.722222222222222e-06, "loss": 0.2379, "step": 2875},
    {"epoch": 0.7619548081975828, "grad_norm": 1.9064332246780396, "learning_rate": 4.666666666666667e-06, "loss": 0.2323, "step": 2900},
    {"epoch": 0.7685233841303205, "grad_norm": 1.5515189170837402, "learning_rate": 4.611111111111112e-06, "loss": 0.2328, "step": 2925},
    {"epoch": 0.7750919600630584, "grad_norm": 2.1047863960266113, "learning_rate": 4.555555555555556e-06, "loss": 0.248, "step": 2950},
    {"epoch": 0.7816605359957961, "grad_norm": 1.8366841077804565, "learning_rate": 4.5e-06, "loss": 0.2584, "step": 2975},
    {"epoch": 0.7882291119285338, "grad_norm": 1.949874997138977, "learning_rate": 4.444444444444444e-06, "loss": 0.2316, "step": 3000},
    {"epoch": 0.7882291119285338, "eval_loss": 0.23398828506469727, "eval_runtime": 533.7623, "eval_samples_per_second": 2.286, "eval_steps_per_second": 0.144, "eval_wer": 0.1797877952986693, "step": 3000},
    {"epoch": 0.7947976878612717, "grad_norm": 2.0107836723327637, "learning_rate": 4.388888888888889e-06, "loss": 0.2421, "step": 3025},
    {"epoch": 0.8013662637940094, "grad_norm": 1.959560751914978, "learning_rate": 4.333333333333334e-06, "loss": 0.2319, "step": 3050},
    {"epoch": 0.8079348397267473, "grad_norm": 1.6217644214630127, "learning_rate": 4.277777777777778e-06, "loss": 0.2383, "step": 3075},
    {"epoch": 0.814503415659485, "grad_norm": 2.180353879928589, "learning_rate": 4.222222222222223e-06, "loss": 0.2374, "step": 3100},
    {"epoch": 0.8210719915922228, "grad_norm": 1.7979501485824585, "learning_rate": 4.166666666666667e-06, "loss": 0.2437, "step": 3125},
    {"epoch": 0.8276405675249606, "grad_norm": 1.6101832389831543, "learning_rate": 4.111111111111111e-06, "loss": 0.2142, "step": 3150},
    {"epoch": 0.8342091434576984, "grad_norm": 2.010740280151367, "learning_rate": 4.055555555555556e-06, "loss": 0.2489, "step": 3175},
    {"epoch": 0.8407777193904361, "grad_norm": 1.62699556350708, "learning_rate": 4.000000000000001e-06, "loss": 0.2362, "step": 3200},
    {"epoch": 0.847346295323174, "grad_norm": 1.6320332288742065, "learning_rate": 3.944444444444445e-06, "loss": 0.2134, "step": 3225},
    {"epoch": 0.8539148712559117, "grad_norm": 1.5167447328567505, "learning_rate": 3.88888888888889e-06, "loss": 0.219, "step": 3250},
    {"epoch": 0.8604834471886496, "grad_norm": 1.8277373313903809, "learning_rate": 3.833333333333334e-06, "loss": 0.2593, "step": 3275},
    {"epoch": 0.8670520231213873, "grad_norm": 1.6318010091781616, "learning_rate": 3.777777777777778e-06, "loss": 0.2151, "step": 3300},
    {"epoch": 0.873620599054125, "grad_norm": 1.9252204895019531, "learning_rate": 3.7222222222222225e-06, "loss": 0.2509, "step": 3325},
    {"epoch": 0.8801891749868629, "grad_norm": 2.156442880630493, "learning_rate": 3.6666666666666666e-06, "loss": 0.2407, "step": 3350},
    {"epoch": 0.8867577509196006, "grad_norm": 1.9615235328674316, "learning_rate": 3.6111111111111115e-06, "loss": 0.2299, "step": 3375},
    {"epoch": 0.8933263268523384, "grad_norm": 1.985635757446289, "learning_rate": 3.555555555555556e-06, "loss": 0.2451, "step": 3400},
    {"epoch": 0.8998949027850762, "grad_norm": 1.5059348344802856, "learning_rate": 3.5e-06, "loss": 0.2389, "step": 3425},
    {"epoch": 0.906463478717814, "grad_norm": 1.8141510486602783, "learning_rate": 3.444444444444445e-06, "loss": 0.2412, "step": 3450},
    {"epoch": 0.9130320546505517, "grad_norm": 1.7190760374069214, "learning_rate": 3.3888888888888893e-06, "loss": 0.217, "step": 3475},
    {"epoch": 0.9196006305832896, "grad_norm": 1.7403192520141602, "learning_rate": 3.3333333333333333e-06, "loss": 0.2386, "step": 3500},
    {"epoch": 0.9261692065160273, "grad_norm": 1.5826787948608398, "learning_rate": 3.277777777777778e-06, "loss": 0.1965, "step": 3525},
    {"epoch": 0.9327377824487652, "grad_norm": 1.8713033199310303, "learning_rate": 3.2222222222222227e-06, "loss": 0.2205, "step": 3550},
    {"epoch": 0.9393063583815029, "grad_norm": 1.6279629468917847, "learning_rate": 3.1666666666666667e-06, "loss": 0.2031, "step": 3575},
    {"epoch": 0.9458749343142406, "grad_norm": 1.5993796586990356, "learning_rate": 3.1111111111111116e-06, "loss": 0.218, "step": 3600},
    {"epoch": 0.9524435102469785, "grad_norm": 1.8748666048049927, "learning_rate": 3.055555555555556e-06, "loss": 0.2198, "step": 3625},
    {"epoch": 0.9590120861797162, "grad_norm": 2.0783231258392334, "learning_rate": 3e-06, "loss": 0.2285, "step": 3650},
    {"epoch": 0.965580662112454, "grad_norm": 1.6366628408432007, "learning_rate": 2.944444444444445e-06, "loss": 0.2244, "step": 3675},
    {"epoch": 0.9721492380451918, "grad_norm": 1.4321212768554688, "learning_rate": 2.888888888888889e-06, "loss": 0.2129, "step": 3700},
    {"epoch": 0.9787178139779296, "grad_norm": 1.9172074794769287, "learning_rate": 2.8333333333333335e-06, "loss": 0.2308, "step": 3725},
    {"epoch": 0.9852863899106674, "grad_norm": 1.601163387298584, "learning_rate": 2.7777777777777783e-06, "loss": 0.2249, "step": 3750},
    {"epoch": 0.9918549658434052, "grad_norm": 1.920453429222107, "learning_rate": 2.7222222222222224e-06, "loss": 0.241, "step": 3775},
    {"epoch": 0.9984235417761429, "grad_norm": 1.421399712562561, "learning_rate": 2.666666666666667e-06, "loss": 0.245, "step": 3800},
    {"epoch": 1.0049921177088808, "grad_norm": 1.2747637033462524, "learning_rate": 2.6111111111111113e-06, "loss": 0.183, "step": 3825},
    {"epoch": 1.0115606936416186, "grad_norm": 1.276963472366333, "learning_rate": 2.5555555555555557e-06, "loss": 0.1669, "step": 3850},
    {"epoch": 1.0181292695743562, "grad_norm": 1.6604379415512085, "learning_rate": 2.5e-06, "loss": 0.1685, "step": 3875},
    {"epoch": 1.024697845507094, "grad_norm": 1.4884883165359497, "learning_rate": 2.4444444444444447e-06, "loss": 0.1837, "step": 3900},
    {"epoch": 1.031266421439832, "grad_norm": 1.2239917516708374, "learning_rate": 2.388888888888889e-06, "loss": 0.161, "step": 3925},
    {"epoch": 1.0378349973725696, "grad_norm": 1.7419662475585938, "learning_rate": 2.3333333333333336e-06, "loss": 0.1903, "step": 3950},
    {"epoch": 1.0444035733053074, "grad_norm": 1.5548486709594727, "learning_rate": 2.277777777777778e-06, "loss": 0.159, "step": 3975},
    {"epoch": 1.0509721492380453, "grad_norm": 1.7805577516555786, "learning_rate": 2.222222222222222e-06, "loss": 0.1779, "step": 4000},
    {"epoch": 1.0509721492380453, "eval_loss": 0.21787120401859283, "eval_runtime": 531.6511, "eval_samples_per_second": 2.295, "eval_steps_per_second": 0.145, "eval_wer": 0.16243625065947595, "step": 4000},
    {"epoch": 1.057540725170783, "grad_norm": 1.646399974822998, "learning_rate": 2.166666666666667e-06, "loss": 0.1891, "step": 4025},
    {"epoch": 1.0641093011035208, "grad_norm": 2.159637928009033, "learning_rate": 2.1111111111111114e-06, "loss": 0.1712, "step": 4050},
    {"epoch": 1.0706778770362586, "grad_norm": 1.5517977476119995, "learning_rate": 2.0555555555555555e-06, "loss": 0.1582, "step": 4075},
    {"epoch": 1.0772464529689962, "grad_norm": 1.410233974456787, "learning_rate": 2.0000000000000003e-06, "loss": 0.1828, "step": 4100},
    {"epoch": 1.083815028901734, "grad_norm": 1.6741544008255005, "learning_rate": 1.944444444444445e-06, "loss": 0.1597, "step": 4125},
    {"epoch": 1.090383604834472, "grad_norm": 1.508376121520996, "learning_rate": 1.888888888888889e-06, "loss": 0.166, "step": 4150},
    {"epoch": 1.0969521807672096, "grad_norm": 1.4631690979003906, "learning_rate": 1.8333333333333333e-06, "loss": 0.159, "step": 4175},
    {"epoch": 1.1035207566999474, "grad_norm": 1.7129231691360474, "learning_rate": 1.777777777777778e-06, "loss": 0.1682, "step": 4200},
    {"epoch": 1.1100893326326853, "grad_norm": 1.7223049402236938, "learning_rate": 1.7222222222222224e-06, "loss": 0.1865, "step": 4225},
    {"epoch": 1.116657908565423, "grad_norm": 1.8834460973739624, "learning_rate": 1.6666666666666667e-06, "loss": 0.1753, "step": 4250},
    {"epoch": 1.1232264844981608, "grad_norm": 1.6631660461425781, "learning_rate": 1.6111111111111113e-06, "loss": 0.1585, "step": 4275},
    {"epoch": 1.1297950604308986, "grad_norm": 1.6180058717727661, "learning_rate": 1.5555555555555558e-06, "loss": 0.1839, "step": 4300},
    {"epoch": 1.1363636363636362, "grad_norm": 1.635799527168274, "learning_rate": 1.5e-06, "loss": 0.1685, "step": 4325},
    {"epoch": 1.142932212296374, "grad_norm": 1.4893510341644287, "learning_rate": 1.4444444444444445e-06, "loss": 0.1678, "step": 4350},
    {"epoch": 1.149500788229112, "grad_norm": 1.4803720712661743, "learning_rate": 1.3888888888888892e-06, "loss": 0.1607, "step": 4375},
    {"epoch": 1.1560693641618498, "grad_norm": 1.4839766025543213, "learning_rate": 1.3333333333333334e-06, "loss": 0.187, "step": 4400},
    {"epoch": 1.1626379400945874, "grad_norm": 1.518723487854004, "learning_rate": 1.2777777777777779e-06, "loss": 0.1628, "step": 4425},
    {"epoch": 1.1692065160273253, "grad_norm": 1.740968942642212, "learning_rate": 1.2222222222222223e-06, "loss": 0.1521, "step": 4450},
    {"epoch": 1.1757750919600631, "grad_norm": 1.941607117652893, "learning_rate": 1.1666666666666668e-06, "loss": 0.1558, "step": 4475},
    {"epoch": 1.1823436678928008, "grad_norm": 1.47350013256073, "learning_rate": 1.111111111111111e-06, "loss": 0.1758, "step": 4500},
    {"epoch": 1.1889122438255386, "grad_norm": 1.4565749168395996, "learning_rate": 1.0555555555555557e-06, "loss": 0.1804, "step": 4525},
    {"epoch": 1.1954808197582765, "grad_norm": 1.6238913536071777, "learning_rate": 1.0000000000000002e-06, "loss": 0.1596, "step": 4550},
    {"epoch": 1.202049395691014, "grad_norm": 1.6127305030822754, "learning_rate": 9.444444444444445e-07, "loss": 0.1801, "step": 4575},
    {"epoch": 1.208617971623752, "grad_norm": 1.5954357385635376, "learning_rate": 8.88888888888889e-07, "loss": 0.1616, "step": 4600},
    {"epoch": 1.2151865475564898, "grad_norm": 1.5588889122009277, "learning_rate": 8.333333333333333e-07, "loss": 0.1727, "step": 4625},
    {"epoch": 1.2217551234892277, "grad_norm": 1.2232043743133545, "learning_rate": 7.777777777777779e-07, "loss": 0.1617, "step": 4650},
    {"epoch": 1.2283236994219653, "grad_norm": 1.3071914911270142, "learning_rate": 7.222222222222222e-07, "loss": 0.1623, "step": 4675},
    {"epoch": 1.2348922753547031, "grad_norm": 1.1346869468688965, "learning_rate": 6.666666666666667e-07, "loss": 0.155, "step": 4700},
    {"epoch": 1.241460851287441, "grad_norm": 1.572501540184021, "learning_rate": 6.111111111111112e-07, "loss": 0.1742, "step": 4725},
    {"epoch": 1.2480294272201786, "grad_norm": 1.6292036771774292, "learning_rate": 5.555555555555555e-07, "loss": 0.1535, "step": 4750},
    {"epoch": 1.2545980031529165, "grad_norm": 1.8764897584915161, "learning_rate": 5.000000000000001e-07, "loss": 0.1564, "step": 4775},
    {"epoch": 1.2611665790856543, "grad_norm": 1.7719995975494385, "learning_rate": 4.444444444444445e-07, "loss": 0.1779, "step": 4800},
    {"epoch": 1.267735155018392, "grad_norm": 1.4942494630813599, "learning_rate": 3.8888888888888895e-07, "loss": 0.1743, "step": 4825},
    {"epoch": 1.2743037309511298, "grad_norm": 1.6207414865493774, "learning_rate": 3.3333333333333335e-07, "loss": 0.1731, "step": 4850},
    {"epoch": 1.2808723068838677, "grad_norm": 1.4872610569000244, "learning_rate": 2.7777777777777776e-07, "loss": 0.1708, "step": 4875},
    {"epoch": 1.2874408828166053, "grad_norm": 1.8898082971572876, "learning_rate": 2.2222222222222224e-07, "loss": 0.1819, "step": 4900},
    {"epoch": 1.2940094587493431, "grad_norm": 1.4731193780899048, "learning_rate": 1.6666666666666668e-07, "loss": 0.1714, "step": 4925},
    {"epoch": 1.300578034682081, "grad_norm": 1.4309362173080444, "learning_rate": 1.1111111111111112e-07, "loss": 0.1668, "step": 4950},
    {"epoch": 1.3071466106148186, "grad_norm": 1.6482913494110107, "learning_rate": 5.555555555555556e-08, "loss": 0.1928, "step": 4975},
    {"epoch": 1.3137151865475565, "grad_norm": 1.3571888208389282, "learning_rate": 0.0, "loss": 0.1626, "step": 5000},
    {"epoch": 1.3137151865475565, "eval_loss": 0.21081987023353577, "eval_runtime": 712.9205, "eval_samples_per_second": 1.711, "eval_steps_per_second": 0.108, "eval_wer": 0.1561639017527405, "step": 5000},
    {"epoch": 1.3137151865475565, "step": 5000, "total_flos": 5.435725490631475e+20, "train_loss": 0.2802161669254303, "train_runtime": 36290.7707, "train_samples_per_second": 4.409, "train_steps_per_second": 0.138}
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.435725490631475e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}