|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"global_step": 16950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.71, |
|
"eval_cer": 0.479669124113056, |
|
"eval_loss": 1.7289525270462036, |
|
"eval_runtime": 352.9433, |
|
"eval_samples_per_second": 28.738, |
|
"eval_steps_per_second": 3.593, |
|
"eval_wer": 0.9804149925270289, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.5435, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_cer": 0.1449701156451013, |
|
"eval_loss": 0.48100030422210693, |
|
"eval_runtime": 355.0295, |
|
"eval_samples_per_second": 28.569, |
|
"eval_steps_per_second": 3.572, |
|
"eval_wer": 0.5774374502610687, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0002909361702127659, |
|
"loss": 0.523, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_cer": 0.11564221810245158, |
|
"eval_loss": 0.3859069347381592, |
|
"eval_runtime": 358.2561, |
|
"eval_samples_per_second": 28.312, |
|
"eval_steps_per_second": 3.539, |
|
"eval_wer": 0.48120111026999746, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0002818176291793313, |
|
"loss": 0.3449, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_cer": 0.1094981215960235, |
|
"eval_loss": 0.34923675656318665, |
|
"eval_runtime": 353.6657, |
|
"eval_samples_per_second": 28.68, |
|
"eval_steps_per_second": 3.585, |
|
"eval_wer": 0.44977581086589413, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0002726990881458966, |
|
"loss": 0.2814, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_cer": 0.10994501740010437, |
|
"eval_loss": 0.36604171991348267, |
|
"eval_runtime": 352.2579, |
|
"eval_samples_per_second": 28.794, |
|
"eval_steps_per_second": 3.6, |
|
"eval_wer": 0.44657310895009605, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_cer": 0.10429683107626926, |
|
"eval_loss": 0.37658488750457764, |
|
"eval_runtime": 352.034, |
|
"eval_samples_per_second": 28.813, |
|
"eval_steps_per_second": 3.602, |
|
"eval_wer": 0.4235330654709913, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.000263580547112462, |
|
"loss": 0.2463, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_cer": 0.10097250293365471, |
|
"eval_loss": 0.34164857864379883, |
|
"eval_runtime": 355.8322, |
|
"eval_samples_per_second": 28.505, |
|
"eval_steps_per_second": 3.563, |
|
"eval_wer": 0.4119256973155535, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0002544620060790273, |
|
"loss": 0.2296, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_cer": 0.09793072826716873, |
|
"eval_loss": 0.3322136402130127, |
|
"eval_runtime": 355.102, |
|
"eval_samples_per_second": 28.564, |
|
"eval_steps_per_second": 3.571, |
|
"eval_wer": 0.4012500242628933, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00024534346504559266, |
|
"loss": 0.2143, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_cer": 0.09717821339707125, |
|
"eval_loss": 0.3369796872138977, |
|
"eval_runtime": 355.7454, |
|
"eval_samples_per_second": 28.512, |
|
"eval_steps_per_second": 3.564, |
|
"eval_wer": 0.395601622702304, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00023622492401215801, |
|
"loss": 0.1955, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_cer": 0.09977020906074034, |
|
"eval_loss": 0.3401270806789398, |
|
"eval_runtime": 359.7076, |
|
"eval_samples_per_second": 28.198, |
|
"eval_steps_per_second": 3.525, |
|
"eval_wer": 0.40330751761486056, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_cer": 0.09622099141671736, |
|
"eval_loss": 0.33754295110702515, |
|
"eval_runtime": 352.1651, |
|
"eval_samples_per_second": 28.802, |
|
"eval_steps_per_second": 3.601, |
|
"eval_wer": 0.3889244744657311, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002271063829787234, |
|
"loss": 0.1845, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"eval_cer": 0.09233155632184571, |
|
"eval_loss": 0.34551626443862915, |
|
"eval_runtime": 357.5651, |
|
"eval_samples_per_second": 28.367, |
|
"eval_steps_per_second": 3.546, |
|
"eval_wer": 0.37524020264368485, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.00021798784194528871, |
|
"loss": 0.1752, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_cer": 0.09245553386749396, |
|
"eval_loss": 0.3335849642753601, |
|
"eval_runtime": 351.3613, |
|
"eval_samples_per_second": 28.868, |
|
"eval_steps_per_second": 3.609, |
|
"eval_wer": 0.37176575632291, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.0002088693009118541, |
|
"loss": 0.1705, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"eval_cer": 0.08918598650086351, |
|
"eval_loss": 0.3145359754562378, |
|
"eval_runtime": 355.7051, |
|
"eval_samples_per_second": 28.515, |
|
"eval_steps_per_second": 3.565, |
|
"eval_wer": 0.3653021215473903, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.00019975075987841941, |
|
"loss": 0.1585, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"eval_cer": 0.09218451318630942, |
|
"eval_loss": 0.34097233414649963, |
|
"eval_runtime": 352.3797, |
|
"eval_samples_per_second": 28.784, |
|
"eval_steps_per_second": 3.598, |
|
"eval_wer": 0.37370678778703004, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"eval_cer": 0.08989237019118491, |
|
"eval_loss": 0.3296053409576416, |
|
"eval_runtime": 350.4695, |
|
"eval_samples_per_second": 28.941, |
|
"eval_steps_per_second": 3.618, |
|
"eval_wer": 0.3664279197965799, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 0.0001906322188449848, |
|
"loss": 0.1474, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"eval_cer": 0.08988083739624089, |
|
"eval_loss": 0.34918734431266785, |
|
"eval_runtime": 352.3033, |
|
"eval_samples_per_second": 28.791, |
|
"eval_steps_per_second": 3.599, |
|
"eval_wer": 0.3589937692890002, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.00018153191489361702, |
|
"loss": 0.1485, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"eval_cer": 0.08672950117778669, |
|
"eval_loss": 0.31763964891433716, |
|
"eval_runtime": 352.8906, |
|
"eval_samples_per_second": 28.743, |
|
"eval_steps_per_second": 3.593, |
|
"eval_wer": 0.3506085133640016, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 0.00017241337386018235, |
|
"loss": 0.137, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"eval_cer": 0.08901587777543918, |
|
"eval_loss": 0.3532153367996216, |
|
"eval_runtime": 357.5465, |
|
"eval_samples_per_second": 28.368, |
|
"eval_steps_per_second": 3.546, |
|
"eval_wer": 0.360041926279625, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.00016329483282674772, |
|
"loss": 0.1291, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"eval_cer": 0.08733497291234787, |
|
"eval_loss": 0.33181944489479065, |
|
"eval_runtime": 352.5023, |
|
"eval_samples_per_second": 28.774, |
|
"eval_steps_per_second": 3.597, |
|
"eval_wer": 0.3570527378248801, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"eval_cer": 0.08829796129017377, |
|
"eval_loss": 0.33532437682151794, |
|
"eval_runtime": 357.1338, |
|
"eval_samples_per_second": 28.401, |
|
"eval_steps_per_second": 3.55, |
|
"eval_wer": 0.3547623206972185, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 0.00015417629179331305, |
|
"loss": 0.1274, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"eval_cer": 0.08226342633571389, |
|
"eval_loss": 0.32346823811531067, |
|
"eval_runtime": 349.5389, |
|
"eval_samples_per_second": 29.018, |
|
"eval_steps_per_second": 3.628, |
|
"eval_wer": 0.339602864962441, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 0.00014505775075987842, |
|
"loss": 0.1198, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"eval_cer": 0.08322353151480379, |
|
"eval_loss": 0.32590439915657043, |
|
"eval_runtime": 352.9664, |
|
"eval_samples_per_second": 28.736, |
|
"eval_steps_per_second": 3.592, |
|
"eval_wer": 0.33894291426464024, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.00013595744680851063, |
|
"loss": 0.1164, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_cer": 0.084353745419318, |
|
"eval_loss": 0.32632604241371155, |
|
"eval_runtime": 355.0498, |
|
"eval_samples_per_second": 28.568, |
|
"eval_steps_per_second": 3.571, |
|
"eval_wer": 0.3411362798190959, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 0.00012683890577507598, |
|
"loss": 0.1119, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_cer": 0.08243353506113824, |
|
"eval_loss": 0.32535773515701294, |
|
"eval_runtime": 352.3077, |
|
"eval_samples_per_second": 28.79, |
|
"eval_steps_per_second": 3.599, |
|
"eval_wer": 0.3377006541276034, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"eval_cer": 0.0811533948223517, |
|
"eval_loss": 0.3243008255958557, |
|
"eval_runtime": 356.0414, |
|
"eval_samples_per_second": 28.488, |
|
"eval_steps_per_second": 3.561, |
|
"eval_wer": 0.3330615889283565, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 0.00011772036474164133, |
|
"loss": 0.1054, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"eval_cer": 0.07895063098804338, |
|
"eval_loss": 0.32234683632850647, |
|
"eval_runtime": 353.14, |
|
"eval_samples_per_second": 28.722, |
|
"eval_steps_per_second": 3.591, |
|
"eval_wer": 0.3239387410469924, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 0.00010860182370820666, |
|
"loss": 0.1017, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"eval_cer": 0.07741965245922436, |
|
"eval_loss": 0.305361270904541, |
|
"eval_runtime": 348.3343, |
|
"eval_samples_per_second": 29.119, |
|
"eval_steps_per_second": 3.64, |
|
"eval_wer": 0.3189502901842039, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 20.35, |
|
"learning_rate": 9.948328267477204e-05, |
|
"loss": 0.0964, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"eval_cer": 0.07850373518396249, |
|
"eval_loss": 0.32777705788612366, |
|
"eval_runtime": 353.3356, |
|
"eval_samples_per_second": 28.706, |
|
"eval_steps_per_second": 3.589, |
|
"eval_wer": 0.3236669966420156, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 9.036474164133739e-05, |
|
"loss": 0.0903, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"eval_cer": 0.07744560124784841, |
|
"eval_loss": 0.3166551887989044, |
|
"eval_runtime": 353.0166, |
|
"eval_samples_per_second": 28.732, |
|
"eval_steps_per_second": 3.592, |
|
"eval_wer": 0.3177274403618083, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"eval_cer": 0.07655469283842266, |
|
"eval_loss": 0.33310163021087646, |
|
"eval_runtime": 354.4672, |
|
"eval_samples_per_second": 28.615, |
|
"eval_steps_per_second": 3.577, |
|
"eval_wer": 0.3124478347794018, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 8.124620060790274e-05, |
|
"loss": 0.0886, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 22.65, |
|
"eval_cer": 0.07452492092827466, |
|
"eval_loss": 0.3098578155040741, |
|
"eval_runtime": 354.4398, |
|
"eval_samples_per_second": 28.617, |
|
"eval_steps_per_second": 3.577, |
|
"eval_wer": 0.30889574720006213, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 7.214589665653494e-05, |
|
"loss": 0.0836, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"eval_cer": 0.07314963513119996, |
|
"eval_loss": 0.3170570433139801, |
|
"eval_runtime": 351.2874, |
|
"eval_samples_per_second": 28.874, |
|
"eval_steps_per_second": 3.61, |
|
"eval_wer": 0.3047613501814864, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 23.89, |
|
"learning_rate": 6.30273556231003e-05, |
|
"loss": 0.0796, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"eval_cer": 0.07325919668316817, |
|
"eval_loss": 0.315768837928772, |
|
"eval_runtime": 354.0965, |
|
"eval_samples_per_second": 28.645, |
|
"eval_steps_per_second": 3.581, |
|
"eval_wer": 0.30410139948368564, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 5.390881458966565e-05, |
|
"loss": 0.0739, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"eval_cer": 0.07206266920772582, |
|
"eval_loss": 0.3202644884586334, |
|
"eval_runtime": 351.0812, |
|
"eval_samples_per_second": 28.891, |
|
"eval_steps_per_second": 3.612, |
|
"eval_wer": 0.3002775674993692, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"eval_cer": 0.07125249036290822, |
|
"eval_loss": 0.3138331472873688, |
|
"eval_runtime": 353.8044, |
|
"eval_samples_per_second": 28.668, |
|
"eval_steps_per_second": 3.584, |
|
"eval_wer": 0.2973660203031891, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 25.66, |
|
"learning_rate": 4.4790273556231e-05, |
|
"loss": 0.0742, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 26.19, |
|
"eval_cer": 0.07109103123369191, |
|
"eval_loss": 0.3196839392185211, |
|
"eval_runtime": 353.996, |
|
"eval_samples_per_second": 28.653, |
|
"eval_steps_per_second": 3.582, |
|
"eval_wer": 0.2958520157611755, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 3.568996960486322e-05, |
|
"loss": 0.07, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"eval_cer": 0.07031256757497037, |
|
"eval_loss": 0.3232352137565613, |
|
"eval_runtime": 355.2159, |
|
"eval_samples_per_second": 28.554, |
|
"eval_steps_per_second": 3.57, |
|
"eval_wer": 0.2951726547487335, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 27.43, |
|
"learning_rate": 2.6571428571428566e-05, |
|
"loss": 0.0654, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"eval_cer": 0.07008479487482593, |
|
"eval_loss": 0.3242589831352234, |
|
"eval_runtime": 352.5643, |
|
"eval_samples_per_second": 28.769, |
|
"eval_steps_per_second": 3.597, |
|
"eval_wer": 0.2938527533531319, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"learning_rate": 1.745288753799392e-05, |
|
"loss": 0.0631, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"eval_cer": 0.06881907062971944, |
|
"eval_loss": 0.3212898373603821, |
|
"eval_runtime": 352.1058, |
|
"eval_samples_per_second": 28.807, |
|
"eval_steps_per_second": 3.601, |
|
"eval_wer": 0.2875638114093829, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 29.03, |
|
"eval_cer": 0.06853940035232689, |
|
"eval_loss": 0.3151107728481293, |
|
"eval_runtime": 353.743, |
|
"eval_samples_per_second": 28.673, |
|
"eval_steps_per_second": 3.585, |
|
"eval_wer": 0.28799083833148936, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 8.33434650455927e-06, |
|
"loss": 0.0607, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 29.73, |
|
"eval_cer": 0.06810115414445403, |
|
"eval_loss": 0.31835824251174927, |
|
"eval_runtime": 352.8555, |
|
"eval_samples_per_second": 28.745, |
|
"eval_steps_per_second": 3.594, |
|
"eval_wer": 0.28665152662124654, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 16950, |
|
"total_flos": 1.1955997003691401e+20, |
|
"train_loss": 0.2775966154790558, |
|
"train_runtime": 58895.0057, |
|
"train_samples_per_second": 18.401, |
|
"train_steps_per_second": 0.288 |
|
} |
|
], |
|
"max_steps": 16950, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.1955997003691401e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|