{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.544072948328267, "eval_steps": 330, "global_step": 9570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 0.0003, "loss": 33.5779, "step": 150 }, { "epoch": 0.46, "learning_rate": 0.00029537037037037037, "loss": 6.0153, "step": 300 }, { "epoch": 0.5, "eval_cer": 0.9522337427844499, "eval_loss": 5.343827724456787, "eval_runtime": 251.7628, "eval_samples_per_second": 10.573, "eval_steps_per_second": 0.663, "step": 330 }, { "epoch": 0.68, "learning_rate": 0.0002907407407407407, "loss": 5.513, "step": 450 }, { "epoch": 0.91, "learning_rate": 0.0002861111111111111, "loss": 5.3776, "step": 600 }, { "epoch": 1.0, "eval_cer": 0.9409077230323093, "eval_loss": 5.153414726257324, "eval_runtime": 250.9411, "eval_samples_per_second": 10.608, "eval_steps_per_second": 0.665, "step": 660 }, { "epoch": 1.14, "learning_rate": 0.00028148148148148146, "loss": 5.3202, "step": 750 }, { "epoch": 1.37, "learning_rate": 0.0002768518518518518, "loss": 5.2604, "step": 900 }, { "epoch": 1.5, "eval_cer": 0.9108417913714333, "eval_loss": 5.083229064941406, "eval_runtime": 252.1577, "eval_samples_per_second": 10.557, "eval_steps_per_second": 0.662, "step": 990 }, { "epoch": 1.6, "learning_rate": 0.0002722222222222222, "loss": 5.2287, "step": 1050 }, { "epoch": 1.82, "learning_rate": 0.00026759259259259255, "loss": 5.2393, "step": 1200 }, { "epoch": 2.01, "eval_cer": 0.9073126692747517, "eval_loss": 5.065478324890137, "eval_runtime": 251.9951, "eval_samples_per_second": 10.564, "eval_steps_per_second": 0.663, "step": 1320 }, { "epoch": 2.05, "learning_rate": 0.00026296296296296294, "loss": 5.2058, "step": 1350 }, { "epoch": 2.28, "learning_rate": 0.00025833333333333334, "loss": 5.1796, "step": 1500 }, { "epoch": 2.51, "learning_rate": 0.0002537037037037037, "loss": 5.1721, "step": 1650 }, { "epoch": 2.51, "eval_cer": 0.9000355647963232, "eval_loss": 5.046383380889893, "eval_runtime": 251.3928, "eval_samples_per_second": 10.589, "eval_steps_per_second": 0.664, "step": 1650 }, { "epoch": 2.74, "learning_rate": 0.0002490740740740741, "loss": 5.1736, "step": 1800 }, { "epoch": 2.96, "learning_rate": 0.00024444444444444443, "loss": 5.1619, "step": 1950 }, { "epoch": 3.01, "eval_cer": 0.9044674855689, "eval_loss": 5.024378776550293, "eval_runtime": 248.9825, "eval_samples_per_second": 10.692, "eval_steps_per_second": 0.671, "step": 1980 }, { "epoch": 3.19, "learning_rate": 0.0002398148148148148, "loss": 5.1484, "step": 2100 }, { "epoch": 3.42, "learning_rate": 0.00023518518518518517, "loss": 5.1308, "step": 2250 }, { "epoch": 3.51, "eval_cer": 0.9020326648975461, "eval_loss": 5.021634578704834, "eval_runtime": 250.5536, "eval_samples_per_second": 10.624, "eval_steps_per_second": 0.667, "step": 2310 }, { "epoch": 3.65, "learning_rate": 0.00023055555555555552, "loss": 5.0855, "step": 2400 }, { "epoch": 3.88, "learning_rate": 0.00022592592592592591, "loss": 5.0971, "step": 2550 }, { "epoch": 4.01, "eval_cer": 0.9040297649987689, "eval_loss": 4.9340667724609375, "eval_runtime": 248.8115, "eval_samples_per_second": 10.699, "eval_steps_per_second": 0.671, "step": 2640 }, { "epoch": 4.1, "learning_rate": 0.00022129629629629626, "loss": 5.0599, "step": 2700 }, { "epoch": 4.33, "learning_rate": 0.00021666666666666666, "loss": 5.0137, "step": 2850 }, { "epoch": 4.51, "eval_cer": 0.9143709134681148, "eval_loss": 4.879497051239014, "eval_runtime": 248.574, "eval_samples_per_second": 10.709, "eval_steps_per_second": 0.672, "step": 2970 }, { "epoch": 4.56, "learning_rate": 0.00021203703703703703, "loss": 4.9809, "step": 3000 }, { "epoch": 4.79, "learning_rate": 0.00020740740740740737, "loss": 4.9939, "step": 3150 }, { "epoch": 5.02, "learning_rate": 0.00020277777777777777, "loss": 4.9341, "step": 3300 }, { "epoch": 5.02, "eval_cer": 0.9039476923918693, "eval_loss": 4.725036144256592, "eval_runtime": 250.9019, "eval_samples_per_second": 10.61, "eval_steps_per_second": 0.666, "step": 3300 }, { "epoch": 5.24, "learning_rate": 0.00019814814814814814, "loss": 4.8114, "step": 3450 }, { "epoch": 5.47, "learning_rate": 0.00019351851851851849, "loss": 4.6832, "step": 3600 }, { "epoch": 5.52, "eval_cer": 0.8367302273411211, "eval_loss": 4.214047908782959, "eval_runtime": 249.955, "eval_samples_per_second": 10.65, "eval_steps_per_second": 0.668, "step": 3630 }, { "epoch": 5.7, "learning_rate": 0.00018888888888888888, "loss": 4.4588, "step": 3750 }, { "epoch": 5.93, "learning_rate": 0.00018425925925925923, "loss": 4.1627, "step": 3900 }, { "epoch": 6.02, "eval_cer": 0.7318140781878368, "eval_loss": 3.4010486602783203, "eval_runtime": 249.3206, "eval_samples_per_second": 10.677, "eval_steps_per_second": 0.67, "step": 3960 }, { "epoch": 6.16, "learning_rate": 0.0001796296296296296, "loss": 3.7597, "step": 4050 }, { "epoch": 6.38, "learning_rate": 0.000175, "loss": 3.5448, "step": 4200 }, { "epoch": 6.52, "eval_cer": 0.6479905890077422, "eval_loss": 2.882997989654541, "eval_runtime": 248.069, "eval_samples_per_second": 10.731, "eval_steps_per_second": 0.673, "step": 4290 }, { "epoch": 6.61, "learning_rate": 0.00017037037037037034, "loss": 3.3922, "step": 4350 }, { "epoch": 6.84, "learning_rate": 0.00016574074074074074, "loss": 3.2576, "step": 4500 }, { "epoch": 7.02, "eval_cer": 0.6265696386069542, "eval_loss": 2.6253392696380615, "eval_runtime": 244.3615, "eval_samples_per_second": 10.894, "eval_steps_per_second": 0.683, "step": 4620 }, { "epoch": 7.07, "learning_rate": 0.0001611111111111111, "loss": 3.0846, "step": 4650 }, { "epoch": 7.29, "learning_rate": 0.00015648148148148146, "loss": 2.9344, "step": 4800 }, { "epoch": 7.52, "learning_rate": 0.00015185185185185185, "loss": 2.8561, "step": 4950 }, { "epoch": 7.52, "eval_cer": 0.5866002790468634, "eval_loss": 2.430042266845703, "eval_runtime": 245.691, "eval_samples_per_second": 10.835, "eval_steps_per_second": 0.68, "step": 4950 }, { "epoch": 7.75, "learning_rate": 0.00014722222222222223, "loss": 2.8167, "step": 5100 }, { "epoch": 7.98, "learning_rate": 0.00014259259259259257, "loss": 2.7894, "step": 5250 }, { "epoch": 8.02, "eval_cer": 0.575028041474024, "eval_loss": 2.2997841835021973, "eval_runtime": 245.9682, "eval_samples_per_second": 10.823, "eval_steps_per_second": 0.679, "step": 5280 }, { "epoch": 8.21, "learning_rate": 0.00013796296296296294, "loss": 2.6472, "step": 5400 }, { "epoch": 8.43, "learning_rate": 0.0001333333333333333, "loss": 2.6018, "step": 5550 }, { "epoch": 8.53, "eval_cer": 0.554892895247996, "eval_loss": 2.187838554382324, "eval_runtime": 244.7687, "eval_samples_per_second": 10.876, "eval_steps_per_second": 0.682, "step": 5610 }, { "epoch": 8.66, "learning_rate": 0.0001287037037037037, "loss": 2.5751, "step": 5700 }, { "epoch": 8.89, "learning_rate": 0.00012407407407407406, "loss": 2.546, "step": 5850 }, { "epoch": 9.03, "eval_cer": 0.5350860394495663, "eval_loss": 2.1450469493865967, "eval_runtime": 244.8384, "eval_samples_per_second": 10.872, "eval_steps_per_second": 0.682, "step": 5940 }, { "epoch": 9.12, "learning_rate": 0.00011944444444444443, "loss": 2.4555, "step": 6000 }, { "epoch": 9.35, "learning_rate": 0.0001148148148148148, "loss": 2.3787, "step": 6150 }, { "epoch": 9.53, "eval_cer": 0.5339917380242388, "eval_loss": 2.102729558944702, "eval_runtime": 244.3347, "eval_samples_per_second": 10.895, "eval_steps_per_second": 0.683, "step": 6270 }, { "epoch": 9.57, "learning_rate": 0.00011018518518518518, "loss": 2.3783, "step": 6300 }, { "epoch": 9.8, "learning_rate": 0.00010555555555555555, "loss": 2.3806, "step": 6450 }, { "epoch": 10.03, "learning_rate": 0.00010092592592592591, "loss": 2.335, "step": 6600 }, { "epoch": 10.03, "eval_cer": 0.5165923453615299, "eval_loss": 2.0303709506988525, "eval_runtime": 244.6547, "eval_samples_per_second": 10.881, "eval_steps_per_second": 0.683, "step": 6600 }, { "epoch": 10.26, "learning_rate": 9.629629629629628e-05, "loss": 2.2373, "step": 6750 }, { "epoch": 10.49, "learning_rate": 9.166666666666667e-05, "loss": 2.2138, "step": 6900 }, { "epoch": 10.53, "eval_cer": 0.5164555576833639, "eval_loss": 2.0100014209747314, "eval_runtime": 244.9943, "eval_samples_per_second": 10.866, "eval_steps_per_second": 0.682, "step": 6930 }, { "epoch": 10.71, "learning_rate": 8.703703703703704e-05, "loss": 2.2174, "step": 7050 }, { "epoch": 10.94, "learning_rate": 8.24074074074074e-05, "loss": 2.2381, "step": 7200 }, { "epoch": 11.03, "eval_cer": 0.5031050802943671, "eval_loss": 1.9650695323944092, "eval_runtime": 244.4808, "eval_samples_per_second": 10.888, "eval_steps_per_second": 0.683, "step": 7260 }, { "epoch": 11.17, "learning_rate": 7.777777777777777e-05, "loss": 2.155, "step": 7350 }, { "epoch": 11.4, "learning_rate": 7.314814814814814e-05, "loss": 2.1108, "step": 7500 }, { "epoch": 11.53, "eval_cer": 0.5034880857932318, "eval_loss": 1.9666314125061035, "eval_runtime": 244.872, "eval_samples_per_second": 10.871, "eval_steps_per_second": 0.682, "step": 7590 }, { "epoch": 11.63, "learning_rate": 6.851851851851851e-05, "loss": 2.1221, "step": 7650 }, { "epoch": 11.85, "learning_rate": 6.388888888888888e-05, "loss": 2.0916, "step": 7800 }, { "epoch": 12.04, "eval_cer": 0.49982217601838425, "eval_loss": 1.913594365119934, "eval_runtime": 244.907, "eval_samples_per_second": 10.869, "eval_steps_per_second": 0.682, "step": 7920 }, { "epoch": 12.08, "learning_rate": 5.925925925925925e-05, "loss": 2.0517, "step": 7950 }, { "epoch": 12.31, "learning_rate": 5.4629629629629624e-05, "loss": 2.0279, "step": 8100 }, { "epoch": 12.54, "learning_rate": 4.9999999999999996e-05, "loss": 2.0229, "step": 8250 }, { "epoch": 12.54, "eval_cer": 0.5027767898667688, "eval_loss": 1.898772120475769, "eval_runtime": 244.3507, "eval_samples_per_second": 10.894, "eval_steps_per_second": 0.683, "step": 8250 }, { "epoch": 12.77, "learning_rate": 4.537037037037037e-05, "loss": 1.9891, "step": 8400 }, { "epoch": 12.99, "learning_rate": 4.074074074074074e-05, "loss": 2.0056, "step": 8550 }, { "epoch": 13.04, "eval_cer": 0.4995759581976856, "eval_loss": 1.8768519163131714, "eval_runtime": 244.2609, "eval_samples_per_second": 10.898, "eval_steps_per_second": 0.684, "step": 8580 }, { "epoch": 13.22, "learning_rate": 3.61111111111111e-05, "loss": 1.9451, "step": 8700 }, { "epoch": 13.45, "learning_rate": 3.1481481481481474e-05, "loss": 1.9245, "step": 8850 }, { "epoch": 13.54, "eval_cer": 0.495472327852707, "eval_loss": 1.8715523481369019, "eval_runtime": 244.453, "eval_samples_per_second": 10.89, "eval_steps_per_second": 0.683, "step": 8910 }, { "epoch": 13.68, "learning_rate": 2.685185185185185e-05, "loss": 1.916, "step": 9000 }, { "epoch": 13.91, "learning_rate": 2.222222222222222e-05, "loss": 1.9378, "step": 9150 }, { "epoch": 14.04, "eval_cer": 0.49459688671244495, "eval_loss": 1.8560909032821655, "eval_runtime": 244.76, "eval_samples_per_second": 10.876, "eval_steps_per_second": 0.682, "step": 9240 }, { "epoch": 14.13, "learning_rate": 1.759259259259259e-05, "loss": 1.9169, "step": 9300 }, { "epoch": 14.36, "learning_rate": 1.296296296296296e-05, "loss": 1.9003, "step": 9450 }, { "epoch": 14.54, "eval_cer": 0.49363937296528326, "eval_loss": 1.848546028137207, "eval_runtime": 244.6555, "eval_samples_per_second": 10.881, "eval_steps_per_second": 0.683, "step": 9570 } ], "logging_steps": 150, "max_steps": 9870, "num_train_epochs": 15, "save_steps": 330, "total_flos": 2.9439549367006527e+19, "trial_name": null, "trial_params": null }