{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 65930, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 0.0003, "loss": 27.6845, "step": 500 }, { "epoch": 0.15, "learning_rate": 0.0002977074736359468, "loss": 5.9239, "step": 1000 }, { "epoch": 0.23, "learning_rate": 0.0002954149472718936, "loss": 5.8508, "step": 1500 }, { "epoch": 0.3, "learning_rate": 0.0002931224209078404, "loss": 5.8132, "step": 2000 }, { "epoch": 0.38, "learning_rate": 0.00029082989454378723, "loss": 5.7398, "step": 2500 }, { "epoch": 0.46, "learning_rate": 0.00028853736817973405, "loss": 5.6899, "step": 3000 }, { "epoch": 0.53, "learning_rate": 0.00028624484181568086, "loss": 5.669, "step": 3500 }, { "epoch": 0.61, "learning_rate": 0.0002839523154516277, "loss": 5.6871, "step": 4000 }, { "epoch": 0.68, "learning_rate": 0.0002816597890875745, "loss": 5.6503, "step": 4500 }, { "epoch": 0.76, "learning_rate": 0.0002793672627235213, "loss": 5.571, "step": 5000 }, { "epoch": 0.83, "learning_rate": 0.0002770747363594681, "loss": 5.4537, "step": 5500 }, { "epoch": 0.91, "learning_rate": 0.00027478220999541494, "loss": 5.3316, "step": 6000 }, { "epoch": 0.99, "learning_rate": 0.0002724896836313617, "loss": 5.0444, "step": 6500 }, { "epoch": 1.0, "eval_cer": 0.8763969974979149, "eval_loss": 4.669518947601318, "eval_runtime": 55.1634, "eval_samples_per_second": 5.819, "eval_steps_per_second": 2.919, "step": 6593 }, { "epoch": 1.06, "learning_rate": 0.00027019715726730857, "loss": 4.7276, "step": 7000 }, { "epoch": 1.14, "learning_rate": 0.0002679046309032554, "loss": 4.5233, "step": 7500 }, { "epoch": 1.21, "learning_rate": 0.0002656121045392022, "loss": 4.3549, "step": 8000 }, { "epoch": 1.29, "learning_rate": 0.000263319578175149, "loss": 4.181, "step": 8500 }, { "epoch": 1.37, "learning_rate": 0.0002610270518110958, "loss": 4.0707, "step": 9000 }, { "epoch": 1.44, "learning_rate": 0.00025873452544704264, "loss": 3.8964, "step": 9500 }, { "epoch": 1.52, "learning_rate": 0.0002564419990829894, "loss": 3.7554, "step": 10000 }, { "epoch": 1.59, "learning_rate": 0.00025414947271893627, "loss": 3.673, "step": 10500 }, { "epoch": 1.67, "learning_rate": 0.00025185694635488303, "loss": 3.5734, "step": 11000 }, { "epoch": 1.74, "learning_rate": 0.0002495644199908299, "loss": 3.5483, "step": 11500 }, { "epoch": 1.82, "learning_rate": 0.00024727189362677666, "loss": 3.4484, "step": 12000 }, { "epoch": 1.9, "learning_rate": 0.00024497936726272353, "loss": 3.3388, "step": 12500 }, { "epoch": 1.97, "learning_rate": 0.0002426868408986703, "loss": 3.2619, "step": 13000 }, { "epoch": 2.0, "eval_cer": 0.6248540450375313, "eval_loss": 2.9759151935577393, "eval_runtime": 54.7232, "eval_samples_per_second": 5.866, "eval_steps_per_second": 2.942, "step": 13186 }, { "epoch": 2.05, "learning_rate": 0.00024039431453461713, "loss": 3.1258, "step": 13500 }, { "epoch": 2.12, "learning_rate": 0.00023810178817056395, "loss": 3.0259, "step": 14000 }, { "epoch": 2.2, "learning_rate": 0.00023580926180651076, "loss": 3.0577, "step": 14500 }, { "epoch": 2.28, "learning_rate": 0.00023351673544245758, "loss": 2.9761, "step": 15000 }, { "epoch": 2.35, "learning_rate": 0.00023122420907840436, "loss": 2.8932, "step": 15500 }, { "epoch": 2.43, "learning_rate": 0.0002289316827143512, "loss": 2.9507, "step": 16000 }, { "epoch": 2.5, "learning_rate": 0.000226639156350298, "loss": 2.9112, "step": 16500 }, { "epoch": 2.58, "learning_rate": 0.00022434662998624484, "loss": 2.9235, "step": 17000 }, { "epoch": 2.65, "learning_rate": 0.00022205410362219162, "loss": 2.8323, "step": 17500 }, { "epoch": 2.73, "learning_rate": 0.00021976157725813846, "loss": 2.8452, "step": 18000 }, { "epoch": 2.81, "learning_rate": 0.00021746905089408525, "loss": 2.8166, "step": 18500 }, { "epoch": 2.88, "learning_rate": 0.00021517652453003207, "loss": 2.807, "step": 19000 }, { "epoch": 2.96, "learning_rate": 0.00021288399816597888, "loss": 2.7859, "step": 19500 }, { "epoch": 3.0, "eval_cer": 0.5421184320266889, "eval_loss": 2.5369625091552734, "eval_runtime": 56.2807, "eval_samples_per_second": 5.704, "eval_steps_per_second": 2.861, "step": 19779 }, { "epoch": 3.03, "learning_rate": 0.0002105914718019257, "loss": 2.664, "step": 20000 }, { "epoch": 3.11, "learning_rate": 0.00020829894543787254, "loss": 2.4556, "step": 20500 }, { "epoch": 3.19, "learning_rate": 0.00020600641907381933, "loss": 2.4617, "step": 21000 }, { "epoch": 3.26, "learning_rate": 0.00020371389270976617, "loss": 2.5162, "step": 21500 }, { "epoch": 3.34, "learning_rate": 0.00020142136634571296, "loss": 2.5092, "step": 22000 }, { "epoch": 3.41, "learning_rate": 0.00019912883998165977, "loss": 2.4437, "step": 22500 }, { "epoch": 3.49, "learning_rate": 0.00019683631361760659, "loss": 2.4205, "step": 23000 }, { "epoch": 3.56, "learning_rate": 0.0001945437872535534, "loss": 2.45, "step": 23500 }, { "epoch": 3.64, "learning_rate": 0.00019225126088950022, "loss": 2.4553, "step": 24000 }, { "epoch": 3.72, "learning_rate": 0.00018995873452544703, "loss": 2.3963, "step": 24500 }, { "epoch": 3.79, "learning_rate": 0.00018766620816139382, "loss": 2.3739, "step": 25000 }, { "epoch": 3.87, "learning_rate": 0.00018537368179734066, "loss": 2.463, "step": 25500 }, { "epoch": 3.94, "learning_rate": 0.00018308115543328745, "loss": 2.3947, "step": 26000 }, { "epoch": 4.0, "eval_cer": 0.515929941618015, "eval_loss": 2.358245611190796, "eval_runtime": 55.2411, "eval_samples_per_second": 5.811, "eval_steps_per_second": 2.914, "step": 26372 }, { "epoch": 4.02, "learning_rate": 0.0001807886290692343, "loss": 2.3494, "step": 26500 }, { "epoch": 4.1, "learning_rate": 0.0001784961027051811, "loss": 2.1528, "step": 27000 }, { "epoch": 4.17, "learning_rate": 0.00017620357634112792, "loss": 2.1103, "step": 27500 }, { "epoch": 4.25, "learning_rate": 0.00017391104997707473, "loss": 2.0831, "step": 28000 }, { "epoch": 4.32, "learning_rate": 0.00017161852361302152, "loss": 2.124, "step": 28500 }, { "epoch": 4.4, "learning_rate": 0.00016932599724896836, "loss": 2.0801, "step": 29000 }, { "epoch": 4.47, "learning_rate": 0.00016703347088491515, "loss": 2.1513, "step": 29500 }, { "epoch": 4.55, "learning_rate": 0.000164740944520862, "loss": 2.0648, "step": 30000 }, { "epoch": 4.63, "learning_rate": 0.00016244841815680878, "loss": 2.0649, "step": 30500 }, { "epoch": 4.7, "learning_rate": 0.0001601558917927556, "loss": 2.1093, "step": 31000 }, { "epoch": 4.78, "learning_rate": 0.0001578633654287024, "loss": 2.1226, "step": 31500 }, { "epoch": 4.85, "learning_rate": 0.00015557083906464923, "loss": 2.066, "step": 32000 }, { "epoch": 4.93, "learning_rate": 0.00015327831270059604, "loss": 2.1313, "step": 32500 }, { "epoch": 5.0, "eval_cer": 0.4872393661384487, "eval_loss": 2.246213436126709, "eval_runtime": 56.125, "eval_samples_per_second": 5.719, "eval_steps_per_second": 2.869, "step": 32965 }, { "epoch": 5.01, "learning_rate": 0.00015098578633654285, "loss": 2.0466, "step": 33000 }, { "epoch": 5.08, "learning_rate": 0.00014869325997248967, "loss": 1.7844, "step": 33500 }, { "epoch": 5.16, "learning_rate": 0.00014640073360843648, "loss": 1.8717, "step": 34000 }, { "epoch": 5.23, "learning_rate": 0.0001441082072443833, "loss": 1.8059, "step": 34500 }, { "epoch": 5.31, "learning_rate": 0.00014181568088033011, "loss": 1.818, "step": 35000 }, { "epoch": 5.38, "learning_rate": 0.00013952315451627693, "loss": 1.8108, "step": 35500 }, { "epoch": 5.46, "learning_rate": 0.00013723062815222374, "loss": 1.8391, "step": 36000 }, { "epoch": 5.54, "learning_rate": 0.00013493810178817053, "loss": 1.8801, "step": 36500 }, { "epoch": 5.61, "learning_rate": 0.00013264557542411737, "loss": 1.8382, "step": 37000 }, { "epoch": 5.69, "learning_rate": 0.0001303530490600642, "loss": 1.8418, "step": 37500 }, { "epoch": 5.76, "learning_rate": 0.000128060522696011, "loss": 1.7916, "step": 38000 }, { "epoch": 5.84, "learning_rate": 0.00012576799633195782, "loss": 1.7707, "step": 38500 }, { "epoch": 5.92, "learning_rate": 0.00012347546996790463, "loss": 1.807, "step": 39000 }, { "epoch": 5.99, "learning_rate": 0.00012118294360385143, "loss": 1.8083, "step": 39500 }, { "epoch": 6.0, "eval_cer": 0.469557964970809, "eval_loss": 2.210385799407959, "eval_runtime": 57.4991, "eval_samples_per_second": 5.583, "eval_steps_per_second": 2.8, "step": 39558 }, { "epoch": 6.07, "learning_rate": 0.00011889041723979825, "loss": 1.5902, "step": 40000 }, { "epoch": 6.14, "learning_rate": 0.00011659789087574506, "loss": 1.5428, "step": 40500 }, { "epoch": 6.22, "learning_rate": 0.00011430536451169188, "loss": 1.5965, "step": 41000 }, { "epoch": 6.29, "learning_rate": 0.00011201283814763868, "loss": 1.5892, "step": 41500 }, { "epoch": 6.37, "learning_rate": 0.0001097203117835855, "loss": 1.5869, "step": 42000 }, { "epoch": 6.45, "learning_rate": 0.00010742778541953231, "loss": 1.5466, "step": 42500 }, { "epoch": 6.52, "learning_rate": 0.00010513525905547912, "loss": 1.6088, "step": 43000 }, { "epoch": 6.6, "learning_rate": 0.00010284273269142595, "loss": 1.5707, "step": 43500 }, { "epoch": 6.67, "learning_rate": 0.00010055020632737277, "loss": 1.5875, "step": 44000 }, { "epoch": 6.75, "learning_rate": 9.825767996331957e-05, "loss": 1.544, "step": 44500 }, { "epoch": 6.83, "learning_rate": 9.596515359926638e-05, "loss": 1.56, "step": 45000 }, { "epoch": 6.9, "learning_rate": 9.36726272352132e-05, "loss": 1.6172, "step": 45500 }, { "epoch": 6.98, "learning_rate": 9.138010087116001e-05, "loss": 1.5209, "step": 46000 }, { "epoch": 7.0, "eval_cer": 0.46605504587155966, "eval_loss": 2.162141799926758, "eval_runtime": 55.7456, "eval_samples_per_second": 5.758, "eval_steps_per_second": 2.888, "step": 46151 }, { "epoch": 7.05, "learning_rate": 8.908757450710683e-05, "loss": 1.3888, "step": 46500 }, { "epoch": 7.13, "learning_rate": 8.679504814305364e-05, "loss": 1.3561, "step": 47000 }, { "epoch": 7.2, "learning_rate": 8.450252177900044e-05, "loss": 1.3643, "step": 47500 }, { "epoch": 7.28, "learning_rate": 8.220999541494726e-05, "loss": 1.3816, "step": 48000 }, { "epoch": 7.36, "learning_rate": 7.991746905089407e-05, "loss": 1.3173, "step": 48500 }, { "epoch": 7.43, "learning_rate": 7.762494268684089e-05, "loss": 1.4088, "step": 49000 }, { "epoch": 7.51, "learning_rate": 7.53324163227877e-05, "loss": 1.3434, "step": 49500 }, { "epoch": 7.58, "learning_rate": 7.303988995873452e-05, "loss": 1.379, "step": 50000 }, { "epoch": 7.66, "learning_rate": 7.074736359468133e-05, "loss": 1.3693, "step": 50500 }, { "epoch": 7.74, "learning_rate": 6.845483723062815e-05, "loss": 1.3075, "step": 51000 }, { "epoch": 7.81, "learning_rate": 6.616231086657496e-05, "loss": 1.3585, "step": 51500 }, { "epoch": 7.89, "learning_rate": 6.386978450252178e-05, "loss": 1.4037, "step": 52000 }, { "epoch": 7.96, "learning_rate": 6.157725813846859e-05, "loss": 1.3735, "step": 52500 }, { "epoch": 8.0, "eval_cer": 0.45821517931609673, "eval_loss": 2.2031326293945312, "eval_runtime": 55.4705, "eval_samples_per_second": 5.787, "eval_steps_per_second": 2.902, "step": 52744 }, { "epoch": 8.04, "learning_rate": 5.92847317744154e-05, "loss": 1.288, "step": 53000 }, { "epoch": 8.11, "learning_rate": 5.6992205410362215e-05, "loss": 1.2032, "step": 53500 }, { "epoch": 8.19, "learning_rate": 5.469967904630902e-05, "loss": 1.2034, "step": 54000 }, { "epoch": 8.27, "learning_rate": 5.240715268225584e-05, "loss": 1.2098, "step": 54500 }, { "epoch": 8.34, "learning_rate": 5.011462631820266e-05, "loss": 1.196, "step": 55000 }, { "epoch": 8.42, "learning_rate": 4.7822099954149474e-05, "loss": 1.1629, "step": 55500 }, { "epoch": 8.49, "learning_rate": 4.552957359009628e-05, "loss": 1.1932, "step": 56000 }, { "epoch": 8.57, "learning_rate": 4.3237047226043097e-05, "loss": 1.1548, "step": 56500 }, { "epoch": 8.65, "learning_rate": 4.0944520861989905e-05, "loss": 1.1835, "step": 57000 }, { "epoch": 8.72, "learning_rate": 3.865199449793672e-05, "loss": 1.2334, "step": 57500 }, { "epoch": 8.8, "learning_rate": 3.6359468133883534e-05, "loss": 1.2385, "step": 58000 }, { "epoch": 8.87, "learning_rate": 3.406694176983035e-05, "loss": 1.1762, "step": 58500 }, { "epoch": 8.95, "learning_rate": 3.1774415405777164e-05, "loss": 1.1354, "step": 59000 }, { "epoch": 9.0, "eval_cer": 0.44920767306088405, "eval_loss": 2.1740152835845947, "eval_runtime": 55.8382, "eval_samples_per_second": 5.749, "eval_steps_per_second": 2.883, "step": 59337 }, { "epoch": 9.02, "learning_rate": 2.948188904172398e-05, "loss": 1.1439, "step": 59500 }, { "epoch": 9.1, "learning_rate": 2.718936267767079e-05, "loss": 1.1242, "step": 60000 }, { "epoch": 9.18, "learning_rate": 2.4896836313617605e-05, "loss": 1.0779, "step": 60500 }, { "epoch": 9.25, "learning_rate": 2.260430994956442e-05, "loss": 1.1212, "step": 61000 }, { "epoch": 9.33, "learning_rate": 2.031178358551123e-05, "loss": 1.0518, "step": 61500 }, { "epoch": 9.4, "learning_rate": 1.8019257221458046e-05, "loss": 1.05, "step": 62000 }, { "epoch": 9.48, "learning_rate": 1.572673085740486e-05, "loss": 1.0233, "step": 62500 }, { "epoch": 9.56, "learning_rate": 1.3434204493351672e-05, "loss": 1.0439, "step": 63000 }, { "epoch": 9.63, "learning_rate": 1.1141678129298487e-05, "loss": 1.0929, "step": 63500 }, { "epoch": 9.71, "learning_rate": 8.8491517652453e-06, "loss": 1.0614, "step": 64000 }, { "epoch": 9.78, "learning_rate": 6.556625401192113e-06, "loss": 1.0696, "step": 64500 }, { "epoch": 9.86, "learning_rate": 4.264099037138927e-06, "loss": 1.0251, "step": 65000 }, { "epoch": 9.93, "learning_rate": 1.9715726730857402e-06, "loss": 1.0788, "step": 65500 }, { "epoch": 10.0, "eval_cer": 0.44653878231859884, "eval_loss": 2.1948180198669434, "eval_runtime": 56.1921, "eval_samples_per_second": 5.713, "eval_steps_per_second": 2.865, "step": 65930 }, { "epoch": 10.0, "step": 65930, "total_flos": 3.725663511627642e+19, "train_loss": 2.559117515977084, "train_runtime": 35313.2078, "train_samples_per_second": 7.468, "train_steps_per_second": 1.867 } ], "logging_steps": 500, "max_steps": 65930, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.725663511627642e+19, "trial_name": null, "trial_params": null }