{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.147606696533837, "eval_steps": 2000, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 0.00014879999999999998, "loss": 15.1742, "step": 500 }, { "epoch": 0.47, "learning_rate": 0.0002985, "loss": 3.8658, "step": 1000 }, { "epoch": 0.71, "learning_rate": 0.0002998088484992574, "loss": 1.8845, "step": 1500 }, { "epoch": 0.94, "learning_rate": 0.0002992281499624614, "loss": 1.54, "step": 2000 }, { "epoch": 0.94, "eval_cer": 0.26165714958754477, "eval_loss": 1.005660891532898, "eval_runtime": 2601.6674, "eval_samples_per_second": 6.501, "eval_steps_per_second": 0.407, "eval_wer": 0.6134988781327054, "step": 2000 }, { "epoch": 1.18, "learning_rate": 0.0002982593949446869, "loss": 1.3671, "step": 2500 }, { "epoch": 1.41, "learning_rate": 0.0002969051026198513, "loss": 1.2849, "step": 3000 }, { "epoch": 1.65, "learning_rate": 0.00029517264575296907, "loss": 1.2354, "step": 3500 }, { "epoch": 1.89, "learning_rate": 0.00029305958725988945, "loss": 1.1895, "step": 4000 }, { "epoch": 1.89, "eval_cer": 0.20404210489580651, "eval_loss": 0.7781721949577332, "eval_runtime": 2606.5317, "eval_samples_per_second": 6.489, "eval_steps_per_second": 0.406, "eval_wer": 0.5034607425730799, "step": 4000 }, { "epoch": 2.12, "learning_rate": 0.0002905745131650867, "loss": 1.1348, "step": 4500 }, { "epoch": 2.36, "learning_rate": 0.0002877238857151841, "loss": 1.0801, "step": 5000 }, { "epoch": 2.59, "learning_rate": 0.00028452188731761346, "loss": 1.0722, "step": 5500 }, { "epoch": 2.83, "learning_rate": 0.00028096401365270297, "loss": 1.0582, "step": 6000 }, { "epoch": 2.83, "eval_cer": 0.18262104285084868, "eval_loss": 0.6766561269760132, "eval_runtime": 2575.8771, "eval_samples_per_second": 6.566, "eval_steps_per_second": 0.411, "eval_wer": 0.4655154643262057, "step": 6000 }, { "epoch": 3.07, "learning_rate": 0.0002770655780074579, "loss": 1.0283, "step": 6500 }, { "epoch": 3.3, "learning_rate": 0.0002728367179679716, "loss": 0.9818, "step": 7000 }, { "epoch": 3.54, "learning_rate": 0.0002682884303639032, "loss": 0.972, "step": 7500 }, { "epoch": 3.77, "learning_rate": 0.0002634425531692912, "loss": 0.9586, "step": 8000 }, { "epoch": 3.77, "eval_cer": 0.1690433917735793, "eval_loss": 0.6272980570793152, "eval_runtime": 2527.907, "eval_samples_per_second": 6.691, "eval_steps_per_second": 0.419, "eval_wer": 0.43799601994783116, "step": 8000 }, { "epoch": 4.01, "learning_rate": 0.00025829226958497964, "loss": 0.9543, "step": 8500 }, { "epoch": 4.24, "learning_rate": 0.0002528603801705155, "loss": 0.9046, "step": 9000 }, { "epoch": 4.48, "learning_rate": 0.00024716101014216497, "loss": 0.8973, "step": 9500 }, { "epoch": 4.72, "learning_rate": 0.00024122112631130689, "loss": 0.8831, "step": 10000 }, { "epoch": 4.72, "eval_cer": 0.15516692331517162, "eval_loss": 0.5883975028991699, "eval_runtime": 2528.7031, "eval_samples_per_second": 6.689, "eval_steps_per_second": 0.418, "eval_wer": 0.40706736753641115, "step": 10000 }, { "epoch": 4.95, "learning_rate": 0.00023503237291623731, "loss": 0.8808, "step": 10500 }, { "epoch": 5.19, "learning_rate": 0.00022862249928740867, "loss": 0.8442, "step": 11000 }, { "epoch": 5.42, "learning_rate": 0.00022202159516049143, "loss": 0.8334, "step": 11500 }, { "epoch": 5.66, "learning_rate": 0.0002152203749023848, "loss": 0.8318, "step": 12000 }, { "epoch": 5.66, "eval_cer": 0.14686983340385715, "eval_loss": 0.5510314106941223, "eval_runtime": 2595.7595, "eval_samples_per_second": 6.516, "eval_steps_per_second": 0.408, "eval_wer": 0.3897438336940751, "step": 12000 }, { "epoch": 5.89, "learning_rate": 0.0002082495540080772, "loss": 0.8242, "step": 12500 }, { "epoch": 6.13, "learning_rate": 0.00020112725956837873, "loss": 0.7965, "step": 13000 }, { "epoch": 6.37, "learning_rate": 0.00019387201256956072, "loss": 0.7776, "step": 13500 }, { "epoch": 6.6, "learning_rate": 0.0001865026797309365, "loss": 0.7725, "step": 14000 }, { "epoch": 6.6, "eval_cer": 0.1407080881358594, "eval_loss": 0.5327238440513611, "eval_runtime": 2579.4825, "eval_samples_per_second": 6.557, "eval_steps_per_second": 0.41, "eval_wer": 0.3726026528395532, "step": 14000 }, { "epoch": 6.84, "learning_rate": 0.0001790534348212884, "loss": 0.7775, "step": 14500 }, { "epoch": 7.07, "learning_rate": 0.00017152894049650538, "loss": 0.7543, "step": 15000 }, { "epoch": 7.31, "learning_rate": 0.0001639334517198862, "loss": 0.7284, "step": 15500 }, { "epoch": 7.55, "learning_rate": 0.00015630173005929936, "loss": 0.7254, "step": 16000 }, { "epoch": 7.55, "eval_cer": 0.1416372575107858, "eval_loss": 0.5081394910812378, "eval_runtime": 2577.7581, "eval_samples_per_second": 6.562, "eval_steps_per_second": 0.41, "eval_wer": 0.3675760530885046, "step": 16000 }, { "epoch": 7.78, "learning_rate": 0.0001486536212278503, "loss": 0.7224, "step": 16500 }, { "epoch": 8.02, "learning_rate": 0.00014100901355223894, "loss": 0.7136, "step": 17000 }, { "epoch": 8.25, "learning_rate": 0.00013338778625467495, "loss": 0.6742, "step": 17500 }, { "epoch": 8.49, "learning_rate": 0.00012580975775846912, "loss": 0.6802, "step": 18000 }, { "epoch": 8.49, "eval_cer": 0.13128552546054956, "eval_loss": 0.4845993220806122, "eval_runtime": 2577.5228, "eval_samples_per_second": 6.562, "eval_steps_per_second": 0.41, "eval_wer": 0.350228734073845, "step": 18000 }, { "epoch": 8.72, "learning_rate": 0.0001182946341517271, "loss": 0.6779, "step": 18500 }, { "epoch": 8.96, "learning_rate": 0.00011087672812556355, "loss": 0.6727, "step": 19000 }, { "epoch": 9.2, "learning_rate": 0.00010354560475282154, "loss": 0.6437, "step": 19500 }, { "epoch": 9.43, "learning_rate": 9.633528251009509e-05, "loss": 0.6386, "step": 20000 }, { "epoch": 9.43, "eval_cer": 0.12406592304034689, "eval_loss": 0.46761998534202576, "eval_runtime": 2525.8593, "eval_samples_per_second": 6.696, "eval_steps_per_second": 0.419, "eval_wer": 0.3343878093063451, "step": 20000 }, { "epoch": 9.67, "learning_rate": 8.926451129327824e-05, "loss": 0.6291, "step": 20500 }, { "epoch": 9.9, "learning_rate": 8.23653341292733e-05, "loss": 0.6257, "step": 21000 }, { "epoch": 10.14, "learning_rate": 7.562804579791497e-05, "loss": 0.608, "step": 21500 }, { "epoch": 10.37, "learning_rate": 6.909703945913943e-05, "loss": 0.5949, "step": 22000 }, { "epoch": 10.37, "eval_cer": 0.11851489942723033, "eval_loss": 0.45102670788764954, "eval_runtime": 2545.5284, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.416, "eval_wer": 0.3250481649739552, "step": 22000 }, { "epoch": 10.61, "learning_rate": 6.276312780656023e-05, "loss": 0.5946, "step": 22500 }, { "epoch": 10.85, "learning_rate": 5.665606902083949e-05, "loss": 0.5947, "step": 23000 }, { "epoch": 11.08, "learning_rate": 5.079174404496348e-05, "loss": 0.5854, "step": 23500 }, { "epoch": 11.32, "learning_rate": 4.519634802070792e-05, "loss": 0.5736, "step": 24000 }, { "epoch": 11.32, "eval_cer": 0.11609600544414732, "eval_loss": 0.4416215717792511, "eval_runtime": 2548.5938, "eval_samples_per_second": 6.637, "eval_steps_per_second": 0.415, "eval_wer": 0.31890366212369875, "step": 24000 }, { "epoch": 11.55, "learning_rate": 3.9862009784738855e-05, "loss": 0.5637, "step": 24500 }, { "epoch": 11.79, "learning_rate": 3.4814077035373634e-05, "loss": 0.5681, "step": 25000 }, { "epoch": 12.03, "learning_rate": 3.0065676538704393e-05, "loss": 0.563, "step": 25500 }, { "epoch": 12.26, "learning_rate": 2.562915614992792e-05, "loss": 0.5451, "step": 26000 }, { "epoch": 12.26, "eval_cer": 0.1143270937937593, "eval_loss": 0.43376967310905457, "eval_runtime": 2588.7798, "eval_samples_per_second": 6.534, "eval_steps_per_second": 0.409, "eval_wer": 0.31443204972686695, "step": 26000 }, { "epoch": 12.5, "learning_rate": 2.1523948888423446e-05, "loss": 0.5525, "step": 26500 }, { "epoch": 12.73, "learning_rate": 1.7744279871280954e-05, "loss": 0.5442, "step": 27000 }, { "epoch": 12.97, "learning_rate": 1.4308531819153024e-05, "loss": 0.5452, "step": 27500 }, { "epoch": 13.2, "learning_rate": 1.1225639134154647e-05, "loss": 0.5375, "step": 28000 }, { "epoch": 13.2, "eval_cer": 0.11264106579653371, "eval_loss": 0.4286753833293915, "eval_runtime": 2584.5725, "eval_samples_per_second": 6.544, "eval_steps_per_second": 0.409, "eval_wer": 0.30952437583743625, "step": 28000 }, { "epoch": 13.44, "learning_rate": 8.508697672362313e-06, "loss": 0.5351, "step": 28500 }, { "epoch": 13.68, "learning_rate": 6.153885454076635e-06, "loss": 0.5365, "step": 29000 }, { "epoch": 13.91, "learning_rate": 4.173134142042017e-06, "loss": 0.5308, "step": 29500 }, { "epoch": 14.15, "learning_rate": 2.5715945297517193e-06, "loss": 0.5335, "step": 30000 }, { "epoch": 14.15, "eval_cer": 0.11220919834058202, "eval_loss": 0.42732492089271545, "eval_runtime": 2538.009, "eval_samples_per_second": 6.664, "eval_steps_per_second": 0.417, "eval_wer": 0.3079228409009823, "step": 30000 } ], "logging_steps": 500, "max_steps": 31800, "num_train_epochs": 15, "save_steps": 2000, "total_flos": 2.014666123318961e+20, "trial_name": null, "trial_params": null }