{ "best_metric": 0.25546682656055525, "best_model_checkpoint": "./jako-xlsr/checkpoint-11000", "epoch": 18.275271273557966, "eval_steps": 1000, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 0.00012563451776649744, "loss": 24.8869, "step": 330 }, { "epoch": 0.75, "learning_rate": 0.0002512690355329949, "loss": 4.8854, "step": 660 }, { "epoch": 1.13, "learning_rate": 0.00029761998271934643, "loss": 3.5667, "step": 990 }, { "epoch": 1.14, "eval_cer": 0.5188284138707948, "eval_loss": 2.2322683334350586, "eval_runtime": 333.3102, "eval_samples_per_second": 10.522, "eval_steps_per_second": 0.66, "step": 1000 }, { "epoch": 1.51, "learning_rate": 0.00029373183567669466, "loss": 2.1771, "step": 1320 }, { "epoch": 1.88, "learning_rate": 0.0002898436886340429, "loss": 1.8324, "step": 1650 }, { "epoch": 2.26, "learning_rate": 0.00028595554159139105, "loss": 1.5569, "step": 1980 }, { "epoch": 2.28, "eval_cer": 0.3527113824461948, "eval_loss": 1.3106200695037842, "eval_runtime": 333.5876, "eval_samples_per_second": 10.513, "eval_steps_per_second": 0.659, "step": 2000 }, { "epoch": 2.64, "learning_rate": 0.00028206739454873927, "loss": 1.4453, "step": 2310 }, { "epoch": 3.02, "learning_rate": 0.0002781792475060875, "loss": 1.38, "step": 2640 }, { "epoch": 3.39, "learning_rate": 0.0002742911004634357, "loss": 1.2238, "step": 2970 }, { "epoch": 3.43, "eval_cer": 0.3098580033250661, "eval_loss": 1.1108620166778564, "eval_runtime": 331.1347, "eval_samples_per_second": 10.591, "eval_steps_per_second": 0.664, "step": 3000 }, { "epoch": 3.77, "learning_rate": 0.0002704029534207839, "loss": 1.203, "step": 3300 }, { "epoch": 4.15, "learning_rate": 0.0002665148063781321, "loss": 1.151, "step": 3630 }, { "epoch": 4.52, "learning_rate": 0.0002626266593354803, "loss": 1.0593, "step": 3960 }, { "epoch": 4.57, "eval_cer": 0.28911722220708075, "eval_loss": 1.0389584302902222, "eval_runtime": 331.5451, "eval_samples_per_second": 10.578, "eval_steps_per_second": 0.664, "step": 4000 }, { "epoch": 4.9, "learning_rate": 0.0002587385122928285, "loss": 1.0674, "step": 4290 }, { "epoch": 5.28, "learning_rate": 0.0002548503652501767, "loss": 0.974, "step": 4620 }, { "epoch": 5.65, "learning_rate": 0.00025096221820752494, "loss": 0.9658, "step": 4950 }, { "epoch": 5.71, "eval_cer": 0.29180634669719185, "eval_loss": 0.973089337348938, "eval_runtime": 331.0672, "eval_samples_per_second": 10.593, "eval_steps_per_second": 0.665, "step": 5000 }, { "epoch": 6.03, "learning_rate": 0.0002470740711648731, "loss": 0.9596, "step": 5280 }, { "epoch": 6.41, "learning_rate": 0.00024318592412222133, "loss": 0.8626, "step": 5610 }, { "epoch": 6.78, "learning_rate": 0.00023929777707956955, "loss": 0.8796, "step": 5940 }, { "epoch": 6.85, "eval_cer": 0.26956656037357024, "eval_loss": 0.9479135870933533, "eval_runtime": 330.5292, "eval_samples_per_second": 10.61, "eval_steps_per_second": 0.666, "step": 6000 }, { "epoch": 7.16, "learning_rate": 0.00023540963003691772, "loss": 0.8311, "step": 6270 }, { "epoch": 7.54, "learning_rate": 0.00023152148299426594, "loss": 0.7889, "step": 6600 }, { "epoch": 7.92, "learning_rate": 0.00022763333595161414, "loss": 0.8022, "step": 6930 }, { "epoch": 8.0, "eval_cer": 0.2710383109391041, "eval_loss": 0.9330962300300598, "eval_runtime": 330.2975, "eval_samples_per_second": 10.618, "eval_steps_per_second": 0.666, "step": 7000 }, { "epoch": 8.29, "learning_rate": 0.00022374518890896236, "loss": 0.7418, "step": 7260 }, { "epoch": 8.67, "learning_rate": 0.00021985704186631055, "loss": 0.739, "step": 7590 }, { "epoch": 9.05, "learning_rate": 0.00021596889482365878, "loss": 0.7392, "step": 7920 }, { "epoch": 9.14, "eval_cer": 0.2745814141524261, "eval_loss": 0.925165593624115, "eval_runtime": 338.4979, "eval_samples_per_second": 10.36, "eval_steps_per_second": 0.65, "step": 8000 }, { "epoch": 9.42, "learning_rate": 0.000212080747781007, "loss": 0.6765, "step": 8250 }, { "epoch": 9.8, "learning_rate": 0.00020819260073835517, "loss": 0.6778, "step": 8580 }, { "epoch": 10.18, "learning_rate": 0.0002043044536957034, "loss": 0.6694, "step": 8910 }, { "epoch": 10.28, "eval_cer": 0.2590008448938432, "eval_loss": 0.9317852854728699, "eval_runtime": 336.5027, "eval_samples_per_second": 10.422, "eval_steps_per_second": 0.654, "step": 9000 }, { "epoch": 10.55, "learning_rate": 0.00020041630665305158, "loss": 0.621, "step": 9240 }, { "epoch": 10.93, "learning_rate": 0.0001965281596103998, "loss": 0.6494, "step": 9570 }, { "epoch": 11.31, "learning_rate": 0.000192640012567748, "loss": 0.5977, "step": 9900 }, { "epoch": 11.42, "eval_cer": 0.2674225286855087, "eval_loss": 0.9348525404930115, "eval_runtime": 338.5078, "eval_samples_per_second": 10.36, "eval_steps_per_second": 0.65, "step": 10000 }, { "epoch": 11.68, "learning_rate": 0.00018875186552509622, "loss": 0.5786, "step": 10230 }, { "epoch": 12.06, "learning_rate": 0.0001848637184824444, "loss": 0.582, "step": 10560 }, { "epoch": 12.44, "learning_rate": 0.00018097557143979261, "loss": 0.5484, "step": 10890 }, { "epoch": 12.56, "eval_cer": 0.25546682656055525, "eval_loss": 0.9408797025680542, "eval_runtime": 334.3327, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.658, "step": 11000 }, { "epoch": 12.82, "learning_rate": 0.0001770874243971408, "loss": 0.5508, "step": 11220 }, { "epoch": 13.19, "learning_rate": 0.00017319927735448903, "loss": 0.5314, "step": 11550 }, { "epoch": 13.57, "learning_rate": 0.00016931113031183723, "loss": 0.5154, "step": 11880 }, { "epoch": 13.71, "eval_cer": 0.27192862918245164, "eval_loss": 0.9510018229484558, "eval_runtime": 332.097, "eval_samples_per_second": 10.56, "eval_steps_per_second": 0.662, "step": 12000 }, { "epoch": 13.95, "learning_rate": 0.00016542298326918545, "loss": 0.5096, "step": 12210 }, { "epoch": 14.32, "learning_rate": 0.00016153483622653367, "loss": 0.477, "step": 12540 }, { "epoch": 14.7, "learning_rate": 0.00015764668918388184, "loss": 0.4767, "step": 12870 }, { "epoch": 14.85, "eval_cer": 0.2623531656264479, "eval_loss": 0.9555571675300598, "eval_runtime": 337.0597, "eval_samples_per_second": 10.405, "eval_steps_per_second": 0.653, "step": 13000 }, { "epoch": 15.08, "learning_rate": 0.00015375854214123006, "loss": 0.4733, "step": 13200 }, { "epoch": 15.45, "learning_rate": 0.00014987039509857826, "loss": 0.437, "step": 13530 }, { "epoch": 15.83, "learning_rate": 0.00014598224805592645, "loss": 0.4536, "step": 13860 }, { "epoch": 15.99, "eval_cer": 0.26838552596912957, "eval_loss": 0.9849810600280762, "eval_runtime": 331.7906, "eval_samples_per_second": 10.57, "eval_steps_per_second": 0.663, "step": 14000 }, { "epoch": 16.21, "learning_rate": 0.00014209410101327467, "loss": 0.4291, "step": 14190 }, { "epoch": 16.58, "learning_rate": 0.0001382059539706229, "loss": 0.4195, "step": 14520 }, { "epoch": 16.96, "learning_rate": 0.0001343178069279711, "loss": 0.4195, "step": 14850 }, { "epoch": 17.13, "eval_cer": 0.2590008448938432, "eval_loss": 0.9893819093704224, "eval_runtime": 336.3435, "eval_samples_per_second": 10.427, "eval_steps_per_second": 0.654, "step": 15000 }, { "epoch": 17.34, "learning_rate": 0.0001304296598853193, "loss": 0.3817, "step": 15180 }, { "epoch": 17.72, "learning_rate": 0.0001265415128426675, "loss": 0.3905, "step": 15510 }, { "epoch": 18.09, "learning_rate": 0.0001226533658000157, "loss": 0.3937, "step": 15840 }, { "epoch": 18.28, "eval_cer": 0.26977551261435595, "eval_loss": 1.019676923751831, "eval_runtime": 335.5807, "eval_samples_per_second": 10.451, "eval_steps_per_second": 0.656, "step": 16000 }, { "epoch": 18.28, "step": 16000, "total_flos": 7.669732886426786e+19, "train_loss": 1.4304106447696685, "train_runtime": 19413.4182, "train_samples_per_second": 43.283, "train_steps_per_second": 1.352 } ], "logging_steps": 330, "max_steps": 26250, "num_train_epochs": 30, "save_steps": 1000, "total_flos": 7.669732886426786e+19, "trial_name": null, "trial_params": null }