{
  "best_metric": 0.25546682656055525,
  "best_model_checkpoint": "./jako-xlsr/checkpoint-11000",
  "epoch": 18.275271273557966,
  "eval_steps": 1000,
  "global_step": 16000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38,
      "learning_rate": 0.00012563451776649744,
      "loss": 24.8869,
      "step": 330
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0002512690355329949,
      "loss": 4.8854,
      "step": 660
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.00029761998271934643,
      "loss": 3.5667,
      "step": 990
    },
    {
      "epoch": 1.14,
      "eval_cer": 0.5188284138707948,
      "eval_loss": 2.2322683334350586,
      "eval_runtime": 333.3102,
      "eval_samples_per_second": 10.522,
      "eval_steps_per_second": 0.66,
      "step": 1000
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.00029373183567669466,
      "loss": 2.1771,
      "step": 1320
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.0002898436886340429,
      "loss": 1.8324,
      "step": 1650
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.00028595554159139105,
      "loss": 1.5569,
      "step": 1980
    },
    {
      "epoch": 2.28,
      "eval_cer": 0.3527113824461948,
      "eval_loss": 1.3106200695037842,
      "eval_runtime": 333.5876,
      "eval_samples_per_second": 10.513,
      "eval_steps_per_second": 0.659,
      "step": 2000
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.00028206739454873927,
      "loss": 1.4453,
      "step": 2310
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.0002781792475060875,
      "loss": 1.38,
      "step": 2640
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.0002742911004634357,
      "loss": 1.2238,
      "step": 2970
    },
    {
      "epoch": 3.43,
      "eval_cer": 0.3098580033250661,
      "eval_loss": 1.1108620166778564,
      "eval_runtime": 331.1347,
      "eval_samples_per_second": 10.591,
      "eval_steps_per_second": 0.664,
      "step": 3000
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.0002704029534207839,
      "loss": 1.203,
      "step": 3300
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.0002665148063781321,
      "loss": 1.151,
      "step": 3630
    },
    {
      "epoch": 4.52,
      "learning_rate": 0.0002626266593354803,
      "loss": 1.0593,
      "step": 3960
    },
    {
      "epoch": 4.57,
      "eval_cer": 0.28911722220708075,
      "eval_loss": 1.0389584302902222,
      "eval_runtime": 331.5451,
      "eval_samples_per_second": 10.578,
      "eval_steps_per_second": 0.664,
      "step": 4000
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.0002587385122928285,
      "loss": 1.0674,
      "step": 4290
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.0002548503652501767,
      "loss": 0.974,
      "step": 4620
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.00025096221820752494,
      "loss": 0.9658,
      "step": 4950
    },
    {
      "epoch": 5.71,
      "eval_cer": 0.29180634669719185,
      "eval_loss": 0.973089337348938,
      "eval_runtime": 331.0672,
      "eval_samples_per_second": 10.593,
      "eval_steps_per_second": 0.665,
      "step": 5000
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.0002470740711648731,
      "loss": 0.9596,
      "step": 5280
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.00024318592412222133,
      "loss": 0.8626,
      "step": 5610
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.00023929777707956955,
      "loss": 0.8796,
      "step": 5940
    },
    {
      "epoch": 6.85,
      "eval_cer": 0.26956656037357024,
      "eval_loss": 0.9479135870933533,
      "eval_runtime": 330.5292,
      "eval_samples_per_second": 10.61,
      "eval_steps_per_second": 0.666,
      "step": 6000
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.00023540963003691772,
      "loss": 0.8311,
      "step": 6270
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.00023152148299426594,
      "loss": 0.7889,
      "step": 6600
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.00022763333595161414,
      "loss": 0.8022,
      "step": 6930
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.2710383109391041,
      "eval_loss": 0.9330962300300598,
      "eval_runtime": 330.2975,
      "eval_samples_per_second": 10.618,
      "eval_steps_per_second": 0.666,
      "step": 7000
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.00022374518890896236,
      "loss": 0.7418,
      "step": 7260
    },
    {
      "epoch": 8.67,
      "learning_rate": 0.00021985704186631055,
      "loss": 0.739,
      "step": 7590
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.00021596889482365878,
      "loss": 0.7392,
      "step": 7920
    },
    {
      "epoch": 9.14,
      "eval_cer": 0.2745814141524261,
      "eval_loss": 0.925165593624115,
      "eval_runtime": 338.4979,
      "eval_samples_per_second": 10.36,
      "eval_steps_per_second": 0.65,
      "step": 8000
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.000212080747781007,
      "loss": 0.6765,
      "step": 8250
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.00020819260073835517,
      "loss": 0.6778,
      "step": 8580
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.0002043044536957034,
      "loss": 0.6694,
      "step": 8910
    },
    {
      "epoch": 10.28,
      "eval_cer": 0.2590008448938432,
      "eval_loss": 0.9317852854728699,
      "eval_runtime": 336.5027,
      "eval_samples_per_second": 10.422,
      "eval_steps_per_second": 0.654,
      "step": 9000
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.00020041630665305158,
      "loss": 0.621,
      "step": 9240
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.0001965281596103998,
      "loss": 0.6494,
      "step": 9570
    },
    {
      "epoch": 11.31,
      "learning_rate": 0.000192640012567748,
      "loss": 0.5977,
      "step": 9900
    },
    {
      "epoch": 11.42,
      "eval_cer": 0.2674225286855087,
      "eval_loss": 0.9348525404930115,
      "eval_runtime": 338.5078,
      "eval_samples_per_second": 10.36,
      "eval_steps_per_second": 0.65,
      "step": 10000
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.00018875186552509622,
      "loss": 0.5786,
      "step": 10230
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.0001848637184824444,
      "loss": 0.582,
      "step": 10560
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.00018097557143979261,
      "loss": 0.5484,
      "step": 10890
    },
    {
      "epoch": 12.56,
      "eval_cer": 0.25546682656055525,
      "eval_loss": 0.9408797025680542,
      "eval_runtime": 334.3327,
      "eval_samples_per_second": 10.49,
      "eval_steps_per_second": 0.658,
      "step": 11000
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.0001770874243971408,
      "loss": 0.5508,
      "step": 11220
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.00017319927735448903,
      "loss": 0.5314,
      "step": 11550
    },
    {
      "epoch": 13.57,
      "learning_rate": 0.00016931113031183723,
      "loss": 0.5154,
      "step": 11880
    },
    {
      "epoch": 13.71,
      "eval_cer": 0.27192862918245164,
      "eval_loss": 0.9510018229484558,
      "eval_runtime": 332.097,
      "eval_samples_per_second": 10.56,
      "eval_steps_per_second": 0.662,
      "step": 12000
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.00016542298326918545,
      "loss": 0.5096,
      "step": 12210
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.00016153483622653367,
      "loss": 0.477,
      "step": 12540
    },
    {
      "epoch": 14.7,
      "learning_rate": 0.00015764668918388184,
      "loss": 0.4767,
      "step": 12870
    },
    {
      "epoch": 14.85,
      "eval_cer": 0.2623531656264479,
      "eval_loss": 0.9555571675300598,
      "eval_runtime": 337.0597,
      "eval_samples_per_second": 10.405,
      "eval_steps_per_second": 0.653,
      "step": 13000
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.00015375854214123006,
      "loss": 0.4733,
      "step": 13200
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.00014987039509857826,
      "loss": 0.437,
      "step": 13530
    },
    {
      "epoch": 15.83,
      "learning_rate": 0.00014598224805592645,
      "loss": 0.4536,
      "step": 13860
    },
    {
      "epoch": 15.99,
      "eval_cer": 0.26838552596912957,
      "eval_loss": 0.9849810600280762,
      "eval_runtime": 331.7906,
      "eval_samples_per_second": 10.57,
      "eval_steps_per_second": 0.663,
      "step": 14000
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.00014209410101327467,
      "loss": 0.4291,
      "step": 14190
    },
    {
      "epoch": 16.58,
      "learning_rate": 0.0001382059539706229,
      "loss": 0.4195,
      "step": 14520
    },
    {
      "epoch": 16.96,
      "learning_rate": 0.0001343178069279711,
      "loss": 0.4195,
      "step": 14850
    },
    {
      "epoch": 17.13,
      "eval_cer": 0.2590008448938432,
      "eval_loss": 0.9893819093704224,
      "eval_runtime": 336.3435,
      "eval_samples_per_second": 10.427,
      "eval_steps_per_second": 0.654,
      "step": 15000
    },
    {
      "epoch": 17.34,
      "learning_rate": 0.0001304296598853193,
      "loss": 0.3817,
      "step": 15180
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.0001265415128426675,
      "loss": 0.3905,
      "step": 15510
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.0001226533658000157,
      "loss": 0.3937,
      "step": 15840
    },
    {
      "epoch": 18.28,
      "eval_cer": 0.26977551261435595,
      "eval_loss": 1.019676923751831,
      "eval_runtime": 335.5807,
      "eval_samples_per_second": 10.451,
      "eval_steps_per_second": 0.656,
      "step": 16000
    }
  ],
  "logging_steps": 330,
  "max_steps": 26250,
  "num_train_epochs": 30,
  "save_steps": 1000,
  "total_flos": 7.669732886426786e+19,
  "trial_name": null,
  "trial_params": null
}
|