|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.768019884009945, |
|
"eval_steps": 1000, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.6399999999999995e-05, |
|
"loss": 37.3417, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001164, |
|
"loss": 12.5064, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00017639999999999998, |
|
"loss": 4.823, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002364, |
|
"loss": 4.7159, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0002964, |
|
"loss": 4.6828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002975563258232235, |
|
"loss": 4.5391, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002949566724436741, |
|
"loss": 4.3365, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0002923570190641247, |
|
"loss": 3.2316, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00028975736568457536, |
|
"loss": 2.1607, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00028715771230502596, |
|
"loss": 1.778, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_cer": 0.3050481701797491, |
|
"eval_loss": 1.2772819995880127, |
|
"eval_runtime": 143.2917, |
|
"eval_samples_per_second": 33.77, |
|
"eval_steps_per_second": 2.115, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.00028455805892547655, |
|
"loss": 1.6287, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0002819584055459272, |
|
"loss": 1.5046, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0002793587521663778, |
|
"loss": 1.4046, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002767590987868284, |
|
"loss": 1.3378, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.000274159445407279, |
|
"loss": 1.2868, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.0002715597920277296, |
|
"loss": 1.2376, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00026896013864818023, |
|
"loss": 1.2126, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.0002663604852686308, |
|
"loss": 1.1781, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0002637608318890814, |
|
"loss": 1.1426, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00026116117850953207, |
|
"loss": 1.1037, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_cer": 0.18878507694297167, |
|
"eval_loss": 0.7715919017791748, |
|
"eval_runtime": 143.3347, |
|
"eval_samples_per_second": 33.76, |
|
"eval_steps_per_second": 2.114, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00025856152512998266, |
|
"loss": 1.0971, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00025596187175043326, |
|
"loss": 1.0693, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.00025336221837088385, |
|
"loss": 1.0398, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00025076256499133445, |
|
"loss": 1.0282, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.00024816291161178504, |
|
"loss": 0.9997, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0002455632582322357, |
|
"loss": 0.995, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.00024296360485268629, |
|
"loss": 0.9721, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.00024036395147313688, |
|
"loss": 0.9639, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.0002377642980935875, |
|
"loss": 0.9543, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.0002351646447140381, |
|
"loss": 0.9529, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_cer": 0.16591232380706064, |
|
"eval_loss": 0.6726131439208984, |
|
"eval_runtime": 144.4701, |
|
"eval_samples_per_second": 33.495, |
|
"eval_steps_per_second": 2.097, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.0002325649913344887, |
|
"loss": 0.9281, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.00022996533795493934, |
|
"loss": 0.9274, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.00022736568457538994, |
|
"loss": 0.9149, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 0.00022476603119584053, |
|
"loss": 0.9036, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.00022216637781629115, |
|
"loss": 0.8838, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 0.00021956672443674175, |
|
"loss": 0.8872, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.00021696707105719234, |
|
"loss": 0.8661, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.00021436741767764296, |
|
"loss": 0.8529, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 0.00021176776429809356, |
|
"loss": 0.8566, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 0.00020916811091854415, |
|
"loss": 0.8424, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"eval_cer": 0.1511541445751972, |
|
"eval_loss": 0.6138216257095337, |
|
"eval_runtime": 143.2958, |
|
"eval_samples_per_second": 33.769, |
|
"eval_steps_per_second": 2.115, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 0.0002065684575389948, |
|
"loss": 0.8403, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 0.0002039688041594454, |
|
"loss": 0.836, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.000201369150779896, |
|
"loss": 0.8119, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 0.0001987694974003466, |
|
"loss": 0.8298, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 0.0001961698440207972, |
|
"loss": 0.8019, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.0001935701906412478, |
|
"loss": 0.8, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 0.00019097053726169842, |
|
"loss": 0.7909, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.00018837088388214902, |
|
"loss": 0.7941, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"learning_rate": 0.0001857712305025996, |
|
"loss": 0.7888, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 0.00018317157712305026, |
|
"loss": 0.767, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"eval_cer": 0.14326587352903142, |
|
"eval_loss": 0.5884789228439331, |
|
"eval_runtime": 143.5148, |
|
"eval_samples_per_second": 33.718, |
|
"eval_steps_per_second": 2.111, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 0.00018057192374350086, |
|
"loss": 0.7655, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 0.00017797227036395145, |
|
"loss": 0.7571, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 0.00017537261698440207, |
|
"loss": 0.754, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 0.00017277296360485267, |
|
"loss": 0.764, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 0.0001701733102253033, |
|
"loss": 0.7461, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.00016757365684575388, |
|
"loss": 0.7363, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 0.00016497400346620448, |
|
"loss": 0.7315, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 0.0001623743500866551, |
|
"loss": 0.7193, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 0.00015977469670710572, |
|
"loss": 0.7253, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 0.00015717504332755632, |
|
"loss": 0.7201, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"eval_cer": 0.1377699469804733, |
|
"eval_loss": 0.5682498812675476, |
|
"eval_runtime": 145.5022, |
|
"eval_samples_per_second": 33.257, |
|
"eval_steps_per_second": 2.082, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 0.00015457538994800694, |
|
"loss": 0.7203, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 0.00015197573656845753, |
|
"loss": 0.7045, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"learning_rate": 0.00014937608318890813, |
|
"loss": 0.7072, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 0.00014677642980935872, |
|
"loss": 0.6979, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 0.00014417677642980934, |
|
"loss": 0.6971, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 0.00014157712305025997, |
|
"loss": 0.702, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 0.00013897746967071056, |
|
"loss": 0.6767, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 22.54, |
|
"learning_rate": 0.00013637781629116116, |
|
"loss": 0.682, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 22.87, |
|
"learning_rate": 0.00013377816291161178, |
|
"loss": 0.6729, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 0.00013120450606585787, |
|
"loss": 0.664, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"eval_cer": 0.13325197206776154, |
|
"eval_loss": 0.5583386421203613, |
|
"eval_runtime": 143.6376, |
|
"eval_samples_per_second": 33.689, |
|
"eval_steps_per_second": 2.109, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 0.0001286048526863085, |
|
"loss": 0.668, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 23.86, |
|
"learning_rate": 0.00012600519930675908, |
|
"loss": 0.67, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 0.0001234055459272097, |
|
"loss": 0.6608, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 0.0001208058925476603, |
|
"loss": 0.6543, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 24.86, |
|
"learning_rate": 0.0001182062391681109, |
|
"loss": 0.6648, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.19, |
|
"learning_rate": 0.00011560658578856152, |
|
"loss": 0.6473, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 0.00011300693240901212, |
|
"loss": 0.6546, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"learning_rate": 0.00011040727902946273, |
|
"loss": 0.6397, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"learning_rate": 0.00010780762564991333, |
|
"loss": 0.6395, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 26.51, |
|
"learning_rate": 0.00010520797227036395, |
|
"loss": 0.6296, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.51, |
|
"eval_cer": 0.1298170179749127, |
|
"eval_loss": 0.5415648818016052, |
|
"eval_runtime": 142.9992, |
|
"eval_samples_per_second": 33.839, |
|
"eval_steps_per_second": 2.119, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.84, |
|
"learning_rate": 0.00010260831889081456, |
|
"loss": 0.6284, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 27.17, |
|
"learning_rate": 0.00010000866551126515, |
|
"loss": 0.6328, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 27.51, |
|
"learning_rate": 9.740901213171576e-05, |
|
"loss": 0.6233, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 9.480935875216638e-05, |
|
"loss": 0.6226, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 9.220970537261698e-05, |
|
"loss": 0.6214, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 8.961005199306758e-05, |
|
"loss": 0.6195, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 28.83, |
|
"learning_rate": 8.701039861351819e-05, |
|
"loss": 0.6138, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 8.441074523396879e-05, |
|
"loss": 0.6067, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 8.181109185441941e-05, |
|
"loss": 0.6076, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 7.921143847487002e-05, |
|
"loss": 0.6021, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"eval_cer": 0.12716604163972586, |
|
"eval_loss": 0.5376999974250793, |
|
"eval_runtime": 142.2332, |
|
"eval_samples_per_second": 34.022, |
|
"eval_steps_per_second": 2.13, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 7.661178509532061e-05, |
|
"loss": 0.5996, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 7.401213171577122e-05, |
|
"loss": 0.5989, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"learning_rate": 7.141247833622183e-05, |
|
"loss": 0.5924, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 6.881282495667244e-05, |
|
"loss": 0.5858, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 31.48, |
|
"learning_rate": 6.621317157712304e-05, |
|
"loss": 0.5846, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.81, |
|
"learning_rate": 6.363951473136915e-05, |
|
"loss": 0.5906, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 32.15, |
|
"learning_rate": 6.103986135181975e-05, |
|
"loss": 0.5823, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 5.8466204506065855e-05, |
|
"loss": 0.5853, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 32.81, |
|
"learning_rate": 5.586655112651646e-05, |
|
"loss": 0.5759, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 33.14, |
|
"learning_rate": 5.3266897746967065e-05, |
|
"loss": 0.568, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.14, |
|
"eval_cer": 0.1245797232639338, |
|
"eval_loss": 0.5240569710731506, |
|
"eval_runtime": 143.5422, |
|
"eval_samples_per_second": 33.711, |
|
"eval_steps_per_second": 2.111, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.47, |
|
"learning_rate": 5.066724436741767e-05, |
|
"loss": 0.574, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 33.8, |
|
"learning_rate": 4.806759098786828e-05, |
|
"loss": 0.5682, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 4.546793760831888e-05, |
|
"loss": 0.5636, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 34.47, |
|
"learning_rate": 4.28682842287695e-05, |
|
"loss": 0.5651, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 4.02686308492201e-05, |
|
"loss": 0.5693, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 35.13, |
|
"learning_rate": 3.766897746967071e-05, |
|
"loss": 0.5562, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 35.46, |
|
"learning_rate": 3.5069324090121316e-05, |
|
"loss": 0.5564, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 35.79, |
|
"learning_rate": 3.2469670710571924e-05, |
|
"loss": 0.5613, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 36.12, |
|
"learning_rate": 2.9870017331022526e-05, |
|
"loss": 0.5519, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"learning_rate": 2.7270363951473134e-05, |
|
"loss": 0.5519, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"eval_cer": 0.12275313591103065, |
|
"eval_loss": 0.5184260606765747, |
|
"eval_runtime": 142.3486, |
|
"eval_samples_per_second": 33.994, |
|
"eval_steps_per_second": 2.129, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 2.4670710571923742e-05, |
|
"loss": 0.5605, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"learning_rate": 2.207105719237435e-05, |
|
"loss": 0.5512, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 37.45, |
|
"learning_rate": 1.9471403812824955e-05, |
|
"loss": 0.5539, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 37.78, |
|
"learning_rate": 1.687175043327556e-05, |
|
"loss": 0.5467, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 38.11, |
|
"learning_rate": 1.4272097053726168e-05, |
|
"loss": 0.5464, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 38.44, |
|
"learning_rate": 1.1672443674176775e-05, |
|
"loss": 0.5457, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"learning_rate": 9.072790294627383e-06, |
|
"loss": 0.5445, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 39.11, |
|
"learning_rate": 6.473136915077989e-06, |
|
"loss": 0.5395, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 39.44, |
|
"learning_rate": 3.8734835355285955e-06, |
|
"loss": 0.542, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 39.77, |
|
"learning_rate": 1.2738301559792027e-06, |
|
"loss": 0.5395, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.77, |
|
"eval_cer": 0.12272080693133325, |
|
"eval_loss": 0.5156292915344238, |
|
"eval_runtime": 142.2326, |
|
"eval_samples_per_second": 34.022, |
|
"eval_steps_per_second": 2.13, |
|
"step": 12000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 12040, |
|
"num_train_epochs": 40, |
|
"save_steps": 1000, |
|
"total_flos": 1.9494106689613857e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|