|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.9998887776665555, |
|
"global_step": 22475, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.982e-05, |
|
"loss": 10.9521, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.9866994413407824e-05, |
|
"loss": 2.9894, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_cer": 1.0, |
|
"eval_loss": 2.925715684890747, |
|
"eval_runtime": 1316.2549, |
|
"eval_samples_per_second": 11.314, |
|
"eval_steps_per_second": 1.415, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.973291620111732e-05, |
|
"loss": 2.6003, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9598837988826818e-05, |
|
"loss": 0.7104, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_cer": 0.045692850294096206, |
|
"eval_loss": 0.2129213511943817, |
|
"eval_runtime": 1315.3965, |
|
"eval_samples_per_second": 11.321, |
|
"eval_steps_per_second": 1.416, |
|
"eval_wer": 0.25382939389937936, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.9464759776536317e-05, |
|
"loss": 0.3546, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.933068156424581e-05, |
|
"loss": 0.2853, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_cer": 0.027426083445944053, |
|
"eval_loss": 0.11085120588541031, |
|
"eval_runtime": 1321.34, |
|
"eval_samples_per_second": 11.27, |
|
"eval_steps_per_second": 1.409, |
|
"eval_wer": 0.15825135349267133, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.9196603351955308e-05, |
|
"loss": 0.2563, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.9062525139664803e-05, |
|
"loss": 0.2327, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_cer": 0.02314500709307346, |
|
"eval_loss": 0.09091737866401672, |
|
"eval_runtime": 1319.6784, |
|
"eval_samples_per_second": 11.285, |
|
"eval_steps_per_second": 1.411, |
|
"eval_wer": 0.1320315594876535, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.8928446927374302e-05, |
|
"loss": 0.2072, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.8794368715083797e-05, |
|
"loss": 0.1917, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_cer": 0.02055449330783939, |
|
"eval_loss": 0.07748183608055115, |
|
"eval_runtime": 1318.284, |
|
"eval_samples_per_second": 11.297, |
|
"eval_steps_per_second": 1.412, |
|
"eval_wer": 0.11881024692988247, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.8660290502793296e-05, |
|
"loss": 0.1853, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8526212290502795e-05, |
|
"loss": 0.1803, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_cer": 0.018439187408532995, |
|
"eval_loss": 0.0698164626955986, |
|
"eval_runtime": 1319.3199, |
|
"eval_samples_per_second": 11.288, |
|
"eval_steps_per_second": 1.411, |
|
"eval_wer": 0.10547339231480259, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.839213407821229e-05, |
|
"loss": 0.1724, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.825805586592179e-05, |
|
"loss": 0.1661, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_cer": 0.01694487588523239, |
|
"eval_loss": 0.06446516513824463, |
|
"eval_runtime": 1319.092, |
|
"eval_samples_per_second": 11.29, |
|
"eval_steps_per_second": 1.412, |
|
"eval_wer": 0.09606496764822395, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.8123977653631284e-05, |
|
"loss": 0.161, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.7989899441340783e-05, |
|
"loss": 0.1635, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_cer": 0.017010760164402304, |
|
"eval_loss": 0.0639302060008049, |
|
"eval_runtime": 1319.0191, |
|
"eval_samples_per_second": 11.29, |
|
"eval_steps_per_second": 1.412, |
|
"eval_wer": 0.09642809982833751, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.785582122905028e-05, |
|
"loss": 0.1547, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.7721743016759777e-05, |
|
"loss": 0.1555, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.015554297056795053, |
|
"eval_loss": 0.0592055507004261, |
|
"eval_runtime": 1320.0874, |
|
"eval_samples_per_second": 11.281, |
|
"eval_steps_per_second": 1.411, |
|
"eval_wer": 0.08808431268981909, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.7587664804469276e-05, |
|
"loss": 0.1405, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.745358659217877e-05, |
|
"loss": 0.1386, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_cer": 0.014650140459676017, |
|
"eval_loss": 0.055936481803655624, |
|
"eval_runtime": 1320.1961, |
|
"eval_samples_per_second": 11.28, |
|
"eval_steps_per_second": 1.41, |
|
"eval_wer": 0.08208437871385184, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.731950837988827e-05, |
|
"loss": 0.1353, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.7185430167597765e-05, |
|
"loss": 0.1338, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_cer": 0.014644533286980706, |
|
"eval_loss": 0.05477109178900719, |
|
"eval_runtime": 1321.7173, |
|
"eval_samples_per_second": 11.267, |
|
"eval_steps_per_second": 1.409, |
|
"eval_wer": 0.08314901624191205, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.7051620111731843e-05, |
|
"loss": 0.1345, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.691754189944134e-05, |
|
"loss": 0.1307, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_cer": 0.013712340826385111, |
|
"eval_loss": 0.0528746023774147, |
|
"eval_runtime": 1321.5984, |
|
"eval_samples_per_second": 11.268, |
|
"eval_steps_per_second": 1.409, |
|
"eval_wer": 0.07593589066420177, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.3624573378839592e-05, |
|
"loss": 0.1264, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.29419795221843e-05, |
|
"loss": 0.1297, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_cer": 0.013381517637361713, |
|
"eval_loss": 0.05041037127375603, |
|
"eval_runtime": 1303.0852, |
|
"eval_samples_per_second": 11.428, |
|
"eval_steps_per_second": 1.429, |
|
"eval_wer": 0.07453287996830846, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.225938566552901e-05, |
|
"loss": 0.122, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.1578156996587031e-05, |
|
"loss": 0.1201, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_cer": 0.013103962588943778, |
|
"eval_loss": 0.04994073510169983, |
|
"eval_runtime": 1305.505, |
|
"eval_samples_per_second": 11.407, |
|
"eval_steps_per_second": 1.426, |
|
"eval_wer": 0.07336095338703288, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.089556313993174e-05, |
|
"loss": 0.1185, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.0214334470989761e-05, |
|
"loss": 0.1152, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_cer": 0.012759121468182099, |
|
"eval_loss": 0.0484052337706089, |
|
"eval_runtime": 1304.2248, |
|
"eval_samples_per_second": 11.418, |
|
"eval_steps_per_second": 1.428, |
|
"eval_wer": 0.07121517232272548, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.531740614334472e-06, |
|
"loss": 0.1165, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 8.849146757679181e-06, |
|
"loss": 0.1144, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_cer": 0.012485771799285646, |
|
"eval_loss": 0.04772350192070007, |
|
"eval_runtime": 1302.5642, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.429, |
|
"eval_wer": 0.06949029446718606, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.166552901023892e-06, |
|
"loss": 0.1129, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.483959044368601e-06, |
|
"loss": 0.1179, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_cer": 0.012212422130389193, |
|
"eval_loss": 0.04678424075245857, |
|
"eval_runtime": 1303.8755, |
|
"eval_samples_per_second": 11.421, |
|
"eval_steps_per_second": 1.428, |
|
"eval_wer": 0.0678809586689555, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.8040955631399314e-06, |
|
"loss": 0.1118, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.1215017064846416e-06, |
|
"loss": 0.1112, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.012117100194568893, |
|
"eval_loss": 0.04677628353238106, |
|
"eval_runtime": 1306.4884, |
|
"eval_samples_per_second": 11.398, |
|
"eval_steps_per_second": 1.425, |
|
"eval_wer": 0.0676416215502443, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 5.438907849829352e-06, |
|
"loss": 0.1117, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.756313993174062e-06, |
|
"loss": 0.1141, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_cer": 0.012068037433484913, |
|
"eval_loss": 0.04616771265864372, |
|
"eval_runtime": 1302.5466, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.43, |
|
"eval_wer": 0.06683282714908227, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 4.073720136518771e-06, |
|
"loss": 0.1108, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.3911262798634816e-06, |
|
"loss": 0.1085, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_cer": 0.011937670668318913, |
|
"eval_loss": 0.04581034556031227, |
|
"eval_runtime": 1302.9709, |
|
"eval_samples_per_second": 11.429, |
|
"eval_steps_per_second": 1.429, |
|
"eval_wer": 0.06640367093622078, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.708532423208191e-06, |
|
"loss": 0.1132, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.025938566552901e-06, |
|
"loss": 0.105, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_cer": 0.01189281328675642, |
|
"eval_loss": 0.04555269330739975, |
|
"eval_runtime": 1307.8114, |
|
"eval_samples_per_second": 11.387, |
|
"eval_steps_per_second": 1.424, |
|
"eval_wer": 0.06604053875610723, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.3433447098976108e-06, |
|
"loss": 0.1123, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 6.621160409556314e-07, |
|
"loss": 0.1072, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_cer": 0.011854964871063065, |
|
"eval_loss": 0.04538210481405258, |
|
"eval_runtime": 1302.5621, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.429, |
|
"eval_wer": 0.06575168361283507, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 22475, |
|
"total_flos": 1.360184451953694e+20, |
|
"train_loss": 0.0023267049651522526, |
|
"train_runtime": 2078.5155, |
|
"train_samples_per_second": 346.038, |
|
"train_steps_per_second": 10.813 |
|
} |
|
], |
|
"max_steps": 22475, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.360184451953694e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|