|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.3166023166023164,
  "eval_steps": 200,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07722007722007722,
      "eval_loss": Infinity,
      "eval_runtime": 199.2963,
      "eval_samples_per_second": 35.234,
      "eval_steps_per_second": 4.406,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.15444015444015444,
      "eval_loss": Infinity,
      "eval_runtime": 187.0234,
      "eval_samples_per_second": 37.546,
      "eval_steps_per_second": 4.695,
      "eval_wer": 0.9937672860426162,
      "step": 400
    },
    {
      "epoch": 0.19305019305019305,
      "grad_norm": 4.210392951965332,
      "learning_rate": 0.00029699999999999996,
      "loss": 3.9317,
      "step": 500
    },
    {
      "epoch": 0.23166023166023167,
      "eval_loss": Infinity,
      "eval_runtime": 180.1705,
      "eval_samples_per_second": 38.974,
      "eval_steps_per_second": 4.873,
      "eval_wer": 0.7576253359822368,
      "step": 600
    },
    {
      "epoch": 0.3088803088803089,
      "eval_loss": Infinity,
      "eval_runtime": 179.0087,
      "eval_samples_per_second": 39.227,
      "eval_steps_per_second": 4.905,
      "eval_wer": 0.6941685170036228,
      "step": 800
    },
    {
      "epoch": 0.3861003861003861,
      "grad_norm": 3.149083137512207,
      "learning_rate": 0.0002730545454545454,
      "loss": 0.9699,
      "step": 1000
    },
    {
      "epoch": 0.3861003861003861,
      "eval_loss": Infinity,
      "eval_runtime": 179.6063,
      "eval_samples_per_second": 39.097,
      "eval_steps_per_second": 4.888,
      "eval_wer": 0.5762923142846013,
      "step": 1000
    },
    {
      "epoch": 0.46332046332046334,
      "eval_loss": Infinity,
      "eval_runtime": 180.175,
      "eval_samples_per_second": 38.973,
      "eval_steps_per_second": 4.873,
      "eval_wer": 0.5518548816433589,
      "step": 1200
    },
    {
      "epoch": 0.5405405405405406,
      "eval_loss": Infinity,
      "eval_runtime": 180.7382,
      "eval_samples_per_second": 38.852,
      "eval_steps_per_second": 4.858,
      "eval_wer": 0.5173931673873242,
      "step": 1400
    },
    {
      "epoch": 0.5791505791505791,
      "grad_norm": 4.50093412399292,
      "learning_rate": 0.0002458363636363636,
      "loss": 0.8031,
      "step": 1500
    },
    {
      "epoch": 0.6177606177606177,
      "eval_loss": Infinity,
      "eval_runtime": 180.2438,
      "eval_samples_per_second": 38.958,
      "eval_steps_per_second": 4.871,
      "eval_wer": 0.5338059808084349,
      "step": 1600
    },
    {
      "epoch": 0.694980694980695,
      "eval_loss": Infinity,
      "eval_runtime": 180.7932,
      "eval_samples_per_second": 38.84,
      "eval_steps_per_second": 4.856,
      "eval_wer": 0.47772454001272513,
      "step": 1800
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 3.7705626487731934,
      "learning_rate": 0.0002186181818181818,
      "loss": 0.7169,
      "step": 2000
    },
    {
      "epoch": 0.7722007722007722,
      "eval_loss": Infinity,
      "eval_runtime": 179.9877,
      "eval_samples_per_second": 39.014,
      "eval_steps_per_second": 4.878,
      "eval_wer": 0.45044343162842637,
      "step": 2000
    },
    {
      "epoch": 0.8494208494208494,
      "eval_loss": Infinity,
      "eval_runtime": 182.9186,
      "eval_samples_per_second": 38.389,
      "eval_steps_per_second": 4.8,
      "eval_wer": 0.4499500084401335,
      "step": 2200
    },
    {
      "epoch": 0.9266409266409267,
      "eval_loss": Infinity,
      "eval_runtime": 180.2372,
      "eval_samples_per_second": 38.96,
      "eval_steps_per_second": 4.871,
      "eval_wer": 0.4431719320114786,
      "step": 2400
    },
    {
      "epoch": 0.9652509652509652,
      "grad_norm": 3.5582635402679443,
      "learning_rate": 0.00019134545454545454,
      "loss": 0.6687,
      "step": 2500
    },
    {
      "epoch": 1.0038610038610039,
      "eval_loss": Infinity,
      "eval_runtime": 180.3998,
      "eval_samples_per_second": 38.925,
      "eval_steps_per_second": 4.867,
      "eval_wer": 0.4175658655032267,
      "step": 2600
    },
    {
      "epoch": 1.0810810810810811,
      "eval_loss": Infinity,
      "eval_runtime": 180.1365,
      "eval_samples_per_second": 38.982,
      "eval_steps_per_second": 4.874,
      "eval_wer": 0.40537311882409466,
      "step": 2800
    },
    {
      "epoch": 1.1583011583011582,
      "grad_norm": 0.5179678201675415,
      "learning_rate": 0.0001641272727272727,
      "loss": 0.5609,
      "step": 3000
    },
    {
      "epoch": 1.1583011583011582,
      "eval_loss": Infinity,
      "eval_runtime": 180.8398,
      "eval_samples_per_second": 38.83,
      "eval_steps_per_second": 4.855,
      "eval_wer": 0.4009193253087141,
      "step": 3000
    },
    {
      "epoch": 1.2355212355212355,
      "eval_loss": Infinity,
      "eval_runtime": 181.6098,
      "eval_samples_per_second": 38.665,
      "eval_steps_per_second": 4.835,
      "eval_wer": 0.4022567618454027,
      "step": 3200
    },
    {
      "epoch": 1.3127413127413128,
      "eval_loss": Infinity,
      "eval_runtime": 182.1259,
      "eval_samples_per_second": 38.556,
      "eval_steps_per_second": 4.821,
      "eval_wer": 0.39188189007050755,
      "step": 3400
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 0.5364826321601868,
      "learning_rate": 0.00013696363636363636,
      "loss": 0.5324,
      "step": 3500
    },
    {
      "epoch": 1.3899613899613898,
      "eval_loss": Infinity,
      "eval_runtime": 181.3225,
      "eval_samples_per_second": 38.727,
      "eval_steps_per_second": 4.842,
      "eval_wer": 0.3794813862594627,
      "step": 3600
    },
    {
      "epoch": 1.4671814671814671,
      "eval_loss": Infinity,
      "eval_runtime": 181.5845,
      "eval_samples_per_second": 38.671,
      "eval_steps_per_second": 4.835,
      "eval_wer": 0.37523534987599494,
      "step": 3800
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 1.197091817855835,
      "learning_rate": 0.00010974545454545454,
      "loss": 0.5196,
      "step": 4000
    },
    {
      "epoch": 1.5444015444015444,
      "eval_loss": Infinity,
      "eval_runtime": 181.3235,
      "eval_samples_per_second": 38.726,
      "eval_steps_per_second": 4.842,
      "eval_wer": 0.36617194499629935,
      "step": 4000
    },
    {
      "epoch": 1.6216216216216215,
      "eval_loss": Infinity,
      "eval_runtime": 181.9653,
      "eval_samples_per_second": 38.59,
      "eval_steps_per_second": 4.825,
      "eval_wer": 0.37034007245529976,
      "step": 4200
    },
    {
      "epoch": 1.698841698841699,
      "eval_loss": Infinity,
      "eval_runtime": 181.5241,
      "eval_samples_per_second": 38.684,
      "eval_steps_per_second": 4.837,
      "eval_wer": 0.3613935309623051,
      "step": 4400
    },
    {
      "epoch": 1.7374517374517375,
      "grad_norm": 0.7301501631736755,
      "learning_rate": 8.247272727272728e-05,
      "loss": 0.4967,
      "step": 4500
    },
    {
      "epoch": 1.776061776061776,
      "eval_loss": Infinity,
      "eval_runtime": 181.6896,
      "eval_samples_per_second": 38.648,
      "eval_steps_per_second": 4.832,
      "eval_wer": 0.3530313064028151,
      "step": 4600
    },
    {
      "epoch": 1.8532818532818531,
      "eval_loss": Infinity,
      "eval_runtime": 184.0459,
      "eval_samples_per_second": 38.154,
      "eval_steps_per_second": 4.771,
      "eval_wer": 0.34805812005765263,
      "step": 4800
    },
    {
      "epoch": 1.9305019305019306,
      "grad_norm": 1.2918003797531128,
      "learning_rate": 5.519999999999999e-05,
      "loss": 0.4735,
      "step": 5000
    },
    {
      "epoch": 1.9305019305019306,
      "eval_loss": Infinity,
      "eval_runtime": 182.1737,
      "eval_samples_per_second": 38.546,
      "eval_steps_per_second": 4.82,
      "eval_wer": 0.35057717528209525,
      "step": 5000
    },
    {
      "epoch": 2.0077220077220077,
      "eval_loss": Infinity,
      "eval_runtime": 182.5384,
      "eval_samples_per_second": 38.469,
      "eval_steps_per_second": 4.81,
      "eval_wer": 0.3432277667406801,
      "step": 5200
    },
    {
      "epoch": 2.0849420849420848,
      "eval_loss": Infinity,
      "eval_runtime": 182.9254,
      "eval_samples_per_second": 38.387,
      "eval_steps_per_second": 4.8,
      "eval_wer": 0.33689117421733994,
      "step": 5400
    },
    {
      "epoch": 2.1235521235521237,
      "grad_norm": 0.6086732745170593,
      "learning_rate": 2.7927272727272724e-05,
      "loss": 0.4244,
      "step": 5500
    },
    {
      "epoch": 2.1621621621621623,
      "eval_loss": Infinity,
      "eval_runtime": 182.4452,
      "eval_samples_per_second": 38.488,
      "eval_steps_per_second": 4.812,
      "eval_wer": 0.32959370495890306,
      "step": 5600
    },
    {
      "epoch": 2.2393822393822393,
      "eval_loss": Infinity,
      "eval_runtime": 182.1862,
      "eval_samples_per_second": 38.543,
      "eval_steps_per_second": 4.819,
      "eval_wer": 0.32954176567592486,
      "step": 5800
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 1.0216798782348633,
      "learning_rate": 6.545454545454546e-07,
      "loss": 0.3674,
      "step": 6000
    },
    {
      "epoch": 2.3166023166023164,
      "eval_loss": Infinity,
      "eval_runtime": 182.1846,
      "eval_samples_per_second": 38.543,
      "eval_steps_per_second": 4.819,
      "eval_wer": 0.3289055094594419,
      "step": 6000
    },
    {
      "epoch": 2.3166023166023164,
      "step": 6000,
      "total_flos": 1.261526897313927e+19,
      "train_loss": 0.8721037826538086,
      "train_runtime": 9008.7975,
      "train_samples_per_second": 10.656,
      "train_steps_per_second": 0.666
    }
  ],
  "logging_steps": 500,
  "max_steps": 6000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.261526897313927e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|