{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.008233910267182,
  "eval_steps": 500,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08401949252226516,
      "grad_norm": 19.443201065063477,
      "learning_rate": 0.0002455,
      "loss": 4.6277,
      "step": 500
    },
    {
      "epoch": 0.08401949252226516,
      "eval_loss": 1.7780689001083374,
      "eval_runtime": 184.9372,
      "eval_samples_per_second": 37.97,
      "eval_steps_per_second": 2.374,
      "eval_wer": 0.9507301452317996,
      "step": 500
    },
    {
      "epoch": 0.16803898504453033,
      "grad_norm": 10.621150970458984,
      "learning_rate": 0.0002782777777777778,
      "loss": 1.2514,
      "step": 1000
    },
    {
      "epoch": 0.16803898504453033,
      "eval_loss": 1.046749472618103,
      "eval_runtime": 183.9043,
      "eval_samples_per_second": 38.183,
      "eval_steps_per_second": 2.387,
      "eval_wer": 0.7116623925377695,
      "step": 1000
    },
    {
      "epoch": 0.2520584775667955,
      "grad_norm": 6.651599884033203,
      "learning_rate": 0.00025049999999999996,
      "loss": 0.986,
      "step": 1500
    },
    {
      "epoch": 0.2520584775667955,
      "eval_loss": 0.8940034508705139,
      "eval_runtime": 185.3076,
      "eval_samples_per_second": 37.894,
      "eval_steps_per_second": 2.369,
      "eval_wer": 0.6577817935396431,
      "step": 1500
    },
    {
      "epoch": 0.33607797008906065,
      "grad_norm": 16.908123016357422,
      "learning_rate": 0.00022277777777777774,
      "loss": 0.8702,
      "step": 2000
    },
    {
      "epoch": 0.33607797008906065,
      "eval_loss": 0.8157631158828735,
      "eval_runtime": 185.5186,
      "eval_samples_per_second": 37.851,
      "eval_steps_per_second": 2.366,
      "eval_wer": 0.5836644122297665,
      "step": 2000
    },
    {
      "epoch": 0.4200974626113258,
      "grad_norm": 10.515896797180176,
      "learning_rate": 0.00019505555555555555,
      "loss": 0.8063,
      "step": 2500
    },
    {
      "epoch": 0.4200974626113258,
      "eval_loss": 0.7131509184837341,
      "eval_runtime": 185.8391,
      "eval_samples_per_second": 37.785,
      "eval_steps_per_second": 2.362,
      "eval_wer": 0.5281095948657304,
      "step": 2500
    },
    {
      "epoch": 0.504116955133591,
      "grad_norm": 3.7379024028778076,
      "learning_rate": 0.00016727777777777778,
      "loss": 0.7518,
      "step": 3000
    },
    {
      "epoch": 0.504116955133591,
      "eval_loss": 0.7195897102355957,
      "eval_runtime": 183.5821,
      "eval_samples_per_second": 38.25,
      "eval_steps_per_second": 2.391,
      "eval_wer": 0.5187950942744389,
      "step": 3000
    },
    {
      "epoch": 0.5881364476558562,
      "grad_norm": 5.922393321990967,
      "learning_rate": 0.0001395,
      "loss": 0.7045,
      "step": 3500
    },
    {
      "epoch": 0.5881364476558562,
      "eval_loss": 0.6231045722961426,
      "eval_runtime": 184.2635,
      "eval_samples_per_second": 38.108,
      "eval_steps_per_second": 2.382,
      "eval_wer": 0.4771256593895747,
      "step": 3500
    },
    {
      "epoch": 0.6721559401781213,
      "grad_norm": 8.91370964050293,
      "learning_rate": 0.00011177777777777777,
      "loss": 0.6726,
      "step": 4000
    },
    {
      "epoch": 0.6721559401781213,
      "eval_loss": 0.6074743270874023,
      "eval_runtime": 184.3908,
      "eval_samples_per_second": 38.082,
      "eval_steps_per_second": 2.381,
      "eval_wer": 0.4517333475066105,
      "step": 4000
    },
    {
      "epoch": 0.7561754327003865,
      "grad_norm": 12.337292671203613,
      "learning_rate": 8.405555555555556e-05,
      "loss": 0.6355,
      "step": 4500
    },
    {
      "epoch": 0.7561754327003865,
      "eval_loss": 0.5601897239685059,
      "eval_runtime": 184.3021,
      "eval_samples_per_second": 38.1,
      "eval_steps_per_second": 2.382,
      "eval_wer": 0.4328784597191034,
      "step": 4500
    },
    {
      "epoch": 0.8401949252226516,
      "grad_norm": 18.518247604370117,
      "learning_rate": 5.6333333333333325e-05,
      "loss": 0.5938,
      "step": 5000
    },
    {
      "epoch": 0.8401949252226516,
      "eval_loss": 0.5438016653060913,
      "eval_runtime": 185.1047,
      "eval_samples_per_second": 37.935,
      "eval_steps_per_second": 2.372,
      "eval_wer": 0.4225939754713722,
      "step": 5000
    },
    {
      "epoch": 0.9242144177449169,
      "grad_norm": 5.622241020202637,
      "learning_rate": 2.8555555555555556e-05,
      "loss": 0.5842,
      "step": 5500
    },
    {
      "epoch": 0.9242144177449169,
      "eval_loss": 0.5245193839073181,
      "eval_runtime": 184.9427,
      "eval_samples_per_second": 37.969,
      "eval_steps_per_second": 2.374,
      "eval_wer": 0.4090939289653065,
      "step": 5500
    },
    {
      "epoch": 1.008233910267182,
      "grad_norm": 10.519143104553223,
      "learning_rate": 7.777777777777777e-07,
      "loss": 0.557,
      "step": 6000
    },
    {
      "epoch": 1.008233910267182,
      "eval_loss": 0.5155353546142578,
      "eval_runtime": 185.543,
      "eval_samples_per_second": 37.846,
      "eval_steps_per_second": 2.366,
      "eval_wer": 0.39834438406037814,
      "step": 6000
    },
    {
      "epoch": 1.008233910267182,
      "step": 6000,
      "total_flos": 6.665403914161851e+18,
      "train_loss": 1.086754165649414,
      "train_runtime": 4718.794,
      "train_samples_per_second": 10.172,
      "train_steps_per_second": 1.272
    }
  ],
  "logging_steps": 500,
  "max_steps": 6000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.665403914161851e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}