|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.172839506172839, |
|
"eval_steps": 200, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.30864197530864196, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.8693, |
|
"eval_samples_per_second": 39.702, |
|
"eval_steps_per_second": 4.964, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.9503, |
|
"eval_samples_per_second": 39.909, |
|
"eval_steps_per_second": 4.99, |
|
"eval_wer": 0.8138755794476257, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7716049382716049, |
|
"grad_norm": 2.980449914932251, |
|
"learning_rate": 0.0002958, |
|
"loss": 3.6854, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 174.7168, |
|
"eval_samples_per_second": 40.191, |
|
"eval_steps_per_second": 5.025, |
|
"eval_wer": 0.6042746029891057, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.3896, |
|
"eval_samples_per_second": 39.81, |
|
"eval_steps_per_second": 4.978, |
|
"eval_wer": 0.503447469907678, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"grad_norm": 0.7878520488739014, |
|
"learning_rate": 0.0002578285714285714, |
|
"loss": 0.7236, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.5785, |
|
"eval_samples_per_second": 39.994, |
|
"eval_steps_per_second": 5.001, |
|
"eval_wer": 0.44518457922688376, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.3263, |
|
"eval_samples_per_second": 40.051, |
|
"eval_steps_per_second": 5.008, |
|
"eval_wer": 0.4584940204900471, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.1604938271604937, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 174.0761, |
|
"eval_samples_per_second": 40.339, |
|
"eval_steps_per_second": 5.044, |
|
"eval_wer": 0.4735564125537247, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"grad_norm": 1.6092010736465454, |
|
"learning_rate": 0.0002153142857142857, |
|
"loss": 0.6244, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 174.8676, |
|
"eval_samples_per_second": 40.156, |
|
"eval_steps_per_second": 5.021, |
|
"eval_wer": 0.4445742926518899, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 174.4183, |
|
"eval_samples_per_second": 40.26, |
|
"eval_steps_per_second": 5.034, |
|
"eval_wer": 0.4992274031656993, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"grad_norm": 13.045821189880371, |
|
"learning_rate": 0.0001727142857142857, |
|
"loss": 0.8045, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.0054, |
|
"eval_samples_per_second": 39.897, |
|
"eval_steps_per_second": 4.988, |
|
"eval_wer": 0.699323490839209, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.3950617283950617, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.9958, |
|
"eval_samples_per_second": 39.899, |
|
"eval_steps_per_second": 4.989, |
|
"eval_wer": 0.9154558321322375, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.4175, |
|
"eval_samples_per_second": 40.03, |
|
"eval_steps_per_second": 5.005, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.8580246913580245, |
|
"grad_norm": 1.601144552230835, |
|
"learning_rate": 0.0001302, |
|
"loss": 2.3067, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.012345679012346, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.9931, |
|
"eval_samples_per_second": 39.899, |
|
"eval_steps_per_second": 4.989, |
|
"eval_wer": 0.998558684897355, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.320987654320987, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.0274, |
|
"eval_samples_per_second": 40.119, |
|
"eval_steps_per_second": 5.016, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"grad_norm": 0.7471032738685608, |
|
"learning_rate": 8.759999999999999e-05, |
|
"loss": 3.008, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.5031, |
|
"eval_samples_per_second": 40.011, |
|
"eval_steps_per_second": 5.003, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.938271604938271, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.2017, |
|
"eval_samples_per_second": 40.08, |
|
"eval_steps_per_second": 5.011, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.246913580246914, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.0971, |
|
"eval_samples_per_second": 39.876, |
|
"eval_steps_per_second": 4.986, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.401234567901234, |
|
"grad_norm": 1.3811966180801392, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 3.032, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.4078, |
|
"eval_samples_per_second": 39.805, |
|
"eval_steps_per_second": 4.977, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.864197530864198, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.4996, |
|
"eval_samples_per_second": 40.011, |
|
"eval_steps_per_second": 5.003, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.172839506172839, |
|
"grad_norm": 0.011152578517794609, |
|
"learning_rate": 2.314285714285714e-06, |
|
"loss": 3.0173, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.172839506172839, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.8629, |
|
"eval_samples_per_second": 39.929, |
|
"eval_steps_per_second": 4.993, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.172839506172839, |
|
"step": 4000, |
|
"total_flos": 3.3889468875697558e+19, |
|
"train_loss": 2.1502342529296876, |
|
"train_runtime": 11441.7295, |
|
"train_samples_per_second": 22.374, |
|
"train_steps_per_second": 0.35 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3889468875697558e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|