|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4722536806342017, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.056625141562853906, |
|
"eval_loss": 3.599447011947632, |
|
"eval_runtime": 153.6415, |
|
"eval_samples_per_second": 36.813, |
|
"eval_steps_per_second": 4.602, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11325028312570781, |
|
"eval_loss": 3.0223705768585205, |
|
"eval_runtime": 151.1357, |
|
"eval_samples_per_second": 37.423, |
|
"eval_steps_per_second": 4.678, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16987542468856173, |
|
"eval_loss": 1.9626648426055908, |
|
"eval_runtime": 153.3369, |
|
"eval_samples_per_second": 36.886, |
|
"eval_steps_per_second": 4.611, |
|
"eval_wer": 0.9060519009484682, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22650056625141562, |
|
"eval_loss": 0.9921229481697083, |
|
"eval_runtime": 154.0126, |
|
"eval_samples_per_second": 36.724, |
|
"eval_steps_per_second": 4.591, |
|
"eval_wer": 0.6951581582706103, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28312570781426954, |
|
"grad_norm": 1.7500211000442505, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.6184, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28312570781426954, |
|
"eval_loss": 0.8504024147987366, |
|
"eval_runtime": 153.4805, |
|
"eval_samples_per_second": 36.852, |
|
"eval_steps_per_second": 4.606, |
|
"eval_wer": 0.6157981736771998, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33975084937712347, |
|
"eval_loss": 0.7935531735420227, |
|
"eval_runtime": 152.4296, |
|
"eval_samples_per_second": 37.106, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.5963634029304618, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39637599093997733, |
|
"eval_loss": 0.7600815892219543, |
|
"eval_runtime": 156.4822, |
|
"eval_samples_per_second": 36.145, |
|
"eval_steps_per_second": 4.518, |
|
"eval_wer": 0.5606072764038452, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45300113250283125, |
|
"eval_loss": 0.6909334063529968, |
|
"eval_runtime": 152.429, |
|
"eval_samples_per_second": 37.106, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.5127666062172008, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5096262740656852, |
|
"eval_loss": 0.6616554856300354, |
|
"eval_runtime": 152.3205, |
|
"eval_samples_per_second": 37.132, |
|
"eval_steps_per_second": 4.642, |
|
"eval_wer": 0.4916146426794627, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5662514156285391, |
|
"grad_norm": 3.9567878246307373, |
|
"learning_rate": 0.00022914285714285712, |
|
"loss": 0.4662, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5662514156285391, |
|
"eval_loss": 0.6466770172119141, |
|
"eval_runtime": 152.3467, |
|
"eval_samples_per_second": 37.126, |
|
"eval_steps_per_second": 4.641, |
|
"eval_wer": 0.4811510006258927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.622876557191393, |
|
"eval_loss": 0.6144490838050842, |
|
"eval_runtime": 152.8109, |
|
"eval_samples_per_second": 37.013, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.46394697565437887, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6795016987542469, |
|
"eval_loss": 0.5942133069038391, |
|
"eval_runtime": 157.9769, |
|
"eval_samples_per_second": 35.803, |
|
"eval_steps_per_second": 4.475, |
|
"eval_wer": 0.45369196450064997, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7361268403171007, |
|
"eval_loss": 0.5675162076950073, |
|
"eval_runtime": 165.0294, |
|
"eval_samples_per_second": 34.273, |
|
"eval_steps_per_second": 4.284, |
|
"eval_wer": 0.4351077658840333, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7927519818799547, |
|
"eval_loss": 0.5538555383682251, |
|
"eval_runtime": 154.4584, |
|
"eval_samples_per_second": 36.618, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.42295902810097735, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8493771234428086, |
|
"grad_norm": 1.9575612545013428, |
|
"learning_rate": 0.00015785714285714285, |
|
"loss": 0.3508, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8493771234428086, |
|
"eval_loss": 0.5447892546653748, |
|
"eval_runtime": 152.2149, |
|
"eval_samples_per_second": 37.158, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.41440516120749143, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9060022650056625, |
|
"eval_loss": 0.5326293110847473, |
|
"eval_runtime": 157.3458, |
|
"eval_samples_per_second": 35.946, |
|
"eval_steps_per_second": 4.493, |
|
"eval_wer": 0.406621623790342, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9626274065685164, |
|
"eval_loss": 0.5154709219932556, |
|
"eval_runtime": 154.1323, |
|
"eval_samples_per_second": 36.696, |
|
"eval_steps_per_second": 4.587, |
|
"eval_wer": 0.3989343775577346, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0192525481313703, |
|
"eval_loss": 0.5067195296287537, |
|
"eval_runtime": 153.6675, |
|
"eval_samples_per_second": 36.807, |
|
"eval_steps_per_second": 4.601, |
|
"eval_wer": 0.38559804849866, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0758776896942241, |
|
"eval_loss": 0.49161842465400696, |
|
"eval_runtime": 153.2473, |
|
"eval_samples_per_second": 36.908, |
|
"eval_steps_per_second": 4.613, |
|
"eval_wer": 0.3724061562163984, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1325028312570782, |
|
"grad_norm": 0.9259862303733826, |
|
"learning_rate": 8.671428571428571e-05, |
|
"loss": 0.2774, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1325028312570782, |
|
"eval_loss": 0.48551619052886963, |
|
"eval_runtime": 153.8121, |
|
"eval_samples_per_second": 36.772, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.36971000304922086, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.189127972819932, |
|
"eval_loss": 0.4797590672969818, |
|
"eval_runtime": 152.9797, |
|
"eval_samples_per_second": 36.972, |
|
"eval_steps_per_second": 4.622, |
|
"eval_wer": 0.3661311806904078, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.245753114382786, |
|
"eval_loss": 0.47735241055488586, |
|
"eval_runtime": 152.7086, |
|
"eval_samples_per_second": 37.038, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.36462261879924895, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.3023782559456398, |
|
"eval_loss": 0.46992629766464233, |
|
"eval_runtime": 152.5925, |
|
"eval_samples_per_second": 37.066, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.3584760315193144, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3590033975084936, |
|
"eval_loss": 0.4651219844818115, |
|
"eval_runtime": 153.5802, |
|
"eval_samples_per_second": 36.828, |
|
"eval_steps_per_second": 4.603, |
|
"eval_wer": 0.3550255974065574, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4156285390713477, |
|
"grad_norm": 0.43387308716773987, |
|
"learning_rate": 1.5428571428571428e-05, |
|
"loss": 0.2328, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4156285390713477, |
|
"eval_loss": 0.4611109495162964, |
|
"eval_runtime": 153.4779, |
|
"eval_samples_per_second": 36.852, |
|
"eval_steps_per_second": 4.607, |
|
"eval_wer": 0.3570156152204266, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4722536806342017, |
|
"eval_loss": 0.4593363106250763, |
|
"eval_runtime": 153.5126, |
|
"eval_samples_per_second": 36.844, |
|
"eval_steps_per_second": 4.605, |
|
"eval_wer": 0.3553465680216976, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.4722536806342017, |
|
"step": 2600, |
|
"total_flos": 2.3804122051094954e+19, |
|
"train_loss": 0.9595056893275334, |
|
"train_runtime": 10902.6713, |
|
"train_samples_per_second": 15.262, |
|
"train_steps_per_second": 0.238 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"total_flos": 2.3804122051094954e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|