|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6412722842118763, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"grad_norm": 8.037480354309082, |
|
"learning_rate": 0.0002465, |
|
"loss": 4.7051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03206361421059382, |
|
"eval_loss": 1.7504417896270752, |
|
"eval_runtime": 184.6482, |
|
"eval_samples_per_second": 38.029, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 0.9570416827223323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"grad_norm": 7.763198375701904, |
|
"learning_rate": 0.0002874574468085106, |
|
"loss": 1.0409, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06412722842118763, |
|
"eval_loss": 1.1511156558990479, |
|
"eval_runtime": 184.729, |
|
"eval_samples_per_second": 38.012, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 0.776093224730597, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"grad_norm": 3.2267072200775146, |
|
"learning_rate": 0.0002715, |
|
"loss": 0.8183, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09619084263178146, |
|
"eval_loss": 1.0506497621536255, |
|
"eval_runtime": 186.2316, |
|
"eval_samples_per_second": 37.706, |
|
"eval_steps_per_second": 0.591, |
|
"eval_wer": 0.70972242522489, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"grad_norm": 6.372620582580566, |
|
"learning_rate": 0.00025554255319148935, |
|
"loss": 0.7091, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12825445684237527, |
|
"eval_loss": 0.9421387314796448, |
|
"eval_runtime": 186.5946, |
|
"eval_samples_per_second": 37.632, |
|
"eval_steps_per_second": 0.59, |
|
"eval_wer": 0.6609707809032807, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"grad_norm": 5.675894260406494, |
|
"learning_rate": 0.0002395851063829787, |
|
"loss": 0.6547, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16031807105296908, |
|
"eval_loss": 0.8725515007972717, |
|
"eval_runtime": 187.2013, |
|
"eval_samples_per_second": 37.51, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.6127639219229594, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"grad_norm": 6.913870334625244, |
|
"learning_rate": 0.00022362765957446805, |
|
"loss": 0.6088, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19238168526356292, |
|
"eval_loss": 0.8246235847473145, |
|
"eval_runtime": 188.3497, |
|
"eval_samples_per_second": 37.282, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5989582641278784, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"grad_norm": 4.30249547958374, |
|
"learning_rate": 0.00020767021276595744, |
|
"loss": 0.5781, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22444529947415673, |
|
"eval_loss": 0.802536129951477, |
|
"eval_runtime": 187.0791, |
|
"eval_samples_per_second": 37.535, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.5747352476115813, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"grad_norm": 3.4820008277893066, |
|
"learning_rate": 0.0001917446808510638, |
|
"loss": 0.5429, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25650891368475054, |
|
"eval_loss": 0.7359501123428345, |
|
"eval_runtime": 186.7747, |
|
"eval_samples_per_second": 37.596, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.53048804794111, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"grad_norm": 11.696717262268066, |
|
"learning_rate": 0.00017585106382978722, |
|
"loss": 0.5104, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2885725278953444, |
|
"eval_loss": 0.7335178852081299, |
|
"eval_runtime": 187.3685, |
|
"eval_samples_per_second": 37.477, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.5394039251119468, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"grad_norm": 7.053103446960449, |
|
"learning_rate": 0.00015989361702127658, |
|
"loss": 0.501, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32063614210593816, |
|
"eval_loss": 0.6932825446128845, |
|
"eval_runtime": 186.2726, |
|
"eval_samples_per_second": 37.697, |
|
"eval_steps_per_second": 0.591, |
|
"eval_wer": 0.5087763589736776, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"grad_norm": 6.128586769104004, |
|
"learning_rate": 0.00014393617021276595, |
|
"loss": 0.4708, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.352699756316532, |
|
"eval_loss": 0.6770374774932861, |
|
"eval_runtime": 188.2655, |
|
"eval_samples_per_second": 37.298, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5112743990751937, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"grad_norm": 7.154539108276367, |
|
"learning_rate": 0.00012801063829787234, |
|
"loss": 0.4526, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38476337052712584, |
|
"eval_loss": 0.6608560681343079, |
|
"eval_runtime": 187.3283, |
|
"eval_samples_per_second": 37.485, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.48059368314753054, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"grad_norm": 5.313536643981934, |
|
"learning_rate": 0.0001120531914893617, |
|
"loss": 0.4235, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4168269847377196, |
|
"eval_loss": 0.637322187423706, |
|
"eval_runtime": 186.315, |
|
"eval_samples_per_second": 37.689, |
|
"eval_steps_per_second": 0.59, |
|
"eval_wer": 0.485842224850184, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"grad_norm": 6.399425983428955, |
|
"learning_rate": 9.612765957446806e-05, |
|
"loss": 0.4032, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44889059894831346, |
|
"eval_loss": 0.6047533750534058, |
|
"eval_runtime": 186.8155, |
|
"eval_samples_per_second": 37.588, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.4466176802774419, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"grad_norm": 11.48141098022461, |
|
"learning_rate": 8.017021276595744e-05, |
|
"loss": 0.3863, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4809542131589073, |
|
"eval_loss": 0.5946004390716553, |
|
"eval_runtime": 186.2938, |
|
"eval_samples_per_second": 37.693, |
|
"eval_steps_per_second": 0.59, |
|
"eval_wer": 0.4432160937562285, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"grad_norm": 32.89252471923828, |
|
"learning_rate": 6.424468085106383e-05, |
|
"loss": 0.3766, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5130178273695011, |
|
"eval_loss": 0.5737225413322449, |
|
"eval_runtime": 186.9085, |
|
"eval_samples_per_second": 37.569, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.4298489217236477, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"grad_norm": 4.741519451141357, |
|
"learning_rate": 4.8287234042553194e-05, |
|
"loss": 0.3746, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5450814415800949, |
|
"eval_loss": 0.5668203234672546, |
|
"eval_runtime": 186.8619, |
|
"eval_samples_per_second": 37.579, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.4247731168365245, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"grad_norm": 13.890504837036133, |
|
"learning_rate": 3.232978723404255e-05, |
|
"loss": 0.3586, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5771450557906888, |
|
"eval_loss": 0.5485312342643738, |
|
"eval_runtime": 187.9252, |
|
"eval_samples_per_second": 37.366, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.4100772000690947, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"grad_norm": 8.073569297790527, |
|
"learning_rate": 1.6372340425531912e-05, |
|
"loss": 0.3552, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6092086700012825, |
|
"eval_loss": 0.5377594828605652, |
|
"eval_runtime": 187.0305, |
|
"eval_samples_per_second": 37.545, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.40320758978992544, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"grad_norm": 6.519000053405762, |
|
"learning_rate": 4.1489361702127654e-07, |
|
"loss": 0.3326, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"eval_loss": 0.5324302911758423, |
|
"eval_runtime": 186.1276, |
|
"eval_samples_per_second": 37.727, |
|
"eval_steps_per_second": 0.591, |
|
"eval_wer": 0.40138720950318235, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6412722842118763, |
|
"step": 10000, |
|
"total_flos": 1.1393778193380235e+19, |
|
"train_loss": 0.73015986328125, |
|
"train_runtime": 7697.7754, |
|
"train_samples_per_second": 10.393, |
|
"train_steps_per_second": 1.299 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1393778193380235e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|