|
{ |
|
"best_metric": 0.3630259037017822, |
|
"best_model_checkpoint": "./working/checkpoint-10000", |
|
"epoch": 119.04761904761905, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 5.874335765838623, |
|
"learning_rate": 1.5e-06, |
|
"loss": 12.5691, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 11.808794975280762, |
|
"learning_rate": 2.9850000000000002e-06, |
|
"loss": 9.8251, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 10.97357177734375, |
|
"learning_rate": 4.485e-06, |
|
"loss": 6.5329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 10.752958297729492, |
|
"learning_rate": 5.985e-06, |
|
"loss": 5.2405, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"grad_norm": 5.865973949432373, |
|
"learning_rate": 7.485e-06, |
|
"loss": 4.629, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"grad_norm": 4.192840099334717, |
|
"learning_rate": 8.985e-06, |
|
"loss": 4.2437, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"grad_norm": 3.2876224517822266, |
|
"learning_rate": 1.0485e-05, |
|
"loss": 3.9784, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"grad_norm": 1.2407236099243164, |
|
"learning_rate": 1.1985000000000001e-05, |
|
"loss": 3.7904, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"grad_norm": 0.6437963247299194, |
|
"learning_rate": 1.3485e-05, |
|
"loss": 3.6682, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"grad_norm": 0.3295372724533081, |
|
"learning_rate": 1.4985e-05, |
|
"loss": 3.5325, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"eval_cer": 0.926601333602748, |
|
"eval_loss": 3.4897494316101074, |
|
"eval_runtime": 10.3726, |
|
"eval_samples_per_second": 32.393, |
|
"eval_steps_per_second": 4.049, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"grad_norm": 0.61485356092453, |
|
"learning_rate": 1.6485e-05, |
|
"loss": 3.4934, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"grad_norm": 0.4778901934623718, |
|
"learning_rate": 1.7985e-05, |
|
"loss": 3.4662, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"grad_norm": 0.56944739818573, |
|
"learning_rate": 1.9485e-05, |
|
"loss": 3.4413, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"grad_norm": 0.37235376238822937, |
|
"learning_rate": 2.0985e-05, |
|
"loss": 3.4089, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"grad_norm": 0.4277956187725067, |
|
"learning_rate": 2.2485000000000002e-05, |
|
"loss": 3.3384, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"grad_norm": 0.50371253490448, |
|
"learning_rate": 2.3985e-05, |
|
"loss": 3.2455, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"grad_norm": 1.1383576393127441, |
|
"learning_rate": 2.5485e-05, |
|
"loss": 3.0681, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"grad_norm": 0.8668686747550964, |
|
"learning_rate": 2.6985e-05, |
|
"loss": 2.7949, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"grad_norm": 1.0731563568115234, |
|
"learning_rate": 2.8485000000000003e-05, |
|
"loss": 2.4866, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"grad_norm": 1.3317248821258545, |
|
"learning_rate": 2.9985000000000002e-05, |
|
"loss": 2.1973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"eval_cer": 0.24030107092341887, |
|
"eval_loss": 1.1350404024124146, |
|
"eval_runtime": 10.3527, |
|
"eval_samples_per_second": 32.455, |
|
"eval_steps_per_second": 4.057, |
|
"eval_wer": 0.839647119875454, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 2.0239453315734863, |
|
"learning_rate": 2.962875e-05, |
|
"loss": 1.9821, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 26.19, |
|
"grad_norm": 1.464921236038208, |
|
"learning_rate": 2.925375e-05, |
|
"loss": 1.853, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"grad_norm": 1.6508703231811523, |
|
"learning_rate": 2.887875e-05, |
|
"loss": 1.7547, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"grad_norm": 1.3476407527923584, |
|
"learning_rate": 2.850375e-05, |
|
"loss": 1.7171, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"grad_norm": 1.2977994680404663, |
|
"learning_rate": 2.812875e-05, |
|
"loss": 1.6498, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"grad_norm": 1.8536533117294312, |
|
"learning_rate": 2.775375e-05, |
|
"loss": 1.5965, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"grad_norm": 1.7063647508621216, |
|
"learning_rate": 2.7378750000000003e-05, |
|
"loss": 1.5744, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"grad_norm": 1.613274097442627, |
|
"learning_rate": 2.700375e-05, |
|
"loss": 1.5483, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 34.52, |
|
"grad_norm": 1.6182752847671509, |
|
"learning_rate": 2.662875e-05, |
|
"loss": 1.5076, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"grad_norm": 1.9291083812713623, |
|
"learning_rate": 2.6253750000000003e-05, |
|
"loss": 1.4762, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"eval_cer": 0.15634471610426348, |
|
"eval_loss": 0.527005672454834, |
|
"eval_runtime": 10.3175, |
|
"eval_samples_per_second": 32.566, |
|
"eval_steps_per_second": 4.071, |
|
"eval_wer": 0.6844836533471718, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"grad_norm": 1.7794642448425293, |
|
"learning_rate": 2.587875e-05, |
|
"loss": 1.4752, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"grad_norm": 1.7410004138946533, |
|
"learning_rate": 2.550375e-05, |
|
"loss": 1.4586, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"grad_norm": 1.6410831212997437, |
|
"learning_rate": 2.512875e-05, |
|
"loss": 1.4172, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"grad_norm": 2.350106954574585, |
|
"learning_rate": 2.475375e-05, |
|
"loss": 1.3751, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"grad_norm": 1.7794309854507446, |
|
"learning_rate": 2.437875e-05, |
|
"loss": 1.3516, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"grad_norm": 1.8536804914474487, |
|
"learning_rate": 2.400375e-05, |
|
"loss": 1.338, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 44.05, |
|
"grad_norm": 2.043091058731079, |
|
"learning_rate": 2.362875e-05, |
|
"loss": 1.31, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"grad_norm": 2.556605577468872, |
|
"learning_rate": 2.325375e-05, |
|
"loss": 1.2737, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 46.43, |
|
"grad_norm": 2.167360544204712, |
|
"learning_rate": 2.2878750000000002e-05, |
|
"loss": 1.2706, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"grad_norm": 1.933358907699585, |
|
"learning_rate": 2.2503750000000003e-05, |
|
"loss": 1.2409, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"eval_cer": 0.140280864821176, |
|
"eval_loss": 0.41946256160736084, |
|
"eval_runtime": 10.4076, |
|
"eval_samples_per_second": 32.284, |
|
"eval_steps_per_second": 4.036, |
|
"eval_wer": 0.6331084587441619, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 48.81, |
|
"grad_norm": 2.1927788257598877, |
|
"learning_rate": 2.212875e-05, |
|
"loss": 1.2476, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 3.7042958736419678, |
|
"learning_rate": 2.175375e-05, |
|
"loss": 1.2211, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 51.19, |
|
"grad_norm": 2.503298282623291, |
|
"learning_rate": 2.13825e-05, |
|
"loss": 1.1974, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 52.38, |
|
"grad_norm": 2.378753423690796, |
|
"learning_rate": 2.101125e-05, |
|
"loss": 1.19, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"grad_norm": 6.145068645477295, |
|
"learning_rate": 2.063625e-05, |
|
"loss": 1.1734, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 54.76, |
|
"grad_norm": 2.2741551399230957, |
|
"learning_rate": 2.026125e-05, |
|
"loss": 1.1664, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 55.95, |
|
"grad_norm": 3.21976900100708, |
|
"learning_rate": 1.988625e-05, |
|
"loss": 1.1555, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"grad_norm": 3.06923508644104, |
|
"learning_rate": 1.951125e-05, |
|
"loss": 1.1391, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"grad_norm": 1.8809341192245483, |
|
"learning_rate": 1.9136249999999998e-05, |
|
"loss": 1.1271, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"grad_norm": 2.041844367980957, |
|
"learning_rate": 1.876125e-05, |
|
"loss": 1.1241, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"eval_cer": 0.13785613255203072, |
|
"eval_loss": 0.38446417450904846, |
|
"eval_runtime": 10.417, |
|
"eval_samples_per_second": 32.255, |
|
"eval_steps_per_second": 4.032, |
|
"eval_wer": 0.63622210690192, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 60.71, |
|
"grad_norm": 2.3043465614318848, |
|
"learning_rate": 1.838625e-05, |
|
"loss": 1.1042, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 61.9, |
|
"grad_norm": 3.682835340499878, |
|
"learning_rate": 1.801125e-05, |
|
"loss": 1.0921, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 63.1, |
|
"grad_norm": 4.466809272766113, |
|
"learning_rate": 1.7636250000000002e-05, |
|
"loss": 1.095, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"grad_norm": 2.615339994430542, |
|
"learning_rate": 1.726125e-05, |
|
"loss": 1.1057, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 65.48, |
|
"grad_norm": 3.483346700668335, |
|
"learning_rate": 1.688625e-05, |
|
"loss": 1.0553, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"grad_norm": 2.141965866088867, |
|
"learning_rate": 1.651125e-05, |
|
"loss": 1.0656, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 67.86, |
|
"grad_norm": 2.2111611366271973, |
|
"learning_rate": 1.613625e-05, |
|
"loss": 1.0673, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 69.05, |
|
"grad_norm": 2.071429491043091, |
|
"learning_rate": 1.576125e-05, |
|
"loss": 1.0632, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 70.24, |
|
"grad_norm": 4.86116886138916, |
|
"learning_rate": 1.538625e-05, |
|
"loss": 1.0447, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"grad_norm": 3.3076369762420654, |
|
"learning_rate": 1.5011250000000001e-05, |
|
"loss": 1.024, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_cer": 0.13548191553849262, |
|
"eval_loss": 0.3715837895870209, |
|
"eval_runtime": 10.3955, |
|
"eval_samples_per_second": 32.322, |
|
"eval_steps_per_second": 4.04, |
|
"eval_wer": 0.6320705760249092, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 72.62, |
|
"grad_norm": 2.3496508598327637, |
|
"learning_rate": 1.463625e-05, |
|
"loss": 1.0379, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 73.81, |
|
"grad_norm": 2.621004343032837, |
|
"learning_rate": 1.426125e-05, |
|
"loss": 1.0515, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"grad_norm": 5.240926742553711, |
|
"learning_rate": 1.388625e-05, |
|
"loss": 1.0253, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"grad_norm": 2.9943532943725586, |
|
"learning_rate": 1.351125e-05, |
|
"loss": 1.0131, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 77.38, |
|
"grad_norm": 2.475804328918457, |
|
"learning_rate": 1.3136250000000001e-05, |
|
"loss": 1.0227, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 78.57, |
|
"grad_norm": 2.5056631565093994, |
|
"learning_rate": 1.2761250000000001e-05, |
|
"loss": 1.0025, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 79.76, |
|
"grad_norm": 3.9102323055267334, |
|
"learning_rate": 1.238625e-05, |
|
"loss": 1.0181, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"grad_norm": 3.3800106048583984, |
|
"learning_rate": 1.201125e-05, |
|
"loss": 0.9892, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 82.14, |
|
"grad_norm": 2.165987014770508, |
|
"learning_rate": 1.164e-05, |
|
"loss": 0.9822, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"grad_norm": 2.426816463470459, |
|
"learning_rate": 1.1265e-05, |
|
"loss": 0.9922, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_cer": 0.13310769852495455, |
|
"eval_loss": 0.3727741539478302, |
|
"eval_runtime": 10.3942, |
|
"eval_samples_per_second": 32.326, |
|
"eval_steps_per_second": 4.041, |
|
"eval_wer": 0.628956927867151, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 84.52, |
|
"grad_norm": 3.203552007675171, |
|
"learning_rate": 1.089e-05, |
|
"loss": 0.9995, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"grad_norm": 2.827246904373169, |
|
"learning_rate": 1.0515e-05, |
|
"loss": 0.9683, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 86.9, |
|
"grad_norm": 2.9927895069122314, |
|
"learning_rate": 1.0140000000000001e-05, |
|
"loss": 0.9864, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 88.1, |
|
"grad_norm": 2.151737928390503, |
|
"learning_rate": 9.765e-06, |
|
"loss": 0.9744, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 89.29, |
|
"grad_norm": 2.5920581817626953, |
|
"learning_rate": 9.39e-06, |
|
"loss": 0.9794, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 90.48, |
|
"grad_norm": 3.2127621173858643, |
|
"learning_rate": 9.015e-06, |
|
"loss": 0.9622, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"grad_norm": 3.541879892349243, |
|
"learning_rate": 8.64e-06, |
|
"loss": 0.9624, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 92.86, |
|
"grad_norm": 2.827958345413208, |
|
"learning_rate": 8.265000000000001e-06, |
|
"loss": 0.9637, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 94.05, |
|
"grad_norm": 2.648591995239258, |
|
"learning_rate": 7.89e-06, |
|
"loss": 0.9684, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"grad_norm": 4.264640808105469, |
|
"learning_rate": 7.515e-06, |
|
"loss": 0.9432, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"eval_cer": 0.13209739341281068, |
|
"eval_loss": 0.3648131787776947, |
|
"eval_runtime": 10.4236, |
|
"eval_samples_per_second": 32.234, |
|
"eval_steps_per_second": 4.029, |
|
"eval_wer": 0.6170212765957447, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 96.43, |
|
"grad_norm": 2.8603055477142334, |
|
"learning_rate": 7.14e-06, |
|
"loss": 0.9576, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 97.62, |
|
"grad_norm": 2.931117296218872, |
|
"learning_rate": 6.7650000000000005e-06, |
|
"loss": 0.9579, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 98.81, |
|
"grad_norm": 3.449780225753784, |
|
"learning_rate": 6.39e-06, |
|
"loss": 0.9535, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 4.3435468673706055, |
|
"learning_rate": 6.015000000000001e-06, |
|
"loss": 0.9463, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 101.19, |
|
"grad_norm": 2.2839837074279785, |
|
"learning_rate": 5.64e-06, |
|
"loss": 0.9413, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 102.38, |
|
"grad_norm": 3.1021485328674316, |
|
"learning_rate": 5.2649999999999996e-06, |
|
"loss": 0.9436, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 103.57, |
|
"grad_norm": 2.9421229362487793, |
|
"learning_rate": 4.890000000000001e-06, |
|
"loss": 0.939, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 104.76, |
|
"grad_norm": 2.0578436851501465, |
|
"learning_rate": 4.515e-06, |
|
"loss": 0.9338, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 105.95, |
|
"grad_norm": 3.5860297679901123, |
|
"learning_rate": 4.14e-06, |
|
"loss": 0.9315, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 107.14, |
|
"grad_norm": 2.1002159118652344, |
|
"learning_rate": 3.765e-06, |
|
"loss": 0.9279, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 107.14, |
|
"eval_cer": 0.13245100020206102, |
|
"eval_loss": 0.3642527461051941, |
|
"eval_runtime": 10.5077, |
|
"eval_samples_per_second": 31.976, |
|
"eval_steps_per_second": 3.997, |
|
"eval_wer": 0.6248053969901401, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"grad_norm": 3.5053226947784424, |
|
"learning_rate": 3.3975e-06, |
|
"loss": 0.9489, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 109.52, |
|
"grad_norm": 2.178657054901123, |
|
"learning_rate": 3.0225000000000003e-06, |
|
"loss": 0.9218, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 110.71, |
|
"grad_norm": 2.9659178256988525, |
|
"learning_rate": 2.6475e-06, |
|
"loss": 0.928, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 111.9, |
|
"grad_norm": 3.755510091781616, |
|
"learning_rate": 2.2725e-06, |
|
"loss": 0.9244, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 113.1, |
|
"grad_norm": 2.2474422454833984, |
|
"learning_rate": 1.8975e-06, |
|
"loss": 0.9269, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 114.29, |
|
"grad_norm": 4.289571285247803, |
|
"learning_rate": 1.5225000000000002e-06, |
|
"loss": 0.9353, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 115.48, |
|
"grad_norm": 3.7131989002227783, |
|
"learning_rate": 1.1475e-06, |
|
"loss": 0.9169, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"grad_norm": 2.692605972290039, |
|
"learning_rate": 7.725e-07, |
|
"loss": 0.9325, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 117.86, |
|
"grad_norm": 3.175621509552002, |
|
"learning_rate": 3.975e-07, |
|
"loss": 0.9163, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 119.05, |
|
"grad_norm": 9.366300582885742, |
|
"learning_rate": 2.25e-08, |
|
"loss": 0.9268, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 119.05, |
|
"eval_cer": 0.13159224085673873, |
|
"eval_loss": 0.3630259037017822, |
|
"eval_runtime": 10.5083, |
|
"eval_samples_per_second": 31.975, |
|
"eval_steps_per_second": 3.997, |
|
"eval_wer": 0.6242864556305138, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 120, |
|
"save_steps": 1000, |
|
"total_flos": 3.012648370984383e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|