{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 300,
  "global_step": 16425,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.27,
      "eval_loss": 7.266334533691406,
      "eval_runtime": 325.0132,
      "eval_samples_per_second": 33.952,
      "eval_steps_per_second": 1.061,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.14420628442998e-05,
      "loss": 10.5256,
      "step": 500
    },
    {
      "epoch": 0.55,
      "eval_loss": 3.0892837047576904,
      "eval_runtime": 316.7833,
      "eval_samples_per_second": 34.835,
      "eval_steps_per_second": 1.089,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 0.82,
      "eval_loss": 3.061225652694702,
      "eval_runtime": 483.1327,
      "eval_samples_per_second": 22.841,
      "eval_steps_per_second": 0.714,
      "eval_wer": 1.0,
      "step": 900
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0001434602713566988,
      "loss": 2.9795,
      "step": 1000
    },
    {
      "epoch": 1.1,
      "eval_loss": 2.9936766624450684,
      "eval_runtime": 320.3828,
      "eval_samples_per_second": 34.443,
      "eval_steps_per_second": 1.077,
      "eval_wer": 1.0,
      "step": 1200
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.0002154784798690978,
      "loss": 2.9564,
      "step": 1500
    },
    {
      "epoch": 1.37,
      "eval_loss": 3.242413282394409,
      "eval_runtime": 328.1378,
      "eval_samples_per_second": 33.629,
      "eval_steps_per_second": 1.051,
      "eval_wer": 1.0,
      "step": 1500
    },
    {
      "epoch": 1.64,
      "eval_loss": 3.2866387367248535,
      "eval_runtime": 325.8189,
      "eval_samples_per_second": 33.869,
      "eval_steps_per_second": 1.059,
      "eval_wer": 1.0,
      "step": 1800
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.0002874966883814968,
      "loss": 3.1552,
      "step": 2000
    },
    {
      "epoch": 1.92,
      "eval_loss": 3.6338589191436768,
      "eval_runtime": 322.0052,
      "eval_samples_per_second": 34.27,
      "eval_steps_per_second": 1.071,
      "eval_wer": 1.0,
      "step": 2100
    },
    {
      "epoch": 2.19,
      "eval_loss": 3.1184866428375244,
      "eval_runtime": 316.2742,
      "eval_samples_per_second": 34.891,
      "eval_steps_per_second": 1.091,
      "eval_wer": 1.0,
      "step": 2400
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.00035951489689389575,
      "loss": 3.2079,
      "step": 2500
    },
    {
      "epoch": 2.47,
      "eval_loss": 3.183176040649414,
      "eval_runtime": 316.7437,
      "eval_samples_per_second": 34.839,
      "eval_steps_per_second": 1.089,
      "eval_wer": 1.0,
      "step": 2700
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.00043153310540629475,
      "loss": 3.1275,
      "step": 3000
    },
    {
      "epoch": 2.74,
      "eval_loss": 3.3952367305755615,
      "eval_runtime": 319.4373,
      "eval_samples_per_second": 34.545,
      "eval_steps_per_second": 1.08,
      "eval_wer": 1.0,
      "step": 3000
    },
    {
      "epoch": 3.01,
      "eval_loss": 3.2981579303741455,
      "eval_runtime": 327.568,
      "eval_samples_per_second": 33.688,
      "eval_steps_per_second": 1.053,
      "eval_wer": 1.0,
      "step": 3300
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0005035513139186939,
      "loss": 3.0987,
      "step": 3500
    },
    {
      "epoch": 3.29,
      "eval_loss": 3.103595733642578,
      "eval_runtime": 327.3342,
      "eval_samples_per_second": 33.712,
      "eval_steps_per_second": 1.054,
      "eval_wer": 1.0,
      "step": 3600
    },
    {
      "epoch": 3.56,
      "eval_loss": 3.1222941875457764,
      "eval_runtime": 312.8357,
      "eval_samples_per_second": 35.274,
      "eval_steps_per_second": 1.103,
      "eval_wer": 1.0,
      "step": 3900
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.0005755695224310928,
      "loss": 2.9301,
      "step": 4000
    },
    {
      "epoch": 3.84,
      "eval_loss": 3.114525556564331,
      "eval_runtime": 308.5965,
      "eval_samples_per_second": 35.759,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 4200
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.0006475877309434917,
      "loss": 2.9197,
      "step": 4500
    },
    {
      "epoch": 4.11,
      "eval_loss": 3.0324432849884033,
      "eval_runtime": 308.9122,
      "eval_samples_per_second": 35.722,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 4500
    },
    {
      "epoch": 4.38,
      "eval_loss": 2.999401807785034,
      "eval_runtime": 308.612,
      "eval_samples_per_second": 35.757,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.9598550067965563,
      "step": 4800
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.0007196059394558908,
      "loss": 2.9023,
      "step": 5000
    },
    {
      "epoch": 4.66,
      "eval_loss": 2.991722822189331,
      "eval_runtime": 309.103,
      "eval_samples_per_second": 35.7,
      "eval_steps_per_second": 1.116,
      "eval_wer": 1.8240144993203444,
      "step": 5100
    },
    {
      "epoch": 4.93,
      "eval_loss": 2.9946165084838867,
      "eval_runtime": 308.4148,
      "eval_samples_per_second": 35.78,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.958948799275034,
      "step": 5400
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.0006889164121067909,
      "loss": 2.9007,
      "step": 5500
    },
    {
      "epoch": 5.21,
      "eval_loss": 3.195502519607544,
      "eval_runtime": 308.3861,
      "eval_samples_per_second": 35.783,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.0,
      "step": 5700
    },
    {
      "epoch": 5.48,
      "learning_rate": 0.0006574616321963427,
      "loss": 3.1887,
      "step": 6000
    },
    {
      "epoch": 5.48,
      "eval_loss": 3.1901698112487793,
      "eval_runtime": 308.4713,
      "eval_samples_per_second": 35.773,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 6000
    },
    {
      "epoch": 5.75,
      "eval_loss": 3.167245864868164,
      "eval_runtime": 308.5576,
      "eval_samples_per_second": 35.763,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 6300
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.0006259438166548115,
      "loss": 3.135,
      "step": 6500
    },
    {
      "epoch": 6.03,
      "eval_loss": 3.2076234817504883,
      "eval_runtime": 308.784,
      "eval_samples_per_second": 35.737,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 6600
    },
    {
      "epoch": 6.3,
      "eval_loss": 3.212040424346924,
      "eval_runtime": 309.3257,
      "eval_samples_per_second": 35.674,
      "eval_steps_per_second": 1.115,
      "eval_wer": 1.0,
      "step": 6900
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.0005944260011132802,
      "loss": 3.1482,
      "step": 7000
    },
    {
      "epoch": 6.58,
      "eval_loss": 3.1832025051116943,
      "eval_runtime": 308.2925,
      "eval_samples_per_second": 35.794,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.0,
      "step": 7200
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.0005629081855717488,
      "loss": 3.1546,
      "step": 7500
    },
    {
      "epoch": 6.85,
      "eval_loss": 3.1799111366271973,
      "eval_runtime": 308.7059,
      "eval_samples_per_second": 35.746,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 7500
    },
    {
      "epoch": 7.12,
      "eval_loss": 3.2451581954956055,
      "eval_runtime": 307.3881,
      "eval_samples_per_second": 35.899,
      "eval_steps_per_second": 1.122,
      "eval_wer": 1.0,
      "step": 7800
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.0005313903700302176,
      "loss": 3.1567,
      "step": 8000
    },
    {
      "epoch": 7.4,
      "eval_loss": 3.2318718433380127,
      "eval_runtime": 308.2983,
      "eval_samples_per_second": 35.793,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.0,
      "step": 8100
    },
    {
      "epoch": 7.67,
      "eval_loss": 3.222830057144165,
      "eval_runtime": 308.3145,
      "eval_samples_per_second": 35.791,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.0,
      "step": 8400
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.0004998725544886862,
      "loss": 3.1719,
      "step": 8500
    },
    {
      "epoch": 7.95,
      "eval_loss": 3.2054970264434814,
      "eval_runtime": 308.0159,
      "eval_samples_per_second": 35.826,
      "eval_steps_per_second": 1.12,
      "eval_wer": 1.0,
      "step": 8700
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.00046835473894715497,
      "loss": 3.168,
      "step": 9000
    },
    {
      "epoch": 8.22,
      "eval_loss": 3.2552778720855713,
      "eval_runtime": 303.2884,
      "eval_samples_per_second": 36.385,
      "eval_steps_per_second": 1.138,
      "eval_wer": 1.0,
      "step": 9000
    },
    {
      "epoch": 8.49,
      "eval_loss": 3.197523593902588,
      "eval_runtime": 305.1147,
      "eval_samples_per_second": 36.167,
      "eval_steps_per_second": 1.131,
      "eval_wer": 1.0,
      "step": 9300
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.0004368369234056237,
      "loss": 3.1643,
      "step": 9500
    },
    {
      "epoch": 8.77,
      "eval_loss": 3.2445874214172363,
      "eval_runtime": 303.4296,
      "eval_samples_per_second": 36.368,
      "eval_steps_per_second": 1.137,
      "eval_wer": 1.0,
      "step": 9600
    },
    {
      "epoch": 9.04,
      "eval_loss": 3.2781076431274414,
      "eval_runtime": 305.7237,
      "eval_samples_per_second": 36.095,
      "eval_steps_per_second": 1.128,
      "eval_wer": 1.0,
      "step": 9900
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.0004053191078640924,
      "loss": 3.169,
      "step": 10000
    },
    {
      "epoch": 9.32,
      "eval_loss": 3.2596964836120605,
      "eval_runtime": 306.6385,
      "eval_samples_per_second": 35.987,
      "eval_steps_per_second": 1.125,
      "eval_wer": 1.0,
      "step": 10200
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.00037380129232256106,
      "loss": 3.1789,
      "step": 10500
    },
    {
      "epoch": 9.59,
      "eval_loss": 3.2585501670837402,
      "eval_runtime": 307.6539,
      "eval_samples_per_second": 35.868,
      "eval_steps_per_second": 1.121,
      "eval_wer": 1.0,
      "step": 10500
    },
    {
      "epoch": 9.86,
      "eval_loss": 3.2689764499664307,
      "eval_runtime": 307.738,
      "eval_samples_per_second": 35.858,
      "eval_steps_per_second": 1.121,
      "eval_wer": 1.0,
      "step": 10800
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.0003422834767810298,
      "loss": 3.1701,
      "step": 11000
    },
    {
      "epoch": 10.14,
      "eval_loss": 3.273723602294922,
      "eval_runtime": 308.5346,
      "eval_samples_per_second": 35.766,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 11100
    },
    {
      "epoch": 10.41,
      "eval_loss": 3.273848533630371,
      "eval_runtime": 308.8614,
      "eval_samples_per_second": 35.728,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 11400
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.00031076566123949855,
      "loss": 3.1698,
      "step": 11500
    },
    {
      "epoch": 10.68,
      "eval_loss": 3.2595293521881104,
      "eval_runtime": 308.7628,
      "eval_samples_per_second": 35.739,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 11700
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.00027924784569796727,
      "loss": 3.1595,
      "step": 12000
    },
    {
      "epoch": 10.96,
      "eval_loss": 3.2467362880706787,
      "eval_runtime": 308.3094,
      "eval_samples_per_second": 35.792,
      "eval_steps_per_second": 1.119,
      "eval_wer": 1.0,
      "step": 12000
    },
    {
      "epoch": 11.23,
      "eval_loss": 3.252420663833618,
      "eval_runtime": 309.0904,
      "eval_samples_per_second": 35.702,
      "eval_steps_per_second": 1.116,
      "eval_wer": 1.0,
      "step": 12300
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.00024773003015643593,
      "loss": 3.15,
      "step": 12500
    },
    {
      "epoch": 11.51,
      "eval_loss": 3.2327377796173096,
      "eval_runtime": 308.9397,
      "eval_samples_per_second": 35.719,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 12600
    },
    {
      "epoch": 11.78,
      "eval_loss": 3.219557046890259,
      "eval_runtime": 309.2594,
      "eval_samples_per_second": 35.682,
      "eval_steps_per_second": 1.116,
      "eval_wer": 1.0,
      "step": 12900
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.00021621221461490465,
      "loss": 3.1444,
      "step": 13000
    },
    {
      "epoch": 12.05,
      "eval_loss": 3.1942968368530273,
      "eval_runtime": 309.871,
      "eval_samples_per_second": 35.612,
      "eval_steps_per_second": 1.113,
      "eval_wer": 1.0,
      "step": 13200
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.00018469439907337336,
      "loss": 3.132,
      "step": 13500
    },
    {
      "epoch": 12.33,
      "eval_loss": 3.191138744354248,
      "eval_runtime": 309.3206,
      "eval_samples_per_second": 35.675,
      "eval_steps_per_second": 1.115,
      "eval_wer": 1.0,
      "step": 13500
    },
    {
      "epoch": 12.6,
      "eval_loss": 3.207465648651123,
      "eval_runtime": 309.5517,
      "eval_samples_per_second": 35.648,
      "eval_steps_per_second": 1.115,
      "eval_wer": 1.0,
      "step": 13800
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.00015323961916292511,
      "loss": 3.1153,
      "step": 14000
    },
    {
      "epoch": 12.88,
      "eval_loss": 3.1938300132751465,
      "eval_runtime": 310.0376,
      "eval_samples_per_second": 35.592,
      "eval_steps_per_second": 1.113,
      "eval_wer": 1.0,
      "step": 14100
    },
    {
      "epoch": 13.15,
      "eval_loss": 3.1638731956481934,
      "eval_runtime": 308.9592,
      "eval_samples_per_second": 35.717,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 14400
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.00012172180362139385,
      "loss": 3.1039,
      "step": 14500
    },
    {
      "epoch": 13.42,
      "eval_loss": 3.15146803855896,
      "eval_runtime": 308.6922,
      "eval_samples_per_second": 35.748,
      "eval_steps_per_second": 1.118,
      "eval_wer": 1.0,
      "step": 14700
    },
    {
      "epoch": 13.7,
      "learning_rate": 9.020398807986256e-05,
      "loss": 3.0839,
      "step": 15000
    },
    {
      "epoch": 13.7,
      "eval_loss": 3.153453826904297,
      "eval_runtime": 309.1197,
      "eval_samples_per_second": 35.698,
      "eval_steps_per_second": 1.116,
      "eval_wer": 1.0,
      "step": 15000
    },
    {
      "epoch": 13.97,
      "eval_loss": 3.130723237991333,
      "eval_runtime": 309.6167,
      "eval_samples_per_second": 35.641,
      "eval_steps_per_second": 1.114,
      "eval_wer": 1.0,
      "step": 15300
    },
    {
      "epoch": 14.16,
      "learning_rate": 5.8686172538331265e-05,
      "loss": 3.0632,
      "step": 15500
    },
    {
      "epoch": 14.25,
      "eval_loss": 3.1138317584991455,
      "eval_runtime": 309.4562,
      "eval_samples_per_second": 35.659,
      "eval_steps_per_second": 1.115,
      "eval_wer": 1.0,
      "step": 15600
    },
    {
      "epoch": 14.52,
      "eval_loss": 3.128912925720215,
      "eval_runtime": 309.4874,
      "eval_samples_per_second": 35.656,
      "eval_steps_per_second": 1.115,
      "eval_wer": 1.0,
      "step": 15900
    },
    {
      "epoch": 14.61,
      "learning_rate": 2.7168356996799972e-05,
      "loss": 3.0518,
      "step": 16000
    },
    {
      "epoch": 14.79,
      "eval_loss": 3.081491708755493,
      "eval_runtime": 308.8218,
      "eval_samples_per_second": 35.733,
      "eval_steps_per_second": 1.117,
      "eval_wer": 1.0,
      "step": 16200
    },
    {
      "epoch": 15.0,
      "step": 16425,
      "total_flos": 6.442470243808035e+19,
      "train_loss": 3.3253096312547563,
      "train_runtime": 44962.3834,
      "train_samples_per_second": 14.612,
      "train_steps_per_second": 0.365
    }
  ],
  "logging_steps": 500,
  "max_steps": 16425,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 400,
  "total_flos": 6.442470243808035e+19,
  "train_batch_size": 20,
  "trial_name": null,
  "trial_params": null
}