{
  "best_metric": 1.8157883882522583,
  "best_model_checkpoint": "/tmp/tst-gun-gub-pt/checkpoint-40000",
  "epoch": 3.0,
  "eval_steps": 4000,
  "global_step": 40752,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 4.938653317628583e-05,
      "loss": 3.3185,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.877306635257165e-05,
      "loss": 2.967,
      "step": 1000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.815959952885748e-05,
      "loss": 2.803,
      "step": 1500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.754613270514331e-05,
      "loss": 2.6964,
      "step": 2000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.6932665881429136e-05,
      "loss": 2.6199,
      "step": 2500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.631919905771496e-05,
      "loss": 2.5435,
      "step": 3000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.5705732234000786e-05,
      "loss": 2.4687,
      "step": 3500
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.509226541028662e-05,
      "loss": 2.4421,
      "step": 4000
    },
    {
      "epoch": 0.29,
      "eval_bleu": 3.3785,
      "eval_gen_len": 68.48,
      "eval_loss": 2.5284605026245117,
      "eval_runtime": 39.0713,
      "eval_samples_per_second": 6.399,
      "eval_steps_per_second": 0.41,
      "step": 4000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.4478798586572437e-05,
      "loss": 2.3817,
      "step": 4500
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.386533176285827e-05,
      "loss": 2.3424,
      "step": 5000
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.3251864939144094e-05,
      "loss": 2.3047,
      "step": 5500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.263839811542992e-05,
      "loss": 2.2823,
      "step": 6000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.2024931291715744e-05,
      "loss": 2.2436,
      "step": 6500
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.141146446800157e-05,
      "loss": 2.2266,
      "step": 7000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.07979976442874e-05,
      "loss": 2.1898,
      "step": 7500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.018453082057323e-05,
      "loss": 2.1667,
      "step": 8000
    },
    {
      "epoch": 0.59,
      "eval_bleu": 4.5883,
      "eval_gen_len": 58.6,
      "eval_loss": 2.3017916679382324,
      "eval_runtime": 34.6446,
      "eval_samples_per_second": 7.216,
      "eval_steps_per_second": 0.462,
      "step": 8000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.957106399685905e-05,
      "loss": 2.1403,
      "step": 8500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.895759717314488e-05,
      "loss": 2.1188,
      "step": 9000
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.834413034943071e-05,
      "loss": 2.1002,
      "step": 9500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.773066352571653e-05,
      "loss": 2.0948,
      "step": 10000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.711719670200235e-05,
      "loss": 2.0678,
      "step": 10500
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.6503729878288185e-05,
      "loss": 2.0502,
      "step": 11000
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.589026305457401e-05,
      "loss": 2.0416,
      "step": 11500
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.5276796230859836e-05,
      "loss": 2.0255,
      "step": 12000
    },
    {
      "epoch": 0.88,
      "eval_bleu": 5.1052,
      "eval_gen_len": 67.3,
      "eval_loss": 2.1290316581726074,
      "eval_runtime": 40.7019,
      "eval_samples_per_second": 6.142,
      "eval_steps_per_second": 0.393,
      "step": 12000
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.466332940714566e-05,
      "loss": 2.0057,
      "step": 12500
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.404986258343149e-05,
      "loss": 1.9979,
      "step": 13000
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.343639575971731e-05,
      "loss": 1.9726,
      "step": 13500
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.2822928936003144e-05,
      "loss": 1.9261,
      "step": 14000
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.220946211228897e-05,
      "loss": 1.9133,
      "step": 14500
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.15959952885748e-05,
      "loss": 1.9109,
      "step": 15000
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.098252846486062e-05,
      "loss": 1.9171,
      "step": 15500
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0369061641146445e-05,
      "loss": 1.8995,
      "step": 16000
    },
    {
      "epoch": 1.18,
      "eval_bleu": 7.8429,
      "eval_gen_len": 55.48,
      "eval_loss": 2.0535314083099365,
      "eval_runtime": 32.581,
      "eval_samples_per_second": 7.673,
      "eval_steps_per_second": 0.491,
      "step": 16000
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.9755594817432277e-05,
      "loss": 1.8771,
      "step": 16500
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.91421279937181e-05,
      "loss": 1.8841,
      "step": 17000
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.8528661170003927e-05,
      "loss": 1.8787,
      "step": 17500
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.7915194346289753e-05,
      "loss": 1.8647,
      "step": 18000
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.730172752257558e-05,
      "loss": 1.8488,
      "step": 18500
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6688260698861407e-05,
      "loss": 1.8342,
      "step": 19000
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.6074793875147235e-05,
      "loss": 1.833,
      "step": 19500
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.546132705143306e-05,
      "loss": 1.8322,
      "step": 20000
    },
    {
      "epoch": 1.47,
      "eval_bleu": 7.2663,
      "eval_gen_len": 58.24,
      "eval_loss": 1.9960261583328247,
      "eval_runtime": 35.7526,
      "eval_samples_per_second": 6.993,
      "eval_steps_per_second": 0.448,
      "step": 20000
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.4847860227718886e-05,
      "loss": 1.8187,
      "step": 20500
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.4234393404004714e-05,
      "loss": 1.8118,
      "step": 21000
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.362092658029054e-05,
      "loss": 1.8144,
      "step": 21500
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.3007459756576365e-05,
      "loss": 1.7989,
      "step": 22000
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.2393992932862194e-05,
      "loss": 1.8068,
      "step": 22500
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.178052610914802e-05,
      "loss": 1.8043,
      "step": 23000
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1167059285433844e-05,
      "loss": 1.7863,
      "step": 23500
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.055359246171967e-05,
      "loss": 1.7868,
      "step": 24000
    },
    {
      "epoch": 1.77,
      "eval_bleu": 7.0981,
      "eval_gen_len": 66.34,
      "eval_loss": 1.922366976737976,
      "eval_runtime": 40.7924,
      "eval_samples_per_second": 6.129,
      "eval_steps_per_second": 0.392,
      "step": 24000
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.9940125638005498e-05,
      "loss": 1.7759,
      "step": 24500
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.9326658814291323e-05,
      "loss": 1.7653,
      "step": 25000
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.871319199057715e-05,
      "loss": 1.7689,
      "step": 25500
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8099725166862977e-05,
      "loss": 1.758,
      "step": 26000
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.7486258343148802e-05,
      "loss": 1.754,
      "step": 26500
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.687279151943463e-05,
      "loss": 1.7393,
      "step": 27000
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6259324695720456e-05,
      "loss": 1.7151,
      "step": 27500
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5645857872006285e-05,
      "loss": 1.7012,
      "step": 28000
    },
    {
      "epoch": 2.06,
      "eval_bleu": 7.5657,
      "eval_gen_len": 60.3,
      "eval_loss": 1.8868523836135864,
      "eval_runtime": 38.0123,
      "eval_samples_per_second": 6.577,
      "eval_steps_per_second": 0.421,
      "step": 28000
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.503239104829211e-05,
      "loss": 1.6993,
      "step": 28500
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.4418924224577934e-05,
      "loss": 1.6944,
      "step": 29000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.380545740086376e-05,
      "loss": 1.7064,
      "step": 29500
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.3191990577149588e-05,
      "loss": 1.6993,
      "step": 30000
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.2578523753435415e-05,
      "loss": 1.6893,
      "step": 30500
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.1965056929721242e-05,
      "loss": 1.6874,
      "step": 31000
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1351590106007069e-05,
      "loss": 1.6793,
      "step": 31500
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.0738123282292894e-05,
      "loss": 1.6773,
      "step": 32000
    },
    {
      "epoch": 2.36,
      "eval_bleu": 7.9888,
      "eval_gen_len": 61.18,
      "eval_loss": 1.8613367080688477,
      "eval_runtime": 38.7116,
      "eval_samples_per_second": 6.458,
      "eval_steps_per_second": 0.413,
      "step": 32000
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.012465645857872e-05,
      "loss": 1.6798,
      "step": 32500
    },
    {
      "epoch": 2.43,
      "learning_rate": 9.511189634864546e-06,
      "loss": 1.6823,
      "step": 33000
    },
    {
      "epoch": 2.47,
      "learning_rate": 8.897722811150373e-06,
      "loss": 1.6711,
      "step": 33500
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.2842559874362e-06,
      "loss": 1.679,
      "step": 34000
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.670789163722027e-06,
      "loss": 1.6662,
      "step": 34500
    },
    {
      "epoch": 2.58,
      "learning_rate": 7.057322340007853e-06,
      "loss": 1.6716,
      "step": 35000
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.4438555162936784e-06,
      "loss": 1.6569,
      "step": 35500
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.830388692579505e-06,
      "loss": 1.6631,
      "step": 36000
    },
    {
      "epoch": 2.65,
      "eval_bleu": 8.0862,
      "eval_gen_len": 60.5,
      "eval_loss": 1.8354450464248657,
      "eval_runtime": 38.0466,
      "eval_samples_per_second": 6.571,
      "eval_steps_per_second": 0.421,
      "step": 36000
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.216921868865332e-06,
      "loss": 1.6602,
      "step": 36500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.6034550451511585e-06,
      "loss": 1.6549,
      "step": 37000
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.989988221436985e-06,
      "loss": 1.6521,
      "step": 37500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.3765213977228115e-06,
      "loss": 1.6643,
      "step": 38000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.7630545740086376e-06,
      "loss": 1.6533,
      "step": 38500
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.149587750294464e-06,
      "loss": 1.6596,
      "step": 39000
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.5361209265802905e-06,
      "loss": 1.6664,
      "step": 39500
    },
    {
      "epoch": 2.94,
      "learning_rate": 9.22654102866117e-07,
      "loss": 1.6379,
      "step": 40000
    },
    {
      "epoch": 2.94,
      "eval_bleu": 8.4077,
      "eval_gen_len": 60.18,
      "eval_loss": 1.8157883882522583,
      "eval_runtime": 35.7813,
      "eval_samples_per_second": 6.987,
      "eval_steps_per_second": 0.447,
      "step": 40000
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.091872791519435e-07,
      "loss": 1.6493,
      "step": 40500
    },
    {
      "epoch": 3.0,
      "step": 40752,
      "total_flos": 3.4728164062396416e+16,
      "train_loss": 1.939443588256836,
      "train_runtime": 15380.8623,
      "train_samples_per_second": 42.392,
      "train_steps_per_second": 2.65
    }
  ],
  "logging_steps": 500,
  "max_steps": 40752,
  "num_train_epochs": 3,
  "save_steps": 4000,
  "total_flos": 3.4728164062396416e+16,
  "trial_name": null,
  "trial_params": null
}