|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 1100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.545454545454545e-07, |
|
"loss": 2.7297, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 2.7244, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 2.7671, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"gpt4_scores": 0.43333333333333335, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.312265396118164, |
|
"eval_runtime": 4.9266, |
|
"eval_samples_per_second": 4.669, |
|
"eval_steps_per_second": 1.218, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 2.4172, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 2.1566, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 2.0319, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"gpt4_scores": 0.6, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.9679468870162964, |
|
"eval_runtime": 4.9521, |
|
"eval_samples_per_second": 4.644, |
|
"eval_steps_per_second": 1.212, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.9987413559579636e-05, |
|
"loss": 1.8348, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.988679806432712e-05, |
|
"loss": 1.8492, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.968597221690986e-05, |
|
"loss": 1.7972, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"gpt4_scores": 0.5166666666666666, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.942632794380188, |
|
"eval_runtime": 4.9711, |
|
"eval_samples_per_second": 4.627, |
|
"eval_steps_per_second": 1.207, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.938574467213518e-05, |
|
"loss": 1.6588, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.898732434036244e-05, |
|
"loss": 1.6187, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.849231551964771e-05, |
|
"loss": 1.5841, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"gpt4_scores": 0.6833333333333332, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.0110199451446533, |
|
"eval_runtime": 4.9304, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 1.217, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.790271143580174e-05, |
|
"loss": 1.2998, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.722088621637309e-05, |
|
"loss": 1.2842, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"gpt4_scores": 0.5166666666666667, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.2670648097991943, |
|
"eval_runtime": 4.9304, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 1.217, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 4.644958533087443e-05, |
|
"loss": 1.1442, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 4.559191453574582e-05, |
|
"loss": 0.9076, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 4.465132736856969e-05, |
|
"loss": 0.9305, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"gpt4_scores": 0.75, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.5263493061065674, |
|
"eval_runtime": 4.9267, |
|
"eval_samples_per_second": 4.668, |
|
"eval_steps_per_second": 1.218, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 4.3631611241893874e-05, |
|
"loss": 0.7708, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 4.2536872192658036e-05, |
|
"loss": 0.6432, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 4.137151834863213e-05, |
|
"loss": 0.6734, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"gpt4_scores": 0.7333333333333334, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.7797744274139404, |
|
"eval_runtime": 4.982, |
|
"eval_samples_per_second": 4.617, |
|
"eval_steps_per_second": 1.204, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 4.014024217844167e-05, |
|
"loss": 0.5284, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 3.884800159665276e-05, |
|
"loss": 0.4544, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4579, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"gpt4_scores": 0.5499999999999999, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.1051599979400635, |
|
"eval_runtime": 4.9596, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 1.21, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 3.610166531514436e-05, |
|
"loss": 0.3129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 3.465862814232822e-05, |
|
"loss": 0.3091, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"gpt4_scores": 0.5666666666666668, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.3408806324005127, |
|
"eval_runtime": 4.9659, |
|
"eval_samples_per_second": 4.632, |
|
"eval_steps_per_second": 1.208, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.3176699082935545e-05, |
|
"loss": 0.3082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 3.166184534225087e-05, |
|
"loss": 0.2184, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 3.012016670162977e-05, |
|
"loss": 0.2418, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"gpt4_scores": 0.48333333333333334, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.499851703643799, |
|
"eval_runtime": 4.9663, |
|
"eval_samples_per_second": 4.631, |
|
"eval_steps_per_second": 1.208, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 2.8557870956832132e-05, |
|
"loss": 0.1996, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 2.698124892141971e-05, |
|
"loss": 0.1778, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 2.5396649095870202e-05, |
|
"loss": 0.1718, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"gpt4_scores": 0.6333333333333334, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 3.6687815189361572, |
|
"eval_runtime": 4.9484, |
|
"eval_samples_per_second": 4.648, |
|
"eval_steps_per_second": 1.213, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 2.3810452104406444e-05, |
|
"loss": 0.1401, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 2.222904500247473e-05, |
|
"loss": 0.1344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 0.1555, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"gpt4_scores": 0.6, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.78193998336792, |
|
"eval_runtime": 4.9817, |
|
"eval_samples_per_second": 4.617, |
|
"eval_steps_per_second": 1.204, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 1.9106026612264316e-05, |
|
"loss": 0.1414, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 1.7576990616793137e-05, |
|
"loss": 0.1191, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"gpt4_scores": 0.3666666666666667, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 3.910775899887085, |
|
"eval_runtime": 4.9709, |
|
"eval_samples_per_second": 4.627, |
|
"eval_steps_per_second": 1.207, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 1.6077844460203206e-05, |
|
"loss": 0.1191, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 1.4614624674952842e-05, |
|
"loss": 0.1069, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 1.3193223130682936e-05, |
|
"loss": 0.1291, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"gpt4_scores": 0.3666666666666667, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 3.995321750640869, |
|
"eval_runtime": 4.9833, |
|
"eval_samples_per_second": 4.615, |
|
"eval_steps_per_second": 1.204, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 1.181936330973744e-05, |
|
"loss": 0.1075, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1.049857726072005e-05, |
|
"loss": 0.1058, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 9.236183322886945e-06, |
|
"loss": 0.1213, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"gpt4_scores": 0.3333333333333333, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 4.102020740509033, |
|
"eval_runtime": 4.931, |
|
"eval_samples_per_second": 4.664, |
|
"eval_steps_per_second": 1.217, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 8.0372647110717e-06, |
|
"loss": 0.1013, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 6.906649047373246e-06, |
|
"loss": 0.1176, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 5.848888922025553e-06, |
|
"loss": 0.1, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"gpt4_scores": 0.43333333333333335, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 4.120510578155518, |
|
"eval_runtime": 4.9308, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 1.217, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 4.868243561723535e-06, |
|
"loss": 0.0974, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 3.968661679220468e-06, |
|
"loss": 0.115, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"gpt4_scores": 0.45, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 4.160642623901367, |
|
"eval_runtime": 4.9259, |
|
"eval_samples_per_second": 4.669, |
|
"eval_steps_per_second": 1.218, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 3.1537655732553768e-06, |
|
"loss": 0.1013, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 2.4268365428344736e-06, |
|
"loss": 0.0971, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 1.790801674598186e-06, |
|
"loss": 0.1076, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"gpt4_scores": 0.31666666666666665, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 4.183867454528809, |
|
"eval_runtime": 4.9845, |
|
"eval_samples_per_second": 4.614, |
|
"eval_steps_per_second": 1.204, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 1.248222056476367e-06, |
|
"loss": 0.103, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"learning_rate": 8.012824650910938e-07, |
|
"loss": 0.1082, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 4.517825684323324e-07, |
|
"loss": 0.0962, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"gpt4_scores": 0.4666666666666666, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 4.187291622161865, |
|
"eval_runtime": 4.9682, |
|
"eval_samples_per_second": 4.629, |
|
"eval_steps_per_second": 1.208, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 2.011296792301165e-07, |
|
"loss": 0.0981, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 5.033308820289184e-08, |
|
"loss": 0.1154, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0917, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"gpt4_scores": 0.31666666666666665, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 4.187403678894043, |
|
"eval_runtime": 4.9884, |
|
"eval_samples_per_second": 4.611, |
|
"eval_steps_per_second": 1.203, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1100, |
|
"total_flos": 3.836907908090266e+16, |
|
"train_loss": 0.660087760145014, |
|
"train_runtime": 8612.1752, |
|
"train_samples_per_second": 0.504, |
|
"train_steps_per_second": 0.128 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 20, |
|
"total_flos": 3.836907908090266e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|