|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9716123619074253, |
|
"global_step": 42500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.941733090942992e-05, |
|
"loss": 3.2035, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8834661818859834e-05, |
|
"loss": 3.1734, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.825199272828975e-05, |
|
"loss": 3.1439, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.766932363771967e-05, |
|
"loss": 3.1277, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7086654547149584e-05, |
|
"loss": 3.1302, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.65039854565795e-05, |
|
"loss": 3.1091, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.592131636600942e-05, |
|
"loss": 3.115, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5338647275439334e-05, |
|
"loss": 3.0718, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4755978184869253e-05, |
|
"loss": 3.0977, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.417330909429917e-05, |
|
"loss": 3.1092, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3590640003729085e-05, |
|
"loss": 3.0919, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3007970913159004e-05, |
|
"loss": 3.0868, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.242530182258892e-05, |
|
"loss": 3.0913, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1842632732018835e-05, |
|
"loss": 3.0817, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1259963641448754e-05, |
|
"loss": 3.0835, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0677294550878666e-05, |
|
"loss": 3.059, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0094625460308585e-05, |
|
"loss": 3.0757, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.95119563697385e-05, |
|
"loss": 3.0767, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.892928727916842e-05, |
|
"loss": 3.0637, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.834661818859833e-05, |
|
"loss": 3.0504, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.776394909802825e-05, |
|
"loss": 3.0339, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.718128000745816e-05, |
|
"loss": 3.0385, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.659861091688808e-05, |
|
"loss": 3.064, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6015941826318e-05, |
|
"loss": 3.0685, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.543327273574791e-05, |
|
"loss": 3.0366, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.485060364517783e-05, |
|
"loss": 3.0512, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.426793455460775e-05, |
|
"loss": 3.0179, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.368526546403766e-05, |
|
"loss": 3.0113, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.310259637346758e-05, |
|
"loss": 2.9934, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.25199272828975e-05, |
|
"loss": 3.0166, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.193725819232741e-05, |
|
"loss": 3.0001, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.135458910175733e-05, |
|
"loss": 2.9672, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.077192001118725e-05, |
|
"loss": 2.9892, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.018925092061716e-05, |
|
"loss": 2.9614, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.960658183004708e-05, |
|
"loss": 2.9961, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9023912739477e-05, |
|
"loss": 2.9779, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8441243648906912e-05, |
|
"loss": 2.9579, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.785857455833683e-05, |
|
"loss": 2.973, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.727590546776675e-05, |
|
"loss": 2.9722, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6693236377196662e-05, |
|
"loss": 2.9646, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.611056728662658e-05, |
|
"loss": 2.9623, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.55278981960565e-05, |
|
"loss": 2.9916, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4945229105486416e-05, |
|
"loss": 2.9656, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.436256001491633e-05, |
|
"loss": 2.9928, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3779890924346247e-05, |
|
"loss": 2.9385, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3197221833776163e-05, |
|
"loss": 2.957, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.261455274320608e-05, |
|
"loss": 2.9654, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2031883652635994e-05, |
|
"loss": 2.97, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.144921456206591e-05, |
|
"loss": 2.9741, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.086654547149583e-05, |
|
"loss": 2.9547, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0283876380925745e-05, |
|
"loss": 2.9451, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.970120729035566e-05, |
|
"loss": 2.9596, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.911853819978558e-05, |
|
"loss": 2.9608, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8535869109215495e-05, |
|
"loss": 2.9353, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.795320001864541e-05, |
|
"loss": 2.9597, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.737053092807533e-05, |
|
"loss": 2.9766, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6787861837505245e-05, |
|
"loss": 2.9663, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.620519274693516e-05, |
|
"loss": 2.9407, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.562252365636508e-05, |
|
"loss": 2.9297, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5039854565794994e-05, |
|
"loss": 2.8912, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.445718547522491e-05, |
|
"loss": 2.9339, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3874516384654828e-05, |
|
"loss": 2.9428, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3291847294084744e-05, |
|
"loss": 2.9401, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.270917820351466e-05, |
|
"loss": 2.9332, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2126509112944577e-05, |
|
"loss": 2.9101, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1543840022374494e-05, |
|
"loss": 2.9166, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.096117093180441e-05, |
|
"loss": 2.9384, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0378501841234327e-05, |
|
"loss": 2.9133, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.795832750664243e-06, |
|
"loss": 2.8853, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.213163660094159e-06, |
|
"loss": 2.9052, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.630494569524076e-06, |
|
"loss": 2.9147, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.047825478953992e-06, |
|
"loss": 2.9421, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.465156388383909e-06, |
|
"loss": 2.9287, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.8824872978138264e-06, |
|
"loss": 2.9044, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.299818207243742e-06, |
|
"loss": 2.9015, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.7171491166736594e-06, |
|
"loss": 2.9207, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.134480026103575e-06, |
|
"loss": 2.9307, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.5518109355334916e-06, |
|
"loss": 2.9315, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.969141844963409e-06, |
|
"loss": 2.9047, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.386472754393325e-06, |
|
"loss": 2.9263, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.8038036638232415e-06, |
|
"loss": 2.9154, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.2211345732531584e-06, |
|
"loss": 2.9042, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6384654826830745e-06, |
|
"loss": 2.886, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0557963921129912e-06, |
|
"loss": 2.9167, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.731273015429078e-07, |
|
"loss": 2.8958, |
|
"step": 42500 |
|
} |
|
], |
|
"max_steps": 42906, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.277802369024e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|