|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": null, |
|
"global_step": 0, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9997541914704325e-05, |
|
"loss": 0.0855, |
|
"reward": 0.814, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9989966819651966e-05, |
|
"loss": 0.0814, |
|
"reward": 0.9376, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9977276276102345e-05, |
|
"loss": 0.0734, |
|
"reward": 0.8684, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.995947461480701e-05, |
|
"loss": 0.0716, |
|
"reward": 0.8537, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9936567910728124e-05, |
|
"loss": 0.0603, |
|
"reward": 0.6429, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9908563980965393e-05, |
|
"loss": 0.0624, |
|
"reward": 0.641, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9875472382088355e-05, |
|
"loss": 0.0631, |
|
"reward": 0.8062, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9837304406875167e-05, |
|
"loss": 0.0577, |
|
"reward": 0.5326, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9794073080458815e-05, |
|
"loss": 0.0556, |
|
"reward": 0.9085, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9745793155882214e-05, |
|
"loss": 0.055, |
|
"reward": 0.9616, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9692481109063605e-05, |
|
"loss": 0.0525, |
|
"reward": 0.899, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.963415513317399e-05, |
|
"loss": 0.0518, |
|
"reward": 0.6793, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9570835132428594e-05, |
|
"loss": 0.0546, |
|
"reward": 0.8445, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9502542715294366e-05, |
|
"loss": 0.0532, |
|
"reward": 0.7952, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.942930118711593e-05, |
|
"loss": 0.0515, |
|
"reward": 0.8009, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9351135542162432e-05, |
|
"loss": 0.0445, |
|
"reward": 1.1039, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9268072455098065e-05, |
|
"loss": 0.0447, |
|
"reward": 0.858, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.918014027187909e-05, |
|
"loss": 0.0455, |
|
"reward": 0.9129, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9087369000080567e-05, |
|
"loss": 0.0459, |
|
"reward": 0.9215, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.898979029865602e-05, |
|
"loss": 0.0424, |
|
"reward": 0.8282, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.888743746713357e-05, |
|
"loss": 0.0437, |
|
"reward": 0.9707, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8780345434252185e-05, |
|
"loss": 0.041, |
|
"reward": 0.796, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8668550746041966e-05, |
|
"loss": 0.0468, |
|
"reward": 0.9538, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8552091553352533e-05, |
|
"loss": 0.0409, |
|
"reward": 0.8948, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8431007598833705e-05, |
|
"loss": 0.0408, |
|
"reward": 0.7338, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.830534020337303e-05, |
|
"loss": 0.0392, |
|
"reward": 0.8156, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.817513225199466e-05, |
|
"loss": 0.0375, |
|
"reward": 0.7625, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8040428179224528e-05, |
|
"loss": 0.04, |
|
"reward": 0.9719, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.790127395392666e-05, |
|
"loss": 0.034, |
|
"reward": 0.8545, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7757717063615962e-05, |
|
"loss": 0.0437, |
|
"reward": 0.8889, |
|
"step": 1499 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7609806498252692e-05, |
|
"loss": 0.0355, |
|
"reward": 1.2005, |
|
"step": 1549 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.745759273352425e-05, |
|
"loss": 0.0368, |
|
"reward": 1.0282, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7301127713619938e-05, |
|
"loss": 0.0346, |
|
"reward": 0.9873, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7140464833504564e-05, |
|
"loss": 0.034, |
|
"reward": 0.7074, |
|
"step": 1699 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.6975658920697006e-05, |
|
"loss": 0.0334, |
|
"reward": 0.9238, |
|
"step": 1749 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.680676621655984e-05, |
|
"loss": 0.0317, |
|
"reward": 0.8791, |
|
"step": 1799 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.663384435710654e-05, |
|
"loss": 0.0348, |
|
"reward": 1.1593, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.6456952353332712e-05, |
|
"loss": 0.0309, |
|
"reward": 1.0537, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6276150571078108e-05, |
|
"loss": 0.0333, |
|
"reward": 1.0686, |
|
"step": 1949 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6091500710426278e-05, |
|
"loss": 0.0355, |
|
"reward": 1.0331, |
|
"step": 1999 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.5903065784648947e-05, |
|
"loss": 0.0331, |
|
"reward": 0.8029, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.5710910098702187e-05, |
|
"loss": 0.0324, |
|
"reward": 0.9726, |
|
"step": 2099 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.5515099227281836e-05, |
|
"loss": 0.0322, |
|
"reward": 0.9616, |
|
"step": 2149 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.5315699992445617e-05, |
|
"loss": 0.0322, |
|
"reward": 0.9011, |
|
"step": 2199 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.511278044080954e-05, |
|
"loss": 0.0314, |
|
"reward": 0.7529, |
|
"step": 2249 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.4906409820326436e-05, |
|
"loss": 0.0286, |
|
"reward": 1.1313, |
|
"step": 2299 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.4696658556654575e-05, |
|
"loss": 0.0285, |
|
"reward": 1.032, |
|
"step": 2349 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.4483598229124274e-05, |
|
"loss": 0.0296, |
|
"reward": 1.0514, |
|
"step": 2399 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.42673015463109e-05, |
|
"loss": 0.0308, |
|
"reward": 1.1721, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.404784232122248e-05, |
|
"loss": 0.0288, |
|
"reward": 1.2602, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.382529544611038e-05, |
|
"loss": 0.0274, |
|
"reward": 1.3181, |
|
"step": 2549 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.3599736866911756e-05, |
|
"loss": 0.0302, |
|
"reward": 0.8566, |
|
"step": 2599 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.3371243557332333e-05, |
|
"loss": 0.0324, |
|
"reward": 1.1646, |
|
"step": 2649 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.313989349257855e-05, |
|
"loss": 0.028, |
|
"reward": 1.3095, |
|
"step": 2699 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.2905765622747843e-05, |
|
"loss": 0.0294, |
|
"reward": 0.9202, |
|
"step": 2749 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.266893984588631e-05, |
|
"loss": 0.0299, |
|
"reward": 0.9073, |
|
"step": 2799 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.242949698072283e-05, |
|
"loss": 0.0264, |
|
"reward": 1.0046, |
|
"step": 2849 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.2187518739089033e-05, |
|
"loss": 0.0317, |
|
"reward": 1.1227, |
|
"step": 2899 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.194308769803444e-05, |
|
"loss": 0.0276, |
|
"reward": 1.0356, |
|
"step": 2949 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.1696287271646406e-05, |
|
"loss": 0.0253, |
|
"reward": 1.1648, |
|
"step": 2999 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.1447201682584356e-05, |
|
"loss": 0.026, |
|
"reward": 1.155, |
|
"step": 3049 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.1195915933338133e-05, |
|
"loss": 0.0265, |
|
"reward": 0.9532, |
|
"step": 3099 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.0942515777220186e-05, |
|
"loss": 0.0278, |
|
"reward": 1.1358, |
|
"step": 3149 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.0687087689101562e-05, |
|
"loss": 0.0258, |
|
"reward": 1.0723, |
|
"step": 3199 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.0429718835901672e-05, |
|
"loss": 0.029, |
|
"reward": 1.3277, |
|
"step": 3249 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.0170497046841824e-05, |
|
"loss": 0.0281, |
|
"reward": 1.2176, |
|
"step": 3299 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9909510783472825e-05, |
|
"loss": 0.0258, |
|
"reward": 1.3399, |
|
"step": 3349 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.964684910948672e-05, |
|
"loss": 0.0279, |
|
"reward": 1.1264, |
|
"step": 3399 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9382601660323124e-05, |
|
"loss": 0.0259, |
|
"reward": 1.0383, |
|
"step": 3449 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.911685861258034e-05, |
|
"loss": 0.0244, |
|
"reward": 1.135, |
|
"step": 3499 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.8849710653241923e-05, |
|
"loss": 0.0246, |
|
"reward": 1.0922, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.858124894872895e-05, |
|
"loss": 0.0243, |
|
"reward": 1.1385, |
|
"step": 3599 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8311565113788777e-05, |
|
"loss": 0.0255, |
|
"reward": 0.9836, |
|
"step": 3649 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.804075118023072e-05, |
|
"loss": 0.0244, |
|
"reward": 1.0459, |
|
"step": 3699 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7768899565519493e-05, |
|
"loss": 0.0233, |
|
"reward": 1.1805, |
|
"step": 3749 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.749610304123695e-05, |
|
"loss": 0.0264, |
|
"reward": 1.305, |
|
"step": 3799 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7222454701423068e-05, |
|
"loss": 0.0237, |
|
"reward": 1.0362, |
|
"step": 3849 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.694804793080681e-05, |
|
"loss": 0.0236, |
|
"reward": 1.2275, |
|
"step": 3899 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6672976372937838e-05, |
|
"loss": 0.0238, |
|
"reward": 1.2652, |
|
"step": 3949 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.63973338982299e-05, |
|
"loss": 0.0235, |
|
"reward": 1.1863, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6121214571926765e-05, |
|
"loss": 0.0253, |
|
"reward": 1.4457, |
|
"step": 4049 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5844712622001708e-05, |
|
"loss": 0.0241, |
|
"reward": 1.3766, |
|
"step": 4099 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5567922407001432e-05, |
|
"loss": 0.0228, |
|
"reward": 1.2514, |
|
"step": 4149 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5290938383845442e-05, |
|
"loss": 0.0237, |
|
"reward": 1.2308, |
|
"step": 4199 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5013855075591872e-05, |
|
"loss": 0.0219, |
|
"reward": 1.3735, |
|
"step": 4249 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4736767039180697e-05, |
|
"loss": 0.0239, |
|
"reward": 1.1207, |
|
"step": 4299 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4459768833165414e-05, |
|
"loss": 0.0219, |
|
"reward": 1.1005, |
|
"step": 4349 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4182954985444172e-05, |
|
"loss": 0.023, |
|
"reward": 1.4297, |
|
"step": 4399 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.3906419961001339e-05, |
|
"loss": 0.0238, |
|
"reward": 1.289, |
|
"step": 4449 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.3630258129670565e-05, |
|
"loss": 0.0221, |
|
"reward": 1.3299, |
|
"step": 4499 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3354563733930315e-05, |
|
"loss": 0.0221, |
|
"reward": 1.5207, |
|
"step": 4549 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3079430856742829e-05, |
|
"loss": 0.0207, |
|
"reward": 1.3068, |
|
"step": 4599 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.2804953389447579e-05, |
|
"loss": 0.0228, |
|
"reward": 1.2289, |
|
"step": 4649 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.2531224999720032e-05, |
|
"loss": 0.0222, |
|
"reward": 1.1784, |
|
"step": 4699 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.2258339099606862e-05, |
|
"loss": 0.0208, |
|
"reward": 1.1996, |
|
"step": 4749 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.198638881364825e-05, |
|
"loss": 0.0213, |
|
"reward": 1.2377, |
|
"step": 4799 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.1715466947098438e-05, |
|
"loss": 0.0217, |
|
"reward": 1.1597, |
|
"step": 4849 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.1445665954255139e-05, |
|
"loss": 0.02, |
|
"reward": 1.2, |
|
"step": 4899 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.1177077906908772e-05, |
|
"loss": 0.0233, |
|
"reward": 1.3867, |
|
"step": 4949 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.0909794462922214e-05, |
|
"loss": 0.0213, |
|
"reward": 1.3454, |
|
"step": 4999 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.064390683495178e-05, |
|
"loss": 0.0212, |
|
"reward": 1.2913, |
|
"step": 5049 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0379505759320209e-05, |
|
"loss": 0.0215, |
|
"reward": 1.1036, |
|
"step": 5099 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.0116681465052087e-05, |
|
"loss": 0.0211, |
|
"reward": 1.5439, |
|
"step": 5149 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.855523643082532e-06, |
|
"loss": 0.0204, |
|
"reward": 1.2613, |
|
"step": 5199 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.596121415649359e-06, |
|
"loss": 0.0219, |
|
"reward": 1.4101, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.33856330587944e-06, |
|
"loss": 0.022, |
|
"reward": 1.4715, |
|
"step": 5299 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.082937207579442e-06, |
|
"loss": 0.0227, |
|
"reward": 1.3201, |
|
"step": 5349 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.82933035524135e-06, |
|
"loss": 0.0194, |
|
"reward": 1.2946, |
|
"step": 5399 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.577829294272992e-06, |
|
"loss": 0.0206, |
|
"reward": 1.4951, |
|
"step": 5449 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.328519851463702e-06, |
|
"loss": 0.0207, |
|
"reward": 1.5987, |
|
"step": 5499 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.08148710569524e-06, |
|
"loss": 0.0202, |
|
"reward": 1.5224, |
|
"step": 5549 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.836815358907908e-06, |
|
"loss": 0.0195, |
|
"reward": 1.5981, |
|
"step": 5599 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.594588107331857e-06, |
|
"loss": 0.0198, |
|
"reward": 1.3337, |
|
"step": 5649 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.354888012993293e-06, |
|
"loss": 0.0194, |
|
"reward": 1.503, |
|
"step": 5699 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.117796875505393e-06, |
|
"loss": 0.0192, |
|
"reward": 1.3961, |
|
"step": 5749 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.883395604153524e-06, |
|
"loss": 0.0204, |
|
"reward": 1.5568, |
|
"step": 5799 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.651764190284266e-06, |
|
"loss": 0.0195, |
|
"reward": 1.4106, |
|
"step": 5849 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.422981680007759e-06, |
|
"loss": 0.0191, |
|
"reward": 1.5962, |
|
"step": 5899 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.197126147222517e-06, |
|
"loss": 0.0208, |
|
"reward": 1.5746, |
|
"step": 5949 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.974274666972112e-06, |
|
"loss": 0.0201, |
|
"reward": 1.2134, |
|
"step": 5999 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.754503289142692e-06, |
|
"loss": 0.0198, |
|
"reward": 1.6037, |
|
"step": 6049 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.537887012510291e-06, |
|
"loss": 0.0199, |
|
"reward": 1.5427, |
|
"step": 6099 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.324499759146934e-06, |
|
"loss": 0.0202, |
|
"reward": 1.5012, |
|
"step": 6149 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.11441434919409e-06, |
|
"loss": 0.019, |
|
"reward": 1.6545, |
|
"step": 6199 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.907702476012234e-06, |
|
"loss": 0.0187, |
|
"reward": 1.529, |
|
"step": 6249 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.704434681714884e-06, |
|
"loss": 0.0195, |
|
"reward": 1.7154, |
|
"step": 6299 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.504680333095542e-06, |
|
"loss": 0.0206, |
|
"reward": 1.549, |
|
"step": 6349 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.308507597955685e-06, |
|
"loss": 0.0194, |
|
"reward": 1.7198, |
|
"step": 6399 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.115983421841979e-06, |
|
"loss": 0.0194, |
|
"reward": 1.298, |
|
"step": 6449 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.927173505200547e-06, |
|
"loss": 0.0196, |
|
"reward": 1.5776, |
|
"step": 6499 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.742142280956153e-06, |
|
"loss": 0.0194, |
|
"reward": 1.4295, |
|
"step": 6549 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.5609528925239476e-06, |
|
"loss": 0.0194, |
|
"reward": 1.4245, |
|
"step": 6599 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.3836671722612646e-06, |
|
"loss": 0.0194, |
|
"reward": 1.5306, |
|
"step": 6649 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.2103456203668223e-06, |
|
"loss": 0.0201, |
|
"reward": 1.4046, |
|
"step": 6699 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.041047384234521e-06, |
|
"loss": 0.0178, |
|
"reward": 1.5582, |
|
"step": 6749 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.875830238268942e-06, |
|
"loss": 0.0193, |
|
"reward": 1.4851, |
|
"step": 6799 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.714750564169339e-06, |
|
"loss": 0.019, |
|
"reward": 1.5795, |
|
"step": 6849 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.557863331688927e-06, |
|
"loss": 0.0193, |
|
"reward": 1.5771, |
|
"step": 6899 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.405222079876017e-06, |
|
"loss": 0.0197, |
|
"reward": 1.4866, |
|
"step": 6949 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.256878898803354e-06, |
|
"loss": 0.0198, |
|
"reward": 1.4232, |
|
"step": 6999 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.112884411791984e-06, |
|
"loss": 0.0196, |
|
"reward": 1.5922, |
|
"step": 7049 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.9732877581356075e-06, |
|
"loss": 0.0196, |
|
"reward": 1.5317, |
|
"step": 7099 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.8381365763314151e-06, |
|
"loss": 0.019, |
|
"reward": 1.4884, |
|
"step": 7149 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7074769878230494e-06, |
|
"loss": 0.0181, |
|
"reward": 1.589, |
|
"step": 7199 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.5813535812612856e-06, |
|
"loss": 0.019, |
|
"reward": 1.5272, |
|
"step": 7249 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4598093972878007e-06, |
|
"loss": 0.0204, |
|
"reward": 1.5452, |
|
"step": 7299 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.3428859138471839e-06, |
|
"loss": 0.0188, |
|
"reward": 1.6511, |
|
"step": 7349 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2306230320322798e-06, |
|
"loss": 0.0192, |
|
"reward": 1.3072, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1230590624675747e-06, |
|
"loss": 0.0194, |
|
"reward": 1.6043, |
|
"step": 7449 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.0202307122354288e-06, |
|
"loss": 0.0185, |
|
"reward": 1.5806, |
|
"step": 7499 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.221730723494504e-07, |
|
"loss": 0.0187, |
|
"reward": 1.3901, |
|
"step": 7549 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.289196057794096e-07, |
|
"loss": 0.019, |
|
"reward": 1.6659, |
|
"step": 7599 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.405021360317366e-07, |
|
"loss": 0.0192, |
|
"reward": 1.3981, |
|
"step": 7649 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.569508362894783e-07, |
|
"loss": 0.0182, |
|
"reward": 1.6831, |
|
"step": 7699 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.782942191154622e-07, |
|
"loss": 0.0188, |
|
"reward": 1.4812, |
|
"step": 7749 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.045591267221461e-07, |
|
"loss": 0.0187, |
|
"reward": 1.7532, |
|
"step": 7799 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3577072181150035e-07, |
|
"loss": 0.0185, |
|
"reward": 1.6008, |
|
"step": 7849 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.719524789880202e-07, |
|
"loss": 0.0192, |
|
"reward": 1.8267, |
|
"step": 7899 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.1312617674783385e-07, |
|
"loss": 0.0188, |
|
"reward": 1.8393, |
|
"step": 7949 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.5931189004661406e-07, |
|
"loss": 0.0182, |
|
"reward": 1.4103, |
|
"step": 7999 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.1052798344882495e-07, |
|
"loss": 0.0187, |
|
"reward": 1.4023, |
|
"step": 8049 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.667911048606785e-07, |
|
"loss": 0.0192, |
|
"reward": 1.5402, |
|
"step": 8099 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2811617984889056e-07, |
|
"loss": 0.0182, |
|
"reward": 1.5328, |
|
"step": 8149 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.451640654721538e-08, |
|
"loss": 0.0183, |
|
"reward": 1.7633, |
|
"step": 8199 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.600325115246831e-08, |
|
"loss": 0.0183, |
|
"reward": 1.5819, |
|
"step": 8249 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.2586444011600835e-08, |
|
"loss": 0.0183, |
|
"reward": 1.6798, |
|
"step": 8299 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.4273976301131818e-08, |
|
"loss": 0.0193, |
|
"reward": 1.742, |
|
"step": 8349 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1072097300102168e-08, |
|
"loss": 0.0189, |
|
"reward": 1.4572, |
|
"step": 8399 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.98531225744747e-09, |
|
"loss": 0.0179, |
|
"reward": 1.4872, |
|
"step": 8449 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6380854554465253e-11, |
|
"loss": 0.0185, |
|
"reward": 1.9113, |
|
"step": 8499 |
|
} |
|
], |
|
"max_steps": 8502, |
|
"num_train_epochs": 1.0, |
|
"total_flos": 0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|