Upload 2 files
Browse files
pretrain_videomae_giant_patch16_224_tiktokactions/checkpoint-298.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1c734fb13884777a34285ca6739e820f0a573f172629300eaf601c0d2c64e72
|
3 |
+
size 12316403693
|
pretrain_videomae_giant_patch16_224_tiktokactions/log.txt
ADDED
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"train_lr": 1.2500047022710084e-06, "train_min_lr": 1.2500047022710084e-06, "train_loss": 1.0300160151254953, "train_loss_scale": 323960.22929361317, "train_weight_decay": 0.0500000000000075, "train_grad_norm": 0.2478663127313853, "epoch": 0, "n_parameters": 1026306560}
|
2 |
+
{"train_lr": 3.7500141068130316e-06, "train_min_lr": 3.7500141068130316e-06, "train_loss": 0.9518006612578059, "train_loss_scale": 2844123.37079666, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 1, "n_parameters": 1026306560}
|
3 |
+
{"train_lr": 6.25002351135504e-06, "train_min_lr": 6.25002351135504e-06, "train_loss": 0.920076063113412, "train_loss_scale": 2896776.983976529, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 2, "n_parameters": 1026306560}
|
4 |
+
{"train_lr": 8.750032915897065e-06, "train_min_lr": 8.750032915897065e-06, "train_loss": 0.8716975058200054, "train_loss_scale": 1737687.5576619273, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 3, "n_parameters": 1026306560}
|
5 |
+
{"train_lr": 1.1250042320439071e-05, "train_min_lr": 1.1250042320439071e-05, "train_loss": 0.8424681088456489, "train_loss_scale": 2152053.74497856, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 4, "n_parameters": 1026306560}
|
6 |
+
{"train_lr": 1.3750051724981092e-05, "train_min_lr": 1.3750051724981092e-05, "train_loss": 0.8227766022797404, "train_loss_scale": 2063903.313924622, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 5, "n_parameters": 1026306560}
|
7 |
+
{"train_lr": 1.6250061129523103e-05, "train_min_lr": 1.6250061129523103e-05, "train_loss": 0.8055127448392047, "train_loss_scale": 1050114.1954412097, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 6, "n_parameters": 1026306560}
|
8 |
+
{"train_lr": 1.8750070534065132e-05, "train_min_lr": 1.8750070534065132e-05, "train_loss": 0.7817553069406359, "train_loss_scale": 1080878.1042654028, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 7, "n_parameters": 1026306560}
|
9 |
+
{"train_lr": 2.1250079938607154e-05, "train_min_lr": 2.1250079938607154e-05, "train_loss": 0.7644546341295605, "train_loss_scale": 856124.0857594223, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 8, "n_parameters": 1026306560}
|
10 |
+
{"train_lr": 2.375008934314917e-05, "train_min_lr": 2.375008934314917e-05, "train_loss": 0.7502640391988131, "train_loss_scale": 759099.4502369668, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 9, "n_parameters": 1026306560}
|
11 |
+
{"train_lr": 2.6250098747691188e-05, "train_min_lr": 2.6250098747691188e-05, "train_loss": 0.7381019029477108, "train_loss_scale": 491246.37869555404, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 10, "n_parameters": 1026306560}
|
12 |
+
{"train_lr": 2.8750108152233196e-05, "train_min_lr": 2.8750108152233196e-05, "train_loss": 0.7274072502612147, "train_loss_scale": 634121.070638682, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 11, "n_parameters": 1026306560}
|
13 |
+
{"train_lr": 3.125011755677524e-05, "train_min_lr": 3.125011755677524e-05, "train_loss": 0.718122530050527, "train_loss_scale": 540261.568043331, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 12, "n_parameters": 1026306560}
|
14 |
+
{"train_lr": 3.375012696131725e-05, "train_min_lr": 3.375012696131725e-05, "train_loss": 0.7099955128966128, "train_loss_scale": 516301.2159783345, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 13, "n_parameters": 1026306560}
|
15 |
+
{"train_lr": 3.6250136365859285e-05, "train_min_lr": 3.6250136365859285e-05, "train_loss": 0.7031016664163768, "train_loss_scale": 467848.0595802302, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 14, "n_parameters": 1026306560}
|
16 |
+
{"train_lr": 3.875014577040128e-05, "train_min_lr": 3.875014577040128e-05, "train_loss": 0.6967709350652523, "train_loss_scale": 320506.68472128187, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 15, "n_parameters": 1026306560}
|
17 |
+
{"train_lr": 4.125015517494332e-05, "train_min_lr": 4.125015517494332e-05, "train_loss": 0.691358201016145, "train_loss_scale": 264007.58293838863, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 16, "n_parameters": 1026306560}
|
18 |
+
{"train_lr": 4.375016457948533e-05, "train_min_lr": 4.375016457948533e-05, "train_loss": 0.6861221637283402, "train_loss_scale": 390257.9318438276, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 17, "n_parameters": 1026306560}
|
19 |
+
{"train_lr": 4.6250173984027305e-05, "train_min_lr": 4.6250173984027305e-05, "train_loss": 0.6815588666011372, "train_loss_scale": 350678.9799142406, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 18, "n_parameters": 1026306560}
|
20 |
+
{"train_lr": 4.875018338856932e-05, "train_min_lr": 4.875018338856932e-05, "train_loss": 0.6770697719129771, "train_loss_scale": 365735.5468291582, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 19, "n_parameters": 1026306560}
|
21 |
+
{"train_lr": 5.1250192793111356e-05, "train_min_lr": 5.1250192793111356e-05, "train_loss": 0.6732994596102059, "train_loss_scale": 393038.51591062965, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 20, "n_parameters": 1026306560}
|
22 |
+
{"train_lr": 5.375020219765337e-05, "train_min_lr": 5.375020219765337e-05, "train_loss": 0.6698415232164425, "train_loss_scale": 231143.44572331302, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 21, "n_parameters": 1026306560}
|
23 |
+
{"train_lr": 5.625021160219539e-05, "train_min_lr": 5.625021160219539e-05, "train_loss": 0.666649928073502, "train_loss_scale": 328197.6619273302, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 22, "n_parameters": 1026306560}
|
24 |
+
{"train_lr": 5.87502210067374e-05, "train_min_lr": 5.87502210067374e-05, "train_loss": 0.663462208981618, "train_loss_scale": 287110.09523809527, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 23, "n_parameters": 1026306560}
|
25 |
+
{"train_lr": 6.125023041127941e-05, "train_min_lr": 6.125023041127941e-05, "train_loss": 0.6605473035344103, "train_loss_scale": 291133.0679304897, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 24, "n_parameters": 1026306560}
|
26 |
+
{"train_lr": 6.375023981582141e-05, "train_min_lr": 6.375023981582141e-05, "train_loss": 0.658211972196638, "train_loss_scale": 305420.53712480253, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 25, "n_parameters": 1026306560}
|
27 |
+
{"train_lr": 6.62502492203635e-05, "train_min_lr": 6.62502492203635e-05, "train_loss": 0.6556660512033905, "train_loss_scale": 276342.72714962764, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 26, "n_parameters": 1026306560}
|
28 |
+
{"train_lr": 6.875025862490549e-05, "train_min_lr": 6.875025862490549e-05, "train_loss": 0.6534831501237495, "train_loss_scale": 236231.3229519296, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 27, "n_parameters": 1026306560}
|
29 |
+
{"train_lr": 7.125026802944748e-05, "train_min_lr": 7.125026802944748e-05, "train_loss": 0.6511942085473081, "train_loss_scale": 241407.94222523132, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 28, "n_parameters": 1026306560}
|
30 |
+
{"train_lr": 7.375027740215286e-05, "train_min_lr": 7.375027740215286e-05, "train_loss": 0.649130307124094, "train_loss_scale": 233924.0297901151, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 29, "n_parameters": 1026306560}
|
31 |
+
{"train_lr": 7.499916790184181e-05, "train_min_lr": 7.499916790184181e-05, "train_loss": 0.6472014445715667, "train_loss_scale": 223570.79124351163, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 30, "n_parameters": 1026306560}
|
32 |
+
{"train_lr": 7.499417572977202e-05, "train_min_lr": 7.499417572977202e-05, "train_loss": 0.6447433810016925, "train_loss_scale": 214814.90950124126, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 31, "n_parameters": 1026306560}
|
33 |
+
{"train_lr": 7.498419206149277e-05, "train_min_lr": 7.498419206149277e-05, "train_loss": 0.6423977205407482, "train_loss_scale": 176360.02347099976, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 32, "n_parameters": 1026306560}
|
34 |
+
{"train_lr": 7.496921824863244e-05, "train_min_lr": 7.496921824863244e-05, "train_loss": 0.6406393066280665, "train_loss_scale": 183119.20920785377, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 33, "n_parameters": 1026306560}
|
35 |
+
{"train_lr": 7.494925631840661e-05, "train_min_lr": 7.494925631840661e-05, "train_loss": 0.6386543486937393, "train_loss_scale": 191638.44549763034, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 34, "n_parameters": 1026306560}
|
36 |
+
{"train_lr": 7.492430897334003e-05, "train_min_lr": 7.492430897334003e-05, "train_loss": 0.6369908800055644, "train_loss_scale": 172928.66440983975, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 35, "n_parameters": 1026306560}
|
37 |
+
{"train_lr": 7.489437959090435e-05, "train_min_lr": 7.489437959090435e-05, "train_loss": 0.635353271356153, "train_loss_scale": 144767.8555630783, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 36, "n_parameters": 1026306560}
|
38 |
+
{"train_lr": 7.485947222305898e-05, "train_min_lr": 7.485947222305898e-05, "train_loss": 0.633793860404029, "train_loss_scale": 172573.69623109908, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 37, "n_parameters": 1026306560}
|
39 |
+
{"train_lr": 7.48195915957022e-05, "train_min_lr": 7.48195915957022e-05, "train_loss": 0.6321676908036947, "train_loss_scale": 159780.05145565336, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 38, "n_parameters": 1026306560}
|
40 |
+
{"train_lr": 7.477474310803317e-05, "train_min_lr": 7.477474310803317e-05, "train_loss": 0.6309705693341995, "train_loss_scale": 143347.98284811556, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 39, "n_parameters": 1026306560}
|
41 |
+
{"train_lr": 7.472493283181877e-05, "train_min_lr": 7.472493283181877e-05, "train_loss": 0.6298111528872812, "train_loss_scale": 149101.4254118709, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 40, "n_parameters": 1026306560}
|
42 |
+
{"train_lr": 7.467016751057323e-05, "train_min_lr": 7.467016751057323e-05, "train_loss": 0.6286249667611221, "train_loss_scale": 150003.6361995035, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 41, "n_parameters": 1026306560}
|
43 |
+
{"train_lr": 7.461045455864487e-05, "train_min_lr": 7.461045455864487e-05, "train_loss": 0.6274279266948107, "train_loss_scale": 142993.01466937485, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 42, "n_parameters": 1026306560}
|
44 |
+
{"train_lr": 7.454580206021074e-05, "train_min_lr": 7.454580206021074e-05, "train_loss": 0.6263125150314615, "train_loss_scale": 128631.59377115776, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 43, "n_parameters": 1026306560}
|
45 |
+
{"train_lr": 7.447621876818487e-05, "train_min_lr": 7.447621876818487e-05, "train_loss": 0.6252130348541396, "train_loss_scale": 141425.23854660348, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 44, "n_parameters": 1026306560}
|
46 |
+
{"train_lr": 7.440171410303045e-05, "train_min_lr": 7.440171410303045e-05, "train_loss": 0.6245034699795061, "train_loss_scale": 140049.7368539833, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 45, "n_parameters": 1026306560}
|
47 |
+
{"train_lr": 7.432229815148737e-05, "train_min_lr": 7.432229815148737e-05, "train_loss": 0.6234489890844087, "train_loss_scale": 142549.304445949, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 46, "n_parameters": 1026306560}
|
48 |
+
{"train_lr": 7.423798166520288e-05, "train_min_lr": 7.423798166520288e-05, "train_loss": 0.622627822521261, "train_loss_scale": 140271.59196569622, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 47, "n_parameters": 1026306560}
|
49 |
+
{"train_lr": 7.41487760592812e-05, "train_min_lr": 7.41487760592812e-05, "train_loss": 0.621788419555697, "train_loss_scale": 144146.6612502821, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 48, "n_parameters": 1026306560}
|
50 |
+
{"train_lr": 7.405469341073167e-05, "train_min_lr": 7.405469341073167e-05, "train_loss": 0.6210306839968255, "train_loss_scale": 134947.0692845859, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 49, "n_parameters": 1026306560}
|
51 |
+
{"train_lr": 7.395574645684094e-05, "train_min_lr": 7.395574645684094e-05, "train_loss": 0.6202844455598594, "train_loss_scale": 117671.95125253893, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 50, "n_parameters": 1026306560}
|
52 |
+
{"train_lr": 7.3851948593441e-05, "train_min_lr": 7.3851948593441e-05, "train_loss": 0.6195127888799346, "train_loss_scale": 135050.6016700519, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 51, "n_parameters": 1026306560}
|
53 |
+
{"train_lr": 7.374331387310111e-05, "train_min_lr": 7.374331387310111e-05, "train_loss": 0.6187348371559531, "train_loss_scale": 121916.77905664635, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 52, "n_parameters": 1026306560}
|
54 |
+
{"train_lr": 7.36298570032238e-05, "train_min_lr": 7.36298570032238e-05, "train_loss": 0.6180127315816775, "train_loss_scale": 135361.19882645, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 53, "n_parameters": 1026306560}
|
55 |
+
{"train_lr": 7.351159334405342e-05, "train_min_lr": 7.351159334405342e-05, "train_loss": 0.617589170729884, "train_loss_scale": 139384.1715188445, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 54, "n_parameters": 1026306560}
|
56 |
+
{"train_lr": 7.338853890659644e-05, "train_min_lr": 7.338853890659644e-05, "train_loss": 0.6170523253125938, "train_loss_scale": 118736.85578876101, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 55, "n_parameters": 1026306560}
|
57 |
+
{"train_lr": 7.326071035045279e-05, "train_min_lr": 7.326071035045279e-05, "train_loss": 0.6164538830197562, "train_loss_scale": 119402.4211238998, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 56, "n_parameters": 1026306560}
|
58 |
+
{"train_lr": 7.312812498156355e-05, "train_min_lr": 7.312812498156355e-05, "train_loss": 0.6157359346353094, "train_loss_scale": 122700.66711803204, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 57, "n_parameters": 1026306560}
|
59 |
+
{"train_lr": 7.299080074986516e-05, "train_min_lr": 7.299080074986516e-05, "train_loss": 0.6153190604483999, "train_loss_scale": 124327.60460392688, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 58, "n_parameters": 1026306560}
|
60 |
+
{"train_lr": 7.284875624686109e-05, "train_min_lr": 7.284875624686109e-05, "train_loss": 0.6148390534069684, "train_loss_scale": 117043.3617693523, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 59, "n_parameters": 1026306560}
|
61 |
+
{"train_lr": 7.270201070310174e-05, "train_min_lr": 7.270201070310174e-05, "train_loss": 0.6141355500424743, "train_loss_scale": 125207.62988038818, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 60, "n_parameters": 1026306560}
|
62 |
+
{"train_lr": 7.255058398558559e-05, "train_min_lr": 7.255058398558559e-05, "train_loss": 0.6137764255987672, "train_loss_scale": 136174.66756939742, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 61, "n_parameters": 1026306560}
|
63 |
+
{"train_lr": 7.239449659506667e-05, "train_min_lr": 7.239449659506667e-05, "train_loss": 0.613379321531255, "train_loss_scale": 137017.71699390656, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 62, "n_parameters": 1026306560}
|
64 |
+
{"train_lr": 7.22337696632786e-05, "train_min_lr": 7.22337696632786e-05, "train_loss": 0.6130003251347214, "train_loss_scale": 121872.40803430376, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 63, "n_parameters": 1026306560}
|
65 |
+
{"train_lr": 7.206842495007457e-05, "train_min_lr": 7.206842495007457e-05, "train_loss": 0.6124832240822178, "train_loss_scale": 133556.77725118483, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 64, "n_parameters": 1026306560}
|
66 |
+
{"train_lr": 7.189848484048287e-05, "train_min_lr": 7.189848484048287e-05, "train_loss": 0.6120998601060713, "train_loss_scale": 122168.21484992102, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 65, "n_parameters": 1026306560}
|
67 |
+
{"train_lr": 7.172397234167354e-05, "train_min_lr": 7.172397234167354e-05, "train_loss": 0.6114478414431399, "train_loss_scale": 107266.94651320243, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 66, "n_parameters": 1026306560}
|
68 |
+
{"train_lr": 7.154491107984527e-05, "train_min_lr": 7.154491107984527e-05, "train_loss": 0.6111840181909393, "train_loss_scale": 86161.1302189122, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 67, "n_parameters": 1026306560}
|
69 |
+
{"train_lr": 7.136132529702747e-05, "train_min_lr": 7.136132529702747e-05, "train_loss": 0.6106718176233058, "train_loss_scale": 114033.52742044686, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 68, "n_parameters": 1026306560}
|
70 |
+
{"train_lr": 7.11732398477951e-05, "train_min_lr": 7.11732398477951e-05, "train_loss": 0.6104013966924011, "train_loss_scale": 118648.11374407583, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 69, "n_parameters": 1026306560}
|
71 |
+
{"train_lr": 7.098068019590814e-05, "train_min_lr": 7.098068019590814e-05, "train_loss": 0.6100851445351846, "train_loss_scale": 101491.31843827579, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 70, "n_parameters": 1026306560}
|
72 |
+
{"train_lr": 7.078367241086072e-05, "train_min_lr": 7.078367241086072e-05, "train_loss": 0.6098171420289442, "train_loss_scale": 118263.56488377342, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 71, "n_parameters": 1026306560}
|
73 |
+
{"train_lr": 7.05822431643538e-05, "train_min_lr": 7.05822431643538e-05, "train_loss": 0.6093425022959844, "train_loss_scale": 101668.80252764613, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 72, "n_parameters": 1026306560}
|
74 |
+
{"train_lr": 7.037641972668209e-05, "train_min_lr": 7.037641972668209e-05, "train_loss": 0.6090085521606042, "train_loss_scale": 103399.272399007, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 73, "n_parameters": 1026306560}
|
75 |
+
{"train_lr": 7.016622996304456e-05, "train_min_lr": 7.016622996304456e-05, "train_loss": 0.6085527797930129, "train_loss_scale": 86700.97765741368, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 74, "n_parameters": 1026306560}
|
76 |
+
{"train_lr": 6.995170232977089e-05, "train_min_lr": 6.995170232977089e-05, "train_loss": 0.6083635239561553, "train_loss_scale": 101846.28661701648, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 75, "n_parameters": 1026306560}
|
77 |
+
{"train_lr": 6.973286587046871e-05, "train_min_lr": 6.973286587046871e-05, "train_loss": 0.6079267779719552, "train_loss_scale": 133260.97043556758, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 76, "n_parameters": 1026306560}
|
78 |
+
{"train_lr": 6.950975021209159e-05, "train_min_lr": 6.950975021209159e-05, "train_loss": 0.6077145715106445, "train_loss_scale": 119313.67907921462, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 77, "n_parameters": 1026306560}
|
79 |
+
{"train_lr": 6.92823855609272e-05, "train_min_lr": 6.92823855609272e-05, "train_loss": 0.6073064969171993, "train_loss_scale": 109640.79620853081, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 78, "n_parameters": 1026306560}
|
80 |
+
{"train_lr": 6.905080269851075e-05, "train_min_lr": 6.905080269851075e-05, "train_loss": 0.6070229011011686, "train_loss_scale": 109699.95757165426, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 79, "n_parameters": 1026306560}
|
81 |
+
{"train_lr": 6.881503297745486e-05, "train_min_lr": 6.881503297745486e-05, "train_loss": 0.6069172320572591, "train_loss_scale": 103014.72353870458, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 80, "n_parameters": 1026306560}
|
82 |
+
{"train_lr": 6.857510831720544e-05, "train_min_lr": 6.857510831720544e-05, "train_loss": 0.6065330217466098, "train_loss_scale": 116289.05438952832, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 81, "n_parameters": 1026306560}
|
83 |
+
{"train_lr": 6.833106119972002e-05, "train_min_lr": 6.833106119972002e-05, "train_loss": 0.6060801456240789, "train_loss_scale": 88009.92281651997, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 82, "n_parameters": 1026306560}
|
84 |
+
{"train_lr": 6.808292466507247e-05, "train_min_lr": 6.808292466507247e-05, "train_loss": 0.6059558670656746, "train_loss_scale": 111681.86323628978, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 83, "n_parameters": 1026306560}
|
85 |
+
{"train_lr": 6.783073230697757e-05, "train_min_lr": 6.783073230697757e-05, "train_loss": 0.6055275329399817, "train_loss_scale": 122700.66711803204, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 84, "n_parameters": 1026306560}
|
86 |
+
{"train_lr": 6.757451826824424e-05, "train_min_lr": 6.757451826824424e-05, "train_loss": 0.605460460853278, "train_loss_scale": 107340.89821710675, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 85, "n_parameters": 1026306560}
|
87 |
+
{"train_lr": 6.731431723615042e-05, "train_min_lr": 6.731431723615042e-05, "train_loss": 0.6051472165283212, "train_loss_scale": 112961.22771383435, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 86, "n_parameters": 1026306560}
|
88 |
+
{"train_lr": 6.705016443775172e-05, "train_min_lr": 6.705016443775172e-05, "train_loss": 0.6049617066053761, "train_loss_scale": 102689.33604152562, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 87, "n_parameters": 1026306560}
|
89 |
+
{"train_lr": 6.678209563510869e-05, "train_min_lr": 6.678209563510869e-05, "train_loss": 0.6046343731921552, "train_loss_scale": 99117.46874294741, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 88, "n_parameters": 1026306560}
|
90 |
+
{"train_lr": 6.651014712044584e-05, "train_min_lr": 6.651014712044584e-05, "train_loss": 0.6043806737534747, "train_loss_scale": 89415.00519070188, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 89, "n_parameters": 1026306560}
|
91 |
+
{"train_lr": 6.623435571123876e-05, "train_min_lr": 6.623435571123876e-05, "train_loss": 0.6040043314762079, "train_loss_scale": 105388.57323403296, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 90, "n_parameters": 1026306560}
|
92 |
+
{"train_lr": 6.595475874522911e-05, "train_min_lr": 6.595475874522911e-05, "train_loss": 0.604000394195789, "train_loss_scale": 100515.15594673889, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 91, "n_parameters": 1026306560}
|
93 |
+
{"train_lr": 6.567139407537131e-05, "train_min_lr": 6.567139407537131e-05, "train_loss": 0.6038487757827488, "train_loss_scale": 84356.70864364703, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 92, "n_parameters": 1026306560}
|
94 |
+
{"train_lr": 6.538430006470527e-05, "train_min_lr": 6.538430006470527e-05, "train_loss": 0.6035786392711585, "train_loss_scale": 98718.12954186414, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 93, "n_parameters": 1026306560}
|
95 |
+
{"train_lr": 6.509351558116422e-05, "train_min_lr": 6.509351558116422e-05, "train_loss": 0.603139638340408, "train_loss_scale": 90583.44211238998, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 94, "n_parameters": 1026306560}
|
96 |
+
{"train_lr": 6.479907999231311e-05, "train_min_lr": 6.479907999231311e-05, "train_loss": 0.603064857937414, "train_loss_scale": 103709.8695554051, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 95, "n_parameters": 1026306560}
|
97 |
+
{"train_lr": 6.450103316001656e-05, "train_min_lr": 6.450103316001656e-05, "train_loss": 0.6027328364283714, "train_loss_scale": 92432.23470999775, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 96, "n_parameters": 1026306560}
|
98 |
+
{"train_lr": 6.419941543504506e-05, "train_min_lr": 6.419941543504506e-05, "train_loss": 0.6025389535161321, "train_loss_scale": 110713.0959151433, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 97, "n_parameters": 1026306560}
|
99 |
+
{"train_lr": 6.389426765161061e-05, "train_min_lr": 6.389426765161061e-05, "train_loss": 0.6023575114571794, "train_loss_scale": 107732.84224779959, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 98, "n_parameters": 1026306560}
|
100 |
+
{"train_lr": 6.35856311218386e-05, "train_min_lr": 6.35856311218386e-05, "train_loss": 0.6023305862451771, "train_loss_scale": 92587.53328819679, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 99, "n_parameters": 1026306560}
|
101 |
+
{"train_lr": 6.327354763017428e-05, "train_min_lr": 6.327354763017428e-05, "train_loss": 0.6019235198312239, "train_loss_scale": 108782.95644324081, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 100, "n_parameters": 1026306560}
|
102 |
+
{"train_lr": 6.295805942772681e-05, "train_min_lr": 6.295805942772681e-05, "train_loss": 0.6018361111283571, "train_loss_scale": 82271.27059354547, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 101, "n_parameters": 1026306560}
|
103 |
+
{"train_lr": 6.263920922654842e-05, "train_min_lr": 6.263920922654842e-05, "train_loss": 0.6016874968412027, "train_loss_scale": 97949.03182125931, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 102, "n_parameters": 1026306560}
|
104 |
+
{"train_lr": 6.23170401938531e-05, "train_min_lr": 6.23170401938531e-05, "train_loss": 0.601280662130049, "train_loss_scale": 129463.55044008125, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 103, "n_parameters": 1026306560}
|
105 |
+
{"train_lr": 6.199159594616967e-05, "train_min_lr": 6.199159594616967e-05, "train_loss": 0.6011942937706171, "train_loss_scale": 59719.698487925976, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 104, "n_parameters": 1026306560}
|
106 |
+
{"train_lr": 6.16629205434404e-05, "train_min_lr": 6.16629205434404e-05, "train_loss": 0.6009253050540928, "train_loss_scale": 79357.57345971564, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 105, "n_parameters": 1026306560}
|
107 |
+
{"train_lr": 6.133105848305322e-05, "train_min_lr": 6.133105848305322e-05, "train_loss": 0.6007978073984411, "train_loss_scale": 115867.52967727375, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 106, "n_parameters": 1026306560}
|
108 |
+
{"train_lr": 6.099605469381926e-05, "train_min_lr": 6.099605469381926e-05, "train_loss": 0.6006083048827975, "train_loss_scale": 86153.73504852178, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 107, "n_parameters": 1026306560}
|
109 |
+
{"train_lr": 6.065795452988859e-05, "train_min_lr": 6.065795452988859e-05, "train_loss": 0.6001439839353501, "train_loss_scale": 87573.60776348454, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 108, "n_parameters": 1026306560}
|
110 |
+
{"train_lr": 6.031680376461133e-05, "train_min_lr": 6.031680376461133e-05, "train_loss": 0.5999472757162035, "train_loss_scale": 86723.16316858497, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 109, "n_parameters": 1026306560}
|
111 |
+
{"train_lr": 5.997264858434042e-05, "train_min_lr": 5.997264858434042e-05, "train_loss": 0.5998410423328506, "train_loss_scale": 80511.22004062288, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 110, "n_parameters": 1026306560}
|
112 |
+
{"train_lr": 5.96255355821777e-05, "train_min_lr": 5.96255355821777e-05, "train_loss": 0.5996045014282714, "train_loss_scale": 91515.2335815843, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 111, "n_parameters": 1026306560}
|
113 |
+
{"train_lr": 5.927551175166758e-05, "train_min_lr": 5.927551175166758e-05, "train_loss": 0.5994099347096141, "train_loss_scale": 75378.97178966373, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 112, "n_parameters": 1026306560}
|
114 |
+
{"train_lr": 5.892262448043388e-05, "train_min_lr": 5.892262448043388e-05, "train_loss": 0.5992172665476186, "train_loss_scale": 89851.32024373731, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 113, "n_parameters": 1026306560}
|
115 |
+
{"train_lr": 5.856692154376355e-05, "train_min_lr": 5.856692154376355e-05, "train_loss": 0.5991833375906015, "train_loss_scale": 80378.10697359513, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 114, "n_parameters": 1026306560}
|
116 |
+
{"train_lr": 5.820845109814128e-05, "train_min_lr": 5.820845109814128e-05, "train_loss": 0.5988797152533041, "train_loss_scale": 79912.21123899797, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 115, "n_parameters": 1026306560}
|
117 |
+
{"train_lr": 5.78472616747263e-05, "train_min_lr": 5.78472616747263e-05, "train_loss": 0.5988850613609982, "train_loss_scale": 90820.08756488377, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 116, "n_parameters": 1026306560}
|
118 |
+
{"train_lr": 5.7483402172785083e-05, "train_min_lr": 5.7483402172785083e-05, "train_loss": 0.5983857440587139, "train_loss_scale": 103746.84540735725, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 117, "n_parameters": 1026306560}
|
119 |
+
{"train_lr": 5.7116921853069717e-05, "train_min_lr": 5.7116921853069717e-05, "train_loss": 0.5984377281093364, "train_loss_scale": 87078.13134732566, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 118, "n_parameters": 1026306560}
|
120 |
+
{"train_lr": 5.674787033114971e-05, "train_min_lr": 5.674787033114971e-05, "train_loss": 0.5981278778096908, "train_loss_scale": 68153.8903182126, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 119, "n_parameters": 1026306560}
|
121 |
+
{"train_lr": 5.6376297570692395e-05, "train_min_lr": 5.6376297570692395e-05, "train_loss": 0.5980306941174311, "train_loss_scale": 75060.9794628752, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 120, "n_parameters": 1026306560}
|
122 |
+
{"train_lr": 5.6002253876702e-05, "train_min_lr": 5.6002253876702e-05, "train_loss": 0.5979609417045353, "train_loss_scale": 84282.75693974273, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 121, "n_parameters": 1026306560}
|
123 |
+
{"train_lr": 5.562578988870761e-05, "train_min_lr": 5.562578988870761e-05, "train_loss": 0.5975358468827, "train_loss_scale": 70705.2241029113, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 122, "n_parameters": 1026306560}
|
124 |
+
{"train_lr": 5.524695657390639e-05, "train_min_lr": 5.524695657390639e-05, "train_loss": 0.597441437892335, "train_loss_scale": 81383.85014669375, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 123, "n_parameters": 1026306560}
|
125 |
+
{"train_lr": 5.486580522026538e-05, "train_min_lr": 5.486580522026538e-05, "train_loss": 0.5973102816913796, "train_loss_scale": 77686.26495147822, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 124, "n_parameters": 1026306560}
|
126 |
+
{"train_lr": 5.448238742957625e-05, "train_min_lr": 5.448238742957625e-05, "train_loss": 0.5971544652519808, "train_loss_scale": 92912.92078537577, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 125, "n_parameters": 1026306560}
|
127 |
+
{"train_lr": 5.4096755110470685e-05, "train_min_lr": 5.4096755110470685e-05, "train_loss": 0.5969509546628381, "train_loss_scale": 68590.20537124803, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 126, "n_parameters": 1026306560}
|
128 |
+
{"train_lr": 5.37089604713914e-05, "train_min_lr": 5.37089604713914e-05, "train_loss": 0.5967895107684711, "train_loss_scale": 97002.45001128413, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 127, "n_parameters": 1026306560}
|
129 |
+
{"train_lr": 5.331905601352417e-05, "train_min_lr": 5.331905601352417e-05, "train_loss": 0.5964229348740078, "train_loss_scale": 53748.0983976529, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 128, "n_parameters": 1026306560}
|
130 |
+
{"train_lr": 5.2927094523692716e-05, "train_min_lr": 5.2927094523692716e-05, "train_loss": 0.5963374844984926, "train_loss_scale": 78277.8785827127, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 129, "n_parameters": 1026306560}
|
131 |
+
{"train_lr": 5.253312906720734e-05, "train_min_lr": 5.253312906720734e-05, "train_loss": 0.5961311220735164, "train_loss_scale": 69255.77070638681, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 130, "n_parameters": 1026306560}
|
132 |
+
{"train_lr": 5.213721298068476e-05, "train_min_lr": 5.213721298068476e-05, "train_loss": 0.595988328071974, "train_loss_scale": 66534.3480027082, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 131, "n_parameters": 1026306560}
|
133 |
+
{"train_lr": 5.173939986482581e-05, "train_min_lr": 5.173939986482581e-05, "train_loss": 0.5958522532237818, "train_loss_scale": 91108.49921011059, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 132, "n_parameters": 1026306560}
|
134 |
+
{"train_lr": 5.133974357715768e-05, "train_min_lr": 5.133974357715768e-05, "train_loss": 0.5957016101141892, "train_loss_scale": 80355.92146242384, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 133, "n_parameters": 1026306560}
|
135 |
+
{"train_lr": 5.09382982247444e-05, "train_min_lr": 5.09382982247444e-05, "train_loss": 0.5954422524618836, "train_loss_scale": 62145.314375987364, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 134, "n_parameters": 1026306560}
|
136 |
+
{"train_lr": 5.0535118156860696e-05, "train_min_lr": 5.0535118156860696e-05, "train_loss": 0.5953326662247813, "train_loss_scale": 82252.7826675694, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 135, "n_parameters": 1026306560}
|
137 |
+
{"train_lr": 5.013025795763425e-05, "train_min_lr": 5.013025795763425e-05, "train_loss": 0.5950071495253348, "train_loss_scale": 85717.41999548634, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 136, "n_parameters": 1026306560}
|
138 |
+
{"train_lr": 4.972377243865433e-05, "train_min_lr": 4.972377243865433e-05, "train_loss": 0.5949794842862224, "train_loss_scale": 78296.36650868878, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 137, "n_parameters": 1026306560}
|
139 |
+
{"train_lr": 4.931571663155425e-05, "train_min_lr": 4.931571663155425e-05, "train_loss": 0.5947251868854827, "train_loss_scale": 59327.75445723313, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 138, "n_parameters": 1026306560}
|
140 |
+
{"train_lr": 4.8906145780557814e-05, "train_min_lr": 4.8906145780557814e-05, "train_loss": 0.5943049080299957, "train_loss_scale": 62644.488377341455, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 139, "n_parameters": 1026306560}
|
141 |
+
{"train_lr": 4.8495115335001866e-05, "train_min_lr": 4.8495115335001866e-05, "train_loss": 0.5943719290244818, "train_loss_scale": 55781.77025502144, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 140, "n_parameters": 1026306560}
|
142 |
+
{"train_lr": 4.808268094182962e-05, "train_min_lr": 4.808268094182962e-05, "train_loss": 0.5942285139970372, "train_loss_scale": 67961.61588806138, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 141, "n_parameters": 1026306560}
|
143 |
+
{"train_lr": 4.7668898438055796e-05, "train_min_lr": 4.7668898438055796e-05, "train_loss": 0.5938569835051507, "train_loss_scale": 68312.88648160687, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 142, "n_parameters": 1026306560}
|
144 |
+
{"train_lr": 4.725382384320728e-05, "train_min_lr": 4.725382384320728e-05, "train_loss": 0.5936949138540738, "train_loss_scale": 76159.1622658542, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 143, "n_parameters": 1026306560}
|
145 |
+
{"train_lr": 4.683751335174056e-05, "train_min_lr": 4.683751335174056e-05, "train_loss": 0.593565150072638, "train_loss_scale": 50686.497856014445, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 144, "n_parameters": 1026306560}
|
146 |
+
{"train_lr": 4.642002332543183e-05, "train_min_lr": 4.642002332543183e-05, "train_loss": 0.5932443948499178, "train_loss_scale": 58114.946513202434, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 145, "n_parameters": 1026306560}
|
147 |
+
{"train_lr": 4.6001410285747267e-05, "train_min_lr": 4.6001410285747267e-05, "train_loss": 0.5933170156837976, "train_loss_scale": 28713.597833446176, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 146, "n_parameters": 1026306560}
|
148 |
+
{"train_lr": 4.558173090619213e-05, "train_min_lr": 4.558173090619213e-05, "train_loss": 0.5929519663810986, "train_loss_scale": 47260.68517264726, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 147, "n_parameters": 1026306560}
|
149 |
+
{"train_lr": 4.5161042004636365e-05, "train_min_lr": 4.5161042004636365e-05, "train_loss": 0.5928061019622514, "train_loss_scale": 45866.695554051, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 148, "n_parameters": 1026306560}
|
150 |
+
{"train_lr": 4.4739400535622294e-05, "train_min_lr": 4.4739400535622294e-05, "train_loss": 0.5924653780901574, "train_loss_scale": 59484.90182802979, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 149, "n_parameters": 1026306560}
|
151 |
+
{"train_lr": 4.431686358265622e-05, "train_min_lr": 4.431686358265622e-05, "train_loss": 0.5920882878761593, "train_loss_scale": 79786.49334236064, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 150, "n_parameters": 1026306560}
|
152 |
+
{"train_lr": 4.389348835047726e-05, "train_min_lr": 4.389348835047726e-05, "train_loss": 0.59220339596327, "train_loss_scale": 31074.50598059129, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 151, "n_parameters": 1026306560}
|
153 |
+
{"train_lr": 4.3469332157314837e-05, "train_min_lr": 4.3469332157314837e-05, "train_loss": 0.5916082916196576, "train_loss_scale": 72032.65718799368, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 152, "n_parameters": 1026306560}
|
154 |
+
{"train_lr": 4.304445242712796e-05, "train_min_lr": 4.304445242712796e-05, "train_loss": 0.5914600417967966, "train_loss_scale": 74909.37846987136, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 153, "n_parameters": 1026306560}
|
155 |
+
{"train_lr": 4.261890668183077e-05, "train_min_lr": 4.261890668183077e-05, "train_loss": 0.5914092102098333, "train_loss_scale": 64959.17670954638, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 154, "n_parameters": 1026306560}
|
156 |
+
{"train_lr": 4.2192752533504904e-05, "train_min_lr": 4.2192752533504904e-05, "train_loss": 0.5913460533876796, "train_loss_scale": 51603.4989844279, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 155, "n_parameters": 1026306560}
|
157 |
+
{"train_lr": 4.176604767660082e-05, "train_min_lr": 4.176604767660082e-05, "train_loss": 0.591141634038308, "train_loss_scale": 68142.79756262695, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 156, "n_parameters": 1026306560}
|
158 |
+
{"train_lr": 4.133884988012558e-05, "train_min_lr": 4.133884988012558e-05, "train_loss": 0.5906659207184686, "train_loss_scale": 76381.01737756714, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 157, "n_parameters": 1026306560}
|
159 |
+
{"train_lr": 4.091121697982231e-05, "train_min_lr": 4.091121697982231e-05, "train_loss": 0.5907148990037747, "train_loss_scale": 51766.19273301738, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 158, "n_parameters": 1026306560}
|
160 |
+
{"train_lr": 4.048320687034053e-05, "train_min_lr": 4.048320687034053e-05, "train_loss": 0.5906326025900965, "train_loss_scale": 64123.52245542767, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 159, "n_parameters": 1026306560}
|
161 |
+
{"train_lr": 4.005487749739718e-05, "train_min_lr": 4.005487749739718e-05, "train_loss": 0.5903009851127896, "train_loss_scale": 68627.18122320018, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 160, "n_parameters": 1026306560}
|
162 |
+
{"train_lr": 3.9626286849933505e-05, "train_min_lr": 3.9626286849933505e-05, "train_loss": 0.5900566746789523, "train_loss_scale": 70098.82013089595, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 161, "n_parameters": 1026306560}
|
163 |
+
{"train_lr": 3.919749295226199e-05, "train_min_lr": 3.919749295226199e-05, "train_loss": 0.5897584896802875, "train_loss_scale": 66175.6822387723, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 162, "n_parameters": 1026306560}
|
164 |
+
{"train_lr": 3.876855385621322e-05, "train_min_lr": 3.876855385621322e-05, "train_loss": 0.589521601042853, "train_loss_scale": 59704.90814714511, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 163, "n_parameters": 1026306560}
|
165 |
+
{"train_lr": 3.833952763327343e-05, "train_min_lr": 3.833952763327343e-05, "train_loss": 0.5892979089394329, "train_loss_scale": 55108.80974949221, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 164, "n_parameters": 1026306560}
|
166 |
+
{"train_lr": 3.791047236672662e-05, "train_min_lr": 3.791047236672662e-05, "train_loss": 0.589195355771711, "train_loss_scale": 61672.023470999775, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 165, "n_parameters": 1026306560}
|
167 |
+
{"train_lr": 3.748144614378688e-05, "train_min_lr": 3.748144614378688e-05, "train_loss": 0.5889602180707404, "train_loss_scale": 35626.233355901604, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 166, "n_parameters": 1026306560}
|
168 |
+
{"train_lr": 3.7052507047737866e-05, "train_min_lr": 3.7052507047737866e-05, "train_loss": 0.5887365167705362, "train_loss_scale": 48486.434664861205, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 167, "n_parameters": 1026306560}
|
169 |
+
{"train_lr": 3.662371315006658e-05, "train_min_lr": 3.662371315006658e-05, "train_loss": 0.5884693904650168, "train_loss_scale": 61272.6842699165, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 168, "n_parameters": 1026306560}
|
170 |
+
{"train_lr": 3.6195122502602736e-05, "train_min_lr": 3.6195122502602736e-05, "train_loss": 0.5882739601406677, "train_loss_scale": 42328.10652222975, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 169, "n_parameters": 1026306560}
|
171 |
+
{"train_lr": 3.5766793129659425e-05, "train_min_lr": 3.5766793129659425e-05, "train_loss": 0.5880203669551526, "train_loss_scale": 45652.2356127285, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 170, "n_parameters": 1026306560}
|
172 |
+
{"train_lr": 3.533878302017769e-05, "train_min_lr": 3.533878302017769e-05, "train_loss": 0.5877934420484267, "train_loss_scale": 51359.45836154367, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 171, "n_parameters": 1026306560}
|
173 |
+
{"train_lr": 3.491115011987432e-05, "train_min_lr": 3.491115011987432e-05, "train_loss": 0.5876199070076706, "train_loss_scale": 45010.70458135861, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 172, "n_parameters": 1026306560}
|
174 |
+
{"train_lr": 3.44839523233991e-05, "train_min_lr": 3.44839523233991e-05, "train_loss": 0.5875196714539361, "train_loss_scale": 50845.49401940871, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 173, "n_parameters": 1026306560}
|
175 |
+
{"train_lr": 3.405724746649512e-05, "train_min_lr": 3.405724746649512e-05, "train_loss": 0.587472762553939, "train_loss_scale": 60603.42134958249, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 174, "n_parameters": 1026306560}
|
176 |
+
{"train_lr": 3.363109331816943e-05, "train_min_lr": 3.363109331816943e-05, "train_loss": 0.5870919030771766, "train_loss_scale": 39978.29113067028, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 175, "n_parameters": 1026306560}
|
177 |
+
{"train_lr": 3.3205547572872134e-05, "train_min_lr": 3.3205547572872134e-05, "train_loss": 0.5867940734187446, "train_loss_scale": 52498.31460167005, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 176, "n_parameters": 1026306560}
|
178 |
+
{"train_lr": 3.278066784268516e-05, "train_min_lr": 3.278066784268516e-05, "train_loss": 0.5864321276057058, "train_loss_scale": 34187.87271496276, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 177, "n_parameters": 1026306560}
|
179 |
+
{"train_lr": 3.235651164952276e-05, "train_min_lr": 3.235651164952276e-05, "train_loss": 0.5862604448993429, "train_loss_scale": 32960.27443015121, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 178, "n_parameters": 1026306560}
|
180 |
+
{"train_lr": 3.193313641734391e-05, "train_min_lr": 3.193313641734391e-05, "train_loss": 0.5862641082687757, "train_loss_scale": 53230.436470322726, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 179, "n_parameters": 1026306560}
|
181 |
+
{"train_lr": 3.151059946437771e-05, "train_min_lr": 3.151059946437771e-05, "train_loss": 0.5859810572812854, "train_loss_scale": 31137.364928909952, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 180, "n_parameters": 1026306560}
|
182 |
+
{"train_lr": 3.108895799536375e-05, "train_min_lr": 3.108895799536375e-05, "train_loss": 0.5857518626256342, "train_loss_scale": 31516.367411419546, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 181, "n_parameters": 1026306560}
|
183 |
+
{"train_lr": 3.0668269093807795e-05, "train_min_lr": 3.0668269093807795e-05, "train_loss": 0.5853225959999194, "train_loss_scale": 46221.6637327917, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 182, "n_parameters": 1026306560}
|
184 |
+
{"train_lr": 3.02485897142525e-05, "train_min_lr": 3.02485897142525e-05, "train_loss": 0.5853345162504024, "train_loss_scale": 52076.78988941548, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 183, "n_parameters": 1026306560}
|
185 |
+
{"train_lr": 2.9829976674568354e-05, "train_min_lr": 2.9829976674568354e-05, "train_loss": 0.5849322937504617, "train_loss_scale": 51233.74046490634, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 184, "n_parameters": 1026306560}
|
186 |
+
{"train_lr": 2.9412486648259368e-05, "train_min_lr": 2.9412486648259368e-05, "train_loss": 0.5846072752596199, "train_loss_scale": 48708.289776574136, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 185, "n_parameters": 1026306560}
|
187 |
+
{"train_lr": 2.8996176156792646e-05, "train_min_lr": 2.8996176156792646e-05, "train_loss": 0.5845250091489208, "train_loss_scale": 38532.535319341005, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 186, "n_parameters": 1026306560}
|
188 |
+
{"train_lr": 2.858110156194436e-05, "train_min_lr": 2.858110156194436e-05, "train_loss": 0.5843621949454855, "train_loss_scale": 46238.30286617017, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 187, "n_parameters": 1026306560}
|
189 |
+
{"train_lr": 2.8167319058170417e-05, "train_min_lr": 2.8167319058170417e-05, "train_loss": 0.5840681368479629, "train_loss_scale": 45702.15301286391, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 188, "n_parameters": 1026306560}
|
190 |
+
{"train_lr": 2.7754884664998124e-05, "train_min_lr": 2.7754884664998124e-05, "train_loss": 0.5839610929302224, "train_loss_scale": 45280.62830060934, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 189, "n_parameters": 1026306560}
|
191 |
+
{"train_lr": 2.7343854219442267e-05, "train_min_lr": 2.7343854219442267e-05, "train_loss": 0.5834495788185786, "train_loss_scale": 51655.26517716091, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 190, "n_parameters": 1026306560}
|
192 |
+
{"train_lr": 2.6934283368445713e-05, "train_min_lr": 2.6934283368445713e-05, "train_loss": 0.5831251668118914, "train_loss_scale": 52365.20153464229, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 191, "n_parameters": 1026306560}
|
193 |
+
{"train_lr": 2.6526227561345476e-05, "train_min_lr": 2.6526227561345476e-05, "train_loss": 0.5828823376908525, "train_loss_scale": 49813.86774994358, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 192, "n_parameters": 1026306560}
|
194 |
+
{"train_lr": 2.611974204236587e-05, "train_min_lr": 2.611974204236587e-05, "train_loss": 0.5825204395675828, "train_loss_scale": 30309.105845181675, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 193, "n_parameters": 1026306560}
|
195 |
+
{"train_lr": 2.571488184313907e-05, "train_min_lr": 2.571488184313907e-05, "train_loss": 0.5822930808314228, "train_loss_scale": 44973.72872940645, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 194, "n_parameters": 1026306560}
|
196 |
+
{"train_lr": 2.5311701775255622e-05, "train_min_lr": 2.5311701775255622e-05, "train_loss": 0.5819989177981643, "train_loss_scale": 53725.91288648161, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 195, "n_parameters": 1026306560}
|
197 |
+
{"train_lr": 2.4910256422842307e-05, "train_min_lr": 2.4910256422842307e-05, "train_loss": 0.5820542896694845, "train_loss_scale": 41749.43443917851, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 196, "n_parameters": 1026306560}
|
198 |
+
{"train_lr": 2.4510600135174328e-05, "train_min_lr": 2.4510600135174328e-05, "train_loss": 0.5816762664064654, "train_loss_scale": 31795.535093658316, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 197, "n_parameters": 1026306560}
|
199 |
+
{"train_lr": 2.411278701931504e-05, "train_min_lr": 2.411278701931504e-05, "train_loss": 0.5815300073670536, "train_loss_scale": 54188.11103588355, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 198, "n_parameters": 1026306560}
|
200 |
+
{"train_lr": 2.371687093279259e-05, "train_min_lr": 2.371687093279259e-05, "train_loss": 0.5812337068599991, "train_loss_scale": 53947.76799819454, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 199, "n_parameters": 1026306560}
|
201 |
+
{"train_lr": 2.332290547630717e-05, "train_min_lr": 2.332290547630717e-05, "train_loss": 0.5808459423710257, "train_loss_scale": 31207.619047619046, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 200, "n_parameters": 1026306560}
|
202 |
+
{"train_lr": 2.293094398647569e-05, "train_min_lr": 2.293094398647569e-05, "train_loss": 0.58082645126923, "train_loss_scale": 31092.993906567368, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 201, "n_parameters": 1026306560}
|
203 |
+
{"train_lr": 2.2541039528608895e-05, "train_min_lr": 2.2541039528608895e-05, "train_loss": 0.5805983491564773, "train_loss_scale": 31573.679981945384, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 202, "n_parameters": 1026306560}
|
204 |
+
{"train_lr": 2.215324488952953e-05, "train_min_lr": 2.215324488952953e-05, "train_loss": 0.5799055350858224, "train_loss_scale": 52317.13292710449, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 203, "n_parameters": 1026306560}
|
205 |
+
{"train_lr": 2.176761257042369e-05, "train_min_lr": 2.176761257042369e-05, "train_loss": 0.5795836508907991, "train_loss_scale": 46678.31550440081, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 204, "n_parameters": 1026306560}
|
206 |
+
{"train_lr": 2.1384194779734656e-05, "train_min_lr": 2.1384194779734656e-05, "train_loss": 0.5795155670851153, "train_loss_scale": 24160.021665538254, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 205, "n_parameters": 1026306560}
|
207 |
+
{"train_lr": 2.1003043426093622e-05, "train_min_lr": 2.1003043426093622e-05, "train_loss": 0.5792308295558851, "train_loss_scale": 45369.37034529452, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 206, "n_parameters": 1026306560}
|
208 |
+
{"train_lr": 2.0624210111292464e-05, "train_min_lr": 2.0624210111292464e-05, "train_loss": 0.5788321269407475, "train_loss_scale": 50797.42541187091, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 207, "n_parameters": 1026306560}
|
209 |
+
{"train_lr": 2.024774612329793e-05, "train_min_lr": 2.024774612329793e-05, "train_loss": 0.5787848889847906, "train_loss_scale": 49181.580681561725, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 208, "n_parameters": 1026306560}
|
210 |
+
{"train_lr": 1.9873702429307768e-05, "train_min_lr": 1.9873702429307768e-05, "train_loss": 0.5783235678435401, "train_loss_scale": 37548.97765741368, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 209, "n_parameters": 1026306560}
|
211 |
+
{"train_lr": 1.950212966885046e-05, "train_min_lr": 1.950212966885046e-05, "train_loss": 0.5780911951289472, "train_loss_scale": 27214.22703678628, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 210, "n_parameters": 1026306560}
|
212 |
+
{"train_lr": 1.9133078146930057e-05, "train_min_lr": 1.9133078146930057e-05, "train_loss": 0.5777318784817834, "train_loss_scale": 49433.01647483638, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 211, "n_parameters": 1026306560}
|
213 |
+
{"train_lr": 1.8766597827214795e-05, "train_min_lr": 1.8766597827214795e-05, "train_loss": 0.577519324562117, "train_loss_scale": 32315.045813586097, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 212, "n_parameters": 1026306560}
|
214 |
+
{"train_lr": 1.8402738325273886e-05, "train_min_lr": 1.8402738325273886e-05, "train_loss": 0.5771671939171267, "train_loss_scale": 35801.86865267434, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 213, "n_parameters": 1026306560}
|
215 |
+
{"train_lr": 1.8041548901858857e-05, "train_min_lr": 1.8041548901858857e-05, "train_loss": 0.5770295762938904, "train_loss_scale": 39390.37508463101, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 214, "n_parameters": 1026306560}
|
216 |
+
{"train_lr": 1.768307845623634e-05, "train_min_lr": 1.768307845623634e-05, "train_loss": 0.5766733927684609, "train_loss_scale": 36491.46829158204, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 215, "n_parameters": 1026306560}
|
217 |
+
{"train_lr": 1.7327375519566257e-05, "train_min_lr": 1.7327375519566257e-05, "train_loss": 0.5765396847869487, "train_loss_scale": 21675.24441435342, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 216, "n_parameters": 1026306560}
|
218 |
+
{"train_lr": 1.697448824833228e-05, "train_min_lr": 1.697448824833228e-05, "train_loss": 0.5763489342767756, "train_loss_scale": 40348.04965019183, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 217, "n_parameters": 1026306560}
|
219 |
+
{"train_lr": 1.6624464417822198e-05, "train_min_lr": 1.6624464417822198e-05, "train_loss": 0.5758959692206105, "train_loss_scale": 34191.570300157975, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 218, "n_parameters": 1026306560}
|
220 |
+
{"train_lr": 1.6277351415659674e-05, "train_min_lr": 1.6277351415659674e-05, "train_loss": 0.5758488731481063, "train_loss_scale": 23027.636199503497, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 219, "n_parameters": 1026306560}
|
221 |
+
{"train_lr": 1.5933196235388516e-05, "train_min_lr": 1.5933196235388516e-05, "train_loss": 0.5752776562721983, "train_loss_scale": 29322.774994357933, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 220, "n_parameters": 1026306560}
|
222 |
+
{"train_lr": 1.5592045470111352e-05, "train_min_lr": 1.5592045470111352e-05, "train_loss": 0.5750558579303164, "train_loss_scale": 39371.88715865493, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 221, "n_parameters": 1026306560}
|
223 |
+
{"train_lr": 1.5253945306180702e-05, "train_min_lr": 1.5253945306180702e-05, "train_loss": 0.5747951210609504, "train_loss_scale": 29872.790792146243, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 222, "n_parameters": 1026306560}
|
224 |
+
{"train_lr": 1.491894151694671e-05, "train_min_lr": 1.491894151694671e-05, "train_loss": 0.5744104053450173, "train_loss_scale": 14596.217558113292, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 223, "n_parameters": 1026306560}
|
225 |
+
{"train_lr": 1.4587079456559633e-05, "train_min_lr": 1.4587079456559633e-05, "train_loss": 0.574147836186027, "train_loss_scale": 25969.989618596253, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 224, "n_parameters": 1026306560}
|
226 |
+
{"train_lr": 1.425840405383039e-05, "train_min_lr": 1.425840405383039e-05, "train_loss": 0.573792009700373, "train_loss_scale": 21717.7666440984, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 225, "n_parameters": 1026306560}
|
227 |
+
{"train_lr": 1.3932959806147094e-05, "train_min_lr": 1.3932959806147094e-05, "train_loss": 0.5738927129159203, "train_loss_scale": 16722.32904536222, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 226, "n_parameters": 1026306560}
|
228 |
+
{"train_lr": 1.3610790773451362e-05, "train_min_lr": 1.3610790773451362e-05, "train_loss": 0.5732769324536505, "train_loss_scale": 21675.24441435342, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 227, "n_parameters": 1026306560}
|
229 |
+
{"train_lr": 1.3291940572273121e-05, "train_min_lr": 1.3291940572273121e-05, "train_loss": 0.5732296630075986, "train_loss_scale": 25579.894380501017, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 228, "n_parameters": 1026306560}
|
230 |
+
{"train_lr": 1.2976452369825624e-05, "train_min_lr": 1.2976452369825624e-05, "train_loss": 0.5728258418877294, "train_loss_scale": 37512.00180546152, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 229, "n_parameters": 1026306560}
|
231 |
+
{"train_lr": 1.266436887816126e-05, "train_min_lr": 1.266436887816126e-05, "train_loss": 0.5724121920568556, "train_loss_scale": 30989.461521101333, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 230, "n_parameters": 1026306560}
|
232 |
+
{"train_lr": 1.2355732348388986e-05, "train_min_lr": 1.2355732348388986e-05, "train_loss": 0.5720689795714581, "train_loss_scale": 17907.405100428798, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 231, "n_parameters": 1026306560}
|
233 |
+
{"train_lr": 1.205058456495472e-05, "train_min_lr": 1.205058456495472e-05, "train_loss": 0.5716723906503887, "train_loss_scale": 28410.3958474385, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 232, "n_parameters": 1026306560}
|
234 |
+
{"train_lr": 1.1748966839983452e-05, "train_min_lr": 1.1748966839983452e-05, "train_loss": 0.5714342828623108, "train_loss_scale": 26666.984427894382, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 233, "n_parameters": 1026306560}
|
235 |
+
{"train_lr": 1.1450920007687048e-05, "train_min_lr": 1.1450920007687048e-05, "train_loss": 0.5712138763346203, "train_loss_scale": 29874.63958474385, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 234, "n_parameters": 1026306560}
|
236 |
+
{"train_lr": 1.11564844188356e-05, "train_min_lr": 1.11564844188356e-05, "train_loss": 0.5709678134997612, "train_loss_scale": 39811.89979688558, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 235, "n_parameters": 1026306560}
|
237 |
+
{"train_lr": 1.0865699935294677e-05, "train_min_lr": 1.0865699935294677e-05, "train_loss": 0.5704242208221378, "train_loss_scale": 32405.636650868877, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 236, "n_parameters": 1026306560}
|
238 |
+
{"train_lr": 1.0578605924628659e-05, "train_min_lr": 1.0578605924628659e-05, "train_loss": 0.5703247475055917, "train_loss_scale": 21599.443917851502, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 237, "n_parameters": 1026306560}
|
239 |
+
{"train_lr": 1.0295241254770847e-05, "train_min_lr": 1.0295241254770847e-05, "train_loss": 0.570022999859894, "train_loss_scale": 16239.794177386595, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 238, "n_parameters": 1026306560}
|
240 |
+
{"train_lr": 1.001564428876157e-05, "train_min_lr": 1.001564428876157e-05, "train_loss": 0.5698027759374258, "train_loss_scale": 21201.95350936583, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 239, "n_parameters": 1026306560}
|
241 |
+
{"train_lr": 9.73985287955431e-06, "train_min_lr": 9.73985287955431e-06, "train_loss": 0.5694658709608004, "train_loss_scale": 23890.09794628752, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 240, "n_parameters": 1026306560}
|
242 |
+
{"train_lr": 9.467904364891305e-06, "train_min_lr": 9.467904364891305e-06, "train_loss": 0.5691467544752907, "train_loss_scale": 24021.36222071767, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 241, "n_parameters": 1026306560}
|
243 |
+
{"train_lr": 9.19983556224812e-06, "train_min_lr": 9.19983556224812e-06, "train_loss": 0.5688995504035076, "train_loss_scale": 18181.026404874745, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 242, "n_parameters": 1026306560}
|
244 |
+
{"train_lr": 8.935682763849601e-06, "train_min_lr": 8.935682763849601e-06, "train_loss": 0.5685938833194382, "train_loss_scale": 16694.597156398104, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 243, "n_parameters": 1026306560}
|
245 |
+
{"train_lr": 8.675481731756103e-06, "train_min_lr": 8.675481731756103e-06, "train_loss": 0.5681953554688418, "train_loss_scale": 24250.612502821034, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 244, "n_parameters": 1026306560}
|
246 |
+
{"train_lr": 8.419267693022166e-06, "train_min_lr": 8.419267693022166e-06, "train_loss": 0.5680117240489618, "train_loss_scale": 17996.14714511397, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 245, "n_parameters": 1026306560}
|
247 |
+
{"train_lr": 8.167075334927451e-06, "train_min_lr": 8.167075334927451e-06, "train_loss": 0.567610759771074, "train_loss_scale": 18905.75310313699, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 246, "n_parameters": 1026306560}
|
248 |
+
{"train_lr": 7.918938800280095e-06, "train_min_lr": 7.918938800280095e-06, "train_loss": 0.5672521764676093, "train_loss_scale": 17173.434439178516, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 247, "n_parameters": 1026306560}
|
249 |
+
{"train_lr": 7.67489168279499e-06, "train_min_lr": 7.67489168279499e-06, "train_loss": 0.5669760563331159, "train_loss_scale": 15986.50959151433, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 248, "n_parameters": 1026306560}
|
250 |
+
{"train_lr": 7.4349670225451665e-06, "train_min_lr": 7.4349670225451665e-06, "train_loss": 0.5667427620598073, "train_loss_scale": 15437.418190024826, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 249, "n_parameters": 1026306560}
|
251 |
+
{"train_lr": 7.1991973014889945e-06, "train_min_lr": 7.1991973014889945e-06, "train_loss": 0.5662824573767519, "train_loss_scale": 18036.82058226134, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 250, "n_parameters": 1026306560}
|
252 |
+
{"train_lr": 6.967614439072678e-06, "train_min_lr": 6.967614439072678e-06, "train_loss": 0.5661672737282942, "train_loss_scale": 25250.809298126835, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 251, "n_parameters": 1026306560}
|
253 |
+
{"train_lr": 6.740249787908712e-06, "train_min_lr": 6.740249787908712e-06, "train_loss": 0.5658059971753018, "train_loss_scale": 20834.043782441888, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 252, "n_parameters": 1026306560}
|
254 |
+
{"train_lr": 6.517134129531299e-06, "train_min_lr": 6.517134129531299e-06, "train_loss": 0.5656754804682218, "train_loss_scale": 15512.29429022794, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 253, "n_parameters": 1026306560}
|
255 |
+
{"train_lr": 6.2982976702290594e-06, "train_min_lr": 6.2982976702290594e-06, "train_loss": 0.5653933908069841, "train_loss_scale": 10704.50914014895, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 254, "n_parameters": 1026306560}
|
256 |
+
{"train_lr": 6.083770036955526e-06, "train_min_lr": 6.083770036955526e-06, "train_loss": 0.5649916307785876, "train_loss_scale": 14334.613405551794, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 255, "n_parameters": 1026306560}
|
257 |
+
{"train_lr": 5.873580273318258e-06, "train_min_lr": 5.873580273318258e-06, "train_loss": 0.5645596448010911, "train_loss_scale": 14304.108327691267, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 256, "n_parameters": 1026306560}
|
258 |
+
{"train_lr": 5.667756835646572e-06, "train_min_lr": 5.667756835646572e-06, "train_loss": 0.5644391143265326, "train_loss_scale": 19834.77138343489, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 257, "n_parameters": 1026306560}
|
259 |
+
{"train_lr": 5.466327589139217e-06, "train_min_lr": 5.466327589139217e-06, "train_loss": 0.564250359585779, "train_loss_scale": 20442.09975174904, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 258, "n_parameters": 1026306560}
|
260 |
+
{"train_lr": 5.26931980409181e-06, "train_min_lr": 5.26931980409181e-06, "train_loss": 0.5639288046270529, "train_loss_scale": 15508.596705032724, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 259, "n_parameters": 1026306560}
|
261 |
+
{"train_lr": 5.076760152204845e-06, "train_min_lr": 5.076760152204845e-06, "train_loss": 0.5637327257331096, "train_loss_scale": 9902.13315278718, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 260, "n_parameters": 1026306560}
|
262 |
+
{"train_lr": 4.888674702972726e-06, "train_min_lr": 4.888674702972726e-06, "train_loss": 0.5634710810859117, "train_loss_scale": 7975.691266079892, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 261, "n_parameters": 1026306560}
|
263 |
+
{"train_lr": 4.705088920154528e-06, "train_min_lr": 4.705088920154528e-06, "train_loss": 0.5632122733216809, "train_loss_scale": 15645.407357255699, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 262, "n_parameters": 1026306560}
|
264 |
+
{"train_lr": 4.526027658326509e-06, "train_min_lr": 4.526027658326509e-06, "train_loss": 0.5627649262357487, "train_loss_scale": 17099.482735274203, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 263, "n_parameters": 1026306560}
|
265 |
+
{"train_lr": 4.351515159517205e-06, "train_min_lr": 4.351515159517205e-06, "train_loss": 0.5624451438918007, "train_loss_scale": 11978.3272399007, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 264, "n_parameters": 1026306560}
|
266 |
+
{"train_lr": 4.181575049925392e-06, "train_min_lr": 4.181575049925392e-06, "train_loss": 0.5623621311491888, "train_loss_scale": 10983.676822387722, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 265, "n_parameters": 1026306560}
|
267 |
+
{"train_lr": 4.016230336721614e-06, "train_min_lr": 4.016230336721614e-06, "train_loss": 0.5622637822052926, "train_loss_scale": 13956.535319341006, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 266, "n_parameters": 1026306560}
|
268 |
+
{"train_lr": 3.855503404933287e-06, "train_min_lr": 3.855503404933287e-06, "train_loss": 0.5617262900364948, "train_loss_scale": 16921.99864590386, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 267, "n_parameters": 1026306560}
|
269 |
+
{"train_lr": 3.6994160144140995e-06, "train_min_lr": 3.6994160144140995e-06, "train_loss": 0.5615570596603185, "train_loss_scale": 16087.268788083955, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 268, "n_parameters": 1026306560}
|
270 |
+
{"train_lr": 3.5479892968982262e-06, "train_min_lr": 3.5479892968982262e-06, "train_loss": 0.5613775516205887, "train_loss_scale": 9394.177386594449, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 269, "n_parameters": 1026306560}
|
271 |
+
{"train_lr": 3.401243753139167e-06, "train_min_lr": 3.401243753139167e-06, "train_loss": 0.5610800900558967, "train_loss_scale": 9169.086887835703, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 270, "n_parameters": 1026306560}
|
272 |
+
{"train_lr": 3.259199250134581e-06, "train_min_lr": 3.259199250134581e-06, "train_loss": 0.5607208020046027, "train_loss_scale": 11569.281877679981, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 271, "n_parameters": 1026306560}
|
273 |
+
{"train_lr": 3.121875018436357e-06, "train_min_lr": 3.121875018436357e-06, "train_loss": 0.5606387177811395, "train_loss_scale": 11787.9016023471, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 272, "n_parameters": 1026306560}
|
274 |
+
{"train_lr": 2.9892896495472302e-06, "train_min_lr": 2.9892896495472302e-06, "train_loss": 0.56060092982104, "train_loss_scale": 10077.768449559919, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 273, "n_parameters": 1026306560}
|
275 |
+
{"train_lr": 2.8614610934038605e-06, "train_min_lr": 2.8614610934038605e-06, "train_loss": 0.5601000929576256, "train_loss_scale": 13889.978785827127, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 274, "n_parameters": 1026306560}
|
276 |
+
{"train_lr": 2.7384066559464543e-06, "train_min_lr": 2.7384066559464543e-06, "train_loss": 0.5601313669584259, "train_loss_scale": 13848.380952380952, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 275, "n_parameters": 1026306560}
|
277 |
+
{"train_lr": 2.6201429967760815e-06, "train_min_lr": 2.6201429967760815e-06, "train_loss": 0.5598813738548656, "train_loss_scale": 10268.194087113518, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 276, "n_parameters": 1026306560}
|
278 |
+
{"train_lr": 2.5066861268990564e-06, "train_min_lr": 2.5066861268990564e-06, "train_loss": 0.5595660847549722, "train_loss_scale": 12758.517716091175, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 277, "n_parameters": 1026306560}
|
279 |
+
{"train_lr": 2.3980514065594357e-06, "train_min_lr": 2.3980514065594357e-06, "train_loss": 0.5594957613974096, "train_loss_scale": 8187.37801850598, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 278, "n_parameters": 1026306560}
|
280 |
+
{"train_lr": 2.2942535431593764e-06, "train_min_lr": 2.2942535431593764e-06, "train_loss": 0.5592960389917395, "train_loss_scale": 8685.627623561273, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 279, "n_parameters": 1026306560}
|
281 |
+
{"train_lr": 2.195306589268004e-06, "train_min_lr": 2.195306589268004e-06, "train_loss": 0.5591739279923603, "train_loss_scale": 7899.8907695779735, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 280, "n_parameters": 1026306560}
|
282 |
+
{"train_lr": 2.101223940719e-06, "train_min_lr": 2.101223940719e-06, "train_loss": 0.559056919112657, "train_loss_scale": 7708.078537576168, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 281, "n_parameters": 1026306560}
|
283 |
+
{"train_lr": 2.0120183347969303e-06, "train_min_lr": 2.0120183347969303e-06, "train_loss": 0.5588443198682048, "train_loss_scale": 6726.369668246445, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 282, "n_parameters": 1026306560}
|
284 |
+
{"train_lr": 1.927701848512913e-06, "train_min_lr": 1.927701848512913e-06, "train_loss": 0.5585892904466605, "train_loss_scale": 12067.993680884676, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 283, "n_parameters": 1026306560}
|
285 |
+
{"train_lr": 1.8482858969694659e-06, "train_min_lr": 1.8482858969694659e-06, "train_loss": 0.5586503334377063, "train_loss_scale": 9377.076055066576, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 284, "n_parameters": 1026306560}
|
286 |
+
{"train_lr": 1.7737812318151748e-06, "train_min_lr": 1.7737812318151748e-06, "train_loss": 0.5584570141809737, "train_loss_scale": 8991.602798465357, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 285, "n_parameters": 1026306560}
|
287 |
+
{"train_lr": 1.7041979397890732e-06, "train_min_lr": 1.7041979397890732e-06, "train_loss": 0.5581718851145693, "train_loss_scale": 8376.879259760777, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 286, "n_parameters": 1026306560}
|
288 |
+
{"train_lr": 1.6395454413550448e-06, "train_min_lr": 1.6395454413550448e-06, "train_loss": 0.5582158091477839, "train_loss_scale": 8624.617467840217, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 287, "n_parameters": 1026306560}
|
289 |
+
{"train_lr": 1.5798324894264484e-06, "train_min_lr": 1.5798324894264484e-06, "train_loss": 0.5579445596996218, "train_loss_scale": 5418.811103588355, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 288, "n_parameters": 1026306560}
|
290 |
+
{"train_lr": 1.525067168181119e-06, "train_min_lr": 1.525067168181119e-06, "train_loss": 0.5579883330111726, "train_loss_scale": 6509.598736176935, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 289, "n_parameters": 1026306560}
|
291 |
+
{"train_lr": 1.4752568919669013e-06, "train_min_lr": 1.4752568919669013e-06, "train_loss": 0.5577794809459369, "train_loss_scale": 7952.581358609795, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 290, "n_parameters": 1026306560}
|
292 |
+
{"train_lr": 1.430408404297843e-06, "train_min_lr": 1.430408404297843e-06, "train_loss": 0.5576302259403786, "train_loss_scale": 5883.320243737306, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 291, "n_parameters": 1026306560}
|
293 |
+
{"train_lr": 1.3905277769412832e-06, "train_min_lr": 1.3905277769412832e-06, "train_loss": 0.5575857502686818, "train_loss_scale": 7113.22951929587, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 292, "n_parameters": 1026306560}
|
294 |
+
{"train_lr": 1.3556204090957543e-06, "train_min_lr": 1.3556204090957543e-06, "train_loss": 0.5575416416789024, "train_loss_scale": 7043.437598736177, "train_weight_decay": 0.0500000000000075, "train_grad_norm": Infinity, "epoch": 293, "n_parameters": 1026306560}
|
295 |
+
{"train_lr": 1.3256910266600916e-06, "train_min_lr": 1.3256910266600916e-06, "train_loss": 0.557391341997189, "train_loss_scale": 10483.578424734822, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 294, "n_parameters": 1026306560}
|
296 |
+
{"train_lr": 1.3007436815935715e-06, "train_min_lr": 1.3007436815935715e-06, "train_loss": 0.5574958193080369, "train_loss_scale": 10215.041299932294, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 295, "n_parameters": 1026306560}
|
297 |
+
{"train_lr": 1.280781751367382e-06, "train_min_lr": 1.280781751367382e-06, "train_loss": 0.5573452462992269, "train_loss_scale": 6330.728052358384, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 296, "n_parameters": 1026306560}
|
298 |
+
{"train_lr": 1.2658079385073404e-06, "train_min_lr": 1.2658079385073404e-06, "train_loss": 0.5571730503464789, "train_loss_scale": 7984.935229067931, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 297, "n_parameters": 1026306560}
|
299 |
+
{"train_lr": 1.2558242702279972e-06, "train_min_lr": 1.2558242702279972e-06, "train_loss": 0.5570566176433268, "train_loss_scale": 7074.4048747461065, "train_weight_decay": 0.0500000000000075, "train_grad_norm": NaN, "epoch": 298, "n_parameters": 1026306560}
|