DrQY / Video Classification

DrQY committed · Commit ccfa4e3 · verified · 1 Parent(s): bfb1d57

Upload 3 files

pretrain_videomae_base_patch16_224_tiktokactions/._vit_b_tta_pretrain.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe3b7108ce9849294b18c8fcd4c2d8fe24054b79ef7f66ddea2aa059c266339
3
+ size 4096
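The three added lines are a Git LFS pointer, not the payload itself: "version" names the pointer spec, "oid" records the SHA-256 of the stored object, and "size" its byte count (4096 bytes here, so this "._"-prefixed entry is only a small companion file rather than the checkpoint). A minimal Python sketch for parsing such a pointer and checking a locally fetched file against it is shown below; the path in the commented example is taken from this commit, everything else is illustrative.

import hashlib
from pathlib import Path

def read_lfs_pointer(path):
    """Parse a Git LFS pointer file into a dict with version, oid and size."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    fields["size"] = int(fields["size"])
    fields["oid"] = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    return fields

def matches_pointer(local_file, pointer):
    """Return True if local_file has the size and sha256 recorded in the pointer."""
    data = Path(local_file).read_bytes()
    return (len(data) == pointer["size"]
            and hashlib.sha256(data).hexdigest() == pointer["oid"])

# Example (path from this commit; assumes a plain checkout where LFS files were not smudged):
# ptr = read_lfs_pointer("pretrain_videomae_base_patch16_224_tiktokactions/._vit_b_tta_pretrain.pth")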
pretrain_videomae_base_patch16_224_tiktokactions/log.txt ADDED
@@ -0,0 +1,201 @@
1
+ {"train_lr": 1.87506351841187e-05, "train_min_lr": 1.87506351841187e-05, "train_loss": 1.0054343460567836, "train_loss_scale": 65536.0, "train_weight_decay": 0.04999999999999862, "train_grad_norm": 0.1640834872120299, "epoch": 0, "n_parameters": 94210944}
2
+ {"train_lr": 5.6251905552356104e-05, "train_min_lr": 5.6251905552356104e-05, "train_loss": 0.9441261822528022, "train_loss_scale": 107910.32633716994, "train_weight_decay": 0.04999999999999862, "train_grad_norm": 0.18111113502758133, "epoch": 1, "n_parameters": 94210944}
3
+ {"train_lr": 9.37531759205935e-05, "train_min_lr": 9.37531759205935e-05, "train_loss": 0.8968875731670057, "train_loss_scale": 169408.5633039946, "train_weight_decay": 0.04999999999999862, "train_grad_norm": 0.24030154741690138, "epoch": 2, "n_parameters": 94210944}
4
+ {"train_lr": 0.0001312544462888309, "train_min_lr": 0.0001312544462888309, "train_loss": 0.8531214475086919, "train_loss_scale": 262144.0, "train_weight_decay": 0.04999999999999862, "train_grad_norm": 0.21543787992335528, "epoch": 3, "n_parameters": 94210944}
5
+ {"train_lr": 0.0001687557166570683, "train_min_lr": 0.0001687557166570683, "train_loss": 0.8242569995303773, "train_loss_scale": 508136.94786729856, "train_weight_decay": 0.04999999999999862, "train_grad_norm": 0.18898231721783942, "epoch": 4, "n_parameters": 94210944}
6
+ {"train_lr": 0.00011249689609246262, "train_min_lr": 0.00011249689609246262, "train_loss": 0.8220351848221524, "train_loss_scale": 542868.8613995485, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.19735312983561853, "epoch": 4, "n_parameters": 94210944}
7
+ {"train_lr": 0.00013749746043928754, "train_min_lr": 0.00013749746043928754, "train_loss": 0.7993325535451855, "train_loss_scale": 1209057.8347629798, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.18881684084576356, "epoch": 5, "n_parameters": 94210944}
8
+ {"train_lr": 0.00016249802478611255, "train_min_lr": 0.00016249802478611255, "train_loss": 0.7811216617007288, "train_loss_scale": 2621676.6988713318, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.17514816534371194, "epoch": 6, "n_parameters": 94210944}
9
+ {"train_lr": 0.00018749858913293753, "train_min_lr": 0.00018749858913293753, "train_loss": 0.7675926281992792, "train_loss_scale": 5650475.456433409, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1575174555115183, "epoch": 7, "n_parameters": 94210944}
10
+ {"train_lr": 0.00021249915347976254, "train_min_lr": 0.00021249915347976254, "train_loss": 0.7563288628483196, "train_loss_scale": 4534203.5792325055, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 8, "n_parameters": 94210944}
11
+ {"train_lr": 0.0002374997178265875, "train_min_lr": 0.0002374997178265875, "train_loss": 0.7468992818256264, "train_loss_scale": 2741919.7255079006, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 9, "n_parameters": 94210944}
12
+ {"train_lr": 0.00026250028217341255, "train_min_lr": 0.00026250028217341255, "train_loss": 0.7386982778730564, "train_loss_scale": 2502380.4677200904, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.12220186182459644, "epoch": 10, "n_parameters": 94210944}
13
+ {"train_lr": 0.0002875008465202375, "train_min_lr": 0.0002875008465202375, "train_loss": 0.7320380008146672, "train_loss_scale": 2177629.616252822, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 11, "n_parameters": 94210944}
14
+ {"train_lr": 0.0003125014108670625, "train_min_lr": 0.0003125014108670625, "train_loss": 0.726330610413449, "train_loss_scale": 952002.860496614, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 12, "n_parameters": 94210944}
15
+ {"train_lr": 0.0003375019752138873, "train_min_lr": 0.0003375019752138873, "train_loss": 0.7213638244411208, "train_loss_scale": 956736.8379232505, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10361295879459004, "epoch": 13, "n_parameters": 94210944}
16
+ {"train_lr": 0.0003625025395607124, "train_min_lr": 0.0003625025395607124, "train_loss": 0.7173034797807311, "train_loss_scale": 2055019.6009029346, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09815341335790691, "epoch": 14, "n_parameters": 94210944}
17
+ {"train_lr": 0.00038750310390753756, "train_min_lr": 0.00038750310390753756, "train_loss": 0.7136197358168006, "train_loss_scale": 4196197.590970655, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 15, "n_parameters": 94210944}
18
+ {"train_lr": 0.00041250366825436254, "train_min_lr": 0.00041250366825436254, "train_loss": 0.7104204449441041, "train_loss_scale": 2911396.11738149, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 16, "n_parameters": 94210944}
19
+ {"train_lr": 0.00043750423260118735, "train_min_lr": 0.00043750423260118735, "train_loss": 0.7075132350822065, "train_loss_scale": 2261894.4144469528, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 17, "n_parameters": 94210944}
20
+ {"train_lr": 0.0004625047969480124, "train_min_lr": 0.0004625047969480124, "train_loss": 0.7050307441645767, "train_loss_scale": 2451253.5115124155, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 18, "n_parameters": 94210944}
21
+ {"train_lr": 0.00048750536129483715, "train_min_lr": 0.00048750536129483715, "train_loss": 0.7029381069386248, "train_loss_scale": 2134077.0239277654, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 19, "n_parameters": 94210944}
22
+ {"train_lr": 0.000499987443230665, "train_min_lr": 0.000499987443230665, "train_loss": 0.7005200942368593, "train_loss_scale": 2860269.161173815, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 20, "n_parameters": 94210944}
23
+ {"train_lr": 0.0004999120731762533, "train_min_lr": 0.0004999120731762533, "train_loss": 0.6980456664317348, "train_loss_scale": 1635115.803160271, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 21, "n_parameters": 94210944}
24
+ {"train_lr": 0.0004997613390076187, "train_min_lr": 0.0004997613390076187, "train_loss": 0.6958574426766174, "train_loss_scale": 1469426.593227991, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 22, "n_parameters": 94210944}
25
+ {"train_lr": 0.0004995352866398469, "train_min_lr": 0.0004995352866398469, "train_loss": 0.6938709376157003, "train_loss_scale": 1296163.0194130926, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07303675664063232, "epoch": 23, "n_parameters": 94210944}
26
+ {"train_lr": 0.0004992339849306809, "train_min_lr": 0.0004992339849306809, "train_loss": 0.6921234347245763, "train_loss_scale": 2238224.52731377, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 24, "n_parameters": 94210944}
27
+ {"train_lr": 0.0004988575256595376, "train_min_lr": 0.0004988575256595376, "train_loss": 0.6905317712146326, "train_loss_scale": 2106619.954853273, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 25, "n_parameters": 94210944}
28
+ {"train_lr": 0.0004984060234995617, "train_min_lr": 0.0004984060234995617, "train_loss": 0.6892625475811097, "train_loss_scale": 2260947.6189616253, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 26, "n_parameters": 94210944}
29
+ {"train_lr": 0.0004978796159826815, "train_min_lr": 0.0004978796159826815, "train_loss": 0.6877718834824541, "train_loss_scale": 498724.52189616254, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 27, "n_parameters": 94210944}
30
+ {"train_lr": 0.0004972784634577262, "train_min_lr": 0.0004972784634577262, "train_loss": 0.6868802516643404, "train_loss_scale": 387831.1006772009, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 28, "n_parameters": 94210944}
31
+ {"train_lr": 0.0004966027490415802, "train_min_lr": 0.0004966027490415802, "train_loss": 0.6857877336055646, "train_loss_scale": 415524.86862302484, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07040702748043004, "epoch": 29, "n_parameters": 94210944}
32
+ {"train_lr": 0.0004958526785634007, "train_min_lr": 0.0004958526785634007, "train_loss": 0.6849020221297951, "train_loss_scale": 881939.9945823927, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.06991383571421589, "epoch": 30, "n_parameters": 94210944}
33
+ {"train_lr": 0.0004950284805019194, "train_min_lr": 0.0004950284805019194, "train_loss": 0.6838629563458886, "train_loss_scale": 1865660.5038374718, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07022929277531718, "epoch": 31, "n_parameters": 94210944}
34
+ {"train_lr": 0.0004941304059158519, "train_min_lr": 0.0004941304059158519, "train_loss": 0.6829429843826703, "train_loss_scale": 1164558.4469525958, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 32, "n_parameters": 94210944}
35
+ {"train_lr": 0.0004931587283674154, "train_min_lr": 0.0004931587283674154, "train_loss": 0.6822576716643155, "train_loss_scale": 1740210.1020316028, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 33, "n_parameters": 94210944}
36
+ {"train_lr": 0.0004921137438390036, "train_min_lr": 0.0004921137438390036, "train_loss": 0.6815312039199586, "train_loss_scale": 1493096.4803611739, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.06991237070470309, "epoch": 34, "n_parameters": 94210944}
37
+ {"train_lr": 0.0004909957706430305, "train_min_lr": 0.0004909957706430305, "train_loss": 0.6809918168200596, "train_loss_scale": 2482971.1602708804, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 35, "n_parameters": 94210944}
38
+ {"train_lr": 0.0004898051493249584, "train_min_lr": 0.0004898051493249584, "train_loss": 0.6802892858166727, "train_loss_scale": 1211424.823476298, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07023872563586009, "epoch": 36, "n_parameters": 94210944}
39
+ {"train_lr": 0.0004885422425595745, "train_min_lr": 0.0004885422425595745, "train_loss": 0.6795596554673552, "train_loss_scale": 2626410.6762979683, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07003567918765087, "epoch": 37, "n_parameters": 94210944}
40
+ {"train_lr": 0.0004872074350405136, "train_min_lr": 0.0004872074350405136, "train_loss": 0.6790702461792707, "train_loss_scale": 2251953.061851016, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 38, "n_parameters": 94210944}
41
+ {"train_lr": 0.00048580113336307894, "train_min_lr": 0.00048580113336307894, "train_loss": 0.6785469003273994, "train_loss_scale": 1332614.645598194, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 39, "n_parameters": 94210944}
42
+ {"train_lr": 0.000484323765900384, "train_min_lr": 0.000484323765900384, "train_loss": 0.677992282511822, "train_loss_scale": 1399363.7273137697, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07039308818750942, "epoch": 40, "n_parameters": 94210944}
43
+ {"train_lr": 0.0004827757826728664, "train_min_lr": 0.0004827757826728664, "train_loss": 0.6775955428556448, "train_loss_scale": 3002288.483972912, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0702370450186541, "epoch": 41, "n_parameters": 94210944}
44
+ {"train_lr": 0.0004811576552112163, "train_min_lr": 0.0004811576552112163, "train_loss": 0.6770043324435269, "train_loss_scale": 1341135.8049661398, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 42, "n_parameters": 94210944}
45
+ {"train_lr": 0.00047946987641273237, "train_min_lr": 0.00047946987641273237, "train_loss": 0.6765808133454678, "train_loss_scale": 1979275.9620767494, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07052050681267581, "epoch": 43, "n_parameters": 94210944}
46
+ {"train_lr": 0.0004777129603911854, "train_min_lr": 0.0004777129603911854, "train_loss": 0.6761909951104805, "train_loss_scale": 2188991.1620767494, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 44, "n_parameters": 94210944}
47
+ {"train_lr": 0.00047588744232021754, "train_min_lr": 0.00047588744232021754, "train_loss": 0.675662665543384, "train_loss_scale": 1150356.5146726863, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 45, "n_parameters": 94210944}
48
+ {"train_lr": 0.000473993878270316, "train_min_lr": 0.000473993878270316, "train_loss": 0.675249227800703, "train_loss_scale": 2430424.0108352145, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0709891432177267, "epoch": 46, "n_parameters": 94210944}
49
+ {"train_lr": 0.00047203284503943247, "train_min_lr": 0.00047203284503943247, "train_loss": 0.6750185969350838, "train_loss_scale": 2340478.4397291197, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 47, "n_parameters": 94210944}
50
+ {"train_lr": 0.00047000493997728887, "train_min_lr": 0.00047000493997728887, "train_loss": 0.6746340712965476, "train_loss_scale": 1196749.4934537245, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0719981349718759, "epoch": 48, "n_parameters": 94210944}
51
+ {"train_lr": 0.00046791078080341106, "train_min_lr": 0.00046791078080341106, "train_loss": 0.6740617157340588, "train_loss_scale": 2597060.016252822, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07182871455297245, "epoch": 49, "n_parameters": 94210944}
52
+ {"train_lr": 0.00046575100541896826, "train_min_lr": 0.00046575100541896826, "train_loss": 0.6739249454966786, "train_loss_scale": 1369066.2717832958, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 50, "n_parameters": 94210944}
53
+ {"train_lr": 0.0004635262717124667, "train_min_lr": 0.0004635262717124667, "train_loss": 0.6734419125769262, "train_loss_scale": 1957026.2681715575, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07207640105728103, "epoch": 51, "n_parameters": 94210944}
54
+ {"train_lr": 0.00046123725735934453, "train_min_lr": 0.00046123725735934453, "train_loss": 0.6732031490880802, "train_loss_scale": 1457118.251918736, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 52, "n_parameters": 94210944}
55
+ {"train_lr": 0.00045888465961554703, "train_min_lr": 0.00045888465961554703, "train_loss": 0.6728668518650344, "train_loss_scale": 1488835.9006772009, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 53, "n_parameters": 94210944}
56
+ {"train_lr": 0.00045646919510513613, "train_min_lr": 0.00045646919510513613, "train_loss": 0.672500072116911, "train_loss_scale": 1496410.2645598194, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0731345654559862, "epoch": 54, "n_parameters": 94210944}
57
+ {"train_lr": 0.0004539915996020052, "train_min_lr": 0.0004539915996020052, "train_loss": 0.6723582117639599, "train_loss_scale": 580385.6325056433, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 55, "n_parameters": 94210944}
58
+ {"train_lr": 0.00045145262780574, "train_min_lr": 0.00045145262780574, "train_loss": 0.6718072278168616, "train_loss_scale": 1246456.2564334085, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07266009126371509, "epoch": 56, "n_parameters": 94210944}
59
+ {"train_lr": 0.00044885305311174876, "train_min_lr": 0.00044885305311174876, "train_loss": 0.6717216641551754, "train_loss_scale": 1570260.31241535, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 57, "n_parameters": 94210944}
60
+ {"train_lr": 0.00044619366737566493, "train_min_lr": 0.00044619366737566493, "train_loss": 0.6713205029981938, "train_loss_scale": 1259237.9954853272, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 58, "n_parameters": 94210944}
61
+ {"train_lr": 0.0004434752806721448, "train_min_lr": 0.0004434752806721448, "train_loss": 0.6710454012786562, "train_loss_scale": 714357.1936794582, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07360753190806703, "epoch": 59, "n_parameters": 94210944}
62
+ {"train_lr": 0.0004406987210481143, "train_min_lr": 0.0004406987210481143, "train_loss": 0.6705654282534903, "train_loss_scale": 1530494.9020316028, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07376440629262149, "epoch": 60, "n_parameters": 94210944}
63
+ {"train_lr": 0.0004378648342705306, "train_min_lr": 0.0004378648342705306, "train_loss": 0.6705032219433354, "train_loss_scale": 1482208.3322799096, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 61, "n_parameters": 94210944}
64
+ {"train_lr": 0.0004349744835687574, "train_min_lr": 0.0004349744835687574, "train_loss": 0.6701008018770551, "train_loss_scale": 1765300.1823927765, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0753246323754233, "epoch": 62, "n_parameters": 94210944}
65
+ {"train_lr": 0.0004320285493716205, "train_min_lr": 0.0004320285493716205, "train_loss": 0.6700715924459709, "train_loss_scale": 1150356.5146726863, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 63, "n_parameters": 94210944}
66
+ {"train_lr": 0.0004290279290392112, "train_min_lr": 0.0004290279290392112, "train_loss": 0.669737740436203, "train_loss_scale": 587486.5986455982, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 64, "n_parameters": 94210944}
67
+ {"train_lr": 0.00042597353658955345, "train_min_lr": 0.00042597353658955345, "train_loss": 0.6695661478366862, "train_loss_scale": 1160297.867268623, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07529102459247171, "epoch": 65, "n_parameters": 94210944}
68
+ {"train_lr": 0.00042286630242017456, "train_min_lr": 0.00042286630242017456, "train_loss": 0.6691276194212399, "train_loss_scale": 1823054.7069977426, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 66, "n_parameters": 94210944}
69
+ {"train_lr": 0.00041970717302470314, "train_min_lr": 0.00041970717302470314, "train_loss": 0.6689469663607078, "train_loss_scale": 1424453.8076749435, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07574352361682306, "epoch": 67, "n_parameters": 94210944}
70
+ {"train_lr": 0.00041649711070455495, "train_min_lr": 0.00041649711070455495, "train_loss": 0.6686761446213076, "train_loss_scale": 2208873.867268623, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 68, "n_parameters": 94210944}
71
+ {"train_lr": 0.00041323709327581166, "train_min_lr": 0.00041323709327581166, "train_loss": 0.6684300723685368, "train_loss_scale": 2410541.305643341, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 69, "n_parameters": 94210944}
72
+ {"train_lr": 0.0004099281137713611, "train_min_lr": 0.0004099281137713611, "train_loss": 0.6681701692683164, "train_loss_scale": 1191542.1182844243, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 70, "n_parameters": 94210944}
73
+ {"train_lr": 0.00040657118013841546, "train_min_lr": 0.00040657118013841546, "train_loss": 0.6678166073177254, "train_loss_scale": 2108986.9435665915, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 71, "n_parameters": 94210944}
74
+ {"train_lr": 0.00040316731493147745, "train_min_lr": 0.00040316731493147745, "train_loss": 0.6678724005212903, "train_loss_scale": 2140704.5923250564, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 72, "n_parameters": 94210944}
75
+ {"train_lr": 0.0003997175550008656, "train_min_lr": 0.0003997175550008656, "train_loss": 0.6675369731325059, "train_loss_scale": 1679141.7932279909, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 73, "n_parameters": 94210944}
76
+ {"train_lr": 0.00039622295117687224, "train_min_lr": 0.00039622295117687224, "train_loss": 0.6672442266284477, "train_loss_scale": 1214738.6076749435, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 74, "n_parameters": 94210944}
77
+ {"train_lr": 0.0003926845679496748, "train_min_lr": 0.0003926845679496748, "train_loss": 0.6670274592751035, "train_loss_scale": 1503984.6284424379, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.07766003276875272, "epoch": 75, "n_parameters": 94210944}
78
+ {"train_lr": 0.0003891034831450808, "train_min_lr": 0.0003891034831450808, "train_loss": 0.6668068701035551, "train_loss_scale": 2117034.7051918735, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 76, "n_parameters": 94210944}
79
+ {"train_lr": 0.0003854807875962133, "train_min_lr": 0.0003854807875962133, "train_loss": 0.6664828105045495, "train_loss_scale": 1620440.4731376974, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 77, "n_parameters": 94210944}
80
+ {"train_lr": 0.0003818175848112301, "train_min_lr": 0.0003818175848112301, "train_loss": 0.666077302379479, "train_loss_scale": 1105857.1268623024, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 78, "n_parameters": 94210944}
81
+ {"train_lr": 0.00037811499063718625, "train_min_lr": 0.00037811499063718625, "train_loss": 0.6659460055000626, "train_loss_scale": 1096862.569751693, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 79, "n_parameters": 94210944}
82
+ {"train_lr": 0.0003743741329201343, "train_min_lr": 0.0003743741329201343, "train_loss": 0.6656784487329003, "train_loss_scale": 1214738.6076749435, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 80, "n_parameters": 94210944}
83
+ {"train_lr": 0.00037059615116157376, "train_min_lr": 0.00037059615116157376, "train_loss": 0.6653749857409006, "train_loss_scale": 1359598.3169300226, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 81, "n_parameters": 94210944}
84
+ {"train_lr": 0.0003667821961713463, "train_min_lr": 0.0003667821961713463, "train_loss": 0.6651768554212277, "train_loss_scale": 1359124.9191873588, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 82, "n_parameters": 94210944}
85
+ {"train_lr": 0.0003629334297170878, "train_min_lr": 0.0003629334297170878, "train_loss": 0.664970609828258, "train_loss_scale": 1199116.4821670428, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 83, "n_parameters": 94210944}
86
+ {"train_lr": 0.00035905102417034567, "train_min_lr": 0.00035905102417034567, "train_loss": 0.6647563134877192, "train_loss_scale": 1200536.6753950338, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08096167955118701, "epoch": 84, "n_parameters": 94210944}
87
+ {"train_lr": 0.00035513616214945845, "train_min_lr": 0.00035513616214945845, "train_loss": 0.6645053177130141, "train_loss_scale": 1054020.074040632, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 85, "n_parameters": 94210944}
88
+ {"train_lr": 0.00035119003615932085, "train_min_lr": 0.00035119003615932085, "train_loss": 0.6644112974102556, "train_loss_scale": 685716.630248307, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 86, "n_parameters": 94210944}
89
+ {"train_lr": 0.00034721384822813755, "train_min_lr": 0.00034721384822813755, "train_loss": 0.6640619396356104, "train_loss_scale": 668911.0103837472, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08226684153113624, "epoch": 87, "n_parameters": 94210944}
90
+ {"train_lr": 0.00034320880954127176, "train_min_lr": 0.00034320880954127176, "train_loss": 0.6640450631859071, "train_loss_scale": 828446.0496613996, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 88, "n_parameters": 94210944}
91
+ {"train_lr": 0.000339176140072302, "train_min_lr": 0.000339176140072302, "train_loss": 0.6635885613362892, "train_loss_scale": 758146.4848758465, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 89, "n_parameters": 94210944}
92
+ {"train_lr": 0.0003351170682114157, "train_min_lr": 0.0003351170682114157, "train_loss": 0.6633453147103232, "train_loss_scale": 612339.9801354401, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08375678057897978, "epoch": 90, "n_parameters": 94210944}
93
+ {"train_lr": 0.000331032830391224, "train_min_lr": 0.000331032830391224, "train_loss": 0.6630859727212323, "train_loss_scale": 1326460.4749435666, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08358993218627256, "epoch": 91, "n_parameters": 94210944}
94
+ {"train_lr": 0.000326924670710129, "train_min_lr": 0.000326924670710129, "train_loss": 0.662794155595534, "train_loss_scale": 2342372.030699774, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 92, "n_parameters": 94210944}
95
+ {"train_lr": 0.0003227938405533681, "train_min_lr": 0.0003227938405533681, "train_loss": 0.6626170952856945, "train_loss_scale": 848092.0559819413, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 93, "n_parameters": 94210944}
96
+ {"train_lr": 0.00031864159821182387, "train_min_lr": 0.00031864159821182387, "train_loss": 0.6624287039209704, "train_loss_scale": 927149.479006772, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08544299067681314, "epoch": 94, "n_parameters": 94210944}
97
+ {"train_lr": 0.00031446920849873434, "train_min_lr": 0.00031446920849873434, "train_loss": 0.662063623137318, "train_loss_scale": 1178760.3792325056, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 95, "n_parameters": 94210944}
98
+ {"train_lr": 0.00031027794236442876, "train_min_lr": 0.00031027794236442876, "train_loss": 0.6619655768704468, "train_loss_scale": 1621387.268623025, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 96, "n_parameters": 94210944}
99
+ {"train_lr": 0.00030606907650917065, "train_min_lr": 0.00030606907650917065, "train_loss": 0.6616307690066893, "train_loss_scale": 1456644.8541760722, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.08681120853660876, "epoch": 97, "n_parameters": 94210944}
100
+ {"train_lr": 0.0003018438929942724, "train_min_lr": 0.0003018438929942724, "train_loss": 0.6613953153752042, "train_loss_scale": 2098098.7954853275, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 98, "n_parameters": 94210944}
101
+ {"train_lr": 0.00029760367885156406, "train_min_lr": 0.00029760367885156406, "train_loss": 0.6612337540524, "train_loss_scale": 756016.19503386, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 99, "n_parameters": 94210944}
102
+ {"train_lr": 0.00029334972569134793, "train_min_lr": 0.00029334972569134793, "train_loss": 0.6610450070948418, "train_loss_scale": 935907.3372460497, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0883106715876281, "epoch": 100, "n_parameters": 94210944}
103
+ {"train_lr": 0.00028908332930896566, "train_min_lr": 0.00028908332930896566, "train_loss": 0.6607949713114691, "train_loss_scale": 1234147.9151241535, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 101, "n_parameters": 94210944}
104
+ {"train_lr": 0.0002848057892900854, "train_min_lr": 0.0002848057892900854, "train_loss": 0.660267421842025, "train_loss_scale": 1400310.522799097, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 102, "n_parameters": 94210944}
105
+ {"train_lr": 0.00028051840861483343, "train_min_lr": 0.00028051840861483343, "train_loss": 0.6601315538144542, "train_loss_scale": 1613339.5069977427, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 103, "n_parameters": 94210944}
106
+ {"train_lr": 0.00027622249326089856, "train_min_lr": 0.00027622249326089856, "train_loss": 0.6600186519745241, "train_loss_scale": 891644.6483069977, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 104, "n_parameters": 94210944}
107
+ {"train_lr": 0.00027191935180571094, "train_min_lr": 0.00027191935180571094, "train_loss": 0.6596403107316177, "train_loss_scale": 732109.6090293453, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09104573908125305, "epoch": 105, "n_parameters": 94210944}
108
+ {"train_lr": 0.0002676102950278428, "train_min_lr": 0.0002676102950278428, "train_loss": 0.6592954242505285, "train_loss_scale": 1565999.732731377, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09153085909531564, "epoch": 106, "n_parameters": 94210944}
109
+ {"train_lr": 0.0002632966355077328, "train_min_lr": 0.0002632966355077328, "train_loss": 0.6590559059892767, "train_loss_scale": 1093075.3878103837, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 107, "n_parameters": 94210944}
110
+ {"train_lr": 0.00025897968722785855, "train_min_lr": 0.00025897968722785855, "train_loss": 0.6588814638740053, "train_loss_scale": 738973.8762979684, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09284255542281519, "epoch": 108, "n_parameters": 94210944}
111
+ {"train_lr": 0.0002546607651724837, "train_min_lr": 0.0002546607651724837, "train_loss": 0.6588158854445271, "train_loss_scale": 1316519.1223476299, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 109, "n_parameters": 94210944}
112
+ {"train_lr": 0.0002503411849271061, "train_min_lr": 0.0002503411849271061, "train_loss": 0.6584617016277787, "train_loss_scale": 1266812.359367946, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 110, "n_parameters": 94210944}
113
+ {"train_lr": 0.00024602226227771277, "train_min_lr": 0.00024602226227771277, "train_loss": 0.6581510819006988, "train_loss_scale": 1129053.6162528216, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 111, "n_parameters": 94210944}
114
+ {"train_lr": 0.00024170531280998068, "train_min_lr": 0.00024170531280998068, "train_loss": 0.6578550042033734, "train_loss_scale": 1074139.4781038375, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 112, "n_parameters": 94210944}
115
+ {"train_lr": 0.00023739165150853662, "train_min_lr": 0.00023739165150853662, "train_loss": 0.6577658076116786, "train_loss_scale": 1394629.7498871333, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09488440139332689, "epoch": 113, "n_parameters": 94210944}
116
+ {"train_lr": 0.00023308259235640182, "train_min_lr": 0.00023308259235640182, "train_loss": 0.65730362453458, "train_loss_scale": 1421613.4212189617, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 114, "n_parameters": 94210944}
117
+ {"train_lr": 0.00022877944793473534, "train_min_lr": 0.00022877944793473534, "train_loss": 0.6570100645297133, "train_loss_scale": 897680.4695259593, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 115, "n_parameters": 94210944}
118
+ {"train_lr": 0.0002244835290230144, "train_min_lr": 0.0002244835290230144, "train_loss": 0.6568485731162282, "train_loss_scale": 96691.48893905192, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.0966377755320099, "epoch": 116, "n_parameters": 94210944}
119
+ {"train_lr": 0.00022019614419975427, "train_min_lr": 0.00022019614419975427, "train_loss": 0.6566159759868887, "train_loss_scale": 206105.5422121896, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10106808693562351, "epoch": 117, "n_parameters": 94210944}
120
+ {"train_lr": 0.00021591859944390554, "train_min_lr": 0.00021591859944390554, "train_loss": 0.656352761749422, "train_loss_scale": 437656.2130925508, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09837361176920799, "epoch": 118, "n_parameters": 94210944}
121
+ {"train_lr": 0.00021165219773703844, "train_min_lr": 0.00021165219773703844, "train_loss": 0.6560197811296239, "train_loss_scale": 643584.2311512416, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 119, "n_parameters": 94210944}
122
+ {"train_lr": 0.0002073982386664418, "train_min_lr": 0.0002073982386664418, "train_loss": 0.6556408809014421, "train_loss_scale": 182672.3539503386, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 120, "n_parameters": 94210944}
123
+ {"train_lr": 0.0002031580180292583, "train_min_lr": 0.0002031580180292583, "train_loss": 0.6553513088627271, "train_loss_scale": 120657.24966139955, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.09995475103973535, "epoch": 121, "n_parameters": 94210944}
124
+ {"train_lr": 0.00019893282743776915, "train_min_lr": 0.00019893282743776915, "train_loss": 0.6550403757201629, "train_loss_scale": 263268.3196388262, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10155190166851198, "epoch": 122, "n_parameters": 94210944}
125
+ {"train_lr": 0.00019472395392596033, "train_min_lr": 0.00019472395392596033, "train_loss": 0.6546850215971605, "train_loss_scale": 562633.2171557562, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 123, "n_parameters": 94210944}
126
+ {"train_lr": 0.00019053267955747316, "train_min_lr": 0.00019053267955747316, "train_loss": 0.6545531411842352, "train_loss_scale": 606185.8094808126, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10290193630395032, "epoch": 124, "n_parameters": 94210944}
127
+ {"train_lr": 0.0001863602810350849, "train_min_lr": 0.0001863602810350849, "train_loss": 0.6542960926284909, "train_loss_scale": 608079.4004514673, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 125, "n_parameters": 94210944}
128
+ {"train_lr": 0.00018220802931180271, "train_min_lr": 0.00018220802931180271, "train_loss": 0.653910167030906, "train_loss_scale": 1051653.0853273137, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10473232801423385, "epoch": 126, "n_parameters": 94210944}
129
+ {"train_lr": 0.00017807718920372575, "train_min_lr": 0.00017807718920372575, "train_loss": 0.653808610528102, "train_loss_scale": 1209057.8347629798, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 127, "n_parameters": 94210944}
130
+ {"train_lr": 0.00017396901900476797, "train_min_lr": 0.00017396901900476797, "train_loss": 0.653360907901625, "train_loss_scale": 1018751.9422121897, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 128, "n_parameters": 94210944}
131
+ {"train_lr": 0.000169884770103367, "train_min_lr": 0.000169884770103367, "train_loss": 0.653065086615274, "train_loss_scale": 594587.564785553, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 129, "n_parameters": 94210944}
132
+ {"train_lr": 0.0001658256866013048, "train_min_lr": 0.0001658256866013048, "train_loss": 0.6529171408030034, "train_loss_scale": 776845.6957110609, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10709614497746087, "epoch": 130, "n_parameters": 94210944}
133
+ {"train_lr": 0.00016179300493473722, "train_min_lr": 0.00016179300493473722, "train_loss": 0.6526565286564235, "train_loss_scale": 1399363.7273137697, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 131, "n_parameters": 94210944}
134
+ {"train_lr": 0.00015778795349756373, "train_min_lr": 0.00015778795349756373, "train_loss": 0.6522925484631186, "train_loss_scale": 1406464.6934537245, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.10930601817894198, "epoch": 132, "n_parameters": 94210944}
135
+ {"train_lr": 0.00015381175226725194, "train_min_lr": 0.00015381175226725194, "train_loss": 0.6519351052881364, "train_loss_scale": 1427294.1941309255, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 133, "n_parameters": 94210944}
136
+ {"train_lr": 0.0001498656124332142, "train_min_lr": 0.0001498656124332142, "train_loss": 0.651734107884139, "train_loss_scale": 1245509.4609480812, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 134, "n_parameters": 94210944}
137
+ {"train_lr": 0.0001459507360278706, "train_min_lr": 0.0001459507360278706, "train_loss": 0.6513890097022864, "train_loss_scale": 1239828.6880361175, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 135, "n_parameters": 94210944}
138
+ {"train_lr": 0.00014206831556049863, "train_min_lr": 0.00014206831556049863, "train_loss": 0.6510883330861548, "train_loss_scale": 1049049.3977426637, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 136, "n_parameters": 94210944}
139
+ {"train_lr": 0.00013821953365398317, "train_min_lr": 0.00013821953365398317, "train_loss": 0.6508564877489889, "train_loss_scale": 1736896.317832957, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.11260490652602628, "epoch": 137, "n_parameters": 94210944}
140
+ {"train_lr": 0.000134405562684576, "train_min_lr": 0.000134405562684576, "train_loss": 0.6504784367928117, "train_loss_scale": 1151776.707900677, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 138, "n_parameters": 94210944}
141
+ {"train_lr": 0.00013062756442478195, "train_min_lr": 0.00013062756442478195, "train_loss": 0.6501371015292123, "train_loss_scale": 2011467.0085778781, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 139, "n_parameters": 94210944}
142
+ {"train_lr": 0.0001268866896894689, "train_min_lr": 0.0001268866896894689, "train_loss": 0.6497572747195817, "train_loss_scale": 1185861.3453724605, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 140, "n_parameters": 94210944}
143
+ {"train_lr": 0.00012318407798531943, "train_min_lr": 0.00012318407798531943, "train_loss": 0.6494202515745271, "train_loss_scale": 496594.2320541761, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 141, "n_parameters": 94210944}
144
+ {"train_lr": 0.00011952085716372716, "train_min_lr": 0.00011952085716372716, "train_loss": 0.6494217206608361, "train_loss_scale": 387712.751241535, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1172947087722492, "epoch": 142, "n_parameters": 94210944}
145
+ {"train_lr": 0.00011589814307724128, "train_min_lr": 0.00011589814307724128, "train_loss": 0.6488567601376408, "train_loss_scale": 826315.7598194131, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1185701938131593, "epoch": 143, "n_parameters": 94210944}
146
+ {"train_lr": 0.00011231703923966577, "train_min_lr": 0.00011231703923966577, "train_loss": 0.6485799433667692, "train_loss_scale": 1386108.5905191873, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 144, "n_parameters": 94210944}
147
+ {"train_lr": 0.00010877863648992044, "train_min_lr": 0.00010877863648992044, "train_loss": 0.6482458996348941, "train_loss_scale": 1518659.9584650113, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.11991290848357414, "epoch": 145, "n_parameters": 94210944}
148
+ {"train_lr": 0.0001052840126597605, "train_min_lr": 0.0001052840126597605, "train_loss": 0.6480167992833505, "train_loss_scale": 1240775.4835214447, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 146, "n_parameters": 94210944}
149
+ {"train_lr": 0.00010183423224545773, "train_min_lr": 0.00010183423224545773, "train_loss": 0.647656453328531, "train_loss_scale": 1115325.0817155757, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 147, "n_parameters": 94210944}
150
+ {"train_lr": 9.843034608354522e-05, "train_min_lr": 9.843034608354522e-05, "train_loss": 0.6472746039957279, "train_loss_scale": 1160771.2650112866, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 148, "n_parameters": 94210944}
151
+ {"train_lr": 9.507339103072148e-05, "train_min_lr": 9.507339103072148e-05, "train_loss": 0.6470858913747506, "train_loss_scale": 1211898.2212189615, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 149, "n_parameters": 94210944}
152
+ {"train_lr": 9.17643896480159e-05, "train_min_lr": 9.17643896480159e-05, "train_loss": 0.6467146196900856, "train_loss_scale": 1969808.0072234762, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.12356616759475143, "epoch": 150, "n_parameters": 94210944}
153
+ {"train_lr": 8.850434988930415e-05, "train_min_lr": 8.850434988930415e-05, "train_loss": 0.6461674254042838, "train_loss_scale": 1578781.4717832957, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 151, "n_parameters": 94210944}
154
+ {"train_lr": 8.529426479427781e-05, "train_min_lr": 8.529426479427781e-05, "train_loss": 0.6462225925471659, "train_loss_scale": 990584.7765237021, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 152, "n_parameters": 94210944}
155
+ {"train_lr": 8.213511218595337e-05, "train_min_lr": 8.213511218595337e-05, "train_loss": 0.6457221887181629, "train_loss_scale": 793177.9178329571, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.12733038198665356, "epoch": 153, "n_parameters": 94210944}
156
+ {"train_lr": 7.902785437281893e-05, "train_min_lr": 7.902785437281893e-05, "train_loss": 0.6456104904379586, "train_loss_scale": 1174973.1972911963, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 154, "n_parameters": 94210944}
157
+ {"train_lr": 7.597343785570493e-05, "train_min_lr": 7.597343785570493e-05, "train_loss": 0.6451934839882646, "train_loss_scale": 1663519.6677200904, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.12811546200890572, "epoch": 155, "n_parameters": 94210944}
158
+ {"train_lr": 7.297279303947109e-05, "train_min_lr": 7.297279303947109e-05, "train_loss": 0.6446921279077874, "train_loss_scale": 1078873.455530474, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 156, "n_parameters": 94210944}
159
+ {"train_lr": 7.002683394959675e-05, "train_min_lr": 7.002683394959675e-05, "train_loss": 0.6447277806564728, "train_loss_scale": 1111537.8997742664, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 157, "n_parameters": 94210944}
160
+ {"train_lr": 6.713645795375859e-05, "train_min_lr": 6.713645795375859e-05, "train_loss": 0.64443131204417, "train_loss_scale": 1072245.887133183, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 158, "n_parameters": 94210944}
161
+ {"train_lr": 6.430254548848465e-05, "train_min_lr": 6.430254548848465e-05, "train_loss": 0.6436538639829471, "train_loss_scale": 1115798.4794582392, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 159, "n_parameters": 94210944}
162
+ {"train_lr": 6.15259597909645e-05, "train_min_lr": 6.15259597909645e-05, "train_loss": 0.6435108510184503, "train_loss_scale": 1145622.5372460496, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 160, "n_parameters": 94210944}
163
+ {"train_lr": 5.880754663609925e-05, "train_min_lr": 5.880754663609925e-05, "train_loss": 0.6431580853011485, "train_loss_scale": 1378060.8288939053, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.13440152481366496, "epoch": 161, "n_parameters": 94210944}
164
+ {"train_lr": 5.614813407886952e-05, "train_min_lr": 5.614813407886952e-05, "train_loss": 0.643163060590322, "train_loss_scale": 1109644.3088036117, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 162, "n_parameters": 94210944}
165
+ {"train_lr": 5.354853220210244e-05, "train_min_lr": 5.354853220210244e-05, "train_loss": 0.642778804558529, "train_loss_scale": 1191068.7205417608, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1365930774007371, "epoch": 163, "n_parameters": 94210944}
166
+ {"train_lr": 5.1009532869712974e-05, "train_min_lr": 5.1009532869712974e-05, "train_loss": 0.6424611435046853, "train_loss_scale": 1156510.6853273138, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 164, "n_parameters": 94210944}
167
+ {"train_lr": 4.853190948549334e-05, "train_min_lr": 4.853190948549334e-05, "train_loss": 0.6423653847174235, "train_loss_scale": 1144202.3440180586, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 165, "n_parameters": 94210944}
168
+ {"train_lr": 4.6116416757527574e-05, "train_min_lr": 4.6116416757527574e-05, "train_loss": 0.6418563195464304, "train_loss_scale": 1208111.0392776523, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 166, "n_parameters": 94210944}
169
+ {"train_lr": 4.376379046830018e-05, "train_min_lr": 4.376379046830018e-05, "train_loss": 0.6417000893747296, "train_loss_scale": 1200536.6753950338, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 167, "n_parameters": 94210944}
170
+ {"train_lr": 4.147474725056922e-05, "train_min_lr": 4.147474725056922e-05, "train_loss": 0.641350346366385, "train_loss_scale": 1110117.7065462754, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 168, "n_parameters": 94210944}
171
+ {"train_lr": 3.924998436907318e-05, "train_min_lr": 3.924998436907318e-05, "train_loss": 0.6410454075035069, "train_loss_scale": 1264682.0695259594, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 169, "n_parameters": 94210944}
172
+ {"train_lr": 3.70901795081376e-05, "train_min_lr": 3.70901795081376e-05, "train_loss": 0.6407260552788696, "train_loss_scale": 809273.4410835215, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.14309204199580522, "epoch": 170, "n_parameters": 94210944}
173
+ {"train_lr": 3.4995990565245815e-05, "train_min_lr": 3.4995990565245815e-05, "train_loss": 0.6404957156922694, "train_loss_scale": 1088341.4103837472, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 171, "n_parameters": 94210944}
174
+ {"train_lr": 3.296805545063644e-05, "train_min_lr": 3.296805545063644e-05, "train_loss": 0.640295142719089, "train_loss_scale": 1524814.1291196388, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 172, "n_parameters": 94210944}
175
+ {"train_lr": 3.100699189299038e-05, "train_min_lr": 3.100699189299038e-05, "train_loss": 0.6398682242423364, "train_loss_scale": 1407884.8866817155, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1446373401659606, "epoch": 173, "n_parameters": 94210944}
176
+ {"train_lr": 2.911339725126477e-05, "train_min_lr": 2.911339725126477e-05, "train_loss": 0.6397035405376157, "train_loss_scale": 1275333.5187358917, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 174, "n_parameters": 94210944}
177
+ {"train_lr": 2.728784833273107e-05, "train_min_lr": 2.728784833273107e-05, "train_loss": 0.6395402690963337, "train_loss_scale": 1196749.4934537245, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 175, "n_parameters": 94210944}
178
+ {"train_lr": 2.553090121727422e-05, "train_min_lr": 2.553090121727422e-05, "train_loss": 0.6394635571818589, "train_loss_scale": 1101596.5471783297, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 176, "n_parameters": 94210944}
179
+ {"train_lr": 2.3843091088005677e-05, "train_min_lr": 2.3843091088005677e-05, "train_loss": 0.6391063202651844, "train_loss_scale": 802409.1738148985, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 177, "n_parameters": 94210944}
180
+ {"train_lr": 2.2224932068240927e-05, "train_min_lr": 2.2224932068240927e-05, "train_loss": 0.6387539982156614, "train_loss_scale": 878626.2103837471, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.14817266714155808, "epoch": 178, "n_parameters": 94210944}
181
+ {"train_lr": 2.0676917064892832e-05, "train_min_lr": 2.0676917064892832e-05, "train_loss": 0.6385664638560727, "train_loss_scale": 1093075.3878103837, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 179, "n_parameters": 94210944}
182
+ {"train_lr": 1.9199517618327453e-05, "train_min_lr": 1.9199517618327453e-05, "train_loss": 0.6383920692361504, "train_loss_scale": 1090235.0013544017, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 180, "n_parameters": 94210944}
183
+ {"train_lr": 1.7793183758728046e-05, "train_min_lr": 1.7793183758728046e-05, "train_loss": 0.6382939946738915, "train_loss_scale": 1132367.4004514674, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 181, "n_parameters": 94210944}
184
+ {"train_lr": 1.6458343869011706e-05, "train_min_lr": 1.6458343869011706e-05, "train_loss": 0.6380631898044195, "train_loss_scale": 1132840.7981941309, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 182, "n_parameters": 94210944}
185
+ {"train_lr": 1.5195404554339714e-05, "train_min_lr": 1.5195404554339714e-05, "train_loss": 0.6378268349890516, "train_loss_scale": 1312731.9404063206, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 183, "n_parameters": 94210944}
186
+ {"train_lr": 1.4004750518261729e-05, "train_min_lr": 1.4004750518261729e-05, "train_loss": 0.6376773391499208, "train_loss_scale": 1070352.2961625282, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 184, "n_parameters": 94210944}
187
+ {"train_lr": 1.288674444553118e-05, "train_min_lr": 1.288674444553118e-05, "train_loss": 0.6374871199206359, "train_loss_scale": 1128580.2185101581, "train_weight_decay": 0.049999999999998136, "train_grad_norm": NaN, "epoch": 185, "n_parameters": 94210944}
188
+ {"train_lr": 1.1841726891628337e-05, "train_min_lr": 1.1841726891628337e-05, "train_loss": 0.6375138560549669, "train_loss_scale": 1120532.456884876, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 186, "n_parameters": 94210944}
189
+ {"train_lr": 1.0870016179023307e-05, "train_min_lr": 1.0870016179023307e-05, "train_loss": 0.6372756450493772, "train_loss_scale": 1076743.1656884875, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 187, "n_parameters": 94210944}
190
+ {"train_lr": 9.971908300212377e-06, "train_min_lr": 9.971908300212377e-06, "train_loss": 0.637025338862066, "train_loss_scale": 599084.8433408578, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.15356421128218115, "epoch": 188, "n_parameters": 94210944}
191
+ {"train_lr": 9.147676827555565e-06, "train_min_lr": 9.147676827555565e-06, "train_loss": 0.6369670491976743, "train_loss_scale": 1299950.201354402, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1544346490038168, "epoch": 189, "n_parameters": 94210944}
192
+ {"train_lr": 8.397572829943962e-06, "train_min_lr": 8.397572829943962e-06, "train_loss": 0.6368464512628572, "train_loss_scale": 1089288.2058690744, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 190, "n_parameters": 94210944}
193
+ {"train_lr": 7.721824796321671e-06, "train_min_lr": 7.721824796321671e-06, "train_loss": 0.6369289904563744, "train_loss_scale": 882650.0911963882, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 191, "n_parameters": 94210944}
194
+ {"train_lr": 7.120638566085852e-06, "train_min_lr": 7.120638566085852e-06, "train_loss": 0.6367962299263773, "train_loss_scale": 741104.1661399548, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1555945662726936, "epoch": 192, "n_parameters": 94210944}
195
+ {"train_lr": 6.594197266386048e-06, "train_min_lr": 6.594197266386048e-06, "train_loss": 0.6366888422157372, "train_loss_scale": 1147989.5259593679, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 193, "n_parameters": 94210944}
196
+ {"train_lr": 6.142661256341902e-06, "train_min_lr": 6.142661256341902e-06, "train_loss": 0.6365773241016719, "train_loss_scale": 1075559.6713318285, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 194, "n_parameters": 94210944}
197
+ {"train_lr": 5.766168078196202e-06, "train_min_lr": 5.766168078196202e-06, "train_loss": 0.6364746647649371, "train_loss_scale": 1097335.9674943567, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 195, "n_parameters": 94210944}
198
+ {"train_lr": 5.464832415418173e-06, "train_min_lr": 5.464832415418173e-06, "train_loss": 0.6364539896089659, "train_loss_scale": 1112484.6952595937, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 196, "n_parameters": 94210944}
199
+ {"train_lr": 5.238746057769839e-06, "train_min_lr": 5.238746057769839e-06, "train_loss": 0.6363296502768051, "train_loss_scale": 586539.8031602709, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.1590290876967374, "epoch": 197, "n_parameters": 94210944}
200
+ {"train_lr": 5.08797787334586e-06, "train_min_lr": 5.08797787334586e-06, "train_loss": 0.6362864567433604, "train_loss_scale": 659443.055530474, "train_weight_decay": 0.049999999999998136, "train_grad_norm": Infinity, "epoch": 198, "n_parameters": 94210944}
201
+ {"train_lr": 5.012573787595717e-06, "train_min_lr": 5.012573787595717e-06, "train_loss": 0.6363362485042544, "train_loss_scale": 964311.2018058691, "train_weight_decay": 0.049999999999998136, "train_grad_norm": 0.15975790785680238, "epoch": 199, "n_parameters": 94210944}
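log.txt is one JSON object per line, giving per-epoch pretraining statistics (learning rate, reconstruction loss, AMP loss scale, weight decay, gradient norm, epoch index and parameter count). The Infinity/NaN gradient norms and the jumping train_loss_scale are what mixed-precision training typically reports when the loss scaler skips overflowing steps and readjusts; the loss itself decreases steadily from about 1.005 to 0.636 over the roughly 200 logged epochs. A small sketch for loading and summarizing the log, assuming only the line format shown above:

import json
import math

def load_log(path):
    """Read the JSON-lines training log into a list of per-epoch dicts."""
    records = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                # Python's json module accepts the bare Infinity/NaN tokens used in this log.
                records.append(json.loads(line))
    return records

def summarize(records):
    """Print loss/lr per entry and count entries whose grad norm overflowed."""
    overflowed = 0
    for rec in records:
        g = rec["train_grad_norm"]
        if isinstance(g, float) and (math.isinf(g) or math.isnan(g)):
            overflowed += 1
        print(f"epoch {rec['epoch']:3d}  loss {rec['train_loss']:.4f}  lr {rec['train_lr']:.2e}")
    print(f"{overflowed}/{len(records)} entries report inf/NaN grad norm (skipped AMP steps)")

# summarize(load_log("pretrain_videomae_base_patch16_224_tiktokactions/log.txt"))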
pretrain_videomae_base_patch16_224_tiktokactions/vit_b_tta_pretrain.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706d28f103c57057bd29d028b88950766c3a2ee8632c3f3f12fb575b8d7e867f
3
+ size 1130797105
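vit_b_tta_pretrain.pth is likewise stored through Git LFS, with the pointer recording a 1,130,797,105-byte (about 1.1 GB) checkpoint. A hedged sketch for fetching and inspecting it is below; the repo id is only inferred from the "DrQY / Video Classification" header, and the top-level "model" key is the usual VideoMAE training-script convention rather than anything confirmed by this commit, so verify both before relying on them.

# Sketch only: repo_id and the checkpoint's top-level keys are assumptions,
# inferred from the page header and common VideoMAE training-script conventions.
import torch
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="DrQY/Video-Classification",  # assumed repo id
    filename="pretrain_videomae_base_patch16_224_tiktokactions/vit_b_tta_pretrain.pth",
)

ckpt = torch.load(ckpt_path, map_location="cpu")
state_dict = ckpt.get("model", ckpt) if isinstance(ckpt, dict) else ckpt
n_params = sum(v.numel() for v in state_dict.values() if hasattr(v, "numel"))
print(f"{len(state_dict)} entries, {n_params / 1e6:.1f}M parameters")

The log above reports 94,210,944 trainable parameters, which would occupy roughly 0.38 GB in fp32; a 1.1 GB file therefore usually indicates that optimizer or scaler state was saved alongside the weights, and the sketch only inspects whatever state dict it finds.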