Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fb264e9aacf5ef9356c846bc014fb11cb3495daff68ebfec9001f611c4f1304
 size 522227376

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f7faaf0bc6dac6049f37f87c33fe8c933a82138fc1080ba1670ec713471c635
 size 522227376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfddbedd3f442bd455a5b9978d25b76d652fa8ae8dfd01f3d31fc08042851370
 size 1044597498

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fcc759905c574245e98d0993ca268bc91db8de18efb1af55d228b0b70e73dc4
 size 1044597498

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45fedd8c260c32228ad2c8b4d30e6b751b22cbb49fa7da1462ca78efd6942498
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:72217cd5204ddcebe28b7c1e0c0ca3a07c776eb9b8dd6e6509adae29fdf64235
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9813548922538757,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.045004500450045004,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 4.337,
       "eval_steps_per_second": 1.084,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.3211020214272e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.933211624622345,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.09000900090009001,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.337,
       "eval_steps_per_second": 1.084,
       "step": 50
+    },
+    {
+      "epoch": 0.04590459045904591,
+      "grad_norm": 5.705034255981445,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 1.3283,
+      "step": 51
+    },
+    {
+      "epoch": 0.046804680468046804,
+      "grad_norm": 3.1250901222229004,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 1.1372,
+      "step": 52
+    },
+    {
+      "epoch": 0.04770477047704771,
+      "grad_norm": 3.5653185844421387,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 1.1422,
+      "step": 53
+    },
+    {
+      "epoch": 0.048604860486048604,
+      "grad_norm": 2.5940499305725098,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 1.0782,
+      "step": 54
+    },
+    {
+      "epoch": 0.04950495049504951,
+      "grad_norm": 1.700508713722229,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 0.8352,
+      "step": 55
+    },
+    {
+      "epoch": 0.050405040504050404,
+      "grad_norm": 1.5756418704986572,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 1.3823,
+      "step": 56
+    },
+    {
+      "epoch": 0.05130513051305131,
+      "grad_norm": 1.4209038019180298,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 1.1883,
+      "step": 57
+    },
+    {
+      "epoch": 0.052205220522052204,
+      "grad_norm": 1.3939372301101685,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 1.0329,
+      "step": 58
+    },
+    {
+      "epoch": 0.05310531053105311,
+      "grad_norm": 1.2505179643630981,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 1.2086,
+      "step": 59
+    },
+    {
+      "epoch": 0.054005400540054004,
+      "grad_norm": 1.3785752058029175,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 1.0227,
+      "step": 60
+    },
+    {
+      "epoch": 0.05490549054905491,
+      "grad_norm": 1.367503046989441,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 1.609,
+      "step": 61
+    },
+    {
+      "epoch": 0.0558055805580558,
+      "grad_norm": 1.2117955684661865,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 1.3177,
+      "step": 62
+    },
+    {
+      "epoch": 0.05670567056705671,
+      "grad_norm": 1.0135844945907593,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 1.4245,
+      "step": 63
+    },
+    {
+      "epoch": 0.0576057605760576,
+      "grad_norm": 1.1287009716033936,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 1.5013,
+      "step": 64
+    },
+    {
+      "epoch": 0.05850585058505851,
+      "grad_norm": 1.2982141971588135,
+      "learning_rate": 1.5e-05,
+      "loss": 1.5079,
+      "step": 65
+    },
+    {
+      "epoch": 0.0594059405940594,
+      "grad_norm": 1.1244807243347168,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 2.046,
+      "step": 66
+    },
+    {
+      "epoch": 0.06030603060306031,
+      "grad_norm": 0.9998478293418884,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 1.3912,
+      "step": 67
+    },
+    {
+      "epoch": 0.0612061206120612,
+      "grad_norm": 1.026103138923645,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 1.6617,
+      "step": 68
+    },
+    {
+      "epoch": 0.062106210621062106,
+      "grad_norm": 1.0572633743286133,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 1.7724,
+      "step": 69
+    },
+    {
+      "epoch": 0.063006300630063,
+      "grad_norm": 1.105764389038086,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 1.4844,
+      "step": 70
+    },
+    {
+      "epoch": 0.0639063906390639,
+      "grad_norm": 0.9881569147109985,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 1.5753,
+      "step": 71
+    },
+    {
+      "epoch": 0.06480648064806481,
+      "grad_norm": 0.9760916829109192,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 1.7407,
+      "step": 72
+    },
+    {
+      "epoch": 0.06570657065706571,
+      "grad_norm": 0.998174250125885,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 1.734,
+      "step": 73
+    },
+    {
+      "epoch": 0.0666066606660666,
+      "grad_norm": 0.9511818289756775,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 1.6426,
+      "step": 74
+    },
+    {
+      "epoch": 0.0675067506750675,
+      "grad_norm": 0.995963454246521,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 1.8299,
+      "step": 75
+    },
+    {
+      "epoch": 0.06840684068406841,
+      "grad_norm": 0.9901272058486938,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 1.8434,
+      "step": 76
+    },
+    {
+      "epoch": 0.06930693069306931,
+      "grad_norm": 1.0815800428390503,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 2.1484,
+      "step": 77
+    },
+    {
+      "epoch": 0.0702070207020702,
+      "grad_norm": 1.1151647567749023,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 2.0096,
+      "step": 78
+    },
+    {
+      "epoch": 0.0711071107110711,
+      "grad_norm": 0.9471780061721802,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 1.8158,
+      "step": 79
+    },
+    {
+      "epoch": 0.07200720072007201,
+      "grad_norm": 0.8763648271560669,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 1.5679,
+      "step": 80
+    },
+    {
+      "epoch": 0.07290729072907291,
+      "grad_norm": 1.0473607778549194,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 1.9162,
+      "step": 81
+    },
+    {
+      "epoch": 0.0738073807380738,
+      "grad_norm": 0.9801396727561951,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 1.95,
+      "step": 82
+    },
+    {
+      "epoch": 0.0747074707470747,
+      "grad_norm": 1.2046276330947876,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 2.0275,
+      "step": 83
+    },
+    {
+      "epoch": 0.07560756075607561,
+      "grad_norm": 1.0073298215866089,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 1.8993,
+      "step": 84
+    },
+    {
+      "epoch": 0.07650765076507651,
+      "grad_norm": 0.9107989072799683,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 1.8347,
+      "step": 85
+    },
+    {
+      "epoch": 0.0774077407740774,
+      "grad_norm": 1.0077648162841797,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 2.1918,
+      "step": 86
+    },
+    {
+      "epoch": 0.0783078307830783,
+      "grad_norm": 0.9970296621322632,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 2.0114,
+      "step": 87
+    },
+    {
+      "epoch": 0.07920792079207921,
+      "grad_norm": 0.9443178176879883,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 1.8943,
+      "step": 88
+    },
+    {
+      "epoch": 0.08010801080108011,
+      "grad_norm": 1.1723785400390625,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 2.5215,
+      "step": 89
+    },
+    {
+      "epoch": 0.081008100810081,
+      "grad_norm": 1.1029690504074097,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 2.9212,
+      "step": 90
+    },
+    {
+      "epoch": 0.0819081908190819,
+      "grad_norm": 0.8593942523002625,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 1.9948,
+      "step": 91
+    },
+    {
+      "epoch": 0.08280828082808281,
+      "grad_norm": 1.2536730766296387,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 2.9596,
+      "step": 92
+    },
+    {
+      "epoch": 0.08370837083708371,
+      "grad_norm": 1.1047546863555908,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 3.2272,
+      "step": 93
+    },
+    {
+      "epoch": 0.0846084608460846,
+      "grad_norm": 1.0855673551559448,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 3.1168,
+      "step": 94
+    },
+    {
+      "epoch": 0.0855085508550855,
+      "grad_norm": 1.0301321744918823,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 2.7688,
+      "step": 95
+    },
+    {
+      "epoch": 0.08640864086408641,
+      "grad_norm": 0.9736518859863281,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 2.6513,
+      "step": 96
+    },
+    {
+      "epoch": 0.08730873087308731,
+      "grad_norm": 1.150550365447998,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 3.3847,
+      "step": 97
+    },
+    {
+      "epoch": 0.08820882088208822,
+      "grad_norm": 1.075913906097412,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 2.7569,
+      "step": 98
+    },
+    {
+      "epoch": 0.0891089108910891,
+      "grad_norm": 1.1728016138076782,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 2.8988,
+      "step": 99
+    },
+    {
+      "epoch": 0.09000900090009001,
+      "grad_norm": 1.0802109241485596,
+      "learning_rate": 0.0,
+      "loss": 2.9239,
+      "step": 100
+    },
+    {
+      "epoch": 0.09000900090009001,
+      "eval_loss": 0.933211624622345,
+      "eval_runtime": 107.8918,
+      "eval_samples_per_second": 4.338,
+      "eval_steps_per_second": 1.084,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.6422040428544e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null