jssky commited on
Commit
cfdd947
·
verified ·
1 Parent(s): b28e495

Training in progress, step 32, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e9647282502e43ed681430e77823b4ba804e714b8401e5bf5d12cf717c91f0d
3
  size 239536272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7a5c80b259ec1b4717d71fee50ed46000459382a99c2cea50a99f7a52f2f96
3
  size 239536272
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d44c4094b530e951087f57d38dc24baf31ab1cafec4c1d70c9e8554ddfe8702
3
  size 479362682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38b26a07a23bb8971ba6a069540914cf8be302f2205ecfaf24a07b6c3d166f6
3
  size 479362682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7f529fa0ca46a2fc7df048e62f4a6a160ee0abc9228610baf1f873e581b575
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b83d60e3bd7c770c94c7581c71569dd7212f834da6dd4807638eee44db31a8
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d7dfe37fd91c64517ee3e36c11c3fae0466d740fca64d2cac13b48d07f91cac
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fce5ad4cacf86f298445bc321ddca0c2867c0596bccb538356cad22111ad6ee
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0b0120a70950f4d7cbc6380fce58797e960908be06fcb451a5a9265f2ac3436
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccab18dc58cf8768b739d1ca503830678fbafcb8080ce561bbcfd691e8d76982
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dab923b8546218d478e39f5a5fe9230eae2e7db16d726a0af741de61e1ae86b2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a98567e49c9f240d2b97202509959994309a2039698fdd420b0fc60f705020
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b21ff4242f9f91d137e15c0f4fbae1aea3032edb3ddf39816bcc96ae6607b8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a387b210316633d05904a757c86d6d5b1f723a9084c82125f530fbd1f5f4ae32
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6539015173912048,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 2.364705882352941,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,55 @@
198
  "eval_samples_per_second": 57.894,
199
  "eval_steps_per_second": 7.237,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +270,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 1.1201992933874074e+17,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6539015173912048,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
+ "epoch": 3.0294117647058822,
5
  "eval_steps": 25,
6
+ "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 57.894,
199
  "eval_steps_per_second": 7.237,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 2.458823529411765,
204
+ "grad_norm": 0.14100024104118347,
205
+ "learning_rate": 9.549150281252633e-06,
206
+ "loss": 0.5238,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 2.552941176470588,
211
+ "grad_norm": 0.09745590388774872,
212
+ "learning_rate": 6.698729810778065e-06,
213
+ "loss": 0.4364,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 2.6470588235294117,
218
+ "grad_norm": 0.12731899321079254,
219
+ "learning_rate": 4.322727117869951e-06,
220
+ "loss": 0.4703,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 2.7411764705882353,
225
+ "grad_norm": 0.10433598607778549,
226
+ "learning_rate": 2.4471741852423237e-06,
227
+ "loss": 0.3853,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 2.835294117647059,
232
+ "grad_norm": 0.11452948302030563,
233
+ "learning_rate": 1.0926199633097157e-06,
234
+ "loss": 0.5038,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 2.9294117647058826,
239
+ "grad_norm": 0.13775578141212463,
240
+ "learning_rate": 2.7390523158633554e-07,
241
+ "loss": 0.5232,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 3.0294117647058822,
246
+ "grad_norm": 0.14986667037010193,
247
+ "learning_rate": 0.0,
248
+ "loss": 0.6791,
249
+ "step": 32
250
  }
251
  ],
252
  "logging_steps": 1,
 
270
  "should_evaluate": false,
271
  "should_log": false,
272
  "should_save": true,
273
+ "should_training_stop": true
274
  },
275
  "attributes": {}
276
  }
277
  },
278
+ "total_flos": 1.4336879040554598e+17,
279
  "train_batch_size": 2,
280
  "trial_name": null,
281
  "trial_params": null