kooff11 committed on
Commit
0dd45cd
·
verified ·
1 Parent(s): 222f917

Training in progress, step 36, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd5f0c8fe5d8de2d7c86300e4683230c6f6bdfe172fc1b08a6bb0547632b0ee
3
  size 2264640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd9a0b26c05777e154789f438aca6ab389a4b4e7f14e9e46a64a61ba224e1f9
3
  size 2264640
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:003e14264bc10d7bf94a2ca481eace96bd4bf67a4383fef49980269dd9687bd6
3
  size 1183674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61e2195bb620cac6484901a2dedffd79474f7149b90766f7b4360f25a8ea1b1
3
  size 1183674
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d25eb66897bc9db464c68a2f7e7d3cb410ee7dcd3fd0e1db4ed0c22dbb4df02
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42c1902870c57878fe9c203bc060ac2f19e644f979d654d1455c49b7c7e2ce2c
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e25540055d73f0c84acced376b2284a3c9b0e959187ff9560a1efc5e7517001
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1bd9b6d8210f2244258171cb8c34e568e2483850481e01d45c1d72bc4295d0
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33ef6f4b0dc1a0ee466ac9818efdab5291728661390e42389cd9c4df42291c75
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28651c349f447d90467a1ab072123a42ef3fcd558c7e8a7b853a87ba29324613
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7526132404181185,
5
  "eval_steps": 9,
6
- "global_step": 27,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -228,6 +228,77 @@
228
  "eval_samples_per_second": 191.767,
229
  "eval_steps_per_second": 48.338,
230
  "step": 27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  }
232
  ],
233
  "logging_steps": 1,
@@ -242,12 +313,12 @@
242
  "should_evaluate": false,
243
  "should_log": false,
244
  "should_save": true,
245
- "should_training_stop": false
246
  },
247
  "attributes": {}
248
  }
249
  },
250
- "total_flos": 4602071416307712.0,
251
  "train_batch_size": 2,
252
  "trial_name": null,
253
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0139372822299653,
5
  "eval_steps": 9,
6
+ "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
228
  "eval_samples_per_second": 191.767,
229
  "eval_steps_per_second": 48.338,
230
  "step": 27
231
+ },
232
+ {
233
+ "epoch": 0.7804878048780488,
234
+ "grad_norm": 0.6679636836051941,
235
+ "learning_rate": 1.3049554138967051e-05,
236
+ "loss": 4.5055,
237
+ "step": 28
238
+ },
239
+ {
240
+ "epoch": 0.8083623693379791,
241
+ "grad_norm": 0.7194696068763733,
242
+ "learning_rate": 1.0099138635988026e-05,
243
+ "loss": 4.5841,
244
+ "step": 29
245
+ },
246
+ {
247
+ "epoch": 0.8362369337979094,
248
+ "grad_norm": 0.7062932848930359,
249
+ "learning_rate": 7.489143213519301e-06,
250
+ "loss": 4.502,
251
+ "step": 30
252
+ },
253
+ {
254
+ "epoch": 0.8641114982578397,
255
+ "grad_norm": 0.7180935740470886,
256
+ "learning_rate": 5.241835432246889e-06,
257
+ "loss": 4.4559,
258
+ "step": 31
259
+ },
260
+ {
261
+ "epoch": 0.89198606271777,
262
+ "grad_norm": 0.702731192111969,
263
+ "learning_rate": 3.376388529782215e-06,
264
+ "loss": 4.6249,
265
+ "step": 32
266
+ },
267
+ {
268
+ "epoch": 0.9198606271777003,
269
+ "grad_norm": 0.7095968127250671,
270
+ "learning_rate": 1.908717841359048e-06,
271
+ "loss": 4.4653,
272
+ "step": 33
273
+ },
274
+ {
275
+ "epoch": 0.9477351916376306,
276
+ "grad_norm": 0.7153111696243286,
277
+ "learning_rate": 8.513450158049108e-07,
278
+ "loss": 4.542,
279
+ "step": 34
280
+ },
281
+ {
282
+ "epoch": 0.975609756097561,
283
+ "grad_norm": 0.7444437146186829,
284
+ "learning_rate": 2.1329118524827662e-07,
285
+ "loss": 4.5022,
286
+ "step": 35
287
+ },
288
+ {
289
+ "epoch": 1.0139372822299653,
290
+ "grad_norm": 1.0557011365890503,
291
+ "learning_rate": 0.0,
292
+ "loss": 6.8891,
293
+ "step": 36
294
+ },
295
+ {
296
+ "epoch": 1.0139372822299653,
297
+ "eval_loss": 4.388754367828369,
298
+ "eval_runtime": 1.2674,
299
+ "eval_samples_per_second": 190.94,
300
+ "eval_steps_per_second": 48.13,
301
+ "step": 36
302
  }
303
  ],
304
  "logging_steps": 1,
 
313
  "should_evaluate": false,
314
  "should_log": false,
315
  "should_save": true,
316
+ "should_training_stop": true
317
  },
318
  "attributes": {}
319
  }
320
  },
321
+ "total_flos": 6149077112717312.0,
322
  "train_batch_size": 2,
323
  "trial_name": null,
324
  "trial_params": null