dada22231 committed on
Commit 72fb7c6
1 Parent(s): 5790125

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4e8f834d42621a36d64c6d12be33beb7dea9ce22871ef58eb3ce0dafd962c624
+ oid sha256:f046435dabb221da39b147c8cfb3f11da3b67e8e8051a4fc212f60bed1083d2e
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:dfa527998caa0c9d8d6f044711a9ff04addca63138395cb122820a013c42bc7f
+ oid sha256:cbea3b3ffe0e52f2fdc4228af1355339af573de3f655b96dba4b57a820d46a32
 size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f06d11f5bd9ab818954d1d9cc87411a300635525aca8758d1a9cb915ffd4fcb8
+ oid sha256:ed42de57561b54a72d8064407f8f024c0714e2e87ab15e261d34ed1acad4c59c
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:03f6f8d785f55aa86241bd7662c3d0954f8429d567efeefbe0f7b4dbc579afb2
+ oid sha256:e523bdf8632f90f13687d216d1f57f673bf97c6dd57de7836b2f9680d43301fc
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4d5cc0abc20eeae957058c17a017494bee23580826e783b8089b5ead7d6012d7
+ oid sha256:ffafedff9859b32355f15c8d599f6a98e106616c6caf41b12332d77fd8f27284
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:329910af47f2bee64b83832e563a6d744fe8cb4888e88d29296e590af4864055
+ oid sha256:47a306e34ce9d992d69699f561634adebe4fbefa2de06dcb48c8b7af575a7075
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
+ oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.51513200257566,
+ "epoch": 1.039278815196394,
 "eval_steps": 25,
- "global_step": 25,
+ "global_step": 50,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -198,6 +198,189 @@
 "eval_samples_per_second": 14.312,
 "eval_steps_per_second": 3.721,
 "step": 25
+ },
+ {
+ "epoch": 0.5357372826786864,
+ "grad_norm": 1.0864949226379395,
+ "learning_rate": 5.500000000000001e-05,
+ "loss": 1.3438,
+ "step": 26
+ },
+ {
+ "epoch": 0.5563425627817128,
+ "grad_norm": 1.1756632328033447,
+ "learning_rate": 5.205685918464356e-05,
+ "loss": 1.475,
+ "step": 27
+ },
+ {
+ "epoch": 0.5769478428847392,
+ "grad_norm": 1.0386406183242798,
+ "learning_rate": 4.912632135009769e-05,
+ "loss": 1.3067,
+ "step": 28
+ },
+ {
+ "epoch": 0.5975531229877656,
+ "grad_norm": 1.084520697593689,
+ "learning_rate": 4.6220935509274235e-05,
+ "loss": 1.3079,
+ "step": 29
+ },
+ {
+ "epoch": 0.6181584030907921,
+ "grad_norm": 1.199838638305664,
+ "learning_rate": 4.3353142970386564e-05,
+ "loss": 1.2165,
+ "step": 30
+ },
+ {
+ "epoch": 0.6387636831938184,
+ "grad_norm": 1.2528841495513916,
+ "learning_rate": 4.053522406135775e-05,
+ "loss": 1.2907,
+ "step": 31
+ },
+ {
+ "epoch": 0.6593689632968448,
+ "grad_norm": 1.1232812404632568,
+ "learning_rate": 3.777924554357096e-05,
+ "loss": 1.2557,
+ "step": 32
+ },
+ {
+ "epoch": 0.6799742433998712,
+ "grad_norm": 1.1742340326309204,
+ "learning_rate": 3.509700894014496e-05,
+ "loss": 1.2703,
+ "step": 33
+ },
+ {
+ "epoch": 0.7005795235028976,
+ "grad_norm": 1.1688557863235474,
+ "learning_rate": 3.250000000000001e-05,
+ "loss": 1.2046,
+ "step": 34
+ },
+ {
+ "epoch": 0.721184803605924,
+ "grad_norm": 1.1665050983428955,
+ "learning_rate": 2.9999339514117912e-05,
+ "loss": 1.1234,
+ "step": 35
+ },
+ {
+ "epoch": 0.7417900837089504,
+ "grad_norm": 1.281623125076294,
+ "learning_rate": 2.760573569460757e-05,
+ "loss": 1.1562,
+ "step": 36
+ },
+ {
+ "epoch": 0.7623953638119768,
+ "grad_norm": 1.1950199604034424,
+ "learning_rate": 2.53294383204969e-05,
+ "loss": 1.4527,
+ "step": 37
+ },
+ {
+ "epoch": 0.7830006439150032,
+ "grad_norm": 1.2145071029663086,
+ "learning_rate": 2.3180194846605367e-05,
+ "loss": 1.4938,
+ "step": 38
+ },
+ {
+ "epoch": 0.8036059240180297,
+ "grad_norm": 1.1486097574234009,
+ "learning_rate": 2.1167208663446025e-05,
+ "loss": 1.3334,
+ "step": 39
+ },
+ {
+ "epoch": 0.824211204121056,
+ "grad_norm": 1.0132158994674683,
+ "learning_rate": 1.9299099686894423e-05,
+ "loss": 1.3069,
+ "step": 40
+ },
+ {
+ "epoch": 0.8448164842240824,
+ "grad_norm": 1.069280743598938,
+ "learning_rate": 1.758386744638546e-05,
+ "loss": 1.2995,
+ "step": 41
+ },
+ {
+ "epoch": 0.8654217643271088,
+ "grad_norm": 1.0366171598434448,
+ "learning_rate": 1.602885682970026e-05,
+ "loss": 1.1784,
+ "step": 42
+ },
+ {
+ "epoch": 0.8860270444301352,
+ "grad_norm": 0.9968025088310242,
+ "learning_rate": 1.464072663102903e-05,
+ "loss": 1.1579,
+ "step": 43
+ },
+ {
+ "epoch": 0.9066323245331617,
+ "grad_norm": 1.0707521438598633,
+ "learning_rate": 1.3425421036992098e-05,
+ "loss": 1.2042,
+ "step": 44
+ },
+ {
+ "epoch": 0.927237604636188,
+ "grad_norm": 1.0155386924743652,
+ "learning_rate": 1.2388144172720251e-05,
+ "loss": 1.164,
+ "step": 45
+ },
+ {
+ "epoch": 0.9478428847392144,
+ "grad_norm": 1.0132240056991577,
+ "learning_rate": 1.1533337816991932e-05,
+ "loss": 1.1677,
+ "step": 46
+ },
+ {
+ "epoch": 0.9684481648422408,
+ "grad_norm": 1.047545075416565,
+ "learning_rate": 1.0864662381854632e-05,
+ "loss": 1.0935,
+ "step": 47
+ },
+ {
+ "epoch": 0.9890534449452673,
+ "grad_norm": 1.1519137620925903,
+ "learning_rate": 1.0384981238178534e-05,
+ "loss": 1.0777,
+ "step": 48
+ },
+ {
+ "epoch": 1.0186735350933678,
+ "grad_norm": 3.185882806777954,
+ "learning_rate": 1.0096348454262845e-05,
+ "loss": 2.4695,
+ "step": 49
+ },
+ {
+ "epoch": 1.039278815196394,
+ "grad_norm": 1.053970456123352,
+ "learning_rate": 1e-05,
+ "loss": 1.3533,
+ "step": 50
+ },
+ {
+ "epoch": 1.039278815196394,
+ "eval_loss": 1.2204114198684692,
+ "eval_runtime": 3.4847,
+ "eval_samples_per_second": 14.348,
+ "eval_steps_per_second": 3.731,
+ "step": 50
 }
 ],
 "logging_steps": 1,
@@ -212,12 +395,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 2.984041808658432e+17,
+ "total_flos": 5.968083617316864e+17,
 "train_batch_size": 1,
 "trial_name": null,
 "trial_params": null
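
The trainer_state.json diff above records per-step metrics through step 50 and flips should_training_stop to true. As a side note, here is a minimal sketch for inspecting those logged values from this checkpoint; it assumes the standard Hugging Face Trainer layout, where the per-step records shown in the diff live under the "log_history" key.

import json

# Path taken from the file list in this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Iterate the per-step records; training steps carry "loss",
# evaluation records (steps 25 and 50 here) carry "eval_loss".
for entry in state.get("log_history", []):
    if "loss" in entry:
        print(f"step {entry['step']:>3}  loss {entry['loss']:.4f}  lr {entry['learning_rate']:.2e}")
    elif "eval_loss" in entry:
        print(f"step {entry['step']:>3}  eval_loss {entry['eval_loss']:.4f}")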