Amiran13 commited on
Commit
4d55c3c
1 Parent(s): be41ee7

Training in progress, step 9400, checkpoint

Browse files
checkpoint-9400/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08b169dbc301670a5a7437983e2bdb309c761725c4ad00a4e504f279b35b470
3
  size 1261975580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74fc93f66189b6167184fea73b2e23a9c55c2a1934f0bad20d21ddf0ea8fa2b2
3
  size 1261975580
checkpoint-9400/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24db415858cc06e5a8a6e3adb939b78fcf984184af2c12f89746aca356d5075d
3
  size 2490495926
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c942ac8f092000faab3d1a2b8aca0f1e57ba65e6951fad7df8f8921102fc6e
3
  size 2490495926
checkpoint-9400/scheduler.pt CHANGED
Binary files a/checkpoint-9400/scheduler.pt and b/checkpoint-9400/scheduler.pt differ
 
checkpoint-9400/trainer_state.json CHANGED
@@ -6358,146 +6358,146 @@
6358
  },
6359
  {
6360
  "epoch": 15.0,
6361
- "learning_rate": 1.920199501246883e-05,
6362
- "loss": 9.0274,
6363
  "step": 9210
6364
  },
6365
  {
6366
  "epoch": 15.02,
6367
- "learning_rate": 1.8890274314214465e-05,
6368
- "loss": 4.9159,
6369
  "step": 9220
6370
  },
6371
  {
6372
  "epoch": 15.03,
6373
- "learning_rate": 1.8578553615960096e-05,
6374
- "loss": 3.9926,
6375
  "step": 9230
6376
  },
6377
  {
6378
  "epoch": 15.05,
6379
- "learning_rate": 1.8266832917705734e-05,
6380
- "loss": 3.6284,
6381
  "step": 9240
6382
  },
6383
  {
6384
  "epoch": 15.06,
6385
- "learning_rate": 1.795511221945137e-05,
6386
- "loss": 3.4183,
6387
  "step": 9250
6388
  },
6389
  {
6390
  "epoch": 15.08,
6391
- "learning_rate": 1.7643391521197004e-05,
6392
- "loss": 3.1265,
6393
  "step": 9260
6394
  },
6395
  {
6396
  "epoch": 15.1,
6397
- "learning_rate": 1.7331670822942642e-05,
6398
- "loss": 2.7977,
6399
  "step": 9270
6400
  },
6401
  {
6402
  "epoch": 15.11,
6403
- "learning_rate": 1.7019950124688277e-05,
6404
- "loss": 2.0169,
6405
  "step": 9280
6406
  },
6407
  {
6408
  "epoch": 15.13,
6409
- "learning_rate": 1.6708229426433912e-05,
6410
- "loss": 1.3587,
6411
  "step": 9290
6412
  },
6413
  {
6414
  "epoch": 15.15,
6415
- "learning_rate": 1.639650872817955e-05,
6416
- "loss": 0.9968,
6417
  "step": 9300
6418
  },
6419
  {
6420
  "epoch": 15.15,
6421
- "eval_loss": 0.7115535140037537,
6422
- "eval_runtime": 1033.0002,
6423
- "eval_samples_per_second": 11.238,
6424
- "eval_steps_per_second": 0.703,
6425
- "eval_wer": 0.9390480317474799,
6426
  "step": 9300
6427
  },
6428
  {
6429
  "epoch": 15.16,
6430
- "learning_rate": 1.6084788029925186e-05,
6431
- "loss": 0.7074,
6432
  "step": 9310
6433
  },
6434
  {
6435
  "epoch": 15.18,
6436
- "learning_rate": 1.577306733167082e-05,
6437
- "loss": 0.5215,
6438
  "step": 9320
6439
  },
6440
  {
6441
  "epoch": 15.19,
6442
- "learning_rate": 1.5461346633416455e-05,
6443
- "loss": 0.446,
6444
  "step": 9330
6445
  },
6446
  {
6447
  "epoch": 15.21,
6448
- "learning_rate": 1.5149625935162094e-05,
6449
- "loss": 0.3456,
6450
  "step": 9340
6451
  },
6452
  {
6453
  "epoch": 15.23,
6454
- "learning_rate": 1.4837905236907729e-05,
6455
- "loss": 0.2667,
6456
  "step": 9350
6457
  },
6458
  {
6459
  "epoch": 15.24,
6460
- "learning_rate": 1.4526184538653365e-05,
6461
- "loss": 0.2608,
6462
  "step": 9360
6463
  },
6464
  {
6465
  "epoch": 15.26,
6466
- "learning_rate": 1.4214463840399e-05,
6467
- "loss": 0.1953,
6468
  "step": 9370
6469
  },
6470
  {
6471
  "epoch": 15.28,
6472
- "learning_rate": 1.3902743142144637e-05,
6473
- "loss": 0.204,
6474
  "step": 9380
6475
  },
6476
  {
6477
  "epoch": 15.29,
6478
- "learning_rate": 1.3591022443890273e-05,
6479
- "loss": 0.1784,
6480
  "step": 9390
6481
  },
6482
  {
6483
  "epoch": 15.31,
6484
- "learning_rate": 1.3279301745635908e-05,
6485
- "loss": 0.1715,
6486
  "step": 9400
6487
  },
6488
  {
6489
  "epoch": 15.31,
6490
- "eval_loss": 0.16349774599075317,
6491
- "eval_runtime": 1030.7944,
6492
- "eval_samples_per_second": 11.262,
6493
- "eval_steps_per_second": 0.704,
6494
- "eval_wer": 0.3316266021986042,
6495
  "step": 9400
6496
  }
6497
  ],
6498
  "logging_steps": 10,
6499
- "max_steps": 9824,
6500
- "num_train_epochs": 16,
6501
  "save_steps": 200,
6502
  "total_flos": 9.855811400623335e+19,
6503
  "trial_name": null,
 
6358
  },
6359
  {
6360
  "epoch": 15.0,
6361
+ "learning_rate": 7.629139072847681e-05,
6362
+ "loss": 6.7067,
6363
  "step": 9210
6364
  },
6365
  {
6366
  "epoch": 15.02,
6367
+ "learning_rate": 7.60430463576159e-05,
6368
+ "loss": 3.5775,
6369
  "step": 9220
6370
  },
6371
  {
6372
  "epoch": 15.03,
6373
+ "learning_rate": 7.579470198675495e-05,
6374
+ "loss": 3.1504,
6375
  "step": 9230
6376
  },
6377
  {
6378
  "epoch": 15.05,
6379
+ "learning_rate": 7.554635761589404e-05,
6380
+ "loss": 3.0605,
6381
  "step": 9240
6382
  },
6383
  {
6384
  "epoch": 15.06,
6385
+ "learning_rate": 7.52980132450331e-05,
6386
+ "loss": 3.0235,
6387
  "step": 9250
6388
  },
6389
  {
6390
  "epoch": 15.08,
6391
+ "learning_rate": 7.504966887417217e-05,
6392
+ "loss": 2.9939,
6393
  "step": 9260
6394
  },
6395
  {
6396
  "epoch": 15.1,
6397
+ "learning_rate": 7.480132450331126e-05,
6398
+ "loss": 2.9837,
6399
  "step": 9270
6400
  },
6401
  {
6402
  "epoch": 15.11,
6403
+ "learning_rate": 7.455298013245033e-05,
6404
+ "loss": 2.8137,
6405
  "step": 9280
6406
  },
6407
  {
6408
  "epoch": 15.13,
6409
+ "learning_rate": 7.43046357615894e-05,
6410
+ "loss": 1.9888,
6411
  "step": 9290
6412
  },
6413
  {
6414
  "epoch": 15.15,
6415
+ "learning_rate": 7.405629139072846e-05,
6416
+ "loss": 1.0386,
6417
  "step": 9300
6418
  },
6419
  {
6420
  "epoch": 15.15,
6421
+ "eval_loss": 0.43749570846557617,
6422
+ "eval_runtime": 1037.1435,
6423
+ "eval_samples_per_second": 11.193,
6424
+ "eval_steps_per_second": 0.7,
6425
+ "eval_wer": 0.679286593988049,
6426
  "step": 9300
6427
  },
6428
  {
6429
  "epoch": 15.16,
6430
+ "learning_rate": 7.380794701986755e-05,
6431
+ "loss": 0.4016,
6432
  "step": 9310
6433
  },
6434
  {
6435
  "epoch": 15.18,
6436
+ "learning_rate": 7.355960264900662e-05,
6437
+ "loss": 0.23,
6438
  "step": 9320
6439
  },
6440
  {
6441
  "epoch": 15.19,
6442
+ "learning_rate": 7.331125827814569e-05,
6443
+ "loss": 0.1834,
6444
  "step": 9330
6445
  },
6446
  {
6447
  "epoch": 15.21,
6448
+ "learning_rate": 7.306291390728476e-05,
6449
+ "loss": 0.149,
6450
  "step": 9340
6451
  },
6452
  {
6453
  "epoch": 15.23,
6454
+ "learning_rate": 7.281456953642384e-05,
6455
+ "loss": 0.1275,
6456
  "step": 9350
6457
  },
6458
  {
6459
  "epoch": 15.24,
6460
+ "learning_rate": 7.256622516556291e-05,
6461
+ "loss": 0.1483,
6462
  "step": 9360
6463
  },
6464
  {
6465
  "epoch": 15.26,
6466
+ "learning_rate": 7.231788079470198e-05,
6467
+ "loss": 0.1038,
6468
  "step": 9370
6469
  },
6470
  {
6471
  "epoch": 15.28,
6472
+ "learning_rate": 7.206953642384106e-05,
6473
+ "loss": 0.1185,
6474
  "step": 9380
6475
  },
6476
  {
6477
  "epoch": 15.29,
6478
+ "learning_rate": 7.182119205298013e-05,
6479
+ "loss": 0.1099,
6480
  "step": 9390
6481
  },
6482
  {
6483
  "epoch": 15.31,
6484
+ "learning_rate": 7.15728476821192e-05,
6485
+ "loss": 0.116,
6486
  "step": 9400
6487
  },
6488
  {
6489
  "epoch": 15.31,
6490
+ "eval_loss": 0.12657363712787628,
6491
+ "eval_runtime": 1045.2349,
6492
+ "eval_samples_per_second": 11.107,
6493
+ "eval_steps_per_second": 0.695,
6494
+ "eval_wer": 0.30423527801851935,
6495
  "step": 9400
6496
  }
6497
  ],
6498
  "logging_steps": 10,
6499
+ "max_steps": 12280,
6500
+ "num_train_epochs": 20,
6501
  "save_steps": 200,
6502
  "total_flos": 9.855811400623335e+19,
6503
  "trial_name": null,
checkpoint-9400/training_args.bin CHANGED
Binary files a/checkpoint-9400/training_args.bin and b/checkpoint-9400/training_args.bin differ