Training in progress, step 9400, checkpoint
Browse files
checkpoint-9400/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1261975580
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74fc93f66189b6167184fea73b2e23a9c55c2a1934f0bad20d21ddf0ea8fa2b2
|
3 |
size 1261975580
|
checkpoint-9400/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490495926
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64c942ac8f092000faab3d1a2b8aca0f1e57ba65e6951fad7df8f8921102fc6e
|
3 |
size 2490495926
|
checkpoint-9400/scheduler.pt
CHANGED
Binary files a/checkpoint-9400/scheduler.pt and b/checkpoint-9400/scheduler.pt differ
|
|
checkpoint-9400/trainer_state.json
CHANGED
@@ -6358,146 +6358,146 @@
|
|
6358 |
},
|
6359 |
{
|
6360 |
"epoch": 15.0,
|
6361 |
-
"learning_rate":
|
6362 |
-
"loss":
|
6363 |
"step": 9210
|
6364 |
},
|
6365 |
{
|
6366 |
"epoch": 15.02,
|
6367 |
-
"learning_rate":
|
6368 |
-
"loss":
|
6369 |
"step": 9220
|
6370 |
},
|
6371 |
{
|
6372 |
"epoch": 15.03,
|
6373 |
-
"learning_rate":
|
6374 |
-
"loss": 3.
|
6375 |
"step": 9230
|
6376 |
},
|
6377 |
{
|
6378 |
"epoch": 15.05,
|
6379 |
-
"learning_rate":
|
6380 |
-
"loss": 3.
|
6381 |
"step": 9240
|
6382 |
},
|
6383 |
{
|
6384 |
"epoch": 15.06,
|
6385 |
-
"learning_rate":
|
6386 |
-
"loss": 3.
|
6387 |
"step": 9250
|
6388 |
},
|
6389 |
{
|
6390 |
"epoch": 15.08,
|
6391 |
-
"learning_rate":
|
6392 |
-
"loss":
|
6393 |
"step": 9260
|
6394 |
},
|
6395 |
{
|
6396 |
"epoch": 15.1,
|
6397 |
-
"learning_rate":
|
6398 |
-
"loss": 2.
|
6399 |
"step": 9270
|
6400 |
},
|
6401 |
{
|
6402 |
"epoch": 15.11,
|
6403 |
-
"learning_rate":
|
6404 |
-
"loss": 2.
|
6405 |
"step": 9280
|
6406 |
},
|
6407 |
{
|
6408 |
"epoch": 15.13,
|
6409 |
-
"learning_rate":
|
6410 |
-
"loss": 1.
|
6411 |
"step": 9290
|
6412 |
},
|
6413 |
{
|
6414 |
"epoch": 15.15,
|
6415 |
-
"learning_rate":
|
6416 |
-
"loss":
|
6417 |
"step": 9300
|
6418 |
},
|
6419 |
{
|
6420 |
"epoch": 15.15,
|
6421 |
-
"eval_loss": 0.
|
6422 |
-
"eval_runtime":
|
6423 |
-
"eval_samples_per_second": 11.
|
6424 |
-
"eval_steps_per_second": 0.
|
6425 |
-
"eval_wer": 0.
|
6426 |
"step": 9300
|
6427 |
},
|
6428 |
{
|
6429 |
"epoch": 15.16,
|
6430 |
-
"learning_rate":
|
6431 |
-
"loss": 0.
|
6432 |
"step": 9310
|
6433 |
},
|
6434 |
{
|
6435 |
"epoch": 15.18,
|
6436 |
-
"learning_rate":
|
6437 |
-
"loss": 0.
|
6438 |
"step": 9320
|
6439 |
},
|
6440 |
{
|
6441 |
"epoch": 15.19,
|
6442 |
-
"learning_rate":
|
6443 |
-
"loss": 0.
|
6444 |
"step": 9330
|
6445 |
},
|
6446 |
{
|
6447 |
"epoch": 15.21,
|
6448 |
-
"learning_rate":
|
6449 |
-
"loss": 0.
|
6450 |
"step": 9340
|
6451 |
},
|
6452 |
{
|
6453 |
"epoch": 15.23,
|
6454 |
-
"learning_rate":
|
6455 |
-
"loss": 0.
|
6456 |
"step": 9350
|
6457 |
},
|
6458 |
{
|
6459 |
"epoch": 15.24,
|
6460 |
-
"learning_rate":
|
6461 |
-
"loss": 0.
|
6462 |
"step": 9360
|
6463 |
},
|
6464 |
{
|
6465 |
"epoch": 15.26,
|
6466 |
-
"learning_rate":
|
6467 |
-
"loss": 0.
|
6468 |
"step": 9370
|
6469 |
},
|
6470 |
{
|
6471 |
"epoch": 15.28,
|
6472 |
-
"learning_rate":
|
6473 |
-
"loss": 0.
|
6474 |
"step": 9380
|
6475 |
},
|
6476 |
{
|
6477 |
"epoch": 15.29,
|
6478 |
-
"learning_rate":
|
6479 |
-
"loss": 0.
|
6480 |
"step": 9390
|
6481 |
},
|
6482 |
{
|
6483 |
"epoch": 15.31,
|
6484 |
-
"learning_rate":
|
6485 |
-
"loss": 0.
|
6486 |
"step": 9400
|
6487 |
},
|
6488 |
{
|
6489 |
"epoch": 15.31,
|
6490 |
-
"eval_loss": 0.
|
6491 |
-
"eval_runtime":
|
6492 |
-
"eval_samples_per_second": 11.
|
6493 |
-
"eval_steps_per_second": 0.
|
6494 |
-
"eval_wer": 0.
|
6495 |
"step": 9400
|
6496 |
}
|
6497 |
],
|
6498 |
"logging_steps": 10,
|
6499 |
-
"max_steps":
|
6500 |
-
"num_train_epochs":
|
6501 |
"save_steps": 200,
|
6502 |
"total_flos": 9.855811400623335e+19,
|
6503 |
"trial_name": null,
|
|
|
6358 |
},
|
6359 |
{
|
6360 |
"epoch": 15.0,
|
6361 |
+
"learning_rate": 7.629139072847681e-05,
|
6362 |
+
"loss": 6.7067,
|
6363 |
"step": 9210
|
6364 |
},
|
6365 |
{
|
6366 |
"epoch": 15.02,
|
6367 |
+
"learning_rate": 7.60430463576159e-05,
|
6368 |
+
"loss": 3.5775,
|
6369 |
"step": 9220
|
6370 |
},
|
6371 |
{
|
6372 |
"epoch": 15.03,
|
6373 |
+
"learning_rate": 7.579470198675495e-05,
|
6374 |
+
"loss": 3.1504,
|
6375 |
"step": 9230
|
6376 |
},
|
6377 |
{
|
6378 |
"epoch": 15.05,
|
6379 |
+
"learning_rate": 7.554635761589404e-05,
|
6380 |
+
"loss": 3.0605,
|
6381 |
"step": 9240
|
6382 |
},
|
6383 |
{
|
6384 |
"epoch": 15.06,
|
6385 |
+
"learning_rate": 7.52980132450331e-05,
|
6386 |
+
"loss": 3.0235,
|
6387 |
"step": 9250
|
6388 |
},
|
6389 |
{
|
6390 |
"epoch": 15.08,
|
6391 |
+
"learning_rate": 7.504966887417217e-05,
|
6392 |
+
"loss": 2.9939,
|
6393 |
"step": 9260
|
6394 |
},
|
6395 |
{
|
6396 |
"epoch": 15.1,
|
6397 |
+
"learning_rate": 7.480132450331126e-05,
|
6398 |
+
"loss": 2.9837,
|
6399 |
"step": 9270
|
6400 |
},
|
6401 |
{
|
6402 |
"epoch": 15.11,
|
6403 |
+
"learning_rate": 7.455298013245033e-05,
|
6404 |
+
"loss": 2.8137,
|
6405 |
"step": 9280
|
6406 |
},
|
6407 |
{
|
6408 |
"epoch": 15.13,
|
6409 |
+
"learning_rate": 7.43046357615894e-05,
|
6410 |
+
"loss": 1.9888,
|
6411 |
"step": 9290
|
6412 |
},
|
6413 |
{
|
6414 |
"epoch": 15.15,
|
6415 |
+
"learning_rate": 7.405629139072846e-05,
|
6416 |
+
"loss": 1.0386,
|
6417 |
"step": 9300
|
6418 |
},
|
6419 |
{
|
6420 |
"epoch": 15.15,
|
6421 |
+
"eval_loss": 0.43749570846557617,
|
6422 |
+
"eval_runtime": 1037.1435,
|
6423 |
+
"eval_samples_per_second": 11.193,
|
6424 |
+
"eval_steps_per_second": 0.7,
|
6425 |
+
"eval_wer": 0.679286593988049,
|
6426 |
"step": 9300
|
6427 |
},
|
6428 |
{
|
6429 |
"epoch": 15.16,
|
6430 |
+
"learning_rate": 7.380794701986755e-05,
|
6431 |
+
"loss": 0.4016,
|
6432 |
"step": 9310
|
6433 |
},
|
6434 |
{
|
6435 |
"epoch": 15.18,
|
6436 |
+
"learning_rate": 7.355960264900662e-05,
|
6437 |
+
"loss": 0.23,
|
6438 |
"step": 9320
|
6439 |
},
|
6440 |
{
|
6441 |
"epoch": 15.19,
|
6442 |
+
"learning_rate": 7.331125827814569e-05,
|
6443 |
+
"loss": 0.1834,
|
6444 |
"step": 9330
|
6445 |
},
|
6446 |
{
|
6447 |
"epoch": 15.21,
|
6448 |
+
"learning_rate": 7.306291390728476e-05,
|
6449 |
+
"loss": 0.149,
|
6450 |
"step": 9340
|
6451 |
},
|
6452 |
{
|
6453 |
"epoch": 15.23,
|
6454 |
+
"learning_rate": 7.281456953642384e-05,
|
6455 |
+
"loss": 0.1275,
|
6456 |
"step": 9350
|
6457 |
},
|
6458 |
{
|
6459 |
"epoch": 15.24,
|
6460 |
+
"learning_rate": 7.256622516556291e-05,
|
6461 |
+
"loss": 0.1483,
|
6462 |
"step": 9360
|
6463 |
},
|
6464 |
{
|
6465 |
"epoch": 15.26,
|
6466 |
+
"learning_rate": 7.231788079470198e-05,
|
6467 |
+
"loss": 0.1038,
|
6468 |
"step": 9370
|
6469 |
},
|
6470 |
{
|
6471 |
"epoch": 15.28,
|
6472 |
+
"learning_rate": 7.206953642384106e-05,
|
6473 |
+
"loss": 0.1185,
|
6474 |
"step": 9380
|
6475 |
},
|
6476 |
{
|
6477 |
"epoch": 15.29,
|
6478 |
+
"learning_rate": 7.182119205298013e-05,
|
6479 |
+
"loss": 0.1099,
|
6480 |
"step": 9390
|
6481 |
},
|
6482 |
{
|
6483 |
"epoch": 15.31,
|
6484 |
+
"learning_rate": 7.15728476821192e-05,
|
6485 |
+
"loss": 0.116,
|
6486 |
"step": 9400
|
6487 |
},
|
6488 |
{
|
6489 |
"epoch": 15.31,
|
6490 |
+
"eval_loss": 0.12657363712787628,
|
6491 |
+
"eval_runtime": 1045.2349,
|
6492 |
+
"eval_samples_per_second": 11.107,
|
6493 |
+
"eval_steps_per_second": 0.695,
|
6494 |
+
"eval_wer": 0.30423527801851935,
|
6495 |
"step": 9400
|
6496 |
}
|
6497 |
],
|
6498 |
"logging_steps": 10,
|
6499 |
+
"max_steps": 12280,
|
6500 |
+
"num_train_epochs": 20,
|
6501 |
"save_steps": 200,
|
6502 |
"total_flos": 9.855811400623335e+19,
|
6503 |
"trial_name": null,
|
checkpoint-9400/training_args.bin
CHANGED
Binary files a/checkpoint-9400/training_args.bin and b/checkpoint-9400/training_args.bin differ
|
|