Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cab54be7cb4d2572a34bddaadf5aa44b09e63a53da564cdebcbf1c0114515cb4
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 12.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9606,11 +9606,211 @@
|
|
9606 |
"eval_samples_per_second": 759.093,
|
9607 |
"eval_steps_per_second": 12.145,
|
9608 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9609 |
}
|
9610 |
],
|
9611 |
"max_steps": 500000,
|
9612 |
"num_train_epochs": 13,
|
9613 |
-
"total_flos": 1.
|
9614 |
"trial_name": null,
|
9615 |
"trial_params": null
|
9616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 12.487894388093174,
|
5 |
+
"global_step": 490000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9606 |
"eval_samples_per_second": 759.093,
|
9607 |
"eval_steps_per_second": 12.145,
|
9608 |
"step": 480000
|
9609 |
+
},
|
9610 |
+
{
|
9611 |
+
"epoch": 12.25,
|
9612 |
+
"learning_rate": 1.1204252864868377e-05,
|
9613 |
+
"loss": 0.2669,
|
9614 |
+
"step": 480500
|
9615 |
+
},
|
9616 |
+
{
|
9617 |
+
"epoch": 12.26,
|
9618 |
+
"learning_rate": 1.1143368309400725e-05,
|
9619 |
+
"loss": 0.2672,
|
9620 |
+
"step": 481000
|
9621 |
+
},
|
9622 |
+
{
|
9623 |
+
"epoch": 12.26,
|
9624 |
+
"eval_loss": 0.8035285472869873,
|
9625 |
+
"eval_runtime": 1.597,
|
9626 |
+
"eval_samples_per_second": 626.161,
|
9627 |
+
"eval_steps_per_second": 10.019,
|
9628 |
+
"step": 481000
|
9629 |
+
},
|
9630 |
+
{
|
9631 |
+
"epoch": 12.27,
|
9632 |
+
"learning_rate": 1.1084056947009348e-05,
|
9633 |
+
"loss": 0.2671,
|
9634 |
+
"step": 481500
|
9635 |
+
},
|
9636 |
+
{
|
9637 |
+
"epoch": 12.28,
|
9638 |
+
"learning_rate": 1.1026319426313837e-05,
|
9639 |
+
"loss": 0.267,
|
9640 |
+
"step": 482000
|
9641 |
+
},
|
9642 |
+
{
|
9643 |
+
"epoch": 12.28,
|
9644 |
+
"eval_loss": 0.8039422035217285,
|
9645 |
+
"eval_runtime": 1.2756,
|
9646 |
+
"eval_samples_per_second": 783.963,
|
9647 |
+
"eval_steps_per_second": 12.543,
|
9648 |
+
"step": 482000
|
9649 |
+
},
|
9650 |
+
{
|
9651 |
+
"epoch": 12.3,
|
9652 |
+
"learning_rate": 1.097015637872247e-05,
|
9653 |
+
"loss": 0.2669,
|
9654 |
+
"step": 482500
|
9655 |
+
},
|
9656 |
+
{
|
9657 |
+
"epoch": 12.31,
|
9658 |
+
"learning_rate": 1.0915568418425301e-05,
|
9659 |
+
"loss": 0.2669,
|
9660 |
+
"step": 483000
|
9661 |
+
},
|
9662 |
+
{
|
9663 |
+
"epoch": 12.31,
|
9664 |
+
"eval_loss": 0.8074455857276917,
|
9665 |
+
"eval_runtime": 1.3487,
|
9666 |
+
"eval_samples_per_second": 741.458,
|
9667 |
+
"eval_steps_per_second": 11.863,
|
9668 |
+
"step": 483000
|
9669 |
+
},
|
9670 |
+
{
|
9671 |
+
"epoch": 12.32,
|
9672 |
+
"learning_rate": 1.0862556142387571e-05,
|
9673 |
+
"loss": 0.2673,
|
9674 |
+
"step": 483500
|
9675 |
+
},
|
9676 |
+
{
|
9677 |
+
"epoch": 12.33,
|
9678 |
+
"learning_rate": 1.081112013034298e-05,
|
9679 |
+
"loss": 0.267,
|
9680 |
+
"step": 484000
|
9681 |
+
},
|
9682 |
+
{
|
9683 |
+
"epoch": 12.33,
|
9684 |
+
"eval_loss": 0.8040180802345276,
|
9685 |
+
"eval_runtime": 1.3584,
|
9686 |
+
"eval_samples_per_second": 736.142,
|
9687 |
+
"eval_steps_per_second": 11.778,
|
9688 |
+
"step": 484000
|
9689 |
+
},
|
9690 |
+
{
|
9691 |
+
"epoch": 12.35,
|
9692 |
+
"learning_rate": 1.0761260944787561e-05,
|
9693 |
+
"loss": 0.267,
|
9694 |
+
"step": 484500
|
9695 |
+
},
|
9696 |
+
{
|
9697 |
+
"epoch": 12.36,
|
9698 |
+
"learning_rate": 1.0712979130973347e-05,
|
9699 |
+
"loss": 0.267,
|
9700 |
+
"step": 485000
|
9701 |
+
},
|
9702 |
+
{
|
9703 |
+
"epoch": 12.36,
|
9704 |
+
"eval_loss": 0.8028098940849304,
|
9705 |
+
"eval_runtime": 1.3341,
|
9706 |
+
"eval_samples_per_second": 749.592,
|
9707 |
+
"eval_steps_per_second": 11.993,
|
9708 |
+
"step": 485000
|
9709 |
+
},
|
9710 |
+
{
|
9711 |
+
"epoch": 12.37,
|
9712 |
+
"learning_rate": 1.0666275216902535e-05,
|
9713 |
+
"loss": 0.2668,
|
9714 |
+
"step": 485500
|
9715 |
+
},
|
9716 |
+
{
|
9717 |
+
"epoch": 12.39,
|
9718 |
+
"learning_rate": 1.0621149713321656e-05,
|
9719 |
+
"loss": 0.2668,
|
9720 |
+
"step": 486000
|
9721 |
+
},
|
9722 |
+
{
|
9723 |
+
"epoch": 12.39,
|
9724 |
+
"eval_loss": 0.8054640889167786,
|
9725 |
+
"eval_runtime": 1.3289,
|
9726 |
+
"eval_samples_per_second": 752.527,
|
9727 |
+
"eval_steps_per_second": 12.04,
|
9728 |
+
"step": 486000
|
9729 |
+
},
|
9730 |
+
{
|
9731 |
+
"epoch": 12.4,
|
9732 |
+
"learning_rate": 1.0577603113715964e-05,
|
9733 |
+
"loss": 0.2669,
|
9734 |
+
"step": 486500
|
9735 |
+
},
|
9736 |
+
{
|
9737 |
+
"epoch": 12.41,
|
9738 |
+
"learning_rate": 1.0535635894304106e-05,
|
9739 |
+
"loss": 0.2669,
|
9740 |
+
"step": 487000
|
9741 |
+
},
|
9742 |
+
{
|
9743 |
+
"epoch": 12.41,
|
9744 |
+
"eval_loss": 0.8062050342559814,
|
9745 |
+
"eval_runtime": 1.3114,
|
9746 |
+
"eval_samples_per_second": 762.54,
|
9747 |
+
"eval_steps_per_second": 12.201,
|
9748 |
+
"step": 487000
|
9749 |
+
},
|
9750 |
+
{
|
9751 |
+
"epoch": 12.42,
|
9752 |
+
"learning_rate": 1.0495248514032875e-05,
|
9753 |
+
"loss": 0.2669,
|
9754 |
+
"step": 487500
|
9755 |
+
},
|
9756 |
+
{
|
9757 |
+
"epoch": 12.44,
|
9758 |
+
"learning_rate": 1.045644141457218e-05,
|
9759 |
+
"loss": 0.2669,
|
9760 |
+
"step": 488000
|
9761 |
+
},
|
9762 |
+
{
|
9763 |
+
"epoch": 12.44,
|
9764 |
+
"eval_loss": 0.8053330779075623,
|
9765 |
+
"eval_runtime": 1.3085,
|
9766 |
+
"eval_samples_per_second": 764.21,
|
9767 |
+
"eval_steps_per_second": 12.227,
|
9768 |
+
"step": 488000
|
9769 |
+
},
|
9770 |
+
{
|
9771 |
+
"epoch": 12.45,
|
9772 |
+
"learning_rate": 1.0419215020310254e-05,
|
9773 |
+
"loss": 0.2671,
|
9774 |
+
"step": 488500
|
9775 |
+
},
|
9776 |
+
{
|
9777 |
+
"epoch": 12.46,
|
9778 |
+
"learning_rate": 1.0383569738348988e-05,
|
9779 |
+
"loss": 0.267,
|
9780 |
+
"step": 489000
|
9781 |
+
},
|
9782 |
+
{
|
9783 |
+
"epoch": 12.46,
|
9784 |
+
"eval_loss": 0.8089292049407959,
|
9785 |
+
"eval_runtime": 1.3008,
|
9786 |
+
"eval_samples_per_second": 768.73,
|
9787 |
+
"eval_steps_per_second": 12.3,
|
9788 |
+
"step": 489000
|
9789 |
+
},
|
9790 |
+
{
|
9791 |
+
"epoch": 12.48,
|
9792 |
+
"learning_rate": 1.0349505958499436e-05,
|
9793 |
+
"loss": 0.2671,
|
9794 |
+
"step": 489500
|
9795 |
+
},
|
9796 |
+
{
|
9797 |
+
"epoch": 12.49,
|
9798 |
+
"learning_rate": 1.0317024053277693e-05,
|
9799 |
+
"loss": 0.267,
|
9800 |
+
"step": 490000
|
9801 |
+
},
|
9802 |
+
{
|
9803 |
+
"epoch": 12.49,
|
9804 |
+
"eval_loss": 0.8080971837043762,
|
9805 |
+
"eval_runtime": 1.3533,
|
9806 |
+
"eval_samples_per_second": 738.941,
|
9807 |
+
"eval_steps_per_second": 11.823,
|
9808 |
+
"step": 490000
|
9809 |
}
|
9810 |
],
|
9811 |
"max_steps": 500000,
|
9812 |
"num_train_epochs": 13,
|
9813 |
+
"total_flos": 1.565470805299396e+22,
|
9814 |
"trial_name": null,
|
9815 |
"trial_params": null
|
9816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
|
3 |
size 102501541
|