plip commited on
Commit
a41cdcc
1 Parent(s): 1f84c75

Training in progress, step 490000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eb2914271748426c715383d1a24825a4ccdc25bc6a47fbcc44c61b6e8bc6904
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab54be7cb4d2572a34bddaadf5aa44b09e63a53da564cdebcbf1c0114515cb4
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7c95c0d1c732de01b9184ed04d1f634a78d1007e2afb2aea4701f6f53ffae36
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e23eb9436e48648702bf0fff27668d87efa726ca368e875380deaf4f872988
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebbfa680b1187d22cc7371654116ef29dab3c85749ad34b845956736ad3b3612
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.233039400581069,
5
- "global_step": 480000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9606,11 +9606,211 @@
9606
  "eval_samples_per_second": 759.093,
9607
  "eval_steps_per_second": 12.145,
9608
  "step": 480000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9609
  }
9610
  ],
9611
  "max_steps": 500000,
9612
  "num_train_epochs": 13,
9613
- "total_flos": 1.533522091421946e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.487894388093174,
5
+ "global_step": 490000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9606
  "eval_samples_per_second": 759.093,
9607
  "eval_steps_per_second": 12.145,
9608
  "step": 480000
9609
+ },
9610
+ {
9611
+ "epoch": 12.25,
9612
+ "learning_rate": 1.1204252864868377e-05,
9613
+ "loss": 0.2669,
9614
+ "step": 480500
9615
+ },
9616
+ {
9617
+ "epoch": 12.26,
9618
+ "learning_rate": 1.1143368309400725e-05,
9619
+ "loss": 0.2672,
9620
+ "step": 481000
9621
+ },
9622
+ {
9623
+ "epoch": 12.26,
9624
+ "eval_loss": 0.8035285472869873,
9625
+ "eval_runtime": 1.597,
9626
+ "eval_samples_per_second": 626.161,
9627
+ "eval_steps_per_second": 10.019,
9628
+ "step": 481000
9629
+ },
9630
+ {
9631
+ "epoch": 12.27,
9632
+ "learning_rate": 1.1084056947009348e-05,
9633
+ "loss": 0.2671,
9634
+ "step": 481500
9635
+ },
9636
+ {
9637
+ "epoch": 12.28,
9638
+ "learning_rate": 1.1026319426313837e-05,
9639
+ "loss": 0.267,
9640
+ "step": 482000
9641
+ },
9642
+ {
9643
+ "epoch": 12.28,
9644
+ "eval_loss": 0.8039422035217285,
9645
+ "eval_runtime": 1.2756,
9646
+ "eval_samples_per_second": 783.963,
9647
+ "eval_steps_per_second": 12.543,
9648
+ "step": 482000
9649
+ },
9650
+ {
9651
+ "epoch": 12.3,
9652
+ "learning_rate": 1.097015637872247e-05,
9653
+ "loss": 0.2669,
9654
+ "step": 482500
9655
+ },
9656
+ {
9657
+ "epoch": 12.31,
9658
+ "learning_rate": 1.0915568418425301e-05,
9659
+ "loss": 0.2669,
9660
+ "step": 483000
9661
+ },
9662
+ {
9663
+ "epoch": 12.31,
9664
+ "eval_loss": 0.8074455857276917,
9665
+ "eval_runtime": 1.3487,
9666
+ "eval_samples_per_second": 741.458,
9667
+ "eval_steps_per_second": 11.863,
9668
+ "step": 483000
9669
+ },
9670
+ {
9671
+ "epoch": 12.32,
9672
+ "learning_rate": 1.0862556142387571e-05,
9673
+ "loss": 0.2673,
9674
+ "step": 483500
9675
+ },
9676
+ {
9677
+ "epoch": 12.33,
9678
+ "learning_rate": 1.081112013034298e-05,
9679
+ "loss": 0.267,
9680
+ "step": 484000
9681
+ },
9682
+ {
9683
+ "epoch": 12.33,
9684
+ "eval_loss": 0.8040180802345276,
9685
+ "eval_runtime": 1.3584,
9686
+ "eval_samples_per_second": 736.142,
9687
+ "eval_steps_per_second": 11.778,
9688
+ "step": 484000
9689
+ },
9690
+ {
9691
+ "epoch": 12.35,
9692
+ "learning_rate": 1.0761260944787561e-05,
9693
+ "loss": 0.267,
9694
+ "step": 484500
9695
+ },
9696
+ {
9697
+ "epoch": 12.36,
9698
+ "learning_rate": 1.0712979130973347e-05,
9699
+ "loss": 0.267,
9700
+ "step": 485000
9701
+ },
9702
+ {
9703
+ "epoch": 12.36,
9704
+ "eval_loss": 0.8028098940849304,
9705
+ "eval_runtime": 1.3341,
9706
+ "eval_samples_per_second": 749.592,
9707
+ "eval_steps_per_second": 11.993,
9708
+ "step": 485000
9709
+ },
9710
+ {
9711
+ "epoch": 12.37,
9712
+ "learning_rate": 1.0666275216902535e-05,
9713
+ "loss": 0.2668,
9714
+ "step": 485500
9715
+ },
9716
+ {
9717
+ "epoch": 12.39,
9718
+ "learning_rate": 1.0621149713321656e-05,
9719
+ "loss": 0.2668,
9720
+ "step": 486000
9721
+ },
9722
+ {
9723
+ "epoch": 12.39,
9724
+ "eval_loss": 0.8054640889167786,
9725
+ "eval_runtime": 1.3289,
9726
+ "eval_samples_per_second": 752.527,
9727
+ "eval_steps_per_second": 12.04,
9728
+ "step": 486000
9729
+ },
9730
+ {
9731
+ "epoch": 12.4,
9732
+ "learning_rate": 1.0577603113715964e-05,
9733
+ "loss": 0.2669,
9734
+ "step": 486500
9735
+ },
9736
+ {
9737
+ "epoch": 12.41,
9738
+ "learning_rate": 1.0535635894304106e-05,
9739
+ "loss": 0.2669,
9740
+ "step": 487000
9741
+ },
9742
+ {
9743
+ "epoch": 12.41,
9744
+ "eval_loss": 0.8062050342559814,
9745
+ "eval_runtime": 1.3114,
9746
+ "eval_samples_per_second": 762.54,
9747
+ "eval_steps_per_second": 12.201,
9748
+ "step": 487000
9749
+ },
9750
+ {
9751
+ "epoch": 12.42,
9752
+ "learning_rate": 1.0495248514032875e-05,
9753
+ "loss": 0.2669,
9754
+ "step": 487500
9755
+ },
9756
+ {
9757
+ "epoch": 12.44,
9758
+ "learning_rate": 1.045644141457218e-05,
9759
+ "loss": 0.2669,
9760
+ "step": 488000
9761
+ },
9762
+ {
9763
+ "epoch": 12.44,
9764
+ "eval_loss": 0.8053330779075623,
9765
+ "eval_runtime": 1.3085,
9766
+ "eval_samples_per_second": 764.21,
9767
+ "eval_steps_per_second": 12.227,
9768
+ "step": 488000
9769
+ },
9770
+ {
9771
+ "epoch": 12.45,
9772
+ "learning_rate": 1.0419215020310254e-05,
9773
+ "loss": 0.2671,
9774
+ "step": 488500
9775
+ },
9776
+ {
9777
+ "epoch": 12.46,
9778
+ "learning_rate": 1.0383569738348988e-05,
9779
+ "loss": 0.267,
9780
+ "step": 489000
9781
+ },
9782
+ {
9783
+ "epoch": 12.46,
9784
+ "eval_loss": 0.8089292049407959,
9785
+ "eval_runtime": 1.3008,
9786
+ "eval_samples_per_second": 768.73,
9787
+ "eval_steps_per_second": 12.3,
9788
+ "step": 489000
9789
+ },
9790
+ {
9791
+ "epoch": 12.48,
9792
+ "learning_rate": 1.0349505958499436e-05,
9793
+ "loss": 0.2671,
9794
+ "step": 489500
9795
+ },
9796
+ {
9797
+ "epoch": 12.49,
9798
+ "learning_rate": 1.0317024053277693e-05,
9799
+ "loss": 0.267,
9800
+ "step": 490000
9801
+ },
9802
+ {
9803
+ "epoch": 12.49,
9804
+ "eval_loss": 0.8080971837043762,
9805
+ "eval_runtime": 1.3533,
9806
+ "eval_samples_per_second": 738.941,
9807
+ "eval_steps_per_second": 11.823,
9808
+ "step": 490000
9809
  }
9810
  ],
9811
  "max_steps": 500000,
9812
  "num_train_epochs": 13,
9813
+ "total_flos": 1.565470805299396e+22,
9814
  "trial_name": null,
9815
  "trial_params": null
9816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7c95c0d1c732de01b9184ed04d1f634a78d1007e2afb2aea4701f6f53ffae36
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
3
  size 102501541