RoyJoy committed on
Commit 9da4e3b · verified · 1 Parent(s): 8d1eb9d

Training in progress, step 116, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a499d923fc8b0362c81987e3172fbcd093b0effe5bdc768e70faf0811be15f5e
+oid sha256:56b2f65a1d089ee66fc7ea728b1874e441b68e560eb21bbff05c54959e5e3f21
 size 50358592
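
The binary checkpoint files in this commit are stored through Git LFS, so each diff here only swaps the pointer's `oid sha256` and `size` fields rather than showing binary content. As a minimal sketch (assuming the actual blob has already been fetched locally, e.g. with `git lfs pull`, and reusing the path and new oid from the adapter diff above), the downloaded file can be checked against its pointer:

```python
import hashlib
import os

# Path and expected values taken from the pointer diff above;
# assumes the LFS object has been materialized locally (e.g. via `git lfs pull`).
path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "56b2f65a1d089ee66fc7ea728b1874e441b68e560eb21bbff05c54959e5e3f21"
expected_size = 50358592

digest = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks to avoid loading the whole adapter into memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print("size ok:", os.path.getsize(path) == expected_size)
print("oid ok:", digest.hexdigest() == expected_oid)
```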
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc86c2c3b62f1125d16831eb4312c9bd1507cc6ded424140a435b9659edc365e
+oid sha256:390428e84d5e9a0ce532250c5fdf92c062df738731de6205f526a446d7a1f935
 size 100824826
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3603ae0f461c0309918a469bb88361702e8fe7031d469296ef29915e59cd15f4
+oid sha256:b51b4320dedee1d321d6d72d7aad1566ba8a70f284d4fc872e4d20473231aca4
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95019e029f304009516750a4bbe05ba42bcbfeab090e08f3a47061c7683127e4
+oid sha256:14579bee543d59d3eada9977676f27cb9d72db0c545f2552d8a0908f37d7fc75
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c93ea150dedd152785349606801d73a50b174319e11bd7bc4c752090cefb4196
+oid sha256:b25c181224c6a64505fbebbb62ba1315e8db34d2563446a817bb262831c728bb
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5e3858bb07bc0e1a65c01d5084480d194b61020c06fc22f6fa0708b202f0e34
+oid sha256:45b78dae73959737264e6eed60625c40d91131768deebb2ac98a0e465f131b70
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba08b4a1855b48cf82b864725b4104527747dc0aacfb5a9d0a509cb25e565a06
+oid sha256:d46fb3cc6e0ccde808ead2f08288d1a1004041779e54159d7604f576ec806133
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6968957781791687,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.5901374292643493,
+  "epoch": 3.005658852061439,
   "eval_steps": 25,
-  "global_step": 100,
+  "global_step": 116,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -747,6 +747,118 @@
       "eval_samples_per_second": 79.714,
       "eval_steps_per_second": 3.189,
       "step": 100
+    },
+    {
+      "epoch": 2.6160064672594987,
+      "grad_norm": 18.59197235107422,
+      "learning_rate": 1.4580325505138468e-05,
+      "loss": 22.2291,
+      "step": 101
+    },
+    {
+      "epoch": 2.6418755052546485,
+      "grad_norm": 8.891879081726074,
+      "learning_rate": 1.272815741354723e-05,
+      "loss": 22.3545,
+      "step": 102
+    },
+    {
+      "epoch": 2.667744543249798,
+      "grad_norm": 8.696002006530762,
+      "learning_rate": 1.0996558885224993e-05,
+      "loss": 22.1393,
+      "step": 103
+    },
+    {
+      "epoch": 2.6936135812449473,
+      "grad_norm": 6.413660049438477,
+      "learning_rate": 9.387050829881865e-06,
+      "loss": 22.9287,
+      "step": 104
+    },
+    {
+      "epoch": 2.719482619240097,
+      "grad_norm": 13.523515701293945,
+      "learning_rate": 7.90104692187129e-06,
+      "loss": 22.8497,
+      "step": 105
+    },
+    {
+      "epoch": 2.7453516572352465,
+      "grad_norm": 8.049901008605957,
+      "learning_rate": 6.539852358521636e-06,
+      "loss": 22.0333,
+      "step": 106
+    },
+    {
+      "epoch": 2.7712206952303964,
+      "grad_norm": 8.899979591369629,
+      "learning_rate": 5.304662713746205e-06,
+      "loss": 22.1953,
+      "step": 107
+    },
+    {
+      "epoch": 2.7970897332255458,
+      "grad_norm": 9.008318901062012,
+      "learning_rate": 4.1965628879383875e-06,
+      "loss": 22.1504,
+      "step": 108
+    },
+    {
+      "epoch": 2.822958771220695,
+      "grad_norm": 13.858719825744629,
+      "learning_rate": 3.2165261550743946e-06,
+      "loss": 22.0938,
+      "step": 109
+    },
+    {
+      "epoch": 2.8488278092158446,
+      "grad_norm": 6.062250137329102,
+      "learning_rate": 2.3654133078604753e-06,
+      "loss": 22.1504,
+      "step": 110
+    },
+    {
+      "epoch": 2.8746968472109944,
+      "grad_norm": 5.169662952423096,
+      "learning_rate": 1.643971901675395e-06,
+      "loss": 22.1182,
+      "step": 111
+    },
+    {
+      "epoch": 2.900565885206144,
+      "grad_norm": 9.194791793823242,
+      "learning_rate": 1.0528355979724624e-06,
+      "loss": 22.0225,
+      "step": 112
+    },
+    {
+      "epoch": 2.9264349232012936,
+      "grad_norm": 5.832217693328857,
+      "learning_rate": 5.925236077174655e-07,
+      "loss": 22.2256,
+      "step": 113
+    },
+    {
+      "epoch": 2.952303961196443,
+      "grad_norm": 3.4554474353790283,
+      "learning_rate": 2.634402353517973e-07,
+      "loss": 22.0733,
+      "step": 114
+    },
+    {
+      "epoch": 2.9781729991915924,
+      "grad_norm": 10.36470890045166,
+      "learning_rate": 6.587452368084779e-08,
+      "loss": 22.0811,
+      "step": 115
+    },
+    {
+      "epoch": 3.005658852061439,
+      "grad_norm": 10.728325843811035,
+      "learning_rate": 0.0,
+      "loss": 21.647,
+      "step": 116
+    }
   ],
   "logging_steps": 1,
@@ -770,12 +882,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.020403094913024e+17,
+  "total_flos": 1.1836675900991078e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null