nutorbit commited on
Commit
7980799
1 Parent(s): abf3603

Training in progress, step 175, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2453c2e3294848e5fdfc24e048c1067e321df75f1c3196c442f2aba8b9f0279
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82c29a240327704fc6308528a79479382296de6cfed508d0b0162ecf997b2e81
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e077b08c719bb7b427e236996fb0c792fd3d4c0ef0d2c88800c48772d9d3dac7
3
  size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c943c3d72ba71df99f13267a644cecf346914768f99cdf1633a736691ffbbc
3
  size 42545748
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1edea7e57130e0f2957bce0ab868e371c690b8d0bc7a6dcec629366489ad56d6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a917d4671a090e9d6e24eb6118f2b87fee978309a5cb80de0030b8ff15757299
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12ad3f73c37b908f7e6d73930423bb8448afc36fc1f9dcd87868ce173eebb611
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de22cc9e265638e0274e21fc41c39f9e04260749bf4242dc266a4f3abba5955d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.028358067870309103,
5
  "eval_steps": 1000,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -907,13 +907,163 @@
907
  "learning_rate": 0.00014233870967741936,
908
  "loss": 1.4998,
909
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 501,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
- "total_flos": 9333394508070912.0,
917
  "trial_name": null,
918
  "trial_params": null
919
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03308441251536062,
5
  "eval_steps": 1000,
6
+ "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
907
  "learning_rate": 0.00014233870967741936,
908
  "loss": 1.4998,
909
  "step": 150
910
+ },
911
+ {
912
+ "epoch": 0.03,
913
+ "learning_rate": 0.00014193548387096775,
914
+ "loss": 1.6332,
915
+ "step": 151
916
+ },
917
+ {
918
+ "epoch": 0.03,
919
+ "learning_rate": 0.00014153225806451614,
920
+ "loss": 1.7242,
921
+ "step": 152
922
+ },
923
+ {
924
+ "epoch": 0.03,
925
+ "learning_rate": 0.00014112903225806453,
926
+ "loss": 1.9499,
927
+ "step": 153
928
+ },
929
+ {
930
+ "epoch": 0.03,
931
+ "learning_rate": 0.00014072580645161292,
932
+ "loss": 1.6425,
933
+ "step": 154
934
+ },
935
+ {
936
+ "epoch": 0.03,
937
+ "learning_rate": 0.0001403225806451613,
938
+ "loss": 1.7308,
939
+ "step": 155
940
+ },
941
+ {
942
+ "epoch": 0.03,
943
+ "learning_rate": 0.00013991935483870967,
944
+ "loss": 1.8306,
945
+ "step": 156
946
+ },
947
+ {
948
+ "epoch": 0.03,
949
+ "learning_rate": 0.0001395161290322581,
950
+ "loss": 1.6522,
951
+ "step": 157
952
+ },
953
+ {
954
+ "epoch": 0.03,
955
+ "learning_rate": 0.00013911290322580645,
956
+ "loss": 1.432,
957
+ "step": 158
958
+ },
959
+ {
960
+ "epoch": 0.03,
961
+ "learning_rate": 0.00013870967741935487,
962
+ "loss": 1.3832,
963
+ "step": 159
964
+ },
965
+ {
966
+ "epoch": 0.03,
967
+ "learning_rate": 0.00013830645161290323,
968
+ "loss": 1.4807,
969
+ "step": 160
970
+ },
971
+ {
972
+ "epoch": 0.03,
973
+ "learning_rate": 0.00013790322580645162,
974
+ "loss": 1.4447,
975
+ "step": 161
976
+ },
977
+ {
978
+ "epoch": 0.03,
979
+ "learning_rate": 0.0001375,
980
+ "loss": 1.6328,
981
+ "step": 162
982
+ },
983
+ {
984
+ "epoch": 0.03,
985
+ "learning_rate": 0.00013709677419354837,
986
+ "loss": 1.5871,
987
+ "step": 163
988
+ },
989
+ {
990
+ "epoch": 0.03,
991
+ "learning_rate": 0.0001366935483870968,
992
+ "loss": 1.5617,
993
+ "step": 164
994
+ },
995
+ {
996
+ "epoch": 0.03,
997
+ "learning_rate": 0.00013629032258064515,
998
+ "loss": 1.5104,
999
+ "step": 165
1000
+ },
1001
+ {
1002
+ "epoch": 0.03,
1003
+ "learning_rate": 0.00013588709677419357,
1004
+ "loss": 1.7455,
1005
+ "step": 166
1006
+ },
1007
+ {
1008
+ "epoch": 0.03,
1009
+ "learning_rate": 0.00013548387096774193,
1010
+ "loss": 1.5425,
1011
+ "step": 167
1012
+ },
1013
+ {
1014
+ "epoch": 0.03,
1015
+ "learning_rate": 0.00013508064516129032,
1016
+ "loss": 1.5297,
1017
+ "step": 168
1018
+ },
1019
+ {
1020
+ "epoch": 0.03,
1021
+ "learning_rate": 0.0001346774193548387,
1022
+ "loss": 1.6117,
1023
+ "step": 169
1024
+ },
1025
+ {
1026
+ "epoch": 0.03,
1027
+ "learning_rate": 0.0001342741935483871,
1028
+ "loss": 1.4543,
1029
+ "step": 170
1030
+ },
1031
+ {
1032
+ "epoch": 0.03,
1033
+ "learning_rate": 0.0001338709677419355,
1034
+ "loss": 1.6787,
1035
+ "step": 171
1036
+ },
1037
+ {
1038
+ "epoch": 0.03,
1039
+ "learning_rate": 0.00013346774193548388,
1040
+ "loss": 1.4363,
1041
+ "step": 172
1042
+ },
1043
+ {
1044
+ "epoch": 0.03,
1045
+ "learning_rate": 0.00013306451612903227,
1046
+ "loss": 1.7276,
1047
+ "step": 173
1048
+ },
1049
+ {
1050
+ "epoch": 0.03,
1051
+ "learning_rate": 0.00013266129032258066,
1052
+ "loss": 1.6509,
1053
+ "step": 174
1054
+ },
1055
+ {
1056
+ "epoch": 0.03,
1057
+ "learning_rate": 0.00013225806451612905,
1058
+ "loss": 1.7243,
1059
+ "step": 175
1060
  }
1061
  ],
1062
  "logging_steps": 1,
1063
  "max_steps": 501,
1064
  "num_train_epochs": 1,
1065
  "save_steps": 25,
1066
+ "total_flos": 1.0874511573762048e+16,
1067
  "trial_name": null,
1068
  "trial_params": null
1069
  }