RoyJoy commited on
Commit
84e312c
·
verified ·
1 Parent(s): c742111

Training in progress, step 378, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edce236ed9d28437ecbd25f57fa405e7811548ad06d25e7ee2e03e35cae474e7
3
  size 93608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b094796a3043040fc20becd553325e34a769103a7c5d10ae18b6158c1c1849f5
3
  size 93608
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecbd8daf8d8f6dadeaaf1894e705edcf7b4cb93a2fe07927e49405c98edc3627
3
  size 197158
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39557afe36d5e189e1e04d3e99f8bd45f3b28f96a8c1d63dc4f40cb53e95a33c
3
  size 197158
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:585086fc4d882c3b00033d27af061feb3e3aba0472ded903abd7a66ec1908052
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:119fe715ac3c860103e8a9debc4f03af2063cfd89cb1715847cde9fb4c7fc469
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06715ba1620389ca0b4addc4d3a5d2e99d004e57928b3fd13d4028448769c87c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c871a29aafc4cc2e188c2a1192be10629a3367edb40bd6f57fc891f110c0b32
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968865e3b0ca4449ed9a6d127f5f982867a1e9d962247cd559315d44137c3980
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5505f1fb01d87dde14fc81379497ded1f5f974f64cedaf83d975d1311d82a953
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4861407560531aa131144dfce65910431552a00481de242fe5bff99a269dba47
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea6038eeb267887dcd89f798edd4e04b2ce9df9d4541d4f61f2a0a5c24c73a3
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10affc9ced28dcfaf0d40e3497a97c8e7416bd057324538f99a7e1756fd84408
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac207b57c6cefba3838e335ba7ebf320ffdaee8162f1c0afc72ea9ad9f0725f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8778576094056172,
5
  "eval_steps": 42,
6
- "global_step": 336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -863,6 +863,112 @@
863
  "eval_samples_per_second": 775.492,
864
  "eval_steps_per_second": 24.366,
865
  "step": 336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  }
867
  ],
868
  "logging_steps": 3,
@@ -882,7 +988,7 @@
882
  "attributes": {}
883
  }
884
  },
885
- "total_flos": 6481603461120.0,
886
  "train_batch_size": 8,
887
  "trial_name": null,
888
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9875898105813194,
5
  "eval_steps": 42,
6
+ "global_step": 378,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
863
  "eval_samples_per_second": 775.492,
864
  "eval_steps_per_second": 24.366,
865
  "step": 336
866
+ },
867
+ {
868
+ "epoch": 0.8856956237753103,
869
+ "grad_norm": 0.01870771311223507,
870
+ "learning_rate": 2.43550361297047e-05,
871
+ "loss": 11.9178,
872
+ "step": 339
873
+ },
874
+ {
875
+ "epoch": 0.8935336381450033,
876
+ "grad_norm": 0.014654111117124557,
877
+ "learning_rate": 2.353425010381063e-05,
878
+ "loss": 11.9177,
879
+ "step": 342
880
+ },
881
+ {
882
+ "epoch": 0.9013716525146963,
883
+ "grad_norm": 0.01838817447423935,
884
+ "learning_rate": 2.272325493947257e-05,
885
+ "loss": 11.9171,
886
+ "step": 345
887
+ },
888
+ {
889
+ "epoch": 0.9092096668843893,
890
+ "grad_norm": 0.021683456376194954,
891
+ "learning_rate": 2.192235065998126e-05,
892
+ "loss": 11.9179,
893
+ "step": 348
894
+ },
895
+ {
896
+ "epoch": 0.9170476812540823,
897
+ "grad_norm": 0.02190260961651802,
898
+ "learning_rate": 2.1131833555559037e-05,
899
+ "loss": 11.917,
900
+ "step": 351
901
+ },
902
+ {
903
+ "epoch": 0.9248856956237753,
904
+ "grad_norm": 0.014892240054905415,
905
+ "learning_rate": 2.0351996073748713e-05,
906
+ "loss": 11.917,
907
+ "step": 354
908
+ },
909
+ {
910
+ "epoch": 0.9327237099934683,
911
+ "grad_norm": 0.020150186493992805,
912
+ "learning_rate": 1.9583126711224343e-05,
913
+ "loss": 11.9175,
914
+ "step": 357
915
+ },
916
+ {
917
+ "epoch": 0.9405617243631613,
918
+ "grad_norm": 0.01918022148311138,
919
+ "learning_rate": 1.8825509907063327e-05,
920
+ "loss": 11.9172,
921
+ "step": 360
922
+ },
923
+ {
924
+ "epoch": 0.9483997387328543,
925
+ "grad_norm": 0.020093288272619247,
926
+ "learning_rate": 1.807942593751973e-05,
927
+ "loss": 11.9177,
928
+ "step": 363
929
+ },
930
+ {
931
+ "epoch": 0.9562377531025473,
932
+ "grad_norm": 0.015267434529960155,
933
+ "learning_rate": 1.7345150812337564e-05,
934
+ "loss": 11.9167,
935
+ "step": 366
936
+ },
937
+ {
938
+ "epoch": 0.9640757674722403,
939
+ "grad_norm": 0.01452693808823824,
940
+ "learning_rate": 1.66229561726426e-05,
941
+ "loss": 11.9178,
942
+ "step": 369
943
+ },
944
+ {
945
+ "epoch": 0.9719137818419333,
946
+ "grad_norm": 0.01645076647400856,
947
+ "learning_rate": 1.5913109190450032e-05,
948
+ "loss": 11.9171,
949
+ "step": 372
950
+ },
951
+ {
952
+ "epoch": 0.9797517962116263,
953
+ "grad_norm": 0.02012869343161583,
954
+ "learning_rate": 1.5215872469825682e-05,
955
+ "loss": 11.9169,
956
+ "step": 375
957
+ },
958
+ {
959
+ "epoch": 0.9875898105813194,
960
+ "grad_norm": 0.015907544642686844,
961
+ "learning_rate": 1.4531503949737108e-05,
962
+ "loss": 11.9176,
963
+ "step": 378
964
+ },
965
+ {
966
+ "epoch": 0.9875898105813194,
967
+ "eval_loss": 11.916740417480469,
968
+ "eval_runtime": 3.3252,
969
+ "eval_samples_per_second": 775.289,
970
+ "eval_steps_per_second": 24.359,
971
+ "step": 378
972
  }
973
  ],
974
  "logging_steps": 3,
 
988
  "attributes": {}
989
  }
990
  },
991
+ "total_flos": 7292707799040.0,
992
  "train_batch_size": 8,
993
  "trial_name": null,
994
  "trial_params": null