nutorbit committed on
Commit
6117be7
1 Parent(s): a8cdc10

Training in progress, step 325, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:905e964f4324984e53eb7fef6e0e34dce4aee31dea6117380645f6ac838e05b2
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:711515011bc9a1118e2a2cd48fe663da98c9e69a5329f85b8db220971cc778a5
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af2138a28601906805f7004fd88f4ab2c01a76fd327549739267888df4772ddf
3
  size 42546196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125640ca1c2270dcc43bfec9b85910ec5f542fe4a3d0331918c3c607c5c1ec3a
3
  size 42546196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:767c70768d113efa4dcbdb70d4f9a493e78092c5f49e743619a1df4eb0fa7663
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760e2590ede718fcb118f4dc8d958bdba98e4ac3f71ddbb83deefe6280da4030
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a490f516731d84c5ebbd21bb085cd67e0db15849165333c36e7773f44b973d6b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e5062625ff30e631b3308d37f5a5550bd7d782dfc6daef7e3c2f2f13956f99
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.056716135740618207,
5
  "eval_steps": 1000,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1807,13 +1807,163 @@
1807
  "learning_rate": 8.185483870967743e-05,
1808
  "loss": 1.6695,
1809
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1810
  }
1811
  ],
1812
  "logging_steps": 1,
1813
  "max_steps": 501,
1814
  "num_train_epochs": 1,
1815
  "save_steps": 25,
1816
- "total_flos": 1.844334073434931e+16,
1817
  "trial_name": null,
1818
  "trial_params": null
1819
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.061442480385669726,
5
  "eval_steps": 1000,
6
+ "global_step": 325,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1807
  "learning_rate": 8.185483870967743e-05,
1808
  "loss": 1.6695,
1809
  "step": 300
1810
+ },
1811
+ {
1812
+ "epoch": 0.06,
1813
+ "learning_rate": 8.145161290322582e-05,
1814
+ "loss": 1.6221,
1815
+ "step": 301
1816
+ },
1817
+ {
1818
+ "epoch": 0.06,
1819
+ "learning_rate": 8.104838709677419e-05,
1820
+ "loss": 1.5938,
1821
+ "step": 302
1822
+ },
1823
+ {
1824
+ "epoch": 0.06,
1825
+ "learning_rate": 8.064516129032258e-05,
1826
+ "loss": 1.7077,
1827
+ "step": 303
1828
+ },
1829
+ {
1830
+ "epoch": 0.06,
1831
+ "learning_rate": 8.024193548387097e-05,
1832
+ "loss": 1.3318,
1833
+ "step": 304
1834
+ },
1835
+ {
1836
+ "epoch": 0.06,
1837
+ "learning_rate": 7.983870967741936e-05,
1838
+ "loss": 1.4808,
1839
+ "step": 305
1840
+ },
1841
+ {
1842
+ "epoch": 0.06,
1843
+ "learning_rate": 7.943548387096774e-05,
1844
+ "loss": 1.6729,
1845
+ "step": 306
1846
+ },
1847
+ {
1848
+ "epoch": 0.06,
1849
+ "learning_rate": 7.903225806451613e-05,
1850
+ "loss": 1.6133,
1851
+ "step": 307
1852
+ },
1853
+ {
1854
+ "epoch": 0.06,
1855
+ "learning_rate": 7.862903225806451e-05,
1856
+ "loss": 1.6705,
1857
+ "step": 308
1858
+ },
1859
+ {
1860
+ "epoch": 0.06,
1861
+ "learning_rate": 7.82258064516129e-05,
1862
+ "loss": 1.5732,
1863
+ "step": 309
1864
+ },
1865
+ {
1866
+ "epoch": 0.06,
1867
+ "learning_rate": 7.78225806451613e-05,
1868
+ "loss": 1.3341,
1869
+ "step": 310
1870
+ },
1871
+ {
1872
+ "epoch": 0.06,
1873
+ "learning_rate": 7.741935483870968e-05,
1874
+ "loss": 1.7081,
1875
+ "step": 311
1876
+ },
1877
+ {
1878
+ "epoch": 0.06,
1879
+ "learning_rate": 7.701612903225807e-05,
1880
+ "loss": 1.6325,
1881
+ "step": 312
1882
+ },
1883
+ {
1884
+ "epoch": 0.06,
1885
+ "learning_rate": 7.661290322580645e-05,
1886
+ "loss": 1.5734,
1887
+ "step": 313
1888
+ },
1889
+ {
1890
+ "epoch": 0.06,
1891
+ "learning_rate": 7.620967741935484e-05,
1892
+ "loss": 1.5356,
1893
+ "step": 314
1894
+ },
1895
+ {
1896
+ "epoch": 0.06,
1897
+ "learning_rate": 7.580645161290323e-05,
1898
+ "loss": 1.6806,
1899
+ "step": 315
1900
+ },
1901
+ {
1902
+ "epoch": 0.06,
1903
+ "learning_rate": 7.540322580645162e-05,
1904
+ "loss": 1.5687,
1905
+ "step": 316
1906
+ },
1907
+ {
1908
+ "epoch": 0.06,
1909
+ "learning_rate": 7.500000000000001e-05,
1910
+ "loss": 1.6411,
1911
+ "step": 317
1912
+ },
1913
+ {
1914
+ "epoch": 0.06,
1915
+ "learning_rate": 7.45967741935484e-05,
1916
+ "loss": 1.6724,
1917
+ "step": 318
1918
+ },
1919
+ {
1920
+ "epoch": 0.06,
1921
+ "learning_rate": 7.419354838709677e-05,
1922
+ "loss": 1.5039,
1923
+ "step": 319
1924
+ },
1925
+ {
1926
+ "epoch": 0.06,
1927
+ "learning_rate": 7.379032258064516e-05,
1928
+ "loss": 1.6331,
1929
+ "step": 320
1930
+ },
1931
+ {
1932
+ "epoch": 0.06,
1933
+ "learning_rate": 7.338709677419355e-05,
1934
+ "loss": 1.629,
1935
+ "step": 321
1936
+ },
1937
+ {
1938
+ "epoch": 0.06,
1939
+ "learning_rate": 7.298387096774194e-05,
1940
+ "loss": 1.6863,
1941
+ "step": 322
1942
+ },
1943
+ {
1944
+ "epoch": 0.06,
1945
+ "learning_rate": 7.258064516129033e-05,
1946
+ "loss": 1.5211,
1947
+ "step": 323
1948
+ },
1949
+ {
1950
+ "epoch": 0.06,
1951
+ "learning_rate": 7.217741935483872e-05,
1952
+ "loss": 1.6081,
1953
+ "step": 324
1954
+ },
1955
+ {
1956
+ "epoch": 0.06,
1957
+ "learning_rate": 7.177419354838711e-05,
1958
+ "loss": 1.6189,
1959
+ "step": 325
1960
  }
1961
  ],
1962
  "logging_steps": 1,
1963
  "max_steps": 501,
1964
  "num_train_epochs": 1,
1965
  "save_steps": 25,
1966
+ "total_flos": 2.003409395608781e+16,
1967
  "trial_name": null,
1968
  "trial_params": null
1969
  }