nutorbit committed on
Commit
74004f8
1 Parent(s): 12ec1a2

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f91a032091b4d44d60bb0e09eb86f9d305f9365256cbdf069a77d1b7cfcaa95
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1792e16999d20aab7c7f3147de13f2f2744a71aa4369a0ff55883d494181e4
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d592ae1b646b7d407d98d03dc5e76455a2198a6440aa75c55be4c11c27328d76
3
  size 42546196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1c49f99d095521a72089fd929683f6a057d9cc2a6a525e29dafa17d1a3263a
3
  size 42546196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d04fa6ba62e6d2693994357040d1f8b0df1a2292ac61d59108919d588e97608
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1c514f7fd02541ee711c621db702524520ab84a034f9ac00217461e361da06
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:829afd908950b55f19acd29cd95ce4b7eeb6e3a96dff858cc79db502814ed864
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fce62aa16b9baa5f49dced04db0ff1567e194dc4daa50ed8d0d1098cd28a3b6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08980054825597883,
5
  "eval_steps": 1000,
6
- "global_step": 475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2857,13 +2857,163 @@
2857
  "learning_rate": 1.129032258064516e-05,
2858
  "loss": 1.5357,
2859
  "step": 475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2860
  }
2861
  ],
2862
  "logging_steps": 1,
2863
  "max_steps": 501,
2864
  "num_train_epochs": 1,
2865
  "save_steps": 25,
2866
- "total_flos": 2.920625653658419e+16,
2867
  "trial_name": null,
2868
  "trial_params": null
2869
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.09452689290103035,
5
  "eval_steps": 1000,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2857
  "learning_rate": 1.129032258064516e-05,
2858
  "loss": 1.5357,
2859
  "step": 475
2860
+ },
2861
+ {
2862
+ "epoch": 0.09,
2863
+ "learning_rate": 1.0887096774193549e-05,
2864
+ "loss": 1.7497,
2865
+ "step": 476
2866
+ },
2867
+ {
2868
+ "epoch": 0.09,
2869
+ "learning_rate": 1.0483870967741936e-05,
2870
+ "loss": 1.4821,
2871
+ "step": 477
2872
+ },
2873
+ {
2874
+ "epoch": 0.09,
2875
+ "learning_rate": 1.0080645161290323e-05,
2876
+ "loss": 1.5097,
2877
+ "step": 478
2878
+ },
2879
+ {
2880
+ "epoch": 0.09,
2881
+ "learning_rate": 9.67741935483871e-06,
2882
+ "loss": 1.5417,
2883
+ "step": 479
2884
+ },
2885
+ {
2886
+ "epoch": 0.09,
2887
+ "learning_rate": 9.274193548387097e-06,
2888
+ "loss": 1.715,
2889
+ "step": 480
2890
+ },
2891
+ {
2892
+ "epoch": 0.09,
2893
+ "learning_rate": 8.870967741935484e-06,
2894
+ "loss": 1.5074,
2895
+ "step": 481
2896
+ },
2897
+ {
2898
+ "epoch": 0.09,
2899
+ "learning_rate": 8.46774193548387e-06,
2900
+ "loss": 1.9941,
2901
+ "step": 482
2902
+ },
2903
+ {
2904
+ "epoch": 0.09,
2905
+ "learning_rate": 8.064516129032258e-06,
2906
+ "loss": 1.4365,
2907
+ "step": 483
2908
+ },
2909
+ {
2910
+ "epoch": 0.09,
2911
+ "learning_rate": 7.661290322580646e-06,
2912
+ "loss": 1.6494,
2913
+ "step": 484
2914
+ },
2915
+ {
2916
+ "epoch": 0.09,
2917
+ "learning_rate": 7.258064516129033e-06,
2918
+ "loss": 1.3565,
2919
+ "step": 485
2920
+ },
2921
+ {
2922
+ "epoch": 0.09,
2923
+ "learning_rate": 6.854838709677419e-06,
2924
+ "loss": 1.7097,
2925
+ "step": 486
2926
+ },
2927
+ {
2928
+ "epoch": 0.09,
2929
+ "learning_rate": 6.451612903225806e-06,
2930
+ "loss": 1.5873,
2931
+ "step": 487
2932
+ },
2933
+ {
2934
+ "epoch": 0.09,
2935
+ "learning_rate": 6.048387096774194e-06,
2936
+ "loss": 1.3527,
2937
+ "step": 488
2938
+ },
2939
+ {
2940
+ "epoch": 0.09,
2941
+ "learning_rate": 5.64516129032258e-06,
2942
+ "loss": 1.5721,
2943
+ "step": 489
2944
+ },
2945
+ {
2946
+ "epoch": 0.09,
2947
+ "learning_rate": 5.241935483870968e-06,
2948
+ "loss": 1.6132,
2949
+ "step": 490
2950
+ },
2951
+ {
2952
+ "epoch": 0.09,
2953
+ "learning_rate": 4.838709677419355e-06,
2954
+ "loss": 1.8699,
2955
+ "step": 491
2956
+ },
2957
+ {
2958
+ "epoch": 0.09,
2959
+ "learning_rate": 4.435483870967742e-06,
2960
+ "loss": 1.5994,
2961
+ "step": 492
2962
+ },
2963
+ {
2964
+ "epoch": 0.09,
2965
+ "learning_rate": 4.032258064516129e-06,
2966
+ "loss": 1.5774,
2967
+ "step": 493
2968
+ },
2969
+ {
2970
+ "epoch": 0.09,
2971
+ "learning_rate": 3.6290322580645166e-06,
2972
+ "loss": 1.7266,
2973
+ "step": 494
2974
+ },
2975
+ {
2976
+ "epoch": 0.09,
2977
+ "learning_rate": 3.225806451612903e-06,
2978
+ "loss": 1.4401,
2979
+ "step": 495
2980
+ },
2981
+ {
2982
+ "epoch": 0.09,
2983
+ "learning_rate": 2.82258064516129e-06,
2984
+ "loss": 1.6521,
2985
+ "step": 496
2986
+ },
2987
+ {
2988
+ "epoch": 0.09,
2989
+ "learning_rate": 2.4193548387096776e-06,
2990
+ "loss": 1.5495,
2991
+ "step": 497
2992
+ },
2993
+ {
2994
+ "epoch": 0.09,
2995
+ "learning_rate": 2.0161290322580646e-06,
2996
+ "loss": 1.7499,
2997
+ "step": 498
2998
+ },
2999
+ {
3000
+ "epoch": 0.09,
3001
+ "learning_rate": 1.6129032258064516e-06,
3002
+ "loss": 1.393,
3003
+ "step": 499
3004
+ },
3005
+ {
3006
+ "epoch": 0.09,
3007
+ "learning_rate": 1.2096774193548388e-06,
3008
+ "loss": 1.5208,
3009
+ "step": 500
3010
  }
3011
  ],
3012
  "logging_steps": 1,
3013
  "max_steps": 501,
3014
  "num_train_epochs": 1,
3015
  "save_steps": 25,
3016
+ "total_flos": 3.0694656581369856e+16,
3017
  "trial_name": null,
3018
  "trial_params": null
3019
  }