g8a9 commited on
Commit
2f14001
1 Parent(s): 9787087

Upload folder using huggingface_hub

Browse files
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:667777c452190f8efb5e339659cf4fa836a754a548e0733c79257adbd40e4a11
3
  size 9949048046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d82f9d5f33c2348af4447f141f74c961a0cc86cbbe99b51895f32984918c809
3
  size 9949048046
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc05ba86ee883984ddcd71eb936a37e3d0b8c83ba9eff9d9f8a9a921dc0dacf9
3
  size 9904474400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb9a8ac9b537431cb9fee8e616d35dd839e5017708229a4b1a96fd3fa00ab04
3
  size 9904474400
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75fcd7c91eeaf5a795d2fcc071e019dfcfdce82b861e42071e758e82882d68b6
3
  size 6179210249
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c92dd5995c46bb0803a31cef538d65f768fa5129e8bbbd7a2604920fcf4bcc7
3
  size 6179210249
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dae77d1d86eb7275f2a9fab8fdd0f3d2e9b085c5393ceeb24294803290f3941e
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:410117ad5ab89dcf7988c84af691849f5d9e934e1f3e3d22b2264a3156b3a388
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3415aff70181afd51efc048fb7c4348442ee37b0317e7e93f002f0a59e0e3ea3
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d1f3cad6e54148c289328646571ee0d43f9671c93cc919adafe5bd27a33d05
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005352650945642216,
5
- "global_step": 8500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2850,11 +2850,215 @@
2850
  "eval_samples_per_second": 2.97,
2851
  "eval_steps_per_second": 0.743,
2852
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2853
  }
2854
  ],
2855
  "max_steps": 10000,
2856
  "num_train_epochs": 1,
2857
- "total_flos": 2.679889113792e+19,
2858
  "trial_name": null,
2859
  "trial_params": null
2860
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.006297236406637901,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2850
  "eval_samples_per_second": 2.97,
2851
  "eval_steps_per_second": 0.743,
2852
  "step": 8500
2853
+ },
2854
+ {
2855
+ "epoch": 0.01,
2856
+ "learning_rate": 0.0002,
2857
+ "loss": 1.9796,
2858
+ "step": 8550
2859
+ },
2860
+ {
2861
+ "epoch": 0.01,
2862
+ "learning_rate": 0.0002,
2863
+ "loss": 1.9943,
2864
+ "step": 8600
2865
+ },
2866
+ {
2867
+ "epoch": 0.01,
2868
+ "learning_rate": 0.0002,
2869
+ "loss": 1.9722,
2870
+ "step": 8650
2871
+ },
2872
+ {
2873
+ "epoch": 0.01,
2874
+ "learning_rate": 0.0002,
2875
+ "loss": 1.9835,
2876
+ "step": 8700
2877
+ },
2878
+ {
2879
+ "epoch": 0.01,
2880
+ "learning_rate": 0.0002,
2881
+ "loss": 1.963,
2882
+ "step": 8750
2883
+ },
2884
+ {
2885
+ "epoch": 0.01,
2886
+ "learning_rate": 0.0002,
2887
+ "loss": 2.0058,
2888
+ "step": 8800
2889
+ },
2890
+ {
2891
+ "epoch": 0.01,
2892
+ "learning_rate": 0.0002,
2893
+ "loss": 1.9804,
2894
+ "step": 8850
2895
+ },
2896
+ {
2897
+ "epoch": 0.01,
2898
+ "learning_rate": 0.0002,
2899
+ "loss": 1.9729,
2900
+ "step": 8900
2901
+ },
2902
+ {
2903
+ "epoch": 0.01,
2904
+ "learning_rate": 0.0002,
2905
+ "loss": 1.9867,
2906
+ "step": 8950
2907
+ },
2908
+ {
2909
+ "epoch": 0.01,
2910
+ "learning_rate": 0.0002,
2911
+ "loss": 1.976,
2912
+ "step": 9000
2913
+ },
2914
+ {
2915
+ "epoch": 0.01,
2916
+ "eval_loss": 2.041231155395508,
2917
+ "eval_runtime": 34089.8046,
2918
+ "eval_samples_per_second": 2.971,
2919
+ "eval_steps_per_second": 0.743,
2920
+ "step": 9000
2921
+ },
2922
+ {
2923
+ "epoch": 0.01,
2924
+ "learning_rate": 0.0002,
2925
+ "loss": 1.9773,
2926
+ "step": 9050
2927
+ },
2928
+ {
2929
+ "epoch": 0.01,
2930
+ "learning_rate": 0.0002,
2931
+ "loss": 1.964,
2932
+ "step": 9100
2933
+ },
2934
+ {
2935
+ "epoch": 0.01,
2936
+ "learning_rate": 0.0002,
2937
+ "loss": 1.9648,
2938
+ "step": 9150
2939
+ },
2940
+ {
2941
+ "epoch": 0.01,
2942
+ "learning_rate": 0.0002,
2943
+ "loss": 1.9843,
2944
+ "step": 9200
2945
+ },
2946
+ {
2947
+ "epoch": 0.01,
2948
+ "learning_rate": 0.0002,
2949
+ "loss": 1.9677,
2950
+ "step": 9250
2951
+ },
2952
+ {
2953
+ "epoch": 0.01,
2954
+ "learning_rate": 0.0002,
2955
+ "loss": 1.962,
2956
+ "step": 9300
2957
+ },
2958
+ {
2959
+ "epoch": 0.01,
2960
+ "learning_rate": 0.0002,
2961
+ "loss": 1.9953,
2962
+ "step": 9350
2963
+ },
2964
+ {
2965
+ "epoch": 0.01,
2966
+ "learning_rate": 0.0002,
2967
+ "loss": 1.9665,
2968
+ "step": 9400
2969
+ },
2970
+ {
2971
+ "epoch": 0.01,
2972
+ "learning_rate": 0.0002,
2973
+ "loss": 1.9724,
2974
+ "step": 9450
2975
+ },
2976
+ {
2977
+ "epoch": 0.01,
2978
+ "learning_rate": 0.0002,
2979
+ "loss": 1.9576,
2980
+ "step": 9500
2981
+ },
2982
+ {
2983
+ "epoch": 0.01,
2984
+ "eval_loss": 2.0336899757385254,
2985
+ "eval_runtime": 34110.7491,
2986
+ "eval_samples_per_second": 2.969,
2987
+ "eval_steps_per_second": 0.742,
2988
+ "step": 9500
2989
+ },
2990
+ {
2991
+ "epoch": 0.01,
2992
+ "learning_rate": 0.0002,
2993
+ "loss": 1.9817,
2994
+ "step": 9550
2995
+ },
2996
+ {
2997
+ "epoch": 0.01,
2998
+ "learning_rate": 0.0002,
2999
+ "loss": 1.952,
3000
+ "step": 9600
3001
+ },
3002
+ {
3003
+ "epoch": 0.01,
3004
+ "learning_rate": 0.0002,
3005
+ "loss": 1.9693,
3006
+ "step": 9650
3007
+ },
3008
+ {
3009
+ "epoch": 0.01,
3010
+ "learning_rate": 0.0002,
3011
+ "loss": 1.9827,
3012
+ "step": 9700
3013
+ },
3014
+ {
3015
+ "epoch": 0.01,
3016
+ "learning_rate": 0.0002,
3017
+ "loss": 1.979,
3018
+ "step": 9750
3019
+ },
3020
+ {
3021
+ "epoch": 0.01,
3022
+ "learning_rate": 0.0002,
3023
+ "loss": 1.9666,
3024
+ "step": 9800
3025
+ },
3026
+ {
3027
+ "epoch": 0.01,
3028
+ "learning_rate": 0.0002,
3029
+ "loss": 1.987,
3030
+ "step": 9850
3031
+ },
3032
+ {
3033
+ "epoch": 0.01,
3034
+ "learning_rate": 0.0002,
3035
+ "loss": 1.9596,
3036
+ "step": 9900
3037
+ },
3038
+ {
3039
+ "epoch": 0.01,
3040
+ "learning_rate": 0.0002,
3041
+ "loss": 1.9663,
3042
+ "step": 9950
3043
+ },
3044
+ {
3045
+ "epoch": 0.01,
3046
+ "learning_rate": 0.0002,
3047
+ "loss": 1.9783,
3048
+ "step": 10000
3049
+ },
3050
+ {
3051
+ "epoch": 0.01,
3052
+ "eval_loss": 2.0236124992370605,
3053
+ "eval_runtime": 34103.4942,
3054
+ "eval_samples_per_second": 2.97,
3055
+ "eval_steps_per_second": 0.742,
3056
+ "step": 10000
3057
  }
3058
  ],
3059
  "max_steps": 10000,
3060
  "num_train_epochs": 1,
3061
+ "total_flos": 3.2626731178471956e+19,
3062
  "trial_name": null,
3063
  "trial_params": null
3064
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:151c1e0a38693ccd36dfbee906fcda3901de363201e50826113290f62066924b
3
  size 5819
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:067b18fe5ad5111e5aadf8a8184df86f0635a802f0f07eb7c2f306830c1f3b57
3
  size 5819