motheecreator commited on
Commit
3e48b92
1 Parent(s): 16ce9af

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 6.0,
3
- "eval_accuracy": 0.7839129072681704,
4
- "eval_loss": 0.62819904088974,
5
- "eval_runtime": 233.6957,
6
- "eval_samples_per_second": 109.27,
7
- "eval_steps_per_second": 3.415,
8
- "total_flos": 4.749417900430118e+19,
9
- "train_loss": 0.321556950521748,
10
- "train_runtime": 10205.5984,
11
- "train_samples_per_second": 60.052,
12
- "train_steps_per_second": 0.469
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.8571428571428571,
4
+ "eval_loss": 0.4352613687515259,
5
+ "eval_runtime": 236.2313,
6
+ "eval_samples_per_second": 108.097,
7
+ "eval_steps_per_second": 3.378,
8
+ "total_flos": 7.915696500716863e+19,
9
+ "train_loss": 0.14746467811720712,
10
+ "train_runtime": 9966.943,
11
+ "train_samples_per_second": 102.483,
12
+ "train_steps_per_second": 0.801
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.0,
3
- "eval_accuracy": 0.7839129072681704,
4
- "eval_loss": 0.62819904088974,
5
- "eval_runtime": 233.6957,
6
- "eval_samples_per_second": 109.27,
7
- "eval_steps_per_second": 3.415
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.8571428571428571,
4
+ "eval_loss": 0.4352613687515259,
5
+ "eval_runtime": 236.2313,
6
+ "eval_samples_per_second": 108.097,
7
+ "eval_steps_per_second": 3.378
8
  }
runs/May24_23-40-34_371088ce566e/events.out.tfevents.1716605763.371088ce566e.42.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785bf90b2337263bd8b02e461767a421e64f712e790cc3ac3ae3b95edbf67a8f
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.0,
3
- "total_flos": 4.749417900430118e+19,
4
- "train_loss": 0.321556950521748,
5
- "train_runtime": 10205.5984,
6
- "train_samples_per_second": 60.052,
7
- "train_steps_per_second": 0.469
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 7.915696500716863e+19,
4
+ "train_loss": 0.14746467811720712,
5
+ "train_runtime": 9966.943,
6
+ "train_samples_per_second": 102.483,
7
+ "train_steps_per_second": 0.801
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7839129072681704,
3
- "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned/checkpoint-3192",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 4788,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2932,20 +2932,1976 @@
2932
  },
2933
  {
2934
  "epoch": 6.0,
2935
- "step": 4788,
2936
- "total_flos": 4.749417900430118e+19,
2937
- "train_loss": 0.321556950521748,
2938
- "train_runtime": 10205.5984,
2939
- "train_samples_per_second": 60.052,
2940
- "train_steps_per_second": 0.469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2941
  }
2942
  ],
2943
  "logging_steps": 10,
2944
- "max_steps": 4788,
2945
  "num_input_tokens_seen": 0,
2946
- "num_train_epochs": 6,
2947
  "save_steps": 500,
2948
- "total_flos": 4.749417900430118e+19,
2949
  "train_batch_size": 32,
2950
  "trial_name": null,
2951
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8571428571428571,
3
+ "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned/checkpoint-7980",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 7980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2932
  },
2933
  {
2934
  "epoch": 6.0,
2935
+ "learning_rate": 2.2208298524088e-05,
2936
+ "loss": 0.4739,
2937
+ "step": 4790
2938
+ },
2939
+ {
2940
+ "epoch": 6.02,
2941
+ "learning_rate": 2.2138680033416877e-05,
2942
+ "loss": 0.4398,
2943
+ "step": 4800
2944
+ },
2945
+ {
2946
+ "epoch": 6.03,
2947
+ "learning_rate": 2.2069061542745755e-05,
2948
+ "loss": 0.4024,
2949
+ "step": 4810
2950
+ },
2951
+ {
2952
+ "epoch": 6.04,
2953
+ "learning_rate": 2.199944305207463e-05,
2954
+ "loss": 0.4506,
2955
+ "step": 4820
2956
+ },
2957
+ {
2958
+ "epoch": 6.05,
2959
+ "learning_rate": 2.1929824561403507e-05,
2960
+ "loss": 0.5156,
2961
+ "step": 4830
2962
+ },
2963
+ {
2964
+ "epoch": 6.07,
2965
+ "learning_rate": 2.1860206070732385e-05,
2966
+ "loss": 0.5007,
2967
+ "step": 4840
2968
+ },
2969
+ {
2970
+ "epoch": 6.08,
2971
+ "learning_rate": 2.1790587580061263e-05,
2972
+ "loss": 0.4959,
2973
+ "step": 4850
2974
+ },
2975
+ {
2976
+ "epoch": 6.09,
2977
+ "learning_rate": 2.172096908939014e-05,
2978
+ "loss": 0.5087,
2979
+ "step": 4860
2980
+ },
2981
+ {
2982
+ "epoch": 6.1,
2983
+ "learning_rate": 2.165135059871902e-05,
2984
+ "loss": 0.4577,
2985
+ "step": 4870
2986
+ },
2987
+ {
2988
+ "epoch": 6.12,
2989
+ "learning_rate": 2.1581732108047897e-05,
2990
+ "loss": 0.4851,
2991
+ "step": 4880
2992
+ },
2993
+ {
2994
+ "epoch": 6.13,
2995
+ "learning_rate": 2.1512113617376775e-05,
2996
+ "loss": 0.4541,
2997
+ "step": 4890
2998
+ },
2999
+ {
3000
+ "epoch": 6.14,
3001
+ "learning_rate": 2.1442495126705653e-05,
3002
+ "loss": 0.5341,
3003
+ "step": 4900
3004
+ },
3005
+ {
3006
+ "epoch": 6.15,
3007
+ "learning_rate": 2.137287663603453e-05,
3008
+ "loss": 0.4609,
3009
+ "step": 4910
3010
+ },
3011
+ {
3012
+ "epoch": 6.17,
3013
+ "learning_rate": 2.130325814536341e-05,
3014
+ "loss": 0.5178,
3015
+ "step": 4920
3016
+ },
3017
+ {
3018
+ "epoch": 6.18,
3019
+ "learning_rate": 2.1233639654692286e-05,
3020
+ "loss": 0.4792,
3021
+ "step": 4930
3022
+ },
3023
+ {
3024
+ "epoch": 6.19,
3025
+ "learning_rate": 2.1164021164021164e-05,
3026
+ "loss": 0.448,
3027
+ "step": 4940
3028
+ },
3029
+ {
3030
+ "epoch": 6.2,
3031
+ "learning_rate": 2.1094402673350042e-05,
3032
+ "loss": 0.4971,
3033
+ "step": 4950
3034
+ },
3035
+ {
3036
+ "epoch": 6.22,
3037
+ "learning_rate": 2.102478418267892e-05,
3038
+ "loss": 0.4521,
3039
+ "step": 4960
3040
+ },
3041
+ {
3042
+ "epoch": 6.23,
3043
+ "learning_rate": 2.0955165692007798e-05,
3044
+ "loss": 0.4584,
3045
+ "step": 4970
3046
+ },
3047
+ {
3048
+ "epoch": 6.24,
3049
+ "learning_rate": 2.0885547201336676e-05,
3050
+ "loss": 0.4984,
3051
+ "step": 4980
3052
+ },
3053
+ {
3054
+ "epoch": 6.25,
3055
+ "learning_rate": 2.0815928710665554e-05,
3056
+ "loss": 0.4894,
3057
+ "step": 4990
3058
+ },
3059
+ {
3060
+ "epoch": 6.27,
3061
+ "learning_rate": 2.0746310219994432e-05,
3062
+ "loss": 0.4887,
3063
+ "step": 5000
3064
+ },
3065
+ {
3066
+ "epoch": 6.28,
3067
+ "learning_rate": 2.067669172932331e-05,
3068
+ "loss": 0.5063,
3069
+ "step": 5010
3070
+ },
3071
+ {
3072
+ "epoch": 6.29,
3073
+ "learning_rate": 2.0607073238652184e-05,
3074
+ "loss": 0.4963,
3075
+ "step": 5020
3076
+ },
3077
+ {
3078
+ "epoch": 6.3,
3079
+ "learning_rate": 2.0537454747981062e-05,
3080
+ "loss": 0.524,
3081
+ "step": 5030
3082
+ },
3083
+ {
3084
+ "epoch": 6.32,
3085
+ "learning_rate": 2.046783625730994e-05,
3086
+ "loss": 0.4502,
3087
+ "step": 5040
3088
+ },
3089
+ {
3090
+ "epoch": 6.33,
3091
+ "learning_rate": 2.0398217766638818e-05,
3092
+ "loss": 0.4505,
3093
+ "step": 5050
3094
+ },
3095
+ {
3096
+ "epoch": 6.34,
3097
+ "learning_rate": 2.0328599275967696e-05,
3098
+ "loss": 0.4962,
3099
+ "step": 5060
3100
+ },
3101
+ {
3102
+ "epoch": 6.35,
3103
+ "learning_rate": 2.0258980785296574e-05,
3104
+ "loss": 0.4566,
3105
+ "step": 5070
3106
+ },
3107
+ {
3108
+ "epoch": 6.37,
3109
+ "learning_rate": 2.018936229462545e-05,
3110
+ "loss": 0.4464,
3111
+ "step": 5080
3112
+ },
3113
+ {
3114
+ "epoch": 6.38,
3115
+ "learning_rate": 2.011974380395433e-05,
3116
+ "loss": 0.4872,
3117
+ "step": 5090
3118
+ },
3119
+ {
3120
+ "epoch": 6.39,
3121
+ "learning_rate": 2.0050125313283208e-05,
3122
+ "loss": 0.4396,
3123
+ "step": 5100
3124
+ },
3125
+ {
3126
+ "epoch": 6.4,
3127
+ "learning_rate": 1.9980506822612085e-05,
3128
+ "loss": 0.5302,
3129
+ "step": 5110
3130
+ },
3131
+ {
3132
+ "epoch": 6.42,
3133
+ "learning_rate": 1.9910888331940963e-05,
3134
+ "loss": 0.4927,
3135
+ "step": 5120
3136
+ },
3137
+ {
3138
+ "epoch": 6.43,
3139
+ "learning_rate": 1.984126984126984e-05,
3140
+ "loss": 0.5129,
3141
+ "step": 5130
3142
+ },
3143
+ {
3144
+ "epoch": 6.44,
3145
+ "learning_rate": 1.977165135059872e-05,
3146
+ "loss": 0.4747,
3147
+ "step": 5140
3148
+ },
3149
+ {
3150
+ "epoch": 6.45,
3151
+ "learning_rate": 1.9702032859927597e-05,
3152
+ "loss": 0.4878,
3153
+ "step": 5150
3154
+ },
3155
+ {
3156
+ "epoch": 6.47,
3157
+ "learning_rate": 1.9632414369256475e-05,
3158
+ "loss": 0.4885,
3159
+ "step": 5160
3160
+ },
3161
+ {
3162
+ "epoch": 6.48,
3163
+ "learning_rate": 1.9562795878585353e-05,
3164
+ "loss": 0.4634,
3165
+ "step": 5170
3166
+ },
3167
+ {
3168
+ "epoch": 6.49,
3169
+ "learning_rate": 1.949317738791423e-05,
3170
+ "loss": 0.4665,
3171
+ "step": 5180
3172
+ },
3173
+ {
3174
+ "epoch": 6.5,
3175
+ "learning_rate": 1.942355889724311e-05,
3176
+ "loss": 0.4392,
3177
+ "step": 5190
3178
+ },
3179
+ {
3180
+ "epoch": 6.52,
3181
+ "learning_rate": 1.9353940406571987e-05,
3182
+ "loss": 0.4754,
3183
+ "step": 5200
3184
+ },
3185
+ {
3186
+ "epoch": 6.53,
3187
+ "learning_rate": 1.9284321915900865e-05,
3188
+ "loss": 0.457,
3189
+ "step": 5210
3190
+ },
3191
+ {
3192
+ "epoch": 6.54,
3193
+ "learning_rate": 1.9214703425229743e-05,
3194
+ "loss": 0.4734,
3195
+ "step": 5220
3196
+ },
3197
+ {
3198
+ "epoch": 6.55,
3199
+ "learning_rate": 1.9145084934558617e-05,
3200
+ "loss": 0.4556,
3201
+ "step": 5230
3202
+ },
3203
+ {
3204
+ "epoch": 6.57,
3205
+ "learning_rate": 1.9075466443887495e-05,
3206
+ "loss": 0.4844,
3207
+ "step": 5240
3208
+ },
3209
+ {
3210
+ "epoch": 6.58,
3211
+ "learning_rate": 1.9005847953216373e-05,
3212
+ "loss": 0.4161,
3213
+ "step": 5250
3214
+ },
3215
+ {
3216
+ "epoch": 6.59,
3217
+ "learning_rate": 1.893622946254525e-05,
3218
+ "loss": 0.4722,
3219
+ "step": 5260
3220
+ },
3221
+ {
3222
+ "epoch": 6.6,
3223
+ "learning_rate": 1.886661097187413e-05,
3224
+ "loss": 0.517,
3225
+ "step": 5270
3226
+ },
3227
+ {
3228
+ "epoch": 6.62,
3229
+ "learning_rate": 1.8796992481203007e-05,
3230
+ "loss": 0.4612,
3231
+ "step": 5280
3232
+ },
3233
+ {
3234
+ "epoch": 6.63,
3235
+ "learning_rate": 1.8727373990531885e-05,
3236
+ "loss": 0.5017,
3237
+ "step": 5290
3238
+ },
3239
+ {
3240
+ "epoch": 6.64,
3241
+ "learning_rate": 1.8657755499860762e-05,
3242
+ "loss": 0.4882,
3243
+ "step": 5300
3244
+ },
3245
+ {
3246
+ "epoch": 6.65,
3247
+ "learning_rate": 1.858813700918964e-05,
3248
+ "loss": 0.4722,
3249
+ "step": 5310
3250
+ },
3251
+ {
3252
+ "epoch": 6.67,
3253
+ "learning_rate": 1.8518518518518518e-05,
3254
+ "loss": 0.4674,
3255
+ "step": 5320
3256
+ },
3257
+ {
3258
+ "epoch": 6.68,
3259
+ "learning_rate": 1.8448900027847396e-05,
3260
+ "loss": 0.4928,
3261
+ "step": 5330
3262
+ },
3263
+ {
3264
+ "epoch": 6.69,
3265
+ "learning_rate": 1.8379281537176274e-05,
3266
+ "loss": 0.4426,
3267
+ "step": 5340
3268
+ },
3269
+ {
3270
+ "epoch": 6.7,
3271
+ "learning_rate": 1.8309663046505152e-05,
3272
+ "loss": 0.4778,
3273
+ "step": 5350
3274
+ },
3275
+ {
3276
+ "epoch": 6.72,
3277
+ "learning_rate": 1.824004455583403e-05,
3278
+ "loss": 0.4555,
3279
+ "step": 5360
3280
+ },
3281
+ {
3282
+ "epoch": 6.73,
3283
+ "learning_rate": 1.8170426065162908e-05,
3284
+ "loss": 0.4707,
3285
+ "step": 5370
3286
+ },
3287
+ {
3288
+ "epoch": 6.74,
3289
+ "learning_rate": 1.8100807574491786e-05,
3290
+ "loss": 0.4729,
3291
+ "step": 5380
3292
+ },
3293
+ {
3294
+ "epoch": 6.75,
3295
+ "learning_rate": 1.8031189083820664e-05,
3296
+ "loss": 0.5236,
3297
+ "step": 5390
3298
+ },
3299
+ {
3300
+ "epoch": 6.77,
3301
+ "learning_rate": 1.796157059314954e-05,
3302
+ "loss": 0.4635,
3303
+ "step": 5400
3304
+ },
3305
+ {
3306
+ "epoch": 6.78,
3307
+ "learning_rate": 1.789195210247842e-05,
3308
+ "loss": 0.4731,
3309
+ "step": 5410
3310
+ },
3311
+ {
3312
+ "epoch": 6.79,
3313
+ "learning_rate": 1.7822333611807297e-05,
3314
+ "loss": 0.4784,
3315
+ "step": 5420
3316
+ },
3317
+ {
3318
+ "epoch": 6.8,
3319
+ "learning_rate": 1.7752715121136172e-05,
3320
+ "loss": 0.4913,
3321
+ "step": 5430
3322
+ },
3323
+ {
3324
+ "epoch": 6.82,
3325
+ "learning_rate": 1.768309663046505e-05,
3326
+ "loss": 0.4617,
3327
+ "step": 5440
3328
+ },
3329
+ {
3330
+ "epoch": 6.83,
3331
+ "learning_rate": 1.7613478139793928e-05,
3332
+ "loss": 0.4888,
3333
+ "step": 5450
3334
+ },
3335
+ {
3336
+ "epoch": 6.84,
3337
+ "learning_rate": 1.7543859649122806e-05,
3338
+ "loss": 0.4976,
3339
+ "step": 5460
3340
+ },
3341
+ {
3342
+ "epoch": 6.85,
3343
+ "learning_rate": 1.7474241158451684e-05,
3344
+ "loss": 0.4778,
3345
+ "step": 5470
3346
+ },
3347
+ {
3348
+ "epoch": 6.87,
3349
+ "learning_rate": 1.740462266778056e-05,
3350
+ "loss": 0.4818,
3351
+ "step": 5480
3352
+ },
3353
+ {
3354
+ "epoch": 6.88,
3355
+ "learning_rate": 1.733500417710944e-05,
3356
+ "loss": 0.5,
3357
+ "step": 5490
3358
+ },
3359
+ {
3360
+ "epoch": 6.89,
3361
+ "learning_rate": 1.7265385686438317e-05,
3362
+ "loss": 0.4529,
3363
+ "step": 5500
3364
+ },
3365
+ {
3366
+ "epoch": 6.9,
3367
+ "learning_rate": 1.7195767195767195e-05,
3368
+ "loss": 0.4674,
3369
+ "step": 5510
3370
+ },
3371
+ {
3372
+ "epoch": 6.92,
3373
+ "learning_rate": 1.7126148705096073e-05,
3374
+ "loss": 0.4733,
3375
+ "step": 5520
3376
+ },
3377
+ {
3378
+ "epoch": 6.93,
3379
+ "learning_rate": 1.705653021442495e-05,
3380
+ "loss": 0.4702,
3381
+ "step": 5530
3382
+ },
3383
+ {
3384
+ "epoch": 6.94,
3385
+ "learning_rate": 1.698691172375383e-05,
3386
+ "loss": 0.4809,
3387
+ "step": 5540
3388
+ },
3389
+ {
3390
+ "epoch": 6.95,
3391
+ "learning_rate": 1.6917293233082707e-05,
3392
+ "loss": 0.509,
3393
+ "step": 5550
3394
+ },
3395
+ {
3396
+ "epoch": 6.97,
3397
+ "learning_rate": 1.6847674742411585e-05,
3398
+ "loss": 0.4692,
3399
+ "step": 5560
3400
+ },
3401
+ {
3402
+ "epoch": 6.98,
3403
+ "learning_rate": 1.6778056251740463e-05,
3404
+ "loss": 0.5013,
3405
+ "step": 5570
3406
+ },
3407
+ {
3408
+ "epoch": 6.99,
3409
+ "learning_rate": 1.670843776106934e-05,
3410
+ "loss": 0.4245,
3411
+ "step": 5580
3412
+ },
3413
+ {
3414
+ "epoch": 7.0,
3415
+ "eval_accuracy": 0.8541666666666666,
3416
+ "eval_loss": 0.43816250562667847,
3417
+ "eval_runtime": 233.7655,
3418
+ "eval_samples_per_second": 109.238,
3419
+ "eval_steps_per_second": 3.414,
3420
+ "step": 5586
3421
+ },
3422
+ {
3423
+ "epoch": 7.01,
3424
+ "learning_rate": 1.663881927039822e-05,
3425
+ "loss": 0.4232,
3426
+ "step": 5590
3427
+ },
3428
+ {
3429
+ "epoch": 7.02,
3430
+ "learning_rate": 1.6569200779727097e-05,
3431
+ "loss": 0.4133,
3432
+ "step": 5600
3433
+ },
3434
+ {
3435
+ "epoch": 7.03,
3436
+ "learning_rate": 1.6499582289055974e-05,
3437
+ "loss": 0.4066,
3438
+ "step": 5610
3439
+ },
3440
+ {
3441
+ "epoch": 7.04,
3442
+ "learning_rate": 1.6429963798384852e-05,
3443
+ "loss": 0.4381,
3444
+ "step": 5620
3445
+ },
3446
+ {
3447
+ "epoch": 7.06,
3448
+ "learning_rate": 1.636034530771373e-05,
3449
+ "loss": 0.4063,
3450
+ "step": 5630
3451
+ },
3452
+ {
3453
+ "epoch": 7.07,
3454
+ "learning_rate": 1.6290726817042605e-05,
3455
+ "loss": 0.3721,
3456
+ "step": 5640
3457
+ },
3458
+ {
3459
+ "epoch": 7.08,
3460
+ "learning_rate": 1.6221108326371483e-05,
3461
+ "loss": 0.3574,
3462
+ "step": 5650
3463
+ },
3464
+ {
3465
+ "epoch": 7.09,
3466
+ "learning_rate": 1.615148983570036e-05,
3467
+ "loss": 0.4156,
3468
+ "step": 5660
3469
+ },
3470
+ {
3471
+ "epoch": 7.11,
3472
+ "learning_rate": 1.608187134502924e-05,
3473
+ "loss": 0.4311,
3474
+ "step": 5670
3475
+ },
3476
+ {
3477
+ "epoch": 7.12,
3478
+ "learning_rate": 1.6012252854358116e-05,
3479
+ "loss": 0.3922,
3480
+ "step": 5680
3481
+ },
3482
+ {
3483
+ "epoch": 7.13,
3484
+ "learning_rate": 1.5942634363686994e-05,
3485
+ "loss": 0.3568,
3486
+ "step": 5690
3487
+ },
3488
+ {
3489
+ "epoch": 7.14,
3490
+ "learning_rate": 1.5873015873015872e-05,
3491
+ "loss": 0.3943,
3492
+ "step": 5700
3493
+ },
3494
+ {
3495
+ "epoch": 7.16,
3496
+ "learning_rate": 1.580339738234475e-05,
3497
+ "loss": 0.3995,
3498
+ "step": 5710
3499
+ },
3500
+ {
3501
+ "epoch": 7.17,
3502
+ "learning_rate": 1.5733778891673628e-05,
3503
+ "loss": 0.3948,
3504
+ "step": 5720
3505
+ },
3506
+ {
3507
+ "epoch": 7.18,
3508
+ "learning_rate": 1.5664160401002506e-05,
3509
+ "loss": 0.3912,
3510
+ "step": 5730
3511
+ },
3512
+ {
3513
+ "epoch": 7.19,
3514
+ "learning_rate": 1.5594541910331384e-05,
3515
+ "loss": 0.3791,
3516
+ "step": 5740
3517
+ },
3518
+ {
3519
+ "epoch": 7.21,
3520
+ "learning_rate": 1.5524923419660262e-05,
3521
+ "loss": 0.4085,
3522
+ "step": 5750
3523
+ },
3524
+ {
3525
+ "epoch": 7.22,
3526
+ "learning_rate": 1.545530492898914e-05,
3527
+ "loss": 0.4175,
3528
+ "step": 5760
3529
+ },
3530
+ {
3531
+ "epoch": 7.23,
3532
+ "learning_rate": 1.5385686438318018e-05,
3533
+ "loss": 0.4018,
3534
+ "step": 5770
3535
+ },
3536
+ {
3537
+ "epoch": 7.24,
3538
+ "learning_rate": 1.5316067947646896e-05,
3539
+ "loss": 0.4203,
3540
+ "step": 5780
3541
+ },
3542
+ {
3543
+ "epoch": 7.26,
3544
+ "learning_rate": 1.5246449456975772e-05,
3545
+ "loss": 0.3939,
3546
+ "step": 5790
3547
+ },
3548
+ {
3549
+ "epoch": 7.27,
3550
+ "learning_rate": 1.517683096630465e-05,
3551
+ "loss": 0.4417,
3552
+ "step": 5800
3553
+ },
3554
+ {
3555
+ "epoch": 7.28,
3556
+ "learning_rate": 1.5107212475633528e-05,
3557
+ "loss": 0.3599,
3558
+ "step": 5810
3559
+ },
3560
+ {
3561
+ "epoch": 7.29,
3562
+ "learning_rate": 1.5037593984962406e-05,
3563
+ "loss": 0.3877,
3564
+ "step": 5820
3565
+ },
3566
+ {
3567
+ "epoch": 7.31,
3568
+ "learning_rate": 1.4967975494291284e-05,
3569
+ "loss": 0.4054,
3570
+ "step": 5830
3571
+ },
3572
+ {
3573
+ "epoch": 7.32,
3574
+ "learning_rate": 1.4898357003620161e-05,
3575
+ "loss": 0.3935,
3576
+ "step": 5840
3577
+ },
3578
+ {
3579
+ "epoch": 7.33,
3580
+ "learning_rate": 1.482873851294904e-05,
3581
+ "loss": 0.3589,
3582
+ "step": 5850
3583
+ },
3584
+ {
3585
+ "epoch": 7.34,
3586
+ "learning_rate": 1.4759120022277917e-05,
3587
+ "loss": 0.3815,
3588
+ "step": 5860
3589
+ },
3590
+ {
3591
+ "epoch": 7.36,
3592
+ "learning_rate": 1.4689501531606795e-05,
3593
+ "loss": 0.4185,
3594
+ "step": 5870
3595
+ },
3596
+ {
3597
+ "epoch": 7.37,
3598
+ "learning_rate": 1.4619883040935673e-05,
3599
+ "loss": 0.4318,
3600
+ "step": 5880
3601
+ },
3602
+ {
3603
+ "epoch": 7.38,
3604
+ "learning_rate": 1.455026455026455e-05,
3605
+ "loss": 0.3831,
3606
+ "step": 5890
3607
+ },
3608
+ {
3609
+ "epoch": 7.39,
3610
+ "learning_rate": 1.4480646059593427e-05,
3611
+ "loss": 0.3999,
3612
+ "step": 5900
3613
+ },
3614
+ {
3615
+ "epoch": 7.41,
3616
+ "learning_rate": 1.4411027568922305e-05,
3617
+ "loss": 0.4112,
3618
+ "step": 5910
3619
+ },
3620
+ {
3621
+ "epoch": 7.42,
3622
+ "learning_rate": 1.4341409078251183e-05,
3623
+ "loss": 0.4429,
3624
+ "step": 5920
3625
+ },
3626
+ {
3627
+ "epoch": 7.43,
3628
+ "learning_rate": 1.4271790587580061e-05,
3629
+ "loss": 0.4023,
3630
+ "step": 5930
3631
+ },
3632
+ {
3633
+ "epoch": 7.44,
3634
+ "learning_rate": 1.4202172096908939e-05,
3635
+ "loss": 0.4087,
3636
+ "step": 5940
3637
+ },
3638
+ {
3639
+ "epoch": 7.46,
3640
+ "learning_rate": 1.4132553606237817e-05,
3641
+ "loss": 0.3838,
3642
+ "step": 5950
3643
+ },
3644
+ {
3645
+ "epoch": 7.47,
3646
+ "learning_rate": 1.4062935115566695e-05,
3647
+ "loss": 0.4084,
3648
+ "step": 5960
3649
+ },
3650
+ {
3651
+ "epoch": 7.48,
3652
+ "learning_rate": 1.3993316624895573e-05,
3653
+ "loss": 0.3851,
3654
+ "step": 5970
3655
+ },
3656
+ {
3657
+ "epoch": 7.49,
3658
+ "learning_rate": 1.392369813422445e-05,
3659
+ "loss": 0.4234,
3660
+ "step": 5980
3661
+ },
3662
+ {
3663
+ "epoch": 7.51,
3664
+ "learning_rate": 1.3854079643553327e-05,
3665
+ "loss": 0.4183,
3666
+ "step": 5990
3667
+ },
3668
+ {
3669
+ "epoch": 7.52,
3670
+ "learning_rate": 1.3784461152882205e-05,
3671
+ "loss": 0.4108,
3672
+ "step": 6000
3673
+ },
3674
+ {
3675
+ "epoch": 7.53,
3676
+ "learning_rate": 1.3714842662211083e-05,
3677
+ "loss": 0.4314,
3678
+ "step": 6010
3679
+ },
3680
+ {
3681
+ "epoch": 7.54,
3682
+ "learning_rate": 1.364522417153996e-05,
3683
+ "loss": 0.4252,
3684
+ "step": 6020
3685
+ },
3686
+ {
3687
+ "epoch": 7.56,
3688
+ "learning_rate": 1.3575605680868838e-05,
3689
+ "loss": 0.4272,
3690
+ "step": 6030
3691
+ },
3692
+ {
3693
+ "epoch": 7.57,
3694
+ "learning_rate": 1.3505987190197716e-05,
3695
+ "loss": 0.3799,
3696
+ "step": 6040
3697
+ },
3698
+ {
3699
+ "epoch": 7.58,
3700
+ "learning_rate": 1.3436368699526594e-05,
3701
+ "loss": 0.3705,
3702
+ "step": 6050
3703
+ },
3704
+ {
3705
+ "epoch": 7.59,
3706
+ "learning_rate": 1.3366750208855472e-05,
3707
+ "loss": 0.4124,
3708
+ "step": 6060
3709
+ },
3710
+ {
3711
+ "epoch": 7.61,
3712
+ "learning_rate": 1.329713171818435e-05,
3713
+ "loss": 0.3976,
3714
+ "step": 6070
3715
+ },
3716
+ {
3717
+ "epoch": 7.62,
3718
+ "learning_rate": 1.3227513227513228e-05,
3719
+ "loss": 0.4187,
3720
+ "step": 6080
3721
+ },
3722
+ {
3723
+ "epoch": 7.63,
3724
+ "learning_rate": 1.3157894736842106e-05,
3725
+ "loss": 0.386,
3726
+ "step": 6090
3727
+ },
3728
+ {
3729
+ "epoch": 7.64,
3730
+ "learning_rate": 1.3088276246170982e-05,
3731
+ "loss": 0.4153,
3732
+ "step": 6100
3733
+ },
3734
+ {
3735
+ "epoch": 7.66,
3736
+ "learning_rate": 1.301865775549986e-05,
3737
+ "loss": 0.4067,
3738
+ "step": 6110
3739
+ },
3740
+ {
3741
+ "epoch": 7.67,
3742
+ "learning_rate": 1.2949039264828738e-05,
3743
+ "loss": 0.3874,
3744
+ "step": 6120
3745
+ },
3746
+ {
3747
+ "epoch": 7.68,
3748
+ "learning_rate": 1.2879420774157616e-05,
3749
+ "loss": 0.3921,
3750
+ "step": 6130
3751
+ },
3752
+ {
3753
+ "epoch": 7.69,
3754
+ "learning_rate": 1.2809802283486494e-05,
3755
+ "loss": 0.369,
3756
+ "step": 6140
3757
+ },
3758
+ {
3759
+ "epoch": 7.71,
3760
+ "learning_rate": 1.2740183792815372e-05,
3761
+ "loss": 0.4082,
3762
+ "step": 6150
3763
+ },
3764
+ {
3765
+ "epoch": 7.72,
3766
+ "learning_rate": 1.267056530214425e-05,
3767
+ "loss": 0.4238,
3768
+ "step": 6160
3769
+ },
3770
+ {
3771
+ "epoch": 7.73,
3772
+ "learning_rate": 1.2600946811473128e-05,
3773
+ "loss": 0.3899,
3774
+ "step": 6170
3775
+ },
3776
+ {
3777
+ "epoch": 7.74,
3778
+ "learning_rate": 1.2531328320802006e-05,
3779
+ "loss": 0.3806,
3780
+ "step": 6180
3781
+ },
3782
+ {
3783
+ "epoch": 7.76,
3784
+ "learning_rate": 1.2461709830130883e-05,
3785
+ "loss": 0.3617,
3786
+ "step": 6190
3787
+ },
3788
+ {
3789
+ "epoch": 7.77,
3790
+ "learning_rate": 1.239209133945976e-05,
3791
+ "loss": 0.4171,
3792
+ "step": 6200
3793
+ },
3794
+ {
3795
+ "epoch": 7.78,
3796
+ "learning_rate": 1.2322472848788638e-05,
3797
+ "loss": 0.3941,
3798
+ "step": 6210
3799
+ },
3800
+ {
3801
+ "epoch": 7.79,
3802
+ "learning_rate": 1.2252854358117516e-05,
3803
+ "loss": 0.3891,
3804
+ "step": 6220
3805
+ },
3806
+ {
3807
+ "epoch": 7.81,
3808
+ "learning_rate": 1.2183235867446393e-05,
3809
+ "loss": 0.3987,
3810
+ "step": 6230
3811
+ },
3812
+ {
3813
+ "epoch": 7.82,
3814
+ "learning_rate": 1.2113617376775271e-05,
3815
+ "loss": 0.3707,
3816
+ "step": 6240
3817
+ },
3818
+ {
3819
+ "epoch": 7.83,
3820
+ "learning_rate": 1.204399888610415e-05,
3821
+ "loss": 0.3835,
3822
+ "step": 6250
3823
+ },
3824
+ {
3825
+ "epoch": 7.84,
3826
+ "learning_rate": 1.1974380395433027e-05,
3827
+ "loss": 0.3925,
3828
+ "step": 6260
3829
+ },
3830
+ {
3831
+ "epoch": 7.86,
3832
+ "learning_rate": 1.1904761904761905e-05,
3833
+ "loss": 0.3693,
3834
+ "step": 6270
3835
+ },
3836
+ {
3837
+ "epoch": 7.87,
3838
+ "learning_rate": 1.1835143414090783e-05,
3839
+ "loss": 0.3809,
3840
+ "step": 6280
3841
+ },
3842
+ {
3843
+ "epoch": 7.88,
3844
+ "learning_rate": 1.1765524923419661e-05,
3845
+ "loss": 0.3872,
3846
+ "step": 6290
3847
+ },
3848
+ {
3849
+ "epoch": 7.89,
3850
+ "learning_rate": 1.1695906432748537e-05,
3851
+ "loss": 0.3964,
3852
+ "step": 6300
3853
+ },
3854
+ {
3855
+ "epoch": 7.91,
3856
+ "learning_rate": 1.1626287942077415e-05,
3857
+ "loss": 0.3767,
3858
+ "step": 6310
3859
+ },
3860
+ {
3861
+ "epoch": 7.92,
3862
+ "learning_rate": 1.1556669451406293e-05,
3863
+ "loss": 0.3902,
3864
+ "step": 6320
3865
+ },
3866
+ {
3867
+ "epoch": 7.93,
3868
+ "learning_rate": 1.1487050960735171e-05,
3869
+ "loss": 0.3551,
3870
+ "step": 6330
3871
+ },
3872
+ {
3873
+ "epoch": 7.94,
3874
+ "learning_rate": 1.1417432470064049e-05,
3875
+ "loss": 0.4097,
3876
+ "step": 6340
3877
+ },
3878
+ {
3879
+ "epoch": 7.96,
3880
+ "learning_rate": 1.1347813979392927e-05,
3881
+ "loss": 0.4374,
3882
+ "step": 6350
3883
+ },
3884
+ {
3885
+ "epoch": 7.97,
3886
+ "learning_rate": 1.1278195488721805e-05,
3887
+ "loss": 0.3815,
3888
+ "step": 6360
3889
+ },
3890
+ {
3891
+ "epoch": 7.98,
3892
+ "learning_rate": 1.1208576998050683e-05,
3893
+ "loss": 0.3829,
3894
+ "step": 6370
3895
+ },
3896
+ {
3897
+ "epoch": 7.99,
3898
+ "learning_rate": 1.113895850737956e-05,
3899
+ "loss": 0.3806,
3900
+ "step": 6380
3901
+ },
3902
+ {
3903
+ "epoch": 8.0,
3904
+ "eval_accuracy": 0.853109335839599,
3905
+ "eval_loss": 0.4375392496585846,
3906
+ "eval_runtime": 234.5395,
3907
+ "eval_samples_per_second": 108.877,
3908
+ "eval_steps_per_second": 3.402,
3909
+ "step": 6384
3910
+ },
3911
+ {
3912
+ "epoch": 8.01,
3913
+ "learning_rate": 1.1069340016708438e-05,
3914
+ "loss": 0.3573,
3915
+ "step": 6390
3916
+ },
3917
+ {
3918
+ "epoch": 8.02,
3919
+ "learning_rate": 1.0999721526037315e-05,
3920
+ "loss": 0.3303,
3921
+ "step": 6400
3922
+ },
3923
+ {
3924
+ "epoch": 8.03,
3925
+ "learning_rate": 1.0930103035366193e-05,
3926
+ "loss": 0.3206,
3927
+ "step": 6410
3928
+ },
3929
+ {
3930
+ "epoch": 8.05,
3931
+ "learning_rate": 1.086048454469507e-05,
3932
+ "loss": 0.3416,
3933
+ "step": 6420
3934
+ },
3935
+ {
3936
+ "epoch": 8.06,
3937
+ "learning_rate": 1.0790866054023948e-05,
3938
+ "loss": 0.3379,
3939
+ "step": 6430
3940
+ },
3941
+ {
3942
+ "epoch": 8.07,
3943
+ "learning_rate": 1.0721247563352826e-05,
3944
+ "loss": 0.3255,
3945
+ "step": 6440
3946
+ },
3947
+ {
3948
+ "epoch": 8.08,
3949
+ "learning_rate": 1.0651629072681704e-05,
3950
+ "loss": 0.3278,
3951
+ "step": 6450
3952
+ },
3953
+ {
3954
+ "epoch": 8.1,
3955
+ "learning_rate": 1.0582010582010582e-05,
3956
+ "loss": 0.3136,
3957
+ "step": 6460
3958
+ },
3959
+ {
3960
+ "epoch": 8.11,
3961
+ "learning_rate": 1.051239209133946e-05,
3962
+ "loss": 0.2932,
3963
+ "step": 6470
3964
+ },
3965
+ {
3966
+ "epoch": 8.12,
3967
+ "learning_rate": 1.0442773600668338e-05,
3968
+ "loss": 0.3199,
3969
+ "step": 6480
3970
+ },
3971
+ {
3972
+ "epoch": 8.13,
3973
+ "learning_rate": 1.0373155109997216e-05,
3974
+ "loss": 0.3212,
3975
+ "step": 6490
3976
+ },
3977
+ {
3978
+ "epoch": 8.15,
3979
+ "learning_rate": 1.0303536619326092e-05,
3980
+ "loss": 0.3487,
3981
+ "step": 6500
3982
+ },
3983
+ {
3984
+ "epoch": 8.16,
3985
+ "learning_rate": 1.023391812865497e-05,
3986
+ "loss": 0.3364,
3987
+ "step": 6510
3988
+ },
3989
+ {
3990
+ "epoch": 8.17,
3991
+ "learning_rate": 1.0164299637983848e-05,
3992
+ "loss": 0.3261,
3993
+ "step": 6520
3994
+ },
3995
+ {
3996
+ "epoch": 8.18,
3997
+ "learning_rate": 1.0094681147312726e-05,
3998
+ "loss": 0.3398,
3999
+ "step": 6530
4000
+ },
4001
+ {
4002
+ "epoch": 8.2,
4003
+ "learning_rate": 1.0025062656641604e-05,
4004
+ "loss": 0.3111,
4005
+ "step": 6540
4006
+ },
4007
+ {
4008
+ "epoch": 8.21,
4009
+ "learning_rate": 9.955444165970482e-06,
4010
+ "loss": 0.3523,
4011
+ "step": 6550
4012
+ },
4013
+ {
4014
+ "epoch": 8.22,
4015
+ "learning_rate": 9.88582567529936e-06,
4016
+ "loss": 0.3325,
4017
+ "step": 6560
4018
+ },
4019
+ {
4020
+ "epoch": 8.23,
4021
+ "learning_rate": 9.816207184628238e-06,
4022
+ "loss": 0.329,
4023
+ "step": 6570
4024
+ },
4025
+ {
4026
+ "epoch": 8.25,
4027
+ "learning_rate": 9.746588693957115e-06,
4028
+ "loss": 0.3558,
4029
+ "step": 6580
4030
+ },
4031
+ {
4032
+ "epoch": 8.26,
4033
+ "learning_rate": 9.676970203285993e-06,
4034
+ "loss": 0.3286,
4035
+ "step": 6590
4036
+ },
4037
+ {
4038
+ "epoch": 8.27,
4039
+ "learning_rate": 9.607351712614871e-06,
4040
+ "loss": 0.3429,
4041
+ "step": 6600
4042
+ },
4043
+ {
4044
+ "epoch": 8.28,
4045
+ "learning_rate": 9.537733221943747e-06,
4046
+ "loss": 0.3146,
4047
+ "step": 6610
4048
+ },
4049
+ {
4050
+ "epoch": 8.3,
4051
+ "learning_rate": 9.468114731272625e-06,
4052
+ "loss": 0.3075,
4053
+ "step": 6620
4054
+ },
4055
+ {
4056
+ "epoch": 8.31,
4057
+ "learning_rate": 9.398496240601503e-06,
4058
+ "loss": 0.3053,
4059
+ "step": 6630
4060
+ },
4061
+ {
4062
+ "epoch": 8.32,
4063
+ "learning_rate": 9.328877749930381e-06,
4064
+ "loss": 0.3262,
4065
+ "step": 6640
4066
+ },
4067
+ {
4068
+ "epoch": 8.33,
4069
+ "learning_rate": 9.259259259259259e-06,
4070
+ "loss": 0.3315,
4071
+ "step": 6650
4072
+ },
4073
+ {
4074
+ "epoch": 8.35,
4075
+ "learning_rate": 9.189640768588137e-06,
4076
+ "loss": 0.3446,
4077
+ "step": 6660
4078
+ },
4079
+ {
4080
+ "epoch": 8.36,
4081
+ "learning_rate": 9.120022277917015e-06,
4082
+ "loss": 0.3406,
4083
+ "step": 6670
4084
+ },
4085
+ {
4086
+ "epoch": 8.37,
4087
+ "learning_rate": 9.050403787245893e-06,
4088
+ "loss": 0.3119,
4089
+ "step": 6680
4090
+ },
4091
+ {
4092
+ "epoch": 8.38,
4093
+ "learning_rate": 8.98078529657477e-06,
4094
+ "loss": 0.3102,
4095
+ "step": 6690
4096
+ },
4097
+ {
4098
+ "epoch": 8.4,
4099
+ "learning_rate": 8.911166805903649e-06,
4100
+ "loss": 0.3051,
4101
+ "step": 6700
4102
+ },
4103
+ {
4104
+ "epoch": 8.41,
4105
+ "learning_rate": 8.841548315232525e-06,
4106
+ "loss": 0.3509,
4107
+ "step": 6710
4108
+ },
4109
+ {
4110
+ "epoch": 8.42,
4111
+ "learning_rate": 8.771929824561403e-06,
4112
+ "loss": 0.3265,
4113
+ "step": 6720
4114
+ },
4115
+ {
4116
+ "epoch": 8.43,
4117
+ "learning_rate": 8.70231133389028e-06,
4118
+ "loss": 0.3522,
4119
+ "step": 6730
4120
+ },
4121
+ {
4122
+ "epoch": 8.45,
4123
+ "learning_rate": 8.632692843219159e-06,
4124
+ "loss": 0.3029,
4125
+ "step": 6740
4126
+ },
4127
+ {
4128
+ "epoch": 8.46,
4129
+ "learning_rate": 8.563074352548037e-06,
4130
+ "loss": 0.317,
4131
+ "step": 6750
4132
+ },
4133
+ {
4134
+ "epoch": 8.47,
4135
+ "learning_rate": 8.493455861876915e-06,
4136
+ "loss": 0.3446,
4137
+ "step": 6760
4138
+ },
4139
+ {
4140
+ "epoch": 8.48,
4141
+ "learning_rate": 8.423837371205792e-06,
4142
+ "loss": 0.3627,
4143
+ "step": 6770
4144
+ },
4145
+ {
4146
+ "epoch": 8.5,
4147
+ "learning_rate": 8.35421888053467e-06,
4148
+ "loss": 0.3583,
4149
+ "step": 6780
4150
+ },
4151
+ {
4152
+ "epoch": 8.51,
4153
+ "learning_rate": 8.284600389863548e-06,
4154
+ "loss": 0.3106,
4155
+ "step": 6790
4156
+ },
4157
+ {
4158
+ "epoch": 8.52,
4159
+ "learning_rate": 8.214981899192426e-06,
4160
+ "loss": 0.3142,
4161
+ "step": 6800
4162
+ },
4163
+ {
4164
+ "epoch": 8.53,
4165
+ "learning_rate": 8.145363408521302e-06,
4166
+ "loss": 0.3031,
4167
+ "step": 6810
4168
+ },
4169
+ {
4170
+ "epoch": 8.55,
4171
+ "learning_rate": 8.07574491785018e-06,
4172
+ "loss": 0.3235,
4173
+ "step": 6820
4174
+ },
4175
+ {
4176
+ "epoch": 8.56,
4177
+ "learning_rate": 8.006126427179058e-06,
4178
+ "loss": 0.3186,
4179
+ "step": 6830
4180
+ },
4181
+ {
4182
+ "epoch": 8.57,
4183
+ "learning_rate": 7.936507936507936e-06,
4184
+ "loss": 0.2935,
4185
+ "step": 6840
4186
+ },
4187
+ {
4188
+ "epoch": 8.58,
4189
+ "learning_rate": 7.866889445836814e-06,
4190
+ "loss": 0.2895,
4191
+ "step": 6850
4192
+ },
4193
+ {
4194
+ "epoch": 8.6,
4195
+ "learning_rate": 7.797270955165692e-06,
4196
+ "loss": 0.3252,
4197
+ "step": 6860
4198
+ },
4199
+ {
4200
+ "epoch": 8.61,
4201
+ "learning_rate": 7.72765246449457e-06,
4202
+ "loss": 0.3128,
4203
+ "step": 6870
4204
+ },
4205
+ {
4206
+ "epoch": 8.62,
4207
+ "learning_rate": 7.658033973823448e-06,
4208
+ "loss": 0.3176,
4209
+ "step": 6880
4210
+ },
4211
+ {
4212
+ "epoch": 8.63,
4213
+ "learning_rate": 7.588415483152325e-06,
4214
+ "loss": 0.3343,
4215
+ "step": 6890
4216
+ },
4217
+ {
4218
+ "epoch": 8.65,
4219
+ "learning_rate": 7.518796992481203e-06,
4220
+ "loss": 0.3738,
4221
+ "step": 6900
4222
+ },
4223
+ {
4224
+ "epoch": 8.66,
4225
+ "learning_rate": 7.449178501810081e-06,
4226
+ "loss": 0.3144,
4227
+ "step": 6910
4228
+ },
4229
+ {
4230
+ "epoch": 8.67,
4231
+ "learning_rate": 7.379560011138959e-06,
4232
+ "loss": 0.3808,
4233
+ "step": 6920
4234
+ },
4235
+ {
4236
+ "epoch": 8.68,
4237
+ "learning_rate": 7.3099415204678366e-06,
4238
+ "loss": 0.3089,
4239
+ "step": 6930
4240
+ },
4241
+ {
4242
+ "epoch": 8.7,
4243
+ "learning_rate": 7.240323029796714e-06,
4244
+ "loss": 0.3327,
4245
+ "step": 6940
4246
+ },
4247
+ {
4248
+ "epoch": 8.71,
4249
+ "learning_rate": 7.1707045391255915e-06,
4250
+ "loss": 0.3289,
4251
+ "step": 6950
4252
+ },
4253
+ {
4254
+ "epoch": 8.72,
4255
+ "learning_rate": 7.1010860484544695e-06,
4256
+ "loss": 0.3699,
4257
+ "step": 6960
4258
+ },
4259
+ {
4260
+ "epoch": 8.73,
4261
+ "learning_rate": 7.031467557783347e-06,
4262
+ "loss": 0.3061,
4263
+ "step": 6970
4264
+ },
4265
+ {
4266
+ "epoch": 8.75,
4267
+ "learning_rate": 6.961849067112225e-06,
4268
+ "loss": 0.3026,
4269
+ "step": 6980
4270
+ },
4271
+ {
4272
+ "epoch": 8.76,
4273
+ "learning_rate": 6.892230576441102e-06,
4274
+ "loss": 0.3233,
4275
+ "step": 6990
4276
+ },
4277
+ {
4278
+ "epoch": 8.77,
4279
+ "learning_rate": 6.82261208576998e-06,
4280
+ "loss": 0.3106,
4281
+ "step": 7000
4282
+ },
4283
+ {
4284
+ "epoch": 8.78,
4285
+ "learning_rate": 6.752993595098858e-06,
4286
+ "loss": 0.3429,
4287
+ "step": 7010
4288
+ },
4289
+ {
4290
+ "epoch": 8.8,
4291
+ "learning_rate": 6.683375104427736e-06,
4292
+ "loss": 0.3397,
4293
+ "step": 7020
4294
+ },
4295
+ {
4296
+ "epoch": 8.81,
4297
+ "learning_rate": 6.613756613756614e-06,
4298
+ "loss": 0.352,
4299
+ "step": 7030
4300
+ },
4301
+ {
4302
+ "epoch": 8.82,
4303
+ "learning_rate": 6.544138123085491e-06,
4304
+ "loss": 0.2803,
4305
+ "step": 7040
4306
+ },
4307
+ {
4308
+ "epoch": 8.83,
4309
+ "learning_rate": 6.474519632414369e-06,
4310
+ "loss": 0.3335,
4311
+ "step": 7050
4312
+ },
4313
+ {
4314
+ "epoch": 8.85,
4315
+ "learning_rate": 6.404901141743247e-06,
4316
+ "loss": 0.3054,
4317
+ "step": 7060
4318
+ },
4319
+ {
4320
+ "epoch": 8.86,
4321
+ "learning_rate": 6.335282651072125e-06,
4322
+ "loss": 0.3383,
4323
+ "step": 7070
4324
+ },
4325
+ {
4326
+ "epoch": 8.87,
4327
+ "learning_rate": 6.265664160401003e-06,
4328
+ "loss": 0.3237,
4329
+ "step": 7080
4330
+ },
4331
+ {
4332
+ "epoch": 8.88,
4333
+ "learning_rate": 6.19604566972988e-06,
4334
+ "loss": 0.3227,
4335
+ "step": 7090
4336
+ },
4337
+ {
4338
+ "epoch": 8.9,
4339
+ "learning_rate": 6.126427179058758e-06,
4340
+ "loss": 0.3375,
4341
+ "step": 7100
4342
+ },
4343
+ {
4344
+ "epoch": 8.91,
4345
+ "learning_rate": 6.056808688387636e-06,
4346
+ "loss": 0.2785,
4347
+ "step": 7110
4348
+ },
4349
+ {
4350
+ "epoch": 8.92,
4351
+ "learning_rate": 5.987190197716514e-06,
4352
+ "loss": 0.31,
4353
+ "step": 7120
4354
+ },
4355
+ {
4356
+ "epoch": 8.93,
4357
+ "learning_rate": 5.9175717070453915e-06,
4358
+ "loss": 0.3472,
4359
+ "step": 7130
4360
+ },
4361
+ {
4362
+ "epoch": 8.95,
4363
+ "learning_rate": 5.8479532163742686e-06,
4364
+ "loss": 0.3577,
4365
+ "step": 7140
4366
+ },
4367
+ {
4368
+ "epoch": 8.96,
4369
+ "learning_rate": 5.7783347257031465e-06,
4370
+ "loss": 0.3299,
4371
+ "step": 7150
4372
+ },
4373
+ {
4374
+ "epoch": 8.97,
4375
+ "learning_rate": 5.708716235032024e-06,
4376
+ "loss": 0.2989,
4377
+ "step": 7160
4378
+ },
4379
+ {
4380
+ "epoch": 8.98,
4381
+ "learning_rate": 5.639097744360902e-06,
4382
+ "loss": 0.2973,
4383
+ "step": 7170
4384
+ },
4385
+ {
4386
+ "epoch": 9.0,
4387
+ "learning_rate": 5.56947925368978e-06,
4388
+ "loss": 0.3112,
4389
+ "step": 7180
4390
+ },
4391
+ {
4392
+ "epoch": 9.0,
4393
+ "eval_accuracy": 0.8556547619047619,
4394
+ "eval_loss": 0.4372400641441345,
4395
+ "eval_runtime": 234.8854,
4396
+ "eval_samples_per_second": 108.717,
4397
+ "eval_steps_per_second": 3.397,
4398
+ "step": 7182
4399
+ },
4400
+ {
4401
+ "epoch": 9.01,
4402
+ "learning_rate": 5.499860763018657e-06,
4403
+ "loss": 0.2907,
4404
+ "step": 7190
4405
+ },
4406
+ {
4407
+ "epoch": 9.02,
4408
+ "learning_rate": 5.430242272347535e-06,
4409
+ "loss": 0.3028,
4410
+ "step": 7200
4411
+ },
4412
+ {
4413
+ "epoch": 9.04,
4414
+ "learning_rate": 5.360623781676413e-06,
4415
+ "loss": 0.2706,
4416
+ "step": 7210
4417
+ },
4418
+ {
4419
+ "epoch": 9.05,
4420
+ "learning_rate": 5.291005291005291e-06,
4421
+ "loss": 0.2695,
4422
+ "step": 7220
4423
+ },
4424
+ {
4425
+ "epoch": 9.06,
4426
+ "learning_rate": 5.221386800334169e-06,
4427
+ "loss": 0.3298,
4428
+ "step": 7230
4429
+ },
4430
+ {
4431
+ "epoch": 9.07,
4432
+ "learning_rate": 5.151768309663046e-06,
4433
+ "loss": 0.2717,
4434
+ "step": 7240
4435
+ },
4436
+ {
4437
+ "epoch": 9.09,
4438
+ "learning_rate": 5.082149818991924e-06,
4439
+ "loss": 0.2785,
4440
+ "step": 7250
4441
+ },
4442
+ {
4443
+ "epoch": 9.1,
4444
+ "learning_rate": 5.012531328320802e-06,
4445
+ "loss": 0.2635,
4446
+ "step": 7260
4447
+ },
4448
+ {
4449
+ "epoch": 9.11,
4450
+ "learning_rate": 4.94291283764968e-06,
4451
+ "loss": 0.305,
4452
+ "step": 7270
4453
+ },
4454
+ {
4455
+ "epoch": 9.12,
4456
+ "learning_rate": 4.873294346978558e-06,
4457
+ "loss": 0.2786,
4458
+ "step": 7280
4459
+ },
4460
+ {
4461
+ "epoch": 9.14,
4462
+ "learning_rate": 4.803675856307436e-06,
4463
+ "loss": 0.2568,
4464
+ "step": 7290
4465
+ },
4466
+ {
4467
+ "epoch": 9.15,
4468
+ "learning_rate": 4.734057365636313e-06,
4469
+ "loss": 0.261,
4470
+ "step": 7300
4471
+ },
4472
+ {
4473
+ "epoch": 9.16,
4474
+ "learning_rate": 4.664438874965191e-06,
4475
+ "loss": 0.2548,
4476
+ "step": 7310
4477
+ },
4478
+ {
4479
+ "epoch": 9.17,
4480
+ "learning_rate": 4.5948203842940685e-06,
4481
+ "loss": 0.2698,
4482
+ "step": 7320
4483
+ },
4484
+ {
4485
+ "epoch": 9.19,
4486
+ "learning_rate": 4.5252018936229464e-06,
4487
+ "loss": 0.2965,
4488
+ "step": 7330
4489
+ },
4490
+ {
4491
+ "epoch": 9.2,
4492
+ "learning_rate": 4.455583402951824e-06,
4493
+ "loss": 0.2601,
4494
+ "step": 7340
4495
+ },
4496
+ {
4497
+ "epoch": 9.21,
4498
+ "learning_rate": 4.3859649122807014e-06,
4499
+ "loss": 0.2512,
4500
+ "step": 7350
4501
+ },
4502
+ {
4503
+ "epoch": 9.22,
4504
+ "learning_rate": 4.316346421609579e-06,
4505
+ "loss": 0.3007,
4506
+ "step": 7360
4507
+ },
4508
+ {
4509
+ "epoch": 9.24,
4510
+ "learning_rate": 4.246727930938457e-06,
4511
+ "loss": 0.2669,
4512
+ "step": 7370
4513
+ },
4514
+ {
4515
+ "epoch": 9.25,
4516
+ "learning_rate": 4.177109440267335e-06,
4517
+ "loss": 0.2709,
4518
+ "step": 7380
4519
+ },
4520
+ {
4521
+ "epoch": 9.26,
4522
+ "learning_rate": 4.107490949596213e-06,
4523
+ "loss": 0.2455,
4524
+ "step": 7390
4525
+ },
4526
+ {
4527
+ "epoch": 9.27,
4528
+ "learning_rate": 4.03787245892509e-06,
4529
+ "loss": 0.2737,
4530
+ "step": 7400
4531
+ },
4532
+ {
4533
+ "epoch": 9.29,
4534
+ "learning_rate": 3.968253968253968e-06,
4535
+ "loss": 0.2845,
4536
+ "step": 7410
4537
+ },
4538
+ {
4539
+ "epoch": 9.3,
4540
+ "learning_rate": 3.898635477582846e-06,
4541
+ "loss": 0.2437,
4542
+ "step": 7420
4543
+ },
4544
+ {
4545
+ "epoch": 9.31,
4546
+ "learning_rate": 3.829016986911724e-06,
4547
+ "loss": 0.2716,
4548
+ "step": 7430
4549
+ },
4550
+ {
4551
+ "epoch": 9.32,
4552
+ "learning_rate": 3.7593984962406014e-06,
4553
+ "loss": 0.2764,
4554
+ "step": 7440
4555
+ },
4556
+ {
4557
+ "epoch": 9.34,
4558
+ "learning_rate": 3.6897800055694793e-06,
4559
+ "loss": 0.2819,
4560
+ "step": 7450
4561
+ },
4562
+ {
4563
+ "epoch": 9.35,
4564
+ "learning_rate": 3.620161514898357e-06,
4565
+ "loss": 0.2976,
4566
+ "step": 7460
4567
+ },
4568
+ {
4569
+ "epoch": 9.36,
4570
+ "learning_rate": 3.5505430242272347e-06,
4571
+ "loss": 0.2553,
4572
+ "step": 7470
4573
+ },
4574
+ {
4575
+ "epoch": 9.37,
4576
+ "learning_rate": 3.4809245335561126e-06,
4577
+ "loss": 0.2817,
4578
+ "step": 7480
4579
+ },
4580
+ {
4581
+ "epoch": 9.39,
4582
+ "learning_rate": 3.41130604288499e-06,
4583
+ "loss": 0.2809,
4584
+ "step": 7490
4585
+ },
4586
+ {
4587
+ "epoch": 9.4,
4588
+ "learning_rate": 3.341687552213868e-06,
4589
+ "loss": 0.254,
4590
+ "step": 7500
4591
+ },
4592
+ {
4593
+ "epoch": 9.41,
4594
+ "learning_rate": 3.2720690615427456e-06,
4595
+ "loss": 0.2814,
4596
+ "step": 7510
4597
+ },
4598
+ {
4599
+ "epoch": 9.42,
4600
+ "learning_rate": 3.2024505708716235e-06,
4601
+ "loss": 0.2923,
4602
+ "step": 7520
4603
+ },
4604
+ {
4605
+ "epoch": 9.44,
4606
+ "learning_rate": 3.1328320802005014e-06,
4607
+ "loss": 0.2792,
4608
+ "step": 7530
4609
+ },
4610
+ {
4611
+ "epoch": 9.45,
4612
+ "learning_rate": 3.063213589529379e-06,
4613
+ "loss": 0.2843,
4614
+ "step": 7540
4615
+ },
4616
+ {
4617
+ "epoch": 9.46,
4618
+ "learning_rate": 2.993595098858257e-06,
4619
+ "loss": 0.2724,
4620
+ "step": 7550
4621
+ },
4622
+ {
4623
+ "epoch": 9.47,
4624
+ "learning_rate": 2.9239766081871343e-06,
4625
+ "loss": 0.2918,
4626
+ "step": 7560
4627
+ },
4628
+ {
4629
+ "epoch": 9.49,
4630
+ "learning_rate": 2.854358117516012e-06,
4631
+ "loss": 0.2756,
4632
+ "step": 7570
4633
+ },
4634
+ {
4635
+ "epoch": 9.5,
4636
+ "learning_rate": 2.78473962684489e-06,
4637
+ "loss": 0.303,
4638
+ "step": 7580
4639
+ },
4640
+ {
4641
+ "epoch": 9.51,
4642
+ "learning_rate": 2.7151211361737676e-06,
4643
+ "loss": 0.2556,
4644
+ "step": 7590
4645
+ },
4646
+ {
4647
+ "epoch": 9.52,
4648
+ "learning_rate": 2.6455026455026455e-06,
4649
+ "loss": 0.2687,
4650
+ "step": 7600
4651
+ },
4652
+ {
4653
+ "epoch": 9.54,
4654
+ "learning_rate": 2.575884154831523e-06,
4655
+ "loss": 0.2527,
4656
+ "step": 7610
4657
+ },
4658
+ {
4659
+ "epoch": 9.55,
4660
+ "learning_rate": 2.506265664160401e-06,
4661
+ "loss": 0.2665,
4662
+ "step": 7620
4663
+ },
4664
+ {
4665
+ "epoch": 9.56,
4666
+ "learning_rate": 2.436647173489279e-06,
4667
+ "loss": 0.287,
4668
+ "step": 7630
4669
+ },
4670
+ {
4671
+ "epoch": 9.57,
4672
+ "learning_rate": 2.3670286828181563e-06,
4673
+ "loss": 0.3368,
4674
+ "step": 7640
4675
+ },
4676
+ {
4677
+ "epoch": 9.59,
4678
+ "learning_rate": 2.2974101921470343e-06,
4679
+ "loss": 0.2389,
4680
+ "step": 7650
4681
+ },
4682
+ {
4683
+ "epoch": 9.6,
4684
+ "learning_rate": 2.227791701475912e-06,
4685
+ "loss": 0.2775,
4686
+ "step": 7660
4687
+ },
4688
+ {
4689
+ "epoch": 9.61,
4690
+ "learning_rate": 2.1581732108047897e-06,
4691
+ "loss": 0.2854,
4692
+ "step": 7670
4693
+ },
4694
+ {
4695
+ "epoch": 9.62,
4696
+ "learning_rate": 2.0885547201336676e-06,
4697
+ "loss": 0.2705,
4698
+ "step": 7680
4699
+ },
4700
+ {
4701
+ "epoch": 9.64,
4702
+ "learning_rate": 2.018936229462545e-06,
4703
+ "loss": 0.2666,
4704
+ "step": 7690
4705
+ },
4706
+ {
4707
+ "epoch": 9.65,
4708
+ "learning_rate": 1.949317738791423e-06,
4709
+ "loss": 0.2618,
4710
+ "step": 7700
4711
+ },
4712
+ {
4713
+ "epoch": 9.66,
4714
+ "learning_rate": 1.8796992481203007e-06,
4715
+ "loss": 0.2613,
4716
+ "step": 7710
4717
+ },
4718
+ {
4719
+ "epoch": 9.67,
4720
+ "learning_rate": 1.8100807574491784e-06,
4721
+ "loss": 0.2666,
4722
+ "step": 7720
4723
+ },
4724
+ {
4725
+ "epoch": 9.69,
4726
+ "learning_rate": 1.7404622667780563e-06,
4727
+ "loss": 0.2302,
4728
+ "step": 7730
4729
+ },
4730
+ {
4731
+ "epoch": 9.7,
4732
+ "learning_rate": 1.670843776106934e-06,
4733
+ "loss": 0.2484,
4734
+ "step": 7740
4735
+ },
4736
+ {
4737
+ "epoch": 9.71,
4738
+ "learning_rate": 1.6012252854358117e-06,
4739
+ "loss": 0.2419,
4740
+ "step": 7750
4741
+ },
4742
+ {
4743
+ "epoch": 9.72,
4744
+ "learning_rate": 1.5316067947646894e-06,
4745
+ "loss": 0.2479,
4746
+ "step": 7760
4747
+ },
4748
+ {
4749
+ "epoch": 9.74,
4750
+ "learning_rate": 1.4619883040935671e-06,
4751
+ "loss": 0.2976,
4752
+ "step": 7770
4753
+ },
4754
+ {
4755
+ "epoch": 9.75,
4756
+ "learning_rate": 1.392369813422445e-06,
4757
+ "loss": 0.2675,
4758
+ "step": 7780
4759
+ },
4760
+ {
4761
+ "epoch": 9.76,
4762
+ "learning_rate": 1.3227513227513228e-06,
4763
+ "loss": 0.2993,
4764
+ "step": 7790
4765
+ },
4766
+ {
4767
+ "epoch": 9.77,
4768
+ "learning_rate": 1.2531328320802005e-06,
4769
+ "loss": 0.2943,
4770
+ "step": 7800
4771
+ },
4772
+ {
4773
+ "epoch": 9.79,
4774
+ "learning_rate": 1.1835143414090782e-06,
4775
+ "loss": 0.3028,
4776
+ "step": 7810
4777
+ },
4778
+ {
4779
+ "epoch": 9.8,
4780
+ "learning_rate": 1.113895850737956e-06,
4781
+ "loss": 0.2926,
4782
+ "step": 7820
4783
+ },
4784
+ {
4785
+ "epoch": 9.81,
4786
+ "learning_rate": 1.0442773600668338e-06,
4787
+ "loss": 0.2558,
4788
+ "step": 7830
4789
+ },
4790
+ {
4791
+ "epoch": 9.82,
4792
+ "learning_rate": 9.746588693957115e-07,
4793
+ "loss": 0.2556,
4794
+ "step": 7840
4795
+ },
4796
+ {
4797
+ "epoch": 9.84,
4798
+ "learning_rate": 9.050403787245892e-07,
4799
+ "loss": 0.2531,
4800
+ "step": 7850
4801
+ },
4802
+ {
4803
+ "epoch": 9.85,
4804
+ "learning_rate": 8.35421888053467e-07,
4805
+ "loss": 0.2996,
4806
+ "step": 7860
4807
+ },
4808
+ {
4809
+ "epoch": 9.86,
4810
+ "learning_rate": 7.658033973823447e-07,
4811
+ "loss": 0.2825,
4812
+ "step": 7870
4813
+ },
4814
+ {
4815
+ "epoch": 9.87,
4816
+ "learning_rate": 6.961849067112225e-07,
4817
+ "loss": 0.2828,
4818
+ "step": 7880
4819
+ },
4820
+ {
4821
+ "epoch": 9.89,
4822
+ "learning_rate": 6.265664160401002e-07,
4823
+ "loss": 0.2608,
4824
+ "step": 7890
4825
+ },
4826
+ {
4827
+ "epoch": 9.9,
4828
+ "learning_rate": 5.56947925368978e-07,
4829
+ "loss": 0.2855,
4830
+ "step": 7900
4831
+ },
4832
+ {
4833
+ "epoch": 9.91,
4834
+ "learning_rate": 4.873294346978557e-07,
4835
+ "loss": 0.255,
4836
+ "step": 7910
4837
+ },
4838
+ {
4839
+ "epoch": 9.92,
4840
+ "learning_rate": 4.177109440267335e-07,
4841
+ "loss": 0.2583,
4842
+ "step": 7920
4843
+ },
4844
+ {
4845
+ "epoch": 9.94,
4846
+ "learning_rate": 3.4809245335561126e-07,
4847
+ "loss": 0.2945,
4848
+ "step": 7930
4849
+ },
4850
+ {
4851
+ "epoch": 9.95,
4852
+ "learning_rate": 2.78473962684489e-07,
4853
+ "loss": 0.2671,
4854
+ "step": 7940
4855
+ },
4856
+ {
4857
+ "epoch": 9.96,
4858
+ "learning_rate": 2.0885547201336675e-07,
4859
+ "loss": 0.2847,
4860
+ "step": 7950
4861
+ },
4862
+ {
4863
+ "epoch": 9.97,
4864
+ "learning_rate": 1.392369813422445e-07,
4865
+ "loss": 0.2756,
4866
+ "step": 7960
4867
+ },
4868
+ {
4869
+ "epoch": 9.99,
4870
+ "learning_rate": 6.961849067112226e-08,
4871
+ "loss": 0.2689,
4872
+ "step": 7970
4873
+ },
4874
+ {
4875
+ "epoch": 10.0,
4876
+ "learning_rate": 0.0,
4877
+ "loss": 0.2692,
4878
+ "step": 7980
4879
+ },
4880
+ {
4881
+ "epoch": 10.0,
4882
+ "eval_accuracy": 0.8571428571428571,
4883
+ "eval_loss": 0.4352613687515259,
4884
+ "eval_runtime": 236.1141,
4885
+ "eval_samples_per_second": 108.151,
4886
+ "eval_steps_per_second": 3.38,
4887
+ "step": 7980
4888
+ },
4889
+ {
4890
+ "epoch": 10.0,
4891
+ "step": 7980,
4892
+ "total_flos": 7.915696500716863e+19,
4893
+ "train_loss": 0.14746467811720712,
4894
+ "train_runtime": 9966.943,
4895
+ "train_samples_per_second": 102.483,
4896
+ "train_steps_per_second": 0.801
4897
  }
4898
  ],
4899
  "logging_steps": 10,
4900
+ "max_steps": 7980,
4901
  "num_input_tokens_seen": 0,
4902
+ "num_train_epochs": 10,
4903
  "save_steps": 500,
4904
+ "total_flos": 7.915696500716863e+19,
4905
  "train_batch_size": 32,
4906
  "trial_name": null,
4907
  "trial_params": null