shizhediao2 commited on
Commit
ca5269f
1 Parent(s): d6daba6

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +200 -0
trainer_state.json ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 200,
6
+ "global_step": 412,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04854368932038835,
13
+ "grad_norm": 0.19167186319828033,
14
+ "learning_rate": 4.970984274562741e-06,
15
+ "loss": 0.5965,
16
+ "step": 20
17
+ },
18
+ {
19
+ "epoch": 0.0970873786407767,
20
+ "grad_norm": 0.10218532383441925,
21
+ "learning_rate": 4.884610628109082e-06,
22
+ "loss": 0.5749,
23
+ "step": 40
24
+ },
25
+ {
26
+ "epoch": 0.14563106796116504,
27
+ "grad_norm": 0.0955277681350708,
28
+ "learning_rate": 4.742884015847436e-06,
29
+ "loss": 0.5653,
30
+ "step": 60
31
+ },
32
+ {
33
+ "epoch": 0.1941747572815534,
34
+ "grad_norm": 0.09652815014123917,
35
+ "learning_rate": 4.549094278152631e-06,
36
+ "loss": 0.5592,
37
+ "step": 80
38
+ },
39
+ {
40
+ "epoch": 0.24271844660194175,
41
+ "grad_norm": 0.09276870638132095,
42
+ "learning_rate": 4.307739774881878e-06,
43
+ "loss": 0.5562,
44
+ "step": 100
45
+ },
46
+ {
47
+ "epoch": 0.2912621359223301,
48
+ "grad_norm": 0.09189510345458984,
49
+ "learning_rate": 4.024422966835137e-06,
50
+ "loss": 0.5518,
51
+ "step": 120
52
+ },
53
+ {
54
+ "epoch": 0.33980582524271846,
55
+ "grad_norm": 0.09179496020078659,
56
+ "learning_rate": 3.7057203681836407e-06,
57
+ "loss": 0.551,
58
+ "step": 140
59
+ },
60
+ {
61
+ "epoch": 0.3883495145631068,
62
+ "grad_norm": 0.09196960926055908,
63
+ "learning_rate": 3.3590298886062833e-06,
64
+ "loss": 0.5482,
65
+ "step": 160
66
+ },
67
+ {
68
+ "epoch": 0.4368932038834951,
69
+ "grad_norm": 0.09062644839286804,
70
+ "learning_rate": 2.9923991087167657e-06,
71
+ "loss": 0.5461,
72
+ "step": 180
73
+ },
74
+ {
75
+ "epoch": 0.4854368932038835,
76
+ "grad_norm": 0.09341968595981598,
77
+ "learning_rate": 2.614338474951987e-06,
78
+ "loss": 0.5446,
79
+ "step": 200
80
+ },
81
+ {
82
+ "epoch": 0.4854368932038835,
83
+ "eval_accuracy": 0.32638514992244894,
84
+ "eval_loss": 0.5471854209899902,
85
+ "eval_runtime": 32.0389,
86
+ "eval_samples_per_second": 132.027,
87
+ "eval_steps_per_second": 0.531,
88
+ "step": 200
89
+ },
90
+ {
91
+ "epoch": 0.5339805825242718,
92
+ "grad_norm": 0.09235095232725143,
93
+ "learning_rate": 2.2336237501503103e-06,
94
+ "loss": 0.5411,
95
+ "step": 220
96
+ },
97
+ {
98
+ "epoch": 0.5825242718446602,
99
+ "grad_norm": 0.09220809489488602,
100
+ "learning_rate": 1.8590923054515504e-06,
101
+ "loss": 0.543,
102
+ "step": 240
103
+ },
104
+ {
105
+ "epoch": 0.6310679611650486,
106
+ "grad_norm": 0.0929255411028862,
107
+ "learning_rate": 1.499437982109305e-06,
108
+ "loss": 0.5411,
109
+ "step": 260
110
+ },
111
+ {
112
+ "epoch": 0.6796116504854369,
113
+ "grad_norm": 0.09099574387073517,
114
+ "learning_rate": 1.1630092850023148e-06,
115
+ "loss": 0.5423,
116
+ "step": 280
117
+ },
118
+ {
119
+ "epoch": 0.7281553398058253,
120
+ "grad_norm": 0.08963935077190399,
121
+ "learning_rate": 8.576155922941548e-07,
122
+ "loss": 0.5397,
123
+ "step": 300
124
+ },
125
+ {
126
+ "epoch": 0.7766990291262136,
127
+ "grad_norm": 0.09014247357845306,
128
+ "learning_rate": 5.903458796151382e-07,
129
+ "loss": 0.5414,
130
+ "step": 320
131
+ },
132
+ {
133
+ "epoch": 0.8252427184466019,
134
+ "grad_norm": 0.0917976126074791,
135
+ "learning_rate": 3.6740416664589634e-07,
136
+ "loss": 0.5401,
137
+ "step": 340
138
+ },
139
+ {
140
+ "epoch": 0.8737864077669902,
141
+ "grad_norm": 0.09078697115182877,
142
+ "learning_rate": 1.9396550581205208e-07,
143
+ "loss": 0.537,
144
+ "step": 360
145
+ },
146
+ {
147
+ "epoch": 0.9223300970873787,
148
+ "grad_norm": 0.09018886834383011,
149
+ "learning_rate": 7.405585596397314e-08,
150
+ "loss": 0.5391,
151
+ "step": 380
152
+ },
153
+ {
154
+ "epoch": 0.970873786407767,
155
+ "grad_norm": 0.09031691402196884,
156
+ "learning_rate": 1.0458629483476868e-08,
157
+ "loss": 0.5418,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 0.970873786407767,
162
+ "eval_accuracy": 0.3268526506529188,
163
+ "eval_loss": 0.5428585410118103,
164
+ "eval_runtime": 26.2685,
165
+ "eval_samples_per_second": 161.029,
166
+ "eval_steps_per_second": 0.647,
167
+ "step": 400
168
+ },
169
+ {
170
+ "epoch": 1.0,
171
+ "step": 412,
172
+ "total_flos": 7.628990136885182e+18,
173
+ "train_loss": 0.5495563189960221,
174
+ "train_runtime": 3539.9459,
175
+ "train_samples_per_second": 119.039,
176
+ "train_steps_per_second": 0.116
177
+ }
178
+ ],
179
+ "logging_steps": 20,
180
+ "max_steps": 412,
181
+ "num_input_tokens_seen": 0,
182
+ "num_train_epochs": 1,
183
+ "save_steps": 200,
184
+ "stateful_callbacks": {
185
+ "TrainerControl": {
186
+ "args": {
187
+ "should_epoch_stop": false,
188
+ "should_evaluate": false,
189
+ "should_log": false,
190
+ "should_save": true,
191
+ "should_training_stop": true
192
+ },
193
+ "attributes": {}
194
+ }
195
+ },
196
+ "total_flos": 7.628990136885182e+18,
197
+ "train_batch_size": 32,
198
+ "trial_name": null,
199
+ "trial_params": null
200
+ }