rs545837 committed on
Commit
01c89d1
1 Parent(s): 3b40544

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +237 -0
trainer_state.json ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.6527161598205566,
3
+ "best_model_checkpoint": "./results/checkpoint-2000",
4
+ "epoch": 0.9996365838885524,
5
+ "eval_steps": 250,
6
+ "global_step": 2063,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04845548152634767,
13
+ "grad_norm": 0.2021484375,
14
+ "learning_rate": 0.0019926230341909047,
15
+ "loss": 5.5548,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.09691096305269534,
20
+ "grad_norm": 0.484375,
21
+ "learning_rate": 0.0019623193935821215,
22
+ "loss": 4.0801,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.12113870381586916,
27
+ "eval_loss": 3.651378631591797,
28
+ "eval_runtime": 1225.4443,
29
+ "eval_samples_per_second": 47.902,
30
+ "eval_steps_per_second": 1.497,
31
+ "step": 250
32
+ },
33
+ {
34
+ "epoch": 0.145366444579043,
35
+ "grad_norm": 0.6796875,
36
+ "learning_rate": 0.0019092830690827923,
37
+ "loss": 3.6511,
38
+ "step": 300
39
+ },
40
+ {
41
+ "epoch": 0.19382192610539067,
42
+ "grad_norm": 0.474609375,
43
+ "learning_rate": 0.0018347669274724923,
44
+ "loss": 3.4945,
45
+ "step": 400
46
+ },
47
+ {
48
+ "epoch": 0.24227740763173833,
49
+ "grad_norm": 0.51171875,
50
+ "learning_rate": 0.0017405312490731885,
51
+ "loss": 3.3662,
52
+ "step": 500
53
+ },
54
+ {
55
+ "epoch": 0.24227740763173833,
56
+ "eval_loss": 3.2901480197906494,
57
+ "eval_runtime": 1223.1792,
58
+ "eval_samples_per_second": 47.991,
59
+ "eval_steps_per_second": 1.5,
60
+ "step": 500
61
+ },
62
+ {
63
+ "epoch": 0.290732889158086,
64
+ "grad_norm": 0.90234375,
65
+ "learning_rate": 0.0016288021449895208,
66
+ "loss": 3.2248,
67
+ "step": 600
68
+ },
69
+ {
70
+ "epoch": 0.3391883706844337,
71
+ "grad_norm": 0.5859375,
72
+ "learning_rate": 0.001502218970119089,
73
+ "loss": 3.1488,
74
+ "step": 700
75
+ },
76
+ {
77
+ "epoch": 0.3634161114476075,
78
+ "eval_loss": 3.054584503173828,
79
+ "eval_runtime": 1225.6755,
80
+ "eval_samples_per_second": 47.893,
81
+ "eval_steps_per_second": 1.497,
82
+ "step": 750
83
+ },
84
+ {
85
+ "epoch": 0.38764385221078135,
86
+ "grad_norm": 0.5546875,
87
+ "learning_rate": 0.001363771974184993,
88
+ "loss": 3.0595,
89
+ "step": 800
90
+ },
91
+ {
92
+ "epoch": 0.436099333737129,
93
+ "grad_norm": 0.498046875,
94
+ "learning_rate": 0.001216731663648138,
95
+ "loss": 2.9756,
96
+ "step": 900
97
+ },
98
+ {
99
+ "epoch": 0.48455481526347666,
100
+ "grad_norm": 0.423828125,
101
+ "learning_rate": 0.0010645715431691336,
102
+ "loss": 2.9173,
103
+ "step": 1000
104
+ },
105
+ {
106
+ "epoch": 0.48455481526347666,
107
+ "eval_loss": 2.87882399559021,
108
+ "eval_runtime": 1223.6,
109
+ "eval_samples_per_second": 47.974,
110
+ "eval_steps_per_second": 1.5,
111
+ "step": 1000
112
+ },
113
+ {
114
+ "epoch": 0.5330102967898244,
115
+ "grad_norm": 0.431640625,
116
+ "learning_rate": 0.0009108860616831929,
117
+ "loss": 2.853,
118
+ "step": 1100
119
+ },
120
+ {
121
+ "epoch": 0.581465778316172,
122
+ "grad_norm": 0.546875,
123
+ "learning_rate": 0.0007593057014319566,
124
+ "loss": 2.7923,
125
+ "step": 1200
126
+ },
127
+ {
128
+ "epoch": 0.6056935190793459,
129
+ "eval_loss": 2.7545697689056396,
130
+ "eval_runtime": 1225.0225,
131
+ "eval_samples_per_second": 47.918,
132
+ "eval_steps_per_second": 1.498,
133
+ "step": 1250
134
+ },
135
+ {
136
+ "epoch": 0.6299212598425197,
137
+ "grad_norm": 0.392578125,
138
+ "learning_rate": 0.0006134112157875245,
139
+ "loss": 2.7647,
140
+ "step": 1300
141
+ },
142
+ {
143
+ "epoch": 0.6783767413688674,
144
+ "grad_norm": 0.30078125,
145
+ "learning_rate": 0.00047664904181190284,
146
+ "loss": 2.7262,
147
+ "step": 1400
148
+ },
149
+ {
150
+ "epoch": 0.726832222895215,
151
+ "grad_norm": 0.337890625,
152
+ "learning_rate": 0.0003522498857445232,
153
+ "loss": 2.692,
154
+ "step": 1500
155
+ },
156
+ {
157
+ "epoch": 0.726832222895215,
158
+ "eval_loss": 2.6806631088256836,
159
+ "eval_runtime": 1223.5768,
160
+ "eval_samples_per_second": 47.975,
161
+ "eval_steps_per_second": 1.5,
162
+ "step": 1500
163
+ },
164
+ {
165
+ "epoch": 0.7752877044215627,
166
+ "grad_norm": 0.302734375,
167
+ "learning_rate": 0.0002431524046570296,
168
+ "loss": 2.6635,
169
+ "step": 1600
170
+ },
171
+ {
172
+ "epoch": 0.8237431859479104,
173
+ "grad_norm": 0.33203125,
174
+ "learning_rate": 0.00015193378712876149,
175
+ "loss": 2.6561,
176
+ "step": 1700
177
+ },
178
+ {
179
+ "epoch": 0.8479709267110842,
180
+ "eval_loss": 2.655076503753662,
181
+ "eval_runtime": 1225.3417,
182
+ "eval_samples_per_second": 47.906,
183
+ "eval_steps_per_second": 1.498,
184
+ "step": 1750
185
+ },
186
+ {
187
+ "epoch": 0.872198667474258,
188
+ "grad_norm": 0.298828125,
189
+ "learning_rate": 8.074887282213439e-05,
190
+ "loss": 2.6481,
191
+ "step": 1800
192
+ },
193
+ {
194
+ "epoch": 0.9206541490006057,
195
+ "grad_norm": 0.26953125,
196
+ "learning_rate": 3.127924912435132e-05,
197
+ "loss": 2.6565,
198
+ "step": 1900
199
+ },
200
+ {
201
+ "epoch": 0.9691096305269533,
202
+ "grad_norm": 0.416015625,
203
+ "learning_rate": 4.693527335575154e-06,
204
+ "loss": 2.6513,
205
+ "step": 2000
206
+ },
207
+ {
208
+ "epoch": 0.9691096305269533,
209
+ "eval_loss": 2.6527161598205566,
210
+ "eval_runtime": 1223.4873,
211
+ "eval_samples_per_second": 47.978,
212
+ "eval_steps_per_second": 1.5,
213
+ "step": 2000
214
+ }
215
+ ],
216
+ "logging_steps": 100,
217
+ "max_steps": 2063,
218
+ "num_input_tokens_seen": 0,
219
+ "num_train_epochs": 1,
220
+ "save_steps": 250,
221
+ "stateful_callbacks": {
222
+ "TrainerControl": {
223
+ "args": {
224
+ "should_epoch_stop": false,
225
+ "should_evaluate": false,
226
+ "should_log": false,
227
+ "should_save": true,
228
+ "should_training_stop": true
229
+ },
230
+ "attributes": {}
231
+ }
232
+ },
233
+ "total_flos": 4.825965054971085e+17,
234
+ "train_batch_size": 4,
235
+ "trial_name": null,
236
+ "trial_params": null
237
+ }