tiagoblima committed on
Commit
9b61df6
1 Parent(s): 3d93a99

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. all_results.json +8 -8
  3. eval_results.json +4 -4
  4. train_results.json +4 -4
  5. trainer_state.json +214 -94
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: unicamp-dl/ptt5-large-t5-vocab
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: t5_large-qg-aap
8
  results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # t5_large-qg-aap
15
 
16
- This model is a fine-tuned version of [unicamp-dl/ptt5-large-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-large-t5-vocab) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - Loss: 5.5901
19
 
 
3
  base_model: unicamp-dl/ptt5-large-t5-vocab
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tiagoblima/qg_squad_v1_pt
8
  model-index:
9
  - name: t5_large-qg-aap
10
  results: []
 
15
 
16
  # t5_large-qg-aap
17
 
18
+ This model is a fine-tuned version of [unicamp-dl/ptt5-large-t5-vocab](https://huggingface.co/unicamp-dl/ptt5-large-t5-vocab) on the tiagoblima/qg_squad_v1_pt dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 5.5901
21
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 7.420807361602783,
4
- "eval_runtime": 587.2121,
5
  "eval_samples": 8869,
6
- "eval_samples_per_second": 15.104,
7
- "eval_steps_per_second": 7.553,
8
- "train_loss": 6.77048750395822,
9
- "train_runtime": 16676.4775,
10
  "train_samples": 51704,
11
- "train_samples_per_second": 15.502,
12
- "train_steps_per_second": 0.121
13
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 5.590092658996582,
4
+ "eval_runtime": 526.161,
5
  "eval_samples": 8869,
6
+ "eval_samples_per_second": 16.856,
7
+ "eval_steps_per_second": 4.215,
8
+ "train_loss": 5.410919136576133,
9
+ "train_runtime": 16658.5174,
10
  "train_samples": 51704,
11
+ "train_samples_per_second": 15.519,
12
+ "train_steps_per_second": 0.243
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 7.420807361602783,
4
- "eval_runtime": 587.2121,
5
  "eval_samples": 8869,
6
- "eval_samples_per_second": 15.104,
7
- "eval_steps_per_second": 7.553
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 5.590092658996582,
4
+ "eval_runtime": 526.161,
5
  "eval_samples": 8869,
6
+ "eval_samples_per_second": 16.856,
7
+ "eval_steps_per_second": 4.215
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 6.77048750395822,
4
- "train_runtime": 16676.4775,
5
  "train_samples": 51704,
6
- "train_samples_per_second": 15.502,
7
- "train_steps_per_second": 0.121
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 5.410919136576133,
4
+ "train_runtime": 16658.5174,
5
  "train_samples": 51704,
6
+ "train_samples_per_second": 15.519,
7
+ "train_steps_per_second": 0.243
8
  }
trainer_state.json CHANGED
@@ -1,185 +1,305 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 2020,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.25,
13
- "learning_rate": 0.0028514851485148514,
14
- "loss": 8.3611,
15
  "step": 100
16
  },
17
  {
18
- "epoch": 0.5,
19
- "learning_rate": 0.002702970297029703,
20
- "loss": 8.0607,
21
  "step": 200
22
  },
23
  {
24
- "epoch": 0.74,
25
- "learning_rate": 0.0025544554455445546,
26
- "loss": 7.7784,
27
  "step": 300
28
  },
29
  {
30
- "epoch": 0.99,
31
- "learning_rate": 0.0024059405940594063,
32
- "loss": 7.5413,
33
  "step": 400
34
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  "epoch": 1.0,
37
- "eval_loss": 8.850217819213867,
38
- "eval_runtime": 584.1219,
39
- "eval_samples_per_second": 15.183,
40
- "eval_steps_per_second": 7.593,
41
- "step": 404
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 1.24,
45
- "learning_rate": 0.0022574257425742577,
46
- "loss": 7.3312,
47
- "step": 500
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 1.49,
51
- "learning_rate": 0.0021089108910891086,
52
- "loss": 7.1317,
53
- "step": 600
 
 
 
 
 
 
54
  },
55
  {
56
  "epoch": 1.73,
57
- "learning_rate": 0.0019603960396039604,
58
- "loss": 6.9568,
59
- "step": 700
 
 
 
 
 
 
60
  },
61
  {
62
  "epoch": 1.98,
63
- "learning_rate": 0.0018118811881188118,
64
- "loss": 6.7965,
65
- "step": 800
66
  },
67
  {
68
  "epoch": 2.0,
69
- "eval_loss": 8.118351936340332,
70
- "eval_runtime": 586.5561,
71
- "eval_samples_per_second": 15.12,
72
- "eval_steps_per_second": 7.561,
73
- "step": 808
 
 
 
 
 
 
74
  },
75
  {
76
  "epoch": 2.23,
77
- "learning_rate": 0.0016633663366336635,
78
- "loss": 6.6621,
79
- "step": 900
 
 
 
 
 
 
80
  },
81
  {
82
  "epoch": 2.48,
83
- "learning_rate": 0.001514851485148515,
84
- "loss": 6.5571,
85
- "step": 1000
 
 
 
 
 
 
86
  },
87
  {
88
  "epoch": 2.72,
89
- "learning_rate": 0.0013663366336633665,
90
- "loss": 6.4736,
91
- "step": 1100
 
 
 
 
 
 
92
  },
93
  {
94
  "epoch": 2.97,
95
- "learning_rate": 0.001217821782178218,
96
- "loss": 6.3963,
97
- "step": 1200
98
  },
99
  {
100
  "epoch": 3.0,
101
- "eval_loss": 7.695041179656982,
102
- "eval_runtime": 586.9195,
103
- "eval_samples_per_second": 15.111,
104
- "eval_steps_per_second": 7.556,
105
- "step": 1212
 
 
 
 
 
 
106
  },
107
  {
108
  "epoch": 3.22,
109
- "learning_rate": 0.0010693069306930692,
110
- "loss": 6.3235,
111
- "step": 1300
 
 
 
 
 
 
112
  },
113
  {
114
  "epoch": 3.47,
115
- "learning_rate": 0.0009207920792079207,
116
- "loss": 6.283,
117
- "step": 1400
 
 
 
 
 
 
118
  },
119
  {
120
  "epoch": 3.71,
121
- "learning_rate": 0.0007722772277227723,
122
- "loss": 6.2224,
123
- "step": 1500
 
 
 
 
 
 
124
  },
125
  {
126
  "epoch": 3.96,
127
- "learning_rate": 0.0006237623762376238,
128
- "loss": 6.1664,
129
- "step": 1600
130
  },
131
  {
132
  "epoch": 4.0,
133
- "eval_loss": 7.485514163970947,
134
- "eval_runtime": 587.006,
135
- "eval_samples_per_second": 15.109,
136
- "eval_steps_per_second": 7.555,
137
- "step": 1616
 
 
 
 
 
 
138
  },
139
  {
140
  "epoch": 4.21,
141
- "learning_rate": 0.0004752475247524753,
142
- "loss": 6.1536,
143
- "step": 1700
 
 
 
 
 
 
144
  },
145
  {
146
  "epoch": 4.46,
147
- "learning_rate": 0.00032673267326732675,
148
- "loss": 6.1402,
149
- "step": 1800
 
 
 
 
 
 
150
  },
151
  {
152
  "epoch": 4.7,
153
- "learning_rate": 0.0001782178217821782,
154
- "loss": 6.0973,
155
- "step": 1900
 
 
 
 
 
 
156
  },
157
  {
158
  "epoch": 4.95,
159
- "learning_rate": 2.9702970297029706e-05,
160
- "loss": 6.1028,
161
- "step": 2000
162
  },
163
  {
164
  "epoch": 5.0,
165
- "eval_loss": 7.420807361602783,
166
- "eval_runtime": 587.2049,
167
- "eval_samples_per_second": 15.104,
168
- "eval_steps_per_second": 7.553,
169
- "step": 2020
170
  },
171
  {
172
  "epoch": 5.0,
173
- "step": 2020,
174
  "total_flos": 4.4078196477394944e+17,
175
- "train_loss": 6.77048750395822,
176
- "train_runtime": 16676.4775,
177
- "train_samples_per_second": 15.502,
178
- "train_steps_per_second": 0.121
179
  }
180
  ],
181
  "logging_steps": 100,
182
- "max_steps": 2020,
183
  "num_train_epochs": 5,
184
  "save_steps": 500,
185
  "total_flos": 4.4078196477394944e+17,
 
1
  {
2
+ "best_metric": 5.590092658996582,
3
+ "best_model_checkpoint": "/temp/t5_large-qg-aap/checkpoint-4040",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 4040,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.12,
13
+ "learning_rate": 0.004876237623762376,
14
+ "loss": 8.2771,
15
  "step": 100
16
  },
17
  {
18
+ "epoch": 0.25,
19
+ "learning_rate": 0.004752475247524752,
20
+ "loss": 7.8175,
21
  "step": 200
22
  },
23
  {
24
+ "epoch": 0.37,
25
+ "learning_rate": 0.004628712871287129,
26
+ "loss": 7.4476,
27
  "step": 300
28
  },
29
  {
30
+ "epoch": 0.5,
31
+ "learning_rate": 0.0045049504950495055,
32
+ "loss": 7.0782,
33
  "step": 400
34
  },
35
+ {
36
+ "epoch": 0.62,
37
+ "learning_rate": 0.004381188118811882,
38
+ "loss": 6.755,
39
+ "step": 500
40
+ },
41
+ {
42
+ "epoch": 0.74,
43
+ "learning_rate": 0.004257425742574258,
44
+ "loss": 6.5292,
45
+ "step": 600
46
+ },
47
+ {
48
+ "epoch": 0.87,
49
+ "learning_rate": 0.004133663366336634,
50
+ "loss": 6.3201,
51
+ "step": 700
52
+ },
53
+ {
54
+ "epoch": 0.99,
55
+ "learning_rate": 0.00400990099009901,
56
+ "loss": 6.15,
57
+ "step": 800
58
+ },
59
  {
60
  "epoch": 1.0,
61
+ "eval_loss": 7.336104869842529,
62
+ "eval_runtime": 526.3079,
63
+ "eval_samples_per_second": 16.851,
64
+ "eval_steps_per_second": 4.214,
65
+ "step": 808
66
+ },
67
+ {
68
+ "epoch": 1.11,
69
+ "learning_rate": 0.0038861386138613866,
70
+ "loss": 6.0156,
71
+ "step": 900
72
  },
73
  {
74
  "epoch": 1.24,
75
+ "learning_rate": 0.0037623762376237627,
76
+ "loss": 5.8846,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 1.36,
81
+ "learning_rate": 0.0036386138613861384,
82
+ "loss": 5.7566,
83
+ "step": 1100
84
  },
85
  {
86
  "epoch": 1.49,
87
+ "learning_rate": 0.0035148514851485145,
88
+ "loss": 5.6981,
89
+ "step": 1200
90
+ },
91
+ {
92
+ "epoch": 1.61,
93
+ "learning_rate": 0.0033910891089108915,
94
+ "loss": 5.6151,
95
+ "step": 1300
96
  },
97
  {
98
  "epoch": 1.73,
99
+ "learning_rate": 0.0032673267326732676,
100
+ "loss": 5.4776,
101
+ "step": 1400
102
+ },
103
+ {
104
+ "epoch": 1.86,
105
+ "learning_rate": 0.0031435643564356438,
106
+ "loss": 5.3873,
107
+ "step": 1500
108
  },
109
  {
110
  "epoch": 1.98,
111
+ "learning_rate": 0.00301980198019802,
112
+ "loss": 5.3335,
113
+ "step": 1600
114
  },
115
  {
116
  "epoch": 2.0,
117
+ "eval_loss": 6.409176349639893,
118
+ "eval_runtime": 525.7688,
119
+ "eval_samples_per_second": 16.869,
120
+ "eval_steps_per_second": 4.219,
121
+ "step": 1616
122
+ },
123
+ {
124
+ "epoch": 2.1,
125
+ "learning_rate": 0.002896039603960396,
126
+ "loss": 5.2361,
127
+ "step": 1700
128
  },
129
  {
130
  "epoch": 2.23,
131
+ "learning_rate": 0.0027722772277227726,
132
+ "loss": 5.1746,
133
+ "step": 1800
134
+ },
135
+ {
136
+ "epoch": 2.35,
137
+ "learning_rate": 0.0026485148514851487,
138
+ "loss": 5.1069,
139
+ "step": 1900
140
  },
141
  {
142
  "epoch": 2.48,
143
+ "learning_rate": 0.002524752475247525,
144
+ "loss": 5.07,
145
+ "step": 2000
146
+ },
147
+ {
148
+ "epoch": 2.6,
149
+ "learning_rate": 0.002400990099009901,
150
+ "loss": 5.0201,
151
+ "step": 2100
152
  },
153
  {
154
  "epoch": 2.72,
155
+ "learning_rate": 0.0022772277227722775,
156
+ "loss": 4.9693,
157
+ "step": 2200
158
+ },
159
+ {
160
+ "epoch": 2.85,
161
+ "learning_rate": 0.0021534653465346536,
162
+ "loss": 4.9579,
163
+ "step": 2300
164
  },
165
  {
166
  "epoch": 2.97,
167
+ "learning_rate": 0.0020297029702970298,
168
+ "loss": 4.8807,
169
+ "step": 2400
170
  },
171
  {
172
  "epoch": 3.0,
173
+ "eval_loss": 5.913175582885742,
174
+ "eval_runtime": 526.104,
175
+ "eval_samples_per_second": 16.858,
176
+ "eval_steps_per_second": 4.216,
177
+ "step": 2424
178
+ },
179
+ {
180
+ "epoch": 3.09,
181
+ "learning_rate": 0.0019059405940594061,
182
+ "loss": 4.8449,
183
+ "step": 2500
184
  },
185
  {
186
  "epoch": 3.22,
187
+ "learning_rate": 0.001782178217821782,
188
+ "loss": 4.7936,
189
+ "step": 2600
190
+ },
191
+ {
192
+ "epoch": 3.34,
193
+ "learning_rate": 0.0016584158415841586,
194
+ "loss": 4.7996,
195
+ "step": 2700
196
  },
197
  {
198
  "epoch": 3.47,
199
+ "learning_rate": 0.0015346534653465347,
200
+ "loss": 4.7631,
201
+ "step": 2800
202
+ },
203
+ {
204
+ "epoch": 3.59,
205
+ "learning_rate": 0.0014108910891089108,
206
+ "loss": 4.7178,
207
+ "step": 2900
208
  },
209
  {
210
  "epoch": 3.71,
211
+ "learning_rate": 0.0012871287128712872,
212
+ "loss": 4.6978,
213
+ "step": 3000
214
+ },
215
+ {
216
+ "epoch": 3.84,
217
+ "learning_rate": 0.0011633663366336635,
218
+ "loss": 4.6717,
219
+ "step": 3100
220
  },
221
  {
222
  "epoch": 3.96,
223
+ "learning_rate": 0.0010396039603960397,
224
+ "loss": 4.6492,
225
+ "step": 3200
226
  },
227
  {
228
  "epoch": 4.0,
229
+ "eval_loss": 5.6656341552734375,
230
+ "eval_runtime": 526.1669,
231
+ "eval_samples_per_second": 16.856,
232
+ "eval_steps_per_second": 4.215,
233
+ "step": 3232
234
+ },
235
+ {
236
+ "epoch": 4.08,
237
+ "learning_rate": 0.0009158415841584158,
238
+ "loss": 4.6611,
239
+ "step": 3300
240
  },
241
  {
242
  "epoch": 4.21,
243
+ "learning_rate": 0.0007920792079207921,
244
+ "loss": 4.6159,
245
+ "step": 3400
246
+ },
247
+ {
248
+ "epoch": 4.33,
249
+ "learning_rate": 0.0006683168316831684,
250
+ "loss": 4.6386,
251
+ "step": 3500
252
  },
253
  {
254
  "epoch": 4.46,
255
+ "learning_rate": 0.0005445544554455446,
256
+ "loss": 4.5997,
257
+ "step": 3600
258
+ },
259
+ {
260
+ "epoch": 4.58,
261
+ "learning_rate": 0.00042079207920792084,
262
+ "loss": 4.5819,
263
+ "step": 3700
264
  },
265
  {
266
  "epoch": 4.7,
267
+ "learning_rate": 0.000297029702970297,
268
+ "loss": 4.5785,
269
+ "step": 3800
270
+ },
271
+ {
272
+ "epoch": 4.83,
273
+ "learning_rate": 0.00017326732673267329,
274
+ "loss": 4.588,
275
+ "step": 3900
276
  },
277
  {
278
  "epoch": 4.95,
279
+ "learning_rate": 4.950495049504951e-05,
280
+ "loss": 4.591,
281
+ "step": 4000
282
  },
283
  {
284
  "epoch": 5.0,
285
+ "eval_loss": 5.590092658996582,
286
+ "eval_runtime": 526.0558,
287
+ "eval_samples_per_second": 16.859,
288
+ "eval_steps_per_second": 4.216,
289
+ "step": 4040
290
  },
291
  {
292
  "epoch": 5.0,
293
+ "step": 4040,
294
  "total_flos": 4.4078196477394944e+17,
295
+ "train_loss": 5.410919136576133,
296
+ "train_runtime": 16658.5174,
297
+ "train_samples_per_second": 15.519,
298
+ "train_steps_per_second": 0.243
299
  }
300
  ],
301
  "logging_steps": 100,
302
+ "max_steps": 4040,
303
  "num_train_epochs": 5,
304
  "save_steps": 500,
305
  "total_flos": 4.4078196477394944e+17,