boumehdi committed on
Commit
449018b
1 Parent(s): c5098d4

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +160 -49
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:852f3660024c968cc4ef52275707929fd5543378838969cbb4f8fef793d37cb5
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5ea1944603fdc43786885c6fad297352a4cfa88903c6e5e401a61afdb1ff69
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f0fcd11ecc6bf466188d996bb2c0323e1e2a252799c8f878a3eb91dc63a030
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8413826d343ca1a5a05286342dc463d0c96f14f6f5250d6663157cd0a22bfa14
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc5fd90d34449afd87dad4166da01169d0c05887d448a55d6c373fec7de3d50e
3
- size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b28524c43613725eb2734e93e3c395b0e0263834fec5ee89fe1a89e4e55726
3
+ size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:076f6ff47b30a5a4c15d66bc604090be40c546cc4745c17c3598eccee67eb0ba
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd0d7640fa100af1c436a6097e415e0c78c222c34fc3a4163201c7f7420d7659
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6fe6ee715be2fb390ed8c899f21cf138e268409631f9c7601cdd970a9d2894e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7a186be3ea4a6d0305e7ea5e53d52b11c001b50c8a64d1a6c67ede89211232
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.245931283905968,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,103 +10,214 @@
10
  {
11
  "epoch": 0.72,
12
  "learning_rate": 1e-05,
13
- "loss": 0.0369,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.45,
18
  "learning_rate": 9.99927530980506e-06,
19
- "loss": 0.0305,
20
  "step": 200
21
  },
22
- {
23
- "epoch": 1.81,
24
- "eval_loss": 0.2080060839653015,
25
- "eval_runtime": 285.8113,
26
- "eval_samples_per_second": 13.81,
27
- "eval_steps_per_second": 1.728,
28
- "eval_wer": 0.1806282722513089,
29
- "step": 250
30
- },
31
  {
32
  "epoch": 2.17,
33
  "learning_rate": 9.998550619610118e-06,
34
- "loss": 0.0294,
35
  "step": 300
36
  },
 
 
 
 
 
 
 
 
 
37
  {
38
  "epoch": 2.9,
39
  "learning_rate": 9.997825929415176e-06,
40
- "loss": 0.0271,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 3.62,
45
- "learning_rate": 9.997101239220233e-06,
46
- "loss": 0.0253,
47
- "step": 500
48
- },
49
- {
50
- "epoch": 3.62,
51
- "eval_loss": 0.20865508913993835,
52
- "eval_runtime": 202.8306,
53
- "eval_samples_per_second": 19.46,
54
- "eval_steps_per_second": 2.436,
55
- "eval_wer": 0.1781614981876762,
56
  "step": 500
57
  },
58
  {
59
  "epoch": 4.35,
60
- "learning_rate": 9.996376549025293e-06,
61
- "loss": 0.0261,
62
  "step": 600
63
  },
64
  {
65
  "epoch": 5.07,
66
- "learning_rate": 9.99565185883035e-06,
67
- "loss": 0.0232,
68
  "step": 700
69
  },
70
  {
71
- "epoch": 5.43,
72
- "eval_loss": 0.21311765909194946,
73
- "eval_runtime": 210.5372,
74
- "eval_samples_per_second": 18.747,
75
- "eval_steps_per_second": 2.346,
76
- "eval_wer": 0.1742851389448248,
77
- "step": 750
78
  },
79
  {
80
  "epoch": 5.8,
81
- "learning_rate": 9.994927168635409e-06,
82
- "loss": 0.0239,
83
  "step": 800
84
  },
85
  {
86
  "epoch": 6.52,
87
  "learning_rate": 9.994209725342417e-06,
88
- "loss": 0.0229,
89
  "step": 900
90
  },
91
  {
92
  "epoch": 7.25,
93
  "learning_rate": 9.993485035147475e-06,
94
- "loss": 0.023,
95
  "step": 1000
96
  },
97
  {
98
- "epoch": 7.25,
99
- "eval_loss": 0.21518608927726746,
100
- "eval_runtime": 215.7218,
101
- "eval_samples_per_second": 18.297,
102
- "eval_steps_per_second": 2.29,
103
- "eval_wer": 0.17247281514297222,
104
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
106
  ],
107
  "max_steps": 1380000,
108
  "num_train_epochs": 10000,
109
- "total_flos": 2.2743816298254418e+19,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.752260397830018,
5
+ "global_step": 2450,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 0.72,
12
  "learning_rate": 1e-05,
13
+ "loss": 0.0194,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.45,
18
  "learning_rate": 9.99927530980506e-06,
19
+ "loss": 0.0174,
20
  "step": 200
21
  },
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 2.17,
24
  "learning_rate": 9.998550619610118e-06,
25
+ "loss": 0.0197,
26
  "step": 300
27
  },
28
+ {
29
+ "epoch": 2.54,
30
+ "eval_loss": 0.2245764136314392,
31
+ "eval_runtime": 322.1745,
32
+ "eval_samples_per_second": 12.251,
33
+ "eval_steps_per_second": 1.533,
34
+ "eval_wer": 0.17368103101087395,
35
+ "step": 350
36
+ },
37
  {
38
  "epoch": 2.9,
39
  "learning_rate": 9.997825929415176e-06,
40
+ "loss": 0.0205,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 3.62,
45
+ "learning_rate": 9.997108486122183e-06,
46
+ "loss": 0.0199,
 
 
 
 
 
 
 
 
 
47
  "step": 500
48
  },
49
  {
50
  "epoch": 4.35,
51
+ "learning_rate": 9.996383795927241e-06,
52
+ "loss": 0.0198,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 5.07,
57
+ "learning_rate": 9.9956591057323e-06,
58
+ "loss": 0.0193,
59
  "step": 700
60
  },
61
  {
62
+ "epoch": 5.07,
63
+ "eval_loss": 0.23122623562812805,
64
+ "eval_runtime": 209.3421,
65
+ "eval_samples_per_second": 18.854,
66
+ "eval_steps_per_second": 2.36,
67
+ "eval_wer": 0.17217076117599678,
68
+ "step": 700
69
  },
70
  {
71
  "epoch": 5.8,
72
+ "learning_rate": 9.994934415537358e-06,
73
+ "loss": 0.0186,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 6.52,
78
  "learning_rate": 9.994209725342417e-06,
79
+ "loss": 0.0194,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 7.25,
84
  "learning_rate": 9.993485035147475e-06,
85
+ "loss": 0.0186,
86
  "step": 1000
87
  },
88
  {
89
+ "epoch": 7.61,
90
+ "eval_loss": 0.2398330122232437,
91
+ "eval_runtime": 209.0,
92
+ "eval_samples_per_second": 18.885,
93
+ "eval_steps_per_second": 2.364,
94
+ "eval_wer": 0.17232178815948448,
95
+ "step": 1050
96
+ },
97
+ {
98
+ "epoch": 7.97,
99
+ "learning_rate": 9.992760344952534e-06,
100
+ "loss": 0.0186,
101
+ "step": 1100
102
+ },
103
+ {
104
+ "epoch": 8.69,
105
+ "learning_rate": 9.99203565475759e-06,
106
+ "loss": 0.0191,
107
+ "step": 1200
108
+ },
109
+ {
110
+ "epoch": 9.42,
111
+ "learning_rate": 9.991310964562651e-06,
112
+ "loss": 0.0185,
113
+ "step": 1300
114
+ },
115
+ {
116
+ "epoch": 10.14,
117
+ "learning_rate": 9.990586274367708e-06,
118
+ "loss": 0.0171,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 10.14,
123
+ "eval_loss": 0.24630184471607208,
124
+ "eval_runtime": 212.2032,
125
+ "eval_samples_per_second": 18.6,
126
+ "eval_steps_per_second": 2.328,
127
+ "eval_wer": 0.17020741039065646,
128
+ "step": 1400
129
+ },
130
+ {
131
+ "epoch": 10.87,
132
+ "learning_rate": 9.989861584172766e-06,
133
+ "loss": 0.0176,
134
+ "step": 1500
135
+ },
136
+ {
137
+ "epoch": 11.59,
138
+ "learning_rate": 9.989136893977825e-06,
139
+ "loss": 0.018,
140
+ "step": 1600
141
+ },
142
+ {
143
+ "epoch": 12.32,
144
+ "learning_rate": 9.988412203782883e-06,
145
+ "loss": 0.0172,
146
+ "step": 1700
147
+ },
148
+ {
149
+ "epoch": 12.68,
150
+ "eval_loss": 0.24790118634700775,
151
+ "eval_runtime": 215.9062,
152
+ "eval_samples_per_second": 18.281,
153
+ "eval_steps_per_second": 2.288,
154
+ "eval_wer": 0.17081151832460734,
155
+ "step": 1750
156
+ },
157
+ {
158
+ "epoch": 13.04,
159
+ "learning_rate": 9.987687513587942e-06,
160
+ "loss": 0.0172,
161
+ "step": 1800
162
+ },
163
+ {
164
+ "epoch": 13.77,
165
+ "learning_rate": 9.986962823393e-06,
166
+ "loss": 0.0165,
167
+ "step": 1900
168
+ },
169
+ {
170
+ "epoch": 14.49,
171
+ "learning_rate": 9.986238133198059e-06,
172
+ "loss": 0.0176,
173
+ "step": 2000
174
+ },
175
+ {
176
+ "epoch": 15.22,
177
+ "learning_rate": 9.985513443003117e-06,
178
+ "loss": 0.0173,
179
+ "step": 2100
180
+ },
181
+ {
182
+ "epoch": 15.22,
183
+ "eval_loss": 0.24780623614788055,
184
+ "eval_runtime": 215.4375,
185
+ "eval_samples_per_second": 18.321,
186
+ "eval_steps_per_second": 2.293,
187
+ "eval_wer": 0.17015706806282724,
188
+ "step": 2100
189
+ },
190
+ {
191
+ "epoch": 15.94,
192
+ "learning_rate": 9.984788752808176e-06,
193
+ "loss": 0.0168,
194
+ "step": 2200
195
+ },
196
+ {
197
+ "epoch": 16.67,
198
+ "learning_rate": 9.984064062613234e-06,
199
+ "loss": 0.0165,
200
+ "step": 2300
201
+ },
202
+ {
203
+ "epoch": 17.39,
204
+ "learning_rate": 9.983339372418293e-06,
205
+ "loss": 0.0169,
206
+ "step": 2400
207
+ },
208
+ {
209
+ "epoch": 17.75,
210
+ "eval_loss": 0.24955050647258759,
211
+ "eval_runtime": 224.6408,
212
+ "eval_samples_per_second": 17.57,
213
+ "eval_steps_per_second": 2.199,
214
+ "eval_wer": 0.1689488521949255,
215
+ "step": 2450
216
  }
217
  ],
218
  "max_steps": 1380000,
219
  "num_train_epochs": 10000,
220
+ "total_flos": 5.57531310441053e+19,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f835e214c90a10026201ea65e17731cdf49397e7fb8e4bb791dfaf52ba09d5b1
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3393125c7f14a291727a873967ef481f803e54d70aa9f3fcdf615773d38c2b19
3
  size 3323