5roop commited on
Commit
5495272
1 Parent(s): 9180e13

Add files.

Browse files
config.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "classla/wav2vec2-large-slavic-parlaspeech-hr",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForSpeechClassification"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "finetuning_task": "wav2vec2_clf",
56
+ "gradient_checkpointing": false,
57
+ "hidden_act": "gelu",
58
+ "hidden_dropout": 0.0,
59
+ "hidden_size": 1024,
60
+ "id2label": {
61
+ "0": "anger",
62
+ "1": "fear",
63
+ "2": "happiness",
64
+ "3": "neutral",
65
+ "4": "sadness"
66
+ },
67
+ "initializer_range": 0.02,
68
+ "intermediate_size": 4096,
69
+ "label2id": {
70
+ "anger": 0,
71
+ "fear": 1,
72
+ "happiness": 2,
73
+ "neutral": 3,
74
+ "sadness": 4
75
+ },
76
+ "layer_norm_eps": 1e-05,
77
+ "layerdrop": 0.0,
78
+ "mask_channel_length": 10,
79
+ "mask_channel_min_space": 1,
80
+ "mask_channel_other": 0.0,
81
+ "mask_channel_prob": 0.0,
82
+ "mask_channel_selection": "static",
83
+ "mask_feature_length": 10,
84
+ "mask_feature_min_masks": 0,
85
+ "mask_feature_prob": 0.0,
86
+ "mask_time_length": 10,
87
+ "mask_time_min_masks": 2,
88
+ "mask_time_min_space": 1,
89
+ "mask_time_other": 0.0,
90
+ "mask_time_prob": 0.05,
91
+ "mask_time_selection": "static",
92
+ "model_type": "wav2vec2",
93
+ "num_adapter_layers": 3,
94
+ "num_attention_heads": 16,
95
+ "num_codevector_groups": 2,
96
+ "num_codevectors_per_group": 320,
97
+ "num_conv_pos_embedding_groups": 16,
98
+ "num_conv_pos_embeddings": 128,
99
+ "num_feat_extract_layers": 7,
100
+ "num_hidden_layers": 24,
101
+ "num_negatives": 100,
102
+ "output_hidden_size": 1024,
103
+ "pad_token_id": 1,
104
+ "pooling_mode": "mean",
105
+ "problem_type": "single_label_classification",
106
+ "proj_codevector_dim": 768,
107
+ "tdnn_dilation": [
108
+ 1,
109
+ 2,
110
+ 3,
111
+ 1,
112
+ 1
113
+ ],
114
+ "tdnn_dim": [
115
+ 512,
116
+ 512,
117
+ 512,
118
+ 512,
119
+ 1500
120
+ ],
121
+ "tdnn_kernel": [
122
+ 5,
123
+ 3,
124
+ 3,
125
+ 1,
126
+ 1
127
+ ],
128
+ "torch_dtype": "float32",
129
+ "transformers_version": "4.19.2",
130
+ "use_weighted_layer_sum": false,
131
+ "vocab_size": 50,
132
+ "xvector_output_dim": 512
133
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6e6bc71f1f6162fcd8e675d807d6da0995087b0207d1c69781fbbc43b9dfd19
3
+ size 2498497929
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79874d308c0bbe3eaf6551bbdaafb1aeb7b86a1e090e552f1d5d5c17aabed067
3
+ size 1266118253
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7dbe44d49485edecb6dde49bc5d8645c0d283b90a1d59a2d64b65aaf1a34c29
3
+ size 14503
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aae24ee13846e246b37bbb4d65264fc17487a20329b99f62fdb47ff22b7f733
3
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1992a35c74e8d0ffd01caf1992a4808f9d54c9cff8fc4295913ed1e24c934333
3
+ size 623
stats.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ | | TASK | accuracy | macroF1 | split | NUM_EPOCH |
2
+ |---:|:---------------------|-----------:|----------:|:--------|------------:|
3
+ | 2 | best_model_finding_1 | 0.772277 | 0.772291 | dev | 7 |
4
+ | 0 | best_model_finding_0 | 0.767327 | 0.763055 | dev | 7 |
5
+ | 4 | best_model_finding_2 | 0.752475 | 0.751493 | dev | 7 |
6
+ | 5 | best_model_finding_2 | 0.747573 | 0.70292 | test | 7 |
7
+ | 1 | best_model_finding_0 | 0.728155 | 0.690463 | test | 7 |
8
+ | 3 | best_model_finding_1 | 0.679612 | 0.635755 | test | 7 |
9
+ | 6 | best_model_finding_3 | 0.658416 | 0.652704 | dev | 7 |
10
+ | 7 | best_model_finding_3 | 0.582524 | 0.52383 | test | 7 |
trainer_state.json ADDED
@@ -0,0 +1,811 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 6.997150997150997,
5
+ "global_step": 1225,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 9.918367346938776e-05,
13
+ "loss": 1.5894,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.11,
18
+ "learning_rate": 9.836734693877552e-05,
19
+ "loss": 1.5249,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.17,
24
+ "learning_rate": 9.755102040816328e-05,
25
+ "loss": 1.4639,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.23,
30
+ "learning_rate": 9.673469387755102e-05,
31
+ "loss": 1.4989,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.28,
36
+ "learning_rate": 9.591836734693878e-05,
37
+ "loss": 1.4726,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.34,
42
+ "learning_rate": 9.510204081632653e-05,
43
+ "loss": 1.3616,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.4,
48
+ "learning_rate": 9.428571428571429e-05,
49
+ "loss": 1.4093,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.46,
54
+ "learning_rate": 9.346938775510204e-05,
55
+ "loss": 1.2487,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.51,
60
+ "learning_rate": 9.26530612244898e-05,
61
+ "loss": 1.1843,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.57,
66
+ "learning_rate": 9.183673469387756e-05,
67
+ "loss": 1.2226,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.63,
72
+ "learning_rate": 9.102040816326532e-05,
73
+ "loss": 1.1402,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.68,
78
+ "learning_rate": 9.020408163265308e-05,
79
+ "loss": 1.2654,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.74,
84
+ "learning_rate": 8.938775510204082e-05,
85
+ "loss": 1.2024,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.8,
90
+ "learning_rate": 8.857142857142857e-05,
91
+ "loss": 1.447,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.85,
96
+ "learning_rate": 8.775510204081632e-05,
97
+ "loss": 1.0705,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.91,
102
+ "learning_rate": 8.693877551020408e-05,
103
+ "loss": 0.7617,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.97,
108
+ "learning_rate": 8.612244897959184e-05,
109
+ "loss": 0.7525,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 1.0,
114
+ "eval_accuracy": 0.6188119053840637,
115
+ "eval_loss": 1.0104970932006836,
116
+ "eval_runtime": 11.9979,
117
+ "eval_samples_per_second": 16.836,
118
+ "eval_steps_per_second": 8.418,
119
+ "step": 175
120
+ },
121
+ {
122
+ "epoch": 1.03,
123
+ "learning_rate": 8.53061224489796e-05,
124
+ "loss": 0.9951,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.09,
129
+ "learning_rate": 8.448979591836736e-05,
130
+ "loss": 0.8361,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 1.14,
135
+ "learning_rate": 8.367346938775511e-05,
136
+ "loss": 0.5858,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.2,
141
+ "learning_rate": 8.285714285714287e-05,
142
+ "loss": 0.5791,
143
+ "step": 210
144
+ },
145
+ {
146
+ "epoch": 1.26,
147
+ "learning_rate": 8.204081632653062e-05,
148
+ "loss": 0.8001,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.31,
153
+ "learning_rate": 8.122448979591836e-05,
154
+ "loss": 0.7776,
155
+ "step": 230
156
+ },
157
+ {
158
+ "epoch": 1.37,
159
+ "learning_rate": 8.040816326530612e-05,
160
+ "loss": 0.8223,
161
+ "step": 240
162
+ },
163
+ {
164
+ "epoch": 1.43,
165
+ "learning_rate": 7.959183673469388e-05,
166
+ "loss": 0.621,
167
+ "step": 250
168
+ },
169
+ {
170
+ "epoch": 1.48,
171
+ "learning_rate": 7.885714285714286e-05,
172
+ "loss": 0.7097,
173
+ "step": 260
174
+ },
175
+ {
176
+ "epoch": 1.54,
177
+ "learning_rate": 7.804081632653062e-05,
178
+ "loss": 0.5678,
179
+ "step": 270
180
+ },
181
+ {
182
+ "epoch": 1.6,
183
+ "learning_rate": 7.722448979591837e-05,
184
+ "loss": 0.4465,
185
+ "step": 280
186
+ },
187
+ {
188
+ "epoch": 1.66,
189
+ "learning_rate": 7.640816326530612e-05,
190
+ "loss": 0.957,
191
+ "step": 290
192
+ },
193
+ {
194
+ "epoch": 1.71,
195
+ "learning_rate": 7.559183673469388e-05,
196
+ "loss": 0.645,
197
+ "step": 300
198
+ },
199
+ {
200
+ "epoch": 1.77,
201
+ "learning_rate": 7.477551020408163e-05,
202
+ "loss": 1.3314,
203
+ "step": 310
204
+ },
205
+ {
206
+ "epoch": 1.83,
207
+ "learning_rate": 7.395918367346939e-05,
208
+ "loss": 0.5702,
209
+ "step": 320
210
+ },
211
+ {
212
+ "epoch": 1.88,
213
+ "learning_rate": 7.314285714285715e-05,
214
+ "loss": 0.7456,
215
+ "step": 330
216
+ },
217
+ {
218
+ "epoch": 1.94,
219
+ "learning_rate": 7.232653061224491e-05,
220
+ "loss": 0.3972,
221
+ "step": 340
222
+ },
223
+ {
224
+ "epoch": 2.0,
225
+ "learning_rate": 7.151020408163265e-05,
226
+ "loss": 0.5402,
227
+ "step": 350
228
+ },
229
+ {
230
+ "epoch": 2.0,
231
+ "eval_accuracy": 0.7574257254600525,
232
+ "eval_loss": 0.7105001211166382,
233
+ "eval_runtime": 11.6399,
234
+ "eval_samples_per_second": 17.354,
235
+ "eval_steps_per_second": 8.677,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 2.06,
240
+ "learning_rate": 7.069387755102041e-05,
241
+ "loss": 0.4779,
242
+ "step": 360
243
+ },
244
+ {
245
+ "epoch": 2.11,
246
+ "learning_rate": 6.987755102040817e-05,
247
+ "loss": 0.4267,
248
+ "step": 370
249
+ },
250
+ {
251
+ "epoch": 2.17,
252
+ "learning_rate": 6.906122448979592e-05,
253
+ "loss": 0.2531,
254
+ "step": 380
255
+ },
256
+ {
257
+ "epoch": 2.23,
258
+ "learning_rate": 6.824489795918367e-05,
259
+ "loss": 0.2189,
260
+ "step": 390
261
+ },
262
+ {
263
+ "epoch": 2.28,
264
+ "learning_rate": 6.742857142857143e-05,
265
+ "loss": 0.2174,
266
+ "step": 400
267
+ },
268
+ {
269
+ "epoch": 2.34,
270
+ "learning_rate": 6.661224489795919e-05,
271
+ "loss": 0.466,
272
+ "step": 410
273
+ },
274
+ {
275
+ "epoch": 2.4,
276
+ "learning_rate": 6.579591836734695e-05,
277
+ "loss": 0.2604,
278
+ "step": 420
279
+ },
280
+ {
281
+ "epoch": 2.46,
282
+ "learning_rate": 6.497959183673469e-05,
283
+ "loss": 0.3566,
284
+ "step": 430
285
+ },
286
+ {
287
+ "epoch": 2.51,
288
+ "learning_rate": 6.424489795918368e-05,
289
+ "loss": 0.5317,
290
+ "step": 440
291
+ },
292
+ {
293
+ "epoch": 2.57,
294
+ "learning_rate": 6.342857142857143e-05,
295
+ "loss": 0.124,
296
+ "step": 450
297
+ },
298
+ {
299
+ "epoch": 2.63,
300
+ "learning_rate": 6.261224489795919e-05,
301
+ "loss": 0.3307,
302
+ "step": 460
303
+ },
304
+ {
305
+ "epoch": 2.68,
306
+ "learning_rate": 6.179591836734693e-05,
307
+ "loss": 0.2536,
308
+ "step": 470
309
+ },
310
+ {
311
+ "epoch": 2.74,
312
+ "learning_rate": 6.09795918367347e-05,
313
+ "loss": 0.4331,
314
+ "step": 480
315
+ },
316
+ {
317
+ "epoch": 2.8,
318
+ "learning_rate": 6.016326530612245e-05,
319
+ "loss": 0.3736,
320
+ "step": 490
321
+ },
322
+ {
323
+ "epoch": 2.85,
324
+ "learning_rate": 5.934693877551021e-05,
325
+ "loss": 0.3034,
326
+ "step": 500
327
+ },
328
+ {
329
+ "epoch": 2.91,
330
+ "learning_rate": 5.8530612244897965e-05,
331
+ "loss": 0.5411,
332
+ "step": 510
333
+ },
334
+ {
335
+ "epoch": 2.97,
336
+ "learning_rate": 5.771428571428572e-05,
337
+ "loss": 0.6408,
338
+ "step": 520
339
+ },
340
+ {
341
+ "epoch": 3.0,
342
+ "eval_accuracy": 0.7227723002433777,
343
+ "eval_loss": 1.0339319705963135,
344
+ "eval_runtime": 11.8395,
345
+ "eval_samples_per_second": 17.062,
346
+ "eval_steps_per_second": 8.531,
347
+ "step": 525
348
+ },
349
+ {
350
+ "epoch": 3.03,
351
+ "learning_rate": 5.6897959183673475e-05,
352
+ "loss": 0.1283,
353
+ "step": 530
354
+ },
355
+ {
356
+ "epoch": 3.09,
357
+ "learning_rate": 5.608163265306122e-05,
358
+ "loss": 0.2879,
359
+ "step": 540
360
+ },
361
+ {
362
+ "epoch": 3.14,
363
+ "learning_rate": 5.526530612244898e-05,
364
+ "loss": 0.0459,
365
+ "step": 550
366
+ },
367
+ {
368
+ "epoch": 3.2,
369
+ "learning_rate": 5.4448979591836736e-05,
370
+ "loss": 0.4081,
371
+ "step": 560
372
+ },
373
+ {
374
+ "epoch": 3.26,
375
+ "learning_rate": 5.3632653061224494e-05,
376
+ "loss": 0.0723,
377
+ "step": 570
378
+ },
379
+ {
380
+ "epoch": 3.31,
381
+ "learning_rate": 5.2816326530612245e-05,
382
+ "loss": 0.3279,
383
+ "step": 580
384
+ },
385
+ {
386
+ "epoch": 3.37,
387
+ "learning_rate": 5.2000000000000004e-05,
388
+ "loss": 0.1205,
389
+ "step": 590
390
+ },
391
+ {
392
+ "epoch": 3.43,
393
+ "learning_rate": 5.118367346938776e-05,
394
+ "loss": 0.1483,
395
+ "step": 600
396
+ },
397
+ {
398
+ "epoch": 3.48,
399
+ "learning_rate": 5.036734693877552e-05,
400
+ "loss": 0.1873,
401
+ "step": 610
402
+ },
403
+ {
404
+ "epoch": 3.54,
405
+ "learning_rate": 4.9551020408163265e-05,
406
+ "loss": 0.2136,
407
+ "step": 620
408
+ },
409
+ {
410
+ "epoch": 3.6,
411
+ "learning_rate": 4.873469387755102e-05,
412
+ "loss": 0.2166,
413
+ "step": 630
414
+ },
415
+ {
416
+ "epoch": 3.66,
417
+ "learning_rate": 4.7918367346938774e-05,
418
+ "loss": 0.1963,
419
+ "step": 640
420
+ },
421
+ {
422
+ "epoch": 3.71,
423
+ "learning_rate": 4.710204081632653e-05,
424
+ "loss": 0.0301,
425
+ "step": 650
426
+ },
427
+ {
428
+ "epoch": 3.77,
429
+ "learning_rate": 4.628571428571429e-05,
430
+ "loss": 0.1655,
431
+ "step": 660
432
+ },
433
+ {
434
+ "epoch": 3.83,
435
+ "learning_rate": 4.546938775510204e-05,
436
+ "loss": 0.0028,
437
+ "step": 670
438
+ },
439
+ {
440
+ "epoch": 3.88,
441
+ "learning_rate": 4.46530612244898e-05,
442
+ "loss": 0.0482,
443
+ "step": 680
444
+ },
445
+ {
446
+ "epoch": 3.94,
447
+ "learning_rate": 4.383673469387755e-05,
448
+ "loss": 0.1093,
449
+ "step": 690
450
+ },
451
+ {
452
+ "epoch": 4.0,
453
+ "learning_rate": 4.302040816326531e-05,
454
+ "loss": 0.0784,
455
+ "step": 700
456
+ },
457
+ {
458
+ "epoch": 4.0,
459
+ "eval_accuracy": 0.7277227640151978,
460
+ "eval_loss": 1.246588945388794,
461
+ "eval_runtime": 11.9773,
462
+ "eval_samples_per_second": 16.865,
463
+ "eval_steps_per_second": 8.433,
464
+ "step": 700
465
+ },
466
+ {
467
+ "epoch": 4.06,
468
+ "learning_rate": 4.220408163265306e-05,
469
+ "loss": 0.0031,
470
+ "step": 710
471
+ },
472
+ {
473
+ "epoch": 4.11,
474
+ "learning_rate": 4.138775510204082e-05,
475
+ "loss": 0.023,
476
+ "step": 720
477
+ },
478
+ {
479
+ "epoch": 4.17,
480
+ "learning_rate": 4.057142857142857e-05,
481
+ "loss": 0.0178,
482
+ "step": 730
483
+ },
484
+ {
485
+ "epoch": 4.23,
486
+ "learning_rate": 3.9836734693877556e-05,
487
+ "loss": 0.141,
488
+ "step": 740
489
+ },
490
+ {
491
+ "epoch": 4.28,
492
+ "learning_rate": 3.902040816326531e-05,
493
+ "loss": 0.0021,
494
+ "step": 750
495
+ },
496
+ {
497
+ "epoch": 4.34,
498
+ "learning_rate": 3.820408163265306e-05,
499
+ "loss": 0.0996,
500
+ "step": 760
501
+ },
502
+ {
503
+ "epoch": 4.4,
504
+ "learning_rate": 3.738775510204082e-05,
505
+ "loss": 0.0155,
506
+ "step": 770
507
+ },
508
+ {
509
+ "epoch": 4.46,
510
+ "learning_rate": 3.6571428571428576e-05,
511
+ "loss": 0.0638,
512
+ "step": 780
513
+ },
514
+ {
515
+ "epoch": 4.51,
516
+ "learning_rate": 3.575510204081633e-05,
517
+ "loss": 0.035,
518
+ "step": 790
519
+ },
520
+ {
521
+ "epoch": 4.57,
522
+ "learning_rate": 3.4938775510204085e-05,
523
+ "loss": 0.0019,
524
+ "step": 800
525
+ },
526
+ {
527
+ "epoch": 4.63,
528
+ "learning_rate": 3.412244897959184e-05,
529
+ "loss": 0.0018,
530
+ "step": 810
531
+ },
532
+ {
533
+ "epoch": 4.68,
534
+ "learning_rate": 3.3306122448979595e-05,
535
+ "loss": 0.0014,
536
+ "step": 820
537
+ },
538
+ {
539
+ "epoch": 4.74,
540
+ "learning_rate": 3.2489795918367346e-05,
541
+ "loss": 0.0046,
542
+ "step": 830
543
+ },
544
+ {
545
+ "epoch": 4.8,
546
+ "learning_rate": 3.1673469387755105e-05,
547
+ "loss": 0.0044,
548
+ "step": 840
549
+ },
550
+ {
551
+ "epoch": 4.85,
552
+ "learning_rate": 3.0857142857142856e-05,
553
+ "loss": 0.0207,
554
+ "step": 850
555
+ },
556
+ {
557
+ "epoch": 4.91,
558
+ "learning_rate": 3.0040816326530614e-05,
559
+ "loss": 0.0029,
560
+ "step": 860
561
+ },
562
+ {
563
+ "epoch": 4.97,
564
+ "learning_rate": 2.922448979591837e-05,
565
+ "loss": 0.0385,
566
+ "step": 870
567
+ },
568
+ {
569
+ "epoch": 5.0,
570
+ "eval_accuracy": 0.7623762488365173,
571
+ "eval_loss": 1.2059693336486816,
572
+ "eval_runtime": 11.6571,
573
+ "eval_samples_per_second": 17.328,
574
+ "eval_steps_per_second": 8.664,
575
+ "step": 875
576
+ },
577
+ {
578
+ "epoch": 5.03,
579
+ "learning_rate": 2.8408163265306127e-05,
580
+ "loss": 0.0605,
581
+ "step": 880
582
+ },
583
+ {
584
+ "epoch": 5.09,
585
+ "learning_rate": 2.7591836734693875e-05,
586
+ "loss": 0.0011,
587
+ "step": 890
588
+ },
589
+ {
590
+ "epoch": 5.14,
591
+ "learning_rate": 2.6775510204081634e-05,
592
+ "loss": 0.0037,
593
+ "step": 900
594
+ },
595
+ {
596
+ "epoch": 5.2,
597
+ "learning_rate": 2.595918367346939e-05,
598
+ "loss": 0.0011,
599
+ "step": 910
600
+ },
601
+ {
602
+ "epoch": 5.26,
603
+ "learning_rate": 2.5142857142857147e-05,
604
+ "loss": 0.0036,
605
+ "step": 920
606
+ },
607
+ {
608
+ "epoch": 5.31,
609
+ "learning_rate": 2.4326530612244898e-05,
610
+ "loss": 0.0015,
611
+ "step": 930
612
+ },
613
+ {
614
+ "epoch": 5.37,
615
+ "learning_rate": 2.3510204081632653e-05,
616
+ "loss": 0.0039,
617
+ "step": 940
618
+ },
619
+ {
620
+ "epoch": 5.43,
621
+ "learning_rate": 2.269387755102041e-05,
622
+ "loss": 0.0019,
623
+ "step": 950
624
+ },
625
+ {
626
+ "epoch": 5.48,
627
+ "learning_rate": 2.1877551020408162e-05,
628
+ "loss": 0.0011,
629
+ "step": 960
630
+ },
631
+ {
632
+ "epoch": 5.54,
633
+ "learning_rate": 2.106122448979592e-05,
634
+ "loss": 0.0031,
635
+ "step": 970
636
+ },
637
+ {
638
+ "epoch": 5.6,
639
+ "learning_rate": 2.0244897959183676e-05,
640
+ "loss": 0.0012,
641
+ "step": 980
642
+ },
643
+ {
644
+ "epoch": 5.66,
645
+ "learning_rate": 1.942857142857143e-05,
646
+ "loss": 0.0009,
647
+ "step": 990
648
+ },
649
+ {
650
+ "epoch": 5.71,
651
+ "learning_rate": 1.8612244897959185e-05,
652
+ "loss": 0.0031,
653
+ "step": 1000
654
+ },
655
+ {
656
+ "epoch": 5.77,
657
+ "learning_rate": 1.779591836734694e-05,
658
+ "loss": 0.001,
659
+ "step": 1010
660
+ },
661
+ {
662
+ "epoch": 5.83,
663
+ "learning_rate": 1.6979591836734695e-05,
664
+ "loss": 0.0011,
665
+ "step": 1020
666
+ },
667
+ {
668
+ "epoch": 5.88,
669
+ "learning_rate": 1.616326530612245e-05,
670
+ "loss": 0.0011,
671
+ "step": 1030
672
+ },
673
+ {
674
+ "epoch": 5.94,
675
+ "learning_rate": 1.5346938775510204e-05,
676
+ "loss": 0.0008,
677
+ "step": 1040
678
+ },
679
+ {
680
+ "epoch": 6.0,
681
+ "learning_rate": 1.453061224489796e-05,
682
+ "loss": 0.0066,
683
+ "step": 1050
684
+ },
685
+ {
686
+ "epoch": 6.0,
687
+ "eval_accuracy": 0.7425742745399475,
688
+ "eval_loss": 1.2782899141311646,
689
+ "eval_runtime": 11.4463,
690
+ "eval_samples_per_second": 17.648,
691
+ "eval_steps_per_second": 8.824,
692
+ "step": 1050
693
+ },
694
+ {
695
+ "epoch": 6.06,
696
+ "learning_rate": 1.3714285714285716e-05,
697
+ "loss": 0.005,
698
+ "step": 1060
699
+ },
700
+ {
701
+ "epoch": 6.11,
702
+ "learning_rate": 1.2897959183673469e-05,
703
+ "loss": 0.0007,
704
+ "step": 1070
705
+ },
706
+ {
707
+ "epoch": 6.17,
708
+ "learning_rate": 1.2081632653061225e-05,
709
+ "loss": 0.0011,
710
+ "step": 1080
711
+ },
712
+ {
713
+ "epoch": 6.23,
714
+ "learning_rate": 1.126530612244898e-05,
715
+ "loss": 0.0176,
716
+ "step": 1090
717
+ },
718
+ {
719
+ "epoch": 6.28,
720
+ "learning_rate": 1.0448979591836735e-05,
721
+ "loss": 0.001,
722
+ "step": 1100
723
+ },
724
+ {
725
+ "epoch": 6.34,
726
+ "learning_rate": 9.63265306122449e-06,
727
+ "loss": 0.0008,
728
+ "step": 1110
729
+ },
730
+ {
731
+ "epoch": 6.4,
732
+ "learning_rate": 8.816326530612245e-06,
733
+ "loss": 0.0008,
734
+ "step": 1120
735
+ },
736
+ {
737
+ "epoch": 6.46,
738
+ "learning_rate": 8.000000000000001e-06,
739
+ "loss": 0.0007,
740
+ "step": 1130
741
+ },
742
+ {
743
+ "epoch": 6.51,
744
+ "learning_rate": 7.183673469387755e-06,
745
+ "loss": 0.0031,
746
+ "step": 1140
747
+ },
748
+ {
749
+ "epoch": 6.57,
750
+ "learning_rate": 6.36734693877551e-06,
751
+ "loss": 0.0175,
752
+ "step": 1150
753
+ },
754
+ {
755
+ "epoch": 6.63,
756
+ "learning_rate": 5.551020408163266e-06,
757
+ "loss": 0.0044,
758
+ "step": 1160
759
+ },
760
+ {
761
+ "epoch": 6.68,
762
+ "learning_rate": 4.734693877551021e-06,
763
+ "loss": 0.0008,
764
+ "step": 1170
765
+ },
766
+ {
767
+ "epoch": 6.74,
768
+ "learning_rate": 3.9183673469387755e-06,
769
+ "loss": 0.0009,
770
+ "step": 1180
771
+ },
772
+ {
773
+ "epoch": 6.8,
774
+ "learning_rate": 3.1020408163265307e-06,
775
+ "loss": 0.0016,
776
+ "step": 1190
777
+ },
778
+ {
779
+ "epoch": 6.85,
780
+ "learning_rate": 2.285714285714286e-06,
781
+ "loss": 0.0008,
782
+ "step": 1200
783
+ },
784
+ {
785
+ "epoch": 6.91,
786
+ "learning_rate": 1.4693877551020408e-06,
787
+ "loss": 0.0007,
788
+ "step": 1210
789
+ },
790
+ {
791
+ "epoch": 6.97,
792
+ "learning_rate": 6.53061224489796e-07,
793
+ "loss": 0.0102,
794
+ "step": 1220
795
+ },
796
+ {
797
+ "epoch": 7.0,
798
+ "eval_accuracy": 0.7524752616882324,
799
+ "eval_loss": 1.2731852531433105,
800
+ "eval_runtime": 11.7439,
801
+ "eval_samples_per_second": 17.2,
802
+ "eval_steps_per_second": 8.6,
803
+ "step": 1225
804
+ }
805
+ ],
806
+ "max_steps": 1225,
807
+ "num_train_epochs": 7,
808
+ "total_flos": 8.443178680144968e+17,
809
+ "trial_name": null,
810
+ "trial_params": null
811
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c8694a1a02a17556ba46646dfbd3242298541f0f80d2d3f27a5ec007a118dc
3
+ size 3311
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ { "|": 0, "?": 1, "a": 2, "b": 3, "c": 4, "d": 5, "e": 6, "f": 7, "g": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "u": 22, "v": 23, "w": 24, "x": 25, "y": 26, "z": 27, "\u00e4": 28, "\u00fc": 29, "\u0107": 30, "\u010d": 31, "\u0111": 32, "\u0161": 33, "\u017e": 34, "[UNK]": 35, "[PAD]": 36, " ": 37, "1": 38, "2": 39, "3": 40, "4": 41, "5": 42, "6": 43, "7": 44, "8": 45, "9": 46, "0": 47}