Badr Abdullah committed on
Commit
ceecf24
1 Parent(s): afda824

Upload tokenizer

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +4 -4
  4. vocab.json +163 -154
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
  - generated_from_trainer
 
6
  datasets:
7
  - common_voice_17_0
8
  metrics:
@@ -11,8 +11,8 @@ model-index:
11
  - name: xlsr-am-adap-phon
12
  results:
13
  - task:
14
- name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
 
16
  dataset:
17
  name: common_voice_17_0
18
  type: common_voice_17_0
@@ -20,9 +20,9 @@ model-index:
20
  split: validation
21
  args: am
22
  metrics:
23
- - name: Wer
24
- type: wer
25
  value: 0.9302421009437833
 
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: facebook/wav2vec2-large-xlsr-53
6
  datasets:
7
  - common_voice_17_0
8
  metrics:
 
11
  - name: xlsr-am-adap-phon
12
  results:
13
  - task:
 
14
  type: automatic-speech-recognition
15
+ name: Automatic Speech Recognition
16
  dataset:
17
  name: common_voice_17_0
18
  type: common_voice_17_0
 
20
  split: validation
21
  args: am
22
  metrics:
23
+ - type: wer
 
24
  value: 0.9302421009437833
25
+ name: Wer
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 231,
3
- "<s>": 230
4
  }
 
1
  {
2
+ "</s>": 240,
3
+ "<s>": 239
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "228": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "229": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "230": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "231": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "237": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "238": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "239": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "240": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
vocab.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "[PAD]": 229,
3
- "[UNK]": 228,
4
  "|": 0,
5
  "ሀ": 1,
6
  "ሁ": 2,
@@ -77,156 +77,165 @@
77
  "ቨ": 73,
78
  "ቪ": 74,
79
  "ቫ": 75,
80
- "": 76,
81
- "": 77,
82
- "": 78,
83
- "": 79,
84
- "": 80,
85
- "": 81,
86
- "": 82,
87
- "": 83,
88
- "": 84,
89
- "": 85,
90
- "": 86,
91
- "": 87,
92
- "": 88,
93
- "": 89,
94
- "": 90,
95
- "": 91,
96
- "": 92,
97
- "": 93,
98
- "": 94,
99
- "": 95,
100
- "": 96,
101
- "": 97,
102
- "": 98,
103
- "": 99,
104
- "": 100,
105
- "": 101,
106
- "": 102,
107
- "": 103,
108
- "": 104,
109
- "": 105,
110
- "": 106,
111
- "": 107,
112
- "": 108,
113
- "": 109,
114
- "": 110,
115
- "": 111,
116
- "": 112,
117
- "": 113,
118
- "": 114,
119
- "": 115,
120
- "": 116,
121
- "": 117,
122
- "": 118,
123
- "": 119,
124
- "": 120,
125
- "": 121,
126
- "": 122,
127
- "": 123,
128
- "": 124,
129
- "": 125,
130
- "": 126,
131
- "": 127,
132
- "": 128,
133
- "": 129,
134
- "": 130,
135
- "": 131,
136
- "": 132,
137
- "": 133,
138
- "": 134,
139
- "": 135,
140
- "": 136,
141
- "": 137,
142
- "": 138,
143
- "": 139,
144
- "": 140,
145
- "": 141,
146
- "": 142,
147
- "": 143,
148
- "": 144,
149
- "": 145,
150
- "": 146,
151
- "": 147,
152
- "": 148,
153
- "": 149,
154
- "": 150,
155
- "": 151,
156
- "": 152,
157
- "": 153,
158
- "": 154,
159
- "": 155,
160
- "": 156,
161
- "": 157,
162
- "": 158,
163
- "": 159,
164
- "": 160,
165
- "": 161,
166
- "": 162,
167
- "": 163,
168
- "": 164,
169
- "": 165,
170
- "": 166,
171
- "": 167,
172
- "": 168,
173
- "": 169,
174
- "": 170,
175
- "": 171,
176
- "": 172,
177
- "": 173,
178
- "": 174,
179
- "": 175,
180
- "": 176,
181
- "": 177,
182
- "": 178,
183
- "": 179,
184
- "": 180,
185
- "": 181,
186
- "": 182,
187
- "": 183,
188
- "": 184,
189
- "": 185,
190
- "": 186,
191
- "": 187,
192
- "": 188,
193
- "": 189,
194
- "": 190,
195
- "": 191,
196
- "": 192,
197
- "": 193,
198
- "": 194,
199
- "": 195,
200
- "": 196,
201
- "": 197,
202
- "": 198,
203
- "": 199,
204
- "": 200,
205
- "": 201,
206
- "": 202,
207
- "": 203,
208
- "": 204,
209
- "": 205,
210
- "": 206,
211
- "": 207,
212
- "": 208,
213
- "": 209,
214
- "": 210,
215
- "": 211,
216
- "": 212,
217
- "": 213,
218
- "": 214,
219
- "": 215,
220
- "": 216,
221
- "": 217,
222
- "": 218,
223
- "": 219,
224
- "": 220,
225
- "": 221,
226
- "": 222,
227
- "": 223,
228
- "": 224,
229
- "": 225,
230
- "": 226,
231
- "": 227
 
 
 
 
 
 
 
 
 
232
  }
 
1
  {
2
+ "[PAD]": 238,
3
+ "[UNK]": 237,
4
  "|": 0,
5
  "ሀ": 1,
6
  "ሁ": 2,
 
77
  "ቨ": 73,
78
  "ቪ": 74,
79
  "ቫ": 75,
80
+ "": 76,
81
+ "": 77,
82
+ "": 78,
83
+ "": 79,
84
+ "": 80,
85
+ "": 81,
86
+ "": 82,
87
+ "": 83,
88
+ "": 84,
89
+ "": 85,
90
+ "": 86,
91
+ "": 87,
92
+ "": 88,
93
+ "": 89,
94
+ "": 90,
95
+ "": 91,
96
+ "": 92,
97
+ "": 93,
98
+ "": 94,
99
+ "": 95,
100
+ "": 96,
101
+ "": 97,
102
+ "": 98,
103
+ "": 99,
104
+ "": 100,
105
+ "": 101,
106
+ "": 102,
107
+ "": 103,
108
+ "": 104,
109
+ "": 105,
110
+ "": 106,
111
+ "": 107,
112
+ "": 108,
113
+ "": 109,
114
+ "": 110,
115
+ "": 111,
116
+ "": 112,
117
+ "": 113,
118
+ "": 114,
119
+ "": 115,
120
+ "": 116,
121
+ "": 117,
122
+ "": 118,
123
+ "": 119,
124
+ "": 120,
125
+ "": 121,
126
+ "": 122,
127
+ "": 123,
128
+ "": 124,
129
+ "": 125,
130
+ "": 126,
131
+ "": 127,
132
+ "": 128,
133
+ "": 129,
134
+ "": 130,
135
+ "": 131,
136
+ "": 132,
137
+ "": 133,
138
+ "": 134,
139
+ "": 135,
140
+ "": 136,
141
+ "": 137,
142
+ "": 138,
143
+ "": 139,
144
+ "": 140,
145
+ "": 141,
146
+ "": 142,
147
+ "": 143,
148
+ "": 144,
149
+ "": 145,
150
+ "": 146,
151
+ "": 147,
152
+ "": 148,
153
+ "": 149,
154
+ "": 150,
155
+ "": 151,
156
+ "": 152,
157
+ "": 153,
158
+ "": 154,
159
+ "": 155,
160
+ "": 156,
161
+ "": 157,
162
+ "": 158,
163
+ "": 159,
164
+ "": 160,
165
+ "": 161,
166
+ "": 162,
167
+ "": 163,
168
+ "": 164,
169
+ "": 165,
170
+ "": 166,
171
+ "": 167,
172
+ "": 168,
173
+ "": 169,
174
+ "": 170,
175
+ "": 171,
176
+ "": 172,
177
+ "": 173,
178
+ "": 174,
179
+ "": 175,
180
+ "": 176,
181
+ "": 177,
182
+ "": 178,
183
+ "": 179,
184
+ "": 180,
185
+ "": 181,
186
+ "": 182,
187
+ "": 183,
188
+ "": 184,
189
+ "": 185,
190
+ "": 186,
191
+ "": 187,
192
+ "": 188,
193
+ "": 189,
194
+ "": 190,
195
+ "": 191,
196
+ "": 192,
197
+ "": 193,
198
+ "": 194,
199
+ "": 195,
200
+ "": 196,
201
+ "": 197,
202
+ "": 198,
203
+ "": 199,
204
+ "": 200,
205
+ "": 201,
206
+ "": 202,
207
+ "": 203,
208
+ "": 204,
209
+ "": 205,
210
+ "": 206,
211
+ "": 207,
212
+ "": 208,
213
+ "": 209,
214
+ "": 210,
215
+ "": 211,
216
+ "": 212,
217
+ "": 213,
218
+ "": 214,
219
+ "": 215,
220
+ "": 216,
221
+ "": 217,
222
+ "": 218,
223
+ "": 219,
224
+ "": 220,
225
+ "": 221,
226
+ "": 222,
227
+ "": 223,
228
+ "": 224,
229
+ "": 225,
230
+ "": 226,
231
+ "": 227,
232
+ "ፒ": 228,
233
+ "ፓ": 229,
234
+ "ፔ": 230,
235
+ "ፕ": 231,
236
+ "ፖ": 232,
237
+ "፡": 233,
238
+ "።": 234,
239
+ "፣": 235,
240
+ "’": 236
241
  }