MehdiHosseiniMoghadam
committed on
Commit
•
5dc5731
1
Parent(s):
9c268f9
Update README.md
Browse files
README.md
CHANGED
@@ -21,7 +21,7 @@ model-index:
|
|
21 |
metrics:
|
22 |
- name: Test WER
|
23 |
type: wer
|
24 |
-
value:
|
25 |
---
|
26 |
|
27 |
# wav2vec2-large-xlsr-53-German
|
@@ -96,7 +96,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
96 |
|
97 |
import re
|
98 |
|
99 |
-
test_dataset = load_dataset("common_voice", "de", split="test[:
|
100 |
|
101 |
wer = load_metric("wer")
|
102 |
|
@@ -106,7 +106,7 @@ model = Wav2Vec2ForCTC.from_pretrained("MehdiHosseiniMoghadam/wav2vec2-large-xls
|
|
106 |
|
107 |
model.to("cuda")
|
108 |
|
109 |
-
chars_to_ignore_regex = '[
|
110 |
|
111 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
112 |
|
@@ -166,7 +166,7 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"],
|
|
166 |
|
167 |
```
|
168 |
|
169 |
-
**Test Result**:
|
170 |
|
171 |
## Training
|
172 |
|
@@ -174,4 +174,4 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"],
|
|
174 |
|
175 |
## Testing
|
176 |
|
177 |
-
|
|
|
21 |
metrics:
|
22 |
- name: Test WER
|
23 |
type: wer
|
24 |
+
value: 25.284593
|
25 |
---
|
26 |
|
27 |
# wav2vec2-large-xlsr-53-German
|
|
|
96 |
|
97 |
import re
|
98 |
|
99 |
+
test_dataset = load_dataset("common_voice", "de", split="test[:15%]")
|
100 |
|
101 |
wer = load_metric("wer")
|
102 |
|
|
|
106 |
|
107 |
model.to("cuda")
|
108 |
|
109 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
|
110 |
|
111 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
112 |
|
|
|
166 |
|
167 |
```
|
168 |
|
169 |
+
**Test Result**: 25.284593 %
|
170 |
|
171 |
## Training
|
172 |
|
|
|
174 |
|
175 |
## Testing
|
176 |
|
177 |
+
15% of the Common Voice `Test` dataset was used for testing.
|