Saving train state of step 1000
Browse files
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"apply_spec_augment": false,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "openai/whisper-large-v3",
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"apply_spec_augment": false,
|
distil-whisper/events.out.tfevents.1713341751.mycena-3090.144385.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca47056266d431f63008e943b3644392c7d27bbc0ee4e0e662135e39b3914d04
|
3 |
+
size 12458
|
run_distillation.py
CHANGED
@@ -77,6 +77,9 @@ def chinese_wer(ref, hyp):
|
|
77 |
返回:
|
78 |
float: 計算出的 WER
|
79 |
"""
|
|
|
|
|
|
|
80 |
# 將字符串分割成字符列表
|
81 |
ref_chars = list(ref.replace(" ", ""))
|
82 |
hyp_chars = list(hyp.replace(" ", ""))
|
@@ -1297,10 +1300,8 @@ def main():
|
|
1297 |
# we do not want to group tokens when computing the metrics
|
1298 |
label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
|
1299 |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
|
1300 |
-
|
1301 |
-
|
1302 |
-
print("!!!!!!!!!!!!!!!!!!!!!!!!!")
|
1303 |
-
wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
|
1304 |
wer_ortho = 100 * chinese_wer(pred_str, label_str)
|
1305 |
|
1306 |
# normalize everything and re-compute the WER
|
|
|
77 |
返回:
|
78 |
float: 計算出的 WER
|
79 |
"""
|
80 |
+
if type(ref) == list and type(hyp) == list:
|
81 |
+
ref = "".join(ref)
|
82 |
+
hyp = "".join(hyp)
|
83 |
# 將字符串分割成字符列表
|
84 |
ref_chars = list(ref.replace(" ", ""))
|
85 |
hyp_chars = list(hyp.replace(" ", ""))
|
|
|
1300 |
# we do not want to group tokens when computing the metrics
|
1301 |
label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
|
1302 |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
|
1303 |
+
|
1304 |
+
# wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
|
|
|
|
|
1305 |
wer_ortho = 100 * chinese_wer(pred_str, label_str)
|
1306 |
|
1307 |
# normalize everything and re-compute the WER
|