JiHungLin committed on
Commit
e91ae45
1 Parent(s): 6ad5550

Saving train state of step 1000

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./distil-large-v3-init",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
1
  {
2
+ "_name_or_path": "openai/whisper-large-v3",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
distil-whisper/events.out.tfevents.1713341751.mycena-3090.144385.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca47056266d431f63008e943b3644392c7d27bbc0ee4e0e662135e39b3914d04
3
+ size 12458
run_distillation.py CHANGED
@@ -77,6 +77,9 @@ def chinese_wer(ref, hyp):
77
  返回:
78
  float: 計算出的 WER
79
  """
 
 
 
80
  # 將字符串分割成字符列表
81
  ref_chars = list(ref.replace(" ", ""))
82
  hyp_chars = list(hyp.replace(" ", ""))
@@ -1297,10 +1300,8 @@ def main():
1297
  # we do not want to group tokens when computing the metrics
1298
  label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
1299
  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
1300
- print("pred_str: ", pred_str)
1301
- print("label_str: ", label_str)
1302
- print("!!!!!!!!!!!!!!!!!!!!!!!!!")
1303
- wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
1304
  wer_ortho = 100 * chinese_wer(pred_str, label_str)
1305
 
1306
  # normalize everything and re-compute the WER
 
77
  返回:
78
  float: 計算出的 WER
79
  """
80
+ if type(ref) == list and type(hyp) == list:
81
+ ref = "".join(ref)
82
+ hyp = "".join(hyp)
83
  # 將字符串分割成字符列表
84
  ref_chars = list(ref.replace(" ", ""))
85
  hyp_chars = list(hyp.replace(" ", ""))
 
1300
  # we do not want to group tokens when computing the metrics
1301
  label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
1302
  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
1303
+
1304
+ # wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
 
 
1305
  wer_ortho = 100 * chinese_wer(pred_str, label_str)
1306
 
1307
  # normalize everything and re-compute the WER