Haopeng's picture
Change Hypothesis highlight and WER
f70a1dc
raw
history blame
776 Bytes
import Levenshtein
import numpy as np
import pdb
import difflib
# Sample reference and hypothesis sentences, each tokenized on single spaces.
ref, hyp = (
    sentence.split(" ")
    for sentence in ("I love you too", "I really don't loie him")
)
# Return the hypothesis tokens, each labelled as matched or not matched.
def get_WER_highlight(ref, hyp):
    """Label every hypothesis token by whether it also appears in the reference diff.

    The two token sequences are compared with ``difflib.ndiff``. Only the
    entries that belong to ``hyp`` are kept: tokens common to both sequences
    and tokens present only in ``hyp``. Tokens present only in ``ref`` and
    ndiff's ``?`` hint lines are skipped.

    Args:
        ref: Sequence of reference tokens (strings).
        hyp: Sequence of hypothesis tokens (strings).

    Returns:
        A list of ``(token, label)`` tuples in hypothesis order, where
        ``label`` is the string ``"1"`` for a token common to both sequences
        and ``"0"`` for a token that only occurs in ``hyp``.
    """
    result = []
    for line in difflib.ndiff(ref, hyp):
        # Every ndiff entry starts with a fixed two-character code
        # ("  ", "+ ", "- " or "? "). Slice it off instead of splitting on
        # spaces so tokens that themselves contain spaces keep their full
        # text and get the right label.
        tag, token = line[:2], line[2:]
        if tag == "  ":
            result.append((token, "1"))
        elif tag == "+ ":
            result.append((token, "0"))
    return result
def diff_texts(text1, text2):
    """Compare two sequences with ``difflib.Differ`` and return labelled pieces.

    Each entry produced by ``Differ.compare`` is turned into a
    ``(text, label)`` tuple: ``text`` is the entry with its first two
    characters removed, and ``label`` is the entry's first character, or
    ``None`` when that character is a space.
    """
    labelled = []
    for entry in difflib.Differ().compare(text1, text2):
        marker = entry[0]
        label = None if marker == " " else marker
        labelled.append((entry[2:], label))
    return labelled
# x = diff_texts(ref, hyp)
# pdb.set_trace()