File size: 776 Bytes
f70a1dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import Levenshtein
import numpy as np
import pdb
import difflib

# Sample reference / hypothesis sentences, tokenised on single spaces so
# they can be passed directly to the diff helpers defined in this module.
ref = "I love you too".split(sep=" ")
hyp = "I really don't loie him".split(sep=" ")

def get_WER_highlight(ref, hyp):
    """Label every hypothesis token as matching the reference or not.

    The two token sequences are aligned with ``difflib.ndiff``. Lines that
    ndiff marks as reference-only (``"- "``) and its ``"? "`` intraline hint
    lines are skipped, so the result contains hypothesis tokens only.

    Args:
        ref: Sequence of reference tokens (strings).
        hyp: Sequence of hypothesis tokens (strings).

    Returns:
        A list of ``(token, label)`` tuples in hypothesis order. ``label`` is
        ``"1"`` when ndiff reports the token as common to both sequences and
        ``"0"`` when it reports the token as present only in ``hyp``.
    """
    result = []
    for line in difflib.ndiff(ref, hyp):
        code = line[0]
        if code not in (" ", "+"):
            continue
        # Every ndiff line is a two-character prefix followed by the token.
        # Slicing (rather than splitting on spaces) keeps tokens that contain
        # spaces intact and keeps the label tied to the prefix code alone.
        result.append((line[2:], "1" if code == " " else "0"))
    return result


def diff_texts(text1, text2):
    """Compare two sequences and return each diff entry with its marker.

    Each line produced by ``difflib.Differ().compare(text1, text2)`` is split
    into its first character (the diff code) and everything from the third
    character onward (the payload).

    Returns:
        A list of ``(payload, marker)`` tuples in the order Differ emits
        them. ``marker`` is ``None`` when the code is a space (entry common
        to both inputs); otherwise it is the code character itself, e.g.
        ``"+"`` or ``"-"``. Any ``"? "`` hint lines emitted by Differ are
        passed through with ``"?"`` as the marker.
    """
    pairs = []
    for entry in difflib.Differ().compare(text1, text2):
        code = entry[0]
        payload = entry[2:]
        if code == " ":
            code = None
        pairs.append((payload, code))
    return pairs
    
# x = diff_texts(ref, hyp)

# pdb.set_trace()