File size: 4,166 Bytes
f959a75 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import unittest
from cer import CER
# Single CER metric instance shared by every test method in this module.
cer = CER()
class TestCER(unittest.TestCase):
    """Unit tests for the character error rate (CER) metric.

    Every test calls the module-level ``cer`` instance. In the comments,
    S / D / I are the substitutions, deletions and insertions needed to turn
    the prediction into the reference, N is the number of characters in the
    reference, and CER = (S + D + I) / N.
    """

    def test_cer_case_sensitive(self):
        """Characters that differ only in case count as substitutions."""
        refs = ["Magyar Országgyűlés"]
        preds = ["magyar országgyűlés"]
        # S = 2, D = 0, I = 0, N = 19, CER = 2 / 19
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 2 / 19, delta=1e-6)

    def test_cer_whitespace(self):
        """A space is a character; runs of spaces are expected to collapse."""
        # The prediction is missing the space between the two words.
        refs = ["Farkasok voltak"]
        preds = ["Farkasokvoltak"]
        # S = 0, D = 0, I = 1, N = 15, CER = 1 / 15
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 1 / 15, delta=1e-6)

        # The prediction has one wrong letter and one spurious space.
        refs = ["Farkasokvoltak"]
        preds = ["Ferkasok voltak"]
        # S = 1, D = 1, I = 0, N = 14, CER = 2 / 14
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 2 / 14, delta=1e-6)

        # The runs of spaces below are built by repetition so they survive
        # whitespace-collapsing tools; a literal run is easy to lose.

        # consecutive whitespaces case 1: extra spaces in the prediction
        refs = ["Farkasok voltak"]
        preds = ["Farkasok" + 2 * " " + "voltak"]
        # S = 0, D = 0, I = 0, N = 15, CER = 0
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 0.0, delta=1e-6)

        # consecutive whitespaces case 2: extra spaces in the reference
        refs = ["Farkasok" + 3 * " " + "voltak"]
        preds = ["Farkasok voltak"]
        # S = 0, D = 0, I = 0, N = 15 (after collapsing), CER = 0
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 0.0, delta=1e-6)

    def test_cer_sub(self):
        """A single wrong character is one substitution."""
        refs = ["Magyar"]
        preds = ["Megyar"]
        # S = 1, D = 0, I = 0, N = 6, CER = 1 / 6
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 1 / 6, delta=1e-6)

    def test_cer_del(self):
        """A spurious character in the prediction is one deletion."""
        refs = ["Farkasokvoltak"]
        preds = ["Farkasokavoltak"]
        # S = 0, D = 1, I = 0, N = 14, CER = 1 / 14
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 1 / 14, delta=1e-6)

    def test_cer_insert(self):
        """A character missing from the prediction is one insertion."""
        refs = ["Farkasokvoltak"]
        preds = ["Farkasokoltak"]
        # S = 0, D = 0, I = 1, N = 14, CER = 1 / 14
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 1 / 14, delta=1e-6)

    def test_cer_equal(self):
        """Identical prediction and reference give a CER of exactly zero."""
        refs = ["Magyar"]
        char_error_rate = cer.compute(predictions=refs, references=refs)
        self.assertEqual(char_error_rate, 0.0)

    def test_cer_list_of_seqs(self):
        """Errors and lengths are accumulated over all sequence pairs."""
        # ['Eötvös Loránd University', 'I love my daughter']
        refs = ["Eötvös Loránd Tudományegyetem", "szeretem a lányom"]
        char_error_rate = cer.compute(predictions=refs, references=refs)
        self.assertEqual(char_error_rate, 0.0)

        refs = ["diák", "Az arab nyelvet könnyű megtanulni!", "autó"]
        preds = ["dxák", "Az arab nyelvet könnyű megtanulni!", "autó"]
        # S = 1, D = 0, I = 0, N = 4 + 34 + 4 = 42, CER = 1 / 42
        char_error_rate = cer.compute(predictions=preds, references=refs)
        self.assertAlmostEqual(char_error_rate, 1 / 42, delta=1e-6)

    def test_correlated_sentences(self):
        """With ``concatenate_texts=True`` the texts are scored as one."""
        # Learn artificial intelligence to secure your future
        # Tanuljon mesterséges intelligenciát, hogy biztosítsa jövőjét
        refs = ["Tanuljon mesterséges intelligenciát,", " hogy biztosítsa jövőjét"]
        preds = ["Tanuljon mesterséges intelligenciát, hogy", " biztosítsa jövőjét"]
        # The leading whitespace of " hogy ..." and " biztosítsa ..." is
        # stripped during preprocessing, before the pieces are joined, so the
        # texts compared are
        #   reference:  "...intelligenciát,hogy biztosítsa jövőjét" (59 chars)
        #   prediction: "...intelligenciát, hogybiztosítsa jövőjét"
        # One space must be deleted and one inserted.
        # S = 0, D = 1, I = 1, N = 59, CER = 2 / 59
        # NOTE(review): N assumes the stripped pieces are joined without a
        # separator - confirm against the installed metric.
        char_error_rate = cer.compute(predictions=preds, references=refs, concatenate_texts=True)
        self.assertAlmostEqual(char_error_rate, 2 / 59, delta=1e-6)

    def test_cer_empty(self):
        """An empty reference is expected to raise ``ValueError``."""
        refs = [""]
        preds = ["tök mindegy"]
        with self.assertRaises(ValueError):
            cer.compute(predictions=preds, references=refs)
if __name__ == "__main__":
    # Run every test in this module when the file is executed as a script.
    unittest.main()