Spaces:
Runtime error
Runtime error
highlighting added, first version
Browse files- testdiff.py +49 -0
testdiff.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
from transformers import pipeline
|
5 |
+
import tokenizer
|
6 |
+
from difflib import Differ, SequenceMatcher
|
7 |
+
|
8 |
+
# Sample sentence pair used by the demo calls at the bottom of this script.
# NOTE(review): presumably text1 is the misspelled input and text2 its
# corrected form -- confirm with the author.
text1 = "Kver á á þenan bússtað"
text2 = "Hver á þennan bústað?"
|
10 |
+
|
11 |
+
def diff_texts(text1, text2):
    """Diff two sequences element by element and tag every element.

    Both arguments are handed to ``difflib.Differ.compare``. When they are
    plain strings the comparison is therefore done character by character.

    Args:
        text1: The "before" sequence (a string or a sequence of strings).
        text2: The "after" sequence (a string or a sequence of strings).

    Returns:
        A list of ``(element, tag)`` tuples in diff order, where ``tag`` is
        ``"-"`` for elements only in ``text1``, ``"+"`` for elements only in
        ``text2`` and ``None`` for elements common to both.
    """
    differ = Differ()
    tagged = []
    for token in differ.compare(text1, text2):
        # Each row from Differ is "<code> <element>": a one-character code,
        # a space, then the element itself.
        code = token[0]
        if code == "?":
            # "? " rows are intraline hints that Differ inserts between
            # similar multi-character elements; they belong to neither
            # input, so they must not be reported as tokens.
            continue
        tagged.append((token[2:], None if code == " " else code))
    return tagged
|
17 |
+
|
18 |
+
def split_text(text):
    """Split ``text`` into sentences and return them as a list.

    Delegates to ``tokenizer.split_into_sentences`` with ``original=True``
    and materializes whatever it yields.
    NOTE(review): ``original=True`` presumably keeps each sentence's original
    spacing instead of normalizing it -- confirm against the tokenizer docs.

    Args:
        text: The text to split.

    Returns:
        A list with one entry per sentence produced by the tokenizer.
    """
    return list(tokenizer.split_into_sentences(text, original=True))
|
21 |
+
|
22 |
+
def mark_text(text, tag):
    """Pair a single piece of text with its tag.

    Args:
        text: The token (or any value) to label.
        tag: The label to attach to it.

    Returns:
        The two-element tuple ``(text, tag)``.
    """
    return (text, tag)
|
24 |
+
|
25 |
+
def mark_span(text, tag):
    """Label every element of a span with the same tag.

    Args:
        text: An iterable of tokens. A plain string is iterated character
            by character.
        tag: The label applied to each token.

    Returns:
        A list with one ``mark_text(token, tag)`` result per element of
        ``text``, in order. An empty span yields an empty list.
    """
    return [mark_text(token, tag) for token in text]
|
27 |
+
|
28 |
+
def markup_diff(a, b,
                mark=mark_span,
                default_mark=lambda x: x,
                isjunk=None):
    """Return ``a`` and ``b`` with every span processed by ``mark``.

    The two sequences are aligned with ``difflib.SequenceMatcher`` (with the
    automatic junk heuristic disabled) and each opcode span is passed to
    ``mark`` together with the opcode tag (``'equal'``, ``'replace'``,
    ``'delete'`` or ``'insert'``). Equal spans are marked as well, not only
    the differing ones.

    Args:
        a: The first sequence of hashable tokens.
        b: The second sequence of hashable tokens.
        mark: Callable ``mark(span, tag)`` returning an iterable with exactly
            one output item per token in ``span``.
        default_mark: Currently unused; kept so existing callers that pass
            it keep working.
        isjunk: Optional junk predicate forwarded to ``SequenceMatcher``;
            junk is ignored by the differ.

    Returns:
        A tuple ``(out_a, out_b)`` of lists holding the marked items for
        ``a`` and ``b`` respectively.

    Raises:
        AssertionError: If ``mark`` did not produce exactly one item per
            input token.
    """
    seqmatcher = SequenceMatcher(isjunk=isjunk, a=a, b=b, autojunk=False)

    out_a, out_b = [], []
    for tag, a0, a1, b0, b1 in seqmatcher.get_opcodes():
        # For 'insert' the a-span is empty and for 'delete' the b-span is
        # empty, so each side only gains items for tokens it really has.
        out_a.extend(mark(a[a0:a1], tag))
        out_b.extend(mark(b[b0:b1], tag))

    # Sanity check: the opcodes cover both inputs completely, so the outputs
    # must line up one-to-one with the inputs.
    assert len(out_a) == len(a), "mark must return one item per token of a"
    assert len(out_b) == len(b), "mark must return one item per token of b"
    return out_a, out_b
|
47 |
+
|
48 |
+
# Demo output: character-level diff of the sample pair, followed by a
# word-level markup of the same pair (split on single spaces).
print(diff_texts(text1, text2))
print(markup_diff(text1.split(" "), text2.split(" ")))
|