Spaces:
Runtime error
Runtime error
File size: 2,922 Bytes
2366e36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# Copyright (c) OpenMMLab. All rights reserved.
import json
import tempfile
from mmocr.utils import list_from_file, list_to_file
# Plain-text fixtures: each inner list is the content of one file, one item
# per line. Items are deliberately not all strings.
lists = [
    # empty file and whitespace-only lines
    [],
    [' '],
    ['\t'],
    # single-item files: str, int, float
    ['a'],
    [1],
    [1.0],
    # multi-item files, including mixed item types
    ['a', 'b'],
    ['a', 1, 1.0],
    [1, 1.0, 'a'],
    # non-ASCII text
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]
# JSON-lines fixtures: each entry is a one-element list holding a single
# record of the form {'text': <payload>}.
dicts = [[{'text': payload}] for payload in (
    # empty payload and whitespace-only items
    [],
    [' '],
    ['\t'],
    # single-item payloads: str, int, float
    ['a'],
    [1],
    [1.0],
    # multi-item payloads, including mixed item types
    ['a', 'b'],
    ['a', 1, 1.0],
    [1, 1.0, 'a'],
    # non-ASCII text
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
)]
def test_list_to_file():
    """Check the files written by ``list_to_file``.

    Every fixture in ``lists`` is written to a ``.txt`` file, and every
    fixture in ``dicts`` is JSON-encoded and written to a ``.jsonl`` file,
    all inside a temporary directory. Each file is then read back as UTF-8
    and compared item by item with what was passed in.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Read back inside a ``with`` block so the handle is closed
            # before the temporary directory is removed.
            with open(filename, 'r', encoding='utf-8') as f:
                lines2 = [line.rstrip('\r\n') for line in f]
            # Items may be ints/floats; the file content is compared against
            # their ``str`` form.
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            with open(filename, 'r', encoding='utf-8') as f:
                # Each fixture holds a single record, so only the first
                # decoded line is compared.
                lines2 = [
                    json.loads(line.rstrip('\r\n'))['text'] for line in f
                ][0]
            lines = list(lines[0]['text'])
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
def test_list_from_file():
    """Check that ``list_from_file`` reads back what was written line by line.

    Each fixture in ``lists`` (as ``.txt``) and ``dicts`` (as ``.jsonl``) is
    written to a temporary directory, one ``str``-formatted item per line,
    then loaded with ``list_from_file`` and compared with the ``str`` form of
    the items.
    """

    def _write_then_load(path, items):
        # Write one formatted item per line, then load the file through the
        # function under test.
        with open(path, 'w', encoding='utf-8') as handle:
            handle.writelines(f'{item}\n' for item in items)
        loaded = list_from_file(path, encoding='utf-8')
        expected = [str(item) for item in items]
        assert len(expected) == len(loaded)
        assert all(want == got for want, got in zip(expected, loaded))

    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt file
        for i, lines in enumerate(lists):
            _write_then_load(f'{tmpdirname}/{i}.txt', lines)
        # test jsonl file
        for i, lines in enumerate(dicts):
            _write_then_load(f'{tmpdirname}/{i}.jsonl', lines)
|