File size: 2,922 Bytes
2366e36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Copyright (c) OpenMMLab. All rights reserved.
import json
import tempfile

from mmocr.utils import list_from_file, list_to_file

lists = [
    [],
    [' '],
    ['\t'],
    ['a'],
    [1],
    [1.],
    ['a', 'b'],
    ['a', 1, 1.],
    [1, 1., 'a'],
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]

dicts = [
    [{
        'text': []
    }],
    [{
        'text': [' ']
    }],
    [{
        'text': ['\t']
    }],
    [{
        'text': ['a']
    }],
    [{
        'text': [1]
    }],
    [{
        'text': [1.]
    }],
    [{
        'text': ['a', 'b']
    }],
    [{
        'text': ['a', 1, 1.]
    }],
    [{
        'text': [1, 1., 'a']
    }],
    [{
        'text': ['啊', '啊啊']
    }],
    [{
        'text': ['選択', 'noël', 'Информацией', 'ÄÆä']
    }],
]


def test_list_to_file():
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            lines2 = [
                line.rstrip('\r\n')
                for line in open(filename, 'r', encoding='utf-8').readlines()
            ]
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            lines2 = [
                json.loads(line.rstrip('\r\n'))['text']
                for line in open(filename, 'r', encoding='utf-8').readlines()
            ][0]

            lines = list(lines[0]['text'])
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))


def test_list_from_file():
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt file
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            with open(filename, 'w', encoding='utf-8') as f:
                f.writelines(f'{line}\n' for line in lines)
            lines2 = list_from_file(filename, encoding='utf-8')
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
        # test jsonl file
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            with open(filename, 'w', encoding='utf-8') as f:
                f.writelines(f'{line}\n' for line in lines)
            lines2 = list_from_file(filename, encoding='utf-8')
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))