# Copyright (c) OpenMMLab. All rights reserved.
import json
import tempfile

from mmocr.utils import list_from_file, list_to_file

# Line payloads used by the plain-text round trips: empty input,
# whitespace-only items, mixed str/int/float items and non-ASCII text.
lists = [
    [],
    [' '],
    ['\t'],
    ['a'],
    [1],
    [1.],
    ['a', 'b'],
    ['a', 1, 1.],
    [1, 1., 'a'],
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]

# One single-record case per entry of ``lists``: the same payload wrapped as
# ``[{'text': payload}]``. Each payload is copied so the two fixtures never
# share list objects.
dicts = [[{'text': list(items)}] for items in lists]


def _read_lines(filename):
    """Return the lines of a UTF-8 text file without trailing line breaks.

    The file is opened in a ``with`` block so the handle is closed before the
    enclosing temporary directory is removed.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return [line.rstrip('\r\n') for line in f]


def _assert_same_items(expected, actual):
    """Assert that two sequences have equal length and pairwise-equal items."""
    assert len(expected) == len(actual)
    assert all(item1 == item2 for item1, item2 in zip(expected, actual))


def test_list_to_file():
    """Check files written by ``list_to_file`` by reading them back directly.

    Covers plain-text output (one ``str(item)`` per line) and output whose
    lines are JSON-encoded records.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Items are expected to come back as their string form.
            _assert_same_items(list(map(str, lines)), _read_lines(filename))

        # test jsonl
        for i, records in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(record) for record in records])
            # Every case holds exactly one record, so only the first decoded
            # line is compared.
            loaded = [
                json.loads(line)['text'] for line in _read_lines(filename)
            ][0]
            _assert_same_items(list(records[0]['text']), loaded)


def test_list_from_file():
    """Check that ``list_from_file`` returns the lines written to a file.

    Each item is written as ``str(item)`` followed by a newline, and the
    loaded result is compared against those string forms.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt file
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            with open(filename, 'w', encoding='utf-8') as f:
                f.writelines(f'{line}\n' for line in lines)
            loaded = list_from_file(filename, encoding='utf-8')
            _assert_same_items(list(map(str, lines)), loaded)

        # test jsonl file
        for i, records in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            with open(filename, 'w', encoding='utf-8') as f:
                # Records are written via ``str()`` (a Python repr, not
                # JSON); the check only requires each line to come back
                # exactly as written.
                f.writelines(f'{record}\n' for record in records)
            loaded = list_from_file(filename, encoding='utf-8')
            _assert_same_items(list(map(str, records)), loaded)