File size: 1,746 Bytes
2366e36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Copyright (c) OpenMMLab. All rights reserved.
import json

import pytest

from mmocr.datasets.utils.parser import LineJsonParser, LineStrParser


def test_line_str_parser():
    """Exercise ``LineStrParser`` construction checks and ``get_item``.

    Covers three constructor calls that are expected to fail an assertion,
    one successful parse of a space-separated annotation line, and one
    ``get_item`` call that asks for more fields than the line provides.
    """
    data_ret = ['sample1.jpg hello\n', 'sample2.jpg world']
    keys = ['filename', 'text']
    keys_idx = [0, 1]
    separator = ' '

    # test init
    # ``keys`` passed as a bare string rather than a list.
    with pytest.raises(AssertionError):
        LineStrParser('filename', keys_idx, separator)
    # ``separator`` passed as a list rather than a string.
    with pytest.raises(AssertionError):
        LineStrParser(keys, keys_idx, [' '])
    # ``keys`` (two entries) and ``keys_idx`` (one entry) differ in length.
    with pytest.raises(AssertionError):
        LineStrParser(keys, [0], separator)

    # test get_item
    parser = LineStrParser(keys, keys_idx, separator)
    # The trailing newline of the first line must not leak into ``text``.
    assert parser.get_item(data_ret, 0) == {
        'filename': 'sample1.jpg',
        'text': 'hello'
    }

    # Build the parser outside the ``raises`` block: only ``get_item`` is
    # expected to fail here (field index 2 is requested from a line that
    # has just two fields). A constructor error must surface as a genuine
    # test failure instead of silently satisfying the expectation.
    parser = LineStrParser(['filename', 'text', 'ignore'], [0, 1, 2],
                           separator)
    with pytest.raises(Exception):
        parser.get_item(data_ret, 0)


def test_line_dict_parser():
    """Exercise ``LineJsonParser`` construction checks and ``get_item``.

    Covers two constructor calls that are expected to fail an assertion,
    one successful parse of a JSON-encoded annotation line, and one
    ``get_item`` call that asks for a key the line does not contain.
    """
    data_ret = [
        json.dumps({
            'filename': 'sample1.jpg',
            'text': 'hello'
        }),
        json.dumps({
            'filename': 'sample2.jpg',
            'text': 'world'
        })
    ]
    keys = ['filename', 'text']

    # test init
    # ``keys`` passed as a bare string rather than a list.
    with pytest.raises(AssertionError):
        LineJsonParser('filename')
    # ``keys`` passed as an empty list.
    with pytest.raises(AssertionError):
        LineJsonParser([])

    # test get_item
    parser = LineJsonParser(keys)
    assert parser.get_item(data_ret, 0) == {
        'filename': 'sample1.jpg',
        'text': 'hello'
    }

    # Build the parser outside the ``raises`` block: only ``get_item`` is
    # expected to fail here ('img_name' is not a key of the JSON lines).
    # A constructor error must surface as a genuine test failure instead
    # of silently satisfying the expectation.
    parser = LineJsonParser(['img_name', 'text'])
    with pytest.raises(Exception):
        parser.get_item(data_ret, 0)