File size: 4,414 Bytes
2366e36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
from functools import partial

import mmcv

from mmocr.utils import list_from_file, list_to_file


def convert(closeset_line, merge_bg_others=False, ignore_idx=0, others_idx=25):
    """Convert line-json str of closeset to line-json str of openset. Note that
    this function is designed for closeset-wildreceipt to openset-wildreceipt.
    It may not be suitable to your own dataset.

    Every annotation has its ``label`` rewritten to one of the openset node
    labels (``bg`` = 0, ``key`` = 1, ``value`` = 2, ``others`` = 3) and gets
    an integer ``edge`` id. Annotations that belong to the same closeset
    key/value pair share one edge id; every ``ignore``/``others`` annotation
    gets a fresh edge id of its own. Edge ids start at 1 and are unique
    within one line.

    Args:
        closeset_line (str): The string to be deserialized to
            the closeset dictionary object.
        merge_bg_others (bool): If True, give the same label to "background"
            class and "others" class.
        ignore_idx (int): Index for ``ignore`` class.
        others_idx (int): Index for ``others`` class.

    Returns:
        str: The JSON-serialized openset object with the keys ``file_name``,
        ``height``, ``width`` and ``annotations``.

    Raises:
        ValueError: If an annotation label is neither ``ignore_idx`` nor
            ``others_idx`` nor a member of a closeset key/value pair.
    """
    # Closeset labels below ``others_idx`` alternate between value classes
    # (odd indices 1, 3, ...) and key classes (even indices 2, 4, ...).
    # Two labels at the same position of the two ranges make up a
    # key-value pair, i.e. key ``2k`` is paired with value ``2k - 1``.
    # NOTE(review): in wildreceipt this presumably pairs "Store_name_key"
    # with "Store_name_value" -- confirm against the dataset's class list.
    closeset_key_inds = range(2, others_idx, 2)
    closeset_value_inds = range(1, others_idx, 2)

    # label -> (openset role, label of the other member of the pair)
    pair_info = {}
    for key_idx, value_idx in zip(closeset_key_inds, closeset_value_inds):
        pair_info[key_idx] = ('key', value_idx)
        pair_info[value_idx] = ('value', key_idx)

    openset_node_label_mapping = {'bg': 0, 'key': 1, 'value': 2, 'others': 3}
    if merge_bg_others:
        openset_node_label_mapping['others'] = openset_node_label_mapping['bg']

    closeset_obj = json.loads(closeset_line)
    # The annotation dicts are updated in place below, so the list can be
    # shared between the parsed input and the output object.
    openset_obj = {
        'file_name': closeset_obj['file_name'],
        'height': closeset_obj['height'],
        'width': closeset_obj['width'],
        'annotations': closeset_obj['annotations']
    }

    edge_idx = 1
    label_to_edge = {}
    for anno in openset_obj['annotations']:
        label = anno['label']
        if label == ignore_idx or label == others_idx:
            # Background / others nodes never share an edge with anything.
            node_type = 'bg' if label == ignore_idx else 'others'
            anno['label'] = openset_node_label_mapping[node_type]
            anno['edge'] = edge_idx
            edge_idx += 1
            continue

        info = pair_info.get(label)
        if info is None:
            # Fail loudly instead of reusing the counterpart computed for a
            # previous annotation (or hitting an unbound local).
            raise ValueError(
                'Unexpected closeset label {!r}: it is neither the ignore '
                'index ({!r}), the others index ({!r}) nor part of a '
                'key/value pair.'.format(label, ignore_idx, others_idx))
        node_type, counterpart = info

        edge = label_to_edge.get(label)
        if edge is None:
            # First occurrence of this label: reuse the edge of its
            # counterpart if that was seen already, else open a new edge.
            edge = label_to_edge.get(counterpart)
            if edge is None:
                edge = edge_idx
                edge_idx += 1
            label_to_edge[label] = edge

        anno['edge'] = edge
        anno['label'] = openset_node_label_mapping[node_type]

    return json.dumps(openset_obj, ensure_ascii=False)


def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    """Convert a closeset annotation file into an openset annotation file.

    The lines of ``closeset_file`` are loaded with ``list_from_file``, each
    one is passed through :func:`convert` via
    ``mmcv.track_parallel_progress``, and the results are written to
    ``openset_file`` with ``list_to_file``.

    Args:
        closeset_file (str): Annotation file for closeset.
        openset_file (str): Annotation file for openset.
        merge_bg_others (bool): Forwarded to :func:`convert`.
        n_proc (int): Passed as ``nproc`` to
            ``mmcv.track_parallel_progress``.
    """
    line_converter = partial(convert, merge_bg_others=merge_bg_others)
    converted = mmcv.track_parallel_progress(
        line_converter, list_from_file(closeset_file), nproc=n_proc)
    list_to_file(openset_file, converted)


def parse_args():
    """Parse the command line of the conversion script.

    Returns:
        argparse.Namespace: Holds ``in_file``, ``out_file``, ``merge``
        (bool flag, False unless ``--merge`` is given) and ``n_proc``
        (int, default 10).
    """
    arg_parser = argparse.ArgumentParser()

    # Positional arguments: input first, output second.
    positionals = (
        ('in_file', 'Annotation file for closeset.'),
        ('out_file', 'Annotation file for openset.'),
    )
    for dest, description in positionals:
        arg_parser.add_argument(dest, help=description)

    merge_help = ('Merge two classes: "background" and "others" in closeset '
                  'to one class in openset.')
    arg_parser.add_argument('--merge', action='store_true', help=merge_help)
    arg_parser.add_argument(
        '--n_proc', type=int, default=10, help='Number of process.')

    return arg_parser.parse_args()


def main():
    """Script entry point: parse the CLI, run the conversion, report done."""
    cli = parse_args()
    process(
        closeset_file=cli.in_file,
        openset_file=cli.out_file,
        merge_bg_others=cli.merge,
        n_proc=cli.n_proc)
    print('finish')


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()