File size: 3,386 Bytes
2366e36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmocr.datasets.builder import LOADERS, build_parser
from .backend import (HardDiskAnnFileBackend, HTTPAnnFileBackend,
                      PetrelAnnFileBackend)


@LOADERS.register_module()
class AnnFileLoader:
    """Annotation file loader to load annotations from ann_file, and parse raw
    annotation to dict format with certain parser.

    The loader is sized (``len``), indexable and iterable. It acts as its own
    iterator, so all iterations over one instance share a single cursor.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Dictionary to construct parser
            to parse original annotation infos.
        repeat (int|float): Repeated times of dataset. Must be positive.
            Default: 1.
        file_storage_backend (str): The storage backend type for annotation
            file. Options are "disk", "http" and "petrel". Default: "disk".
        file_format (str): The format of annotation file. Options are
            "txt" and "lmdb". Default: "txt".
        **kwargs: Extra keyword arguments forwarded unchanged to the
            selected annotation file backend.
    """

    # Maps the ``file_storage_backend`` option to the backend class that
    # actually reads the annotation file.
    _backends = {
        'disk': HardDiskAnnFileBackend,
        'petrel': PetrelAnnFileBackend,
        'http': HTTPAnnFileBackend
    }

    def __init__(self,
                 ann_file,
                 parser,
                 repeat=1,
                 file_storage_backend='disk',
                 file_format='txt',
                 **kwargs):
        assert isinstance(ann_file, str)
        assert isinstance(repeat, (int, float))
        assert isinstance(parser, dict)
        assert repeat > 0
        # Validate against the backend table itself so the accepted names
        # can never drift from the classes that are really available.
        assert file_storage_backend in self._backends
        assert file_format in ['txt', 'lmdb']

        # Iteration cursor; created here so that ``next(loader)`` works even
        # if ``iter(loader)`` has not been called first.
        self._n = 0
        self.parser = build_parser(parser)
        self.repeat = repeat
        self.ann_file_backend = self._backends[file_storage_backend](
            file_format, **kwargs)
        self.ori_data_infos = self._load(ann_file)

    def __len__(self):
        """Return the number of raw annotations scaled by ``repeat``.

        The product is truncated to an integer, so a fractional ``repeat``
        never yields a fractional length.
        """
        return int(len(self.ori_data_infos) * self.repeat)

    def _load(self, ann_file):
        """Load annotation file through the configured backend."""

        return self.ann_file_backend(ann_file)

    def __getitem__(self, index):
        """Retrieve anno info of one instance with dict format.

        The raw annotations and ``index`` are handed to the parser's
        ``get_item``; any index mapping is left to the parser.
        """
        return self.parser.get_item(self.ori_data_infos, index)

    def __iter__(self):
        """Reset the iteration cursor and return the loader itself."""
        self._n = 0
        return self

    def __next__(self):
        """Return the next parsed annotation, or stop after ``len(self)``."""
        if self._n < len(self):
            data = self[self._n]
            self._n += 1
            return data
        raise StopIteration

    def close(self):
        """Close the loaded annotations when they hold a closable resource.

        Intended for ann_file with lmdb format. Annotation containers that
        expose no ``close`` method are left untouched instead of raising
        ``AttributeError``, so callers can always call this during cleanup.
        """
        data_infos = getattr(self, 'ori_data_infos', None)
        closer = getattr(data_infos, 'close', None)
        if callable(closer):
            closer()


@LOADERS.register_module()
class HardDiskLoader(AnnFileLoader):
    """Deprecated loader for txt format annotation files on hard disks.

    Kept for backward compatibility; it emits a ``UserWarning`` and then
    behaves as ``AnnFileLoader`` with ``file_storage_backend='disk'`` and
    ``file_format='txt'``.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Dictionary to construct parser
            to parse original annotation infos.
        repeat (int|float): Repeated times of dataset.
    """

    def __init__(self, ann_file, parser, repeat=1):
        # Point users at the replacement before delegating to the parent.
        deprecation_msg = ('HardDiskLoader is deprecated, please use '
                           'AnnFileLoader instead.')
        warnings.warn(deprecation_msg, category=UserWarning)

        # The fixed options that distinguish this loader from its parent.
        backend_options = dict(file_storage_backend='disk', file_format='txt')
        super().__init__(ann_file, parser, repeat, **backend_options)


@LOADERS.register_module()
class LmdbLoader(AnnFileLoader):
    """Deprecated loader for lmdb format annotation files on hard disks.

    Kept for backward compatibility; it emits a ``UserWarning`` and then
    behaves as ``AnnFileLoader`` with ``file_storage_backend='disk'`` and
    ``file_format='lmdb'``.

    Args:
        ann_file (str): Annotation file path.
        parser (dict): Dictionary to construct parser
            to parse original annotation infos.
        repeat (int|float): Repeated times of dataset.
    """

    def __init__(self, ann_file, parser, repeat=1):
        # Point users at the replacement before delegating to the parent.
        deprecation_msg = ('LmdbLoader is deprecated, please use '
                           'AnnFileLoader instead.')
        warnings.warn(deprecation_msg, category=UserWarning)

        # The fixed options that distinguish this loader from its parent.
        backend_options = dict(file_storage_backend='disk', file_format='lmdb')
        super().__init__(ann_file, parser, repeat, **backend_options)