File size: 7,772 Bytes
c9019cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# Copyright (c) 2022, National Diet Library, Japan
#
# This software is released under the CC BY 4.0.
# https://creativecommons.org/licenses/by/4.0/


import copy
import datetime
import glob
import os
import sys
import yaml


def _list_subdirs(path):
    """Return the immediate (non-hidden) sub-directories of ``path``, sorted."""
    # glob gives no ordering guarantee (it depends on the file system), so the
    # result is sorted to keep the processing order reproducible between runs.
    return sorted(p for p in glob.glob(os.path.join(path, '*')) if os.path.isdir(p))


def _collect_workstation_dirs(path, depth):
    """Recursively collect the input directories of the workstation layout.

    ``depth`` is the nesting level of ``path`` (0 for the 'workstation'
    directory itself). The directories found at the fourth level below the
    starting directory are returned as a flat list.
    """
    depth += 1
    subdirs = _list_subdirs(path)
    if depth > 3:
        # Deepest expected level: these directories are the inputs themselves.
        return subdirs
    if (depth < 2) and not subdirs:
        # Only an empty top level is reported; empty intermediate levels
        # simply contribute nothing to the result.
        print('[ERROR] Input directory structure dose not match workstation mode', file=sys.stderr)
        return []
    collected = []
    for subdir in subdirs:
        collected.extend(_collect_workstation_dirs(subdir, depth))
    return collected


def parse_cfg(cfg_dict):
    """
    Parse command-line arguments/options into the form used by the inference code.

    The input dictionary is deep-copied, merged with the contents of the yml
    file named by ``cfg_dict['config_file']`` (yml values override the command
    line values), and then extended with derived entries.

    Parameters
    ----------
    cfg_dict : dict
        Arguments and options given on the command line. This function reads
        the keys 'config_file', 'proc_range', 'save_xml', 'save_image',
        'input_root', 'output_root' and 'input_structure' (after the yml merge).
        The dictionary itself is not modified.

    Returns
    -------
    infer_cfg : dict or None
        Configuration used to run inference. Compared with the merged input:
        'proc_range' becomes ``{'start': int, 'end': int}``, 'partial_infer'
        (bool) and 'input_dirs' (list of str) are added, and 'input_root' /
        'output_root' are made absolute. ``None`` is returned, after printing
        an '[ERROR]' message to stderr, when the configuration is invalid.
    """
    infer_cfg = copy.deepcopy(cfg_dict)

    # add inference config parameters from yml config file
    if not os.path.isfile(cfg_dict['config_file']):
        print('[ERROR] Config yml file not found.', file=sys.stderr)
        return None

    with open(cfg_dict['config_file'], 'r') as yml:
        yml_config = yaml.safe_load(yml)

    # safe_load returns None for an empty file and a non-dict for a yml file
    # whose top level is a scalar or a list; neither can be merged.
    if not isinstance(yml_config, dict):
        print('[ERROR] Config yml file read error.', file=sys.stderr)
        return None

    infer_cfg.update(yml_config)

    # save_xml will be ignored when last proc does not output xml data
    proc_range = infer_cfg['proc_range']
    if (proc_range != '0..3') and (infer_cfg['save_xml'] or infer_cfg['save_image']):
        print('[WARNING] save_xml and save_image flags are ignored because this is partial execution.')
        print('          All output of last proc will be saved in output directory.')

    # parse start/end indices of inference process:
    # the first and last elements of proc_range (e.g. '0..3') are the indices.
    try:
        start = int(proc_range[0])
        end = int(proc_range[-1])
    except (LookupError, TypeError, ValueError):
        # Malformed value (empty, non-numeric, not indexable): report it the
        # same way as every other validation failure instead of crashing.
        print('[ERROR] Value of proc_range must be [x..y : x <= y] .', file=sys.stderr)
        return None
    if start > end:
        print('[ERROR] Value of proc_range must be [x..y : x <= y] .', file=sys.stderr)
        return None
    infer_cfg['proc_range'] = {
        'start': start,
        'end': end
    }
    # anything other than the full 0..3 range is a partial run
    infer_cfg['partial_infer'] = (start != 0) or (end != 3)

    # create input_dirs from input_root
    # input_dirs is list of dirs that contain img (and xml) dir
    infer_cfg['input_root'] = os.path.abspath(infer_cfg['input_root'])
    infer_cfg['output_root'] = os.path.abspath(infer_cfg['output_root'])
    input_root = infer_cfg['input_root']
    input_structure = infer_cfg['input_structure']

    if input_structure == 's':
        # - Single input dir mode
        # input_root
        #  ├── xml
        #  │   └── R[7-digit serial number].xml   (XML data)
        #  └── img
        #      └── R[7-digit serial number]_pp.jp2   (image data)

        # validation check for input dir structure
        if not os.path.isdir(os.path.join(input_root, 'img')):
            print('[ERROR] Input img diretctory not found in {}'.format(input_root), file=sys.stderr)
            return None
        # NOTE(review): the xml directory is required here when start > 2 but
        # in the 'i' mode when start > 1 -- confirm which threshold is intended.
        if (start > 2) and (not os.path.isdir(os.path.join(input_root, 'xml'))):
            print('[ERROR] Input xml diretctory not found in {}'.format(input_root), file=sys.stderr)
            return None
        infer_cfg['input_dirs'] = [input_root]
    elif input_structure == 'i':
        # - Partial inference mode
        # input_root
        #  └── PID
        #      ├── xml
        #      │   └── R[7-digit serial number].xml   (XML data)
        #      └── img
        #          └── R[7-digit serial number]_pp.jp2   (image data)
        infer_cfg['input_dirs'] = []
        for input_dir in _list_subdirs(input_root):
            # directories lacking the required sub-directories are skipped
            # with a warning rather than aborting the whole run
            if not os.path.isdir(os.path.join(input_dir, 'img')):
                print('[WARNING] Input directory {0} is skipped(no img diretctory)'.format(input_dir))
                continue
            if (start > 1) and (not os.path.isdir(os.path.join(input_dir, 'xml'))):
                print('[WARNING] Input directory {0} is skipped(no xml diretctory)'.format(input_dir))
                continue
            infer_cfg['input_dirs'].append(input_dir)
    elif input_structure == 't':
        # - ToshoData mode
        # input_root
        #  └── tosho_19XX_bunkei
        #      └── R[7-digit serial number]_pp.jp2   (image data)
        infer_cfg['input_dirs'] = _list_subdirs(input_root)
        # an 'img' directory directly under input_root suggests the user
        # actually meant the single input dir layout
        if any(os.path.basename(d) == 'img' for d in infer_cfg['input_dirs']):
            print('[WARNING] This input structure might be single input(img diretctory found)')
    elif input_structure == 'w':
        # - Work station input mode
        # input_root
        #  └── workstation
        #      └── [collect(3 digits) or digital(3 digits)] folder
        #           └── [15-digit serial number] folder   (1st digit of PID)
        #                └── [3-digit serial number] folder   (2nd-4th digits of PID)
        #                     └── [3-digit serial number] folder   (5th-7th digits of PID)
        #                          └── R[7-digit serial number]_contents.jp2   (image data)

        # check if workstation directory exist
        work_dir = os.path.join(input_root, 'workstation')
        if not os.path.isdir(work_dir):
            print('[ERROR] \'workstation\' directory not found', file=sys.stderr)
            return None

        # get input dir list
        infer_cfg['input_dirs'] = _collect_workstation_dirs(work_dir, 0)
    elif input_structure == 'f':
        # - Image file input mode
        # input_root is equal to input image file path
        infer_cfg['input_dirs'] = [input_root]
    else:
        print('[ERROR] Unexpected input directory structure type: {0}.'.format(input_structure), file=sys.stderr)
        return None

    return infer_cfg


def save_xml(xml_to_save, path):
    """
    Save an XML tree to the given file path.

    Parameters
    ----------
    xml_to_save : object
        Tree object whose ``write(path, encoding=..., xml_declaration=...)``
        method is called (presumably an ``xml.etree.ElementTree.ElementTree``
        -- confirm against the callers).
    path : str
        File path the XML file is written to.

    Raises
    ------
    OSError
        When writing fails. The error is logged to stderr and the original
        exception is re-raised, so its concrete subclass, errno and message
        remain available to the caller.
    """
    print('### save xml : {}###'.format(path))
    try:
        xml_to_save.write(path, encoding='utf-8', xml_declaration=True)
    except OSError as err:
        print("[ERROR] XML save error : {0}".format(err), file=sys.stderr)
        # bare raise keeps the caught exception instead of replacing it with
        # an empty OSError that has lost all diagnostic detail
        raise


def mkdir_with_duplication_check(dir_path):
    """
    Create a directory, choosing an alternative name if it already exists.

    While the candidate path is an existing directory, a warning is printed
    and a '_YYYYmmddHHMMSS' timestamp of the current local time is appended
    to the candidate.

    Parameters
    ----------
    dir_path : str
        Path of the directory to create.

    Returns
    -------
    str
        Path of the directory that was actually created; equal to
        ``dir_path`` unless a timestamp suffix had to be added.
    """
    candidate = dir_path

    # keep extending the name until it no longer collides with a directory
    while True:
        if not os.path.isdir(candidate):
            break
        print('[WARNING] Directory {0} already exist.'.format(dir_path))
        suffix = datetime.datetime.now().strftime('_%Y%m%d%H%M%S')
        candidate = candidate + suffix

    renamed = candidate != dir_path
    if renamed:
        print('[WARNING] Directory is changed to {0}.'.format(candidate))

    os.mkdir(candidate)
    return candidate