Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved. | |
import argparse | |
import json | |
import os.path as osp | |
import cv2 | |
from mmocr.utils import list_from_file, list_to_file | |
def parse_old_label(data_root, in_path, img_size=False): | |
imgid2imgname = {} | |
imgid2anno = {} | |
idx = 0 | |
for line in list_from_file(in_path): | |
line = line.strip().split() | |
img_full_path = osp.join(data_root, line[0]) | |
if not osp.exists(img_full_path): | |
continue | |
ann_file = osp.join(data_root, line[1]) | |
if not osp.exists(ann_file): | |
continue | |
img_info = {} | |
img_info['file_name'] = line[0] | |
if img_size: | |
img = cv2.imread(img_full_path) | |
h, w = img.shape[:2] | |
img_info['height'] = h | |
img_info['width'] = w | |
imgid2imgname[idx] = img_info | |
imgid2anno[idx] = [] | |
char_annos = [] | |
for t, ann_line in enumerate(list_from_file(ann_file)): | |
ann_line = ann_line.strip() | |
if t == 0: | |
img_info['text'] = ann_line | |
else: | |
char_box = [float(x) for x in ann_line.split()] | |
char_text = img_info['text'][t - 1] | |
char_ann = dict(char_box=char_box, char_text=char_text) | |
char_annos.append(char_ann) | |
imgid2anno[idx] = char_annos | |
idx += 1 | |
return imgid2imgname, imgid2anno | |
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False): | |
lines = [] | |
for key, value in imgid2imgname.items(): | |
if key in imgid2anno: | |
anno = imgid2anno[key] | |
line_dict = {} | |
line_dict['file_name'] = value['file_name'] | |
line_dict['text'] = value['text'] | |
if img_size: | |
line_dict['height'] = value['height'] | |
line_dict['width'] = value['width'] | |
line_dict['annotations'] = anno | |
lines.append(json.dumps(line_dict)) | |
list_to_file(out_path, lines) | |
def parse_args(): | |
parser = argparse.ArgumentParser() | |
parser.add_argument( | |
'--data-root', help='data root for both image file and anno file') | |
parser.add_argument( | |
'--in-path', | |
help='mapping file of image_name and ann_file,' | |
' "image_name ann_file" in each line') | |
parser.add_argument( | |
'--out-path', help='output txt path with line-json format') | |
args = parser.parse_args() | |
return args | |
def main(): | |
args = parse_args() | |
imgid2imgname, imgid2anno = parse_old_label(args.data_root, args.in_path) | |
gen_line_dict_file(args.out_path, imgid2imgname, imgid2anno) | |
print('finish') | |
if __name__ == '__main__': | |
main() | |