Spaces:

tomofi
/

MMOCR

Runtime error

App Files Files Community

MMOCR / tools /data /textrecog /seg_synthtext_converter.py

tomofi

Add application file

2366e36 over 2 years ago

raw

history blame

2.69 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import argparse
	import json
	import os.path as osp

	import cv2

	from mmocr.utils import list_from_file, list_to_file


	def parse_old_label(data_root, in_path, img_size=False):
	imgid2imgname = {}
	imgid2anno = {}
	idx = 0
	for line in list_from_file(in_path):
	line = line.strip().split()
	img_full_path = osp.join(data_root, line[0])
	if not osp.exists(img_full_path):
	continue
	ann_file = osp.join(data_root, line[1])
	if not osp.exists(ann_file):
	continue

	img_info = {}
	img_info['file_name'] = line[0]
	if img_size:
	img = cv2.imread(img_full_path)
	h, w = img.shape[:2]
	img_info['height'] = h
	img_info['width'] = w
	imgid2imgname[idx] = img_info

	imgid2anno[idx] = []
	char_annos = []
	for t, ann_line in enumerate(list_from_file(ann_file)):
	ann_line = ann_line.strip()
	if t == 0:
	img_info['text'] = ann_line
	else:
	char_box = [float(x) for x in ann_line.split()]
	char_text = img_info['text'][t - 1]
	char_ann = dict(char_box=char_box, char_text=char_text)
	char_annos.append(char_ann)
	imgid2anno[idx] = char_annos
	idx += 1

	return imgid2imgname, imgid2anno


	def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False):
	lines = []
	for key, value in imgid2imgname.items():
	if key in imgid2anno:
	anno = imgid2anno[key]
	line_dict = {}
	line_dict['file_name'] = value['file_name']
	line_dict['text'] = value['text']
	if img_size:
	line_dict['height'] = value['height']
	line_dict['width'] = value['width']
	line_dict['annotations'] = anno
	lines.append(json.dumps(line_dict))
	list_to_file(out_path, lines)


	def parse_args():
	parser = argparse.ArgumentParser()
	parser.add_argument(
	'--data-root', help='data root for both image file and anno file')
	parser.add_argument(
	'--in-path',
	help='mapping file of image_name and ann_file,'
	' "image_name ann_file" in each line')
	parser.add_argument(
	'--out-path', help='output txt path with line-json format')

	args = parser.parse_args()
	return args


	def main():
	args = parse_args()
	imgid2imgname, imgid2anno = parse_old_label(args.data_root, args.in_path)
	gen_line_dict_file(args.out_path, imgid2imgname, imgid2anno)
	print('finish')


	if __name__ == '__main__':
	main()