Spaces:

tomofi
/

NDLOCR

Build error

App Files Files Community

NDLOCR / src /ndl_layout /tools /process.py

3v324v23

Add files

c9019cd over 2 years ago

raw

history blame

9.68 kB

	#!/usr/bin/env python

	# Copyright (c) 2022, National Diet Library, Japan
	#
	# This software is released under the CC BY 4.0.
	# https://creativecommons.org/licenses/by/4.0/

	import sys
	import os
	from pathlib import Path
	from .utils import auto_run
	from typing import List
	import xml.etree.ElementTree as ET

	import mmcv
	from mmdet.apis import (inference_detector, init_detector)


	def generate_class_colors(class_num):
	    """Return one BGR color per class, with hues spread from 0 to 179.

	    An HSV table is built with saturation and value fixed at 255 and the
	    hue channel linearly spaced over [0, 179]; OpenCV then converts it to
	    BGR.

	    Args:
	        class_num: Number of colors to generate.

	    Returns:
	        A ``(class_num, 3)`` ``uint8`` array; row ``i`` is the color of
	        class ``i``. An empty ``(0, 3)`` array when ``class_num`` is 0.
	    """
	    import numpy as np

	    if class_num == 0:
	        # cv2.cvtColor refuses empty input, so answer the degenerate case
	        # directly instead of failing inside OpenCV.
	        return np.empty((0, 3), dtype=np.uint8)

	    import cv2

	    hsv = np.full((class_num, 3), 255, dtype=np.uint8)
	    hsv[:, 0] = np.linspace(0, 179, class_num)
	    # cvtColor expects an image-shaped array, hence the temporary leading
	    # axis that is dropped again right after the conversion.
	    return cv2.cvtColor(hsv[None, ...], cv2.COLOR_HSV2BGR)[0]


	def draw_legand(img, origin, classes, colors, ssz: int = 16):
	    """Draw a vertical class legend on ``img`` and return the image.

	    Each class gets a filled ``ssz`` x ``ssz`` swatch in its color with the
	    class name written next to it; entries are stacked downwards starting
	    at ``origin``.

	    Args:
	        img: Image handed to ``cv2.rectangle`` / ``cv2.putText``.
	        origin: ``(x, y)`` of the top-left corner of the first swatch.
	        classes: Class names, one legend entry each.
	        colors: Per-class colors, indexed like ``classes``; the first three
	            components of each entry are used.
	        ssz: Swatch edge length in pixels, also the vertical step per entry.

	    Returns:
	        The image returned by the last drawing call, or ``img`` unchanged
	        when ``classes`` is empty.
	    """
	    import cv2

	    left, top = origin[0], origin[1]
	    for idx, label in enumerate(classes):
	        raw = colors[idx]
	        # Convert the channels to plain Python ints before passing them on.
	        swatch_color = (int(raw[0]), int(raw[1]), int(raw[2]))
	        row_top = top + idx * ssz
	        swatch_end = (left + ssz - 1, row_top + ssz - 1)
	        img = cv2.rectangle(img, (left, row_top), swatch_end, swatch_color, -1)
	        img = cv2.putText(img, label, (left + ssz, row_top + ssz),
	                          cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
	                          cv2.LINE_AA)
	    return img


	class LayoutDetector:
	    """Wrap an mmdetection model and render its detections for inspection.

	    Attributes:
	        model: Detector created by ``init_detector`` in :meth:`load`.
	        classes: Class names read from the ``classes`` entry of the config.
	        colors: One color per class from ``generate_class_colors``.
	    """

	    def __init__(self, config: str, checkpoint: str, device: str):
	        """Load the model and derive the class list and color table.

	        Args:
	            config: Path of the mmdetection config file.
	            checkpoint: Path of the trained weights.
	            device: Device string passed to ``init_detector``.
	        """
	        print(f'load from config={config}, checkpoint={checkpoint}')
	        self.load(config, checkpoint, device)
	        cfg = mmcv.Config.fromfile(config)
	        self.classes = cfg.classes
	        self.colors = generate_class_colors(len(self.classes))

	    def load(self, config: str, checkpoint: str, device: str):
	        """Create the detector and store it in ``self.model``."""
	        self.model = init_detector(config, checkpoint, device)

	    def predict(self, img_path: str):
	        """Run ``inference_detector`` on ``img_path`` and return its result."""
	        return inference_detector(self.model, img_path)

	    def show(self, img_path: str, result, score_thr: float = 0.3, border: int = 3, show_legand: bool = True):
	        """Read ``img_path`` and return it with the detections drawn on it.

	        Args:
	            img_path: Image file to load with ``cv2.imread``.
	            result: Per-class sequences of detections; each detection is
	                indexed as ``[x0, y0, x1, y1, score]``.
	            score_thr: Detections scoring below this are not drawn.
	            border: Rectangle line thickness.
	            show_legand: Append the class legend when true.

	        Returns:
	            The rendered image, scaled so its longer side is about 1024
	            pixels.

	        Raises:
	            OSError: If the image cannot be read.
	        """
	        import cv2

	        img = cv2.imread(img_path)
	        if img is None:
	            # imread reports failure by returning None; fail here with the
	            # offending path rather than deep inside a drawing call.
	            raise OSError(f'failed to read image: {img_path}')
	        return self._render(img, result, score_thr, border, show_legand)

	    def draw_rects_with_data(self, img, result, score_thr: float = 0.3, border: int = 3, show_legand: bool = True):
	        """Return a rendering of ``result`` on top of an in-memory image.

	        Same output as :meth:`show`, but takes the pixel data directly. The
	        drawing happens on a copy so the caller's ``img`` stays untouched.
	        """
	        return self._render(img.copy(), result, score_thr, border, show_legand)

	    def _render(self, img, result, score_thr, border, show_legand):
	        """Draw boxes on ``img`` (in place), resize it and add the legend."""
	        import cv2

	        for class_idx, detections in enumerate(result):
	            raw = self.colors[class_idx]
	            color = (int(raw[0]), int(raw[1]), int(raw[2]))
	            for det in detections:
	                if float(det[4]) < score_thr:
	                    continue
	                top_left = (int(det[0]), int(det[1]))
	                bottom_right = (int(det[2]), int(det[3]))
	                img = cv2.rectangle(img, top_left, bottom_right, color, border)

	        # Uniform scale that maps the longer side to 1024 pixels.
	        scale = 1024.0 / max(img.shape[0], img.shape[1])
	        img = cv2.resize(img, dsize=None, fx=scale, fy=scale)

	        if show_legand:
	            ssz = 16
	            c_num = len(self.classes)
	            org_width = img.shape[1]
	            # Widen the canvas on the right (8 px per class, replicated
	            # border pixels) and put the legend there, bottom-aligned.
	            img = cv2.copyMakeBorder(
	                img, 0, 0, 0, 8 * c_num, cv2.BORDER_REPLICATE)
	            legend_origin = (org_width, img.shape[0] - ssz * c_num)
	            img = draw_legand(img, legend_origin,
	                              self.classes, self.colors, ssz=ssz)

	        return img


	def convert_to_xml_string(img_path, classes, result, score_thr: float = 0.3):
	    """Serialize the detections for one image file as a ``<PAGE>`` element.

	    The image is read only to obtain its width and height. Classes whose
	    name starts with ``line_`` produce ``<LINE>`` elements, those starting
	    with ``block_`` produce ``<BLOCK>`` elements, and all other classes are
	    skipped.

	    Args:
	        img_path: Image file; its base name becomes ``IMAGENAME``.
	        classes: Class names, indexed like ``result``.
	        result: Per-class sequences of detections; each detection is
	            indexed as ``[x0, y0, x1, y1, score]``.
	        score_thr: Detections scoring below this are omitted.

	    Returns:
	        The XML fragment as a string, terminated by a newline.

	    Raises:
	        OSError: If the image cannot be read.
	    """
	    import cv2
	    from xml.sax.saxutils import escape

	    from .ndl_parser import name_to_org_name

	    img = cv2.imread(img_path)
	    if img is None:
	        raise OSError(f'failed to read image: {img_path}')
	    img_h, img_w = img.shape[0:2]

	    # File names may contain &, < or ", which would otherwise yield
	    # malformed XML inside the attribute value.
	    img_name = escape(os.path.basename(img_path), {'"': '&quot;'})
	    parts = [f'<PAGE IMAGENAME = "{img_name}" WIDTH = "{img_w}" HEIGHT = "{img_h}">\n']

	    for idx, cls in enumerate(classes):
	        if cls.startswith('line_'):
	            tag = 'LINE'
	        elif cls.startswith('block_'):
	            tag = 'BLOCK'
	        else:
	            continue
	        for det in result[idx]:
	            conf = float(det[4])
	            if conf < score_thr:
	                continue
	            x, y = int(det[0]), int(det[1])
	            w, h = int(det[2] - det[0]), int(det[3] - det[1])
	            parts.append(
	                f'<{tag} TYPE = "{name_to_org_name(cls)}" X = "{x}" Y = "{y}" WIDTH = "{w}" HEIGHT = "{h}" CONF = "{conf:0.3f}"></{tag}>\n')

	    parts.append('</PAGE>\n')
	    return ''.join(parts)


	def convert_to_xml_string_with_data(img, img_path, classes, result, score_thr: float = 0.3):
	    """Serialize the detections for an in-memory image as a ``<PAGE>`` element.

	    Width and height come from ``img.shape``; ``img_path`` is used only for
	    the ``IMAGENAME`` attribute. Classes whose name starts with ``line_``
	    produce ``<LINE>`` elements, those starting with ``block_`` produce
	    ``<BLOCK>`` elements, and all other classes are skipped.

	    Args:
	        img: Image array; ``img.shape[0:2]`` is read as (height, width).
	        img_path: Path whose base name becomes ``IMAGENAME``.
	        classes: Class names, indexed like ``result``.
	        result: Per-class sequences of detections; each detection is
	            indexed as ``[x0, y0, x1, y1, score]``.
	        score_thr: Detections scoring below this are omitted.

	    Returns:
	        The XML fragment as a string, terminated by a newline.
	    """
	    from xml.sax.saxutils import escape

	    from .ndl_parser import name_to_org_name

	    img_h, img_w = img.shape[0:2]

	    # File names may contain &, < or ", which would otherwise yield
	    # malformed XML that cannot be parsed back.
	    img_name = escape(os.path.basename(img_path), {'"': '&quot;'})
	    parts = [f'<PAGE IMAGENAME = "{img_name}" WIDTH = "{img_w}" HEIGHT = "{img_h}">\n']

	    for idx, cls in enumerate(classes):
	        if cls.startswith('line_'):
	            tag = 'LINE'
	        elif cls.startswith('block_'):
	            tag = 'BLOCK'
	        else:
	            continue
	        for det in result[idx]:
	            conf = float(det[4])
	            if conf < score_thr:
	                continue
	            x, y = int(det[0]), int(det[1])
	            w, h = int(det[2] - det[0]), int(det[3] - det[1])
	            parts.append(
	                f'<{tag} TYPE = "{name_to_org_name(cls)}" X = "{x}" Y = "{y}" WIDTH = "{w}" HEIGHT = "{h}" CONF = "{conf:0.3f}"></{tag}>\n')

	    parts.append('</PAGE>\n')
	    return ''.join(parts)


	def run_layout_detection(img_paths: List[str] = None, list_path: str = None, output_path: str = "layout_prediction.xml",
	                         config: str = './models/config_file.py',
	                         checkpoint: str = './models/trained_weights.pth',
	                         device: str = 'cuda:0', score_thr: float = 0.3, use_show: bool = False, dump_dir: str = None):
	    """Run layout detection on a set of images and write the result as XML.

	    The XML is written to ``output_path`` and echoed to stdout.

	    Args:
	        img_paths: Image files to process.
	        list_path: Text file with one image path per line; when given it
	            replaces ``img_paths``. Blank lines are ignored.
	        output_path: Destination XML file.
	        config: Detector config file.
	        checkpoint: Detector weights file.
	        device: Device string for the detector.
	        score_thr: Detections scoring below this are dropped.
	        use_show: Show each rendered page in a window; pressing ESC stops
	            processing the remaining images.
	        dump_dir: Directory to save rendered pages to (created if missing).

	    Returns:
	        ``-1`` when neither ``img_paths`` nor ``list_path`` is given,
	        otherwise ``None``.
	    """
	    if list_path is not None:
	        with open(list_path) as list_file:
	            stripped = (line.strip() for line in list_file)
	            img_paths = [path for path in stripped if path]
	    if img_paths is None:
	        print('Please specify --img_paths or --list_path')
	        return -1

	    # Validate the arguments first: loading the model is the expensive step.
	    detector = LayoutDetector(config, checkpoint, device)

	    if dump_dir is not None:
	        Path(dump_dir).mkdir(parents=True, exist_ok=True)

	    visualize = use_show or dump_dir is not None

	    # The XML declaration promises utf-8, so write the file as utf-8.
	    with open(output_path, 'w', encoding='utf-8') as f:
	        def tee(s):
	            print(s, file=f, end="")
	            print(s, file=sys.stdout, end="")

	        tee('<?xml version="1.0" encoding="utf-8" standalone="yes"?><OCRDATASET xmlns="">\n')

	        for img_path in img_paths:
	            result = detector.predict(img_path)
	            xml_str = convert_to_xml_string(
	                img_path, detector.classes, result, score_thr=score_thr)
	            tee(xml_str)

	            if not visualize:
	                continue

	            import cv2
	            # Render once and reuse the image for both display and dump.
	            rendered = detector.show(img_path, result, score_thr=score_thr)

	            if use_show:
	                cv2.namedWindow('show')
	                cv2.imshow('show', rendered)
	                if 27 == cv2.waitKey(0):
	                    break

	            if dump_dir is not None:
	                cv2.imwrite(str(Path(dump_dir) / Path(img_path).name), rendered)

	        tee('</OCRDATASET>\n')


	class InferencerWithCLI:
	    """Layout inference on in-memory images for the CLI pipeline."""

	    def __init__(self, conf_dict):
	        """Build the detector from a configuration mapping.

	        Args:
	            conf_dict: Mapping with the keys ``config_path``,
	                ``checkpoint_path`` and ``device``.
	        """
	        config = conf_dict['config_path']
	        checkpoint = conf_dict['checkpoint_path']
	        device = conf_dict['device']
	        self.detector = LayoutDetector(config, checkpoint, device)

	    def inference_wich_cli(self, img=None, img_path='',
	                           score_thr: float = 0.3, dump: bool = False):
	        """Detect the layout of ``img`` and return it as an XML tree.

	        Args:
	            img: Image data passed to the detector.
	            img_path: Path used for the ``IMAGENAME`` attribute.
	            score_thr: Detections scoring below this are dropped.
	            dump: When true, also render the detections onto the image.

	        Returns:
	            ``None`` if no detector is available; otherwise a dict with
	            ``'xml'`` (an ``ElementTree`` rooted at ``OCRDATASET``) and
	            ``'dump_img'`` (the rendered image, or ``None`` when ``dump``
	            is false).
	        """
	        ET.register_namespace('', 'NDLOCRDATASET')
	        node = ET.fromstring(
	            '<?xml version="1.0" encoding="utf-8" standalone="yes"?><OCRDATASET xmlns="">\n</OCRDATASET>\n')

	        # prediction
	        if self.detector is None:
	            print('ERROR: Layout detector is not created.')
	            return None
	        result = self.detector.predict(img)

	        # xml creation
	        xml_str = convert_to_xml_string_with_data(
	            img, img_path, self.detector.classes, result, score_thr=score_thr)

	        # xml conversion from string
	        result_xml = ET.fromstring(xml_str)
	        node.append(result_xml)
	        tree = ET.ElementTree(node)

	        # optional visualization; ``dump`` is a bool, so test its truth
	        # value (``is not None`` would always be true)
	        dump_img = None
	        if dump:
	            dump_img = self.detector.draw_rects_with_data(
	                img, result, score_thr=score_thr)

	        return {'xml': tree, 'dump_img': dump_img}


	if __name__ == '__main__':
	    # Script entry point: hand run_layout_detection to the auto_run helper.
	    auto_run(run_layout_detection)