|
import argparse |
|
import os.path as osp |
|
import xml.etree.ElementTree as ET |
|
|
|
import mmcv |
|
import numpy as np |
|
|
|
from mmdet.core import voc_classes |
|
|
|
# Lookup table: class name -> position of that name in ``voc_classes()``.
label_ids = {
    cls_name: cls_idx for cls_idx, cls_name in enumerate(voc_classes())
}
|
|
|
|
|
def _read_int(node, tag):
    """Return the integer stored in the text of child ``tag`` of ``node``.

    The text goes through ``float`` before ``int`` so that values written
    with a decimal point (e.g. ``"48.0"``) are accepted; any fractional part
    is truncated. Plain integer text yields the same value as ``int(text)``.
    """
    return int(float(node.find(tag).text))


def _pack_boxes(bboxes, labels):
    """Turn lists of boxes and labels into ``float32`` / ``int64`` arrays.

    ``reshape(-1, 4)`` gives an empty input the shape ``(0, 4)``, so no
    separate empty-list branch is needed. Every coordinate is decremented
    by 1 (presumably converting 1-based pixel indices to 0-based ones).
    """
    box_array = np.array(bboxes, dtype=np.float32).reshape(-1, 4) - 1
    label_array = np.array(labels, dtype=np.int64)
    return box_array, label_array


def parse_xml(args):
    """Parse one annotation XML file into an annotation dict.

    Args:
        args (tuple): ``(xml_path, img_path)``. ``xml_path`` is the file to
            read; ``img_path`` is stored unchanged as ``'filename'``.

    Returns:
        dict: ``{'filename', 'width', 'height', 'ann'}`` where ``ann`` holds
        ``bboxes`` / ``bboxes_ignore`` (``float32``, shape ``(n, 4)``, order
        xmin, ymin, xmax, ymax, each minus 1) and ``labels`` /
        ``labels_ignore`` (``int64``, shape ``(n,)``). Objects whose
        ``<difficult>`` value is non-zero go to the ``*_ignore`` arrays.

    Raises:
        KeyError: If an object's ``<name>`` is not a key of ``label_ids``.
    """
    xml_path, img_path = args
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    width = _read_int(size, 'width')
    height = _read_int(size, 'height')

    bboxes, labels = [], []
    bboxes_ignore, labels_ignore = [], []
    for obj in root.findall('object'):
        label = label_ids[obj.find('name').text]
        # ``find`` returns None for a missing tag; treat an object without
        # a <difficult> element as not difficult instead of crashing.
        difficult_node = obj.find('difficult')
        difficult = 0 if difficult_node is None else int(difficult_node.text)
        bnd_box = obj.find('bndbox')
        bbox = [
            _read_int(bnd_box, tag)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]
        if difficult:
            bboxes_ignore.append(bbox)
            labels_ignore.append(label)
        else:
            bboxes.append(bbox)
            labels.append(label)

    bboxes, labels = _pack_boxes(bboxes, labels)
    bboxes_ignore, labels_ignore = _pack_boxes(bboxes_ignore, labels_ignore)
    return {
        'filename': img_path,
        'width': width,
        'height': height,
        'ann': {
            'bboxes': bboxes,
            'labels': labels,
            'bboxes_ignore': bboxes_ignore,
            'labels_ignore': labels_ignore,
        },
    }
|
|
|
|
|
def cvt_annotations(devkit_path, years, split, out_file):
    """Collect the annotations of one split and dump them to ``out_file``.

    Args:
        devkit_path (str): Directory containing the ``VOC{year}`` folders.
        years (str | list[str]): One year or a list of years to merge.
        split (str): Name of the image-set list, read from
            ``VOC{year}/ImageSets/Main/{split}.txt``.
        out_file (str): Destination passed to ``mmcv.dump``. When it ends
            with ``'json'`` the annotations are first converted with
            ``cvt_to_coco_json``.

    Returns:
        The dumped annotations, or ``None`` if the list file of any
        requested year is missing. In that case a message is printed and
        nothing is written, even for years that were already parsed.
    """
    year_list = years if isinstance(years, list) else [years]

    collected = []
    for year in year_list:
        filelist = osp.join(devkit_path,
                            f'VOC{year}/ImageSets/Main/{split}.txt')
        if not osp.isfile(filelist):
            print(f'filelist does not exist: {filelist}, '
                  f'skip voc{year} {split}')
            return

        # One (xml_path, img_path) task per image name; the image path is
        # kept relative to the devkit directory.
        tasks = [
            (osp.join(devkit_path, f'VOC{year}/Annotations/{img_name}.xml'),
             f'VOC{year}/JPEGImages/{img_name}.jpg')
            for img_name in mmcv.list_from_file(filelist)
        ]
        collected.extend(mmcv.track_progress(parse_xml, tasks))

    if out_file.endswith('json'):
        collected = cvt_to_coco_json(collected)
    mmcv.dump(collected, out_file)
    return collected
|
|
|
|
|
def cvt_to_coco_json(annotations):
    """Re-pack per-image annotation dicts into one COCO-style dict.

    Args:
        annotations (list[dict]): Items with keys ``'filename'``,
            ``'width'``, ``'height'`` and ``'ann'``; ``'ann'`` holds the
            arrays ``bboxes``, ``labels``, ``bboxes_ignore`` and
            ``labels_ignore`` (boxes are indexed as 2-D arrays whose first
            four columns are x1, y1, x2, y2).

    Returns:
        dict: Keys ``'images'``, ``'type'``, ``'categories'`` and
        ``'annotations'``. Image ids, annotation ids and category ids all
        start at 0. Boxes from ``bboxes_ignore`` get ``iscrowd = 1``; every
        annotation gets ``ignore = 0``.

    Raises:
        AssertionError: If the same ``'filename'`` appears twice.
    """
    coco = {
        'images': [],
        'type': 'instance',
        'categories': [],
        'annotations': [],
    }

    coco['categories'].extend({
        'supercategory': str('none'),
        'id': int(cls_idx),
        'name': str(cls_name),
    } for cls_idx, cls_name in enumerate(voc_classes()))

    # (box key, label key, iscrowd value) for regular and ignored boxes;
    # regular boxes are emitted first for each image.
    groups = (
        ('bboxes', 'labels', 0),
        ('bboxes_ignore', 'labels_ignore', 1),
    )

    seen_files = set()
    for image_id, entry in enumerate(annotations):
        file_name = entry['filename']
        ann = entry['ann']
        assert file_name not in seen_files
        coco['images'].append({
            'id': int(image_id),
            'file_name': str(file_name),
            'height': int(entry['height']),
            'width': int(entry['width']),
        })
        seen_files.add(file_name)

        for box_key, label_key, crowd in groups:
            boxes = ann[box_key][:, :4]
            box_labels = ann[label_key]
            for idx, box in enumerate(boxes):
                category_id = box_labels[idx]
                x1, y1 = int(box[0]), int(box[1])
                x2, y2 = int(box[2]), int(box[3])
                xywh = np.array(
                    [box[0], box[1], box[2] - box[0], box[3] - box[1]])
                coco['annotations'].append({
                    # The box outline serves as a four-corner polygon.
                    'segmentation': [[x1, y1, x1, y2, x2, y2, x2, y1]],
                    'area': int(xywh[2] * xywh[3]),
                    'ignore': 0,
                    'iscrowd': crowd,
                    'image_id': int(image_id),
                    'bbox': xywh.astype(int).tolist(),
                    'category_id': int(category_id),
                    # Ids are consecutive, so the next id equals the
                    # number of annotations stored so far.
                    'id': len(coco['annotations']),
                })

    return coco
|
|
|
|
|
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: With ``devkit_path`` (required positional),
        ``out_dir`` (``-o`` / ``--out-dir``, ``None`` when omitted) and
        ``out_format`` (``'pkl'`` by default, or ``'coco'``).
    """
    arg_parser = argparse.ArgumentParser(
        description='Convert PASCAL VOC annotations to mmdetection format')
    arg_parser.add_argument('devkit_path', help='pascal voc devkit path')
    arg_parser.add_argument('-o', '--out-dir', help='output path')
    arg_parser.add_argument(
        '--out-format',
        default='pkl',
        choices=('pkl', 'coco'),
        help='output format, "coco" indicates coco annotation format')
    return arg_parser.parse_args()
|
|
|
|
|
def main():
    """Convert every available VOC year/split under the devkit path.

    Output goes to ``--out-dir`` (the devkit path when omitted). For each of
    VOC2007 and VOC2012 that exists, the ``train``, ``val``, ``trainval``
    and ``test`` splits are converted; when both years exist, a merged
    ``voc0712`` set is also produced for ``train``, ``val`` and ``trainval``.

    Raises:
        IOError: If neither ``VOC2007`` nor ``VOC2012`` is a sub-directory
            of the devkit path.
    """
    args = parse_args()
    devkit_path = args.devkit_path
    out_dir = args.out_dir if args.out_dir else devkit_path
    mmcv.mkdir_or_exist(out_dir)

    years = [
        year for year in ('2007', '2012')
        if osp.isdir(osp.join(devkit_path, f'VOC{year}'))
    ]
    if len(years) == 2:
        # Both years present: add one extra entry for the merged dataset.
        years.append(['2007', '2012'])
    if not years:
        raise IOError(f'The devkit path {devkit_path} contains neither '
                      '"VOC2007" nor "VOC2012" subfolder')

    if args.out_format == 'coco':
        out_fmt = '.json'
    else:
        out_fmt = f'.{args.out_format}'

    single_year_prefix = {'2007': 'voc07', '2012': 'voc12'}
    for year in years:
        merged = isinstance(year, list)
        prefix = 'voc0712' if merged else single_year_prefix[year]
        splits = ['train', 'val', 'trainval']
        if not merged:
            # The test split is converted per year only, never merged.
            splits.append('test')
        for split in splits:
            dataset_name = prefix + '_' + split
            print(f'processing {dataset_name} ...')
            cvt_annotations(devkit_path, year, split,
                            osp.join(out_dir, dataset_name + out_fmt))
    print('Done!')
|
|
|
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|