|
|
|
"""This script helps to convert labelme-style dataset to the coco format. |
|
|
|
Usage: |
|
$ python labelme2coco.py \ |
|
--img-dir /path/to/images \ |
|
--labels-dir /path/to/labels \ |
|
--out /path/to/coco_instances.json \ |
|
[--class-id-txt /path/to/class_with_id.txt] |
|
|
|
Note: |
|
Labels dir file structure: |
|
. |
|
└── PATH_TO_LABELS
     ├── image1.json
     ├── image2.json
     └── ...
|
|
|
Images dir file structure: |
|
. |
|
└── PATH_TO_IMAGES
     ├── image1.jpg
     ├── image2.png
     └── ...
|
|
|
If `--class-id-txt` is set, its contents are used for the `categories`
field; if it is not set, the categories are generated automatically from
all the labelme label files and saved to `class_with_id.txt`.
|
|
|
class_with_id.txt example, each line is "id class_name": |
|
```text |
|
1 cat |
|
2 dog |
|
3 bicycle |
|
4 motorcycle |
|
|
|
``` |
|
""" |
|
import argparse |
|
import json |
|
from pathlib import Path |
|
from typing import Optional |
|
|
|
import numpy as np |
|
from mmengine import track_iter_progress |
|
|
|
from mmyolo.utils.misc import IMG_EXTENSIONS |
|
|
|
|
|
def parse_args():
    """Parse the command line options of the converter.

    Returns:
        argparse.Namespace: Namespace with the attributes ``img_dir``,
        ``labels_dir``, ``out`` and ``class_id_txt``. An option that is not
        given on the command line is ``None``.
    """
    # (flag, help text) pairs; every option is an optional string.
    option_help = (
        ('--img-dir', 'Dataset image directory'),
        ('--labels-dir', 'Dataset labels directory'),
        ('--out', 'COCO label json output path'),
        ('--class-id-txt', 'All class id txt path'),
    )

    cli = argparse.ArgumentParser()
    for flag, description in option_help:
        cli.add_argument(flag, type=str, default=None, help=description)
    return cli.parse_args()
|
|
|
|
|
def format_coco_annotations(points: list, image_id: int, annotations_id: int,
                            category_id: int) -> dict:
    """Gen COCO annotations format label from labelme format label.

    Args:
        points (list): Coordinates of the four vertices of the rectangle
            bbox, given as ``[[x1, y1], [x2, y2], [x1, y2], [x2, y1]]``.
            ``points[0]`` is used as the bbox origin and
            ``points[1] - points[0]`` as its width and height.
        image_id (int): Image id.
        annotations_id (int): Annotations id.
        category_id (int): Category id of the annotated object.

    Return:
        annotation_info (dict): COCO annotation data with the keys
        ``iscrowd``, ``category_id``, ``id``, ``image_id``, ``bbox``
        (``[x, y, width, height]``), ``area`` and ``segmentation``.
    """
    x0, y0 = points[0][0], points[0][1]
    width = points[1][0] - x0
    height = points[1][1] - y0

    # Swap the 2nd and 3rd vertices so that the flattened polygon walks
    # around the rectangle outline instead of crossing its diagonal.
    polygon = np.asarray(points).copy()
    polygon[[1, 2]] = polygon[[2, 1]]

    return {
        'iscrowd': 0,
        'category_id': category_id,
        'id': annotations_id,
        'image_id': image_id,
        'bbox': [x0, y0, width, height],
        'area': width * height,
        # `tolist()` converts to native Python numbers. Wrapping the array in
        # `list()` would keep NumPy scalars, and integer ones (np.int64) are
        # rejected by `json.dump`.
        'segmentation': [polygon.flatten().tolist()],
    }
|
|
|
|
|
def parse_labelme_to_coco(
        image_dir: str,
        labels_root: str,
        all_classes_id: Optional[dict] = None) -> (dict, dict):
    """Gen COCO json format label from labelme format label.

    Every file in ``image_dir`` whose lower-cased suffix is in
    ``IMG_EXTENSIONS`` is paired with the label file
    ``<labels_root>/<image stem>.json``. Images without a label file are
    reported and skipped. Only shapes whose ``shape_type`` is ``rectangle``
    are converted; other shapes are reported and skipped (their class name
    is still registered as a category).

    Images are processed in sorted path order, so image ids and
    automatically assigned category ids are reproducible between runs.

    Args:
        image_dir (str): Image dir path.
        labels_root (str): Image label root path.
        all_classes_id (Optional[dict]): All class with id, mapping class
            name to category id. When None, category ids are assigned
            starting from 1 in the order the class names are first seen.
            Default None.

    Return:
        coco_json (dict): COCO json data.
        category_to_id (dict): Mapping from category name to category id.
            This is the ``all_classes_id`` object itself when it is given.

    Raises:
        ValueError: If ``all_classes_id`` is given and a label file uses a
            class name that is not one of its keys.

    COCO json example:

    {
        "images": [
            {
                "height": 3000,
                "width": 4000,
                "id": 1,
                "file_name": "IMG_20210627_225110.jpg"
            },
            ...
        ],
        "categories": [
            {
                "id": 1,
                "name": "cat"
            },
            ...
        ],
        "annotations": [
            {
                "iscrowd": 0,
                "category_id": 1,
                "id": 1,
                "image_id": 1,
                "bbox": [
                    1183.7313232421875,
                    1230.0509033203125,
                    1270.9998779296875,
                    927.0848388671875
                ],
                "area": 1178324.7170306593,
                "segmentation": [
                    [
                        1183.7313232421875,
                        1230.0509033203125,
                        1183.7313232421875,
                        2157.1357421875,
                        2454.731201171875,
                        2157.1357421875,
                        2454.731201171875,
                        1230.0509033203125
                    ]
                ]
            },
            ...
        ]
    }
    """
    # init coco json field
    coco_json = {'images': [], 'categories': [], 'annotations': []}

    image_id = 0
    annotations_id = 0
    if all_classes_id is None:
        category_to_id = dict()
        categories_labels = []
    else:
        category_to_id = all_classes_id
        categories_labels = list(all_classes_id.keys())

    # add the user-provided class ids and names to the categories list
    # (no-op when the categories are generated automatically)
    for class_name, class_id in category_to_id.items():
        coco_json['categories'].append({
            'id': class_id,
            'name': class_name
        })

    # Sorted because `iterdir()` yields entries in arbitrary order, which
    # would make image ids and auto-assigned category ids vary between runs.
    img_file_list = sorted(
        img_file for img_file in Path(image_dir).iterdir()
        if img_file.suffix.lower() in IMG_EXTENSIONS)

    for img_file in track_iter_progress(img_file_list):

        # Append the suffix to the stem instead of using `with_suffix`,
        # which would replace the part after the last dot of a stem such as
        # `img.v1` and look for `img.json` instead of `img.v1.json`.
        label_path = Path(labels_root) / f'{img_file.stem}.json'
        if not label_path.exists():
            print(f'Can not find label file: {label_path}, skip...')
            continue

        # load labelme label
        with open(label_path, encoding='utf-8') as f:
            labelme_data = json.load(f)

        image_id = image_id + 1  # coco id begin from 1

        # update coco 'images' field
        coco_json['images'].append({
            'height':
            labelme_data['imageHeight'],
            'width':
            labelme_data['imageWidth'],
            'id':
            image_id,
            # Use the name of the image that was actually found in
            # `image_dir`; labelme's `imagePath` may be stale or a
            # Windows-style path that `Path(...).name` cannot split on POSIX.
            'file_name':
            img_file.name
        })

        for label_shapes in labelme_data['shapes']:

            # Update coco 'categories' field
            class_name = label_shapes['label']

            if class_name not in category_to_id:
                if all_classes_id is not None:
                    # check class name
                    raise ValueError(f'Got unexpected class name {class_name}, '
                                     'which is not in your `--class-id-txt`.')

                # only update when not been added before
                categories_labels.append(class_name)
                new_category_id = len(categories_labels)
                category_to_id[class_name] = new_category_id
                coco_json['categories'].append({
                    'id': new_category_id,  # categories id start with 1
                    'name': class_name
                })

            # get shape type and convert it to coco format
            shape_type = label_shapes['shape_type']
            if shape_type != 'rectangle':
                print(f'not support `{shape_type}` yet, skip...')
                continue

            annotations_id = annotations_id + 1

            # convert the two labelme corners to four ordered vertices:
            # [min corner, max corner, (x_min, y_max), (x_max, y_min)]
            (x1, y1), (x2, y2) = label_shapes['points']
            x1, x2 = sorted([x1, x2])
            y1, y2 = sorted([y1, y2])
            points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]]
            coco_annotations = format_coco_annotations(
                points, image_id, annotations_id, category_to_id[class_name])
            coco_json['annotations'].append(coco_annotations)

    print(f'Total image = {image_id}')
    print(f'Total annotations = {annotations_id}')
    print(f'Number of categories = {len(categories_labels)}, '
          f'which is {categories_labels}')

    return coco_json, category_to_id
|
|
|
|
|
def convert_labelme_to_coco(image_dir: str,
                            labels_dir: str,
                            out_path: str,
                            class_id_txt: Optional[str] = None):
    """Convert labelme format label to COCO json format label.

    The COCO json is written to ``out_path`` (parent directories are created
    if needed). When ``class_id_txt`` is None, the generated class-to-id
    mapping is additionally saved as ``class_with_id.txt`` next to
    ``out_path``, one ``"id class_name"`` pair per line.

    Args:
        image_dir (str): Image dir path.
        labels_dir (str): Image label path.
        out_path (str): COCO json file save path. Must end with ``.json``.
        class_id_txt (Optional[str]): All class id txt file path. Each
            non-blank line must be ``"id class_name"`` separated by
            whitespace; blank lines are ignored. Default None.

    Raises:
        AssertionError: If ``out_path`` does not end with ``.json``, if
            ``class_id_txt`` does not end with ``.txt``, or if it contains
            no class line.
        ValueError: If a line of ``class_id_txt`` does not consist of
            exactly two whitespace-separated fields, or the id is not an
            integer.
    """
    assert Path(out_path).suffix == '.json', \
        f'`out_path` must be a .json file, got {out_path}'

    if class_id_txt is not None:
        assert Path(class_id_txt).suffix == '.txt', \
            f'`class_id_txt` must be a .txt file, got {class_id_txt}'

        all_classes_id = dict()
        with open(class_id_txt, encoding='utf-8') as f:
            # Blank lines (e.g. a trailing empty line) carry no class info.
            txt_lines = [
                txt_line for txt_line in f.read().splitlines()
                if txt_line.strip()
            ]
        assert len(txt_lines) > 0, f'No class found in {class_id_txt}'

        for txt_line in txt_lines:
            # Split on any run of whitespace so that tabs, repeated spaces
            # and surrounding whitespace are tolerated, while a missing
            # class name ("1 ") is rejected as the error message states.
            class_info = txt_line.split()
            if len(class_info) != 2:
                raise ValueError('Error parse "class_id_txt" file '
                                 f'{class_id_txt}, please check if some of '
                                 'the class names is blank, like "1  " -> '
                                 '"1 blank", or class name has space between'
                                 ' words, like "1 Big house" -> "1 '
                                 'Big-house".')
            v, k = class_info
            all_classes_id.update({k: int(v)})
    else:
        all_classes_id = None

    # convert to coco json
    coco_json_data, category_to_id = parse_labelme_to_coco(
        image_dir, labels_dir, all_classes_id)

    # save json result
    Path(out_path).parent.mkdir(exist_ok=True, parents=True)
    print(f'Saving json to {out_path}')
    # Context manager so the file is flushed and closed deterministically.
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(coco_json_data, f, indent=2)

    if class_id_txt is None:
        category_to_id_path = Path(out_path).with_name('class_with_id.txt')
        print(f'Saving class id txt to {category_to_id_path}')
        with open(category_to_id_path, 'w', encoding='utf-8') as f:
            for k, v in category_to_id.items():
                f.write(f'{v} {k}\n')
    else:
        print('Not Saving new class id txt, user should using '
              f'{class_id_txt} for training config')
|
|
|
|
|
def main():
    """Command line entry point: parse the options and run the conversion."""
    cli_args = parse_args()
    convert_labelme_to_coco(
        image_dir=cli_args.img_dir,
        labels_dir=cli_args.labels_dir,
        out_path=cli_args.out,
        class_id_txt=cli_args.class_id_txt)
    print('All done!')
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|
|