agucci committed on
Commit
3b82ac6
1 Parent(s): 7b8fc5f

Initial test

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ inptest.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ **/*.jpg
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "yolov5"]
2
+ path = yolov5
3
+ url = https://github.com/ultralytics/yolov5
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ from PIL import Image
3
+ from ultralytics import YOLO
4
+ from utils import draw_bbox
5
+ import gradio as gr
6
+ import numpy as np
7
+ import subprocess
8
+
9
+
10
with gr.Blocks() as demo:
    gr.Markdown("Detect planes demo.")

    # Order matters: the dropdown reports the selected *index* into this list,
    # and the last two entries (the YOLO models) use their own inference paths.
    models = ["SSD", "FasterRCNN", "CenterNet", "RetinaNet", "DETR", "RTMDET", "YOLOv5", "YOLOv8"]

    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a flattened diff view -- confirm against the
    # intended UI layout.
    with gr.Tab("Image"):
        with gr.Row():
            with gr.Column():
                image_input_single = gr.Image()
                image_output = gr.Image(visible=True)
                with gr.Row():
                    drop = gr.Dropdown(list(models), label="Model selection", type="index", value=models[0])
                    image_button = gr.Button("Detect", variant='primary')
            with gr.Column(visible=True) as output_row:
                object_count = gr.Textbox(value=0, label="Aircrafts detected")

    def runmodel(input_img, model_num):
        """Run the selected detector on ``input_img`` and return the annotated result.

        Args:
            input_img: Image array delivered by the ``gr.Image`` input
                (``None`` when the user has not provided an image).
            model_num: Index into ``models`` of the detector to run.

        Returns:
            The ``(image, count)`` pair produced by ``draw_bbox`` for the
            selected model.

        Raises:
            gr.Error: If no input image was provided.
            subprocess.CalledProcessError: If an external inference script
                exits with a non-zero status.
        """
        if input_img is None:
            raise gr.Error("Please provide an image before running detection.")

        # The inference scripts read their input from disk, so persist the
        # uploaded array under a fixed file name first.
        source = "inptest.jpg"
        Image.fromarray(input_img).save(source)

        model_name = models[model_num]
        print("Using model", model_name)
        tag = model_name.lower()

        conf = 0.3

        if model_name == "YOLOv8":
            model = YOLO('inference/models/yolov8best.pt')
            model(source, imgsz=1024, conf=conf, save_txt=True, save_conf=True,
                  save=True, project="inference/results/yolov8_inference")
        elif model_name == "YOLOv5":
            # NOTE(review): ``conf`` is not forwarded to detect.py here, so
            # YOLOv5 runs with that script's own default threshold -- confirm
            # whether that is intended.
            # Argument lists (no shell) avoid quoting problems, and
            # check=True surfaces a failed run instead of letting draw_bbox
            # pick up stale or missing results.
            subprocess.run(
                [
                    "python3", "yolov5/detect.py",
                    "--weights", "inference/models/yolov5best.pt",
                    "--source", source,
                    "--save-txt",
                    "--save-conf",
                    "--project", "inference/results/yolov5_inference",
                    "--name", "predict",
                ],
                check=True,
            )
        else:
            # Every remaining entry in ``models`` is served by the
            # image_inference.py script with a per-model config and checkpoint.
            subprocess.run(
                [
                    "python3", "image_inference.py",
                    source,
                    f"inference/{tag}_config.py",
                    "--weights", f"inference/models/{tag}best.pth",
                    "--out-dir", f"inference/results/{tag}_inference",
                    "--pred-score-thr", str(conf),
                ],
                check=True,
            )

        im, count = draw_bbox(tag)
        return im, count

    image_button.click(runmodel, inputs=[image_input_single, drop], outputs=[image_output, object_count])

demo.launch()
image_inference.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from argparse import ArgumentParser
2
+
3
+ from mmengine.logging import print_log
4
+
5
+ from mmdet.apis import DetInferencer
6
+
7
+
8
def parse_args():
    """Parse the command line into inferencer constructor and call arguments.

    Returns:
        A ``(init_args, call_args)`` tuple of dicts. ``init_args`` holds the
        ``model``, ``weights``, ``device`` and ``palette`` options;
        ``call_args`` holds every remaining parsed option.
    """
    parser = ArgumentParser()
    parser.add_argument(
        'inputs', type=str, help='Input image file or folder path.')
    parser.add_argument(
        'model',
        type=str,
        help='Config or checkpoint .pth file or the model name '
        'and alias defined in metafile. The model configuration '
        'file will try to read from .pth if the parameter is '
        'a .pth weights file.')
    parser.add_argument('--weights', default=None, help='Checkpoint file')
    parser.add_argument(
        '--out-dir',
        type=str,
        default='outputs',
        help='Output directory of images or prediction results.')
    parser.add_argument('--texts', help='text prompt')
    # Default to None rather than 'cuda:0' so the inferencer can choose an
    # available device; a hard-coded CUDA default fails on CPU-only hosts
    # whenever the caller does not pass --device.
    parser.add_argument(
        '--device',
        default=None,
        help='Device used for inference. If omitted, an available device '
        'is selected automatically.')
    parser.add_argument(
        '--pred-score-thr',
        type=float,
        default=0.3,
        help='bbox score threshold')
    parser.add_argument(
        '--batch-size', type=int, default=1, help='Inference batch size.')
    parser.add_argument(
        '--show',
        action='store_true',
        help='Display the image in a popup window.')
    parser.add_argument(
        '--no-save-vis',
        action='store_true',
        help='Do not save detection vis results')
    parser.add_argument(
        '--no-save-pred',
        action='store_true',
        help='Do not save detection json results')
    parser.add_argument(
        '--print-result',
        action='store_true',
        help='Whether to print the results.')
    parser.add_argument(
        '--palette',
        default='none',
        choices=['coco', 'voc', 'citys', 'random', 'none'],
        help='Color palette used for visualization')
    # only for GLIP
    parser.add_argument(
        '--custom-entities',
        '-c',
        action='store_true',
        help='Whether to customize entity names? '
        'If so, the input text should be '
        '"cls_name1 . cls_name2 . cls_name3 ." format')

    call_args = vars(parser.parse_args())

    # Nothing will be written, so clear the output directory.
    if call_args['no_save_vis'] and call_args['no_save_pred']:
        call_args['out_dir'] = ''

    # A positional .pth argument is a checkpoint, not a config/model name.
    if call_args['model'].endswith('.pth'):
        print_log('The model is a weight file, automatically '
                  'assign the model to --weights')
        call_args['weights'] = call_args['model']
        call_args['model'] = None

    # Move the constructor-only options out of call_args.
    init_args = {
        key: call_args.pop(key)
        for key in ('model', 'weights', 'device', 'palette')
    }

    return init_args, call_args
82
+
83
+
84
def main():
    """Build a ``DetInferencer`` from the command line and run it once."""
    init_args, call_args = parse_args()
    # TODO: Video and webcam inputs are currently not supported, and a large
    # input folder may consume too much memory. To be optimized later.
    detector = DetInferencer(**init_args)
    detector(**call_args)

    # Report the output location only when something was actually saved.
    out_dir = call_args['out_dir']
    saves_output = (not call_args['no_save_vis']
                    or not call_args['no_save_pred'])
    if out_dir != '' and saves_output:
        print_log(f'Results have been saved at {out_dir}')


if __name__ == '__main__':
    main()
inference/centernet_config.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'CocoDataset'
2
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/' # dataset root
3
+ backend_args = None
4
+
5
+ max_epochs = 500
6
+
7
+ metainfo = {
8
+ 'classes': ('airplane', ),
9
+ 'palette': [
10
+ (0, 128, 255),
11
+ ]
12
+ }
13
+ num_classes = 1
14
+
15
+ train_pipeline = [
16
+ dict(type='LoadImageFromFile', backend_args=None),
17
+ dict(type='LoadAnnotations', with_bbox=True),
18
+ dict(
19
+ type='RandomChoiceResize',
20
+ scales=[
21
+ ( 1333, 640, ),
22
+ ( 1333, 672, ),
23
+ ( 1333, 704, ),
24
+ ( 1333, 736, ),
25
+ ( 1333, 768, ),
26
+ ( 1333, 800, ),
27
+ ],
28
+ keep_ratio=True),
29
+ dict(type='RandomFlip', prob=0.5),
30
+ dict(type='PackDetInputs'),
31
+ ]
32
+ test_pipeline = [
33
+ dict(type='LoadImageFromFile', backend_args=None),
34
+ dict(type='Resize', scale=(
35
+ 1333,
36
+ 800,
37
+ ), keep_ratio=True),
38
+ dict(type='LoadAnnotations', with_bbox=True),
39
+ dict(
40
+ type='PackDetInputs',
41
+ meta_keys=(
42
+ 'img_id',
43
+ 'img_path',
44
+ 'ori_shape',
45
+ 'img_shape',
46
+ 'scale_factor',
47
+ )),
48
+ ]
49
+ train_dataloader = dict(
50
+ batch_size=32,
51
+ num_workers=2,
52
+ persistent_workers=True,
53
+ sampler=dict(type='DefaultSampler', shuffle=True),
54
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
55
+ dataset=dict(
56
+ type='CocoDataset',
57
+ metainfo=metainfo,
58
+ data_root=data_root,
59
+ ann_file='train/__coco.json',
60
+ data_prefix=dict(img='train/'),
61
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
62
+ pipeline=[
63
+ dict(type='LoadImageFromFile', backend_args=None),
64
+ dict(type='LoadAnnotations', with_bbox=True),
65
+ dict(
66
+ type='RandomChoiceResize',
67
+ scales=[
68
+ ( 1333, 640, ),
69
+ ( 1333, 672, ),
70
+ ( 1333, 704, ),
71
+ ( 1333, 736, ),
72
+ ( 1333, 768, ),
73
+ ( 1333, 800, ),
74
+ ],
75
+ keep_ratio=True),
76
+ dict(type='RandomFlip', prob=0.5),
77
+ dict(type='PackDetInputs'),
78
+ ],
79
+ backend_args=None))
80
+ val_dataloader = dict(
81
+ batch_size=32,
82
+ num_workers=2,
83
+ persistent_workers=True,
84
+ drop_last=False,
85
+ sampler=dict(type='DefaultSampler', shuffle=False),
86
+ dataset=dict(
87
+ type='CocoDataset',
88
+ metainfo=metainfo,
89
+ data_root=data_root,
90
+ ann_file='val/__coco.json',
91
+ data_prefix=dict(img='val/'),
92
+ test_mode=True,
93
+ pipeline=[
94
+ dict(type='LoadImageFromFile', backend_args=None),
95
+ dict(type='Resize', scale=(
96
+ 1333,
97
+ 800,
98
+ ), keep_ratio=True),
99
+ dict(type='LoadAnnotations', with_bbox=True),
100
+ dict(
101
+ type='PackDetInputs',
102
+ meta_keys=(
103
+ 'img_id',
104
+ 'img_path',
105
+ 'ori_shape',
106
+ 'img_shape',
107
+ 'scale_factor',
108
+ )),
109
+ ],
110
+ backend_args=None))
111
+ test_dataloader = dict(
112
+ batch_size=32,
113
+ num_workers=2,
114
+ persistent_workers=True,
115
+ drop_last=False,
116
+ sampler=dict(type='DefaultSampler', shuffle=False),
117
+ dataset=dict(
118
+ type='CocoDataset',
119
+ metainfo=metainfo,
120
+ data_root=data_root,
121
+ ann_file='test/__coco.json',
122
+ data_prefix=dict(img='test/'),
123
+ test_mode=True,
124
+ pipeline=[
125
+ dict(type='LoadImageFromFile', backend_args=None),
126
+ dict(type='Resize', scale=(
127
+ 1333,
128
+ 800,
129
+ ), keep_ratio=True),
130
+ dict(type='LoadAnnotations', with_bbox=True),
131
+ dict(
132
+ type='PackDetInputs',
133
+ meta_keys=(
134
+ 'img_id',
135
+ 'img_path',
136
+ 'ori_shape',
137
+ 'img_shape',
138
+ 'scale_factor',
139
+ )),
140
+ ],
141
+ backend_args=None))
142
+ val_evaluator = dict(
143
+ type='CocoMetric',
144
+ ann_file=data_root + 'val/__coco.json',
145
+ metric='bbox',
146
+ format_only=False,
147
+ backend_args=None)
148
+ test_evaluator = dict(
149
+ type='CocoMetric',
150
+ ann_file=data_root + 'test/__coco.json',
151
+ metric='bbox',
152
+ format_only=False,
153
+ backend_args=None)
154
+
155
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=10)
156
+ val_cfg = dict(type='ValLoop')
157
+ test_cfg = dict(type='TestLoop')
158
+ param_scheduler = [
159
+ dict(
160
+ type='LinearLR',
161
+ start_factor=0.00025,
162
+ by_epoch=False,
163
+ begin=0,
164
+ end=4000),
165
+ dict(
166
+ type='MultiStepLR',
167
+ begin=0,
168
+ end=12,
169
+ by_epoch=True,
170
+ milestones=[
171
+ 8,
172
+ 11,
173
+ ],
174
+ gamma=0.1),
175
+ ]
176
+ optim_wrapper = dict(
177
+ type='OptimWrapper',
178
+ optimizer=dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=0.0001),
179
+ paramwise_cfg=dict(norm_decay_mult=0.0))
180
+ auto_scale_lr = dict(enable=False, base_batch_size=32)
181
+ default_scope = 'mmdet'
182
+ default_hooks = dict(
183
+ timer=dict(type='IterTimerHook'),
184
+ logger=dict(type='LoggerHook', interval=5),
185
+ param_scheduler=dict(type='ParamSchedulerHook'),
186
+ checkpoint=dict(
187
+ type='CheckpointHook',
188
+ interval=5,
189
+ max_keep_ckpts=2, # only keep latest 2 checkpoints
190
+ save_best='auto'
191
+ ),
192
+ sampler_seed=dict(type='DistSamplerSeedHook'),
193
+ visualization=dict(type='DetVisualizationHook'))
194
+ env_cfg = dict(
195
+ cudnn_benchmark=False,
196
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
197
+ dist_cfg=dict(backend='nccl'))
198
+ vis_backends = [
199
+ dict(type='LocalVisBackend'),
200
+ ]
201
+ visualizer = dict(
202
+ type='DetLocalVisualizer',
203
+ vis_backends=[
204
+ dict(type='LocalVisBackend'),
205
+ dict(type='TensorboardVisBackend'),
206
+ ],
207
+ name='visualizer')
208
+
209
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
210
+ log_level = 'INFO'
211
+ load_from = None
212
+ resume = False
213
+ model = dict(
214
+ type='CenterNet',
215
+ data_preprocessor=dict(
216
+ type='DetDataPreprocessor',
217
+ mean=[
218
+ 103.53,
219
+ 116.28,
220
+ 123.675,
221
+ ],
222
+ std=[
223
+ 1.0,
224
+ 1.0,
225
+ 1.0,
226
+ ],
227
+ bgr_to_rgb=False,
228
+ pad_size_divisor=32),
229
+ backbone=dict(
230
+ type='ResNet',
231
+ depth=50,
232
+ num_stages=4,
233
+ out_indices=(
234
+ 0,
235
+ 1,
236
+ 2,
237
+ 3,
238
+ ),
239
+ frozen_stages=1,
240
+ norm_cfg=dict(type='BN', requires_grad=False),
241
+ norm_eval=True,
242
+ style='caffe',
243
+ init_cfg=dict(
244
+ type='Pretrained',
245
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
246
+ neck=dict(
247
+ type='FPN',
248
+ in_channels=[
249
+ 256,
250
+ 512,
251
+ 1024,
252
+ 2048,
253
+ ],
254
+ out_channels=256,
255
+ start_level=1,
256
+ add_extra_convs='on_output',
257
+ num_outs=5,
258
+ init_cfg=dict(type='Caffe2Xavier', layer='Conv2d'),
259
+ relu_before_extra_convs=True),
260
+ bbox_head=dict(
261
+ type='CenterNetUpdateHead',
262
+ num_classes=num_classes,
263
+ in_channels=256,
264
+ stacked_convs=4,
265
+ feat_channels=256,
266
+ strides=[
267
+ 8,
268
+ 16,
269
+ 32,
270
+ 64,
271
+ 128,
272
+ ],
273
+ hm_min_radius=4,
274
+ hm_min_overlap=0.8,
275
+ more_pos_thresh=0.2,
276
+ more_pos_topk=9,
277
+ soft_weight_on_reg=False,
278
+ loss_cls=dict(
279
+ type='GaussianFocalLoss',
280
+ pos_weight=0.25,
281
+ neg_weight=0.75,
282
+ loss_weight=1.0),
283
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
284
+ train_cfg=None,
285
+ test_cfg=dict(
286
+ nms_pre=1000,
287
+ min_bbox_size=0,
288
+ score_thr=0.05,
289
+ nms=dict(type='nms', iou_threshold=0.6),
290
+ max_per_img=100))
inference/detr_config.py ADDED
@@ -0,0 +1,542 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'CocoDataset'
2
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/'
3
+ backend_args = None
4
+ max_epochs = 500
5
+ metainfo = {
6
+ 'classes': ('airplane', ),
7
+ 'palette': [
8
+ (0, 128, 255),
9
+ ]
10
+ }
11
+ num_classes = 1
12
+ train_pipeline = [
13
+ dict(type='LoadImageFromFile', backend_args=None),
14
+ dict(type='LoadAnnotations', with_bbox=True),
15
+ dict(type='RandomFlip', prob=0.5),
16
+ dict(
17
+ type='RandomChoice',
18
+ transforms=[
19
+ [
20
+ dict(
21
+ type='RandomChoiceResize',
22
+ scales=[
23
+ (
24
+ 480,
25
+ 1333,
26
+ ),
27
+ (
28
+ 512,
29
+ 1333,
30
+ ),
31
+ (
32
+ 544,
33
+ 1333,
34
+ ),
35
+ (
36
+ 576,
37
+ 1333,
38
+ ),
39
+ (
40
+ 608,
41
+ 1333,
42
+ ),
43
+ (
44
+ 640,
45
+ 1333,
46
+ ),
47
+ (
48
+ 672,
49
+ 1333,
50
+ ),
51
+ (
52
+ 704,
53
+ 1333,
54
+ ),
55
+ (
56
+ 736,
57
+ 1333,
58
+ ),
59
+ (
60
+ 768,
61
+ 1333,
62
+ ),
63
+ (
64
+ 800,
65
+ 1333,
66
+ ),
67
+ ],
68
+ keep_ratio=True),
69
+ ],
70
+ [
71
+ dict(
72
+ type='RandomChoiceResize',
73
+ scales=[
74
+ (
75
+ 400,
76
+ 1333,
77
+ ),
78
+ (
79
+ 500,
80
+ 1333,
81
+ ),
82
+ (
83
+ 600,
84
+ 1333,
85
+ ),
86
+ ],
87
+ keep_ratio=True),
88
+ dict(
89
+ type='RandomCrop',
90
+ crop_type='absolute_range',
91
+ crop_size=(
92
+ 384,
93
+ 600,
94
+ ),
95
+ allow_negative_crop=True),
96
+ dict(
97
+ type='RandomChoiceResize',
98
+ scales=[
99
+ (
100
+ 480,
101
+ 1333,
102
+ ),
103
+ (
104
+ 512,
105
+ 1333,
106
+ ),
107
+ (
108
+ 544,
109
+ 1333,
110
+ ),
111
+ (
112
+ 576,
113
+ 1333,
114
+ ),
115
+ (
116
+ 608,
117
+ 1333,
118
+ ),
119
+ (
120
+ 640,
121
+ 1333,
122
+ ),
123
+ (
124
+ 672,
125
+ 1333,
126
+ ),
127
+ (
128
+ 704,
129
+ 1333,
130
+ ),
131
+ (
132
+ 736,
133
+ 1333,
134
+ ),
135
+ (
136
+ 768,
137
+ 1333,
138
+ ),
139
+ (
140
+ 800,
141
+ 1333,
142
+ ),
143
+ ],
144
+ keep_ratio=True),
145
+ ],
146
+ ]),
147
+ dict(type='PackDetInputs'),
148
+ ]
149
+ test_pipeline = [
150
+ dict(type='LoadImageFromFile', backend_args=None),
151
+ dict(type='Resize', scale=(
152
+ 1333,
153
+ 800,
154
+ ), keep_ratio=True),
155
+ dict(type='LoadAnnotations', with_bbox=True),
156
+ dict(
157
+ type='PackDetInputs',
158
+ meta_keys=(
159
+ 'img_id',
160
+ 'img_path',
161
+ 'ori_shape',
162
+ 'img_shape',
163
+ 'scale_factor',
164
+ )),
165
+ ]
166
+ train_dataloader = dict(
167
+ batch_size=8,
168
+ num_workers=2,
169
+ persistent_workers=True,
170
+ sampler=dict(type='DefaultSampler', shuffle=True),
171
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
172
+ dataset=dict(
173
+ type='CocoDataset',
174
+ metainfo=dict(classes=('airplane', ), palette=[
175
+ (
176
+ 220,
177
+ 20,
178
+ 60,
179
+ ),
180
+ ]),
181
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
182
+ ann_file='train/__coco.json',
183
+ data_prefix=dict(img='train/'),
184
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
185
+ pipeline=[
186
+ dict(type='LoadImageFromFile', backend_args=None),
187
+ dict(type='LoadAnnotations', with_bbox=True),
188
+ dict(type='RandomFlip', prob=0.5),
189
+ dict(
190
+ type='RandomChoice',
191
+ transforms=[
192
+ [
193
+ dict(
194
+ type='RandomChoiceResize',
195
+ scales=[
196
+ (
197
+ 480,
198
+ 1333,
199
+ ),
200
+ (
201
+ 512,
202
+ 1333,
203
+ ),
204
+ (
205
+ 544,
206
+ 1333,
207
+ ),
208
+ (
209
+ 576,
210
+ 1333,
211
+ ),
212
+ (
213
+ 608,
214
+ 1333,
215
+ ),
216
+ (
217
+ 640,
218
+ 1333,
219
+ ),
220
+ (
221
+ 672,
222
+ 1333,
223
+ ),
224
+ (
225
+ 704,
226
+ 1333,
227
+ ),
228
+ (
229
+ 736,
230
+ 1333,
231
+ ),
232
+ (
233
+ 768,
234
+ 1333,
235
+ ),
236
+ (
237
+ 800,
238
+ 1333,
239
+ ),
240
+ ],
241
+ keep_ratio=True),
242
+ ],
243
+ [
244
+ dict(
245
+ type='RandomChoiceResize',
246
+ scales=[
247
+ (
248
+ 400,
249
+ 1333,
250
+ ),
251
+ (
252
+ 500,
253
+ 1333,
254
+ ),
255
+ (
256
+ 600,
257
+ 1333,
258
+ ),
259
+ ],
260
+ keep_ratio=True),
261
+ dict(
262
+ type='RandomCrop',
263
+ crop_type='absolute_range',
264
+ crop_size=(
265
+ 384,
266
+ 600,
267
+ ),
268
+ allow_negative_crop=True),
269
+ dict(
270
+ type='RandomChoiceResize',
271
+ scales=[
272
+ (
273
+ 480,
274
+ 1333,
275
+ ),
276
+ (
277
+ 512,
278
+ 1333,
279
+ ),
280
+ (
281
+ 544,
282
+ 1333,
283
+ ),
284
+ (
285
+ 576,
286
+ 1333,
287
+ ),
288
+ (
289
+ 608,
290
+ 1333,
291
+ ),
292
+ (
293
+ 640,
294
+ 1333,
295
+ ),
296
+ (
297
+ 672,
298
+ 1333,
299
+ ),
300
+ (
301
+ 704,
302
+ 1333,
303
+ ),
304
+ (
305
+ 736,
306
+ 1333,
307
+ ),
308
+ (
309
+ 768,
310
+ 1333,
311
+ ),
312
+ (
313
+ 800,
314
+ 1333,
315
+ ),
316
+ ],
317
+ keep_ratio=True),
318
+ ],
319
+ ]),
320
+ dict(type='PackDetInputs'),
321
+ ],
322
+ backend_args=None))
323
+ val_dataloader = dict(
324
+ batch_size=1,
325
+ num_workers=2,
326
+ persistent_workers=True,
327
+ drop_last=False,
328
+ sampler=dict(type='DefaultSampler', shuffle=False),
329
+ dataset=dict(
330
+ type='CocoDataset',
331
+ metainfo=dict(classes=('airplane', ), palette=[
332
+ (
333
+ 220,
334
+ 20,
335
+ 60,
336
+ ),
337
+ ]),
338
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
339
+ ann_file='val/__coco.json',
340
+ data_prefix=dict(img='val/'),
341
+ test_mode=True,
342
+ pipeline=[
343
+ dict(type='LoadImageFromFile', backend_args=None),
344
+ dict(type='Resize', scale=(
345
+ 1333,
346
+ 800,
347
+ ), keep_ratio=True),
348
+ dict(type='LoadAnnotations', with_bbox=True),
349
+ dict(
350
+ type='PackDetInputs',
351
+ meta_keys=(
352
+ 'img_id',
353
+ 'img_path',
354
+ 'ori_shape',
355
+ 'img_shape',
356
+ 'scale_factor',
357
+ )),
358
+ ],
359
+ backend_args=None))
360
+ test_dataloader = dict(
361
+ batch_size=1,
362
+ num_workers=2,
363
+ persistent_workers=True,
364
+ drop_last=False,
365
+ sampler=dict(type='DefaultSampler', shuffle=False),
366
+ dataset=dict(
367
+ type='CocoDataset',
368
+ metainfo=dict(classes=('airplane', ), palette=[
369
+ (
370
+ 220,
371
+ 20,
372
+ 60,
373
+ ),
374
+ ]),
375
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
376
+ ann_file='test/__coco.json',
377
+ data_prefix=dict(img='test/'),
378
+ test_mode=True,
379
+ pipeline=[
380
+ dict(type='LoadImageFromFile', backend_args=None),
381
+ dict(type='Resize', scale=(
382
+ 1333,
383
+ 800,
384
+ ), keep_ratio=True),
385
+ dict(type='LoadAnnotations', with_bbox=True),
386
+ dict(
387
+ type='PackDetInputs',
388
+ meta_keys=(
389
+ 'img_id',
390
+ 'img_path',
391
+ 'ori_shape',
392
+ 'img_shape',
393
+ 'scale_factor',
394
+ )),
395
+ ],
396
+ backend_args=None))
397
+ val_evaluator = dict(
398
+ type='CocoMetric',
399
+ ann_file='/home/safouane/Downloads/benchmark_aircraft/data/val/__coco.json',
400
+ metric='bbox',
401
+ format_only=False,
402
+ backend_args=None)
403
+ test_evaluator = dict(
404
+ type='CocoMetric',
405
+ ann_file=
406
+ '/home/safouane/Downloads/benchmark_aircraft/data/test/__coco.json',
407
+ metric='bbox',
408
+ format_only=False,
409
+ backend_args=None)
410
+ default_scope = 'mmdet'
411
+ default_hooks = dict(
412
+ timer=dict(type='IterTimerHook'),
413
+ logger=dict(type='LoggerHook', interval=5),
414
+ param_scheduler=dict(type='ParamSchedulerHook'),
415
+ checkpoint=dict(type='CheckpointHook', interval=5, save_best='auto'),
416
+ sampler_seed=dict(type='DistSamplerSeedHook'),
417
+ visualization=dict(type='DetVisualizationHook'))
418
+ env_cfg = dict(
419
+ cudnn_benchmark=False,
420
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
421
+ dist_cfg=dict(backend='nccl'))
422
+ vis_backends = [
423
+ dict(type='LocalVisBackend'),
424
+ ]
425
+ visualizer = dict(
426
+ type='DetLocalVisualizer',
427
+ vis_backends=[
428
+ dict(type='LocalVisBackend'),
429
+ dict(type='TensorboardVisBackend'),
430
+ ],
431
+ name='visualizer')
432
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
433
+ log_level = 'INFO'
434
+ load_from = '/home/safouane/Downloads/benchmark_aircraft/mmdetection/configs/detr/checkpoints/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth'
435
+ resume = False
436
+ model = dict(
437
+ type='DETR',
438
+ num_queries=100,
439
+ data_preprocessor=dict(
440
+ type='DetDataPreprocessor',
441
+ mean=[
442
+ 123.675,
443
+ 116.28,
444
+ 103.53,
445
+ ],
446
+ std=[
447
+ 58.395,
448
+ 57.12,
449
+ 57.375,
450
+ ],
451
+ bgr_to_rgb=True,
452
+ pad_size_divisor=1),
453
+ backbone=dict(
454
+ type='ResNet',
455
+ depth=50,
456
+ num_stages=4,
457
+ out_indices=(3, ),
458
+ frozen_stages=1,
459
+ norm_cfg=dict(type='BN', requires_grad=False),
460
+ norm_eval=True,
461
+ style='pytorch',
462
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
463
+ neck=dict(
464
+ type='ChannelMapper',
465
+ in_channels=[
466
+ 2048,
467
+ ],
468
+ kernel_size=1,
469
+ out_channels=256,
470
+ act_cfg=None,
471
+ norm_cfg=None,
472
+ num_outs=1),
473
+ encoder=dict(
474
+ num_layers=6,
475
+ layer_cfg=dict(
476
+ self_attn_cfg=dict(
477
+ embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
478
+ ffn_cfg=dict(
479
+ embed_dims=256,
480
+ feedforward_channels=2048,
481
+ num_fcs=2,
482
+ ffn_drop=0.1,
483
+ act_cfg=dict(type='ReLU', inplace=True)))),
484
+ decoder=dict(
485
+ num_layers=6,
486
+ layer_cfg=dict(
487
+ self_attn_cfg=dict(
488
+ embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
489
+ cross_attn_cfg=dict(
490
+ embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
491
+ ffn_cfg=dict(
492
+ embed_dims=256,
493
+ feedforward_channels=2048,
494
+ num_fcs=2,
495
+ ffn_drop=0.1,
496
+ act_cfg=dict(type='ReLU', inplace=True))),
497
+ return_intermediate=True),
498
+ positional_encoding=dict(num_feats=128, normalize=True),
499
+ bbox_head=dict(
500
+ type='DETRHead',
501
+ num_classes=1,
502
+ embed_dims=256,
503
+ loss_cls=dict(
504
+ type='CrossEntropyLoss',
505
+ bg_cls_weight=0.1,
506
+ use_sigmoid=False,
507
+ loss_weight=1.0,
508
+ class_weight=1.0),
509
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
510
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
511
+ train_cfg=dict(
512
+ assigner=dict(
513
+ type='HungarianAssigner',
514
+ match_costs=[
515
+ dict(type='ClassificationCost', weight=1.0),
516
+ dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
517
+ dict(type='IoUCost', iou_mode='giou', weight=2.0),
518
+ ])),
519
+ test_cfg=dict(max_per_img=100))
520
+ optim_wrapper = dict(
521
+ type='OptimWrapper',
522
+ optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
523
+ clip_grad=dict(max_norm=0.1, norm_type=2),
524
+ paramwise_cfg=dict(
525
+ custom_keys=dict(backbone=dict(lr_mult=0.1, decay_mult=1.0))))
526
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=1)
527
+ val_cfg = dict(type='ValLoop')
528
+ test_cfg = dict(type='TestLoop')
529
+ param_scheduler = [
530
+ dict(
531
+ type='MultiStepLR',
532
+ begin=0,
533
+ end=150,
534
+ by_epoch=True,
535
+ milestones=[
536
+ 100,
537
+ ],
538
+ gamma=0.1),
539
+ ]
540
+ auto_scale_lr = dict(base_batch_size=16)
541
+ launcher = 'none'
542
+ work_dir = './work_dirs/detr_r50_8xb2-150e_coco'
inference/fasterrcnn_config.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'CocoDataset'
2
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/'
3
+ backend_args = None
4
+ max_epochs = 500
5
+ metainfo = dict(
6
+ classes=('airplane', ), palette=[
7
+ (
8
+ 0,
9
+ 128,
10
+ 255,
11
+ ),
12
+ ])
13
+ num_classes = 1
14
+ model = dict(
15
+ type='FasterRCNN',
16
+ data_preprocessor=dict(
17
+ type='DetDataPreprocessor',
18
+ mean=[
19
+ 103.53,
20
+ 116.28,
21
+ 123.675,
22
+ ],
23
+ std=[
24
+ 1.0,
25
+ 1.0,
26
+ 1.0,
27
+ ],
28
+ bgr_to_rgb=False,
29
+ pad_size_divisor=32),
30
+ backbone=dict(
31
+ type='ResNet',
32
+ depth=50,
33
+ num_stages=4,
34
+ out_indices=(
35
+ 0,
36
+ 1,
37
+ 2,
38
+ 3,
39
+ ),
40
+ frozen_stages=1,
41
+ norm_cfg=dict(type='BN', requires_grad=False),
42
+ norm_eval=True,
43
+ style='caffe',
44
+ init_cfg=dict(
45
+ type='Pretrained',
46
+ checkpoint='open-mmlab://detectron2/resnet50_caffe')),
47
+ neck=dict(
48
+ type='FPN',
49
+ in_channels=[
50
+ 256,
51
+ 512,
52
+ 1024,
53
+ 2048,
54
+ ],
55
+ out_channels=256,
56
+ num_outs=5),
57
+ rpn_head=dict(
58
+ type='RPNHead',
59
+ in_channels=256,
60
+ feat_channels=256,
61
+ anchor_generator=dict(
62
+ type='AnchorGenerator',
63
+ scales=[
64
+ 8,
65
+ ],
66
+ ratios=[
67
+ 0.5,
68
+ 1.0,
69
+ 2.0,
70
+ ],
71
+ strides=[
72
+ 4,
73
+ 8,
74
+ 16,
75
+ 32,
76
+ 64,
77
+ ]),
78
+ bbox_coder=dict(
79
+ type='DeltaXYWHBBoxCoder',
80
+ target_means=[
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ ],
86
+ target_stds=[
87
+ 1.0,
88
+ 1.0,
89
+ 1.0,
90
+ 1.0,
91
+ ]),
92
+ loss_cls=dict(
93
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
94
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
95
+ roi_head=dict(
96
+ type='StandardRoIHead',
97
+ bbox_roi_extractor=dict(
98
+ type='SingleRoIExtractor',
99
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
100
+ out_channels=256,
101
+ featmap_strides=[
102
+ 4,
103
+ 8,
104
+ 16,
105
+ 32,
106
+ ]),
107
+ bbox_head=dict(
108
+ type='Shared2FCBBoxHead',
109
+ in_channels=256,
110
+ fc_out_channels=1024,
111
+ roi_feat_size=7,
112
+ num_classes=1,
113
+ bbox_coder=dict(
114
+ type='DeltaXYWHBBoxCoder',
115
+ target_means=[
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ ],
121
+ target_stds=[
122
+ 0.1,
123
+ 0.1,
124
+ 0.2,
125
+ 0.2,
126
+ ]),
127
+ reg_class_agnostic=False,
128
+ loss_cls=dict(
129
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
130
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
131
+ train_cfg=dict(
132
+ rpn=dict(
133
+ assigner=dict(
134
+ type='MaxIoUAssigner',
135
+ pos_iou_thr=0.7,
136
+ neg_iou_thr=0.3,
137
+ min_pos_iou=0.3,
138
+ match_low_quality=True,
139
+ ignore_iof_thr=-1),
140
+ sampler=dict(
141
+ type='RandomSampler',
142
+ num=256,
143
+ pos_fraction=0.5,
144
+ neg_pos_ub=-1,
145
+ add_gt_as_proposals=False),
146
+ allowed_border=-1,
147
+ pos_weight=-1,
148
+ debug=False),
149
+ rpn_proposal=dict(
150
+ nms_pre=2000,
151
+ max_per_img=1000,
152
+ nms=dict(type='nms', iou_threshold=0.7),
153
+ min_bbox_size=0),
154
+ rcnn=dict(
155
+ assigner=dict(
156
+ type='MaxIoUAssigner',
157
+ pos_iou_thr=0.5,
158
+ neg_iou_thr=0.5,
159
+ min_pos_iou=0.5,
160
+ match_low_quality=False,
161
+ ignore_iof_thr=-1),
162
+ sampler=dict(
163
+ type='RandomSampler',
164
+ num=512,
165
+ pos_fraction=0.25,
166
+ neg_pos_ub=-1,
167
+ add_gt_as_proposals=True),
168
+ pos_weight=-1,
169
+ debug=False)),
170
+ test_cfg=dict(
171
+ rpn=dict(
172
+ nms_pre=1000,
173
+ max_per_img=1000,
174
+ nms=dict(type='nms', iou_threshold=0.7),
175
+ min_bbox_size=0),
176
+ rcnn=dict(
177
+ score_thr=0.05,
178
+ nms=dict(type='nms', iou_threshold=0.5),
179
+ max_per_img=100)))
180
+ train_pipeline = [
181
+ dict(type='LoadImageFromFile', backend_args=None),
182
+ dict(type='LoadAnnotations', with_bbox=True),
183
+ dict(type='Resize', scale=(
184
+ 1333,
185
+ 800,
186
+ ), keep_ratio=True),
187
+ dict(type='RandomFlip', prob=0.5),
188
+ dict(type='PackDetInputs'),
189
+ ]
190
+ test_pipeline = [
191
+ dict(type='LoadImageFromFile', backend_args=None),
192
+ dict(type='Resize', scale=(
193
+ 1333,
194
+ 800,
195
+ ), keep_ratio=True),
196
+ dict(type='LoadAnnotations', with_bbox=True),
197
+ dict(
198
+ type='PackDetInputs',
199
+ meta_keys=(
200
+ 'img_id',
201
+ 'img_path',
202
+ 'ori_shape',
203
+ 'img_shape',
204
+ 'scale_factor',
205
+ )),
206
+ ]
207
+ train_dataloader = dict(
208
+ batch_size=32,
209
+ num_workers=2,
210
+ persistent_workers=True,
211
+ sampler=dict(type='DefaultSampler', shuffle=True),
212
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
213
+ dataset=dict(
214
+ type='CocoDataset',
215
+ metainfo=dict(classes=('airplane', ), palette=[
216
+ (
217
+ 220,
218
+ 20,
219
+ 60,
220
+ ),
221
+ ]),
222
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
223
+ ann_file='train/__coco.json',
224
+ data_prefix=dict(img='train/'),
225
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
226
+ pipeline=[
227
+ dict(type='LoadImageFromFile', backend_args=None),
228
+ dict(type='LoadAnnotations', with_bbox=True),
229
+ dict(type='Resize', scale=(
230
+ 1333,
231
+ 800,
232
+ ), keep_ratio=True),
233
+ dict(type='RandomFlip', prob=0.5),
234
+ dict(type='PackDetInputs'),
235
+ ],
236
+ backend_args=None))
237
+ val_dataloader = dict(
238
+ batch_size=32,
239
+ num_workers=2,
240
+ persistent_workers=True,
241
+ drop_last=False,
242
+ sampler=dict(type='DefaultSampler', shuffle=False),
243
+ dataset=dict(
244
+ type='CocoDataset',
245
+ metainfo=dict(classes=('airplane', ), palette=[
246
+ (
247
+ 220,
248
+ 20,
249
+ 60,
250
+ ),
251
+ ]),
252
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
253
+ ann_file='val/__coco.json',
254
+ data_prefix=dict(img='val/'),
255
+ test_mode=True,
256
+ pipeline=[
257
+ dict(type='LoadImageFromFile', backend_args=None),
258
+ dict(type='Resize', scale=(
259
+ 1333,
260
+ 800,
261
+ ), keep_ratio=True),
262
+ dict(type='LoadAnnotations', with_bbox=True),
263
+ dict(
264
+ type='PackDetInputs',
265
+ meta_keys=(
266
+ 'img_id',
267
+ 'img_path',
268
+ 'ori_shape',
269
+ 'img_shape',
270
+ 'scale_factor',
271
+ )),
272
+ ],
273
+ backend_args=None))
274
+ test_dataloader = dict(
275
+ batch_size=32,
276
+ num_workers=2,
277
+ persistent_workers=True,
278
+ drop_last=False,
279
+ sampler=dict(type='DefaultSampler', shuffle=False),
280
+ dataset=dict(
281
+ type='CocoDataset',
282
+ metainfo=dict(classes=('airplane', ), palette=[
283
+ (
284
+ 220,
285
+ 20,
286
+ 60,
287
+ ),
288
+ ]),
289
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
290
+ ann_file='test/__coco.json',
291
+ data_prefix=dict(img='test/'),
292
+ test_mode=True,
293
+ pipeline=[
294
+ dict(type='LoadImageFromFile', backend_args=None),
295
+ dict(type='Resize', scale=(
296
+ 1333,
297
+ 800,
298
+ ), keep_ratio=True),
299
+ dict(type='LoadAnnotations', with_bbox=True),
300
+ dict(
301
+ type='PackDetInputs',
302
+ meta_keys=(
303
+ 'img_id',
304
+ 'img_path',
305
+ 'ori_shape',
306
+ 'img_shape',
307
+ 'scale_factor',
308
+ )),
309
+ ],
310
+ backend_args=None))
311
+ val_evaluator = dict(
312
+ type='CocoMetric',
313
+ ann_file='/home/safouane/Downloads/benchmark_aircraft/data/val/__coco.json',
314
+ metric='bbox',
315
+ format_only=False,
316
+ backend_args=None)
317
+ test_evaluator = dict(
318
+ type='CocoMetric',
319
+ ann_file=
320
+ '/home/safouane/Downloads/benchmark_aircraft/data/test/__coco.json',
321
+ metric='bbox',
322
+ format_only=False,
323
+ backend_args=None)
324
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=1)
325
+ val_cfg = dict(type='ValLoop')
326
+ test_cfg = dict(type='TestLoop')
327
+ param_scheduler = [
328
+ dict(
329
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
330
+ dict(
331
+ type='MultiStepLR',
332
+ begin=0,
333
+ end=12,
334
+ by_epoch=True,
335
+ milestones=[
336
+ 8,
337
+ 11,
338
+ ],
339
+ gamma=0.1),
340
+ ]
341
+ optim_wrapper = dict(
342
+ type='OptimWrapper',
343
+ optimizer=dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=0.0001))
344
+ auto_scale_lr = dict(enable=False, base_batch_size=32)
345
+ default_scope = 'mmdet'
346
+ default_hooks = dict(
347
+ timer=dict(type='IterTimerHook'),
348
+ logger=dict(type='LoggerHook', interval=50),
349
+ param_scheduler=dict(type='ParamSchedulerHook'),
350
+ checkpoint=dict(type='CheckpointHook', interval=50, save_best='auto'),
351
+ sampler_seed=dict(type='DistSamplerSeedHook'),
352
+ visualization=dict(type='DetVisualizationHook'))
353
+ env_cfg = dict(
354
+ cudnn_benchmark=False,
355
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
356
+ dist_cfg=dict(backend='nccl'))
357
+ vis_backends = [
358
+ dict(type='LocalVisBackend'),
359
+ ]
360
+ visualizer = dict(
361
+ type='DetLocalVisualizer',
362
+ vis_backends=[
363
+ dict(type='LocalVisBackend'),
364
+ dict(type='TensorboardVisBackend'),
365
+ ],
366
+ name='visualizer')
367
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
368
+ log_level = 'INFO'
369
+ load_from = '/home/safouane/Downloads/benchmark_aircraft/mmlab_configs/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth'
370
+ resume = False
371
+ launcher = 'none'
372
+ work_dir = './work_dirs/faster-rcnn_r50-caffe_fpn_1x_coco'
inference/models/centernetbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af1c3c2357dc6f4650e798e5aff8be01e93a2766a57548026622a10b40462a8
3
+ size 140757155
inference/models/detrbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbffb3471ae7d9b4ad7a33977cebb38983e797dd7cb2180f314a42b9d99e80a
3
+ size 213052547
inference/models/fasterrcnnbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f25fcd2fe4bbfb27c3f62c667e9a4d337079ddb576bee01424a6bd8c225568
3
+ size 169034569
inference/models/retinanetbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5addc6c1a9fa202b5192922559bd69c9c274774bb427cecded5bcbfcd6a59d72
3
+ size 222922197
inference/models/rtmdetbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047f9d0980a6517a2e8a436f7f6377b4bf04f0370a9a6906f317f691234b2464
3
+ size 82940119
inference/models/ssdbest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7463e3afedf144cb289a244b7548b33e0fd2b7255aa7580606ce4a1dc2733e1
3
+ size 28107401
inference/models/yolov5best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51fce45b8130940c74f07fcda686a120648d75b6d4d9f2f9287b4769f9029608
3
+ size 172984812
inference/models/yolov8best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffaab64d2ad6ecaeca6d79066c95b8602060469bf28edf21caeb5df6d32daf2b
3
+ size 136739881
inference/retinanet_config.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'CocoDataset'
2
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/'
3
+ backend_args = None
4
+ max_epochs = 500
5
+ metainfo = {
6
+ 'classes': ('airplane', ),
7
+ 'palette': [
8
+ (0, 128, 255),
9
+ ]
10
+ }
11
+ num_classes = 1
12
+ model = dict(
13
+ type='RetinaNet',
14
+ data_preprocessor=dict(
15
+ type='DetDataPreprocessor',
16
+ mean=[
17
+ 123.675,
18
+ 116.28,
19
+ 103.53,
20
+ ],
21
+ std=[
22
+ 58.395,
23
+ 57.12,
24
+ 57.375,
25
+ ],
26
+ bgr_to_rgb=True,
27
+ pad_size_divisor=64,
28
+ batch_augments=[
29
+ dict(type='BatchFixedSizePad', size=(
30
+ 640,
31
+ 640,
32
+ )),
33
+ ]),
34
+ backbone=dict(
35
+ type='ResNet',
36
+ depth=50,
37
+ num_stages=4,
38
+ out_indices=(
39
+ 0,
40
+ 1,
41
+ 2,
42
+ 3,
43
+ ),
44
+ frozen_stages=1,
45
+ norm_cfg=dict(type='BN', requires_grad=True),
46
+ norm_eval=False,
47
+ style='pytorch',
48
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
49
+ neck=dict(
50
+ type='FPN',
51
+ in_channels=[
52
+ 256,
53
+ 512,
54
+ 1024,
55
+ 2048,
56
+ ],
57
+ out_channels=256,
58
+ start_level=1,
59
+ add_extra_convs='on_input',
60
+ num_outs=5,
61
+ relu_before_extra_convs=True,
62
+ no_norm_on_lateral=True,
63
+ norm_cfg=dict(type='BN', requires_grad=True)),
64
+ bbox_head=dict(
65
+ type='RetinaSepBNHead',
66
+ num_classes=1,
67
+ in_channels=256,
68
+ stacked_convs=4,
69
+ feat_channels=256,
70
+ anchor_generator=dict(
71
+ type='AnchorGenerator',
72
+ octave_base_scale=4,
73
+ scales_per_octave=3,
74
+ ratios=[
75
+ 0.5,
76
+ 1.0,
77
+ 2.0,
78
+ ],
79
+ strides=[
80
+ 8,
81
+ 16,
82
+ 32,
83
+ 64,
84
+ 128,
85
+ ]),
86
+ bbox_coder=dict(
87
+ type='DeltaXYWHBBoxCoder',
88
+ target_means=[
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ ],
94
+ target_stds=[
95
+ 1.0,
96
+ 1.0,
97
+ 1.0,
98
+ 1.0,
99
+ ]),
100
+ loss_cls=dict(
101
+ type='FocalLoss',
102
+ use_sigmoid=True,
103
+ gamma=2.0,
104
+ alpha=0.25,
105
+ loss_weight=1.0),
106
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0),
107
+ num_ins=5,
108
+ norm_cfg=dict(type='BN', requires_grad=True)),
109
+ train_cfg=dict(
110
+ assigner=dict(
111
+ type='MaxIoUAssigner',
112
+ pos_iou_thr=0.5,
113
+ neg_iou_thr=0.5,
114
+ min_pos_iou=0,
115
+ ignore_iof_thr=-1),
116
+ sampler=dict(type='PseudoSampler'),
117
+ allowed_border=-1,
118
+ pos_weight=-1,
119
+ debug=False),
120
+ test_cfg=dict(
121
+ nms_pre=1000,
122
+ min_bbox_size=0,
123
+ score_thr=0.05,
124
+ nms=dict(type='nms', iou_threshold=0.5),
125
+ max_per_img=100))
126
+ train_pipeline = [
127
+ dict(type='LoadImageFromFile', backend_args=None),
128
+ dict(type='LoadAnnotations', with_bbox=True),
129
+ dict(
130
+ type='RandomResize',
131
+ scale=(
132
+ 640,
133
+ 640,
134
+ ),
135
+ ratio_range=(
136
+ 0.8,
137
+ 1.2,
138
+ ),
139
+ keep_ratio=True),
140
+ dict(type='RandomCrop', crop_size=(
141
+ 640,
142
+ 640,
143
+ )),
144
+ dict(type='RandomFlip', prob=0.5),
145
+ dict(type='PackDetInputs'),
146
+ ]
147
+ test_pipeline = [
148
+ dict(type='LoadImageFromFile', backend_args=None),
149
+ dict(type='Resize', scale=(
150
+ 640,
151
+ 640,
152
+ ), keep_ratio=True),
153
+ dict(type='LoadAnnotations', with_bbox=True),
154
+ dict(
155
+ type='PackDetInputs',
156
+ meta_keys=(
157
+ 'img_id',
158
+ 'img_path',
159
+ 'ori_shape',
160
+ 'img_shape',
161
+ 'scale_factor',
162
+ )),
163
+ ]
164
+ train_dataloader = dict(
165
+ batch_size=32,
166
+ num_workers=2,
167
+ persistent_workers=True,
168
+ sampler=dict(type='DefaultSampler', shuffle=True),
169
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
170
+ dataset=dict(
171
+ type='CocoDataset',
172
+ metainfo=dict(classes=('airplane', ), palette=[
173
+ (
174
+ 220,
175
+ 20,
176
+ 60,
177
+ ),
178
+ ]),
179
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
180
+ ann_file='train/__coco.json',
181
+ data_prefix=dict(img='train/'),
182
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
183
+ pipeline=[
184
+ dict(type='LoadImageFromFile', backend_args=None),
185
+ dict(type='LoadAnnotations', with_bbox=True),
186
+ dict(
187
+ type='RandomResize',
188
+ scale=(
189
+ 640,
190
+ 640,
191
+ ),
192
+ ratio_range=(
193
+ 0.8,
194
+ 1.2,
195
+ ),
196
+ keep_ratio=True),
197
+ dict(type='RandomCrop', crop_size=(
198
+ 640,
199
+ 640,
200
+ )),
201
+ dict(type='RandomFlip', prob=0.5),
202
+ dict(type='PackDetInputs'),
203
+ ],
204
+ backend_args=None))
205
+ val_dataloader = dict(
206
+ batch_size=32,
207
+ num_workers=2,
208
+ persistent_workers=True,
209
+ drop_last=False,
210
+ sampler=dict(type='DefaultSampler', shuffle=False),
211
+ dataset=dict(
212
+ type='CocoDataset',
213
+ metainfo=dict(classes=('airplane', ), palette=[
214
+ (
215
+ 220,
216
+ 20,
217
+ 60,
218
+ ),
219
+ ]),
220
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
221
+ ann_file='val/__coco.json',
222
+ data_prefix=dict(img='val/'),
223
+ test_mode=True,
224
+ pipeline=[
225
+ dict(type='LoadImageFromFile', backend_args=None),
226
+ dict(type='Resize', scale=(
227
+ 640,
228
+ 640,
229
+ ), keep_ratio=True),
230
+ dict(type='LoadAnnotations', with_bbox=True),
231
+ dict(
232
+ type='PackDetInputs',
233
+ meta_keys=(
234
+ 'img_id',
235
+ 'img_path',
236
+ 'ori_shape',
237
+ 'img_shape',
238
+ 'scale_factor',
239
+ )),
240
+ ],
241
+ backend_args=None))
242
+ test_dataloader = dict(
243
+ batch_size=1,
244
+ num_workers=2,
245
+ persistent_workers=True,
246
+ drop_last=False,
247
+ sampler=dict(type='DefaultSampler', shuffle=False),
248
+ dataset=dict(
249
+ type='CocoDataset',
250
+ metainfo=dict(classes=('airplane', ), palette=[
251
+ (
252
+ 220,
253
+ 20,
254
+ 60,
255
+ ),
256
+ ]),
257
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
258
+ ann_file='test/__coco.json',
259
+ data_prefix=dict(img='test/'),
260
+ test_mode=True,
261
+ pipeline=[
262
+ dict(type='LoadImageFromFile', backend_args=None),
263
+ dict(type='Resize', scale=(
264
+ 640,
265
+ 640,
266
+ ), keep_ratio=True),
267
+ dict(type='LoadAnnotations', with_bbox=True),
268
+ dict(
269
+ type='PackDetInputs',
270
+ meta_keys=(
271
+ 'img_id',
272
+ 'img_path',
273
+ 'ori_shape',
274
+ 'img_shape',
275
+ 'scale_factor',
276
+ )),
277
+ ],
278
+ backend_args=None))
279
+ val_evaluator = dict(
280
+ type='CocoMetric',
281
+ ann_file='/home/safouane/Downloads/benchmark_aircraft/data/val/__coco.json',
282
+ metric='bbox',
283
+ format_only=False,
284
+ backend_args=None)
285
+ test_evaluator = dict(
286
+ type='CocoMetric',
287
+ ann_file=
288
+ '/home/safouane/Downloads/benchmark_aircraft/data/test/__coco.json',
289
+ metric='bbox',
290
+ format_only=False,
291
+ backend_args=None)
292
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=10)
293
+ val_cfg = dict(type='ValLoop')
294
+ test_cfg = dict(type='TestLoop')
295
+ param_scheduler = [
296
+ dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=1000),
297
+ dict(
298
+ type='MultiStepLR',
299
+ begin=0,
300
+ end=50,
301
+ by_epoch=True,
302
+ milestones=[
303
+ 30,
304
+ 40,
305
+ ],
306
+ gamma=0.1),
307
+ ]
308
+ optim_wrapper = dict(
309
+ type='OptimWrapper',
310
+ optimizer=dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=0.0001),
311
+ paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))
312
+ auto_scale_lr = dict(enable=False, base_batch_size=64)
313
+ default_scope = 'mmdet'
314
+ default_hooks = dict(
315
+ timer=dict(type='IterTimerHook'),
316
+ logger=dict(type='LoggerHook', interval=50),
317
+ param_scheduler=dict(type='ParamSchedulerHook'),
318
+ checkpoint=dict(
319
+ type='CheckpointHook', interval=20, max_keep_ckpts=2,
320
+ save_best='auto'),
321
+ sampler_seed=dict(type='DistSamplerSeedHook'),
322
+ visualization=dict(type='DetVisualizationHook'))
323
+ env_cfg = dict(
324
+ cudnn_benchmark=True,
325
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
326
+ dist_cfg=dict(backend='nccl'))
327
+ vis_backends = [
328
+ dict(type='LocalVisBackend'),
329
+ ]
330
+ visualizer = dict(
331
+ type='DetLocalVisualizer',
332
+ vis_backends=[
333
+ dict(type='LocalVisBackend'),
334
+ dict(type='TensorboardVisBackend'),
335
+ ],
336
+ name='visualizer')
337
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
338
+ log_level = 'INFO'
339
+ load_from = '/home/safouane/Downloads/benchmark_aircraft/mmlab_configs/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth'
340
+ resume = False
341
+ norm_cfg = dict(type='BN', requires_grad=True)
342
+ launcher = 'none'
343
+ work_dir = './work_dirs/retinanet_r50_fpn_crop640-50e_coco'
inference/rtmdet_config.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_scope = 'mmdet'
2
+ dataset_type = 'CocoDataset'
3
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/'
4
+ backend_args = None
5
+ batch_size = 64
6
+ max_epochs = 300
7
+ metainfo = {
8
+ 'classes': ('airplane', ),
9
+ 'palette': [
10
+ (0, 128, 255),
11
+ ]
12
+ }
13
+ num_classes = 1
14
+ default_hooks = dict(
15
+ timer=dict(type='IterTimerHook'),
16
+ logger=dict(type='LoggerHook', interval=50),
17
+ param_scheduler=dict(type='ParamSchedulerHook'),
18
+ checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3),
19
+ sampler_seed=dict(type='DistSamplerSeedHook'),
20
+ visualization=dict(type='DetVisualizationHook'))
21
+ env_cfg = dict(
22
+ cudnn_benchmark=False,
23
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
24
+ dist_cfg=dict(backend='nccl'))
25
+ vis_backends = [
26
+ dict(type='LocalVisBackend'),
27
+ ]
28
+ visualizer = dict(
29
+ type='DetLocalVisualizer',
30
+ vis_backends=[
31
+ dict(type='LocalVisBackend'),
32
+ dict(type='TensorboardVisBackend'),
33
+ ],
34
+ name='visualizer')
35
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
36
+ log_level = 'INFO'
37
+ load_from = '/home/safouane/Downloads/benchmark_aircraft/mmdetection/configs/rtmdet/checkpoints/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'
38
+ resume = False
39
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=10)
40
+ val_cfg = dict(type='ValLoop')
41
+ test_cfg = dict(type='TestLoop')
42
+ param_scheduler = [
43
+ dict(
44
+ type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
45
+ end=1000),
46
+ dict(
47
+ type='CosineAnnealingLR',
48
+ eta_min=0.0002,
49
+ begin=150,
50
+ end=300,
51
+ T_max=150,
52
+ by_epoch=True,
53
+ convert_to_iter_based=True),
54
+ ]
55
+ optim_wrapper = dict(
56
+ type='OptimWrapper',
57
+ optimizer=dict(type='AdamW', lr=0.004, weight_decay=0.05),
58
+ paramwise_cfg=dict(
59
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
60
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
61
+ train_pipeline = [
62
+ dict(type='LoadImageFromFile', backend_args=None),
63
+ dict(type='LoadAnnotations', with_bbox=True),
64
+ dict(
65
+ type='CachedMosaic',
66
+ img_scale=(
67
+ 640,
68
+ 640,
69
+ ),
70
+ pad_val=114.0,
71
+ max_cached_images=20,
72
+ random_pop=False),
73
+ dict(
74
+ type='RandomResize',
75
+ scale=(
76
+ 1280,
77
+ 1280,
78
+ ),
79
+ ratio_range=(
80
+ 0.5,
81
+ 2.0,
82
+ ),
83
+ keep_ratio=True),
84
+ dict(type='RandomCrop', crop_size=(
85
+ 640,
86
+ 640,
87
+ )),
88
+ dict(type='YOLOXHSVRandomAug'),
89
+ dict(type='RandomFlip', prob=0.5),
90
+ dict(type='Pad', size=(
91
+ 640,
92
+ 640,
93
+ ), pad_val=dict(img=(
94
+ 114,
95
+ 114,
96
+ 114,
97
+ ))),
98
+ dict(
99
+ type='CachedMixUp',
100
+ img_scale=(
101
+ 640,
102
+ 640,
103
+ ),
104
+ ratio_range=(
105
+ 1.0,
106
+ 1.0,
107
+ ),
108
+ max_cached_images=10,
109
+ random_pop=False,
110
+ pad_val=(
111
+ 114,
112
+ 114,
113
+ 114,
114
+ ),
115
+ prob=0.5),
116
+ dict(type='PackDetInputs'),
117
+ ]
118
+ test_pipeline = [
119
+ dict(type='LoadImageFromFile', backend_args=None),
120
+ dict(type='Resize', scale=(
121
+ 640,
122
+ 640,
123
+ ), keep_ratio=True),
124
+ dict(type='Pad', size=(
125
+ 640,
126
+ 640,
127
+ ), pad_val=dict(img=(
128
+ 114,
129
+ 114,
130
+ 114,
131
+ ))),
132
+ dict(type='LoadAnnotations', with_bbox=True),
133
+ dict(
134
+ type='PackDetInputs',
135
+ meta_keys=(
136
+ 'img_id',
137
+ 'img_path',
138
+ 'ori_shape',
139
+ 'img_shape',
140
+ 'scale_factor',
141
+ )),
142
+ ]
143
+ train_dataloader = dict(
144
+ batch_size=64,
145
+ num_workers=2,
146
+ persistent_workers=True,
147
+ sampler=dict(type='DefaultSampler', shuffle=True),
148
+ batch_sampler=None,
149
+ dataset=dict(
150
+ type='CocoDataset',
151
+ metainfo=dict(classes=('airplane', ), palette=[
152
+ (
153
+ 220,
154
+ 20,
155
+ 60,
156
+ ),
157
+ ]),
158
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
159
+ ann_file='train/__coco.json',
160
+ data_prefix=dict(img='train/'),
161
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
162
+ pipeline=[
163
+ dict(type='LoadImageFromFile', backend_args=None),
164
+ dict(type='LoadAnnotations', with_bbox=True),
165
+ dict(
166
+ type='CachedMosaic',
167
+ img_scale=(
168
+ 640,
169
+ 640,
170
+ ),
171
+ pad_val=114.0,
172
+ max_cached_images=20,
173
+ random_pop=False),
174
+ dict(
175
+ type='RandomResize',
176
+ scale=(
177
+ 1280,
178
+ 1280,
179
+ ),
180
+ ratio_range=(
181
+ 0.5,
182
+ 2.0,
183
+ ),
184
+ keep_ratio=True),
185
+ dict(type='RandomCrop', crop_size=(
186
+ 640,
187
+ 640,
188
+ )),
189
+ dict(type='YOLOXHSVRandomAug'),
190
+ dict(type='RandomFlip', prob=0.5),
191
+ dict(
192
+ type='Pad',
193
+ size=(
194
+ 640,
195
+ 640,
196
+ ),
197
+ pad_val=dict(img=(
198
+ 114,
199
+ 114,
200
+ 114,
201
+ ))),
202
+ dict(
203
+ type='CachedMixUp',
204
+ img_scale=(
205
+ 640,
206
+ 640,
207
+ ),
208
+ ratio_range=(
209
+ 1.0,
210
+ 1.0,
211
+ ),
212
+ max_cached_images=10,
213
+ random_pop=False,
214
+ pad_val=(
215
+ 114,
216
+ 114,
217
+ 114,
218
+ ),
219
+ prob=0.5),
220
+ dict(type='PackDetInputs'),
221
+ ],
222
+ backend_args=None),
223
+ pin_memory=True)
224
+ val_dataloader = dict(
225
+ batch_size=64,
226
+ num_workers=2,
227
+ persistent_workers=True,
228
+ drop_last=False,
229
+ sampler=dict(type='DefaultSampler', shuffle=False),
230
+ dataset=dict(
231
+ type='CocoDataset',
232
+ metainfo=dict(classes=('airplane', ), palette=[
233
+ (
234
+ 220,
235
+ 20,
236
+ 60,
237
+ ),
238
+ ]),
239
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
240
+ ann_file='val/__coco.json',
241
+ data_prefix=dict(img='val/'),
242
+ test_mode=True,
243
+ pipeline=[
244
+ dict(type='LoadImageFromFile', backend_args=None),
245
+ dict(type='Resize', scale=(
246
+ 640,
247
+ 640,
248
+ ), keep_ratio=True),
249
+ dict(
250
+ type='Pad',
251
+ size=(
252
+ 640,
253
+ 640,
254
+ ),
255
+ pad_val=dict(img=(
256
+ 114,
257
+ 114,
258
+ 114,
259
+ ))),
260
+ dict(type='LoadAnnotations', with_bbox=True),
261
+ dict(
262
+ type='PackDetInputs',
263
+ meta_keys=(
264
+ 'img_id',
265
+ 'img_path',
266
+ 'ori_shape',
267
+ 'img_shape',
268
+ 'scale_factor',
269
+ )),
270
+ ],
271
+ backend_args=None))
272
+ test_dataloader = dict(
273
+ batch_size=64,
274
+ num_workers=2,
275
+ persistent_workers=True,
276
+ drop_last=False,
277
+ sampler=dict(type='DefaultSampler', shuffle=False),
278
+ dataset=dict(
279
+ type='CocoDataset',
280
+ metainfo=dict(classes=('airplane', ), palette=[
281
+ (
282
+ 220,
283
+ 20,
284
+ 60,
285
+ ),
286
+ ]),
287
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
288
+ ann_file='test/__coco.json',
289
+ data_prefix=dict(img='test/'),
290
+ test_mode=True,
291
+ pipeline=[
292
+ dict(type='LoadImageFromFile', backend_args=None),
293
+ dict(type='Resize', scale=(
294
+ 640,
295
+ 640,
296
+ ), keep_ratio=True),
297
+ dict(
298
+ type='Pad',
299
+ size=(
300
+ 640,
301
+ 640,
302
+ ),
303
+ pad_val=dict(img=(
304
+ 114,
305
+ 114,
306
+ 114,
307
+ ))),
308
+ dict(type='LoadAnnotations', with_bbox=True),
309
+ dict(
310
+ type='PackDetInputs',
311
+ meta_keys=(
312
+ 'img_id',
313
+ 'img_path',
314
+ 'ori_shape',
315
+ 'img_shape',
316
+ 'scale_factor',
317
+ )),
318
+ ],
319
+ backend_args=None))
320
+ val_evaluator = dict(
321
+ type='CocoMetric',
322
+ ann_file='/home/safouane/Downloads/benchmark_aircraft/data/val/__coco.json',
323
+ metric='bbox',
324
+ format_only=False,
325
+ backend_args=None)
326
+ test_evaluator = dict(
327
+ type='CocoMetric',
328
+ ann_file=
329
+ '/home/safouane/Downloads/benchmark_aircraft/data/test/__coco.json',
330
+ metric='bbox',
331
+ format_only=False,
332
+ backend_args=None)
333
+ tta_model = dict(
334
+ type='DetTTAModel',
335
+ tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100))
336
+ img_scales = [
337
+ (
338
+ 640,
339
+ 640,
340
+ ),
341
+ (
342
+ 320,
343
+ 320,
344
+ ),
345
+ (
346
+ 960,
347
+ 960,
348
+ ),
349
+ ]
350
+ tta_pipeline = [
351
+ dict(type='LoadImageFromFile', backend_args=None),
352
+ dict(
353
+ type='TestTimeAug',
354
+ transforms=[
355
+ [
356
+ dict(type='Resize', scale=(
357
+ 640,
358
+ 640,
359
+ ), keep_ratio=True),
360
+ dict(type='Resize', scale=(
361
+ 320,
362
+ 320,
363
+ ), keep_ratio=True),
364
+ dict(type='Resize', scale=(
365
+ 960,
366
+ 960,
367
+ ), keep_ratio=True),
368
+ ],
369
+ [
370
+ dict(type='RandomFlip', prob=1.0),
371
+ dict(type='RandomFlip', prob=0.0),
372
+ ],
373
+ [
374
+ dict(
375
+ type='Pad',
376
+ size=(
377
+ 960,
378
+ 960,
379
+ ),
380
+ pad_val=dict(img=(
381
+ 114,
382
+ 114,
383
+ 114,
384
+ ))),
385
+ ],
386
+ [
387
+ dict(type='LoadAnnotations', with_bbox=True),
388
+ ],
389
+ [
390
+ dict(
391
+ type='PackDetInputs',
392
+ meta_keys=(
393
+ 'img_id',
394
+ 'img_path',
395
+ 'ori_shape',
396
+ 'img_shape',
397
+ 'scale_factor',
398
+ 'flip',
399
+ 'flip_direction',
400
+ )),
401
+ ],
402
+ ]),
403
+ ]
404
+ model = dict(
405
+ type='RTMDet',
406
+ data_preprocessor=dict(
407
+ type='DetDataPreprocessor',
408
+ mean=[
409
+ 103.53,
410
+ 116.28,
411
+ 123.675,
412
+ ],
413
+ std=[
414
+ 57.375,
415
+ 57.12,
416
+ 58.395,
417
+ ],
418
+ bgr_to_rgb=False,
419
+ batch_augments=None),
420
+ backbone=dict(
421
+ type='CSPNeXt',
422
+ arch='P5',
423
+ expand_ratio=0.5,
424
+ deepen_factor=0.167,
425
+ widen_factor=0.375,
426
+ channel_attention=True,
427
+ norm_cfg=dict(type='SyncBN'),
428
+ act_cfg=dict(type='SiLU', inplace=True),
429
+ init_cfg=dict(
430
+ type='Pretrained',
431
+ prefix='backbone.',
432
+ checkpoint=
433
+ 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth'
434
+ )),
435
+ neck=dict(
436
+ type='CSPNeXtPAFPN',
437
+ in_channels=[
438
+ 96,
439
+ 192,
440
+ 384,
441
+ ],
442
+ out_channels=96,
443
+ num_csp_blocks=1,
444
+ expand_ratio=0.5,
445
+ norm_cfg=dict(type='SyncBN'),
446
+ act_cfg=dict(type='SiLU', inplace=True)),
447
+ bbox_head=dict(
448
+ type='RTMDetSepBNHead',
449
+ num_classes=1,
450
+ in_channels=96,
451
+ stacked_convs=2,
452
+ feat_channels=96,
453
+ anchor_generator=dict(
454
+ type='MlvlPointGenerator', offset=0, strides=[
455
+ 8,
456
+ 16,
457
+ 32,
458
+ ]),
459
+ bbox_coder=dict(type='DistancePointBBoxCoder'),
460
+ loss_cls=dict(
461
+ type='QualityFocalLoss',
462
+ use_sigmoid=True,
463
+ beta=2.0,
464
+ loss_weight=1.0),
465
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
466
+ with_objectness=False,
467
+ exp_on_reg=False,
468
+ share_conv=True,
469
+ pred_kernel_size=1,
470
+ norm_cfg=dict(type='SyncBN'),
471
+ act_cfg=dict(type='SiLU', inplace=True)),
472
+ train_cfg=dict(
473
+ assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
474
+ allowed_border=-1,
475
+ pos_weight=-1,
476
+ debug=False),
477
+ test_cfg=dict(
478
+ nms_pre=30000,
479
+ min_bbox_size=0,
480
+ score_thr=0.001,
481
+ nms=dict(type='nms', iou_threshold=0.65),
482
+ max_per_img=300))
483
+ train_pipeline_stage2 = [
484
+ dict(type='LoadImageFromFile', backend_args=None),
485
+ dict(type='LoadAnnotations', with_bbox=True),
486
+ dict(
487
+ type='RandomResize',
488
+ scale=(
489
+ 640,
490
+ 640,
491
+ ),
492
+ ratio_range=(
493
+ 0.5,
494
+ 2.0,
495
+ ),
496
+ keep_ratio=True),
497
+ dict(type='RandomCrop', crop_size=(
498
+ 640,
499
+ 640,
500
+ )),
501
+ dict(type='YOLOXHSVRandomAug'),
502
+ dict(type='RandomFlip', prob=0.5),
503
+ dict(type='Pad', size=(
504
+ 640,
505
+ 640,
506
+ ), pad_val=dict(img=(
507
+ 114,
508
+ 114,
509
+ 114,
510
+ ))),
511
+ dict(type='PackDetInputs'),
512
+ ]
513
+ stage2_num_epochs = 20
514
+ base_lr = 0.004
515
+ interval = 10
516
+ custom_hooks = [
517
+ dict(
518
+ type='EMAHook',
519
+ ema_type='ExpMomentumEMA',
520
+ momentum=0.0002,
521
+ update_buffers=True,
522
+ priority=49),
523
+ dict(
524
+ type='PipelineSwitchHook',
525
+ switch_epoch=280,
526
+ switch_pipeline=[
527
+ dict(type='LoadImageFromFile', backend_args=None),
528
+ dict(type='LoadAnnotations', with_bbox=True),
529
+ dict(
530
+ type='RandomResize',
531
+ scale=(
532
+ 640,
533
+ 640,
534
+ ),
535
+ ratio_range=(
536
+ 0.5,
537
+ 2.0,
538
+ ),
539
+ keep_ratio=True),
540
+ dict(type='RandomCrop', crop_size=(
541
+ 640,
542
+ 640,
543
+ )),
544
+ dict(type='YOLOXHSVRandomAug'),
545
+ dict(type='RandomFlip', prob=0.5),
546
+ dict(
547
+ type='Pad',
548
+ size=(
549
+ 640,
550
+ 640,
551
+ ),
552
+ pad_val=dict(img=(
553
+ 114,
554
+ 114,
555
+ 114,
556
+ ))),
557
+ dict(type='PackDetInputs'),
558
+ ]),
559
+ ]
560
+ checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth'
561
+ launcher = 'none'
562
+ work_dir = './work_dirs/rtmdet_tiny_8xb32-300e_coco'
inference/ssd_config.py ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'CocoDataset'
2
+ data_root = '/home/safouane/Downloads/benchmark_aircraft/data/'
3
+ backend_args = None
4
+ max_epochs = 500
5
+ metainfo = dict(
6
+ classes=('airplane', ), palette=[
7
+ (
8
+ 0,
9
+ 0,
10
+ 255,
11
+ ),
12
+ ])
13
+ num_classes = 1
14
+ batch_size = 128
15
+ train_pipeline = [
16
+ dict(type='LoadImageFromFile'),
17
+ dict(type='LoadAnnotations', with_bbox=True),
18
+ dict(
19
+ type='Expand',
20
+ mean=[
21
+ 123.675,
22
+ 116.28,
23
+ 103.53,
24
+ ],
25
+ to_rgb=True,
26
+ ratio_range=(
27
+ 1,
28
+ 4,
29
+ )),
30
+ dict(
31
+ type='MinIoURandomCrop',
32
+ min_ious=(
33
+ 0.1,
34
+ 0.3,
35
+ 0.5,
36
+ 0.7,
37
+ 0.9,
38
+ ),
39
+ min_crop_size=0.3),
40
+ dict(type='Resize', scale=(
41
+ 320,
42
+ 320,
43
+ ), keep_ratio=False),
44
+ dict(type='RandomFlip', prob=0.5),
45
+ dict(
46
+ type='PhotoMetricDistortion',
47
+ brightness_delta=32,
48
+ contrast_range=(
49
+ 0.5,
50
+ 1.5,
51
+ ),
52
+ saturation_range=(
53
+ 0.5,
54
+ 1.5,
55
+ ),
56
+ hue_delta=18),
57
+ dict(type='PackDetInputs'),
58
+ ]
59
+ test_pipeline = [
60
+ dict(type='LoadImageFromFile'),
61
+ dict(type='Resize', scale=(
62
+ 320,
63
+ 320,
64
+ ), keep_ratio=False),
65
+ dict(type='LoadAnnotations', with_bbox=True),
66
+ dict(
67
+ type='PackDetInputs',
68
+ meta_keys=(
69
+ 'img_id',
70
+ 'img_path',
71
+ 'ori_shape',
72
+ 'img_shape',
73
+ 'scale_factor',
74
+ )),
75
+ ]
76
+ train_dataloader = dict(
77
+ batch_size=128,
78
+ num_workers=2,
79
+ persistent_workers=True,
80
+ sampler=dict(type='DefaultSampler', shuffle=True),
81
+ batch_sampler=None,
82
+ dataset=dict(
83
+ type='RepeatDataset',
84
+ times=5,
85
+ dataset=dict(
86
+ type='CocoDataset',
87
+ metainfo=dict(classes=('airplane', ), palette=[
88
+ (
89
+ 220,
90
+ 20,
91
+ 60,
92
+ ),
93
+ ]),
94
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
95
+ ann_file='train/__coco.json',
96
+ data_prefix=dict(img='train/'),
97
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
98
+ pipeline=[
99
+ dict(type='LoadImageFromFile'),
100
+ dict(type='LoadAnnotations', with_bbox=True),
101
+ dict(
102
+ type='Expand',
103
+ mean=[
104
+ 123.675,
105
+ 116.28,
106
+ 103.53,
107
+ ],
108
+ to_rgb=True,
109
+ ratio_range=(
110
+ 1,
111
+ 4,
112
+ )),
113
+ dict(
114
+ type='MinIoURandomCrop',
115
+ min_ious=(
116
+ 0.1,
117
+ 0.3,
118
+ 0.5,
119
+ 0.7,
120
+ 0.9,
121
+ ),
122
+ min_crop_size=0.3),
123
+ dict(type='Resize', scale=(
124
+ 320,
125
+ 320,
126
+ ), keep_ratio=False),
127
+ dict(type='RandomFlip', prob=0.5),
128
+ dict(
129
+ type='PhotoMetricDistortion',
130
+ brightness_delta=32,
131
+ contrast_range=(
132
+ 0.5,
133
+ 1.5,
134
+ ),
135
+ saturation_range=(
136
+ 0.5,
137
+ 1.5,
138
+ ),
139
+ hue_delta=18),
140
+ dict(type='PackDetInputs'),
141
+ ])))
142
+ val_dataloader = dict(
143
+ batch_size=128,
144
+ num_workers=2,
145
+ persistent_workers=True,
146
+ drop_last=False,
147
+ sampler=dict(type='DefaultSampler', shuffle=False),
148
+ dataset=dict(
149
+ type='CocoDataset',
150
+ metainfo=dict(classes=('airplane', ), palette=[
151
+ (
152
+ 220,
153
+ 20,
154
+ 60,
155
+ ),
156
+ ]),
157
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
158
+ ann_file='val/__coco.json',
159
+ data_prefix=dict(img='val/'),
160
+ test_mode=True,
161
+ pipeline=[
162
+ dict(type='LoadImageFromFile'),
163
+ dict(type='Resize', scale=(
164
+ 320,
165
+ 320,
166
+ ), keep_ratio=False),
167
+ dict(type='LoadAnnotations', with_bbox=True),
168
+ dict(
169
+ type='PackDetInputs',
170
+ meta_keys=(
171
+ 'img_id',
172
+ 'img_path',
173
+ 'ori_shape',
174
+ 'img_shape',
175
+ 'scale_factor',
176
+ )),
177
+ ],
178
+ backend_args=None))
179
+ test_dataloader = dict(
180
+ batch_size=128,
181
+ num_workers=2,
182
+ persistent_workers=True,
183
+ drop_last=False,
184
+ sampler=dict(type='DefaultSampler', shuffle=False),
185
+ dataset=dict(
186
+ type='CocoDataset',
187
+ metainfo=dict(classes=('airplane', ), palette=[
188
+ (
189
+ 220,
190
+ 20,
191
+ 60,
192
+ ),
193
+ ]),
194
+ data_root='/home/safouane/Downloads/benchmark_aircraft/data/',
195
+ ann_file='test/__coco.json',
196
+ data_prefix=dict(img='test/'),
197
+ test_mode=True,
198
+ pipeline=[
199
+ dict(type='LoadImageFromFile'),
200
+ dict(type='Resize', scale=(
201
+ 320,
202
+ 320,
203
+ ), keep_ratio=False),
204
+ dict(type='LoadAnnotations', with_bbox=True),
205
+ dict(
206
+ type='PackDetInputs',
207
+ meta_keys=(
208
+ 'img_id',
209
+ 'img_path',
210
+ 'ori_shape',
211
+ 'img_shape',
212
+ 'scale_factor',
213
+ )),
214
+ ],
215
+ backend_args=None))
216
+ val_evaluator = dict(
217
+ type='CocoMetric',
218
+ ann_file='/home/safouane/Downloads/benchmark_aircraft/data/val/__coco.json',
219
+ metric='bbox',
220
+ format_only=False,
221
+ backend_args=None)
222
+ test_evaluator = dict(
223
+ type='CocoMetric',
224
+ ann_file=
225
+ '/home/safouane/Downloads/benchmark_aircraft/data/test/__coco.json',
226
+ metric='bbox',
227
+ format_only=False,
228
+ backend_args=None)
229
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=1)
230
+ val_cfg = dict(type='ValLoop')
231
+ test_cfg = dict(type='TestLoop')
232
+ param_scheduler = [
233
+ dict(
234
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
235
+ dict(
236
+ type='CosineAnnealingLR',
237
+ begin=0,
238
+ T_max=120,
239
+ end=120,
240
+ by_epoch=True,
241
+ eta_min=0),
242
+ ]
243
+ optim_wrapper = dict(
244
+ type='OptimWrapper',
245
+ optimizer=dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=4e-05))
246
+ auto_scale_lr = dict(enable=False, base_batch_size=64)
247
+ default_scope = 'mmdet'
248
+ default_hooks = dict(
249
+ timer=dict(type='IterTimerHook'),
250
+ logger=dict(type='LoggerHook', interval=50),
251
+ param_scheduler=dict(type='ParamSchedulerHook'),
252
+ checkpoint=dict(type='CheckpointHook', interval=20, save_best='auto'),
253
+ sampler_seed=dict(type='DistSamplerSeedHook'),
254
+ visualization=dict(type='DetVisualizationHook'))
255
+ env_cfg = dict(
256
+ cudnn_benchmark=True,
257
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
258
+ dist_cfg=dict(backend='nccl'))
259
+ vis_backends = [
260
+ dict(type='LocalVisBackend'),
261
+ ]
262
+ visualizer = dict(
263
+ type='DetLocalVisualizer',
264
+ vis_backends=[
265
+ dict(type='LocalVisBackend'),
266
+ dict(type='TensorboardVisBackend'),
267
+ ],
268
+ name='visualizer')
269
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
270
+ log_level = 'INFO'
271
+ load_from = '/home/safouane/Downloads/benchmark_aircraft/mmdetection/configs/ssd/checkpoints/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth'
272
+ resume = False
273
+ data_preprocessor = dict(
274
+ type='DetDataPreprocessor',
275
+ mean=[
276
+ 123.675,
277
+ 116.28,
278
+ 103.53,
279
+ ],
280
+ std=[
281
+ 58.395,
282
+ 57.12,
283
+ 57.375,
284
+ ],
285
+ bgr_to_rgb=True,
286
+ pad_size_divisor=1)
287
+ model = dict(
288
+ type='SingleStageDetector',
289
+ data_preprocessor=dict(
290
+ type='DetDataPreprocessor',
291
+ mean=[
292
+ 123.675,
293
+ 116.28,
294
+ 103.53,
295
+ ],
296
+ std=[
297
+ 58.395,
298
+ 57.12,
299
+ 57.375,
300
+ ],
301
+ bgr_to_rgb=True,
302
+ pad_size_divisor=1),
303
+ backbone=dict(
304
+ type='MobileNetV2',
305
+ out_indices=(
306
+ 4,
307
+ 7,
308
+ ),
309
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
310
+ init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
311
+ neck=dict(
312
+ type='SSDNeck',
313
+ in_channels=(
314
+ 96,
315
+ 1280,
316
+ ),
317
+ out_channels=(
318
+ 96,
319
+ 1280,
320
+ 512,
321
+ 256,
322
+ 256,
323
+ 128,
324
+ ),
325
+ level_strides=(
326
+ 2,
327
+ 2,
328
+ 2,
329
+ 2,
330
+ ),
331
+ level_paddings=(
332
+ 1,
333
+ 1,
334
+ 1,
335
+ 1,
336
+ ),
337
+ l2_norm_scale=None,
338
+ use_depthwise=True,
339
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
340
+ act_cfg=dict(type='ReLU6'),
341
+ init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
342
+ bbox_head=dict(
343
+ type='SSDHead',
344
+ in_channels=(
345
+ 96,
346
+ 1280,
347
+ 512,
348
+ 256,
349
+ 256,
350
+ 128,
351
+ ),
352
+ num_classes=1,
353
+ use_depthwise=True,
354
+ norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
355
+ act_cfg=dict(type='ReLU6'),
356
+ init_cfg=dict(type='Normal', layer='Conv2d', std=0.001),
357
+ anchor_generator=dict(
358
+ type='SSDAnchorGenerator',
359
+ scale_major=False,
360
+ strides=[
361
+ 16,
362
+ 32,
363
+ 64,
364
+ 107,
365
+ 160,
366
+ 320,
367
+ ],
368
+ ratios=[
369
+ [
370
+ 2,
371
+ 3,
372
+ ],
373
+ [
374
+ 2,
375
+ 3,
376
+ ],
377
+ [
378
+ 2,
379
+ 3,
380
+ ],
381
+ [
382
+ 2,
383
+ 3,
384
+ ],
385
+ [
386
+ 2,
387
+ 3,
388
+ ],
389
+ [
390
+ 2,
391
+ 3,
392
+ ],
393
+ ],
394
+ min_sizes=[
395
+ 48,
396
+ 100,
397
+ 150,
398
+ 202,
399
+ 253,
400
+ 304,
401
+ ],
402
+ max_sizes=[
403
+ 100,
404
+ 150,
405
+ 202,
406
+ 253,
407
+ 304,
408
+ 320,
409
+ ]),
410
+ bbox_coder=dict(
411
+ type='DeltaXYWHBBoxCoder',
412
+ target_means=[
413
+ 0.0,
414
+ 0.0,
415
+ 0.0,
416
+ 0.0,
417
+ ],
418
+ target_stds=[
419
+ 0.1,
420
+ 0.1,
421
+ 0.2,
422
+ 0.2,
423
+ ])),
424
+ train_cfg=dict(
425
+ assigner=dict(
426
+ type='MaxIoUAssigner',
427
+ pos_iou_thr=0.5,
428
+ neg_iou_thr=0.5,
429
+ min_pos_iou=0.0,
430
+ ignore_iof_thr=-1,
431
+ gt_max_assign_all=False),
432
+ sampler=dict(type='PseudoSampler'),
433
+ smoothl1_beta=1.0,
434
+ allowed_border=-1,
435
+ pos_weight=-1,
436
+ neg_pos_ratio=3,
437
+ debug=False),
438
+ test_cfg=dict(
439
+ nms_pre=1000,
440
+ nms=dict(type='nms', iou_threshold=0.45),
441
+ min_bbox_size=0,
442
+ score_thr=0.02,
443
+ max_per_img=200))
444
+ input_size = 320
445
+ custom_hooks = [
446
+ dict(type='NumClassCheckHook'),
447
+ dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW'),
448
+ ]
449
+ launcher = 'none'
450
+ work_dir = './work_dirs/ssdlite_mobilenetv2-scratch_8xb24-600e_coco'
requirements.txt ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.3.0
2
+ addict==2.4.0
3
+ aiofiles==23.2.1
4
+ aiohttp==3.8.3
5
+ aiosignal==1.3.1
6
+ aliyun-python-sdk-core==2.15.0
7
+ aliyun-python-sdk-kms==2.16.2
8
+ altair==5.3.0
9
+ annotated-types==0.6.0
10
+ anyio==4.3.0
11
+ apispec==6.0.2
12
+ apispec-webframeworks==0.5.2
13
+ asttokens==2.1.0
14
+ astunparse==1.6.3
15
+ async-timeout==4.0.2
16
+ attrs==22.2.0
17
+ backcall==0.2.0
18
+ bidict==0.22.1
19
+ bleach==4.1.0
20
+ blessed==1.20.0
21
+ blis==0.7.9
22
+ Brotli @ file:///tmp/abs_ecyw11_7ze/croots/recipe/brotli-split_1659616059936/work
23
+ cachelib==0.10.2
24
+ cachetools==5.2.0
25
+ catalogue==2.0.8
26
+ certifi @ file:///croot/certifi_1707229174982/work/certifi
27
+ cffi==1.15.1
28
+ charset-normalizer==2.1.1
29
+ click==8.1.7
30
+ cmake==3.27.1
31
+ colorama==0.4.6
32
+ confection==0.0.3
33
+ contourpy==1.0.6
34
+ crcmod==1.7
35
+ cryptography==42.0.5
36
+ cycler==0.12.1
37
+ cymem==2.0.7
38
+ debugpy==1.6.3
39
+ decorator==5.1.1
40
+ dill==0.3.8
41
+ dnspython==2.2.1
42
+ entrypoints==0.4
43
+ etils==0.9.0
44
+ eventlet==0.33.3
45
+ exceptiongroup==1.2.0
46
+ executing==1.2.0
47
+ fastai==2.7.10
48
+ fastapi==0.110.1
49
+ fastcore==1.5.27
50
+ fastdownload==0.0.7
51
+ fastprogress==1.0.3
52
+ ffmpy==0.3.2
53
+ filelock==3.12.2
54
+ Flask==2.2.3
55
+ flask-cloudflared==0.0.10
56
+ flask-ngrok==0.0.25
57
+ Flask-Session==0.4.0
58
+ Flask-SocketIO==5.3.2
59
+ fonttools==4.38.0
60
+ frozenlist==1.3.3
61
+ fsspec==2023.6.0
62
+ gitdb==4.0.10
63
+ GitPython==3.1.31
64
+ gmpy2 @ file:///tmp/build/80754af9/gmpy2_1645455532332/work
65
+ google-pasta==0.2.0
66
+ googleapis-common-protos==1.57.0
67
+ gpustat==1.1
68
+ gradio==4.26.0
69
+ gradio_client==0.15.1
70
+ grpcio==1.50.0
71
+ h11==0.14.0
72
+ h5py==2.10.0
73
+ httpcore==1.0.5
74
+ httpx==0.27.0
75
+ huggingface-hub==0.22.2
76
+ HyperPyYAML==1.2.1
77
+ idna @ file:///croot/idna_1666125576474/work
78
+ importlib-resources==5.10.0
79
+ importlib_metadata==7.1.0
80
+ ipykernel==6.17.1
81
+ ipython==8.6.0
82
+ itsdangerous==2.1.2
83
+ jedi==0.18.2
84
+ Jinja2==3.1.2
85
+ jmespath==0.10.0
86
+ joblib==1.2.0
87
+ jsonschema==4.21.1
88
+ jsonschema-specifications==2023.12.1
89
+ jupyter_client==7.4.7
90
+ jupyter_core==5.7.2
91
+ kiwisolver==1.4.4
92
+ langcodes==3.3.0
93
+ libclang==14.0.6
94
+ lit==16.0.6
95
+ loguru==0.6.0
96
+ lupa==1.10
97
+ Markdown==3.4.1
98
+ markdown-it-py==3.0.0
99
+ MarkupSafe==2.1.1
100
+ marshmallow==3.19.0
101
+ matplotlib==3.7.5
102
+ matplotlib-inline==0.1.6
103
+ mdurl==0.1.2
104
+ mkl-fft @ file:///croot/mkl_fft_1695058164594/work
105
+ mkl-random @ file:///croot/mkl_random_1695059800811/work
106
+ mkl-service==2.4.0
107
+ mkultra==0.1
108
+ mmcv==2.1.0
109
+ -e git+https://github.com/open-mmlab/mmdetection.git@cfd5d3a985b0249de009b67d04f37263e11cdf3d#egg=mmdet
110
+ mmengine==0.10.3
111
+ model-index==0.1.11
112
+ monai==1.1.0
113
+ mpmath==1.3.0
114
+ multidict==6.0.4
115
+ multiprocess==0.70.15
116
+ murmurhash==1.0.9
117
+ nest-asyncio==1.5.6
118
+ networkx==3.0
119
+ nibabel==5.0.0
120
+ numpy @ file:///work/mkl/numpy_and_numpy_base_1682953417311/work
121
+ nvidia-cublas-cu11==11.10.3.66
122
+ nvidia-cublas-cu12==12.1.3.1
123
+ nvidia-cuda-cupti-cu11==11.7.101
124
+ nvidia-cuda-cupti-cu12==12.1.105
125
+ nvidia-cuda-nvrtc-cu11==11.7.99
126
+ nvidia-cuda-nvrtc-cu12==12.1.105
127
+ nvidia-cuda-runtime-cu11==11.7.99
128
+ nvidia-cuda-runtime-cu12==12.1.105
129
+ nvidia-cudnn-cu11==8.5.0.96
130
+ nvidia-cudnn-cu12==8.9.2.26
131
+ nvidia-cufft-cu11==10.9.0.58
132
+ nvidia-cufft-cu12==11.0.2.54
133
+ nvidia-curand-cu11==10.2.10.91
134
+ nvidia-curand-cu12==10.3.2.106
135
+ nvidia-cusolver-cu11==11.4.0.1
136
+ nvidia-cusolver-cu12==11.4.5.107
137
+ nvidia-cusparse-cu11==11.7.4.91
138
+ nvidia-cusparse-cu12==12.1.0.106
139
+ nvidia-ml-py==12.535.77
140
+ nvidia-nccl-cu11==2.14.3
141
+ nvidia-nccl-cu12==2.19.3
142
+ nvidia-nvjitlink-cu12==12.4.127
143
+ nvidia-nvtx-cu11==11.7.91
144
+ nvidia-nvtx-cu12==12.1.105
145
+ opencv-python==4.9.0.80
146
+ opendatalab==0.0.10
147
+ openmim==0.3.9
148
+ openxlab==0.0.38
149
+ opt-einsum==3.3.0
150
+ ordered-set==4.1.0
151
+ orjson==3.10.0
152
+ oss2==2.17.0
153
+ packaging==24.0
154
+ pandas==2.0.3
155
+ parso==0.8.3
156
+ pathy==0.10.0
157
+ pexpect==4.9.0
158
+ pickleshare==0.7.5
159
+ pillow==10.3.0
160
+ pkgutil_resolve_name==1.3.10
161
+ platformdirs==4.2.0
162
+ preshed==3.0.8
163
+ progress==1.6
164
+ promise==2.3
165
+ prompt-toolkit==3.0.33
166
+ psutil==5.9.4
167
+ ptyprocess==0.7.0
168
+ pure-eval==0.2.2
169
+ py-cpuinfo==9.0.0
170
+ pyarrow==12.0.1
171
+ pyasn1==0.4.8
172
+ pyasn1-modules==0.2.8
173
+ pycocotools==2.0.7
174
+ pycparser==2.21
175
+ pycryptodome==3.20.0
176
+ pydantic==2.7.0
177
+ pydantic_core==2.18.1
178
+ pyDeprecate==0.3.1
179
+ pydot==1.4.2
180
+ pydub==0.25.1
181
+ Pygments==2.13.0
182
+ pyparsing==3.1.2
183
+ PySocks @ file:///tmp/build/80754af9/pysocks_1605305779399/work
184
+ python-dateutil==2.8.2
185
+ python-engineio==4.3.4
186
+ python-multipart==0.0.9
187
+ python-socketio==5.7.2
188
+ pytz==2023.4
189
+ PyWavelets==1.4.1
190
+ PyYAML==6.0.1
191
+ pyzmq==24.0.1
192
+ referencing==0.34.0
193
+ regex==2022.10.31
194
+ requests==2.28.2
195
+ requests-oauthlib==1.3.1
196
+ rich==13.4.2
197
+ rpds-py==0.18.0
198
+ rsa==4.9
199
+ ruamel.yaml==0.17.28
200
+ ruamel.yaml.clib==0.2.7
201
+ ruff==0.3.7
202
+ runstats==2.0.0
203
+ safetensors==0.3.2
204
+ scikit-learn==1.1.3
205
+ scipy==1.10.1
206
+ seaborn==0.12.2
207
+ semantic-version==2.10.0
208
+ sentencepiece==0.1.97
209
+ shapely==2.0.3
210
+ shellingham==1.5.4
211
+ six==1.16.0
212
+ smart-open==5.2.1
213
+ smmap==5.0.0
214
+ sniffio==1.3.1
215
+ soundfile==0.12.1
216
+ spacy==3.4.3
217
+ spacy-legacy==3.0.10
218
+ spacy-loggers==1.0.3
219
+ speechbrain==0.5.15
220
+ srsly==2.4.5
221
+ stack-data==0.6.1
222
+ starlette==0.37.2
223
+ sympy==1.12
224
+ tabulate==0.9.0
225
+ tensorboard-plugin-wit==1.8.1
226
+ tensorflow-datasets==4.7.0
227
+ tensorflow-examples===e2510e7de8354ea89c54ab376ce52371efb39eff-
228
+ tensorflow-hub==0.12.0
229
+ tensorflow-io-gcs-filesystem==0.28.0
230
+ tensorflow-metadata==1.11.0
231
+ termcolor==2.1.1
232
+ terminaltables==3.1.10
233
+ thinc==8.1.5
234
+ thop==0.1.1.post2209072238
235
+ threadpoolctl==3.1.0
236
+ toml==0.10.2
237
+ tomli==2.0.1
238
+ tomlkit==0.12.0
239
+ toolz==0.12.1
240
+ torch==2.0.1
241
+ torch-tb-profiler==0.4.1
242
+ torchaudio==2.0.2
243
+ torchvision==0.15.2
244
+ tornado==6.2
245
+ tqdm==4.65.2
246
+ traitlets==5.14.2
247
+ triton==2.0.0
248
+ typer==0.12.3
249
+ typing_extensions==4.11.0
250
+ tzdata==2023.3
251
+ ultralytics==8.1.47
252
+ urllib3==1.26.18
253
+ uvicorn==0.29.0
254
+ wasabi==0.10.1
255
+ wcwidth==0.2.13
256
+ webencodings==0.5.1
257
+ websockets==11.0.3
258
+ Werkzeug==2.2.2
259
+ xxhash==3.3.0
260
+ yapf==0.40.2
261
+ yarl==1.8.2
262
+ zipp==3.10.0
utils.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, ast
2
+ from glob import glob
3
+ from PIL import ImageFont, ImageDraw, Image
4
+
5
def process_txtfile(filename):
    """
    Read a txt annotation file (designed for YOLO xywh format).

    Every non-empty line is expected to hold at least six whitespace-separated
    fields: ``class x_center y_center width height confidence``. The class
    field is ignored.

    Parameters:
        filename(str): path of the txt annotation file.

    Returns:
        segments: list of bboxes as [xmin, ymin, xmax, ymax], expressed in the
            same units as the file (image ratios for YOLO label files).
        confs: list of confidences of the bboxes, kept as the strings read
            from the file (callers convert with ``float``).

    Raises:
        IndexError: if a non-empty line has fewer than six fields.
        ValueError: if a coordinate field is not a number.
    """
    segments = []
    confs = []
    with open(filename, 'r') as file:
        for line in file:
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no detection.
                continue
            conf = fields[5]
            x, y, w, h = map(float, fields[1:5])
            half_w = w / 2
            half_h = h / 2
            # Convert centre/size to corner coordinates.
            segments.append([x - half_w, y - half_h, x + half_w, y + half_h])
            confs.append(conf)

    return segments, confs
34
+
35
def process_jsonfile(filename):
    """
    Read a json annotation file (designed for mmdetect dict format).

    The whole file is parsed as JSON first, so multi-line (pretty-printed)
    files and JSON literals such as ``null`` are accepted. If that fails the
    content is evaluated as a Python literal, and as a last resort only the
    first line is evaluated, which is what earlier versions of this function
    did.

    Parameters:
        filename(str): path of the json annotation file.

    Returns:
        segments: the value stored under the 'bboxes' key
            (xmin, ymin, xmax, ymax as px coordinates).
        confs: the value stored under the 'scores' key
            (confidences of the bboxes object detection).

    Raises:
        SyntaxError, ValueError: if the file cannot be parsed by any strategy.
        KeyError: if 'bboxes' or 'scores' is missing.
    """
    import json  # local import: the module header only provides os and ast

    with open(filename, 'r') as file:
        content = file.read().strip()

    try:
        dic = json.loads(content)
    except ValueError:
        # Not strict JSON, e.g. a Python dict repr using single quotes.
        try:
            dic = ast.literal_eval(content)
        except (ValueError, SyntaxError):
            # Legacy behaviour: a dict on the first line followed by other text.
            first_line = content.splitlines()[0] if content else ""
            dic = ast.literal_eval(first_line.strip())

    segments = dic['bboxes']
    confs = dic['scores']

    return segments, confs
54
+
55
def lerp_color(color1, color2, t):
    """
    Blend two RGB colors by linear interpolation.

    Parameters:
        color1 (tuple): RGB tuple returned when t is 0.
        color2 (tuple): RGB tuple returned when t is 1.
        t (float): Interpolation factor between 0 and 1.

    Returns:
        tuple: (r, g, b) with each channel truncated to an int.
    """
    def _mix(channel):
        # start + (end - start) * t, truncated toward zero by int().
        start = color1[channel]
        return int(start + (color2[channel] - start) * t)

    return _mix(0), _mix(1), _mix(2)
71
+
72
def generate_color_palette(start_color, end_color, steps):
    """
    Generate an RGB color palette between two colors.

    Parameters:
        start_color (tuple): RGB tuple of the starting color.
        end_color (tuple): RGB tuple of the ending color.
        steps (int): Number of colors in the palette. The first entry is
            start_color and, when steps >= 2, the last entry is end_color.

    Returns:
        list: List of RGB tuples; empty when steps <= 0, and only the start
            color when steps == 1.
    """
    if steps == 1:
        # A single color has no interval to divide: i / (steps - 1) would be 0 / 0.
        return [lerp_color(start_color, end_color, 0.0)]

    last = steps - 1
    # i / last runs from 0.0 to 1.0 inclusive across the palette.
    return [lerp_color(start_color, end_color, i / last) for i in range(steps)]
91
+
92
def _latest_run_dir(model_dir):
    """Return the most recently modified sub-directory of model_dir, or None."""
    run_dirs = [p for p in glob(os.path.join(model_dir, "*")) if os.path.isdir(p)]
    # Modification time, not name order: "exp9" sorts after "exp10" lexicographically.
    return max(run_dirs, key=os.path.getmtime) if run_dirs else None


def _load_detections(model_name, model_dir, image_path):
    """Locate and parse the prediction file written for image_path under model_dir."""
    if model_name.startswith("yolo"):
        run_dir = _latest_run_dir(model_dir)
        if run_dir is None:
            raise FileNotFoundError(f"No inference run found in {model_dir}")
        label_files = sorted(glob(os.path.join(run_dir, "labels", "*.txt")))
        if not label_files:
            # NOTE(review): presumably no label file is written when nothing
            # was detected; treated here as zero detections - confirm.
            return [], []
        return process_txtfile(label_files[0])

    # Strip directory and extension, whatever the extension length is.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    # Look in every sub-folder instead of relying on a positional index into
    # the sorted listing, which shifts once clean.jpg has been written.
    json_files = sorted(glob(os.path.join(model_dir, "*", f"{stem}.json")))
    if not json_files:
        raise FileNotFoundError(f"No {stem}.json prediction found in {model_dir}")
    return process_jsonfile(json_files[0])


def _confidence_color(conf, palette):
    """Pick the palette entry for a confidence >= 0.5, one entry per 5% bin."""
    thresholds = (0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95)
    # palette[1] for [0.5, 0.55) up to palette[10] for >= 0.95.
    return palette[1 + sum(conf >= thr for thr in thresholds)]


def draw_bbox(model_name, results_folder="./inference/results/", image_path="inptest.jpg"):
    """
    Draw bounding boxes from mmdetect or yolo formats

    Predictions are read from ``<results_folder>/<model_name>_inference``.
    Boxes whose rounded confidence is at least 0.5 are drawn on the image
    with a red-to-blue color that follows the confidence, together with a
    confidence label. The annotated image is saved as ``clean.jpg`` in that
    same folder.

    Parameters:
        model_name(str): lower-case model name; names starting with "yolo"
            are read as YOLO txt labels, all others as mmdetect json.
        results_folder(str): folder holding the per-model result folders.
        image_path(str): path of the image the inference was run on.

    Returns:
        im: the annotated PIL image.
        count: number of detections with a confidence above 0.5.

    Raises:
        FileNotFoundError: if no inference output can be found for the model.
    """
    # annotations style
    yellow = (255, 255, 128)
    black = (0, 0, 0)
    steps = 11  # Step : 5%
    # (255, 0, 0) Red -> (0, 0, 255) Blue
    palette = generate_color_palette((255, 0, 0), (0, 0, 255), steps)
    lw = 9
    try:
        font = ImageFont.truetype(font="Pillow/Tests/fonts/FreeMono.ttf", size=48)
    except OSError:
        # The font file is not shipped everywhere; keep working with the
        # built-in font rather than failing the whole detection.
        font = ImageFont.load_default()

    model_dir = os.path.join(results_folder, f"{model_name}_inference")
    segments, confs = _load_detections(model_name, model_dir, image_path)
    is_yolo = model_name.startswith("yolo")

    im = Image.open(image_path)
    width, height = im.size
    imdraw = ImageDraw.Draw(im)

    for conf, box in zip(confs, segments):
        conf_r = round(float(conf), 3)  # round conf
        if conf_r < 0.5:  # 0.5 threshold
            continue

        if is_yolo:
            # YOLO boxes are image ratios; scale them to pixels.
            box = [box[0] * width, box[1] * height, box[2] * width, box[3] * height]
        imdraw.rectangle(box, width=lw, outline=_confidence_color(conf_r, palette))  # box

        # label, placed just above the top-left corner of the box
        label = str(conf_r)
        w, h = font.getbbox(label)[2:4]  # text w, h
        imdraw.rectangle([box[0], box[1] - h, box[0] + w + 1, box[1] + 1], width=3, fill=black)
        imdraw.text([box[0], box[1] - h], label, fill=yellow, font=font)

    im.save(os.path.join(model_dir, "clean.jpg"))

    # count
    count = sum(1 for c in confs if float(c) > 0.5)

    return im, count
153
+