# Spaces: Runtime error  (apparently a hosting-page status banner captured with this file; not part of the config)
# Base configs this file builds on (paths are relative to this config).
# NOTE(review): the dataset base is the panoptic one, while the `data`
# section of this file uses 'CocoDataset' with the instances_val2017
# annotations -- confirm the base is the intended one.
_base_ = [
    '../_base_/datasets/coco_panoptic.py',
    '../_base_/default_runtime.py',
]

# Plugin switch and the project directory it points at; presumably read by
# the launcher to import the project's custom modules -- confirm against the
# code that consumes `plugin` / `plugin_dir`.
plugin = True
plugin_dir = 'projects/instance_segment_anything/'
# Model definition: a 'DetWrapperInstanceSAM' that combines a
# 'focalnet_dino' detector wrapper (configured by `det_wrapper_cfg`, weights
# from `det_model_ckpt`) with a SAM 'vit_h' checkpoint (`sam_checkpoint`).
# Both checkpoint locations are given as relative paths.
model = dict(
    type='DetWrapperInstanceSAM',
    det_wrapper_type='focalnet_dino',
    # Detector-wrapper options.
    # NOTE(review): the detector uses num_classes=91 while the outer model
    # uses num_classes=80 -- presumably raw COCO category ids vs. the 80
    # contiguous classes; confirm against DetWrapperInstanceSAM.
    det_wrapper_cfg=dict(
        num_classes=91,
        param_dict_type='default',
        ddetr_lr_param=False,
        onecyclelr=False,
        modelname='dino',
        frozen_weights=None,
        backbone='focalnet_L_384_22k_fl4',
        focal_levels=4,
        focal_windows=3,
        use_checkpoint=False,
        dilation=False,
        position_embedding='sine',
        pe_temperatureH=20,
        pe_temperatureW=20,
        return_interm_indices=[0, 1, 2, 3],
        backbone_freeze_keywords=None,
        enc_layers=6,
        dec_layers=6,
        unic_layers=0,
        pre_norm=False,
        dim_feedforward=2048,
        hidden_dim=256,
        dropout=0.0,
        nheads=8,
        num_queries=900,
        query_dim=4,
        num_patterns=0,
        pdetr3_bbox_embed_diff_each_layer=False,
        pdetr3_refHW=-1,
        random_refpoints_xy=False,
        fix_refpoints_hw=-1,
        dabdetr_yolo_like_anchor_update=False,
        dabdetr_deformable_encoder=False,
        dabdetr_deformable_decoder=False,
        use_deformable_box_attn=False,
        box_attn_type='roi_align',
        dec_layer_number=None,
        num_feature_levels=5,
        enc_n_points=4,
        dec_n_points=4,
        decoder_layer_noise=False,
        dln_xy_noise=0.2,
        dln_hw_noise=0.2,
        add_channel_attention=False,
        add_pos_value=False,
        two_stage_type='standard',
        two_stage_pat_embed=0,
        two_stage_add_query_num=0,
        two_stage_bbox_embed_share=False,
        two_stage_class_embed_share=False,
        two_stage_learn_wh=False,
        two_stage_default_hw=0.05,
        two_stage_keep_all_tokens=False,
        num_select=300,
        transformer_activation='relu',
        batch_norm_type='FrozenBatchNorm2d',
        masks=False,
        aux_loss=True,
        set_cost_class=2.0,
        set_cost_bbox=5.0,
        set_cost_giou=2.0,
        no_interm_box_loss=False,
        focal_alpha=0.25,
        decoder_sa_type='sa',  # ['sa', 'ca_label', 'ca_content']
        matcher_type='HungarianMatcher',  # or SimpleMinsumMatcher
        decoder_module_seq=['sa', 'ca', 'ffn'],
        nms_iou_threshold=-1,
        dec_pred_bbox_embed_share=True,
        dec_pred_class_embed_share=True,
        use_dn=False,
        dn_number=100,
        dn_box_noise_scale=0.4,
        dn_label_noise_ratio=0.5,
        embed_init_tgt=True,
        dn_labelbook_size=91,
        match_unstable_error=True,
        # for ema
        use_ema=False,
        ema_decay=0.9997,
        ema_epoch=0,
        use_detached_boxes_dec_out=False,
    ),
    det_model_ckpt='ckpt/focalnet_l_dino.pth',
    num_classes=80,
    model_type='vit_h',
    sam_checkpoint='ckpt/sam_vit_h_4b8939.pth',
    use_sam_iou=True,
)
# Per-channel normalisation constants passed to the 'Normalize' transform.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

# Test pipeline.
# NOTE: the Pad transform's size_divisor differs from the default setting
# (size_divisor=32). Using the default or size_divisor=1 has little effect
# on performance.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=1),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ],
    ),
]

# Dataset settings: only a `test` split is defined here, using the COCO
# val2017 images and instance annotations under `data_root`.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=1,
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline,
    ),
)