Spaces:

rockeycoss
/

Prompt-Segment-Anything-Demo

Runtime error

Prompt-Segment-Anything-Demo / projects /configs /focalnet_dino /focalnet-l-dino_sam-vit-h.py

RockeyCoss

add code files

51f6859 over 1 year ago

4.95 kB

	# Base config files referenced through `_base_` (paths relative to this file).
	_base_ = [
	    '../_base_/datasets/coco_panoptic.py',
	    '../_base_/default_runtime.py',
	]

	# Plugin flag and the directory named for plugin code.
	# NOTE(review): presumably the runner imports project modules from
	# `plugin_dir` when `plugin` is true -- confirm against the loader.
	plugin = True
	plugin_dir = 'projects/instance_segment_anything/'

	# Model definition: a 'DetWrapperInstanceSAM' wrapper configured with a
	# 'focalnet_dino' detector and a 'vit_h' SAM checkpoint.
	# NOTE(review): the detector config uses num_classes=91 while the wrapper
	# uses num_classes=80 -- presumably raw COCO category ids vs. contiguous
	# classes; confirm against the wrapper implementation.
	model = {
	    'type': 'DetWrapperInstanceSAM',
	    'det_wrapper_type': 'focalnet_dino',
	    # Options for the detector wrapper (comments below group by key name).
	    'det_wrapper_cfg': {
	        'num_classes': 91,
	        'param_dict_type': 'default',
	        'ddetr_lr_param': False,
	        'onecyclelr': False,
	        'modelname': 'dino',
	        'frozen_weights': None,
	        # backbone / positional embedding
	        'backbone': 'focalnet_L_384_22k_fl4',
	        'focal_levels': 4,
	        'focal_windows': 3,
	        'use_checkpoint': False,
	        'dilation': False,
	        'position_embedding': 'sine',
	        'pe_temperatureH': 20,
	        'pe_temperatureW': 20,
	        'return_interm_indices': [0, 1, 2, 3],
	        'backbone_freeze_keywords': None,
	        # encoder / decoder sizes
	        'enc_layers': 6,
	        'dec_layers': 6,
	        'unic_layers': 0,
	        'pre_norm': False,
	        'dim_feedforward': 2048,
	        'hidden_dim': 256,
	        'dropout': 0.0,
	        'nheads': 8,
	        'num_queries': 900,
	        'query_dim': 4,
	        'num_patterns': 0,
	        'pdetr3_bbox_embed_diff_each_layer': False,
	        'pdetr3_refHW': -1,
	        'random_refpoints_xy': False,
	        'fix_refpoints_hw': -1,
	        'dabdetr_yolo_like_anchor_update': False,
	        'dabdetr_deformable_encoder': False,
	        'dabdetr_deformable_decoder': False,
	        'use_deformable_box_attn': False,
	        'box_attn_type': 'roi_align',
	        'dec_layer_number': None,
	        'num_feature_levels': 5,
	        'enc_n_points': 4,
	        'dec_n_points': 4,
	        'decoder_layer_noise': False,
	        'dln_xy_noise': 0.2,
	        'dln_hw_noise': 0.2,
	        'add_channel_attention': False,
	        'add_pos_value': False,
	        # two-stage options
	        'two_stage_type': 'standard',
	        'two_stage_pat_embed': 0,
	        'two_stage_add_query_num': 0,
	        'two_stage_bbox_embed_share': False,
	        'two_stage_class_embed_share': False,
	        'two_stage_learn_wh': False,
	        'two_stage_default_hw': 0.05,
	        'two_stage_keep_all_tokens': False,
	        'num_select': 300,
	        'transformer_activation': 'relu',
	        'batch_norm_type': 'FrozenBatchNorm2d',
	        'masks': False,
	        # loss / matcher options
	        'aux_loss': True,
	        'set_cost_class': 2.0,
	        'set_cost_bbox': 5.0,
	        'set_cost_giou': 2.0,
	        'no_interm_box_loss': False,
	        'focal_alpha': 0.25,
	        'decoder_sa_type': 'sa',  # ['sa', 'ca_label', 'ca_content']
	        'matcher_type': 'HungarianMatcher',  # or SimpleMinsumMatcher
	        'decoder_module_seq': ['sa', 'ca', 'ffn'],
	        'nms_iou_threshold': -1,
	        'dec_pred_bbox_embed_share': True,
	        'dec_pred_class_embed_share': True,
	        # dn_* options
	        'use_dn': False,
	        'dn_number': 100,
	        'dn_box_noise_scale': 0.4,
	        'dn_label_noise_ratio': 0.5,
	        'embed_init_tgt': True,
	        'dn_labelbook_size': 91,
	        'match_unstable_error': True,
	        # for ema
	        'use_ema': False,
	        'ema_decay': 0.9997,
	        'ema_epoch': 0,
	        'use_detached_boxes_dec_out': False,
	    },
	    # Checkpoint paths and SAM-side settings.
	    'det_model_ckpt': 'ckpt/focalnet_l_dino.pth',
	    'num_classes': 80,
	    'model_type': 'vit_h',
	    'sam_checkpoint': 'ckpt/sam_vit_h_4b8939.pth',
	    'use_sam_iou': True,
	}
	# Normalization settings that are unpacked into the 'Normalize' step below.
	img_norm_cfg = {
	    'mean': [123.675, 116.28, 103.53],
	    'std': [58.395, 57.12, 57.375],
	    'to_rgb': True,
	}

	# Test pipeline. NOTE: Pad uses size_divisor=1 here instead of the default
	# size_divisor=32; either setting has little effect on performance.
	test_pipeline = [
	    {'type': 'LoadImageFromFile'},
	    {
	        'type': 'MultiScaleFlipAug',
	        'img_scale': (1333, 800),
	        'flip': False,
	        'transforms': [
	            {'type': 'Resize', 'keep_ratio': True},
	            {'type': 'RandomFlip'},
	            {'type': 'Normalize', **img_norm_cfg},
	            {'type': 'Pad', 'size_divisor': 1},
	            {'type': 'ImageToTensor', 'keys': ['img']},
	            {'type': 'Collect', 'keys': ['img']},
	        ],
	    },
	]

	# Dataset type name and the root directory the paths below are built from.
	dataset_type = 'CocoDataset'
	data_root = 'data/coco/'

	# Loader settings; only a `test` split is specified in this file, and it
	# reuses `test_pipeline` defined above.
	data = {
	    'samples_per_gpu': 1,
	    'workers_per_gpu': 1,
	    'test': {
	        'type': dataset_type,
	        'ann_file': f'{data_root}annotations/instances_val2017.json',
	        'img_prefix': f'{data_root}val2017/',
	        'pipeline': test_pipeline,
	    },
	}