|
|
|
import os |
|
import json |
|
import argparse |
|
import os.path as osp |
|
|
|
import torch |
|
import torch.nn.functional as F |
|
from mmengine.config import Config, DictAction |
|
from mmengine.runner import Runner |
|
from mmengine.dataset import Compose |
|
from mmyolo.registry import RUNNERS |
|
|
|
|
|
def get_caption_embed(runner, caption, prompt_template):
    """Encode the captions listed in a JSON file with the runner's text model.

    Args:
        runner: Object exposing ``runner.model.backbone.text_model``, a
            callable that takes a list of single-element text lists.
        caption (str): Path to a JSON file holding a sequence of entries;
            only the first element (``c[0]``) of each entry is used.
        prompt_template (str): ``str.format`` template applied to every
            caption, e.g. ``'{}'`` or ``'a photo of {}'``.

    Returns:
        torch.Tensor: L2-normalized embeddings on the CPU with two trailing
        singleton axes appended, i.e. indexed as ``[caption, dim, 1, 1]``.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the previous ``json.load(open(...))`` leaked it). JSON is UTF-8 by
    # specification, so do not depend on the locale's default encoding.
    with open(caption, 'r', encoding='utf-8') as caption_file:
        captions = json.load(caption_file)

    # Each caption is wrapped in its own one-element list for the text model.
    captions = [[prompt_template.format(c[0])] for c in captions]

    with torch.no_grad():
        # assumes the text model returns (num_captions, texts_per_caption,
        # dim); only the first text slot is kept -- TODO confirm.
        embed = runner.model.backbone.text_model(captions)
        embed = F.normalize(embed[:, 0, :], dim=1, p=2)
        embed = embed.detach().cpu()
        # Append two singleton axes; the caller stores this as a conv weight.
        embed = embed[:, :, None, None]
    return embed
|
|
|
|
|
def convert(runner, caption, checkpoint, prompt_template):
    """Fold caption embeddings into a checkpoint's classification heads.

    The state dict is rewritten as follows:

    * ``backbone.text_model*`` entries are dropped.
    * ``backbone.image_model*`` entries are renamed to ``backbone*``.
    * For every ``bbox_head.head_module.cls_contrasts.<i>`` module, the
      entries are replaced by ``<module>.conv.weight`` (caption embeddings
      scaled by ``exp(logit_scale)``) and ``<module>.conv.bias`` (``bias``
      repeated once per caption). Other parameters stored under such a
      module are not copied.
    * All remaining entries are cloned unchanged.

    Args:
        runner: Runner whose model provides the text encoder used by
            ``get_caption_embed``.
        caption (str): Path to the JSON caption file.
        checkpoint (str): Path to the checkpoint to convert; it must contain
            a ``'state_dict'`` entry.
        prompt_template (str): ``str.format`` template applied to captions.

    Returns:
        dict: ``{'state_dict': new_state_dict}``.

    Raises:
        KeyError: If the checkpoint has no ``'state_dict'`` entry or a
            ``cls_contrasts`` module lacks ``logit_scale`` / ``bias``.
    """
    # NOTE(review): torch.load unpickles the file; only use trusted
    # checkpoints.
    loaded = torch.load(checkpoint, map_location='cpu')
    state_dict = loaded['state_dict']
    embed = get_caption_embed(runner, caption, prompt_template)
    # A leftover ``ipdb.set_trace()`` breakpoint used to sit here; it blocked
    # non-interactive runs and required an undeclared dependency.

    new_state_dict = {}
    for key, value in state_dict.items():
        if key.startswith('backbone.text_model'):
            continue

        if key.startswith('backbone.image_model'):
            new_key = key.replace('backbone.image_model', 'backbone')
            new_state_dict[new_key] = value.clone()
        elif key.startswith('bbox_head.head_module.cls_contrasts'):
            # First four dotted components identify the module, e.g.
            # ``bbox_head.head_module.cls_contrasts.0``.
            module_key = '.'.join(key.split('.')[:4])
            weight_key = module_key + '.conv.weight'
            if weight_key in new_state_dict:
                # Already built while visiting another parameter of the
                # same module; the result would be identical.
                continue
            logit_scale = state_dict[module_key + '.logit_scale']
            bias = state_dict[module_key + '.bias']
            conv_weight = embed * logit_scale.exp()
            # presumably ``bias`` holds a single value that is broadcast to
            # one entry per caption -- TODO confirm.
            conv_bias = bias.repeat(conv_weight.shape[0])
            new_state_dict[weight_key] = conv_weight
            new_state_dict[module_key + '.conv.bias'] = conv_bias
        else:
            new_state_dict[key] = value.clone()

    return {'state_dict': new_state_dict}
|
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the conversion script.

    Positional arguments, in order: ``config``, ``checkpoint``, ``caption``
    and ``output``. Optional arguments: ``--prompt-template`` (defaults to
    ``'{}'``), ``--work-dir`` and ``--cfg-options`` (``key=value`` pairs
    handled by ``DictAction``).

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')

    cli = argparse.ArgumentParser()

    # All positionals are plain strings; registration order fixes CLI order.
    for positional in ('config', 'checkpoint', 'caption', 'output'):
        cli.add_argument(positional, type=str)

    cli.add_argument('--prompt-template', default='{}', type=str)
    cli.add_argument(
        '--work-dir',
        help='the directory to save the file containing evaluation metrics')
    cli.add_argument(
        '--cfg-options',
        action=DictAction,
        nargs='+',
        help=cfg_options_help)

    return cli.parse_args()
|
|
|
|
|
if __name__ == '__main__':
    args = parse_args()

    # Load the config and apply any ``--cfg-options`` overrides.
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir priority: CLI argument > value in the config file > a
    # directory derived from the config file name.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    cfg.load_from = args.checkpoint

    # Build the default runner unless the config names a custom runner type.
    if 'runner_type' not in cfg:
        runner = Runner.from_cfg(cfg)
    else:
        runner = RUNNERS.build(cfg)

    # Load the checkpoint weights into the model before the text model is
    # used to embed the captions.
    runner.call_hook('before_run')
    runner.load_or_resume()
    pipeline = cfg.test_dataloader.dataset.pipeline
    runner.pipeline = Compose(pipeline)
    runner.model.eval()

    new_checkpoint = convert(runner, args.caption, args.checkpoint,
                             args.prompt_template)

    # ``dirname`` is '' when the output is a bare filename, and
    # ``os.makedirs('')`` raises FileNotFoundError; only create the parent
    # directory when there is one.
    output_dir = osp.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    torch.save(new_checkpoint, args.output)
|
|