import os.path as osp
from functools import partial

import mmcv
import pytest
import torch
from mmcv.cnn import Scale

from mmdet import digit_version
from mmdet.models.dense_heads import (FCOSHead, FSAFHead, RetinaHead, SSDHead,
                                      YOLOV3Head)

from .utils import ort_validate

data_path = osp.join(osp.dirname(__file__), 'data')

if digit_version(torch.__version__) <= digit_version('1.5.0'):
    pytest.skip(
        'ort backend does not support torch versions <= 1.5.0',
        allow_module_level=True)


def retinanet_config():
    """RetinaNet Head Config."""
    head_cfg = dict(
        stacked_convs=6,
        feat_channels=2,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]))

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = RetinaHead(
        num_classes=4, in_channels=1, test_cfg=test_cfg, **head_cfg)
    model.requires_grad_(False)
    return model


def test_retina_head_forward_single():
    """Test RetinaNet Head forward_single() in torch and onnxruntime env."""
    retina_model = retinanet_config()

    feat = torch.rand(1, retina_model.in_channels, 32, 32)
    # validate the results between torch and ort
    ort_validate(retina_model.forward_single, feat)


def test_retina_head_forward():
    """Test RetinaNet Head forward() in torch and onnxruntime env."""
    retina_model = retinanet_config()
    s = 128
    # RetinaNet head expects multiple levels of features per image
    feats = [
        torch.rand(1, retina_model.in_channels, s // (2**(i + 2)),
                   s // (2**(i + 2)))  # [32, 16, 8, 4, 2]
        for i in range(len(retina_model.anchor_generator.strides))
    ]
    ort_validate(retina_model.forward, feats)


def test_retinanet_head_get_bboxes():
    """Test RetinaNet Head get_bboxes() in torch and onnxruntime env."""
    retina_model = retinanet_config()
    s = 128
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'img_shape': (s, s, 2)
    }]
    # The data of retina_head_get_bboxes.pkl contains two parts:
    # cls_score(list(Tensor)) and bboxes(list(Tensor)),
    # where each torch.Tensor is generated by torch.rand().
    # the cls_score's size: (1, 36, 32, 32), (1, 36, 16, 16),
    # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2).
    # the bboxes' size: (1, 36, 32, 32), (1, 36, 16, 16),
    # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2)
    retina_head_data = 'retina_head_get_bboxes.pkl'
    feats = mmcv.load(osp.join(data_path, retina_head_data))
    cls_score = feats[:5]
    bboxes = feats[5:]

    retina_model.get_bboxes = partial(
        retina_model.get_bboxes, img_metas=img_metas, with_nms=False)
    ort_validate(retina_model.get_bboxes, (cls_score, bboxes))
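
# A note on the channel counts above (inferred from the config, not stored in
# the pkl): the anchor generator places scales_per_octave(3) x ratios(3) = 9
# anchors per location, and RetinaHead uses sigmoid classification by default,
# so each level carries 9 * num_classes(4) = 36 cls channels and 9 * 4 = 36
# bbox-delta channels, matching the (1, 36, H, W) tensors loaded above.
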
def yolo_config():
    """YOLOV3 Head Config."""
    head_cfg = dict(
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'))

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            conf_thr=0.005,
            nms=dict(type='nms', iou_threshold=0.45),
            max_per_img=100))

    model = YOLOV3Head(
        num_classes=4,
        in_channels=[1, 1, 1],
        out_channels=[16, 8, 4],
        test_cfg=test_cfg,
        **head_cfg)
    model.requires_grad_(False)
    # the yolov3 head needs eval() for deterministic inference
    model.cpu().eval()
    return model


def test_yolov3_head_forward():
    """Test YOLOV3 Head forward() in torch and ort env."""
    yolo_model = yolo_config()

    # YOLOV3 head expects multiple levels of features per image
    feats = [
        torch.rand(1, 1, 64 // (2**(i + 2)), 64 // (2**(i + 2)))
        for i in range(len(yolo_model.in_channels))
    ]
    ort_validate(yolo_model.forward, feats)


def test_yolov3_head_get_bboxes():
    """Test YOLOV3 Head get_bboxes() in torch and ort env."""
    yolo_model = yolo_config()
    s = 128
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    # The data of yolov3_head_get_bboxes.pkl contains
    # a list of torch.Tensor, where each torch.Tensor
    # is generated by torch.rand and each tensor size is:
    # (1, 27, 32, 32), (1, 27, 16, 16), (1, 27, 8, 8).
    yolo_head_data = 'yolov3_head_get_bboxes.pkl'
    pred_maps = mmcv.load(osp.join(data_path, yolo_head_data))

    yolo_model.get_bboxes = partial(
        yolo_model.get_bboxes, img_metas=img_metas, with_nms=False)
    ort_validate(yolo_model.get_bboxes, pred_maps)
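
# Likewise for YOLOV3 (inferred from the config): each level predicts 3 base
# anchors, and every anchor carries 4 box offsets + 1 objectness score +
# num_classes(4) class scores, i.e. 3 * (5 + 4) = 27 channels per location,
# matching the (1, 27, H, W) pred_maps loaded above.
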
def fcos_config():
    """FCOS Head Config."""
    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = FCOSHead(num_classes=4, in_channels=1, test_cfg=test_cfg)
    model.requires_grad_(False)
    return model


def test_fcos_head_forward_single():
    """Test FCOS Head forward_single() in torch and ort env."""
    fcos_model = fcos_config()

    feat = torch.rand(1, fcos_model.in_channels, 32, 32)
    fcos_model.forward_single = partial(
        fcos_model.forward_single,
        scale=Scale(1.0).requires_grad_(False),
        stride=(4, ))
    ort_validate(fcos_model.forward_single, feat)


def test_fcos_head_forward():
    """Test FCOS Head forward() with multi-level feature maps."""
    fcos_model = fcos_config()
    s = 128
    feats = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    ort_validate(fcos_model.forward, feats)


def test_fcos_head_get_bboxes():
    """Test FCOS Head get_bboxes() in ort env."""
    fcos_model = fcos_config()
    s = 128
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    cls_scores = [
        torch.rand(1, fcos_model.num_classes, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    bboxes = [
        torch.rand(1, 4, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]
    centerness = [
        torch.rand(1, 1, s // feat_size, s // feat_size)
        for feat_size in [4, 8, 16, 32, 64]
    ]

    fcos_model.get_bboxes = partial(
        fcos_model.get_bboxes, img_metas=img_metas, with_nms=False)
    ort_validate(fcos_model.get_bboxes, (cls_scores, bboxes, centerness))


def fsaf_config():
    """FSAF Head Config."""
    cfg = dict(
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=1,
            scales_per_octave=1,
            ratios=[1.0],
            strides=[8, 16, 32, 64, 128]))

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = FSAFHead(num_classes=4, in_channels=1, test_cfg=test_cfg, **cfg)
    model.requires_grad_(False)
    return model


def test_fsaf_head_forward_single():
    """Test FSAF Head forward_single() in torch and onnxruntime env."""
    fsaf_model = fsaf_config()

    feat = torch.rand(1, fsaf_model.in_channels, 32, 32)
    ort_validate(fsaf_model.forward_single, feat)


def test_fsaf_head_forward():
    """Test FSAF Head forward() in torch and onnxruntime env."""
    fsaf_model = fsaf_config()
    s = 128
    feats = [
        torch.rand(1, fsaf_model.in_channels, s // (2**(i + 2)),
                   s // (2**(i + 2)))
        for i in range(len(fsaf_model.anchor_generator.strides))
    ]
    ort_validate(fsaf_model.forward, feats)


def test_fsaf_head_get_bboxes():
    """Test FSAF Head get_bboxes() in torch and onnxruntime env."""
    fsaf_model = fsaf_config()
    s = 256
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'img_shape': (s, s, 2)
    }]
    # The data of fsaf_head_get_bboxes.pkl contains two parts:
    # cls_score(list(Tensor)) and bboxes(list(Tensor)),
    # where each torch.Tensor is generated by torch.rand().
    # the cls_score's size: (1, 4, 64, 64), (1, 4, 32, 32),
    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).
    # the bboxes' size: (1, 4, 64, 64), (1, 4, 32, 32),
    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).
    fsaf_head_data = 'fsaf_head_get_bboxes.pkl'
    feats = mmcv.load(osp.join(data_path, fsaf_head_data))
    cls_score = feats[:5]
    bboxes = feats[5:]

    fsaf_model.get_bboxes = partial(
        fsaf_model.get_bboxes, img_metas=img_metas, with_nms=False)
    ort_validate(fsaf_model.get_bboxes, (cls_score, bboxes))
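
# FSAF is configured with a single anchor per location (octave_base_scale=1,
# scales_per_octave=1, ratios=[1.0]), so each level carries
# 1 * num_classes(4) = 4 cls channels and 1 * 4 = 4 bbox-delta channels,
# matching the (1, 4, H, W) tensors loaded above.
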
def ssd_config():
    """SSD Head Config."""
    cfg = dict(
        anchor_generator=dict(
            type='SSDAnchorGenerator',
            scale_major=False,
            input_size=300,
            basesize_ratio_range=(0.15, 0.9),
            strides=[8, 16, 32, 64, 100, 300],
            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]))

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            nms=dict(type='nms', iou_threshold=0.45),
            min_bbox_size=0,
            score_thr=0.02,
            max_per_img=200))

    model = SSDHead(
        num_classes=4,
        in_channels=(4, 8, 4, 2, 2, 2),
        test_cfg=test_cfg,
        **cfg)
    model.requires_grad_(False)
    return model


def test_ssd_head_forward():
    """Test SSD Head forward() in torch and onnxruntime env."""
    ssd_model = ssd_config()

    # the standard SSD300 feature map sizes, one per level
    featmap_size = [38, 19, 10, 5, 3, 1]
    feats = [
        torch.rand(1, ssd_model.in_channels[i], featmap_size[i],
                   featmap_size[i]) for i in range(len(ssd_model.in_channels))
    ]
    ort_validate(ssd_model.forward, feats)


def test_ssd_head_get_bboxes():
    """Test SSD Head get_bboxes() in torch and onnxruntime env."""
    ssd_model = ssd_config()
    s = 300
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'scale_factor': 1,
        'pad_shape': (s, s, 3),
        'img_shape': (s, s, 2)
    }]
    # The data of ssd_head_get_bboxes.pkl contains two parts:
    # cls_score(list(Tensor)) and bboxes(list(Tensor)),
    # where each torch.Tensor is generated by torch.rand().
    # the cls_score's size: (1, 20, 38, 38), (1, 30, 19, 19),
    # (1, 30, 10, 10), (1, 30, 5, 5), (1, 20, 3, 3), (1, 20, 1, 1).
    # the bboxes' size: (1, 16, 38, 38), (1, 24, 19, 19),
    # (1, 24, 10, 10), (1, 24, 5, 5), (1, 16, 3, 3), (1, 16, 1, 1).
    ssd_head_data = 'ssd_head_get_bboxes.pkl'
    feats = mmcv.load(osp.join(data_path, ssd_head_data))
    cls_score = feats[:6]
    bboxes = feats[6:]

    ssd_model.get_bboxes = partial(
        ssd_model.get_bboxes, img_metas=img_metas, with_nms=False)
    ort_validate(ssd_model.get_bboxes, (cls_score, bboxes))
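
# For reference, the SSD channel counts follow the same pattern: with ratios
# [[2], [2, 3], [2, 3], [2, 3], [2], [2]] the anchor generator places
# (4, 6, 6, 6, 4, 4) anchors per location, and SSD classifies with softmax
# over num_classes(4) + 1 background = 5 outputs, so the cls channels are
# (20, 30, 30, 30, 20, 20) and the bbox channels are 4 * anchors =
# (16, 24, 24, 24, 16, 16), matching the pkl sizes documented above.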