# Copyright (c) OpenMMLab. All rights reserved.
import copy

import pytest
import torch
import torch.nn as nn
from mmcv.utils import _BatchNorm

from mmaction.models import (C3D, STGCN, X3D, MobileNetV2TSM, ResNet2Plus1d,
                             ResNet3dCSN, ResNet3dSlowFast, ResNet3dSlowOnly,
                             ResNetAudio, ResNetTIN, ResNetTSM, TANet,
                             TimeSformer)
from mmaction.models.backbones.resnet_tsm import NL3DWrapper

from .base import check_norm_state, generate_backbone_demo_inputs
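# NOTE: `check_norm_state` and `generate_backbone_demo_inputs` come from the
# shared test helpers in `.base`. For reference, a minimal sketch of the
# behavior these tests assume (not the authoritative implementation):
#
#     def check_norm_state(modules, train_state):
#         """Return True iff every norm layer matches `train_state`."""
#         for mod in modules:
#             if isinstance(mod, _BatchNorm):
#                 if mod.training != train_state:
#                     return False
#         return True
#
#     def generate_backbone_demo_inputs(input_shape=(1, 3, 64, 64)):
#         """Create a random float tensor with the given shape."""
#         return torch.rand(input_shape)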
def test_x3d_backbone():
    """Test x3d backbone."""
    with pytest.raises(AssertionError):
        # In X3D: 1 <= num_stages <= 4
        X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2, num_stages=0)

    with pytest.raises(AssertionError):
        # In X3D: 1 <= num_stages <= 4
        X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2, num_stages=5)

    with pytest.raises(AssertionError):
        # len(spatial_strides) == num_stages
        X3D(gamma_w=1.0,
            gamma_b=2.25,
            gamma_d=2.2,
            spatial_strides=(1, 2),
            num_stages=4)

    with pytest.raises(AssertionError):
        # se_style in ['half', 'all']
        X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2, se_style=None)

    with pytest.raises(AssertionError):
        # se_ratio should be None or > 0
        X3D(gamma_w=1.0,
            gamma_b=2.25,
            gamma_d=2.2,
            se_style='half',
            se_ratio=0)

    # x3d_s, no pretrained, norm_eval True
    x3d_s = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2, norm_eval=True)
    x3d_s.init_weights()
    x3d_s.train()
    assert check_norm_state(x3d_s.modules(), False)

    # x3d_l, no pretrained, norm_eval True
    x3d_l = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=5.0, norm_eval=True)
    x3d_l.init_weights()
    x3d_l.train()
    assert check_norm_state(x3d_l.modules(), False)

    # x3d_s, no pretrained, norm_eval False
    x3d_s = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2, norm_eval=False)
    x3d_s.init_weights()
    x3d_s.train()
    assert check_norm_state(x3d_s.modules(), True)

    # x3d_l, no pretrained, norm_eval False
    x3d_l = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=5.0, norm_eval=False)
    x3d_l.init_weights()
    x3d_l.train()
    assert check_norm_state(x3d_l.modules(), True)

    # x3d_s, no pretrained, frozen_stages, norm_eval False
    frozen_stages = 1
    x3d_s_frozen = X3D(
        gamma_w=1.0,
        gamma_b=2.25,
        gamma_d=2.2,
        norm_eval=False,
        frozen_stages=frozen_stages)
    x3d_s_frozen.init_weights()
    x3d_s_frozen.train()
    assert x3d_s_frozen.conv1_t.bn.training is False
    for param in x3d_s_frozen.conv1_s.parameters():
        assert param.requires_grad is False
    for param in x3d_s_frozen.conv1_t.parameters():
        assert param.requires_grad is False

    for i in range(1, frozen_stages + 1):
        layer = getattr(x3d_s_frozen, f'layer{i}')
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    # test zero_init_residual, zero_init_residual is True by default
    for m in x3d_s_frozen.modules():
        if hasattr(m, 'conv3'):
            assert torch.equal(m.conv3.bn.weight,
                               torch.zeros_like(m.conv3.bn.weight))
            assert torch.equal(m.conv3.bn.bias,
                               torch.zeros_like(m.conv3.bn.bias))

    # x3d_s inference
    input_shape = (1, 3, 13, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            x3d_s_frozen = x3d_s_frozen.cuda()
            imgs_gpu = imgs.cuda()
            feat = x3d_s_frozen(imgs_gpu)
            assert feat.shape == torch.Size([1, 432, 13, 2, 2])
    else:
        feat = x3d_s_frozen(imgs)
        assert feat.shape == torch.Size([1, 432, 13, 2, 2])

    # x3d_m inference
    input_shape = (1, 3, 16, 96, 96)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            x3d_s_frozen = x3d_s_frozen.cuda()
            imgs_gpu = imgs.cuda()
            feat = x3d_s_frozen(imgs_gpu)
            assert feat.shape == torch.Size([1, 432, 16, 3, 3])
    else:
        feat = x3d_s_frozen(imgs)
        assert feat.shape == torch.Size([1, 432, 16, 3, 3])


def test_resnet2plus1d_backbone():
    """Test r2+1d backbone."""
    with pytest.raises(AssertionError):
        # r2+1d does not support inflation
        ResNet2Plus1d(50, None, pretrained2d=True)

    with pytest.raises(AssertionError):
        # r2+1d requires conv(2+1)d module
        ResNet2Plus1d(
            50, None, pretrained2d=False, conv_cfg=dict(type='Conv3d'))

    frozen_stages = 1
    r2plus1d_34_frozen = ResNet2Plus1d(
        34,
        None,
        conv_cfg=dict(type='Conv2plus1d'),
        pretrained2d=False,
        frozen_stages=frozen_stages,
        conv1_kernel=(3, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(1, 1, 1, 1),
        spatial_strides=(1, 2, 2, 2),
        temporal_strides=(1, 2, 2, 2))
    r2plus1d_34_frozen.init_weights()
    r2plus1d_34_frozen.train()
    assert r2plus1d_34_frozen.conv1.conv.bn_s.training is False
    assert r2plus1d_34_frozen.conv1.bn.training is False
    for param in r2plus1d_34_frozen.conv1.parameters():
        assert param.requires_grad is False
    for i in range(1, frozen_stages + 1):
        layer = getattr(r2plus1d_34_frozen, f'layer{i}')
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            r2plus1d_34_frozen = r2plus1d_34_frozen.cuda()
            imgs_gpu = imgs.cuda()
            feat = r2plus1d_34_frozen(imgs_gpu)
            assert feat.shape == torch.Size([1, 512, 1, 2, 2])
    else:
        feat = r2plus1d_34_frozen(imgs)
        assert feat.shape == torch.Size([1, 512, 1, 2, 2])

    r2plus1d_50_frozen = ResNet2Plus1d(
        50,
        None,
        conv_cfg=dict(type='Conv2plus1d'),
        pretrained2d=False,
        conv1_kernel=(3, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(1, 1, 1, 1),
        spatial_strides=(1, 2, 2, 2),
        temporal_strides=(1, 2, 2, 2),
        frozen_stages=frozen_stages)
    r2plus1d_50_frozen.init_weights()
    r2plus1d_50_frozen.train()
    assert r2plus1d_50_frozen.conv1.conv.bn_s.training is False
    assert r2plus1d_50_frozen.conv1.bn.training is False
    for param in r2plus1d_50_frozen.conv1.parameters():
        assert param.requires_grad is False
    for i in range(1, frozen_stages + 1):
        layer = getattr(r2plus1d_50_frozen, f'layer{i}')
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            r2plus1d_50_frozen = r2plus1d_50_frozen.cuda()
            imgs_gpu = imgs.cuda()
            feat = r2plus1d_50_frozen(imgs_gpu)
            assert feat.shape == torch.Size([1, 2048, 1, 2, 2])
    else:
        feat = r2plus1d_50_frozen(imgs)
        assert feat.shape == torch.Size([1, 2048, 1, 2, 2])
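# Background for the `conv1.conv.bn_s` assertions above: a Conv2plus1d
# module factorizes a 3D convolution into a 2D spatial convolution followed
# by a 1D temporal convolution, with a BatchNorm (`bn_s`) and ReLU between
# the two. This is an informal recap of the (2+1)D design, not a spec of
# the exact mmaction module layout.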
def test_resnet_tsm_backbone():
    """Test resnet_tsm backbone."""
    with pytest.raises(NotImplementedError):
        # shift_place must be block or blockres
        resnet_tsm_50_block = ResNetTSM(50, shift_place='Block')
        resnet_tsm_50_block.init_weights()

    from mmaction.models.backbones.resnet import Bottleneck
    from mmaction.models.backbones.resnet_tsm import TemporalShift

    input_shape = (8, 3, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)

    # resnet_tsm with depth 50
    resnet_tsm_50 = ResNetTSM(50)
    resnet_tsm_50.init_weights()
    for layer_name in resnet_tsm_50.res_layers:
        layer = getattr(resnet_tsm_50, layer_name)
        blocks = list(layer.children())
        for block in blocks:
            assert isinstance(block.conv1.conv, TemporalShift)
            assert block.conv1.conv.num_segments == resnet_tsm_50.num_segments
            assert block.conv1.conv.shift_div == resnet_tsm_50.shift_div
            assert isinstance(block.conv1.conv.net, nn.Conv2d)

    # resnet_tsm with depth 50, no pretrained, shift_place is block
    resnet_tsm_50_block = ResNetTSM(50, shift_place='block')
    resnet_tsm_50_block.init_weights()
    for layer_name in resnet_tsm_50_block.res_layers:
        layer = getattr(resnet_tsm_50_block, layer_name)
        blocks = list(layer.children())
        for block in blocks:
            assert isinstance(block, TemporalShift)
            assert block.num_segments == resnet_tsm_50_block.num_segments
            assert block.shift_div == resnet_tsm_50_block.shift_div
            assert isinstance(block.net, Bottleneck)

    # resnet_tsm with depth 50, no pretrained, use temporal_pool
    resnet_tsm_50_temporal_pool = ResNetTSM(50, temporal_pool=True)
    resnet_tsm_50_temporal_pool.init_weights()
    for layer_name in resnet_tsm_50_temporal_pool.res_layers:
        layer = getattr(resnet_tsm_50_temporal_pool, layer_name)
        blocks = list(layer.children())

        if layer_name == 'layer2':
            assert len(blocks) == 2
            assert isinstance(blocks[1], nn.MaxPool3d)
            blocks = copy.deepcopy(blocks[0])

        for block in blocks:
            assert isinstance(block.conv1.conv, TemporalShift)
            if layer_name == 'layer1':
                assert block.conv1.conv.num_segments == \
                    resnet_tsm_50_temporal_pool.num_segments
            else:
                assert block.conv1.conv.num_segments == \
                    resnet_tsm_50_temporal_pool.num_segments // 2
            assert block.conv1.conv.shift_div == resnet_tsm_50_temporal_pool.shift_div  # noqa: E501
            assert isinstance(block.conv1.conv.net, nn.Conv2d)

    # resnet_tsm with non-local module
    non_local_cfg = dict(
        sub_sample=True,
        use_scale=False,
        norm_cfg=dict(type='BN3d', requires_grad=True),
        mode='embedded_gaussian')
    non_local = ((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0))
    resnet_tsm_nonlocal = ResNetTSM(
        50, non_local=non_local, non_local_cfg=non_local_cfg)
    resnet_tsm_nonlocal.init_weights()
    for layer_name in ['layer2', 'layer3']:
        layer = getattr(resnet_tsm_nonlocal, layer_name)
        for i, _ in enumerate(layer):
            if i % 2 == 0:
                assert isinstance(layer[i], NL3DWrapper)

    resnet_tsm_50_full = ResNetTSM(
        50,
        non_local=non_local,
        non_local_cfg=non_local_cfg,
        temporal_pool=True)
    resnet_tsm_50_full.init_weights()

    # TSM forward
    feat = resnet_tsm_50(imgs)
    assert feat.shape == torch.Size([8, 2048, 2, 2])

    # TSM with non-local forward
    feat = resnet_tsm_nonlocal(imgs)
    assert feat.shape == torch.Size([8, 2048, 2, 2])

    # TSM with temporal pool forward
    feat = resnet_tsm_50_temporal_pool(imgs)
    assert feat.shape == torch.Size([4, 2048, 2, 2])

    # TSM with temporal pool + non-local forward
    input_shape = (16, 3, 32, 32)
    imgs = generate_backbone_demo_inputs(input_shape)
    feat = resnet_tsm_50_full(imgs)
    assert feat.shape == torch.Size([8, 2048, 1, 1])
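# For context, the operation that `TemporalShift` wraps around a conv is
# sketched below. `_reference_temporal_shift` is an illustrative helper
# written for this file (it is NOT imported from mmaction): a 1/shift_div
# slice of the channels is shifted one step backward in time, another slice
# one step forward, and the remaining channels are left in place, with zero
# padding at the temporal borders.
def _reference_temporal_shift(x, num_segments, shift_div=8):
    """Sketch of the TSM temporal shift on (N * num_segments, C, H, W)."""
    n, c, h, w = x.size()
    x = x.view(-1, num_segments, c, h, w)
    fold = c // shift_div
    out = torch.zeros_like(x)
    out[:, :-1, :fold] = x[:, 1:, :fold]  # shift backward in time
    out[:, 1:, fold:2 * fold] = x[:, :-1, fold:2 * fold]  # shift forward
    out[:, :, 2 * fold:] = x[:, :, 2 * fold:]  # remaining channels: no shift
    return out.view(n, c, h, w)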
def test_mobilenetv2_tsm_backbone():
    """Test mobilenetv2_tsm backbone."""
    from mmcv.cnn import ConvModule

    from mmaction.models.backbones.mobilenet_v2 import InvertedResidual
    from mmaction.models.backbones.resnet_tsm import TemporalShift

    input_shape = (8, 3, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)

    # mobilenetv2_tsm with widen_factor = 1.0
    mobilenetv2_tsm = MobileNetV2TSM()
    mobilenetv2_tsm.init_weights()
    for cur_module in mobilenetv2_tsm.modules():
        if isinstance(cur_module, InvertedResidual) and \
                len(cur_module.conv) == 3 and \
                cur_module.use_res_connect:
            assert isinstance(cur_module.conv[0], TemporalShift)
            assert cur_module.conv[0].num_segments == \
                mobilenetv2_tsm.num_segments
            assert cur_module.conv[0].shift_div == mobilenetv2_tsm.shift_div
            assert isinstance(cur_module.conv[0].net, ConvModule)

    # TSM-MobileNetV2 with widen_factor = 1.0 forward
    feat = mobilenetv2_tsm(imgs)
    assert feat.shape == torch.Size([8, 1280, 2, 2])

    # mobilenetv2 with widen_factor = 0.5 forward
    mobilenetv2_tsm_05 = MobileNetV2TSM(widen_factor=0.5)
    mobilenetv2_tsm_05.init_weights()
    feat = mobilenetv2_tsm_05(imgs)
    assert feat.shape == torch.Size([8, 1280, 2, 2])

    # mobilenetv2 with widen_factor = 1.5 forward
    mobilenetv2_tsm_15 = MobileNetV2TSM(widen_factor=1.5)
    mobilenetv2_tsm_15.init_weights()
    feat = mobilenetv2_tsm_15(imgs)
    assert feat.shape == torch.Size([8, 1920, 2, 2])


def test_slowfast_backbone():
    """Test SlowFast backbone."""
    with pytest.raises(TypeError):
        # cfg should be a dict
        ResNet3dSlowFast(None, slow_pathway=list(['foo', 'bar']))
    with pytest.raises(TypeError):
        # pretrained should be a str
        sf_50 = ResNet3dSlowFast(dict(foo='bar'))
        sf_50.init_weights()
    with pytest.raises(KeyError):
        # pathway type should be implemented
        ResNet3dSlowFast(None, slow_pathway=dict(type='resnext'))

    # test slowfast with slow inflated
    sf_50_inflate = ResNet3dSlowFast(
        None,
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained='torchvision://resnet50',
            pretrained2d=True,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1)))
    sf_50_inflate.init_weights()
    sf_50_inflate.train()

    # test slowfast with no lateral connection
    sf_50_wo_lateral = ResNet3dSlowFast(
        None,
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            lateral=False,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1)))
    sf_50_wo_lateral.init_weights()
    sf_50_wo_lateral.train()

    # slowfast w/o lateral connection inference test
    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            sf_50_wo_lateral = sf_50_wo_lateral.cuda()
            imgs_gpu = imgs.cuda()
            feat = sf_50_wo_lateral(imgs_gpu)
    else:
        feat = sf_50_wo_lateral(imgs)

    assert isinstance(feat, tuple)
    assert feat[0].shape == torch.Size([1, 2048, 1, 2, 2])
    assert feat[1].shape == torch.Size([1, 256, 8, 2, 2])

    # test slowfast with frozen stages config
    frozen_slow = 3
    sf_50 = ResNet3dSlowFast(
        None,
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            pretrained=None,
            pretrained2d=True,
            lateral=True,
            conv1_kernel=(1, 7, 7),
            dilations=(1, 1, 1, 1),
            conv1_stride_t=1,
            pool1_stride_t=1,
            inflate=(0, 0, 1, 1),
            frozen_stages=frozen_slow))
    sf_50.init_weights()
    sf_50.train()

    for stage in range(1, sf_50.slow_path.num_stages):
        lateral_name = sf_50.slow_path.lateral_connections[stage - 1]
        conv_lateral = getattr(sf_50.slow_path, lateral_name)
        for mod in conv_lateral.modules():
            if isinstance(mod, _BatchNorm):
                if stage <= frozen_slow:
                    assert mod.training is False
                else:
                    assert mod.training is True
        for param in conv_lateral.parameters():
            if stage <= frozen_slow:
                assert param.requires_grad is False
            else:
                assert param.requires_grad is True

    # test slowfast with normal config
    sf_50 = ResNet3dSlowFast(None)
    sf_50.init_weights()
    sf_50.train()

    # slowfast inference test
    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            sf_50 = sf_50.cuda()
            imgs_gpu = imgs.cuda()
            feat = sf_50(imgs_gpu)
    else:
        feat = sf_50(imgs)

    assert isinstance(feat, tuple)
    assert feat[0].shape == torch.Size([1, 2048, 1, 2, 2])
    assert feat[1].shape == torch.Size([1, 256, 8, 2, 2])
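# Reading the SlowFast shape assertions above: the backbone returns a
# (slow, fast) feature tuple. With an 8-frame input, the slow pathway is
# temporally subsampled (here down to a single frame at 2048 channels)
# while the fast pathway keeps all 8 frames at a reduced channel width
# (256). This is an informal note on the expected shapes, not a spec.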
def test_slowonly_backbone():
    """Test SlowOnly backbone."""
    with pytest.raises(AssertionError):
        # SlowOnly should contain no lateral connection
        ResNet3dSlowOnly(50, None, lateral=True)

    # test SlowOnly for PoseC3D
    so_50 = ResNet3dSlowOnly(
        depth=50,
        pretrained=None,
        in_channels=17,
        base_channels=32,
        num_stages=3,
        out_indices=(2, ),
        stage_blocks=(4, 6, 3),
        conv1_stride_s=1,
        pool1_stride_s=1,
        inflate=(0, 1, 1),
        spatial_strides=(2, 2, 2),
        temporal_strides=(1, 1, 2),
        dilations=(1, 1, 1))
    so_50.init_weights()
    so_50.train()

    # test SlowOnly with normal config
    so_50 = ResNet3dSlowOnly(50, None)
    so_50.init_weights()
    so_50.train()

    # SlowOnly inference test
    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    # parrots 3dconv is only implemented on gpu
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            so_50 = so_50.cuda()
            imgs_gpu = imgs.cuda()
            feat = so_50(imgs_gpu)
    else:
        feat = so_50(imgs)
    assert feat.shape == torch.Size([1, 2048, 8, 2, 2])


def test_resnet_csn_backbone():
    """Test resnet_csn backbone."""
    with pytest.raises(ValueError):
        # Bottleneck mode must be "ip" or "ir"
        ResNet3dCSN(152, None, bottleneck_mode='id')

    input_shape = (2, 3, 6, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)

    resnet3d_csn_frozen = ResNet3dCSN(
        152, None, bn_frozen=True, norm_eval=True)
    resnet3d_csn_frozen.train()
    for m in resnet3d_csn_frozen.modules():
        if isinstance(m, _BatchNorm):
            for param in m.parameters():
                assert param.requires_grad is False

    # Interaction-preserved channel-separated bottleneck block
    resnet3d_csn_ip = ResNet3dCSN(152, None, bottleneck_mode='ip')
    resnet3d_csn_ip.init_weights()
    resnet3d_csn_ip.train()
    for i, layer_name in enumerate(resnet3d_csn_ip.res_layers):
        layers = getattr(resnet3d_csn_ip, layer_name)
        num_blocks = resnet3d_csn_ip.stage_blocks[i]
        assert len(layers) == num_blocks
        for layer in layers:
            assert isinstance(layer.conv2, nn.Sequential)
            assert len(layer.conv2) == 2
            assert layer.conv2[1].groups == layer.planes
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            resnet3d_csn_ip = resnet3d_csn_ip.cuda()
            imgs_gpu = imgs.cuda()
            feat = resnet3d_csn_ip(imgs_gpu)
            assert feat.shape == torch.Size([2, 2048, 1, 2, 2])
    else:
        feat = resnet3d_csn_ip(imgs)
        assert feat.shape == torch.Size([2, 2048, 1, 2, 2])

    # Interaction-reduced channel-separated bottleneck block
    resnet3d_csn_ir = ResNet3dCSN(152, None, bottleneck_mode='ir')
    resnet3d_csn_ir.init_weights()
    resnet3d_csn_ir.train()
    for i, layer_name in enumerate(resnet3d_csn_ir.res_layers):
        layers = getattr(resnet3d_csn_ir, layer_name)
        num_blocks = resnet3d_csn_ir.stage_blocks[i]
        assert len(layers) == num_blocks
        for layer in layers:
            assert isinstance(layer.conv2, nn.Sequential)
            assert len(layer.conv2) == 1
            assert layer.conv2[0].groups == layer.planes
    if torch.__version__ == 'parrots':
        if torch.cuda.is_available():
            resnet3d_csn_ir = resnet3d_csn_ir.cuda()
            imgs_gpu = imgs.cuda()
            feat = resnet3d_csn_ir(imgs_gpu)
            assert feat.shape == torch.Size([2, 2048, 1, 2, 2])
    else:
        feat = resnet3d_csn_ir(imgs)
        assert feat.shape == torch.Size([2, 2048, 1, 2, 2])

    # Set training status = False
    resnet3d_csn_ip = ResNet3dCSN(152, None, bottleneck_mode='ip')
    resnet3d_csn_ip.init_weights()
    resnet3d_csn_ip.train(False)
    for module in resnet3d_csn_ip.children():
        assert module.training is False
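# Quick recap of the two CSN bottleneck modes checked above: in 'ip'
# (interaction-preserved) mode, conv2 is a 1x1x1 pointwise conv followed by
# a depthwise 3x3x3 conv, hence len(conv2) == 2; in 'ir'
# (interaction-reduced) mode only the depthwise 3x3x3 conv remains, hence
# len(conv2) == 1. In both cases the depthwise conv uses groups == planes,
# i.e. one filter per channel.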
def test_tanet_backbone():
    """Test tanet backbone."""
    with pytest.raises(NotImplementedError):
        # TA-Blocks are only based on Bottleneck block now
        tanet_18 = TANet(18, 8)
        tanet_18.init_weights()

    from mmaction.models.backbones.resnet import Bottleneck
    from mmaction.models.backbones.tanet import TABlock

    # tanet with depth 50
    tanet_50 = TANet(50, 8)
    tanet_50.init_weights()

    for layer_name in tanet_50.res_layers:
        layer = getattr(tanet_50, layer_name)
        blocks = list(layer.children())
        for block in blocks:
            assert isinstance(block, TABlock)
            assert isinstance(block.block, Bottleneck)
            assert block.tam.num_segments == block.num_segments
            assert block.tam.in_channels == block.block.conv1.out_channels

    input_shape = (8, 3, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    feat = tanet_50(imgs)
    assert feat.shape == torch.Size([8, 2048, 2, 2])

    input_shape = (16, 3, 32, 32)
    imgs = generate_backbone_demo_inputs(input_shape)
    feat = tanet_50(imgs)
    assert feat.shape == torch.Size([16, 2048, 1, 1])


def test_timesformer_backbone():
    """Test timesformer backbone."""
    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)

    # divided_space_time
    timesformer = TimeSformer(
        8, 64, 16, embed_dims=768, attention_type='divided_space_time')
    timesformer.init_weights()
    from mmaction.models.common import (DividedSpatialAttentionWithNorm,
                                        DividedTemporalAttentionWithNorm,
                                        FFNWithNorm)
    assert isinstance(timesformer.transformer_layers.layers[0].attentions[0],
                      DividedTemporalAttentionWithNorm)
    assert isinstance(timesformer.transformer_layers.layers[11].attentions[1],
                      DividedSpatialAttentionWithNorm)
    assert isinstance(timesformer.transformer_layers.layers[0].ffns[0],
                      FFNWithNorm)
    assert hasattr(timesformer, 'time_embed')
    assert timesformer.patch_embed.num_patches == 16

    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 768])

    # space_only
    timesformer = TimeSformer(
        8, 64, 16, embed_dims=512, num_heads=8, attention_type='space_only')
    timesformer.init_weights()

    assert not hasattr(timesformer, 'time_embed')
    assert timesformer.patch_embed.num_patches == 16

    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 512])

    # joint_space_time
    input_shape = (1, 3, 2, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    timesformer = TimeSformer(
        2,
        64,
        8,
        embed_dims=256,
        num_heads=8,
        attention_type='joint_space_time')
    timesformer.init_weights()

    assert hasattr(timesformer, 'time_embed')
    assert timesformer.patch_embed.num_patches == 64

    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 256])

    with pytest.raises(AssertionError):
        # unsupported attention type
        timesformer = TimeSformer(
            8, 64, 16, attention_type='wrong_attention_type')

    with pytest.raises(AssertionError):
        # Wrong transformer_layers type
        timesformer = TimeSformer(8, 64, 16, transformer_layers='wrong_type')


def test_c3d_backbone():
    """Test c3d backbone."""
    input_shape = (1, 3, 16, 24, 24)
    imgs = generate_backbone_demo_inputs(input_shape)

    # c3d inference test
    c3d = C3D(out_dim=512)
    c3d.init_weights()
    c3d.train()
    feat = c3d(imgs)
    assert feat.shape == torch.Size([1, 4096])

    # c3d with bn inference test
    c3d_bn = C3D(out_dim=512, norm_cfg=dict(type='BN3d'))
    c3d_bn.init_weights()
    c3d_bn.train()
    feat = c3d_bn(imgs)
    assert feat.shape == torch.Size([1, 4096])


def test_resnet_audio_backbone():
    """Test ResNetAudio backbone."""
    input_shape = (1, 1, 16, 16)
    spec = generate_backbone_demo_inputs(input_shape)
    # inference
    audioonly = ResNetAudio(50, None)
    audioonly.init_weights()
    audioonly.train()
    feat = audioonly(spec)
    assert feat.shape == torch.Size([1, 1024, 2, 2])
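# Note on the partial-BN check in the TIN test below: with partial_bn=True,
# only the first BatchNorm2d layer is expected to stay in training mode
# after .train(); every subsequent BN layer is frozen (eval mode, with no
# gradient on its affine parameters), which is what the `count_bn` loop
# verifies.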
@pytest.mark.skipif(
    not torch.cuda.is_available(), reason='requires CUDA support')
def test_resnet_tin_backbone():
    """Test resnet_tin backbone."""
    with pytest.raises(AssertionError):
        # num_segments should be positive
        resnet_tin = ResNetTIN(50, num_segments=-1)
        resnet_tin.init_weights()

    from mmaction.models.backbones.resnet_tin import (CombineNet,
                                                      TemporalInterlace)

    # resnet_tin with normal config
    resnet_tin = ResNetTIN(50)
    resnet_tin.init_weights()
    for layer_name in resnet_tin.res_layers:
        layer = getattr(resnet_tin, layer_name)
        blocks = list(layer.children())
        for block in blocks:
            assert isinstance(block.conv1.conv, CombineNet)
            assert isinstance(block.conv1.conv.net1, TemporalInterlace)
            assert (
                block.conv1.conv.net1.num_segments == resnet_tin.num_segments)
            assert block.conv1.conv.net1.shift_div == resnet_tin.shift_div

    # resnet_tin with partial batchnorm
    resnet_tin_pbn = ResNetTIN(50, partial_bn=True)
    resnet_tin_pbn.train()
    count_bn = 0
    for m in resnet_tin_pbn.modules():
        if isinstance(m, nn.BatchNorm2d):
            count_bn += 1
            if count_bn >= 2:
                assert m.training is False
                assert m.weight.requires_grad is False
                assert m.bias.requires_grad is False
            else:
                assert m.training is True
                assert m.weight.requires_grad is True
                assert m.bias.requires_grad is True

    input_shape = (8, 3, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape).cuda()
    resnet_tin = resnet_tin.cuda()

    # resnet_tin with normal cfg inference
    feat = resnet_tin(imgs)
    assert feat.shape == torch.Size([8, 2048, 2, 2])
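# Note on the STGCN test below: skeleton inputs have shape
# (N, C, T, V, M) = (batch, channels, frames, joints, persons). The
# backbone folds the M=2 persons into the batch dimension and temporally
# downsamples T=300 to 75, so a batch of 1 yields features of shape
# (2, 256, 75, V).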
def test_stgcn_backbone():
    """Test STGCN backbone."""
    # each (layout, num_joints) pair is tested with every partition strategy
    layouts = [
        ('coco', 17),
        ('openpose-18', 18),
        ('ntu-rgb+d', 25),
        ('ntu_edge', 24),
    ]
    for strategy in ['spatial', 'uniform', 'distance']:
        for layout, num_joints in layouts:
            input_shape = (1, 3, 300, num_joints, 2)
            skeletons = generate_backbone_demo_inputs(input_shape)

            stgcn = STGCN(
                in_channels=3,
                edge_importance_weighting=True,
                graph_cfg=dict(layout=layout, strategy=strategy))
            stgcn.init_weights()
            stgcn.train()
            feat = stgcn(skeletons)
            assert feat.shape == torch.Size([2, 256, 75, num_joints])