# Copyright (c) OpenMMLab. All rights reserved.
from copy import deepcopy

import pytest
import torch
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm

from mmpretrain.models.backbones import Conformer


def is_norm(module):
    """Check whether the module is a normalization layer."""
    return isinstance(module, (GroupNorm, _BatchNorm))


def check_norm_state(modules, train_state):
    """Check whether all BatchNorm layers are in the expected train state."""
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))


@torch.no_grad()  # To save memory
def test_conformer_backbone():

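    # Base config: the tiny ('T') arch with a small stochastic depth rate.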
    cfg_ori = dict(
        arch='T',
        drop_path_rate=0.1,
    )

    with pytest.raises(AssertionError):
        # test invalid arch
        cfg = deepcopy(cfg_ori)
        cfg['arch'] = 'unknown'
        Conformer(**cfg)

    with pytest.raises(AssertionError):
        # test arch dict missing an essential key ('depths')
        cfg = deepcopy(cfg_ori)
        cfg['arch'] = {'embed_dims': 24, 'channel_ratio': 6, 'num_heads': 9}
        Conformer(**cfg)

    # Test the Conformer tiny model with the default patch size of 16
    model = Conformer(**cfg_ori)
    model.init_weights()
    model.train()

    assert check_norm_state(model.modules(), True)

    imgs = torch.randn(1, 3, 224, 224)
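    # The backbone returns one (conv_feature, transformer_feature) pair per
    # requested output stage; [-1] selects the final stage.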
    conv_feature, transformer_feature = model(imgs)[-1]
    # base_channels * channel_ratio * 4
    assert conv_feature.shape == (1, 64 * 1 * 4)
    assert transformer_feature.shape == (1, 384)

    # Test Conformer with irregular input sizes.
    model = Conformer(**cfg_ori)
    model.init_weights()
    model.train()

    assert check_norm_state(model.modules(), True)

    imgs = torch.randn(1, 3, 241, 241)
    conv_feature, transformer_feature = model(imgs)[-1]
    # base_channels * channel_ratio * 4
    assert conv_feature.shape == (1, 64 * 1 * 4)
    assert transformer_feature.shape == (1, 384)

    imgs = torch.randn(1, 3, 321, 221)
    conv_feature, transformer_feature = model(imgs)[-1]
    # base_channels * channel_ratio * 4
    assert conv_feature.shape == (1, 64 * 1 * 4)
    assert transformer_feature.shape == (1, 384)

    # Test a custom arch Conformer without the cls token output
    cfg = deepcopy(cfg_ori)
    cfg['arch'] = {
        'embed_dims': 128,
        'depths': 15,
        'num_heads': 16,
        'channel_ratio': 3,
    }
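    # Overrides: drop the cls token from the output and use a narrower
    # conv stem.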
    cfg['with_cls_token'] = False
    cfg['base_channels'] = 32
    model = Conformer(**cfg)
    conv_feature, transformer_feature = model(imgs)[-1]
    assert conv_feature.shape == (1, 32 * 3 * 4)
    assert transformer_feature.shape == (1, 128)

    # Test Conformer with multi out indices
    cfg = deepcopy(cfg_ori)
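    # Collect features after blocks 4, 8 and 12, one from each stage.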
    cfg['out_indices'] = [4, 8, 12]
    model = Conformer(**cfg)
    outs = model(imgs)
    assert len(outs) == 3
    # stage 1: base_channels * channel_ratio
    conv_feature, transformer_feature = outs[0]
    assert conv_feature.shape == (1, 64 * 1)
    assert transformer_feature.shape == (1, 384)
    # stage 2: base_channels * channel_ratio * 2
    conv_feature, transformer_feature = outs[1]
    assert conv_feature.shape == (1, 64 * 1 * 2)
    assert transformer_feature.shape == (1, 384)
    # stage 3: base_channels * channel_ratio * 4
    conv_feature, transformer_feature = outs[2]
    assert conv_feature.shape == (1, 64 * 1 * 4)
    assert transformer_feature.shape == (1, 384)