Commit 1baf0566 authored by limm's avatar limm
Browse files

add tests part

parent 495d9ed9
Pipeline #2800 canceled with stages
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmpretrain.models.backbones import ResNeXt
from mmpretrain.models.backbones.resnext import Bottleneck as BottleneckX
def test_bottleneck():
    """Sanity checks for the ResNeXt bottleneck block."""
    # An unsupported style keyword must be rejected.
    with pytest.raises(AssertionError):
        # Style must be in ['pytorch', 'caffe']
        BottleneckX(64, 64, groups=32, width_per_group=4, style='tensorflow')

    # Structure: the grouped 3x3 conv carries the stride and group count.
    block = BottleneckX(
        64, 256, groups=32, width_per_group=4, stride=2, style='pytorch')
    assert block.conv2.stride == (2, 2)
    assert block.conv2.groups == 32
    assert block.conv2.out_channels == 128

    # A stride-1 forward pass keeps the input shape.
    block = BottleneckX(64, 64, base_channels=16, groups=32, width_per_group=4)
    inputs = torch.randn(1, 64, 56, 56)
    outputs = block(inputs)
    assert outputs.shape == torch.Size([1, 64, 56, 56])
def test_resnext():
    """Sanity checks for the ResNeXt backbone."""
    # Unsupported depths raise a KeyError.
    with pytest.raises(KeyError):
        # ResNeXt depth should be in [50, 101, 152]
        ResNeXt(depth=18)

    # 32x4d configuration with all four stages requested.
    model = ResNeXt(
        depth=50, groups=32, width_per_group=4, out_indices=(0, 1, 2, 3))
    for m in model.modules():
        if isinstance(m, BottleneckX):
            assert m.conv2.groups == 32
    model.init_weights()
    model.train()

    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 4
    expected = [(1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14),
                (1, 2048, 7, 7)]
    for out, shape in zip(feat, expected):
        assert out.shape == torch.Size(shape)

    # Only the final stage requested.
    model = ResNeXt(depth=50, groups=32, width_per_group=4, out_indices=(3, ))
    for m in model.modules():
        if isinstance(m, BottleneckX):
            assert m.conv2.groups == 32
    model.init_weights()
    model.train()

    feat = model(torch.randn(1, 3, 224, 224))
    assert len(feat) == 1
    assert feat[0].shape == torch.Size([1, 2048, 7, 7])
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from copy import deepcopy
from unittest import TestCase
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from mmpretrain.models.backbones import RevVisionTransformer
from .utils import timm_resize_pos_embed
class TestRevVisionTransformer(TestCase):
    """Unit tests for the RevVisionTransformer backbone."""

    def setUp(self):
        # Base config shared by every test; per-case copies are mutated.
        self.cfg = dict(
            arch='b', img_size=224, patch_size=16, drop_path_rate=0.1)

    def test_structure(self):
        # An unknown arch string must be rejected.
        with self.assertRaisesRegex(AssertionError, 'not in default archs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            RevVisionTransformer(**cfg)

        # A custom arch dict missing required keys must be rejected.
        with self.assertRaisesRegex(AssertionError, 'Custom arch needs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'num_layers': 24,
                'num_heads': 16,
                'feedforward_channels': 4096
            }
            RevVisionTransformer(**cfg)

        # A complete custom arch is honoured field-by-field.
        cfg = deepcopy(self.cfg)
        cfg['arch'] = {
            'embed_dims': 128,
            'num_layers': 24,
            'num_heads': 16,
            'feedforward_channels': 1024
        }
        model = RevVisionTransformer(**cfg)
        self.assertEqual(model.embed_dims, 128)
        self.assertEqual(model.num_layers, 24)
        for layer in model.layers:
            self.assertEqual(layer.attn.num_heads, 16)
            self.assertEqual(layer.ffn.feedforward_channels, 1024)

        # Default 'b' arch: 12 layers with ViT-B dimensions.
        cfg = deepcopy(self.cfg)
        model = RevVisionTransformer(**cfg)
        self.assertEqual(len(model.layers), 12)
        dpr_inc = 0.1 / (12 - 1)
        dpr = 0
        for layer in model.layers:
            self.assertEqual(layer.attn.embed_dims, 768)
            self.assertEqual(layer.attn.num_heads, 12)
            self.assertEqual(layer.ffn.feedforward_channels, 3072)
            # self.assertAlmostEqual(layer.attn.out_drop.drop_prob, dpr)
            # self.assertAlmostEqual(layer.ffn.dropout_layer.drop_prob, dpr)
            dpr += dpr_inc

    def test_init_weights(self):
        # A custom init_cfg should be applied by init_weights().
        cfg = deepcopy(self.cfg)
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = RevVisionTransformer(**cfg)
        ori_weight = model.patch_embed.projection.weight.clone().detach()
        # The pos_embed is all zero before initialization.
        self.assertTrue(torch.allclose(model.pos_embed, torch.tensor(0.)))

        model.init_weights()
        initialized_weight = model.patch_embed.projection.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))
        self.assertFalse(torch.allclose(model.pos_embed, torch.tensor(0.)))

        # Round-trip through a checkpoint preserves pos_embed.
        pretrain_pos_embed = model.pos_embed.clone().detach()
        tmpdir = tempfile.gettempdir()
        checkpoint = os.path.join(tmpdir, 'test.pth')
        save_checkpoint(model.state_dict(), checkpoint)
        cfg = deepcopy(self.cfg)
        model = RevVisionTransformer(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        self.assertTrue(torch.allclose(model.pos_embed, pretrain_pos_embed))

        # Loading into a different img_size resizes pos_embed.
        cfg = deepcopy(self.cfg)
        cfg['img_size'] = 384
        model = RevVisionTransformer(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        resized_pos_embed = timm_resize_pos_embed(
            pretrain_pos_embed, model.pos_embed, num_tokens=0)
        self.assertTrue(torch.allclose(model.pos_embed, resized_pos_embed))

        os.remove(checkpoint)

    def test_forward(self):
        imgs = torch.randn(1, 3, 224, 224)

        # avg_featmap output without a cls token.
        cfg = deepcopy(self.cfg)
        cfg['with_cls_token'] = False
        cfg['out_type'] = 'avg_featmap'
        model = RevVisionTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        patch_token = outs[-1]
        # The reversible ViT concatenates two streams -> doubled channels.
        self.assertEqual(patch_token.shape, (1, 768 * 2))

        # Forward with dynamic input sizes.
        imgs1 = torch.randn(1, 3, 224, 224)
        imgs2 = torch.randn(1, 3, 256, 256)
        imgs3 = torch.randn(1, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        model = RevVisionTransformer(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            avg_featmap = outs[-1]
            self.assertEqual(avg_featmap.shape, (1, 768 * 2))
# Copyright (c) OpenMMLab. All rights reserved.
from copy import deepcopy
from unittest import TestCase
import torch
import torch.nn as nn
from mmpretrain.models.backbones import RIFormer
from mmpretrain.models.backbones.riformer import RIFormerBlock
class TestRIFormer(TestCase):
    """Unit tests for the RIFormer backbone."""

    def setUp(self):
        # Base config shared by every test; per-case copies are mutated.
        arch = 's12'
        self.cfg = dict(arch=arch, drop_path_rate=0.1)
        self.arch = RIFormer.arch_settings[arch]

    def test_arch(self):
        # Test invalid default arch.
        with self.assertRaisesRegex(AssertionError, 'Unavailable arch'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            RIFormer(**cfg)

        # Test invalid custom arch (missing the required 'layers' key).
        with self.assertRaisesRegex(AssertionError, 'must have "layers"'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'embed_dims': 96,
                'num_heads': [3, 6, 12, 16],
            }
            RIFormer(**cfg)

        # Test a complete custom arch, checked stage-by-stage.
        cfg = deepcopy(self.cfg)
        layers = [2, 2, 4, 2]
        embed_dims = [6, 12, 6, 12]
        mlp_ratios = [2, 3, 4, 4]
        layer_scale_init_value = 1e-4
        cfg['arch'] = dict(
            layers=layers,
            embed_dims=embed_dims,
            mlp_ratios=mlp_ratios,
            layer_scale_init_value=layer_scale_init_value,
        )
        model = RIFormer(**cfg)
        for i, stage in enumerate(model.network):
            if not isinstance(stage, RIFormerBlock):
                continue
            self.assertEqual(len(stage), layers[i])
            self.assertEqual(stage[0].mlp.fc1.in_channels, embed_dims[i])
            self.assertEqual(stage[0].mlp.fc1.out_channels,
                             embed_dims[i] * mlp_ratios[i])
            self.assertTrue(
                torch.allclose(stage[0].layer_scale_1,
                               torch.tensor(layer_scale_init_value)))
            self.assertTrue(
                torch.allclose(stage[0].layer_scale_2,
                               torch.tensor(layer_scale_init_value)))

    def test_init_weights(self):
        # A custom init_cfg should be applied by init_weights().
        cfg = deepcopy(self.cfg)
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = RIFormer(**cfg)
        ori_weight = model.patch_embed.proj.weight.clone().detach()

        model.init_weights()
        initialized_weight = model.patch_embed.proj.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))

    def test_forward(self):
        imgs = torch.randn(1, 3, 224, 224)

        # Default: only the final stage feature map is returned.
        cfg = deepcopy(self.cfg)
        model = RIFormer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 512, 7, 7))

        # Test multiple output indices.
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = (0, 2, 4, 6)
        model = RIFormer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 4)
        for dim, stride, out in zip(self.arch['embed_dims'], [1, 2, 4, 8],
                                    outs):
            self.assertEqual(out.shape, (1, dim, 56 // stride, 56 // stride))

    def test_repameterization(self):
        # Compare eval outputs of "train" mode and "deploy" mode: the
        # re-parameterized model must reproduce the original predictions.
        imgs = torch.randn(1, 3, 224, 224)
        gap = nn.AdaptiveAvgPool2d(output_size=(1))
        fc = nn.Linear(self.arch['embed_dims'][3], 10)

        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = (0, 2, 4, 6)
        model = RIFormer(**cfg)
        model.eval()
        feats = model(imgs)
        self.assertIsInstance(feats, tuple)
        feat = feats[-1]
        pred = fc(gap(feat).flatten(1))

        model.switch_to_deploy()
        for m in model.modules():
            if isinstance(m, RIFormerBlock):
                assert m.deploy is True
        feats_deploy = model(imgs)
        pred_deploy = fc(gap(feats_deploy[-1]).flatten(1))
        # BUG FIX: previously the torch.allclose results were computed but
        # discarded, so equivalence was never actually verified. Assert
        # them (with a small tolerance for re-parameterization round-off).
        for i in range(4):
            self.assertTrue(
                torch.allclose(feats[i], feats_deploy[i], atol=1e-5))
        self.assertTrue(torch.allclose(pred, pred_deploy, atol=1e-5))

    def test_structure(self):
        # Test drop_path_rate decay across blocks.
        cfg = deepcopy(self.cfg)
        cfg['drop_path_rate'] = 0.2
        model = RIFormer(**cfg)
        layers = self.arch['layers']
        for i, block in enumerate(model.network):
            expect_prob = 0.2 / (sum(layers) - 1) * i
            if hasattr(block, 'drop_path'):
                if expect_prob == 0:
                    self.assertIsInstance(block.drop_path, torch.nn.Identity)
                else:
                    self.assertAlmostEqual(block.drop_path.drop_prob,
                                           expect_prob)

        # Test with the first stage frozen.
        cfg = deepcopy(self.cfg)
        frozen_stages = 1
        cfg['frozen_stages'] = frozen_stages
        cfg['out_indices'] = (0, 2, 4, 6)
        model = RIFormer(**cfg)
        model.init_weights()
        model.train()

        # The patch_embed and first stage should not require grad.
        self.assertFalse(model.patch_embed.training)
        for param in model.patch_embed.parameters():
            self.assertFalse(param.requires_grad)
        for i in range(frozen_stages):
            module = model.network[i]
            for param in module.parameters():
                self.assertFalse(param.requires_grad)
        for param in model.norm0.parameters():
            self.assertFalse(param.requires_grad)

        # The later stages should require grad.
        for i in range(frozen_stages + 1, 7):
            module = model.network[i]
            for param in module.parameters():
                self.assertTrue(param.requires_grad)
            if hasattr(model, f'norm{i}'):
                norm = getattr(model, f'norm{i}')
                for param in norm.parameters():
                    self.assertTrue(param.requires_grad)
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.nn.modules import AvgPool2d
from torch.nn.modules.batchnorm import _BatchNorm
from mmpretrain.models.backbones import SEResNet
from mmpretrain.models.backbones.resnet import ResLayer
from mmpretrain.models.backbones.seresnet import SEBottleneck, SELayer
def all_zeros(modules):
    """Return True if the module's weight (and bias, if present) is all zero."""
    zero_weight = torch.equal(modules.weight.data,
                              torch.zeros_like(modules.weight.data))
    zero_bias = True
    if hasattr(modules, 'bias'):
        zero_bias = torch.equal(modules.bias.data,
                                torch.zeros_like(modules.bias.data))
    return zero_weight and zero_bias
def check_norm_state(modules, train_state):
    """Check if norm layer is in correct train state."""
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))
def test_selayer():
    """Test the squeeze-and-excitation layer."""
    x = torch.randn(1, 64, 56, 56)

    # Default reduction ratio: shape is preserved.
    layer = SELayer(64)
    assert layer(x).shape == torch.Size([1, 64, 56, 56])

    # Custom reduction ratio: shape is still preserved.
    layer = SELayer(64, ratio=8)
    assert layer(x).shape == torch.Size([1, 64, 56, 56])
def test_bottleneck():
    """Test the SEBottleneck block."""
    # An unsupported style keyword must be rejected.
    with pytest.raises(AssertionError):
        # Style must be in ['pytorch', 'caffe']
        SEBottleneck(64, 64, style='tensorflow')

    # Gradient checkpointing forward.
    block = SEBottleneck(64, 64, with_cp=True)
    assert block.with_cp
    out = block(torch.randn(1, 64, 56, 56))
    assert out.shape == torch.Size([1, 64, 56, 56])

    # 'pytorch' style strides on conv2, 'caffe' on conv1.
    block = SEBottleneck(64, 256, stride=2, style='pytorch')
    assert block.conv1.stride == (1, 1)
    assert block.conv2.stride == (2, 2)
    block = SEBottleneck(64, 256, stride=2, style='caffe')
    assert block.conv1.stride == (2, 2)
    assert block.conv2.stride == (1, 1)

    # A plain forward pass keeps the input shape.
    block = SEBottleneck(64, 64)
    out = block(torch.randn(1, 64, 56, 56))
    assert out.shape == torch.Size([1, 64, 56, 56])
def test_res_layer():
    """Test ResLayer built from SEBottleneck units."""
    # Three blocks, w/o downsample (in == out channels).
    layer = ResLayer(SEBottleneck, 3, 64, 64, se_ratio=16)
    assert len(layer) == 3
    for i in range(len(layer)):
        assert layer[i].conv1.in_channels == 64
        assert layer[i].conv1.out_channels == 16
        assert layer[i].downsample is None
    x = torch.randn(1, 64, 56, 56)
    assert layer(x).shape == torch.Size([1, 64, 56, 56])

    # Channel expansion triggers a downsample on the first block only.
    layer = ResLayer(SEBottleneck, 3, 64, 256, se_ratio=16)
    assert layer[0].downsample[0].out_channels == 256
    for i in range(1, len(layer)):
        assert layer[i].downsample is None
    x = torch.randn(1, 64, 56, 56)
    assert layer(x).shape == torch.Size([1, 256, 56, 56])

    # stride=2 halves the spatial resolution.
    layer = ResLayer(SEBottleneck, 3, 64, 256, stride=2, se_ratio=8)
    assert layer[0].downsample[0].out_channels == 256
    assert layer[0].downsample[0].stride == (2, 2)
    for i in range(1, len(layer)):
        assert layer[i].downsample is None
    x = torch.randn(1, 64, 56, 56)
    assert layer(x).shape == torch.Size([1, 256, 28, 28])

    # avg_down=True puts an AvgPool2d before a stride-1 conv.
    layer = ResLayer(
        SEBottleneck, 3, 64, 256, stride=2, avg_down=True, se_ratio=8)
    assert isinstance(layer[0].downsample[0], AvgPool2d)
    assert layer[0].downsample[1].out_channels == 256
    assert layer[0].downsample[1].stride == (1, 1)
    for i in range(1, len(layer)):
        assert layer[i].downsample is None
    x = torch.randn(1, 64, 56, 56)
    assert layer(x).shape == torch.Size([1, 256, 28, 28])
def test_seresnet():
    """Test SEResNet backbone."""
    stage_shapes = [(1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14),
                    (1, 2048, 7, 7)]

    with pytest.raises(KeyError):
        # SEResNet depth should be in [50, 101, 152]
        SEResNet(20)

    with pytest.raises(AssertionError):
        # In SEResNet: 1 <= num_stages <= 4
        SEResNet(50, num_stages=0)

    with pytest.raises(AssertionError):
        # In SEResNet: 1 <= num_stages <= 4
        SEResNet(50, num_stages=5)

    with pytest.raises(AssertionError):
        # len(strides) == len(dilations) == num_stages
        SEResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)

    with pytest.raises(TypeError):
        # pretrained must be a string path
        model = SEResNet(50)
        model.init_weights(pretrained=0)

    with pytest.raises(AssertionError):
        # Style must be in ['pytorch', 'caffe']
        SEResNet(50, style='tensorflow')

    # norm_eval=True keeps BatchNorm layers in eval mode after train().
    model = SEResNet(50, norm_eval=True)
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), False)

    # Loading torchvision pretrained weights.
    model = SEResNet(
        depth=50,
        norm_eval=True,
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), False)

    # With the first stage frozen, its params and BN stats stay fixed.
    frozen_stages = 1
    model = SEResNet(50, frozen_stages=frozen_stages)
    model.init_weights()
    model.train()
    assert model.norm1.training is False
    for layer in [model.conv1, model.norm1]:
        for param in layer.parameters():
            assert param.requires_grad is False
    for i in range(1, frozen_stages + 1):
        layer = getattr(model, f'layer{i}')
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    imgs = torch.randn(1, 3, 224, 224)

    # BatchNorm forward with all four stages out.
    model = SEResNet(50, out_indices=(0, 1, 2, 3))
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 4
    for out, shape in zip(feat, stage_shapes):
        assert out.shape == torch.Size(shape)

    # First three stages out.
    model = SEResNet(50, out_indices=(0, 1, 2))
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 3
    for out, shape in zip(feat, stage_shapes[:3]):
        assert out.shape == torch.Size(shape)

    # Top feature map only.
    model = SEResNet(50, out_indices=(3, ))
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == torch.Size([1, 2048, 7, 7])

    # Gradient checkpointing forward.
    model = SEResNet(50, out_indices=(0, 1, 2, 3), with_cp=True)
    for m in model.modules():
        if isinstance(m, SEBottleneck):
            assert m.with_cp
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 4
    for out, shape in zip(feat, stage_shapes):
        assert out.shape == torch.Size(shape)

    # zero_init_residual zeroes the last norm of each bottleneck.
    model = SEResNet(50, out_indices=(0, 1, 2, 3), zero_init_residual=True)
    model.init_weights()
    for m in model.modules():
        if isinstance(m, SEBottleneck):
            assert all_zeros(m.norm3)
    model.train()
    feat = model(imgs)
    assert len(feat) == 4
    for out, shape in zip(feat, stage_shapes):
        assert out.shape == torch.Size(shape)
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmpretrain.models.backbones import SEResNeXt
from mmpretrain.models.backbones.seresnext import SEBottleneck as SEBottleneckX
def test_bottleneck():
    """Test the SEResNeXt bottleneck block."""
    # An unsupported style keyword must be rejected.
    with pytest.raises(AssertionError):
        # Style must be in ['pytorch', 'caffe']
        SEBottleneckX(64, 64, groups=32, width_per_group=4, style='tensorflow')

    # Grouped structure, 32x4d.
    block = SEBottleneckX(
        64, 256, groups=32, width_per_group=4, stride=2, style='pytorch')
    assert block.width_per_group == 4
    assert block.conv2.stride == (2, 2)
    assert block.conv2.groups == 32
    assert block.conv2.out_channels == 128
    assert block.conv2.out_channels == block.mid_channels

    # groups=1 falls back to the plain bottleneck width.
    block = SEBottleneckX(
        64, 256, groups=1, width_per_group=4, stride=2, style='pytorch')
    assert block.conv2.stride == (2, 2)
    assert block.conv2.groups == 1
    assert block.conv2.out_channels == 64
    assert block.mid_channels == 64
    assert block.conv2.out_channels == block.mid_channels

    # Forward pass keeps the input shape.
    block = SEBottleneckX(
        64, 64, base_channels=16, groups=32, width_per_group=4)
    out = block(torch.randn(1, 64, 56, 56))
    assert out.shape == torch.Size([1, 64, 56, 56])
def test_seresnext():
    """Test the SEResNeXt backbone."""
    with pytest.raises(KeyError):
        # SEResNeXt depth should be in [50, 101, 152]
        SEResNeXt(depth=18)

    imgs = torch.randn(1, 3, 224, 224)

    # 32x4d configuration with all four stages requested.
    model = SEResNeXt(
        depth=50, groups=32, width_per_group=4, out_indices=(0, 1, 2, 3))
    for m in model.modules():
        if isinstance(m, SEBottleneckX):
            assert m.conv2.groups == 32
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 4
    expected = [(1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14),
                (1, 2048, 7, 7)]
    for out, shape in zip(feat, expected):
        assert out.shape == torch.Size(shape)

    # Only the final stage requested.
    model = SEResNeXt(
        depth=50, groups=32, width_per_group=4, out_indices=(3, ))
    for m in model.modules():
        if isinstance(m, SEBottleneckX):
            assert m.conv2.groups == 32
    model.init_weights()
    model.train()
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == torch.Size([1, 2048, 7, 7])
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm
from mmpretrain.models.backbones import ShuffleNetV1
from mmpretrain.models.backbones.shufflenet_v1 import ShuffleUnit
def is_block(modules):
    """Check if is ResNet building block."""
    return isinstance(modules, (ShuffleUnit, ))
def is_norm(modules):
    """Check if is one of the norms."""
    return isinstance(modules, (GroupNorm, _BatchNorm))
def check_norm_state(modules, train_state):
    """Check if norm layer is in correct train state."""
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))
def test_shufflenetv1_shuffleuint():
    """Test the ShuffleUnit building block of ShuffleNetV1."""
    with pytest.raises(ValueError):
        # combine must be in ['add', 'concat']
        ShuffleUnit(24, 16, groups=3, first_block=True, combine='test')

    with pytest.raises(AssertionError):
        # in_channels must be equal to out_channels when combine='add'
        ShuffleUnit(64, 24, groups=4, first_block=True, combine='add')

    # combine='add' keeps both channels and resolution.
    block = ShuffleUnit(24, 24, groups=3, first_block=True, combine='add')
    out = block(torch.randn(1, 24, 56, 56))
    assert out.shape == torch.Size((1, 24, 56, 56))

    # combine='concat' grows channels and halves the resolution.
    block = ShuffleUnit(24, 240, groups=3, first_block=True, combine='concat')
    out = block(torch.randn(1, 24, 56, 56))
    assert out.shape == torch.Size((1, 240, 28, 28))

    # Gradient checkpointing path (requires a grad-enabled input).
    block = ShuffleUnit(
        24, 24, groups=3, first_block=True, combine='add', with_cp=True)
    assert block.with_cp
    x = torch.randn(1, 24, 56, 56)
    x.requires_grad = True
    assert block(x).shape == torch.Size((1, 24, 56, 56))
def test_shufflenetv1_backbone():
    """Test the ShuffleNetV1 backbone."""
    with pytest.raises(ValueError):
        # frozen_stages must be in range(-1, 4)
        ShuffleNetV1(frozen_stages=10)

    with pytest.raises(ValueError):
        # the item in out_indices must be in range(0, 4)
        ShuffleNetV1(out_indices=[5])

    with pytest.raises(ValueError):
        # groups must be in [1, 2, 3, 4, 8]
        ShuffleNetV1(groups=10)

    with pytest.raises(TypeError):
        # pretrained must be str or None
        model = ShuffleNetV1()
        model.init_weights(pretrained=1)

    # A freshly trained model keeps BatchNorm in training mode.
    model = ShuffleNetV1()
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), True)

    # With the first stage frozen, its params and BN stats stay fixed.
    frozen_stages = 1
    model = ShuffleNetV1(frozen_stages=frozen_stages, out_indices=(0, 1, 2))
    model.init_weights()
    model.train()
    for param in model.conv1.parameters():
        assert param.requires_grad is False
    for i in range(frozen_stages):
        layer = model.layers[i]
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    # Forward pass for every supported group number; the copy-pasted
    # per-group sections are folded into one table-driven loop with the
    # exact same assertions.
    stage_channels = {
        1: (144, 288, 576),
        2: (200, 400, 800),
        3: (240, 480, 960),
        4: (272, 544, 1088),
        8: (384, 768, 1536),
    }
    imgs = torch.randn(1, 3, 224, 224)
    for groups, channels in stage_channels.items():
        model = ShuffleNetV1(groups=groups, out_indices=(0, 1, 2))
        model.init_weights()
        model.train()
        for m in model.modules():
            if is_norm(m):
                assert isinstance(m, _BatchNorm)
        feat = model(imgs)
        assert len(feat) == 3
        for out, ch, size in zip(feat, channels, (28, 14, 7)):
            assert out.shape == torch.Size((1, ch, size, size))

    # GroupNorm instead of BatchNorm.
    model = ShuffleNetV1(
        groups=3,
        norm_cfg=dict(type='GN', num_groups=2, requires_grad=True),
        out_indices=(0, 1, 2))
    model.init_weights()
    model.train()
    for m in model.modules():
        if is_norm(m):
            assert isinstance(m, GroupNorm)
    feat = model(imgs)
    assert len(feat) == 3
    for out, ch, size in zip(feat, (240, 480, 960), (28, 14, 7)):
        assert out.shape == torch.Size((1, ch, size, size))

    # Subset of output stages (1 and 2).
    model = ShuffleNetV1(groups=3, out_indices=(1, 2))
    model.init_weights()
    model.train()
    for m in model.modules():
        if is_norm(m):
            assert isinstance(m, _BatchNorm)
    feat = model(imgs)
    assert len(feat) == 2
    assert feat[0].shape == torch.Size((1, 480, 14, 14))
    assert feat[1].shape == torch.Size((1, 960, 7, 7))

    # Final stage only.
    model = ShuffleNetV1(groups=3, out_indices=(2, ))
    model.init_weights()
    model.train()
    for m in model.modules():
        if is_norm(m):
            assert isinstance(m, _BatchNorm)
    feat = model(imgs)
    assert len(feat) == 1
    assert isinstance(feat[0], torch.Tensor)
    assert feat[0].shape == torch.Size((1, 960, 7, 7))

    # The with_cp flag is propagated to every block.
    model = ShuffleNetV1(groups=3, with_cp=True)
    for m in model.modules():
        if is_block(m):
            assert m.with_cp

    # norm_eval=True keeps BatchNorm layers in eval mode after train().
    model = ShuffleNetV1(norm_eval=True)
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), False)
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm
from mmpretrain.models.backbones import ShuffleNetV2
from mmpretrain.models.backbones.shufflenet_v2 import InvertedResidual
def is_block(modules):
    """Check if is ResNet building block."""
    return isinstance(modules, (InvertedResidual, ))
def is_norm(modules):
    """Check if is one of the norms."""
    return isinstance(modules, (GroupNorm, _BatchNorm))
def check_norm_state(modules, train_state):
    """Check if norm layer is in correct train state."""
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))
def test_shufflenetv2_invertedresidual():
    """Test the InvertedResidual building block of ShuffleNetV2."""
    # when stride==1, in_channels should be equal to out_channels // 2 * 2
    with pytest.raises(AssertionError):
        InvertedResidual(24, 32, stride=1)

    # NOTE(review): this raises-case is identical to the one above; both
    # comment angles describe the same internal assertion. Kept as-is.
    with pytest.raises(AssertionError):
        # when in_channels != out_channels // 2 * 2, stride should not be
        # equal to 1.
        InvertedResidual(24, 32, stride=1)

    # stride=2 path: channels grow, resolution halves.
    block = InvertedResidual(24, 48, stride=2)
    out = block(torch.randn(1, 24, 56, 56))
    assert out.shape == torch.Size((1, 48, 28, 28))

    # Gradient checkpointing path (requires a grad-enabled input).
    block = InvertedResidual(48, 48, stride=1, with_cp=True)
    assert block.with_cp
    x = torch.randn(1, 48, 56, 56)
    x.requires_grad = True
    assert block(x).shape == torch.Size((1, 48, 56, 56))
def test_shufflenetv2_backbone():
    """Test the ShuffleNetV2 backbone."""
    with pytest.raises(ValueError):
        # widen_factor must be in [0.5, 1.0, 1.5, 2.0]
        ShuffleNetV2(widen_factor=3.0)

    with pytest.raises(ValueError):
        # frozen_stages must be in [0, 1, 2, 3]
        ShuffleNetV2(widen_factor=1.0, frozen_stages=4)

    with pytest.raises(ValueError):
        # out_indices must be in [0, 1, 2, 3]
        ShuffleNetV2(widen_factor=1.0, out_indices=(4, ))

    with pytest.raises(TypeError):
        # pretrained must be str or None
        model = ShuffleNetV2()
        model.init_weights(pretrained=1)

    # A freshly trained model keeps BatchNorm in training mode.
    model = ShuffleNetV2()
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), True)

    # With the first stage frozen, its params and BN stats stay fixed.
    frozen_stages = 1
    model = ShuffleNetV2(frozen_stages=frozen_stages)
    model.init_weights()
    model.train()
    for param in model.conv1.parameters():
        assert param.requires_grad is False
    for i in range(0, frozen_stages):
        layer = model.layers[i]
        for mod in layer.modules():
            if isinstance(mod, _BatchNorm):
                assert mod.training is False
        for param in layer.parameters():
            assert param.requires_grad is False

    # norm_eval=True keeps BatchNorm layers in eval mode after train().
    model = ShuffleNetV2(norm_eval=True)
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), False)

    # Forward pass for every supported widen_factor; the copy-pasted
    # sections are folded into one table-driven loop with the exact same
    # assertions. NOTE(review): as in the original, only the first three
    # stage outputs are shape-checked although four stages are requested.
    stage_channels = {
        0.5: (48, 96, 192),
        1.0: (116, 232, 464),
        1.5: (176, 352, 704),
        2.0: (244, 488, 976),
    }
    imgs = torch.randn(1, 3, 224, 224)
    for widen_factor, channels in stage_channels.items():
        model = ShuffleNetV2(
            widen_factor=widen_factor, out_indices=(0, 1, 2, 3))
        model.init_weights()
        model.train()
        for m in model.modules():
            if is_norm(m):
                assert isinstance(m, _BatchNorm)
        feat = model(imgs)
        assert len(feat) == 4
        for out, ch, size in zip(feat, channels, (28, 14, 7)):
            assert out.shape == torch.Size((1, ch, size, size))

    # Final stage only.
    model = ShuffleNetV2(widen_factor=1.0, out_indices=(2, ))
    model.init_weights()
    model.train()
    for m in model.modules():
        if is_norm(m):
            assert isinstance(m, _BatchNorm)
    feat = model(imgs)
    assert len(feat) == 1
    assert isinstance(feat[0], torch.Tensor)
    assert feat[0].shape == torch.Size((1, 464, 7, 7))

    # Stages 1 and 2.
    model = ShuffleNetV2(widen_factor=1.0, out_indices=(1, 2))
    model.init_weights()
    model.train()
    for m in model.modules():
        if is_norm(m):
            assert isinstance(m, _BatchNorm)
    feat = model(imgs)
    assert len(feat) == 2
    assert feat[0].shape == torch.Size((1, 232, 14, 14))
    assert feat[1].shape == torch.Size((1, 464, 7, 7))

    # The with_cp flag is propagated to every block.
    model = ShuffleNetV2(widen_factor=1.0, with_cp=True)
    for m in model.modules():
        if is_block(m):
            assert m.with_cp
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os
import tempfile
from copy import deepcopy
from itertools import chain
from unittest import TestCase
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmpretrain.models.backbones import SwinTransformer
from mmpretrain.models.backbones.swin_transformer import SwinBlock
from .utils import timm_resize_pos_embed
def check_norm_state(modules, train_state):
    """Return True iff every batch-norm layer matches ``train_state``."""
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))
class TestSwinTransformer(TestCase):
    """Unit tests for the SwinTransformer backbone."""

    def setUp(self):
        # Base config shared by all tests; each test deep-copies it
        # before making modifications.
        self.cfg = dict(
            arch='tiny', img_size=224, patch_size=4, drop_path_rate=0.1)

    def test_arch(self):
        """Invalid arch names/dicts are rejected; custom dicts are honored."""
        # Test invalid default arch
        with self.assertRaisesRegex(AssertionError, 'not in default archs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            SwinTransformer(**cfg)

        # Test invalid custom arch (missing mandatory keys, e.g. 'depths')
        with self.assertRaisesRegex(AssertionError, 'Custom arch needs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'embed_dims': 96,
                'num_heads': [3, 6, 12, 16],
            }
            SwinTransformer(**cfg)

        # Test custom arch
        cfg = deepcopy(self.cfg)
        depths = [2, 2, 4, 2]
        num_heads = [6, 12, 6, 12]
        cfg['arch'] = {
            'embed_dims': 256,
            'depths': depths,
            'num_heads': num_heads
        }
        model = SwinTransformer(**cfg)
        # Embed dims double at every stage; per-stage depth and head
        # counts must match the requested arch.
        for i, stage in enumerate(model.stages):
            self.assertEqual(stage.embed_dims, 256 * (2**i))
            self.assertEqual(len(stage.blocks), depths[i])
            self.assertEqual(stage.blocks[0].attn.w_msa.num_heads,
                             num_heads[i])

    def test_init_weights(self):
        """Weight init cfg takes effect; v1/v3 checkpoints load correctly."""
        # test weight init cfg
        cfg = deepcopy(self.cfg)
        cfg['use_abs_pos_embed'] = True
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = SwinTransformer(**cfg)
        ori_weight = model.patch_embed.projection.weight.clone().detach()
        # The pos_embed is all zero before initialize
        self.assertTrue(
            torch.allclose(model.absolute_pos_embed, torch.tensor(0.)))

        model.init_weights()
        initialized_weight = model.patch_embed.projection.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))
        self.assertFalse(
            torch.allclose(model.absolute_pos_embed, torch.tensor(0.)))

        pretrain_pos_embed = model.absolute_pos_embed.clone().detach()

        tmpdir = tempfile.gettempdir()
        # Save v3 checkpoints
        checkpoint_v3 = os.path.join(tmpdir, 'v3.pth')
        save_checkpoint(model.state_dict(), checkpoint_v3)
        # Save v1 checkpoints: emulate the legacy state-dict layout by
        # restoring the old 'norm' attribute name and the persistent
        # attention-mask buffer before saving.
        setattr(model, 'norm', model.norm3)
        setattr(model.stages[0].blocks[1].attn, 'attn_mask',
                torch.zeros(64, 49, 49))
        model._version = 1
        del model.norm3
        checkpoint_v1 = os.path.join(tmpdir, 'v1.pth')
        save_checkpoint(model.state_dict(), checkpoint_v1)

        # test load v1 checkpoint
        cfg = deepcopy(self.cfg)
        cfg['use_abs_pos_embed'] = True
        model = SwinTransformer(**cfg)
        load_checkpoint(model, checkpoint_v1, strict=True)

        # test load v3 checkpoint
        cfg = deepcopy(self.cfg)
        cfg['use_abs_pos_embed'] = True
        model = SwinTransformer(**cfg)
        load_checkpoint(model, checkpoint_v3, strict=True)

        # test load v3 checkpoint with different img_size
        cfg = deepcopy(self.cfg)
        cfg['img_size'] = 384
        cfg['use_abs_pos_embed'] = True
        model = SwinTransformer(**cfg)
        load_checkpoint(model, checkpoint_v3, strict=True)
        # The absolute position embedding should be interpolated to the
        # new resolution on load.
        resized_pos_embed = timm_resize_pos_embed(
            pretrain_pos_embed, model.absolute_pos_embed, num_tokens=0)
        self.assertTrue(
            torch.allclose(model.absolute_pos_embed, resized_pos_embed))

        os.remove(checkpoint_v1)
        os.remove(checkpoint_v3)

    def test_forward(self):
        """Output shapes, window-size handling, cp forward, dynamic input."""
        imgs = torch.randn(1, 3, 224, 224)

        cfg = deepcopy(self.cfg)
        model = SwinTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 768, 7, 7))

        # test with window_size=12
        cfg = deepcopy(self.cfg)
        cfg['window_size'] = 12
        model = SwinTransformer(**cfg)
        outs = model(torch.randn(1, 3, 384, 384))
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 768, 12, 12))
        # A 224 input gives 7x7 feature maps, smaller than the window;
        # without pad_small_map this must raise.
        with self.assertRaisesRegex(AssertionError, r'the window size \(12\)'):
            model(torch.randn(1, 3, 224, 224))

        # test with pad_small_map=True
        cfg = deepcopy(self.cfg)
        cfg['window_size'] = 12
        cfg['pad_small_map'] = True
        model = SwinTransformer(**cfg)
        outs = model(torch.randn(1, 3, 224, 224))
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 768, 7, 7))

        # test multiple output indices
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = (0, 1, 2, 3)
        model = SwinTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 4)
        for stride, out in zip([1, 2, 4, 8], outs):
            self.assertEqual(out.shape,
                             (1, 96 * stride, 56 // stride, 56 // stride))

        # test with checkpoint forward
        cfg = deepcopy(self.cfg)
        cfg['with_cp'] = True
        model = SwinTransformer(**cfg)
        for m in model.modules():
            if isinstance(m, SwinBlock):
                self.assertTrue(m.with_cp)
        model.init_weights()
        model.train()
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 768, 7, 7))

        # test with dynamic input shape
        imgs1 = torch.randn(1, 3, 224, 224)
        imgs2 = torch.randn(1, 3, 256, 256)
        imgs3 = torch.randn(1, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        model = SwinTransformer(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            feat = outs[-1]
            # The overall downsampling factor is 32 (ceil for odd sizes).
            expect_feat_shape = (math.ceil(imgs.shape[2] / 32),
                                 math.ceil(imgs.shape[3] / 32))
            self.assertEqual(feat.shape, (1, 768, *expect_feat_shape))

    def test_structure(self):
        """Stochastic-depth decay, norm_eval and frozen_stages behavior."""
        # test drop_path_rate decay
        cfg = deepcopy(self.cfg)
        cfg['drop_path_rate'] = 0.2
        model = SwinTransformer(**cfg)
        depths = model.arch_settings['depths']
        blocks = chain(*[stage.blocks for stage in model.stages])
        for i, block in enumerate(blocks):
            # Drop prob grows linearly from 0 to drop_path_rate over blocks.
            expect_prob = 0.2 / (sum(depths) - 1) * i
            self.assertAlmostEqual(block.ffn.dropout_layer.drop_prob,
                                   expect_prob)
            self.assertAlmostEqual(block.attn.drop.drop_prob, expect_prob)

        # test Swin-Transformer with norm_eval=True
        cfg = deepcopy(self.cfg)
        cfg['norm_eval'] = True
        cfg['norm_cfg'] = dict(type='BN')
        cfg['stage_cfgs'] = dict(block_cfgs=dict(norm_cfg=dict(type='BN')))
        model = SwinTransformer(**cfg)
        model.init_weights()
        model.train()
        self.assertTrue(check_norm_state(model.modules(), False))

        # test Swin-Transformer with first stage frozen.
        cfg = deepcopy(self.cfg)
        frozen_stages = 0
        cfg['frozen_stages'] = frozen_stages
        cfg['out_indices'] = (0, 1, 2, 3)
        model = SwinTransformer(**cfg)
        model.init_weights()
        model.train()

        # the patch_embed and first stage should not require grad.
        self.assertFalse(model.patch_embed.training)
        for param in model.patch_embed.parameters():
            self.assertFalse(param.requires_grad)
        for i in range(frozen_stages + 1):
            stage = model.stages[i]
            for param in stage.parameters():
                self.assertFalse(param.requires_grad)
        for param in model.norm0.parameters():
            self.assertFalse(param.requires_grad)

        # the second stage should require grad.
        for i in range(frozen_stages + 1, 4):
            stage = model.stages[i]
            for param in stage.parameters():
                self.assertTrue(param.requires_grad)
            norm = getattr(model, f'norm{i}')
            for param in norm.parameters():
                self.assertTrue(param.requires_grad)
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os
import tempfile
from copy import deepcopy
from itertools import chain
from unittest import TestCase
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmpretrain.models.backbones import SwinTransformerV2
from mmpretrain.models.backbones.swin_transformer import SwinBlock
from .utils import timm_resize_pos_embed
def check_norm_state(modules, train_state):
    """Check that all batch-norm layers are in the given train state."""
    for mod in modules:
        if not isinstance(mod, _BatchNorm):
            continue
        if mod.training != train_state:
            return False
    return True
class TestSwinTransformerV2(TestCase):
    """Unit tests for the SwinTransformerV2 backbone."""

    def setUp(self):
        # Base config shared by every test; tests deep-copy it first.
        self.cfg = dict(
            arch='b', img_size=256, patch_size=4, drop_path_rate=0.1)

    def test_arch(self):
        """Invalid arch names/dicts are rejected; custom dicts are honored."""
        # Test invalid default arch
        with self.assertRaisesRegex(AssertionError, 'not in default archs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            SwinTransformerV2(**cfg)

        # Test invalid custom arch (missing mandatory keys, e.g. 'depths')
        with self.assertRaisesRegex(AssertionError, 'Custom arch needs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'embed_dims': 96,
                'num_heads': [3, 6, 12, 16],
            }
            SwinTransformerV2(**cfg)

        # Test custom arch
        cfg = deepcopy(self.cfg)
        depths = [2, 2, 6, 2]
        num_heads = [6, 12, 6, 12]
        cfg['arch'] = {
            'embed_dims': 256,
            'depths': depths,
            'num_heads': num_heads,
            'extra_norm_every_n_blocks': 2
        }
        model = SwinTransformerV2(**cfg)
        # Channels double per stage; depths and head counts must match.
        for i, stage in enumerate(model.stages):
            self.assertEqual(stage.out_channels, 256 * (2**i))
            self.assertEqual(len(stage.blocks), depths[i])
            self.assertEqual(stage.blocks[0].attn.w_msa.num_heads,
                             num_heads[i])
        # The deepest stage (depth 6) still exposes its last block.
        self.assertIsInstance(model.stages[2].blocks[5], torch.nn.Module)

    def test_init_weights(self):
        """Weight init cfg takes effect; checkpoints load and pos-embed
        resizes for a different img_size."""
        # test weight init cfg
        cfg = deepcopy(self.cfg)
        cfg['use_abs_pos_embed'] = True
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = SwinTransformerV2(**cfg)
        ori_weight = model.patch_embed.projection.weight.clone().detach()
        # The pos_embed is all zero before initialize
        self.assertTrue(
            torch.allclose(model.absolute_pos_embed, torch.tensor(0.)))

        model.init_weights()
        initialized_weight = model.patch_embed.projection.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))
        self.assertFalse(
            torch.allclose(model.absolute_pos_embed, torch.tensor(0.)))

        pretrain_pos_embed = model.absolute_pos_embed.clone().detach()

        tmpdir = tempfile.TemporaryDirectory()
        # Save checkpoints
        checkpoint = os.path.join(tmpdir.name, 'checkpoint.pth')
        save_checkpoint(model.state_dict(), checkpoint)

        # test load checkpoint
        cfg = deepcopy(self.cfg)
        cfg['use_abs_pos_embed'] = True
        model = SwinTransformerV2(**cfg)
        load_checkpoint(model, checkpoint, strict=False)

        # test load checkpoint with different img_size
        cfg = deepcopy(self.cfg)
        cfg['img_size'] = 384
        cfg['use_abs_pos_embed'] = True
        model = SwinTransformerV2(**cfg)
        load_checkpoint(model, checkpoint, strict=False)
        # The absolute position embedding should be interpolated to the
        # new resolution on load.
        resized_pos_embed = timm_resize_pos_embed(
            pretrain_pos_embed, model.absolute_pos_embed, num_tokens=0)
        self.assertTrue(
            torch.allclose(model.absolute_pos_embed, resized_pos_embed))

        tmpdir.cleanup()

    def test_forward(self):
        """Output shapes, window-size handling, cp forward, dynamic input."""
        imgs = torch.randn(1, 3, 256, 256)

        cfg = deepcopy(self.cfg)
        model = SwinTransformerV2(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 1024, 8, 8))

        # test with window_size=12
        cfg = deepcopy(self.cfg)
        cfg['window_size'] = 12
        model = SwinTransformerV2(**cfg)
        outs = model(torch.randn(1, 3, 384, 384))
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 1024, 12, 12))
        # A 256 input gives 8x8 feature maps, smaller than the window;
        # without pad_small_map this must raise.
        with self.assertRaisesRegex(AssertionError, r'the window size \(12\)'):
            model(torch.randn(1, 3, 256, 256))

        # test with pad_small_map=True
        cfg = deepcopy(self.cfg)
        cfg['window_size'] = 12
        cfg['pad_small_map'] = True
        model = SwinTransformerV2(**cfg)
        outs = model(torch.randn(1, 3, 256, 256))
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 1024, 8, 8))

        # test multiple output indices
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = (0, 1, 2, 3)
        model = SwinTransformerV2(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 4)
        for stride, out in zip([1, 2, 4, 8], outs):
            self.assertEqual(out.shape,
                             (1, 128 * stride, 64 // stride, 64 // stride))

        # test with checkpoint forward
        cfg = deepcopy(self.cfg)
        cfg['with_cp'] = True
        model = SwinTransformerV2(**cfg)
        # NOTE(review): SwinBlock is imported from the v1 swin_transformer
        # module here — presumably V2 reuses it; verify against the
        # backbone implementation.
        for m in model.modules():
            if isinstance(m, SwinBlock):
                self.assertTrue(m.with_cp)
        model.init_weights()
        model.train()
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (1, 1024, 8, 8))

        # test with dynamic input shape
        imgs1 = torch.randn(1, 3, 224, 224)
        imgs2 = torch.randn(1, 3, 256, 256)
        imgs3 = torch.randn(1, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        cfg['pad_small_map'] = True
        model = SwinTransformerV2(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            feat = outs[-1]
            # The overall downsampling factor is 32 (ceil for odd sizes).
            expect_feat_shape = (math.ceil(imgs.shape[2] / 32),
                                 math.ceil(imgs.shape[3] / 32))
            self.assertEqual(feat.shape, (1, 1024, *expect_feat_shape))

    def test_structure(self):
        """Stochastic-depth decay, norm_eval and frozen_stages behavior."""
        # test drop_path_rate decay
        cfg = deepcopy(self.cfg)
        cfg['drop_path_rate'] = 0.2
        model = SwinTransformerV2(**cfg)
        depths = model.arch_settings['depths']
        blocks = chain(*[stage.blocks for stage in model.stages])
        for i, block in enumerate(blocks):
            # Drop prob grows linearly from 0 to drop_path_rate over blocks.
            expect_prob = 0.2 / (sum(depths) - 1) * i
            self.assertAlmostEqual(block.ffn.dropout_layer.drop_prob,
                                   expect_prob)
            self.assertAlmostEqual(block.attn.drop.drop_prob, expect_prob)

        # test Swin-Transformer V2 with norm_eval=True
        cfg = deepcopy(self.cfg)
        cfg['norm_eval'] = True
        cfg['norm_cfg'] = dict(type='BN')
        cfg['stage_cfgs'] = dict(block_cfgs=dict(norm_cfg=dict(type='BN')))
        model = SwinTransformerV2(**cfg)
        model.init_weights()
        model.train()
        self.assertTrue(check_norm_state(model.modules(), False))

        # test Swin-Transformer V2 with first stage frozen.
        cfg = deepcopy(self.cfg)
        frozen_stages = 0
        cfg['frozen_stages'] = frozen_stages
        cfg['out_indices'] = (0, 1, 2, 3)
        model = SwinTransformerV2(**cfg)
        model.init_weights()
        model.train()

        # the patch_embed and first stage should not require grad.
        self.assertFalse(model.patch_embed.training)
        for param in model.patch_embed.parameters():
            self.assertFalse(param.requires_grad)
        for i in range(frozen_stages + 1):
            stage = model.stages[i]
            for param in stage.parameters():
                self.assertFalse(param.requires_grad)
        for param in model.norm0.parameters():
            self.assertFalse(param.requires_grad)

        # the second stage should require grad.
        for i in range(frozen_stages + 1, 4):
            stage = model.stages[i]
            for param in stage.parameters():
                self.assertTrue(param.requires_grad)
            norm = getattr(model, f'norm{i}')
            for param in norm.parameters():
                self.assertTrue(param.requires_grad)
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os
import tempfile
from copy import deepcopy
from unittest import TestCase
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from mmpretrain.models.backbones import T2T_ViT
from .utils import timm_resize_pos_embed
class TestT2TViT(TestCase):
    """Unit tests for the Tokens-to-Token ViT (T2T-ViT) backbone."""

    def setUp(self):
        # Base config (14-layer, 384-dim variant); each test deep-copies
        # it before modification.
        self.cfg = dict(
            img_size=224,
            in_channels=3,
            embed_dims=384,
            t2t_cfg=dict(
                token_dims=64,
                use_performer=False,
            ),
            num_layers=14,
            drop_path_rate=0.1)

    def test_structure(self):
        """Constructor validation and per-layer structural checks."""
        # The performer hasn't been implemented
        cfg = deepcopy(self.cfg)
        cfg['t2t_cfg']['use_performer'] = True
        with self.assertRaises(NotImplementedError):
            T2T_ViT(**cfg)

        # Test out_indices: a dict is the wrong type, and an index beyond
        # num_layers is out of range.
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = {1: 1}
        with self.assertRaisesRegex(AssertionError, "get <class 'dict'>"):
            T2T_ViT(**cfg)
        cfg['out_indices'] = [0, 15]
        with self.assertRaisesRegex(AssertionError, 'Invalid out_indices 15'):
            T2T_ViT(**cfg)

        # Test model structure
        cfg = deepcopy(self.cfg)
        model = T2T_ViT(**cfg)
        self.assertEqual(len(model.encoder), 14)
        # Stochastic depth increases linearly from 0 to drop_path_rate.
        dpr_inc = 0.1 / (14 - 1)
        dpr = 0
        for layer in model.encoder:
            self.assertEqual(layer.attn.embed_dims, 384)
            # The default mlp_ratio is 3
            self.assertEqual(layer.ffn.feedforward_channels, 384 * 3)
            self.assertAlmostEqual(layer.attn.out_drop.drop_prob, dpr)
            self.assertAlmostEqual(layer.ffn.dropout_layer.drop_prob, dpr)
            dpr += dpr_inc

    def test_init_weights(self):
        """Weight init cfg takes effect; checkpoints round-trip and the
        position embedding resizes for a different img_size."""
        # test weight init cfg
        cfg = deepcopy(self.cfg)
        cfg['init_cfg'] = [dict(type='TruncNormal', layer='Linear', std=.02)]
        model = T2T_ViT(**cfg)
        ori_weight = model.tokens_to_token.project.weight.clone().detach()
        model.init_weights()
        initialized_weight = model.tokens_to_token.project.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))

        # test load checkpoint
        pretrain_pos_embed = model.pos_embed.clone().detach()
        tmpdir = tempfile.gettempdir()
        checkpoint = os.path.join(tmpdir, 'test.pth')
        save_checkpoint(model.state_dict(), checkpoint)

        cfg = deepcopy(self.cfg)
        model = T2T_ViT(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        self.assertTrue(torch.allclose(model.pos_embed, pretrain_pos_embed))

        # test load checkpoint with different img_size
        cfg = deepcopy(self.cfg)
        cfg['img_size'] = 384
        model = T2T_ViT(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        # The position embedding should be interpolated to the new size.
        resized_pos_embed = timm_resize_pos_embed(pretrain_pos_embed,
                                                  model.pos_embed)
        self.assertTrue(torch.allclose(model.pos_embed, resized_pos_embed))

        os.remove(checkpoint)

    def test_forward(self):
        """Output shapes for each out_type / out_indices configuration."""
        imgs = torch.randn(1, 3, 224, 224)

        # test with_cls_token=False: requesting the cls token without
        # keeping it must fail.
        cfg = deepcopy(self.cfg)
        cfg['with_cls_token'] = False
        cfg['out_type'] = 'cls_token'
        with self.assertRaisesRegex(ValueError, 'must be True'):
            T2T_ViT(**cfg)

        cfg = deepcopy(self.cfg)
        cfg['with_cls_token'] = False
        cfg['out_type'] = 'featmap'
        model = T2T_ViT(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        patch_token = outs[-1]
        self.assertEqual(patch_token.shape, (1, 384, 14, 14))

        # test with output cls_token
        cfg = deepcopy(self.cfg)
        model = T2T_ViT(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        cls_token = outs[-1]
        self.assertEqual(cls_token.shape, (1, 384))

        # Test forward with multi out indices (negative indices count
        # from the last encoder layer).
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = [-3, -2, -1]
        model = T2T_ViT(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 3)
        for out in outs:
            self.assertEqual(out.shape, (1, 384))

        # Test forward with dynamic input size
        imgs1 = torch.randn(1, 3, 224, 224)
        imgs2 = torch.randn(1, 3, 256, 256)
        imgs3 = torch.randn(1, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        cfg['out_type'] = 'featmap'
        model = T2T_ViT(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            patch_token = outs[-1]
            # The T2T module downsamples by an overall factor of 16.
            expect_feat_shape = (math.ceil(imgs.shape[2] / 16),
                                 math.ceil(imgs.shape[3] / 16))
            self.assertEqual(patch_token.shape, (1, 384, *expect_feat_shape))
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
import pytest
import torch
from torch import nn
from torch.nn.modules.batchnorm import _BatchNorm
from mmpretrain.models.backbones import TIMMBackbone
def has_timm() -> bool:
    """Return whether the optional ``timm`` package is importable."""
    try:
        import timm  # noqa: F401
    except ImportError:
        return False
    return True
def check_norm_state(modules, train_state):
    """Check whether all batch-norm layers share ``train_state``."""
    mismatch = any(m.training != train_state for m in modules
                   if isinstance(m, _BatchNorm))
    return not mismatch
@unittest.skipIf(not has_timm(), 'timm is not installed')
def test_timm_backbone():
    """Test timm backbones, features_only=False (default).

    Verifies that the TIMMBackbone wrapper strips the classification head
    (global pool / fc / classifier / head become ``nn.Identity``) and
    returns a single feature map of the expected shape.
    """
    with pytest.raises(TypeError):
        # TIMMBackbone has 1 required positional argument: 'model_name'
        model = TIMMBackbone(pretrained=True)
    with pytest.raises(TypeError):
        # pretrained must be bool
        model = TIMMBackbone(model_name='resnet18', pretrained='model.pth')

    # Test resnet18 from timm
    model = TIMMBackbone(model_name='resnet18')
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), True)
    # The head must be replaced by Identity so only features come out.
    assert isinstance(model.timm_model.global_pool.pool, nn.Identity)
    assert isinstance(model.timm_model.fc, nn.Identity)

    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == torch.Size((1, 512, 7, 7))

    # Test efficientnet_b1 with pretrained weights
    # NOTE(review): pretrained=True presumably downloads weights at test
    # time — requires network access.
    model = TIMMBackbone(model_name='efficientnet_b1', pretrained=True)
    model.init_weights()
    model.train()
    assert isinstance(model.timm_model.global_pool.pool, nn.Identity)
    assert isinstance(model.timm_model.classifier, nn.Identity)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == torch.Size((1, 1280, 7, 7))

    # Test vit_tiny_patch16_224 with pretrained weights
    model = TIMMBackbone(model_name='vit_tiny_patch16_224', pretrained=True)
    model.init_weights()
    model.train()
    assert isinstance(model.timm_model.head, nn.Identity)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    # Disable the test since TIMM's behavior changes between 0.5.4 and 0.5.5
    # assert feat[0].shape == torch.Size((1, 197, 192))
@unittest.skipIf(not has_timm(), 'timm is not installed')
def test_timm_backbone_features_only():
    """Test timm backbones, features_only=True.

    Checks multi-level feature extraction for several timm models:
    norm-layer selection, out_indices filtering and the feature-map sizes
    produced under different ``output_stride`` settings.
    """
    # Test different norm_layer, can be: 'SyncBN', 'BN2d', 'GN', 'LN', 'IN'
    # Test resnet18 from timm, norm_layer='BN2d'
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=32,
        norm_layer='BN2d')

    # Test resnet18 from timm, norm_layer='SyncBN'
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=32,
        norm_layer='SyncBN')

    # Test resnet18 from timm, output_stride=32
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=32)
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), True)
    imgs = torch.randn(1, 3, 224, 224)
    feats = model(imgs)
    # Five pyramid levels, halving resolution at each level.
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 64, 112, 112))
    assert feats[1].shape == torch.Size((1, 64, 56, 56))
    assert feats[2].shape == torch.Size((1, 128, 28, 28))
    assert feats[3].shape == torch.Size((1, 256, 14, 14))
    assert feats[4].shape == torch.Size((1, 512, 7, 7))

    # Test resnet18 from timm, output_stride=32, out_indices=(1, 2, 3)
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=32,
        out_indices=(1, 2, 3))
    imgs = torch.randn(1, 3, 224, 224)
    feats = model(imgs)
    # Only the three requested levels come back.
    assert len(feats) == 3
    assert feats[0].shape == torch.Size((1, 64, 56, 56))
    assert feats[1].shape == torch.Size((1, 128, 28, 28))
    assert feats[2].shape == torch.Size((1, 256, 14, 14))

    # Test resnet18 from timm, output_stride=16
    # (the deepest level stays at 1/16 resolution — see last assert)
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=16)
    imgs = torch.randn(1, 3, 224, 224)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 64, 112, 112))
    assert feats[1].shape == torch.Size((1, 64, 56, 56))
    assert feats[2].shape == torch.Size((1, 128, 28, 28))
    assert feats[3].shape == torch.Size((1, 256, 14, 14))
    assert feats[4].shape == torch.Size((1, 512, 14, 14))

    # Test resnet18 from timm, output_stride=8
    # (the last two levels stay at 1/8 resolution)
    model = TIMMBackbone(
        model_name='resnet18',
        features_only=True,
        pretrained=False,
        output_stride=8)
    imgs = torch.randn(1, 3, 224, 224)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 64, 112, 112))
    assert feats[1].shape == torch.Size((1, 64, 56, 56))
    assert feats[2].shape == torch.Size((1, 128, 28, 28))
    assert feats[3].shape == torch.Size((1, 256, 28, 28))
    assert feats[4].shape == torch.Size((1, 512, 28, 28))

    # Test efficientnet_b1 with pretrained weights
    # NOTE(review): pretrained=True presumably downloads weights at test
    # time — requires network access.
    model = TIMMBackbone(
        model_name='efficientnet_b1', features_only=True, pretrained=True)
    imgs = torch.randn(1, 3, 64, 64)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 16, 32, 32))
    assert feats[1].shape == torch.Size((1, 24, 16, 16))
    assert feats[2].shape == torch.Size((1, 40, 8, 8))
    assert feats[3].shape == torch.Size((1, 112, 4, 4))
    assert feats[4].shape == torch.Size((1, 320, 2, 2))

    # Test resnetv2_50x1_bitm from timm, output_stride=8
    model = TIMMBackbone(
        model_name='resnetv2_50x1_bitm',
        features_only=True,
        pretrained=False,
        output_stride=8)
    imgs = torch.randn(1, 3, 8, 8)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 64, 4, 4))
    assert feats[1].shape == torch.Size((1, 256, 2, 2))
    assert feats[2].shape == torch.Size((1, 512, 1, 1))
    assert feats[3].shape == torch.Size((1, 1024, 1, 1))
    assert feats[4].shape == torch.Size((1, 2048, 1, 1))

    # Test resnetv2_50x3_bitm from timm, output_stride=8
    # (x3 width multiplier triples every channel count)
    model = TIMMBackbone(
        model_name='resnetv2_50x3_bitm',
        features_only=True,
        pretrained=False,
        output_stride=8)
    imgs = torch.randn(1, 3, 8, 8)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 192, 4, 4))
    assert feats[1].shape == torch.Size((1, 768, 2, 2))
    assert feats[2].shape == torch.Size((1, 1536, 1, 1))
    assert feats[3].shape == torch.Size((1, 3072, 1, 1))
    assert feats[4].shape == torch.Size((1, 6144, 1, 1))

    # Test resnetv2_101x1_bitm from timm, output_stride=8
    model = TIMMBackbone(
        model_name='resnetv2_101x1_bitm',
        features_only=True,
        pretrained=False,
        output_stride=8)
    imgs = torch.randn(1, 3, 8, 8)
    feats = model(imgs)
    assert len(feats) == 5
    assert feats[0].shape == torch.Size((1, 64, 4, 4))
    assert feats[1].shape == torch.Size((1, 256, 2, 2))
    assert feats[2].shape == torch.Size((1, 512, 1, 1))
    assert feats[3].shape == torch.Size((1, 1024, 1, 1))
    assert feats[4].shape == torch.Size((1, 2048, 1, 1))
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmpretrain.models.backbones import TinyViT
def test_assertion():
    """Invalid constructor arguments must be rejected."""
    # Unknown arch names are not accepted.
    with pytest.raises(AssertionError):
        TinyViT(arch='unknown')

    # out_indices must refer to an existing stage.
    with pytest.raises(AssertionError):
        TinyViT(out_indices=-100)
def test_tinyvit():
    """Forward-pass smoke tests for the TinyViT backbone."""
    # Default '5m' arch: a single pooled feature vector.
    model = TinyViT(arch='5m')
    model.init_weights()
    model.train()

    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == torch.Size([1, 320])

    # Same arch, features requested from every stage.
    model = TinyViT(arch='5m', out_indices=(0, 1, 2, 3))
    feat = model(imgs)
    assert len(feat) == 4
    for out, dim in zip(feat, (128, 160, 320, 320)):
        assert out.shape == torch.Size([1, dim])

    # Custom arch supplied as a dict.
    model = TinyViT(
        arch={
            'depths': [2, 3, 4, 5],
            'channels': [64, 128, 256, 448],
            'num_heads': [4, 4, 4, 4]
        },
        out_indices=(0, 1, 2, 3))
    model.init_weights()
    model.train()

    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 4
    for out, dim in zip(feat, (128, 256, 448, 448)):
        assert out.shape == torch.Size([1, dim])

    # Keep spatial maps by disabling GAP before the final norm.
    model = TinyViT(
        arch='21m', out_indices=(0, 1, 2, 3), gap_before_final_norm=False)
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 4
    expected = [(192, 28, 28), (384, 14, 14), (576, 7, 7), (576, 7, 7)]
    for out, shape in zip(feat, expected):
        assert out.shape == torch.Size([1, *shape])

    # The first ``frozen_stages`` stages must be switched to eval mode.
    model = TinyViT(arch='11m', out_indices=(0, 1, 2, 3), frozen_stages=2)
    model.init_weights()
    model.train()
    for i in range(4):
        assert model.stages[i].training == (i >= 2)
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.nn.modules.batchnorm import _BatchNorm
from mmpretrain.models.backbones import TNT
def check_norm_state(modules, train_state):
    """Verify every batch-norm layer's training flag equals ``train_state``."""
    norm_layers = (m for m in modules if isinstance(m, _BatchNorm))
    return all(m.training == train_state for m in norm_layers)
def test_tnt_backbone():
    """Basic forward tests for the TNT backbone."""
    # init_weights must reject a non-string ``pretrained`` value.
    with pytest.raises(TypeError):
        backbone = TNT()
        backbone.init_weights(pretrained=0)

    # Default arch (tnt_base_patch16_224): norms in train mode and a
    # single 640-d output feature.
    backbone = TNT()
    backbone.init_weights()
    backbone.train()
    assert check_norm_state(backbone.modules(), True)

    inputs = torch.randn(1, 3, 224, 224)
    outs = backbone(inputs)
    assert len(outs) == 1
    assert outs[0].shape == torch.Size((1, 640))

    # Custom arch with 768-d outer embeddings.
    custom_arch = {
        'embed_dims_outer': 768,
        'embed_dims_inner': 48,
        'num_layers': 12,
        'num_heads_outer': 6,
        'num_heads_inner': 4
    }
    backbone = TNT(arch=custom_arch)
    backbone.init_weights()
    backbone.train()
    inputs = torch.randn(1, 3, 224, 224)
    outs = backbone(inputs)
    assert len(outs) == 1
    assert outs[0].shape == torch.Size((1, 768))
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import pytest
import torch
import torch.nn as nn
from mmpretrain.models.backbones.twins import (PCPVT, SVT,
GlobalSubsampledAttention,
LocallyGroupedSelfAttention)
def test_LSA_module():
    """LocallyGroupedSelfAttention preserves the token-sequence shape."""
    attn = LocallyGroupedSelfAttention(embed_dims=32, window_size=3)
    tokens = torch.randn(1, 3136, 32)
    result = attn(tokens, (56, 56))
    assert result.shape == torch.Size([1, 3136, 32])
def test_GSA_module():
    """GlobalSubsampledAttention preserves the token-sequence shape."""
    attn = GlobalSubsampledAttention(embed_dims=32, num_heads=8)
    tokens = torch.randn(1, 3136, 32)
    result = attn(tokens, (56, 56))
    assert result.shape == torch.Size([1, 3136, 32])
def test_pcpvt():
    """Construction, forward shapes and arch/norm validation for PCPVT."""
    # test init
    path = 'PATH_THAT_DO_NOT_EXIST'
    # init_cfg loads pretrain from a non-existent file
    model = PCPVT('s', init_cfg=dict(type='Pretrained', checkpoint=path))
    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)

    # Test loading a checkpoint from a non-existent file
    with pytest.raises(OSError):
        model.init_weights()

    # init_cfg=123, whose type is unsupported
    model = PCPVT('s', init_cfg=123)
    with pytest.raises(TypeError):
        model.init_weights()

    H, W = (64, 64)
    temp = torch.randn((1, 3, H, W))

    # test output last feat
    model = PCPVT('small')
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 1
    assert outs[-1].shape == (1, 512, H // 32, W // 32)

    # test with multi outputs: one feature map per stage, strides 4/8/16/32
    model = PCPVT('small', out_indices=(0, 1, 2, 3))
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 4
    assert outs[0].shape == (1, 64, H // 4, W // 4)
    assert outs[1].shape == (1, 128, H // 8, W // 8)
    assert outs[2].shape == (1, 320, H // 16, W // 16)
    assert outs[3].shape == (1, 512, H // 32, W // 32)

    # test with arch of dict
    arch = {
        'embed_dims': [64, 128, 320, 512],
        'depths': [3, 4, 18, 3],
        'num_heads': [1, 2, 5, 8],
        'patch_sizes': [4, 2, 2, 2],
        'strides': [4, 2, 2, 2],
        'mlp_ratios': [8, 8, 4, 4],
        'sr_ratios': [8, 4, 2, 1]
    }
    pcpvt_arch = copy.deepcopy(arch)
    model = PCPVT(pcpvt_arch, out_indices=(0, 1, 2, 3))
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 4
    assert outs[0].shape == (1, 64, H // 4, W // 4)
    assert outs[1].shape == (1, 128, H // 8, W // 8)
    assert outs[2].shape == (1, 320, H // 16, W // 16)
    assert outs[3].shape == (1, 512, H // 32, W // 32)

    # assert when the lengths of arch values are not equal
    pcpvt_arch = copy.deepcopy(arch)
    pcpvt_arch['sr_ratios'] = [8, 4, 2]
    with pytest.raises(AssertionError):
        model = PCPVT(pcpvt_arch, out_indices=(0, 1, 2, 3))

    # assert when an essential arch key is missing
    pcpvt_arch = copy.deepcopy(arch)
    del pcpvt_arch['sr_ratios']
    with pytest.raises(AssertionError):
        model = PCPVT(pcpvt_arch, out_indices=(0, 1, 2, 3))

    # assert when an arch value is not a list
    pcpvt_arch = copy.deepcopy(arch)
    pcpvt_arch['sr_ratios'] = 1
    with pytest.raises(AssertionError):
        model = PCPVT(pcpvt_arch, out_indices=(0, 1, 2, 3))
    pcpvt_arch = copy.deepcopy(arch)
    pcpvt_arch['sr_ratios'] = '1, 2, 3, 4'
    with pytest.raises(AssertionError):
        model = PCPVT(pcpvt_arch, out_indices=(0, 1, 2, 3))

    # test norm_after_stage is bool True: every stage gets a LayerNorm
    model = PCPVT('small', norm_after_stage=True, norm_cfg=dict(type='LN'))
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        assert isinstance(getattr(model, f'norm_after_stage{i}'), nn.LayerNorm)

    # test norm_after_stage is bool False: every stage gets an Identity
    model = PCPVT('small', norm_after_stage=False)
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        assert isinstance(getattr(model, f'norm_after_stage{i}'), nn.Identity)

    # test norm_after_stage is a bool list (per-stage switches)
    norm_after_stage = [False, True, False, True]
    model = PCPVT('small', norm_after_stage=norm_after_stage)
    assert len(norm_after_stage) == model.num_stage
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        norm_layer = getattr(model, f'norm_after_stage{i}')
        if norm_after_stage[i]:
            assert isinstance(norm_layer, nn.LayerNorm)
        else:
            assert isinstance(norm_layer, nn.Identity)

    # test norm_after_stage is not a pure bool list: must be rejected
    norm_after_stage = [False, 'True', False, True]
    with pytest.raises(AssertionError):
        model = PCPVT('small', norm_after_stage=norm_after_stage)
def test_svt():
    """Test the SVT (Twins-SVT) backbone.

    Covers: ``init_cfg`` handling (pretrained path / invalid type), single
    and multi-stage feature-map shapes, custom arch dicts (valid and
    malformed), and all accepted forms of ``norm_after_stage``.
    """
    # test init
    path = 'PATH_THAT_DO_NOT_EXIST'
    # init_cfg loads pretrain from a non-existent file
    model = SVT('s', init_cfg=dict(type='Pretrained', checkpoint=path))
    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
    # Test loading a checkpoint from a non-existent file
    with pytest.raises(OSError):
        model.init_weights()
    # init_cfg=123, whose type is unsupported
    model = SVT('s', init_cfg=123)
    with pytest.raises(TypeError):
        model.init_weights()
    # Test feature map output
    H, W = (64, 64)
    temp = torch.randn((1, 3, H, W))
    model = SVT('s')
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 1
    assert outs[-1].shape == (1, 512, H // 32, W // 32)
    # test with multi outputs
    model = SVT('small', out_indices=(0, 1, 2, 3))
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 4
    # Stage i downsamples by 4 * 2**i and doubles the channel width.
    assert outs[0].shape == (1, 64, H // 4, W // 4)
    assert outs[1].shape == (1, 128, H // 8, W // 8)
    assert outs[2].shape == (1, 256, H // 16, W // 16)
    assert outs[3].shape == (1, 512, H // 32, W // 32)
    # test with arch of dict
    arch = {
        'embed_dims': [96, 192, 384, 768],
        'depths': [2, 2, 18, 2],
        'num_heads': [3, 6, 12, 24],
        'patch_sizes': [4, 2, 2, 2],
        'strides': [4, 2, 2, 2],
        'mlp_ratios': [4, 4, 4, 4],
        'sr_ratios': [8, 4, 2, 1],
        'window_sizes': [7, 7, 7, 7]
    }
    model = SVT(arch, out_indices=(0, 1, 2, 3))
    model.init_weights()
    outs = model(temp)
    assert len(outs) == 4
    assert outs[0].shape == (1, 96, H // 4, W // 4)
    assert outs[1].shape == (1, 192, H // 8, W // 8)
    assert outs[2].shape == (1, 384, H // 16, W // 16)
    assert outs[3].shape == (1, 768, H // 32, W // 32)
    # assert when the length of an arch value does not match the stage count
    svt_arch = copy.deepcopy(arch)
    svt_arch['sr_ratios'] = [8, 4, 2]
    with pytest.raises(AssertionError):
        model = SVT(svt_arch, out_indices=(0, 1, 2, 3))
    # assert when the arch dict lacks an essential key
    svt_arch = copy.deepcopy(arch)
    del svt_arch['window_sizes']
    with pytest.raises(AssertionError):
        model = SVT(svt_arch, out_indices=(0, 1, 2, 3))
    # assert when an arch value is not a list
    svt_arch = copy.deepcopy(arch)
    svt_arch['sr_ratios'] = 1
    with pytest.raises(AssertionError):
        model = SVT(svt_arch, out_indices=(0, 1, 2, 3))
    svt_arch = copy.deepcopy(arch)
    svt_arch['sr_ratios'] = '1, 2, 3, 4'
    with pytest.raises(AssertionError):
        model = SVT(svt_arch, out_indices=(0, 1, 2, 3))
    # test norm_after_stage is bool True
    model = SVT('small', norm_after_stage=True, norm_cfg=dict(type='LN'))
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        assert isinstance(getattr(model, f'norm_after_stage{i}'), nn.LayerNorm)
    # test norm_after_stage is bool False
    model = SVT('small', norm_after_stage=False)
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        assert isinstance(getattr(model, f'norm_after_stage{i}'), nn.Identity)
    # test norm_after_stage is a bool list (per-stage on/off)
    norm_after_stage = [False, True, False, True]
    model = SVT('small', norm_after_stage=norm_after_stage)
    assert len(norm_after_stage) == model.num_stage
    for i in range(model.num_stage):
        assert hasattr(model, f'norm_after_stage{i}')
        norm_layer = getattr(model, f'norm_after_stage{i}')
        if norm_after_stage[i]:
            assert isinstance(norm_layer, nn.LayerNorm)
        else:
            assert isinstance(norm_layer, nn.Identity)
    # test norm_after_stage is not a bool list
    norm_after_stage = [False, 'True', False, True]
    with pytest.raises(AssertionError):
        model = SVT('small', norm_after_stage=norm_after_stage)
# Copyright (c) OpenMMLab. All rights reserved.
import math
from copy import deepcopy
from itertools import chain
from unittest import TestCase
import torch
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from torch import nn
from mmpretrain.models.backbones import VAN
def check_norm_state(modules, train_state):
    """Return True iff every batch-norm layer matches ``train_state``.

    Non-norm modules in ``modules`` are ignored; an iterable with no
    batch-norm layers trivially passes.
    """
    return all(mod.training == train_state for mod in modules
               if isinstance(mod, _BatchNorm))
class TestVAN(TestCase):
    """Unit tests for the VAN (Visual Attention Network) backbone."""

    def setUp(self):
        # Baseline config: tiny arch with stochastic depth enabled.
        self.cfg = dict(arch='t', drop_path_rate=0.1)

    def test_arch(self):
        """Test arch validation and custom arch dicts."""
        # Test invalid default arch
        with self.assertRaisesRegex(AssertionError, 'not in default archs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            VAN(**cfg)
        # Test invalid custom arch (missing the 'depths' essential key)
        with self.assertRaisesRegex(AssertionError, 'Custom arch needs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'embed_dims': [32, 64, 160, 256],
                'ffn_ratios': [8, 8, 4, 4],
            }
            VAN(**cfg)
        # Test custom arch
        cfg = deepcopy(self.cfg)
        embed_dims = [32, 64, 160, 256]
        depths = [3, 3, 5, 2]
        ffn_ratios = [8, 8, 4, 4]
        cfg['arch'] = {
            'embed_dims': embed_dims,
            'depths': depths,
            'ffn_ratios': ffn_ratios
        }
        model = VAN(**cfg)
        # Stage attributes are 1-indexed: blocks1..blocks4.
        for i in range(len(depths)):
            stage = getattr(model, f'blocks{i + 1}')
            self.assertEqual(stage[-1].out_channels, embed_dims[i])
            self.assertEqual(len(stage), depths[i])

    def test_init_weights(self):
        """Test that init_cfg-driven initialization changes the weights."""
        # test weight init cfg
        cfg = deepcopy(self.cfg)
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = VAN(**cfg)
        ori_weight = model.patch_embed1.projection.weight.clone().detach()
        model.init_weights()
        initialized_weight = model.patch_embed1.projection.weight
        # Kaiming init should have re-drawn the conv weights.
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))

    def test_forward(self):
        """Test output shapes for default/custom patch sizes, multiple
        out_indices and dynamic input shapes."""
        imgs = torch.randn(3, 3, 224, 224)
        cfg = deepcopy(self.cfg)
        model = VAN(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (3, 256, 7, 7))
        # test with patch_sizes
        cfg = deepcopy(self.cfg)
        cfg['patch_sizes'] = [7, 5, 5, 5]
        model = VAN(**cfg)
        outs = model(torch.randn(3, 3, 224, 224))
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        feat = outs[-1]
        self.assertEqual(feat.shape, (3, 256, 3, 3))
        # test multiple output indices
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = (0, 1, 2, 3)
        model = VAN(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 4)
        # Stage outputs shrink by powers of two starting from 56x56.
        for emb_size, stride, out in zip([32, 64, 160, 256], [1, 2, 4, 8],
                                         outs):
            self.assertEqual(out.shape,
                             (3, emb_size, 56 // stride, 56 // stride))
        # test with dynamic input shape
        imgs1 = torch.randn(3, 3, 224, 224)
        imgs2 = torch.randn(3, 3, 256, 256)
        imgs3 = torch.randn(3, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        model = VAN(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            feat = outs[-1]
            # Overall stride is 32; odd sizes round up.
            expect_feat_shape = (math.ceil(imgs.shape[2] / 32),
                                 math.ceil(imgs.shape[3] / 32))
            self.assertEqual(feat.shape, (3, 256, *expect_feat_shape))

    def test_structure(self):
        """Test drop-path decay, norm_eval and frozen_stages behavior."""
        # test drop_path_rate decay
        cfg = deepcopy(self.cfg)
        cfg['drop_path_rate'] = 0.2
        model = VAN(**cfg)
        depths = model.arch_settings['depths']
        stages = [model.blocks1, model.blocks2, model.blocks3, model.blocks4]
        blocks = chain(*[stage for stage in stages])
        total_depth = sum(depths)
        # Expected drop-path probabilities increase linearly over all blocks.
        dpr = [
            x.item()
            for x in torch.linspace(0, cfg['drop_path_rate'], total_depth)
        ]
        for i, (block, expect_prob) in enumerate(zip(blocks, dpr)):
            if expect_prob == 0:
                assert isinstance(block.drop_path, nn.Identity)
            else:
                self.assertAlmostEqual(block.drop_path.drop_prob, expect_prob)
        # test VAN with norm_eval=True
        cfg = deepcopy(self.cfg)
        cfg['norm_eval'] = True
        cfg['norm_cfg'] = dict(type='BN')
        model = VAN(**cfg)
        model.init_weights()
        model.train()
        # norm_eval keeps all BN layers in eval mode even during training.
        self.assertTrue(check_norm_state(model.modules(), False))
        # test VAN with first stage frozen.
        cfg = deepcopy(self.cfg)
        frozen_stages = 0
        cfg['frozen_stages'] = frozen_stages
        cfg['out_indices'] = (0, 1, 2, 3)
        model = VAN(**cfg)
        model.init_weights()
        model.train()
        # the patch_embed and first stage should not require grad.
        self.assertFalse(model.patch_embed1.training)
        for param in model.patch_embed1.parameters():
            self.assertFalse(param.requires_grad)
        for i in range(frozen_stages + 1):
            patch = getattr(model, f'patch_embed{i+1}')
            for param in patch.parameters():
                self.assertFalse(param.requires_grad)
            blocks = getattr(model, f'blocks{i + 1}')
            for param in blocks.parameters():
                self.assertFalse(param.requires_grad)
            norm = getattr(model, f'norm{i + 1}')
            for param in norm.parameters():
                self.assertFalse(param.requires_grad)
        # the remaining stages should require grad.
        for i in range(frozen_stages + 1, 4):
            patch = getattr(model, f'patch_embed{i + 1}')
            for param in patch.parameters():
                self.assertTrue(param.requires_grad)
            blocks = getattr(model, f'blocks{i+1}')
            for param in blocks.parameters():
                self.assertTrue(param.requires_grad)
            norm = getattr(model, f'norm{i + 1}')
            for param in norm.parameters():
                self.assertTrue(param.requires_grad)
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmpretrain.models.backbones import VGG
def check_norm_state(modules, train_state):
    """Return True iff all batch-norm layers are in the given train state.

    Modules that are not batch-norm layers are skipped; with no norm
    layers present the check passes vacuously.
    """
    norm_layers = (m for m in modules if isinstance(m, _BatchNorm))
    return all(layer.training == train_state for layer in norm_layers)
def test_vgg():
    """Test VGG backbone.

    Covers: invalid depth/num_stages/dilations validation, norm_eval,
    forward shapes with and without BN, partial out_indices, and the
    classification-score output when ``num_classes`` is set.
    """
    with pytest.raises(KeyError):
        # VGG depth should be in [11, 13, 16, 19]
        VGG(18)
    with pytest.raises(AssertionError):
        # In VGG: 1 <= num_stages <= 5
        VGG(11, num_stages=0)
    with pytest.raises(AssertionError):
        # In VGG: 1 <= num_stages <= 5
        VGG(11, num_stages=6)
    with pytest.raises(AssertionError):
        # len(dilations) == num_stages
        VGG(11, dilations=(1, 1), num_stages=3)
    with pytest.raises(TypeError):
        # pretrained must be a string path
        model = VGG(11)
        model.init_weights(pretrained=0)
    # Test VGG11 norm_eval=True: BN layers stay in eval mode during train()
    model = VGG(11, norm_eval=True)
    model.init_weights()
    model.train()
    assert check_norm_state(model.modules(), False)
    # Test VGG11 forward without classifiers
    model = VGG(11, out_indices=(0, 1, 2, 3, 4))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 5
    # Each stage halves the spatial size and (up to 512) doubles channels.
    assert feat[0].shape == (1, 64, 112, 112)
    assert feat[1].shape == (1, 128, 56, 56)
    assert feat[2].shape == (1, 256, 28, 28)
    assert feat[3].shape == (1, 512, 14, 14)
    assert feat[4].shape == (1, 512, 7, 7)
    # Test VGG11 forward with classifiers (index 5 is the class score)
    model = VGG(11, num_classes=10, out_indices=(0, 1, 2, 3, 4, 5))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 6
    assert feat[0].shape == (1, 64, 112, 112)
    assert feat[1].shape == (1, 128, 56, 56)
    assert feat[2].shape == (1, 256, 28, 28)
    assert feat[3].shape == (1, 512, 14, 14)
    assert feat[4].shape == (1, 512, 7, 7)
    assert feat[5].shape == (1, 10)
    # Test VGG11BN forward
    model = VGG(11, norm_cfg=dict(type='BN'), out_indices=(0, 1, 2, 3, 4))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 5
    assert feat[0].shape == (1, 64, 112, 112)
    assert feat[1].shape == (1, 128, 56, 56)
    assert feat[2].shape == (1, 256, 28, 28)
    assert feat[3].shape == (1, 512, 14, 14)
    assert feat[4].shape == (1, 512, 7, 7)
    # Test VGG11BN forward with classifiers
    model = VGG(
        11,
        num_classes=10,
        norm_cfg=dict(type='BN'),
        out_indices=(0, 1, 2, 3, 4, 5))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 6
    assert feat[0].shape == (1, 64, 112, 112)
    assert feat[1].shape == (1, 128, 56, 56)
    assert feat[2].shape == (1, 256, 28, 28)
    assert feat[3].shape == (1, 512, 14, 14)
    assert feat[4].shape == (1, 512, 7, 7)
    assert feat[5].shape == (1, 10)
    # Test VGG13 with layers 1, 2, 3 out forward
    model = VGG(13, out_indices=(0, 1, 2))
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 3
    assert feat[0].shape == (1, 64, 112, 112)
    assert feat[1].shape == (1, 128, 56, 56)
    assert feat[2].shape == (1, 256, 28, 28)
    # Test VGG16 with top feature maps out forward (default out_indices)
    model = VGG(16)
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == (1, 512, 7, 7)
    # Test VGG19 with classification score out forward
    model = VGG(19, num_classes=10)
    model.init_weights()
    model.train()
    imgs = torch.randn(1, 3, 224, 224)
    feat = model(imgs)
    assert len(feat) == 1
    assert feat[0].shape == (1, 10)
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os
import tempfile
from copy import deepcopy
from unittest import TestCase
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from mmpretrain.models.backbones import VisionTransformer
from .utils import timm_resize_pos_embed
class TestVisionTransformer(TestCase):
    """Unit tests for the VisionTransformer backbone."""

    def setUp(self):
        # Baseline config: ViT-Base, 224x224 input, 16x16 patches.
        self.cfg = dict(
            arch='b', img_size=224, patch_size=16, drop_path_rate=0.1)

    def test_structure(self):
        """Test arch validation, out_indices checks and layer structure."""
        # Test invalid default arch
        with self.assertRaisesRegex(AssertionError, 'not in default archs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = 'unknown'
            VisionTransformer(**cfg)
        # Test invalid custom arch (missing 'embed_dims')
        with self.assertRaisesRegex(AssertionError, 'Custom arch needs'):
            cfg = deepcopy(self.cfg)
            cfg['arch'] = {
                'num_layers': 24,
                'num_heads': 16,
                'feedforward_channels': 4096
            }
            VisionTransformer(**cfg)
        # Test custom arch
        cfg = deepcopy(self.cfg)
        cfg['arch'] = {
            'embed_dims': 128,
            'num_layers': 24,
            'num_heads': 16,
            'feedforward_channels': 1024
        }
        model = VisionTransformer(**cfg)
        self.assertEqual(model.embed_dims, 128)
        self.assertEqual(model.num_layers, 24)
        for layer in model.layers:
            self.assertEqual(layer.attn.num_heads, 16)
            self.assertEqual(layer.ffn.feedforward_channels, 1024)
        # Test out_indices
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = {1: 1}
        with self.assertRaisesRegex(AssertionError, "get <class 'dict'>"):
            VisionTransformer(**cfg)
        cfg['out_indices'] = [0, 13]
        with self.assertRaisesRegex(AssertionError, 'Invalid out_indices 13'):
            VisionTransformer(**cfg)
        # Test model structure
        cfg = deepcopy(self.cfg)
        model = VisionTransformer(**cfg)
        self.assertEqual(len(model.layers), 12)
        # Drop-path rate grows linearly from 0 to drop_path_rate (0.1)
        # over the 12 layers.
        dpr_inc = 0.1 / (12 - 1)
        dpr = 0
        for layer in model.layers:
            self.assertEqual(layer.attn.embed_dims, 768)
            self.assertEqual(layer.attn.num_heads, 12)
            self.assertEqual(layer.ffn.feedforward_channels, 3072)
            self.assertAlmostEqual(layer.attn.out_drop.drop_prob, dpr)
            self.assertAlmostEqual(layer.ffn.dropout_layer.drop_prob, dpr)
            dpr += dpr_inc
        # Test model structure: prenorm
        cfg = deepcopy(self.cfg)
        cfg['pre_norm'] = True
        model = VisionTransformer(**cfg)
        self.assertNotEqual(model.pre_norm.__class__, torch.nn.Identity)

    def test_init_weights(self):
        """Test weight init and checkpoint loading (incl. pos_embed resize)."""
        # test weight init cfg
        cfg = deepcopy(self.cfg)
        cfg['init_cfg'] = [
            dict(
                type='Kaiming',
                layer='Conv2d',
                mode='fan_in',
                nonlinearity='linear')
        ]
        model = VisionTransformer(**cfg)
        ori_weight = model.patch_embed.projection.weight.clone().detach()
        # The pos_embed is all zero before initialize
        self.assertTrue(torch.allclose(model.pos_embed, torch.tensor(0.)))
        model.init_weights()
        initialized_weight = model.patch_embed.projection.weight
        self.assertFalse(torch.allclose(ori_weight, initialized_weight))
        self.assertFalse(torch.allclose(model.pos_embed, torch.tensor(0.)))
        # test load checkpoint
        pretrain_pos_embed = model.pos_embed.clone().detach()
        tmpdir = tempfile.gettempdir()
        checkpoint = os.path.join(tmpdir, 'test.pth')
        save_checkpoint(model.state_dict(), checkpoint)
        cfg = deepcopy(self.cfg)
        model = VisionTransformer(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        self.assertTrue(torch.allclose(model.pos_embed, pretrain_pos_embed))
        # test load checkpoint with different img_size: the pos_embed must
        # be interpolated to the new grid, matching timm's reference resize.
        cfg = deepcopy(self.cfg)
        cfg['img_size'] = 384
        model = VisionTransformer(**cfg)
        load_checkpoint(model, checkpoint, strict=True)
        resized_pos_embed = timm_resize_pos_embed(pretrain_pos_embed,
                                                  model.pos_embed)
        self.assertTrue(torch.allclose(model.pos_embed, resized_pos_embed))
        os.remove(checkpoint)

    def test_forward(self):
        """Test forward output types/shapes for the supported out_type
        settings, multiple out_indices and dynamic input sizes."""
        imgs = torch.randn(1, 3, 224, 224)
        # test with_cls_token=False
        cfg = deepcopy(self.cfg)
        cfg['with_cls_token'] = False
        cfg['out_type'] = 'cls_token'
        with self.assertRaisesRegex(ValueError, 'must be True'):
            VisionTransformer(**cfg)
        cfg = deepcopy(self.cfg)
        cfg['with_cls_token'] = False
        cfg['out_type'] = 'featmap'
        model = VisionTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        patch_token = outs[-1]
        self.assertEqual(patch_token.shape, (1, 768, 14, 14))
        # test with output cls_token
        cfg = deepcopy(self.cfg)
        model = VisionTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 1)
        cls_token = outs[-1]
        self.assertEqual(cls_token.shape, (1, 768))
        # Test forward with multi out indices (negative indices wrap around)
        cfg = deepcopy(self.cfg)
        cfg['out_indices'] = [-3, -2, -1]
        model = VisionTransformer(**cfg)
        outs = model(imgs)
        self.assertIsInstance(outs, tuple)
        self.assertEqual(len(outs), 3)
        for out in outs:
            self.assertEqual(out.shape, (1, 768))
        # Test forward with dynamic input size
        imgs1 = torch.randn(1, 3, 224, 224)
        imgs2 = torch.randn(1, 3, 256, 256)
        imgs3 = torch.randn(1, 3, 256, 309)
        cfg = deepcopy(self.cfg)
        cfg['out_type'] = 'featmap'
        model = VisionTransformer(**cfg)
        for imgs in [imgs1, imgs2, imgs3]:
            outs = model(imgs)
            self.assertIsInstance(outs, tuple)
            self.assertEqual(len(outs), 1)
            patch_token = outs[-1]
            # Patch grid is ceil(H/16) x ceil(W/16).
            expect_feat_shape = (math.ceil(imgs.shape[2] / 16),
                                 math.ceil(imgs.shape[3] / 16))
            self.assertEqual(patch_token.shape, (1, 768, *expect_feat_shape))
# Copyright (c) OpenMMLab. All rights reserved.
# The basic forward/backward tests are in ../test_models.py
import torch
from mmpretrain.apis import get_model
def test_out_type():
    """Check XCiT backbone output shapes for every supported ``out_type``.

    Builds the same XCiT-nano model once per output type (neck and head
    disabled so the backbone output is returned directly) and verifies
    the resulting tensor shape.
    """
    inputs = torch.rand(1, 3, 224, 224)
    # out_type -> expected backbone output shape (197 = 196 patches + cls).
    expected_shapes = {
        'raw': (1, 197, 128),
        'featmap': (1, 128, 14, 14),
        'cls_token': (1, 128),
        'avg_featmap': (1, 128),
    }
    for out_type, shape in expected_shapes.items():
        model = get_model(
            'xcit-nano-12-p16_3rdparty_in1k',
            backbone=dict(out_type=out_type),
            neck=None,
            head=None)
        outputs = model(inputs)[0]
        assert outputs.shape == shape
# Copyright (c) OpenMMLab. All rights reserved.
import math
import torch
import torch.nn.functional as F
def timm_resize_pos_embed(posemb, posemb_new, num_tokens=1, gs_new=()):
    """Timm version pos embed resize function.

    copied from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py

    Splits off the first ``num_tokens`` entries (e.g. the cls token),
    bicubically interpolates the remaining square patch grid to the size
    implied by ``posemb_new`` (or to ``gs_new`` if given), and re-attaches
    the prefix tokens unchanged.
    """  # noqa:E501
    grid_len_new = posemb_new.shape[1]
    if num_tokens:
        # Keep prefix tokens (cls/dist) aside; they are not interpolated.
        prefix_tokens = posemb[:, :num_tokens]
        grid_tokens = posemb[0, num_tokens:]
        grid_len_new -= num_tokens
    else:
        prefix_tokens = posemb[:, :0]
        grid_tokens = posemb[0]
    # Old grid is assumed square.
    gs_old = int(math.sqrt(len(grid_tokens)))
    if not len(gs_new):  # backwards compatibility
        gs_new = [int(math.sqrt(grid_len_new))] * 2
    assert len(gs_new) >= 2
    # (N, C) -> (1, C, gs_old, gs_old) for spatial interpolation.
    grid = grid_tokens.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
    grid = F.interpolate(
        grid, size=gs_new, mode='bicubic', align_corners=False)
    # Back to token layout: (1, gs_new[0]*gs_new[1], C).
    grid = grid.permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1)
    return torch.cat([prefix_tokens, grid], dim=1)
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
from unittest import TestCase
from unittest.mock import MagicMock
import torch
import torch.nn as nn
from mmengine import ConfigDict
from mmpretrain.models import ImageClassifier
from mmpretrain.registry import MODELS
from mmpretrain.structures import DataSample
def has_timm() -> bool:
    """Return whether the optional ``timm`` package is importable."""
    try:
        import timm  # noqa: F401
    except ImportError:
        return False
    return True
def has_huggingface() -> bool:
    """Return whether the optional ``transformers`` package is importable."""
    try:
        import transformers  # noqa: F401
    except ImportError:
        return False
    return True
class TestImageClassifier(TestCase):
    """Unit tests for ``ImageClassifier`` (ResNet-18 backbone, GAP neck,
    linear head)."""

    # Baseline classifier config reused (and tweaked) by every test.
    DEFAULT_ARGS = dict(
        type='ImageClassifier',
        backbone=dict(type='ResNet', depth=18),
        neck=dict(type='GlobalAveragePooling'),
        head=dict(
            type='LinearClsHead',
            num_classes=10,
            in_channels=512,
            loss=dict(type='CrossEntropyLoss')))

    def test_initialize(self):
        """Test construction: neck/head presence, pretrained shortcut and
        batch augments from ``train_cfg``."""
        model = MODELS.build(self.DEFAULT_ARGS)
        self.assertTrue(model.with_neck)
        self.assertTrue(model.with_head)
        # 'pretrained' is a shortcut that becomes a Pretrained init_cfg.
        cfg = {**self.DEFAULT_ARGS, 'pretrained': 'checkpoint'}
        model = MODELS.build(cfg)
        self.assertDictEqual(model.init_cfg,
                             dict(type='Pretrained', checkpoint='checkpoint'))
        cfg = ConfigDict(self.DEFAULT_ARGS)
        cfg.pop('neck')
        model = MODELS.build(cfg)
        self.assertFalse(model.with_neck)
        cfg = ConfigDict(self.DEFAULT_ARGS)
        cfg.pop('head')
        model = MODELS.build(cfg)
        self.assertFalse(model.with_head)
        # test set batch augmentation from train_cfg
        cfg = {
            **self.DEFAULT_ARGS, 'train_cfg':
            dict(augments=dict(type='Mixup', alpha=1.))
        }
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNotNone(model.data_preprocessor.batch_augments)
        cfg = {**self.DEFAULT_ARGS, 'train_cfg': dict()}
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNone(model.data_preprocessor.batch_augments)

    def test_extract_feat(self):
        """Test feature extraction at each stage: backbone, neck and
        pre_logits, plus the multi-input ``extract_feats`` variant."""
        inputs = torch.rand(1, 3, 224, 224)
        cfg = ConfigDict(self.DEFAULT_ARGS)
        cfg.backbone.out_indices = (0, 1, 2, 3)
        model: ImageClassifier = MODELS.build(cfg)
        # test backbone output
        feats = model.extract_feat(inputs, stage='backbone')
        self.assertEqual(len(feats), 4)
        self.assertEqual(feats[0].shape, (1, 64, 56, 56))
        self.assertEqual(feats[1].shape, (1, 128, 28, 28))
        self.assertEqual(feats[2].shape, (1, 256, 14, 14))
        self.assertEqual(feats[3].shape, (1, 512, 7, 7))
        # test neck output (GAP collapses the spatial dims)
        feats = model.extract_feat(inputs, stage='neck')
        self.assertEqual(len(feats), 4)
        self.assertEqual(feats[0].shape, (1, 64))
        self.assertEqual(feats[1].shape, (1, 128))
        self.assertEqual(feats[2].shape, (1, 256))
        self.assertEqual(feats[3].shape, (1, 512))
        # test pre_logits output
        feats = model.extract_feat(inputs, stage='pre_logits')
        self.assertEqual(feats.shape, (1, 512))
        # TODO: test transformer style feature extraction
        # test extract_feats
        multi_feats = model.extract_feats([inputs, inputs], stage='backbone')
        self.assertEqual(len(multi_feats), 2)
        for feats in multi_feats:
            self.assertEqual(len(feats), 4)
            self.assertEqual(feats[0].shape, (1, 64, 56, 56))
            self.assertEqual(feats[1].shape, (1, 128, 28, 28))
            self.assertEqual(feats[2].shape, (1, 256, 14, 14))
            self.assertEqual(feats[3].shape, (1, 512, 7, 7))
        # Without neck, stage='neck' falls back to the backbone output.
        cfg = ConfigDict(self.DEFAULT_ARGS)
        cfg.backbone.out_indices = (0, 1, 2, 3)
        cfg.pop('neck')
        model: ImageClassifier = MODELS.build(cfg)
        feats = model.extract_feat(inputs, stage='neck')
        self.assertEqual(len(feats), 4)
        self.assertEqual(feats[0].shape, (1, 64, 56, 56))
        self.assertEqual(feats[1].shape, (1, 128, 28, 28))
        self.assertEqual(feats[2].shape, (1, 256, 14, 14))
        self.assertEqual(feats[3].shape, (1, 512, 7, 7))
        # Without head, raise error
        cfg = ConfigDict(self.DEFAULT_ARGS)
        cfg.backbone.out_indices = (0, 1, 2, 3)
        cfg.pop('head')
        model: ImageClassifier = MODELS.build(cfg)
        with self.assertRaisesRegex(AssertionError, 'No head or the head'):
            model.extract_feat(inputs, stage='pre_logits')
        with self.assertRaisesRegex(AssertionError, 'use `extract_feat`'):
            model.extract_feats(inputs)

    def test_loss(self):
        """Test that ``loss`` returns a positive scalar loss."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        losses = model.loss(inputs, data_samples)
        self.assertGreater(losses['loss'].item(), 0)

    def test_predict(self):
        """Test ``predict`` with and without input data samples."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        predictions = model.predict(inputs)
        self.assertEqual(predictions[0].pred_score.shape, (10, ))
        # When data_samples are given, predictions are written back to them.
        predictions = model.predict(inputs, data_samples)
        self.assertEqual(predictions[0].pred_score.shape, (10, ))
        self.assertEqual(data_samples[0].pred_score.shape, (10, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)

    def test_forward(self):
        """Test the unified ``forward`` in tensor/loss/predict modes."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        # test pure forward
        outs = model(inputs)
        self.assertIsInstance(outs, torch.Tensor)
        # test forward train
        losses = model(inputs, data_samples, mode='loss')
        self.assertGreater(losses['loss'].item(), 0)
        # test forward test
        predictions = model(inputs, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (10, ))
        predictions = model(inputs, data_samples, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (10, ))
        self.assertEqual(data_samples[0].pred_score.shape, (10, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)
        # test forward with invalid mode
        with self.assertRaisesRegex(RuntimeError, 'Invalid mode "unknown"'):
            model(inputs, mode='unknown')

    def test_train_step(self):
        """Test one training step with a mocked optimizer wrapper."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        optim_wrapper = MagicMock()
        log_vars = model.train_step(data, optim_wrapper)
        self.assertIn('loss', log_vars)
        optim_wrapper.update_params.assert_called_once()

    def test_val_step(self):
        """Test one validation step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.val_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (10, ))

    def test_test_step(self):
        """Test one test step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.test_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (10, ))
@unittest.skipIf(not has_timm(), 'timm is not installed.')
class TestTimmClassifier(TestCase):
    """Unit tests for ``TimmClassifier`` wrapping a timm resnet18
    (1000 ImageNet classes). Skipped when timm is unavailable."""

    # Baseline config reused (and tweaked) by every test.
    DEFAULT_ARGS = dict(
        type='TimmClassifier',
        model_name='resnet18',
        loss=dict(type='CrossEntropyLoss'),
    )

    def test_initialize(self):
        """Test construction and batch augments from ``train_cfg``."""
        model = MODELS.build(self.DEFAULT_ARGS)
        assert isinstance(model.model, nn.Module)
        # test set batch augmentation from train_cfg
        cfg = {
            **self.DEFAULT_ARGS, 'train_cfg':
            dict(augments=dict(type='Mixup', alpha=1.))
        }
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNotNone(model.data_preprocessor.batch_augments)
        cfg = {**self.DEFAULT_ARGS, 'train_cfg': dict()}
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNone(model.data_preprocessor.batch_augments)

    def test_loss(self):
        """Test that ``loss`` returns a positive scalar loss."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        losses = model.loss(inputs, data_samples)
        self.assertGreater(losses['loss'].item(), 0)

    def test_predict(self):
        """Test ``predict`` with and without input data samples."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        predictions = model.predict(inputs)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        # When data_samples are given, predictions are written back to them.
        predictions = model.predict(inputs, data_samples)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        self.assertEqual(data_samples[0].pred_score.shape, (1000, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)

    def test_forward(self):
        """Test the unified ``forward`` in tensor/loss/predict modes."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        # test pure forward
        outs = model(inputs)
        self.assertIsInstance(outs, torch.Tensor)
        # test forward train
        losses = model(inputs, data_samples, mode='loss')
        self.assertGreater(losses['loss'].item(), 0)
        # test forward test
        predictions = model(inputs, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        predictions = model(inputs, data_samples, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        self.assertEqual(data_samples[0].pred_score.shape, (1000, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)
        # test forward with invalid mode
        with self.assertRaisesRegex(RuntimeError, 'Invalid mode "unknown"'):
            model(inputs, mode='unknown')

    def test_train_step(self):
        """Test one training step with a mocked optimizer wrapper."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        optim_wrapper = MagicMock()
        log_vars = model.train_step(data, optim_wrapper)
        self.assertIn('loss', log_vars)
        optim_wrapper.update_params.assert_called_once()

    def test_val_step(self):
        """Test one validation step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.val_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))

    def test_test_step(self):
        """Test one test step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.test_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
@unittest.skipIf(not has_huggingface(), 'huggingface is not installed.')
class TestHuggingFaceClassifier(TestCase):
    """Unit tests for ``HuggingFaceClassifier`` wrapping microsoft/resnet-18
    (1000 ImageNet classes). Skipped when transformers is unavailable."""

    # Baseline config reused (and tweaked) by every test.
    DEFAULT_ARGS = dict(
        type='HuggingFaceClassifier',
        model_name='microsoft/resnet-18',
        loss=dict(type='CrossEntropyLoss'),
    )

    def test_initialize(self):
        """Test construction and batch augments from ``train_cfg``."""
        model = MODELS.build(self.DEFAULT_ARGS)
        assert isinstance(model.model, nn.Module)
        # test set batch augmentation from train_cfg
        cfg = {
            **self.DEFAULT_ARGS, 'train_cfg':
            dict(augments=dict(type='Mixup', alpha=1.))
        }
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNotNone(model.data_preprocessor.batch_augments)
        cfg = {**self.DEFAULT_ARGS, 'train_cfg': dict()}
        model: ImageClassifier = MODELS.build(cfg)
        self.assertIsNone(model.data_preprocessor.batch_augments)

    def test_loss(self):
        """Test that ``loss`` returns a positive scalar loss."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        losses = model.loss(inputs, data_samples)
        self.assertGreater(losses['loss'].item(), 0)

    def test_predict(self):
        """Test ``predict`` with and without input data samples."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        predictions = model.predict(inputs)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        # When data_samples are given, predictions are written back to them.
        predictions = model.predict(inputs, data_samples)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        self.assertEqual(data_samples[0].pred_score.shape, (1000, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)

    def test_forward(self):
        """Test the unified ``forward`` in tensor/loss/predict modes."""
        inputs = torch.rand(1, 3, 224, 224)
        data_samples = [DataSample().set_gt_label(1)]
        model: ImageClassifier = MODELS.build(self.DEFAULT_ARGS)
        # test pure forward
        outs = model(inputs)
        self.assertIsInstance(outs, torch.Tensor)
        # test forward train
        losses = model(inputs, data_samples, mode='loss')
        self.assertGreater(losses['loss'].item(), 0)
        # test forward test
        predictions = model(inputs, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        predictions = model(inputs, data_samples, mode='predict')
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
        self.assertEqual(data_samples[0].pred_score.shape, (1000, ))
        torch.testing.assert_allclose(data_samples[0].pred_score,
                                      predictions[0].pred_score)
        # test forward with invalid mode
        with self.assertRaisesRegex(RuntimeError, 'Invalid mode "unknown"'):
            model(inputs, mode='unknown')

    def test_train_step(self):
        """Test one training step with a mocked optimizer wrapper."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        optim_wrapper = MagicMock()
        log_vars = model.train_step(data, optim_wrapper)
        self.assertIn('loss', log_vars)
        optim_wrapper.update_params.assert_called_once()

    def test_val_step(self):
        """Test one validation step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.val_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))

    def test_test_step(self):
        """Test one test step returns predictions."""
        cfg = {
            **self.DEFAULT_ARGS, 'data_preprocessor':
            dict(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5])
        }
        model: ImageClassifier = MODELS.build(cfg)
        data = {
            'inputs': torch.randint(0, 256, (1, 3, 224, 224)),
            'data_samples': [DataSample().set_gt_label(1)]
        }
        predictions = model.test_step(data)
        self.assertEqual(predictions[0].pred_score.shape, (1000, ))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment