Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
85529f35
Commit
85529f35
authored
Jul 30, 2022
by
unknown
Browse files
添加openmmlab测试用例
parent
b21b0c01
Changes
977
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3144 additions
and
0 deletions
+3144
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnet.py
...fication-speed-benchmark/mmcls/models/backbones/resnet.py
+650
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnet_cifar.py
...on-speed-benchmark/mmcls/models/backbones/resnet_cifar.py
+83
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnext.py
...ication-speed-benchmark/mmcls/models/backbones/resnext.py
+147
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/seresnet.py
...cation-speed-benchmark/mmcls/models/backbones/seresnet.py
+124
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/seresnext.py
...ation-speed-benchmark/mmcls/models/backbones/seresnext.py
+154
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/shufflenet_v1.py
...n-speed-benchmark/mmcls/models/backbones/shufflenet_v1.py
+315
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/shufflenet_v2.py
...n-speed-benchmark/mmcls/models/backbones/shufflenet_v2.py
+291
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/vgg.py
...ssification-speed-benchmark/mmcls/models/backbones/vgg.py
+186
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/vision_transformer.py
...ed-benchmark/mmcls/models/backbones/vision_transformer.py
+480
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/builder.py
.../mmclassification-speed-benchmark/mmcls/models/builder.py
+34
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/__init__.py
...tion-speed-benchmark/mmcls/models/classifiers/__init__.py
+4
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/base.py
...fication-speed-benchmark/mmcls/models/classifiers/base.py
+224
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/image.py
...ication-speed-benchmark/mmcls/models/classifiers/image.py
+97
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/__init__.py
...sification-speed-benchmark/mmcls/models/heads/__init__.py
+10
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/base_head.py
...ification-speed-benchmark/mmcls/models/heads/base_head.py
+14
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/cls_head.py
...sification-speed-benchmark/mmcls/models/heads/cls_head.py
+70
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/linear_head.py
...ication-speed-benchmark/mmcls/models/heads/linear_head.py
+61
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/multi_label_head.py
...on-speed-benchmark/mmcls/models/heads/multi_label_head.py
+55
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/multi_label_linear_head.py
...d-benchmark/mmcls/models/heads/multi_label_linear_head.py
+64
-0
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/vision_transformer_head.py
...d-benchmark/mmcls/models/heads/vision_transformer_head.py
+81
-0
No files found.
Too many changes to show.
To preserve performance only
977 of 977+
files are displayed.
Plain diff
Email patch
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnet.py
0 → 100644
View file @
85529f35
import
torch.nn
as
nn
import
torch.utils.checkpoint
as
cp
from
mmcv.cnn
import
(
ConvModule
,
build_conv_layer
,
build_norm_layer
,
constant_init
)
from
mmcv.utils.parrots_wrapper
import
_BatchNorm
from
..builder
import
BACKBONES
from
.base_backbone
import
BaseBackbone
class BasicBlock(nn.Module):
    """BasicBlock for ResNet.

    Two 3x3 convs with a residual (identity) connection, as in ResNet-18/34.

    Args:
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        expansion (int): The ratio of ``out_channels/mid_channels`` where
            ``mid_channels`` is the output channels of conv1. This is a
            reserved argument in BasicBlock and should always be 1. Default: 1.
        stride (int): stride of the block. Default: 1
        dilation (int): dilation of convolution. Default: 1
        downsample (nn.Module, optional): downsample operation on identity
            branch. Default: None.
        style (str): `pytorch` or `caffe`. It is unused and reserved for
            unified API with Bottleneck.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        conv_cfg (dict, optional): dictionary to construct and config conv
            layer. Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=1,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN')):
        super(BasicBlock, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        # BasicBlock only supports expansion == 1 (see docstring).
        assert self.expansion == 1
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        # build_norm_layer returns (name, module); the module is registered
        # below via add_module so its name carries the numeric postfix.
        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, self.mid_channels, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(
            norm_cfg, out_channels, postfix=2)

        # conv1 carries the block stride; padding == dilation keeps the
        # spatial size consistent for the 3x3 kernel.
        self.conv1 = build_conv_layer(
            conv_cfg,
            in_channels,
            self.mid_channels,
            3,
            stride=stride,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            out_channels,
            3,
            padding=1,
            bias=False)
        self.add_module(self.norm2_name, norm2)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    @property
    def norm1(self):
        # Resolve the dynamically named norm module registered in __init__.
        return getattr(self, self.norm1_name)

    @property
    def norm2(self):
        # Resolve the dynamically named norm module registered in __init__.
        return getattr(self, self.norm2_name)

    def forward(self, x):
        """Forward the block; final ReLU is applied after the residual add."""

        def _inner_forward(x):
            identity = x

            out = self.conv1(x)
            out = self.norm1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.norm2(out)

            if self.downsample is not None:
                identity = self.downsample(x)

            # Residual connection.
            out += identity

            return out

        if self.with_cp and x.requires_grad:
            # Gradient checkpointing: saves activation memory, recomputes
            # the inner forward during backward.
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        out = self.relu(out)

        return out
class Bottleneck(nn.Module):
    """Bottleneck block for ResNet.

    1x1 reduce -> 3x3 -> 1x1 expand, with a residual connection, as in
    ResNet-50/101/152.

    Args:
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        expansion (int): The ratio of ``out_channels/mid_channels`` where
            ``mid_channels`` is the input/output channels of conv2. Default: 4.
        stride (int): stride of the block. Default: 1
        dilation (int): dilation of convolution. Default: 1
        downsample (nn.Module, optional): downsample operation on identity
            branch. Default: None.
        style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
            stride-two layer is the 3x3 conv layer, otherwise the stride-two
            layer is the first 1x1 conv layer. Default: "pytorch".
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        conv_cfg (dict, optional): dictionary to construct and config conv
            layer. Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=4,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN')):
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        # Where the stride is applied depends on the style (see docstring).
        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        # build_norm_layer returns (name, module); modules are registered
        # below via add_module so their names carry the numeric postfix.
        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, self.mid_channels, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(
            norm_cfg, self.mid_channels, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(
            norm_cfg, out_channels, postfix=3)

        # 1x1 channel reduction.
        self.conv1 = build_conv_layer(
            conv_cfg,
            in_channels,
            self.mid_channels,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm1)
        # 3x3 spatial conv; padding == dilation keeps the spatial size
        # consistent for the 3x3 kernel.
        self.conv2 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            self.mid_channels,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.add_module(self.norm2_name, norm2)
        # 1x1 channel expansion.
        self.conv3 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            out_channels,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    @property
    def norm1(self):
        # Resolve the dynamically named norm module registered in __init__.
        return getattr(self, self.norm1_name)

    @property
    def norm2(self):
        # Resolve the dynamically named norm module registered in __init__.
        return getattr(self, self.norm2_name)

    @property
    def norm3(self):
        # Resolve the dynamically named norm module registered in __init__.
        return getattr(self, self.norm3_name)

    def forward(self, x):
        """Forward the block; final ReLU is applied after the residual add."""

        def _inner_forward(x):
            identity = x

            out = self.conv1(x)
            out = self.norm1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.norm2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.norm3(out)

            if self.downsample is not None:
                identity = self.downsample(x)

            # Residual connection.
            out += identity

            return out

        if self.with_cp and x.requires_grad:
            # Gradient checkpointing: saves activation memory, recomputes
            # the inner forward during backward.
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        out = self.relu(out)

        return out
def get_expansion(block, expansion=None):
    """Get the expansion of a residual block.

    The block expansion will be obtained by the following order:

    1. If ``expansion`` is given, just return it.
    2. If ``block`` has the attribute ``expansion``, then return
       ``block.expansion``.
    3. Return the default value according the the block type:
       1 for ``BasicBlock`` and 4 for ``Bottleneck``.

    Args:
        block (class): The block class.
        expansion (int | None): The given expansion ratio.

    Returns:
        int: The expansion of the block.
    """
    # Case 1: an explicit ratio was supplied; only sanity-check it.
    if isinstance(expansion, int):
        assert expansion > 0
        return expansion

    # Anything that is neither an int nor None is a caller error.
    if expansion is not None:
        raise TypeError('expansion must be an integer or None')

    # Case 2: the block class declares its own ratio.
    if hasattr(block, 'expansion'):
        return block.expansion

    # Case 3: fall back to the well-known defaults per block type.
    if issubclass(block, BasicBlock):
        return 1
    if issubclass(block, Bottleneck):
        return 4

    raise TypeError(f'expansion is not specified for {block.__name__}')
class ResLayer(nn.Sequential):
    """ResLayer to build ResNet style backbone.

    A sequential stage of ``num_blocks`` residual blocks; only the first
    block may stride and/or change the channel count.

    Args:
        block (nn.Module): Residual block used to build ResLayer.
        num_blocks (int): Number of blocks.
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        expansion (int, optional): The expansion for BasicBlock/Bottleneck.
            If not specified, it will firstly be obtained via
            ``block.expansion``. If the block has no attribute "expansion",
            the following default values will be used: 1 for BasicBlock and
            4 for Bottleneck. Default: None.
        stride (int): stride of the first block. Default: 1.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False
        conv_cfg (dict, optional): dictionary to construct and config conv
            layer. Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
    """

    def __init__(self,
                 block,
                 num_blocks,
                 in_channels,
                 out_channels,
                 expansion=None,
                 stride=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 **kwargs):
        self.block = block
        self.expansion = get_expansion(block, expansion)

        # A projection shortcut is needed whenever the identity branch
        # cannot be added as-is (spatial or channel mismatch).
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = []
            conv_stride = stride
            if avg_down and stride != 1:
                # ResNet-D trick: average-pool first, then a stride-1 conv,
                # instead of a strided 1x1 conv.
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(
                        kernel_size=stride,
                        stride=stride,
                        ceil_mode=True,
                        count_include_pad=False))
            downsample.extend([
                build_conv_layer(
                    conv_cfg,
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=conv_stride,
                    bias=False),
                # build_norm_layer returns (name, module); only the module
                # is needed here.
                build_norm_layer(norm_cfg, out_channels)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        # First block: applies the stage stride and the downsample shortcut.
        layers.append(
            block(
                in_channels=in_channels,
                out_channels=out_channels,
                expansion=self.expansion,
                stride=stride,
                downsample=downsample,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                **kwargs))
        in_channels = out_channels
        # Remaining blocks: stride 1, identity shortcut.
        for i in range(1, num_blocks):
            layers.append(
                block(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    expansion=self.expansion,
                    stride=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
        super(ResLayer, self).__init__(*layers)
@BACKBONES.register_module()
class ResNet(BaseBackbone):
    """ResNet backbone.

    Please refer to the `paper <https://arxiv.org/abs/1512.03385>`_ for
    details.

    Args:
        depth (int): Network depth, from {18, 34, 50, 101, 152}.
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int): Output channels of the stem layer. Default: 64.
        base_channels (int): Middle channels of the first stage. Default: 64.
        num_stages (int): Stages of the network. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
            Default: ``(1, 2, 2, 2)``.
        dilations (Sequence[int]): Dilation of each stage.
            Default: ``(1, 1, 1, 1)``.
        out_indices (Sequence[int]): Output from which stages. If only one
            stage is specified, a single tensor (feature map) is returned,
            otherwise multiple stages are specified, a tuple of tensors will
            be returned. Default: ``(3, )``.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
            Default: False.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict | None): The config dict for conv layers. Default: None.
        norm_cfg (dict): The config dict for norm layers.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        zero_init_residual (bool): Whether to use zero init for last norm layer
            in resblocks to let them behave as identity. Default: True.

    Example:
        >>> from mmcls.models import ResNet
        >>> import torch
        >>> self = ResNet(depth=18)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 32, 32)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 64, 8, 8)
        (1, 128, 4, 4)
        (1, 256, 2, 2)
        (1, 512, 1, 1)
    """

    # Maps depth -> (block class, blocks per stage).
    arch_settings = {
        18: (BasicBlock, (2, 2, 2, 2)),
        34: (BasicBlock, (3, 4, 6, 3)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 in_channels=3,
                 stem_channels=64,
                 base_channels=64,
                 expansion=None,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(3, ),
                 style='pytorch',
                 deep_stem=False,
                 avg_down=False,
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=True,
                 init_cfg=[
                     dict(type='Kaiming', layer=['Conv2d']),
                     dict(
                         type='Constant',
                         val=1,
                         layer=['_BatchNorm', 'GroupNorm'])
                 ]):
        super(ResNet, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        self.depth = depth
        self.stem_channels = stem_channels
        self.base_channels = base_channels
        self.num_stages = num_stages
        assert num_stages >= 1 and num_stages <= 4
        self.strides = strides
        self.dilations = dilations
        # One stride/dilation per stage.
        assert len(strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.style = style
        self.deep_stem = deep_stem
        self.avg_down = avg_down
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp
        self.norm_eval = norm_eval
        self.zero_init_residual = zero_init_residual
        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.expansion = get_expansion(self.block, expansion)

        self._make_stem_layer(in_channels, stem_channels)

        self.res_layers = []
        _in_channels = stem_channels
        _out_channels = base_channels * self.expansion
        for i, num_blocks in enumerate(self.stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            res_layer = self.make_res_layer(
                block=self.block,
                num_blocks=num_blocks,
                in_channels=_in_channels,
                out_channels=_out_channels,
                expansion=self.expansion,
                stride=stride,
                dilation=dilation,
                style=self.style,
                avg_down=self.avg_down,
                with_cp=with_cp,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg)
            _in_channels = _out_channels
            # Channel count doubles at each stage.
            _out_channels *= 2
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            # Keep the registered names so forward() can iterate the stages.
            self.res_layers.append(layer_name)

        self._freeze_stages()

        # Output channels of the last block of the last stage.
        self.feat_dim = res_layer[-1].out_channels

    def make_res_layer(self, **kwargs):
        # Hook point: subclasses (e.g. ResNeXt) override to inject extra args.
        return ResLayer(**kwargs)

    @property
    def norm1(self):
        # Resolve the dynamically named stem norm registered in
        # _make_stem_layer.
        return getattr(self, self.norm1_name)

    def _make_stem_layer(self, in_channels, stem_channels):
        # Build the input stem: either three 3x3 convs (deep_stem) or the
        # classic 7x7 stride-2 conv, followed in both cases by max pooling.
        if self.deep_stem:
            self.stem = nn.Sequential(
                ConvModule(
                    in_channels,
                    stem_channels // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    inplace=True),
                ConvModule(
                    stem_channels // 2,
                    stem_channels // 2,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    inplace=True),
                ConvModule(
                    stem_channels // 2,
                    stem_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    inplace=True))
        else:
            self.conv1 = build_conv_layer(
                self.conv_cfg,
                in_channels,
                stem_channels,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False)
            self.norm1_name, norm1 = build_norm_layer(
                self.norm_cfg, stem_channels, postfix=1)
            self.add_module(self.norm1_name, norm1)
            self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def _freeze_stages(self):
        # Put the stem (if frozen_stages >= 0) and the first
        # `frozen_stages` residual stages into eval mode and stop their
        # gradients.
        if self.frozen_stages >= 0:
            if self.deep_stem:
                self.stem.eval()
                for param in self.stem.parameters():
                    param.requires_grad = False
            else:
                self.norm1.eval()
                for m in [self.conv1, self.norm1]:
                    for param in m.parameters():
                        param.requires_grad = False

        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'layer{i}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def init_weights(self):
        """Initialize weights; optionally zero-init the last norm of each
        residual block so it starts as an identity mapping."""
        super(ResNet, self).init_weights()
        if self.zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    constant_init(m.norm3, 0)
                elif isinstance(m, BasicBlock):
                    constant_init(m.norm2, 0)

    def forward(self, x):
        """Forward through stem and stages; return the feature map(s) of the
        stages listed in ``out_indices`` (a single tensor if only one)."""
        if self.deep_stem:
            x = self.stem(x)
        else:
            x = self.conv1(x)
            x = self.norm1(x)
            x = self.relu(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        """Switch train/eval mode while keeping frozen stages and
        (optionally) all norm layers in eval mode."""
        super(ResNet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval has effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()
@BACKBONES.register_module()
class ResNetV1d(ResNet):
    """ResNetV1d variant described in `Bag of Tricks.
    <https://arxiv.org/pdf/1812.01187.pdf>`_.

    Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in
    the input stem with three 3x3 convs. And in the downsampling block, a 2x2
    avg_pool with stride 2 is added before conv, whose stride is changed to 1.
    """

    def __init__(self, **kwargs):
        # The two flags below are exactly what distinguishes V1d from the
        # default ResNet; everything else is inherited unchanged.
        super().__init__(deep_stem=True, avg_down=True, **kwargs)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnet_cifar.py
0 → 100644
View file @
85529f35
import
torch.nn
as
nn
from
mmcv.cnn
import
build_conv_layer
,
build_norm_layer
from
..builder
import
BACKBONES
from
.resnet
import
ResNet
@BACKBONES.register_module()
class ResNet_CIFAR(ResNet):
    """ResNet backbone for CIFAR.

    Compared to standard ResNet, it uses `kernel_size=3` and `stride=1` in
    conv1, and does not apply MaxPoolinng after stem. It has been proven to
    be more efficient than standard ResNet in other public codebase, e.g.,
    `https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py`.

    Args:
        depth (int): Network depth, from {18, 34, 50, 101, 152}.
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int): Output channels of the stem layer. Default: 64.
        base_channels (int): Middle channels of the first stage. Default: 64.
        num_stages (int): Stages of the network. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
            Default: ``(1, 2, 2, 2)``.
        dilations (Sequence[int]): Dilation of each stage.
            Default: ``(1, 1, 1, 1)``.
        out_indices (Sequence[int]): Output from which stages. If only one
            stage is specified, a single tensor (feature map) is returned,
            otherwise multiple stages are specified, a tuple of tensors will
            be returned. Default: ``(3, )``.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        deep_stem (bool): This network has specific designed stem, thus it is
            asserted to be False.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict | None): The config dict for conv layers. Default: None.
        norm_cfg (dict): The config dict for norm layers.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        zero_init_residual (bool): Whether to use zero init for last norm layer
            in resblocks to let them behave as identity. Default: True.
    """

    def __init__(self, depth, deep_stem=False, **kwargs):
        super(ResNet_CIFAR, self).__init__(
            depth, deep_stem=deep_stem, **kwargs)
        # The CIFAR variant defines its own 3x3 stem below; a deep stem is
        # therefore not supported.
        assert not self.deep_stem, 'ResNet_CIFAR do not support deep_stem'

    def _make_stem_layer(self, in_channels, base_channels):
        # CIFAR stem: 3x3 stride-1 conv (instead of 7x7 stride-2) and no
        # max pooling, to suit the small 32x32 input resolution.
        self.conv1 = build_conv_layer(
            self.conv_cfg,
            in_channels,
            base_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False)
        self.norm1_name, norm1 = build_norm_layer(
            self.norm_cfg, base_channels, postfix=1)
        self.add_module(self.norm1_name, norm1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Forward through the CIFAR stem (no max pool) and stages; return
        the feature map(s) of the stages in ``out_indices``."""
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/resnext.py
0 → 100644
View file @
85529f35
from
mmcv.cnn
import
build_conv_layer
,
build_norm_layer
from
..builder
import
BACKBONES
from
.resnet
import
Bottleneck
as
_Bottleneck
from
.resnet
import
ResLayer
,
ResNet
class Bottleneck(_Bottleneck):
    """Bottleneck block for ResNeXt.

    Args:
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        groups (int): Groups of conv2.
        width_per_group (int): Width per group of conv2. 64x4d indicates
            ``groups=64, width_per_group=4`` and 32x8d indicates
            ``groups=32, width_per_group=8``.
        stride (int): stride of the block. Default: 1
        dilation (int): dilation of convolution. Default: 1
        downsample (nn.Module, optional): downsample operation on identity
            branch. Default: None
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        conv_cfg (dict, optional): dictionary to construct and config conv
            layer. Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 base_channels=64,
                 groups=32,
                 width_per_group=4,
                 **kwargs):
        super(Bottleneck, self).__init__(in_channels, out_channels, **kwargs)
        self.groups = groups
        self.width_per_group = width_per_group

        # For ResNet bottleneck, middle channels are determined by expansion
        # and out_channels, but for ResNeXt bottleneck, it is determined by
        # groups and width_per_group and the stage it is located in.
        if groups != 1:
            assert self.mid_channels % base_channels == 0
            self.mid_channels = (
                groups * width_per_group * self.mid_channels // base_channels)

        # Rebuild norm and conv layers with the recomputed mid_channels,
        # replacing the modules registered by the parent constructor.
        self.norm1_name, norm1 = build_norm_layer(
            self.norm_cfg, self.mid_channels, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(
            self.norm_cfg, self.mid_channels, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(
            self.norm_cfg, self.out_channels, postfix=3)

        self.conv1 = build_conv_layer(
            self.conv_cfg,
            self.in_channels,
            self.mid_channels,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm1)
        # Grouped 3x3 conv is what makes this a ResNeXt block.
        self.conv2 = build_conv_layer(
            self.conv_cfg,
            self.mid_channels,
            self.mid_channels,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=self.dilation,
            dilation=self.dilation,
            groups=groups,
            bias=False)
        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(
            self.conv_cfg,
            self.mid_channels,
            self.out_channels,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm3)
@BACKBONES.register_module()
class ResNeXt(ResNet):
    """ResNeXt backbone.

    Please refer to the `paper <https://arxiv.org/abs/1611.05431>`_ for
    details.

    Args:
        depth (int): Network depth, from {50, 101, 152}.
        groups (int): Groups of conv2 in Bottleneck. Default: 32.
        width_per_group (int): Width per group of conv2 in Bottleneck.
            Default: 4.
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int): Output channels of the stem layer. Default: 64.
        num_stages (int): Stages of the network. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
            Default: ``(1, 2, 2, 2)``.
        dilations (Sequence[int]): Dilation of each stage.
            Default: ``(1, 1, 1, 1)``.
        out_indices (Sequence[int]): Output from which stages. If only one
            stage is specified, a single tensor (feature map) is returned,
            otherwise multiple stages are specified, a tuple of tensors will
            be returned. Default: ``(3, )``.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
            Default: False.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict | None): The config dict for conv layers. Default: None.
        norm_cfg (dict): The config dict for norm layers.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        zero_init_residual (bool): Whether to use zero init for last norm layer
            in resblocks to let them behave as identity. Default: True.
    """

    # Maps depth -> (block class, blocks per stage); only bottleneck depths.
    arch_settings = {
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
        # Set before the parent constructor runs: ResNet.__init__ builds the
        # stages through self.make_res_layer, which reads these attributes.
        self.groups = groups
        self.width_per_group = width_per_group
        super(ResNeXt, self).__init__(depth, **kwargs)

    def make_res_layer(self, **kwargs):
        # Inject the ResNeXt-specific arguments into every stage.
        return ResLayer(
            groups=self.groups,
            width_per_group=self.width_per_group,
            base_channels=self.base_channels,
            **kwargs)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/seresnet.py
0 → 100644
View file @
85529f35
import
torch.utils.checkpoint
as
cp
from
..builder
import
BACKBONES
from
..utils.se_layer
import
SELayer
from
.resnet
import
Bottleneck
,
ResLayer
,
ResNet
class SEBottleneck(Bottleneck):
    """SEBottleneck block for SEResNet.

    A ResNet bottleneck with a Squeeze-and-Excitation layer applied to the
    residual branch before the identity addition.

    Args:
        in_channels (int): The input channels of the SEBottleneck block.
        out_channels (int): The output channel of the SEBottleneck block.
        se_ratio (int): Squeeze ratio in SELayer. Default: 16
    """

    def __init__(self, in_channels, out_channels, se_ratio=16, **kwargs):
        super(SEBottleneck, self).__init__(in_channels, out_channels, **kwargs)
        # Channel-attention module applied to the residual branch output.
        self.se_layer = SELayer(out_channels, ratio=se_ratio)

    def forward(self, x):
        """Forward the block; final ReLU is applied after the residual add."""

        def _inner_forward(x):
            identity = x

            out = self.conv1(x)
            out = self.norm1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.norm2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.norm3(out)

            # Recalibrate channels before adding the identity shortcut.
            out = self.se_layer(out)

            if self.downsample is not None:
                identity = self.downsample(x)

            # Residual connection.
            out += identity

            return out

        if self.with_cp and x.requires_grad:
            # Gradient checkpointing: saves activation memory, recomputes
            # the inner forward during backward.
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        out = self.relu(out)

        return out
@BACKBONES.register_module()
class SEResNet(ResNet):
    """SEResNet backbone.

    Please refer to the `paper <https://arxiv.org/abs/1709.01507>`_ for
    details.

    Args:
        depth (int): Network depth, from {50, 101, 152}.
        se_ratio (int): Squeeze ratio in SELayer. Default: 16.
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int): Output channels of the stem layer. Default: 64.
        num_stages (int): Stages of the network. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
            Default: ``(1, 2, 2, 2)``.
        dilations (Sequence[int]): Dilation of each stage.
            Default: ``(1, 1, 1, 1)``.
        out_indices (Sequence[int]): Output from which stages. If only one
            stage is specified, a single tensor (feature map) is returned,
            otherwise multiple stages are specified, a tuple of tensors will
            be returned. Default: ``(3, )``.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
            Default: False.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict | None): The config dict for conv layers. Default: None.
        norm_cfg (dict): The config dict for norm layers.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        zero_init_residual (bool): Whether to use zero init for last norm layer
            in resblocks to let them behave as identity. Default: True.

    Example:
        >>> from mmcls.models import SEResNet
        >>> import torch
        >>> self = SEResNet(depth=50, out_indices=(0, 1, 2, 3))
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 256, 56, 56)
        (1, 512, 28, 28)
        (1, 1024, 14, 14)
        (1, 2048, 7, 7)
    """

    # depth -> (block class, number of blocks per stage)
    arch_settings = {
        50: (SEBottleneck, (3, 4, 6, 3)),
        101: (SEBottleneck, (3, 4, 23, 3)),
        152: (SEBottleneck, (3, 8, 36, 3))
    }

    def __init__(self, depth, se_ratio=16, **kwargs):
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for SEResNet')
        # Must be set before delegating: the parent constructor builds the
        # residual stages via make_res_layer() below, which reads se_ratio.
        self.se_ratio = se_ratio
        super(SEResNet, self).__init__(depth, **kwargs)

    def make_res_layer(self, **kwargs):
        # Forward the squeeze ratio into every SEBottleneck of the stage.
        return ResLayer(se_ratio=self.se_ratio, **kwargs)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/seresnext.py
0 → 100644
View file @
85529f35
from
mmcv.cnn
import
build_conv_layer
,
build_norm_layer
from
..builder
import
BACKBONES
from
.resnet
import
ResLayer
from
.seresnet
import
SEBottleneck
as
_SEBottleneck
from
.seresnet
import
SEResNet
class SEBottleneck(_SEBottleneck):
    """SEBottleneck block for SEResNeXt.

    Args:
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        base_channels (int): Middle channels of the first stage. Default: 64.
        groups (int): Groups of conv2.
        width_per_group (int): Width per group of conv2. 64x4d indicates
            ``groups=64, width_per_group=4`` and 32x8d indicates
            ``groups=32, width_per_group=8``.
        stride (int): stride of the block. Default: 1
        dilation (int): dilation of convolution. Default: 1
        downsample (nn.Module, optional): downsample operation on identity
            branch. Default: None
        se_ratio (int): Squeeze ratio in SELayer. Default: 16
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        conv_cfg (dict, optional): dictionary to construct and config conv
            layer. Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 base_channels=64,
                 groups=32,
                 width_per_group=4,
                 se_ratio=16,
                 **kwargs):
        super(SEBottleneck, self).__init__(in_channels, out_channels, se_ratio,
                                           **kwargs)
        self.groups = groups
        self.width_per_group = width_per_group

        # We follow the same rational of ResNext to compute mid_channels.
        # For SEResNet bottleneck, middle channels are determined by expansion
        # and out_channels, but for SEResNeXt bottleneck, it is determined by
        # groups and width_per_group and the stage it is located in.
        if groups != 1:
            assert self.mid_channels % base_channels == 0
            self.mid_channels = (
                groups * width_per_group * self.mid_channels // base_channels)

        # The parent constructor already built conv1/conv2/conv3 and their
        # norms for the old mid_channels, so they are all rebuilt here with
        # the ResNeXt-adjusted widths and grouped conv2.
        self.norm1_name, norm1 = build_norm_layer(
            self.norm_cfg, self.mid_channels, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(
            self.norm_cfg, self.mid_channels, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(
            self.norm_cfg, self.out_channels, postfix=3)

        self.conv1 = build_conv_layer(
            self.conv_cfg,
            self.in_channels,
            self.mid_channels,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(
            self.conv_cfg,
            self.mid_channels,
            self.mid_channels,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=self.dilation,
            dilation=self.dilation,
            groups=groups,
            bias=False)
        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(
            self.conv_cfg,
            self.mid_channels,
            self.out_channels,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm3)
@BACKBONES.register_module()
class SEResNeXt(SEResNet):
    """SEResNeXt backbone.

    A grouped-convolution (ResNeXt-style) variant of :class:`SEResNet`.
    Please refer to the `paper <https://arxiv.org/abs/1709.01507>`_ for
    details.

    Args:
        depth (int): Network depth, from {50, 101, 152}.
        groups (int): Groups of conv2 in Bottleneck. Default: 32.
        width_per_group (int): Width per group of conv2 in Bottleneck.
            Default: 4.
        **kwargs: Any keyword argument accepted by :class:`SEResNet`
            (``se_ratio``, ``in_channels``, ``stem_channels``,
            ``num_stages``, ``strides``, ``dilations``, ``out_indices``,
            ``style``, ``deep_stem``, ``avg_down``, ``frozen_stages``,
            ``conv_cfg``, ``norm_cfg``, ``norm_eval``, ``with_cp``,
            ``zero_init_residual``).
    """

    # depth -> (block class, blocks per stage)
    arch_settings = {
        50: (SEBottleneck, (3, 4, 6, 3)),
        101: (SEBottleneck, (3, 4, 23, 3)),
        152: (SEBottleneck, (3, 8, 36, 3))
    }

    def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
        # Record cardinality before delegating: the parent constructor
        # invokes make_res_layer(), which reads these attributes.
        self.groups, self.width_per_group = groups, width_per_group
        super(SEResNeXt, self).__init__(depth, **kwargs)

    def make_res_layer(self, **kwargs):
        """Build one residual stage carrying the ResNeXt cardinality
        options in addition to the caller-supplied arguments."""
        return ResLayer(
            groups=self.groups,
            width_per_group=self.width_per_group,
            base_channels=self.base_channels,
            **kwargs)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/shufflenet_v1.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn
as
nn
import
torch.utils.checkpoint
as
cp
from
mmcv.cnn
import
(
ConvModule
,
build_activation_layer
,
constant_init
,
normal_init
)
from
torch.nn.modules.batchnorm
import
_BatchNorm
from
mmcls.models.utils
import
channel_shuffle
,
make_divisible
from
..builder
import
BACKBONES
from
.base_backbone
import
BaseBackbone
class ShuffleUnit(nn.Module):
    """ShuffleUnit block.

    ShuffleNet unit with pointwise group convolution (GConv) and channel
    shuffle.

    Args:
        in_channels (int): The input channels of the ShuffleUnit.
        out_channels (int): The output channels of the ShuffleUnit.
        groups (int): The number of groups to be used in grouped 1x1
            convolutions in each ShuffleUnit. Default: 3
        first_block (bool): Whether it is the first ShuffleUnit of a
            sequential ShuffleUnits. Default: False, which means not using the
            grouped 1x1 convolution.
        combine (str): The ways to combine the input and output
            branches. Default: 'add'.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint
            will save some memory while slowing down the training speed.
            Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 groups=3,
                 first_block=True,
                 combine='add',
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super(ShuffleUnit, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first_block = first_block
        self.combine = combine
        self.groups = groups
        # Bottleneck is fixed at a quarter of the output width.
        self.bottleneck_channels = self.out_channels // 4
        self.with_cp = with_cp

        if self.combine == 'add':
            # Identity shortcut: spatial size preserved, channels must match.
            self.depthwise_stride = 1
            self._combine_func = self._add
            assert in_channels == out_channels, (
                'in_channels must be equal to out_channels when combine '
                'is add')
        elif self.combine == 'concat':
            # Downsampling unit: stride-2 depthwise conv + avg-pooled
            # shortcut, merged by channel concatenation. The residual
            # branch therefore only produces out_channels - in_channels.
            self.depthwise_stride = 2
            self._combine_func = self._concat
            self.out_channels -= self.in_channels
            self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        else:
            raise ValueError(f'Cannot combine tensors with {self.combine}. '
                             'Only "add" and "concat" are supported')

        # The very first unit of the network sees the stem output, whose
        # channel count is too small for grouping; use an ungrouped 1x1.
        self.first_1x1_groups = 1 if first_block else self.groups
        self.g_conv_1x1_compress = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=1,
            groups=self.first_1x1_groups,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # Depthwise 3x3; no activation (act_cfg=None) per the paper.
        self.depthwise_conv3x3_bn = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=3,
            stride=self.depthwise_stride,
            padding=1,
            groups=self.bottleneck_channels,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.g_conv_1x1_expand = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.out_channels,
            kernel_size=1,
            groups=self.groups,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.act = build_activation_layer(act_cfg)

    @staticmethod
    def _add(x, out):
        # residual connection
        return x + out

    @staticmethod
    def _concat(x, out):
        # concatenate along channel axis
        return torch.cat((x, out), 1)

    def forward(self, x):

        def _inner_forward(x):
            residual = x

            out = self.g_conv_1x1_compress(x)
            out = self.depthwise_conv3x3_bn(out)

            if self.groups > 1:
                out = channel_shuffle(out, self.groups)

            out = self.g_conv_1x1_expand(out)

            if self.combine == 'concat':
                # Concat units: activate the branch, then concatenate with
                # the avg-pooled shortcut.
                residual = self.avgpool(residual)
                out = self.act(out)
                out = self._combine_func(residual, out)
            else:
                # Add units: sum first, then activate — deliberately the
                # opposite order of the concat path.
                out = self._combine_func(residual, out)
                out = self.act(out)
            return out

        if self.with_cp and x.requires_grad:
            # Trade compute for memory during training.
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out
@BACKBONES.register_module()
class ShuffleNetV1(BaseBackbone):
    """ShuffleNetV1 backbone.

    Args:
        groups (int): The number of groups to be used in grouped 1x1
            convolutions in each ShuffleUnit. Default: 3.
        widen_factor (float): Width multiplier - adjusts the number
            of channels in each layer by this amount. Default: 1.0.
        out_indices (Sequence[int]): Output from which stages.
            Default: (2, )
        frozen_stages (int): Stages to be frozen (all param fixed).
            Default: -1, which means not freezing any parameters.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        init_cfg (dict | list[dict], optional): Initialization config passed
            to the base backbone. Default: None.
    """

    def __init__(self,
                 groups=3,
                 widen_factor=1.0,
                 out_indices=(2, ),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 norm_eval=False,
                 with_cp=False,
                 init_cfg=None):
        super(ShuffleNetV1, self).__init__(init_cfg)
        self.stage_blocks = [4, 8, 4]
        self.groups = groups

        for index in out_indices:
            if index not in range(0, 3):
                raise ValueError('the item in out_indices must in '
                                 f'range(0, 3). But received {index}')

        if frozen_stages not in range(-1, 3):
            raise ValueError('frozen_stages must be in range(-1, 3). '
                             f'But received {frozen_stages}')
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        # Stage widths from the paper; each group count has its own widths
        # so that FLOPs stay comparable across group settings.
        if groups == 1:
            channels = (144, 288, 576)
        elif groups == 2:
            channels = (200, 400, 800)
        elif groups == 3:
            channels = (240, 480, 960)
        elif groups == 4:
            channels = (272, 544, 1088)
        elif groups == 8:
            channels = (384, 768, 1536)
        else:
            raise ValueError(f'{groups} groups is not supported for 1x1 '
                             'Grouped Convolutions')

        channels = [make_divisible(ch * widen_factor, 8) for ch in channels]

        self.in_channels = int(24 * widen_factor)

        self.conv1 = ConvModule(
            in_channels=3,
            out_channels=self.in_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layers = nn.ModuleList()
        for i, num_blocks in enumerate(self.stage_blocks):
            # Only the very first unit of the network skips grouped 1x1.
            first_block = True if i == 0 else False
            layer = self.make_layer(channels[i], num_blocks, first_block)
            self.layers.append(layer)

    def _freeze_stages(self):
        """Freeze the stem and the first ``frozen_stages`` stages."""
        if self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False

        # range(-1) is empty, so frozen_stages=-1 freezes nothing here.
        for i in range(self.frozen_stages):
            layer = self.layers[i]
            layer.eval()
            for param in layer.parameters():
                param.requires_grad = False

    def init_weights(self):
        super(ShuffleNetV1, self).init_weights()
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'conv1' in name:
                    normal_init(m, mean=0, std=0.01)
                else:
                    # Fan-in-scaled init for the remaining convolutions.
                    normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                # FIX: constant_init expects a *module*; the previous code
                # passed ``m.weight`` (a Parameter), which made the call a
                # silent no-op because Parameters have no .weight/.bias.
                constant_init(m, val=1, bias=0.0001)
                if isinstance(m, _BatchNorm):
                    if m.running_mean is not None:
                        nn.init.constant_(m.running_mean, 0)

    def make_layer(self, out_channels, num_blocks, first_block=False):
        """Stack ShuffleUnit blocks to make a layer.

        Args:
            out_channels (int): out_channels of the block.
            num_blocks (int): Number of blocks.
            first_block (bool): Whether is the first ShuffleUnit of a
                sequential ShuffleUnits. Default: False, which means not using
                the grouped 1x1 convolution.
        """
        layers = []
        for i in range(num_blocks):
            # Only the stage's leading unit may be the network's first
            # block; it is also the downsampling ('concat') unit.
            first_block = first_block if i == 0 else False
            combine_mode = 'concat' if i == 0 else 'add'
            layers.append(
                ShuffleUnit(
                    self.in_channels,
                    out_channels,
                    groups=self.groups,
                    first_block=first_block,
                    combine=combine_mode,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    with_cp=self.with_cp))
            self.in_channels = out_channels

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)

        outs = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)

        # Single requested stage -> bare tensor; otherwise a tuple.
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        super(ShuffleNetV1, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/shufflenet_v2.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn
as
nn
import
torch.utils.checkpoint
as
cp
from
mmcv.cnn
import
ConvModule
,
constant_init
,
normal_init
from
torch.nn.modules.batchnorm
import
_BatchNorm
from
mmcls.models.utils
import
channel_shuffle
from
..builder
import
BACKBONES
from
.base_backbone
import
BaseBackbone
class InvertedResidual(nn.Module):
    """InvertedResidual block for ShuffleNetV2 backbone.

    Args:
        in_channels (int): The input channels of the block.
        out_channels (int): The output channels of the block.
        stride (int): Stride of the 3x3 convolution layer. Default: 1
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        self.with_cp = with_cp

        # Each of the two branches carries half of the output channels.
        branch_features = out_channels // 2
        if self.stride == 1:
            assert in_channels == branch_features * 2, (
                f'in_channels ({in_channels}) should equal to '
                f'branch_features * 2 ({branch_features * 2}) '
                'when stride is 1')

        if in_channels != branch_features * 2:
            assert self.stride != 1, (
                f'stride ({self.stride}) should not equal 1 when '
                f'in_channels != branch_features * 2')

        # branch1 exists only in downsampling units; stride-1 units pass
        # half of the split input through unchanged instead.
        if self.stride > 1:
            self.branch1 = nn.Sequential(
                ConvModule(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=self.stride,
                    padding=1,
                    groups=in_channels,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=None),
                ConvModule(
                    in_channels,
                    branch_features,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg),
            )

        self.branch2 = nn.Sequential(
            ConvModule(
                # Downsampling units feed the whole input into branch2;
                # stride-1 units only feed the second half of the split.
                in_channels if (self.stride > 1) else branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=3,
                stride=self.stride,
                padding=1,
                groups=branch_features,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))

    def forward(self, x):

        def _inner_forward(x):
            if self.stride > 1:
                out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
            else:
                # Channel split: identity half + transformed half.
                x1, x2 = x.chunk(2, dim=1)
                out = torch.cat((x1, self.branch2(x2)), dim=1)

            # Mix the two halves so information flows between branches.
            out = channel_shuffle(out, 2)

            return out

        if self.with_cp and x.requires_grad:
            # Trade compute for memory during training.
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out
@BACKBONES.register_module()
class ShuffleNetV2(BaseBackbone):
    """ShuffleNetV2 backbone.

    Args:
        widen_factor (float): Width multiplier - adjusts the number of
            channels in each layer by this amount. Default: 1.0.
        out_indices (Sequence[int]): Output from which stages.
            Default: (3, ).
        frozen_stages (int): Stages to be frozen (all param fixed).
            Default: -1, which means not freezing any parameters.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        init_cfg (dict | list[dict], optional): Initialization config passed
            to the base backbone. Default: None.
    """

    def __init__(self,
                 widen_factor=1.0,
                 out_indices=(3, ),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 norm_eval=False,
                 with_cp=False,
                 init_cfg=None):
        super(ShuffleNetV2, self).__init__(init_cfg)
        self.stage_blocks = [4, 8, 4]
        for index in out_indices:
            if index not in range(0, 4):
                raise ValueError('the item in out_indices must in '
                                 f'range(0, 4). But received {index}')

        if frozen_stages not in range(-1, 4):
            raise ValueError('frozen_stages must be in range(-1, 4). '
                             f'But received {frozen_stages}')
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        # Stage widths from the paper, one set per width multiplier. The
        # last entry is the width of the final 1x1 conv appended below.
        if widen_factor == 0.5:
            channels = [48, 96, 192, 1024]
        elif widen_factor == 1.0:
            channels = [116, 232, 464, 1024]
        elif widen_factor == 1.5:
            channels = [176, 352, 704, 1024]
        elif widen_factor == 2.0:
            channels = [244, 488, 976, 2048]
        else:
            raise ValueError('widen_factor must be in [0.5, 1.0, 1.5, 2.0]. '
                             f'But received {widen_factor}')

        self.in_channels = 24
        self.conv1 = ConvModule(
            in_channels=3,
            out_channels=self.in_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layers = nn.ModuleList()
        for i, num_blocks in enumerate(self.stage_blocks):
            layer = self._make_layer(channels[i], num_blocks)
            self.layers.append(layer)

        # Final 1x1 conv, registered as a fourth "stage" (out_indices may
        # reach 3).
        output_channels = channels[-1]
        self.layers.append(
            ConvModule(
                in_channels=self.in_channels,
                out_channels=output_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))

    def _make_layer(self, out_channels, num_blocks):
        """Stack blocks to make a layer.

        Args:
            out_channels (int): out_channels of the block.
            num_blocks (int): number of blocks.
        """
        layers = []
        for i in range(num_blocks):
            # The leading block of each stage downsamples (stride 2).
            stride = 2 if i == 0 else 1
            layers.append(
                InvertedResidual(
                    in_channels=self.in_channels,
                    out_channels=out_channels,
                    stride=stride,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    with_cp=self.with_cp))
            self.in_channels = out_channels

        return nn.Sequential(*layers)

    def _freeze_stages(self):
        """Freeze the stem and the first ``frozen_stages`` stages."""
        if self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False

        for i in range(self.frozen_stages):
            m = self.layers[i]
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def init_weights(self):
        # FIX: this method was named ``init_weighs`` (typo), so the
        # framework's init_weights() hook never dispatched here and the
        # custom initialization below was never applied.
        super(ShuffleNetV2, self).init_weights()
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'conv1' in name:
                    normal_init(m, mean=0, std=0.01)
                else:
                    # Fan-in-scaled init for the remaining convolutions.
                    normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                # FIX: constant_init expects a *module*; the previous code
                # passed ``m.weight`` (a Parameter), which made the call a
                # silent no-op because Parameters have no .weight/.bias.
                constant_init(m, val=1, bias=0.0001)
                if isinstance(m, _BatchNorm):
                    if m.running_mean is not None:
                        nn.init.constant_(m.running_mean, 0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)

        outs = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)

        # Single requested stage -> bare tensor; otherwise a tuple.
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        super(ShuffleNetV2, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # FIX: was ``nn.BatchNorm2d``, which missed other BN
                # variants; use _BatchNorm as the docstring promises and as
                # the sibling ShuffleNetV1 already does.
                if isinstance(m, _BatchNorm):
                    m.eval()
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/vgg.py
0 → 100644
View file @
85529f35
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
from
mmcv.utils.parrots_wrapper
import
_BatchNorm
from
..builder
import
BACKBONES
from
.base_backbone
import
BaseBackbone
def make_vgg_layer(in_channels,
                   out_channels,
                   num_blocks,
                   conv_cfg=None,
                   norm_cfg=None,
                   act_cfg=dict(type='ReLU'),
                   dilation=1,
                   with_norm=False,
                   ceil_mode=False):
    """Build one VGG stage: ``num_blocks`` 3x3 convs followed by a 2x2
    max-pool.

    Note:
        ``with_norm`` is accepted for interface compatibility but is not
        read here; normalization is driven entirely by ``norm_cfg``.

    Returns:
        list: The modules of the stage (conv blocks, then the pooling).
    """
    modules = []
    channels = in_channels
    for _ in range(num_blocks):
        # padding == dilation keeps the spatial size unchanged for a
        # 3x3 kernel at any dilation rate.
        conv = ConvModule(
            in_channels=channels,
            out_channels=out_channels,
            kernel_size=3,
            dilation=dilation,
            padding=dilation,
            bias=True,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        modules.append(conv)
        channels = out_channels
    modules.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))

    return modules
@BACKBONES.register_module()
class VGG(BaseBackbone):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_norm (bool): Use BatchNorm or not.
        num_classes (int): number of classes for classification.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int], optional): Output from which stages.
            If only one stage is specified, a single tensor (feature map) is
            returned, otherwise multiple stages are specified, a tuple of
            tensors will be returned. When it is None, the default behavior
            depends on whether num_classes is specified. If num_classes <= 0,
            the default value is (4, ), outputing the last feature map before
            classifier. If num_classes > 0, the default value is (5, ),
            outputing the classification score. Default: None.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: None (no normalization).
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        ceil_mode (bool): Whether to use ceil_mode of MaxPool. Default: False.
        with_last_pool (bool): Whether to keep the last pooling before
            classifier. Default: True.
        init_cfg (dict | list[dict], optional): Initialization config.
    """

    # Parameters to build layers. Each element specifies the number of conv in
    # each stage. For example, VGG11 contains 11 layers with learnable
    # parameters. 11 is computed as 11 = (1 + 1 + 2 + 2 + 2) + 3,
    # where 3 indicates the last three fully-connected layers.
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 num_classes=-1,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=None,
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 norm_eval=False,
                 ceil_mode=False,
                 with_last_pool=True,
                 init_cfg=[
                     dict(type='Kaiming', layer=['Conv2d']),
                     dict(type='Constant', val=1., layer=['_BatchNorm']),
                     dict(type='Normal', std=0.01, layer=['Linear'])
                 ]):
        super(VGG, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert num_stages >= 1 and num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        self.num_classes = num_classes
        self.frozen_stages = frozen_stages
        self.norm_eval = norm_eval
        with_norm = norm_cfg is not None

        # Stage index ``num_stages`` (default 5) refers to the classifier
        # output rather than a conv stage.
        if out_indices is None:
            out_indices = (5, ) if num_classes > 0 else (4, )
        assert max(out_indices) <= num_stages
        self.out_indices = out_indices

        self.in_channels = 3
        start_idx = 0
        vgg_layers = []
        # range_sub_modules[i] = [start, end) indices of stage i's modules
        # inside the flat ``features`` Sequential.
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # +1 accounts for the stage's trailing max-pool module.
            num_modules = num_blocks + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            # Channel widths double per stage up to 512 (64, 128, 256, 512,
            # 512).
            out_channels = 64 * 2**i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.in_channels,
                out_channels,
                num_blocks,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                dilation=dilation,
                with_norm=with_norm,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.in_channels = out_channels
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # Drop the final max-pool and shrink the last stage's range
            # accordingly.
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

        if self.num_classes > 0:
            # Classic VGG head; assumes a 7x7 final feature map
            # (224x224 input).
            self.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, num_classes),
            )

    def forward(self, x):
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i in range(len(self.stage_blocks)):
            for j in range(*self.range_sub_modules[i]):
                vgg_layer = vgg_layers[j]
                x = vgg_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if self.num_classes > 0:
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            outs.append(x)
        # Single output -> bare tensor; otherwise a tuple.
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def _freeze_stages(self):
        """Freeze parameters of the first ``frozen_stages`` stages."""
        vgg_layers = getattr(self, self.module_name)
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        super(VGG, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/backbones/vision_transformer.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
(
build_activation_layer
,
build_conv_layer
,
build_norm_layer
,
kaiming_init
)
from
..builder
import
BACKBONES
from
..utils
import
to_2tuple
from
.base_backbone
import
BaseBackbone
class FFN(nn.Module):
    """Implements feed-forward networks (FFNs) with residual connection.

    Args:
        embed_dims (int): The feature dimension. Same as
            `MultiheadAttention`.
        feedforward_channels (int): The hidden dimension of FFNs.
        num_fcs (int, optional): The number of fully-connected layers in
            FFNs. Defaults to 2.
        act_cfg (dict, optional): The activation config for FFNs.
        dropout (float, optional): Probability of an element to be
            zeroed. Default 0.0.
        add_residual (bool, optional): Add residual connection.
            Defaults to True.
    """

    def __init__(self,
                 embed_dims,
                 feedforward_channels,
                 num_fcs=2,
                 act_cfg=dict(type='GELU'),
                 dropout=0.0,
                 add_residual=True):
        super(FFN, self).__init__()
        assert num_fcs >= 2, 'num_fcs should be no less ' \
            f'than 2. got {num_fcs}.'
        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.num_fcs = num_fcs
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        layers = nn.ModuleList()
        in_channels = embed_dims
        # num_fcs - 1 hidden (linear + act + dropout) stages, then a final
        # projection back to embed_dims.
        for _ in range(num_fcs - 1):
            layers.append(
                nn.Sequential(
                    nn.Linear(in_channels, feedforward_channels),
                    self.activate, nn.Dropout(dropout)))
            in_channels = feedforward_channels
        layers.append(nn.Linear(feedforward_channels, embed_dims))
        self.layers = nn.Sequential(*layers)
        # NOTE(review): this attribute holds the nn.Dropout *module*, not
        # the float rate, so __repr__ below prints the module.
        self.dropout = nn.Dropout(dropout)
        self.add_residual = add_residual
        self.init_weights()

    def init_weights(self):
        """Xavier-normal weights and near-zero normal biases for all
        Linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                # xavier_init(m, distribution='uniform')
                # Bias init is different from our API
                # therefore initialize them separately
                # The initialization is sync with ClassyVision
                nn.init.xavier_normal_(m.weight)
                nn.init.normal_(m.bias, std=1e-6)

    def forward(self, x, residual=None):
        """Forward function for `FFN`."""
        out = self.layers(x)
        if not self.add_residual:
            return out
        if residual is None:
            # Default shortcut is the FFN input itself.
            residual = x
        return residual + self.dropout(out)

    def __repr__(self):
        """str: a string that describes the module"""
        repr_str = self.__class__.__name__
        repr_str += f'(embed_dims={self.embed_dims}, '
        repr_str += f'feedforward_channels={self.feedforward_channels}, '
        repr_str += f'num_fcs={self.num_fcs}, '
        repr_str += f'act_cfg={self.act_cfg}, '
        repr_str += f'dropout={self.dropout}, '
        repr_str += f'add_residual={self.add_residual})'
        return repr_str
class MultiheadAttention(nn.Module):
    """A wrapper for torch.nn.MultiheadAttention.

    This module implements MultiheadAttention with residual connection.

    Args:
        embed_dims (int): The embedding dimension.
        num_heads (int): Parallel attention heads. Same as
            `nn.MultiheadAttention`.
        attn_drop (float): A Dropout layer on attn_output_weights. Default 0.0.
        proj_drop (float): The drop out rate after attention. Default 0.0.
    """

    def __init__(self, embed_dims, num_heads, attn_drop=0.0, proj_drop=0.0):
        super(MultiheadAttention, self).__init__()
        assert embed_dims % num_heads == 0, 'embed_dims must be ' \
            f'divisible by num_heads. got {embed_dims} and {num_heads}.'
        self.embed_dims = embed_dims
        self.num_heads = num_heads
        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop)
        self.dropout = nn.Dropout(proj_drop)

    def forward(self,
                x,
                key=None,
                value=None,
                residual=None,
                query_pos=None,
                key_pos=None,
                attn_mask=None,
                key_padding_mask=None):
        """Forward function for `MultiheadAttention`.

        Args:
            x (Tensor): The input query with shape [num_query, bs,
                embed_dims]. Same in `nn.MultiheadAttention.forward`.
            key (Tensor): The key tensor with shape [num_key, bs,
                embed_dims]. Same in `nn.MultiheadAttention.forward`.
                Default None. If None, the `query` will be used.
            value (Tensor): The value tensor with same shape as `key`.
                Same in `nn.MultiheadAttention.forward`. Default None.
                If None, the `key` will be used.
            residual (Tensor): The tensor used for addition, with the
                same shape as `x`. Default None. If None, `x` will be used.
            query_pos (Tensor): The positional encoding for query, with
                the same shape as `x`. Default None. If not None, it will
                be added to `x` before forward function.
            key_pos (Tensor): The positional encoding for `key`, with the
                same shape as `key`. Default None. If not None, it will
                be added to `key` before forward function. If None, and
                `query_pos` has the same shape as `key`, then `query_pos`
                will be used for `key_pos`.
            attn_mask (Tensor): ByteTensor mask with shape [num_query,
                num_key]. Same in `nn.MultiheadAttention.forward`.
                Default None.
            key_padding_mask (Tensor): ByteTensor with shape [bs, num_key].
                Same in `nn.MultiheadAttention.forward`. Default None.

        Returns:
            Tensor: forwarded results with shape [num_query, bs, embed_dims].
        """
        query = x
        # Fall back to self-attention defaults when key/value are omitted.
        if key is None:
            key = query
        if value is None:
            value = key
        if residual is None:
            residual = x
        if key_pos is None:
            if query_pos is not None and key is not None:
                # Reuse the query positional encoding for keys when the
                # shapes match (typical self-attention case).
                if query_pos.shape == key.shape:
                    key_pos = query_pos
        if query_pos is not None:
            query = query + query_pos
        if key_pos is not None:
            key = key + key_pos
        # nn.MultiheadAttention returns (attn_output, attn_weights);
        # only the output is needed here.
        out = self.attn(
            query,
            key,
            value=value,
            attn_mask=attn_mask,
            key_padding_mask=key_padding_mask)[0]

        return residual + self.dropout(out)
class TransformerEncoderLayer(nn.Module):
    """Implements one encoder layer in Vision Transformer.

    Args:
        embed_dims (int): The feature dimension. Same as `FFN`.
        num_heads (int): Parallel attention heads.
        feedforward_channels (int): The hidden dimension for FFNs.
        attn_drop (float): The drop out rate for attention layer.
            Default 0.0.
        proj_drop (float): Probability of an element to be zeroed
            after the feed forward layer. Default 0.0.
        act_cfg (dict): The activation config for FFNs. Default GELU.
        norm_cfg (dict): Config dict for normalization layer. Default
            layer normalization.
        num_fcs (int): The number of fully-connected layers for FFNs.
            Default 2.
    """

    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 attn_drop=0.,
                 proj_drop=0.,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 num_fcs=2):
        super(TransformerEncoderLayer, self).__init__()
        # Pre-norm layout: norm1 -> attention, norm2 -> FFN.
        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, embed_dims, postfix=1)
        self.add_module(self.norm1_name, norm1)
        self.attn = MultiheadAttention(
            embed_dims,
            num_heads=num_heads,
            attn_drop=attn_drop,
            proj_drop=proj_drop)
        self.norm2_name, norm2 = build_norm_layer(
            norm_cfg, embed_dims, postfix=2)
        self.add_module(self.norm2_name, norm2)
        self.mlp = FFN(embed_dims, feedforward_channels, num_fcs, act_cfg,
                       proj_drop)

    @property
    def norm1(self):
        # Resolve the norm layer registered under its postfixed name.
        return getattr(self, self.norm1_name)

    @property
    def norm2(self):
        # Resolve the norm layer registered under its postfixed name.
        return getattr(self, self.norm2_name)

    def forward(self, x):
        norm_x = self.norm1(x)
        # Reason for permute: as the shape of input from pretrained weight
        # from pytorch-image-models is [batch_size, num_query, embed_dim],
        # but the one from nn.MultiheadAttention is
        # [num_query, batch_size, embed_dim]
        x = x.permute(1, 0, 2)
        norm_x = norm_x.permute(1, 0, 2)
        x = self.attn(norm_x, residual=x)
        # Convert the shape back to [batch_size, num_query, embed_dim] in
        # order to make use of the pretrained weight
        x = x.permute(1, 0, 2)
        x = self.mlp(self.norm2(x), residual=x)
        return x
class PatchEmbed(nn.Module):
    """Image to Patch Embedding.

    Args:
        img_size (int | tuple): The size of input image.
        patch_size (int): The size of one patch
        in_channels (int): The num of input channels.
        embed_dim (int): The dimensions of embedding.
        conv_cfg (dict | None): The config dict for conv layers.
            Default: None.
    """

    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 embed_dim=768,
                 conv_cfg=None):
        super(PatchEmbed, self).__init__()
        # Normalize img_size to an (H, W) pair.
        if isinstance(img_size, int):
            img_size = to_2tuple(img_size)
        elif isinstance(img_size, tuple):
            if len(img_size) == 1:
                img_size = to_2tuple(img_size[0])
            assert len(img_size) == 2, \
                f'The size of image should have length 1 or 2, ' \
                f'but got {len(img_size)}'
        self.img_size = img_size
        self.patch_size = to_2tuple(patch_size)

        num_patches = (self.img_size[1] // self.patch_size[1]) * (
            self.img_size[0] // self.patch_size[0])
        # Holds only when H and W divide evenly by the patch size.
        assert num_patches * self.patch_size[0] * self.patch_size[1] == \
            self.img_size[0] * self.img_size[1], \
            'The image size H*W must be divisible by patch size'
        self.num_patches = num_patches

        # Use conv layer to embed
        self.projection = build_conv_layer(
            conv_cfg,
            in_channels,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size)

        self.init_weights()

    def init_weights(self):
        # Lecun norm from ClassyVision
        kaiming_init(self.projection, mode='fan_in', nonlinearity='linear')

    def forward(self, x):
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't " \
            f'match model ({self.img_size[0]}*{self.img_size[1]}).'
        # The output size is (B, N, D), where N=H*W/P/P, D is embid_dim
        x = self.projection(x).flatten(2).transpose(1, 2)
        return x
class HybridEmbed(nn.Module):
    """CNN Feature Map Embedding.

    Extract feature map from CNN, flatten, project to embedding dim.
    """

    def __init__(self,
                 backbone,
                 img_size=224,
                 feature_size=None,
                 in_channels=3,
                 embed_dim=768,
                 conv_cfg=None):
        super().__init__()
        assert isinstance(backbone, nn.Module)
        # Normalize img_size to an (H, W) pair.
        if isinstance(img_size, int):
            img_size = to_2tuple(img_size)
        elif isinstance(img_size, tuple):
            if len(img_size) == 1:
                img_size = to_2tuple(img_size[0])
            assert len(img_size) == 2, \
                f'The size of image should have length 1 or 2, ' \
                f'but got {len(img_size)}'
        self.img_size = img_size
        self.backbone = backbone
        if feature_size is None:
            with torch.no_grad():
                # FIXME this is hacky, but most reliable way of
                # determining the exact dim of the output feature
                # map for all networks, the feature metadata has
                # reliable channel and stride info, but using
                # stride to calc feature dim requires info about padding of
                # each stage that isn't captured.
                training = backbone.training
                if training:
                    backbone.eval()
                # Probe the backbone with a dummy input to discover the
                # output feature map size and channel count.
                o = self.backbone(
                    torch.zeros(1, in_channels, img_size[0], img_size[1]))
                if isinstance(o, (list, tuple)):
                    # last feature if backbone outputs list/tuple of features
                    o = o[-1]
                feature_size = o.shape[-2:]
                feature_dim = o.shape[1]
                # Restore the backbone's original train/eval state.
                backbone.train(training)
        else:
            feature_size = to_2tuple(feature_size)
            if hasattr(self.backbone, 'feature_info'):
                feature_dim = self.backbone.feature_info.channels()[-1]
            else:
                feature_dim = self.backbone.num_features
        self.num_patches = feature_size[0] * feature_size[1]
        # Use conv layer to embed
        self.projection = build_conv_layer(
            conv_cfg, feature_dim, embed_dim, kernel_size=1, stride=1)

        self.init_weights()

    def init_weights(self):
        # Lecun norm from ClassyVision
        kaiming_init(self.projection, mode='fan_in', nonlinearity='linear')

    def forward(self, x):
        x = self.backbone(x)
        if isinstance(x, (list, tuple)):
            # last feature if backbone outputs list/tuple of features
            x = x[-1]
        # Flatten spatial dims and move channels last: (B, N, embed_dim).
        x = self.projection(x).flatten(2).transpose(1, 2)
        return x
@BACKBONES.register_module()
class VisionTransformer(BaseBackbone):
    """Vision Transformer.

    A PyTorch impl of : `An Image is Worth 16x16 Words:
    Transformers for Image Recognition at Scale` -
    https://arxiv.org/abs/2010.11929

    Args:
        num_layers (int): Depth of transformer
        embed_dim (int): Embedding dimension
        num_heads (int): Number of attention heads
        img_size (int | tuple): Input image size
        patch_size (int | tuple): The patch size
        in_channels (int): Number of input channels
        feedforward_channels (int): The hidden dimension for FFNs.
        drop_rate (float): Probability of an element to be zeroed.
            Default 0.0.
        attn_drop_rate (float): The drop out rate for attention layer.
            Default 0.0.
        hybrid_backbone (nn.Module): CNN backbone to use in-place of
            PatchEmbed module. Default None.
        norm_cfg (dict): Config dict for normalization layer. Default
            layer normalization.
        act_cfg (dict): The activation config for FFNs. Default GELU.
        num_fcs (int): The number of fully-connected layers for FFNs.
            Default 2.
    """

    def __init__(self,
                 num_layers=12,
                 embed_dim=768,
                 num_heads=12,
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 feedforward_channels=3072,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 hybrid_backbone=None,
                 norm_cfg=dict(type='LN'),
                 act_cfg=dict(type='GELU'),
                 num_fcs=2):
        super(VisionTransformer, self).__init__()
        self.embed_dim = embed_dim

        # Patchify either via a CNN stem (hybrid) or a plain conv patch
        # embedding.
        if hybrid_backbone is not None:
            self.patch_embed = HybridEmbed(
                hybrid_backbone,
                img_size=img_size,
                in_channels=in_channels,
                embed_dim=embed_dim)
        else:
            self.patch_embed = PatchEmbed(
                img_size=img_size,
                patch_size=patch_size,
                in_channels=in_channels,
                embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        # Learnable class token and position embedding (+1 for the token).
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.drop_after_pos = nn.Dropout(p=drop_rate)

        self.layers = nn.ModuleList()
        for _ in range(num_layers):
            self.layers.append(
                TransformerEncoderLayer(
                    embed_dim,
                    num_heads,
                    feedforward_channels,
                    attn_drop=attn_drop_rate,
                    proj_drop=drop_rate,
                    act_cfg=act_cfg,
                    norm_cfg=norm_cfg,
                    num_fcs=num_fcs))

        # Final norm applied after the encoder stack.
        self.norm1_name, norm1 = build_norm_layer(
            norm_cfg, embed_dim, postfix=1)
        self.add_module(self.norm1_name, norm1)

        self.init_weights()

    def init_weights(self):
        super(VisionTransformer, self).init_weights()
        nn.init.normal_(self.pos_embed, std=0.02)

    @property
    def norm1(self):
        # Resolve the norm layer registered under its postfixed name.
        return getattr(self, self.norm1_name)

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        # stole cls_tokens impl from Phil Wang, thanks
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.drop_after_pos(x)

        for layer in self.layers:
            x = layer(x)

        # Return only the normalized class-token embedding.
        x = self.norm1(x)[:, 0]

        return x
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/builder.py
0 → 100644
View file @
85529f35
from
mmcv.cnn
import
MODELS
as
MMCV_MODELS
from
mmcv.utils
import
Registry
MODELS
=
Registry
(
'models'
,
parent
=
MMCV_MODELS
)
BACKBONES
=
MODELS
NECKS
=
MODELS
HEADS
=
MODELS
LOSSES
=
MODELS
CLASSIFIERS
=
MODELS
def build_backbone(cfg):
    """Construct a backbone network from its config dict."""
    registry = BACKBONES
    return registry.build(cfg)
def build_neck(cfg):
    """Construct a neck module from its config dict."""
    registry = NECKS
    return registry.build(cfg)
def build_head(cfg):
    """Construct a classification head from its config dict."""
    registry = HEADS
    return registry.build(cfg)
def build_loss(cfg):
    """Construct a loss module from its config dict."""
    registry = LOSSES
    return registry.build(cfg)
def build_classifier(cfg):
    """Construct a full classifier model from its config dict."""
    registry = CLASSIFIERS
    return registry.build(cfg)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/__init__.py
0 → 100644
View file @
85529f35
from
.base
import
BaseClassifier
from
.image
import
ImageClassifier
__all__
=
[
'BaseClassifier'
,
'ImageClassifier'
]
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/base.py
0 → 100644
View file @
85529f35
import
warnings
from
abc
import
ABCMeta
,
abstractmethod
from
collections
import
OrderedDict
import
cv2
import
mmcv
import
torch
import
torch.distributed
as
dist
from
mmcv
import
color_val
from
mmcv.runner
import
BaseModule
# TODO import `auto_fp16` from mmcv and delete them from mmcls
try
:
from
mmcv.runner
import
auto_fp16
except
ImportError
:
warnings
.
warn
(
'auto_fp16 from mmcls will be deprecated.'
'Please install mmcv>=1.1.4.'
)
from
mmcls.core
import
auto_fp16
class BaseClassifier(BaseModule, metaclass=ABCMeta):
    """Base class for classifiers."""

    def __init__(self, init_cfg=None):
        super(BaseClassifier, self).__init__(init_cfg)
        # Toggled externally by fp16 hooks; read by the auto_fp16
        # decorator on forward().
        self.fp16_enabled = False

    @property
    def with_neck(self):
        # True when a neck module has been built and attached.
        return hasattr(self, 'neck') and self.neck is not None

    @property
    def with_head(self):
        # True when a head module has been built and attached.
        return hasattr(self, 'head') and self.head is not None

    @abstractmethod
    def extract_feat(self, imgs):
        pass

    def extract_feats(self, imgs):
        """Lazily yield features for each image batch in ``imgs``."""
        assert isinstance(imgs, list)
        for img in imgs:
            yield self.extract_feat(img)

    @abstractmethod
    def forward_train(self, imgs, **kwargs):
        """
        Args:
            img (list[Tensor]): List of tensors of shape (1, C, H, W).
                Typically these should be mean centered and std scaled.
            kwargs (keyword arguments): Specific to concrete implementation.
        """
        pass

    @abstractmethod
    def simple_test(self, img, **kwargs):
        pass

    def forward_test(self, imgs, **kwargs):
        """
        Args:
            imgs (List[Tensor]): the outer list indicates test-time
                augmentations and inner Tensor should have a shape NxCxHxW,
                which contains all images in the batch.
        """
        # Accept a bare tensor as a single-augmentation convenience.
        if isinstance(imgs, torch.Tensor):
            imgs = [imgs]
        for var, name in [(imgs, 'imgs')]:
            if not isinstance(var, list):
                raise TypeError(f'{name} must be a list, but got {type(var)}')

        if len(imgs) == 1:
            return self.simple_test(imgs[0], **kwargs)
        else:
            raise NotImplementedError('aug_test has not been implemented')

    @auto_fp16(apply_to=('img', ))
    def forward(self, img, return_loss=True, **kwargs):
        """Calls either forward_train or forward_test depending on whether
        return_loss=True.

        Note this setting will change the expected inputs. When
        `return_loss=True`, img and img_meta are single-nested (i.e. Tensor and
        List[dict]), and when `return_loss=False`, img and img_meta should be
        double nested (i.e. List[Tensor], List[List[dict]]), with the outer
        list indicating test time augmentations.
        """
        if return_loss:
            return self.forward_train(img, **kwargs)
        else:
            return self.forward_test(img, **kwargs)

    def _parse_losses(self, losses):
        """Reduce raw loss values to scalars for logging and a single
        total loss for back propagation."""
        log_vars = OrderedDict()
        for loss_name, loss_value in losses.items():
            if isinstance(loss_value, torch.Tensor):
                log_vars[loss_name] = loss_value.mean()
            elif isinstance(loss_value, list):
                log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
            elif isinstance(loss_value, dict):
                # Nested dicts (e.g. per-topk accuracy) are flattened into
                # log_vars unchanged.
                for name, value in loss_value.items():
                    log_vars[name] = value
            else:
                raise TypeError(
                    f'{loss_name} is not a tensor or list of tensors')

        # Only entries whose key contains 'loss' contribute to the total.
        loss = sum(_value for _key, _value in log_vars.items()
                   if 'loss' in _key)

        log_vars['loss'] = loss
        for loss_name, loss_value in log_vars.items():
            # reduce loss when distributed training
            if dist.is_available() and dist.is_initialized():
                loss_value = loss_value.data.clone()
                dist.all_reduce(loss_value.div_(dist.get_world_size()))
            log_vars[loss_name] = loss_value.item()

        return loss, log_vars

    def train_step(self, data, optimizer):
        """The iteration step during training.

        This method defines an iteration step during training, except for the
        back propagation and optimizer updating, which are done in an optimizer
        hook. Note that in some complicated cases or models, the whole process
        including back propagation and optimizer updating are also defined in
        this method, such as GAN.

        Args:
            data (dict): The output of dataloader.
            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
                runner is passed to ``train_step()``. This argument is unused
                and reserved.

        Returns:
            dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
                ``num_samples``.
                ``loss`` is a tensor for back propagation, which can be a
                weighted sum of multiple losses.
                ``log_vars`` contains all the variables to be sent to the
                logger.
                ``num_samples`` indicates the batch size (when the model is
                DDP, it means the batch size on each GPU), which is used for
                averaging the logs.
        """
        losses = self(**data)
        loss, log_vars = self._parse_losses(losses)

        outputs = dict(
            loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))

        return outputs

    def val_step(self, data, optimizer):
        """The iteration step during validation.

        This method shares the same signature as :func:`train_step`, but used
        during val epochs. Note that the evaluation after training epochs is
        not implemented with this method, but an evaluation hook.
        """
        losses = self(**data)
        loss, log_vars = self._parse_losses(losses)

        outputs = dict(
            loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))

        return outputs

    def show_result(self,
                    img,
                    result,
                    text_color='green',
                    font_scale=0.5,
                    row_width=20,
                    show=False,
                    win_name='',
                    wait_time=0,
                    out_file=None):
        """Draw `result` over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            result (Tensor): The classification results to draw over `img`.
            text_color (str or tuple or :obj:`Color`): Color of texts.
            font_scale (float): Font scales of texts.
            row_width (int): width between each row of results on the image.
            show (bool): Whether to show the image.
                Default: False.
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            out_file (str or None): The filename to write the image.
                Default: None.

        Returns:
            img (Tensor): Only if not `show` or `out_file`
        """
        img = mmcv.imread(img)
        img = img.copy()

        # write results on left-top of the image
        x, y = 0, row_width
        text_color = color_val(text_color)
        for k, v in result.items():
            if isinstance(v, float):
                v = f'{v:.2f}'
            label_text = f'{k}: {v}'
            cv2.putText(img, label_text, (x, y), cv2.FONT_HERSHEY_COMPLEX,
                        font_scale, text_color)
            y += row_width

        # if out_file specified, do not show image in window
        if out_file is not None:
            show = False

        if show:
            mmcv.imshow(img, win_name, wait_time)
        if out_file is not None:
            mmcv.imwrite(img, out_file)

        if not (show or out_file):
            warnings.warn('show==False and out_file is not specified, only '
                          'result image will be returned')
            return img
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/classifiers/image.py
0 → 100644
View file @
85529f35
import
copy
import
warnings
from
..builder
import
CLASSIFIERS
,
build_backbone
,
build_head
,
build_neck
from
..utils.augment
import
Augments
from
.base
import
BaseClassifier
@CLASSIFIERS.register_module()
class ImageClassifier(BaseClassifier):
    """Standard image classifier: backbone + optional neck + optional head,
    with optional batch augmentations (mixup/cutmix) during training."""

    def __init__(self,
                 backbone,
                 neck=None,
                 head=None,
                 pretrained=None,
                 train_cfg=None,
                 init_cfg=None):
        super(ImageClassifier, self).__init__(init_cfg)

        # Backward compatibility: map the deprecated `pretrained` argument
        # onto the init_cfg mechanism.
        if pretrained is not None:
            warnings.warn('DeprecationWarning: pretrained is a deprecated \
                key, please consider using init_cfg')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)

        self.backbone = build_backbone(backbone)

        if neck is not None:
            self.neck = build_neck(neck)

        if head is not None:
            self.head = build_head(head)

        self.augments = None
        if train_cfg is not None:
            augments_cfg = train_cfg.get('augments', None)
            if augments_cfg is not None:
                self.augments = Augments(augments_cfg)
            else:
                # Considering BC-breaking
                # Legacy `mixup`/`cutmix` keys are translated into the
                # unified Augments config.
                mixup_cfg = train_cfg.get('mixup', None)
                cutmix_cfg = train_cfg.get('cutmix', None)
                assert mixup_cfg is None or cutmix_cfg is None, \
                    'If mixup and cutmix are set simultaneously,' \
                    'use augments instead.'
                if mixup_cfg is not None:
                    warnings.warn('The mixup attribute will be deprecated. '
                                  'Please use augments instead.')
                    cfg = copy.deepcopy(mixup_cfg)
                    cfg['type'] = 'BatchMixup'
                    # In the previous version, mixup_prob is always 1.0.
                    cfg['prob'] = 1.0
                    self.augments = Augments(cfg)
                if cutmix_cfg is not None:
                    warnings.warn('The cutmix attribute will be deprecated. '
                                  'Please use augments instead.')
                    cfg = copy.deepcopy(cutmix_cfg)
                    cutmix_prob = cfg.pop('cutmix_prob')
                    cfg['type'] = 'BatchCutMix'
                    cfg['prob'] = cutmix_prob
                    self.augments = Augments(cfg)

    def extract_feat(self, img):
        """Directly extract features from the backbone + neck."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self, img, gt_label, **kwargs):
        """Forward computation during training.

        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            gt_label (Tensor): It should be of shape (N, 1) encoding the
                ground-truth label of input images for single label task. It
                should be of shape (N, C) encoding the ground-truth label
                of input images for multi-labels task.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        # Apply batch augmentation (mixup/cutmix/...) before extraction.
        if self.augments is not None:
            img, gt_label = self.augments(img, gt_label)

        x = self.extract_feat(img)

        losses = dict()
        loss = self.head.forward_train(x, gt_label)
        losses.update(loss)

        return losses

    def simple_test(self, img, img_metas):
        """Test without augmentation."""
        x = self.extract_feat(img)
        return self.head.simple_test(x)
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/__init__.py
0 → 100644
View file @
85529f35
from
.cls_head
import
ClsHead
from
.linear_head
import
LinearClsHead
from
.multi_label_head
import
MultiLabelClsHead
from
.multi_label_linear_head
import
MultiLabelLinearClsHead
from
.vision_transformer_head
import
VisionTransformerClsHead
__all__
=
[
'ClsHead'
,
'LinearClsHead'
,
'MultiLabelClsHead'
,
'MultiLabelLinearClsHead'
,
'VisionTransformerClsHead'
]
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/base_head.py
0 → 100644
View file @
85529f35
from
abc
import
ABCMeta
,
abstractmethod
from
mmcv.runner
import
BaseModule
class BaseHead(BaseModule, metaclass=ABCMeta):
    """Base head.

    Abstract base for classification heads; subclasses must implement
    ``forward_train``.
    """

    def __init__(self, init_cfg=None):
        super(BaseHead, self).__init__(init_cfg)

    @abstractmethod
    def forward_train(self, x, gt_label, **kwargs):
        pass
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/cls_head.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn.functional
as
F
from
mmcls.models.losses
import
Accuracy
from
..builder
import
HEADS
,
build_loss
from
.base_head
import
BaseHead
@HEADS.register_module()
class ClsHead(BaseHead):
    """classification head.

    Args:
        loss (dict): Config of classification loss.
        topk (int | tuple): Top-k accuracy.
        cal_acc (bool): Whether to calculate accuracy during training.
            If you use Mixup/CutMix or something like that during training,
            it is not reasonable to calculate accuracy. Defaults to False.
    """

    def __init__(self,
                 loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
                 topk=(1, ),
                 cal_acc=False,
                 init_cfg=None):
        super(ClsHead, self).__init__(init_cfg=init_cfg)

        assert isinstance(loss, dict)
        assert isinstance(topk, (int, tuple))
        if isinstance(topk, int):
            # Normalize a single int to a 1-tuple so downstream code can
            # always iterate over topk.
            topk = (topk, )
        for _topk in topk:
            assert _topk > 0, 'Top-k should be larger than 0'
        self.topk = topk

        self.compute_loss = build_loss(loss)
        self.compute_accuracy = Accuracy(topk=self.topk)
        self.cal_acc = cal_acc

    def loss(self, cls_score, gt_label):
        """Compute loss (and optionally top-k accuracy) from scores."""
        num_samples = len(cls_score)
        losses = dict()
        # compute loss
        loss = self.compute_loss(cls_score, gt_label, avg_factor=num_samples)
        if self.cal_acc:
            # compute accuracy
            acc = self.compute_accuracy(cls_score, gt_label)
            assert len(acc) == len(self.topk)
            losses['accuracy'] = {
                f'top-{k}': a
                for k, a in zip(self.topk, acc)
            }
        losses['loss'] = loss
        return losses

    def forward_train(self, cls_score, gt_label):
        losses = self.loss(cls_score, gt_label)
        return losses

    def simple_test(self, cls_score):
        """Test without augmentation."""
        if isinstance(cls_score, list):
            # Average scores over test-time augmentations.
            cls_score = sum(cls_score) / float(len(cls_score))
        pred = F.softmax(cls_score, dim=1) if cls_score is not None else None
        on_trace = hasattr(torch.jit, 'is_tracing') and torch.jit.is_tracing()
        if torch.onnx.is_in_onnx_export() or on_trace:
            # Keep the output a tensor during ONNX export / jit tracing.
            return pred
        pred = list(pred.detach().cpu().numpy())
        return pred
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/linear_head.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
..builder
import
HEADS
from
.cls_head
import
ClsHead
@HEADS.register_module()
class LinearClsHead(ClsHead):
    """Linear classifier head.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 init_cfg=None,
                 *args,
                 **kwargs):
        # Default init: Normal(0, 0.01) weights and zero bias on `fc`.
        init_cfg = init_cfg or dict(
            type='Normal', mean=0., std=0.01, bias=0., override=dict(name='fc'))
        super(LinearClsHead, self).__init__(
            init_cfg=init_cfg, *args, **kwargs)
        self.in_channels = in_channels
        self.num_classes = num_classes

        if self.num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        self._init_layers()

    def _init_layers(self):
        # Single linear projection from features to class scores.
        self.fc = nn.Linear(self.in_channels, self.num_classes)

    def simple_test(self, img):
        """Test without augmentation."""
        cls_score = self.fc(img)
        if isinstance(cls_score, list):
            # Average scores over test-time augmentations.
            cls_score = sum(cls_score) / float(len(cls_score))
        pred = F.softmax(cls_score, dim=1) if cls_score is not None else None
        on_trace = hasattr(torch.jit, 'is_tracing') and torch.jit.is_tracing()
        if torch.onnx.is_in_onnx_export() or on_trace:
            # Keep the output a tensor during ONNX export / jit tracing.
            return pred
        pred = list(pred.detach().cpu().numpy())
        return pred

    def forward_train(self, x, gt_label):
        cls_score = self.fc(x)
        losses = self.loss(cls_score, gt_label)
        return losses
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/multi_label_head.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn.functional
as
F
from
..builder
import
HEADS
,
build_loss
from
.base_head
import
BaseHead
@HEADS.register_module()
class MultiLabelClsHead(BaseHead):
    """Classification head for multilabel task.

    Args:
        loss (dict): Config of classification loss.
    """

    def __init__(self,
                 loss=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     reduction='mean',
                     loss_weight=1.0),
                 init_cfg=None):
        super(MultiLabelClsHead, self).__init__(init_cfg=init_cfg)

        assert isinstance(loss, dict)

        self.compute_loss = build_loss(loss)

    def loss(self, cls_score, gt_label):
        """Compute the multilabel classification loss.

        Difficult examples (encoded as negative labels) are mapped to
        positive ones before the loss is computed.
        """
        gt_label = gt_label.type_as(cls_score)
        num_samples = len(cls_score)
        losses = dict()

        # map difficult examples to positive ones
        _gt_label = torch.abs(gt_label)
        # compute loss
        loss = self.compute_loss(cls_score, _gt_label, avg_factor=num_samples)
        losses['loss'] = loss
        return losses

    def forward_train(self, cls_score, gt_label):
        gt_label = gt_label.type_as(cls_score)
        losses = self.loss(cls_score, gt_label)
        return losses

    def simple_test(self, cls_score):
        """Test without augmentation; returns per-class sigmoid scores."""
        if isinstance(cls_score, list):
            # Average scores over test-time augmentations.
            cls_score = sum(cls_score) / float(len(cls_score))
        # torch.sigmoid replaces the deprecated F.sigmoid (which warns on
        # modern PyTorch); the computed values are identical.
        pred = torch.sigmoid(cls_score) if cls_score is not None else None
        on_trace = hasattr(torch.jit, 'is_tracing') and torch.jit.is_tracing()
        if torch.onnx.is_in_onnx_export() or on_trace:
            # Keep the output a tensor during ONNX export / jit tracing.
            return pred
        pred = list(pred.detach().cpu().numpy())
        return pred
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/multi_label_linear_head.py
0 → 100644
View file @
85529f35
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
..builder
import
HEADS
from
.multi_label_head
import
MultiLabelClsHead
@HEADS.register_module()
class MultiLabelLinearClsHead(MultiLabelClsHead):
    """Linear classification head for multi-label classification tasks.

    Args:
        num_classes (int): Number of categories. Must be positive.
        in_channels (int): Number of channels in the input feature map.
        loss (dict): Config of the classification loss. Defaults to a
            sigmoid-based ``CrossEntropyLoss`` with mean reduction.
        init_cfg (dict): Weight initialization config; by default the
            ``fc`` layer is initialized from N(0, 0.01) with zero bias.

    Raises:
        ValueError: If ``num_classes`` is not a positive integer.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 loss=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     reduction='mean',
                     loss_weight=1.0),
                 init_cfg=dict(
                     type='Normal',
                     mean=0.,
                     std=0.01,
                     bias=0.,
                     override=dict(name='fc'))):
        super(MultiLabelLinearClsHead, self).__init__(
            loss=loss, init_cfg=init_cfg)

        if num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        self.in_channels = in_channels
        self.num_classes = num_classes

        self._init_layers()

    def _init_layers(self):
        """Build the single linear classifier layer."""
        self.fc = nn.Linear(self.in_channels, self.num_classes)

    def forward_train(self, x, gt_label):
        """Forward pass during training; returns the loss dict."""
        gt_label = gt_label.type_as(x)
        cls_score = self.fc(x)
        losses = self.loss(cls_score, gt_label)
        return losses

    def simple_test(self, img):
        """Test without augmentation.

        Projects features through ``fc``, applies a sigmoid, and returns
        per-sample numpy arrays — or the raw tensor when exporting to ONNX
        or tracing with TorchScript.
        """
        cls_score = self.fc(img)
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        pred = torch.sigmoid(cls_score) if cls_score is not None else None

        on_trace = hasattr(torch.jit, 'is_tracing') and torch.jit.is_tracing()
        if torch.onnx.is_in_onnx_export() or on_trace:
            return pred
        pred = list(pred.detach().cpu().numpy())
        return pred
openmmlab_test/mmclassification-speed-benchmark/mmcls/models/heads/vision_transformer_head.py
0 → 100644
View file @
85529f35
from
collections
import
OrderedDict
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
build_activation_layer
,
constant_init
,
kaiming_init
from
..builder
import
HEADS
from
.cls_head
import
ClsHead
@HEADS.register_module()
class VisionTransformerClsHead(ClsHead):
    """Vision Transformer classifier head.

    Args:
        num_classes (int): Number of categories excluding the background
            category. Must be positive.
        in_channels (int): Number of channels in the input feature map.
        hidden_dim (int, optional): Dimension of the hidden layer. Only
            used during pre-training. Default None.
        act_cfg (dict): Activation config for the hidden layer. Only used
            during pre-training. Default Tanh.

    Raises:
        ValueError: If ``num_classes`` is not a positive integer.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 hidden_dim=None,
                 act_cfg=dict(type='Tanh'),
                 *args,
                 **kwargs):
        super(VisionTransformerClsHead, self).__init__(*args, **kwargs)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.act_cfg = act_cfg

        if self.num_classes <= 0:
            raise ValueError(
                f'num_classes={num_classes} must be a positive integer')

        self._init_layers()

    def _init_layers(self):
        """Assemble the classifier as a named sequential module.

        Without ``hidden_dim`` the head is a single linear layer; with it,
        a pre-logits projection and activation precede the final layer.
        """
        modules = OrderedDict()
        if self.hidden_dim is not None:
            modules['pre_logits'] = nn.Linear(self.in_channels,
                                              self.hidden_dim)
            modules['act'] = build_activation_layer(self.act_cfg)
            modules['head'] = nn.Linear(self.hidden_dim, self.num_classes)
        else:
            modules['head'] = nn.Linear(self.in_channels, self.num_classes)
        self.layers = nn.Sequential(modules)

    def init_weights(self):
        """Initialize weights; adapted from ClassyVision."""
        super(VisionTransformerClsHead, self).init_weights()
        if hasattr(self.layers, 'pre_logits'):
            # Lecun normal initialization for the pre-logits projection.
            kaiming_init(
                self.layers.pre_logits, mode='fan_in', nonlinearity='linear')
        constant_init(self.layers.head, 0)

    def simple_test(self, img):
        """Test without augmentation.

        Averages scores when a list is given, applies a softmax over the
        class dimension, and returns per-sample numpy arrays — or the raw
        tensor when exporting to ONNX or tracing.
        """
        cls_score = self.layers(img)
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))

        if cls_score is None:
            pred = None
        else:
            pred = F.softmax(cls_score, dim=1)

        tracing = hasattr(torch.jit, 'is_tracing') and torch.jit.is_tracing()
        if torch.onnx.is_in_onnx_export() or tracing:
            return pred
        return list(pred.detach().cpu().numpy())

    def forward_train(self, x, gt_label):
        """Forward pass during training; returns the loss dict."""
        return self.loss(self.layers(x), gt_label)
Prev
1
…
12
13
14
15
16
17
18
19
20
…
49
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment