Commit 1e2486af authored by sunxx1

Add inception_v3 test code

import torch
import torch.nn as nn
__all__ = ['mnasnet']
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor):
super(_InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel_size in [3, 5]
mid_ch = in_ch * expansion_factor
self.apply_residual = (in_ch == out_ch and stride == 1)
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch,
mid_ch,
kernel_size,
padding=kernel_size // 2,
stride=stride,
groups=mid_ch,
bias=False),
nn.BatchNorm2d(mid_ch),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch))
def forward(self, input):
if self.apply_residual:
return self.layers(input) + input
else:
return self.layers(input)
def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats):
""" Creates a stack of inverted residuals. """
assert repeats >= 1
# First one has no skip, because feature map size changes.
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor)
remaining = []
for _ in range(1, repeats):
remaining.append(
_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor))
return nn.Sequential(first, *remaining)
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
""" Asymmetric rounding to make `val` divisible by `divisor`. With default
bias, will round up, unless the number is no more than 10% greater than the
smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
assert 0.0 < round_up_bias < 1.0
new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
return new_val if new_val >= round_up_bias * val else new_val + divisor
def _get_depths(scale):
""" Scales tensor depths as in reference MobileNet code, prefers rouding up
rather than down. """
depths = [32, 16, 24, 40, 80, 96, 192, 320]
return [_round_to_multiple_of(depth * scale, 8) for depth in depths]
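# Worked example of the asymmetric rounding above (values checked by hand; an
# executable check is included in the smoke test after mnasnet() below):
#   _round_to_multiple_of(83, 8) -> 80   # 83 is within 10% of 80, round down
#   _round_to_multiple_of(84, 8) -> 88   # 84 is more than 10% above 80, round up
#   _get_depths(1.0)             -> [32, 16, 24, 40, 80, 96, 192, 320]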
class MNASNet(torch.nn.Module):
# Version 2 adds depth scaling in the initial stages of the network.
_version = 2
def __init__(self, scale, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
assert scale > 0.0
self.scale = scale
self.num_classes = num_classes
depths = _get_depths(scale)
layers = [
# First layer: regular conv.
nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
nn.BatchNorm2d(depths[0]),
nn.ReLU(inplace=True),
# Depthwise separable, no skip.
nn.Conv2d(depths[0],
depths[0],
3,
padding=1,
stride=1,
groups=depths[0],
bias=False),
nn.BatchNorm2d(depths[0]),
nn.ReLU(inplace=True),
nn.Conv2d(depths[0], depths[1], 1, padding=0, stride=1,
bias=False),
nn.BatchNorm2d(depths[1]),
# MNASNet blocks: stacks of inverted residuals.
_stack(depths[1], depths[2], 3, 2, 3, 3),
_stack(depths[2], depths[3], 5, 2, 3, 3),
_stack(depths[3], depths[4], 5, 2, 6, 3),
_stack(depths[4], depths[5], 3, 1, 6, 2),
_stack(depths[5], depths[6], 5, 2, 6, 4),
_stack(depths[6], depths[7], 3, 1, 6, 1),
# Final mapping to classifier input.
nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(1280),
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
nn.Linear(1280, num_classes))
self._initialize_weights()
def forward(self, x):
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
return self.classifier(x)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight,
mode="fan_out",
nonlinearity="relu")
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.kaiming_uniform_(m.weight,
mode="fan_out",
nonlinearity="sigmoid")
nn.init.zeros_(m.bias)
def mnasnet(**kwargs):
model = MNASNet(**kwargs)
return model
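# Hedged smoke test, not part of the original commit: it verifies the rounding
# examples above and runs one dummy 224x224 batch (an assumed input size)
# through the model to confirm the classifier output shape.
if __name__ == '__main__':
    assert _round_to_multiple_of(83, 8) == 80
    assert _round_to_multiple_of(84, 8) == 88
    net = mnasnet(scale=1.0, num_classes=1000)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 1000])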
import torch.nn as nn
from torch.nn import init
__all__ = ["mobile_v1"]
class MobileNetV1(nn.Module):
def __init__(self, scale=1.0, num_classes=1000, bn_group=None):
super(MobileNetV1, self).__init__()
BN = nn.BatchNorm2d
self.scale = scale
def conv_bn(inp, oup, stride):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
BN(oup), nn.ReLU(inplace=True))
def conv_dw(inp, oup, stride):
inp = int(inp * scale)
oup = int(oup * scale)
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
BN(inp),
nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
BN(oup),
nn.ReLU(inplace=True),
)
self.model = nn.Sequential(
conv_bn(3, int(32 * scale), 2),
conv_dw(32, 64, 1),
conv_dw(64, 128, 2),
conv_dw(128, 128, 1),
conv_dw(128, 256, 2),
conv_dw(256, 256, 1),
conv_dw(256, 512, 2),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 1024, 2),
conv_dw(1024, 1024, 1),
nn.AvgPool2d(7),
)
self.fc = nn.Linear(int(1024 * scale), num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.01)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.model(x)
x = x.view(-1, int(1024 * self.scale))
x = self.fc(x)
return x
def mobile_v1(**kwargs):
model = MobileNetV1(**kwargs)
return model
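# Hedged smoke test, not part of the original file: MobileNetV1 as written ends
# with a fixed 7x7 average pool, so a 224x224 input is assumed here.
if __name__ == '__main__':
    import torch
    net = mobile_v1(scale=1.0, num_classes=1000)
    with torch.no_grad():
        out = net(torch.randn(2, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([2, 1000])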
import torch.nn as nn
__all__ = ['mobile_v2']
def conv_bn(inp, oup, stride):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup), nn.ReLU6(inplace=False))
def conv_1x1_bn(inp, oup):
return nn.Sequential(nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup), nn.ReLU6(inplace=False))
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = round(inp * expand_ratio)
self.use_res_connect = self.stride == 1 and inp == oup
if expand_ratio == 1:
self.conv = nn.Sequential(
nn.Conv2d(hidden_dim,
hidden_dim,
3,
stride,
1,
groups=hidden_dim,
bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim,
hidden_dim,
3,
stride,
1,
groups=hidden_dim,
bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
scale=1,
num_classes=1000,
input_size=224,
width_mult=1.):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
last_channel = 1280
# each row: expansion factor t, output channels c, number of blocks n, stride s
inverted_residual_setting = [
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
assert input_size % 32 == 0
input_channel = int(input_channel * width_mult)
self.last_channel = int(
last_channel * width_mult) if width_mult > 1.0 else last_channel
self.features = [conv_bn(3, input_channel, 2)]
for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
if i == 0:
self.features.append(
block(input_channel, output_channel, s,
expand_ratio=t))
else:
self.features.append(
block(input_channel, output_channel, 1,
expand_ratio=t))
input_channel = output_channel
self.features.append(conv_1x1_bn(input_channel, self.last_channel))
self.features = nn.Sequential(*self.features)
self.classifier = nn.Sequential(
nn.Dropout(0.2),
# nn.Conv2d(self.last_channel, num_classes, kernel_size=1))
nn.Linear(self.last_channel, num_classes))
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = x.mean(3).mean(2)
x = self.classifier(x)
# x = x.view(x.size(0), -1)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def mobile_v2(**kwargs):
model = MobileNetV2(**kwargs)
return model
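# Hedged smoke test, not part of the original file: the assert in __init__
# requires input_size to be a multiple of 32, so 224 is used here.
if __name__ == '__main__':
    import torch
    net = mobile_v2(num_classes=1000, input_size=224)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 1000])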
import torch.nn as nn
import torch.nn.functional as F
__all__ = ['mobile_v3']
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
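# Worked example of the rounding above (values checked by hand; an executable
# check is included in the smoke test after mobile_v3() below):
#   _make_divisible(12, 8)   -> 16  (a tie between 8 and 16 rounds up)
#   _make_divisible(19, 8)   -> 24  (16 would be a >10% drop, so round up)
#   _make_divisible(37.5, 8) -> 40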
def conv_bn(inp, oup, stride, activation=nn.ReLU):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup), activation(inplace=True))
def conv_1x1_bn(inp, oup, activation=nn.ReLU):
return nn.Sequential(nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup), activation(inplace=True))
class Hswish(nn.Module):
def __init__(self, inplace=True):
super(Hswish, self).__init__()
self.inplace = inplace
def forward(self, x):
return x * F.relu6(x + 3., inplace=self.inplace) / 6.
class Hsigmoid(nn.Module):
def __init__(self, inplace=True):
super(Hsigmoid, self).__init__()
self.inplace = inplace
def forward(self, x):
return F.relu6(x + 3., inplace=self.inplace) / 6.
class SEModule(nn.Module):
def __init__(self, channel, reduction=4):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction, bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel, bias=False), Hsigmoid())
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y.expand_as(x)
class Identity(nn.Module):
def __init__(self, channel):
super(Identity, self).__init__()
def forward(self, x):
return x
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, kernel, stride, exp, se=False, nl='RE'):
super(InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel in [3, 5]
padding = (kernel - 1) // 2
self.use_res_connect = stride == 1 and inp == oup
if nl == 'RE':
activation = nn.ReLU
elif nl == 'HS':
activation = Hswish
else:
raise NotImplementedError
SELayer = SEModule if se else Identity
layers = []
if inp != exp:
# pw
layers.extend([
nn.Conv2d(inp, exp, 1, 1, 0, bias=False),
nn.BatchNorm2d(exp),
activation(inplace=True),
])
layers.extend([
# dw
nn.Conv2d(exp,
exp,
kernel,
stride,
padding,
groups=exp,
bias=False),
nn.BatchNorm2d(exp),
SELayer(exp),
activation(inplace=True),
# pw-linear
nn.Conv2d(exp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV3(nn.Module):
def __init__(self,
num_classes=1000,
scale=1.0,
dropout=0.8,
round_nearest=8,
mode='small',
bn=None):
super(MobileNetV3, self).__init__()
input_channel = 16
last_channel = 1280
# mobile_setting rows: kernel size k, expansion channels exp, output channels c,
# squeeze-and-excitation flag se, nonlinearity nl ('RE'/'HS'), stride s
if mode == 'large':
mobile_setting = [
[3, 16, 16, False, 'RE', 1],
[3, 64, 24, False, 'RE', 2],
[3, 72, 24, False, 'RE', 1],
[5, 72, 40, True, 'RE', 2],
[5, 120, 40, True, 'RE', 1],
[5, 120, 40, True, 'RE', 1],
[3, 240, 80, False, 'HS', 2],
[3, 200, 80, False, 'HS', 1],
[3, 184, 80, False, 'HS', 1],
[3, 184, 80, False, 'HS', 1],
[3, 480, 112, True, 'HS', 1],
[3, 672, 112, True, 'HS', 1],
[5, 672, 160, True, 'HS', 2],
[5, 960, 160, True, 'HS', 1],
[5, 960, 160, True, 'HS', 1],
]
elif mode == 'small':
mobile_setting = [
[3, 16, 16, True, 'RE', 2],
[3, 72, 24, False, 'RE', 2],
[3, 88, 24, False, 'RE', 1],
[5, 96, 40, True, 'HS', 2],
[5, 240, 40, True, 'HS', 1],
[5, 240, 40, True, 'HS', 1],
[5, 120, 48, True, 'HS', 1],
[5, 144, 48, True, 'HS', 1],
[5, 288, 96, True, 'HS', 2],
[5, 576, 96, True, 'HS', 1],
[5, 576, 96, True, 'HS', 1],
]
else:
raise NotImplementedError
# building first layer
last_channel = _make_divisible(
last_channel *
scale, round_nearest) if scale > 1.0 else last_channel
self.features = [conv_bn(3, input_channel, 2, activation=Hswish)]
self.classifier = []
# building mobile blocks
for k, exp, c, se, nl, s in mobile_setting:
output_channel = _make_divisible(c * scale, round_nearest)
exp_channel = _make_divisible(exp * scale, round_nearest)
self.features.append(
InvertedResidual(input_channel, output_channel, k, s,
exp_channel, se, nl))
input_channel = output_channel
# building last several layers
if mode == 'large':
last_conv = _make_divisible(960 * scale, round_nearest)
self.features.append(
conv_1x1_bn(input_channel, last_conv, activation=Hswish))
self.features.append(nn.AdaptiveAvgPool2d(1))
self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
self.features.append(Hswish(inplace=True))
elif mode == 'small':
last_conv = _make_divisible(576 * scale, round_nearest)
self.features.append(
conv_1x1_bn(input_channel, last_conv, activation=Hswish))
self.features.append(nn.AdaptiveAvgPool2d(1))
self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
self.features.append(Hswish(inplace=True))
else:
raise NotImplementedError
self.features = nn.Sequential(*self.features)
self.classifier = nn.Sequential(
nn.Dropout(p=dropout),
nn.Linear(last_channel, num_classes),
)
self.init_params()
def forward(self, x):
x = self.features(x)
x = x.mean([2, 3])
x = self.classifier(x)
return x
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
if m.weight is not None:
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def mobile_v3(**kwargs):
model = MobileNetV3(**kwargs)
return model
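# Hedged smoke test, not part of the original file: checks the rounding example
# above and runs both variants on a dummy 224x224 batch (an assumed input size;
# the network pools adaptively, so other sizes would also work).
if __name__ == '__main__':
    import torch
    assert _make_divisible(19, 8) == 24
    for mode in ('small', 'large'):
        net = mobile_v3(mode=mode, num_classes=1000)
        with torch.no_grad():
            out = net(torch.randn(1, 3, 224, 224))
        print(mode, out.shape)  # expected: torch.Size([1, 1000])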
import torch
import torch.nn as nn
__all__ = ['nasnetamobile', 'nasnetalarge']
class MaxPoolPad(nn.Module):
def __init__(self):
super(MaxPoolPad, self).__init__()
self.pad = nn.ZeroPad2d((1, 0, 1, 0))
self.pool = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x):
x = self.pad(x)
x = self.pool(x)
x = x[:, :, 1:, 1:]
return x
class AvgPoolPad(nn.Module):
def __init__(self, stride=2, padding=1):
super(AvgPoolPad, self).__init__()
self.pad = nn.ZeroPad2d((1, 0, 1, 0))
self.pool = nn.AvgPool2d(3,
stride=stride,
padding=padding,
count_include_pad=False)
def forward(self, x):
x = self.pad(x)
x = self.pool(x)
x = x[:, :, 1:, 1:]
return x
class SeparableConv2d(nn.Module):
def __init__(self,
in_channels,
out_channels,
dw_kernel,
dw_stride,
dw_padding,
bias=False):
super(SeparableConv2d, self).__init__()
self.depthwise_conv2d = nn.Conv2d(in_channels,
in_channels,
dw_kernel,
stride=dw_stride,
padding=dw_padding,
bias=bias,
groups=in_channels)
self.pointwise_conv2d = nn.Conv2d(in_channels,
out_channels,
1,
stride=1,
bias=bias)
def forward(self, x):
x = self.depthwise_conv2d(x)
x = self.pointwise_conv2d(x)
return x
class BranchSeparables(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False):
super(BranchSeparables, self).__init__()
self.relu = nn.ReLU()
self.separable_1 = SeparableConv2d(in_channels,
in_channels,
kernel_size,
stride,
padding,
bias=bias)
self.bn_sep_1 = nn.BatchNorm2d(in_channels,
eps=0.001,
momentum=0.1,
affine=True)
self.relu1 = nn.ReLU()
self.separable_2 = SeparableConv2d(in_channels,
out_channels,
kernel_size,
1,
padding,
bias=bias)
self.bn_sep_2 = nn.BatchNorm2d(out_channels,
eps=0.001,
momentum=0.1,
affine=True)
def forward(self, x):
x = self.relu(x)
x = self.separable_1(x)
x = self.bn_sep_1(x)
x = self.relu1(x)
x = self.separable_2(x)
x = self.bn_sep_2(x)
return x
class BranchSeparablesStem(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False):
super(BranchSeparablesStem, self).__init__()
self.relu = nn.ReLU()
self.separable_1 = SeparableConv2d(in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=bias)
self.bn_sep_1 = nn.BatchNorm2d(out_channels,
eps=0.001,
momentum=0.1,
affine=True)
self.relu1 = nn.ReLU()
self.separable_2 = SeparableConv2d(out_channels,
out_channels,
kernel_size,
1,
padding,
bias=bias)
self.bn_sep_2 = nn.BatchNorm2d(out_channels,
eps=0.001,
momentum=0.1,
affine=True)
def forward(self, x):
x = self.relu(x)
x = self.separable_1(x)
x = self.bn_sep_1(x)
x = self.relu1(x)
x = self.separable_2(x)
x = self.bn_sep_2(x)
return x
class BranchSeparablesReduction(BranchSeparables):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
z_padding=1,
bias=False):
BranchSeparables.__init__(self, in_channels, out_channels, kernel_size,
stride, padding, bias)
self.padding = nn.ZeroPad2d((z_padding, 0, z_padding, 0))
def forward(self, x):
x = self.relu(x)
x = self.padding(x)
x = self.separable_1(x)
x = x[:, :, 1:, 1:].contiguous()
x = self.bn_sep_1(x)
x = self.relu1(x)
x = self.separable_2(x)
x = self.bn_sep_2(x)
return x
class CellStem0(nn.Module):
def __init__(self, stem_filters, num_filters=42):
super(CellStem0, self).__init__()
self.num_filters = num_filters
self.stem_filters = stem_filters
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(self.stem_filters,
self.num_filters,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(self.num_filters,
eps=0.001,
momentum=0.1,
affine=True))
self.comb_iter_0_left = BranchSeparables(self.num_filters,
self.num_filters, 5, 2, 2)
self.comb_iter_0_right = BranchSeparablesStem(self.stem_filters,
self.num_filters,
7,
2,
3,
bias=False)
self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
self.comb_iter_1_right = BranchSeparablesStem(self.stem_filters,
self.num_filters,
7,
2,
3,
bias=False)
self.comb_iter_2_left = nn.AvgPool2d(3,
stride=2,
padding=1,
count_include_pad=False)
self.comb_iter_2_right = BranchSeparablesStem(self.stem_filters,
self.num_filters,
5,
2,
2,
bias=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparables(self.num_filters,
self.num_filters,
3,
1,
1,
bias=False)
self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x):
x1 = self.conv_1x1(x)
x_comb_iter_0_left = self.comb_iter_0_left(x1)
x_comb_iter_0_right = self.comb_iter_0_right(x)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x1)
x_comb_iter_1_right = self.comb_iter_1_right(x)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x1)
x_comb_iter_2_right = self.comb_iter_2_right(x)
x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right
x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)
x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1
x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)
x_comb_iter_4_right = self.comb_iter_4_right(x1)
x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right
x_out = torch.cat(
[x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1)
return x_out
class CellStem1(nn.Module):
def __init__(self, stem_filters, num_filters):
super(CellStem1, self).__init__()
self.num_filters = num_filters
self.stem_filters = stem_filters
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(2 * self.num_filters,
self.num_filters,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(self.num_filters,
eps=0.001,
momentum=0.1,
affine=True))
self.relu = nn.ReLU()
self.path_1 = nn.Sequential()
self.path_1.add_module(
'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
self.path_1.add_module(
'conv',
nn.Conv2d(self.stem_filters,
self.num_filters // 2,
1,
stride=1,
bias=False))
self.path_2 = nn.ModuleList()
self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
self.path_2.add_module(
'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
self.path_2.add_module(
'conv',
nn.Conv2d(self.stem_filters,
self.num_filters // 2,
1,
stride=1,
bias=False))
self.final_path_bn = nn.BatchNorm2d(self.num_filters,
eps=0.001,
momentum=0.1,
affine=True)
self.comb_iter_0_left = BranchSeparables(self.num_filters,
self.num_filters,
5,
2,
2,
bias=False)
self.comb_iter_0_right = BranchSeparables(self.num_filters,
self.num_filters,
7,
2,
3,
bias=False)
self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
self.comb_iter_1_right = BranchSeparables(self.num_filters,
self.num_filters,
7,
2,
3,
bias=False)
self.comb_iter_2_left = nn.AvgPool2d(3,
stride=2,
padding=1,
count_include_pad=False)
self.comb_iter_2_right = BranchSeparables(self.num_filters,
self.num_filters,
5,
2,
2,
bias=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparables(self.num_filters,
self.num_filters,
3,
1,
1,
bias=False)
self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x_conv0, x_stem_0):
x_left = self.conv_1x1(x_stem_0)
x_relu = self.relu(x_conv0)
# path 1
x_path1 = self.path_1(x_relu)
# path 2
x_path2 = self.path_2.pad(x_relu)
x_path2 = x_path2[:, :, 1:, 1:]
x_path2 = self.path_2.avgpool(x_path2)
x_path2 = self.path_2.conv(x_path2)
# final path
x_right = self.final_path_bn(torch.cat([x_path1, x_path2], 1))
x_comb_iter_0_left = self.comb_iter_0_left(x_left)
x_comb_iter_0_right = self.comb_iter_0_right(x_right)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x_left)
x_comb_iter_1_right = self.comb_iter_1_right(x_right)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x_left)
x_comb_iter_2_right = self.comb_iter_2_right(x_right)
x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right
x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)
x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1
x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)
x_comb_iter_4_right = self.comb_iter_4_right(x_left)
x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right
x_out = torch.cat(
[x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1)
return x_out
class FirstCell(nn.Module):
def __init__(self, in_channels_left, out_channels_left, in_channels_right,
out_channels_right):
super(FirstCell, self).__init__()
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(in_channels_right,
out_channels_right,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_right,
eps=0.001,
momentum=0.1,
affine=True))
self.relu = nn.ReLU()
self.path_1 = nn.Sequential()
self.path_1.add_module(
'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
self.path_1.add_module(
'conv',
nn.Conv2d(in_channels_left,
out_channels_left,
1,
stride=1,
bias=False))
self.path_2 = nn.ModuleList()
self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
self.path_2.add_module(
'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
self.path_2.add_module(
'conv',
nn.Conv2d(in_channels_left,
out_channels_left,
1,
stride=1,
bias=False))
self.final_path_bn = nn.BatchNorm2d(out_channels_left * 2,
eps=0.001,
momentum=0.1,
affine=True)
self.comb_iter_0_left = BranchSeparables(out_channels_right,
out_channels_right,
5,
1,
2,
bias=False)
self.comb_iter_0_right = BranchSeparables(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
self.comb_iter_1_left = BranchSeparables(out_channels_right,
out_channels_right,
5,
1,
2,
bias=False)
self.comb_iter_1_right = BranchSeparables(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
self.comb_iter_2_left = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_3_left = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparables(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
def forward(self, x, x_prev):
x_relu = self.relu(x_prev)
# path 1
x_path1 = self.path_1(x_relu)
# path 2
x_path2 = self.path_2.pad(x_relu)
x_path2 = x_path2[:, :, 1:, 1:]
x_path2 = self.path_2.avgpool(x_path2)
x_path2 = self.path_2.conv(x_path2)
# final path
x_left = self.final_path_bn(torch.cat([x_path1, x_path2], 1))
x_right = self.conv_1x1(x)
x_comb_iter_0_left = self.comb_iter_0_left(x_right)
x_comb_iter_0_right = self.comb_iter_0_right(x_left)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x_left)
x_comb_iter_1_right = self.comb_iter_1_right(x_left)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x_right)
x_comb_iter_2 = x_comb_iter_2_left + x_left
x_comb_iter_3_left = self.comb_iter_3_left(x_left)
x_comb_iter_3_right = self.comb_iter_3_right(x_left)
x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right
x_comb_iter_4_left = self.comb_iter_4_left(x_right)
x_comb_iter_4 = x_comb_iter_4_left + x_right
x_out = torch.cat([
x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3,
x_comb_iter_4
], 1)
return x_out
class NormalCell(nn.Module):
def __init__(self, in_channels_left, out_channels_left, in_channels_right,
out_channels_right):
super(NormalCell, self).__init__()
self.conv_prev_1x1 = nn.Sequential()
self.conv_prev_1x1.add_module('relu', nn.ReLU())
self.conv_prev_1x1.add_module(
'conv',
nn.Conv2d(in_channels_left,
out_channels_left,
1,
stride=1,
bias=False))
self.conv_prev_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_left,
eps=0.001,
momentum=0.1,
affine=True))
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(in_channels_right,
out_channels_right,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_right,
eps=0.001,
momentum=0.1,
affine=True))
self.comb_iter_0_left = BranchSeparables(out_channels_right,
out_channels_right,
5,
1,
2,
bias=False)
self.comb_iter_0_right = BranchSeparables(out_channels_left,
out_channels_left,
3,
1,
1,
bias=False)
self.comb_iter_1_left = BranchSeparables(out_channels_left,
out_channels_left,
5,
1,
2,
bias=False)
self.comb_iter_1_right = BranchSeparables(out_channels_left,
out_channels_left,
3,
1,
1,
bias=False)
self.comb_iter_2_left = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_3_left = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparables(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
def forward(self, x, x_prev):
x_left = self.conv_prev_1x1(x_prev)
x_right = self.conv_1x1(x)
x_comb_iter_0_left = self.comb_iter_0_left(x_right)
x_comb_iter_0_right = self.comb_iter_0_right(x_left)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x_left)
x_comb_iter_1_right = self.comb_iter_1_right(x_left)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x_right)
x_comb_iter_2 = x_comb_iter_2_left + x_left
x_comb_iter_3_left = self.comb_iter_3_left(x_left)
x_comb_iter_3_right = self.comb_iter_3_right(x_left)
x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right
x_comb_iter_4_left = self.comb_iter_4_left(x_right)
x_comb_iter_4 = x_comb_iter_4_left + x_right
x_out = torch.cat([
x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3,
x_comb_iter_4
], 1)
return x_out
class ReductionCell0(nn.Module):
def __init__(self, in_channels_left, out_channels_left, in_channels_right,
out_channels_right):
super(ReductionCell0, self).__init__()
self.conv_prev_1x1 = nn.Sequential()
self.conv_prev_1x1.add_module('relu', nn.ReLU())
self.conv_prev_1x1.add_module(
'conv',
nn.Conv2d(in_channels_left,
out_channels_left,
1,
stride=1,
bias=False))
self.conv_prev_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_left,
eps=0.001,
momentum=0.1,
affine=True))
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(in_channels_right,
out_channels_right,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_right,
eps=0.001,
momentum=0.1,
affine=True))
self.comb_iter_0_left = BranchSeparablesReduction(out_channels_right,
out_channels_right,
5,
2,
2,
bias=False)
self.comb_iter_0_right = BranchSeparablesReduction(out_channels_right,
out_channels_right,
7,
2,
3,
bias=False)
self.comb_iter_1_left = MaxPoolPad()
self.comb_iter_1_right = BranchSeparablesReduction(out_channels_right,
out_channels_right,
7,
2,
3,
bias=False)
self.comb_iter_2_left = AvgPoolPad()
self.comb_iter_2_right = BranchSeparablesReduction(out_channels_right,
out_channels_right,
5,
2,
2,
bias=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparablesReduction(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
self.comb_iter_4_right = MaxPoolPad()
def forward(self, x, x_prev):
x_left = self.conv_prev_1x1(x_prev)
x_right = self.conv_1x1(x)
x_comb_iter_0_left = self.comb_iter_0_left(x_right)
x_comb_iter_0_right = self.comb_iter_0_right(x_left)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x_right)
x_comb_iter_1_right = self.comb_iter_1_right(x_left)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x_right)
x_comb_iter_2_right = self.comb_iter_2_right(x_left)
x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right
x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)
x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1
x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)
x_comb_iter_4_right = self.comb_iter_4_right(x_right)
x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right
x_out = torch.cat(
[x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1)
return x_out
class ReductionCell1(nn.Module):
def __init__(self, in_channels_left, out_channels_left, in_channels_right,
out_channels_right):
super(ReductionCell1, self).__init__()
self.conv_prev_1x1 = nn.Sequential()
self.conv_prev_1x1.add_module('relu', nn.ReLU())
self.conv_prev_1x1.add_module(
'conv',
nn.Conv2d(in_channels_left,
out_channels_left,
1,
stride=1,
bias=False))
self.conv_prev_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_left,
eps=0.001,
momentum=0.1,
affine=True))
self.conv_1x1 = nn.Sequential()
self.conv_1x1.add_module('relu', nn.ReLU())
self.conv_1x1.add_module(
'conv',
nn.Conv2d(in_channels_right,
out_channels_right,
1,
stride=1,
bias=False))
self.conv_1x1.add_module(
'bn',
nn.BatchNorm2d(out_channels_right,
eps=0.001,
momentum=0.1,
affine=True))
self.comb_iter_0_left = BranchSeparables(out_channels_right,
out_channels_right,
5,
2,
2,
bias=False)
self.comb_iter_0_right = BranchSeparables(out_channels_right,
out_channels_right,
7,
2,
3,
bias=False)
self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
self.comb_iter_1_right = BranchSeparables(out_channels_right,
out_channels_right,
7,
2,
3,
bias=False)
self.comb_iter_2_left = nn.AvgPool2d(3,
stride=2,
padding=1,
count_include_pad=False)
self.comb_iter_2_right = BranchSeparables(out_channels_right,
out_channels_right,
5,
2,
2,
bias=False)
self.comb_iter_3_right = nn.AvgPool2d(3,
stride=1,
padding=1,
count_include_pad=False)
self.comb_iter_4_left = BranchSeparables(out_channels_right,
out_channels_right,
3,
1,
1,
bias=False)
self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
def forward(self, x, x_prev):
x_left = self.conv_prev_1x1(x_prev)
x_right = self.conv_1x1(x)
x_comb_iter_0_left = self.comb_iter_0_left(x_right)
x_comb_iter_0_right = self.comb_iter_0_right(x_left)
x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right
x_comb_iter_1_left = self.comb_iter_1_left(x_right)
x_comb_iter_1_right = self.comb_iter_1_right(x_left)
x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right
x_comb_iter_2_left = self.comb_iter_2_left(x_right)
x_comb_iter_2_right = self.comb_iter_2_right(x_left)
x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right
x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)
x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1
x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)
x_comb_iter_4_right = self.comb_iter_4_right(x_right)
x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right
x_out = torch.cat(
[x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1)
return x_out
class NASNetAMobile(nn.Module):
"""NASNetAMobile (4 @ 1056) """
def __init__(self,
num_classes=1000,
stem_filters=32,
penultimate_filters=1056,
filters_multiplier=2):
super(NASNetAMobile, self).__init__()
self.num_classes = num_classes
self.stem_filters = stem_filters
self.penultimate_filters = penultimate_filters
self.filters_multiplier = filters_multiplier
filters = self.penultimate_filters // 24
# 24 is the default value for this architecture
self.conv0 = nn.Sequential()
self.conv0.add_module(
'conv',
nn.Conv2d(in_channels=3,
out_channels=self.stem_filters,
kernel_size=3,
padding=0,
stride=2,
bias=False))
self.conv0.add_module(
'bn',
nn.BatchNorm2d(self.stem_filters,
eps=0.001,
momentum=0.1,
affine=True))
self.cell_stem_0 = CellStem0(self.stem_filters,
num_filters=filters //
(filters_multiplier**2))
self.cell_stem_1 = CellStem1(self.stem_filters,
num_filters=filters // filters_multiplier)
self.cell_0 = FirstCell(
in_channels_left=filters,
out_channels_left=filters // 2, # 1, 0.5
in_channels_right=2 * filters,
out_channels_right=filters) # 2, 1
self.cell_1 = NormalCell(
in_channels_left=2 * filters,
out_channels_left=filters, # 2, 1
in_channels_right=6 * filters,
out_channels_right=filters) # 6, 1
self.cell_2 = NormalCell(
in_channels_left=6 * filters,
out_channels_left=filters, # 6, 1
in_channels_right=6 * filters,
out_channels_right=filters) # 6, 1
self.cell_3 = NormalCell(
in_channels_left=6 * filters,
out_channels_left=filters, # 6, 1
in_channels_right=6 * filters,
out_channels_right=filters) # 6, 1
self.reduction_cell_0 = ReductionCell0(
in_channels_left=6 * filters,
out_channels_left=2 * filters, # 6, 2
in_channels_right=6 * filters,
out_channels_right=2 * filters) # 6, 2
self.cell_6 = FirstCell(
in_channels_left=6 * filters,
out_channels_left=filters, # 6, 1
in_channels_right=8 * filters,
out_channels_right=2 * filters) # 8, 2
self.cell_7 = NormalCell(
in_channels_left=8 * filters,
out_channels_left=2 * filters, # 8, 2
in_channels_right=12 * filters,
out_channels_right=2 * filters) # 12, 2
self.cell_8 = NormalCell(
in_channels_left=12 * filters,
out_channels_left=2 * filters, # 12, 2
in_channels_right=12 * filters,
out_channels_right=2 * filters) # 12, 2
self.cell_9 = NormalCell(
in_channels_left=12 * filters,
out_channels_left=2 * filters, # 12, 2
in_channels_right=12 * filters,
out_channels_right=2 * filters) # 12, 2
self.reduction_cell_1 = ReductionCell1(
in_channels_left=12 * filters,
out_channels_left=4 * filters, # 12, 4
in_channels_right=12 * filters,
out_channels_right=4 * filters) # 12, 4
self.cell_12 = FirstCell(
in_channels_left=12 * filters,
out_channels_left=2 * filters, # 12, 2
in_channels_right=16 * filters,
out_channels_right=4 * filters) # 16, 4
self.cell_13 = NormalCell(
in_channels_left=16 * filters,
out_channels_left=4 * filters, # 16, 4
in_channels_right=24 * filters,
out_channels_right=4 * filters) # 24, 4
self.cell_14 = NormalCell(
in_channels_left=24 * filters,
out_channels_left=4 * filters, # 24, 4
in_channels_right=24 * filters,
out_channels_right=4 * filters) # 24, 4
self.cell_15 = NormalCell(
in_channels_left=24 * filters,
out_channels_left=4 * filters, # 24, 4
in_channels_right=24 * filters,
out_channels_right=4 * filters) # 24, 4
self.relu = nn.ReLU()
self.avg_pool = nn.AvgPool2d(7, stride=1, padding=0)
self.dropout = nn.Dropout()
self.last_linear = nn.Linear(24 * filters, self.num_classes)
def features(self, input):
x_conv0 = self.conv0(input)
x_stem_0 = self.cell_stem_0(x_conv0)
x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
return x_cell_15
def logits(self, features):
x = self.relu(features)
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.last_linear(x)
return x
def forward(self, input):
x = self.features(input)
x = self.logits(x)
return x
def nasnetamobile(num_classes=1000):
model = NASNetAMobile(num_classes=num_classes)
return model
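# Hedged smoke test, not part of the original file: NASNet-A Mobile ends with a
# fixed 7x7 average pool, which corresponds to a 224x224 input (assumed here).
if __name__ == '__main__':
    net = nasnetamobile(num_classes=1000)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 1000])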
class NASNetALarge(nn.Module):
"""NASNetALarge (6 @ 4032) """
def __init__(self,
num_classes=1001,
stem_filters=96,
penultimate_filters=4032,
filters_multiplier=2):
super(NASNetALarge, self).__init__()
self.num_classes = num_classes
self.stem_filters = stem_filters
self.penultimate_filters = penultimate_filters
self.filters_multiplier = filters_multiplier
filters = self.penultimate_filters // 24
# 24 is the default value for this architecture
self.conv0 = nn.Sequential()
self.conv0.add_module(
'conv',
nn.Conv2d(in_channels=3,
out_channels=self.stem_filters,
kernel_size=3,
padding=0,
stride=2,
bias=False))
self.conv0.add_module(
'bn',
nn.BatchNorm2d(self.stem_filters,
eps=0.001,
momentum=0.1,
affine=True))
self.cell_stem_0 = CellStem0(self.stem_filters,
num_filters=filters //
(filters_multiplier**2))
self.cell_stem_1 = CellStem1(self.stem_filters,
num_filters=filters // filters_multiplier)
self.cell_0 = FirstCell(in_channels_left=filters,
out_channels_left=filters // 2,
in_channels_right=2 * filters,
out_channels_right=filters)
self.cell_1 = NormalCell(in_channels_left=2 * filters,
out_channels_left=filters,
in_channels_right=6 * filters,
out_channels_right=filters)
self.cell_2 = NormalCell(in_channels_left=6 * filters,
out_channels_left=filters,
in_channels_right=6 * filters,
out_channels_right=filters)
self.cell_3 = NormalCell(in_channels_left=6 * filters,
out_channels_left=filters,
in_channels_right=6 * filters,
out_channels_right=filters)
self.cell_4 = NormalCell(in_channels_left=6 * filters,
out_channels_left=filters,
in_channels_right=6 * filters,
out_channels_right=filters)
self.cell_5 = NormalCell(in_channels_left=6 * filters,
out_channels_left=filters,
in_channels_right=6 * filters,
out_channels_right=filters)
self.reduction_cell_0 = ReductionCell0(in_channels_left=6 * filters,
out_channels_left=2 * filters,
in_channels_right=6 * filters,
out_channels_right=2 * filters)
self.cell_6 = FirstCell(in_channels_left=6 * filters,
out_channels_left=filters,
in_channels_right=8 * filters,
out_channels_right=2 * filters)
self.cell_7 = NormalCell(in_channels_left=8 * filters,
out_channels_left=2 * filters,
in_channels_right=12 * filters,
out_channels_right=2 * filters)
self.cell_8 = NormalCell(in_channels_left=12 * filters,
out_channels_left=2 * filters,
in_channels_right=12 * filters,
out_channels_right=2 * filters)
self.cell_9 = NormalCell(in_channels_left=12 * filters,
out_channels_left=2 * filters,
in_channels_right=12 * filters,
out_channels_right=2 * filters)
self.cell_10 = NormalCell(in_channels_left=12 * filters,
out_channels_left=2 * filters,
in_channels_right=12 * filters,
out_channels_right=2 * filters)
self.cell_11 = NormalCell(in_channels_left=12 * filters,
out_channels_left=2 * filters,
in_channels_right=12 * filters,
out_channels_right=2 * filters)
self.reduction_cell_1 = ReductionCell1(in_channels_left=12 * filters,
out_channels_left=4 * filters,
in_channels_right=12 * filters,
out_channels_right=4 * filters)
self.cell_12 = FirstCell(in_channels_left=12 * filters,
out_channels_left=2 * filters,
in_channels_right=16 * filters,
out_channels_right=4 * filters)
self.cell_13 = NormalCell(in_channels_left=16 * filters,
out_channels_left=4 * filters,
in_channels_right=24 * filters,
out_channels_right=4 * filters)
self.cell_14 = NormalCell(in_channels_left=24 * filters,
out_channels_left=4 * filters,
in_channels_right=24 * filters,
out_channels_right=4 * filters)
self.cell_15 = NormalCell(in_channels_left=24 * filters,
out_channels_left=4 * filters,
in_channels_right=24 * filters,
out_channels_right=4 * filters)
self.cell_16 = NormalCell(in_channels_left=24 * filters,
out_channels_left=4 * filters,
in_channels_right=24 * filters,
out_channels_right=4 * filters)
self.cell_17 = NormalCell(in_channels_left=24 * filters,
out_channels_left=4 * filters,
in_channels_right=24 * filters,
out_channels_right=4 * filters)
self.relu = nn.ReLU()
self.avg_pool = nn.AvgPool2d(11, stride=1, padding=0)
self.dropout = nn.Dropout()
self.last_linear = nn.Linear(24 * filters, self.num_classes)
def features(self, input):
x_conv0 = self.conv0(input)
x_stem_0 = self.cell_stem_0(x_conv0)
x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
x_cell_4 = self.cell_4(x_cell_3, x_cell_2)
x_cell_5 = self.cell_5(x_cell_4, x_cell_3)
x_reduction_cell_0 = self.reduction_cell_0(x_cell_5, x_cell_4)
x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_4)
x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
x_cell_10 = self.cell_10(x_cell_9, x_cell_8)
x_cell_11 = self.cell_11(x_cell_10, x_cell_9)
x_reduction_cell_1 = self.reduction_cell_1(x_cell_11, x_cell_10)
x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_10)
x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
x_cell_16 = self.cell_16(x_cell_15, x_cell_14)
x_cell_17 = self.cell_17(x_cell_16, x_cell_15)
return x_cell_17
def logits(self, features):
x = self.relu(features)
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.last_linear(x)
return x
def forward(self, input):
x = self.features(input)
x = self.logits(x)
return x
def nasnetalarge(num_classes=1000):
model = NASNetALarge(num_classes=num_classes)
return model
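# Hedged smoke test, not part of the original file: NASNet-A Large ends with a
# fixed 11x11 average pool, which corresponds to a 331x331 input (assumed here;
# the model is large, so this allocates a substantial amount of memory).
if __name__ == '__main__':
    net = nasnetalarge(num_classes=1000)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 331, 331))
    print(out.shape)  # expected: torch.Size([1, 1000])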
import torch.nn as nn
import math
__all__ = [
'preact_resnet18', 'preact_resnet34', 'preact_resnet50',
'preact_resnet101', 'preact_resnet152'
]
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class PreactBasicBlock(nn.Module):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
preactivate=True):
super(PreactBasicBlock, self).__init__()
self.pre_bn = self.pre_relu = None
if preactivate:
self.pre_bn = nn.BatchNorm2d(inplanes)
self.pre_relu = nn.ReLU(inplace=True)
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.downsample = downsample
self.stride = stride
self.preactivate = preactivate
def forward(self, x):
if self.preactivate:
preact = self.pre_bn(x)
preact = self.pre_relu(preact)
else:
preact = x
out = self.conv1(preact)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
if self.downsample is not None:
residual = self.downsample(preact)
else:
residual = x
out += residual
return out
class PreactBottleneck(nn.Module):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
preactivate=True):
super(PreactBottleneck, self).__init__()
self.pre_bn = self.pre_relu = None
if preactivate:
self.pre_bn = nn.BatchNorm2d(inplanes)
self.pre_relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.relu1 = nn.ReLU(inplace=True)
self.relu2 = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
self.preactivate = preactivate
def forward(self, x):
if self.preactivate:
preact = self.pre_bn(x)
preact = self.pre_relu(preact)
else:
preact = x
out = self.conv1(preact)
out = self.bn1(out)
out = self.relu1(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu2(out)
out = self.conv3(out)
if self.downsample is not None:
residual = self.downsample(preact)
else:
residual = x
out += residual
return out
class PreactResNet(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
deep_stem=False,
avg_down=False,
bypass_last_bn=False,
bn=None):
super(PreactResNet, self).__init__()
global bypass_bn_weight_list
bypass_bn_weight_list = []
self.inplanes = 64
self.deep_stem = deep_stem
self.avg_down = avg_down
if self.deep_stem:
self.conv1 = nn.Sequential(
nn.Conv2d(3,
32,
kernel_size=3,
stride=2,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
64,
kernel_size=3,
stride=1,
padding=1,
bias=False),
)
else:
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.final_bn = nn.BatchNorm2d(512 * block.expansion)
self.final_relu = nn.ReLU(inplace=True)
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 1.0 / float(n))
m.bias.data.zero_()
if bypass_last_bn:
for param in bypass_bn_weight_list:
param.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1, avg_down=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if self.avg_down:
downsample = nn.Sequential(
nn.AvgPool2d(stride,
stride=stride,
ceil_mode=True,
count_include_pad=False),
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False),
# BN(planes * block.expansion),
)
else:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
# BN(planes * block.expansion),
)
# On the first residual block in the first residual layer we don't pre-activate,
# because we take care of that (+ maxpool) after the initial conv layer
preactivate_first = stride != 1
layers = []
layers.append(
block(self.inplanes, planes, stride, downsample,
preactivate_first))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.final_bn(x)
x = self.final_relu(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def preact_resnet18(**kwargs):
model = PreactResNet(PreactBasicBlock, [2, 2, 2, 2], **kwargs)
return model
def preact_resnet34(**kwargs):
model = PreactResNet(PreactBasicBlock, [3, 4, 6, 3], **kwargs)
return model
def preact_resnet50(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 4, 6, 3], **kwargs)
return model
def preact_resnet101(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 4, 23, 3], **kwargs)
return model
def preact_resnet152(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 8, 36, 3], **kwargs)
return model
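# Hedged smoke test, not part of the original file: the final 7x7 average pool
# assumes the standard 224x224 ImageNet input size.
if __name__ == '__main__':
    import torch
    net = preact_resnet18(num_classes=1000)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 1000])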
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair
__all__ = ['ResNest', 'resnest50', 'resnest101', 'resnest200', 'resnest269']
class SplAtConv2d(nn.Module):
"""Split-Attention Conv2d
"""
def __init__(self,
in_channels,
channels,
kernel_size,
stride=(1, 1),
padding=(0, 0),
dilation=(1, 1),
groups=1,
bias=True,
radix=2,
reduction_factor=4,
rectify=False,
rectify_avg=False,
norm_layer=None,
dropblock_prob=0.0,
**kwargs):
super(SplAtConv2d, self).__init__()
padding = _pair(padding)
self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
self.rectify_avg = rectify_avg
inter_channels = max(in_channels * radix // reduction_factor, 32)
self.radix = radix
self.cardinality = groups
self.channels = channels
self.dropblock_prob = dropblock_prob
if self.rectify:
from rfconv import RFConv2d
self.conv = RFConv2d(in_channels,
channels * radix,
kernel_size,
stride,
padding,
dilation,
groups=groups * radix,
bias=bias,
average_mode=rectify_avg,
**kwargs)
else:
self.conv = nn.Conv2d(in_channels,
channels * radix,
kernel_size,
stride,
padding,
dilation,
groups=groups * radix,
bias=bias,
**kwargs)
self.use_bn = norm_layer is not None
if self.use_bn:
self.bn0 = norm_layer(channels * radix)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Conv2d(channels,
inter_channels,
1,
groups=self.cardinality)
if self.use_bn:
self.bn1 = norm_layer(inter_channels)
self.fc2 = nn.Conv2d(inter_channels,
channels * radix,
1,
groups=self.cardinality)
if dropblock_prob > 0.0:
self.dropblock = DropBlock2D(dropblock_prob, 3)
self.rsoftmax = rSoftMax(radix, groups)
def forward(self, x):
x = self.conv(x)
if self.use_bn:
x = self.bn0(x)
if self.dropblock_prob > 0.0:
x = self.dropblock(x)
x = self.relu(x)
batch, rchannel = x.shape[:2]
if self.radix > 1:
if torch.__version__ < '1.5':
splited = torch.split(x, int(rchannel // self.radix), dim=1)
else:
splited = torch.split(x, rchannel // self.radix, dim=1)
gap = sum(splited)
else:
gap = x
gap = F.adaptive_avg_pool2d(gap, 1)
gap = self.fc1(gap)
if self.use_bn:
gap = self.bn1(gap)
gap = self.relu(gap)
atten = self.fc2(gap)
atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
if self.radix > 1:
if torch.__version__ < '1.5':
attens = torch.split(atten, int(rchannel // self.radix), dim=1)
else:
attens = torch.split(atten, rchannel // self.radix, dim=1)
out = sum([att * split for (att, split) in zip(attens, splited)])
else:
out = atten * x
return out.contiguous()
class rSoftMax(nn.Module):
def __init__(self, radix, cardinality):
super().__init__()
self.radix = radix
self.cardinality = cardinality
def forward(self, x):
batch = x.size(0)
if self.radix > 1:
x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
x = F.softmax(x, dim=1)
x = x.reshape(batch, -1)
else:
x = torch.sigmoid(x)
return x
class DropBlock2D(object):
def __init__(self, *args, **kwargs):
raise NotImplementedError
class GlobalAvgPool2d(nn.Module):
def __init__(self):
"""Global average pooling over the input's spatial dimensions"""
super(GlobalAvgPool2d, self).__init__()
def forward(self, inputs):
return nn.functional.adaptive_avg_pool2d(inputs,
1).view(inputs.size(0), -1)
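# Hedged shape check, not part of the original file: a single split-attention
# convolution with radix=2 should keep the spatial size and map in_channels to
# channels; the 64-channel 32x32 input here is an arbitrary assumption.
if __name__ == '__main__':
    conv = SplAtConv2d(64, 64, kernel_size=3, padding=1, radix=2,
                       norm_layer=nn.BatchNorm2d)
    with torch.no_grad():
        out = conv(torch.randn(2, 64, 32, 32))
    print(out.shape)  # expected: torch.Size([2, 64, 32, 32])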
class Bottleneck(nn.Module):
"""ResNet Bottleneck
"""
# pylint: disable=unused-argument
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
radix=1,
cardinality=1,
bottleneck_width=64,
avd=False,
avd_first=False,
dilation=1,
is_first=False,
rectified_conv=False,
rectify_avg=False,
norm_layer=None,
dropblock_prob=0.0,
last_gamma=False):
super(Bottleneck, self).__init__()
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
self.conv1 = nn.Conv2d(inplanes,
group_width,
kernel_size=1,
bias=False)
self.bn1 = norm_layer(group_width)
self.dropblock_prob = dropblock_prob
self.radix = radix
self.avd = avd and (stride > 1 or is_first)
self.avd_first = avd_first
if self.avd:
self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
stride = 1
if dropblock_prob > 0.0:
self.dropblock1 = DropBlock2D(dropblock_prob, 3)
if radix == 1:
self.dropblock2 = DropBlock2D(dropblock_prob, 3)
self.dropblock3 = DropBlock2D(dropblock_prob, 3)
if radix >= 1:
self.conv2 = SplAtConv2d(group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False,
radix=radix,
rectify=rectified_conv,
rectify_avg=rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif rectified_conv:
from rfconv import RFConv2d
self.conv2 = RFConv2d(group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False,
average_mode=rectify_avg)
self.bn2 = norm_layer(group_width)
else:
self.conv2 = nn.Conv2d(group_width,
group_width,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False)
self.bn2 = norm_layer(group_width)
self.conv3 = nn.Conv2d(group_width,
planes * 4,
kernel_size=1,
bias=False)
self.bn3 = norm_layer(planes * 4)
if last_gamma:
from torch.nn.init import zeros_
zeros_(self.bn3.weight)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dilation = dilation
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
if self.dropblock_prob > 0.0:
out = self.dropblock1(out)
out = self.relu(out)
if self.avd and self.avd_first:
out = self.avd_layer(out)
out = self.conv2(out)
if self.radix == 0:
out = self.bn2(out)
if self.dropblock_prob > 0.0:
out = self.dropblock2(out)
out = self.relu(out)
if self.avd and not self.avd_first:
out = self.avd_layer(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock_prob > 0.0:
out = self.dropblock3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNest(nn.Module):
def __init__(self,
block,
layers,
radix=1,
groups=1,
bottleneck_width=64,
num_classes=1000,
dilated=False,
dilation=1,
deep_stem=False,
stem_width=64,
avg_down=False,
rectified_conv=False,
rectify_avg=False,
avd=False,
avd_first=False,
final_drop=0.0,
dropblock_prob=0,
last_gamma=False,
norm_layer=nn.BatchNorm2d):
self.cardinality = groups
self.bottleneck_width = bottleneck_width
# ResNet-D params
self.inplanes = stem_width * 2 if deep_stem else 64
self.avg_down = avg_down
self.last_gamma = last_gamma
# ResNeSt params
self.radix = radix
self.avd = avd
self.avd_first = avd_first
super(ResNest, self).__init__()
self.rectified_conv = rectified_conv
self.rectify_avg = rectify_avg
if rectified_conv:
from rfconv import RFConv2d
conv_layer = RFConv2d
else:
conv_layer = nn.Conv2d
conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
if deep_stem:
self.conv1 = nn.Sequential(
conv_layer(3,
stem_width,
kernel_size=3,
stride=2,
padding=1,
bias=False,
**conv_kwargs),
norm_layer(stem_width),
nn.ReLU(inplace=True),
conv_layer(stem_width,
stem_width,
kernel_size=3,
stride=1,
padding=1,
bias=False,
**conv_kwargs),
norm_layer(stem_width),
nn.ReLU(inplace=True),
conv_layer(stem_width,
stem_width * 2,
kernel_size=3,
stride=1,
padding=1,
bias=False,
**conv_kwargs),
)
else:
self.conv1 = conv_layer(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
**conv_kwargs)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block,
64,
layers[0],
norm_layer=norm_layer,
is_first=False)
self.layer2 = self._make_layer(block,
128,
layers[1],
stride=2,
norm_layer=norm_layer)
if dilated or dilation == 4:
self.layer3 = self._make_layer(block,
256,
layers[2],
stride=1,
dilation=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block,
512,
layers[3],
stride=1,
dilation=4,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif dilation == 2:
self.layer3 = self._make_layer(block,
256,
layers[2],
stride=2,
dilation=1,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block,
512,
layers[3],
stride=1,
dilation=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
else:
self.layer3 = self._make_layer(block,
256,
layers[2],
stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block,
512,
layers[3],
stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.avgpool = GlobalAvgPool2d()
self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, norm_layer):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self,
block,
planes,
blocks,
stride=1,
dilation=1,
norm_layer=None,
dropblock_prob=0.0,
is_first=True):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
down_layers = []
if self.avg_down:
if dilation == 1:
down_layers.append(
nn.AvgPool2d(kernel_size=stride,
stride=stride,
ceil_mode=True,
count_include_pad=False))
else:
down_layers.append(
nn.AvgPool2d(kernel_size=1,
stride=1,
ceil_mode=True,
count_include_pad=False))
down_layers.append(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False))
else:
down_layers.append(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False))
down_layers.append(norm_layer(planes * block.expansion))
downsample = nn.Sequential(*down_layers)
layers = []
if dilation == 1 or dilation == 2:
layers.append(
block(self.inplanes,
planes,
stride,
downsample=downsample,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=1,
is_first=is_first,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
elif dilation == 4:
layers.append(
block(self.inplanes,
planes,
stride,
downsample=downsample,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=2,
is_first=is_first,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
else:
raise RuntimeError("=> unknown dilation size: {}".format(dilation))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(
block(self.inplanes,
planes,
radix=self.radix,
cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd,
avd_first=self.avd_first,
dilation=dilation,
rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
# x = x.view(x.size(0), -1)
x = torch.flatten(x, 1)
if self.drop:
x = self.drop(x)
x = self.fc(x)
return x
def resnest50(pretrained=False, **kwargs):
model = ResNest(Bottleneck, [3, 4, 6, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=32,
avg_down=True,
avd=True,
avd_first=False,
**kwargs)
return model
def resnest101(pretrained=False, **kwargs):
model = ResNest(Bottleneck, [3, 4, 23, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=64,
avg_down=True,
avd=True,
avd_first=False,
**kwargs)
return model
def resnest200(pretrained=False, **kwargs):
model = ResNest(Bottleneck, [3, 24, 36, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=64,
avg_down=True,
avd=True,
avd_first=False,
**kwargs)
return model
def resnest269(pretrained=False, **kwargs):
model = ResNest(Bottleneck, [3, 30, 48, 8],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=64,
avg_down=True,
avd=True,
avd_first=False,
**kwargs)
return model
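# Usage sketch: build ResNeSt-50 and check the classifier output on a dummy
# ImageNet-sized batch (assumes SplAtConv2d, GlobalAvgPool2d and the
# split-attention Bottleneck defined earlier in this file are in scope).
if __name__ == '__main__':
    net = resnest50(num_classes=1000)
    out = net(torch.randn(2, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([2, 1000])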
import torch.nn as nn
import math
__all__ = [
'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet50c', 'resnet50d',
'resnet101', 'resnet101d', 'resnet152', 'resnet152d'
]
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
bypass_bn_weight_list.append(self.bn3.weight)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
deep_stem=False,
avg_down=False,
bypass_last_bn=False):
        # bn3 weights of Bottleneck blocks are collected in this global list so
        # they can optionally be zero-initialized below when bypass_last_bn is set
        global bypass_bn_weight_list
        bypass_bn_weight_list = []
self.inplanes = 64
super(ResNet, self).__init__()
self.deep_stem = deep_stem
self.avg_down = avg_down
if self.deep_stem:
self.conv1 = nn.Sequential(
nn.Conv2d(3,
32,
kernel_size=3,
stride=2,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
64,
kernel_size=3,
stride=1,
padding=1,
bias=False),
)
else:
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
if bypass_last_bn:
for param in bypass_bn_weight_list:
param.data.zero_()
print('bypass {} bn.weight in BottleneckBlocks'.format(
len(bypass_bn_weight_list)))
def _make_layer(self, block, planes, blocks, stride=1, avg_down=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if self.avg_down:
downsample = nn.Sequential(
nn.AvgPool2d(stride, stride=stride, ceil_mode=True),
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
else:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def resnet18(pretrained=False, **kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
def resnet34(pretrained=False, **kwargs):
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
return model
def resnet50(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def resnet50c(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs, deep_stem=True)
return model
def resnet50d(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def resnet101(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def resnet101d(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 4, 23, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def resnet152(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
def resnet152d(pretrained=False, **kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
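# Usage sketch: the 'c'/'d' suffixes only toggle the stem and shortcut style —
# resnet50c uses the three-conv deep stem, resnet50d additionally places an
# AvgPool2d before the 1x1 projection in the shortcut (avg_down=True).
if __name__ == '__main__':
    for builder in (resnet50, resnet50c, resnet50d):
        net = builder(num_classes=1000)
        print(builder.__name__, sum(p.numel() for p in net.parameters()))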
import torch
import torch.nn as nn
__all__ = [
'resnext50_32x4d', 'resnext101_32x8d', 'resnext50d_32x4d',
    'resnext101d_32x8d', 'wide_resnet50_2', 'wide_resnet101_2',
'wide_resnet50d_2', 'wide_resnet101d_2'
]
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=1,
stride=stride,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
__constants__ = ['downsample']
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError(
'BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError(
"Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
__constants__ = ['downsample']
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
zero_init_residual=False,
groups=1,
width_per_group=64,
replace_stride_with_dilation=None,
norm_layer=None,
deep_stem=False,
avg_down=False,
bn=None):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
self.deep_stem = deep_stem
self.avg_down = avg_down
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(
replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
if self.deep_stem:
self.conv1 = nn.Sequential(
nn.Conv2d(3,
32,
kernel_size=3,
stride=2,
padding=1,
bias=False),
norm_layer(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False),
norm_layer(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
64,
kernel_size=3,
stride=1,
padding=1,
bias=False),
)
else:
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block,
128,
layers[1],
stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block,
256,
layers[2],
stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block,
512,
layers[3],
stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight,
mode='fan_out',
nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
if self.avg_down:
downsample = nn.Sequential(
nn.AvgPool2d(stride,
stride=stride,
ceil_mode=True,
count_include_pad=False),
conv1x1(self.inplanes, planes * block.expansion),
norm_layer(planes * block.expansion),
)
else:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(
block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(self.inplanes,
planes,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x):
return self._forward_impl(x)
def resnext50_32x4d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def resnext101_32x8d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def resnext50d_32x4d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
model = ResNet(Bottleneck, [3, 4, 6, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def resnext101d_32x8d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
model = ResNet(Bottleneck, [3, 4, 23, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def wide_resnet50_2(**kwargs):
kwargs['width_per_group'] = 64 * 2
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def wide_resnet101_2(**kwargs):
kwargs['width_per_group'] = 64 * 2
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def wide_resnet50d_2(**kwargs):
kwargs['width_per_group'] = 64 * 2
model = ResNet(Bottleneck, [3, 4, 6, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def wide_resnet101d_2(**kwargs):
kwargs['width_per_group'] = 64 * 2
model = ResNet(Bottleneck, [3, 4, 23, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
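# Worked example of the bottleneck width rule in Bottleneck.__init__
# (width = int(planes * base_width / 64) * groups):
#   resnext50_32x4d at planes=64  -> int(64 * 4 / 64) * 32 = 128 grouped channels
#   wide_resnet50_2 at planes=64  -> int(64 * 128 / 64) * 1 = 128 plain channels
if __name__ == '__main__':
    net = resnext50_32x4d(num_classes=1000)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])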
import torch.nn as nn
import math
__all__ = [
'ResNetV2', 'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet50c_v2',
'resnet50d_v2', 'resnet101_v2', 'resnet152_v2', 'resnet200_v2'
]
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(inplanes)
self.relu = nn.ReLU(inplace=True)
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = conv3x3(planes, planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.bn1(x)
out = self.relu(out)
out = self.conv1(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(inplanes)
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
self.conv3 = nn.Conv2d(planes,
planes * self.expansion,
kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
bypass_bn_weight_list.append(self.bn3.weight)
def forward(self, x):
residual = x
out = self.bn1(x)
out = self.relu(out)
out = self.conv1(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn3(out)
out = self.relu(out)
out = self.conv3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
return out
class ResNetV2(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
deep_stem=False,
avg_down=False,
bypass_last_bn=False):
global bypass_bn_weight_list
bypass_bn_weight_list = []
self.inplanes = 64
super(ResNetV2, self).__init__()
self.deep_stem = deep_stem
self.avg_down = avg_down
if self.deep_stem:
self.conv1 = nn.Sequential(
nn.Conv2d(3,
32,
kernel_size=3,
stride=2,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
64,
kernel_size=3,
stride=1,
padding=1,
bias=False),
)
else:
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.bn2 = nn.BatchNorm2d(512 * block.expansion)
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
if bypass_last_bn:
for param in bypass_bn_weight_list:
param.data.zero_()
print('bypass {} bn.weight in BottleneckBlocks'.format(
len(bypass_bn_weight_list)))
def _make_layer(self, block, planes, blocks, stride=1, avg_down=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if self.avg_down:
downsample = nn.Sequential(
nn.AvgPool2d(stride, stride=stride, ceil_mode=True),
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
else:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.bn2(x)
x = self.relu(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def resnet18_v2(pretrained=False, **kwargs):
model = ResNetV2(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
def resnet34_v2(pretrained=False, **kwargs):
model = ResNetV2(BasicBlock, [3, 4, 6, 3], **kwargs)
return model
def resnet50_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def resnet50c_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 4, 6, 3], **kwargs, deep_stem=True)
return model
def resnet50d_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 4, 6, 3],
**kwargs,
deep_stem=True,
avg_down=True)
return model
def resnet101_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def resnet152_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
def resnet200_v2(pretrained=False, **kwargs):
model = ResNetV2(Bottleneck, [3, 24, 36, 3], **kwargs)
return model
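# Usage sketch: the *_v2 builders reuse the ResNet layer layouts but with
# pre-activation blocks (BN -> ReLU -> conv inside each block) and a final
# bn2/relu before pooling in ResNetV2.forward.
if __name__ == '__main__':
    net = resnet50_v2(num_classes=1000)
    print(sum(p.numel() for p in net.parameters()))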
from collections import OrderedDict
import math
import torch.nn as nn
__all__ = [
'SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152',
'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnext101_64x4d'
]
class SEModule(nn.Module):
def __init__(self, channels, reduction):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(channels,
channels // reduction,
kernel_size=1,
padding=0)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(channels // reduction,
channels,
kernel_size=1,
padding=0)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
module_input = x
x = self.avg_pool(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
return module_input * x
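# Worked example of the squeeze-and-excitation gating above: with channels=64 and
# reduction=16, fc1 maps 64 -> 4 and fc2 maps 4 -> 64, so a (N, 64, H, W) input is
# re-weighted per channel by a sigmoid gate of shape (N, 64, 1, 1) that broadcasts
# over H and W.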
class Bottleneck(nn.Module):
    """Base bottleneck for SENet blocks; subclasses define the conv/bn/SE layers."""
    def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out = self.se_module(out) + residual
out = self.relu(out)
return out
class SEBottleneck(Bottleneck):
expansion = 4
def __init__(self,
inplanes,
planes,
groups,
reduction,
stride=1,
downsample=None):
super(SEBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes * 2)
self.conv2 = nn.Conv2d(planes * 2,
planes * 4,
kernel_size=3,
stride=stride,
padding=1,
groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(planes * 4)
self.conv3 = nn.Conv2d(planes * 4,
planes * 4,
kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNetBottleneck(Bottleneck):
expansion = 4
def __init__(self,
inplanes,
planes,
groups,
reduction,
stride=1,
downsample=None):
super(SEResNetBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes,
planes,
kernel_size=1,
bias=False,
stride=stride)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
padding=1,
groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNeXtBottleneck(Bottleneck):
expansion = 4
def __init__(self,
inplanes,
planes,
groups,
reduction,
stride=1,
downsample=None,
base_width=4):
super(SEResNeXtBottleneck, self).__init__()
width = math.floor(planes * (base_width / 64)) * groups
self.conv1 = nn.Conv2d(inplanes,
width,
kernel_size=1,
bias=False,
stride=1)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(width,
width,
kernel_size=3,
stride=stride,
padding=1,
groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SENet(nn.Module):
def __init__(self,
block,
layers,
groups,
reduction,
dropout_p=0.2,
inplanes=128,
input_3x3=True,
downsample_kernel_size=3,
downsample_padding=1,
num_classes=1000):
super(SENet, self).__init__()
self.inplanes = inplanes
if input_3x3:
layer0_modules = [
('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
bias=False)),
('bn1', nn.BatchNorm2d(64)),
('relu1', nn.ReLU(inplace=True)),
('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
bias=False)),
('bn2', nn.BatchNorm2d(64)),
('relu2', nn.ReLU(inplace=True)),
('conv3',
nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
('bn3', nn.BatchNorm2d(inplanes)),
('relu3', nn.ReLU(inplace=True)),
]
else:
layer0_modules = [
('conv1',
nn.Conv2d(3,
inplanes,
kernel_size=7,
stride=2,
padding=3,
bias=False)),
('bn1', nn.BatchNorm2d(inplanes)),
('relu1', nn.ReLU(inplace=True)),
]
layer0_modules.append(('pool', nn.MaxPool2d(3,
stride=2,
ceil_mode=True)))
self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
self.layer1 = self._make_layer(block,
planes=64,
blocks=layers[0],
groups=groups,
reduction=reduction,
downsample_kernel_size=1,
downsample_padding=0)
self.layer2 = self._make_layer(
block,
planes=128,
blocks=layers[1],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding)
self.layer3 = self._make_layer(
block,
planes=256,
blocks=layers[2],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding)
self.layer4 = self._make_layer(
block,
planes=512,
blocks=layers[3],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding)
self.avg_pool = nn.AvgPool2d(7, stride=1)
self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
self.last_linear = nn.Linear(512 * block.expansion, num_classes)
def _make_layer(self,
block,
planes,
blocks,
groups,
reduction,
stride=1,
downsample_kernel_size=1,
downsample_padding=0):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=downsample_kernel_size,
stride=stride,
padding=downsample_padding,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(
block(self.inplanes, planes, groups, reduction, stride,
downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, groups, reduction))
return nn.Sequential(*layers)
def features(self, x):
x = self.layer0(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def logits(self, x):
x = self.avg_pool(x)
if self.dropout is not None:
x = self.dropout(x)
x = x.view(x.size(0), -1)
x = self.last_linear(x)
return x
def forward(self, x):
x = self.features(x)
x = self.logits(x)
return x
def senet154(**kwargs):
model = SENet(SEBottleneck, [3, 8, 36, 3],
groups=64,
reduction=16,
dropout_p=0.2,
**kwargs)
return model
def se_resnet50(**kwargs):
model = SENet(SEResNetBottleneck, [3, 4, 6, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
def se_resnet101(**kwargs):
model = SENet(SEResNetBottleneck, [3, 4, 23, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
def se_resnet152(**kwargs):
model = SENet(SEResNetBottleneck, [3, 8, 36, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
def se_resnext50_32x4d(**kwargs):
model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3],
groups=32,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
def se_resnext101_32x4d(**kwargs):
model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3],
groups=32,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
def se_resnext101_64x4d(**kwargs):
model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3],
groups=64,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
**kwargs)
return model
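# Usage sketch: senet154 keeps the original SENet stem (input_3x3=True,
# inplanes=128, downsample_kernel_size=3 by default), while the se_resnet* and
# se_resnext* builders switch to a ResNet-style 7x7 stem with inplanes=64 and
# 1x1 downsampling.
if __name__ == '__main__':
    net = se_resnext50_32x4d(num_classes=1000)
    print(sum(p.numel() for p in net.parameters()))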
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torch.nn import init
__all__ = ["shuffle_v1"]
def conv3x3(in_channels,
out_channels,
stride=1,
padding=1,
bias=True,
groups=1):
"""3x3 convolution with padding
"""
return nn.Conv2d(in_channels,
out_channels,
kernel_size=3,
stride=stride,
padding=padding,
bias=bias,
groups=groups)
def conv1x1(in_channels, out_channels, groups=1):
"""1x1 convolution with padding
- Normal pointwise convolution When groups == 1
- Grouped pointwise convolution when groups > 1
"""
return nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
groups=groups,
stride=1)
def channel_shuffle(x, groups):
batchsize, num_channels, height, width = x.data.size()
channels_per_group = num_channels // groups
# reshape
x = x.view(batchsize, groups, channels_per_group, height, width)
# transpose
# - contiguous() required if transpose() is used before view().
# See https://github.com/pytorch/pytorch/issues/764
x = torch.transpose(x, 1, 2).contiguous()
# flatten
x = x.view(batchsize, -1, height, width)
return x
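# Worked example of channel_shuffle: with num_channels=6 and groups=3 the
# view/transpose/flatten above reorders channels [0, 1, 2, 3, 4, 5] into
# [0, 2, 4, 1, 3, 5], i.e. one channel is drawn from each group in turn so the
# next grouped convolution mixes information across all groups.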
class ShuffleUnit(nn.Module):
def __init__(self,
in_channels,
out_channels,
groups=3,
grouped_conv=True,
combine='add'):
super(ShuffleUnit, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.grouped_conv = grouped_conv
self.combine = combine
self.groups = groups
self.bottleneck_channels = self.out_channels // 4
# define the type of ShuffleUnit
if self.combine == 'add':
# ShuffleUnit Figure 2b
self.depthwise_stride = 1
self._combine_func = self._add
elif self.combine == 'concat':
# ShuffleUnit Figure 2c
self.depthwise_stride = 2
self._combine_func = self._concat
# ensure output of concat has the same channels as
# original output channels.
self.out_channels -= self.in_channels
else:
raise ValueError("Cannot combine tensors with \"{}\""
"Only \"add\" and \"concat\" are"
"supported".format(self.combine))
# Use a 1x1 grouped or non-grouped convolution to reduce input channels
# to bottleneck channels, as in a ResNet bottleneck module.
# NOTE: Do not use group convolution for the first conv1x1 in Stage 2.
self.first_1x1_groups = self.groups if grouped_conv else 1
self.g_conv_1x1_compress = self._make_grouped_conv1x1(
self.in_channels,
self.bottleneck_channels,
self.first_1x1_groups,
batch_norm=True,
relu=True)
# 3x3 depthwise convolution followed by batch normalization
self.depthwise_conv3x3 = conv3x3(self.bottleneck_channels,
self.bottleneck_channels,
stride=self.depthwise_stride,
groups=self.bottleneck_channels)
self.bn_after_depthwise = BN(self.bottleneck_channels)
# Use 1x1 grouped convolution to expand from
# bottleneck_channels to out_channels
self.g_conv_1x1_expand = self._make_grouped_conv1x1(
self.bottleneck_channels,
self.out_channels,
self.groups,
batch_norm=True,
relu=False)
@staticmethod
def _add(x, out):
# residual connection
return x + out
@staticmethod
def _concat(x, out):
# concatenate along channel axis
return torch.cat((x, out), 1)
def _make_grouped_conv1x1(self,
in_channels,
out_channels,
groups,
batch_norm=True,
relu=False):
modules = OrderedDict()
conv = conv1x1(in_channels, out_channels, groups=groups)
modules['conv1x1'] = conv
if batch_norm:
modules['batch_norm'] = BN(out_channels)
if relu:
modules['relu'] = nn.ReLU()
if len(modules) > 1:
return nn.Sequential(modules)
else:
return conv
def forward(self, x):
# save for combining later with output
residual = x
if self.combine == 'concat':
residual = F.avg_pool2d(residual,
kernel_size=3,
stride=2,
padding=1)
out = self.g_conv_1x1_compress(x)
out = channel_shuffle(out, self.groups)
out = self.depthwise_conv3x3(out)
out = self.bn_after_depthwise(out)
out = self.g_conv_1x1_expand(out)
out = self._combine_func(residual, out)
return F.relu(out)
class ShuffleNetV1(nn.Module):
"""ShuffleNet implementation.
"""
def __init__(self,
groups=3,
in_channels=3,
num_classes=1000,
width_mult=1):
"""ShuffleNet constructor.
Arguments:
groups (int, optional): number of groups to be used in grouped
1x1 convolutions in each ShuffleUnit. Default is 3 for best
performance according to original paper.
in_channels (int, optional): number of channels in the input tensor.
Default is 3 for RGB image inputs.
            num_classes (int, optional): number of classes to predict. Default
                is 1000 for ImageNet.
            width_mult (int, optional): multiplier applied to the per-stage
                output channel counts. Default is 1.
        """
super(ShuffleNetV1, self).__init__()
self.groups = groups
self.stage_repeats = [3, 7, 3]
self.in_channels = in_channels
self.num_classes = num_classes
global BN
BN = nn.BatchNorm2d
# index 0 is invalid and should never be called.
# only used for indexing convenience.
if groups == 1:
self.stage_out_channels = list(
map(lambda a: a * width_mult
if a != -1 else a, [-1, 24, 144, 288, 576]))
elif groups == 2:
self.stage_out_channels = list(
map(lambda a: a * width_mult
if a != -1 else a, [-1, 24, 200, 400, 800]))
elif groups == 3:
self.stage_out_channels = list(
map(lambda a: a * width_mult
if a != -1 else a, [-1, 24, 240, 480, 960]))
elif groups == 4:
self.stage_out_channels = list(
map(lambda a: a * width_mult
if a != -1 else a, [-1, 24, 272, 544, 1088]))
elif groups == 8:
self.stage_out_channels = list(
map(lambda a: a * width_mult
if a != -1 else a, [-1, 24, 384, 768, 1536]))
else:
raise ValueError("""{} groups is not supported for
1x1 Grouped Convolutions""".format(groups))
# Stage 1 always has 24 output channels
self.conv1 = conv3x3(
self.in_channels,
self.stage_out_channels[1], # stage 1
stride=2)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
# Stage 2
self.stage2 = self._make_stage(2)
# Stage 3
self.stage3 = self._make_stage(3)
# Stage 4
self.stage4 = self._make_stage(4)
        # Global pooling:
        # not defined as a module here; forward() uses the functional API so the
        # pooling kernel size adapts to inputs other than ImageNet's 224x224
        # Fully-connected classification layer
num_inputs = self.stage_out_channels[-1]
self.fc = nn.Linear(num_inputs, self.num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.01)
if m.bias is not None:
init.constant_(m.bias, 0)
def _make_stage(self, stage):
modules = OrderedDict()
stage_name = "ShuffleUnit_Stage{}".format(stage)
# First ShuffleUnit in the stage
# 1. non-grouped 1x1 convolution (i.e. pointwise convolution)
# is used in Stage 2. Group convolutions used everywhere else.
grouped_conv = stage > 2
# 2. concatenation unit is always used.
first_module = ShuffleUnit(self.stage_out_channels[stage - 1],
self.stage_out_channels[stage],
groups=self.groups,
grouped_conv=grouped_conv,
combine='concat')
modules[stage_name + "_0"] = first_module
# add more ShuffleUnits depending on pre-defined number of repeats
for i in range(self.stage_repeats[stage - 2]):
name = stage_name + "_{}".format(i + 1)
module = ShuffleUnit(self.stage_out_channels[stage],
self.stage_out_channels[stage],
groups=self.groups,
grouped_conv=True,
combine='add')
modules[name] = module
return nn.Sequential(modules)
def forward(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
# global average pooling layer
x = F.avg_pool2d(x, x.data.size()[-2:])
# flatten for input to fully-connected layer
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def shuffle_v1(**kwargs):
model = ShuffleNetV1(**kwargs)
return model
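# Usage sketch: groups controls both the grouped 1x1 convolutions and the
# per-stage channel table chosen in ShuffleNetV1.__init__ (e.g. groups=3 gives
# [240, 480, 960] stage channels at width_mult=1).
if __name__ == '__main__':
    net = shuffle_v1(groups=3, num_classes=1000)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])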
import torch as torch
import torch.nn as nn
from torch.nn import init
__all__ = ["shuffle_v2"]
def conv3x3(in_channels,
out_channels,
stride=1,
padding=1,
bias=True,
groups=1):
return nn.Conv2d(in_channels,
out_channels,
kernel_size=3,
stride=stride,
padding=padding,
bias=bias,
groups=groups)
def conv1x1(in_channels, out_channels, bias=True, groups=1):
return nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
bias=bias,
groups=groups)
def channel_shuffle(x, groups):
batchsize, num_channels, height, width = x.data.size()
channels_per_group = num_channels // groups
x = x.view(batchsize, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(batchsize, -1, height, width)
return x
def channel_split(x, splits=[24, 24]):
return torch.split(x, splits, dim=1)
class ParimaryModule(nn.Module):
def __init__(self, in_channels=3, out_channels=24):
super(ParimaryModule, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.ParimaryModule = nn.Sequential(
conv3x3(in_channels, out_channels, 2, 1, True, 1),
nn.BatchNorm2d(out_channels),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.ParimaryModule(x)
return x
class FinalModule(nn.Module):
def __init__(self, in_channels=464, out_channels=1024, num_classes=1000):
super(FinalModule, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.num_classes = num_classes
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(out_channels, num_classes)
self.FinalConv = nn.Sequential(
conv1x1(in_channels, out_channels, True, 1),
nn.BatchNorm2d(out_channels), nn.ReLU())
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.FinalConv(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class ShuffleNetV2Block(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, splits_left=2):
super(ShuffleNetV2Block, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.stride = stride
self.splits_left = splits_left
if stride == 2:
self.Left = nn.Sequential(
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, out_channels // 2, True, 1),
nn.BatchNorm2d(out_channels // 2), nn.ReLU())
self.Right = nn.Sequential(
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU(),
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, out_channels // 2, True, 1),
nn.BatchNorm2d(out_channels // 2), nn.ReLU())
elif stride == 1:
in_channels = in_channels - in_channels // splits_left
self.Right = nn.Sequential(
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU(),
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU())
else:
raise ValueError('stride must be 1 or 2')
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
if self.stride == 2:
x_left, x_right = x, x
x_left = self.Left(x_left)
x_right = self.Right(x_right)
elif self.stride == 1:
x_split = channel_split(x, [
self.in_channels // self.splits_left,
self.in_channels - self.in_channels // self.splits_left
])
x_left, x_right = x_split[0], x_split[1]
x_right = self.Right(x_right)
x = torch.cat((x_left, x_right), dim=1)
x = channel_shuffle(x, 2)
return x
class ShuffleNetV2(nn.Module):
def __init__(self,
in_channels=3,
num_classes=1000,
net_scale=1.0,
stage_repeat=1,
splits_left=2):
super(ShuffleNetV2, self).__init__()
self.in_channels = in_channels
self.num_classes = num_classes
self.net_scale = net_scale
self.splits_left = splits_left
if net_scale == 0.5:
self.out_channels = [24, 48, 96, 192, 1024]
elif net_scale == 1.0:
self.out_channels = [24, 116, 232, 464, 1024]
elif net_scale == 1.5:
self.out_channels = [24, 176, 352, 704, 1024]
elif net_scale == 2.0:
self.out_channels = [24, 244, 488, 976, 2048]
else:
raise ValueError('net_scale must be 0.5,1.0,1.5 or 2.0')
self.ParimaryModule = ParimaryModule(in_channels, self.out_channels[0])
if stage_repeat == 1:
self.Stage1 = self.Stage(1, [1, 3])
self.Stage2 = self.Stage(2, [1, 7])
self.Stage3 = self.Stage(3, [1, 3])
        elif stage_repeat == 2:
            self.Stage1 = self.Stage(1, [1, 7])
            self.Stage2 = self.Stage(2, [1, 15])
            self.Stage3 = self.Stage(3, [1, 7])
        else:
            raise ValueError('stage_repeat must be 1 or 2')
self.FinalModule = FinalModule(self.out_channels[3],
self.out_channels[4], num_classes)
def Stage(self, stage=1, BlockRepeat=[1, 3]):
modules = []
if BlockRepeat[0] == 1:
modules.append(
ShuffleNetV2Block(self.out_channels[stage - 1],
self.out_channels[stage], 2,
self.splits_left))
else:
raise ValueError('stage first block must only repeat 1 time')
for i in range(BlockRepeat[1]):
modules.append(
ShuffleNetV2Block(self.out_channels[stage],
self.out_channels[stage], 1,
self.splits_left))
return nn.Sequential(*modules)
def forward(self, x):
x = self.ParimaryModule(x)
x = self.Stage1(x)
x = self.Stage2(x)
x = self.Stage3(x)
x = self.FinalModule(x)
return x
def shuffle_v2(**kwargs):
model = ShuffleNetV2(**kwargs)
return model
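# Usage sketch: net_scale selects the channel table (e.g. 1.0 -> [24, 116, 232,
# 464, 1024]); with the default stage_repeat=1 each stage has one stride-2 block
# plus 3/7/3 stride-1 blocks, i.e. the standard (4, 8, 4) layout.
if __name__ == '__main__':
    net = shuffle_v2(net_scale=1.0, num_classes=1000)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])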
import torch
import torch.nn as nn
import torch.nn.functional as F
def conv3x3(in_planes, out_planes, stride=1, groups=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
groups=groups)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=1,
stride=stride,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = conv1x1(inplanes, planes)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = conv3x3(planes, planes, stride)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2g = conv3x3(planes, planes, stride, groups=32)
self.bn2g = nn.BatchNorm2d(planes)
self.conv3 = conv1x1(planes, planes * self.expansion)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.conv_fc1 = nn.Conv2d(planes, planes // 16, 1, bias=False)
self.bn_fc1 = nn.BatchNorm2d(planes // 16)
self.conv_fc2 = nn.Conv2d(planes // 16, 2 * planes, 1, bias=False)
self.D = planes
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
d1 = self.conv2(out)
d1 = self.bn2(d1)
d1 = self.relu(d1)
d2 = self.conv2g(out)
d2 = self.bn2g(d2)
d2 = self.relu(d2)
d = self.avg_pool(d1) + self.avg_pool(d2)
d = F.relu(self.bn_fc1(self.conv_fc1(d)))
d = self.conv_fc2(d)
d = torch.unsqueeze(d, 1).view(-1, 2, self.D, 1, 1)
d = F.softmax(d, 1)
d1 = d1 * d[:, 0, :, :, :].squeeze(1)
d2 = d2 * d[:, 1, :, :, :].squeeze(1)
d = d1 + d2
out = self.conv3(d)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
zero_init_residual=False):
super(ResNet, self).__init__()
self.inplanes = 64
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight,
mode='fan_out',
nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def sk_resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
def sk_resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
return model
def sk_resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def sk_resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def sk_resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
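# Usage sketch of the selective-kernel Bottleneck above: each block runs the 3x3
# stage twice (plain conv2 and 32-group conv2g), pools both branches, and mixes
# them with a per-channel softmax before conv3.
if __name__ == '__main__':
    net = sk_resnet50(num_classes=1000)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])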
import torch.nn as nn
__all__ = [
'VGG',
'vgg11',
'vgg11_bn',
'vgg13',
'vgg13_bn',
'vgg16',
'vgg16_bn',
'vgg19_bn',
'vgg19',
]
class VGG(nn.Module):
def __init__(self, features, num_classes=1000, init_weights=True):
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Linear(512 * 7 * 7, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, num_classes),
)
if init_weights:
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight,
mode='fan_out',
nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def make_layers(cfg, batch_norm=False):
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
return nn.Sequential(*layers)
cfg = {
'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'B':
[64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'D': [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
512, 512, 512, 'M'
],
'E': [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512,
512, 'M', 512, 512, 512, 512, 'M'
],
}
def vgg11(**kwargs):
model = VGG(make_layers(cfg['A']), **kwargs)
return model
def vgg11_bn(**kwargs):
model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
return model
def vgg13(**kwargs):
model = VGG(make_layers(cfg['B']), **kwargs)
return model
def vgg13_bn(**kwargs):
model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
return model
def vgg16(**kwargs):
model = VGG(make_layers(cfg['D']), **kwargs)
return model
def vgg16_bn(**kwargs):
model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
return model
def vgg19(**kwargs):
model = VGG(make_layers(cfg['E']), **kwargs)
return model
def vgg19_bn(**kwargs):
model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
return model
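# Usage sketch: the letters in cfg follow the VGG paper's configurations
# (A=VGG11, B=VGG13, D=VGG16, E=VGG19); the *_bn builders insert BatchNorm after
# every convolution via make_layers(batch_norm=True).
if __name__ == '__main__':
    net = vgg16_bn(num_classes=1000)
    print(sum(p.numel() for p in net.parameters()))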
import os
import shutil
import argparse
import random
import re
import time
import yaml
import json
import socket
import logging
from addict import Dict
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
from torch.backends import cudnn
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
import models
from utils.dataloader import build_dataloader
from utils.misc import accuracy, check_keys, AverageMeter, ProgressMeter
from utils.loss import LabelSmoothLoss
parser = argparse.ArgumentParser(description='ImageNet Training Example')
parser.add_argument('--config',
default='configs/resnet50.yaml',
type=str,
help='path to config file')
parser.add_argument('--test',
dest='test',
action='store_true',
help='evaluate model on validation set')
parser.add_argument('--output',
dest='output',
default='inception_result.json',
help='output json file to hold perf results')
parser.add_argument('--port',
default=12345,
type=int,
metavar='P',
help='master port')
parser.add_argument('--rank', default=0, type=int,
help='node rank for distributed training')
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger()
logger_all = logging.getLogger('all')
def main():
args = parser.parse_args()
args.config = yaml.load(open(args.config, 'r'), Loader=yaml.Loader)
cfgs = Dict(args.config)
# args.rank = int(os.environ['SLURM_PROCID'])
# args.world_size = int(os.environ['SLURM_NTASKS'])
# args.local_rank = int(os.environ['SLURM_LOCALID'])
args.world_size = int(os.environ["WORLD_SIZE"])
args.local_rank = int(os.environ['LOCAL_RANK'])
# node_list = str(os.environ['SLURM_NODELIST'])
# node_parts = re.findall('[0-9]+', node_list)
# os.environ[
# 'MASTER_ADDR'] = f'{node_parts[1]}.{node_parts[2]}.{node_parts[3]}.{node_parts[4]}'
# os.environ['MASTER_PORT'] = str(args.port)
# os.environ['WORLD_SIZE'] = str(args.world_size)
# os.environ['RANK'] = str(args.rank)
dist.init_process_group(backend="nccl")
torch.cuda.set_device(args.local_rank)
if args.local_rank == 0:
logger.setLevel(logging.INFO)
else:
logger.setLevel(logging.ERROR)
logger_all.setLevel(logging.INFO)
logger_all.info("rank {} of {} jobs, in {}".format(args.local_rank,
args.world_size,
socket.gethostname()))
dist.barrier()
logger.info("config\n{}".format(
json.dumps(cfgs, indent=2, ensure_ascii=False)))
if cfgs.get('seed', None):
random.seed(cfgs.seed)
torch.manual_seed(cfgs.seed)
torch.cuda.manual_seed(cfgs.seed)
cudnn.deterministic = True
model = models.__dict__[cfgs.net.arch](**cfgs.net.kwargs)
model.cuda()
logger.info("creating model '{}'".format(cfgs.net.arch))
model = DDP(model, device_ids=[args.local_rank])
logger.info("model\n{}".format(model))
    if cfgs.trainer.get('label_smooth', None):
criterion = LabelSmoothLoss(cfgs.trainer.label_smooth,
cfgs.net.kwargs.num_classes).cuda()
else:
criterion = nn.CrossEntropyLoss().cuda()
logger.info("loss\n{}".format(criterion))
optimizer = torch.optim.SGD(model.parameters(),
**cfgs.trainer.optimizer.kwargs)
logger.info("optimizer\n{}".format(optimizer))
cudnn.benchmark = True
args.start_epoch = -cfgs.trainer.lr_scheduler.get('warmup_epochs', 0)
args.max_epoch = cfgs.trainer.max_epoch
args.test_freq = cfgs.trainer.test_freq
args.log_freq = cfgs.trainer.log_freq
best_acc1 = 0.0
if cfgs.saver.resume_model:
assert os.path.isfile(
cfgs.saver.resume_model), 'Not found resume model: {}'.format(
cfgs.saver.resume_model)
checkpoint = torch.load(cfgs.saver.resume_model)
check_keys(model=model, checkpoint=checkpoint)
model.load_state_dict(checkpoint['state_dict'])
args.start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
optimizer.load_state_dict(checkpoint['optimizer'])
logger.info("resume training from '{}' at epoch {}".format(
cfgs.saver.resume_model, checkpoint['epoch']))
elif cfgs.saver.pretrain_model:
assert os.path.isfile(
cfgs.saver.pretrain_model), 'Not found pretrain model: {}'.format(
cfgs.saver.pretrain_model)
checkpoint = torch.load(cfgs.saver.pretrain_model)
check_keys(model=model, checkpoint=checkpoint)
model.load_state_dict(checkpoint['state_dict'])
logger.info("pretrain training from '{}'".format(
cfgs.saver.pretrain_model))
if args.local_rank == 0 and cfgs.saver.get('save_dir', None):
if not os.path.exists(cfgs.saver.save_dir):
os.makedirs(cfgs.saver.save_dir)
logger.info("create checkpoint folder {}".format(
cfgs.saver.save_dir))
# Data loading code
train_loader, train_sampler, test_loader, _ = build_dataloader(
cfgs.dataset, args.world_size)
# test mode
if args.test:
return
# choose scheduler
lr_scheduler = torch.optim.lr_scheduler.__dict__[
cfgs.trainer.lr_scheduler.type](optimizer if isinstance(
optimizer, torch.optim.Optimizer) else optimizer.optimizer,
**cfgs.trainer.lr_scheduler.kwargs,
last_epoch=args.start_epoch - 1)
monitor_writer = None
if args.local_rank == 0 and cfgs.get('monitor', None):
if cfgs.monitor.get('type', None) == 'pavi':
from pavi import SummaryWriter
if cfgs.monitor.get("_taskid", None):
monitor_writer = SummaryWriter(session_text=yaml.dump(
args.config),
**cfgs.monitor.kwargs,
taskid=cfgs.monitor._taskid)
else:
monitor_writer = SummaryWriter(session_text=yaml.dump(
args.config),
**cfgs.monitor.kwargs)
    # training (max_epoch is forced to 1 so the perf test only times a single epoch)
    args.max_epoch = 1
for epoch in range(args.start_epoch, args.max_epoch):
train_sampler.set_epoch(epoch)
# train for one epoch
avg_time = train(train_loader, model, criterion, optimizer, epoch,
args, monitor_writer)
avg_time = avg_time.avg
if (epoch + 1) % args.test_freq == 0 or epoch + 1 == args.max_epoch:
# evaluate on validation set
if args.local_rank == 0:
results = {}
if os.path.exists(args.output):
with open(args.output, 'r') as f:
try:
results = json.load(f)
                        except json.JSONDecodeError:
                            # ignore a corrupt or partial results file
                            pass
if results.get('inceptionv3', None) is None:
results['inceptionv3'] = {}
results['inceptionv3']['perf' + str(
args.world_size
)] = cfgs.dataset.batch_size * args.world_size / avg_time
with open(args.output, 'w') as f:
json.dump(results, f)
lr_scheduler.step()
def train(train_loader, model, criterion, optimizer, epoch, args,
monitor_writer):
batch_time = AverageMeter('Time', ':.3f', -1)
data_time = AverageMeter('Data', ':.3f', 200)
losses = AverageMeter('Loss', ':.4f', 50)
top1 = AverageMeter('Acc@1', ':.2f', 50)
top5 = AverageMeter('Acc@5', ':.2f', 50)
memory = AverageMeter('Memory(MB)', ':.0f')
progress = ProgressMeter(len(train_loader),
batch_time,
data_time,
losses,
top1,
top5,
memory,
prefix="Epoch: [{}/{}]".format(
epoch + 1, args.max_epoch))
# switch to train mode
model.train()
end = time.time()
for i, (input, target) in enumerate(train_loader):
# measure data loading time
data_time.update(time.time() - end)
input = input.cuda()
target = target.cuda()
# compute output
output = model(input)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
stats_all = torch.tensor([loss.item(), acc1[0].item(),
acc5[0].item()]).float().cuda()
dist.all_reduce(stats_all)
stats_all /= args.world_size
losses.update(stats_all[0].item())
top1.update(stats_all[1].item())
top5.update(stats_all[2].item())
memory.update(torch.cuda.max_memory_allocated() / 1024 / 1024)
# compute gradient and do SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()
        # measure elapsed time (skip the first iterations to exclude warm-up)
        if i >= 3:
batch_time.update(time.time() - end)
end = time.time()
if i % args.log_freq == 0:
progress.display(i)
if args.local_rank == 0 and monitor_writer:
cur_iter = epoch * len(train_loader) + i
monitor_writer.add_scalar('Train_Loss', losses.avg, cur_iter)
monitor_writer.add_scalar('Accuracy_train_top1', top1.avg,
cur_iter)
monitor_writer.add_scalar('Accuracy_train_top5', top5.avg,
cur_iter)
return batch_time
if __name__ == '__main__':
main()
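# Launch sketch: the script reads WORLD_SIZE / LOCAL_RANK from the environment and
# calls dist.init_process_group("nccl"), which matches what torchrun provides
# (the file name main.py below is only a placeholder):
#   torchrun --nproc_per_node=8 main.py --config configs/resnet50.yaml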