"""
Creates a MobileNetV2 Model as defined in:
Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. (2018).
MobileNetV2: Inverted Residuals and Linear Bottlenecks
arXiv preprint arXiv:1801.04381.
Adapted from https://github.com/tonylins/pytorch-mobilenet-v2
"""
import logging
import math
import torch
import torch.nn as nn
from fastreid.layers import get_norm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
def _make_divisible(v, divisor, min_value=None):
"""
    This function is taken from the original TensorFlow repo.
    It ensures that all layers have a channel number that is divisible by 8.
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v: raw channel count (possibly fractional after width scaling)
    :param divisor: required divisor of the returned value
    :param min_value: lower bound on the result; defaults to ``divisor``
    :return: ``v`` rounded to a multiple of ``divisor``, never more than 10% below ``v``
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the value by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
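
# Worked example (illustrative, not part of the original file): with divisor=8,
# _make_divisible(30, 8) returns 32, while _make_divisible(10, 8) returns 16 because
# rounding 10 down to 8 would lose more than 10% of the value, so one extra divisor
# is added back.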
def conv_3x3_bn(inp, oup, stride, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
def conv_1x1_bn(inp, oup, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, bn_norm, stride, expand_ratio):
super(InvertedResidual, self).__init__()
assert stride in [1, 2]
hidden_dim = round(inp * expand_ratio)
self.identity = stride == 1 and inp == oup
if expand_ratio == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                get_norm(bn_norm, oup),
)
def forward(self, x):
if self.identity:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, bn_norm, width_mult=1.):
super(MobileNetV2, self).__init__()
# setting of inverted residual blocks
self.cfgs = [
            # t: expansion factor, c: output channels, n: number of blocks, s: stride of the first block
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# building first layer
input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8)
layers = [conv_3x3_bn(3, input_channel, 2, bn_norm)]
# building inverted residual blocks
block = InvertedResidual
for t, c, n, s in self.cfgs:
output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8)
for i in range(n):
layers.append(block(input_channel, output_channel, bn_norm, s if i == 0 else 1, t))
input_channel = output_channel
self.features = nn.Sequential(*layers)
# building last several layers
output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280
self.conv = conv_1x1_bn(input_channel, output_channel, bn_norm)
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = self.conv(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
@BACKBONE_REGISTRY.register()
def build_mobilenetv2_backbone(cfg):
"""
Create a MobileNetV2 instance from config.
Returns:
        MobileNetV2: a :class:`MobileNetV2` instance.
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
width_mult = {
"1.0x": 1.0,
"0.75x": 0.75,
"0.5x": 0.5,
"0.35x": 0.35,
'0.25x': 0.25,
'0.1x': 0.1,
}[depth]
model = MobileNetV2(bn_norm, width_mult)
if pretrain:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
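
# Minimal smoke test (illustrative sketch, not part of the original file; assumes "BN"
# is a norm type accepted by fastreid.layers.get_norm):
if __name__ == "__main__":
    m = MobileNetV2("BN", width_mult=1.0)
    out = m(torch.randn(1, 3, 256, 128))
    # Overall stride is 32 (stem stride 2 plus four stride-2 stages), so a 256x128
    # input yields an 8x4 feature map with 1280 channels.
    print(out.shape)  # torch.Size([1, 1280, 8, 4])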
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence
import torch
from torch import nn, Tensor
from torch.nn import functional as F
# This import fallback keeps compatibility across torchvision versions.
try:
from torchvision.models.utils import load_state_dict_from_url
except ImportError:
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from fastreid.layers import get_norm
from .build import BACKBONE_REGISTRY
from .mobilenet import _make_divisible
# https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
model_urls = {
"Large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
"Small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
}
def conv_1x1_bn(inp, oup, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
class ConvBNActivation(nn.Sequential):
def __init__(
self,
in_planes: int,
out_planes: int,
kernel_size: int = 3,
stride: int = 1,
groups: int = 1,
bn_norm=None,
activation_layer: Optional[Callable[..., nn.Module]] = None,
dilation: int = 1,
) -> None:
padding = (kernel_size - 1) // 2 * dilation
if activation_layer is None:
activation_layer = nn.ReLU6
super(ConvBNActivation, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, dilation=dilation, groups=groups,
bias=False),
get_norm(bn_norm, out_planes),
activation_layer(inplace=True)
)
self.out_channels = out_planes
class SqueezeExcitation(nn.Module):
def __init__(self, input_channels: int, squeeze_factor: int = 4):
super().__init__()
squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
def _scale(self, input: Tensor, inplace: bool) -> Tensor:
scale = F.adaptive_avg_pool2d(input, 1)
scale = self.fc1(scale)
scale = self.relu(scale)
scale = self.fc2(scale)
return F.hardsigmoid(scale, inplace=inplace)
def forward(self, input: Tensor) -> Tensor:
scale = self._scale(input, True)
return scale * input
class InvertedResidualConfig:
def __init__(self, input_channels: int, kernel: int, expanded_channels: int, out_channels: int, use_se: bool,
activation: str, stride: int, dilation: int, width_mult: float):
self.input_channels = self.adjust_channels(input_channels, width_mult)
self.kernel = kernel
self.expanded_channels = self.adjust_channels(expanded_channels, width_mult)
self.out_channels = self.adjust_channels(out_channels, width_mult)
self.use_se = use_se
self.use_hs = activation == "HS"
self.stride = stride
self.dilation = dilation
@staticmethod
def adjust_channels(channels: int, width_mult: float):
return _make_divisible(channels * width_mult, 8)
class InvertedResidual(nn.Module):
def __init__(self, cnf: InvertedResidualConfig, bn_norm,
se_layer: Callable[..., nn.Module] = SqueezeExcitation):
super().__init__()
if not (1 <= cnf.stride <= 2):
raise ValueError('illegal stride value')
self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
layers: List[nn.Module] = []
activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU
# expand
if cnf.expanded_channels != cnf.input_channels:
layers.append(ConvBNActivation(cnf.input_channels, cnf.expanded_channels, kernel_size=1,
bn_norm=bn_norm, activation_layer=activation_layer))
# depthwise
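        # When a dilated configuration is requested (dilation > 1), the spatial stride
        # is dropped so the receptive field grows without further downsampling.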
stride = 1 if cnf.dilation > 1 else cnf.stride
layers.append(ConvBNActivation(cnf.expanded_channels, cnf.expanded_channels, kernel_size=cnf.kernel,
stride=stride, dilation=cnf.dilation, groups=cnf.expanded_channels,
bn_norm=bn_norm, activation_layer=activation_layer))
if cnf.use_se:
layers.append(se_layer(cnf.expanded_channels))
# project
layers.append(ConvBNActivation(cnf.expanded_channels, cnf.out_channels, kernel_size=1, bn_norm=bn_norm,
activation_layer=nn.Identity))
self.block = nn.Sequential(*layers)
self.out_channels = cnf.out_channels
self._is_cn = cnf.stride > 1
def forward(self, input: Tensor) -> Tensor:
result = self.block(input)
if self.use_res_connect:
result += input
return result
class MobileNetV3(nn.Module):
def __init__(
self,
bn_norm,
inverted_residual_setting: List[InvertedResidualConfig],
last_channel: int,
block: Optional[Callable[..., nn.Module]] = None,
) -> None:
"""
MobileNet V3 main class
Args:
inverted_residual_setting (List[InvertedResidualConfig]): Network structure
last_channel (int): The number of channels on the penultimate layer
block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet
"""
super().__init__()
if not inverted_residual_setting:
raise ValueError("The inverted_residual_setting should not be empty")
elif not (isinstance(inverted_residual_setting, Sequence) and
all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):
raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")
if block is None:
block = InvertedResidual
layers: List[nn.Module] = []
# building first layer
firstconv_output_channels = inverted_residual_setting[0].input_channels
layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, bn_norm=bn_norm,
activation_layer=nn.Hardswish))
# building inverted residual blocks
for cnf in inverted_residual_setting:
layers.append(block(cnf, bn_norm))
# building last several layers
lastconv_input_channels = inverted_residual_setting[-1].out_channels
lastconv_output_channels = 6 * lastconv_input_channels
layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1,
bn_norm=bn_norm, activation_layer=nn.Hardswish))
self.features = nn.Sequential(*layers)
self.conv = conv_1x1_bn(lastconv_output_channels, last_channel, bn_norm)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x: Tensor) -> Tensor:
x = self.features(x)
x = self.conv(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
def _mobilenet_v3_conf(arch: str, params: Dict[str, Any]):
# non-public config parameters
reduce_divider = 2 if params.pop('_reduced_tail', False) else 1
dilation = 2 if params.pop('_dilated', False) else 1
width_mult = params.pop('_width_mult', 1.0)
bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult)
adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult)
if arch == "Large":
inverted_residual_setting = [
bneck_conf(16, 3, 16, 16, False, "RE", 1, 1),
bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1
bneck_conf(24, 3, 72, 24, False, "RE", 1, 1),
bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2
bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3
bneck_conf(80, 3, 200, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 480, 112, True, "HS", 1, 1),
bneck_conf(112, 3, 672, 112, True, "HS", 1, 1),
bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4
bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
]
last_channel = adjust_channels(1280 // reduce_divider) # C5
elif arch == "Small":
inverted_residual_setting = [
bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1
bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2
bneck_conf(24, 3, 88, 24, False, "RE", 1, 1),
bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3
bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
bneck_conf(40, 5, 120, 48, True, "HS", 1, 1),
bneck_conf(48, 5, 144, 48, True, "HS", 1, 1),
bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4
bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
]
last_channel = adjust_channels(1024 // reduce_divider) # C5
else:
raise ValueError("Unsupported model type {}".format(arch))
return inverted_residual_setting, last_channel
def _mobilenet_v3_model(
bn_norm,
depth: str,
pretrained: bool,
pretrain_path: str,
**kwargs: Any
):
inverted_residual_setting, last_channel = _mobilenet_v3_conf(depth, kwargs)
model = MobileNetV3(bn_norm, inverted_residual_setting, last_channel, **kwargs)
if pretrained:
if pretrain_path:
state_dict = torch.load(pretrain_path)
else:
if model_urls.get(depth, None) is None:
raise ValueError("No checkpoint is available for model type {}".format(depth))
state_dict = load_state_dict_from_url(model_urls[depth], progress=True)
model.load_state_dict(state_dict, strict=False)
return model
@BACKBONE_REGISTRY.register()
def build_mobilenetv3_backbone(cfg):
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
model = _mobilenet_v3_model(bn_norm, depth, pretrain, pretrain_path)
return model
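
# Usage sketch (illustrative, not part of the original file): the config helper and the
# model class can also be composed directly, bypassing the registry.
if __name__ == "__main__":
    setting, last_channel = _mobilenet_v3_conf("Small", {})
    net = MobileNetV3("BN", setting, last_channel)
    feat = net(torch.randn(1, 3, 224, 224))
    print(feat.shape)  # torch.Size([1, 1024, 7, 7]) for the Small variant at this size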
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
# based on:
# https://github.com/KaiyangZhou/deep-person-reid/blob/master/torchreid/models/osnet.py
import logging
import torch
from torch import nn
from fastreid.layers import get_norm
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
model_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
bn_norm,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels, bn_norm):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None: num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm: self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = nn.Identity()
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None: x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.gate_activation(x)
if self.return_gates: return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
bn_norm,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels, bn_norm)
self.conv2a = LightConv3x3(mid_channels, mid_channels, bn_norm)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn_norm)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels, bn_norm)
self.IN = None
if IN: self.IN = nn.InstanceNorm2d(out_channels, affine=True)
self.relu = nn.ReLU(True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
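        # Unified aggregation gate: the same ChannelGate weighs each scale's stream
        # before the streams are summed into a single omni-scale representation.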
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return self.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. arXiv preprint, 2019.
"""
def __init__(
self,
blocks,
layers,
channels,
bn_norm,
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, bn_norm, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
bn_norm,
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
bn_norm,
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
bn_norm,
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3], bn_norm)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
bn_norm,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, bn_norm, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, bn_norm, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels, bn_norm),
nn.AvgPool2d(2, stride=2),
)
)
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
    Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
import warnings
import logging
logger = logging.getLogger(__name__)
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
logger.info(f"Pretrain model don't exist, downloading from {model_urls[key]}")
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
state_dict = torch.load(cached_file, map_location=torch.device('cpu'))
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
return model_dict
@BACKBONE_REGISTRY.register()
def build_osnet_backbone(cfg):
"""
    Create an OSNet instance from config.
Returns:
OSNet: a :class:`OSNet` instance
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
with_ibn = cfg.MODEL.BACKBONE.WITH_IBN
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
num_blocks_per_stage = [2, 2, 2]
num_channels_per_stage = {
"x1_0": [64, 256, 384, 512],
"x0_75": [48, 192, 288, 384],
"x0_5": [32, 128, 192, 256],
"x0_25": [16, 64, 96, 128]}[depth]
model = OSNet([OSBlock, OSBlock, OSBlock], num_blocks_per_stage, num_channels_per_stage,
bn_norm, IN=with_ibn)
if pretrain:
        # Load pretrained weights from the given path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
if with_ibn:
pretrain_key = "osnet_ibn_" + depth
else:
pretrain_key = "osnet_" + depth
state_dict = init_pretrained_weights(model, pretrain_key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
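
# Usage note (illustrative, not part of the original file): with depth "x1_0" the
# backbone ends in a 512-channel feature map (channels[3]); the narrower variants
# scale this down to 384, 256, and 128 channels respectively.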
from .regnet import build_regnet_backbone
from .effnet import build_effnet_backbone
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Configuration file (powered by YACS)."""
import argparse
import os
import sys
from yacs.config import CfgNode as CfgNode
# Global config object
_C = CfgNode()
# Example usage:
# from core.config import cfg
cfg = _C
# ------------------------------------------------------------------------------------ #
# Model options
# ------------------------------------------------------------------------------------ #
_C.MODEL = CfgNode()
# Model type
_C.MODEL.TYPE = ""
# Number of weight layers
_C.MODEL.DEPTH = 0
# Number of classes
_C.MODEL.NUM_CLASSES = 10
# Loss function (see pycls/models/loss.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"
# ------------------------------------------------------------------------------------ #
# ResNet options
# ------------------------------------------------------------------------------------ #
_C.RESNET = CfgNode()
# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"
# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1
# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64
# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True
# ------------------------------------------------------------------------------------ #
# AnyNet options
# ------------------------------------------------------------------------------------ #
_C.ANYNET = CfgNode()
# Stem type
_C.ANYNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.ANYNET.STEM_W = 32
# Block type
_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block"
# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []
# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []
# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []
# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []
# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False
# SE ratio
_C.ANYNET.SE_R = 0.25
# ------------------------------------------------------------------------------------ #
# RegNet options
# ------------------------------------------------------------------------------------ #
_C.REGNET = CfgNode()
# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.REGNET.STEM_W = 32
# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"
# Stride of each stage
_C.REGNET.STRIDE = 2
# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25
# Depth
_C.REGNET.DEPTH = 10
# Initial width
_C.REGNET.W0 = 32
# Slope
_C.REGNET.WA = 5.0
# Quantization
_C.REGNET.WM = 2.5
# Group width
_C.REGNET.GROUP_W = 16
# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
# ------------------------------------------------------------------------------------ #
# EfficientNet options
# ------------------------------------------------------------------------------------ #
_C.EN = CfgNode()
# Stem width
_C.EN.STEM_W = 32
# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []
# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []
# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25
# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []
# Kernel sizes for each stage
_C.EN.KERNELS = []
# Head width
_C.EN.HEAD_W = 1280
# Drop connect ratio
_C.EN.DC_RATIO = 0.0
# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0
# ------------------------------------------------------------------------------------ #
# Batch norm options
# ------------------------------------------------------------------------------------ #
_C.BN = CfgNode()
# BN epsilon
_C.BN.EPS = 1e-5
# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1
# Precise BN stats
_C.BN.USE_PRECISE_STATS = True
_C.BN.NUM_SAMPLES_PRECISE = 8192
# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False
# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0
# ------------------------------------------------------------------------------------ #
# Optimizer options
# ------------------------------------------------------------------------------------ #
_C.OPTIM = CfgNode()
# Base learning rate
_C.OPTIM.BASE_LR = 0.1
# Learning rate policy; select from {'cos', 'exp', 'steps'}
_C.OPTIM.LR_POLICY = "cos"
# Exponential decay factor
_C.OPTIM.GAMMA = 0.1
# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []
# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1
# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200
# Momentum
_C.OPTIM.MOMENTUM = 0.9
# Momentum dampening
_C.OPTIM.DAMPENING = 0.0
# Nesterov momentum
_C.OPTIM.NESTEROV = True
# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4
# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1
# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_ITERS = 0
# ------------------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------------------ #
_C.TRAIN = CfgNode()
# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"
# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128
# Image size
_C.TRAIN.IM_SIZE = 224
# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1
# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1
# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True
# Weights to start training from
_C.TRAIN.WEIGHTS = ""
# ------------------------------------------------------------------------------------ #
# Testing options
# ------------------------------------------------------------------------------------ #
_C.TEST = CfgNode()
# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"
# Total mini-batch size
_C.TEST.BATCH_SIZE = 200
# Image size
_C.TEST.IM_SIZE = 256
# Weights to use for testing
_C.TEST.WEIGHTS = ""
# ------------------------------------------------------------------------------------ #
# Common train/test data loader options
# ------------------------------------------------------------------------------------ #
_C.DATA_LOADER = CfgNode()
# Number of data loader workers per process
_C.DATA_LOADER.NUM_WORKERS = 8
# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
# ------------------------------------------------------------------------------------ #
# Memory options
# ------------------------------------------------------------------------------------ #
_C.MEM = CfgNode()
# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
# ------------------------------------------------------------------------------------ #
# CUDNN options
# ------------------------------------------------------------------------------------ #
_C.CUDNN = CfgNode()
# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True
# ------------------------------------------------------------------------------------ #
# Precise timing options
# ------------------------------------------------------------------------------------ #
_C.PREC_TIME = CfgNode()
# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3
# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30
# ------------------------------------------------------------------------------------ #
# Misc options
# ------------------------------------------------------------------------------------ #
# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1
# Output directory
_C.OUT_DIR = "/tmp"
# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1
# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"
# Log period in iters
_C.LOG_PERIOD = 10
# Distributed backend
_C.DIST_BACKEND = "nccl"
# Hostname and port range for multi-process groups (actual port selected randomly)
_C.HOST = "localhost"
_C.PORT_RANGE = [10000, 65000]
# Models weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"
# ------------------------------------------------------------------------------------ #
# Deprecated keys
# ------------------------------------------------------------------------------------ #
_C.register_deprecated_key("PREC_TIME.BATCH_SIZE")
_C.register_deprecated_key("PREC_TIME.ENABLED")
_C.register_deprecated_key("PORT")
def assert_and_infer_cfg():
"""Checks config values invariants."""
err_str = "The first lr step must start at 0"
assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str
data_splits = ["train", "val", "test"]
err_str = "Data split '{}' not supported"
assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT)
assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT)
err_str = "Mini-batch size should be a multiple of NUM_GPUS."
assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
err_str = "Log destination '{}' not supported"
assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST)
def dump_cfg():
"""Dumps the config to the output directory."""
cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
with open(cfg_file, "w") as f:
_C.dump(stream=f)
def load_cfg(out_dir, cfg_dest="config.yaml"):
"""Loads config from specified output directory."""
cfg_file = os.path.join(out_dir, cfg_dest)
_C.merge_from_file(cfg_file)
def load_cfg_fom_args(description="Config file options."):
"""Load config from command line arguments and set any specified options."""
parser = argparse.ArgumentParser(description=description)
help_s = "Config file location"
parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str)
help_s = "See pycls/core/config.py for all options"
parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
_C.merge_from_file(args.cfg_file)
_C.merge_from_list(args.opts)
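
# Example invocation (illustrative; the script and config file names are assumptions):
# a training script that calls load_cfg_fom_args() takes a YAML file plus key-value
# overrides, which are applied via merge_from_list, e.g.
#
#   python train_net.py --cfg EN-B0_dds_8gpu.yaml OPTIM.BASE_LR 0.1 TRAIN.BATCH_SIZE 64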
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""EfficientNet models."""
import logging
import torch
import torch.nn as nn
from fastreid.layers import *
from fastreid.modeling.backbones.build import BACKBONE_REGISTRY
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .config import cfg as effnet_cfg
from .regnet import drop_connect, init_weights
logger = logging.getLogger(__name__)
model_urls = {
'b0': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305613/EN-B0_dds_8gpu.pyth',
'b1': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B1_dds_8gpu.pyth',
'b2': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305015/EN-B2_dds_8gpu.pyth',
'b3': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B3_dds_8gpu.pyth',
'b4': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305098/EN-B4_dds_8gpu.pyth',
'b5': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B5_dds_8gpu.pyth',
'b6': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B6_dds_8gpu.pyth',
'b7': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B7_dds_8gpu.pyth',
}
class EffHead(nn.Module):
"""EfficientNet head: 1x1, BN, Swish, AvgPool, Dropout, FC."""
def __init__(self, w_in, w_out, bn_norm):
super(EffHead, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 1, stride=1, padding=0, bias=False)
self.conv_bn = get_norm(bn_norm, w_out)
self.conv_swish = Swish()
def forward(self, x):
x = self.conv_swish(self.conv_bn(self.conv(x)))
return x
class Swish(nn.Module):
"""Swish activation function: x * sigmoid(x)."""
def __init__(self):
super(Swish, self).__init__()
def forward(self, x):
return x * torch.sigmoid(x)
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block w/ Swish: AvgPool, FC, Swish, FC, Sigmoid."""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, 1, bias=True),
Swish(),
nn.Conv2d(w_se, w_in, 1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
class MBConv(nn.Module):
"""Mobile inverted bottleneck block w/ SE (MBConv)."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out, bn_norm):
        # expansion, k x k depthwise, BN, Swish, SE, 1x1 linear projection, BN, skip connection
super(MBConv, self).__init__()
self.exp = None
w_exp = int(w_in * exp_r)
if w_exp != w_in:
self.exp = nn.Conv2d(w_in, w_exp, 1, stride=1, padding=0, bias=False)
self.exp_bn = get_norm(bn_norm, w_exp)
self.exp_swish = Swish()
dwise_args = {"groups": w_exp, "padding": (kernel - 1) // 2, "bias": False}
self.dwise = nn.Conv2d(w_exp, w_exp, kernel, stride=stride, **dwise_args)
self.dwise_bn = get_norm(bn_norm, w_exp)
self.dwise_swish = Swish()
self.se = SE(w_exp, int(w_in * se_r))
self.lin_proj = nn.Conv2d(w_exp, w_out, 1, stride=1, padding=0, bias=False)
self.lin_proj_bn = get_norm(bn_norm, w_out)
# Skip connection if in and out shapes are the same (MN-V2 style)
self.has_skip = stride == 1 and w_in == w_out
def forward(self, x):
f_x = x
if self.exp:
f_x = self.exp_swish(self.exp_bn(self.exp(f_x)))
f_x = self.dwise_swish(self.dwise_bn(self.dwise(f_x)))
f_x = self.se(f_x)
f_x = self.lin_proj_bn(self.lin_proj(f_x))
if self.has_skip:
if self.training and effnet_cfg.EN.DC_RATIO > 0.0:
f_x = drop_connect(f_x, effnet_cfg.EN.DC_RATIO)
f_x = x + f_x
return f_x
class EffStage(nn.Module):
"""EfficientNet stage."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out, d, bn_norm):
super(EffStage, self).__init__()
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
name = "b{}".format(i + 1)
self.add_module(name, MBConv(b_w_in, exp_r, kernel, b_stride, se_r, w_out, bn_norm))
def forward(self, x):
for block in self.children():
x = block(x)
return x
class StemIN(nn.Module):
"""EfficientNet stem for ImageNet: 3x3, BN, Swish."""
def __init__(self, w_in, w_out, bn_norm):
super(StemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=2, padding=1, bias=False)
self.bn = get_norm(bn_norm, w_out)
self.swish = Swish()
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class EffNet(nn.Module):
"""EfficientNet model."""
@staticmethod
def get_args():
return {
"stem_w": effnet_cfg.EN.STEM_W,
"ds": effnet_cfg.EN.DEPTHS,
"ws": effnet_cfg.EN.WIDTHS,
"exp_rs": effnet_cfg.EN.EXP_RATIOS,
"se_r": effnet_cfg.EN.SE_R,
"ss": effnet_cfg.EN.STRIDES,
"ks": effnet_cfg.EN.KERNELS,
"head_w": effnet_cfg.EN.HEAD_W,
}
def __init__(self, last_stride, bn_norm, **kwargs):
super(EffNet, self).__init__()
kwargs = self.get_args() if not kwargs else kwargs
self._construct(**kwargs, last_stride=last_stride, bn_norm=bn_norm)
self.apply(init_weights)
def _construct(self, stem_w, ds, ws, exp_rs, se_r, ss, ks, head_w, last_stride, bn_norm):
stage_params = list(zip(ds, ws, exp_rs, ss, ks))
self.stem = StemIN(3, stem_w, bn_norm)
prev_w = stem_w
for i, (d, w, exp_r, stride, kernel) in enumerate(stage_params):
name = "s{}".format(i + 1)
if i == 5: stride = last_stride
self.add_module(name, EffStage(prev_w, exp_r, kernel, stride, se_r, w, d, bn_norm))
prev_w = w
self.head = EffHead(prev_w, head_w, bn_norm)
def forward(self, x):
for module in self.children():
x = module(x)
return x
def init_pretrained_weights(key):
"""Initializes model with pretrained weights.
    Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = model_urls[key].split('/')[-1]
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
logger.info(f"Loading pretrained model from {cached_file}")
state_dict = torch.load(cached_file, map_location=torch.device("cpu"))["model_state"]
return state_dict
@BACKBONE_REGISTRY.register()
def build_effnet_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
cfg_files = {
'b0': 'fastreid/modeling/backbones/regnet/effnet/EN-B0_dds_8gpu.yaml',
'b1': 'fastreid/modeling/backbones/regnet/effnet/EN-B1_dds_8gpu.yaml',
'b2': 'fastreid/modeling/backbones/regnet/effnet/EN-B2_dds_8gpu.yaml',
'b3': 'fastreid/modeling/backbones/regnet/effnet/EN-B3_dds_8gpu.yaml',
'b4': 'fastreid/modeling/backbones/regnet/effnet/EN-B4_dds_8gpu.yaml',
'b5': 'fastreid/modeling/backbones/regnet/effnet/EN-B5_dds_8gpu.yaml',
}[depth]
effnet_cfg.merge_from_file(cfg_files)
model = EffNet(last_stride, bn_norm)
if pretrain:
        # Load pretrained weights from the given path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))["model_state"]
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
key = depth
state_dict = init_pretrained_weights(key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [1, 2, 2, 3, 3, 4, 1]
WIDTHS: [16, 24, 40, 80, 112, 192, 320]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1280
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 4, 4, 5, 2]
WIDTHS: [16, 24, 40, 80, 112, 192, 320]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1280
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 240
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 274
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 4, 4, 5, 2]
WIDTHS: [16, 24, 48, 88, 120, 208, 352]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1408
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 260
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 298
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 40
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 5, 5, 6, 2]
WIDTHS: [24, 32, 48, 96, 136, 232, 384]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1536
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 300
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 342
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 48
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 4, 4, 6, 6, 8, 2]
WIDTHS: [24, 32, 56, 112, 160, 272, 448]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1792
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 380
BATCH_SIZE: 128
TEST:
DATASET: imagenet
IM_SIZE: 434
BATCH_SIZE: 104
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 48
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [3, 5, 5, 7, 7, 9, 3]
WIDTHS: [24, 40, 64, 128, 176, 304, 512]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 2048
OPTIM:
LR_POLICY: cos
BASE_LR: 0.1
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 456
BATCH_SIZE: 64
TEST:
DATASET: imagenet
IM_SIZE: 522
BATCH_SIZE: 48
NUM_GPUS: 8
OUT_DIR: .
import logging
import math
import numpy as np
import torch
import torch.nn as nn
from fastreid.layers import get_norm
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .config import cfg as regnet_cfg
from ..build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
model_urls = {
'800x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905981/RegNetX-200MF_dds_8gpu.pyth',
'800y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906567/RegNetY-800MF_dds_8gpu.pyth',
'1600x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160990626/RegNetX-1.6GF_dds_8gpu.pyth',
'1600y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906681/RegNetY-1.6GF_dds_8gpu.pyth',
'3200x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906139/RegNetX-3.2GF_dds_8gpu.pyth',
'3200y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906834/RegNetY-3.2GF_dds_8gpu.pyth',
'4000x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth',
'4000y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth',
'6400x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161116590/RegNetX-6.4GF_dds_8gpu.pyth',
'6400y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160907112/RegNetY-6.4GF_dds_8gpu.pyth',
}
def init_weights(m):
"""Performs ResNet-style weight initialization."""
if isinstance(m, nn.Conv2d):
# Note that there is no bias due to BN
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
elif isinstance(m, nn.BatchNorm2d):
zero_init_gamma = (
hasattr(m, "final_bn") and m.final_bn and regnet_cfg.BN.ZERO_INIT_FINAL_GAMMA
)
m.weight.data.fill_(0.0 if zero_init_gamma else 1.0)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(mean=0.0, std=0.01)
m.bias.data.zero_()
def get_stem_fun(stem_type):
"""Retrives the stem function by name."""
stem_funs = {
"res_stem_cifar": ResStemCifar,
"res_stem_in": ResStemIN,
"simple_stem_in": SimpleStemIN,
}
assert stem_type in stem_funs.keys(), "Stem type '{}' not supported".format(
stem_type
)
return stem_funs[stem_type]
def get_block_fun(block_type):
"""Retrieves the block function by name."""
block_funs = {
"vanilla_block": VanillaBlock,
"res_basic_block": ResBasicBlock,
"res_bottleneck_block": ResBottleneckBlock,
}
assert block_type in block_funs.keys(), "Block type '{}' not supported".format(
block_type
)
return block_funs[block_type]
def drop_connect(x, drop_ratio):
"""Drop connect (adapted from DARTS)."""
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
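
# Note (illustrative, not part of the original file): drop_connect zeroes whole samples
# with probability drop_ratio and rescales the survivors by 1 / keep_ratio, so the
# expected value of the output matches the input (e.g. drop_ratio=0.2 scales survivors
# by 1.25). It also modifies x in place via div_/mul_.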
class AnyHead(nn.Module):
"""AnyNet head."""
def __init__(self, w_in, nc):
super(AnyHead, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(w_in, nc, bias=True)
def forward(self, x):
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class VanillaBlock(nn.Module):
"""Vanilla block: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Vanilla block does not support bm, gw, and se_r options"
super(VanillaBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class BasicTransform(nn.Module):
"""Basic transformation: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm):
super(BasicTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBasicBlock(nn.Module):
"""Residual basic block: x + F(x), F = basic transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Basic transform does not support bm, gw, and se_r options"
super(ResBasicBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BasicTransform(w_in, w_out, stride, bn_norm)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block"""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.construct(w_in, w_se)
def construct(self, w_in, w_se):
# AvgPool
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# FC, Activation, FC, Sigmoid
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, kernel_size=1, bias=True),
nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE),
nn.Conv2d(w_se, w_in, kernel_size=1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
class BottleneckTransform(nn.Module):
"""Bottlenect transformation: 1x1, 3x3, 1x1"""
def __init__(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
super(BottleneckTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Compute the bottleneck width
w_b = int(round(w_out * bm))
# Compute the number of groups
num_gs = w_b // gw
# 1x1, BN, ReLU
self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
self.a_bn = get_norm(bn_norm, w_b)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(
w_b, w_b, kernel_size=3, stride=stride, padding=1, groups=num_gs, bias=False
)
self.b_bn = get_norm(bn_norm, w_b)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# Squeeze-and-Excitation (SE)
if se_r:
w_se = int(round(w_in * se_r))
self.se = SE(w_b, w_se)
# 1x1, BN
self.c = nn.Conv2d(w_b, w_out, kernel_size=1, stride=1, padding=0, bias=False)
self.c_bn = get_norm(bn_norm, w_out)
self.c_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBottleneckBlock(nn.Module):
"""Residual bottleneck block: x + F(x), F = bottleneck transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=1.0, gw=1, se_r=None):
super(ResBottleneckBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BottleneckTransform(w_in, w_out, stride, bn_norm, bm, gw, se_r)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class ResStemCifar(nn.Module):
"""ResNet stem for CIFAR."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemCifar, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn = get_norm(bn_norm, w_out)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResStemIN(nn.Module):
"""ResNet stem for ImageNet."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemIN, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 7x7, BN, ReLU, maxpool
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn = get_norm(bn_norm, w_out)
        self.relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class SimpleStemIN(nn.Module):
"""Simple stem for ImageNet."""
def __init__(self, in_w, out_w, bn_norm):
super(SimpleStemIN, self).__init__()
self.construct(in_w, out_w, bn_norm)
def construct(self, in_w, out_w, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
in_w, out_w, kernel_size=3, stride=2, padding=1, bias=False
)
self.bn = get_norm(bn_norm, out_w)
        self.relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class AnyStage(nn.Module):
"""AnyNet stage (sequence of blocks w/ the same output shape)."""
def __init__(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
super(AnyStage, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
# Construct the blocks
for i in range(d):
# Stride and w_in apply to the first block of the stage
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
# Construct the block
self.add_module(
"b{}".format(i + 1), block_fun(b_w_in, w_out, b_stride, bn_norm, bm, gw, se_r)
)
def forward(self, x):
for block in self.children():
x = block(x)
return x
class AnyNet(nn.Module):
"""AnyNet model."""
def __init__(self, **kwargs):
super(AnyNet, self).__init__()
if kwargs:
self.construct(
stem_type=kwargs["stem_type"],
stem_w=kwargs["stem_w"],
block_type=kwargs["block_type"],
ds=kwargs["ds"],
ws=kwargs["ws"],
ss=kwargs["ss"],
bn_norm=kwargs["bn_norm"],
bms=kwargs["bms"],
gws=kwargs["gws"],
se_r=kwargs["se_r"],
)
else:
self.construct(
stem_type=regnet_cfg.ANYNET.STEM_TYPE,
stem_w=regnet_cfg.ANYNET.STEM_W,
block_type=regnet_cfg.ANYNET.BLOCK_TYPE,
ds=regnet_cfg.ANYNET.DEPTHS,
ws=regnet_cfg.ANYNET.WIDTHS,
ss=regnet_cfg.ANYNET.STRIDES,
bn_norm=regnet_cfg.ANYNET.BN_NORM,
bms=regnet_cfg.ANYNET.BOT_MULS,
gws=regnet_cfg.ANYNET.GROUP_WS,
se_r=regnet_cfg.ANYNET.SE_R if regnet_cfg.ANYNET.SE_ON else None,
)
self.apply(init_weights)
def construct(self, stem_type, stem_w, block_type, ds, ws, ss, bn_norm, bms, gws, se_r):
# Generate dummy bot muls and gs for models that do not use them
bms = bms if bms else [1.0 for _d in ds]
gws = gws if gws else [1 for _d in ds]
# Group params by stage
stage_params = list(zip(ds, ws, ss, bms, gws))
# Construct the stem
stem_fun = get_stem_fun(stem_type)
self.stem = stem_fun(3, stem_w, bn_norm)
# Construct the stages
block_fun = get_block_fun(block_type)
prev_w = stem_w
for i, (d, w, s, bm, gw) in enumerate(stage_params):
self.add_module(
"s{}".format(i + 1), AnyStage(prev_w, w, s, bn_norm, d, block_fun, bm, gw, se_r)
)
prev_w = w
# Construct the head
self.in_planes = prev_w
# self.head = AnyHead(w_in=prev_w, nc=nc)
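        # The pycls classification head is intentionally omitted: this module is
        # used as a feature backbone, and in_planes records the output channel
        # width of the last stage.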
def forward(self, x):
for module in self.children():
x = module(x)
return x
def quantize_float(f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
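# Illustrative examples (values assumed, not taken from any config):
#   quantize_float(30.2, 8)   -> int(round(30.2 / 8) * 8)    = int(4 * 8)  = 32
#   quantize_float(100.0, 24) -> int(round(100.0 / 24) * 24) = int(4 * 24) = 96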
def adjust_ws_gs_comp(ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
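# Illustrative example (assumed values): for ws=[24, 56], bms=[1.0, 1.0] and a
# requested group width of 24 per stage, the second bottleneck width 56 is not
# divisible by 24, so it is quantized to 48; the result is ws=[24, 48] with
# gs=[24, 24].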
def get_stages_from_blocks(ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
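# Illustrative example: for the per-block widths of RegNetX-200MF,
# ws = rs = [24, 56, 152, 152, 152, 152, 368, 368, 368, 368, 368, 368, 368],
# a new stage starts wherever the width changes, giving
# s_ws = [24, 56, 152, 368] and s_ds = [1, 1, 4, 7].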
def generate_regnet(w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
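# Illustrative example: with the RegNetX-200MF parameters (w_a=36.44, w_0=24,
# w_m=2.49, d=13), the quantized per-block widths come out as
# [24, 56, 152, 152, 152, 152, 368, 368, 368, 368, 368, 368, 368] with
# num_stages=4; get_stages_from_blocks then groups them into stage widths
# (24, 56, 152, 368) with depths (1, 1, 4, 7).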
class RegNet(AnyNet):
"""RegNet model."""
def __init__(self, last_stride, bn_norm):
# Generate RegNet ws per block
b_ws, num_s, _, _ = generate_regnet(
regnet_cfg.REGNET.WA, regnet_cfg.REGNET.W0, regnet_cfg.REGNET.WM, regnet_cfg.REGNET.DEPTH
)
# Convert to per stage format
ws, ds = get_stages_from_blocks(b_ws, b_ws)
# Generate group widths and bot muls
gws = [regnet_cfg.REGNET.GROUP_W for _ in range(num_s)]
bms = [regnet_cfg.REGNET.BOT_MUL for _ in range(num_s)]
# Adjust the compatibility of ws and gws
ws, gws = adjust_ws_gs_comp(ws, bms, gws)
# Use the same stride for each stage
ss = [regnet_cfg.REGNET.STRIDE for _ in range(num_s)]
ss[-1] = last_stride
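        # last_stride (from cfg.MODEL.BACKBONE.LAST_STRIDE) overrides the stride of
        # the final stage; setting it to 1 keeps a larger output feature map, a
        # common choice for re-ID backbones.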
# Use SE for RegNetY
se_r = regnet_cfg.REGNET.SE_R if regnet_cfg.REGNET.SE_ON else None
# Construct the model
kwargs = {
"stem_type": regnet_cfg.REGNET.STEM_TYPE,
"stem_w": regnet_cfg.REGNET.STEM_W,
"block_type": regnet_cfg.REGNET.BLOCK_TYPE,
"ss": ss,
"ds": ds,
"ws": ws,
"bn_norm": bn_norm,
"bms": bms,
"gws": gws,
"se_r": se_r,
}
super(RegNet, self).__init__(**kwargs)
def init_pretrained_weights(key):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = model_urls[key].split('/')[-1]
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
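    # Only the main process downloads the checkpoint; the other processes wait at
    # the barrier above and then read the same cached file.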
logger.info(f"Loading pretrained model from {cached_file}")
state_dict = torch.load(cached_file, map_location=torch.device('cpu'))['model_state']
return state_dict
@BACKBONE_REGISTRY.register()
def build_regnet_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
cfg_files = {
'200x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-200MF_dds_8gpu.yaml',
'200y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-200MF_dds_8gpu.yaml',
'400x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-400MF_dds_8gpu.yaml',
'400y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-400MF_dds_8gpu.yaml',
'800x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-800MF_dds_8gpu.yaml',
'800y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-800MF_dds_8gpu.yaml',
'1600x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-1.6GF_dds_8gpu.yaml',
'1600y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-1.6GF_dds_8gpu.yaml',
'3200x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-3.2GF_dds_8gpu.yaml',
'3200y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-3.2GF_dds_8gpu.yaml',
'4000x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-4.0GF_dds_8gpu.yaml',
'4000y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-4.0GF_dds_8gpu.yaml',
'6400x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-6.4GF_dds_8gpu.yaml',
'6400y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-6.4GF_dds_8gpu.yaml',
}[depth]
regnet_cfg.merge_from_file(cfg_files)
model = RegNet(last_stride, bn_norm)
if pretrain:
        # Load weights from the explicitly given pretrain path
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
                logger.info(f'{pretrain_path} was not found! Please check the path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
key = depth
state_dict = init_pretrained_weights(key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
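# Minimal usage sketch (illustrative; get_cfg and the config values below are
# assumptions, only the keys read above are taken from this builder):
#
#   from fastreid.config import get_cfg
#   cfg = get_cfg()
#   cfg.MODEL.BACKBONE.DEPTH = '800y'       # picks RegNetY-800MF_dds_8gpu.yaml
#   cfg.MODEL.BACKBONE.NORM = 'BN'
#   cfg.MODEL.BACKBONE.LAST_STRIDE = 1
#   cfg.MODEL.BACKBONE.PRETRAIN = False
#   model = build_regnet_backbone(cfg)
#   feats = model(torch.randn(2, 3, 256, 128))   # 4-D feature map from the last stage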
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 18
W0: 80
WA: 34.01
WM: 2.25
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 19
W0: 168
WA: 73.36
WM: 2.37
GROUP_W: 112
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 22
W0: 216
WA: 55.59
WM: 2.1
GROUP_W: 128
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 13
W0: 24
WA: 36.44
WM: 2.49
GROUP_W: 8
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 25
W0: 88
WA: 26.31
WM: 2.25
GROUP_W: 48
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 23
W0: 320
WA: 69.86
WM: 2.0
GROUP_W: 168
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 23
W0: 96
WA: 38.65
WM: 2.43
GROUP_W: 40
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .