"""
Creates a MobileNetV2 Model as defined in:
Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. (2018).
MobileNetV2: Inverted Residuals and Linear Bottlenecks
arXiv preprint arXiv:1801.04381.
Adapted from https://github.com/tonylins/pytorch-mobilenet-v2
"""
import logging
import math
import torch
import torch.nn as nn
from fastreid.layers import get_norm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
def _make_divisible(v, divisor, min_value=None):
"""
    This function is taken from the original TensorFlow repo.
    It ensures that all layers have a channel number that is divisible by 8.
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v: raw channel count (possibly fractional after width scaling)
    :param divisor: required divisor of the returned value
    :param min_value: lower bound on the result; defaults to ``divisor``
    :return: ``v`` rounded to a multiple of ``divisor``, never more than 10% below ``v``
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the value by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
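
# Worked example (illustrative, not part of the original file): with divisor=8,
# _make_divisible(30, 8) returns 32, while _make_divisible(10, 8) returns 16 because
# rounding 10 down to 8 would lose more than 10% of the value, so one extra divisor
# is added back.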
def conv_3x3_bn(inp, oup, stride, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
def conv_1x1_bn(inp, oup, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, bn_norm, stride, expand_ratio):
super(InvertedResidual, self).__init__()
assert stride in [1, 2]
hidden_dim = round(inp * expand_ratio)
self.identity = stride == 1 and inp == oup
if expand_ratio == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
get_norm(bn_norm, hidden_dim),
nn.ReLU6(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                get_norm(bn_norm, oup),
)
def forward(self, x):
if self.identity:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, bn_norm, width_mult=1.):
super(MobileNetV2, self).__init__()
# setting of inverted residual blocks
self.cfgs = [
            # t: expansion factor, c: output channels, n: number of blocks, s: stride of the first block
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# building first layer
input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8)
layers = [conv_3x3_bn(3, input_channel, 2, bn_norm)]
# building inverted residual blocks
block = InvertedResidual
for t, c, n, s in self.cfgs:
output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8)
for i in range(n):
layers.append(block(input_channel, output_channel, bn_norm, s if i == 0 else 1, t))
input_channel = output_channel
self.features = nn.Sequential(*layers)
# building last several layers
output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280
self.conv = conv_1x1_bn(input_channel, output_channel, bn_norm)
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = self.conv(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
@BACKBONE_REGISTRY.register()
def build_mobilenetv2_backbone(cfg):
"""
Create a MobileNetV2 instance from config.
Returns:
        MobileNetV2: a :class:`MobileNetV2` instance.
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
width_mult = {
"1.0x": 1.0,
"0.75x": 0.75,
"0.5x": 0.5,
"0.35x": 0.35,
'0.25x': 0.25,
'0.1x': 0.1,
}[depth]
model = MobileNetV2(bn_norm, width_mult)
if pretrain:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
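
# Minimal smoke test (illustrative sketch, not part of the original file; assumes "BN"
# is a norm type accepted by fastreid.layers.get_norm):
if __name__ == "__main__":
    m = MobileNetV2("BN", width_mult=1.0)
    out = m(torch.randn(1, 3, 256, 128))
    # Overall stride is 32 (stem stride 2 plus four stride-2 stages), so a 256x128
    # input yields an 8x4 feature map with 1280 channels.
    print(out.shape)  # torch.Size([1, 1280, 8, 4])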
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence
import torch
from torch import nn, Tensor
from torch.nn import functional as F
# This import fallback keeps compatibility across torchvision versions.
try:
from torchvision.models.utils import load_state_dict_from_url
except ImportError:
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from fastreid.layers import get_norm
from .build import BACKBONE_REGISTRY
from .mobilenet import _make_divisible
# https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
model_urls = {
"Large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
"Small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
}
def conv_1x1_bn(inp, oup, bn_norm):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
get_norm(bn_norm, oup),
nn.ReLU6(inplace=True)
)
class ConvBNActivation(nn.Sequential):
def __init__(
self,
in_planes: int,
out_planes: int,
kernel_size: int = 3,
stride: int = 1,
groups: int = 1,
bn_norm=None,
activation_layer: Optional[Callable[..., nn.Module]] = None,
dilation: int = 1,
) -> None:
padding = (kernel_size - 1) // 2 * dilation
if activation_layer is None:
activation_layer = nn.ReLU6
super(ConvBNActivation, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, dilation=dilation, groups=groups,
bias=False),
get_norm(bn_norm, out_planes),
activation_layer(inplace=True)
)
self.out_channels = out_planes
class SqueezeExcitation(nn.Module):
def __init__(self, input_channels: int, squeeze_factor: int = 4):
super().__init__()
squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
def _scale(self, input: Tensor, inplace: bool) -> Tensor:
scale = F.adaptive_avg_pool2d(input, 1)
scale = self.fc1(scale)
scale = self.relu(scale)
scale = self.fc2(scale)
return F.hardsigmoid(scale, inplace=inplace)
def forward(self, input: Tensor) -> Tensor:
scale = self._scale(input, True)
return scale * input
class InvertedResidualConfig:
def __init__(self, input_channels: int, kernel: int, expanded_channels: int, out_channels: int, use_se: bool,
activation: str, stride: int, dilation: int, width_mult: float):
self.input_channels = self.adjust_channels(input_channels, width_mult)
self.kernel = kernel
self.expanded_channels = self.adjust_channels(expanded_channels, width_mult)
self.out_channels = self.adjust_channels(out_channels, width_mult)
self.use_se = use_se
self.use_hs = activation == "HS"
self.stride = stride
self.dilation = dilation
@staticmethod
def adjust_channels(channels: int, width_mult: float):
return _make_divisible(channels * width_mult, 8)
class InvertedResidual(nn.Module):
def __init__(self, cnf: InvertedResidualConfig, bn_norm,
se_layer: Callable[..., nn.Module] = SqueezeExcitation):
super().__init__()
if not (1 <= cnf.stride <= 2):
raise ValueError('illegal stride value')
self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
layers: List[nn.Module] = []
activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU
# expand
if cnf.expanded_channels != cnf.input_channels:
layers.append(ConvBNActivation(cnf.input_channels, cnf.expanded_channels, kernel_size=1,
bn_norm=bn_norm, activation_layer=activation_layer))
# depthwise
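        # When a dilated configuration is requested (dilation > 1), the spatial stride
        # is dropped so the receptive field grows without further downsampling.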
stride = 1 if cnf.dilation > 1 else cnf.stride
layers.append(ConvBNActivation(cnf.expanded_channels, cnf.expanded_channels, kernel_size=cnf.kernel,
stride=stride, dilation=cnf.dilation, groups=cnf.expanded_channels,
bn_norm=bn_norm, activation_layer=activation_layer))
if cnf.use_se:
layers.append(se_layer(cnf.expanded_channels))
# project
layers.append(ConvBNActivation(cnf.expanded_channels, cnf.out_channels, kernel_size=1, bn_norm=bn_norm,
activation_layer=nn.Identity))
self.block = nn.Sequential(*layers)
self.out_channels = cnf.out_channels
self._is_cn = cnf.stride > 1
def forward(self, input: Tensor) -> Tensor:
result = self.block(input)
if self.use_res_connect:
result += input
return result
class MobileNetV3(nn.Module):
def __init__(
self,
bn_norm,
inverted_residual_setting: List[InvertedResidualConfig],
last_channel: int,
block: Optional[Callable[..., nn.Module]] = None,
) -> None:
"""
MobileNet V3 main class
Args:
inverted_residual_setting (List[InvertedResidualConfig]): Network structure
last_channel (int): The number of channels on the penultimate layer
block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet
"""
super().__init__()
if not inverted_residual_setting:
raise ValueError("The inverted_residual_setting should not be empty")
elif not (isinstance(inverted_residual_setting, Sequence) and
all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):
raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")
if block is None:
block = InvertedResidual
layers: List[nn.Module] = []
# building first layer
firstconv_output_channels = inverted_residual_setting[0].input_channels
layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, bn_norm=bn_norm,
activation_layer=nn.Hardswish))
# building inverted residual blocks
for cnf in inverted_residual_setting:
layers.append(block(cnf, bn_norm))
# building last several layers
lastconv_input_channels = inverted_residual_setting[-1].out_channels
lastconv_output_channels = 6 * lastconv_input_channels
layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1,
bn_norm=bn_norm, activation_layer=nn.Hardswish))
self.features = nn.Sequential(*layers)
self.conv = conv_1x1_bn(lastconv_output_channels, last_channel, bn_norm)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x: Tensor) -> Tensor:
x = self.features(x)
x = self.conv(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
def _mobilenet_v3_conf(arch: str, params: Dict[str, Any]):
# non-public config parameters
reduce_divider = 2 if params.pop('_reduced_tail', False) else 1
dilation = 2 if params.pop('_dilated', False) else 1
width_mult = params.pop('_width_mult', 1.0)
bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult)
adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult)
if arch == "Large":
inverted_residual_setting = [
bneck_conf(16, 3, 16, 16, False, "RE", 1, 1),
bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1
bneck_conf(24, 3, 72, 24, False, "RE", 1, 1),
bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2
bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3
bneck_conf(80, 3, 200, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
bneck_conf(80, 3, 480, 112, True, "HS", 1, 1),
bneck_conf(112, 3, 672, 112, True, "HS", 1, 1),
bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4
bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
]
last_channel = adjust_channels(1280 // reduce_divider) # C5
elif arch == "Small":
inverted_residual_setting = [
bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1
bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2
bneck_conf(24, 3, 88, 24, False, "RE", 1, 1),
bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3
bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
bneck_conf(40, 5, 120, 48, True, "HS", 1, 1),
bneck_conf(48, 5, 144, 48, True, "HS", 1, 1),
bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4
bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
]
last_channel = adjust_channels(1024 // reduce_divider) # C5
else:
raise ValueError("Unsupported model type {}".format(arch))
return inverted_residual_setting, last_channel
def _mobilenet_v3_model(
bn_norm,
depth: str,
pretrained: bool,
pretrain_path: str,
**kwargs: Any
):
inverted_residual_setting, last_channel = _mobilenet_v3_conf(depth, kwargs)
model = MobileNetV3(bn_norm, inverted_residual_setting, last_channel, **kwargs)
if pretrained:
if pretrain_path:
state_dict = torch.load(pretrain_path)
else:
if model_urls.get(depth, None) is None:
raise ValueError("No checkpoint is available for model type {}".format(depth))
state_dict = load_state_dict_from_url(model_urls[depth], progress=True)
model.load_state_dict(state_dict, strict=False)
return model
@BACKBONE_REGISTRY.register()
def build_mobilenetv3_backbone(cfg):
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
model = _mobilenet_v3_model(bn_norm, depth, pretrain, pretrain_path)
return model
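
# Usage sketch (illustrative, not part of the original file): the config helper and the
# model class can also be composed directly, bypassing the registry.
if __name__ == "__main__":
    setting, last_channel = _mobilenet_v3_conf("Small", {})
    net = MobileNetV3("BN", setting, last_channel)
    feat = net(torch.randn(1, 3, 224, 224))
    print(feat.shape)  # torch.Size([1, 1024, 7, 7]) for the Small variant at this size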
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
# based on:
# https://github.com/KaiyangZhou/deep-person-reid/blob/master/torchreid/models/osnet.py
import logging
import torch
from torch import nn
from fastreid.layers import get_norm
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
model_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
bn_norm,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, bn_norm, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels, bn_norm):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = get_norm(bn_norm, out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None: num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm: self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = nn.Identity()
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None: x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.gate_activation(x)
if self.return_gates: return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
bn_norm,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels, bn_norm)
self.conv2a = LightConv3x3(mid_channels, mid_channels, bn_norm)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
LightConv3x3(mid_channels, mid_channels, bn_norm),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn_norm)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels, bn_norm)
self.IN = None
if IN: self.IN = nn.InstanceNorm2d(out_channels, affine=True)
self.relu = nn.ReLU(True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
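        # Unified aggregation gate: the same ChannelGate weighs each scale's stream
        # before the streams are summed into a single omni-scale representation.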
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return self.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. arXiv preprint, 2019.
"""
def __init__(
self,
blocks,
layers,
channels,
bn_norm,
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, bn_norm, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
bn_norm,
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
bn_norm,
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
bn_norm,
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3], bn_norm)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
bn_norm,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, bn_norm, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, bn_norm, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels, bn_norm),
nn.AvgPool2d(2, stride=2),
)
)
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
    Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
import warnings
import logging
logger = logging.getLogger(__name__)
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
logger.info(f"Pretrain model don't exist, downloading from {model_urls[key]}")
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
state_dict = torch.load(cached_file, map_location=torch.device('cpu'))
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
return model_dict
@BACKBONE_REGISTRY.register()
def build_osnet_backbone(cfg):
"""
    Create an OSNet instance from config.
Returns:
OSNet: a :class:`OSNet` instance
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
with_ibn = cfg.MODEL.BACKBONE.WITH_IBN
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
num_blocks_per_stage = [2, 2, 2]
num_channels_per_stage = {
"x1_0": [64, 256, 384, 512],
"x0_75": [48, 192, 288, 384],
"x0_5": [32, 128, 192, 256],
"x0_25": [16, 64, 96, 128]}[depth]
model = OSNet([OSBlock, OSBlock, OSBlock], num_blocks_per_stage, num_channels_per_stage,
bn_norm, IN=with_ibn)
if pretrain:
        # Load pretrained weights from the given path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
if with_ibn:
pretrain_key = "osnet_ibn_" + depth
else:
pretrain_key = "osnet_" + depth
state_dict = init_pretrained_weights(model, pretrain_key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
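
# Usage note (illustrative, not part of the original file): with depth "x1_0" the
# backbone ends in a 512-channel feature map (channels[3]); the narrower variants
# scale this down to 384, 256, and 128 channels respectively.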
from .regnet import build_regnet_backbone
from .effnet import build_effnet_backbone
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Configuration file (powered by YACS)."""
import argparse
import os
import sys
from yacs.config import CfgNode as CfgNode
# Global config object
_C = CfgNode()
# Example usage:
# from core.config import cfg
cfg = _C
# ------------------------------------------------------------------------------------ #
# Model options
# ------------------------------------------------------------------------------------ #
_C.MODEL = CfgNode()
# Model type
_C.MODEL.TYPE = ""
# Number of weight layers
_C.MODEL.DEPTH = 0
# Number of classes
_C.MODEL.NUM_CLASSES = 10
# Loss function (see pycls/models/loss.py for options)
_C.MODEL.LOSS_FUN = "cross_entropy"
# ------------------------------------------------------------------------------------ #
# ResNet options
# ------------------------------------------------------------------------------------ #
_C.RESNET = CfgNode()
# Transformation function (see pycls/models/resnet.py for options)
_C.RESNET.TRANS_FUN = "basic_transform"
# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt)
_C.RESNET.NUM_GROUPS = 1
# Width of each group (64 -> ResNet; 4 -> ResNeXt)
_C.RESNET.WIDTH_PER_GROUP = 64
# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch)
_C.RESNET.STRIDE_1X1 = True
# ------------------------------------------------------------------------------------ #
# AnyNet options
# ------------------------------------------------------------------------------------ #
_C.ANYNET = CfgNode()
# Stem type
_C.ANYNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.ANYNET.STEM_W = 32
# Block type
_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block"
# Depth for each stage (number of blocks in the stage)
_C.ANYNET.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.ANYNET.WIDTHS = []
# Strides for each stage (applies to the first block of each stage)
_C.ANYNET.STRIDES = []
# Bottleneck multipliers for each stage (applies to bottleneck block)
_C.ANYNET.BOT_MULS = []
# Group widths for each stage (applies to bottleneck block)
_C.ANYNET.GROUP_WS = []
# Whether SE is enabled for res_bottleneck_block
_C.ANYNET.SE_ON = False
# SE ratio
_C.ANYNET.SE_R = 0.25
# ------------------------------------------------------------------------------------ #
# RegNet options
# ------------------------------------------------------------------------------------ #
_C.REGNET = CfgNode()
# Stem type
_C.REGNET.STEM_TYPE = "simple_stem_in"
# Stem width
_C.REGNET.STEM_W = 32
# Block type
_C.REGNET.BLOCK_TYPE = "res_bottleneck_block"
# Stride of each stage
_C.REGNET.STRIDE = 2
# Squeeze-and-Excitation (RegNetY)
_C.REGNET.SE_ON = False
_C.REGNET.SE_R = 0.25
# Depth
_C.REGNET.DEPTH = 10
# Initial width
_C.REGNET.W0 = 32
# Slope
_C.REGNET.WA = 5.0
# Quantization
_C.REGNET.WM = 2.5
# Group width
_C.REGNET.GROUP_W = 16
# Bottleneck multiplier (bm = 1 / b from the paper)
_C.REGNET.BOT_MUL = 1.0
# ------------------------------------------------------------------------------------ #
# EfficientNet options
# ------------------------------------------------------------------------------------ #
_C.EN = CfgNode()
# Stem width
_C.EN.STEM_W = 32
# Depth for each stage (number of blocks in the stage)
_C.EN.DEPTHS = []
# Width for each stage (width of each block in the stage)
_C.EN.WIDTHS = []
# Expansion ratios for MBConv blocks in each stage
_C.EN.EXP_RATIOS = []
# Squeeze-and-Excitation (SE) ratio
_C.EN.SE_R = 0.25
# Strides for each stage (applies to the first block of each stage)
_C.EN.STRIDES = []
# Kernel sizes for each stage
_C.EN.KERNELS = []
# Head width
_C.EN.HEAD_W = 1280
# Drop connect ratio
_C.EN.DC_RATIO = 0.0
# Dropout ratio
_C.EN.DROPOUT_RATIO = 0.0
# ------------------------------------------------------------------------------------ #
# Batch norm options
# ------------------------------------------------------------------------------------ #
_C.BN = CfgNode()
# BN epsilon
_C.BN.EPS = 1e-5
# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2)
_C.BN.MOM = 0.1
# Precise BN stats
_C.BN.USE_PRECISE_STATS = True
_C.BN.NUM_SAMPLES_PRECISE = 8192
# Initialize the gamma of the final BN of each block to zero
_C.BN.ZERO_INIT_FINAL_GAMMA = False
# Use a different weight decay for BN layers
_C.BN.USE_CUSTOM_WEIGHT_DECAY = False
_C.BN.CUSTOM_WEIGHT_DECAY = 0.0
# ------------------------------------------------------------------------------------ #
# Optimizer options
# ------------------------------------------------------------------------------------ #
_C.OPTIM = CfgNode()
# Base learning rate
_C.OPTIM.BASE_LR = 0.1
# Learning rate policy; select from {'cos', 'exp', 'steps'}
_C.OPTIM.LR_POLICY = "cos"
# Exponential decay factor
_C.OPTIM.GAMMA = 0.1
# Steps for 'steps' policy (in epochs)
_C.OPTIM.STEPS = []
# Learning rate multiplier for 'steps' policy
_C.OPTIM.LR_MULT = 0.1
# Maximal number of epochs
_C.OPTIM.MAX_EPOCH = 200
# Momentum
_C.OPTIM.MOMENTUM = 0.9
# Momentum dampening
_C.OPTIM.DAMPENING = 0.0
# Nesterov momentum
_C.OPTIM.NESTEROV = True
# L2 regularization
_C.OPTIM.WEIGHT_DECAY = 5e-4
# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR
_C.OPTIM.WARMUP_FACTOR = 0.1
# Gradually warm up the OPTIM.BASE_LR over this number of epochs
_C.OPTIM.WARMUP_ITERS = 0
# ------------------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------------------ #
_C.TRAIN = CfgNode()
# Dataset and split
_C.TRAIN.DATASET = ""
_C.TRAIN.SPLIT = "train"
# Total mini-batch size
_C.TRAIN.BATCH_SIZE = 128
# Image size
_C.TRAIN.IM_SIZE = 224
# Evaluate model on test data every eval period epochs
_C.TRAIN.EVAL_PERIOD = 1
# Save model checkpoint every checkpoint period epochs
_C.TRAIN.CHECKPOINT_PERIOD = 1
# Resume training from the latest checkpoint in the output directory
_C.TRAIN.AUTO_RESUME = True
# Weights to start training from
_C.TRAIN.WEIGHTS = ""
# ------------------------------------------------------------------------------------ #
# Testing options
# ------------------------------------------------------------------------------------ #
_C.TEST = CfgNode()
# Dataset and split
_C.TEST.DATASET = ""
_C.TEST.SPLIT = "val"
# Total mini-batch size
_C.TEST.BATCH_SIZE = 200
# Image size
_C.TEST.IM_SIZE = 256
# Weights to use for testing
_C.TEST.WEIGHTS = ""
# ------------------------------------------------------------------------------------ #
# Common train/test data loader options
# ------------------------------------------------------------------------------------ #
_C.DATA_LOADER = CfgNode()
# Number of data loader workers per process
_C.DATA_LOADER.NUM_WORKERS = 8
# Load data to pinned host memory
_C.DATA_LOADER.PIN_MEMORY = True
# ------------------------------------------------------------------------------------ #
# Memory options
# ------------------------------------------------------------------------------------ #
_C.MEM = CfgNode()
# Perform ReLU inplace
_C.MEM.RELU_INPLACE = True
# ------------------------------------------------------------------------------------ #
# CUDNN options
# ------------------------------------------------------------------------------------ #
_C.CUDNN = CfgNode()
# Perform benchmarking to select the fastest CUDNN algorithms to use
# Note that this may increase the memory usage and will likely not result
# in overall speedups when variable size inputs are used (e.g. COCO training)
_C.CUDNN.BENCHMARK = True
# ------------------------------------------------------------------------------------ #
# Precise timing options
# ------------------------------------------------------------------------------------ #
_C.PREC_TIME = CfgNode()
# Number of iterations to warm up the caches
_C.PREC_TIME.WARMUP_ITER = 3
# Number of iterations to compute avg time
_C.PREC_TIME.NUM_ITER = 30
# ------------------------------------------------------------------------------------ #
# Misc options
# ------------------------------------------------------------------------------------ #
# Number of GPUs to use (applies to both training and testing)
_C.NUM_GPUS = 1
# Output directory
_C.OUT_DIR = "/tmp"
# Config destination (in OUT_DIR)
_C.CFG_DEST = "config.yaml"
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries
_C.RNG_SEED = 1
# Log destination ('stdout' or 'file')
_C.LOG_DEST = "stdout"
# Log period in iters
_C.LOG_PERIOD = 10
# Distributed backend
_C.DIST_BACKEND = "nccl"
# Hostname and port range for multi-process groups (actual port selected randomly)
_C.HOST = "localhost"
_C.PORT_RANGE = [10000, 65000]
# Models weights referred to by URL are downloaded to this local cache
_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache"
# ------------------------------------------------------------------------------------ #
# Deprecated keys
# ------------------------------------------------------------------------------------ #
_C.register_deprecated_key("PREC_TIME.BATCH_SIZE")
_C.register_deprecated_key("PREC_TIME.ENABLED")
_C.register_deprecated_key("PORT")
def assert_and_infer_cfg():
"""Checks config values invariants."""
err_str = "The first lr step must start at 0"
assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str
data_splits = ["train", "val", "test"]
err_str = "Data split '{}' not supported"
assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT)
assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT)
err_str = "Mini-batch size should be a multiple of NUM_GPUS."
assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str
err_str = "Log destination '{}' not supported"
assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST)
def dump_cfg():
"""Dumps the config to the output directory."""
cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST)
with open(cfg_file, "w") as f:
_C.dump(stream=f)
def load_cfg(out_dir, cfg_dest="config.yaml"):
"""Loads config from specified output directory."""
cfg_file = os.path.join(out_dir, cfg_dest)
_C.merge_from_file(cfg_file)
def load_cfg_fom_args(description="Config file options."):
"""Load config from command line arguments and set any specified options."""
parser = argparse.ArgumentParser(description=description)
help_s = "Config file location"
parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str)
help_s = "See pycls/core/config.py for all options"
parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
_C.merge_from_file(args.cfg_file)
_C.merge_from_list(args.opts)
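
# Example invocation (illustrative; the script and config file names are assumptions):
# a training script that calls load_cfg_fom_args() takes a YAML file plus key-value
# overrides, which are applied via merge_from_list, e.g.
#
#   python train_net.py --cfg EN-B0_dds_8gpu.yaml OPTIM.BASE_LR 0.1 TRAIN.BATCH_SIZE 64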
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""EfficientNet models."""
import logging
import torch
import torch.nn as nn
from fastreid.layers import *
from fastreid.modeling.backbones.build import BACKBONE_REGISTRY
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .config import cfg as effnet_cfg
from .regnet import drop_connect, init_weights
logger = logging.getLogger(__name__)
model_urls = {
'b0': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305613/EN-B0_dds_8gpu.pyth',
'b1': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B1_dds_8gpu.pyth',
'b2': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305015/EN-B2_dds_8gpu.pyth',
'b3': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B3_dds_8gpu.pyth',
'b4': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161305098/EN-B4_dds_8gpu.pyth',
'b5': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B5_dds_8gpu.pyth',
'b6': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B6_dds_8gpu.pyth',
'b7': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161304979/EN-B7_dds_8gpu.pyth',
}
class EffHead(nn.Module):
"""EfficientNet head: 1x1, BN, Swish, AvgPool, Dropout, FC."""
def __init__(self, w_in, w_out, bn_norm):
super(EffHead, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 1, stride=1, padding=0, bias=False)
self.conv_bn = get_norm(bn_norm, w_out)
self.conv_swish = Swish()
def forward(self, x):
x = self.conv_swish(self.conv_bn(self.conv(x)))
return x
class Swish(nn.Module):
"""Swish activation function: x * sigmoid(x)."""
def __init__(self):
super(Swish, self).__init__()
def forward(self, x):
return x * torch.sigmoid(x)
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block w/ Swish: AvgPool, FC, Swish, FC, Sigmoid."""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, 1, bias=True),
Swish(),
nn.Conv2d(w_se, w_in, 1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
class MBConv(nn.Module):
"""Mobile inverted bottleneck block w/ SE (MBConv)."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out, bn_norm):
        # expansion, k x k depthwise, BN, Swish, SE, 1x1 linear projection, BN, skip connection
super(MBConv, self).__init__()
self.exp = None
w_exp = int(w_in * exp_r)
if w_exp != w_in:
self.exp = nn.Conv2d(w_in, w_exp, 1, stride=1, padding=0, bias=False)
self.exp_bn = get_norm(bn_norm, w_exp)
self.exp_swish = Swish()
dwise_args = {"groups": w_exp, "padding": (kernel - 1) // 2, "bias": False}
self.dwise = nn.Conv2d(w_exp, w_exp, kernel, stride=stride, **dwise_args)
self.dwise_bn = get_norm(bn_norm, w_exp)
self.dwise_swish = Swish()
self.se = SE(w_exp, int(w_in * se_r))
self.lin_proj = nn.Conv2d(w_exp, w_out, 1, stride=1, padding=0, bias=False)
self.lin_proj_bn = get_norm(bn_norm, w_out)
# Skip connection if in and out shapes are the same (MN-V2 style)
self.has_skip = stride == 1 and w_in == w_out
def forward(self, x):
f_x = x
if self.exp:
f_x = self.exp_swish(self.exp_bn(self.exp(f_x)))
f_x = self.dwise_swish(self.dwise_bn(self.dwise(f_x)))
f_x = self.se(f_x)
f_x = self.lin_proj_bn(self.lin_proj(f_x))
if self.has_skip:
if self.training and effnet_cfg.EN.DC_RATIO > 0.0:
f_x = drop_connect(f_x, effnet_cfg.EN.DC_RATIO)
f_x = x + f_x
return f_x
class EffStage(nn.Module):
"""EfficientNet stage."""
def __init__(self, w_in, exp_r, kernel, stride, se_r, w_out, d, bn_norm):
super(EffStage, self).__init__()
for i in range(d):
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
name = "b{}".format(i + 1)
self.add_module(name, MBConv(b_w_in, exp_r, kernel, b_stride, se_r, w_out, bn_norm))
def forward(self, x):
for block in self.children():
x = block(x)
return x
class StemIN(nn.Module):
"""EfficientNet stem for ImageNet: 3x3, BN, Swish."""
def __init__(self, w_in, w_out, bn_norm):
super(StemIN, self).__init__()
self.conv = nn.Conv2d(w_in, w_out, 3, stride=2, padding=1, bias=False)
self.bn = get_norm(bn_norm, w_out)
self.swish = Swish()
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class EffNet(nn.Module):
"""EfficientNet model."""
@staticmethod
def get_args():
return {
"stem_w": effnet_cfg.EN.STEM_W,
"ds": effnet_cfg.EN.DEPTHS,
"ws": effnet_cfg.EN.WIDTHS,
"exp_rs": effnet_cfg.EN.EXP_RATIOS,
"se_r": effnet_cfg.EN.SE_R,
"ss": effnet_cfg.EN.STRIDES,
"ks": effnet_cfg.EN.KERNELS,
"head_w": effnet_cfg.EN.HEAD_W,
}
def __init__(self, last_stride, bn_norm, **kwargs):
super(EffNet, self).__init__()
kwargs = self.get_args() if not kwargs else kwargs
self._construct(**kwargs, last_stride=last_stride, bn_norm=bn_norm)
self.apply(init_weights)
def _construct(self, stem_w, ds, ws, exp_rs, se_r, ss, ks, head_w, last_stride, bn_norm):
stage_params = list(zip(ds, ws, exp_rs, ss, ks))
self.stem = StemIN(3, stem_w, bn_norm)
prev_w = stem_w
for i, (d, w, exp_r, stride, kernel) in enumerate(stage_params):
name = "s{}".format(i + 1)
if i == 5: stride = last_stride
self.add_module(name, EffStage(prev_w, exp_r, kernel, stride, se_r, w, d, bn_norm))
prev_w = w
self.head = EffHead(prev_w, head_w, bn_norm)
def forward(self, x):
for module in self.children():
x = module(x)
return x
def init_pretrained_weights(key):
"""Initializes model with pretrained weights.
    Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = model_urls[key].split('/')[-1]
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
logger.info(f"Loading pretrained model from {cached_file}")
state_dict = torch.load(cached_file, map_location=torch.device("cpu"))["model_state"]
return state_dict
@BACKBONE_REGISTRY.register()
def build_effnet_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
cfg_files = {
'b0': 'fastreid/modeling/backbones/regnet/effnet/EN-B0_dds_8gpu.yaml',
'b1': 'fastreid/modeling/backbones/regnet/effnet/EN-B1_dds_8gpu.yaml',
'b2': 'fastreid/modeling/backbones/regnet/effnet/EN-B2_dds_8gpu.yaml',
'b3': 'fastreid/modeling/backbones/regnet/effnet/EN-B3_dds_8gpu.yaml',
'b4': 'fastreid/modeling/backbones/regnet/effnet/EN-B4_dds_8gpu.yaml',
'b5': 'fastreid/modeling/backbones/regnet/effnet/EN-B5_dds_8gpu.yaml',
}[depth]
effnet_cfg.merge_from_file(cfg_files)
model = EffNet(last_stride, bn_norm)
if pretrain:
        # Load pretrained weights from the given path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))["model_state"]
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
logger.info(f'{pretrain_path} is not found! Please check this path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
key = depth
state_dict = init_pretrained_weights(key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [1, 2, 2, 3, 3, 4, 1]
WIDTHS: [16, 24, 40, 80, 112, 192, 320]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1280
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 4, 4, 5, 2]
WIDTHS: [16, 24, 40, 80, 112, 192, 320]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1280
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 240
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 274
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 32
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 4, 4, 5, 2]
WIDTHS: [16, 24, 48, 88, 120, 208, 352]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1408
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 260
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 298
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 40
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 3, 3, 5, 5, 6, 2]
WIDTHS: [24, 32, 48, 96, 136, 232, 384]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1536
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 300
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 342
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 48
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [2, 4, 4, 6, 6, 8, 2]
WIDTHS: [24, 32, 56, 112, 160, 272, 448]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 1792
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 380
BATCH_SIZE: 128
TEST:
DATASET: imagenet
IM_SIZE: 434
BATCH_SIZE: 104
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: effnet
NUM_CLASSES: 1000
EN:
STEM_W: 48
STRIDES: [1, 2, 2, 2, 1, 2, 1]
DEPTHS: [3, 5, 5, 7, 7, 9, 3]
WIDTHS: [24, 40, 64, 128, 176, 304, 512]
EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6]
KERNELS: [3, 3, 5, 3, 5, 5, 3]
HEAD_W: 2048
OPTIM:
LR_POLICY: cos
BASE_LR: 0.1
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 1e-5
TRAIN:
DATASET: imagenet
IM_SIZE: 456
BATCH_SIZE: 64
TEST:
DATASET: imagenet
IM_SIZE: 522
BATCH_SIZE: 48
NUM_GPUS: 8
OUT_DIR: .
import logging
import math
import numpy as np
import torch
import torch.nn as nn
from fastreid.layers import get_norm
from fastreid.utils import comm
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .config import cfg as regnet_cfg
from ..build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
model_urls = {
'800x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160905981/RegNetX-200MF_dds_8gpu.pyth',
'800y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906567/RegNetY-800MF_dds_8gpu.pyth',
'1600x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160990626/RegNetX-1.6GF_dds_8gpu.pyth',
'1600y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906681/RegNetY-1.6GF_dds_8gpu.pyth',
'3200x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906139/RegNetX-3.2GF_dds_8gpu.pyth',
'3200y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906834/RegNetY-3.2GF_dds_8gpu.pyth',
'4000x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth',
'4000y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth',
'6400x': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/161116590/RegNetX-6.4GF_dds_8gpu.pyth',
'6400y': 'https://dl.fbaipublicfiles.com/pycls/dds_baselines/160907112/RegNetY-6.4GF_dds_8gpu.pyth',
}
def init_weights(m):
"""Performs ResNet-style weight initialization."""
if isinstance(m, nn.Conv2d):
# Note that there is no bias due to BN
fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
elif isinstance(m, nn.BatchNorm2d):
zero_init_gamma = (
hasattr(m, "final_bn") and m.final_bn and regnet_cfg.BN.ZERO_INIT_FINAL_GAMMA
)
m.weight.data.fill_(0.0 if zero_init_gamma else 1.0)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(mean=0.0, std=0.01)
m.bias.data.zero_()
def get_stem_fun(stem_type):
"""Retrives the stem function by name."""
stem_funs = {
"res_stem_cifar": ResStemCifar,
"res_stem_in": ResStemIN,
"simple_stem_in": SimpleStemIN,
}
assert stem_type in stem_funs.keys(), "Stem type '{}' not supported".format(
stem_type
)
return stem_funs[stem_type]
def get_block_fun(block_type):
"""Retrieves the block function by name."""
block_funs = {
"vanilla_block": VanillaBlock,
"res_basic_block": ResBasicBlock,
"res_bottleneck_block": ResBottleneckBlock,
}
assert block_type in block_funs.keys(), "Block type '{}' not supported".format(
block_type
)
return block_funs[block_type]
def drop_connect(x, drop_ratio):
"""Drop connect (adapted from DARTS)."""
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
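
# Note (illustrative, not part of the original file): drop_connect zeroes whole samples
# with probability drop_ratio and rescales the survivors by 1 / keep_ratio, so the
# expected value of the output matches the input (e.g. drop_ratio=0.2 scales survivors
# by 1.25). It also modifies x in place via div_/mul_.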
class AnyHead(nn.Module):
"""AnyNet head."""
def __init__(self, w_in, nc):
super(AnyHead, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(w_in, nc, bias=True)
def forward(self, x):
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class VanillaBlock(nn.Module):
"""Vanilla block: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Vanilla block does not support bm, gw, and se_r options"
super(VanillaBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class BasicTransform(nn.Module):
"""Basic transformation: [3x3 conv, BN, Relu] x2"""
def __init__(self, w_in, w_out, stride, bn_norm):
super(BasicTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def construct(self, w_in, w_out, stride, bn_norm):
# 3x3, BN, ReLU
self.a = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=stride, padding=1, bias=False
)
self.a_bn = get_norm(bn_norm, w_out)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN
self.b = nn.Conv2d(w_out, w_out, kernel_size=3, stride=1, padding=1, bias=False)
self.b_bn = get_norm(bn_norm, w_out)
self.b_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBasicBlock(nn.Module):
"""Residual basic block: x + F(x), F = basic transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=None, gw=None, se_r=None):
assert (
bm is None and gw is None and se_r is None
), "Basic transform does not support bm, gw, and se_r options"
super(ResBasicBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BasicTransform(w_in, w_out, stride, bn_norm)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block"""
def __init__(self, w_in, w_se):
super(SE, self).__init__()
self.construct(w_in, w_se)
def construct(self, w_in, w_se):
# AvgPool
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# FC, Activation, FC, Sigmoid
self.f_ex = nn.Sequential(
nn.Conv2d(w_in, w_se, kernel_size=1, bias=True),
nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE),
nn.Conv2d(w_se, w_in, kernel_size=1, bias=True),
nn.Sigmoid(),
)
def forward(self, x):
return x * self.f_ex(self.avg_pool(x))
class BottleneckTransform(nn.Module):
"""Bottlenect transformation: 1x1, 3x3, 1x1"""
def __init__(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
super(BottleneckTransform, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Compute the bottleneck width
w_b = int(round(w_out * bm))
# Compute the number of groups
num_gs = w_b // gw
# 1x1, BN, ReLU
self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
self.a_bn = get_norm(bn_norm, w_b)
self.a_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# 3x3, BN, ReLU
self.b = nn.Conv2d(
w_b, w_b, kernel_size=3, stride=stride, padding=1, groups=num_gs, bias=False
)
self.b_bn = get_norm(bn_norm, w_b)
self.b_relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
# Squeeze-and-Excitation (SE)
if se_r:
w_se = int(round(w_in * se_r))
self.se = SE(w_b, w_se)
# 1x1, BN
self.c = nn.Conv2d(w_b, w_out, kernel_size=1, stride=1, padding=0, bias=False)
self.c_bn = get_norm(bn_norm, w_out)
self.c_bn.final_bn = True
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResBottleneckBlock(nn.Module):
"""Residual bottleneck block: x + F(x), F = bottleneck transform"""
def __init__(self, w_in, w_out, stride, bn_norm, bm=1.0, gw=1, se_r=None):
super(ResBottleneckBlock, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, bm, gw, se_r)
def _add_skip_proj(self, w_in, w_out, stride, bn_norm):
self.proj = nn.Conv2d(
w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
)
self.bn = get_norm(bn_norm, w_out)
def construct(self, w_in, w_out, stride, bn_norm, bm, gw, se_r):
# Use skip connection with projection if shape changes
self.proj_block = (w_in != w_out) or (stride != 1)
if self.proj_block:
self._add_skip_proj(w_in, w_out, stride, bn_norm)
self.f = BottleneckTransform(w_in, w_out, stride, bn_norm, bm, gw, se_r)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
if self.proj_block:
x = self.bn(self.proj(x)) + self.f(x)
else:
x = x + self.f(x)
x = self.relu(x)
return x
class ResStemCifar(nn.Module):
"""ResNet stem for CIFAR."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemCifar, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn = get_norm(bn_norm, w_out)
self.relu = nn.ReLU(regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class ResStemIN(nn.Module):
"""ResNet stem for ImageNet."""
def __init__(self, w_in, w_out, bn_norm):
super(ResStemIN, self).__init__()
self.construct(w_in, w_out, bn_norm)
def construct(self, w_in, w_out, bn_norm):
# 7x7, BN, ReLU, maxpool
self.conv = nn.Conv2d(
w_in, w_out, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn = get_norm(bn_norm, w_out)
        self.relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class SimpleStemIN(nn.Module):
"""Simple stem for ImageNet."""
def __init__(self, in_w, out_w, bn_norm):
super(SimpleStemIN, self).__init__()
self.construct(in_w, out_w, bn_norm)
def construct(self, in_w, out_w, bn_norm):
# 3x3, BN, ReLU
self.conv = nn.Conv2d(
in_w, out_w, kernel_size=3, stride=2, padding=1, bias=False
)
self.bn = get_norm(bn_norm, out_w)
        self.relu = nn.ReLU(inplace=regnet_cfg.MEM.RELU_INPLACE)
def forward(self, x):
for layer in self.children():
x = layer(x)
return x
class AnyStage(nn.Module):
"""AnyNet stage (sequence of blocks w/ the same output shape)."""
def __init__(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
super(AnyStage, self).__init__()
self.construct(w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r)
def construct(self, w_in, w_out, stride, bn_norm, d, block_fun, bm, gw, se_r):
# Construct the blocks
for i in range(d):
# Stride and w_in apply to the first block of the stage
b_stride = stride if i == 0 else 1
b_w_in = w_in if i == 0 else w_out
# Construct the block
self.add_module(
"b{}".format(i + 1), block_fun(b_w_in, w_out, b_stride, bn_norm, bm, gw, se_r)
)
def forward(self, x):
for block in self.children():
x = block(x)
return x
class AnyNet(nn.Module):
"""AnyNet model."""
def __init__(self, **kwargs):
super(AnyNet, self).__init__()
if kwargs:
self.construct(
stem_type=kwargs["stem_type"],
stem_w=kwargs["stem_w"],
block_type=kwargs["block_type"],
ds=kwargs["ds"],
ws=kwargs["ws"],
ss=kwargs["ss"],
bn_norm=kwargs["bn_norm"],
bms=kwargs["bms"],
gws=kwargs["gws"],
se_r=kwargs["se_r"],
)
else:
self.construct(
stem_type=regnet_cfg.ANYNET.STEM_TYPE,
stem_w=regnet_cfg.ANYNET.STEM_W,
block_type=regnet_cfg.ANYNET.BLOCK_TYPE,
ds=regnet_cfg.ANYNET.DEPTHS,
ws=regnet_cfg.ANYNET.WIDTHS,
ss=regnet_cfg.ANYNET.STRIDES,
bn_norm=regnet_cfg.ANYNET.BN_NORM,
bms=regnet_cfg.ANYNET.BOT_MULS,
gws=regnet_cfg.ANYNET.GROUP_WS,
se_r=regnet_cfg.ANYNET.SE_R if regnet_cfg.ANYNET.SE_ON else None,
)
self.apply(init_weights)
def construct(self, stem_type, stem_w, block_type, ds, ws, ss, bn_norm, bms, gws, se_r):
# Generate dummy bot muls and gs for models that do not use them
bms = bms if bms else [1.0 for _d in ds]
gws = gws if gws else [1 for _d in ds]
# Group params by stage
stage_params = list(zip(ds, ws, ss, bms, gws))
# Construct the stem
stem_fun = get_stem_fun(stem_type)
self.stem = stem_fun(3, stem_w, bn_norm)
# Construct the stages
block_fun = get_block_fun(block_type)
prev_w = stem_w
for i, (d, w, s, bm, gw) in enumerate(stage_params):
self.add_module(
"s{}".format(i + 1), AnyStage(prev_w, w, s, bn_norm, d, block_fun, bm, gw, se_r)
)
prev_w = w
# Construct the head
self.in_planes = prev_w
# self.head = AnyHead(w_in=prev_w, nc=nc)
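        # The pycls classification head is intentionally omitted: this module is
        # used as a feature backbone, and in_planes records the output channel
        # width of the last stage.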
def forward(self, x):
for module in self.children():
x = module(x)
return x
def quantize_float(f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
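# Illustrative examples (values assumed, not taken from any config):
#   quantize_float(30.2, 8)   -> int(round(30.2 / 8) * 8)    = int(4 * 8)  = 32
#   quantize_float(100.0, 24) -> int(round(100.0 / 24) * 24) = int(4 * 24) = 96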
def adjust_ws_gs_comp(ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
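# Illustrative example (assumed values): for ws=[24, 56], bms=[1.0, 1.0] and a
# requested group width of 24 per stage, the second bottleneck width 56 is not
# divisible by 24, so it is quantized to 48; the result is ws=[24, 48] with
# gs=[24, 24].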
def get_stages_from_blocks(ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
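# Illustrative example: for the per-block widths of RegNetX-200MF,
# ws = rs = [24, 56, 152, 152, 152, 152, 368, 368, 368, 368, 368, 368, 368],
# a new stage starts wherever the width changes, giving
# s_ws = [24, 56, 152, 368] and s_ds = [1, 1, 4, 7].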
def generate_regnet(w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
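# Illustrative example: with the RegNetX-200MF parameters (w_a=36.44, w_0=24,
# w_m=2.49, d=13), the quantized per-block widths come out as
# [24, 56, 152, 152, 152, 152, 368, 368, 368, 368, 368, 368, 368] with
# num_stages=4; get_stages_from_blocks then groups them into stage widths
# (24, 56, 152, 368) with depths (1, 1, 4, 7).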
class RegNet(AnyNet):
"""RegNet model."""
def __init__(self, last_stride, bn_norm):
# Generate RegNet ws per block
b_ws, num_s, _, _ = generate_regnet(
regnet_cfg.REGNET.WA, regnet_cfg.REGNET.W0, regnet_cfg.REGNET.WM, regnet_cfg.REGNET.DEPTH
)
# Convert to per stage format
ws, ds = get_stages_from_blocks(b_ws, b_ws)
# Generate group widths and bot muls
gws = [regnet_cfg.REGNET.GROUP_W for _ in range(num_s)]
bms = [regnet_cfg.REGNET.BOT_MUL for _ in range(num_s)]
# Adjust the compatibility of ws and gws
ws, gws = adjust_ws_gs_comp(ws, bms, gws)
# Use the same stride for each stage
ss = [regnet_cfg.REGNET.STRIDE for _ in range(num_s)]
ss[-1] = last_stride
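        # last_stride (from cfg.MODEL.BACKBONE.LAST_STRIDE) overrides the stride of
        # the final stage; setting it to 1 keeps a larger output feature map, a
        # common choice for re-ID backbones.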
# Use SE for RegNetY
se_r = regnet_cfg.REGNET.SE_R if regnet_cfg.REGNET.SE_ON else None
# Construct the model
kwargs = {
"stem_type": regnet_cfg.REGNET.STEM_TYPE,
"stem_w": regnet_cfg.REGNET.STEM_W,
"block_type": regnet_cfg.REGNET.BLOCK_TYPE,
"ss": ss,
"ds": ds,
"ws": ws,
"bn_norm": bn_norm,
"bms": bms,
"gws": gws,
"se_r": se_r,
}
super(RegNet, self).__init__(**kwargs)
def init_pretrained_weights(key):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
import os
import errno
import gdown
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = model_urls[key].split('/')[-1]
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
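    # Only the main process downloads the checkpoint; the other processes wait at
    # the barrier above and then read the same cached file.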
logger.info(f"Loading pretrained model from {cached_file}")
state_dict = torch.load(cached_file, map_location=torch.device('cpu'))['model_state']
return state_dict
@BACKBONE_REGISTRY.register()
def build_regnet_backbone(cfg):
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
cfg_files = {
'200x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-200MF_dds_8gpu.yaml',
'200y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-200MF_dds_8gpu.yaml',
'400x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-400MF_dds_8gpu.yaml',
'400y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-400MF_dds_8gpu.yaml',
'800x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-800MF_dds_8gpu.yaml',
'800y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-800MF_dds_8gpu.yaml',
'1600x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-1.6GF_dds_8gpu.yaml',
'1600y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-1.6GF_dds_8gpu.yaml',
'3200x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-3.2GF_dds_8gpu.yaml',
'3200y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-3.2GF_dds_8gpu.yaml',
'4000x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-4.0GF_dds_8gpu.yaml',
'4000y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-4.0GF_dds_8gpu.yaml',
'6400x': 'fastreid/modeling/backbones/regnet/regnetx/RegNetX-6.4GF_dds_8gpu.yaml',
'6400y': 'fastreid/modeling/backbones/regnet/regnety/RegNetY-6.4GF_dds_8gpu.yaml',
}[depth]
regnet_cfg.merge_from_file(cfg_files)
model = RegNet(last_stride, bn_norm)
if pretrain:
        # Load weights from the explicitly given pretrain path
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
                logger.info(f'{pretrain_path} was not found! Please check the path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
key = depth
state_dict = init_pretrained_weights(key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
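# Minimal usage sketch (illustrative; get_cfg and the config values below are
# assumptions, only the keys read above are taken from this builder):
#
#   from fastreid.config import get_cfg
#   cfg = get_cfg()
#   cfg.MODEL.BACKBONE.DEPTH = '800y'       # picks RegNetY-800MF_dds_8gpu.yaml
#   cfg.MODEL.BACKBONE.NORM = 'BN'
#   cfg.MODEL.BACKBONE.LAST_STRIDE = 1
#   cfg.MODEL.BACKBONE.PRETRAIN = False
#   model = build_regnet_backbone(cfg)
#   feats = model(torch.randn(2, 3, 256, 128))   # 4-D feature map from the last stage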
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 18
W0: 80
WA: 34.01
WM: 2.25
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 19
W0: 168
WA: 73.36
WM: 2.37
GROUP_W: 112
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 22
W0: 216
WA: 55.59
WM: 2.1
GROUP_W: 128
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 13
W0: 24
WA: 36.44
WM: 2.49
GROUP_W: 8
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 25
W0: 88
WA: 26.31
WM: 2.25
GROUP_W: 48
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 23
W0: 320
WA: 69.86
WM: 2.0
GROUP_W: 168
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 23
W0: 96
WA: 38.65
WM: 2.43
GROUP_W: 40
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .