"configs/rec/rec_icdar15_train.yml" did not exist on "bb7798518253f84442b47e1aec22786f8a4543bc"
Commit b6c19984 authored by dengjb's avatar dengjb
Browse files

update

parents
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 22
W0: 24
WA: 24.48
WM: 2.54
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 17
W0: 184
WA: 60.83
WM: 2.07
GROUP_W: 56
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 16
W0: 48
WA: 36.97
WM: 2.24
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 23
W0: 80
WA: 49.56
WM: 2.88
GROUP_W: 120
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
DEPTH: 16
W0: 56
WA: 35.73
WM: 2.28
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 27
W0: 48
WA: 20.71
WM: 2.65
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 19
W0: 168
WA: 73.36
WM: 2.37
GROUP_W: 112
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 18
W0: 200
WA: 106.23
WM: 2.48
GROUP_W: 112
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 13
W0: 24
WA: 36.44
WM: 2.49
GROUP_W: 8
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 21
W0: 80
WA: 42.63
WM: 2.66
GROUP_W: 24
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 20
W0: 232
WA: 115.89
WM: 2.53
GROUP_W: 232
OPTIM:
LR_POLICY: cos
BASE_LR: 0.2
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 256
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 200
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 22
W0: 96
WA: 31.41
WM: 2.24
GROUP_W: 64
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 16
W0: 48
WA: 27.89
WM: 2.09
GROUP_W: 8
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 25
W0: 112
WA: 33.22
WM: 2.27
GROUP_W: 72
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 15
W0: 48
WA: 32.54
WM: 2.32
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 17
W0: 192
WA: 76.82
WM: 2.19
GROUP_W: 56
OPTIM:
LR_POLICY: cos
BASE_LR: 0.4
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 512
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 400
NUM_GPUS: 8
OUT_DIR: .
---
MODEL:
TYPE: regnet
NUM_CLASSES: 1000
REGNET:
SE_ON: True
DEPTH: 14
W0: 56
WA: 38.84
WM: 2.4
GROUP_W: 16
OPTIM:
LR_POLICY: cos
BASE_LR: 0.8
MAX_EPOCH: 100
MOMENTUM: 0.9
WEIGHT_DECAY: 5e-5
WARMUP_ITERS: 5
TRAIN:
DATASET: imagenet
IM_SIZE: 224
BATCH_SIZE: 1024
TEST:
DATASET: imagenet
IM_SIZE: 256
BATCH_SIZE: 800
NUM_GPUS: 8
OUT_DIR: .
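# Note (added for clarity, not part of the original configs): each `---` block
# above is a separate pycls-style RegNet config. DEPTH/W0/WA/WM/GROUP_W
# parameterize the per-block widths, and BASE_LR follows the linear scaling
# rule with BATCH_SIZE (0.8 @ 1024, 0.4 @ 512, 0.2 @ 256). Below is a minimal
# sketch of the standard RegNet width-generation rule these parameters feed
# into, assuming the formulation of "Designing Network Design Spaces"; the
# helper name is ours, and pycls's extra rounding of widths for group-width
# compatibility is omitted.
import numpy as np

def generate_regnet_widths(w_a, w_0, w_m, depth, q=8):
    """Quantized linear widths: u_j = w_0 + w_a * j, snapped to powers of w_m."""
    ws_cont = np.arange(depth) * w_a + w_0               # continuous widths u_j
    ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))   # per-block exponents
    ws = np.round(w_0 * np.power(w_m, ks) / q) * q       # quantize, round to q
    return ws.astype(int).tolist(), len(np.unique(ws))   # widths, stage count

# e.g. generate_regnet_widths(w_a=24.48, w_0=24, w_m=2.54, depth=22) recovers
# the width schedule implied by the first config above.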
# encoding: utf-8
# ref: https://github.com/CaoWGG/RepVGG/blob/develop/repvgg.py
import logging
import numpy as np
import torch
import torch.nn as nn
from fastreid.layers import *
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
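# Monkey-patch nn.Sequential so that deploy-mode switching propagates through
# container modules down to any child that defines a `deploying` flag.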
def deploy(self, mode=False):
self.deploying = mode
for module in self.children():
if hasattr(module, 'deploying'):
module.deploy(mode)
nn.Sequential.deploying = False
nn.Sequential.deploy = deploy
def conv_bn(norm_type, in_channels, out_channels, kernel_size, stride, padding, groups=1):
result = nn.Sequential()
result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
kernel_size=kernel_size, stride=stride, padding=padding, groups=groups,
bias=False))
result.add_module('bn', get_norm(norm_type, out_channels))
return result
class RepVGGBlock(nn.Module):
def __init__(self, in_channels, out_channels, norm_type, kernel_size,
stride=1, padding=0, groups=1):
super(RepVGGBlock, self).__init__()
        self.deploying = False
        self.in_channels = in_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.groups = groups
        assert kernel_size == 3
        assert padding == 1
        # The 1x1 branch needs smaller padding so its output matches the 3x3 branch.
        padding_11 = padding - kernel_size // 2
        self.nonlinearity = nn.ReLU()
        # Placeholders for the fused (deploy-time) convolution parameters.
        self.register_parameter('fused_weight', None)
        self.register_parameter('fused_bias', None)
self.rbr_identity = get_norm(norm_type, in_channels) if out_channels == in_channels and stride == 1 else None
self.rbr_dense = conv_bn(norm_type, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
stride=stride, padding=padding, groups=groups)
self.rbr_1x1 = conv_bn(norm_type, in_channels=in_channels, out_channels=out_channels, kernel_size=1,
stride=stride, padding=padding_11, groups=groups)
def forward(self, inputs):
if self.deploying:
assert self.fused_weight is not None and self.fused_bias is not None, \
"Make deploy mode=True to generate fused weight and fused bias first"
fused_out = self.nonlinearity(torch.nn.functional.conv2d(
inputs, self.fused_weight, self.fused_bias, self.stride, self.padding, 1, self.groups))
return fused_out
if self.rbr_identity is None:
id_out = 0
else:
id_out = self.rbr_identity(inputs)
out = self.nonlinearity(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
return out
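    # Re-parameterization: each branch's conv+BN (or BN-only identity) folds
    # into an equivalent conv with W' = (gamma / std) * W and
    # b' = beta - running_mean * gamma / std; the 1x1 kernel is zero-padded to
    # 3x3 and the three branches sum into a single 3x3 convolution.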
def get_equivalent_kernel_bias(self):
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
if branch is None:
return 0, 0
if isinstance(branch, nn.Sequential):
kernel = branch.conv.weight
running_mean = branch.bn.running_mean
running_var = branch.bn.running_var
gamma = branch.bn.weight
beta = branch.bn.bias
eps = branch.bn.eps
else:
assert branch.__class__.__name__.find('BatchNorm') != -1
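            # Express the BN-only identity branch as a 3x3 conv whose kernel is
            # a per-channel Dirac delta (1 at the center of the channel's own
            # input slice), so it can be fused like the conv branches.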
if not hasattr(self, 'id_tensor'):
input_dim = self.in_channels // self.groups
kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
for i in range(self.in_channels):
kernel_value[i, i % input_dim, 1, 1] = 1
self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
kernel = self.id_tensor
running_mean = branch.running_mean
running_var = branch.running_var
gamma = branch.weight
beta = branch.bias
eps = branch.eps
std = (running_var + eps).sqrt()
t = (gamma / std).reshape(-1, 1, 1, 1)
return kernel * t, beta - running_mean * gamma / std
def deploy(self, mode=False):
self.deploying = mode
if mode:
fused_weight, fused_bias = self.get_equivalent_kernel_bias()
self.register_parameter('fused_weight', nn.Parameter(fused_weight))
self.register_parameter('fused_bias', nn.Parameter(fused_bias))
del self.rbr_identity, self.rbr_1x1, self.rbr_dense
class RepVGG(nn.Module):
def __init__(self, last_stride, norm_type, num_blocks, width_multiplier=None, override_groups_map=None):
super(RepVGG, self).__init__()
assert len(width_multiplier) == 4
self.deploying = False
self.override_groups_map = override_groups_map or dict()
assert 0 not in self.override_groups_map
self.in_planes = min(64, int(64 * width_multiplier[0]))
self.stage0 = RepVGGBlock(in_channels=3, out_channels=self.in_planes, norm_type=norm_type,
kernel_size=3, stride=2, padding=1)
self.cur_layer_idx = 1
self.stage1 = self._make_stage(int(64 * width_multiplier[0]), norm_type, num_blocks[0], stride=2)
self.stage2 = self._make_stage(int(128 * width_multiplier[1]), norm_type, num_blocks[1], stride=2)
self.stage3 = self._make_stage(int(256 * width_multiplier[2]), norm_type, num_blocks[2], stride=2)
self.stage4 = self._make_stage(int(512 * width_multiplier[3]), norm_type, num_blocks[3], stride=last_stride)
def _make_stage(self, planes, norm_type, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
blocks = []
for stride in strides:
cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, norm_type=norm_type,
kernel_size=3, stride=stride, padding=1, groups=cur_groups))
self.in_planes = planes
self.cur_layer_idx += 1
return nn.Sequential(*blocks)
def deploy(self, mode=False):
self.deploying = mode
for module in self.children():
if hasattr(module, 'deploying'):
module.deploy(mode)
def forward(self, x):
out = self.stage0(x)
out = self.stage1(out)
out = self.stage2(out)
out = self.stage3(out)
out = self.stage4(out)
return out
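# The B-series g2/g4 variants use grouped 3x3 convolutions in every other
# block; these maps assign a group count to each listed block index.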
optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
g2_map = {l: 2 for l in optional_groupwise_layers}
g4_map = {l: 4 for l in optional_groupwise_layers}
def create_RepVGG_A0(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[2, 4, 14, 1],
width_multiplier=[0.75, 0.75, 0.75, 2.5], override_groups_map=None)
def create_RepVGG_A1(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[2, 4, 14, 1],
width_multiplier=[1, 1, 1, 2.5], override_groups_map=None)
def create_RepVGG_A2(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[2, 4, 14, 1],
width_multiplier=[1.5, 1.5, 1.5, 2.75], override_groups_map=None)
def create_RepVGG_B0(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[1, 1, 1, 2.5], override_groups_map=None)
def create_RepVGG_B1(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4], override_groups_map=None)
def create_RepVGG_B1g2(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4], override_groups_map=g2_map)
def create_RepVGG_B1g4(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4], override_groups_map=g4_map)
def create_RepVGG_B2(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=None)
def create_RepVGG_B2g2(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g2_map)
def create_RepVGG_B2g4(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[2.5, 2.5, 2.5, 5], override_groups_map=g4_map)
def create_RepVGG_B3(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[3, 3, 3, 5], override_groups_map=None)
def create_RepVGG_B3g2(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[3, 3, 3, 5], override_groups_map=g2_map)
def create_RepVGG_B3g4(last_stride, norm_type):
return RepVGG(last_stride, norm_type, num_blocks=[4, 6, 16, 1],
width_multiplier=[3, 3, 3, 5], override_groups_map=g4_map)
@BACKBONE_REGISTRY.register()
def build_repvgg_backbone(cfg):
"""
Create a RepVGG instance from config.
Returns:
        RepVGG: a :class:`RepVGG` instance.
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
func_dict = {
'A0': create_RepVGG_A0,
'A1': create_RepVGG_A1,
'A2': create_RepVGG_A2,
'B0': create_RepVGG_B0,
'B1': create_RepVGG_B1,
'B1g2': create_RepVGG_B1g2,
'B1g4': create_RepVGG_B1g4,
'B2': create_RepVGG_B2,
'B2g2': create_RepVGG_B2g2,
'B2g4': create_RepVGG_B2g4,
'B3': create_RepVGG_B3,
'B3g2': create_RepVGG_B3g2,
'B3g4': create_RepVGG_B3g4,
}
model = func_dict[depth](last_stride, bn_norm)
if pretrain:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device("cpu"))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
            logger.info(f'{pretrain_path} was not found! Please check the path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
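# Hedged usage sketch (not in the original file): fuse a RepVGG backbone into
# its single-branch inference form and check the two modes agree numerically.
# The input size is an arbitrary assumption.
if __name__ == '__main__':
    m = create_RepVGG_A0(last_stride=1, norm_type='BN')
    m.eval()  # BN must use running stats for the fused form to match
    x = torch.randn(1, 3, 256, 128)
    with torch.no_grad():
        y_train = m(x)
        m.deploy(True)  # fold 3x3 + 1x1 + identity into one conv per block
        y_deploy = m(x)
    print((y_train - y_deploy).abs().max())  # expected ~1e-5 or smaller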
# encoding: utf-8
# based on:
# https://github.com/zhanghang1989/ResNeSt/blob/master/resnest/torch/models/resnest.py
"""ResNeSt models"""
import logging
import math
import torch
from torch import nn
from fastreid.layers import SplAtConv2d, get_norm, DropBlock2D
from fastreid.utils.checkpoint import get_unexpected_parameters_message, get_missing_parameters_message
from .build import BACKBONE_REGISTRY
logger = logging.getLogger(__name__)
_url_format = 'https://github.com/zhanghang1989/ResNeSt/releases/download/weights_step1/{}-{}.pth'
_model_sha256 = {name: checksum for checksum, name in [
('528c19ca', 'resnest50'),
('22405ba7', 'resnest101'),
('75117900', 'resnest200'),
('0cc87c48', 'resnest269'),
]}
def short_hash(name):
if name not in _model_sha256:
raise ValueError('Pretrained model for {name} is not available.'.format(name=name))
return _model_sha256[name][:8]
model_urls = {name: _url_format.format(name, short_hash(name)) for
name in _model_sha256.keys()
}
class Bottleneck(nn.Module):
"""ResNet Bottleneck
"""
# pylint: disable=unused-argument
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None,
radix=1, cardinality=1, bottleneck_width=64,
avd=False, avd_first=False, dilation=1, is_first=False,
rectified_conv=False, rectify_avg=False,
norm_layer=None, dropblock_prob=0.0, last_gamma=False):
super(Bottleneck, self).__init__()
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, bias=False)
self.bn1 = get_norm(norm_layer, group_width)
self.dropblock_prob = dropblock_prob
self.radix = radix
self.avd = avd and (stride > 1 or is_first)
self.avd_first = avd_first
if self.avd:
self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
stride = 1
if dropblock_prob > 0.0:
self.dropblock1 = DropBlock2D(dropblock_prob, 3)
if radix == 1:
self.dropblock2 = DropBlock2D(dropblock_prob, 3)
self.dropblock3 = DropBlock2D(dropblock_prob, 3)
if radix >= 1:
self.conv2 = SplAtConv2d(
group_width, group_width, kernel_size=3,
stride=stride, padding=dilation,
dilation=dilation, groups=cardinality, bias=False,
radix=radix, rectify=rectified_conv,
rectify_avg=rectify_avg,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif rectified_conv:
from rfconv import RFConv2d
self.conv2 = RFConv2d(
group_width, group_width, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation,
groups=cardinality, bias=False,
average_mode=rectify_avg)
self.bn2 = get_norm(norm_layer, group_width)
else:
self.conv2 = nn.Conv2d(
group_width, group_width, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation,
groups=cardinality, bias=False)
self.bn2 = get_norm(norm_layer, group_width)
self.conv3 = nn.Conv2d(
group_width, planes * 4, kernel_size=1, bias=False)
self.bn3 = get_norm(norm_layer, planes * 4)
if last_gamma:
from torch.nn.init import zeros_
zeros_(self.bn3.weight)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dilation = dilation
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
if self.dropblock_prob > 0.0:
out = self.dropblock1(out)
out = self.relu(out)
if self.avd and self.avd_first:
out = self.avd_layer(out)
out = self.conv2(out)
if self.radix == 0:
out = self.bn2(out)
if self.dropblock_prob > 0.0:
out = self.dropblock2(out)
out = self.relu(out)
if self.avd and not self.avd_first:
out = self.avd_layer(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock_prob > 0.0:
out = self.dropblock3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNeSt(nn.Module):
"""ResNet Variants
Parameters
----------
block : Block
Class for the residual block. Options are BasicBlockV1, BottleneckV1.
layers : list of int
Numbers of layers in each block
classes : int, default 1000
Number of classification classes.
dilated : bool, default False
Applying dilation strategy to pretrained ResNet yielding a stride-8 model,
typically used in Semantic Segmentation.
norm_layer : object
        Normalization layer used in the backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`;
        for synchronized cross-GPU BatchNormalization).
Reference:
- He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
- Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
"""
# pylint: disable=unused-variable
def __init__(self, last_stride, block, layers, radix=1, groups=1, bottleneck_width=64,
dilated=False, dilation=1,
deep_stem=False, stem_width=64, avg_down=False,
rectified_conv=False, rectify_avg=False,
avd=False, avd_first=False,
final_drop=0.0, dropblock_prob=0,
last_gamma=False, norm_layer="BN"):
if last_stride == 1: dilation = 2
self.cardinality = groups
self.bottleneck_width = bottleneck_width
# ResNet-D params
self.inplanes = stem_width * 2 if deep_stem else 64
self.avg_down = avg_down
self.last_gamma = last_gamma
# ResNeSt params
self.radix = radix
self.avd = avd
self.avd_first = avd_first
super().__init__()
self.rectified_conv = rectified_conv
self.rectify_avg = rectify_avg
if rectified_conv:
from rfconv import RFConv2d
conv_layer = RFConv2d
else:
conv_layer = nn.Conv2d
conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
if deep_stem:
self.conv1 = nn.Sequential(
conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1, bias=False, **conv_kwargs),
get_norm(norm_layer, stem_width),
nn.ReLU(inplace=True),
conv_layer(stem_width, stem_width, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs),
get_norm(norm_layer, stem_width),
nn.ReLU(inplace=True),
conv_layer(stem_width, stem_width * 2, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs),
)
else:
self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
bias=False, **conv_kwargs)
self.bn1 = get_norm(norm_layer, self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
if dilated or dilation == 4:
self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
dilation=2, norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=4, norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
elif dilation == 2:
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilation=1, norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
dilation=2, norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
else:
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
norm_layer=norm_layer,
dropblock_prob=dropblock_prob)
self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None,
dropblock_prob=0.0, is_first=True):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
down_layers = []
if self.avg_down:
if dilation == 1:
down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride,
ceil_mode=True, count_include_pad=False))
else:
down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1,
ceil_mode=True, count_include_pad=False))
down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=1, bias=False))
else:
down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False))
down_layers.append(get_norm(norm_layer, planes * block.expansion))
downsample = nn.Sequential(*down_layers)
layers = []
if dilation == 1 or dilation == 2:
layers.append(block(self.inplanes, planes, stride, downsample=downsample,
radix=self.radix, cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, avd_first=self.avd_first,
dilation=1, is_first=is_first, rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer, dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
elif dilation == 4:
layers.append(block(self.inplanes, planes, stride, downsample=downsample,
radix=self.radix, cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, avd_first=self.avd_first,
dilation=2, is_first=is_first, rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer, dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
else:
raise RuntimeError("=> unknown dilation size: {}".format(dilation))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes,
radix=self.radix, cardinality=self.cardinality,
bottleneck_width=self.bottleneck_width,
avd=self.avd, avd_first=self.avd_first,
dilation=dilation, rectified_conv=self.rectified_conv,
rectify_avg=self.rectify_avg,
norm_layer=norm_layer, dropblock_prob=dropblock_prob,
last_gamma=self.last_gamma))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
@BACKBONE_REGISTRY.register()
def build_resnest_backbone(cfg):
"""
    Create a ResNeSt instance from config.
    Returns:
        ResNeSt: a :class:`ResNeSt` instance.
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
num_blocks_per_stage = {
"50x": [3, 4, 6, 3],
"101x": [3, 4, 23, 3],
"200x": [3, 24, 36, 3],
"269x": [3, 30, 48, 8],
}[depth]
stem_width = {
"50x": 32,
"101x": 64,
"200x": 64,
"269x": 64,
}[depth]
model = ResNeSt(last_stride, Bottleneck, num_blocks_per_stage,
radix=2, groups=1, bottleneck_width=64,
deep_stem=True, stem_width=stem_width, avg_down=True,
avd=True, avd_first=False, norm_layer=bn_norm)
if pretrain:
        # Load from the given pretrain path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
                logger.info(f'{pretrain_path} was not found! Please check the path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
state_dict = torch.hub.load_state_dict_from_url(
model_urls['resnest' + depth[:-1]], progress=True, check_hash=True, map_location=torch.device('cpu'))
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model
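# Hedged usage sketch (not in the original file): exercising the builder
# without fastreid's full config system via a stand-in cfg object. The
# attribute names mirror the cfg keys read above; the input size is an
# arbitrary assumption.
if __name__ == '__main__':
    from types import SimpleNamespace
    backbone = SimpleNamespace(PRETRAIN=False, PRETRAIN_PATH='',
                               LAST_STRIDE=1, NORM='BN', DEPTH='50x')
    cfg = SimpleNamespace(MODEL=SimpleNamespace(BACKBONE=backbone))
    model = build_resnest_backbone(cfg)
    feat = model(torch.randn(2, 3, 256, 128))
    print(feat.shape)  # last_stride=1 keeps the stage-4 spatial resolution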
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import logging
import math
import torch
from torch import nn
from fastreid.layers import (
IBN,
SELayer,
Non_local,
get_norm,
)
from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
from .build import BACKBONE_REGISTRY
from fastreid.utils import comm
logger = logging.getLogger(__name__)
model_urls = {
'18x': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'34x': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'50x': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'101x': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'ibn_18x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_a-2f571257.pth',
'ibn_34x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_a-94bc1577.pth',
'ibn_50x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_a-d9d0bb7b.pth',
'ibn_101x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_a-59ea0ac6.pth',
'se_ibn_101x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/se_resnet101_ibn_a-fabed4e2.pth',
}
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, bn_norm, with_ibn=False, with_se=False,
stride=1, downsample=None, reduction=16):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
if with_ibn:
self.bn1 = IBN(planes, bn_norm)
else:
self.bn1 = get_norm(bn_norm, planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = get_norm(bn_norm, planes)
self.relu = nn.ReLU(inplace=True)
if with_se:
self.se = SELayer(planes, reduction)
else:
self.se = nn.Identity()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.se(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, bn_norm, with_ibn=False, with_se=False,
stride=1, downsample=None, reduction=16):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
if with_ibn:
self.bn1 = IBN(planes, bn_norm)
else:
self.bn1 = get_norm(bn_norm, planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = get_norm(bn_norm, planes)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = get_norm(bn_norm, planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
if with_se:
self.se = SELayer(planes * self.expansion, reduction)
else:
self.se = nn.Identity()
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out = self.se(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, last_stride, bn_norm, with_ibn, with_se, with_nl, block, layers, non_layers):
self.inplanes = 64
super().__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = get_norm(bn_norm, 64)
self.relu = nn.ReLU(inplace=True)
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0], 1, bn_norm, with_ibn, with_se)
self.layer2 = self._make_layer(block, 128, layers[1], 2, bn_norm, with_ibn, with_se)
self.layer3 = self._make_layer(block, 256, layers[2], 2, bn_norm, with_ibn, with_se)
self.layer4 = self._make_layer(block, 512, layers[3], last_stride, bn_norm, with_se=with_se)
self.random_init()
# fmt: off
if with_nl: self._build_nonlocal(layers, non_layers, bn_norm)
else: self.NL_1_idx = self.NL_2_idx = self.NL_3_idx = self.NL_4_idx = []
# fmt: on
def _make_layer(self, block, planes, blocks, stride=1, bn_norm="BN", with_ibn=False, with_se=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
get_norm(bn_norm, planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, bn_norm, with_ibn, with_se, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, bn_norm, with_ibn, with_se))
return nn.Sequential(*layers)
def _build_nonlocal(self, layers, non_layers, bn_norm):
self.NL_1 = nn.ModuleList(
[Non_local(256, bn_norm) for _ in range(non_layers[0])])
self.NL_1_idx = sorted([layers[0] - (i + 1) for i in range(non_layers[0])])
self.NL_2 = nn.ModuleList(
[Non_local(512, bn_norm) for _ in range(non_layers[1])])
self.NL_2_idx = sorted([layers[1] - (i + 1) for i in range(non_layers[1])])
self.NL_3 = nn.ModuleList(
[Non_local(1024, bn_norm) for _ in range(non_layers[2])])
self.NL_3_idx = sorted([layers[2] - (i + 1) for i in range(non_layers[2])])
self.NL_4 = nn.ModuleList(
[Non_local(2048, bn_norm) for _ in range(non_layers[3])])
self.NL_4_idx = sorted([layers[3] - (i + 1) for i in range(non_layers[3])])
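        # Example: for depth '50x' (layers=[3, 4, 6, 3], non_layers=[0, 2, 3, 0])
        # this yields NL_2_idx=[2, 3] and NL_3_idx=[3, 4, 5], i.e. non-local
        # blocks after the last two blocks of layer2 and last three of layer3.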
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
# layer 1
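        # An empty index list would break `i == idx[counter]` below, so replace
        # it with the sentinel [-1], which never matches a real block index.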
NL1_counter = 0
if len(self.NL_1_idx) == 0:
self.NL_1_idx = [-1]
for i in range(len(self.layer1)):
x = self.layer1[i](x)
if i == self.NL_1_idx[NL1_counter]:
_, C, H, W = x.shape
x = self.NL_1[NL1_counter](x)
NL1_counter += 1
# layer 2
NL2_counter = 0
if len(self.NL_2_idx) == 0:
self.NL_2_idx = [-1]
for i in range(len(self.layer2)):
x = self.layer2[i](x)
if i == self.NL_2_idx[NL2_counter]:
_, C, H, W = x.shape
x = self.NL_2[NL2_counter](x)
NL2_counter += 1
# layer 3
NL3_counter = 0
if len(self.NL_3_idx) == 0:
self.NL_3_idx = [-1]
for i in range(len(self.layer3)):
x = self.layer3[i](x)
if i == self.NL_3_idx[NL3_counter]:
_, C, H, W = x.shape
x = self.NL_3[NL3_counter](x)
NL3_counter += 1
# layer 4
NL4_counter = 0
if len(self.NL_4_idx) == 0:
self.NL_4_idx = [-1]
for i in range(len(self.layer4)):
x = self.layer4[i](x)
if i == self.NL_4_idx[NL4_counter]:
_, C, H, W = x.shape
x = self.NL_4[NL4_counter](x)
NL4_counter += 1
return x
def random_init(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def init_pretrained_weights(key):
"""Initializes model with pretrained weights.
    Layers that do not match the pretrained layers in name or size are left unchanged.
"""
import os
import errno
import gdown
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = model_urls[key].split('/')[-1]
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
logger.info(f"Pretrain model don't exist, downloading from {model_urls[key]}")
if comm.is_main_process():
gdown.download(model_urls[key], cached_file, quiet=False)
comm.synchronize()
logger.info(f"Loading pretrained model from {cached_file}")
state_dict = torch.load(cached_file, map_location=torch.device('cpu'))
return state_dict
@BACKBONE_REGISTRY.register()
def build_resnet_backbone(cfg):
"""
Create a ResNet instance from config.
Returns:
ResNet: a :class:`ResNet` instance.
"""
# fmt: off
pretrain = cfg.MODEL.BACKBONE.PRETRAIN
pretrain_path = cfg.MODEL.BACKBONE.PRETRAIN_PATH
last_stride = cfg.MODEL.BACKBONE.LAST_STRIDE
bn_norm = cfg.MODEL.BACKBONE.NORM
with_ibn = cfg.MODEL.BACKBONE.WITH_IBN
with_se = cfg.MODEL.BACKBONE.WITH_SE
with_nl = cfg.MODEL.BACKBONE.WITH_NL
depth = cfg.MODEL.BACKBONE.DEPTH
# fmt: on
num_blocks_per_stage = {
'18x': [2, 2, 2, 2],
'34x': [3, 4, 6, 3],
'50x': [3, 4, 6, 3],
'101x': [3, 4, 23, 3],
}[depth]
nl_layers_per_stage = {
'18x': [0, 0, 0, 0],
'34x': [0, 0, 0, 0],
'50x': [0, 2, 3, 0],
'101x': [0, 2, 9, 0]
}[depth]
block = {
'18x': BasicBlock,
'34x': BasicBlock,
'50x': Bottleneck,
'101x': Bottleneck
}[depth]
model = ResNet(last_stride, bn_norm, with_ibn, with_se, with_nl, block,
num_blocks_per_stage, nl_layers_per_stage)
if pretrain:
        # Load from the given pretrain path if one is specified
if pretrain_path:
try:
state_dict = torch.load(pretrain_path, map_location=torch.device('cpu'))
logger.info(f"Loading pretrained model from {pretrain_path}")
except FileNotFoundError as e:
                logger.info(f'{pretrain_path} was not found! Please check the path.')
raise e
except KeyError as e:
logger.info("State dict keys error! Please check the state dict.")
raise e
else:
key = depth
if with_ibn: key = 'ibn_' + key
if with_se: key = 'se_' + key
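            # e.g. depth='50x' with with_ibn=True composes the key 'ibn_50x',
            # which selects the matching IBN-Net URL in model_urls above.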
state_dict = init_pretrained_weights(key)
incompatible = model.load_state_dict(state_dict, strict=False)
if incompatible.missing_keys:
logger.info(
get_missing_parameters_message(incompatible.missing_keys)
)
if incompatible.unexpected_keys:
logger.info(
get_unexpected_parameters_message(incompatible.unexpected_keys)
)
return model