Commit 1e2486af authored by sunxx1

Add inception_v3 test code

import torch
import torch.nn as nn
__all__ = ['mnasnet']
class _InvertedResidual(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor):
super(_InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel_size in [3, 5]
mid_ch = in_ch * expansion_factor
self.apply_residual = (in_ch == out_ch and stride == 1)
self.layers = nn.Sequential(
# Pointwise
nn.Conv2d(in_ch, mid_ch, 1, bias=False),
nn.BatchNorm2d(mid_ch),
nn.ReLU(inplace=True),
# Depthwise
nn.Conv2d(mid_ch,
mid_ch,
kernel_size,
padding=kernel_size // 2,
stride=stride,
groups=mid_ch,
bias=False),
nn.BatchNorm2d(mid_ch),
nn.ReLU(inplace=True),
# Linear pointwise. Note that there's no activation.
nn.Conv2d(mid_ch, out_ch, 1, bias=False),
nn.BatchNorm2d(out_ch))
def forward(self, input):
if self.apply_residual:
return self.layers(input) + input
else:
return self.layers(input)
def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats):
""" Creates a stack of inverted residuals. """
assert repeats >= 1
# First one has no skip, because feature map size changes.
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor)
remaining = []
for _ in range(1, repeats):
remaining.append(
_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor))
return nn.Sequential(first, *remaining)
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
""" Asymmetric rounding to make `val` divisible by `divisor`. With default
bias, will round up, unless the number is no more than 10% greater than the
smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
assert 0.0 < round_up_bias < 1.0
new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
return new_val if new_val >= round_up_bias * val else new_val + divisor
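# As noted in the docstring above, with the default bias
# _round_to_multiple_of(83, 8) returns 80 (83 is within 10% of 80), while
# _round_to_multiple_of(84, 8) rounds up to 88.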
def _get_depths(scale):
""" Scales tensor depths as in reference MobileNet code, prefers rouding up
rather than down. """
depths = [32, 16, 24, 40, 80, 96, 192, 320]
return [_round_to_multiple_of(depth * scale, 8) for depth in depths]
class MNASNet(torch.nn.Module):
# Version 2 adds depth scaling in the initial stages of the network.
_version = 2
def __init__(self, scale, num_classes=1000, dropout=0.2):
super(MNASNet, self).__init__()
assert scale > 0.0
self.scale = scale
self.num_classes = num_classes
depths = _get_depths(scale)
layers = [
# First layer: regular conv.
nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
nn.BatchNorm2d(depths[0]),
nn.ReLU(inplace=True),
# Depthwise separable, no skip.
nn.Conv2d(depths[0],
depths[0],
3,
padding=1,
stride=1,
groups=depths[0],
bias=False),
nn.BatchNorm2d(depths[0]),
nn.ReLU(inplace=True),
nn.Conv2d(depths[0], depths[1], 1, padding=0, stride=1,
bias=False),
nn.BatchNorm2d(depths[1]),
# MNASNet blocks: stacks of inverted residuals.
_stack(depths[1], depths[2], 3, 2, 3, 3),
_stack(depths[2], depths[3], 5, 2, 3, 3),
_stack(depths[3], depths[4], 5, 2, 6, 3),
_stack(depths[4], depths[5], 3, 1, 6, 2),
_stack(depths[5], depths[6], 5, 2, 6, 4),
_stack(depths[6], depths[7], 3, 1, 6, 1),
# Final mapping to classifier input.
nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
nn.BatchNorm2d(1280),
nn.ReLU(inplace=True),
]
self.layers = nn.Sequential(*layers)
self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
nn.Linear(1280, num_classes))
self._initialize_weights()
def forward(self, x):
x = self.layers(x)
# Equivalent to global avgpool and removing H and W dimensions.
x = x.mean([2, 3])
return self.classifier(x)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight,
mode="fan_out",
nonlinearity="relu")
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.kaiming_uniform_(m.weight,
mode="fan_out",
nonlinearity="sigmoid")
nn.init.zeros_(m.bias)
def mnasnet(**kwargs):
model = MNASNet(**kwargs)
return model
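# Illustrative smoke test: a minimal sketch that assumes a 224x224 RGB input
# and the default 1000-class head.
if __name__ == "__main__":
    net = mnasnet(scale=1.0)
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])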
import torch.nn as nn
from torch.nn import init
__all__ = ["mobile_v1"]
class MobileNetV1(nn.Module):
def __init__(self, scale=1.0, num_classes=1000, bn_group=None):
super(MobileNetV1, self).__init__()
BN = nn.BatchNorm2d
self.scale = scale
def conv_bn(inp, oup, stride):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
BN(oup), nn.ReLU(inplace=True))
def conv_dw(inp, oup, stride):
inp = int(inp * scale)
oup = int(oup * scale)
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
BN(inp),
nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
BN(oup),
nn.ReLU(inplace=True),
)
self.model = nn.Sequential(
conv_bn(3, int(32 * scale), 2),
conv_dw(32, 64, 1),
conv_dw(64, 128, 2),
conv_dw(128, 128, 1),
conv_dw(128, 256, 2),
conv_dw(256, 256, 1),
conv_dw(256, 512, 2),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 512, 1),
conv_dw(512, 1024, 2),
conv_dw(1024, 1024, 1),
nn.AvgPool2d(7),
)
self.fc = nn.Linear(int(1024 * scale), num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.01)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.model(x)
x = x.view(-1, int(1024 * self.scale))
x = self.fc(x)
return x
def mobile_v1(**kwargs):
model = MobileNetV1(**kwargs)
return model
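# Illustrative smoke test: a minimal sketch that assumes a 224x224 RGB input
# (the trailing AvgPool2d(7) expects a 7x7 feature map).
if __name__ == "__main__":
    import torch  # only needed for this check; not imported above
    net = mobile_v1(scale=1.0)
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])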
import torch.nn as nn
__all__ = ['mobile_v2']
def conv_bn(inp, oup, stride):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup), nn.ReLU6(inplace=False))
def conv_1x1_bn(inp, oup):
return nn.Sequential(nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup), nn.ReLU6(inplace=False))
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = round(inp * expand_ratio)
self.use_res_connect = self.stride == 1 and inp == oup
if expand_ratio == 1:
self.conv = nn.Sequential(
nn.Conv2d(hidden_dim,
hidden_dim,
3,
stride,
1,
groups=hidden_dim,
bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim,
hidden_dim,
3,
stride,
1,
groups=hidden_dim,
bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=False),
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
scale=1,
num_classes=1000,
input_size=224,
width_mult=1.):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
last_channel = 1280
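        # Each row below: [expansion factor t, output channels c,
        #                  number of repeats n, stride of the first block s].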
        inverted_residual_setting = [
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
assert input_size % 32 == 0
input_channel = int(input_channel * width_mult)
self.last_channel = int(
last_channel * width_mult) if width_mult > 1.0 else last_channel
self.features = [conv_bn(3, input_channel, 2)]
        for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
if i == 0:
self.features.append(
block(input_channel, output_channel, s,
expand_ratio=t))
else:
self.features.append(
block(input_channel, output_channel, 1,
expand_ratio=t))
input_channel = output_channel
self.features.append(conv_1x1_bn(input_channel, self.last_channel))
self.features = nn.Sequential(*self.features)
self.classifier = nn.Sequential(
nn.Dropout(0.2),
# nn.Conv2d(self.last_channel, num_classes, kernel_size=1))
nn.Linear(self.last_channel, num_classes))
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = x.mean(3).mean(2)
x = self.classifier(x)
# x = x.view(x.size(0), -1)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def mobile_v2(**kwargs):
model = MobileNetV2(**kwargs)
return model
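# Illustrative smoke test: a minimal sketch; input_size must be a multiple of
# 32, and 224x224 is assumed here.
if __name__ == "__main__":
    import torch  # only needed for this check; not imported above
    net = mobile_v2()
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])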
import torch.nn as nn
import torch.nn.functional as F
__all__ = ['mobile_v3']
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
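# For example, _make_divisible(30, 8) returns 32, while _make_divisible(4, 8)
# returns 8 because the result is floored at min_value (the divisor by default).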
def conv_bn(inp, oup, stride, activation=nn.ReLU):
return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup), activation(inplace=True))
def conv_1x1_bn(inp, oup, activation=nn.ReLU):
return nn.Sequential(nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup), activation(inplace=True))
class Hswish(nn.Module):
def __init__(self, inplace=True):
super(Hswish, self).__init__()
self.inplace = inplace
def forward(self, x):
return x * F.relu6(x + 3., inplace=self.inplace) / 6.
class Hsigmoid(nn.Module):
def __init__(self, inplace=True):
super(Hsigmoid, self).__init__()
self.inplace = inplace
def forward(self, x):
return F.relu6(x + 3., inplace=self.inplace) / 6.
class SEModule(nn.Module):
def __init__(self, channel, reduction=4):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction, bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel, bias=False), Hsigmoid())
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y.expand_as(x)
class Identity(nn.Module):
def __init__(self, channel):
super(Identity, self).__init__()
def forward(self, x):
return x
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, kernel, stride, exp, se=False, nl='RE'):
super(InvertedResidual, self).__init__()
assert stride in [1, 2]
assert kernel in [3, 5]
padding = (kernel - 1) // 2
self.use_res_connect = stride == 1 and inp == oup
if nl == 'RE':
activation = nn.ReLU
elif nl == 'HS':
activation = Hswish
else:
raise NotImplementedError
SELayer = SEModule if se else Identity
layers = []
if inp != exp:
# pw
layers.extend([
nn.Conv2d(inp, exp, 1, 1, 0, bias=False),
nn.BatchNorm2d(exp),
activation(inplace=True),
])
layers.extend([
# dw
nn.Conv2d(exp,
exp,
kernel,
stride,
padding,
groups=exp,
bias=False),
nn.BatchNorm2d(exp),
SELayer(exp),
activation(inplace=True),
# pw-linear
nn.Conv2d(exp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV3(nn.Module):
def __init__(self,
num_classes=1000,
scale=1.0,
dropout=0.8,
round_nearest=8,
mode='small',
bn=None):
super(MobileNetV3, self).__init__()
input_channel = 16
last_channel = 1280
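        # Each row below: [kernel size k, expansion size exp, output channels c,
        #                  use SE block se, nonlinearity nl ('RE'/'HS'), stride s].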
if mode == 'large':
mobile_setting = [
[3, 16, 16, False, 'RE', 1],
[3, 64, 24, False, 'RE', 2],
[3, 72, 24, False, 'RE', 1],
[5, 72, 40, True, 'RE', 2],
[5, 120, 40, True, 'RE', 1],
[5, 120, 40, True, 'RE', 1],
[3, 240, 80, False, 'HS', 2],
[3, 200, 80, False, 'HS', 1],
[3, 184, 80, False, 'HS', 1],
[3, 184, 80, False, 'HS', 1],
[3, 480, 112, True, 'HS', 1],
[3, 672, 112, True, 'HS', 1],
[5, 672, 160, True, 'HS', 2],
[5, 960, 160, True, 'HS', 1],
[5, 960, 160, True, 'HS', 1],
]
elif mode == 'small':
mobile_setting = [
[3, 16, 16, True, 'RE', 2],
[3, 72, 24, False, 'RE', 2],
[3, 88, 24, False, 'RE', 1],
[5, 96, 40, True, 'HS', 2],
[5, 240, 40, True, 'HS', 1],
[5, 240, 40, True, 'HS', 1],
[5, 120, 48, True, 'HS', 1],
[5, 144, 48, True, 'HS', 1],
[5, 288, 96, True, 'HS', 2],
[5, 576, 96, True, 'HS', 1],
[5, 576, 96, True, 'HS', 1],
]
else:
raise NotImplementedError
# building first layer
last_channel = _make_divisible(
last_channel *
scale, round_nearest) if scale > 1.0 else last_channel
self.features = [conv_bn(3, input_channel, 2, activation=Hswish)]
self.classifier = []
# building mobile blocks
for k, exp, c, se, nl, s in mobile_setting:
output_channel = _make_divisible(c * scale, round_nearest)
exp_channel = _make_divisible(exp * scale, round_nearest)
self.features.append(
InvertedResidual(input_channel, output_channel, k, s,
exp_channel, se, nl))
input_channel = output_channel
# building last several layers
if mode == 'large':
last_conv = _make_divisible(960 * scale, round_nearest)
self.features.append(
conv_1x1_bn(input_channel, last_conv, activation=Hswish))
self.features.append(nn.AdaptiveAvgPool2d(1))
self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
self.features.append(Hswish(inplace=True))
elif mode == 'small':
last_conv = _make_divisible(576 * scale, round_nearest)
self.features.append(
conv_1x1_bn(input_channel, last_conv, activation=Hswish))
self.features.append(nn.AdaptiveAvgPool2d(1))
self.features.append(nn.Conv2d(last_conv, last_channel, 1, 1, 0))
self.features.append(Hswish(inplace=True))
else:
raise NotImplementedError
self.features = nn.Sequential(*self.features)
self.classifier = nn.Sequential(
nn.Dropout(p=dropout),
nn.Linear(last_channel, num_classes),
)
self.init_params()
def forward(self, x):
x = self.features(x)
x = x.mean([2, 3])
x = self.classifier(x)
return x
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
if m.weight is not None:
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def mobile_v3(**kwargs):
model = MobileNetV3(**kwargs)
return model
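# Illustrative smoke test: a minimal sketch; both the default 'small' and the
# 'large' settings accept a 224x224 RGB input.
if __name__ == "__main__":
    import torch  # only needed for this check; not imported above
    for mode in ('small', 'large'):
        net = mobile_v3(mode=mode)
        net.eval()
        with torch.no_grad():
            logits = net(torch.randn(1, 3, 224, 224))
        print(mode, logits.shape)  # expected: torch.Size([1, 1000])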
import torch.nn as nn
import math
__all__ = [
'preact_resnet18', 'preact_resnet34', 'preact_resnet50',
'preact_resnet101', 'preact_resnet152'
]
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class PreactBasicBlock(nn.Module):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
preactivate=True):
super(PreactBasicBlock, self).__init__()
self.pre_bn = self.pre_relu = None
if preactivate:
self.pre_bn = nn.BatchNorm2d(inplanes)
self.pre_relu = nn.ReLU(inplace=True)
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.downsample = downsample
self.stride = stride
self.preactivate = preactivate
def forward(self, x):
if self.preactivate:
preact = self.pre_bn(x)
preact = self.pre_relu(preact)
else:
preact = x
out = self.conv1(preact)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
if self.downsample is not None:
residual = self.downsample(preact)
else:
residual = x
out += residual
return out
class PreactBottleneck(nn.Module):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
preactivate=True):
super(PreactBottleneck, self).__init__()
self.pre_bn = self.pre_relu = None
if preactivate:
self.pre_bn = nn.BatchNorm2d(inplanes)
self.pre_relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.relu1 = nn.ReLU(inplace=True)
self.relu2 = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
self.preactivate = preactivate
def forward(self, x):
if self.preactivate:
preact = self.pre_bn(x)
preact = self.pre_relu(preact)
else:
preact = x
out = self.conv1(preact)
out = self.bn1(out)
out = self.relu1(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu2(out)
out = self.conv3(out)
if self.downsample is not None:
residual = self.downsample(preact)
else:
residual = x
out += residual
return out
class PreactResNet(nn.Module):
def __init__(self,
block,
layers,
num_classes=1000,
deep_stem=False,
avg_down=False,
bypass_last_bn=False,
bn=None):
super(PreactResNet, self).__init__()
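        # NOTE: nothing in this file appends to bypass_bn_weight_list, so the
        # bypass_last_bn option below is currently a no-op.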
global bypass_bn_weight_list
bypass_bn_weight_list = []
self.inplanes = 64
self.deep_stem = deep_stem
self.avg_down = avg_down
if self.deep_stem:
self.conv1 = nn.Sequential(
nn.Conv2d(3,
32,
kernel_size=3,
stride=2,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
32,
kernel_size=3,
stride=1,
padding=1,
bias=False),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.Conv2d(32,
64,
kernel_size=3,
stride=1,
padding=1,
bias=False),
)
else:
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.final_bn = nn.BatchNorm2d(512 * block.expansion)
self.final_relu = nn.ReLU(inplace=True)
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 1.0 / float(n))
m.bias.data.zero_()
if bypass_last_bn:
for param in bypass_bn_weight_list:
param.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1, avg_down=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
if self.avg_down:
downsample = nn.Sequential(
nn.AvgPool2d(stride,
stride=stride,
ceil_mode=True,
count_include_pad=False),
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=1,
bias=False),
# BN(planes * block.expansion),
)
else:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
# BN(planes * block.expansion),
)
# On the first residual block in the first residual layer we don't pre-activate,
# because we take care of that (+ maxpool) after the initial conv layer
preactivate_first = stride != 1
layers = []
layers.append(
block(self.inplanes, planes, stride, downsample,
preactivate_first))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.final_bn(x)
x = self.final_relu(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def preact_resnet18(**kwargs):
model = PreactResNet(PreactBasicBlock, [2, 2, 2, 2], **kwargs)
return model
def preact_resnet34(**kwargs):
model = PreactResNet(PreactBasicBlock, [3, 4, 6, 3], **kwargs)
return model
def preact_resnet50(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 4, 6, 3], **kwargs)
return model
def preact_resnet101(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 4, 23, 3], **kwargs)
return model
def preact_resnet152(**kwargs):
model = PreactResNet(PreactBottleneck, [3, 8, 36, 3], **kwargs)
return model
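# Illustrative smoke test: a minimal sketch; the fixed AvgPool2d(7) assumes a
# 224x224 input.
if __name__ == "__main__":
    import torch  # only needed for this check; not imported above
    net = preact_resnet18()
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])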
import torch
import torch.nn as nn
from torch.nn import init
__all__ = ["shuffle_v2"]
def conv3x3(in_channels,
out_channels,
stride=1,
padding=1,
bias=True,
groups=1):
return nn.Conv2d(in_channels,
out_channels,
kernel_size=3,
stride=stride,
padding=padding,
bias=bias,
groups=groups)
def conv1x1(in_channels, out_channels, bias=True, groups=1):
return nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
bias=bias,
groups=groups)
def channel_shuffle(x, groups):
    batchsize, num_channels, height, width = x.size()
channels_per_group = num_channels // groups
x = x.view(batchsize, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(batchsize, -1, height, width)
return x
def channel_split(x, splits=[24, 24]):
return torch.split(x, splits, dim=1)
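# Illustration: channel_shuffle interleaves the group-wise channel blocks, e.g.
# for 4 channels and groups=2 the channel order [0, 1, 2, 3] becomes
# [0, 2, 1, 3]; channel_split simply cuts the channel dimension into chunks of
# the given sizes (two halves of 24 by default).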
class ParimaryModule(nn.Module):
def __init__(self, in_channels=3, out_channels=24):
super(ParimaryModule, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.ParimaryModule = nn.Sequential(
conv3x3(in_channels, out_channels, 2, 1, True, 1),
nn.BatchNorm2d(out_channels),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.ParimaryModule(x)
return x
class FinalModule(nn.Module):
def __init__(self, in_channels=464, out_channels=1024, num_classes=1000):
super(FinalModule, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.num_classes = num_classes
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc = nn.Linear(out_channels, num_classes)
self.FinalConv = nn.Sequential(
conv1x1(in_channels, out_channels, True, 1),
nn.BatchNorm2d(out_channels), nn.ReLU())
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.FinalConv(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
class ShuffleNetV2Block(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, splits_left=2):
super(ShuffleNetV2Block, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.stride = stride
self.splits_left = splits_left
if stride == 2:
self.Left = nn.Sequential(
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, out_channels // 2, True, 1),
nn.BatchNorm2d(out_channels // 2), nn.ReLU())
self.Right = nn.Sequential(
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU(),
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, out_channels // 2, True, 1),
nn.BatchNorm2d(out_channels // 2), nn.ReLU())
elif stride == 1:
in_channels = in_channels - in_channels // splits_left
self.Right = nn.Sequential(
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU(),
conv3x3(in_channels, in_channels, stride, 1, True,
in_channels), nn.BatchNorm2d(in_channels),
conv1x1(in_channels, in_channels, True, 1),
nn.BatchNorm2d(in_channels), nn.ReLU())
else:
raise ValueError('stride must be 1 or 2')
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
if self.stride == 2:
x_left, x_right = x, x
x_left = self.Left(x_left)
x_right = self.Right(x_right)
elif self.stride == 1:
x_split = channel_split(x, [
self.in_channels // self.splits_left,
self.in_channels - self.in_channels // self.splits_left
])
x_left, x_right = x_split[0], x_split[1]
x_right = self.Right(x_right)
x = torch.cat((x_left, x_right), dim=1)
x = channel_shuffle(x, 2)
return x
class ShuffleNetV2(nn.Module):
def __init__(self,
in_channels=3,
num_classes=1000,
net_scale=1.0,
stage_repeat=1,
splits_left=2):
super(ShuffleNetV2, self).__init__()
self.in_channels = in_channels
self.num_classes = num_classes
self.net_scale = net_scale
self.splits_left = splits_left
if net_scale == 0.5:
self.out_channels = [24, 48, 96, 192, 1024]
elif net_scale == 1.0:
self.out_channels = [24, 116, 232, 464, 1024]
elif net_scale == 1.5:
self.out_channels = [24, 176, 352, 704, 1024]
elif net_scale == 2.0:
self.out_channels = [24, 244, 488, 976, 2048]
else:
raise ValueError('net_scale must be 0.5,1.0,1.5 or 2.0')
self.ParimaryModule = ParimaryModule(in_channels, self.out_channels[0])
if stage_repeat == 1:
self.Stage1 = self.Stage(1, [1, 3])
self.Stage2 = self.Stage(2, [1, 7])
self.Stage3 = self.Stage(3, [1, 3])
        elif stage_repeat == 2:
            self.Stage1 = self.Stage(1, [1, 7])
            self.Stage2 = self.Stage(2, [1, 15])
            self.Stage3 = self.Stage(3, [1, 7])
        else:
            raise ValueError('stage_repeat must be 1 or 2')
self.FinalModule = FinalModule(self.out_channels[3],
self.out_channels[4], num_classes)
def Stage(self, stage=1, BlockRepeat=[1, 3]):
modules = []
if BlockRepeat[0] == 1:
modules.append(
ShuffleNetV2Block(self.out_channels[stage - 1],
self.out_channels[stage], 2,
self.splits_left))
else:
raise ValueError('stage first block must only repeat 1 time')
for i in range(BlockRepeat[1]):
modules.append(
ShuffleNetV2Block(self.out_channels[stage],
self.out_channels[stage], 1,
self.splits_left))
return nn.Sequential(*modules)
def forward(self, x):
x = self.ParimaryModule(x)
x = self.Stage1(x)
x = self.Stage2(x)
x = self.Stage3(x)
x = self.FinalModule(x)
return x
def shuffle_v2(**kwargs):
model = ShuffleNetV2(**kwargs)
return model
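# Illustrative smoke test: a minimal sketch; the fixed AvgPool2d(7) in
# FinalModule assumes a 224x224 input.
if __name__ == "__main__":
    net = shuffle_v2()
    net.eval()
    with torch.no_grad():
        logits = net(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])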