Unverified Commit f9bbd8d0 authored by Yuge Zhang's avatar Yuge Zhang Committed by GitHub
Browse files

[Retiarii] Search space hub (#4524)

parent 9fde0e8e
...@@ -8,3 +8,4 @@ _generated_model_*.py ...@@ -8,3 +8,4 @@ _generated_model_*.py
_generated_model _generated_model
generated generated
lightning_logs lightning_logs
model.onnx
...@@ -177,7 +177,6 @@ class _SupervisedLearningModule(LightningModule): ...@@ -177,7 +177,6 @@ class _SupervisedLearningModule(LightningModule):
self.export_onnx = Path(export_onnx) self.export_onnx = Path(export_onnx)
else: else:
self.export_onnx = None self.export_onnx = None
self._already_exported = False
def forward(self, x): def forward(self, x):
y_hat = self.model(x) y_hat = self.model(x)
...@@ -196,12 +195,12 @@ class _SupervisedLearningModule(LightningModule): ...@@ -196,12 +195,12 @@ class _SupervisedLearningModule(LightningModule):
x, y = batch x, y = batch
y_hat = self(x) y_hat = self(x)
if not self._already_exported: if self.export_onnx is not None:
try: try:
self.to_onnx(self.export_onnx, x, export_params=True) self.to_onnx(self.export_onnx, x, export_params=True)
except RuntimeError as e: except RuntimeError as e:
warnings.warn(f'ONNX conversion failed. As a result, you might not be able to use visualization. Error message: {e}') warnings.warn(f'ONNX conversion failed. As a result, you might not be able to use visualization. Error message: {e}')
self._already_exported = True self.export_onnx = None
self.log('val_loss', self.criterion(y_hat, y), prog_bar=True) self.log('val_loss', self.criterion(y_hat, y), prog_bar=True)
for name, metric in self.metrics.items(): for name, metric in self.metrics.items():
......
This README will be deleted once this hub is stabilized, after which we will promote it in the documentation.
## Why
We hereby provide a series of state-of-the-art search spaces, each of which is a PyTorch model + mutations + a training recipe.
For further motivations and plans, please see https://github.com/microsoft/nni/issues/4249.
## Reproduction Roadmap
1. Runnable
2. Load checkpoint of searched architecture and evaluate
3. Reproduce searched architecture
4. Runnable with built-in algos
5. Reproduce result with at least one algo
| | 1 | 2 | 3 | 4 | 5 |
|------------------------|--------|--------|--------|--------|--------|
| NasBench101 | Y | | | | |
| NasBench201 | Y | | | | |
| NASNet | Y | | | | |
| ENAS | Y | | | | |
| AmoebaNet | Y | | | | |
| PNAS | Y | | | | |
| DARTS | Y | | | | |
| ProxylessNAS | Y | | | | |
| MobileNetV3Space | Y | | | | |
| ShuffleNetSpace | Y | | | | |
| ShuffleNetSpace (ch) | Y | | | | |
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .mobilenetv3 import MobileNetV3Space
from .nasbench101 import NasBench101
from .nasbench201 import NasBench201
from .nasnet import NDS, NASNet, ENAS, AmoebaNet, PNAS, DARTS
from .proxylessnas import ProxylessNAS
from .shufflenet import ShuffleNetSpace
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Tuple, Optional, Callable
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
from .proxylessnas import ConvBNReLU, InvertedResidual, SeparableConv, make_divisible, reset_parameters
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``relu6(x + 3) / 6``, a piecewise-linear approximation of sigmoid."""

    def __init__(self, inplace=True):
        super().__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        clipped = self.relu(x + 3)
        return clipped / 6
class h_swish(nn.Module):
    """Hard swish activation: ``x * h_sigmoid(x)``."""

    def __init__(self, inplace=True):
        super().__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        gate = self.sigmoid(x)
        return x * gate
class SELayer(nn.Module):
    """Squeeze-and-excite layer.

    Globally average-pools the feature map, feeds the channel descriptor through a
    two-layer bottleneck MLP, and rescales the input channel-wise with the result.
    """

    def __init__(self,
                 channels: int,
                 reduction: int = 4,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()
        if activation_layer is None:
            activation_layer = nn.Sigmoid
        # Hidden width of the bottleneck, rounded to a multiple of 8.
        hidden = make_divisible(channels // reduction, 8)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channels),
            activation_layer()
        )

    def forward(self, x):
        batch, chan, _, _ = x.size()
        scale = self.avg_pool(x).view(batch, chan)
        scale = self.fc(scale).view(batch, chan, 1, 1)
        return x * scale
@model_wrapper
class MobileNetV3Space(nn.Module):
    """
    MobileNetV3Space implements the largest search space in `TuNAS <https://arxiv.org/abs/2008.06120>`__.

    The search dimensions include widths, expand ratios, kernel sizes, SE ratio.
    Some of them can be turned off via arguments to narrow down the search space.

    Different from ProxylessNAS search space, this space is implemented with :class:`nn.ValueChoice`.
    We use the following snippet as reference.
    https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/mobile_search_space_v3.py#L728
    """

    def __init__(self, num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024),
                 width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
                 expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
                 dropout_rate: float = 0.2,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):
        super().__init__()

        # One mutable width per stage: each candidate is the base width scaled by a
        # multiplier and rounded to a multiple of 8.
        self.widths = [
            nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers], label=f'width_{i}')
            for i, base_width in enumerate(base_widths)
        ]
        self.expand_ratios = expand_ratios

        blocks = [
            # Stem
            ConvBNReLU(
                3, self.widths[0],
                nn.ValueChoice([3, 5], label='ks_0'),
                stride=2, activation_layer=h_swish
            ),
            SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
        ]

        # counting for kernel sizes and expand ratios
        self.layer_count = 2

        blocks += [
            # Body
            self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU),
            self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU),
            self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
            self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
            self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
        ]

        # Head
        blocks += [
            ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish),
            nn.AdaptiveAvgPool2d(1),
            ConvBNReLU(self.widths[6], self.widths[7], 1, 1, norm_layer=nn.Identity, activation_layer=h_swish),
        ]

        self.blocks = nn.Sequential(*blocks)

        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(self.widths[7], num_labels),
        )

        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)

    def forward(self, x):
        x = self.blocks(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _make_stage(self, stage_idx, inp, oup, se, stride, act):
        """Build one body stage: 4 inverted-residual layers wrapped in a mutable-depth Repeat."""
        # initialize them first because they are related to layer_count.
        exp, ks, se_blocks = [], [], []
        for _ in range(4):
            exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}'))
            ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}'))
            if se:
                # if SE is true, assign a layer choice to SE.
                # FIX: the label must be captured *now*, as a default argument.
                # A plain closure would read ``self.layer_count`` only when the
                # lambda is invoked (inside InvertedResidual, after the loop has
                # advanced the counter), so every SE block in this stage would
                # share one identical, wrong label.
                se_blocks.append(
                    lambda hidden_ch, label=f'se_{self.layer_count}':
                        nn.LayerChoice([nn.Identity(), SELayer(hidden_ch)], label=label)
                )
            else:
                se_blocks.append(None)
            self.layer_count += 1

        blocks = [
            # stride = 2
            InvertedResidual(inp, oup, exp[0], ks[0],
                             stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
            # stride = 1, residual connection should be automatically enabled
            InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
            InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
            InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
        ]

        # mutable depth
        return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}')
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import math
import torch.nn as nn
from nni.retiarii import model_wrapper
from nni.retiarii.nn.pytorch import NasBench101Cell
__all__ = ['NasBench101']
def truncated_normal_(tensor, mean=0, std=1):
    """In-place truncated-normal init: sample 4 candidates per entry, keep one within 2 std.

    https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/15
    """
    shape = tensor.shape
    candidates = tensor.new_empty(shape + (4,)).normal_()
    within_bounds = (candidates < 2) & (candidates > -2)
    chosen_idx = within_bounds.max(-1, keepdim=True)[1]
    tensor.data.copy_(candidates.gather(-1, chosen_idx).squeeze(-1))
    tensor.data.mul_(std).add_(mean)
class ConvBNReLU(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU, with NAS-Bench-101's truncated-normal initialization."""

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv_bn_relu = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
        self.reset_parameters()

    def reset_parameters(self):
        """Re-init conv weights with a truncated normal and BN with weight=1, bias=0."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_in = module.kernel_size[0] * module.kernel_size[1] * module.in_channels
                truncated_normal_(module.weight.data, mean=0., std=math.sqrt(1. / fan_in))
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def forward(self, x):
        return self.conv_bn_relu(x)
class Conv3x3BNReLU(ConvBNReLU):
    """3x3 variant of :class:`ConvBNReLU` (stride 1, 'same' padding)."""

    def __init__(self, in_channels, out_channels):
        super().__init__(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
class Conv1x1BNReLU(ConvBNReLU):
    """1x1 (pointwise) variant of :class:`ConvBNReLU`."""

    def __init__(self, in_channels, out_channels):
        super().__init__(in_channels, out_channels, kernel_size=1, stride=1, padding=0)


# NAS-Bench-101 uses a 1x1 conv-bn-relu as its channel projection operation.
Projection = Conv1x1BNReLU
@model_wrapper
class NasBench101(nn.Module):
    """The full search space, proposed by `NAS-Bench-101 <http://proceedings.mlr.press/v97/ying19a/ying19a.pdf>`__.

    It's simply a stack of :class:`NasBench101Cell`. Operations are conv3x3, conv1x1 and maxpool respectively.
    """

    def __init__(self,
                 stem_out_channels: int = 128,
                 num_stacks: int = 3,
                 num_modules_per_stack: int = 3,
                 max_num_vertices: int = 7,
                 max_num_edges: int = 9,
                 num_labels: int = 10,
                 bn_eps: float = 1e-5,
                 bn_momentum: float = 0.003):
        super().__init__()
        # FIX: remember the BN hyper-parameters so ``reset_parameters`` can
        # re-apply them; the original never stored them (see below).
        self.bn_eps = bn_eps
        self.bn_momentum = bn_momentum

        op_candidates = {
            'conv3x3-bn-relu': lambda num_features: Conv3x3BNReLU(num_features, num_features),
            'conv1x1-bn-relu': lambda num_features: Conv1x1BNReLU(num_features, num_features),
            'maxpool3x3': lambda num_features: nn.MaxPool2d(3, 1, 1)
        }

        # initial stem convolution
        self.stem_conv = Conv3x3BNReLU(3, stem_out_channels)

        layers = []
        in_channels = out_channels = stem_out_channels
        for stack_num in range(num_stacks):
            # Every stack after the first starts with 2x spatial downsampling
            # and doubles the channel count.
            if stack_num > 0:
                downsample = nn.MaxPool2d(kernel_size=2, stride=2)
                layers.append(downsample)
                out_channels *= 2
            for _ in range(num_modules_per_stack):
                # All cells share ``label='cell'``, i.e. one architecture choice
                # is replicated throughout the network.
                cell = NasBench101Cell(op_candidates, in_channels, out_channels,
                                       lambda cin, cout: Projection(cin, cout),
                                       max_num_vertices, max_num_edges, label='cell')
                layers.append(cell)
                in_channels = out_channels

        self.features = nn.ModuleList(layers)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(out_channels, num_labels)

        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eps = bn_eps
                module.momentum = bn_momentum

    def forward(self, x):
        bs = x.size(0)
        out = self.stem_conv(x)
        for layer in self.features:
            out = layer(out)
        out = self.gap(out).view(bs, -1)
        out = self.classifier(out)
        return out

    def reset_parameters(self):
        """Re-apply the BN hyper-parameters to every BatchNorm layer.

        FIX: the original read ``self.config.bn_eps`` / ``self.config.bn_momentum``,
        but no ``config`` attribute was ever set anywhere in the class, so calling
        this method always raised ``AttributeError``. The values are now stored on
        ``self`` in ``__init__`` and read from there.
        """
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eps = self.bn_eps
                module.momentum = self.bn_momentum
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn as nn
from nni.retiarii import model_wrapper
from nni.retiarii.nn.pytorch import NasBench201Cell
__all__ = ['NasBench201']
# Candidate operations for NAS-Bench-201 cells, keyed by name.
# Each factory maps (C_in, C_out, stride) to a module.
OPS_WITH_STRIDE = {
    'none': lambda C_in, C_out, stride: Zero(C_in, C_out, stride),
    'avg_pool_3x3': lambda C_in, C_out, stride: Pooling(C_in, C_out, stride, 'avg'),
    'max_pool_3x3': lambda C_in, C_out, stride: Pooling(C_in, C_out, stride, 'max'),
    'conv_3x3': lambda C_in, C_out, stride: ReLUConvBN(C_in, C_out, (3, 3), (stride, stride), (1, 1), (1, 1)),
    'conv_1x1': lambda C_in, C_out, stride: ReLUConvBN(C_in, C_out, (1, 1), (stride, stride), (0, 0), (1, 1)),
    # Identity when shapes already match; otherwise a factorized reduction.
    'skip_connect': lambda C_in, C_out, stride: nn.Identity() if stride == 1 and C_in == C_out
    else FactorizedReduce(C_in, C_out, stride),
}

# The 5 primitives actually searched over by the NasBench201 space below.
PRIMITIVES = ['none', 'skip_connect', 'conv_1x1', 'conv_3x3', 'avg_pool_3x3']
class ReLUConvBN(nn.Module):
    """ReLU -> Conv2d -> BatchNorm2d, the standard NAS-Bench-201 conv block."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation):
        super().__init__()
        layers = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_out, kernel_size, stride=stride,
                      padding=padding, dilation=dilation, bias=False),
            nn.BatchNorm2d(C_out),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        return self.op(x)
class SepConv(nn.Module):
    """ReLU -> depthwise conv -> pointwise conv -> BatchNorm (one separable pass)."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation):
        super().__init__()
        layers = [
            nn.ReLU(inplace=False),
            # Depthwise: one filter per input channel (groups=C_in).
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            # Pointwise 1x1 mixes channels and projects to C_out.
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        return self.op(x)
class Pooling(nn.Module):
    """3x3 average or max pooling, with an optional 1x1 ReLUConvBN to align channels."""

    def __init__(self, C_in, C_out, stride, mode):
        super().__init__()
        # Only insert a preprocessing conv when the channel counts differ.
        self.preprocess = None if C_in == C_out else ReLUConvBN(C_in, C_out, 1, 1, 0, 1)
        if mode == 'avg':
            self.op = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False)
        elif mode == 'max':
            self.op = nn.MaxPool2d(3, stride=stride, padding=1)
        else:
            raise ValueError('Invalid mode={:} in Pooling'.format(mode))

    def forward(self, x):
        out = x if self.preprocess is None else self.preprocess(x)
        return self.op(out)
class Zero(nn.Module):
    """The 'none' op: outputs all zeros, optionally changing channel count / stride."""

    def __init__(self, C_in, C_out, stride):
        super().__init__()
        self.C_in = C_in
        self.C_out = C_out
        self.stride = stride
        self.is_zero = True

    def forward(self, x):
        if self.C_in != self.C_out:
            # Channel counts differ: allocate a fresh zero tensor with the
            # target channel count (spatial dims kept as-is, matching original).
            shape = list(x.shape)
            shape[1] = self.C_out
            return x.new_zeros(shape, dtype=x.dtype, device=x.device)
        if self.stride == 1:
            return x.mul(0.)
        # Same channels but strided: subsample spatially, then zero out.
        return x[:, :, ::self.stride, ::self.stride].mul(0.)
class FactorizedReduce(nn.Module):
    """Halve the spatial resolution with two parallel, offset 1x1 convs, then concat + BN."""

    def __init__(self, C_in, C_out, stride):
        super().__init__()
        self.stride = stride
        self.C_in = C_in
        self.C_out = C_out
        self.relu = nn.ReLU(inplace=False)
        if stride != 2:
            raise ValueError('Invalid stride : {:}'.format(stride))
        # Split the output channels between the two branches (handles odd C_out).
        C_outs = [C_out // 2, C_out - C_out // 2]
        self.convs = nn.ModuleList(
            nn.Conv2d(C_in, c, 1, stride=stride, padding=0, bias=False) for c in C_outs
        )
        self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
        self.bn = nn.BatchNorm2d(C_out)

    def forward(self, x):
        x = self.relu(x)
        # The second branch sees the input shifted by one pixel (pad then crop).
        shifted = self.pad(x)[:, :, 1:, 1:]
        out = torch.cat([self.convs[0](x), self.convs[1](shifted)], dim=1)
        return self.bn(out)
class ResNetBasicblock(nn.Module):
    """Standard residual basic block, used as the fixed reduction block between stages."""

    def __init__(self, inplanes, planes, stride):
        super().__init__()
        assert stride in (1, 2), 'invalid stride {:}'.format(stride)
        self.conv_a = ReLUConvBN(inplanes, planes, 3, stride, 1, 1)
        self.conv_b = ReLUConvBN(planes, planes, 3, 1, 1, 1)
        # Shortcut: downsample spatially when strided, 1x1 conv when only the
        # channel counts differ, identity otherwise.
        if stride == 2:
            self.downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
                nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False))
        elif inplanes != planes:
            self.downsample = ReLUConvBN(inplanes, planes, 1, 1, 0, 1)
        else:
            self.downsample = None
        self.in_dim = inplanes
        self.out_dim = planes
        self.stride = stride
        self.num_conv = 2

    def forward(self, inputs):
        residual = inputs if self.downsample is None else self.downsample(inputs)
        out = self.conv_b(self.conv_a(inputs))
        return residual + out
@model_wrapper
class NasBench201(nn.Module):
    """The full search space proposed by `NAS-Bench-201 <https://arxiv.org/abs/2001.00326>`__.

    It's a stack of :class:`NasBench201Cell`: three stages of N cells each,
    separated by two fixed stride-2 residual blocks.
    """

    def __init__(self,
                 stem_out_channels: int = 16,
                 num_modules_per_stack: int = 5,
                 num_labels: int = 10):
        super().__init__()
        self.channels = C = stem_out_channels
        self.num_modules = N = num_modules_per_stack
        self.num_labels = num_labels
        self.stem = nn.Sequential(
            nn.Conv2d(3, C, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(C)
        )
        # Channel / reduction schedule: N cells at C, reduce, N at 2C, reduce, N at 4C.
        layer_channels = [C] * N + [C * 2] + [C * 2] * N + [C * 4] + [C * 4] * N
        layer_reductions = [False] * N + [True] + [False] * N + [True] + [False] * N

        C_prev = C
        self.cells = nn.ModuleList()
        for C_curr, reduction in zip(layer_channels, layer_reductions):
            if reduction:
                cell = ResNetBasicblock(C_prev, C_curr, 2)
            else:
                # FIX: ``prim`` must be bound as a default argument. The original
                # lambdas closed over the comprehension variable ``prim``, which is
                # looked up only when the factory is *called* — after the
                # comprehension has finished — so every factory built the last
                # primitive in PRIMITIVES instead of its own.
                cell = NasBench201Cell({prim: lambda C_in, C_out, prim=prim: OPS_WITH_STRIDE[prim](C_in, C_out, 1) for prim in PRIMITIVES},
                                       C_prev, C_curr, label='cell')
            self.cells.append(cell)
            C_prev = C_curr

        self.lastact = nn.Sequential(
            nn.BatchNorm2d(C_prev),
            nn.ReLU(inplace=True)
        )
        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, self.num_labels)

    def forward(self, inputs):
        feature = self.stem(inputs)
        for cell in self.cells:
            feature = cell(feature)

        out = self.lastact(feature)
        out = self.global_pooling(out)
        out = out.view(out.size(0), -1)
        logits = self.classifier(out)

        return logits
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""File containing NASNet-series search space.
The implementation is based on NDS.
It's called ``nasnet.py`` simply because NASNet is the first to propose such structure.
"""
from collections import OrderedDict
from typing import Tuple, List, Union, Iterable, Dict, Callable
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
# the following are NAS operations from
# https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py
# Each entry maps an op name to ``lambda C, stride, affine -> nn.Module``.
# ``C`` is both the input and output channel count, ``stride`` is the spatial
# stride (2 inside reduction cells), and ``affine`` toggles learnable
# BatchNorm parameters.
OPS = {
    'none': lambda C, stride, affine:
        Zero(stride),
    'avg_pool_2x2': lambda C, stride, affine:
        nn.AvgPool2d(2, stride=stride, padding=0, count_include_pad=False),
    'avg_pool_3x3': lambda C, stride, affine:
        nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
    'avg_pool_5x5': lambda C, stride, affine:
        nn.AvgPool2d(5, stride=stride, padding=2, count_include_pad=False),
    'max_pool_2x2': lambda C, stride, affine:
        nn.MaxPool2d(2, stride=stride, padding=0),
    'max_pool_3x3': lambda C, stride, affine:
        nn.MaxPool2d(3, stride=stride, padding=1),
    'max_pool_5x5': lambda C, stride, affine:
        nn.MaxPool2d(5, stride=stride, padding=2),
    'max_pool_7x7': lambda C, stride, affine:
        nn.MaxPool2d(7, stride=stride, padding=3),
    # Identity when no downsampling is needed, factorized reduction otherwise.
    'skip_connect': lambda C, stride, affine:
        nn.Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
    'conv_1x1': lambda C, stride, affine:
        nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C, C, 1, stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(C, affine=affine)
        ),
    'conv_3x3': lambda C, stride, affine:
        nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C, C, 3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(C, affine=affine)
        ),
    'sep_conv_3x3': lambda C, stride, affine:
        SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5': lambda C, stride, affine:
        SepConv(C, C, 5, stride, 2, affine=affine),
    'sep_conv_7x7': lambda C, stride, affine:
        SepConv(C, C, 7, stride, 3, affine=affine),
    'dil_conv_3x3': lambda C, stride, affine:
        DilConv(C, C, 3, stride, 2, 2, affine=affine),
    'dil_conv_5x5': lambda C, stride, affine:
        DilConv(C, C, 5, stride, 4, 2, affine=affine),
    'dil_sep_conv_3x3': lambda C, stride, affine:
        DilSepConv(C, C, 3, stride, 2, 2, affine=affine),
    # Factorized asymmetric convolutions (1xN then Nx1).
    'conv_3x1_1x3': lambda C, stride, affine:
        nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C, C, (1, 3), stride=(1, stride), padding=(0, 1), bias=False),
            nn.Conv2d(C, C, (3, 1), stride=(stride, 1), padding=(1, 0), bias=False),
            nn.BatchNorm2d(C, affine=affine)
        ),
    'conv_7x1_1x7': lambda C, stride, affine:
        nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C, C, (1, 7), stride=(1, stride), padding=(0, 3), bias=False),
            nn.Conv2d(C, C, (7, 1), stride=(stride, 1), padding=(3, 0), bias=False),
            nn.BatchNorm2d(C, affine=affine)
        ),
}
class ReLUConvBN(nn.Sequential):
    """ReLU -> Conv2d -> BatchNorm2d as a plain ``nn.Sequential``."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        relu = nn.ReLU(inplace=False)
        conv = nn.Conv2d(C_in, C_out, kernel_size, stride=stride,
                         padding=padding, bias=False)
        bn = nn.BatchNorm2d(C_out, affine=affine)
        super().__init__(relu, conv, bn)
class DilConv(nn.Sequential):
    """Dilated separable conv: ReLU -> dilated depthwise -> pointwise -> BN."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        stages = [
            nn.ReLU(inplace=False),
            # Depthwise (groups=C_in), dilated.
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            # Pointwise projection to C_out.
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        super().__init__(*stages)
class SepConv(nn.Sequential):
    """Separable conv applied twice: (ReLU -> depthwise -> pointwise -> BN) x 2.

    The first depthwise conv carries the stride; the second always uses stride 1.
    """

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        layers = []
        # First pass keeps C_in channels and applies the stride.
        layers += [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_in, affine=affine),
        ]
        # Second pass projects to C_out at stride 1.
        layers += [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        super().__init__(*layers)
class DilSepConv(nn.Sequential):
    """Dilated separable conv applied twice: (ReLU -> dilated depthwise -> pointwise -> BN) x 2."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        layers = []
        # First pass: strided, keeps C_in channels.
        layers += [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_in, affine=affine),
        ]
        # Second pass: stride 1, projects to C_out.
        layers += [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        super().__init__(*layers)
class Zero(nn.Module):
    """The 'none' op: zeros with the input's channel count, spatially strided."""

    def __init__(self, stride):
        super().__init__()
        self.stride = stride

    def forward(self, x):
        if self.stride != 1:
            x = x[:, :, ::self.stride, ::self.stride]
        return x.mul(0.)
class FactorizedReduce(nn.Module):
    """Halve spatial size via two offset stride-2 1x1 convs whose outputs are concatenated."""

    def __init__(self, C_in, C_out, affine=True):
        super().__init__()
        # Each branch produces exactly C_out // 2 channels, so C_out must be even.
        if isinstance(C_out, int):
            assert C_out % 2 == 0
        else:  # is a value choice
            assert all(c % 2 == 0 for c in C_out.all_options())
        self.relu = nn.ReLU(inplace=False)
        self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)
        self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)

    def forward(self, x):
        activated = self.relu(x)
        # Second branch sees the input shifted by one pixel (pad then crop).
        shifted = self.pad(activated)[:, :, 1:, 1:]
        out = torch.cat([self.conv_1(activated), self.conv_2(shifted)], dim=1)
        return self.bn(out)
class DropPath_(nn.Module):
    """Drop-path (stochastic depth) regularization.

    https://github.com/khanrc/pt.darts/blob/0.1/models/ops.py
    During training, each sample in the batch is zeroed with probability
    ``drop_prob`` and survivors are rescaled by ``1 / (1 - drop_prob)``;
    at eval time this is the identity.
    """

    def __init__(self, drop_prob=0.):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        if not self.training or self.drop_prob <= 0.:
            return x
        keep_prob = 1. - self.drop_prob
        mask = torch.zeros((x.size(0), 1, 1, 1), dtype=torch.float, device=x.device).bernoulli_(keep_prob)
        return x.div(keep_prob).mul(mask)
class AuxiliaryHead(nn.Module):
    """Auxiliary classifier attached to an intermediate feature map."""

    def __init__(self, C: int, num_labels: int, dataset: Literal['imagenet', 'cifar']):
        super().__init__()
        # The pooling stride differs because the feature map fed to the head
        # has a different spatial size on the two datasets.
        if dataset == 'imagenet':
            # assuming input size 14x14
            stride = 2
        elif dataset == 'cifar':
            stride = 3

        self.features = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.AvgPool2d(5, stride=stride, padding=0, count_include_pad=False),
            nn.Conv2d(C, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 768, 2, bias=False),
            nn.BatchNorm2d(768),
            nn.ReLU(inplace=True)
        )
        self.classifier = nn.Linear(768, num_labels)

    def forward(self, x):
        feats = self.features(x)
        return self.classifier(feats.view(feats.size(0), -1))
class SequentialBreakdown(nn.Sequential):
    """Wrap an ``nn.Sequential`` so that forward returns every intermediate output as a list."""

    def __init__(self, sequential: nn.Sequential):
        super().__init__(OrderedDict(sequential.named_children()))

    def forward(self, inputs):
        outputs = []
        current = inputs
        for module in self:
            current = module(current)
            outputs.append(current)
        return outputs
class CellPreprocessor(nn.Module):
    """
    Aligning the shape of predecessors.

    If the last cell is a reduction cell, ``pre0`` should be ``FactorizedReduce`` instead of ``ReLUConvBN``.
    See :class:`CellBuilder` on how to calculate those channel numbers.
    """

    def __init__(self, C_pprev: int, C_prev: int, C: int, last_cell_reduce: bool) -> None:
        super().__init__()
        # pre0 handles the cell-before-last; it must also downsample spatially
        # when the last cell halved the resolution.
        self.pre0 = FactorizedReduce(C_pprev, C) if last_cell_reduce else ReLUConvBN(C_pprev, C, 1, 1, 0)
        self.pre1 = ReLUConvBN(C_prev, C, 1, 1, 0)

    def forward(self, cells):
        assert len(cells) == 2
        pprev, prev = cells
        return [self.pre0(pprev), self.pre1(prev)]
class CellPostprocessor(nn.Module):
    """
    The cell outputs previous cell + this cell, so that cells can be directly chained.
    """

    def forward(self, this_cell, previous_cells):
        # The new "previous two" are the old previous cell and the cell just built.
        prev = previous_cells[-1]
        return [prev, this_cell]
class CellBuilder:
    """The cell builder is used in Repeat.
    Builds a cell each time it's "called".
    Note that the builder is ephemeral, it can only be called once for every index.
    """

    def __init__(self, op_candidates: List[str], C_prev_in: int, C_in: int, C: int,
                 num_nodes: int, merge_op: Literal['all', 'loose_end'],
                 first_cell_reduce: bool, last_cell_reduce: bool):
        self.C_prev_in = C_prev_in      # This is the out channels of the cell before last cell.
        self.C_in = C_in                # This is the out channels of last cell.
        self.C = C                      # This is NOT C_out of this stage, instead, C_out = C * len(cell.output_node_indices)
        self.op_candidates = op_candidates
        self.num_nodes = num_nodes
        self.merge_op = merge_op
        self.first_cell_reduce = first_cell_reduce
        self.last_cell_reduce = last_cell_reduce
        self._expect_idx = 0

    def __call__(self, repeat_idx: int):
        if self._expect_idx != repeat_idx:
            raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')

        # It takes an index that is the index in the repeat.
        # Number of predecessors for each cell is fixed to 2.
        num_predecessors = 2
        # Number of ops per node is fixed to 2.
        num_ops_per_node = 2

        # Reduction cell means stride = 2 and channel multiplied by 2.
        is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce

        # self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built.
        preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce)

        ops_factory: Dict[str, Callable[[int, int, int], nn.Module]] = {
            op:  # make final chosen ops named with their aliases
            # FIX: ``op`` must be bound as a default argument. A plain closure
            # looks ``op`` up only when the factory is invoked (after the
            # comprehension has finished), so every factory would otherwise
            # build the *last* candidate operation instead of its own.
            lambda node_index, op_index, input_index, op=op:
                OPS[op](self.C, 2 if is_reduction_cell and (
                    input_index is None or input_index < num_predecessors  # could be none when constructing search space
                ) else 1, True)
            for op in self.op_candidates
        }

        cell = nn.Cell(ops_factory, self.num_nodes, num_ops_per_node, num_predecessors, self.merge_op,
                       preprocessor=preprocessor, postprocessor=CellPostprocessor(),
                       label='reduce' if is_reduction_cell else 'normal')

        # update state
        self.C_prev_in = self.C_in
        self.C_in = self.C * len(cell.output_node_indices)
        self.last_cell_reduce = is_reduction_cell
        self._expect_idx += 1

        return cell
_INIT_PARAMETER_DOCS = """
Parameters
----------
width : int or tuple of int
A fixed initial width or a tuple of widths to choose from.
num_cells : int or tuple of int
A fixed number of cells (depths) to stack, or a tuple of depths to choose from.
dataset : "cifar" | "imagenet"
The essential differences are in "stem" cells, i.e., how they process the raw image input.
Choosing "imagenet" means more downsampling at the beginning of the network.
auxiliary_loss : bool
If true, another auxiliary classification head will produce the another prediction.
This makes the output of network two logits in the training phase.
"""
class NDS(nn.Module):
"""
The unified version of NASNet search space.
We follow the implementation in
`unnas <https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/nas.py>`__.
See `On Network Design Spaces for Visual Recognition <https://arxiv.org/abs/1905.13214>`__ for details.
Different NAS papers usually differ in the way that they specify ``op_candidates`` and ``merge_op``.
``dataset`` here is to give a hint about input resolution, so as to create reasonable stem and auxiliary heads.
NDS has a speciality that it has mutable depths/widths.
This is implemented by accepting a list of int as ``num_cells`` / ``width``.
""" + _INIT_PARAMETER_DOCS + """
op_candidates : list of str
List of operator candidates. Must be from ``OPS``.
merge_op : ``all`` or ``loose_end``
See :class:`~nni.retiarii.nn.pytorch.Cell`.
num_nodes_per_cell : int
See :class:`~nni.retiarii.nn.pytorch.Cell`.
"""
def __init__(self,
op_candidates: List[str],
merge_op: Literal['all', 'loose_end'] = 'all',
num_nodes_per_cell: int = 4,
width: Union[Tuple[int], int] = 16,
num_cells: Union[Tuple[int], int] = 20,
dataset: Literal['cifar', 'imagenet'] = 'imagenet',
auxiliary_loss: bool = False):
super().__init__()
self.dataset = dataset
self.num_labels = 10 if dataset == 'cifar' else 1000
self.auxiliary_loss = auxiliary_loss
# preprocess the specified width and depth
if isinstance(width, Iterable):
C = nn.ValueChoice(list(width), label='width')
else:
C = width
if isinstance(num_cells, Iterable):
num_cells = nn.ValueChoice(list(num_cells), label='depth')
num_cells_per_stage = [i * num_cells // 3 - (i - 1) * num_cells // 3 for i in range(3)]
# auxiliary head is different for network targetted at different datasets
if dataset == 'imagenet':
self.stem0 = nn.Sequential(
nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C // 2),
nn.ReLU(inplace=True),
nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)
self.stem1 = nn.Sequential(
nn.ReLU(inplace=True),
nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(C),
)
C_pprev = C_prev = C_curr = C
last_cell_reduce = True
elif dataset == 'cifar':
self.stem = nn.Sequential(
nn.Conv2d(3, 3 * C, 3, padding=1, bias=False),
nn.BatchNorm2d(3 * C)
)
C_pprev = C_prev = 3 * C
C_curr = C
last_cell_reduce = False
self.stages = nn.ModuleList()
for stage_idx in range(3):
if stage_idx > 0:
C_curr *= 2
# For a stage, we get C_in, C_curr, and C_out.
# C_in is only used in the first cell.
# C_curr is number of channels for each operator in current stage.
# C_out is usually `C * num_nodes_per_cell` because of concat operator.
cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr, num_nodes_per_cell,
merge_op, stage_idx > 0, last_cell_reduce)
stage = nn.Repeat(cell_builder, num_cells_per_stage[stage_idx])
self.stages.append(stage)
# C_pprev is output channel number of last second cell among all the cells already built.
if len(stage) > 1:
# Contains more than one cell
C_pprev = len(stage[-2].output_node_indices) * C_curr
else:
# Look up in the out channels of last stage.
C_pprev = C_prev
# This was originally,
# C_prev = num_nodes_per_cell * C_curr.
# but due to loose end, it becomes,
C_prev = len(stage[-1].output_node_indices) * C_curr
# Useful in aligning the pprev and prev cell.
last_cell_reduce = cell_builder.last_cell_reduce
if stage_idx == 2:
C_to_auxiliary = C_prev
if auxiliary_loss:
assert isinstance(self.stages[2], nn.Sequential), 'Auxiliary loss can only be enabled in retrain mode.'
self.stages[2] = SequentialBreakdown(self.stages[2])
self.auxiliary_head = AuxiliaryHead(C_to_auxiliary, self.num_labels, dataset=self.dataset)
self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = nn.Linear(C_prev, self.num_labels)
def forward(self, inputs):
if self.dataset == 'imagenet':
s0 = self.stem0(inputs)
s1 = self.stem1(s0)
else:
s0 = s1 = self.stem(inputs)
for stage_idx, stage in enumerate(self.stages):
if stage_idx == 2 and self.auxiliary_loss:
s = list(stage([s0, s1]).values())
s0, s1 = s[-1]
if self.training:
# auxiliary loss is attached to the first cell of the last stage.
logits_aux = self.auxiliary_head(s[0][1])
else:
s0, s1 = stage([s0, s1])
out = self.global_pooling(s1)
logits = self.classifier(out.view(out.size(0), -1))
if self.training and self.auxiliary_loss:
return logits, logits_aux
else:
return logits
def set_drop_path_prob(self, drop_prob):
    """
    Set the drop probability of Drop-path in the network.

    Reference: `FractalNet: Ultra-Deep Neural Networks without Residuals <https://arxiv.org/pdf/1605.07648v4.pdf>`__.
    """
    # Walk every submodule and update the probability on each DropPath_ layer.
    drop_path_layers = (m for m in self.modules() if isinstance(m, DropPath_))
    for layer in drop_path_layers:
        layer.drop_prob = drop_prob
@model_wrapper
class NASNet(NDS):
    # Fix: the Sphinx role was written ``:attribute:`` which is not a valid
    # cross-reference role (the Python domain role is ``:attr:``), so docs
    # generation could not resolve the link.
    __doc__ = """
    Search space proposed in `Learning Transferable Architectures for Scalable Image Recognition <https://arxiv.org/abs/1707.07012>`__.

    It is built upon :class:`~nni.retiarii.nn.pytorch.Cell`, and implemented based on :class:`~NDS`.
    Its operator candidates are :attr:`~NASNet.NASNET_OPS`.
    It has 5 nodes per cell, and the output is concatenation of nodes not used as input to other nodes.
    """ + _INIT_PARAMETER_DOCS

    # Operator candidates as listed in the NASNet paper.
    NASNET_OPS = [
        'skip_connect',
        'conv_3x1_1x3',
        'conv_7x1_1x7',
        'dil_conv_3x3',
        'avg_pool_3x3',
        'max_pool_3x3',
        'max_pool_5x5',
        'max_pool_7x7',
        'conv_1x1',
        'conv_3x3',
        'sep_conv_3x3',
        'sep_conv_5x5',
        'sep_conv_7x7',
    ]

    def __init__(self,
                 width: Union[Tuple[int], int] = (16, 24, 32),
                 num_cells: Union[Tuple[int], int] = (4, 8, 12, 16, 20),
                 dataset: Literal['cifar', 'imagenet'] = 'cifar',
                 auxiliary_loss: bool = False):
        # 'loose_end' concatenates only nodes not consumed by other nodes.
        super().__init__(self.NASNET_OPS,
                         merge_op='loose_end',
                         num_nodes_per_cell=5,
                         width=width,
                         num_cells=num_cells,
                         dataset=dataset,
                         auxiliary_loss=auxiliary_loss)
@model_wrapper
class ENAS(NDS):
    # Fix: ``:attribute:`` is not a valid Sphinx role; the Python-domain role
    # is ``:attr:``.
    __doc__ = """Search space proposed in `Efficient neural architecture search via parameter sharing <https://arxiv.org/abs/1802.03268>`__.

    It is built upon :class:`~nni.retiarii.nn.pytorch.Cell`, and implemented based on :class:`~NDS`.
    Its operator candidates are :attr:`~ENAS.ENAS_OPS`.
    It has 5 nodes per cell, and the output is concatenation of nodes not used as input to other nodes.
    """ + _INIT_PARAMETER_DOCS

    # Operator candidates as listed in the ENAS paper.
    ENAS_OPS = [
        'skip_connect',
        'sep_conv_3x3',
        'sep_conv_5x5',
        'avg_pool_3x3',
        'max_pool_3x3',
    ]

    def __init__(self,
                 width: Union[Tuple[int], int] = (16, 24, 32),
                 num_cells: Union[Tuple[int], int] = (4, 8, 12, 16, 20),
                 dataset: Literal['cifar', 'imagenet'] = 'cifar',
                 auxiliary_loss: bool = False):
        # 'loose_end' concatenates only nodes not consumed by other nodes.
        super().__init__(self.ENAS_OPS,
                         merge_op='loose_end',
                         num_nodes_per_cell=5,
                         width=width,
                         num_cells=num_cells,
                         dataset=dataset,
                         auxiliary_loss=auxiliary_loss)
@model_wrapper
class AmoebaNet(NDS):
    # Fix: ``:attribute:`` is not a valid Sphinx role; the Python-domain role
    # is ``:attr:``.
    __doc__ = """Search space proposed in
    `Regularized evolution for image classifier architecture search <https://arxiv.org/abs/1802.01548>`__.

    It is built upon :class:`~nni.retiarii.nn.pytorch.Cell`, and implemented based on :class:`~NDS`.
    Its operator candidates are :attr:`~AmoebaNet.AMOEBA_OPS`.
    It has 5 nodes per cell, and the output is concatenation of nodes not used as input to other nodes.
    """ + _INIT_PARAMETER_DOCS

    # Operator candidates as listed in the AmoebaNet paper.
    AMOEBA_OPS = [
        'skip_connect',
        'sep_conv_3x3',
        'sep_conv_5x5',
        'sep_conv_7x7',
        'avg_pool_3x3',
        'max_pool_3x3',
        'dil_sep_conv_3x3',
        'conv_7x1_1x7',
    ]

    def __init__(self,
                 width: Union[Tuple[int], int] = (16, 24, 32),
                 num_cells: Union[Tuple[int], int] = (4, 8, 12, 16, 20),
                 dataset: Literal['cifar', 'imagenet'] = 'cifar',
                 auxiliary_loss: bool = False):
        # 'loose_end' concatenates only nodes not consumed by other nodes.
        super().__init__(self.AMOEBA_OPS,
                         merge_op='loose_end',
                         num_nodes_per_cell=5,
                         width=width,
                         num_cells=num_cells,
                         dataset=dataset,
                         auxiliary_loss=auxiliary_loss)
@model_wrapper
class PNAS(NDS):
    # Fix: ``:attribute:`` is not a valid Sphinx role; the Python-domain role
    # is ``:attr:``.
    __doc__ = """Search space proposed in
    `Progressive neural architecture search <https://arxiv.org/abs/1712.00559>`__.

    It is built upon :class:`~nni.retiarii.nn.pytorch.Cell`, and implemented based on :class:`~NDS`.
    Its operator candidates are :attr:`~PNAS.PNAS_OPS`.
    It has 5 nodes per cell, and the output is concatenation of all nodes in the cell.
    """ + _INIT_PARAMETER_DOCS

    # Operator candidates as listed in the PNAS paper.
    PNAS_OPS = [
        'sep_conv_3x3',
        'sep_conv_5x5',
        'sep_conv_7x7',
        'conv_7x1_1x7',
        'skip_connect',
        'avg_pool_3x3',
        'max_pool_3x3',
        'dil_conv_3x3',
    ]

    def __init__(self,
                 width: Union[Tuple[int], int] = (16, 24, 32),
                 num_cells: Union[Tuple[int], int] = (4, 8, 12, 16, 20),
                 dataset: Literal['cifar', 'imagenet'] = 'cifar',
                 auxiliary_loss: bool = False):
        # 'all' concatenates every node in the cell (unlike 'loose_end').
        super().__init__(self.PNAS_OPS,
                         merge_op='all',
                         num_nodes_per_cell=5,
                         width=width,
                         num_cells=num_cells,
                         dataset=dataset,
                         auxiliary_loss=auxiliary_loss)
@model_wrapper
class DARTS(NDS):
    # Fix: ``:attribute:`` is not a valid Sphinx role; the Python-domain role
    # is ``:attr:``.
    __doc__ = """Search space proposed in `Darts: Differentiable architecture search <https://arxiv.org/abs/1806.09055>`__.

    It is built upon :class:`~nni.retiarii.nn.pytorch.Cell`, and implemented based on :class:`~NDS`.
    Its operator candidates are :attr:`~DARTS.DARTS_OPS`.
    It has 4 nodes per cell, and the output is concatenation of all nodes in the cell.
    """ + _INIT_PARAMETER_DOCS

    # Operator candidates as listed in the DARTS paper.
    DARTS_OPS = [
        'none',
        'max_pool_3x3',
        'avg_pool_3x3',
        'skip_connect',
        'sep_conv_3x3',
        'sep_conv_5x5',
        'dil_conv_3x3',
        'dil_conv_5x5',
    ]

    def __init__(self,
                 width: Union[Tuple[int], int] = (16, 24, 32),
                 num_cells: Union[Tuple[int], int] = (4, 8, 12, 16, 20),
                 dataset: Literal['cifar', 'imagenet'] = 'cifar',
                 auxiliary_loss: bool = False):
        # 'all' concatenates every node; DARTS uses 4 nodes per cell.
        super().__init__(self.DARTS_OPS,
                         merge_op='all',
                         num_nodes_per_cell=4,
                         width=width,
                         num_cells=num_cells,
                         dataset=dataset,
                         auxiliary_loss=auxiliary_loss)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import math
from typing import Optional, Callable, List, Tuple
import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
def make_divisible(v, divisor, min_val=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    # Default lower bound is the divisor itself.
    if min_val is None:
        min_val = divisor
    # This should work for both value choices and constants.
    # Round to the nearest multiple of `divisor`, but never below `min_val`.
    # NOTE(review): `nn.ValueChoice.max` / `.condition` presumably mirror the
    # builtin max / ternary for plain ints while staying symbolic for
    # ValueChoice inputs — confirm against the ValueChoice API.
    new_v = nn.ValueChoice.max(min_val, round(v + divisor // 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v)
class ConvBNReLU(nn.Sequential):
    """
    The template for a conv-bn-relu block.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        groups: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        activation_layer: Optional[Callable[..., nn.Module]] = None,
        dilation: int = 1,
    ) -> None:
        # "same"-style padding for odd kernels, scaled by dilation.
        padding = (kernel_size - 1) // 2 * dilation
        # Defaults: BatchNorm2d for normalization, ReLU6 for activation.
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        activation_layer = nn.ReLU6 if activation_layer is None else activation_layer
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding,
                         dilation=dilation, groups=groups, bias=False)
        super().__init__(conv, norm_layer(out_channels), activation_layer(inplace=True))
        self.out_channels = out_channels
class SeparableConv(nn.Sequential):
    """
    In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1.
    Residual connection is added if input and output shape are the same.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        activation_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        # Depth-wise conv keeps channel count; point-wise linear projects it.
        depthwise = ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size,
                               groups=in_channels, norm_layer=norm_layer,
                               activation_layer=activation_layer)
        pointwise_linear = ConvBNReLU(in_channels, out_channels, kernel_size=1,
                                      norm_layer=norm_layer, activation_layer=nn.Identity)
        super().__init__(depthwise, pointwise_linear)
        # Shortcut only when spatial size and channel count are both preserved.
        self.residual_connection = stride == 1 and in_channels == out_channels

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = super().forward(x)
        return x + out if self.residual_connection else out
class InvertedResidual(nn.Sequential):
    """
    An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models
    that uses an inverted structure for efficiency reasons.

    It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture.
    It has since been reused for several mobile-optimized CNNs.
    It follows a narrow -> wide -> narrow approach, hence the inversion.
    It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters),
    then a 1x1 convolution is used to reduce the number of channels so input and output can be added.

    Follow implementation of:
    https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/rematlib/mobile_model_v3.py#L453
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        expand_ratio: int,
        kernel_size: int = 3,
        stride: int = 1,
        squeeze_and_excite: Optional[Callable[[int], nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        activation_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        # Empty init first so plain attributes can be set before the layer
        # list is assembled; super().__init__ is invoked again with the layers
        # at the end of this method.
        super().__init__()
        self.stride = stride
        self.out_channels = out_channels
        assert stride in [1, 2]

        # Expanded ("hidden") width; to_int keeps this symbolic when
        # expand_ratio or in_channels is a ValueChoice.
        hidden_ch = nn.ValueChoice.to_int(round(in_channels * expand_ratio))

        # FIXME: check whether this equal works
        # Residual connection is added here stride = 1 and input channels and output channels are the same.
        self.residual_connection = stride == 1 and in_channels == out_channels

        layers: List[nn.Module] = [
            # point-wise convolution
            # NOTE: some paper omit this point-wise convolution when stride = 1.
            # In our implementation, if this pw convolution is intended to be omitted,
            # please use SepConv instead.
            ConvBNReLU(in_channels, hidden_ch, kernel_size=1,
                       norm_layer=norm_layer, activation_layer=activation_layer),
            # depth-wise
            ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
                       norm_layer=norm_layer, activation_layer=activation_layer)
        ]

        if squeeze_and_excite:
            # Optional squeeze-and-excite module on the expanded channels.
            layers.append(squeeze_and_excite(hidden_ch))

        layers += [
            # pw-linear
            ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity)
        ]

        super().__init__(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Add the shortcut only when shapes match (see residual_connection above).
        if self.residual_connection:
            return x + super().forward(x)
        else:
            return super().forward(x)
def inverted_residual_choice_builder(
    expand_ratios: List[int],
    kernel_sizes: List[int],
    downsample: bool,
    stage_input_width: int,
    stage_output_width: int,
    label: str
):
    """Return a per-repeat factory producing a LayerChoice over InvertedResidual variants."""

    def builder(index):
        first_in_stage = index == 0
        # Only the first block of a stage changes width (and possibly resolution).
        in_width = stage_input_width if first_in_stage else stage_output_width
        stride = 2 if (first_in_stage and downsample) else 1

        # It can be implemented with ValueChoice, but we use LayerChoice here
        # to be aligned with the intention of the original ProxylessNAS.
        candidates = {
            f'k{ks}e{er}': InvertedResidual(in_width, stage_output_width, er, ks, stride)
            for er in expand_ratios
            for ks in kernel_sizes
        }
        return nn.LayerChoice(candidates, label=f'{label}_i{index}')

    return builder
@model_wrapper
class ProxylessNAS(nn.Module):
    """
    The search space proposed by `ProxylessNAS <https://arxiv.org/abs/1812.00332>`__.

    Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer
    is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means,
    when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared.
    We note that ``MobileNetV3Space`` is different in this perspective.
    """

    def __init__(self, num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (32, 16, 32, 40, 80, 96, 192, 320, 1280),
                 dropout_rate: float = 0.,
                 width_mult: float = 1.0,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):
        super().__init__()

        # 9 entries: stem, 7 stages, and the final feature-mix width.
        assert len(base_widths) == 9
        # include the last stage info widths here
        # Scale by width multiplier, rounded to a multiple of 8.
        widths = [make_divisible(width * width_mult, 8) for width in base_widths]
        # Whether each stage halves the spatial resolution.
        downsamples = [True, False, True, True, True, False, True, False]

        self.num_labels = num_labels
        self.dropout_rate = dropout_rate
        self.bn_eps = bn_eps
        self.bn_momentum = bn_momentum

        # Stem: stride-2 3x3 conv.
        self.first_conv = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

        blocks = [
            # first stage is fixed
            SeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
        ]

        # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
        for stage in range(2, 8):
            # Rather than returning a fixed module here,
            # we return a builder that dynamically creates module for different `repeat_idx`.
            builder = inverted_residual_choice_builder(
                [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
            if stage < 6:
                # Searchable depth: 1 to 4 repeats per stage.
                blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
            else:
                # No mutation for depth in the last stage.
                # Directly call builder to initiate one block
                blocks.append(builder(0))

        self.blocks = nn.Sequential(*blocks)

        # final layers
        self.feature_mix_layer = ConvBNReLU(widths[7], widths[8], kernel_size=1, norm_layer=nn.BatchNorm2d)
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.dropout_layer = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(widths[-1], num_labels)

        # He-style init for convs; also applies the requested BN hyper-params.
        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)

    def forward(self, x):
        x = self.first_conv(x)
        x = self.blocks(x)
        x = self.feature_mix_layer(x)
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.dropout_layer(x)
        x = self.classifier(x)
        return x

    def no_weight_decay(self):
        # this is useful for timm optimizer
        # no regularizer to linear layer
        if hasattr(self, 'classifier'):
            return {'classifier.weight', 'classifier.bias'}
        return set()
def reset_parameters(model, model_init='he_fout', init_div_groups=False,
                     bn_momentum=0.1, bn_eps=1e-5):
    """Re-initialize weights of ``model`` in place.

    Convs get He init ('he_fout' fan-out or 'he_fin' fan-in, optionally divided
    by groups); batch-norm weights/biases are reset to 1/0 (2d norms also get
    the given momentum/eps); linear layers get N(0, 0.01) weights and zero bias.
    Raises NotImplementedError for an unknown ``model_init``.
    """
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            if model_init == 'he_fout':
                fan = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            elif model_init == 'he_fin':
                fan = module.kernel_size[0] * module.kernel_size[1] * module.in_channels
            else:
                raise NotImplementedError
            if init_div_groups:
                fan /= module.groups
            module.weight.data.normal_(0, math.sqrt(2. / fan))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
            module.momentum = bn_momentum
            module.eps = bn_eps
        elif isinstance(module, nn.Linear):
            module.weight.data.normal_(0, 0.01)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.BatchNorm1d):
            # NOTE: 1d norms only get their affine params reset, not momentum/eps.
            module.weight.data.fill_(1)
            module.bias.data.zero_()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
class ShuffleNetBlock(nn.Module):
    """
    Describe the basic building block of shuffle net, as described in
    `ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices <https://arxiv.org/pdf/1707.01083.pdf>`__.

    When stride = 1, the block expects an input with ``2 * input channels``. Otherwise input channels.
    """

    def __init__(self, in_channels: int, out_channels: int, mid_channels: int, *,
                 kernel_size: int, stride: int, sequence: str = "pdp", affine: bool = True):
        super().__init__()
        assert stride in [1, 2]
        assert kernel_size in [3, 5, 7]
        # Channels on the main branch: with stride 1 the channel shuffle splits
        # the input in half; with stride 2 both branches see the whole input.
        self.channels = in_channels // 2 if stride == 1 else in_channels
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad = kernel_size // 2
        # Main-branch output channels: total output minus the shortcut branch.
        self.oup_main = out_channels - self.channels
        self.affine = affine
        assert self.oup_main > 0

        self.branch_main = nn.Sequential(*self._decode_point_depth_conv(sequence))

        if stride == 2:
            # Projection branch downsamples the shortcut (dw conv + pw-linear).
            self.branch_proj = nn.Sequential(
                # dw
                nn.Conv2d(self.channels, self.channels, kernel_size, stride, self.pad,
                          groups=self.channels, bias=False),
                nn.BatchNorm2d(self.channels, affine=affine),
                # pw-linear
                nn.Conv2d(self.channels, self.channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(self.channels, affine=affine),
                nn.ReLU(inplace=True)
            )
        else:
            # empty block to be compatible with torchscript
            self.branch_proj = nn.Sequential()

    def forward(self, x):
        if self.stride == 2:
            x_proj, x = self.branch_proj(x), x
        else:
            # Shuffle then split: one half goes straight through, the other
            # through the main branch.
            x_proj, x = self._channel_shuffle(x)
        return torch.cat((x_proj, self.branch_main(x)), 1)

    def _decode_point_depth_conv(self, sequence):
        # Build the main-branch layers from a string of 'p' (point-wise) and
        # 'd' (depth-wise) tokens, e.g. "pdp" or "dpdpdp".
        result = []
        first_depth = first_point = True
        pc = c = self.channels
        for i, token in enumerate(sequence):
            # compute output channels of this conv
            if i + 1 == len(sequence):
                assert token == "p", "Last conv must be point-wise conv."
                c = self.oup_main
            elif token == "p" and first_point:
                c = self.mid_channels
            if token == "d":
                # depth-wise conv
                if isinstance(pc, int) and isinstance(c, int):
                    # check can only be done for static channels
                    assert pc == c, "Depth-wise conv must not change channels."
                # Only the first depth-wise conv carries the block's stride.
                result.append(nn.Conv2d(pc, c, self.kernel_size, self.stride if first_depth else 1, self.pad,
                                        groups=c, bias=False))
                result.append(nn.BatchNorm2d(c, affine=self.affine))
                first_depth = False
            elif token == "p":
                # point-wise conv
                result.append(nn.Conv2d(pc, c, 1, 1, 0, bias=False))
                result.append(nn.BatchNorm2d(c, affine=self.affine))
                result.append(nn.ReLU(inplace=True))
                first_point = False
            else:
                raise ValueError("Conv sequence must be d and p.")
            pc = c
        return result

    def _channel_shuffle(self, x):
        # Interleave channels pairwise, then split into two halves of
        # num_channels // 2 each (returned as a tuple).
        bs, num_channels, height, width = x.size()
        # NOTE: this line is commented for torchscript
        # assert (num_channels % 4 == 0)
        x = x.reshape(bs * num_channels // 2, 2, height * width)
        x = x.permute(1, 0, 2)
        x = x.reshape(2, -1, num_channels // 2, height, width)
        return x[0], x[1]
class ShuffleXceptionBlock(ShuffleNetBlock):
    """
    The ``choice_x`` version of shuffle net block, described in
    `Single Path One-shot <https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123610528.pdf>`__.
    """

    def __init__(self, in_channels: int, out_channels: int, mid_channels: int, *, stride: int, affine: bool = True):
        # Three depth-wise/point-wise pairs with a fixed 3x3 kernel.
        super().__init__(
            in_channels,
            out_channels,
            mid_channels,
            kernel_size=3,
            stride=stride,
            sequence="dpdpdp",
            affine=affine,
        )
@model_wrapper
class ShuffleNetSpace(nn.Module):
    """
    The search space proposed in `Single Path One-shot <https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123610528.pdf>`__.

    The basic building block design is inspired by a state-of-the-art manually-designed network --
    `ShuffleNetV2 <https://openaccess.thecvf.com/content_ECCV_2018/html/Ningning_Light-weight_CNN_Architecture_ECCV_2018_paper.html>`__.
    There are 20 choice blocks in total. Each choice block has 4 candidates, namely ``choice 3``, ``choice 5``,
    ``choice_7`` and ``choice_x`` respectively. They differ in kernel sizes and the number of depthwise convolutions.
    The size of the search space is :math:`4^{20}`.

    Parameters
    ----------
    num_labels : int
        Number of classes for the classification head. Default: 1000.
    channel_search : bool
        If true, for each building block, the number of ``mid_channels``
        (output channels of the first 1x1 conv in each building block) varies from 0.2x to 1.6x (quantized to multiple of 0.2).
        Here, "k-x" means k times the number of default channels.
        Otherwise, 1.0x is used by default. Default: false.
    affine : bool
        Apply affine to all batch norm. Default: false.
    """

    def __init__(self,
                 num_labels: int = 1000,
                 channel_search: bool = False,
                 affine: bool = False):
        super().__init__()

        self.num_labels = num_labels
        self.channel_search = channel_search
        self.affine = affine

        # the block number in each stage. 4 stages in total. 20 blocks in total.
        self.stage_repeats = [4, 4, 8, 4]

        # output channels for all stages, including the very first layer and the very last layer
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        # building first layer
        out_channels = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, out_channels, 3, 2, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

        self.features = []

        global_block_idx = 0
        for stage_idx, num_repeat in enumerate(self.stage_repeats):
            for block_idx in range(num_repeat):
                # count global index to give names to choices
                global_block_idx += 1

                # get ready for input and output
                in_channels = out_channels
                out_channels = self.stage_out_channels[stage_idx + 2]
                # Only the first block of each stage downsamples.
                stride = 2 if block_idx == 0 else 1

                # mid channels can be searched
                base_mid_channels = out_channels // 2
                if self.channel_search:
                    # Candidate widths: 0.2x to 1.6x of the base, step 0.2.
                    k_choice_list = [int(base_mid_channels * (.2 * k)) for k in range(1, 9)]
                    mid_channels = nn.ValueChoice(k_choice_list, label=f'channel_{global_block_idx}')
                else:
                    mid_channels = int(base_mid_channels)

                # The four candidates: kernel 3/5/7 ShuffleNet blocks plus the
                # Xception-style block.
                choice_block = nn.LayerChoice([
                    ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
                    ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
                    ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
                    ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
                ], label=f'layer_{global_block_idx}')
                self.features.append(choice_block)

        self.features = nn.Sequential(*self.features)

        # final layers
        last_conv_channels = self.stage_out_channels[-1]
        self.conv_last = nn.Sequential(
            nn.Conv2d(out_channels, last_conv_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(last_conv_channels, affine=affine),
            nn.ReLU(inplace=True),
        )
        self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(last_conv_channels, num_labels, bias=False),
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.first_conv(x)
        x = self.features(x)
        x = self.conv_last(x)
        x = self.globalpool(x)
        x = self.dropout(x)
        # Flatten; batch dim inferred from the fixed final channel count.
        x = x.contiguous().view(-1, self.stage_out_channels[-1])
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # Name-aware init: the stem conv uses a fixed std, later convs scale
        # by fan-in; BN biases get a small positive constant.
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'first' in name:
                    torch.nn.init.normal_(m.weight, 0, 0.01)
                else:
                    torch.nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    torch.nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0.0001)
                torch.nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.BatchNorm1d):
                if m.weight is not None:
                    torch.nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0.0001)
                torch.nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
...@@ -388,7 +388,7 @@ def _valuechoice_staticmethod_helper(orig_func): ...@@ -388,7 +388,7 @@ def _valuechoice_staticmethod_helper(orig_func):
return orig_func return orig_func
class ValueChoiceX(Translatable): class ValueChoiceX(Translatable, nn.Module):
"""Internal API. Implementation note: """Internal API. Implementation note:
The transformed (X) version of value choice. The transformed (X) version of value choice.
...@@ -404,6 +404,8 @@ class ValueChoiceX(Translatable): ...@@ -404,6 +404,8 @@ class ValueChoiceX(Translatable):
2. For graph-engine, it uses evaluate to calculate the result. 2. For graph-engine, it uses evaluate to calculate the result.
Potentially, we have to implement the evaluation logic in oneshot algorithms. I believe we can postpone the discussion till then. Potentially, we have to implement the evaluation logic in oneshot algorithms. I believe we can postpone the discussion till then.
This class is implemented as a ``nn.Module`` so that it can be scanned by python engine / torchscript.
""" """
def __init__(self, function: Callable[..., Any], repr_template: str, arguments: List[Any], dry_run: bool = True): def __init__(self, function: Callable[..., Any], repr_template: str, arguments: List[Any], dry_run: bool = True):
...@@ -424,6 +426,9 @@ class ValueChoiceX(Translatable): ...@@ -424,6 +426,9 @@ class ValueChoiceX(Translatable):
# for sanity check # for sanity check
self.dry_run() self.dry_run()
def forward(self) -> None:
raise RuntimeError('You should never call forward of the composition of a value-choice.')
def inner_choices(self) -> Iterable['ValueChoice']: def inner_choices(self) -> Iterable['ValueChoice']:
""" """
Return an iterable of all leaf value choices. Return an iterable of all leaf value choices.
......
...@@ -93,6 +93,8 @@ class Repeat(Mutable): ...@@ -93,6 +93,8 @@ class Repeat(Mutable):
depth: Union[int, Tuple[int, int]], *, label: Optional[str] = None): depth: Union[int, Tuple[int, int]], *, label: Optional[str] = None):
super().__init__() super().__init__()
self._label = None # by default, no label
if isinstance(depth, ValueChoiceX): if isinstance(depth, ValueChoiceX):
if label is not None: if label is not None:
warnings.warn( warnings.warn(
...@@ -103,10 +105,16 @@ class Repeat(Mutable): ...@@ -103,10 +105,16 @@ class Repeat(Mutable):
all_values = list(self.depth_choice.all_options()) all_values = list(self.depth_choice.all_options())
self.min_depth = min(all_values) self.min_depth = min(all_values)
self.max_depth = max(all_values) self.max_depth = max(all_values)
if isinstance(depth, ValueChoice):
self._label = depth.label # if a leaf node
elif isinstance(depth, tuple): elif isinstance(depth, tuple):
self.min_depth = depth if isinstance(depth, int) else depth[0] self.min_depth = depth if isinstance(depth, int) else depth[0]
self.max_depth = depth if isinstance(depth, int) else depth[1] self.max_depth = depth if isinstance(depth, int) else depth[1]
self.depth_choice = ValueChoice(list(range(self.min_depth, self.max_depth + 1)), label=label) self.depth_choice = ValueChoice(list(range(self.min_depth, self.max_depth + 1)), label=label)
self._label = self.depth_choice.label
elif isinstance(depth, int): elif isinstance(depth, int):
self.min_depth = self.max_depth = depth self.min_depth = self.max_depth = depth
self.depth_choice = depth self.depth_choice = depth
...@@ -116,8 +124,8 @@ class Repeat(Mutable): ...@@ -116,8 +124,8 @@ class Repeat(Mutable):
self.blocks = nn.ModuleList(self._replicate_and_instantiate(blocks, self.max_depth)) self.blocks = nn.ModuleList(self._replicate_and_instantiate(blocks, self.max_depth))
@property @property
def label(self): def label(self) -> Optional[str]:
return self.depth_choice.label return self._label
def forward(self, x): def forward(self, x):
for block in self.blocks: for block in self.blocks:
...@@ -142,6 +150,9 @@ class Repeat(Mutable): ...@@ -142,6 +150,9 @@ class Repeat(Mutable):
# shortcut for blocks[index] # shortcut for blocks[index]
return self.blocks[index] return self.blocks[index]
def __len__(self):
return self.max_depth
class NasBench201Cell(nn.Module): class NasBench201Cell(nn.Module):
""" """
......
...@@ -311,9 +311,10 @@ def extract_mutation_from_pt_module(pytorch_model: nn.Module) -> Tuple[Model, Op ...@@ -311,9 +311,10 @@ def extract_mutation_from_pt_module(pytorch_model: nn.Module) -> Tuple[Model, Op
node = graph.add_node(name, 'InputChoice', node = graph.add_node(name, 'InputChoice',
{'n_candidates': module.n_candidates, 'n_chosen': module.n_chosen}) {'n_candidates': module.n_candidates, 'n_chosen': module.n_chosen})
node.label = module.label node.label = module.label
if isinstance(module, ValueChoice): if isinstance(module, ValueChoiceX):
node = graph.add_node(name, 'ValueChoice', {'candidates': module.candidates}) for i, choice in enumerate(module.inner_choices()):
node.label = module.label node = graph.add_node(f'{name}.{i}', 'ValueChoice', {'candidates': choice.candidates})
node.label = choice.label
if isinstance(module, NasBench101Cell): if isinstance(module, NasBench101Cell):
node = graph.add_node(name, 'NasBench101Cell', { node = graph.add_node(name, 'NasBench101Cell', {
'max_num_edges': module.max_num_edges 'max_num_edges': module.max_num_edges
......
...@@ -683,6 +683,27 @@ class GraphIR(unittest.TestCase): ...@@ -683,6 +683,27 @@ class GraphIR(unittest.TestCase):
new_model = _apply_all_mutators(model, mutators, samplers) new_model = _apply_all_mutators(model, mutators, samplers)
self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all()) self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all())
def test_repeat_valuechoicex(self):
class AddOne(nn.Module):
def forward(self, x):
return x + 1
@model_wrapper
class Net(nn.Module):
def __init__(self):
super().__init__()
self.block = nn.Repeat(AddOne(), nn.ValueChoice([0, 2, 4]) + 1)
def forward(self, x):
return self.block(x)
model, mutators = self._get_model_with_mutators(Net())
self.assertEqual(len(mutators), 1 + self.repeat_incr + self.value_choice_incr)
samplers = [EnumerateSampler() for _ in range(len(mutators))]
for target in [1, 3, 5]:
new_model = _apply_all_mutators(model, mutators, samplers)
self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all())
def test_repeat_weight_inheritance(self): def test_repeat_weight_inheritance(self):
@model_wrapper @model_wrapper
class Net(nn.Module): class Net(nn.Module):
......
"""Currently, this is only a sanity-check (runnable) of spaces provided in hub."""
import random
from torchvision import transforms
from torchvision.datasets import FakeData
import pytest
import pytorch_lightning
import nni
import nni.runtime.platform.test
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.hub.pytorch as searchspace
from nni.retiarii.utils import ContextStack
from nni.retiarii.execution.utils import _unpack_if_only_one
from nni.retiarii.mutator import InvalidMutation, Sampler
from nni.retiarii.nn.pytorch.mutator import extract_mutation_from_pt_module
pytestmark = pytest.mark.skipif(pytorch_lightning.__version__ < '1.0', reason='Incompatible APIs.')
def _reset():
    """Restore NNI trial-platform globals so other SDK tests see a clean slate."""
    # this is to not affect other tests in sdk
    nni.runtime.platform.test._last_metric = None
    nni.trial._params = {'foo': 'bar', 'parameter_id': 0}
    nni.trial._intermediate_seq = 0
class RandomSampler(Sampler):
    """Sampler that picks uniformly at random, counting how many choices were made."""

    def __init__(self):
        self.counter = 0

    def choice(self, candidates, *args, **kwargs):
        picked = random.choice(candidates)
        self.counter += 1
        return picked
def try_mutation_until_success(base_model, mutators, retry):
    """Sample a mutated model, retrying up to ``retry`` times on InvalidMutation.

    Raises ValueError once all attempts are exhausted.
    """
    attempts_left = retry
    while True:
        if not attempts_left:
            raise ValueError('Retry exhausted.')
        try:
            candidate = base_model
            for mutator in mutators:
                candidate = mutator.bind_sampler(RandomSampler()).apply(candidate)
            return candidate
        except InvalidMutation:
            attempts_left -= 1
def _test_searchspace_on_dataset(searchspace, dataset='cifar10', arch=None):
    """Instantiate one architecture from ``searchspace`` and fit it briefly on fake data.

    Parameters
    ----------
    searchspace
        A Retiarii search-space module to extract mutations from.
    dataset : str
        Either ``'cifar10'`` or ``'imagenet'`` (controls fake-data shape).
    arch : dict, optional
        A fixed architecture dict; when ``None`` one is sampled randomly.
    """
    _reset()

    # dataset supports cifar10 and imagenet
    model, mutators = extract_mutation_from_pt_module(searchspace)
    if arch is None:
        model = try_mutation_until_success(model, mutators, 10)
        # Collect the sampled decisions into a label -> choice dict.
        arch = {mut.mutator.label: _unpack_if_only_one(mut.samples) for mut in model.history}

    print('Selected model:', arch)
    # Instantiate the concrete PyTorch model under a fixed-arch context.
    with ContextStack('fixed', arch):
        model = model.python_class(**model.python_init_params)

    if dataset == 'cifar10':
        train_data = FakeData(size=200, image_size=(3, 32, 32), num_classes=10, transform=transforms.ToTensor())
        valid_data = FakeData(size=200, image_size=(3, 32, 32), num_classes=10, transform=transforms.ToTensor())
    elif dataset == 'imagenet':
        train_data = FakeData(size=200, image_size=(3, 224, 224), num_classes=1000, transform=transforms.ToTensor())
        valid_data = FakeData(size=200, image_size=(3, 224, 224), num_classes=1000, transform=transforms.ToTensor())
    else:
        # FIX: previously an unknown dataset fell through to a confusing
        # NameError on `train_data`; fail fast with a clear message instead.
        raise ValueError(f'Unsupported dataset: {dataset!r} (expected "cifar10" or "imagenet")')

    train_dataloader = pl.DataLoader(train_data, batch_size=4, shuffle=True)
    valid_dataloader = pl.DataLoader(valid_data, batch_size=6)

    # A single, heavily-truncated epoch — this is only a smoke test.
    evaluator = pl.Classification(
        train_dataloader=train_dataloader,
        val_dataloaders=valid_dataloader,
        export_onnx=False,
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=3,
    )
    evaluator.fit(model)

    # cleanup to avoid affecting later test cases
    _reset()
def test_nasbench101():
    """Smoke-test the NAS-Bench-101 space on fake CIFAR-10 data."""
    _test_searchspace_on_dataset(searchspace.NasBench101())
def test_nasbench201():
    """Smoke-test the NAS-Bench-201 space on fake CIFAR-10 data."""
    # FIX: this test previously instantiated ``searchspace.NasBench101()``
    # (copy-paste from the test above), so NasBench201 was never exercised.
    ss = searchspace.NasBench201()
    _test_searchspace_on_dataset(ss)
def test_nasnet():
    """Smoke-test each NDS-family space on fake CIFAR-10 data."""
    # Instantiate lazily inside the loop so per-test state stays interleaved
    # exactly as in sequential construct-then-run calls.
    for space_cls in (searchspace.NASNet, searchspace.ENAS, searchspace.AmoebaNet,
                      searchspace.PNAS, searchspace.DARTS):
        _test_searchspace_on_dataset(space_cls())
def test_nasnet_corner_case():
    # The case is that output channel of reduce cell and normal cell are different
    # CellPreprocessor needs to know whether its predecessors are normal cell / reduction cell
    # The fixed sample below pins every cell-level choice:
    #   "<cell>/op_<node>_<slot>"    — operator applied on each of the two input slots of a node
    #   "<cell>/input_<node>_<slot>" — index of the predecessor node feeding that slot
    # It is fed to NASNet via ``arch=`` so no random sampling is involved.
    arch = {
        "width": 32,
        "depth": 8,
        "normal/op_2_0": "max_pool_7x7",
        "normal/op_2_1": "conv_1x1",
        "normal/op_3_0": "sep_conv_5x5",
        "normal/op_3_1": "max_pool_7x7",
        "normal/op_4_0": "sep_conv_5x5",
        "normal/op_4_1": "conv_1x1",
        "normal/op_5_0": "max_pool_3x3",
        "normal/op_5_1": "sep_conv_5x5",
        "normal/op_6_0": "max_pool_7x7",
        "normal/op_6_1": "sep_conv_5x5",
        "normal/input_2_0": 0,
        "normal/input_2_1": 0,
        "normal/input_3_0": 0,
        "normal/input_3_1": 1,
        "normal/input_4_0": 1,
        "normal/input_4_1": 2,
        "normal/input_5_0": 0,
        "normal/input_5_1": 1,
        "normal/input_6_0": 0,
        "normal/input_6_1": 2,
        "reduce/op_2_0": "dil_conv_3x3",
        "reduce/op_2_1": "max_pool_7x7",
        "reduce/op_3_0": "dil_conv_3x3",
        "reduce/op_3_1": "dil_conv_3x3",
        "reduce/op_4_0": "conv_7x1_1x7",
        "reduce/op_4_1": "conv_7x1_1x7",
        "reduce/op_5_0": "max_pool_3x3",
        "reduce/op_5_1": "conv_1x1",
        "reduce/op_6_0": "sep_conv_7x7",
        "reduce/op_6_1": "sep_conv_3x3",
        "reduce/input_2_0": 1,
        "reduce/input_2_1": 1,
        "reduce/input_3_0": 0,
        "reduce/input_3_1": 1,
        "reduce/input_4_0": 2,
        "reduce/input_4_1": 1,
        "reduce/input_5_0": 0,
        "reduce/input_5_1": 4,
        "reduce/input_6_0": 3,
        "reduce/input_6_1": 3,
    }
    _test_searchspace_on_dataset(searchspace.NASNet(), arch=arch)
def test_nasnet_fixwd():
    """NASNet-family spaces with fixed (non-searched) width and depth."""
    _test_searchspace_on_dataset(searchspace.DARTS(width=16, num_cells=4))    # minimum
    _test_searchspace_on_dataset(searchspace.NASNet(width=16, num_cells=12))  # medium
def test_nasnet_imagenet():
    """ENAS and PNAS configured for ImageNet, run on fake ImageNet-shaped data."""
    for space_cls in (searchspace.ENAS, searchspace.PNAS):
        _test_searchspace_on_dataset(space_cls(dataset='imagenet'), dataset='imagenet')
def test_proxylessnas():
    """Smoke-test the ProxylessNAS space on fake ImageNet-shaped data."""
    _test_searchspace_on_dataset(searchspace.ProxylessNAS(), dataset='imagenet')
def test_mobilenetv3():
    """Smoke-test the MobileNetV3 space on fake ImageNet-shaped data."""
    _test_searchspace_on_dataset(searchspace.MobileNetV3Space(), dataset='imagenet')
def test_shufflenet():
    """ShuffleNet space, with and without channel search, on fake ImageNet data."""
    _test_searchspace_on_dataset(searchspace.ShuffleNetSpace(), dataset='imagenet')
    _test_searchspace_on_dataset(searchspace.ShuffleNetSpace(channel_search=True), dataset='imagenet')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment