Unverified Commit e8dc4562 authored by Yuge Zhang, committed by GitHub

Searched model zoo in NAS space hub (#4831)

parent fe89e5af
...@@ -18,6 +18,7 @@ schema
scikit-learn >= 0.24.1
scipy < 1.8 ; python_version < "3.8"
scipy ; python_version >= "3.8"
tqdm
typeguard
typing_extensions >= 4.0.0
websockets >= 10.1
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import hashlib
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
import requests
import tqdm
__all__ = ['NNI_BLOB', 'load_or_download_file', 'upload_file', 'nni_cache_home']
# Blob that contains some downloadable files.
NNI_BLOB = 'https://nni.blob.core.windows.net'
# Override these environment vars to move your cache.
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
def nni_cache_home() -> str:
return os.path.expanduser(
os.getenv(ENV_NNI_HOME,
os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
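# Illustrative resolution order (derived from nni_cache_home above):
#   NNI_HOME=/data/nni          -> /data/nni
#   XDG_CACHE_HOME=/tmp/cache   -> /tmp/cache/nni
#   neither set                 -> ~/.cache/nni (with ~ expanded)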
def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True) -> None:
"""Download the ``download_url`` to ``local_path``, and check its hash.
If ``local_path`` already exists, and hash is checked, do nothing.
"""
f = None
hash_prefix = Path(local_path).stem.split('-')[-1]
_logger = logging.getLogger(__name__)
try:
sha256 = hashlib.sha256()
if Path(local_path).exists():
_logger.info('"%s" already exists. Checking hash.', local_path)
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
elif download:
_logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)
# Follow download implementation in torchvision:
# We deliberately save it in a temp file and move it after
# download is complete. This prevents a local working checkpoint
# being overridden by a broken download.
dst_dir = Path(local_path).parent
dst_dir.mkdir(exist_ok=True, parents=True)
f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
r = requests.get(download_url, stream=True)
total_length: Optional[str] = r.headers.get('content-length')
assert total_length is not None, f'Content length is not found in the response of {download_url}'
with tqdm.tqdm(total=int(total_length), disable=not progress,
unit='B', unit_scale=True, unit_divisor=1024) as pbar:
for chunk in r.iter_content(8192):
f.write(chunk)
sha256.update(chunk)
pbar.update(len(chunk))
f.flush()
else:
raise FileNotFoundError(
'Download is not enabled, and file does not exist: {}. Please set download=True.'.format(local_path)
)
digest = sha256.hexdigest()
if not digest.startswith(hash_prefix):
raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))
if f is not None:
shutil.move(f.name, local_path)
finally:
if f is not None:
f.close()
if os.path.exists(f.name):
os.remove(f.name)
def upload_file(local_path: str, destination_path: str, sas_token: str) -> str:
"""For NNI maintainers to add updated static files to the Azure blob easily.
In most cases, you don't need to calculate the hash on your own, it will be automatically inserted.
For example, if you write ``https://xxx.com/myfile.zip``, the uploaded file will look like
``https://xxx.com/myfile-da5f43b7.zip``.
Need to have `azcopy installed <https://docs.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy>`_,
and a SAS token for the destination storage (``?`` should be included as prefix of token).
Returns a string which is the uploaded path.
"""
_logger = logging.getLogger(__name__)
sha256 = hashlib.sha256()
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
digest = sha256.hexdigest()
hash_prefix = digest[:8]
_logger.info('Hash of %s is %s', local_path, digest)
stem, suffix = destination_path.rsplit('.', 1)
if not stem.endswith('-' + hash_prefix):
destination_path = stem + '-' + hash_prefix + '.' + suffix
subprocess.run(['azcopy', 'copy', local_path, destination_path + sas_token], check=True)
return destination_path
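# Illustrative usage of upload_file (hypothetical file, container and token):
#   upload_file('myfile.zip', f'{NNI_BLOB}/somecontainer/myfile.zip', '?sv=...&sig=...')
# uploads via azcopy and returns f'{NNI_BLOB}/somecontainer/myfile-<8-hex-hash>.zip'.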
...@@ -3,24 +3,21 @@ ...@@ -3,24 +3,21 @@
import os import os
from nni.common.blob_utils import NNI_BLOB, nni_cache_home
ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR' ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR'
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
def _get_nasbenchmark_dir(): def _get_nasbenchmark_dir():
nni_home = os.path.expanduser( nni_home = nni_cache_home()
os.getenv(ENV_NNI_HOME,
os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark')) return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark'))
DATABASE_DIR = _get_nasbenchmark_dir() DATABASE_DIR = _get_nasbenchmark_dir()
DB_URLS = { DB_URLS = {
'nasbench101': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench101-209f5694.db', 'nasbench101': f'{NNI_BLOB}/nasbenchmark/nasbench101-209f5694.db',
'nasbench201': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench201-b2b60732.db', 'nasbench201': f'{NNI_BLOB}/nasbenchmark/nasbench201-b2b60732.db',
'nds': 'https://nni.blob.core.windows.net/nasbenchmark/nds-5745c235.db' 'nds': f'{NNI_BLOB}/nasbenchmark/nds-5745c235.db'
} }
...@@ -2,19 +2,13 @@ ...@@ -2,19 +2,13 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import functools import functools
import hashlib
import json import json
import logging
import os import os
import shutil
import tempfile
from pathlib import Path
from typing import Optional
import requests
import tqdm
from playhouse.sqlite_ext import SqliteExtDatabase from playhouse.sqlite_ext import SqliteExtDatabase
from nni.common.blob_utils import load_or_download_file
from .constants import DB_URLS, DATABASE_DIR from .constants import DB_URLS, DATABASE_DIR
...@@ -24,60 +18,6 @@ json_dumps = functools.partial(json.dumps, sort_keys=True) ...@@ -24,60 +18,6 @@ json_dumps = functools.partial(json.dumps, sort_keys=True)
_loaded_benchmarks = {} _loaded_benchmarks = {}
def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True):
f = None
hash_prefix = Path(local_path).stem.split('-')[-1]
_logger = logging.getLogger(__name__)
try:
sha256 = hashlib.sha256()
if Path(local_path).exists():
_logger.info('"%s" already exists. Checking hash.', local_path)
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
elif download:
_logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)
# Follow download implementation in torchvision:
# We deliberately save it in a temp file and move it after
# download is complete. This prevents a local working checkpoint
# being overridden by a broken download.
dst_dir = Path(local_path).parent
dst_dir.mkdir(exist_ok=True, parents=True)
f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
r = requests.get(download_url, stream=True)
total_length: Optional[str] = r.headers.get('content-length')
assert total_length is not None, f'Content length is not found in the response of {download_url}'
with tqdm.tqdm(total=int(total_length), disable=not progress,
unit='B', unit_scale=True, unit_divisor=1024) as pbar:
for chunk in r.iter_content(8192):
f.write(chunk)
sha256.update(chunk)
pbar.update(len(chunk))
f.flush()
else:
raise FileNotFoundError('Download is not enabled, but file still does not exist: {}'.format(local_path))
digest = sha256.hexdigest()
if not digest.startswith(hash_prefix):
raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))
if f is not None:
shutil.move(f.name, local_path)
finally:
if f is not None:
f.close()
if os.path.exists(f.name):
os.remove(f.name)
def load_benchmark(benchmark: str) -> SqliteExtDatabase: def load_benchmark(benchmark: str) -> SqliteExtDatabase:
""" """
Load a benchmark as a database. Load a benchmark as a database.
......
...@@ -237,7 +237,7 @@ class _SupervisedLearningModule(LightningModule): ...@@ -237,7 +237,7 @@ class _SupervisedLearningModule(LightningModule):
class _AccuracyWithLogits(torchmetrics.Accuracy): class _AccuracyWithLogits(torchmetrics.Accuracy):
def update(self, pred, target): def update(self, pred, target):
return super().update(nn_functional.softmax(pred), target) return super().update(nn_functional.softmax(pred, dim=-1), target)
@nni.trace @nni.trace
......
...@@ -10,20 +10,74 @@ For further motivations and plans, please see https://github.com/microsoft/nni/i
1. Runnable
2. Load checkpoint of searched architecture and evaluate
3. Reproduce "retrain" (i.e., training the searched architecture from scratch)
4. Runnable with built-in algos
5. Reproduce result with at least one algo
| | 1 | 2 | 3 | 4 | 5 |
|------------------------|--------|--------|--------|--------|--------|
| NasBench101 | Y | - | | | |
| NasBench201 | Y | - | | | |
| NASNet | Y | - | | | |
| ENAS | Y | - | | | |
| AmoebaNet | Y | - | | | |
| PNAS | Y | - | | | |
| DARTS | Y | Y | | | |
| ProxylessNAS | Y | Y | | | |
| MobileNetV3Space | Y | Y | | | |
| ShuffleNetSpace | Y | Y | | | |
| ShuffleNetSpace (ch) | Y | - | | | |
* `-`: Result unavailable because no published checkpoints / architectures are available.
* NASNet, ENAS, AmoebaNet, PNAS, DARTS are based on the same implementation, with configuration differences.
* NasBench101 and 201 will directly proceed to stage 3 as it's cheaper to train them than to find a checkpoint.
## Planned Spaces
We welcome suggestions and contributions.
- [AutoFormer](https://openaccess.thecvf.com/content/ICCV2021/html/Chen_AutoFormer_Searching_Transformers_for_Visual_Recognition_ICCV_2021_paper.html), [PR under review](https://github.com/microsoft/nni/pull/4551)
- [NAS-BERT](https://arxiv.org/abs/2105.14444)
- Speech models, such as [LightSpeech](https://arxiv.org/abs/2102.04040)
## Searched Model Zoo
Create a searched model with pretrained weights like the following:
```python
model = MobileNetV3Space.load_searched_model('mobilenetv3-small-075', pretrained=True, download=True)
evaluate(model, imagenet_data)
```
``MobileNetV3Space`` can be replaced with any search space listed above, and ``mobilenetv3-small-075`` can be any model listed below.
See an example of ``evaluate`` [here](https://github.com/rwightman/pytorch-image-models/blob/d30685c283137b4b91ea43c4e595c964cd2cb6f0/train.py#L778).
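If you do not want to pull in a full training script, a minimal sketch of such an ``evaluate`` function is given below (illustrative only, assuming a standard ImageNet ``DataLoader``; the reported numbers follow the evaluation protocols noted in the table, not necessarily this snippet):

```python
import torch

def evaluate(model, dataloader, device='cuda'):
    # Plain top-1 accuracy; preprocessing (resize, crop, interpolation) must match
    # the "Eval Protocol" column below to reproduce the reported numbers.
    model.eval().to(device)
    correct = total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            correct += (model(images).argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return 100.0 * correct / total
```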
| Search space | Model | Dataset | Top-1 Acc. (%) | Eval Protocol |
|------------------|-----------------------|----------|----------------|------------------------------|
| ProxylessNAS | acenas-m1 | ImageNet | 75.176 | Default |
| ProxylessNAS | acenas-m2 | ImageNet | 75.0 | Default |
| ProxylessNAS | acenas-m3 | ImageNet | 75.118 | Default |
| ProxylessNAS | proxyless-cpu | ImageNet | 75.29 | Default |
| ProxylessNAS | proxyless-gpu | ImageNet | 75.084 | Default |
| ProxylessNAS | proxyless-mobile | ImageNet | 74.594 | Default |
| MobileNetV3Space | mobilenetv3-large-100 | ImageNet | 75.768 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-050 | ImageNet | 57.906 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-075 | ImageNet | 65.24 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-100 | ImageNet | 67.652 | Bicubic interpolation |
| MobileNetV3Space | cream-014 | ImageNet | 53.74 | Test image size = 64 |
| MobileNetV3Space | cream-043 | ImageNet | 66.256 | Test image size = 96 |
| MobileNetV3Space | cream-114 | ImageNet | 72.514 | Test image size = 160 |
| MobileNetV3Space | cream-287 | ImageNet | 77.52 | Default |
| MobileNetV3Space | cream-481 | ImageNet | 79.078 | Default |
| MobileNetV3Space | cream-604 | ImageNet | 79.92 | Default |
| DARTS | darts-v2 | CIFAR-10 | 97.37 | Default |
| ShuffleNetSpace | spos | ImageNet | 74.14 | BGR tensor; no normalization |
The metrics listed above are obtained by evaluating the checkpoints provided by the original authors and converted to NNI NAS format with [these scripts](https://github.com/ultmaster/spacehub-conversion). Note that some metrics can be higher / lower than originally reported, because of subtle differences in data preprocessing, operation implementation (e.g., 3rd-party hswish vs ``nn.Hardswish``), or even the library versions we are using. Most of these deviations are acceptable (~0.1%). We will retrain these architectures with reproducible and fair training settings, and update the results once the training is done.
Latency / FLOPs data are not yet included in the table; measuring them is a separate task.
Several more models to be added:
- FBNet on MobileNetV3Space
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT license. # Licensed under the MIT license.
from typing import Tuple, Optional, Callable, cast from functools import partial
from typing import Tuple, Optional, Callable, Union, List, Type, cast
import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from nni.typehint import Literal
from .proxylessnas import ConvBNReLU, InvertedResidual, SeparableConv, make_divisible, reset_parameters from .proxylessnas import ConvBNReLU, InvertedResidual, DepthwiseSeparableConv, make_divisible, reset_parameters
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
class h_sigmoid(nn.Module): class SqueezeExcite(nn.Module):
def __init__(self, inplace=True): """Squeeze-and-excite layer.
super(h_sigmoid, self).__init__()
self.relu = nn.ReLU6(inplace=inplace)
def forward(self, x): We can't use the op from ``torchvision.ops`` because it's not (yet) properly wrapped,
return self.relu(x + 3) / 6 and ValueChoice couldn't be processed.
class h_swish(nn.Module):
def __init__(self, inplace=True):
super(h_swish, self).__init__()
self.sigmoid = h_sigmoid(inplace=inplace)
def forward(self, x):
return x * self.sigmoid(x)
Reference:
class SELayer(nn.Module): - https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L26
"""Squeeze-and-excite layer.""" - https://github.com/d-li14/mobilenetv3.pytorch/blob/3e6938cedcbbc5ee5bc50780ea18e644702d85fc/mobilenetv3.py#L53
"""
def __init__(self, def __init__(self,
channels: int, channels: int,
reduction: int = 4, reduction_ratio: float = 0.25,
gate_layer: Optional[Callable[..., nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None): activation_layer: Optional[Callable[..., nn.Module]] = None):
super().__init__() super().__init__()
if activation_layer is None:
activation_layer = nn.Sigmoid rd_channels = make_divisible(channels * reduction_ratio, 8)
self.avg_pool = nn.AdaptiveAvgPool2d(1) gate_layer = gate_layer or nn.Hardsigmoid
self.fc = nn.Sequential( activation_layer = activation_layer or nn.ReLU
nn.Linear(channels, make_divisible(channels // reduction, 8)), self.conv_reduce = nn.Conv2d(channels, rd_channels, 1, bias=True)
nn.ReLU(inplace=True), self.act1 = activation_layer(inplace=True)
nn.Linear(make_divisible(channels // reduction, 8), channels), self.conv_expand = nn.Conv2d(rd_channels, channels, 1, bias=True)
activation_layer() self.gate = gate_layer()
)
def forward(self, x): def forward(self, x):
b, c, _, _ = x.size() x_se = x.mean((2, 3), keepdim=True)
y = self.avg_pool(x).view(b, c) x_se = self.conv_reduce(x_se)
y = self.fc(y).view(b, c, 1, 1) x_se = self.act1(x_se)
return x * y x_se = self.conv_expand(x_se)
return x * self.gate(x_se)
def _se_or_skip(hidden_ch: int, input_ch: int, optional: bool, se_from_exp: bool, label: str) -> nn.Module:
ch = hidden_ch if se_from_exp else input_ch
if optional:
return nn.LayerChoice({
'identity': nn.Identity(),
'se': SqueezeExcite(ch)
}, label=label)
else:
return SqueezeExcite(ch)
def _act_fn(act_alias: Literal['hswish', 'swish', 'relu']) -> Type[nn.Module]:
if act_alias == 'hswish':
return nn.Hardswish
elif act_alias == 'swish':
return nn.SiLU
elif act_alias == 'relu':
return nn.ReLU
else:
raise ValueError(f'Unsupported act alias: {act_alias}')
@model_wrapper @model_wrapper
...@@ -64,92 +83,582 @@ class MobileNetV3Space(nn.Module): ...@@ -64,92 +83,582 @@ class MobileNetV3Space(nn.Module):
We use the following snippet as a reference.
https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/mobile_search_space_v3.py#L728
We have ``num_blocks``, which equals the length of ``self.blocks`` (the main body of the network).
For simplicity, the following parameter specification assumes ``num_blocks`` equals 8 (body + head).
If a shallower body is intended, arrays including ``base_widths``, ``squeeze_excite``, ``depth_range``,
``stride``, ``activation`` should also be shortened accordingly.
Parameters
----------
num_labels
Dimensions for classification head.
base_widths
Widths of each stage, from stem, to body, to head.
Length should be 9, i.e., ``num_blocks + 1`` (because there is a stem width in front).
width_multipliers
A range of widths multiplier to choose from. The choice is independent for each stage.
Or it can be a fixed float. This will be applied on ``base_widths``,
and we would also make sure that widths can be divided by 8.
expand_ratios
A list of expand ratios to choose from. Independent for every **block**.
squeeze_excite
Indicating whether the current stage can have an optional SE layer.
Expect array of length 6 for stage 0 to 5. Each element can be one of ``force``, ``optional``, ``none``.
depth_range
A range (e.g., ``(1, 4)``),
or a list of range (e.g., ``[(1, 3), (1, 4), (1, 4), (1, 3), (0, 2)]``).
If a list, the length should be 5. The depth are specified for stage 1 to 5.
stride
Stride for all stages (including stem and head). Length should be same as ``base_widths``.
activation
Activation (class) for all stages. Length is same as ``base_widths``.
se_from_exp
Calculate SE channel reduction from expanded (mid) channels.
dropout_rate
Dropout rate at classification head.
bn_eps
Epsilon of batch normalization.
bn_momentum
Momentum of batch normalization.
""" """
def __init__(self, num_labels: int = 1000, widths: List[Union[nn.ChoiceOf[int], int]]
base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024), depth_range: List[Tuple[int, int]]
width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6), def __init__(
dropout_rate: float = 0.2, self, num_labels: int = 1000,
bn_eps: float = 1e-3, base_widths: Tuple[int, ...] = (16, 16, 16, 32, 64, 128, 256, 512, 1024),
bn_momentum: float = 0.1): width_multipliers: Union[Tuple[float, ...], float] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
expand_ratios: Tuple[float, ...] = (1., 2., 3., 4., 5., 6.),
squeeze_excite: Tuple[Literal['force', 'optional', 'none'], ...] = (
'none', 'none', 'optional', 'none', 'optional', 'optional'
),
depth_range: Union[List[Tuple[int, int]], Tuple[int, int]] = (1, 4),
stride: Tuple[int, ...] = (2, 1, 2, 2, 2, 1, 2, 1, 1),
activation: Tuple[Literal['hswish', 'swish', 'relu'], ...] = (
'hswish', 'relu', 'relu', 'relu', 'hswish', 'hswish', 'hswish', 'hswish', 'hswish'
),
se_from_exp: bool = True,
dropout_rate: float = 0.2,
bn_eps: float = 1e-3,
bn_momentum: float = 0.1
):
super().__init__() super().__init__()
self.widths = cast(nn.ChoiceOf[int], [ self.num_blocks = len(base_widths) - 1 # without stem, equal to len(self.blocks)
nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers], label=f'width_{i}') assert self.num_blocks >= 4
for i, base_width in enumerate(base_widths)
]) assert len(base_widths) == len(stride) == len(activation) == self.num_blocks + 1
# The final two blocks can't have SE
assert len(squeeze_excite) == self.num_blocks - 2 and all(se in ['force', 'optional', 'none'] for se in squeeze_excite)
# The first and final two blocks can't have variational depth
if isinstance(depth_range[0], int):
depth_range = cast(Tuple[int, int], depth_range)
assert len(depth_range) == 2 and depth_range[1] >= depth_range[0] >= 1
self.depth_range = [depth_range] * (self.num_blocks - 3)
else:
assert len(depth_range) == self.num_blocks - 3
self.depth_range = cast(List[Tuple[int, int]], depth_range)
for d in self.depth_range:
d = cast(Tuple[int, int], d)
# pylint: disable=unsubscriptable-object
assert len(d) == 2 and d[1] >= d[0] >= 1, f'{d} does not satisfy depth constraints'
self.widths = []
for i, base_width in enumerate(base_widths):
if isinstance(width_multipliers, float):
self.widths.append(make_divisible(base_width * width_multipliers, 8))
else:
self.widths.append(
# According to tunas, stem and stage 0 share one width multiplier
# https://github.com/google-research/google-research/blob/20736344/tunas/mobile_search_space_v3.py#L791
make_divisible(
nn.ValueChoice(list(width_multipliers), label=f's{max(i - 1, 0)}_width_mult') * base_width, 8
)
)
self.expand_ratios = expand_ratios self.expand_ratios = expand_ratios
self.se_from_exp = se_from_exp
blocks = [ # NOTE: The built-in hardswish produces slightly different output from 3rd-party implementation
# Stem # But I guess it doesn't really matter.
ConvBNReLU( # https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/layers/activations.py#L79
3, self.widths[0],
nn.ValueChoice([3, 5], label='ks_0'), self.stem = ConvBNReLU(
stride=2, activation_layer=h_swish 3, self.widths[0],
nn.ValueChoice([3, 5], label=f'stem_ks'),
stride=stride[0], activation_layer=_act_fn(activation[0])
)
blocks: List[nn.Module] = [
# Stage 0
# FIXME: this should be an optional layer.
# https://github.com/google-research/google-research/blob/20736344/tunas/mobile_search_space_v3.py#L791
DepthwiseSeparableConv(
self.widths[0], self.widths[1],
nn.ValueChoice([3, 5, 7], label=f's0_i0_ks'),
stride=stride[1],
squeeze_excite=cast(Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module], partial(
_se_or_skip, optional=squeeze_excite[0] == 'optional', se_from_exp=self.se_from_exp, label=f's0_i0_se'
)) if squeeze_excite[0] != 'none' else None,
activation_layer=_act_fn(activation[1])
), ),
SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
] ]
# counting for kernel sizes and expand ratios
self.layer_count = 2
blocks += [ blocks += [
# Body # Stage 1-5 (by default)
self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU), self._make_stage(i, self.widths[i], self.widths[i + 1], squeeze_excite[i], stride[i + 1], _act_fn(activation[i + 1]))
self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU), for i in range(1, self.num_blocks - 2)
self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
] ]
# Head # Head
blocks += [ blocks += [
ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish), ConvBNReLU(
self.widths[self.num_blocks - 2],
self.widths[self.num_blocks - 1],
kernel_size=1,
stride=stride[self.num_blocks - 1],
activation_layer=_act_fn(activation[self.num_blocks - 1])
),
nn.AdaptiveAvgPool2d(1), nn.AdaptiveAvgPool2d(1),
ConvBNReLU(self.widths[6], self.widths[7], 1, 1, norm_layer=nn.Identity, activation_layer=h_swish), # In some implementation, this is a linear instead.
# Should be equivalent.
ConvBNReLU(
self.widths[self.num_blocks - 1],
self.widths[self.num_blocks],
kernel_size=1,
stride=stride[self.num_blocks],
norm_layer=nn.Identity,
activation_layer=_act_fn(activation[self.num_blocks])
)
] ]
self.blocks = nn.Sequential(*blocks) self.blocks = nn.Sequential(*blocks)
self.classifier = nn.Sequential( self.classifier = nn.Sequential(
nn.Dropout(dropout_rate), nn.Dropout(dropout_rate),
nn.Linear(cast(int, self.widths[7]), num_labels), nn.Linear(cast(int, self.widths[self.num_blocks]), num_labels),
) )
reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps) reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def forward(self, x): def forward(self, x):
x = self.stem(x)
x = self.blocks(x) x = self.blocks(x)
x = x.view(x.size(0), -1) x = x.view(x.size(0), -1)
x = self.classifier(x) x = self.classifier(x)
return x return x
def _make_stage(self, stage_idx, inp, oup, se, stride, act): def _make_stage(self, stage_idx, inp, oup, se, stride, act):
# initialize them first because they are related to layer_count. def layer_builder(idx):
exp, ks, se_blocks = [], [], [] exp = nn.ValueChoice(list(self.expand_ratios), label=f's{stage_idx}_i{idx}_exp')
for _ in range(4): ks = nn.ValueChoice([3, 5, 7], label=f's{stage_idx}_i{idx}_ks')
exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}')) # if SE is true, assign a layer choice to SE
ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}')) se_or_skip = cast(Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module], partial(
if se: _se_or_skip, optional=se == 'optional', se_from_exp=self.se_from_exp, label=f's{stage_idx}_i{idx}_se'
# if SE is true, assign a layer choice to SE )) if se != 'none' else None
se_blocks.append( return InvertedResidual(
lambda hidden_ch: nn.LayerChoice([nn.Identity(), SELayer(hidden_ch)], label=f'se_{self.layer_count}') inp if idx == 0 else oup,
) oup, exp, ks,
else: stride=stride if idx == 0 else 1, # only the first layer in each stage can have stride > 1
se_blocks.append(None) squeeze_excite=se_or_skip,
self.layer_count += 1 activation_layer=act,
)
blocks = [
# stride = 2
InvertedResidual(inp, oup, exp[0], ks[0],
stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
# stride = 1, residual connection should be automatically enabled
InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
]
# mutable depth # mutable depth
return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}') min_depth, max_depth = self.depth_range[stage_idx - 1]
if stride != 1:
min_depth = max(min_depth, 1)
return nn.Repeat(layer_builder, depth=(min_depth, max_depth), label=f's{stage_idx}_depth')
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'mobilenetv3-large-100':
# NOTE: Use bicubic interpolation to evaluate this
# With default interpolation, it yields top-1 75.722
arch = {
'stem_ks': 3,
's0_i0_ks': 3,
's1_depth': 2,
's1_i0_exp': 4,
's1_i0_ks': 3,
's1_i1_exp': 3,
's1_i1_ks': 3,
's2_depth': 3,
's2_i0_exp': 3,
's2_i0_ks': 5,
's2_i1_exp': 3,
's2_i1_ks': 5,
's2_i2_exp': 3,
's2_i2_ks': 5,
's3_depth': 4,
's3_i0_exp': 6,
's3_i0_ks': 3,
's3_i1_exp': 2.5,
's3_i1_ks': 3,
's3_i2_exp': 2.3,
's3_i2_ks': 3,
's3_i3_exp': 2.3,
's3_i3_ks': 3,
's4_depth': 2,
's4_i0_exp': 6,
's4_i0_ks': 3,
's4_i1_exp': 6,
's4_i1_ks': 3,
's5_depth': 3,
's5_i0_exp': 6,
's5_i0_ks': 5,
's5_i1_exp': 6,
's5_i1_ks': 5,
's5_i2_exp': 6,
's5_i2_ks': 5,
}
init_kwargs.update(
base_widths=[16, 16, 24, 40, 80, 112, 160, 960, 1280],
expand_ratios=[1.0, 2.0, 2.3, 2.5, 3.0, 4.0, 6.0],
bn_eps=1e-5,
bn_momentum=0.1,
width_multipliers=1.0,
squeeze_excite=['none', 'none', 'force', 'none', 'force', 'force']
)
elif name.startswith('mobilenetv3-small-'):
# Evaluate with bicubic interpolation
multiplier = int(name.split('-')[-1]) / 100
widths = [16, 16, 24, 40, 48, 96, 576, 1024]
for i in range(7):
if i > 0 or multiplier >= 0.75:
# fix_stem = True when multiplier < 0.75
# https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/mobilenetv3.py#L421
widths[i] = make_divisible(widths[i] * multiplier, 8)
init_kwargs.update(
base_widths=widths,
width_multipliers=1.0,
expand_ratios=[3.0, 3.67, 4.0, 4.5, 6.0],
bn_eps=1e-05,
bn_momentum=0.1,
squeeze_excite=['force', 'none', 'force', 'force', 'force'],
activation=['hswish', 'relu', 'relu', 'hswish', 'hswish', 'hswish', 'hswish', 'hswish'],
stride=[2, 2, 2, 2, 1, 2, 1, 1],
depth_range=(1, 2),
)
arch = {
'stem_ks': 3,
's0_i0_ks': 3,
's1_depth': 2,
's1_i0_exp': 4.5,
's1_i0_ks': 3,
's1_i1_exp': 3.67,
's1_i1_ks': 3,
's2_depth': 3,
's2_i0_exp': 4.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's3_depth': 2,
's3_i0_exp': 3.0,
's3_i0_ks': 5,
's3_i1_exp': 3.0,
's3_i1_ks': 5,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5
}
elif name.startswith('cream'):
# https://github.com/microsoft/Cream/tree/main/Cream
# bilinear interpolation
level = name.split('-')[-1]
# region cream arch specification
if level == '014':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's4_depth': 1,
's4_i0_exp': 6.0,
's4_i0_ks': 3,
's5_depth': 1,
's5_i0_exp': 6.0,
's5_i0_ks': 5
}
elif level == '043':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 3,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 3,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's5_depth': 2,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5
}
elif level == '114':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's5_depth': 2,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5
}
elif level == '287':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 3,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 3,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's4_depth': 4,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's4_i3_exp': 6.0,
's4_i3_ks': 5,
's5_depth': 3,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 6.0,
's5_i2_ks': 5
}
elif level == '481':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 4,
's1_i0_exp': 6.0,
's1_i0_ks': 5,
's1_i1_exp': 4.0,
's1_i1_ks': 7,
's1_i2_exp': 6.0,
's1_i2_ks': 5,
's1_i3_exp': 6.0,
's1_i3_ks': 3,
's2_depth': 4,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 4.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's2_i3_exp': 4.0,
's2_i3_ks': 3,
's3_depth': 5,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's3_i3_exp': 6.0,
's3_i3_ks': 3,
's3_i4_exp': 6.0,
's3_i4_ks': 3,
's4_depth': 4,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's4_i3_exp': 6.0,
's4_i3_ks': 5,
's5_depth': 4,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 6.0,
's5_i2_ks': 5,
's5_i3_exp': 6.0,
's5_i3_ks': 5
}
elif level == '604':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 5,
's1_i0_exp': 6.0,
's1_i0_ks': 5,
's1_i1_exp': 6.0,
's1_i1_ks': 5,
's1_i2_exp': 4.0,
's1_i2_ks': 5,
's1_i3_exp': 6.0,
's1_i3_ks': 5,
's1_i4_exp': 6.0,
's1_i4_ks': 5,
's2_depth': 5,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 4.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's2_i3_exp': 4.0,
's2_i3_ks': 5,
's2_i4_exp': 6.0,
's2_i4_ks': 5,
's3_depth': 5,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 4.0,
's3_i1_ks': 5,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's3_i3_exp': 4.0,
's3_i3_ks': 5,
's3_i4_exp': 6.0,
's3_i4_ks': 5,
's4_depth': 6,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 4.0,
's4_i2_ks': 5,
's4_i3_exp': 4.0,
's4_i3_ks': 5,
's4_i4_exp': 6.0,
's4_i4_ks': 5,
's4_i5_exp': 6.0,
's4_i5_ks': 5,
's5_depth': 6,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 4.0,
's5_i2_ks': 5,
's5_i3_exp': 6.0,
's5_i3_ks': 5,
's5_i4_exp': 6.0,
's5_i4_ks': 5,
's5_i5_exp': 6.0,
's5_i5_ks': 5
}
else:
raise ValueError(f'Unsupported cream model level: {level}')
# endregion
init_kwargs.update(
base_widths=[16, 16, 24, 40, 80, 96, 192, 320, 1280],
width_multipliers=1.0,
expand_ratios=[4.0, 6.0],
bn_eps=1e-5,
bn_momentum=0.1,
squeeze_excite=['force'] * 6,
activation=['swish'] * 9
)
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
...@@ -8,6 +8,7 @@ It's called ``nasnet.py`` simply because NASNet is the first to propose such str ...@@ -8,6 +8,7 @@ It's called ``nasnet.py`` simply because NASNet is the first to propose such str
""" """
from collections import OrderedDict from collections import OrderedDict
from functools import partial
from typing import Tuple, List, Union, Iterable, Dict, Callable, Optional, cast from typing import Tuple, List, Union, Iterable, Dict, Callable, Optional, cast
try: try:
...@@ -20,6 +21,9 @@ import torch ...@@ -20,6 +21,9 @@ import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
# the following are NAS operations from # the following are NAS operations from
# https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py # https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py
...@@ -300,15 +304,26 @@ class CellBuilder: ...@@ -300,15 +304,26 @@ class CellBuilder:
self.last_cell_reduce = last_cell_reduce self.last_cell_reduce = last_cell_reduce
self._expect_idx = 0 self._expect_idx = 0
def __call__(self, repeat_idx: int):
if self._expect_idx != repeat_idx:
raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')
# It takes an index that is the index in the repeat. # It takes an index that is the index in the repeat.
# Number of predecessors for each cell is fixed to 2. # Number of predecessors for each cell is fixed to 2.
num_predecessors = 2 self.num_predecessors = 2
# Number of ops per node is fixed to 2. # Number of ops per node is fixed to 2.
num_ops_per_node = 2 self.num_ops_per_node = 2
def op_factory(self, node_index: int, op_index: int, input_index: Optional[int], *,
op: str, channels: int, is_reduction_cell: bool):
if is_reduction_cell and (
input_index is None or input_index < self.num_predecessors
): # could be None when constructing search space
stride = 2
else:
stride = 1
return OPS[op](channels, stride, True)
def __call__(self, repeat_idx: int):
if self._expect_idx != repeat_idx:
raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')
# Reduction cell means stride = 2 and channel multiplied by 2. # Reduction cell means stride = 2 and channel multiplied by 2.
is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce
...@@ -316,16 +331,11 @@ class CellBuilder: ...@@ -316,16 +331,11 @@ class CellBuilder:
# self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built. # self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built.
preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce) preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce)
ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = { ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = {}
op: # make final chosen ops named with their aliases for op in self.op_candidates:
lambda node_index, op_index, input_index: ops_factory[op] = partial(self.op_factory, op=op, channels=cast(int, self.C), is_reduction_cell=is_reduction_cell)
OPS[op](self.C, 2 if is_reduction_cell and (
input_index is None or input_index < num_predecessors # could be none when constructing search sapce
) else 1, True)
for op in self.op_candidates
}
cell = nn.Cell(ops_factory, self.num_nodes, num_ops_per_node, num_predecessors, self.merge_op, cell = nn.Cell(ops_factory, self.num_nodes, self.num_ops_per_node, self.num_predecessors, self.merge_op,
preprocessor=preprocessor, postprocessor=CellPostprocessor(), preprocessor=preprocessor, postprocessor=CellPostprocessor(),
label='reduce' if is_reduction_cell else 'normal') label='reduce' if is_reduction_cell else 'normal')
...@@ -401,7 +411,7 @@ class NDS(nn.Module): ...@@ -401,7 +411,7 @@ class NDS(nn.Module):
self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells) self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
if isinstance(num_cells, Iterable): if isinstance(num_cells, Iterable):
self.num_cells = nn.ValueChoice(list(num_cells), label='depth') self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
num_cells_per_stage = [i * self.num_cells // 3 - (i - 1) * self.num_cells // 3 for i in range(3)] num_cells_per_stage = [(i + 1) * self.num_cells // 3 - i * self.num_cells // 3 for i in range(3)]
# auxiliary head is different for networks targeted at different datasets
if dataset == 'imagenet': if dataset == 'imagenet':
...@@ -501,6 +511,10 @@ class NDS(nn.Module): ...@@ -501,6 +511,10 @@ class NDS(nn.Module):
if isinstance(module, DropPath_): if isinstance(module, DropPath_):
module.drop_prob = drop_prob module.drop_prob = drop_prob
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@model_wrapper @model_wrapper
class NASNet(NDS): class NASNet(NDS):
...@@ -676,3 +690,64 @@ class DARTS(NDS): ...@@ -676,3 +690,64 @@ class DARTS(NDS):
num_cells=num_cells, num_cells=num_cells,
dataset=dataset, dataset=dataset,
auxiliary_loss=auxiliary_loss) auxiliary_loss=auxiliary_loss)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'darts-v2':
init_kwargs.update(
num_cells=20,
width=36,
)
arch = {
'normal/op_2_0': 'sep_conv_3x3',
'normal/op_2_1': 'sep_conv_3x3',
'normal/input_2_0': 0,
'normal/input_2_1': 1,
'normal/op_3_0': 'sep_conv_3x3',
'normal/op_3_1': 'sep_conv_3x3',
'normal/input_3_0': 0,
'normal/input_3_1': 1,
'normal/op_4_0': 'sep_conv_3x3',
'normal/op_4_1': 'skip_connect',
'normal/input_4_0': 1,
'normal/input_4_1': 0,
'normal/op_5_0': 'skip_connect',
'normal/op_5_1': 'dil_conv_3x3',
'normal/input_5_0': 0,
'normal/input_5_1': 2,
'reduce/op_2_0': 'max_pool_3x3',
'reduce/op_2_1': 'max_pool_3x3',
'reduce/input_2_0': 0,
'reduce/input_2_1': 1,
'reduce/op_3_0': 'skip_connect',
'reduce/op_3_1': 'max_pool_3x3',
'reduce/input_3_0': 2,
'reduce/input_3_1': 1,
'reduce/op_4_0': 'max_pool_3x3',
'reduce/op_4_1': 'skip_connect',
'reduce/input_4_0': 0,
'reduce/input_4_1': 2,
'reduce/op_5_0': 'skip_connect',
'reduce/op_5_1': 'max_pool_3x3',
'reduce/input_5_0': 2,
'reduce/input_5_1': 1
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
...@@ -2,12 +2,15 @@ ...@@ -2,12 +2,15 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import math import math
from typing import Optional, Callable, List, Tuple, cast from typing import Optional, Callable, List, Tuple, Iterator, cast
import torch import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
def make_divisible(v, divisor, min_val=None): def make_divisible(v, divisor, min_val=None):
""" """
...@@ -24,6 +27,22 @@ def make_divisible(v, divisor, min_val=None): ...@@ -24,6 +27,22 @@ def make_divisible(v, divisor, min_val=None):
return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v) return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v)
def simplify_sequential(sequentials: List[nn.Module]) -> Iterator[nn.Module]:
"""
Flatten the sequential blocks so that the hierarchy looks better.
Eliminate identity modules automatically.
"""
for module in sequentials:
if isinstance(module, nn.Sequential):
for submodule in module.children():
# no recursive expansion
if not isinstance(submodule, nn.Identity):
yield submodule
else:
if not isinstance(module, nn.Identity):
yield module
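# Illustrative example (assumption): one level of flattening, identities dropped.
#   list(simplify_sequential([nn.Sequential(nn.ReLU(), nn.Identity()), nn.Identity(), nn.Conv2d(3, 8, 1)]))
#   -> [nn.ReLU(), nn.Conv2d(3, 8, 1)]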
class ConvBNReLU(nn.Sequential): class ConvBNReLU(nn.Sequential):
""" """
The template for a conv-bn-relu block. The template for a conv-bn-relu block.
...@@ -45,7 +64,11 @@ class ConvBNReLU(nn.Sequential): ...@@ -45,7 +64,11 @@ class ConvBNReLU(nn.Sequential):
norm_layer = nn.BatchNorm2d norm_layer = nn.BatchNorm2d
if activation_layer is None: if activation_layer is None:
activation_layer = nn.ReLU6 activation_layer = nn.ReLU6
super().__init__( # If no normalization is used, set bias to True
# https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L194
norm = norm_layer(cast(int, out_channels))
no_normalization = isinstance(norm, nn.Identity)
blocks: List[nn.Module] = [
nn.Conv2d( nn.Conv2d(
cast(int, in_channels), cast(int, in_channels),
cast(int, out_channels), cast(int, out_channels),
...@@ -54,18 +77,30 @@ class ConvBNReLU(nn.Sequential): ...@@ -54,18 +77,30 @@ class ConvBNReLU(nn.Sequential):
cast(int, padding), cast(int, padding),
dilation=dilation, dilation=dilation,
groups=cast(int, groups), groups=cast(int, groups),
bias=False bias=no_normalization
), ),
norm_layer(cast(int, out_channels)), # Normalization, regardless of batchnorm or identity
norm,
# One pytorch implementation as an SE here, to faithfully reproduce paper
# We follow a more accepted approach to put SE outside
# Reference: https://github.com/d-li14/mobilenetv3.pytorch/issues/18
activation_layer(inplace=True) activation_layer(inplace=True)
) ]
super().__init__(*simplify_sequential(blocks))
self.out_channels = out_channels self.out_channels = out_channels
class SeparableConv(nn.Sequential): class DepthwiseSeparableConv(nn.Sequential):
""" """
In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1. In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1.
Residual connection is added if input and output shape are the same. Residual connection is added if input and output shape are the same.
References:
- https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L90
- https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L433
- https://github.com/ultmaster/AceNAS/blob/46c8895f/searchspace/proxylessnas/utils.py#L100
""" """
def __init__( def __init__(
...@@ -74,20 +109,24 @@ class SeparableConv(nn.Sequential): ...@@ -74,20 +109,24 @@ class SeparableConv(nn.Sequential):
out_channels: nn.MaybeChoice[int], out_channels: nn.MaybeChoice[int],
kernel_size: nn.MaybeChoice[int] = 3, kernel_size: nn.MaybeChoice[int] = 3,
stride: int = 1, stride: int = 1,
squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
norm_layer: Optional[Callable[[int], nn.Module]] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None: ) -> None:
super().__init__( blocks = [
# dw # dw
ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels, ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels,
norm_layer=norm_layer, activation_layer=activation_layer), norm_layer=norm_layer, activation_layer=activation_layer),
# optional se
squeeze_excite(in_channels, in_channels) if squeeze_excite else nn.Identity(),
# pw-linear # pw-linear
ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity) ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity)
) ]
self.residual_connection = stride == 1 and in_channels == out_channels super().__init__(*simplify_sequential(blocks))
self.has_skip = stride == 1 and in_channels == out_channels
def forward(self, x: torch.Tensor) -> torch.Tensor: def forward(self, x: torch.Tensor) -> torch.Tensor:
if self.residual_connection: if self.has_skip:
return x + super().forward(x) return x + super().forward(x)
else: else:
return super().forward(x) return super().forward(x)
...@@ -97,14 +136,17 @@ class InvertedResidual(nn.Sequential): ...@@ -97,14 +136,17 @@ class InvertedResidual(nn.Sequential):
""" """
An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models
that uses an inverted structure for efficiency reasons. that uses an inverted structure for efficiency reasons.
It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture. It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture.
It has since been reused for several mobile-optimized CNNs. It has since been reused for several mobile-optimized CNNs.
It follows a narrow -> wide -> narrow approach, hence the inversion. It follows a narrow -> wide -> narrow approach, hence the inversion.
It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters), It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters),
then a 1x1 convolution is used to reduce the number of channels so input and output can be added. then a 1x1 convolution is used to reduce the number of channels so input and output can be added.
Follow implementation of: This implementation is sort of a mixture between:
https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/rematlib/mobile_model_v3.py#L453
- https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L453
- https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L134
""" """
def __init__( def __init__(
...@@ -114,7 +156,7 @@ class InvertedResidual(nn.Sequential): ...@@ -114,7 +156,7 @@ class InvertedResidual(nn.Sequential):
expand_ratio: nn.MaybeChoice[float], expand_ratio: nn.MaybeChoice[float],
kernel_size: nn.MaybeChoice[int] = 3, kernel_size: nn.MaybeChoice[int] = 3,
stride: int = 1, stride: int = 1,
squeeze_and_excite: Optional[Callable[[nn.MaybeChoice[int]], nn.Module]] = None, squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
norm_layer: Optional[Callable[[int], nn.Module]] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None: ) -> None:
...@@ -123,11 +165,10 @@ class InvertedResidual(nn.Sequential): ...@@ -123,11 +165,10 @@ class InvertedResidual(nn.Sequential):
self.out_channels = out_channels self.out_channels = out_channels
assert stride in [1, 2] assert stride in [1, 2]
hidden_ch = nn.ValueChoice.to_int(round(cast(int, in_channels * expand_ratio))) hidden_ch = cast(int, make_divisible(in_channels * expand_ratio, 8))
# FIXME: check whether this equal works # NOTE: this equivalence check should also work for ValueChoice
# Residual connection is added here stride = 1 and input channels and output channels are the same. self.has_skip = stride == 1 and in_channels == out_channels
self.residual_connection = stride == 1 and in_channels == out_channels
layers: List[nn.Module] = [ layers: List[nn.Module] = [
# point-wise convolution # point-wise convolution
...@@ -138,21 +179,20 @@ class InvertedResidual(nn.Sequential): ...@@ -138,21 +179,20 @@ class InvertedResidual(nn.Sequential):
norm_layer=norm_layer, activation_layer=activation_layer), norm_layer=norm_layer, activation_layer=activation_layer),
# depth-wise # depth-wise
ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch, ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
norm_layer=norm_layer, activation_layer=activation_layer) norm_layer=norm_layer, activation_layer=activation_layer),
] # SE
squeeze_excite(
if squeeze_and_excite: cast(int, hidden_ch),
layers.append(squeeze_and_excite(hidden_ch)) cast(int, in_channels)
) if squeeze_excite is not None else nn.Identity(),
layers += [
# pw-linear # pw-linear
ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity) ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity),
] ]
super().__init__(*layers) super().__init__(*simplify_sequential(layers))
def forward(self, x: torch.Tensor) -> torch.Tensor: def forward(self, x: torch.Tensor) -> torch.Tensor:
if self.residual_connection: if self.has_skip:
return x + super().forward(x) return x + super().forward(x)
else: else:
return super().forward(x) return super().forward(x)
...@@ -199,7 +239,9 @@ class ProxylessNAS(nn.Module): ...@@ -199,7 +239,9 @@ class ProxylessNAS(nn.Module):
Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer
is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means, is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means,
when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared. when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared.
We note that ``MobileNetV3Space`` is different in this perspective. We note that :class:`MobileNetV3Space` is different in this perspective.
This space could be implemented as part of :class:`MobileNetV3Space`, but we keep them separate, following convention.
""" """
def __init__(self, num_labels: int = 1000, def __init__(self, num_labels: int = 1000,
...@@ -221,11 +263,11 @@ class ProxylessNAS(nn.Module): ...@@ -221,11 +263,11 @@ class ProxylessNAS(nn.Module):
self.bn_eps = bn_eps self.bn_eps = bn_eps
self.bn_momentum = bn_momentum self.bn_momentum = bn_momentum
self.first_conv = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d) self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)
blocks: List[nn.Module] = [ blocks: List[nn.Module] = [
# first stage is fixed # first stage is fixed
SeparableConv(widths[0], widths[1], kernel_size=3, stride=1) DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
] ]
# https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21 # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
...@@ -234,7 +276,7 @@ class ProxylessNAS(nn.Module): ...@@ -234,7 +276,7 @@ class ProxylessNAS(nn.Module):
# we return a builder that dynamically creates module for different `repeat_idx`. # we return a builder that dynamically creates module for different `repeat_idx`.
builder = inverted_residual_choice_builder( builder = inverted_residual_choice_builder(
[3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}') [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
if stage < 6: if stage < 7:
blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth')) blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
else: else:
# No mutation for depth in the last stage. # No mutation for depth in the last stage.
...@@ -252,7 +294,7 @@ class ProxylessNAS(nn.Module): ...@@ -252,7 +294,7 @@ class ProxylessNAS(nn.Module):
reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps) reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def forward(self, x): def forward(self, x):
x = self.first_conv(x) x = self.stem(x)
x = self.blocks(x) x = self.blocks(x)
x = self.feature_mix_layer(x) x = self.feature_mix_layer(x)
x = self.global_avg_pooling(x) x = self.global_avg_pooling(x)
...@@ -268,6 +310,193 @@ class ProxylessNAS(nn.Module): ...@@ -268,6 +310,193 @@ class ProxylessNAS(nn.Module):
return {'classifier.weight', 'classifier.bias'} return {'classifier.weight', 'classifier.bias'}
return set() return set()
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'acenas-m1':
arch = {
's2_depth': 2,
's2_i0': 'k3e6',
's2_i1': 'k3e3',
's3_depth': 3,
's3_i0': 'k5e3',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's4_depth': 2,
's4_i0': 'k3e6',
's4_i1': 'k5e3',
's5_depth': 4,
's5_i0': 'k7e6',
's5_i1': 'k3e6',
's5_i2': 'k3e6',
's5_i3': 'k7e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e3',
's6_i3': 'k7e3',
's7_depth': 1,
's7_i0': 'k7e6'
}
elif name == 'acenas-m2':
arch = {
's2_depth': 1,
's2_i0': 'k5e3',
's3_depth': 3,
's3_i0': 'k3e6',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's4_depth': 2,
's4_i0': 'k7e6',
's4_i1': 'k5e6',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k5e3',
's5_i2': 'k5e6',
's5_i3': 'k3e6',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k5e6',
's6_i2': 'k5e3',
's6_i3': 'k5e6',
's7_depth': 1,
's7_i0': 'k7e6'
}
elif name == 'acenas-m3':
arch = {
's2_depth': 2,
's2_i0': 'k3e3',
's2_i1': 'k3e6',
's3_depth': 2,
's3_i0': 'k5e3',
's3_i1': 'k3e3',
's4_depth': 3,
's4_i0': 'k5e6',
's4_i1': 'k7e6',
's4_i2': 'k3e6',
's5_depth': 4,
's5_i0': 'k7e6',
's5_i1': 'k7e3',
's5_i2': 'k7e3',
's5_i3': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e3',
's6_i2': 'k7e6',
's6_i3': 'k3e3',
's7_depth': 1,
's7_i0': 'k5e6'
}
elif name == 'proxyless-cpu':
arch = {
's2_depth': 4,
's2_i0': 'k3e6',
's2_i1': 'k3e3',
's2_i2': 'k3e3',
's2_i3': 'k3e3',
's3_depth': 4,
's3_i0': 'k3e6',
's3_i1': 'k3e3',
's3_i2': 'k3e3',
's3_i3': 'k5e3',
's4_depth': 2,
's4_i0': 'k3e6',
's4_i1': 'k3e3',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k3e3',
's5_i2': 'k3e3',
's5_i3': 'k3e3',
's6_depth': 4,
's6_i0': 'k5e6',
's6_i1': 'k5e3',
's6_i2': 'k5e3',
's6_i3': 'k3e3',
's7_depth': 1,
's7_i0': 'k5e6'
}
init_kwargs['base_widths'] = [40, 24, 32, 48, 88, 104, 216, 360, 1432]
elif name == 'proxyless-gpu':
arch = {
's2_depth': 1,
's2_i0': 'k5e3',
's3_depth': 2,
's3_i0': 'k7e3',
's3_i1': 'k3e3',
's4_depth': 2,
's4_i0': 'k7e6',
's4_i1': 'k5e3',
's5_depth': 3,
's5_i0': 'k5e6',
's5_i1': 'k3e3',
's5_i2': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e6',
's6_i3': 'k5e6',
's7_depth': 1,
's7_i0': 'k7e6'
}
init_kwargs['base_widths'] = [40, 24, 32, 56, 112, 128, 256, 432, 1728]
elif name == 'proxyless-mobile':
arch = {
's2_depth': 2,
's2_i0': 'k5e3',
's2_i1': 'k3e3',
's3_depth': 4,
's3_i0': 'k7e3',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's3_i3': 'k5e3',
's4_depth': 4,
's4_i0': 'k7e6',
's4_i1': 'k5e3',
's4_i2': 'k5e3',
's4_i3': 'k5e3',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k5e3',
's5_i2': 'k5e3',
's5_i3': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e3',
's6_i3': 'k7e3',
's7_depth': 1,
's7_i0': 'k7e6'
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
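A minimal usage sketch for the classmethod above, following the `nni.retiarii.hub.pytorch` import used in the tests later in this diff; the eval-mode call and the 224x224 RGB input shape are assumptions for illustration, not part of this change:

import torch
from nni.retiarii.hub.pytorch import ProxylessNAS

# Build the searched 'acenas-m1' architecture and fetch its checkpoint into the NNI cache.
model = ProxylessNAS.load_searched_model('acenas-m1', pretrained=True, download=True)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # assumed ImageNet-sized input
print(logits.shape)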
def reset_parameters(model, model_init='he_fout', init_div_groups=False,
bn_momentum=0.1, bn_eps=1e-5):
...
...@@ -7,6 +7,9 @@ import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
class ShuffleNetBlock(nn.Module):
"""
...@@ -130,13 +133,13 @@ class ShuffleNetSpace(nn.Module):
Here, "k-x" means k times the number of default channels.
Otherwise, 1.0x is used by default. Default: false.
affine : bool
Apply affine to all batch norm. Default: true.
"""
def __init__(self,
num_labels: int = 1000,
channel_search: bool = False,
affine: bool = True):
super().__init__()
self.num_labels = num_labels
...@@ -180,12 +183,12 @@ class ShuffleNetSpace(nn.Module):
mid_channels = cast(nn.MaybeChoice[int], mid_channels)
choice_block = nn.LayerChoice(dict(
k3=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
k5=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
k7=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
xcep=ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
), label=f'layer_{global_block_idx}')
feature_blocks.append(choice_block)
self.features = nn.Sequential(*feature_blocks)
...@@ -244,3 +247,51 @@ class ShuffleNetSpace(nn.Module):
torch.nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
torch.nn.init.constant_(m.bias, 0)
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
if name == 'spos':
# NOTE: Need BGR tensor, with no normalization
# https://github.com/ultmaster/spacehub-conversion/blob/371a4fd6646b4e11eda3f61187f7c9a1d484b1ca/cutils.py#L63
arch = {
'layer_1': 'k7',
'layer_2': 'k5',
'layer_3': 'k3',
'layer_4': 'k5',
'layer_5': 'k7',
'layer_6': 'k3',
'layer_7': 'k7',
'layer_8': 'k3',
'layer_9': 'k7',
'layer_10': 'k3',
'layer_11': 'k7',
'layer_12': 'xcep',
'layer_13': 'k3',
'layer_14': 'k3',
'layer_15': 'k3',
'layer_16': 'k3',
'layer_17': 'xcep',
'layer_18': 'k7',
'layer_19': 'xcep',
'layer_20': 'xcep'
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory()
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
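The NOTE inside load_searched_model says the SPOS checkpoint expects a BGR tensor with no normalization; a hedged sketch of what calling code might do (the input resolution and the 0-255 pixel range are assumptions):

import torch
from nni.retiarii.hub.pytorch import ShuffleNetSpace

model = ShuffleNetSpace.load_searched_model('spos', pretrained=True, download=True)
model.eval()
# Start from an RGB image tensor in [0, 255], flip the channel axis to get BGR,
# and skip the usual mean/std normalization, as the NOTE above requires.
rgb = torch.rand(1, 3, 224, 224) * 255  # assumed input size
bgr = rgb.flip(1)
with torch.no_grad():
    logits = model(bgr)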
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""This file should be merged to nni/retiarii/fixed.py"""
from typing import Type
from nni.retiarii.utils import ContextStack
class FixedFactory:
"""Make a model space ready to create a fixed model.
Examples
--------
>>> factory = FixedFactory(ModelSpaceClass, {"choice1": 3})
>>> model = factory(channels=16, classes=10)
"""
# TODO: mutations on ``init_args`` and ``init_kwargs`` themselves are not supported.
def __init__(self, cls: Type, arch: dict):
self.cls = cls
self.arch = arch
def __call__(self, *init_args, **init_kwargs):
with ContextStack('fixed', self.arch):
return self.cls(*init_args, **init_kwargs)
def __repr__(self):
return f'FixedFactory(class={self.cls}, arch={self.arch})'
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Weights available in this file are processed with scripts in https://github.com/ultmaster/spacehub-conversion,
and uploaded with :func:`nni.common.blob_utils.upload_file`.
"""
import os
from nni.common.blob_utils import NNI_BLOB, nni_cache_home, load_or_download_file
PRETRAINED_WEIGHT_URLS = {
# proxylessnas
'acenas-m1': f'{NNI_BLOB}/nashub/acenas-m1-e215f1b8.pth',
'acenas-m2': f'{NNI_BLOB}/nashub/acenas-m2-a8ee9e8f.pth',
'acenas-m3': f'{NNI_BLOB}/nashub/acenas-m3-66a5ed7b.pth',
'proxyless-cpu': f'{NNI_BLOB}/nashub/proxyless-cpu-2df03430.pth',
'proxyless-gpu': f'{NNI_BLOB}/nashub/proxyless-gpu-dbe6dd15.pth',
'proxyless-mobile': f'{NNI_BLOB}/nashub/proxyless-mobile-8668a978.pth',
# mobilenetv3
'mobilenetv3-large-100': f'{NNI_BLOB}/nashub/mobilenetv3-large-100-420e040a.pth',
'mobilenetv3-small-050': f'{NNI_BLOB}/nashub/mobilenetv3-small-050-05cb7a80.pth',
'mobilenetv3-small-075': f'{NNI_BLOB}/nashub/mobilenetv3-small-075-c87d8acb.pth',
'mobilenetv3-small-100': f'{NNI_BLOB}/nashub/mobilenetv3-small-100-8332faac.pth',
'cream-014': f'{NNI_BLOB}/nashub/cream-014-060aea24.pth',
'cream-043': f'{NNI_BLOB}/nashub/cream-043-bec949e1.pth',
'cream-114': f'{NNI_BLOB}/nashub/cream-114-fc272590.pth',
'cream-287': f'{NNI_BLOB}/nashub/cream-287-a0fcba33.pth',
'cream-481': f'{NNI_BLOB}/nashub/cream-481-d85779b6.pth',
'cream-604': f'{NNI_BLOB}/nashub/cream-604-9ee425f7.pth',
# nasnet
'darts-v2': f'{NNI_BLOB}/nashub/darts-v2-5465b0d2.pth',
# spos
'spos': f'{NNI_BLOB}/nashub/spos-0b17f6fc.pth',
}
def load_pretrained_weight(name: str, **kwargs) -> str:
if name not in PRETRAINED_WEIGHT_URLS:
raise ValueError(f'"{name}" do not have a valid pretrained weight file.')
url = PRETRAINED_WEIGHT_URLS[name]
local_path = os.path.join(nni_cache_home(), 'nashub', url.split('/')[-1])
load_or_download_file(local_path, url, **kwargs)
return local_path
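For reference, a sketch of calling load_pretrained_weight directly; the module path nni.retiarii.hub.pytorch.utils.pretrained is inferred from the relative import earlier in this diff and may differ in the final layout:

import torch
from nni.retiarii.hub.pytorch.utils.pretrained import load_pretrained_weight  # path assumed

# Resolves the 'spos' entry in PRETRAINED_WEIGHT_URLS, downloads it into
# nni_cache_home()/nashub if it is not cached yet, and returns the local file path.
weight_file = load_pretrained_weight('spos', download=True, progress=True)
state_dict = torch.load(weight_file)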
...@@ -36,6 +36,8 @@ class Repeat(Mutable):
meaning that the block will be repeated at least ``min`` times and at most ``max`` times.
If a ValueChoice, it should choose from a series of positive integers.
*New in v2.8*: Minimum depth can be 0, but this feature is NOT supported on the graph engine.
Examples
--------
Block() will be deep copied and repeated 3 times. ::
...@@ -123,7 +125,7 @@ class Repeat(Mutable):
self.depth_choice = depth
else:
raise TypeError(f'Unsupported "depth" type: {type(depth)}')
assert self.max_depth >= self.min_depth >= 0 and self.max_depth >= 1, f'Depth of {self.min_depth} to {self.max_depth} is invalid.'
self.blocks = nn.ModuleList(self._replicate_and_instantiate(blocks, self.max_depth))
@property
...@@ -139,13 +141,13 @@ class Repeat(Mutable):
def _replicate_and_instantiate(blocks, repeat):
if not isinstance(blocks, list):
if isinstance(blocks, nn.Module):
blocks = [blocks if i == 0 else copy.deepcopy(blocks) for i in range(repeat)]
else:
blocks = [blocks for _ in range(repeat)]
assert repeat <= len(blocks), f'Not enough blocks to be used. {repeat} expected, only found {len(blocks)}.'
if repeat < len(blocks):
blocks = blocks[:repeat]
if len(blocks) > 0 and not isinstance(blocks[0], nn.Module):
blocks = [b(i) for i, b in enumerate(blocks)]
return blocks
...
...@@ -843,6 +843,27 @@ class Python(GraphIR):
@unittest.skip
def test_valuechoice_getitem_functional_expression(self): ...
def test_repeat_zero(self):
class AddOne(nn.Module):
def forward(self, x):
return x + 1
@model_wrapper
class Net(nn.Module):
def __init__(self):
super().__init__()
self.block = nn.Repeat(AddOne(), (0, 3))
def forward(self, x):
return self.block(x)
model, mutators = self._get_model_with_mutators(Net())
self.assertEqual(len(mutators), 1 + self.repeat_incr + self.value_choice_incr)
samplers = [EnumerateSampler() for _ in range(len(mutators))]
for target in [0, 1, 2, 3]:
new_model = _apply_all_mutators(model, mutators, samplers)
self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all())
def test_hyperparameter_choice(self):
@model_wrapper
class Net(nn.Module):
...
...@@ -13,7 +13,7 @@ import nni
import nni.runtime.platform.test
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.hub.pytorch as searchspace
from nni.retiarii import fixed_arch
from nni.retiarii.execution.utils import _unpack_if_only_one
from nni.retiarii.mutator import InvalidMutation, Sampler
from nni.retiarii.nn.pytorch.mutator import extract_mutation_from_pt_module
...@@ -61,7 +61,7 @@ def _test_searchspace_on_dataset(searchspace, dataset='cifar10', arch=None):
arch = {mut.mutator.label: _unpack_if_only_one(mut.samples) for mut in model.history}
print('Selected model:', arch)
with fixed_arch(arch):
model = model.python_class(**model.python_init_params)
if dataset == 'cifar10':
...