Unverified Commit e8dc4562 authored by Yuge Zhang, committed by GitHub

Searched model zoo in NAS space hub (#4831)

parent fe89e5af
...@@ -18,6 +18,7 @@ schema
scikit-learn >= 0.24.1
scipy < 1.8 ; python_version < "3.8"
scipy ; python_version >= "3.8"
tqdm
typeguard
typing_extensions >= 4.0.0
websockets >= 10.1
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import hashlib
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
import requests
import tqdm
__all__ = ['NNI_BLOB', 'load_or_download_file', 'upload_file', 'nni_cache_home']
# Blob that contains some downloadable files.
NNI_BLOB = 'https://nni.blob.core.windows.net'
# Override these environment vars to move your cache.
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
def nni_cache_home() -> str:
return os.path.expanduser(
os.getenv(ENV_NNI_HOME,
os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
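# Illustrative resolution order (derived from nni_cache_home above):
#   NNI_HOME=/data/nni          -> /data/nni
#   XDG_CACHE_HOME=/tmp/cache   -> /tmp/cache/nni
#   neither set                 -> ~/.cache/nni (with ~ expanded)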
def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True) -> None:
"""Download the ``download_url`` to ``local_path``, and check its hash.
If ``local_path`` already exists, and hash is checked, do nothing.
"""
f = None
hash_prefix = Path(local_path).stem.split('-')[-1]
_logger = logging.getLogger(__name__)
try:
sha256 = hashlib.sha256()
if Path(local_path).exists():
_logger.info('"%s" already exists. Checking hash.', local_path)
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
elif download:
_logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)
# Follow download implementation in torchvision:
# We deliberately save it in a temp file and move it after
# download is complete. This prevents a local working checkpoint
# being overridden by a broken download.
dst_dir = Path(local_path).parent
dst_dir.mkdir(exist_ok=True, parents=True)
f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
r = requests.get(download_url, stream=True)
total_length: Optional[str] = r.headers.get('content-length')
assert total_length is not None, f'Content length is not found in the response of {download_url}'
with tqdm.tqdm(total=int(total_length), disable=not progress,
unit='B', unit_scale=True, unit_divisor=1024) as pbar:
for chunk in r.iter_content(8192):
f.write(chunk)
sha256.update(chunk)
pbar.update(len(chunk))
f.flush()
else:
raise FileNotFoundError(
'Download is not enabled, and file does not exist: {}. Please set download=True.'.format(local_path)
)
digest = sha256.hexdigest()
if not digest.startswith(hash_prefix):
raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))
if f is not None:
shutil.move(f.name, local_path)
finally:
if f is not None:
f.close()
if os.path.exists(f.name):
os.remove(f.name)
def upload_file(local_path: str, destination_path: str, sas_token: str) -> str:
"""For NNI maintainers to add updated static files to the Azure blob easily.
In most cases, you don't need to calculate the hash on your own, it will be automatically inserted.
For example, if you write ``https://xxx.com/myfile.zip``, the uploaded file will look like
``https://xxx.com/myfile-da5f43b7.zip``.
Need to have `azcopy installed <https://docs.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy>`_,
and a SAS token for the destination storage (``?`` should be included as prefix of token).
Returns a string which is the uploaded path.
"""
_logger = logging.getLogger(__name__)
sha256 = hashlib.sha256()
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
digest = sha256.hexdigest()
hash_prefix = digest[:8]
_logger.info('Hash of %s is %s', local_path, digest)
stem, suffix = destination_path.rsplit('.', 1)
if not stem.endswith('-' + hash_prefix):
destination_path = stem + '-' + hash_prefix + '.' + suffix
subprocess.run(['azcopy', 'copy', local_path, destination_path + sas_token], check=True)
return destination_path
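# Illustrative usage of upload_file (hypothetical file, container and token):
#   upload_file('myfile.zip', f'{NNI_BLOB}/somecontainer/myfile.zip', '?sv=...&sig=...')
# uploads via azcopy and returns f'{NNI_BLOB}/somecontainer/myfile-<8-hex-hash>.zip'.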
...@@ -3,24 +3,21 @@ ...@@ -3,24 +3,21 @@
import os import os
from nni.common.blob_utils import NNI_BLOB, nni_cache_home
ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR' ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR'
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
def _get_nasbenchmark_dir(): def _get_nasbenchmark_dir():
nni_home = os.path.expanduser( nni_home = nni_cache_home()
os.getenv(ENV_NNI_HOME,
os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark')) return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark'))
DATABASE_DIR = _get_nasbenchmark_dir() DATABASE_DIR = _get_nasbenchmark_dir()
DB_URLS = { DB_URLS = {
'nasbench101': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench101-209f5694.db', 'nasbench101': f'{NNI_BLOB}/nasbenchmark/nasbench101-209f5694.db',
'nasbench201': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench201-b2b60732.db', 'nasbench201': f'{NNI_BLOB}/nasbenchmark/nasbench201-b2b60732.db',
'nds': 'https://nni.blob.core.windows.net/nasbenchmark/nds-5745c235.db' 'nds': f'{NNI_BLOB}/nasbenchmark/nds-5745c235.db'
} }
...@@ -2,19 +2,13 @@ ...@@ -2,19 +2,13 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import functools import functools
import hashlib
import json import json
import logging
import os import os
import shutil
import tempfile
from pathlib import Path
from typing import Optional
import requests
import tqdm
from playhouse.sqlite_ext import SqliteExtDatabase from playhouse.sqlite_ext import SqliteExtDatabase
from nni.common.blob_utils import load_or_download_file
from .constants import DB_URLS, DATABASE_DIR from .constants import DB_URLS, DATABASE_DIR
...@@ -24,60 +18,6 @@ json_dumps = functools.partial(json.dumps, sort_keys=True) ...@@ -24,60 +18,6 @@ json_dumps = functools.partial(json.dumps, sort_keys=True)
_loaded_benchmarks = {} _loaded_benchmarks = {}
def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True):
f = None
hash_prefix = Path(local_path).stem.split('-')[-1]
_logger = logging.getLogger(__name__)
try:
sha256 = hashlib.sha256()
if Path(local_path).exists():
_logger.info('"%s" already exists. Checking hash.', local_path)
with Path(local_path).open('rb') as fr:
while True:
chunk = fr.read(8192)
if len(chunk) == 0:
break
sha256.update(chunk)
elif download:
_logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)
# Follow download implementation in torchvision:
# We deliberately save it in a temp file and move it after
# download is complete. This prevents a local working checkpoint
# being overridden by a broken download.
dst_dir = Path(local_path).parent
dst_dir.mkdir(exist_ok=True, parents=True)
f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
r = requests.get(download_url, stream=True)
total_length: Optional[str] = r.headers.get('content-length')
assert total_length is not None, f'Content length is not found in the response of {download_url}'
with tqdm.tqdm(total=int(total_length), disable=not progress,
unit='B', unit_scale=True, unit_divisor=1024) as pbar:
for chunk in r.iter_content(8192):
f.write(chunk)
sha256.update(chunk)
pbar.update(len(chunk))
f.flush()
else:
raise FileNotFoundError('Download is not enabled, but file still does not exist: {}'.format(local_path))
digest = sha256.hexdigest()
if not digest.startswith(hash_prefix):
raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))
if f is not None:
shutil.move(f.name, local_path)
finally:
if f is not None:
f.close()
if os.path.exists(f.name):
os.remove(f.name)
def load_benchmark(benchmark: str) -> SqliteExtDatabase: def load_benchmark(benchmark: str) -> SqliteExtDatabase:
""" """
Load a benchmark as a database. Load a benchmark as a database.
......
...@@ -237,7 +237,7 @@ class _SupervisedLearningModule(LightningModule): ...@@ -237,7 +237,7 @@ class _SupervisedLearningModule(LightningModule):
class _AccuracyWithLogits(torchmetrics.Accuracy): class _AccuracyWithLogits(torchmetrics.Accuracy):
def update(self, pred, target): def update(self, pred, target):
return super().update(nn_functional.softmax(pred), target) return super().update(nn_functional.softmax(pred, dim=-1), target)
@nni.trace @nni.trace
......
...@@ -10,20 +10,74 @@ For further motivations and plans, please see https://github.com/microsoft/nni/i
1. Runnable
2. Load checkpoint of searched architecture and evaluate
3. Reproduce "retrain" (i.e., training the searched architecture from scratch)
4. Runnable with built-in algos
5. Reproduce result with at least one algo
| | 1 | 2 | 3 | 4 | 5 |
|------------------------|--------|--------|--------|--------|--------|
| NasBench101 | Y | - | | | |
| NasBench201 | Y | - | | | |
| NASNet | Y | - | | | |
| ENAS | Y | - | | | |
| AmoebaNet | Y | - | | | |
| PNAS | Y | - | | | |
| DARTS | Y | Y | | | |
| ProxylessNAS | Y | Y | | | |
| MobileNetV3Space | Y | Y | | | |
| ShuffleNetSpace | Y | Y | | | |
| ShuffleNetSpace (ch) | Y | - | | | |
* `-`: Result unavailable because no published checkpoints / architectures are available.
* NASNet, ENAS, AmoebaNet, PNAS, DARTS are based on the same implementation, with configuration differences.
* NasBench101 and 201 will directly proceed to stage 3 as it's cheaper to train them than to find a checkpoint.
## Planned Spaces
We welcome suggestions and contributions.
- [AutoFormer](https://openaccess.thecvf.com/content/ICCV2021/html/Chen_AutoFormer_Searching_Transformers_for_Visual_Recognition_ICCV_2021_paper.html), [PR under review](https://github.com/microsoft/nni/pull/4551)
- [NAS-BERT](https://arxiv.org/abs/2105.14444)
- Speech models, such as [LightSpeech](https://arxiv.org/abs/2102.04040)
## Searched Model Zoo
Create a searched model with pretrained weights like the following:
```python
model = MobileNetV3Space.load_searched_model('mobilenetv3-small-075', pretrained=True, download=True)
evaluate(model, imagenet_data)
```
``MobileNetV3Space`` can be replaced with any search space listed above, and ``mobilenetv3-small-075`` can be any model listed below.
See an example of ``evaluate`` [here](https://github.com/rwightman/pytorch-image-models/blob/d30685c283137b4b91ea43c4e595c964cd2cb6f0/train.py#L778).
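If you do not want to pull in a full training script, a minimal sketch of such an ``evaluate`` function is given below (illustrative only, assuming a standard ImageNet ``DataLoader``; the reported numbers follow the evaluation protocols noted in the table, not necessarily this snippet):

```python
import torch

def evaluate(model, dataloader, device='cuda'):
    # Plain top-1 accuracy; preprocessing (resize, crop, interpolation) must match
    # the "Eval Protocol" column below to reproduce the reported numbers.
    model.eval().to(device)
    correct = total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            correct += (model(images).argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return 100.0 * correct / total
```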
| Search space | Model | Dataset | Top-1 Acc. (%) | Eval Protocol |
|------------------|-----------------------|----------|----------------|------------------------------|
| ProxylessNAS | acenas-m1 | ImageNet | 75.176 | Default |
| ProxylessNAS | acenas-m2 | ImageNet | 75.0 | Default |
| ProxylessNAS | acenas-m3 | ImageNet | 75.118 | Default |
| ProxylessNAS | proxyless-cpu | ImageNet | 75.29 | Default |
| ProxylessNAS | proxyless-gpu | ImageNet | 75.084 | Default |
| ProxylessNAS | proxyless-mobile | ImageNet | 74.594 | Default |
| MobileNetV3Space | mobilenetv3-large-100 | ImageNet | 75.768 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-050 | ImageNet | 57.906 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-075 | ImageNet | 65.24 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-100 | ImageNet | 67.652 | Bicubic interpolation |
| MobileNetV3Space | cream-014 | ImageNet | 53.74 | Test image size = 64 |
| MobileNetV3Space | cream-043 | ImageNet | 66.256 | Test image size = 96 |
| MobileNetV3Space | cream-114 | ImageNet | 72.514 | Test image size = 160 |
| MobileNetV3Space | cream-287 | ImageNet | 77.52 | Default |
| MobileNetV3Space | cream-481 | ImageNet | 79.078 | Default |
| MobileNetV3Space | cream-604 | ImageNet | 79.92 | Default |
| DARTS | darts-v2 | CIFAR-10 | 97.37 | Default |
| ShuffleNetSpace | spos | ImageNet | 74.14 | BGR tensor; no normalization |
The metrics listed above are obtained by evaluating the checkpoints provided by the original authors and converted to NNI NAS format with [these scripts](https://github.com/ultmaster/spacehub-conversion). Note that some metrics can be higher / lower than originally reported, because of subtle differences in data preprocessing, operation implementation (e.g., 3rd-party hswish vs ``nn.Hardswish``), or even the library versions we are using. Most of these deviations are acceptable (~0.1%). We will retrain these architectures with reproducible and fair training settings, and update the results once the training is done.
Latency / FLOPs data are not yet included in the table; measuring them is a separate task.
Several more models to be added:
- FBNet on MobileNetV3Space
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT license. # Licensed under the MIT license.
from typing import Tuple, Optional, Callable, cast from functools import partial
from typing import Tuple, Optional, Callable, Union, List, Type, cast
import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from nni.typehint import Literal
from .proxylessnas import ConvBNReLU, InvertedResidual, SeparableConv, make_divisible, reset_parameters from .proxylessnas import ConvBNReLU, InvertedResidual, DepthwiseSeparableConv, make_divisible, reset_parameters
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
class h_sigmoid(nn.Module): class SqueezeExcite(nn.Module):
def __init__(self, inplace=True): """Squeeze-and-excite layer.
super(h_sigmoid, self).__init__()
self.relu = nn.ReLU6(inplace=inplace)
def forward(self, x): We can't use the op from ``torchvision.ops`` because it's not (yet) properly wrapped,
return self.relu(x + 3) / 6 and ValueChoice couldn't be processed.
class h_swish(nn.Module):
def __init__(self, inplace=True):
super(h_swish, self).__init__()
self.sigmoid = h_sigmoid(inplace=inplace)
def forward(self, x):
return x * self.sigmoid(x)
Reference:
class SELayer(nn.Module): - https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L26
"""Squeeze-and-excite layer.""" - https://github.com/d-li14/mobilenetv3.pytorch/blob/3e6938cedcbbc5ee5bc50780ea18e644702d85fc/mobilenetv3.py#L53
"""
def __init__(self, def __init__(self,
channels: int, channels: int,
reduction: int = 4, reduction_ratio: float = 0.25,
gate_layer: Optional[Callable[..., nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None): activation_layer: Optional[Callable[..., nn.Module]] = None):
super().__init__() super().__init__()
if activation_layer is None:
activation_layer = nn.Sigmoid rd_channels = make_divisible(channels * reduction_ratio, 8)
self.avg_pool = nn.AdaptiveAvgPool2d(1) gate_layer = gate_layer or nn.Hardsigmoid
self.fc = nn.Sequential( activation_layer = activation_layer or nn.ReLU
nn.Linear(channels, make_divisible(channels // reduction, 8)), self.conv_reduce = nn.Conv2d(channels, rd_channels, 1, bias=True)
nn.ReLU(inplace=True), self.act1 = activation_layer(inplace=True)
nn.Linear(make_divisible(channels // reduction, 8), channels), self.conv_expand = nn.Conv2d(rd_channels, channels, 1, bias=True)
activation_layer() self.gate = gate_layer()
)
def forward(self, x): def forward(self, x):
b, c, _, _ = x.size() x_se = x.mean((2, 3), keepdim=True)
y = self.avg_pool(x).view(b, c) x_se = self.conv_reduce(x_se)
y = self.fc(y).view(b, c, 1, 1) x_se = self.act1(x_se)
return x * y x_se = self.conv_expand(x_se)
return x * self.gate(x_se)
def _se_or_skip(hidden_ch: int, input_ch: int, optional: bool, se_from_exp: bool, label: str) -> nn.Module:
ch = hidden_ch if se_from_exp else input_ch
if optional:
return nn.LayerChoice({
'identity': nn.Identity(),
'se': SqueezeExcite(ch)
}, label=label)
else:
return SqueezeExcite(ch)
def _act_fn(act_alias: Literal['hswish', 'swish', 'relu']) -> Type[nn.Module]:
if act_alias == 'hswish':
return nn.Hardswish
elif act_alias == 'swish':
return nn.SiLU
elif act_alias == 'relu':
return nn.ReLU
else:
raise ValueError(f'Unsupported act alias: {act_alias}')
@model_wrapper @model_wrapper
...@@ -64,92 +83,582 @@ class MobileNetV3Space(nn.Module): ...@@ -64,92 +83,582 @@ class MobileNetV3Space(nn.Module):
We use the following snippet as a reference.
https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/mobile_search_space_v3.py#L728
We have ``num_blocks``, which equals the length of ``self.blocks`` (the main body of the network).
For simplicity, the following parameter specification assumes ``num_blocks`` equals 8 (body + head).
If a shallower body is intended, arrays including ``base_widths``, ``squeeze_excite``, ``depth_range``,
``stride``, ``activation`` should also be shortened accordingly.
Parameters
----------
num_labels
Dimensions for classification head.
base_widths
Widths of each stage, from stem, to body, to head.
Length should be 9, i.e., ``num_blocks + 1`` (because there is a stem width in front).
width_multipliers
A range of widths multiplier to choose from. The choice is independent for each stage.
Or it can be a fixed float. This will be applied on ``base_widths``,
and we would also make sure that widths can be divided by 8.
expand_ratios
A list of expand ratios to choose from. Independent for every **block**.
squeeze_excite
Indicating whether the current stage can have an optional SE layer.
Expect array of length 6 for stage 0 to 5. Each element can be one of ``force``, ``optional``, ``none``.
depth_range
A range (e.g., ``(1, 4)``),
or a list of range (e.g., ``[(1, 3), (1, 4), (1, 4), (1, 3), (0, 2)]``).
If a list, the length should be 5. The depth are specified for stage 1 to 5.
stride
Stride for all stages (including stem and head). Length should be same as ``base_widths``.
activation
Activation (class) for all stages. Length is same as ``base_widths``.
se_from_exp
Calculate SE channel reduction from expanded (mid) channels.
dropout_rate
Dropout rate at classification head.
bn_eps
Epsilon of batch normalization.
bn_momentum
Momentum of batch normalization.
""" """
def __init__(self, num_labels: int = 1000, widths: List[Union[nn.ChoiceOf[int], int]]
base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024), depth_range: List[Tuple[int, int]]
width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6), def __init__(
dropout_rate: float = 0.2, self, num_labels: int = 1000,
bn_eps: float = 1e-3, base_widths: Tuple[int, ...] = (16, 16, 16, 32, 64, 128, 256, 512, 1024),
bn_momentum: float = 0.1): width_multipliers: Union[Tuple[float, ...], float] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
expand_ratios: Tuple[float, ...] = (1., 2., 3., 4., 5., 6.),
squeeze_excite: Tuple[Literal['force', 'optional', 'none'], ...] = (
'none', 'none', 'optional', 'none', 'optional', 'optional'
),
depth_range: Union[List[Tuple[int, int]], Tuple[int, int]] = (1, 4),
stride: Tuple[int, ...] = (2, 1, 2, 2, 2, 1, 2, 1, 1),
activation: Tuple[Literal['hswish', 'swish', 'relu'], ...] = (
'hswish', 'relu', 'relu', 'relu', 'hswish', 'hswish', 'hswish', 'hswish', 'hswish'
),
se_from_exp: bool = True,
dropout_rate: float = 0.2,
bn_eps: float = 1e-3,
bn_momentum: float = 0.1
):
super().__init__() super().__init__()
self.widths = cast(nn.ChoiceOf[int], [ self.num_blocks = len(base_widths) - 1 # without stem, equal to len(self.blocks)
nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers], label=f'width_{i}') assert self.num_blocks >= 4
for i, base_width in enumerate(base_widths)
]) assert len(base_widths) == len(stride) == len(activation) == self.num_blocks + 1
# The final two blocks can't have SE
assert len(squeeze_excite) == self.num_blocks - 2 and all(se in ['force', 'optional', 'none'] for se in squeeze_excite)
# The first and final two blocks can't have variational depth
if isinstance(depth_range[0], int):
depth_range = cast(Tuple[int, int], depth_range)
assert len(depth_range) == 2 and depth_range[1] >= depth_range[0] >= 1
self.depth_range = [depth_range] * (self.num_blocks - 3)
else:
assert len(depth_range) == self.num_blocks - 3
self.depth_range = cast(List[Tuple[int, int]], depth_range)
for d in self.depth_range:
d = cast(Tuple[int, int], d)
# pylint: disable=unsubscriptable-object
assert len(d) == 2 and d[1] >= d[0] >= 1, f'{d} does not satisfy depth constraints'
self.widths = []
for i, base_width in enumerate(base_widths):
if isinstance(width_multipliers, float):
self.widths.append(make_divisible(base_width * width_multipliers, 8))
else:
self.widths.append(
# According to tunas, stem and stage 0 share one width multiplier
# https://github.com/google-research/google-research/blob/20736344/tunas/mobile_search_space_v3.py#L791
make_divisible(
nn.ValueChoice(list(width_multipliers), label=f's{max(i - 1, 0)}_width_mult') * base_width, 8
)
)
self.expand_ratios = expand_ratios self.expand_ratios = expand_ratios
self.se_from_exp = se_from_exp
blocks = [ # NOTE: The built-in hardswish produces slightly different output from 3rd-party implementation
# Stem # But I guess it doesn't really matter.
ConvBNReLU( # https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/layers/activations.py#L79
3, self.widths[0],
nn.ValueChoice([3, 5], label='ks_0'), self.stem = ConvBNReLU(
stride=2, activation_layer=h_swish 3, self.widths[0],
nn.ValueChoice([3, 5], label=f'stem_ks'),
stride=stride[0], activation_layer=_act_fn(activation[0])
)
blocks: List[nn.Module] = [
# Stage 0
# FIXME: this should be an optional layer.
# https://github.com/google-research/google-research/blob/20736344/tunas/mobile_search_space_v3.py#L791
DepthwiseSeparableConv(
self.widths[0], self.widths[1],
nn.ValueChoice([3, 5, 7], label=f's0_i0_ks'),
stride=stride[1],
squeeze_excite=cast(Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module], partial(
_se_or_skip, optional=squeeze_excite[0] == 'optional', se_from_exp=self.se_from_exp, label=f's0_i0_se'
)) if squeeze_excite[0] != 'none' else None,
activation_layer=_act_fn(activation[1])
), ),
SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
] ]
# counting for kernel sizes and expand ratios
self.layer_count = 2
blocks += [ blocks += [
# Body # Stage 1-5 (by default)
self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU), self._make_stage(i, self.widths[i], self.widths[i + 1], squeeze_excite[i], stride[i + 1], _act_fn(activation[i + 1]))
self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU), for i in range(1, self.num_blocks - 2)
self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
] ]
# Head # Head
blocks += [ blocks += [
ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish), ConvBNReLU(
self.widths[self.num_blocks - 2],
self.widths[self.num_blocks - 1],
kernel_size=1,
stride=stride[self.num_blocks - 1],
activation_layer=_act_fn(activation[self.num_blocks - 1])
),
nn.AdaptiveAvgPool2d(1), nn.AdaptiveAvgPool2d(1),
ConvBNReLU(self.widths[6], self.widths[7], 1, 1, norm_layer=nn.Identity, activation_layer=h_swish), # In some implementation, this is a linear instead.
# Should be equivalent.
ConvBNReLU(
self.widths[self.num_blocks - 1],
self.widths[self.num_blocks],
kernel_size=1,
stride=stride[self.num_blocks],
norm_layer=nn.Identity,
activation_layer=_act_fn(activation[self.num_blocks])
)
] ]
self.blocks = nn.Sequential(*blocks) self.blocks = nn.Sequential(*blocks)
self.classifier = nn.Sequential( self.classifier = nn.Sequential(
nn.Dropout(dropout_rate), nn.Dropout(dropout_rate),
nn.Linear(cast(int, self.widths[7]), num_labels), nn.Linear(cast(int, self.widths[self.num_blocks]), num_labels),
) )
reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps) reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def forward(self, x): def forward(self, x):
x = self.stem(x)
x = self.blocks(x) x = self.blocks(x)
x = x.view(x.size(0), -1) x = x.view(x.size(0), -1)
x = self.classifier(x) x = self.classifier(x)
return x return x
def _make_stage(self, stage_idx, inp, oup, se, stride, act): def _make_stage(self, stage_idx, inp, oup, se, stride, act):
# initialize them first because they are related to layer_count. def layer_builder(idx):
exp, ks, se_blocks = [], [], [] exp = nn.ValueChoice(list(self.expand_ratios), label=f's{stage_idx}_i{idx}_exp')
for _ in range(4): ks = nn.ValueChoice([3, 5, 7], label=f's{stage_idx}_i{idx}_ks')
exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}')) # if SE is true, assign a layer choice to SE
ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}')) se_or_skip = cast(Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module], partial(
if se: _se_or_skip, optional=se == 'optional', se_from_exp=self.se_from_exp, label=f's{stage_idx}_i{idx}_se'
# if SE is true, assign a layer choice to SE )) if se != 'none' else None
se_blocks.append( return InvertedResidual(
lambda hidden_ch: nn.LayerChoice([nn.Identity(), SELayer(hidden_ch)], label=f'se_{self.layer_count}') inp if idx == 0 else oup,
) oup, exp, ks,
else: stride=stride if idx == 0 else 1, # only the first layer in each stage can have stride > 1
se_blocks.append(None) squeeze_excite=se_or_skip,
self.layer_count += 1 activation_layer=act,
)
blocks = [
# stride = 2
InvertedResidual(inp, oup, exp[0], ks[0],
stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
# stride = 1, residual connection should be automatically enabled
InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
]
# mutable depth # mutable depth
return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}') min_depth, max_depth = self.depth_range[stage_idx - 1]
if stride != 1:
min_depth = max(min_depth, 1)
return nn.Repeat(layer_builder, depth=(min_depth, max_depth), label=f's{stage_idx}_depth')
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'mobilenetv3-large-100':
# NOTE: Use bicubic interpolation to evaluate this
# With default interpolation, it yields top-1 75.722
arch = {
'stem_ks': 3,
's0_i0_ks': 3,
's1_depth': 2,
's1_i0_exp': 4,
's1_i0_ks': 3,
's1_i1_exp': 3,
's1_i1_ks': 3,
's2_depth': 3,
's2_i0_exp': 3,
's2_i0_ks': 5,
's2_i1_exp': 3,
's2_i1_ks': 5,
's2_i2_exp': 3,
's2_i2_ks': 5,
's3_depth': 4,
's3_i0_exp': 6,
's3_i0_ks': 3,
's3_i1_exp': 2.5,
's3_i1_ks': 3,
's3_i2_exp': 2.3,
's3_i2_ks': 3,
's3_i3_exp': 2.3,
's3_i3_ks': 3,
's4_depth': 2,
's4_i0_exp': 6,
's4_i0_ks': 3,
's4_i1_exp': 6,
's4_i1_ks': 3,
's5_depth': 3,
's5_i0_exp': 6,
's5_i0_ks': 5,
's5_i1_exp': 6,
's5_i1_ks': 5,
's5_i2_exp': 6,
's5_i2_ks': 5,
}
init_kwargs.update(
base_widths=[16, 16, 24, 40, 80, 112, 160, 960, 1280],
expand_ratios=[1.0, 2.0, 2.3, 2.5, 3.0, 4.0, 6.0],
bn_eps=1e-5,
bn_momentum=0.1,
width_multipliers=1.0,
squeeze_excite=['none', 'none', 'force', 'none', 'force', 'force']
)
elif name.startswith('mobilenetv3-small-'):
# Evaluate with bicubic interpolation
multiplier = int(name.split('-')[-1]) / 100
widths = [16, 16, 24, 40, 48, 96, 576, 1024]
for i in range(7):
if i > 0 or multiplier >= 0.75:
# fix_stem = True when multiplier < 0.75
# https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/mobilenetv3.py#L421
widths[i] = make_divisible(widths[i] * multiplier, 8)
init_kwargs.update(
base_widths=widths,
width_multipliers=1.0,
expand_ratios=[3.0, 3.67, 4.0, 4.5, 6.0],
bn_eps=1e-05,
bn_momentum=0.1,
squeeze_excite=['force', 'none', 'force', 'force', 'force'],
activation=['hswish', 'relu', 'relu', 'hswish', 'hswish', 'hswish', 'hswish', 'hswish'],
stride=[2, 2, 2, 2, 1, 2, 1, 1],
depth_range=(1, 2),
)
arch = {
'stem_ks': 3,
's0_i0_ks': 3,
's1_depth': 2,
's1_i0_exp': 4.5,
's1_i0_ks': 3,
's1_i1_exp': 3.67,
's1_i1_ks': 3,
's2_depth': 3,
's2_i0_exp': 4.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's3_depth': 2,
's3_i0_exp': 3.0,
's3_i0_ks': 5,
's3_i1_exp': 3.0,
's3_i1_ks': 5,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5
}
elif name.startswith('cream'):
# https://github.com/microsoft/Cream/tree/main/Cream
# bilinear interpolation
level = name.split('-')[-1]
# region cream arch specification
if level == '014':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's4_depth': 1,
's4_i0_exp': 6.0,
's4_i0_ks': 3,
's5_depth': 1,
's5_i0_exp': 6.0,
's5_i0_ks': 5
}
elif level == '043':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 3,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 3,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's5_depth': 2,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5
}
elif level == '114':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 2,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's4_depth': 3,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's5_depth': 2,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5
}
elif level == '287':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 1,
's1_i0_exp': 4.0,
's1_i0_ks': 3,
's2_depth': 2,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 6.0,
's2_i1_ks': 5,
's3_depth': 3,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 3,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's4_depth': 4,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's4_i3_exp': 6.0,
's4_i3_ks': 5,
's5_depth': 3,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 6.0,
's5_i2_ks': 5
}
elif level == '481':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 4,
's1_i0_exp': 6.0,
's1_i0_ks': 5,
's1_i1_exp': 4.0,
's1_i1_ks': 7,
's1_i2_exp': 6.0,
's1_i2_ks': 5,
's1_i3_exp': 6.0,
's1_i3_ks': 3,
's2_depth': 4,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 4.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's2_i3_exp': 4.0,
's2_i3_ks': 3,
's3_depth': 5,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 6.0,
's3_i1_ks': 5,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's3_i3_exp': 6.0,
's3_i3_ks': 3,
's3_i4_exp': 6.0,
's3_i4_ks': 3,
's4_depth': 4,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 6.0,
's4_i2_ks': 5,
's4_i3_exp': 6.0,
's4_i3_ks': 5,
's5_depth': 4,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 6.0,
's5_i2_ks': 5,
's5_i3_exp': 6.0,
's5_i3_ks': 5
}
elif level == '604':
arch = {
'stem_ks': 3,
's0_depth': 1,
's0_i0_ks': 3,
's1_depth': 5,
's1_i0_exp': 6.0,
's1_i0_ks': 5,
's1_i1_exp': 6.0,
's1_i1_ks': 5,
's1_i2_exp': 4.0,
's1_i2_ks': 5,
's1_i3_exp': 6.0,
's1_i3_ks': 5,
's1_i4_exp': 6.0,
's1_i4_ks': 5,
's2_depth': 5,
's2_i0_exp': 6.0,
's2_i0_ks': 5,
's2_i1_exp': 4.0,
's2_i1_ks': 5,
's2_i2_exp': 6.0,
's2_i2_ks': 5,
's2_i3_exp': 4.0,
's2_i3_ks': 5,
's2_i4_exp': 6.0,
's2_i4_ks': 5,
's3_depth': 5,
's3_i0_exp': 6.0,
's3_i0_ks': 5,
's3_i1_exp': 4.0,
's3_i1_ks': 5,
's3_i2_exp': 6.0,
's3_i2_ks': 5,
's3_i3_exp': 4.0,
's3_i3_ks': 5,
's3_i4_exp': 6.0,
's3_i4_ks': 5,
's4_depth': 6,
's4_i0_exp': 6.0,
's4_i0_ks': 5,
's4_i1_exp': 6.0,
's4_i1_ks': 5,
's4_i2_exp': 4.0,
's4_i2_ks': 5,
's4_i3_exp': 4.0,
's4_i3_ks': 5,
's4_i4_exp': 6.0,
's4_i4_ks': 5,
's4_i5_exp': 6.0,
's4_i5_ks': 5,
's5_depth': 6,
's5_i0_exp': 6.0,
's5_i0_ks': 5,
's5_i1_exp': 6.0,
's5_i1_ks': 5,
's5_i2_exp': 4.0,
's5_i2_ks': 5,
's5_i3_exp': 6.0,
's5_i3_ks': 5,
's5_i4_exp': 6.0,
's5_i4_ks': 5,
's5_i5_exp': 6.0,
's5_i5_ks': 5
}
else:
raise ValueError(f'Unsupported cream model level: {level}')
# endregion
init_kwargs.update(
base_widths=[16, 16, 24, 40, 80, 96, 192, 320, 1280],
width_multipliers=1.0,
expand_ratios=[4.0, 6.0],
bn_eps=1e-5,
bn_momentum=0.1,
squeeze_excite=['force'] * 6,
activation=['swish'] * 9
)
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
...@@ -8,6 +8,7 @@ It's called ``nasnet.py`` simply because NASNet is the first to propose such str ...@@ -8,6 +8,7 @@ It's called ``nasnet.py`` simply because NASNet is the first to propose such str
""" """
from collections import OrderedDict from collections import OrderedDict
from functools import partial
from typing import Tuple, List, Union, Iterable, Dict, Callable, Optional, cast from typing import Tuple, List, Union, Iterable, Dict, Callable, Optional, cast
try: try:
...@@ -20,6 +21,9 @@ import torch ...@@ -20,6 +21,9 @@ import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
# the following are NAS operations from # the following are NAS operations from
# https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py # https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py
...@@ -300,15 +304,26 @@ class CellBuilder: ...@@ -300,15 +304,26 @@ class CellBuilder:
self.last_cell_reduce = last_cell_reduce self.last_cell_reduce = last_cell_reduce
self._expect_idx = 0 self._expect_idx = 0
def __call__(self, repeat_idx: int):
if self._expect_idx != repeat_idx:
raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')
# It takes an index that is the index in the repeat. # It takes an index that is the index in the repeat.
# Number of predecessors for each cell is fixed to 2. # Number of predecessors for each cell is fixed to 2.
num_predecessors = 2 self.num_predecessors = 2
# Number of ops per node is fixed to 2. # Number of ops per node is fixed to 2.
num_ops_per_node = 2 self.num_ops_per_node = 2
def op_factory(self, node_index: int, op_index: int, input_index: Optional[int], *,
op: str, channels: int, is_reduction_cell: bool):
if is_reduction_cell and (
input_index is None or input_index < self.num_predecessors
): # could be None when constructing search space
stride = 2
else:
stride = 1
return OPS[op](channels, stride, True)
def __call__(self, repeat_idx: int):
if self._expect_idx != repeat_idx:
raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')
# Reduction cell means stride = 2 and channel multiplied by 2. # Reduction cell means stride = 2 and channel multiplied by 2.
is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce
...@@ -316,16 +331,11 @@ class CellBuilder: ...@@ -316,16 +331,11 @@ class CellBuilder:
# self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built. # self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built.
preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce) preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce)
ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = { ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = {}
op: # make final chosen ops named with their aliases for op in self.op_candidates:
lambda node_index, op_index, input_index: ops_factory[op] = partial(self.op_factory, op=op, channels=cast(int, self.C), is_reduction_cell=is_reduction_cell)
OPS[op](self.C, 2 if is_reduction_cell and (
input_index is None or input_index < num_predecessors # could be none when constructing search sapce
) else 1, True)
for op in self.op_candidates
}
cell = nn.Cell(ops_factory, self.num_nodes, num_ops_per_node, num_predecessors, self.merge_op, cell = nn.Cell(ops_factory, self.num_nodes, self.num_ops_per_node, self.num_predecessors, self.merge_op,
preprocessor=preprocessor, postprocessor=CellPostprocessor(), preprocessor=preprocessor, postprocessor=CellPostprocessor(),
label='reduce' if is_reduction_cell else 'normal') label='reduce' if is_reduction_cell else 'normal')
...@@ -401,7 +411,7 @@ class NDS(nn.Module): ...@@ -401,7 +411,7 @@ class NDS(nn.Module):
self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells) self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
if isinstance(num_cells, Iterable): if isinstance(num_cells, Iterable):
self.num_cells = nn.ValueChoice(list(num_cells), label='depth') self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
num_cells_per_stage = [i * self.num_cells // 3 - (i - 1) * self.num_cells // 3 for i in range(3)] num_cells_per_stage = [(i + 1) * self.num_cells // 3 - i * self.num_cells // 3 for i in range(3)]
# auxiliary head is different for networks targeted at different datasets
if dataset == 'imagenet': if dataset == 'imagenet':
...@@ -501,6 +511,10 @@ class NDS(nn.Module): ...@@ -501,6 +511,10 @@ class NDS(nn.Module):
if isinstance(module, DropPath_): if isinstance(module, DropPath_):
module.drop_prob = drop_prob module.drop_prob = drop_prob
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@model_wrapper @model_wrapper
class NASNet(NDS): class NASNet(NDS):
...@@ -676,3 +690,64 @@ class DARTS(NDS): ...@@ -676,3 +690,64 @@ class DARTS(NDS):
num_cells=num_cells, num_cells=num_cells,
dataset=dataset, dataset=dataset,
auxiliary_loss=auxiliary_loss) auxiliary_loss=auxiliary_loss)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'darts-v2':
init_kwargs.update(
num_cells=20,
width=36,
)
arch = {
'normal/op_2_0': 'sep_conv_3x3',
'normal/op_2_1': 'sep_conv_3x3',
'normal/input_2_0': 0,
'normal/input_2_1': 1,
'normal/op_3_0': 'sep_conv_3x3',
'normal/op_3_1': 'sep_conv_3x3',
'normal/input_3_0': 0,
'normal/input_3_1': 1,
'normal/op_4_0': 'sep_conv_3x3',
'normal/op_4_1': 'skip_connect',
'normal/input_4_0': 1,
'normal/input_4_1': 0,
'normal/op_5_0': 'skip_connect',
'normal/op_5_1': 'dil_conv_3x3',
'normal/input_5_0': 0,
'normal/input_5_1': 2,
'reduce/op_2_0': 'max_pool_3x3',
'reduce/op_2_1': 'max_pool_3x3',
'reduce/input_2_0': 0,
'reduce/input_2_1': 1,
'reduce/op_3_0': 'skip_connect',
'reduce/op_3_1': 'max_pool_3x3',
'reduce/input_3_0': 2,
'reduce/input_3_1': 1,
'reduce/op_4_0': 'max_pool_3x3',
'reduce/op_4_1': 'skip_connect',
'reduce/input_4_0': 0,
'reduce/input_4_1': 2,
'reduce/op_5_0': 'skip_connect',
'reduce/op_5_1': 'max_pool_3x3',
'reduce/input_5_0': 2,
'reduce/input_5_1': 1
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
...@@ -2,12 +2,15 @@ ...@@ -2,12 +2,15 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import math import math
from typing import Optional, Callable, List, Tuple, cast from typing import Optional, Callable, List, Tuple, Iterator, cast
import torch import torch
import nni.retiarii.nn.pytorch as nn import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
def make_divisible(v, divisor, min_val=None): def make_divisible(v, divisor, min_val=None):
""" """
...@@ -24,6 +27,22 @@ def make_divisible(v, divisor, min_val=None): ...@@ -24,6 +27,22 @@ def make_divisible(v, divisor, min_val=None):
return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v) return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v)
def simplify_sequential(sequentials: List[nn.Module]) -> Iterator[nn.Module]:
"""
Flatten the sequential blocks so that the hierarchy looks better.
Eliminate identity modules automatically.
"""
for module in sequentials:
if isinstance(module, nn.Sequential):
for submodule in module.children():
# no recursive expansion
if not isinstance(submodule, nn.Identity):
yield submodule
else:
if not isinstance(module, nn.Identity):
yield module
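# Illustrative example (assumption): one level of flattening, identities dropped.
#   list(simplify_sequential([nn.Sequential(nn.ReLU(), nn.Identity()), nn.Identity(), nn.Conv2d(3, 8, 1)]))
#   -> [nn.ReLU(), nn.Conv2d(3, 8, 1)]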
class ConvBNReLU(nn.Sequential): class ConvBNReLU(nn.Sequential):
""" """
The template for a conv-bn-relu block. The template for a conv-bn-relu block.
...@@ -45,7 +64,11 @@ class ConvBNReLU(nn.Sequential): ...@@ -45,7 +64,11 @@ class ConvBNReLU(nn.Sequential):
norm_layer = nn.BatchNorm2d norm_layer = nn.BatchNorm2d
if activation_layer is None: if activation_layer is None:
activation_layer = nn.ReLU6 activation_layer = nn.ReLU6
super().__init__( # If no normalization is used, set bias to True
# https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L194
norm = norm_layer(cast(int, out_channels))
no_normalization = isinstance(norm, nn.Identity)
blocks: List[nn.Module] = [
nn.Conv2d( nn.Conv2d(
cast(int, in_channels), cast(int, in_channels),
cast(int, out_channels), cast(int, out_channels),
...@@ -54,18 +77,30 @@ class ConvBNReLU(nn.Sequential): ...@@ -54,18 +77,30 @@ class ConvBNReLU(nn.Sequential):
cast(int, padding), cast(int, padding),
dilation=dilation, dilation=dilation,
groups=cast(int, groups), groups=cast(int, groups),
bias=False bias=no_normalization
), ),
norm_layer(cast(int, out_channels)), # Normalization, regardless of batchnorm or identity
norm,
# One pytorch implementation as an SE here, to faithfully reproduce paper
# We follow a more accepted approach to put SE outside
# Reference: https://github.com/d-li14/mobilenetv3.pytorch/issues/18
activation_layer(inplace=True) activation_layer(inplace=True)
) ]
super().__init__(*simplify_sequential(blocks))
self.out_channels = out_channels self.out_channels = out_channels
class SeparableConv(nn.Sequential): class DepthwiseSeparableConv(nn.Sequential):
""" """
In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1. In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1.
Residual connection is added if input and output shape are the same. Residual connection is added if input and output shape are the same.
References:
- https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L90
- https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L433
- https://github.com/ultmaster/AceNAS/blob/46c8895f/searchspace/proxylessnas/utils.py#L100
""" """
def __init__( def __init__(
...@@ -74,20 +109,24 @@ class SeparableConv(nn.Sequential): ...@@ -74,20 +109,24 @@ class SeparableConv(nn.Sequential):
out_channels: nn.MaybeChoice[int], out_channels: nn.MaybeChoice[int],
kernel_size: nn.MaybeChoice[int] = 3, kernel_size: nn.MaybeChoice[int] = 3,
stride: int = 1, stride: int = 1,
squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
norm_layer: Optional[Callable[[int], nn.Module]] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None: ) -> None:
super().__init__( blocks = [
# dw # dw
ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels, ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels,
norm_layer=norm_layer, activation_layer=activation_layer), norm_layer=norm_layer, activation_layer=activation_layer),
# optional se
squeeze_excite(in_channels, in_channels) if squeeze_excite else nn.Identity(),
# pw-linear # pw-linear
ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity) ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity)
) ]
self.residual_connection = stride == 1 and in_channels == out_channels super().__init__(*simplify_sequential(blocks))
self.has_skip = stride == 1 and in_channels == out_channels
def forward(self, x: torch.Tensor) -> torch.Tensor: def forward(self, x: torch.Tensor) -> torch.Tensor:
if self.residual_connection: if self.has_skip:
return x + super().forward(x) return x + super().forward(x)
else: else:
return super().forward(x) return super().forward(x)
...@@ -97,14 +136,17 @@ class InvertedResidual(nn.Sequential): ...@@ -97,14 +136,17 @@ class InvertedResidual(nn.Sequential):
""" """
An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models
that uses an inverted structure for efficiency reasons. that uses an inverted structure for efficiency reasons.
It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture. It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture.
It has since been reused for several mobile-optimized CNNs. It has since been reused for several mobile-optimized CNNs.
It follows a narrow -> wide -> narrow approach, hence the inversion. It follows a narrow -> wide -> narrow approach, hence the inversion.
It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters), It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters),
then a 1x1 convolution is used to reduce the number of channels so input and output can be added. then a 1x1 convolution is used to reduce the number of channels so input and output can be added.
Follow implementation of: This implementation is sort of a mixture between:
https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/rematlib/mobile_model_v3.py#L453
- https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L453
- https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L134
""" """
def __init__( def __init__(
...@@ -114,7 +156,7 @@ class InvertedResidual(nn.Sequential): ...@@ -114,7 +156,7 @@ class InvertedResidual(nn.Sequential):
expand_ratio: nn.MaybeChoice[float], expand_ratio: nn.MaybeChoice[float],
kernel_size: nn.MaybeChoice[int] = 3, kernel_size: nn.MaybeChoice[int] = 3,
stride: int = 1, stride: int = 1,
squeeze_and_excite: Optional[Callable[[nn.MaybeChoice[int]], nn.Module]] = None, squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
norm_layer: Optional[Callable[[int], nn.Module]] = None, norm_layer: Optional[Callable[[int], nn.Module]] = None,
activation_layer: Optional[Callable[..., nn.Module]] = None, activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None: ) -> None:
...@@ -123,11 +165,10 @@ class InvertedResidual(nn.Sequential): ...@@ -123,11 +165,10 @@ class InvertedResidual(nn.Sequential):
self.out_channels = out_channels self.out_channels = out_channels
assert stride in [1, 2] assert stride in [1, 2]
hidden_ch = nn.ValueChoice.to_int(round(cast(int, in_channels * expand_ratio))) hidden_ch = cast(int, make_divisible(in_channels * expand_ratio, 8))
# FIXME: check whether this equal works # NOTE: this equivalence check should also work for ValueChoice
# Residual connection is added here stride = 1 and input channels and output channels are the same. self.has_skip = stride == 1 and in_channels == out_channels
self.residual_connection = stride == 1 and in_channels == out_channels
layers: List[nn.Module] = [ layers: List[nn.Module] = [
# point-wise convolution # point-wise convolution
...@@ -138,21 +179,20 @@ class InvertedResidual(nn.Sequential): ...@@ -138,21 +179,20 @@ class InvertedResidual(nn.Sequential):
norm_layer=norm_layer, activation_layer=activation_layer), norm_layer=norm_layer, activation_layer=activation_layer),
# depth-wise # depth-wise
ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch, ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
norm_layer=norm_layer, activation_layer=activation_layer) norm_layer=norm_layer, activation_layer=activation_layer),
] # SE
squeeze_excite(
if squeeze_and_excite: cast(int, hidden_ch),
layers.append(squeeze_and_excite(hidden_ch)) cast(int, in_channels)
) if squeeze_excite is not None else nn.Identity(),
layers += [
# pw-linear # pw-linear
ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity) ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity),
] ]
super().__init__(*layers) super().__init__(*simplify_sequential(layers))
def forward(self, x: torch.Tensor) -> torch.Tensor: def forward(self, x: torch.Tensor) -> torch.Tensor:
if self.residual_connection: if self.has_skip:
return x + super().forward(x) return x + super().forward(x)
else: else:
return super().forward(x) return super().forward(x)
...@@ -199,7 +239,9 @@ class ProxylessNAS(nn.Module): ...@@ -199,7 +239,9 @@ class ProxylessNAS(nn.Module):
Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer
is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means, is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means,
when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared. when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared.
We note that ``MobileNetV3Space`` is different in this perspective. We note that :class:`MobileNetV3Space` is different in this perspective.
This space could be implemented as part of :class:`MobileNetV3Space`, but we keep them separate, following convention.
""" """
def __init__(self, num_labels: int = 1000, def __init__(self, num_labels: int = 1000,
...@@ -221,11 +263,11 @@ class ProxylessNAS(nn.Module): ...@@ -221,11 +263,11 @@ class ProxylessNAS(nn.Module):
self.bn_eps = bn_eps self.bn_eps = bn_eps
self.bn_momentum = bn_momentum self.bn_momentum = bn_momentum
self.first_conv = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d) self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)
blocks: List[nn.Module] = [ blocks: List[nn.Module] = [
# first stage is fixed # first stage is fixed
SeparableConv(widths[0], widths[1], kernel_size=3, stride=1) DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
] ]
# https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21 # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
...@@ -234,7 +276,7 @@ class ProxylessNAS(nn.Module): ...@@ -234,7 +276,7 @@ class ProxylessNAS(nn.Module):
# we return a builder that dynamically creates module for different `repeat_idx`. # we return a builder that dynamically creates module for different `repeat_idx`.
builder = inverted_residual_choice_builder( builder = inverted_residual_choice_builder(
[3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}') [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
if stage < 6: if stage < 7:
blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth')) blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
else: else:
# No mutation for depth in the last stage. # No mutation for depth in the last stage.
...@@ -252,7 +294,7 @@ class ProxylessNAS(nn.Module): ...@@ -252,7 +294,7 @@ class ProxylessNAS(nn.Module):
reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps) reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def forward(self, x): def forward(self, x):
x = self.first_conv(x) x = self.stem(x)
x = self.blocks(x) x = self.blocks(x)
x = self.feature_mix_layer(x) x = self.feature_mix_layer(x)
x = self.global_avg_pooling(x) x = self.global_avg_pooling(x)
...@@ -268,6 +310,193 @@ class ProxylessNAS(nn.Module): ...@@ -268,6 +310,193 @@ class ProxylessNAS(nn.Module):
return {'classifier.weight', 'classifier.bias'} return {'classifier.weight', 'classifier.bias'}
return set() return set()
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
init_kwargs = {} # all default
if name == 'acenas-m1':
arch = {
's2_depth': 2,
's2_i0': 'k3e6',
's2_i1': 'k3e3',
's3_depth': 3,
's3_i0': 'k5e3',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's4_depth': 2,
's4_i0': 'k3e6',
's4_i1': 'k5e3',
's5_depth': 4,
's5_i0': 'k7e6',
's5_i1': 'k3e6',
's5_i2': 'k3e6',
's5_i3': 'k7e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e3',
's6_i3': 'k7e3',
's7_depth': 1,
's7_i0': 'k7e6'
}
elif name == 'acenas-m2':
arch = {
's2_depth': 1,
's2_i0': 'k5e3',
's3_depth': 3,
's3_i0': 'k3e6',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's4_depth': 2,
's4_i0': 'k7e6',
's4_i1': 'k5e6',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k5e3',
's5_i2': 'k5e6',
's5_i3': 'k3e6',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k5e6',
's6_i2': 'k5e3',
's6_i3': 'k5e6',
's7_depth': 1,
's7_i0': 'k7e6'
}
elif name == 'acenas-m3':
arch = {
's2_depth': 2,
's2_i0': 'k3e3',
's2_i1': 'k3e6',
's3_depth': 2,
's3_i0': 'k5e3',
's3_i1': 'k3e3',
's4_depth': 3,
's4_i0': 'k5e6',
's4_i1': 'k7e6',
's4_i2': 'k3e6',
's5_depth': 4,
's5_i0': 'k7e6',
's5_i1': 'k7e3',
's5_i2': 'k7e3',
's5_i3': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e3',
's6_i2': 'k7e6',
's6_i3': 'k3e3',
's7_depth': 1,
's7_i0': 'k5e6'
}
elif name == 'proxyless-cpu':
arch = {
's2_depth': 4,
's2_i0': 'k3e6',
's2_i1': 'k3e3',
's2_i2': 'k3e3',
's2_i3': 'k3e3',
's3_depth': 4,
's3_i0': 'k3e6',
's3_i1': 'k3e3',
's3_i2': 'k3e3',
's3_i3': 'k5e3',
's4_depth': 2,
's4_i0': 'k3e6',
's4_i1': 'k3e3',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k3e3',
's5_i2': 'k3e3',
's5_i3': 'k3e3',
's6_depth': 4,
's6_i0': 'k5e6',
's6_i1': 'k5e3',
's6_i2': 'k5e3',
's6_i3': 'k3e3',
's7_depth': 1,
's7_i0': 'k5e6'
}
init_kwargs['base_widths'] = [40, 24, 32, 48, 88, 104, 216, 360, 1432]
elif name == 'proxyless-gpu':
arch = {
's2_depth': 1,
's2_i0': 'k5e3',
's3_depth': 2,
's3_i0': 'k7e3',
's3_i1': 'k3e3',
's4_depth': 2,
's4_i0': 'k7e6',
's4_i1': 'k5e3',
's5_depth': 3,
's5_i0': 'k5e6',
's5_i1': 'k3e3',
's5_i2': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e6',
's6_i3': 'k5e6',
's7_depth': 1,
's7_i0': 'k7e6'
}
init_kwargs['base_widths'] = [40, 24, 32, 56, 112, 128, 256, 432, 1728]
elif name == 'proxyless-mobile':
arch = {
's2_depth': 2,
's2_i0': 'k5e3',
's2_i1': 'k3e3',
's3_depth': 4,
's3_i0': 'k7e3',
's3_i1': 'k3e3',
's3_i2': 'k5e3',
's3_i3': 'k5e3',
's4_depth': 4,
's4_i0': 'k7e6',
's4_i1': 'k5e3',
's4_i2': 'k5e3',
's4_i3': 'k5e3',
's5_depth': 4,
's5_i0': 'k5e6',
's5_i1': 'k5e3',
's5_i2': 'k5e3',
's5_i3': 'k5e3',
's6_depth': 4,
's6_i0': 'k7e6',
's6_i1': 'k7e6',
's6_i2': 'k7e3',
's6_i3': 'k7e3',
's7_depth': 1,
's7_i0': 'k7e6'
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory(**init_kwargs)
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
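A minimal usage sketch for the classmethod above, following the `nni.retiarii.hub.pytorch` import used in the tests later in this diff; the eval-mode call and the 224x224 RGB input shape are assumptions for illustration, not part of this change:

import torch
from nni.retiarii.hub.pytorch import ProxylessNAS

# Build the searched 'acenas-m1' architecture and fetch its checkpoint into the NNI cache.
model = ProxylessNAS.load_searched_model('acenas-m1', pretrained=True, download=True)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # assumed ImageNet-sized input
print(logits.shape)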
def reset_parameters(model, model_init='he_fout', init_div_groups=False,
bn_momentum=0.1, bn_eps=1e-5):
...
...@@ -7,6 +7,9 @@ import torch
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
from .utils.fixed import FixedFactory
from .utils.pretrained import load_pretrained_weight
class ShuffleNetBlock(nn.Module):
"""
...@@ -130,13 +133,13 @@ class ShuffleNetSpace(nn.Module):
Here, "k-x" means k times the number of default channels.
Otherwise, 1.0x is used by default. Default: false.
affine : bool
Apply affine to all batch norm. Default: true.
"""
def __init__(self,
num_labels: int = 1000,
channel_search: bool = False,
affine: bool = True):
super().__init__()
self.num_labels = num_labels
...@@ -180,12 +183,12 @@ class ShuffleNetSpace(nn.Module):
mid_channels = cast(nn.MaybeChoice[int], mid_channels)
choice_block = nn.LayerChoice(dict(
k3=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
k5=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
k7=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
xcep=ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
), label=f'layer_{global_block_idx}')
feature_blocks.append(choice_block)
self.features = nn.Sequential(*feature_blocks)
...@@ -244,3 +247,51 @@ class ShuffleNetSpace(nn.Module):
torch.nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
torch.nn.init.constant_(m.bias, 0)
@classmethod
def fixed_arch(cls, arch: dict) -> FixedFactory:
return FixedFactory(cls, arch)
@classmethod
def load_searched_model(
cls, name: str,
pretrained: bool = False, download: bool = False, progress: bool = True
) -> nn.Module:
if name == 'spos':
# NOTE: Need BGR tensor, with no normalization
# https://github.com/ultmaster/spacehub-conversion/blob/371a4fd6646b4e11eda3f61187f7c9a1d484b1ca/cutils.py#L63
arch = {
'layer_1': 'k7',
'layer_2': 'k5',
'layer_3': 'k3',
'layer_4': 'k5',
'layer_5': 'k7',
'layer_6': 'k3',
'layer_7': 'k7',
'layer_8': 'k3',
'layer_9': 'k7',
'layer_10': 'k3',
'layer_11': 'k7',
'layer_12': 'xcep',
'layer_13': 'k3',
'layer_14': 'k3',
'layer_15': 'k3',
'layer_16': 'k3',
'layer_17': 'xcep',
'layer_18': 'k7',
'layer_19': 'xcep',
'layer_20': 'xcep'
}
else:
raise ValueError(f'Unsupported architecture with name: {name}')
model_factory = cls.fixed_arch(arch)
model = model_factory()
if pretrained:
weight_file = load_pretrained_weight(name, download=download, progress=progress)
pretrained_weights = torch.load(weight_file)
model.load_state_dict(pretrained_weights)
return model
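The NOTE inside load_searched_model says the SPOS checkpoint expects a BGR tensor with no normalization; a hedged sketch of what calling code might do (the input resolution and the 0-255 pixel range are assumptions):

import torch
from nni.retiarii.hub.pytorch import ShuffleNetSpace

model = ShuffleNetSpace.load_searched_model('spos', pretrained=True, download=True)
model.eval()
# Start from an RGB image tensor in [0, 255], flip the channel axis to get BGR,
# and skip the usual mean/std normalization, as the NOTE above requires.
rgb = torch.rand(1, 3, 224, 224) * 255  # assumed input size
bgr = rgb.flip(1)
with torch.no_grad():
    logits = model(bgr)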
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""This file should be merged to nni/retiarii/fixed.py"""
from typing import Type
from nni.retiarii.utils import ContextStack
class FixedFactory:
"""Make a model space ready to create a fixed model.
Examples
--------
>>> factory = FixedFactory(ModelSpaceClass, {"choice1": 3})
>>> model = factory(channels=16, classes=10)
"""
# TODO: mutations on ``init_args`` and ``init_kwargs`` themselves are not supported.
def __init__(self, cls: Type, arch: dict):
self.cls = cls
self.arch = arch
def __call__(self, *init_args, **init_kwargs):
with ContextStack('fixed', self.arch):
return self.cls(*init_args, **init_kwargs)
def __repr__(self):
return f'FixedFactory(class={self.cls}, arch={self.arch})'
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Weights available in this file are processed with scripts in https://github.com/ultmaster/spacehub-conversion,
and uploaded with :func:`nni.common.blob_utils.upload_file`.
"""
import os
from nni.common.blob_utils import NNI_BLOB, nni_cache_home, load_or_download_file
PRETRAINED_WEIGHT_URLS = {
# proxylessnas
'acenas-m1': f'{NNI_BLOB}/nashub/acenas-m1-e215f1b8.pth',
'acenas-m2': f'{NNI_BLOB}/nashub/acenas-m2-a8ee9e8f.pth',
'acenas-m3': f'{NNI_BLOB}/nashub/acenas-m3-66a5ed7b.pth',
'proxyless-cpu': f'{NNI_BLOB}/nashub/proxyless-cpu-2df03430.pth',
'proxyless-gpu': f'{NNI_BLOB}/nashub/proxyless-gpu-dbe6dd15.pth',
'proxyless-mobile': f'{NNI_BLOB}/nashub/proxyless-mobile-8668a978.pth',
# mobilenetv3
'mobilenetv3-large-100': f'{NNI_BLOB}/nashub/mobilenetv3-large-100-420e040a.pth',
'mobilenetv3-small-050': f'{NNI_BLOB}/nashub/mobilenetv3-small-050-05cb7a80.pth',
'mobilenetv3-small-075': f'{NNI_BLOB}/nashub/mobilenetv3-small-075-c87d8acb.pth',
'mobilenetv3-small-100': f'{NNI_BLOB}/nashub/mobilenetv3-small-100-8332faac.pth',
'cream-014': f'{NNI_BLOB}/nashub/cream-014-060aea24.pth',
'cream-043': f'{NNI_BLOB}/nashub/cream-043-bec949e1.pth',
'cream-114': f'{NNI_BLOB}/nashub/cream-114-fc272590.pth',
'cream-287': f'{NNI_BLOB}/nashub/cream-287-a0fcba33.pth',
'cream-481': f'{NNI_BLOB}/nashub/cream-481-d85779b6.pth',
'cream-604': f'{NNI_BLOB}/nashub/cream-604-9ee425f7.pth',
# nasnet
'darts-v2': f'{NNI_BLOB}/nashub/darts-v2-5465b0d2.pth',
# spos
'spos': f'{NNI_BLOB}/nashub/spos-0b17f6fc.pth',
}
def load_pretrained_weight(name: str, **kwargs) -> str:
if name not in PRETRAINED_WEIGHT_URLS:
raise ValueError(f'"{name}" do not have a valid pretrained weight file.')
url = PRETRAINED_WEIGHT_URLS[name]
local_path = os.path.join(nni_cache_home(), 'nashub', url.split('/')[-1])
load_or_download_file(local_path, url, **kwargs)
return local_path
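For reference, a sketch of calling load_pretrained_weight directly; the module path nni.retiarii.hub.pytorch.utils.pretrained is inferred from the relative import earlier in this diff and may differ in the final layout:

import torch
from nni.retiarii.hub.pytorch.utils.pretrained import load_pretrained_weight  # path assumed

# Resolves the 'spos' entry in PRETRAINED_WEIGHT_URLS, downloads it into
# nni_cache_home()/nashub if it is not cached yet, and returns the local file path.
weight_file = load_pretrained_weight('spos', download=True, progress=True)
state_dict = torch.load(weight_file)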
...@@ -36,6 +36,8 @@ class Repeat(Mutable):
meaning that the block will be repeated at least ``min`` times and at most ``max`` times.
If a ValueChoice, it should choose from a series of positive integers.
*New in v2.8*: Minimum depth can be 0, but this feature is NOT supported on the graph engine.
Examples
--------
Block() will be deep copied and repeated 3 times. ::
...@@ -123,7 +125,7 @@ class Repeat(Mutable):
self.depth_choice = depth
else:
raise TypeError(f'Unsupported "depth" type: {type(depth)}')
assert self.max_depth >= self.min_depth >= 0 and self.max_depth >= 1, f'Depth of {self.min_depth} to {self.max_depth} is invalid.'
self.blocks = nn.ModuleList(self._replicate_and_instantiate(blocks, self.max_depth))
@property
...@@ -139,13 +141,13 @@ class Repeat(Mutable):
def _replicate_and_instantiate(blocks, repeat):
if not isinstance(blocks, list):
if isinstance(blocks, nn.Module):
blocks = [blocks if i == 0 else copy.deepcopy(blocks) for i in range(repeat)]
else:
blocks = [blocks for _ in range(repeat)]
assert repeat <= len(blocks), f'Not enough blocks to be used. {repeat} expected, only found {len(blocks)}.'
if repeat < len(blocks):
blocks = blocks[:repeat]
if len(blocks) > 0 and not isinstance(blocks[0], nn.Module):
blocks = [b(i) for i, b in enumerate(blocks)]
return blocks
...
...@@ -843,6 +843,27 @@ class Python(GraphIR):
@unittest.skip
def test_valuechoice_getitem_functional_expression(self): ...
def test_repeat_zero(self):
class AddOne(nn.Module):
def forward(self, x):
return x + 1
@model_wrapper
class Net(nn.Module):
def __init__(self):
super().__init__()
self.block = nn.Repeat(AddOne(), (0, 3))
def forward(self, x):
return self.block(x)
model, mutators = self._get_model_with_mutators(Net())
self.assertEqual(len(mutators), 1 + self.repeat_incr + self.value_choice_incr)
samplers = [EnumerateSampler() for _ in range(len(mutators))]
for target in [0, 1, 2, 3]:
new_model = _apply_all_mutators(model, mutators, samplers)
self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all())
def test_hyperparameter_choice(self):
@model_wrapper
class Net(nn.Module):
...
...@@ -13,7 +13,7 @@ import nni
import nni.runtime.platform.test
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.hub.pytorch as searchspace
from nni.retiarii import fixed_arch
from nni.retiarii.execution.utils import _unpack_if_only_one
from nni.retiarii.mutator import InvalidMutation, Sampler
from nni.retiarii.nn.pytorch.mutator import extract_mutation_from_pt_module
...@@ -61,7 +61,7 @@ def _test_searchspace_on_dataset(searchspace, dataset='cifar10', arch=None):
arch = {mut.mutator.label: _unpack_if_only_one(mut.samples) for mut in model.history}
print('Selected model:', arch)
with fixed_arch(arch):
model = model.python_class(**model.python_init_params)
if dataset == 'cifar10':
...