"...composable_kernel.git" did not exist on "2377c2e8628e7918fcc71aeb6f8dfb9af8192609"
Unverified Commit e8dc4562 authored by Yuge Zhang, committed by GitHub

Searched model zoo in NAS space hub (#4831)

parent fe89e5af
@@ -18,6 +18,7 @@ schema
 scikit-learn >= 0.24.1
 scipy < 1.8 ; python_version < "3.8"
 scipy ; python_version >= "3.8"
+tqdm
 typeguard
 typing_extensions >= 4.0.0
 websockets >= 10.1
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import hashlib
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

import requests
import tqdm

__all__ = ['NNI_BLOB', 'load_or_download_file', 'upload_file', 'nni_cache_home']

# Blob that contains some downloadable files.
NNI_BLOB = 'https://nni.blob.core.windows.net'

# Override these environment vars to move your cache.
ENV_NNI_HOME = 'NNI_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'


def nni_cache_home() -> str:
    return os.path.expanduser(
        os.getenv(ENV_NNI_HOME,
                  os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
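# Example (hypothetical paths): with no environment overrides, the cache
# resolves to "~/.cache/nni" expanded against the user's home directory:
#   nni_cache_home()  # e.g. '/home/user/.cache/nni'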
def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True) -> None:
    """Download ``download_url`` to ``local_path`` and verify its hash.

    If ``local_path`` already exists, skip the download and only verify the hash.
    """
    f = None
    hash_prefix = Path(local_path).stem.split('-')[-1]

    _logger = logging.getLogger(__name__)

    try:
        sha256 = hashlib.sha256()

        if Path(local_path).exists():
            _logger.info('"%s" already exists. Checking hash.', local_path)
            with Path(local_path).open('rb') as fr:
                while True:
                    chunk = fr.read(8192)
                    if len(chunk) == 0:
                        break
                    sha256.update(chunk)
        elif download:
            _logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)

            # Follow download implementation in torchvision:
            # We deliberately save it in a temp file and move it after
            # download is complete. This prevents a local working checkpoint
            # being overridden by a broken download.
            dst_dir = Path(local_path).parent
            dst_dir.mkdir(exist_ok=True, parents=True)

            f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
            r = requests.get(download_url, stream=True)
            total_length: Optional[str] = r.headers.get('content-length')
            assert total_length is not None, f'Content length is not found in the response of {download_url}'
            with tqdm.tqdm(total=int(total_length), disable=not progress,
                           unit='B', unit_scale=True, unit_divisor=1024) as pbar:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
                    sha256.update(chunk)
                    pbar.update(len(chunk))
                f.flush()
        else:
            raise FileNotFoundError(
                'Download is not enabled, and file does not exist: {}. Please set download=True.'.format(local_path)
            )

        digest = sha256.hexdigest()
        if not digest.startswith(hash_prefix):
            raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))

        if f is not None:
            shutil.move(f.name, local_path)
    finally:
        if f is not None:
            f.close()
            if os.path.exists(f.name):
                os.remove(f.name)
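# Example (hypothetical): the expected hash prefix is parsed from the file name,
# so "nasbench101-209f5694.db" must have a sha256 digest starting with "209f5694":
#   load_or_download_file(
#       os.path.join(nni_cache_home(), 'nasbenchmark', 'nasbench101-209f5694.db'),
#       f'{NNI_BLOB}/nasbenchmark/nasbench101-209f5694.db',
#       download=True)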
def upload_file(local_path: str, destination_path: str, sas_token: str) -> str:
    """For NNI maintainers to add updated static files to the Azure blob easily.

    In most cases, you don't need to calculate the hash on your own; it will be inserted automatically.
    For example, if you write ``https://xxx.com/myfile.zip``, the uploaded file will look like
    ``https://xxx.com/myfile-da5f43b7.zip``.

    Needs `azcopy installed <https://docs.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy>`_,
    and a SAS token for the destination storage (``?`` should be included as the prefix of the token).

    Returns a string which is the uploaded path.
    """
    _logger = logging.getLogger(__name__)

    sha256 = hashlib.sha256()
    with Path(local_path).open('rb') as fr:
        while True:
            chunk = fr.read(8192)
            if len(chunk) == 0:
                break
            sha256.update(chunk)
    digest = sha256.hexdigest()
    hash_prefix = digest[:8]

    _logger.info('Hash of %s is %s', local_path, digest)

    stem, suffix = destination_path.rsplit('.', 1)
    if not stem.endswith('-' + hash_prefix):
        destination_path = stem + '-' + hash_prefix + '.' + suffix

    subprocess.run(['azcopy', 'copy', local_path, destination_path + sas_token], check=True)

    return destination_path
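# Example (hypothetical values): uploading a file with a SAS token; the hash
# prefix is computed from the file content and appended to the name:
#   upload_file('myfile.zip', f'{NNI_BLOB}/nashub/myfile.zip', '?sv=...')
#   # -> 'https://nni.blob.core.windows.net/nashub/myfile-da5f43b7.zip'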
@@ -3,24 +3,21 @@
 import os

+from nni.common.blob_utils import NNI_BLOB, nni_cache_home
+
 ENV_NASBENCHMARK_DIR = 'NASBENCHMARK_DIR'
-ENV_NNI_HOME = 'NNI_HOME'
-ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
-DEFAULT_CACHE_DIR = '~/.cache'

 def _get_nasbenchmark_dir():
-    nni_home = os.path.expanduser(
-        os.getenv(ENV_NNI_HOME,
-                  os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'nni')))
+    nni_home = nni_cache_home()
     return os.getenv(ENV_NASBENCHMARK_DIR, os.path.join(nni_home, 'nasbenchmark'))

 DATABASE_DIR = _get_nasbenchmark_dir()

 DB_URLS = {
-    'nasbench101': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench101-209f5694.db',
-    'nasbench201': 'https://nni.blob.core.windows.net/nasbenchmark/nasbench201-b2b60732.db',
-    'nds': 'https://nni.blob.core.windows.net/nasbenchmark/nds-5745c235.db'
+    'nasbench101': f'{NNI_BLOB}/nasbenchmark/nasbench101-209f5694.db',
+    'nasbench201': f'{NNI_BLOB}/nasbenchmark/nasbench201-b2b60732.db',
+    'nds': f'{NNI_BLOB}/nasbenchmark/nds-5745c235.db'
 }
@@ -2,19 +2,13 @@
 # Licensed under the MIT license.

 import functools
-import hashlib
 import json
-import logging
 import os
-import shutil
-import tempfile
-from pathlib import Path
-from typing import Optional

-import requests
-import tqdm
 from playhouse.sqlite_ext import SqliteExtDatabase

+from nni.common.blob_utils import load_or_download_file
+
 from .constants import DB_URLS, DATABASE_DIR

@@ -24,60 +18,6 @@ json_dumps = functools.partial(json.dumps, sort_keys=True)
 _loaded_benchmarks = {}

-def load_or_download_file(local_path: str, download_url: str, download: bool = False, progress: bool = True):
-    f = None
-    hash_prefix = Path(local_path).stem.split('-')[-1]
-    _logger = logging.getLogger(__name__)
-    try:
-        sha256 = hashlib.sha256()
-        if Path(local_path).exists():
-            _logger.info('"%s" already exists. Checking hash.', local_path)
-            with Path(local_path).open('rb') as fr:
-                while True:
-                    chunk = fr.read(8192)
-                    if len(chunk) == 0:
-                        break
-                    sha256.update(chunk)
-        elif download:
-            _logger.info('"%s" does not exist. Downloading "%s"', local_path, download_url)
-            # Follow download implementation in torchvision:
-            # We deliberately save it in a temp file and move it after
-            # download is complete. This prevents a local working checkpoint
-            # being overridden by a broken download.
-            dst_dir = Path(local_path).parent
-            dst_dir.mkdir(exist_ok=True, parents=True)
-            f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)
-            r = requests.get(download_url, stream=True)
-            total_length: Optional[str] = r.headers.get('content-length')
-            assert total_length is not None, f'Content length is not found in the response of {download_url}'
-            with tqdm.tqdm(total=int(total_length), disable=not progress,
-                           unit='B', unit_scale=True, unit_divisor=1024) as pbar:
-                for chunk in r.iter_content(8192):
-                    f.write(chunk)
-                    sha256.update(chunk)
-                    pbar.update(len(chunk))
-                f.flush()
-        else:
-            raise FileNotFoundError('Download is not enabled, but file still does not exist: {}'.format(local_path))
-        digest = sha256.hexdigest()
-        if not digest.startswith(hash_prefix):
-            raise RuntimeError('Invalid hash value (expected "{}", got "{}")'.format(hash_prefix, digest))
-        if f is not None:
-            shutil.move(f.name, local_path)
-    finally:
-        if f is not None:
-            f.close()
-            if os.path.exists(f.name):
-                os.remove(f.name)

 def load_benchmark(benchmark: str) -> SqliteExtDatabase:
     """
     Load a benchmark as a database.
...
@@ -237,7 +237,7 @@ class _SupervisedLearningModule(LightningModule):

 class _AccuracyWithLogits(torchmetrics.Accuracy):
     def update(self, pred, target):
-        return super().update(nn_functional.softmax(pred), target)
+        return super().update(nn_functional.softmax(pred, dim=-1), target)

 @nni.trace
...
@@ -10,20 +10,74 @@ For further motivations and plans, please see https://github.com/microsoft/nni/i

 1. Runnable
 2. Load checkpoint of searched architecture and evaluate
-3. Reproduce searched architecture
+3. Reproduce "retrain" (i.e., training the searched architecture from scratch)
 4. Runnable with built-in algos
 5. Reproduce result with at least one algo

 |                        | 1      | 2      | 3      | 4      | 5      |
 |------------------------|--------|--------|--------|--------|--------|
-| NasBench101            | Y      |        |        |        |        |
-| NasBench201            | Y      |        |        |        |        |
-| NASNet                 | Y      |        |        |        |        |
-| ENAS                   | Y      |        |        |        |        |
-| AmoebaNet              | Y      |        |        |        |        |
-| PNAS                   | Y      |        |        |        |        |
-| DARTS                  | Y      |        |        |        |        |
-| ProxylessNAS           | Y      |        |        |        |        |
-| MobileNetV3Space       | Y      |        |        |        |        |
-| ShuffleNetSpace        | Y      |        |        |        |        |
-| ShuffleNetSpace (ch)   | Y      |        |        |        |        |
+| NasBench101            | Y      | -      |        |        |        |
+| NasBench201            | Y      | -      |        |        |        |
+| NASNet                 | Y      | -      |        |        |        |
+| ENAS                   | Y      | -      |        |        |        |
+| AmoebaNet              | Y      | -      |        |        |        |
+| PNAS                   | Y      | -      |        |        |        |
+| DARTS                  | Y      | Y      |        |        |        |
+| ProxylessNAS           | Y      | Y      |        |        |        |
+| MobileNetV3Space       | Y      | Y      |        |        |        |
+| ShuffleNetSpace        | Y      | Y      |        |        |        |
+| ShuffleNetSpace (ch)   | Y      | -      |        |        |        |

* `-`: Result unavailable because published checkpoints / architectures are lacking.
* NASNet, ENAS, AmoebaNet, PNAS and DARTS are based on the same implementation, with configuration differences.
* NasBench101 and 201 will proceed directly to stage 3, as it's cheaper to train them than to find a checkpoint.
## Space Planned
We welcome suggestions and contributions.
- [AutoFormer](https://openaccess.thecvf.com/content/ICCV2021/html/Chen_AutoFormer_Searching_Transformers_for_Visual_Recognition_ICCV_2021_paper.html), [PR under review](https://github.com/microsoft/nni/pull/4551)
- [NAS-BERT](https://arxiv.org/abs/2105.14444)
- Something speech, like [LightSpeech](https://arxiv.org/abs/2102.04040)
## Searched Model Zoo
Create a searched model with pretrained weights like the following:
```python
model = MobileNetV3Space.load_searched_model('mobilenetv3-small-075', pretrained=True, download=True)
evaluate(model, imagenet_data)
```
``MobileNetV3Space`` can be replaced with any search space listed above, and ``mobilenetv3-small-075`` can be any model listed below.
See an example of ``evaluate`` [here](https://github.com/rwightman/pytorch-image-models/blob/d30685c283137b4b91ea43c4e595c964cd2cb6f0/train.py#L778).
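Note that ``evaluate`` is not an NNI API; a minimal sketch (assuming a standard ImageNet validation ``DataLoader`` and top-1 accuracy, which is what the table below reports) could look like:

```python
import torch

def evaluate(model, dataloader, device='cuda'):
    # Count top-1 correct predictions over the validation set.
    model.eval().to(device)
    correct = total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=-1)
            correct += (preds == labels).sum().item()
            total += labels.numel()
    print(f'Top-1 accuracy: {correct / total * 100:.3f}%')
```

The exact preprocessing (interpolation, test image size, normalization) must match the "Eval Protocol" column below to reproduce the reported numbers.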
| Search space     | Model                 | Dataset  | Top-1 Acc. (%) | Eval Protocol                |
|------------------|-----------------------|----------|----------------|------------------------------|
| ProxylessNAS | acenas-m1 | ImageNet | 75.176 | Default |
| ProxylessNAS | acenas-m2 | ImageNet | 75.0 | Default |
| ProxylessNAS | acenas-m3 | ImageNet | 75.118 | Default |
| ProxylessNAS | proxyless-cpu | ImageNet | 75.29 | Default |
| ProxylessNAS | proxyless-gpu | ImageNet | 75.084 | Default |
| ProxylessNAS | proxyless-mobile | ImageNet | 74.594 | Default |
| MobileNetV3Space | mobilenetv3-large-100 | ImageNet | 75.768 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-050 | ImageNet | 57.906 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-075 | ImageNet | 65.24 | Bicubic interpolation |
| MobileNetV3Space | mobilenetv3-small-100 | ImageNet | 67.652 | Bicubic interpolation |
| MobileNetV3Space | cream-014 | ImageNet | 53.74 | Test image size = 64 |
| MobileNetV3Space | cream-043 | ImageNet | 66.256 | Test image size = 96 |
| MobileNetV3Space | cream-114 | ImageNet | 72.514 | Test image size = 160 |
| MobileNetV3Space | cream-287 | ImageNet | 77.52 | Default |
| MobileNetV3Space | cream-481 | ImageNet | 79.078 | Default |
| MobileNetV3Space | cream-604 | ImageNet | 79.92 | Default |
| DARTS | darts-v2 | CIFAR-10 | 97.37 | Default |
| ShuffleNetSpace | spos | ImageNet | 74.14 | BGR tensor; no normalization |
The metrics listed above are obtained by evaluating checkpoints provided by the original authors and converted to NNI NAS format with [these scripts](https://github.com/ultmaster/spacehub-conversion). Do note that some metrics can be higher or lower than originally reported, because of subtle differences in data preprocessing, operation implementation (e.g., 3rd-party hswish vs ``nn.Hardswish``), or even the library versions we are using. Most of these errors are acceptable (~0.1%). We will retrain these architectures in a reproducible and fair training setting, and update these results when the training is done.

Latency / FLOPs data are missing from the table; measuring them would be another task.
Several more models to be added:
- FBNet on MobileNetV3Space
@@ -8,6 +8,7 @@ It's called ``nasnet.py`` simply because NASNet is the first to propose such str
 """

 from collections import OrderedDict
+from functools import partial
 from typing import Tuple, List, Union, Iterable, Dict, Callable, Optional, cast

 try:
@@ -20,6 +21,9 @@ import torch
 import nni.retiarii.nn.pytorch as nn
 from nni.retiarii import model_wrapper

+from .utils.fixed import FixedFactory
+from .utils.pretrained import load_pretrained_weight
+
 # the following are NAS operations from
 # https://github.com/facebookresearch/unnas/blob/main/pycls/models/nas/operations.py

@@ -300,15 +304,26 @@ class CellBuilder:
         self.last_cell_reduce = last_cell_reduce
         self._expect_idx = 0

-    def __call__(self, repeat_idx: int):
-        if self._expect_idx != repeat_idx:
-            raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')
         # It takes an index that is the index in the repeat.
         # Number of predecessors for each cell is fixed to 2.
-        num_predecessors = 2
+        self.num_predecessors = 2
         # Number of ops per node is fixed to 2.
-        num_ops_per_node = 2
+        self.num_ops_per_node = 2

+    def op_factory(self, node_index: int, op_index: int, input_index: Optional[int], *,
+                   op: str, channels: int, is_reduction_cell: bool):
+        if is_reduction_cell and (
+            input_index is None or input_index < self.num_predecessors
+        ):  # could be None when constructing the search space
+            stride = 2
+        else:
+            stride = 1
+        return OPS[op](channels, stride, True)
+
+    def __call__(self, repeat_idx: int):
+        if self._expect_idx != repeat_idx:
+            raise ValueError(f'Expect index {self._expect_idx}, found {repeat_idx}')

         # Reduction cell means stride = 2 and channel multiplied by 2.
         is_reduction_cell = repeat_idx == 0 and self.first_cell_reduce

@@ -316,16 +331,11 @@
         # self.C_prev_in, self.C_in, self.last_cell_reduce are updated after each cell is built.
         preprocessor = CellPreprocessor(self.C_prev_in, self.C_in, self.C, self.last_cell_reduce)

-        ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = {
-            op:  # make final chosen ops named with their aliases
-            lambda node_index, op_index, input_index:
-            OPS[op](self.C, 2 if is_reduction_cell and (
-                input_index is None or input_index < num_predecessors  # could be none when constructing search sapce
-            ) else 1, True)
-            for op in self.op_candidates
-        }
+        ops_factory: Dict[str, Callable[[int, int, Optional[int]], nn.Module]] = {}
+        for op in self.op_candidates:
+            ops_factory[op] = partial(self.op_factory, op=op, channels=cast(int, self.C), is_reduction_cell=is_reduction_cell)

-        cell = nn.Cell(ops_factory, self.num_nodes, num_ops_per_node, num_predecessors, self.merge_op,
+        cell = nn.Cell(ops_factory, self.num_nodes, self.num_ops_per_node, self.num_predecessors, self.merge_op,
                        preprocessor=preprocessor, postprocessor=CellPostprocessor(),
                        label='reduce' if is_reduction_cell else 'normal')

@@ -401,7 +411,7 @@ class NDS(nn.Module):
         self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
         if isinstance(num_cells, Iterable):
             self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
-        num_cells_per_stage = [i * self.num_cells // 3 - (i - 1) * self.num_cells // 3 for i in range(3)]
+        num_cells_per_stage = [(i + 1) * self.num_cells // 3 - i * self.num_cells // 3 for i in range(3)]

         # auxiliary head is different for network targeted at different datasets
         if dataset == 'imagenet':

@@ -501,6 +511,10 @@ class NDS(nn.Module):
             if isinstance(module, DropPath_):
                 module.drop_prob = drop_prob

+    @classmethod
+    def fixed_arch(cls, arch: dict) -> FixedFactory:
+        return FixedFactory(cls, arch)
+
 @model_wrapper
 class NASNet(NDS):

@@ -676,3 +690,64 @@ class DARTS(NDS):
             num_cells=num_cells,
             dataset=dataset,
             auxiliary_loss=auxiliary_loss)
    @classmethod
    def load_searched_model(
        cls, name: str,
        pretrained: bool = False, download: bool = False, progress: bool = True
    ) -> nn.Module:
        init_kwargs = {}  # all default

        if name == 'darts-v2':
            init_kwargs.update(
                num_cells=20,
                width=36,
            )
            arch = {
                'normal/op_2_0': 'sep_conv_3x3',
                'normal/op_2_1': 'sep_conv_3x3',
                'normal/input_2_0': 0,
                'normal/input_2_1': 1,
                'normal/op_3_0': 'sep_conv_3x3',
                'normal/op_3_1': 'sep_conv_3x3',
                'normal/input_3_0': 0,
                'normal/input_3_1': 1,
                'normal/op_4_0': 'sep_conv_3x3',
                'normal/op_4_1': 'skip_connect',
                'normal/input_4_0': 1,
                'normal/input_4_1': 0,
                'normal/op_5_0': 'skip_connect',
                'normal/op_5_1': 'dil_conv_3x3',
                'normal/input_5_0': 0,
                'normal/input_5_1': 2,
                'reduce/op_2_0': 'max_pool_3x3',
                'reduce/op_2_1': 'max_pool_3x3',
                'reduce/input_2_0': 0,
                'reduce/input_2_1': 1,
                'reduce/op_3_0': 'skip_connect',
                'reduce/op_3_1': 'max_pool_3x3',
                'reduce/input_3_0': 2,
                'reduce/input_3_1': 1,
                'reduce/op_4_0': 'max_pool_3x3',
                'reduce/op_4_1': 'skip_connect',
                'reduce/input_4_0': 0,
                'reduce/input_4_1': 2,
                'reduce/op_5_0': 'skip_connect',
                'reduce/op_5_1': 'max_pool_3x3',
                'reduce/input_5_0': 2,
                'reduce/input_5_1': 1
            }
        else:
            raise ValueError(f'Unsupported architecture with name: {name}')

        model_factory = cls.fixed_arch(arch)
        model = model_factory(**init_kwargs)

        if pretrained:
            weight_file = load_pretrained_weight(name, download=download, progress=progress)
            pretrained_weights = torch.load(weight_file)
            model.load_state_dict(pretrained_weights)

        return model
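# Example (hypothetical usage): create the fixed DARTS-V2 model with pretrained
# weights, mirroring the README's model zoo example:
#   model = DARTS.load_searched_model('darts-v2', pretrained=True, download=True)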
@@ -2,12 +2,15 @@
 # Licensed under the MIT license.

 import math
-from typing import Optional, Callable, List, Tuple, cast
+from typing import Optional, Callable, List, Tuple, Iterator, cast

 import torch

 import nni.retiarii.nn.pytorch as nn
 from nni.retiarii import model_wrapper

+from .utils.fixed import FixedFactory
+from .utils.pretrained import load_pretrained_weight
+

 def make_divisible(v, divisor, min_val=None):
     """
@@ -24,6 +27,22 @@ def make_divisible(v, divisor, min_val=None):
     return nn.ValueChoice.condition(new_v < 0.9 * v, new_v + divisor, new_v)
+def simplify_sequential(sequentials: List[nn.Module]) -> Iterator[nn.Module]:
+    """
+    Flatten the sequential blocks so that the hierarchy looks better.
+    Eliminate identity modules automatically.
+    """
+    for module in sequentials:
+        if isinstance(module, nn.Sequential):
+            for submodule in module.children():
+                # no recursive expansion
+                if not isinstance(submodule, nn.Identity):
+                    yield submodule
+        else:
+            if not isinstance(module, nn.Identity):
+                yield module
 class ConvBNReLU(nn.Sequential):
     """
     The template for a conv-bn-relu block.
@@ -45,7 +64,11 @@ class ConvBNReLU(nn.Sequential):
             norm_layer = nn.BatchNorm2d
         if activation_layer is None:
             activation_layer = nn.ReLU6
-        super().__init__(
+        # If no normalization is used, set bias to True
+        # https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L194
+        norm = norm_layer(cast(int, out_channels))
+        no_normalization = isinstance(norm, nn.Identity)
+        blocks: List[nn.Module] = [
             nn.Conv2d(
                 cast(int, in_channels),
                 cast(int, out_channels),
@@ -54,18 +77,30 @@ class ConvBNReLU(nn.Sequential):
                 cast(int, padding),
                 dilation=dilation,
                 groups=cast(int, groups),
-                bias=False
+                bias=no_normalization
             ),
-            norm_layer(cast(int, out_channels)),
+            # Normalization, regardless of batchnorm or identity
+            norm,
+            # One PyTorch implementation puts an SE here, to faithfully reproduce the paper.
+            # We follow the more accepted approach of putting SE outside.
+            # Reference: https://github.com/d-li14/mobilenetv3.pytorch/issues/18
             activation_layer(inplace=True)
-        )
+        ]
+        super().__init__(*simplify_sequential(blocks))
         self.out_channels = out_channels
-class SeparableConv(nn.Sequential):
+class DepthwiseSeparableConv(nn.Sequential):
     """
     In the original MobileNetV2 implementation, this is InvertedResidual when expand ratio = 1.
     Residual connection is added if input and output shape are the same.
+
+    References:
+
+    - https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L90
+    - https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L433
+    - https://github.com/ultmaster/AceNAS/blob/46c8895f/searchspace/proxylessnas/utils.py#L100
     """

     def __init__(
@@ -74,20 +109,24 @@ class SeparableConv(nn.Sequential):
         out_channels: nn.MaybeChoice[int],
         kernel_size: nn.MaybeChoice[int] = 3,
         stride: int = 1,
+        squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
         norm_layer: Optional[Callable[[int], nn.Module]] = None,
         activation_layer: Optional[Callable[..., nn.Module]] = None,
     ) -> None:
-        super().__init__(
+        blocks = [
             # dw
             ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels,
                        norm_layer=norm_layer, activation_layer=activation_layer),
+            # optional se
+            squeeze_excite(in_channels, in_channels) if squeeze_excite else nn.Identity(),
             # pw-linear
             ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity)
-        )
-        self.residual_connection = stride == 1 and in_channels == out_channels
+        ]
+        super().__init__(*simplify_sequential(blocks))
+        self.has_skip = stride == 1 and in_channels == out_channels

     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.residual_connection:
+        if self.has_skip:
             return x + super().forward(x)
         else:
             return super().forward(x)
@@ -97,14 +136,17 @@ class InvertedResidual(nn.Sequential):
     """
     An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models
     that uses an inverted structure for efficiency reasons.
     It was originally proposed for the `MobileNetV2 <https://arxiv.org/abs/1801.04381>`__ CNN architecture.
     It has since been reused for several mobile-optimized CNNs.
     It follows a narrow -> wide -> narrow approach, hence the inversion.
     It first widens with a 1x1 convolution, then uses a 3x3 depthwise convolution (which greatly reduces the number of parameters),
     then a 1x1 convolution is used to reduce the number of channels so input and output can be added.
-    Follow implementation of:
-    https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/rematlib/mobile_model_v3.py#L453
+    This implementation is sort of a mixture between:
+
+    - https://github.com/google-research/google-research/blob/20736344/tunas/rematlib/mobile_model_v3.py#L453
+    - https://github.com/rwightman/pytorch-image-models/blob/b7cb8d03/timm/models/efficientnet_blocks.py#L134
     """

     def __init__(
@@ -114,7 +156,7 @@ class InvertedResidual(nn.Sequential):
         expand_ratio: nn.MaybeChoice[float],
         kernel_size: nn.MaybeChoice[int] = 3,
         stride: int = 1,
-        squeeze_and_excite: Optional[Callable[[nn.MaybeChoice[int]], nn.Module]] = None,
+        squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
         norm_layer: Optional[Callable[[int], nn.Module]] = None,
         activation_layer: Optional[Callable[..., nn.Module]] = None,
     ) -> None:
@@ -123,11 +165,10 @@ class InvertedResidual(nn.Sequential):
         self.out_channels = out_channels
         assert stride in [1, 2]

-        hidden_ch = nn.ValueChoice.to_int(round(cast(int, in_channels * expand_ratio)))
+        hidden_ch = cast(int, make_divisible(in_channels * expand_ratio, 8))

-        # FIXME: check whether this equal works
-        # Residual connection is added here stride = 1 and input channels and output channels are the same.
-        self.residual_connection = stride == 1 and in_channels == out_channels
+        # NOTE: this equivalence check should also work for ValueChoice
+        self.has_skip = stride == 1 and in_channels == out_channels

         layers: List[nn.Module] = [
             # point-wise convolution
@@ -138,21 +179,20 @@ class InvertedResidual(nn.Sequential):
                        norm_layer=norm_layer, activation_layer=activation_layer),
             # depth-wise
             ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
-                       norm_layer=norm_layer, activation_layer=activation_layer)
-        ]
-
-        if squeeze_and_excite:
-            layers.append(squeeze_and_excite(hidden_ch))
-
-        layers += [
+                       norm_layer=norm_layer, activation_layer=activation_layer),
+            # SE
+            squeeze_excite(
+                cast(int, hidden_ch),
+                cast(int, in_channels)
+            ) if squeeze_excite is not None else nn.Identity(),
             # pw-linear
-            ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity)
+            ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity),
         ]

-        super().__init__(*layers)
+        super().__init__(*simplify_sequential(layers))

     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.residual_connection:
+        if self.has_skip:
             return x + super().forward(x)
         else:
             return super().forward(x)
@@ -199,7 +239,9 @@ class ProxylessNAS(nn.Module):
     Following the official implementation, the inverted residual with kernel size / expand ratio variations in each layer
     is implemented with a :class:`nn.LayerChoice` with all-combination candidates. That means,
     when used in weight sharing, these candidates will be treated as separate layers, and won't be fine-grained shared.
-    We note that ``MobileNetV3Space`` is different in this perspective.
+    We note that :class:`MobileNetV3Space` is different in this perspective.
+
+    This space can be implemented as part of :class:`MobileNetV3Space`, but we separate those following conventions.
     """

     def __init__(self, num_labels: int = 1000,
@@ -221,11 +263,11 @@ class ProxylessNAS(nn.Module):
         self.bn_eps = bn_eps
         self.bn_momentum = bn_momentum

-        self.first_conv = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)
+        self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

         blocks: List[nn.Module] = [
             # first stage is fixed
-            SeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
+            DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
         ]

         # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
@@ -234,7 +276,7 @@ class ProxylessNAS(nn.Module):
             # we return a builder that dynamically creates module for different `repeat_idx`.
             builder = inverted_residual_choice_builder(
                 [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
-            if stage < 6:
+            if stage < 7:
                 blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
             else:
                 # No mutation for depth in the last stage.
@@ -252,7 +294,7 @@ class ProxylessNAS(nn.Module):
         reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)

     def forward(self, x):
-        x = self.first_conv(x)
+        x = self.stem(x)
         x = self.blocks(x)
         x = self.feature_mix_layer(x)
         x = self.global_avg_pooling(x)
@@ -268,6 +310,193 @@ class ProxylessNAS(nn.Module):
             return {'classifier.weight', 'classifier.bias'}
         return set()
    @classmethod
    def fixed_arch(cls, arch: dict) -> FixedFactory:
        return FixedFactory(cls, arch)

    @classmethod
    def load_searched_model(
        cls, name: str,
        pretrained: bool = False, download: bool = False, progress: bool = True
    ) -> nn.Module:
        init_kwargs = {}  # all default

        if name == 'acenas-m1':
            arch = {
                's2_depth': 2,
                's2_i0': 'k3e6',
                's2_i1': 'k3e3',
                's3_depth': 3,
                's3_i0': 'k5e3',
                's3_i1': 'k3e3',
                's3_i2': 'k5e3',
                's4_depth': 2,
                's4_i0': 'k3e6',
                's4_i1': 'k5e3',
                's5_depth': 4,
                's5_i0': 'k7e6',
                's5_i1': 'k3e6',
                's5_i2': 'k3e6',
                's5_i3': 'k7e3',
                's6_depth': 4,
                's6_i0': 'k7e6',
                's6_i1': 'k7e6',
                's6_i2': 'k7e3',
                's6_i3': 'k7e3',
                's7_depth': 1,
                's7_i0': 'k7e6'
            }
        elif name == 'acenas-m2':
            arch = {
                's2_depth': 1,
                's2_i0': 'k5e3',
                's3_depth': 3,
                's3_i0': 'k3e6',
                's3_i1': 'k3e3',
                's3_i2': 'k5e3',
                's4_depth': 2,
                's4_i0': 'k7e6',
                's4_i1': 'k5e6',
                's5_depth': 4,
                's5_i0': 'k5e6',
                's5_i1': 'k5e3',
                's5_i2': 'k5e6',
                's5_i3': 'k3e6',
                's6_depth': 4,
                's6_i0': 'k7e6',
                's6_i1': 'k5e6',
                's6_i2': 'k5e3',
                's6_i3': 'k5e6',
                's7_depth': 1,
                's7_i0': 'k7e6'
            }
        elif name == 'acenas-m3':
            arch = {
                's2_depth': 2,
                's2_i0': 'k3e3',
                's2_i1': 'k3e6',
                's3_depth': 2,
                's3_i0': 'k5e3',
                's3_i1': 'k3e3',
                's4_depth': 3,
                's4_i0': 'k5e6',
                's4_i1': 'k7e6',
                's4_i2': 'k3e6',
                's5_depth': 4,
                's5_i0': 'k7e6',
                's5_i1': 'k7e3',
                's5_i2': 'k7e3',
                's5_i3': 'k5e3',
                's6_depth': 4,
                's6_i0': 'k7e6',
                's6_i1': 'k7e3',
                's6_i2': 'k7e6',
                's6_i3': 'k3e3',
                's7_depth': 1,
                's7_i0': 'k5e6'
            }
        elif name == 'proxyless-cpu':
            arch = {
                's2_depth': 4,
                's2_i0': 'k3e6',
                's2_i1': 'k3e3',
                's2_i2': 'k3e3',
                's2_i3': 'k3e3',
                's3_depth': 4,
                's3_i0': 'k3e6',
                's3_i1': 'k3e3',
                's3_i2': 'k3e3',
                's3_i3': 'k5e3',
                's4_depth': 2,
                's4_i0': 'k3e6',
                's4_i1': 'k3e3',
                's5_depth': 4,
                's5_i0': 'k5e6',
                's5_i1': 'k3e3',
                's5_i2': 'k3e3',
                's5_i3': 'k3e3',
                's6_depth': 4,
                's6_i0': 'k5e6',
                's6_i1': 'k5e3',
                's6_i2': 'k5e3',
                's6_i3': 'k3e3',
                's7_depth': 1,
                's7_i0': 'k5e6'
            }
            init_kwargs['base_widths'] = [40, 24, 32, 48, 88, 104, 216, 360, 1432]
        elif name == 'proxyless-gpu':
            arch = {
                's2_depth': 1,
                's2_i0': 'k5e3',
                's3_depth': 2,
                's3_i0': 'k7e3',
                's3_i1': 'k3e3',
                's4_depth': 2,
                's4_i0': 'k7e6',
                's4_i1': 'k5e3',
                's5_depth': 3,
                's5_i0': 'k5e6',
                's5_i1': 'k3e3',
                's5_i2': 'k5e3',
                's6_depth': 4,
                's6_i0': 'k7e6',
                's6_i1': 'k7e6',
                's6_i2': 'k7e6',
                's6_i3': 'k5e6',
                's7_depth': 1,
                's7_i0': 'k7e6'
            }
            init_kwargs['base_widths'] = [40, 24, 32, 56, 112, 128, 256, 432, 1728]
        elif name == 'proxyless-mobile':
            arch = {
                's2_depth': 2,
                's2_i0': 'k5e3',
                's2_i1': 'k3e3',
                's3_depth': 4,
                's3_i0': 'k7e3',
                's3_i1': 'k3e3',
                's3_i2': 'k5e3',
                's3_i3': 'k5e3',
                's4_depth': 4,
                's4_i0': 'k7e6',
                's4_i1': 'k5e3',
                's4_i2': 'k5e3',
                's4_i3': 'k5e3',
                's5_depth': 4,
                's5_i0': 'k5e6',
                's5_i1': 'k5e3',
                's5_i2': 'k5e3',
                's5_i3': 'k5e3',
                's6_depth': 4,
                's6_i0': 'k7e6',
                's6_i1': 'k7e6',
                's6_i2': 'k7e3',
                's6_i3': 'k7e3',
                's7_depth': 1,
                's7_i0': 'k7e6'
            }
        else:
            raise ValueError(f'Unsupported architecture with name: {name}')

        model_factory = cls.fixed_arch(arch)
        model = model_factory(**init_kwargs)

        if pretrained:
            weight_file = load_pretrained_weight(name, download=download, progress=progress)
            pretrained_weights = torch.load(weight_file)
            model.load_state_dict(pretrained_weights)

        return model
 def reset_parameters(model, model_init='he_fout', init_div_groups=False,
                      bn_momentum=0.1, bn_eps=1e-5):
...
@@ -7,6 +7,9 @@ import torch
 import nni.retiarii.nn.pytorch as nn
 from nni.retiarii import model_wrapper

+from .utils.fixed import FixedFactory
+from .utils.pretrained import load_pretrained_weight
+

 class ShuffleNetBlock(nn.Module):
     """
@@ -130,13 +133,13 @@ class ShuffleNetSpace(nn.Module):
         Here, "k-x" means k times the number of default channels.
         Otherwise, 1.0x is used by default. Default: false.
     affine : bool
-        Apply affine to all batch norm. Default: false.
+        Apply affine to all batch norm. Default: true.
     """

     def __init__(self,
                  num_labels: int = 1000,
                  channel_search: bool = False,
-                 affine: bool = False):
+                 affine: bool = True):
         super().__init__()
         self.num_labels = num_labels
@@ -180,12 +183,12 @@ class ShuffleNetSpace(nn.Module):
             mid_channels = cast(nn.MaybeChoice[int], mid_channels)

-            choice_block = nn.LayerChoice([
-                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
-                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
-                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
-                ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
-            ], label=f'layer_{global_block_idx}')
+            choice_block = nn.LayerChoice(dict(
+                k3=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=3, stride=stride, affine=affine),
+                k5=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=5, stride=stride, affine=affine),
+                k7=ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels, kernel_size=7, stride=stride, affine=affine),
+                xcep=ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels, stride=stride, affine=affine)
+            ), label=f'layer_{global_block_idx}')
             feature_blocks.append(choice_block)

         self.features = nn.Sequential(*feature_blocks)
@@ -244,3 +247,51 @@ class ShuffleNetSpace(nn.Module):
                 torch.nn.init.normal_(m.weight, 0, 0.01)
                 if m.bias is not None:
                     torch.nn.init.constant_(m.bias, 0)
    @classmethod
    def fixed_arch(cls, arch: dict) -> FixedFactory:
        return FixedFactory(cls, arch)

    @classmethod
    def load_searched_model(
        cls, name: str,
        pretrained: bool = False, download: bool = False, progress: bool = True
    ) -> nn.Module:
        if name == 'spos':
            # NOTE: Needs a BGR tensor, with no normalization
            # https://github.com/ultmaster/spacehub-conversion/blob/371a4fd6646b4e11eda3f61187f7c9a1d484b1ca/cutils.py#L63
            arch = {
                'layer_1': 'k7',
                'layer_2': 'k5',
                'layer_3': 'k3',
                'layer_4': 'k5',
                'layer_5': 'k7',
                'layer_6': 'k3',
                'layer_7': 'k7',
                'layer_8': 'k3',
                'layer_9': 'k7',
                'layer_10': 'k3',
                'layer_11': 'k7',
                'layer_12': 'xcep',
                'layer_13': 'k3',
                'layer_14': 'k3',
                'layer_15': 'k3',
                'layer_16': 'k3',
                'layer_17': 'xcep',
                'layer_18': 'k7',
                'layer_19': 'xcep',
                'layer_20': 'xcep'
            }
        else:
            raise ValueError(f'Unsupported architecture with name: {name}')

        model_factory = cls.fixed_arch(arch)
        model = model_factory()

        if pretrained:
            weight_file = load_pretrained_weight(name, download=download, progress=progress)
            pretrained_weights = torch.load(weight_file)
            model.load_state_dict(pretrained_weights)

        return model
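# A minimal preprocessing sketch for the SPOS protocol above (assumption: `img`
# is an RGB uint8 HWC numpy array; channels are flipped to BGR and the [0, 255]
# range is kept, with no mean/std normalization):
#   import numpy as np
#   x = torch.from_numpy(img[:, :, ::-1].copy()).permute(2, 0, 1).float().unsqueeze(0)
#   model = ShuffleNetSpace.load_searched_model('spos', pretrained=True, download=True)
#   logits = model(x)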
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""This file should be merged into nni/retiarii/fixed.py"""

from typing import Type

from nni.retiarii.utils import ContextStack


class FixedFactory:
    """Make a model space ready to create a fixed model.

    Examples
    --------
    >>> factory = FixedFactory(ModelSpaceClass, {"choice1": 3})
    >>> model = factory(channels=16, classes=10)
    """

    # TODO: mutations on ``init_args`` and ``init_kwargs`` themselves are not supported.

    def __init__(self, cls: Type, arch: dict):
        self.cls = cls
        self.arch = arch

    def __call__(self, *init_args, **init_kwargs):
        with ContextStack('fixed', self.arch):
            return self.cls(*init_args, **init_kwargs)

    def __repr__(self):
        return f'FixedFactory(class={self.cls}, arch={self.arch})'
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Weights available in this file are processed with scripts in https://github.com/ultmaster/spacehub-conversion,
and uploaded with :func:`nni.common.blob_utils.upload_file`.
"""

import os

from nni.common.blob_utils import NNI_BLOB, nni_cache_home, load_or_download_file


PRETRAINED_WEIGHT_URLS = {
    # proxylessnas
    'acenas-m1': f'{NNI_BLOB}/nashub/acenas-m1-e215f1b8.pth',
    'acenas-m2': f'{NNI_BLOB}/nashub/acenas-m2-a8ee9e8f.pth',
    'acenas-m3': f'{NNI_BLOB}/nashub/acenas-m3-66a5ed7b.pth',
    'proxyless-cpu': f'{NNI_BLOB}/nashub/proxyless-cpu-2df03430.pth',
    'proxyless-gpu': f'{NNI_BLOB}/nashub/proxyless-gpu-dbe6dd15.pth',
    'proxyless-mobile': f'{NNI_BLOB}/nashub/proxyless-mobile-8668a978.pth',
    # mobilenetv3
    'mobilenetv3-large-100': f'{NNI_BLOB}/nashub/mobilenetv3-large-100-420e040a.pth',
    'mobilenetv3-small-050': f'{NNI_BLOB}/nashub/mobilenetv3-small-050-05cb7a80.pth',
    'mobilenetv3-small-075': f'{NNI_BLOB}/nashub/mobilenetv3-small-075-c87d8acb.pth',
    'mobilenetv3-small-100': f'{NNI_BLOB}/nashub/mobilenetv3-small-100-8332faac.pth',
    'cream-014': f'{NNI_BLOB}/nashub/cream-014-060aea24.pth',
    'cream-043': f'{NNI_BLOB}/nashub/cream-043-bec949e1.pth',
    'cream-114': f'{NNI_BLOB}/nashub/cream-114-fc272590.pth',
    'cream-287': f'{NNI_BLOB}/nashub/cream-287-a0fcba33.pth',
    'cream-481': f'{NNI_BLOB}/nashub/cream-481-d85779b6.pth',
    'cream-604': f'{NNI_BLOB}/nashub/cream-604-9ee425f7.pth',
    # nasnet
    'darts-v2': f'{NNI_BLOB}/nashub/darts-v2-5465b0d2.pth',
    # spos
    'spos': f'{NNI_BLOB}/nashub/spos-0b17f6fc.pth',
}


def load_pretrained_weight(name: str, **kwargs) -> str:
    if name not in PRETRAINED_WEIGHT_URLS:
        raise ValueError(f'"{name}" does not have a valid pretrained weight file.')
    url = PRETRAINED_WEIGHT_URLS[name]

    local_path = os.path.join(nni_cache_home(), 'nashub', url.split('/')[-1])
    load_or_download_file(local_path, url, **kwargs)
    return local_path
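# Example (hypothetical): fetch (and cache) the DARTS-V2 weights, then load them:
#   path = load_pretrained_weight('darts-v2', download=True)
#   state_dict = torch.load(path)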
@@ -36,6 +36,8 @@ class Repeat(Mutable):
         meaning that the block will be repeated at least ``min`` times and at most ``max`` times.
         If a ValueChoice, it should choose from a series of positive integers.

+        *New in v2.8*: Minimum depth can be 0. But this feature is NOT supported on graph engine.
+
     Examples
     --------
     Block() will be deep copied and repeated 3 times. ::
@@ -123,7 +125,7 @@ class Repeat(Mutable):
             self.depth_choice = depth
         else:
             raise TypeError(f'Unsupported "depth" type: {type(depth)}')
-        assert self.max_depth >= self.min_depth > 0
+        assert self.max_depth >= self.min_depth >= 0 and self.max_depth >= 1, f'Depth of {self.min_depth} to {self.max_depth} is invalid.'
         self.blocks = nn.ModuleList(self._replicate_and_instantiate(blocks, self.max_depth))

     @property
@@ -139,13 +141,13 @@ class Repeat(Mutable):
     def _replicate_and_instantiate(blocks, repeat):
         if not isinstance(blocks, list):
             if isinstance(blocks, nn.Module):
-                blocks = [blocks] + [copy.deepcopy(blocks) for _ in range(repeat - 1)]
+                blocks = [blocks if i == 0 else copy.deepcopy(blocks) for i in range(repeat)]
             else:
                 blocks = [blocks for _ in range(repeat)]
-        assert len(blocks) > 0
         assert repeat <= len(blocks), f'Not enough blocks to be used. {repeat} expected, only found {len(blocks)}.'
-        blocks = blocks[:repeat]
-        if not isinstance(blocks[0], nn.Module):
+        if repeat < len(blocks):
+            blocks = blocks[:repeat]
+        if len(blocks) > 0 and not isinstance(blocks[0], nn.Module):
             blocks = [b(i) for i, b in enumerate(blocks)]
         return blocks
...
@@ -843,6 +843,27 @@ class Python(GraphIR):
     @unittest.skip
     def test_valuechoice_getitem_functional_expression(self): ...

+    def test_repeat_zero(self):
+        class AddOne(nn.Module):
+            def forward(self, x):
+                return x + 1
+
+        @model_wrapper
+        class Net(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.block = nn.Repeat(AddOne(), (0, 3))
+
+            def forward(self, x):
+                return self.block(x)
+
+        model, mutators = self._get_model_with_mutators(Net())
+        self.assertEqual(len(mutators), 1 + self.repeat_incr + self.value_choice_incr)
+        samplers = [EnumerateSampler() for _ in range(len(mutators))]
+        for target in [0, 1, 2, 3]:
+            new_model = _apply_all_mutators(model, mutators, samplers)
+            self.assertTrue((self._get_converted_pytorch_model(new_model)(torch.zeros(1, 16)) == target).all())
+
     def test_hyperparameter_choice(self):
         @model_wrapper
         class Net(nn.Module):
...
@@ -13,7 +13,7 @@ import nni
 import nni.runtime.platform.test
 import nni.retiarii.evaluator.pytorch.lightning as pl
 import nni.retiarii.hub.pytorch as searchspace
-from nni.retiarii.utils import ContextStack
+from nni.retiarii import fixed_arch
 from nni.retiarii.execution.utils import _unpack_if_only_one
 from nni.retiarii.mutator import InvalidMutation, Sampler
 from nni.retiarii.nn.pytorch.mutator import extract_mutation_from_pt_module
@@ -61,7 +61,7 @@ def _test_searchspace_on_dataset(searchspace, dataset='cifar10', arch=None):
         arch = {mut.mutator.label: _unpack_if_only_one(mut.samples) for mut in model.history}
         print('Selected model:', arch)
-        with ContextStack('fixed', arch):
+        with fixed_arch(arch):
             model = model.python_class(**model.python_init_params)
         if dataset == 'cifar10':
...