Unverified Commit cef9babd authored by Yuge Zhang, committed by GitHub

[Doc] NAS (#4584)

parent ad5aff39
@@ -32,13 +32,39 @@ _cell_op_factory_type = Callable[[int, int, Optional[int]], nn.Module]
class Cell(nn.Module):
"""
Cell structure :footcite:p:`zoph2017neural,zoph2018learning,liu2018darts` that is popularly used in NAS literature.
:footcite:t:`radosavovic2019network` is a good summary of how this structure works in practice.
A cell consists of multiple "nodes". Each node is a sum of multiple operators. Each operator is chosen from
``op_candidates``, and takes one input from previous nodes and predecessors. A predecessor is an input of the cell.
The output of the cell is the concatenation of some of the nodes in the cell (currently all the nodes).
Here is a glossary table, which may help you better understand the terms used above (a schematic sketch follows the table):
.. list-table::
    :widths: 25 75

    * - Cell
      - A cell consists of several nodes.
    * - Node
      - A node is the **sum** of several operators.
    * - Operator
      - Each operator is independently chosen from a list of user-specified candidate operators.
    * - Operator's input
      - Each operator has one input, chosen from previous nodes as well as predecessors.
    * - Predecessors
      - Input of cell. A cell can have multiple predecessors. Predecessors are sent to *preprocessor* for preprocessing.
    * - Cell's output
      - Output of cell. Usually concatenation of several nodes (possibly all nodes) in the cell. Cell's output,
        along with predecessors, are sent to *postprocessor* for postprocessing.
    * - Preprocessor
      - Extra preprocessing to predecessors. Usually used in shape alignment (e.g., predecessors have different shapes).
        By default, do nothing.
    * - Postprocessor
      - Extra postprocessing for cell's output. Usually used to chain cells with multiple predecessors
        (e.g., the next cell wants to have the outputs of both this cell and the previous cell as its input).
        By default, directly use this cell's output.
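
Schematically, the data flow of a cell can be sketched as below (an illustration only; the helper names and arguments here are placeholders, not actual NNI APIs): ::

    import torch

    def cell_forward(predecessors, preprocessor, postprocessor, node_ops, num_nodes):
        # illustrative: node_ops[k] is a list of (op, input_index) pairs for node k
        nodes = preprocessor(predecessors)          # shape alignment; identity by default
        for k in range(num_nodes):
            # each node is the sum of its operators, each reading one earlier node or predecessor
            nodes.append(sum(op(nodes[i]) for op, i in node_ops[k]))
        out = torch.cat(nodes[-num_nodes:], dim=1)  # concatenate the cell's own nodes
        return postprocessor(out, predecessors)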
Parameters
----------
op_candidates : list of module or function, or dict
@@ -81,16 +107,33 @@ class Cell(nn.Module):
Examples
--------
Choose between conv2d and maxpool2d.
The cell has 4 nodes, 1 op per node, and 2 predecessors.
>>> cell = nn.Cell([nn.Conv2d(32, 32, 3), nn.MaxPool2d(3)], 4, 1, 2)

In forward:

>>> cell([input1, input2])

Use ``merge_op`` to specify how to construct the output.
The output will then have dynamic shape, depending on which input has been used in the cell.

>>> cell = nn.Cell([nn.Conv2d(32, 32, 3), nn.MaxPool2d(3)], 4, 1, 2, merge_op='loose_end')
The op candidates can be a callable that accepts node index in cell, op index in node, and input index.

>>> cell = nn.Cell([
...     lambda node_index, op_index, input_index: nn.Conv2d(32, 32, 3, stride=2 if input_index < 1 else 1),
... ], 4, 1, 2)
Predecessor example: ::

    class Preprocessor(nn.Module):
        def __init__(self):
            super().__init__()  # needed so that conv1/conv2 are registered as submodules
            self.conv1 = nn.Conv2d(16, 32, 1)
            self.conv2 = nn.Conv2d(64, 32, 1)

        def forward(self, x):
            return [self.conv1(x[0]), self.conv2(x[1])]

    cell = nn.Cell([nn.Conv2d(32, 32, 3), nn.MaxPool2d(3)], 4, 1, 2, preprocessor=Preprocessor())
    cell([torch.randn(1, 16, 48, 48), torch.randn(1, 64, 48, 48)])  # the two inputs are sent to conv1 and conv2 respectively
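
A postprocessor can be sketched similarly (hypothetical; the signature below assumes, per the glossary above, that the cell's output and the predecessors are both passed in): ::

    class Postprocessor(nn.Module):
        def forward(self, cell_output, predecessors):
            # return the last predecessor together with this cell's output,
            # so that the next cell can consume two inputs (assumed convention)
            return [predecessors[-1], cell_output]

    cell = nn.Cell([nn.Conv2d(32, 32, 3), nn.MaxPool2d(3)], 4, 1, 2, postprocessor=Postprocessor())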
""" """
def __init__(self, def __init__(self,
...
@@ -21,13 +21,34 @@ class Repeat(Mutable):
Parameters
----------
blocks : function, list of function, module or list of module
The block to be repeated. If not a list, it will be replicated (**deep-copied**) into a list.
If a list, it should be of length ``max_depth``; the modules will be instantiated in order and a prefix will be taken.
If a function, it will be called (the argument is the index) to instantiate a module.
Otherwise the module will be deep-copied.
depth : int or tuple of int
If one number, the block will be repeated a fixed number of times. If a tuple, it should be (min, max),
meaning that the block will be repeated at least ``min`` times and at most ``max`` times.
Examples
--------
Block() will be deep-copied and repeated 3 times. ::

    self.blocks = nn.Repeat(Block(), 3)

Block() will be repeated 1, 2, or 3 times. ::

    self.blocks = nn.Repeat(Block(), (1, 3))

Can be used together with layer choice.
With deep copy, the 3 layers will have the same label, thus sharing the choice. ::

    self.blocks = nn.Repeat(nn.LayerChoice([...]), (1, 3))

To make the three layer choices independent,
we need a factory function that accepts index (0, 1, 2, ...) and returns the module of the ``index``-th layer. ::

    self.blocks = nn.Repeat(lambda index: nn.LayerChoice([...], label=f'layer{index}'), (1, 3))
""" """
@classmethod @classmethod
...@@ -79,7 +100,7 @@ class Repeat(Mutable): ...@@ -79,7 +100,7 @@ class Repeat(Mutable):
class NasBench201Cell(nn.Module): class NasBench201Cell(nn.Module):
""" """
Cell structure that is proposed in NAS-Bench-201 :footcite:p:`dong2019bench`.
This cell is a densely connected DAG with ``num_tensors`` nodes, where each node is a tensor.
For every i < j, there is an edge from the i-th node to the j-th node.
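
Concretely, the j-th tensor sums, over every incoming edge, the chosen operator applied to an earlier tensor. A rough sketch (illustrative only; ``edge_ops`` is a placeholder, not the actual attribute name): ::

    def nb201_cell_forward(inputs, edge_ops, num_tensors):
        # illustrative: edge_ops[(i, j)] is the chosen operator on edge i -> j
        tensors = [inputs]
        for j in range(1, num_tensors):
            # sum the chosen operators' outputs over all incoming edges i < j
            tensors.append(sum(edge_ops[(i, j)](tensors[i]) for i in range(j)))
        return tensors[-1]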
@@ -105,11 +126,6 @@ class NasBench201Cell(nn.Module):
Number of tensors in the cell (input included). Default: 4
label : str
Identifier of the cell. Cells sharing the same label will semantically share the same choice.
""" """
@staticmethod @staticmethod
...@@ -141,6 +157,10 @@ class NasBench201Cell(nn.Module): ...@@ -141,6 +157,10 @@ class NasBench201Cell(nn.Module):
self.layers.append(node_ops) self.layers.append(node_ops)
def forward(self, inputs): def forward(self, inputs):
"""
The forward of input choice is simply selecting first on all choices.
It shouldn't be called directly by users in most cases.
"""
tensors = [inputs]
for layer in self.layers:
current_tensor = []
...
@@ -219,11 +219,11 @@ class _NasBench101CellFixed(nn.Module):
class NasBench101Cell(Mutable):
"""
Cell structure that is proposed in NAS-Bench-101 :footcite:p:`ying2019bench`.
This cell is usually used in evaluation of NAS algorithms because there is a "comprehensive analysis" of this search space
available, which includes a full architecture-dataset that "maps 423k unique architectures to metrics
including run time and accuracy". You can also use the space in your own space design, in which scenario it should be possible
to leverage results in the benchmark to narrow the huge space down to a few efficient architectures.
The space of this cell architecture consists of all possible directed acyclic graphs on no more than ``max_num_nodes`` nodes,
@@ -232,7 +232,7 @@ class NasBench101Cell(Mutable):
To align with the paper settings, two vertices, specially labeled as operations IN and OUT, are also counted into
``max_num_nodes`` in our implementation; the default value of ``max_num_nodes`` is 7 and ``max_num_edges`` is 9.
Input of this cell should be of shape :math:`[N, C_{in}, *]`, while output should be :math:`[N, C_{out}, *]`. The shape
of each hidden node will be first automatically computed, depending on the cell structure. Each of the ``op_candidates``
should be a callable that accepts the computed ``num_features`` and returns a ``Module``.
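
For example, here is a sketch of such a factory (an illustrative assumption; the real candidate list is up to the user): ::

    def conv_bn_relu(num_features):
        # `num_features` is computed automatically from the cell structure
        return nn.Sequential(
            nn.Conv2d(num_features, num_features, kernel_size=1),
            nn.BatchNorm2d(num_features),
            nn.ReLU()
        )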
@@ -275,11 +275,6 @@ class NasBench101Cell(Mutable):
Maximum number of edges in the cell. Default: 9.
label : str
Identifier of the cell. Cells sharing the same label will semantically share the same choice.
""" """
@staticmethod @staticmethod
...@@ -339,7 +334,10 @@ class NasBench101Cell(Mutable): ...@@ -339,7 +334,10 @@ class NasBench101Cell(Mutable):
return self._label return self._label
def forward(self, x): def forward(self, x):
"""
The forward of input choice is simply selecting the first on all choices.
It shouldn't be called directly by users in most cases.
"""
tensors = [x]
for i in range(1, self.max_num_nodes):
node_input = self.inputs[i]([self.projections[i](tensors[0])] + [t for t in tensors[1:]])
...
@@ -97,8 +97,8 @@ def model_wrapper(cls: T) -> Union[T, Traceable]:
The wrapper serves two purposes:

1. Capture the init parameters of the python class so that it can be re-instantiated in another process.
2. Reset uid in namespace so that the auto label counting in each model stably starts from zero.

Currently, NNI might not complain in simple cases where ``@model_wrapper`` is actually not needed.
But in the future, we might enforce ``@model_wrapper`` to be required for the base model.
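
A minimal usage sketch (assuming ``nn`` is Retiarii's ``nn.pytorch`` module; the class name and layer choices are illustrative): ::

    @model_wrapper
    class ModelSpace(nn.Module):
        def __init__(self):
            super().__init__()
            # auto-generated labels restart from zero for each wrapped model
            self.layer = nn.LayerChoice([nn.Conv2d(3, 16, 3), nn.MaxPool2d(3)])

        def forward(self, x):
            return self.layer(x)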
...
@@ -4,6 +4,6 @@
from .base import BaseStrategy
from .bruteforce import Random, GridSearch
from .evolution import RegularizedEvolution
from .tpe_strategy import TPEStrategy, TPE
from .local_debug_strategy import _LocalDebugStrategy
from .rl import PolicyBasedRL
@@ -23,7 +23,7 @@ class PolicyBasedRL(BaseStrategy):
"""
Algorithm for policy-based reinforcement learning.
This is a wrapper of algorithms provided in tianshou (PPO by default),
and can be easily customized with other algorithms that inherit ``BasePolicy`` (e.g., REINFORCE :footcite:p:`zoph2017neural`).
Parameters
----------
@@ -34,12 +34,6 @@ class PolicyBasedRL(BaseStrategy):
After each collect, the trainer samples a batch from the replay buffer and does the update. Default: 20.
policy_fn : function
Takes ``ModelEvaluationEnv`` as input and returns a policy. See ``_default_policy_fn`` for an example.
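
A skeleton of a custom ``policy_fn`` (illustrative only; see ``_default_policy_fn`` for a working construction): ::

    def my_policy_fn(env):
        # `env` is a ModelEvaluationEnv; build and return any tianshou
        # policy that inherits BasePolicy (e.g., a REINFORCE-style policy)
        ...

    strategy = PolicyBasedRL(policy_fn=my_policy_fn)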
""" """
def __init__(self, max_collect: int = 100, trial_per_collect = 20, def __init__(self, max_collect: int = 100, trial_per_collect = 20,
......
@@ -39,17 +39,12 @@ class TPESampler(Sampler):
return chosen

class TPE(BaseStrategy):
"""
The Tree-structured Parzen Estimator (TPE) :footcite:p:`bergstra2011algorithms`
is a sequential model-based optimization (SMBO) approach.
SMBO methods sequentially construct models to approximate the performance of hyperparameters based on historical measurements,
and then subsequently choose new hyperparameters to test based on this model.
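
A minimal usage sketch (assuming the strategy package is importable as ``nni.retiarii.strategy``, matching the ``__init__`` above): ::

    from nni.retiarii.strategy import TPE

    strategy = TPE()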
""" """
def __init__(self): def __init__(self):
...@@ -92,3 +87,7 @@ class TPEStrategy(BaseStrategy): ...@@ -92,3 +87,7 @@ class TPEStrategy(BaseStrategy):
to_be_deleted.append(_id) to_be_deleted.append(_id)
for _id in to_be_deleted: for _id in to_be_deleted:
del self.running_models[_id] del self.running_models[_id]
# alias for backward compatibility
TPEStrategy = TPE
@@ -7,6 +7,8 @@ from collections import defaultdict
from typing import Any, List, Dict
from pathlib import Path
__all__ = ['NoContextError', 'ContextStack', 'ModelNamespace']
def import_(target: str, allow_none: bool = False) -> Any:
if target is None:
@@ -70,12 +72,13 @@ def get_importable_name(cls, relocate_module=False):
class NoContextError(Exception):
"""Exception raised when context is missing."""
pass

class ContextStack:
"""
This maintains a globally-accessible context environment that is visible everywhere.
Use ``with ContextStack(namespace, value):`` to initiate, and use ``get_current_context(namespace)`` to
get the corresponding value in the namespace.
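
A minimal sketch of the pattern (the namespace name and payload are illustrative): ::

    with ContextStack('my_namespace', {'depth': 3}):
        # anywhere while the with-block is active:
        value = get_current_context('my_namespace')  # -> {'depth': 3}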
...