Unverified Commit a911b856 authored by Yuge Zhang, committed by GitHub

Resolve conflicts for #4760 (#4762)

parent 14d2966b
......@@ -17,6 +17,8 @@ _logger = logging.getLogger(__name__)
class BaseGraphData:
"""
Data sent between strategy and trial in the graph-based execution engine.
Attributes
----------
model_script
......
......@@ -200,7 +200,7 @@ class CGOExecutionEngine(AbstractExecutionEngine):
# replace the module with a new instance whose n_models is set
# n_models must be set in __init__, otherwise it cannot be captured by serialize_cls
new_module_init_params = model.evaluator.module.trace_kwargs.copy()
new_module_init_params = model.evaluator.module.dump_kwargs().copy()
# MultiModelSupervisedLearningModule hides n_models of _MultiModelSupervisedLearningModule from users
new_module_init_params['n_models'] = len(multi_model)
......
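The hunk above switches from reading ``trace_kwargs`` directly to calling ``dump_kwargs()``. A minimal sketch of the re-instantiation this enables; the final constructor call is an assumption about how CGO rebuilds the module and is not shown in this hunk:

```python
# Sketch only: rebuild the evaluator's module so that n_models is part of its
# __init__ kwargs and can therefore be captured by serialize_cls.
new_module_init_params = model.evaluator.module.dump_kwargs().copy()
new_module_init_params['n_models'] = len(multi_model)
# Assumed re-instantiation step (not part of this hunk):
new_module = model.evaluator.module.__class__(**new_module_init_params)
```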
......@@ -45,6 +45,9 @@ from ..strategy.utils import dry_run_for_formatted_search_space
_logger = logging.getLogger(__name__)
__all__ = ['RetiariiExeConfig', 'RetiariiExperiment']
@dataclass(init=False)
class RetiariiExeConfig(ConfigBase):
experiment_name: Optional[str] = None
......@@ -145,6 +148,10 @@ def preprocess_model(base_model, evaluator, applied_mutators, full_ir=True, dumm
'do not use mutators when you use LayerChoice/InputChoice')
if mutators is not None:
applied_mutators = mutators
# Add mutations on evaluators
applied_mutators += process_evaluator_mutations(evaluator, applied_mutators)
return base_model_ir, applied_mutators
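With this change, evaluator mutations are appended inside ``preprocess_model`` itself, so the call site in ``RetiariiExperiment`` (removed below) no longer needs to do it. A hedged sketch of a call, with argument values chosen only for illustration:

```python
# Sketch only: preprocess_model now returns mutators that already include
# the evaluator mutations.
base_model_ir, applied_mutators = preprocess_model(
    base_model, evaluator, applied_mutators,
    full_ir=True, dummy_input=None)  # dummy_input value is illustrative
```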
......@@ -284,7 +291,6 @@ class RetiariiExperiment(Experiment):
full_ir=self.config.execution_engine not in ['py', 'benchmark'],
dummy_input=self.config.dummy_input
)
self.applied_mutators += process_evaluator_mutations(self.evaluator, self.applied_mutators)
_logger.info('Start strategy...')
search_space = dry_run_for_formatted_search_space(base_model_ir, self.applied_mutators)
......@@ -475,6 +481,8 @@ class RetiariiExperiment(Experiment):
For one-shot algorithms, only top-1 is supported. For others, ``optimize_mode`` and ``formatter`` are
available for customization.
Parameters
----------
top_k : int
How many models are intended to be exported.
optimize_mode : str
......
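A hedged usage sketch of ``export_top_models`` based on the docstring above; the ``optimize_mode`` value shown is an assumption about a typical setting:

```python
# Sketch only: export the best 3 models found by the experiment.
top_models = experiment.export_top_models(top_k=3, optimize_mode='maximize')
```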
......@@ -75,12 +75,12 @@ class Model:
"""
Represents a neural network model.
During mutation, one `Model` object is created for each trainable snapshot.
During mutation, one :class:`Model` object is created for each trainable snapshot.
For example, consider a mutator that inserts a node at an edge for each iteration.
In one iteration, the mutator invokes 4 primitives: add node, remove edge, add edge to head, add edge to tail.
These 4 primitives operates in one `Model` object.
These 4 primitives operate in one :class:`Model` object.
When they are all done, the model will be set to "frozen" (trainable) status and be submitted to the execution engine.
And then a new iteration starts, and a new `Model` object is created by forking last model.
Then a new iteration starts, and a new :class:`Model` object is created by forking the last model.
Attributes
----------
......@@ -91,7 +91,7 @@ class Model:
python_init_params
Initialization parameters of python class.
status
See `ModelStatus`.
See :class:`ModelStatus`.
root_graph
The outermost graph which usually takes dataset as input and feeds output to loss function.
graphs
......@@ -100,11 +100,11 @@ class Model:
Model evaluator
history
Mutation history.
`self` is directly mutated from `self.history[-1]`;
`self.history[-1] is mutated from `self.history[-2]`, and so on.
`self.history[0]` is the base graph.
``self`` is directly mutated from ``self.history[-1]``;
``self.history[-1]`` is mutated from ``self.history[-2]``, and so on.
``self.history[0]`` is the base graph.
metric
Training result of the model, or `None` if it's not yet trained or has failed to train.
Training result of the model, or ``None`` if it's not yet trained or has failed to train.
intermediate_metrics
Intermediate training metrics. If the model is not trained, it's an empty list.
"""
......@@ -265,9 +265,9 @@ class Graph:
Graph topology.
This class simply represents the topology, with no semantic meaning.
All other information like metric, non-graph functions, mutation history, etc should go to `Model`.
All other information like metric, non-graph functions, mutation history, etc. should go to :class:`Model`.
Each graph belongs to and only belongs to one `Model`.
Each graph belongs to and only belongs to one :class:`Model`.
Attributes
----------
......@@ -284,15 +284,15 @@ class Graph:
output_names
Optional mnemonic names of output values.
input_node
...
Incoming node.
output_node
...
Output node.
hidden_nodes
...
Hidden nodes.
nodes
All input/output/hidden nodes.
edges
...
Edges.
python_name
The name of the torch.nn.Module; should have a one-to-one mapping with items in the Python model.
"""
......@@ -532,16 +532,16 @@ class Node:
"""
An operation or an opaque subgraph inside a graph.
Each node belongs to and only belongs to one `Graph`.
Nodes should never be created with constructor. Use `Graph.add_node()` instead.
Each node belongs to and only belongs to one :class:`Graph`.
Nodes should never be created with constructor. Use :meth:`Graph.add_node` instead.
The node itself is for topology only.
Information of tensor calculation should all go inside `operation` attribute.
Information of tensor calculation should all go inside ``operation`` attribute.
TODO: parameter of subgraph (cell)
It's easy to assign parameters on a cell node, but it's hard to "use" them.
We need to design a way to reference stored cell parameters in inner node operations.
e.g. `self.fc = Linear(self.units)` <- how to express `self.units` in IR?
e.g. ``self.fc = Linear(self.units)`` <- how to express ``self.units`` in IR?
Attributes
----------
......@@ -557,10 +557,10 @@ class Node:
label
Optional. If two nodes have the same label, they are considered same by the mutator.
operation
...
Operation.
cell
Read only shortcut to get the referenced subgraph.
If this node is not a subgraph (is a primitive operation), accessing `cell` will raise an error.
If this node is not a subgraph (is a primitive operation), accessing ``cell`` will raise an error.
predecessors
Predecessor nodes of this node in the graph. This is an optional mutation helper.
successors
......@@ -677,36 +677,36 @@ class Edge:
"""
A tensor, or "data flow", between two nodes.
Example forward code snippet:
```
a, b, c = split(x)
p = concat(a, c)
q = sum(b, p)
z = relu(q)
```
Edges in above snippet:
+ head: (split, 0), tail: (concat, 0) # a in concat
+ head: (split, 2), tail: (concat, 1) # c in concat
+ head: (split, 1), tail: (sum, -1 or 0) # b in sum
+ head: (concat, null), tail: (sum, -1 or 1) # p in sum
+ head: (sum, null), tail: (relu, null) # q in relu
Example forward code snippet: ::
a, b, c = split(x)
p = concat(a, c)
q = sum(b, p)
z = relu(q)
Edges in above snippet: ::
+ head: (split, 0), tail: (concat, 0) # a in concat
+ head: (split, 2), tail: (concat, 1) # c in concat
+ head: (split, 1), tail: (sum, -1 or 0) # b in sum
+ head: (concat, null), tail: (sum, -1 or 1) # p in sum
+ head: (sum, null), tail: (relu, null) # q in relu
Attributes
----------
graph
...
Graph.
head
Head node.
tail
Tail node.
head_slot
Index of outputs in head node.
If the node has only one output, this should be `null`.
If the node has only one output, this should be ``null``.
tail_slot
Index of inputs in tail node.
If the node has only one input, this should be `null`.
If the node does not care about order, this can be `-1`.
If the node has only one input, this should be ``null``.
If the node does not care about order, this can be ``-1``.
"""
def __init__(self, head: EdgeEndpoint, tail: EdgeEndpoint, _internal: bool = False):
......
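A hedged sketch connecting the snippet above to the ``Edge`` attributes. The node objects are assumed to exist, and whether ``__init__`` unpacks the endpoint tuples exactly like this is an assumption:

```python
# Sketch only: "a in concat" is the edge from output slot 0 of split
# to input slot 0 of concat.
edge = Edge(head=(split_node, 0), tail=(concat_node, 0), _internal=True)
assert edge.head is split_node and edge.head_slot == 0
assert edge.tail is concat_node and edge.tail_slot == 0
```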
This README will be deleted once this hub is stabilized, after which we will promote it in the documentation.
## Why
We hereby provide a series of state-of-the-art search spaces, each of which is a PyTorch model + mutations + a training recipe.
For further motivations and plans, please see https://github.com/microsoft/nni/issues/4249.
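A hedged usage sketch of a space from this hub; the import path is an assumption and should be adjusted to wherever this package lands:

```python
# Sketch only: instantiate a search space exported by this hub.
from nni.retiarii.hub.pytorch import MobileNetV3Space  # assumed import path

model_space = MobileNetV3Space()
```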
## Reproduction Roadmap
1. Runnable
2. Load checkpoint of searched architecture and evaluate
3. Reproduce searched architecture
4. Runnable with built-in algos
5. Reproduce result with at least one algo
| Search space | 1 | 2 | 3 | 4 | 5 |
|------------------------|--------|--------|--------|--------|--------|
| NasBench101 | Y | | | | |
| NasBench201 | Y | | | | |
| NASNet | Y | | | | |
| ENAS | Y | | | | |
| AmoebaNet | Y | | | | |
| PNAS | Y | | | | |
| DARTS | Y | | | | |
| ProxylessNAS | Y | | | | |
| MobileNetV3Space | Y | | | | |
| ShuffleNetSpace | Y | | | | |
| ShuffleNetSpace (ch) | Y | | | | |
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .mobilenetv3 import MobileNetV3Space
from .nasbench101 import NasBench101
from .nasbench201 import NasBench201
from .nasnet import NDS, NASNet, ENAS, AmoebaNet, PNAS, DARTS
from .proxylessnas import ProxylessNAS
from .shufflenet import ShuffleNetSpace
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Tuple, Optional, Callable
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
from .proxylessnas import ConvBNReLU, InvertedResidual, SeparableConv, make_divisible, reset_parameters
class h_sigmoid(nn.Module):
def __init__(self, inplace=True):
super(h_sigmoid, self).__init__()
self.relu = nn.ReLU6(inplace=inplace)
def forward(self, x):
return self.relu(x + 3) / 6
class h_swish(nn.Module):
def __init__(self, inplace=True):
super(h_swish, self).__init__()
self.sigmoid = h_sigmoid(inplace=inplace)
def forward(self, x):
return x * self.sigmoid(x)
class SELayer(nn.Module):
"""Squeeze-and-excite layer."""
def __init__(self,
channels: int,
reduction: int = 4,
activation_layer: Optional[Callable[..., nn.Module]] = None):
super().__init__()
if activation_layer is None:
activation_layer = nn.Sigmoid
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channels, make_divisible(channels // reduction, 8)),
nn.ReLU(inplace=True),
nn.Linear(make_divisible(channels // reduction, 8), channels),
activation_layer()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y
@model_wrapper
class MobileNetV3Space(nn.Module):
"""
MobileNetV3Space implements the largest search space in `TuNAS <https://arxiv.org/abs/2008.06120>`__.
The search dimensions include widths, expand ratios, kernel sizes, and SE ratio.
Some of them can be turned off via arguments to narrow down the search space.
Different from the ProxylessNAS search space, this space is implemented with :class:`nn.ValueChoice`.
We use the following snippet as reference:
https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/mobile_search_space_v3.py#L728
"""
def __init__(self, num_labels: int = 1000,
base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024),
width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
dropout_rate: float = 0.2,
bn_eps: float = 1e-3,
bn_momentum: float = 0.1):
super().__init__()
self.widths = [
nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers], label=f'width_{i}')
for i, base_width in enumerate(base_widths)
]
self.expand_ratios = expand_ratios
blocks = [
# Stem
ConvBNReLU(
3, self.widths[0],
nn.ValueChoice([3, 5], label='ks_0'),
stride=2, activation_layer=h_swish
),
SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
]
# counting for kernel sizes and expand ratios
self.layer_count = 2
blocks += [
# Body
self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU),
self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU),
self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
]
# Head
blocks += [
ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish),
nn.AdaptiveAvgPool2d(1),
ConvBNReLU(self.widths[6], self.widths[7], 1, 1, norm_layer=nn.Identity, activation_layer=h_swish),
]
self.blocks = nn.Sequential(*blocks)
self.classifier = nn.Sequential(
nn.Dropout(dropout_rate),
nn.Linear(self.widths[7], num_labels),
)
reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def forward(self, x):
x = self.blocks(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
def _make_stage(self, stage_idx, inp, oup, se, stride, act):
# initialize them first because they are related to layer_count.
exp, ks, se_blocks = [], [], []
for _ in range(4):
exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}'))
ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}'))
if se:
# if SE is true, assign a layer choice to SE
se_blocks.append(
lambda hidden_ch: nn.LayerChoice([nn.Identity(), SELayer(hidden_ch)], label=f'se_{self.layer_count}')
)
else:
se_blocks.append(None)
self.layer_count += 1
blocks = [
# stride = 2
InvertedResidual(inp, oup, exp[0], ks[0],
stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
# stride = 1, residual connection should be automatically enabled
InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
]
# mutable depth
return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}')
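A hedged usage sketch for the space defined above. ``fixed_arch`` is the standard ``nni.retiarii`` helper for freezing a sampled architecture; the choice dictionary shown is illustrative and deliberately incomplete:

```python
# Sketch only: instantiate the raw space (all ValueChoice / LayerChoice
# mutables kept), then freeze a hypothetical sample with fixed_arch.
from nni.retiarii import fixed_arch

space = MobileNetV3Space(num_labels=1000)

# A real sample must cover every label used in the space; this one is partial.
# with fixed_arch({'width_0': 16, 'ks_0': 3, 'depth_1': 2, ...}):
#     model = MobileNetV3Space(num_labels=1000)
```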