Unverified commit a911b856, authored by Yuge Zhang, committed by GitHub

Resolve conflicts for #4760 (#4762)

parent 14d2966b
@@ -17,6 +17,8 @@ _logger = logging.getLogger(__name__)
 class BaseGraphData:
     """
+    Data sent between strategy and trial, in graph-based execution engine.
+
     Attributes
     ----------
     model_script
...
@@ -200,7 +200,7 @@ class CGOExecutionEngine(AbstractExecutionEngine):
         # replace the module with a new instance whose n_models is set
         # n_models must be set in __init__, otherwise it cannot be captured by serialize_cls
-        new_module_init_params = model.evaluator.module.trace_kwargs.copy()
+        new_module_init_params = model.evaluator.module.dump_kwargs().copy()
         # MultiModelSupervisedLearningModule hides n_models of _MultiModelSupervisedLearningModule from users
         new_module_init_params['n_models'] = len(multi_model)
...
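For context on the `trace_kwargs` → `dump_kwargs()` change above, here is a minimal sketch of the tracing pattern it relies on; the class and parameter names are illustrative, not from the NNI codebase:

```python
import nni

@nni.trace
class MyModule:
    """A traced class records its __init__ arguments for later re-instantiation."""
    def __init__(self, lr: float = 0.1, n_models: int = 1):
        self.lr = lr
        self.n_models = n_models

m = MyModule(lr=0.01)
# Recover the recorded kwargs, patch them, and rebuild an equivalent instance,
# mirroring how the engine patches n_models before re-creating the module.
# (Attribute form shown; this commit switches to the dump_kwargs() accessor.)
params = dict(m.trace_kwargs)
params['n_models'] = 4
m4 = MyModule(**params)
```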
@@ -45,6 +45,9 @@ from ..strategy.utils import dry_run_for_formatted_search_space
 _logger = logging.getLogger(__name__)
 
+__all__ = ['RetiariiExeConfig', 'RetiariiExperiment']
+
+
 @dataclass(init=False)
 class RetiariiExeConfig(ConfigBase):
     experiment_name: Optional[str] = None
...@@ -145,6 +148,10 @@ def preprocess_model(base_model, evaluator, applied_mutators, full_ir=True, dumm ...@@ -145,6 +148,10 @@ def preprocess_model(base_model, evaluator, applied_mutators, full_ir=True, dumm
'do not use mutators when you use LayerChoice/InputChoice') 'do not use mutators when you use LayerChoice/InputChoice')
if mutators is not None: if mutators is not None:
applied_mutators = mutators applied_mutators = mutators
# Add mutations on evaluators
applied_mutators += process_evaluator_mutations(evaluator, applied_mutators)
return base_model_ir, applied_mutators return base_model_ir, applied_mutators
@@ -284,7 +291,6 @@ class RetiariiExperiment(Experiment):
             full_ir=self.config.execution_engine not in ['py', 'benchmark'],
             dummy_input=self.config.dummy_input
         )
-        self.applied_mutators += process_evaluator_mutations(self.evaluator, self.applied_mutators)
         _logger.info('Start strategy...')
         search_space = dry_run_for_formatted_search_space(base_model_ir, self.applied_mutators)
@@ -475,6 +481,8 @@ class RetiariiExperiment(Experiment):
         For one-shot algorithms, only top-1 is supported. For others, ``optimize_mode`` and ``formatter`` are
         available for customization.
+
+        Parameters
+        ----------
         top_k : int
             How many models are intended to be exported.
         optimize_mode : str
...
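A hedged usage sketch for the parameters documented above; `exp` stands for a `RetiariiExperiment` whose search has finished:

```python
# Export the best three models found by the strategy (sketch, not from the diff).
top_models = exp.export_top_models(top_k=3, optimize_mode='maximize', formatter='dict')
for arch in top_models:
    print(arch)  # one dict of architecture choices per exported model
```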
@@ -75,12 +75,12 @@ class Model:
     """
     Represents a neural network model.
 
-    During mutation, one `Model` object is created for each trainable snapshot.
+    During mutation, one :class:`Model` object is created for each trainable snapshot.
     For example, consider a mutator that inserts a node at an edge in each iteration.
     In one iteration, the mutator invokes 4 primitives: add node, remove edge, add edge to head, add edge to tail.
-    These 4 primitives operates in one `Model` object.
+    These 4 primitives operate in one :class:`Model` object.
     When they are all done, the model will be set to "frozen" (trainable) status and submitted to the execution engine.
-    And then a new iteration starts, and a new `Model` object is created by forking last model.
+    Then a new iteration starts, and a new :class:`Model` object is created by forking the last model.
 
     Attributes
     ----------
@@ -91,7 +91,7 @@ class Model:
     python_init_params
         Initialization parameters of python class.
     status
-        See `ModelStatus`.
+        See :class:`ModelStatus`.
     root_graph
         The outermost graph, which usually takes the dataset as input and feeds output to the loss function.
     graphs
@@ -100,11 +100,11 @@ class Model:
         Model evaluator
     history
         Mutation history.
-        `self` is directly mutated from `self.history[-1]`;
-        `self.history[-1]` is mutated from `self.history[-2]`, and so on.
-        `self.history[0]` is the base graph.
+        ``self`` is directly mutated from ``self.history[-1]``;
+        ``self.history[-1]`` is mutated from ``self.history[-2]``, and so on.
+        ``self.history[0]`` is the base graph.
     metric
-        Training result of the model, or `None` if it's not yet trained or has failed to train.
+        Training result of the model, or ``None`` if it's not yet trained or has failed to train.
     intermediate_metrics
         Intermediate training metrics. If the model is not trained, it's an empty list.
     """
@@ -265,9 +265,9 @@ class Graph:
     Graph topology.
 
     This class simply represents the topology, with no semantic meaning.
-    All other information like metric, non-graph functions, mutation history, etc should go to `Model`.
+    All other information like metric, non-graph functions, mutation history, etc. should go to :class:`Model`.
 
-    Each graph belongs to and only belongs to one `Model`.
+    Each graph belongs to and only belongs to one :class:`Model`.
 
     Attributes
     ----------
@@ -284,15 +284,15 @@ class Graph:
     output_names
         Optional mnemonic names of output values.
     input_node
-        ...
+        Incoming node.
     output_node
-        ...
+        Output node.
     hidden_nodes
-        ...
+        Hidden nodes.
     nodes
         All input/output/hidden nodes.
     edges
-        ...
+        Edges.
     python_name
         The name of torch.nn.Module; should have one-to-one mapping with items in the python model.
     """
@@ -532,16 +532,16 @@ class Node:
     """
     An operation or an opaque subgraph inside a graph.
 
-    Each node belongs to and only belongs to one `Graph`.
-    Nodes should never be created with constructor. Use `Graph.add_node()` instead.
+    Each node belongs to and only belongs to one :class:`Graph`.
+    Nodes should never be created with the constructor. Use :meth:`Graph.add_node` instead.
 
     The node itself is for topology only.
-    Information of tensor calculation should all go inside `operation` attribute.
+    Information of tensor calculation should all go inside the ``operation`` attribute.
 
     TODO: parameter of subgraph (cell)
     It's easy to assign parameters on a cell node, but it's hard to "use" them.
     We need to design a way to reference stored cell parameters in inner node operations.
-    e.g. `self.fc = Linear(self.units)` <- how to express `self.units` in IR?
+    e.g. ``self.fc = Linear(self.units)`` <- how to express ``self.units`` in IR?
 
     Attributes
     ----------
@@ -557,10 +557,10 @@ class Node:
     label
         Optional. If two nodes have the same label, they are considered the same by the mutator.
     operation
-        ...
+        Operation.
     cell
         Read-only shortcut to get the referenced subgraph.
-        If this node is not a subgraph (is a primitive operation), accessing `cell` will raise an error.
+        If this node is not a subgraph (is a primitive operation), accessing ``cell`` will raise an error.
     predecessors
         Predecessor nodes of this node in the graph. This is an optional mutation helper.
     successors
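A hedged sketch of the creation rule stated above; the node name, operation type, and parameter names are illustrative:

```python
# Nodes are always created through the owning graph, never via Node(...) directly.
fc = graph.add_node('fc1', 'Linear', {'in_features': 64, 'out_features': 10})
# Topology lives on the Node itself; the computation lives in fc.operation.
```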
@@ -677,36 +677,36 @@ class Edge:
     """
     A tensor, or "data flow", between two nodes.
 
-    Example forward code snippet:
-    ```
+    Example forward code snippet: ::
+
         a, b, c = split(x)
         p = concat(a, c)
         q = sum(b, p)
         z = relu(q)
-    ```
 
-    Edges in above snippet:
+    Edges in the above snippet: ::
+
         + head: (split, 0), tail: (concat, 0)         # a in concat
         + head: (split, 2), tail: (concat, 1)         # c in concat
         + head: (split, 1), tail: (sum, -1 or 0)      # b in sum
         + head: (concat, null), tail: (sum, -1 or 1)  # p in sum
         + head: (sum, null), tail: (relu, null)       # q in relu
 
     Attributes
     ----------
     graph
-        ...
+        Graph.
     head
         Head node.
     tail
         Tail node.
     head_slot
         Index of outputs in the head node.
-        If the node has only one output, this should be `null`.
+        If the node has only one output, this should be ``null``.
     tail_slot
         Index of inputs in the tail node.
-        If the node has only one input, this should be `null`.
-        If the node does not care about order, this can be `-1`.
+        If the node has only one input, this should be ``null``.
+        If the node does not care about order, this can be ``-1``.
     """
 
     def __init__(self, head: EdgeEndpoint, tail: EdgeEndpoint, _internal: bool = False):
...
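Connecting the slot semantics above to the API, a hedged sketch (`graph` and the `*_node` variables are illustrative; an `EdgeEndpoint` is a `(node, slot)` pair):

```python
# "a in concat": output slot 0 of split feeds input slot 0 of concat.
graph.add_edge(head=(split_node, 0), tail=(concat_node, 0))
# "p in sum": concat has a single output, so its slot is None (rendered as
# `null` in the docstring); sum does not care about order, so -1 is accepted.
graph.add_edge(head=(concat_node, None), tail=(sum_node, -1))
```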
This README will be deleted once this hub is stabilized, after which we will promote it in the documentation.

## Why

We hereby provide a series of state-of-the-art search spaces, each of which is a PyTorch model + mutations + training recipe.

For further motivations and plans, please see https://github.com/microsoft/nni/issues/4249.
## Reproduction Roadmap
1. Runnable
2. Load checkpoint of searched architecture and evaluate
3. Reproduce searched architecture
4. Runnable with built-in algos
5. Reproduce result with at least one algo
| Search space           | 1      | 2      | 3      | 4      | 5      |
|------------------------|--------|--------|--------|--------|--------|
| NasBench101 | Y | | | | |
| NasBench201 | Y | | | | |
| NASNet | Y | | | | |
| ENAS | Y | | | | |
| AmoebaNet | Y | | | | |
| PNAS | Y | | | | |
| DARTS | Y | | | | |
| ProxylessNAS | Y | | | | |
| MobileNetV3Space | Y | | | | |
| ShuffleNetSpace | Y | | | | |
| ShuffleNetSpace (ch) | Y | | | | |
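As a quick taste, a hedged sketch of instantiating a space from this hub (the import path is assumed and may change while the hub is unstable):

```python
from nni.retiarii.hub.pytorch import MobileNetV3Space  # path assumed, see note above

model_space = MobileNetV3Space()
```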
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .mobilenetv3 import MobileNetV3Space
from .nasbench101 import NasBench101
from .nasbench201 import NasBench201
from .nasnet import NDS, NASNet, ENAS, AmoebaNet, PNAS, DARTS
from .proxylessnas import ProxylessNAS
from .shufflenet import ShuffleNetSpace
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Tuple, Optional, Callable
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
from .proxylessnas import ConvBNReLU, InvertedResidual, SeparableConv, make_divisible, reset_parameters
class h_sigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu(x + 3) / 6


class h_swish(nn.Module):
    def __init__(self, inplace=True):
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)
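# Note: h_sigmoid and h_swish above are numerically identical to torch's
# built-in nn.Hardsigmoid and nn.Hardswish (available since PyTorch 1.6);
# they are presumably redefined here to keep explicit control over `inplace`.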
class SELayer(nn.Module):
    """Squeeze-and-excite layer."""

    def __init__(self,
                 channels: int,
                 reduction: int = 4,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()
        if activation_layer is None:
            activation_layer = nn.Sigmoid
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, make_divisible(channels // reduction, 8)),
            nn.ReLU(inplace=True),
            nn.Linear(make_divisible(channels // reduction, 8), channels),
            activation_layer()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y
@model_wrapper
class MobileNetV3Space(nn.Module):
    """
    MobileNetV3Space implements the largest search space in `TuNAS <https://arxiv.org/abs/2008.06120>`__.

    The search dimensions include widths, expand ratios, kernel sizes, and SE ratio.
    Some of them can be turned off via arguments to narrow down the search space.

    Different from the ProxylessNAS search space, this space is implemented with :class:`nn.ValueChoice`.
    We use the following snippet as reference:
    https://github.com/google-research/google-research/blob/20736344591f774f4b1570af64624ed1e18d2867/tunas/mobile_search_space_v3.py#L728
    """
    def __init__(self, num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024),
                 width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
                 expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
                 dropout_rate: float = 0.2,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):
        super().__init__()
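        # One independent width choice per stage: multiplier-scaled base widths,
        # rounded to multiples of 8 via make_divisible.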
        self.widths = [
            nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers], label=f'width_{i}')
            for i, base_width in enumerate(base_widths)
        ]
        self.expand_ratios = expand_ratios

        blocks = [
            # Stem
            ConvBNReLU(
                3, self.widths[0],
                nn.ValueChoice([3, 5], label='ks_0'),
                stride=2, activation_layer=h_swish
            ),
            SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
        ]
        # counting for kernel sizes and expand ratios
        self.layer_count = 2

        blocks += [
            # Body: stages are numbered 1-5 so that each one gets its own
            # independent depth choice label (`depth_{stage_idx}`).
            self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU),
            self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU),
            self._make_stage(3, self.widths[2], self.widths[3], False, 2, h_swish),
            self._make_stage(4, self.widths[3], self.widths[4], True, 1, h_swish),
            self._make_stage(5, self.widths[4], self.widths[5], True, 2, h_swish),
        ]

        # Head
        blocks += [
            ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish),
            nn.AdaptiveAvgPool2d(1),
            ConvBNReLU(self.widths[6], self.widths[7], 1, 1, norm_layer=nn.Identity, activation_layer=h_swish),
        ]

        self.blocks = nn.Sequential(*blocks)

        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(self.widths[7], num_labels),
        )

        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)

    def forward(self, x):
        x = self.blocks(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    def _make_stage(self, stage_idx, inp, oup, se, stride, act):
        # initialize them first because they are related to layer_count.
        exp, ks, se_blocks = [], [], []
        for _ in range(4):
            exp.append(nn.ValueChoice(list(self.expand_ratios), label=f'exp_{self.layer_count}'))
            ks.append(nn.ValueChoice([3, 5, 7], label=f'ks_{self.layer_count}'))
            if se:
                # if SE is true, assign a layer choice to SE;
                # bind layer_count as a default argument so each lambda keeps the
                # value at definition time instead of the final loop value
                se_blocks.append(
                    lambda hidden_ch, layer_count=self.layer_count: nn.LayerChoice(
                        [nn.Identity(), SELayer(hidden_ch)], label=f'se_{layer_count}')
                )
            else:
                se_blocks.append(None)
            self.layer_count += 1

        blocks = [
            # stride = 2
            InvertedResidual(inp, oup, exp[0], ks[0],
                             stride, squeeze_and_excite=se_blocks[0], activation_layer=act),
            # stride = 1, residual connection should be automatically enabled
            InvertedResidual(oup, oup, exp[1], ks[1], squeeze_and_excite=se_blocks[1], activation_layer=act),
            InvertedResidual(oup, oup, exp[2], ks[2], squeeze_and_excite=se_blocks[2], activation_layer=act),
            InvertedResidual(oup, oup, exp[3], ks[3], squeeze_and_excite=se_blocks[3], activation_layer=act)
        ]

        # mutable depth
        return nn.Repeat(blocks, depth=(1, 4), label=f'depth_{stage_idx}')
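A hedged end-to-end sketch of how this space is meant to be consumed; the strategy/evaluator wiring follows the public Retiarii API, while the evaluation body is a placeholder:

```python
import nni
import nni.retiarii.strategy as strategy
from nni.retiarii.evaluator import FunctionalEvaluator
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig

def evaluate_model(model_cls):
    # Placeholder evaluation: instantiate the candidate and report a dummy metric.
    model = model_cls()
    nni.report_final_result(0.0)

model_space = MobileNetV3Space(num_labels=10)
evaluator = FunctionalEvaluator(evaluate_model)
exp = RetiariiExperiment(model_space, evaluator, [], strategy.Random())
exp_config = RetiariiExeConfig('local')
exp_config.trial_concurrency = 1
exp_config.max_trial_number = 3
exp.run(exp_config, 8080)
```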