OpenDAS / nni / Commits / 867871b2

Unverified commit 867871b2, authored Jul 27, 2022 by Yuge Zhang, committed by GitHub on Jul 27, 2022

Promote Retiarii to NAS (step 1) - move files (#5020)
parent 481aa292
Changes 137
Showing 20 changed files with 0 additions and 2305 deletions (+0 -2305)

nni/nas/oneshot/pytorch/supermodule/differentiable.py  +0 -0
nni/nas/oneshot/pytorch/supermodule/operation.py  +0 -0
nni/nas/oneshot/pytorch/supermodule/proxyless.py  +0 -0
nni/nas/oneshot/pytorch/supermodule/sampling.py  +0 -0
nni/nas/pytorch/__init__.py  +0 -9
nni/nas/pytorch/base_mutator.py  +0 -155
nni/nas/pytorch/base_trainer.py  +0 -40
nni/nas/pytorch/callbacks.py  +0 -139
nni/nas/pytorch/fixed.py  +0 -147
nni/nas/pytorch/mutables.py  +0 -344
nni/nas/pytorch/mutator.py  +0 -308
nni/nas/pytorch/nasbench201/__init__.py  +0 -4
nni/nas/pytorch/nasbench201/nasbench201.py  +0 -75
nni/nas/pytorch/nasbench201/nasbench201_ops.py  +0 -149
nni/nas/pytorch/search_space_zoo/__init__.py  +0 -7
nni/nas/pytorch/search_space_zoo/darts_cell.py  +0 -112
nni/nas/pytorch/search_space_zoo/darts_ops.py  +0 -196
nni/nas/pytorch/search_space_zoo/enas_cell.py  +0 -255
nni/nas/pytorch/search_space_zoo/enas_ops.py  +0 -171
nni/nas/pytorch/trainer.py  +0 -194
nni/retiarii/oneshot/pytorch/supermodule/differentiable.py → nni/nas/oneshot/pytorch/supermodule/differentiable.py
File moved

nni/retiarii/oneshot/pytorch/supermodule/operation.py → nni/nas/oneshot/pytorch/supermodule/operation.py
File moved

nni/retiarii/oneshot/pytorch/supermodule/proxyless.py → nni/nas/oneshot/pytorch/supermodule/proxyless.py
File moved

nni/retiarii/oneshot/pytorch/supermodule/sampling.py → nni/nas/oneshot/pytorch/supermodule/sampling.py
File moved
nni/nas/pytorch/__init__.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .base_mutator import BaseMutator
from .base_trainer import BaseTrainer
from .fixed import apply_fixed_architecture
from .mutables import Mutable, LayerChoice, InputChoice
from .mutator import Mutator
from .trainer import Trainer
nni/nas/pytorch/base_mutator.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

import torch.nn as nn

from nni.nas.pytorch.mutables import Mutable, MutableScope, InputChoice
from nni.nas.pytorch.utils import StructuredMutableTreeNode

logger = logging.getLogger(__name__)


class BaseMutator(nn.Module):
    """
    A mutator is responsible for mutating a graph by obtaining the search space from the network and implementing
    callbacks that are called in ``forward`` in mutables.

    Parameters
    ----------
    model : nn.Module
        PyTorch model to apply mutator on.
    """

    def __init__(self, model):
        super().__init__()
        self.__dict__["model"] = model
        self._structured_mutables = self._parse_search_space(self.model)

    def _parse_search_space(self, module, root=None, prefix="", memo=None, nested_detection=None):
        if memo is None:
            memo = set()
        if root is None:
            root = StructuredMutableTreeNode(None)
        if module not in memo:
            memo.add(module)
            if isinstance(module, Mutable):
                if nested_detection is not None:
                    raise RuntimeError("Cannot have nested search space. Error at {} in {}"
                                       .format(module, nested_detection))
                module.name = prefix
                module.set_mutator(self)
                root = root.add_child(module)
                if not isinstance(module, MutableScope):
                    nested_detection = module
                if isinstance(module, InputChoice):
                    for k in module.choose_from:
                        if k != InputChoice.NO_KEY and k not in [m.key for m in memo if isinstance(m, Mutable)]:
                            raise RuntimeError("'{}' required by '{}' not found in keys that appeared before, "
                                               "and is not NO_KEY.".format(k, module.key))
            for name, submodule in module._modules.items():
                if submodule is None:
                    continue
                submodule_prefix = prefix + ("." if prefix else "") + name
                self._parse_search_space(submodule, root, submodule_prefix, memo=memo,
                                         nested_detection=nested_detection)
        return root

    @property
    def mutables(self):
        """
        A generator of all modules inheriting :class:`~nni.nas.pytorch.mutables.Mutable`.
        Modules are yielded in the order that they are defined in ``__init__``.
        For mutables whose keys appear multiple times, only the first one will appear.
        """
        return self._structured_mutables

    @property
    def undedup_mutables(self):
        return self._structured_mutables.traverse(deduplicate=False)

    def forward(self, *inputs):
        """
        Warnings
        --------
        Don't call forward of a mutator.
        """
        raise RuntimeError("Forward is undefined for mutators.")

    def __setattr__(self, name, value):
        if name == "model":
            raise AttributeError("Attribute `model` can be set at most once, and you shouldn't use `self.model = model` to "
                                 "include your network, as it will include all parameters in model into the mutator.")
        return super().__setattr__(name, value)

    def enter_mutable_scope(self, mutable_scope):
        """
        Callback when forward of a MutableScope is entered.

        Parameters
        ----------
        mutable_scope : MutableScope
            The mutable scope that is entered.
        """
        pass

    def exit_mutable_scope(self, mutable_scope):
        """
        Callback when forward of a MutableScope is exited.

        Parameters
        ----------
        mutable_scope : MutableScope
            The mutable scope that is exited.
        """
        pass

    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        Callback of forward in LayerChoice.

        Parameters
        ----------
        mutable : nni.nas.pytorch.mutables.LayerChoice
            Module whose forward is called.
        args : list of torch.Tensor
            The arguments of its forward function.
        kwargs : dict
            The keyword arguments of its forward function.

        Returns
        -------
        tuple of torch.Tensor and torch.Tensor
            Output tensor and mask.
        """
        raise NotImplementedError

    def on_forward_input_choice(self, mutable, tensor_list):
        """
        Callback of forward in InputChoice.

        Parameters
        ----------
        mutable : nni.nas.pytorch.mutables.InputChoice
            Mutable that is called.
        tensor_list : list of torch.Tensor
            The arguments mutable is called with.

        Returns
        -------
        tuple of torch.Tensor and torch.Tensor
            Output tensor and mask.
        """
        raise NotImplementedError

    def export(self):
        """
        Export the data of all decisions. This should output the decisions of all the mutables, so that the whole
        network can be fully determined with these decisions for further training from scratch.

        Returns
        -------
        dict
            Mappings from mutable keys to decisions.
        """
        raise NotImplementedError
nni/nas/pytorch/base_trainer.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from abc import ABC, abstractmethod


class BaseTrainer(ABC):

    @abstractmethod
    def train(self):
        """
        Override the method to train.
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self):
        """
        Override the method to validate.
        """
        raise NotImplementedError

    @abstractmethod
    def export(self, file):
        """
        Override the method to export to file.

        Parameters
        ----------
        file : str
            File path to export to.
        """
        raise NotImplementedError

    @abstractmethod
    def checkpoint(self):
        """
        Override to dump a checkpoint.
        """
        raise NotImplementedError
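The four abstract methods above define the whole trainer contract. The following minimal sketch (not part of the diff) shows how a subclass was expected to look at the parent revision of this commit, when these modules still lived under nni/nas/pytorch; the loop body and the `num_epochs` default are illustrative placeholders.

import json

from nni.nas.pytorch.base_trainer import BaseTrainer


class MyTrainer(BaseTrainer):
    def __init__(self, model, mutator, num_epochs=10):
        self.model = model
        self.mutator = mutator
        self.num_epochs = num_epochs

    def train(self):
        for epoch in range(self.num_epochs):
            self.mutator.reset()  # resample an architecture for this epoch
            # ... run one epoch of weight (and possibly architecture) updates on self.model ...

    def validate(self):
        pass  # evaluate self.model on validation data

    def export(self, file):
        # dump the mutator's final decisions, one entry per mutable key
        with open(file, "w") as f:
            json.dump(self.mutator.export(), f)

    def checkpoint(self):
        raise NotImplementedError("Checkpointing is omitted in this sketch.")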
nni/nas/pytorch/callbacks.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import os

import torch
import torch.nn as nn

_logger = logging.getLogger(__name__)


class Callback:
    """
    Callback provides an easy way to react to events like begin/end of epochs.
    """

    def __init__(self):
        self.model = None
        self.mutator = None
        self.trainer = None

    def build(self, model, mutator, trainer):
        """
        Callback needs to be built with model, mutator and trainer, to get updates from them.

        Parameters
        ----------
        model : nn.Module
            Model to be trained.
        mutator : nn.Module
            Mutator that mutates the model.
        trainer : BaseTrainer
            Trainer that is to call the callback.
        """
        self.model = model
        self.mutator = mutator
        self.trainer = trainer

    def on_epoch_begin(self, epoch):
        """
        Implement this to do something at the beginning of an epoch.

        Parameters
        ----------
        epoch : int
            Epoch number, starting from 0.
        """
        pass

    def on_epoch_end(self, epoch):
        """
        Implement this to do something at the end of an epoch.

        Parameters
        ----------
        epoch : int
            Epoch number, starting from 0.
        """
        pass

    def on_batch_begin(self, epoch):
        pass

    def on_batch_end(self, epoch):
        pass


class LRSchedulerCallback(Callback):
    """
    Calls scheduler on every epoch end.

    Parameters
    ----------
    scheduler : LRScheduler
        Scheduler to be called.
    """
    def __init__(self, scheduler, mode="epoch"):
        super().__init__()
        assert mode == "epoch"
        self.scheduler = scheduler
        self.mode = mode

    def on_epoch_end(self, epoch):
        """
        Call ``self.scheduler.step()`` on epoch end.
        """
        self.scheduler.step()


class ArchitectureCheckpoint(Callback):
    """
    Calls ``trainer.export()`` on every epoch end.

    Parameters
    ----------
    checkpoint_dir : str
        Location to save checkpoints.
    """
    def __init__(self, checkpoint_dir):
        super().__init__()
        self.checkpoint_dir = checkpoint_dir
        os.makedirs(self.checkpoint_dir, exist_ok=True)

    def on_epoch_end(self, epoch):
        """
        Dump to ``/checkpoint_dir/epoch_{number}.json`` on epoch end.
        """
        dest_path = os.path.join(self.checkpoint_dir, "epoch_{}.json".format(epoch))
        _logger.info("Saving architecture to %s", dest_path)
        self.trainer.export(dest_path)


class ModelCheckpoint(Callback):
    """
    Saves the model weights on every epoch end.

    Parameters
    ----------
    checkpoint_dir : str
        Location to save checkpoints.
    """
    def __init__(self, checkpoint_dir):
        super().__init__()
        self.checkpoint_dir = checkpoint_dir
        os.makedirs(self.checkpoint_dir, exist_ok=True)

    def on_epoch_end(self, epoch):
        """
        Dump to ``/checkpoint_dir/epoch_{number}.pth.tar`` on every epoch end.
        ``DataParallel`` objects will have their inner modules exported.
        """
        if isinstance(self.model, nn.DataParallel):
            state_dict = self.model.module.state_dict()
        else:
            state_dict = self.model.state_dict()
        dest_path = os.path.join(self.checkpoint_dir, "epoch_{}.pth.tar".format(epoch))
        _logger.info("Saving model to %s", dest_path)
        torch.save(state_dict, dest_path)
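As a hedged usage sketch (not part of the diff), the callbacks above were typically passed to a one-shot trainer, which calls `build(model, mutator, trainer)` once and then fires the epoch hooks. The placeholder `nn.Linear` model and the hyperparameter values below are illustrative; a real search space would contain mutables.

import torch.nn as nn
import torch.optim as optim

from nni.nas.pytorch.callbacks import ArchitectureCheckpoint, LRSchedulerCallback

model = nn.Linear(8, 2)  # placeholder; a real supernet would be built from mutables
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

callbacks = [
    LRSchedulerCallback(scheduler),           # calls scheduler.step() on every epoch end
    ArchitectureCheckpoint("./checkpoints"),  # writes epoch_{n}.json on every epoch end
]
# A trainer built on BaseTrainer would receive these via its `callbacks` argument,
# call `callback.build(model, mutator, trainer)` once, and then invoke the hooks per epoch/batch.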
nni/nas/pytorch/fixed.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import logging

from .mutables import InputChoice, LayerChoice, MutableScope
from .mutator import Mutator
from .utils import to_list

_logger = logging.getLogger(__name__)


class FixedArchitecture(Mutator):
    """
    Fixed architecture mutator that always selects a certain graph.

    Parameters
    ----------
    model : nn.Module
        A mutable network.
    fixed_arc : dict
        Preloaded architecture object.
    strict : bool
        Force everything that appears in ``fixed_arc`` to be used at least once.
    verbose : bool
        Print log messages if set to True.
    """

    def __init__(self, model, fixed_arc, strict=True, verbose=True):
        super().__init__(model)
        self._fixed_arc = fixed_arc
        self.verbose = verbose

        mutable_keys = set([mutable.key for mutable in self.mutables if not isinstance(mutable, MutableScope)])
        fixed_arc_keys = set(self._fixed_arc.keys())
        if fixed_arc_keys - mutable_keys:
            raise RuntimeError("Unexpected keys found in fixed architecture: {}.".format(fixed_arc_keys - mutable_keys))
        if mutable_keys - fixed_arc_keys:
            raise RuntimeError("Missing keys in fixed architecture: {}.".format(mutable_keys - fixed_arc_keys))
        self._fixed_arc = self._from_human_readable_architecture(self._fixed_arc)

    def _from_human_readable_architecture(self, human_arc):
        # convert from an exported architecture
        result_arc = {k: to_list(v) for k, v in human_arc.items()}  # there could be tensors, numpy arrays, etc.
        # First, convert non-list to list, because there could be {"op1": 0} or {"op1": "conv"},
        # which means {"op1": [0, ]} or {"op1": ["conv", ]}.
        result_arc = {k: v if isinstance(v, list) else [v] for k, v in result_arc.items()}
        # Second, infer which ones are multi-hot arrays and which ones are in human-readable format.
        # This is non-trivial, since if an array is [0, 1], we cannot know for sure whether it means
        # [false, true] or [true, true]. Here, we assume a multi-hot array has to be a boolean array
        # or a float array and matches the length.
        for mutable in self.mutables:
            if mutable.key not in result_arc:
                continue  # skip silently
            choice_arr = result_arc[mutable.key]
            if all(isinstance(v, bool) for v in choice_arr) or all(isinstance(v, float) for v in choice_arr):
                if (isinstance(mutable, LayerChoice) and len(mutable) == len(choice_arr)) or \
                        (isinstance(mutable, InputChoice) and mutable.n_candidates == len(choice_arr)):
                    # multi-hot, do nothing
                    continue
            if isinstance(mutable, LayerChoice):
                choice_arr = [mutable.names.index(val) if isinstance(val, str) else val for val in choice_arr]
                choice_arr = [i in choice_arr for i in range(len(mutable))]
            elif isinstance(mutable, InputChoice):
                choice_arr = [mutable.choose_from.index(val) if isinstance(val, str) else val for val in choice_arr]
                choice_arr = [i in choice_arr for i in range(mutable.n_candidates)]
            result_arc[mutable.key] = choice_arr
        return result_arc

    def sample_search(self):
        """
        Always returns the fixed architecture.
        """
        return self._fixed_arc

    def sample_final(self):
        """
        Always returns the fixed architecture.
        """
        return self._fixed_arc

    def replace_layer_choice(self, module=None, prefix=""):
        """
        Replace layer choices with the selected candidates. It's done with best effort.
        In case of weighted choices or multiple choices, candidates weighted with zero are deleted.
        In case of a single choice, the module is replaced with a normal module.

        Parameters
        ----------
        module : nn.Module
            Module to be processed.
        prefix : str
            Module name under the global namespace.
        """
        if module is None:
            module = self.model
        for name, mutable in module.named_children():
            global_name = (prefix + "." if prefix else "") + name
            if isinstance(mutable, LayerChoice):
                chosen = self._fixed_arc[mutable.key]
                if sum(chosen) == 1 and max(chosen) == 1 and not mutable.return_mask:
                    # sum is one, max is one, so there has to be exactly one;
                    # this is compatible with integer arrays, boolean arrays and float arrays
                    if self.verbose:
                        _logger.info("Replacing %s with candidate number %d.", global_name, chosen.index(1))
                    setattr(module, name, mutable[chosen.index(1)])
                else:
                    if mutable.return_mask and self.verbose:
                        _logger.info("`return_mask` flag of %s is true. As it relies on the behavior of LayerChoice, "
                                     "LayerChoice will not be replaced.", global_name)
                    # remove unused parameters
                    for ch, n in zip(chosen, mutable.names):
                        if ch == 0 and not isinstance(ch, float):
                            setattr(mutable, n, None)
            else:
                self.replace_layer_choice(mutable, global_name)


def apply_fixed_architecture(model, fixed_arc, verbose=True):
    """
    Load architecture from ``fixed_arc`` and apply it to the model.

    Parameters
    ----------
    model : torch.nn.Module
        Model with mutables.
    fixed_arc : str or dict
        Path to the JSON that stores the architecture, or dict that stores the exported architecture.
    verbose : bool
        Print log messages if set to True.

    Returns
    -------
    FixedArchitecture
        Mutator that is responsible for fixing the graph.
    """
    if isinstance(fixed_arc, str):
        with open(fixed_arc) as f:
            fixed_arc = json.load(f)
    architecture = FixedArchitecture(model, fixed_arc, verbose=verbose)
    architecture.reset()

    # for the convenience of parameter counting
    architecture.replace_layer_choice()
    return architecture
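A minimal end-to-end sketch of the helper above (not part of the diff), assuming the modules as they existed before this commit removed them. The tiny `TinySpace` model and the key name "op" are invented for illustration; the fixed architecture dict mirrors what `Mutator.export()` would write out, and a JSON file path works equally well.

from collections import OrderedDict

import torch
import torch.nn as nn

from nni.nas.pytorch.fixed import apply_fixed_architecture
from nni.nas.pytorch.mutables import LayerChoice


class TinySpace(nn.Module):
    def __init__(self):
        super().__init__()
        self.op = LayerChoice(OrderedDict([
            ("conv3x3", nn.Conv2d(3, 8, 3, padding=1)),
            ("conv5x5", nn.Conv2d(3, 8, 5, padding=2)),
        ]), key="op")

    def forward(self, x):
        return self.op(x)


model = TinySpace()
# Human-readable decision: pick the candidate named "conv3x3" for the mutable keyed "op".
apply_fixed_architecture(model, {"op": "conv3x3"})
# The LayerChoice has now been replaced by the chosen Conv2d, so the model trains like a plain module.
print(model(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 8, 32, 32])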
nni/nas/pytorch/mutables.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import warnings
from collections import OrderedDict

import torch.nn as nn

from nni.nas.pytorch.utils import global_mutable_counting

logger = logging.getLogger(__name__)


class Mutable(nn.Module):
    """
    Mutable is designed to function as a normal layer, with all necessary operators' weights.
    States and weights of architectures should be included in the mutator, instead of the layer itself.

    A mutable has a key, which marks the identity of the mutable. This key can be used by users to share
    decisions among different mutables. In a mutator's implementation, mutators should use the key to
    distinguish different mutables. Mutables that share the same key should be "similar" to each other.

    Currently the default scope for keys is global. By default, keys use a global counter starting from 1
    to produce unique ids.

    Parameters
    ----------
    key : str
        The key of mutable.

    Notes
    -----
    The counter is program level, but mutables are model level. In case multiple models are defined, and
    you want the counter to start from 1 in the second model, it's recommended to assign keys manually
    instead of using automatic keys.
    """

    def __init__(self, key=None):
        super().__init__()
        if key is not None:
            if not isinstance(key, str):
                key = str(key)
                logger.warning("Warning: key \"%s\" is not string, converted to string.", key)
            self._key = key
        else:
            self._key = self.__class__.__name__ + str(global_mutable_counting())
        self.init_hook = self.forward_hook = None

    def __deepcopy__(self, memodict=None):
        raise NotImplementedError("Deep copy doesn't work for mutables.")

    def __call__(self, *args, **kwargs):
        self._check_built()
        return super().__call__(*args, **kwargs)

    def set_mutator(self, mutator):
        if "mutator" in self.__dict__:
            raise RuntimeError("`set_mutator` is called more than once. Did you parse the search space multiple times? "
                               "Or did you apply multiple fixed architectures?")
        self.__dict__["mutator"] = mutator

    @property
    def key(self):
        """
        Read-only property of key.
        """
        return self._key

    @property
    def name(self):
        """
        After the search space is parsed, it will be the module name of the mutable.
        """
        return self._name if hasattr(self, "_name") else self._key

    @name.setter
    def name(self, name):
        self._name = name

    def _check_built(self):
        if not hasattr(self, "mutator"):
            raise ValueError(
                "Mutator not set for {}. You might have forgotten to initialize and apply your mutator. "
                "Or did you initialize a mutable on the fly in forward pass? Move it to `__init__` "
                "so that the trainer can locate all your mutables. See NNI docs for more details.".format(self))


class MutableScope(Mutable):
    """
    Mutable scope marks a subgraph/submodule to help mutators make better decisions.

    If not annotated with a mutable scope, the search space will be flattened as a list. However, some mutators might
    need to leverage the concept of a "cell". So if a module is defined as a mutable scope, everything in it will
    look like a "sub-search-space" in the scope. Scopes can be nested.

    There are two ways mutators can use mutable scope. One is to traverse the search space as a tree during
    initialization and reset. The other is to implement ``enter_mutable_scope`` and ``exit_mutable_scope``.
    They are called before and after the forward method of the class inheriting mutable scope.

    Mutable scopes are also mutables that are listed in ``mutator.mutables`` (search space), but they are not supposed
    to appear in the dict of choices.

    Parameters
    ----------
    key : str
        Key of mutable scope.
    """
    def __init__(self, key):
        super().__init__(key=key)

    def _check_built(self):
        return True  # bypass the check because it's deprecated

    def __call__(self, *args, **kwargs):
        if not hasattr(self, 'mutator'):
            return super().__call__(*args, **kwargs)
        warnings.warn("`MutableScope` is deprecated in Retiarii.", DeprecationWarning)
        try:
            self._check_built()
            self.mutator.enter_mutable_scope(self)
            return super().__call__(*args, **kwargs)
        finally:
            self.mutator.exit_mutable_scope(self)


class LayerChoice(Mutable):
    """
    Layer choice selects one of the ``op_candidates``, then applies it on inputs and returns the result.
    In rare cases, it can also select zero or many.

    Layer choice does not allow itself to be nested.

    Parameters
    ----------
    op_candidates : list of nn.Module or OrderedDict
        A module list to be selected from.
    reduction : str
        ``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
        If ``none``, a list is returned. ``mean`` returns the average. ``sum`` returns the sum.
        ``concat`` concatenates the list at dimension 1.
    return_mask : bool
        If ``return_mask``, return output tensor and a mask. Otherwise return tensor only.
    key : str
        Key of the layer choice.

    Attributes
    ----------
    length : int
        Deprecated. Number of ops to choose from. ``len(layer_choice)`` is recommended.
    names : list of str
        Names of candidates.
    choices : list of Module
        Deprecated. A list of all candidate modules in the layer choice module.
        ``list(layer_choice)`` is recommended, which will serve the same purpose.

    Notes
    -----
    ``op_candidates`` can be a list of modules or an ordered dict of named modules, for example,

    .. code-block:: python

        self.op_choice = LayerChoice(OrderedDict([
            ("conv3x3", nn.Conv2d(3, 16, 128)),
            ("conv5x5", nn.Conv2d(5, 16, 128)),
            ("conv7x7", nn.Conv2d(7, 16, 128))
        ]))

    Elements in layer choice can be modified or deleted. Use ``del self.op_choice["conv5x5"]`` or
    ``self.op_choice[1] = nn.Conv3d(...)``. Adding more choices is not supported yet.
    """

    def __init__(self, op_candidates, reduction="sum", return_mask=False, key=None):
        super().__init__(key=key)
        self.names = []
        if isinstance(op_candidates, OrderedDict):
            for name, module in op_candidates.items():
                assert name not in ["length", "reduction", "return_mask", "_key", "key", "names"], \
                    "Please don't use a reserved name '{}' for your module.".format(name)
                self.add_module(name, module)
                self.names.append(name)
        elif isinstance(op_candidates, list):
            for i, module in enumerate(op_candidates):
                self.add_module(str(i), module)
                self.names.append(str(i))
        else:
            raise TypeError("Unsupported op_candidates type: {}".format(type(op_candidates)))
        self.reduction = reduction
        self.return_mask = return_mask

    def __getitem__(self, idx):
        if isinstance(idx, str):
            return self._modules[idx]
        return list(self)[idx]

    def __setitem__(self, idx, module):
        key = idx if isinstance(idx, str) else self.names[idx]
        return setattr(self, key, module)

    def __delitem__(self, idx):
        if isinstance(idx, slice):
            for key in self.names[idx]:
                delattr(self, key)
        else:
            if isinstance(idx, str):
                key, idx = idx, self.names.index(idx)
            else:
                key = self.names[idx]
            delattr(self, key)
        del self.names[idx]

    @property
    def length(self):
        warnings.warn("layer_choice.length is deprecated. Use `len(layer_choice)` instead.", DeprecationWarning)
        return len(self)

    def __len__(self):
        return len(self.names)

    def __iter__(self):
        return map(lambda name: self._modules[name], self.names)

    @property
    def choices(self):
        warnings.warn("layer_choice.choices is deprecated. Use `list(layer_choice)` instead.", DeprecationWarning)
        return list(self)

    def forward(self, *args, **kwargs):
        """
        Returns
        -------
        tuple of tensors
            Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
        """
        out, mask = self.mutator.on_forward_layer_choice(self, *args, **kwargs)
        if self.return_mask:
            return out, mask
        return out


class InputChoice(Mutable):
    """
    Input choice selects ``n_chosen`` inputs from ``choose_from`` (which contains ``n_candidates`` keys). For beginners,
    using ``n_candidates`` instead of ``choose_from`` is a safe option. To get the most power out of it, you might want
    to know about ``choose_from``.

    The keys in ``choose_from`` can be keys that appear in past mutables, or ``NO_KEY`` if there are no suitable ones.
    The keys are designed to be the keys of the sources. To help mutators make better decisions,
    mutators might be interested in how the tensors to choose from come into place. For example, the tensor is the
    output of some operator, some node, some cell, or some module. If this operator happens to be a mutable (e.g.,
    ``LayerChoice`` or ``InputChoice``), it naturally has a key that can be used as a source key. If it's a
    module/submodule, it needs to be annotated with a key: that's where a :class:`MutableScope` is needed.

    In the example below, ``input_choice`` is a 4-choose-any. The first three inputs are semantically the output of
    cell1, the output of cell2, and the output of op, respectively. Notice that an extra max pooling follows cell1,
    indicating x1 is not "actually" the direct output of cell1.

    .. code-block:: python

        class Cell(MutableScope):
            pass

        class Net(nn.Module):
            def __init__(self):
                self.cell1 = Cell("cell1")
                self.cell2 = Cell("cell2")
                self.op = LayerChoice([conv3x3(), conv5x5()], key="op")
                self.input_choice = InputChoice(choose_from=["cell1", "cell2", "op", InputChoice.NO_KEY])

            def forward(self, x):
                x1 = max_pooling(self.cell1(x))
                x2 = self.cell2(x)
                x3 = self.op(x)
                x4 = torch.zeros_like(x)
                return self.input_choice([x1, x2, x3, x4])

    Parameters
    ----------
    n_candidates : int
        Number of inputs to choose from.
    choose_from : list of str
        List of source keys to choose from. At least one of ``choose_from`` and ``n_candidates`` must be fulfilled.
        If ``n_candidates`` has a value but ``choose_from`` is None, it will be automatically treated as ``n_candidates``
        number of empty strings.
    n_chosen : int
        Recommended number of inputs to choose. If None, the mutator is instructed to select any.
    reduction : str
        ``mean``, ``concat``, ``sum`` or ``none``. See :class:`LayerChoice`.
    return_mask : bool
        If ``return_mask``, return output tensor and a mask. Otherwise return tensor only.
    key : str
        Key of the input choice.
    """

    NO_KEY = ""

    def __init__(self, n_candidates=None, choose_from=None, n_chosen=None,
                 reduction="sum", return_mask=False, key=None):
        super().__init__(key=key)
        # precondition check
        assert n_candidates is not None or choose_from is not None, \
            "At least one of `n_candidates` and `choose_from` must be not None."
        if choose_from is not None and n_candidates is None:
            n_candidates = len(choose_from)
        elif choose_from is None and n_candidates is not None:
            choose_from = [self.NO_KEY] * n_candidates
        assert n_candidates == len(choose_from), "Number of candidates must be equal to the length of `choose_from`."
        assert n_candidates > 0, "Number of candidates must be greater than 0."
        assert n_chosen is None or 0 <= n_chosen <= n_candidates, \
            "Expected selected number must be None or no more than number of candidates."

        self.n_candidates = n_candidates
        self.choose_from = choose_from.copy()
        self.n_chosen = n_chosen
        self.reduction = reduction
        self.return_mask = return_mask

    def forward(self, optional_inputs):
        """
        Forward method of InputChoice.

        Parameters
        ----------
        optional_inputs : list or dict
            Recommended to be a dict. As a dict, inputs will be converted to a list that follows the order of
            ``choose_from`` in initialization. As a list, inputs must follow the same semantic order as
            ``choose_from``.

        Returns
        -------
        tuple of tensors
            Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
        """
        optional_input_list = optional_inputs
        if isinstance(optional_inputs, dict):
            optional_input_list = [optional_inputs[tag] for tag in self.choose_from]
        assert isinstance(optional_input_list, list), \
            "Optional input list must be a list, not a {}.".format(type(optional_input_list))
        assert len(optional_inputs) == self.n_candidates, \
            "Length of the input list must be equal to the number of candidates."
        out, mask = self.mutator.on_forward_input_choice(self, optional_input_list)
        if self.return_mask:
            return out, mask
        return out
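A short sketch (not part of the diff) exercising the container behaviour documented in the ``LayerChoice`` notes above: indexing, replacement, deletion and iteration. Nothing here needs a mutator, because only ``__init__``-time bookkeeping is touched, not ``forward()``; the key name "demo_op" is invented for illustration.

from collections import OrderedDict

import torch.nn as nn

from nni.nas.pytorch.mutables import LayerChoice

op_choice = LayerChoice(OrderedDict([
    ("conv3x3", nn.Conv2d(3, 16, 3)),
    ("conv5x5", nn.Conv2d(3, 16, 5)),
    ("conv7x7", nn.Conv2d(3, 16, 7)),
]), key="demo_op")

print(len(op_choice))    # 3
print(op_choice.names)   # ['conv3x3', 'conv5x5', 'conv7x7']

op_choice[1] = nn.Conv2d(3, 16, 5, padding=2)  # replace a candidate by position
del op_choice["conv7x7"]                       # delete a candidate by name
print([type(m).__name__ for m in op_choice])   # ['Conv2d', 'Conv2d']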
nni/nas/pytorch/mutator.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from collections import defaultdict

import numpy as np
import torch

from .base_mutator import BaseMutator
from .mutables import LayerChoice, InputChoice
from .utils import to_list

logger = logging.getLogger(__name__)


class Mutator(BaseMutator):

    def __init__(self, model):
        super().__init__(model)
        self._cache = dict()
        self._connect_all = False

    def sample_search(self):
        """
        Override this method to iterate over mutables and make decisions.

        Returns
        -------
        dict
            A mapping from key of mutables to decisions.
        """
        raise NotImplementedError

    def sample_final(self):
        """
        Override this method to iterate over mutables and make decisions that are final
        for export and retraining.

        Returns
        -------
        dict
            A mapping from key of mutables to decisions.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the mutator by calling `sample_search` to resample (for search). Stores the result in a local
        variable so that `on_forward_layer_choice` and `on_forward_input_choice` can use the decision directly.
        """
        self._cache = self.sample_search()

    def export(self):
        """
        Resample (for final) and return the results.

        Returns
        -------
        dict
            A mapping from key of mutables to decisions.
        """
        sampled = self.sample_final()
        result = dict()
        for mutable in self.mutables:
            if not isinstance(mutable, (LayerChoice, InputChoice)):
                # not supported as built-in
                continue
            result[mutable.key] = self._convert_mutable_decision_to_human_readable(mutable, sampled.pop(mutable.key))
        if sampled:
            raise ValueError("Unexpected keys returned from 'sample_final()': %s" % list(sampled.keys()))
        return result

    def status(self):
        """
        Return the current selection status of the mutator.

        Returns
        -------
        dict
            A mapping from key of mutables to decisions. All weights (boolean type and float type)
            are converted into real number values. Numpy arrays and tensors are converted into lists.
        """
        data = dict()
        for k, v in self._cache.items():
            if torch.is_tensor(v):
                v = v.detach().cpu().numpy().tolist()
            if isinstance(v, np.ndarray):
                v = v.astype(np.float32).tolist()
            data[k] = v
        return data

    def graph(self, inputs):
        """
        Return the model supernet graph.

        Parameters
        ----------
        inputs : tuple of tensor
            Inputs that will be fed into the network.

        Returns
        -------
        dict
            Containing ``node``, in Tensorboard GraphDef format.
            Additional key ``mutable`` is a map from key to list of modules.
        """
        if not torch.__version__.startswith("1.4"):
            logger.warning("Graph is only tested with PyTorch 1.4. Other versions might not work.")
        from nni.common.graph_utils import build_graph
        from google.protobuf import json_format
        # protobuf should be installed as long as tensorboard is installed
        try:
            self._connect_all = True
            graph_def, _ = build_graph(self.model, inputs, verbose=False)
            result = json_format.MessageToDict(graph_def)
        finally:
            self._connect_all = False

        # `mutable` is to map the keys to a list of corresponding modules.
        # A key can be linked to multiple modules; use `dedup=False` to find them all.
        result["mutable"] = defaultdict(list)
        for mutable in self.mutables.traverse(deduplicate=False):
            # A module will be represented in the format of
            # [{"type": "Net", "name": ""}, {"type": "Cell", "name": "cell1"}, {"type": "Conv2d", "name": "conv"}]
            # which will be concatenated into Net/Cell[cell1]/Conv2d[conv] in the frontend.
            # This format is aligned with the scope name jit gives.
            modules = mutable.name.split(".")
            path = [
                {"type": self.model.__class__.__name__, "name": ""}
            ]
            m = self.model
            for module in modules:
                m = getattr(m, module)
                path.append({"type": m.__class__.__name__, "name": module})
            result["mutable"][mutable.key].append(path)
        return result

    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        By default, this method retrieves the decision obtained previously, and selects certain operations.
        Only operations with non-zero weight will be executed. The results are added to a list.
        Then it reduces the list of all tensor outputs with the policy specified in `mutable.reduction`.

        Parameters
        ----------
        mutable : nni.nas.pytorch.mutables.LayerChoice
            Layer choice module.
        args : list of torch.Tensor
            Inputs
        kwargs : dict
            Inputs

        Returns
        -------
        tuple of torch.Tensor and torch.Tensor
            Output and mask.
        """
        if self._connect_all:
            return self._all_connect_tensor_reduction(mutable.reduction,
                                                      [op(*args, **kwargs) for op in mutable]), \
                torch.ones(len(mutable)).bool()

        def _map_fn(op, args, kwargs):
            return op(*args, **kwargs)

        mask = self._get_decision(mutable)
        assert len(mask) == len(mutable), \
            "Invalid mask, expected {} to be of length {}.".format(mask, len(mutable))
        out, mask = self._select_with_mask(_map_fn, [(choice, args, kwargs) for choice in mutable], mask)
        return self._tensor_reduction(mutable.reduction, out), mask

    def on_forward_input_choice(self, mutable, tensor_list):
        """
        By default, this method retrieves the decision obtained previously, and selects certain tensors.
        Then it reduces the list of all tensor outputs with the policy specified in `mutable.reduction`.

        Parameters
        ----------
        mutable : nni.nas.pytorch.mutables.InputChoice
            Input choice module.
        tensor_list : list of torch.Tensor
            Tensor list to apply the decision on.

        Returns
        -------
        tuple of torch.Tensor and torch.Tensor
            Output and mask.
        """
        if self._connect_all:
            return self._all_connect_tensor_reduction(mutable.reduction, tensor_list), \
                torch.ones(mutable.n_candidates).bool()
        mask = self._get_decision(mutable)
        assert len(mask) == mutable.n_candidates, \
            "Invalid mask, expected {} to be of length {}.".format(mask, mutable.n_candidates)
        out, mask = self._select_with_mask(lambda x: x, [(t,) for t in tensor_list], mask)
        return self._tensor_reduction(mutable.reduction, out), mask

    def _select_with_mask(self, map_fn, candidates, mask):
        """
        Select masked tensors and return a list of tensors.

        Parameters
        ----------
        map_fn : function
            Convert candidates to target candidates. Can be simply identity.
        candidates : list of torch.Tensor
            Tensor list to apply the decision on.
        mask : list-like object
            Can be a list, a numpy array or a tensor (recommended). Needs to
            have the same length as ``candidates``.

        Returns
        -------
        tuple of list of torch.Tensor and torch.Tensor
            Output and mask.
        """
        if (isinstance(mask, list) and len(mask) >= 1 and isinstance(mask[0], bool)) or \
                (isinstance(mask, np.ndarray) and mask.dtype == np.bool) or \
                "BoolTensor" in mask.type():
            out = [map_fn(*cand) for cand, m in zip(candidates, mask) if m]
        elif (isinstance(mask, list) and len(mask) >= 1 and isinstance(mask[0], (float, int))) or \
                (isinstance(mask, np.ndarray) and mask.dtype in (np.float32, np.float64, np.int32, np.int64)) or \
                "FloatTensor" in mask.type():
            out = [map_fn(*cand) * m for cand, m in zip(candidates, mask) if m]
        else:
            raise ValueError("Unrecognized mask '%s'" % mask)
        if not torch.is_tensor(mask):
            mask = torch.tensor(mask)  # pylint: disable=not-callable
        return out, mask

    def _tensor_reduction(self, reduction_type, tensor_list):
        if reduction_type == "none":
            return tensor_list
        if not tensor_list:
            return None  # empty. return None for now
        if len(tensor_list) == 1:
            return tensor_list[0]
        if reduction_type == "sum":
            return sum(tensor_list)
        if reduction_type == "mean":
            return sum(tensor_list) / len(tensor_list)
        if reduction_type == "concat":
            return torch.cat(tensor_list, dim=1)
        raise ValueError("Unrecognized reduction policy: \"{}\"".format(reduction_type))

    def _all_connect_tensor_reduction(self, reduction_type, tensor_list):
        if reduction_type == "none":
            return tensor_list
        if reduction_type == "concat":
            return torch.cat(tensor_list, dim=1)
        return torch.stack(tensor_list).sum(0)

    def _get_decision(self, mutable):
        """
        By default, this method checks whether `mutable.key` is already in the decision cache,
        and returns the result without double-checking.

        Parameters
        ----------
        mutable : Mutable

        Returns
        -------
        object
        """
        if mutable.key not in self._cache:
            raise ValueError("\"{}\" not found in decision cache.".format(mutable.key))
        result = self._cache[mutable.key]
        logger.debug("Decision %s: %s", mutable.key, result)
        return result

    def _convert_mutable_decision_to_human_readable(self, mutable, sampled):
        # Assert the existence of mutable.key in the returned architecture.
        # Also check if there is anything extra.
        multihot_list = to_list(sampled)
        converted = None
        # If it's a boolean array, we can do optimization.
        if all([t == 0 or t == 1 for t in multihot_list]):
            if isinstance(mutable, LayerChoice):
                assert len(multihot_list) == len(mutable), \
                    "Results returned from 'sample_final()' (%s: %s) either too short or too long." \
                    % (mutable.key, multihot_list)
                # check if all modules have different names and they indeed have names
                if len(set(mutable.names)) == len(mutable) and not all(d.isdigit() for d in mutable.names):
                    converted = [name for i, name in enumerate(mutable.names) if multihot_list[i]]
                else:
                    converted = [i for i in range(len(multihot_list)) if multihot_list[i]]
            if isinstance(mutable, InputChoice):
                assert len(multihot_list) == mutable.n_candidates, \
                    "Results returned from 'sample_final()' (%s: %s) either too short or too long." \
                    % (mutable.key, multihot_list)
                # check if all input candidates have different names
                if len(set(mutable.choose_from)) == mutable.n_candidates:
                    converted = [name for i, name in enumerate(mutable.choose_from) if multihot_list[i]]
                else:
                    converted = [i for i in range(len(multihot_list)) if multihot_list[i]]
        if converted is not None:
            # if there is only one element, remove the bracket
            if len(converted) == 1:
                converted = converted[0]
        else:
            # do nothing
            converted = multihot_list
        return converted
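A minimal random-search mutator sketch (not part of the diff) built on the class above: `sample_search` draws a uniformly random choice for every LayerChoice / InputChoice and encodes it as the boolean multi-hot tensors that `on_forward_*` and `export()` expect. This mirrors the spirit of the RandomMutator that shipped with NNI at this revision, but the class below is written here only as an illustration.

import torch
import torch.nn.functional as F

from nni.nas.pytorch.mutables import InputChoice, LayerChoice
from nni.nas.pytorch.mutator import Mutator


class RandomlySampledMutator(Mutator):
    def sample_search(self):
        result = dict()
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):
                # one-hot over the candidate operations
                index = torch.randint(high=len(mutable), size=(1,))
                result[mutable.key] = F.one_hot(index, num_classes=len(mutable)).view(-1).bool()
            elif isinstance(mutable, InputChoice):
                if mutable.n_chosen is None:
                    # any subset of the candidate inputs
                    result[mutable.key] = torch.randint(high=2, size=(mutable.n_candidates,)).bool()
                else:
                    # exactly n_chosen candidate inputs
                    perm = torch.randperm(mutable.n_candidates)
                    mask = [i in perm[:mutable.n_chosen] for i in range(mutable.n_candidates)]
                    result[mutable.key] = torch.tensor(mask, dtype=torch.bool)  # pylint: disable=not-callable
        return result

    def sample_final(self):
        return self.sample_search()

During search, a trainer would call `mutator.reset()` before each forward pass so that a fresh architecture is sampled, and `mutator.export()` at the end to obtain the human-readable decisions.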
nni/nas/pytorch/nasbench201/__init__.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .nasbench201 import NASBench201Cell
nni/nas/pytorch/nasbench201/nasbench201.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from collections import OrderedDict

import torch.nn as nn

from nni.nas.pytorch.mutables import LayerChoice

from .nasbench201_ops import Pooling, ReLUConvBN, Zero, FactorizedReduce


class NASBench201Cell(nn.Module):
    """
    Builtin cell structure of NAS-Bench-201. One cell contains four nodes. The first node serves as an input node,
    accepting the output of the previous cell. The other nodes connect to all previous nodes with an edge that
    represents an operation chosen from a set to transform the tensor from the source node to the target node.
    Every node accepts all its inputs and adds them as its output.

    Parameters
    ---
    cell_id: str
        the name of this cell
    C_in: int
        the number of input channels of the cell
    C_out: int
        the number of output channels of the cell
    stride: int
        stride of all convolution operations in the cell
    bn_affine: bool
        If set to ``True``, all ``torch.nn.BatchNorm2d`` in this cell will have learnable affine parameters. Default: True
    bn_momentum: float
        the value used for the running_mean and running_var computation. Default: 0.1
    bn_track_running_stats: bool
        When set to ``True``, all ``torch.nn.BatchNorm2d`` in this cell track the running mean and variance. Default: True
    """

    def __init__(self, cell_id, C_in, C_out, stride, bn_affine=True, bn_momentum=0.1, bn_track_running_stats=True):
        super(NASBench201Cell, self).__init__()

        self.NUM_NODES = 4
        self.layers = nn.ModuleList()

        OPS = lambda layer_idx: OrderedDict([
            ("none", Zero(C_in, C_out, stride)),
            ("avg_pool_3x3", Pooling(C_in, C_out, stride if layer_idx == 0 else 1,
                                     bn_affine, bn_momentum, bn_track_running_stats)),
            ("conv_3x3", ReLUConvBN(C_in, C_out, 3, stride if layer_idx == 0 else 1, 1, 1,
                                    bn_affine, bn_momentum, bn_track_running_stats)),
            ("conv_1x1", ReLUConvBN(C_in, C_out, 1, stride if layer_idx == 0 else 1, 0, 1,
                                    bn_affine, bn_momentum, bn_track_running_stats)),
            ("skip_connect", nn.Identity() if stride == 1 and C_in == C_out
             else FactorizedReduce(C_in, C_out, stride if layer_idx == 0 else 1,
                                   bn_affine, bn_momentum, bn_track_running_stats))
        ])

        for i in range(self.NUM_NODES):
            node_ops = nn.ModuleList()
            for j in range(0, i):
                node_ops.append(LayerChoice(OPS(j), key="%d_%d" % (j, i), reduction="mean"))
            self.layers.append(node_ops)
        self.in_dim = C_in
        self.out_dim = C_out
        self.cell_id = cell_id

    def forward(self, input):  # pylint: disable=W0622
        """
        Parameters
        ---
        input: torch.tensor
            the output of the previous layer
        """
        nodes = [input]
        for i in range(1, self.NUM_NODES):
            node_feature = sum(self.layers[i][k](nodes[k]) for k in range(i))
            nodes.append(node_feature)
        return nodes[-1]
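A small sketch (not part of the diff) showing what the cell above builds: each directed edge (j → i) between nodes is a LayerChoice over five candidate operations, keyed "j_i". Constructing the cell needs no mutator; only `forward()` consults one. The import path assumes the modules as they existed before this commit removed them.

from nni.nas.pytorch.nasbench201 import NASBench201Cell

cell = NASBench201Cell("cell0", C_in=16, C_out=16, stride=1)
edge_keys = [choice.key for node in cell.layers for choice in node]
print(edge_keys)  # ['0_1', '0_2', '1_2', '0_3', '1_3', '2_3']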
nni/nas/pytorch/nasbench201/nasbench201_ops.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch
import torch.nn as nn


class ReLUConvBN(nn.Module):
    """
    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    stride: int
        stride of the convolution
    padding: int
        zero-padding added to both sides of the input
    dilation: int
        spacing between kernel elements
    bn_affine: bool
        If set to ``True``, ``torch.nn.BatchNorm2d`` will have learnable affine parameters. Default: True
    bn_momentum: float
        the value used for the running_mean and running_var computation. Default: 0.1
    bn_track_running_stats: bool
        When set to ``True``, ``torch.nn.BatchNorm2d`` tracks the running mean and variance. Default: True
    """
    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation,
                 bn_affine=True, bn_momentum=0.1, bn_track_running_stats=True):
        super(ReLUConvBN, self).__init__()
        self.op = nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding,
                      dilation=dilation, bias=False),
            nn.BatchNorm2d(C_out, affine=bn_affine, momentum=bn_momentum,
                           track_running_stats=bn_track_running_stats)
        )

    def forward(self, x):
        """
        Parameters
        ---
        x: torch.Tensor
            input tensor
        """
        return self.op(x)


class Pooling(nn.Module):
    """
    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    stride: int
        stride of the convolution
    bn_affine: bool
        If set to ``True``, ``torch.nn.BatchNorm2d`` will have learnable affine parameters. Default: True
    bn_momentum: float
        the value used for the running_mean and running_var computation. Default: 0.1
    bn_track_running_stats: bool
        When set to ``True``, ``torch.nn.BatchNorm2d`` tracks the running mean and variance. Default: True
    """
    def __init__(self, C_in, C_out, stride, bn_affine=True, bn_momentum=0.1,
                 bn_track_running_stats=True):
        super(Pooling, self).__init__()
        if C_in == C_out:
            self.preprocess = None
        else:
            self.preprocess = ReLUConvBN(C_in, C_out, 1, 1, 0, 0,
                                         bn_affine, bn_momentum, bn_track_running_stats)
        self.op = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False)

    def forward(self, x):
        """
        Parameters
        ---
        x: torch.Tensor
            input tensor
        """
        if self.preprocess:
            x = self.preprocess(x)
        return self.op(x)


class Zero(nn.Module):
    """
    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    stride: int
        stride of the convolution
    """
    def __init__(self, C_in, C_out, stride):
        super(Zero, self).__init__()
        self.C_in = C_in
        self.C_out = C_out
        self.stride = stride
        self.is_zero = True

    def forward(self, x):
        """
        Parameters
        ---
        x: torch.Tensor
            input tensor
        """
        if self.C_in == self.C_out:
            if self.stride == 1:
                return x.mul(0.)
            else:
                return x[:, :, ::self.stride, ::self.stride].mul(0.)
        else:
            shape = list(x.shape)
            shape[1] = self.C_out
            zeros = x.new_zeros(shape, dtype=x.dtype, device=x.device)
            return zeros


class FactorizedReduce(nn.Module):
    def __init__(self, C_in, C_out, stride, bn_affine=True, bn_momentum=0.1,
                 bn_track_running_stats=True):
        super(FactorizedReduce, self).__init__()
        self.stride = stride
        self.C_in = C_in
        self.C_out = C_out
        self.relu = nn.ReLU(inplace=False)
        if stride == 2:
            C_outs = [C_out // 2, C_out - C_out // 2]
            self.convs = nn.ModuleList()
            for i in range(2):
                self.convs.append(nn.Conv2d(C_in, C_outs[i], 1, stride=stride, padding=0, bias=False))
            self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
        else:
            raise ValueError("Invalid stride : {:}".format(stride))
        self.bn = nn.BatchNorm2d(C_out, affine=bn_affine, momentum=bn_momentum,
                                 track_running_stats=bn_track_running_stats)

    def forward(self, x):
        x = self.relu(x)
        y = self.pad(x)
        out = torch.cat([self.convs[0](x), self.convs[1](y[:, :, 1:, 1:])], dim=1)
        out = self.bn(out)
        return out
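A tiny worked example (not part of the diff) for the ``Zero`` op above: with matching channels it simply zeroes its input and applies the stride via slicing, so a 2-strided Zero halves the spatial size. The import path assumes the modules as they existed before this commit removed them.

import torch

from nni.nas.pytorch.nasbench201.nasbench201_ops import Zero

op = Zero(C_in=16, C_out=16, stride=2)
print(op(torch.randn(2, 16, 8, 8)).shape)  # torch.Size([2, 16, 4, 4]), all zeros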
nni/nas/pytorch/search_space_zoo/__init__.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .darts_cell import DartsCell
from .enas_cell import ENASMicroLayer
from .enas_cell import ENASMacroLayer
from .enas_cell import ENASMacroGeneralModel
nni/nas/pytorch/search_space_zoo/darts_cell.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from collections import OrderedDict

import torch
import torch.nn as nn

from nni.nas.pytorch import mutables

from .darts_ops import PoolBN, SepConv, DilConv, FactorizedReduce, DropPath, StdConv


class Node(nn.Module):
    def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
        """
        Builtin DARTS node structure.

        Parameters
        ---
        node_id: str
        num_prev_nodes: int
            the number of previous nodes in this cell
        channels: int
            output channels
        num_downsample_connect: int
            downsample the input node if this cell is a reduction cell
        """
        super().__init__()
        self.ops = nn.ModuleList()
        choice_keys = []
        for i in range(num_prev_nodes):
            stride = 2 if i < num_downsample_connect else 1
            choice_keys.append("{}_p{}".format(node_id, i))
            self.ops.append(
                mutables.LayerChoice(OrderedDict([
                    ("maxpool", PoolBN('max', channels, 3, stride, 1, affine=False)),
                    ("avgpool", PoolBN('avg', channels, 3, stride, 1, affine=False)),
                    ("skipconnect", nn.Identity() if stride == 1
                     else FactorizedReduce(channels, channels, affine=False)),
                    ("sepconv3x3", SepConv(channels, channels, 3, stride, 1, affine=False)),
                    ("sepconv5x5", SepConv(channels, channels, 5, stride, 2, affine=False)),
                    ("dilconv3x3", DilConv(channels, channels, 3, stride, 2, 2, affine=False)),
                    ("dilconv5x5", DilConv(channels, channels, 5, stride, 4, 2, affine=False))
                ]), key=choice_keys[-1]))
        self.drop_path = DropPath()
        self.input_switch = mutables.InputChoice(choose_from=choice_keys, n_chosen=2,
                                                 key="{}_switch".format(node_id))

    def forward(self, prev_nodes):
        assert len(self.ops) == len(prev_nodes)
        out = [op(node) for op, node in zip(self.ops, prev_nodes)]
        out = [self.drop_path(o) if o is not None else None for o in out]
        return self.input_switch(out)


class DartsCell(nn.Module):
    """
    Builtin DARTS cell structure. There are ``n_nodes`` nodes in one cell, in which the first two nodes' values are
    fixed to the results of the previous previous cell and the previous cell, respectively. Each node connects to all
    later nodes with predefined operations in a mutable way. The last node accepts the inputs from the nodes
    before it and concatenates them along the channel dimension as the output of the current cell, so the number of
    output channels is ``n_nodes`` times ``channels``.

    Parameters
    ---
    n_nodes: int
        the number of nodes contained in this cell
    channels_pp: int
        the number of previous previous cell's output channels
    channels_p: int
        the number of previous cell's output channels
    channels: int
        the number of output channels for each node
    reduction_p: bool
        is the previous cell a reduction cell
    reduction: bool
        is the current cell a reduction cell
    """
    def __init__(self, n_nodes, channels_pp, channels_p, channels, reduction_p, reduction):
        super().__init__()
        self.reduction = reduction
        self.n_nodes = n_nodes

        # If the previous cell is a reduction cell, the current input size does not match the
        # output size of cell[k-2]. So output[k-2] should be reduced by preprocessing.
        if reduction_p:
            self.preproc0 = FactorizedReduce(channels_pp, channels, affine=False)
        else:
            self.preproc0 = StdConv(channels_pp, channels, 1, 1, 0, affine=False)
        self.preproc1 = StdConv(channels_p, channels, 1, 1, 0, affine=False)

        # generate dag
        self.mutable_ops = nn.ModuleList()
        for depth in range(2, self.n_nodes + 2):
            self.mutable_ops.append(Node("{}_n{}".format("reduce" if reduction else "normal", depth),
                                         depth, channels, 2 if reduction else 0))

    def forward(self, pprev, prev):
        """
        Parameters
        ---
        pprev: torch.Tensor
            the output of the previous previous layer
        prev: torch.Tensor
            the output of the previous layer
        """
        tensors = [self.preproc0(pprev), self.preproc1(prev)]
        for node in self.mutable_ops:
            cur_tensor = node(tensors)
            tensors.append(cur_tensor)

        output = torch.cat(tensors[2:], dim=1)
        return output
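A hedged construction sketch (not part of the diff) for the cell above: with `n_nodes` intermediate nodes each producing `channels` channels, the concatenated cell output has n_nodes * channels channels, e.g. 4 * 16 = 64 here. Building the cell needs no mutator; only `forward()` consults one. The import path and the numbers are illustrative, assuming the modules as they existed before this commit removed them.

from nni.nas.pytorch.search_space_zoo import DartsCell

cell = DartsCell(n_nodes=4, channels_pp=48, channels_p=48, channels=16,
                 reduction_p=False, reduction=False)
print(len(cell.mutable_ops))  # 4 nodes, each with a LayerChoice per incoming edge and one InputChoice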
nni/nas/pytorch/search_space_zoo/darts_ops.py deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch
import torch.nn as nn


class DropPath(nn.Module):
    def __init__(self, p=0.):
        """
        Drop path with probability.

        Parameters
        ----------
        p : float
            Probability of a path to be zeroed.
        """
        super().__init__()
        self.p = p

    def forward(self, x):
        if self.training and self.p > 0.:
            keep_prob = 1. - self.p
            # per data point mask
            mask = torch.zeros((x.size(0), 1, 1, 1), device=x.device).bernoulli_(keep_prob)
            return x / keep_prob * mask
        return x


class PoolBN(nn.Module):
    """
    AvgPool or MaxPool with BN. ``pool_type`` must be ``max`` or ``avg``.

    Parameters
    ---
    pool_type: str
        choose operation
    C: int
        number of channels
    kernel_size: int
        size of the convolving kernel
    stride: int
        stride of the convolution
    padding: int
        zero-padding added to both sides of the input
    affine: bool
        whether to use affine in BatchNorm
    """
    def __init__(self, pool_type, C, kernel_size, stride, padding, affine=True):
        super().__init__()
        if pool_type.lower() == 'max':
            self.pool = nn.MaxPool2d(kernel_size, stride, padding)
        elif pool_type.lower() == 'avg':
            self.pool = nn.AvgPool2d(kernel_size, stride, padding, count_include_pad=False)
        else:
            raise ValueError()
        self.bn = nn.BatchNorm2d(C, affine=affine)

    def forward(self, x):
        out = self.pool(x)
        out = self.bn(out)
        return out


class StdConv(nn.Sequential):
    """
    Standard conv: ReLU - Conv - BN

    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size: int
        size of the convolution kernel
    padding:
        zero-padding added to both sides of the input
    affine: bool
        whether to use affine in BatchNorm
    """
    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        self.net = nn.Sequential
        for idx, ops in enumerate((nn.ReLU(),
                                   nn.Conv2d(C_in, C_out, kernel_size, stride, padding, bias=False),
                                   nn.BatchNorm2d(C_out, affine=affine))):
            self.add_module(str(idx), ops)


class FacConv(nn.Module):
    """
    Factorized conv: ReLU - Conv(Kx1) - Conv(1xK) - BN
    """
    def __init__(self, C_in, C_out, kernel_length, stride, padding, affine=True):
        super().__init__()
        self.net = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(C_in, C_in, (kernel_length, 1), stride, padding, bias=False),
            nn.Conv2d(C_in, C_out, (1, kernel_length), stride, padding, bias=False),
            nn.BatchNorm2d(C_out, affine=affine)
        )

    def forward(self, x):
        return self.net(x)


class DilConv(nn.Module):
    """
    (Dilated) depthwise separable conv.
    ReLU - (Dilated) depthwise separable - Pointwise - BN.
    If dilation == 2, a 3x3 conv has a 5x5 receptive field, and a 5x5 conv has a 9x9 receptive field.

    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size:
        size of the convolving kernel
    padding:
        zero-padding added to both sides of the input
    dilation: int
        spacing between kernel elements.
    affine: bool
        whether to use affine in BatchNorm
    """
    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        super().__init__()
        self.net = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(C_in, C_in, kernel_size, stride, padding, dilation=dilation, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine)
        )

    def forward(self, x):
        return self.net(x)


class SepConv(nn.Module):
    """
    Depthwise separable conv.
    DilConv(dilation=1) * 2.

    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size:
        size of the convolving kernel
    padding:
        zero-padding added to both sides of the input
    dilation: int
        spacing between kernel elements.
    affine: bool
        whether to use affine in BatchNorm
    """
    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        self.net = nn.Sequential(
            DilConv(C_in, C_in, kernel_size, stride, padding, dilation=1, affine=affine),
            DilConv(C_in, C_out, kernel_size, 1, padding, dilation=1, affine=affine)
        )

    def forward(self, x):
        return self.net(x)


class FactorizedReduce(nn.Module):
    """
    Reduce feature map size by factorized pointwise conv (stride=2).
    """
    def __init__(self, C_in, C_out, affine=True):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.conv2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)

    def forward(self, x):
        x = self.relu(x)
        out = torch.cat([self.conv1(x), self.conv2(x[:, :, 1:, 1:])], dim=1)
        out = self.bn(out)
        return out
nni/nas/pytorch/search_space_zoo/enas_cell.py
deleted
100644 → 0
View file @
481aa292
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
nni.nas.pytorch
import
mutables
from
.enas_ops
import
FactorizedReduce
,
StdConv
,
SepConvBN
,
Pool
,
ConvBranch
,
PoolBranch
class Cell(nn.Module):
    def __init__(self, cell_name, prev_labels, channels):
        super().__init__()
        self.input_choice = mutables.InputChoice(choose_from=prev_labels, n_chosen=1,
                                                 return_mask=True, key=cell_name + "_input")
        self.op_choice = mutables.LayerChoice([
            SepConvBN(channels, channels, 3, 1),
            SepConvBN(channels, channels, 5, 2),
            Pool("avg", 3, 1, 1),
            Pool("max", 3, 1, 1),
            nn.Identity()
        ], key=cell_name + "_op")

    def forward(self, prev_layers):
        chosen_input, chosen_mask = self.input_choice(prev_layers)
        cell_out = self.op_choice(chosen_input)
        return cell_out, chosen_mask
class Node(mutables.MutableScope):
    def __init__(self, node_name, prev_node_names, channels):
        super().__init__(node_name)
        self.cell_x = Cell(node_name + "_x", prev_node_names, channels)
        self.cell_y = Cell(node_name + "_y", prev_node_names, channels)

    def forward(self, prev_layers):
        out_x, mask_x = self.cell_x(prev_layers)
        out_y, mask_y = self.cell_y(prev_layers)
        return out_x + out_y, mask_x | mask_y
class Calibration(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.process = None
        if in_channels != out_channels:
            self.process = StdConv(in_channels, out_channels)

    def forward(self, x):
        if self.process is None:
            return x
        return self.process(x)
class ENASMicroLayer(nn.Module):
    """
    Builtin ENAS Micro Layer. Micro search designs a single building block whose architecture is repeated
    throughout the final architecture. A layer has ``num_nodes`` nodes and searches the topology and
    operations among them in an RL way. The first two nodes in a layer stand for the outputs of the
    previous-previous layer and the previous layer respectively. For each of the following nodes, the
    controller chooses two previous nodes and applies an operation to each of them. Nodes that do not
    serve as input for any other node are viewed as the output of the layer. If there are multiple output
    nodes, the layer combines them (via a 1x1 convolution over their concatenation) to produce the layer
    output. Every node's output has ``out_channels`` channels, so the result of the layer has the same
    number of channels as each node.

    Parameters
    ---
    num_nodes: int
        the number of nodes contained in this layer
    in_channels_pp: int
        the number of the previous-previous layer's output channels
    in_channels_p: int
        the number of the previous layer's output channels
    out_channels: int
        output channels of this layer
    reduction: bool
        whether a reduction operation is employed before this layer
    """
    def __init__(self, num_nodes, in_channels_pp, in_channels_p, out_channels, reduction):
        super().__init__()
        self.reduction = reduction
        if self.reduction:
            self.reduce0 = FactorizedReduce(in_channels_pp, out_channels, affine=False)
            self.reduce1 = FactorizedReduce(in_channels_p, out_channels, affine=False)
            in_channels_pp = in_channels_p = out_channels
        self.preproc0 = Calibration(in_channels_pp, out_channels)
        self.preproc1 = Calibration(in_channels_p, out_channels)

        self.num_nodes = num_nodes
        name_prefix = "reduce" if reduction else "normal"
        self.nodes = nn.ModuleList()
        node_labels = [mutables.InputChoice.NO_KEY, mutables.InputChoice.NO_KEY]
        for i in range(num_nodes):
            node_labels.append("{}_node_{}".format(name_prefix, i))
            self.nodes.append(Node(node_labels[-1], node_labels[:-1], out_channels))
        self.final_conv_w = nn.Parameter(torch.zeros(out_channels, self.num_nodes + 2, out_channels, 1, 1),
                                         requires_grad=True)
        self.bn = nn.BatchNorm2d(out_channels, affine=False)
        self.reset_parameters()
    def reset_parameters(self):
        nn.init.kaiming_normal_(self.final_conv_w)
    def forward(self, pprev, prev):
        """
        Parameters
        ---
        pprev: torch.Tensor
            the output of the previous-previous layer
        prev: torch.Tensor
            the output of the previous layer
        """
        if self.reduction:
            pprev, prev = self.reduce0(pprev), self.reduce1(prev)
        pprev_, prev_ = self.preproc0(pprev), self.preproc1(prev)

        prev_nodes_out = [pprev_, prev_]
        nodes_used_mask = torch.zeros(self.num_nodes + 2, dtype=torch.bool, device=prev.device)
        for i in range(self.num_nodes):
            node_out, mask = self.nodes[i](prev_nodes_out)
            nodes_used_mask[:mask.size(0)] |= mask.to(node_out.device)
            prev_nodes_out.append(node_out)

        unused_nodes = torch.cat([out for used, out in zip(nodes_used_mask, prev_nodes_out) if not used], 1)
        unused_nodes = F.relu(unused_nodes)
        conv_weight = self.final_conv_w[:, ~nodes_used_mask, :, :, :]
        conv_weight = conv_weight.view(conv_weight.size(0), -1, 1, 1)
        out = F.conv2d(unused_nodes, conv_weight)
        return prev, self.bn(out)
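
The output logic above can be read in isolation: outputs of nodes that were never consumed as inputs are concatenated, and a slice of ``final_conv_w`` is reshaped into a 1x1 convolution that maps them back to ``out_channels``. A standalone sketch of just that aggregation (plain PyTorch, no mutator machinery; all shapes are illustrative):

import torch
import torch.nn.functional as F

num_nodes, channels = 2, 8
# outputs of pprev_, prev_ and the two searched nodes, each of shape (N, C, H, W)
prev_nodes_out = [torch.randn(1, channels, 16, 16) for _ in range(num_nodes + 2)]
# suppose only the first two entries were consumed as inputs by some node
nodes_used_mask = torch.tensor([True, True, False, False])
final_conv_w = torch.randn(channels, num_nodes + 2, channels, 1, 1)

unused = F.relu(torch.cat([o for used, o in zip(nodes_used_mask, prev_nodes_out) if not used], 1))
w = final_conv_w[:, ~nodes_used_mask].reshape(channels, -1, 1, 1)  # 1x1 conv over concatenated channels
print(F.conv2d(unused, w).shape)  # torch.Size([1, 8, 16, 16])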
class ENASMacroLayer(mutables.MutableScope):
    """
    Builtin ENAS Macro Layer. With the search space moved to the layer level, the controller decides
    which operation is employed and which previous layers to connect to via skip connections. The model
    is made up of identical layers, but the choice made for each layer may differ.

    Parameters
    ---
    key: str
        the name of this layer
    prev_labels: list of str
        names of all previous layers
    in_filters: int
        the number of input channels
    out_filters: int
        the number of output channels
    """
    def __init__(self, key, prev_labels, in_filters, out_filters):
        super().__init__(key)
        self.in_filters = in_filters
        self.out_filters = out_filters
        self.mutable = mutables.LayerChoice([
            ConvBranch(in_filters, out_filters, 3, 1, 1, separable=False),
            ConvBranch(in_filters, out_filters, 3, 1, 1, separable=True),
            ConvBranch(in_filters, out_filters, 5, 1, 2, separable=False),
            ConvBranch(in_filters, out_filters, 5, 1, 2, separable=True),
            PoolBranch('avg', in_filters, out_filters, 3, 1, 1),
            PoolBranch('max', in_filters, out_filters, 3, 1, 1)
        ])
        if prev_labels:
            self.skipconnect = mutables.InputChoice(choose_from=prev_labels, n_chosen=None)
        else:
            self.skipconnect = None
        self.batch_norm = nn.BatchNorm2d(out_filters, affine=False)

    def forward(self, prev_list):
        """
        Parameters
        ---
        prev_list: list
            The layer selects the last element of the list as its input and applies an operation to it.
            It chooses none/one/multiple tensor(s) from the list (excluding the last element) as skip
            connection(s).
        """
        out = self.mutable(prev_list[-1])
        if self.skipconnect is not None:
            connection = self.skipconnect(prev_list[:-1])
            if connection is not None:
                out += connection
        return self.batch_norm(out)
class ENASMacroGeneralModel(nn.Module):
    """
    The network is built by stacking ``ENASMacroLayer``. Each layer chooses one operation from the
    predefined set plus optional skip connections, and the stacked choices form the macro search space.

    Parameters
    ---
    num_layers: int
        The number of layers contained in the network.
    out_filters: int
        The number of each layer's output channels.
    in_channels: int
        The number of input channels.
    num_classes: int
        The number of classes for classification.
    dropout_rate: float
        Dropout rate of the dropout layer before the final dense layer.
    """
    def __init__(self, num_layers=12, out_filters=24, in_channels=3, num_classes=10,
                 dropout_rate=0.0):
        super().__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.out_filters = out_filters

        self.stem = nn.Sequential(
            nn.Conv2d(in_channels, out_filters, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_filters)
        )

        pool_distance = self.num_layers // 3
        self.pool_layers_idx = [pool_distance - 1, 2 * pool_distance - 1]
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)

        self.layers = nn.ModuleList()
        self.pool_layers = nn.ModuleList()
        labels = []
        for layer_id in range(self.num_layers):
            labels.append("layer_{}".format(layer_id))
            if layer_id in self.pool_layers_idx:
                self.pool_layers.append(FactorizedReduce(self.out_filters, self.out_filters))
            self.layers.append(ENASMacroLayer(labels[-1], labels[:-1], self.out_filters, self.out_filters))

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.dense = nn.Linear(self.out_filters, self.num_classes)

    def forward(self, x):
        """
        Parameters
        ---
        x: torch.Tensor
            the input of the network
        """
        bs = x.size(0)
        cur = self.stem(x)

        layers = [cur]
        for layer_id in range(self.num_layers):
            cur = self.layers[layer_id](layers)
            layers.append(cur)
            if layer_id in self.pool_layers_idx:
                for i, layer in enumerate(layers):
                    layers[i] = self.pool_layers[self.pool_layers_idx.index(layer_id)](layer)
                cur = layers[-1]

        cur = self.gap(cur).view(bs, -1)
        cur = self.dropout(cur)
        logits = self.dense(cur)
        return logits
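
A small note on where the reduction layers land (just the arithmetic from ``__init__`` above, nothing NNI-specific): with the default ``num_layers=12``, pooling is applied after layers 3 and 7 (0-based), i.e. roughly at one third and two thirds of the depth.

num_layers = 12
pool_distance = num_layers // 3
print([pool_distance - 1, 2 * pool_distance - 1])  # [3, 7]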
nni/nas/pytorch/search_space_zoo/enas_ops.py
deleted 100644 → 0
View file @ 481aa292
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn as nn
class StdConv(nn.Module):
    def __init__(self, C_in, C_out):
        super(StdConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=False),
            nn.ReLU()
        )

    def forward(self, x):
        return self.conv(x)
class PoolBranch(nn.Module):
    """
    Pooling structure for macro search. The input first passes through a 1x1 conv, then a pooling
    operation followed by BatchNorm2d.

    Parameters
    ---
    pool_type: str
        only accepts ``max`` for MaxPool and ``avg`` for AvgPool
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size: int
        size of the pooling kernel
    stride: int
        stride of the pooling
    padding: int
        zero-padding added to both sides of the input
    """
    def __init__(self, pool_type, C_in, C_out, kernel_size, stride, padding, affine=False):
        super().__init__()
        self.preproc = StdConv(C_in, C_out)
        self.pool = Pool(pool_type, kernel_size, stride, padding)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)

    def forward(self, x):
        out = self.preproc(x)
        out = self.pool(out)
        out = self.bn(out)
        return out
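
A quick shape check (minimal sketch; it assumes the classes in this file are in scope): a 3x3 pooling branch with stride 1 and padding 1 preserves the spatial size while projecting the channels.

import torch

branch = PoolBranch('avg', C_in=16, C_out=24, kernel_size=3, stride=1, padding=1)
print(branch(torch.randn(2, 16, 32, 32)).shape)  # torch.Size([2, 24, 32, 32])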
class SeparableConv(nn.Module):
    def __init__(self, C_in, C_out, kernel_size, stride, padding):
        super(SeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(C_in, C_in, kernel_size=kernel_size, padding=padding,
                                   stride=stride, groups=C_in, bias=False)
        self.pointwise = nn.Conv2d(C_in, C_out, kernel_size=1, bias=False)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out
class ConvBranch(nn.Module):
    """
    Conv structure for macro search. The input first passes through a 1x1 conv, then a conv operation
    with kernel_size 3 or 5, followed by BatchNorm and ReLU.

    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size: int
        size of the convolving kernel
    stride: int
        stride of the convolution
    padding: int
        zero-padding added to both sides of the input
    separable: bool
        whether a separable conv is used
    """
    def __init__(self, C_in, C_out, kernel_size, stride, padding, separable):
        super(ConvBranch, self).__init__()
        self.preproc = StdConv(C_in, C_out)
        if separable:
            self.conv = SeparableConv(C_out, C_out, kernel_size, stride, padding)
        else:
            self.conv = nn.Conv2d(C_out, C_out, kernel_size, stride=stride, padding=padding)
        self.postproc = nn.Sequential(
            nn.BatchNorm2d(C_out, affine=False),
            nn.ReLU()
        )

    def forward(self, x):
        out = self.preproc(x)
        out = self.conv(out)
        out = self.postproc(out)
        return out
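
The same kind of sanity check applies to the conv branch (minimal sketch, classes above assumed in scope): with kernel 5, stride 1 and padding 2 the spatial size is preserved regardless of the ``separable`` flag.

import torch

branch = ConvBranch(C_in=16, C_out=24, kernel_size=5, stride=1, padding=2, separable=True)
print(branch(torch.randn(2, 16, 32, 32)).shape)  # torch.Size([2, 24, 32, 32])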
class FactorizedReduce(nn.Module):
    def __init__(self, C_in, C_out, affine=False):
        super().__init__()
        self.conv1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.conv2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)

    def forward(self, x):
        out = torch.cat([self.conv1(x), self.conv2(x[:, :, 1:, 1:])], dim=1)
        out = self.bn(out)
        return out
class Pool(nn.Module):
    """
    Pooling structure.

    Parameters
    ---
    pool_type: str
        only accepts ``max`` for MaxPool and ``avg`` for AvgPool
    kernel_size: int
        size of the pooling kernel
    stride: int
        stride of the pooling
    padding: int
        zero-padding added to both sides of the input
    """
    def __init__(self, pool_type, kernel_size, stride, padding):
        super().__init__()
        if pool_type.lower() == 'max':
            self.pool = nn.MaxPool2d(kernel_size, stride, padding)
        elif pool_type.lower() == 'avg':
            self.pool = nn.AvgPool2d(kernel_size, stride, padding, count_include_pad=False)
        else:
            raise ValueError()

    def forward(self, x):
        return self.pool(x)
class SepConvBN(nn.Module):
    """
    SepConv followed by BatchNorm. The structure is ReLU ==> SepConv ==> BN.

    Parameters
    ---
    C_in: int
        the number of input channels
    C_out: int
        the number of output channels
    kernel_size: int
        size of the convolving kernel
    padding: int
        zero-padding added to both sides of the input
    """
    def __init__(self, C_in, C_out, kernel_size, padding):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv = SeparableConv(C_in, C_out, kernel_size, 1, padding)
        self.bn = nn.BatchNorm2d(C_out, affine=True)

    def forward(self, x):
        x = self.relu(x)
        x = self.conv(x)
        x = self.bn(x)
        return x
nni/nas/pytorch/trainer.py
deleted 100644 → 0
View file @ 481aa292
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import json
import logging
import os
import time
from abc import abstractmethod

import torch

from .base_trainer import BaseTrainer

_logger = logging.getLogger(__name__)
class TorchTensorEncoder(json.JSONEncoder):
    def default(self, o):  # pylint: disable=method-hidden
        if isinstance(o, torch.Tensor):
            olist = o.tolist()
            if "bool" not in o.type().lower() and all(map(lambda d: d == 0 or d == 1, olist)):
                _logger.warning("Every element in %s is either 0 or 1. "
                                "You might consider converting it into bool.", olist)
            return olist
        return super().default(o)
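
For illustration, the encoder lets exported architectures that contain tensors be serialized directly (a minimal sketch; keys and values here are made up):

import json
import torch

arch = {"layer_0": torch.tensor([False, True, False]), "layer_1": 2}
print(json.dumps(arch, cls=TorchTensorEncoder, sort_keys=True))
# {"layer_0": [false, true, false], "layer_1": 2}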
class Trainer(BaseTrainer):
    """
    A trainer with some helper functions implemented. To implement a new trainer,
    users need to implement :meth:`train_one_epoch`, :meth:`validate_one_epoch` and :meth:`checkpoint`.

    Parameters
    ----------
    model : nn.Module
        Model with mutables.
    mutator : BaseMutator
        A mutator object that has been initialized with the model.
    loss : callable
        Called with logits and targets. Returns a loss tensor.
        See `PyTorch loss functions`_ for examples.
    metrics : callable
        Called with logits and targets. Returns a dict that maps metrics keys to metrics data. For example,

        .. code-block:: python

            def metrics_fn(output, target):
                return {"acc1": accuracy(output, target, topk=1), "acc5": accuracy(output, target, topk=5)}

    optimizer : Optimizer
        Optimizer that optimizes the model.
    num_epochs : int
        Number of epochs of training.
    dataset_train : torch.utils.data.Dataset
        Dataset of training. If not otherwise specified, ``dataset_train`` and ``dataset_valid`` should be standard
        PyTorch Dataset. See `torch.utils.data`_ for examples.
    dataset_valid : torch.utils.data.Dataset
        Dataset of validation/testing.
    batch_size : int
        Batch size.
    workers : int
        Number of workers used in data preprocessing.
    device : torch.device
        Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, the trainer
        automatically detects GPU and uses it if available.
    log_frequency : int
        Number of mini-batches between metric logs.
    callbacks : list of Callback
        Callbacks to plug into the trainer. See Callbacks.

    .. _`PyTorch loss functions`: https://pytorch.org/docs/stable/nn.html#loss-functions
    .. _`torch.utils.data`: https://pytorch.org/docs/stable/data.html
    """
    def __init__(self, model, mutator, loss, metrics, optimizer, num_epochs,
                 dataset_train, dataset_valid, batch_size, workers, device, log_frequency, callbacks):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
        self.model = model
        self.mutator = mutator
        self.loss = loss

        self.metrics = metrics
        self.optimizer = optimizer

        self.model.to(self.device)
        self.mutator.to(self.device)
        self.loss.to(self.device)

        self.num_epochs = num_epochs
        self.dataset_train = dataset_train
        self.dataset_valid = dataset_valid
        self.batch_size = batch_size
        self.workers = workers
        self.log_frequency = log_frequency
        self.log_dir = os.path.join("logs", str(time.time()))
        os.makedirs(self.log_dir, exist_ok=True)
        self.status_writer = open(os.path.join(self.log_dir, "log"), "w")
        self.callbacks = callbacks if callbacks is not None else []
        for callback in self.callbacks:
            callback.build(self.model, self.mutator, self)
    @abstractmethod
    def train_one_epoch(self, epoch):
        """
        Train one epoch.

        Parameters
        ----------
        epoch : int
            Epoch number starting from 0.
        """
        pass

    @abstractmethod
    def validate_one_epoch(self, epoch):
        """
        Validate one epoch.

        Parameters
        ----------
        epoch : int
            Epoch number starting from 0.
        """
        pass
    def train(self, validate=True):
        """
        Train for ``num_epochs`` epochs.
        Trigger callbacks at the start and the end of each epoch.

        Parameters
        ----------
        validate : bool
            If ``true``, will do validation every epoch.
        """
        for epoch in range(self.num_epochs):
            for callback in self.callbacks:
                callback.on_epoch_begin(epoch)

            # training
            _logger.info("Epoch %d Training", epoch + 1)
            self.train_one_epoch(epoch)

            if validate:
                # validation
                _logger.info("Epoch %d Validating", epoch + 1)
                self.validate_one_epoch(epoch)

            for callback in self.callbacks:
                callback.on_epoch_end(epoch)
    def validate(self):
        """
        Do one validation.
        """
        self.validate_one_epoch(-1)
    def export(self, file):
        """
        Call ``mutator.export()`` and dump the architecture to ``file``.

        Parameters
        ----------
        file : str
            A file path. Expected to be a JSON.
        """
        mutator_export = self.mutator.export()
        with open(file, "w") as f:
            json.dump(mutator_export, f, indent=2, sort_keys=True, cls=TorchTensorEncoder)
    def checkpoint(self):
        """
        Return trainer checkpoint.
        """
        raise NotImplementedError("Not implemented yet")
    def enable_visualization(self):
        """
        Enable visualization. Write graph and training log to folder ``logs/<timestamp>``.
        """
        sample = None
        for x, _ in self.train_loader:
            sample = x.to(self.device)[:2]
            break
        if sample is None:
            _logger.warning("Sample is %s.", sample)
        _logger.info("Creating graph json, writing to %s. Visualization enabled.", self.log_dir)
        with open(os.path.join(self.log_dir, "graph.json"), "w") as f:
            json.dump(self.mutator.graph(sample), f)
        self.visualization_enabled = True
    def _write_graph_status(self):
        if hasattr(self, "visualization_enabled") and self.visualization_enabled:
            print(json.dumps(self.mutator.status()), file=self.status_writer, flush=True)
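
A minimal sketch of what a concrete subclass looks like (hypothetical ``MyTrainer``, not part of NNI; it assumes the subclass builds ``self.train_loader`` and ``self.valid_loader`` from the datasets, and that the mutator exposes ``reset()`` to sample a new architecture):

class MyTrainer(Trainer):
    def train_one_epoch(self, epoch):
        self.model.train()
        for x, y in self.train_loader:                # assumed DataLoader over dataset_train
            x, y = x.to(self.device), y.to(self.device)
            self.optimizer.zero_grad()
            self.mutator.reset()                      # sample a new architecture for this batch
            loss = self.loss(self.model(x), y)
            loss.backward()
            self.optimizer.step()

    def validate_one_epoch(self, epoch):
        self.model.eval()
        with torch.no_grad():
            for x, y in self.valid_loader:            # assumed DataLoader over dataset_valid
                self.metrics(self.model(x.to(self.device)), y.to(self.device))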