OpenDAS / nni / Commits / 867871b2

Commit 867871b2 (unverified)
Authored Jul 27, 2022 by Yuge Zhang; committed by GitHub on Jul 27, 2022.

Promote Retiarii to NAS (step 1) - move files (#5020)

Parent: 481aa292
Changes: 137

Showing 20 changed files with 0 additions and 2270 deletions.
nni/algorithms/nas/pytorch/pdarts/__init__.py  +0 -4
nni/algorithms/nas/pytorch/pdarts/mutator.py  +0 -93
nni/algorithms/nas/pytorch/pdarts/trainer.py  +0 -86
nni/algorithms/nas/pytorch/proxylessnas/__init__.py  +0 -5
nni/algorithms/nas/pytorch/proxylessnas/mutator.py  +0 -478
nni/algorithms/nas/pytorch/proxylessnas/trainer.py  +0 -500
nni/algorithms/nas/pytorch/proxylessnas/utils.py  +0 -78
nni/algorithms/nas/pytorch/random/__init__.py  +0 -4
nni/algorithms/nas/pytorch/random/mutator.py  +0 -39
nni/algorithms/nas/pytorch/spos/__init__.py  +0 -6
nni/algorithms/nas/pytorch/spos/evolution.py  +0 -223
nni/algorithms/nas/pytorch/spos/mutator.py  +0 -66
nni/algorithms/nas/pytorch/spos/trainer.py  +0 -95
nni/algorithms/nas/tensorflow/__init__.py  +0 -0
nni/algorithms/nas/tensorflow/classic_nas/__init__.py  +0 -4
nni/algorithms/nas/tensorflow/classic_nas/mutator.py  +0 -217
nni/algorithms/nas/tensorflow/enas/__init__.py  +0 -5
nni/algorithms/nas/tensorflow/enas/mutator.py  +0 -162
nni/algorithms/nas/tensorflow/enas/trainer.py  +0 -205
nni/nas/evaluator/functional.py  +0 -0
nni/algorithms/nas/pytorch/pdarts/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .trainer import PdartsTrainer
nni/algorithms/nas/pytorch/pdarts/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import copy

import numpy as np
import torch
from torch import nn

from nni.algorithms.nas.pytorch.darts import DartsMutator
from nni.nas.pytorch.mutables import LayerChoice


class PdartsMutator(DartsMutator):
    """
    It works with PdartsTrainer to calculate ops weights,
    and drop weights in different PDARTS epochs.
    """

    def __init__(self, model, pdarts_epoch_index, pdarts_num_to_drop, switches={}):
        self.pdarts_epoch_index = pdarts_epoch_index
        self.pdarts_num_to_drop = pdarts_num_to_drop
        if switches is None:
            self.switches = {}
        else:
            self.switches = switches

        super(PdartsMutator, self).__init__(model)

        # this loop goes through mutables with different keys,
        # it mainly updates the length of choices.
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):

                switches = self.switches.get(mutable.key, [True for j in range(len(mutable))])
                choices = self.choices[mutable.key]

                operations_count = np.sum(switches)
                # +1 and -1 are caused by the zero operation in the darts network
                # the zero operation is not in the choices list in the network, but its weight is,
                # so it needs one more weight and switch for zero.
                self.choices[mutable.key] = nn.Parameter(1.0E-3 * torch.randn(operations_count + 1))

                self.switches[mutable.key] = switches

        # update LayerChoice instances in model,
        # it physically removes dropped choice operations.
        for module in self.model.modules():
            if isinstance(module, LayerChoice):
                switches = self.switches.get(module.key)
                choices = self.choices[module.key]
                if len(module) > len(choices):
                    # from last to first, so that removing one won't affect earlier indexes.
                    for index in range(len(switches) - 1, -1, -1):
                        if switches[index] == False:
                            del module[index]
                assert len(module) <= len(choices), "Failed to remove dropped choices."

    def export(self):
        # Cannot rely on super().export() because P-DARTS has deleted some of the choices and has misaligned length.
        results = super().sample_final()
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):
                # As some operations are dropped physically,
                # it needs to fill False back in to track dropped operations.
                trained_result = results[mutable.key]
                trained_index = 0

                switches = self.switches[mutable.key]
                result = torch.Tensor(switches).bool()
                for index in range(len(result)):
                    if result[index]:
                        result[index] = trained_result[trained_index]
                        trained_index += 1

                results[mutable.key] = result

        return results

    def drop_paths(self):
        """
        This method is called when a PDARTS epoch is finished.
        It prepares switches for the next epoch.
        Candidate operations with a False switch will be dropped in the next epoch.
        """
        all_switches = copy.deepcopy(self.switches)
        for key in all_switches:
            switches = all_switches[key]
            idxs = []
            for j in range(len(switches)):
                if switches[j]:
                    idxs.append(j)
            sorted_weights = self.choices[key].data.cpu().numpy()[:-1]
            drop = np.argsort(sorted_weights)[:self.pdarts_num_to_drop[self.pdarts_epoch_index]]
            for idx in drop:
                switches[idxs[idx]] = False
        return all_switches
nni/algorithms/nas/pytorch/pdarts/trainer.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import logging

from nni.nas.pytorch.callbacks import LRSchedulerCallback
from nni.algorithms.nas.pytorch.darts import DartsTrainer
from nni.nas.pytorch.trainer import BaseTrainer, TorchTensorEncoder

from .mutator import PdartsMutator

logger = logging.getLogger(__name__)


class PdartsTrainer(BaseTrainer):
    """
    This trainer implements the PDARTS algorithm.
    PDARTS is based on the DARTS algorithm, and provides a network-growth approach to find a deeper and better network.
    This class relies on the pdarts_num_layers and pdarts_num_to_drop parameters to control how the network grows.
    pdarts_num_layers means how many layers more than the first epoch.
    pdarts_num_to_drop means how many candidate operations should be dropped in each epoch,
    so that the grown network stays a similar size.
    """

    def __init__(self, model_creator, init_layers, metrics,
                 num_epochs, dataset_train, dataset_valid,
                 pdarts_num_layers=[0, 6, 12], pdarts_num_to_drop=[3, 2, 1],
                 mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
                 callbacks=None, unrolled=False):
        super(PdartsTrainer, self).__init__()
        self.model_creator = model_creator
        self.init_layers = init_layers
        self.pdarts_num_layers = pdarts_num_layers
        self.pdarts_num_to_drop = pdarts_num_to_drop
        self.pdarts_epoch = len(pdarts_num_to_drop)
        self.darts_parameters = {
            "metrics": metrics,
            "num_epochs": num_epochs,
            "dataset_train": dataset_train,
            "dataset_valid": dataset_valid,
            "batch_size": batch_size,
            "workers": workers,
            "device": device,
            "log_frequency": log_frequency,
            "unrolled": unrolled
        }
        self.callbacks = callbacks if callbacks is not None else []

    def train(self):
        switches = None
        for epoch in range(self.pdarts_epoch):

            layers = self.init_layers + self.pdarts_num_layers[epoch]
            model, criterion, optim, lr_scheduler = self.model_creator(layers)
            self.mutator = PdartsMutator(model, epoch, self.pdarts_num_to_drop, switches)

            for callback in self.callbacks:
                callback.build(model, self.mutator, self)
                callback.on_epoch_begin(epoch)

            darts_callbacks = []
            if lr_scheduler is not None:
                darts_callbacks.append(LRSchedulerCallback(lr_scheduler))

            self.trainer = DartsTrainer(model, mutator=self.mutator, loss=criterion, optimizer=optim,
                                        callbacks=darts_callbacks, **self.darts_parameters)
            logger.info("start pdarts training epoch %s...", epoch)

            self.trainer.train()

            switches = self.mutator.drop_paths()

            for callback in self.callbacks:
                callback.on_epoch_end(epoch)

    def validate(self):
        self.trainer.validate()

    def export(self, file):
        mutator_export = self.mutator.export()
        with open(file, "w") as f:
            json.dump(mutator_export, f, indent=2, sort_keys=True, cls=TorchTensorEncoder)

    def checkpoint(self):
        raise NotImplementedError("Not implemented yet")
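For orientation, a minimal usage sketch of the trainer above (not part of the original diff). The names build_darts_model, dataset_train and dataset_valid are hypothetical: the factory is assumed to return (model, criterion, optimizer, lr_scheduler) for a given layer count, as the train() loop above expects.

    from nni.algorithms.nas.pytorch.pdarts import PdartsTrainer

    def metrics(output, target):
        # hypothetical metrics callable: maps (logits, labels) to a dict
        return {"acc": (output.argmax(dim=1) == target).float().mean().item()}

    trainer = PdartsTrainer(build_darts_model,        # assumed factory: layers -> (model, criterion, optim, lr_scheduler)
                            init_layers=5,
                            metrics=metrics,
                            num_epochs=25,
                            dataset_train=dataset_train,   # assumed torch datasets
                            dataset_valid=dataset_valid,
                            batch_size=64)
    trainer.train()
    trainer.export("final_arch.json")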
nni/algorithms/nas/pytorch/proxylessnas/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .mutator import ProxylessNasMutator
from .trainer import ProxylessNasTrainer
nni/algorithms/nas/pytorch/proxylessnas/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import math
import torch
from torch import nn as nn
from torch.nn import functional as F
import numpy as np

from nni.nas.pytorch.base_mutator import BaseMutator
from nni.nas.pytorch.mutables import LayerChoice
from .utils import detach_variable


class ArchGradientFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, binary_gates, run_func, backward_func):
        ctx.run_func = run_func
        ctx.backward_func = backward_func

        detached_x = detach_variable(x)
        with torch.enable_grad():
            output = run_func(detached_x)
        ctx.save_for_backward(detached_x, output)
        return output.data

    @staticmethod
    def backward(ctx, grad_output):
        detached_x, output = ctx.saved_tensors

        grad_x = torch.autograd.grad(output, detached_x, grad_output, only_inputs=True)
        # compute gradients w.r.t. binary_gates
        binary_grads = ctx.backward_func(detached_x.data, output.data, grad_output.data)

        return grad_x[0], binary_grads, None, None


class MixedOp(nn.Module):
    """
    This class instantiates and manages the info of one LayerChoice.
    It includes architecture weights, binary weights, and member functions
    operating on the weights.

    forward_mode:
        forward/backward mode for LayerChoice: None, two, full, and full_v2.
        For training architecture weights, we use full_v2 by default, and for training
        model weights, we use None.
    """
    forward_mode = None

    def __init__(self, mutable):
        """
        Parameters
        ----------
        mutable : LayerChoice
            A LayerChoice in user model
        """
        super(MixedOp, self).__init__()
        self.ap_path_alpha = nn.Parameter(torch.Tensor(len(mutable)))
        self.ap_path_wb = nn.Parameter(torch.Tensor(len(mutable)))
        self.ap_path_alpha.requires_grad = False
        self.ap_path_wb.requires_grad = False
        self.active_index = [0]
        self.inactive_index = None
        self.log_prob = None
        self.current_prob_over_ops = None
        self.n_choices = len(mutable)

    def get_ap_path_alpha(self):
        return self.ap_path_alpha

    def to_requires_grad(self):
        self.ap_path_alpha.requires_grad = True
        self.ap_path_wb.requires_grad = True

    def to_disable_grad(self):
        self.ap_path_alpha.requires_grad = False
        self.ap_path_wb.requires_grad = False

    def forward(self, mutable, x):
        """
        Define forward of LayerChoice. For 'full_v2', backward is also defined.
        The 'two' mode is explained in section 3.2.1 of the paper.
        The 'full_v2' mode is explained in Appendix D of the paper.

        Parameters
        ----------
        mutable : LayerChoice
            this layer's mutable
        x : tensor
            inputs of this layer, only one input is supported

        Returns
        -------
        output: tensor
            output of this layer
        """
        if MixedOp.forward_mode == 'full' or MixedOp.forward_mode == 'two':
            output = 0
            for _i in self.active_index:
                oi = self.candidate_ops[_i](x)
                output = output + self.ap_path_wb[_i] * oi
            for _i in self.inactive_index:
                oi = self.candidate_ops[_i](x)
                output = output + self.ap_path_wb[_i] * oi.detach()
        elif MixedOp.forward_mode == 'full_v2':
            def run_function(key, candidate_ops, active_id):
                def forward(_x):
                    return candidate_ops[active_id](_x)
                return forward

            def backward_function(key, candidate_ops, active_id, binary_gates):
                def backward(_x, _output, grad_output):
                    binary_grads = torch.zeros_like(binary_gates.data)
                    with torch.no_grad():
                        for k in range(len(candidate_ops)):
                            if k != active_id:
                                out_k = candidate_ops[k](_x.data)
                            else:
                                out_k = _output.data
                            grad_k = torch.sum(out_k * grad_output)
                            binary_grads[k] = grad_k
                    return binary_grads
                return backward

            output = ArchGradientFunction.apply(
                x, self.ap_path_wb, run_function(mutable.key, list(mutable), self.active_index[0]),
                backward_function(mutable.key, list(mutable), self.active_index[0], self.ap_path_wb))
        else:
            output = self.active_op(mutable)(x)
        return output

    @property
    def probs_over_ops(self):
        """
        Apply softmax on alpha to generate probability distribution

        Returns
        -------
        pytorch tensor
            probability distribution
        """
        probs = F.softmax(self.ap_path_alpha, dim=0)  # softmax to probability
        return probs

    @property
    def chosen_index(self):
        """
        choose the op with max prob

        Returns
        -------
        int
            index of the chosen one
        numpy.float32
            prob of the chosen one
        """
        probs = self.probs_over_ops.data.cpu().numpy()
        index = int(np.argmax(probs))
        return index, probs[index]

    def active_op(self, mutable):
        """
        assume only one path is active

        Returns
        -------
        PyTorch module
            the chosen operation
        """
        return mutable[self.active_index[0]]

    @property
    def active_op_index(self):
        """
        return the active op's index; the active op is sampled

        Returns
        -------
        int
            index of the active op
        """
        return self.active_index[0]

    def set_chosen_op_active(self):
        """
        set chosen index, active and inactive indexes
        """
        chosen_idx, _ = self.chosen_index
        self.active_index = [chosen_idx]
        self.inactive_index = [_i for _i in range(0, chosen_idx)] + \
                              [_i for _i in range(chosen_idx + 1, self.n_choices)]

    def binarize(self, mutable):
        """
        Sample based on alpha, and set binary weights accordingly.
        ap_path_wb is set in this function, which is why it is called binarize.

        Parameters
        ----------
        mutable : LayerChoice
            this layer's mutable
        """
        self.log_prob = None
        # reset binary gates
        self.ap_path_wb.data.zero_()
        probs = self.probs_over_ops
        if MixedOp.forward_mode == 'two':
            # sample two ops according to probs
            sample_op = torch.multinomial(probs.data, 2, replacement=False)
            probs_slice = F.softmax(torch.stack([
                self.ap_path_alpha[idx] for idx in sample_op
            ]), dim=0)
            self.current_prob_over_ops = torch.zeros_like(probs)
            for i, idx in enumerate(sample_op):
                self.current_prob_over_ops[idx] = probs_slice[i]
            # choose one to be active and the other to be inactive according to probs_slice
            c = torch.multinomial(probs_slice.data, 1)[0]  # 0 or 1
            active_op = sample_op[c].item()
            inactive_op = sample_op[1 - c].item()
            self.active_index = [active_op]
            self.inactive_index = [inactive_op]
            # set binary gate
            self.ap_path_wb.data[active_op] = 1.0
        else:
            sample = torch.multinomial(probs, 1)[0].item()
            self.active_index = [sample]
            self.inactive_index = [_i for _i in range(0, sample)] + \
                                  [_i for _i in range(sample + 1, len(mutable))]
            self.log_prob = torch.log(probs[sample])
            self.current_prob_over_ops = probs
            self.ap_path_wb.data[sample] = 1.0
        # avoid over-regularization
        for choice in mutable:
            for _, param in choice.named_parameters():
                param.grad = None

    @staticmethod
    def delta_ij(i, j):
        if i == j:
            return 1
        else:
            return 0

    def set_arch_param_grad(self, mutable):
        """
        Calculate alpha gradient for this LayerChoice.
        It is calculated using the gradient of the binary gate and the probs of the ops.
        """
        binary_grads = self.ap_path_wb.grad.data
        if self.active_op(mutable).is_zero_layer():
            self.ap_path_alpha.grad = None
            return
        if self.ap_path_alpha.grad is None:
            self.ap_path_alpha.grad = torch.zeros_like(self.ap_path_alpha.data)
        if MixedOp.forward_mode == 'two':
            involved_idx = self.active_index + self.inactive_index
            probs_slice = F.softmax(torch.stack([
                self.ap_path_alpha[idx] for idx in involved_idx
            ]), dim=0).data
            for i in range(2):
                for j in range(2):
                    origin_i = involved_idx[i]
                    origin_j = involved_idx[j]
                    self.ap_path_alpha.grad.data[origin_i] += \
                        binary_grads[origin_j] * probs_slice[j] * (MixedOp.delta_ij(i, j) - probs_slice[i])
            for _i, idx in enumerate(self.active_index):
                self.active_index[_i] = (idx, self.ap_path_alpha.data[idx].item())
            for _i, idx in enumerate(self.inactive_index):
                self.inactive_index[_i] = (idx, self.ap_path_alpha.data[idx].item())
        else:
            probs = self.probs_over_ops.data
            for i in range(self.n_choices):
                for j in range(self.n_choices):
                    self.ap_path_alpha.grad.data[i] += binary_grads[j] * probs[j] * (MixedOp.delta_ij(i, j) - probs[i])
        return

    def rescale_updated_arch_param(self):
        """
        rescale architecture weights for the 'two' mode.
        """
        if not isinstance(self.active_index[0], tuple):
            assert self.active_op.is_zero_layer()
            return
        involved_idx = [idx for idx, _ in (self.active_index + self.inactive_index)]
        old_alphas = [alpha for _, alpha in (self.active_index + self.inactive_index)]
        new_alphas = [self.ap_path_alpha.data[idx] for idx in involved_idx]

        offset = math.log(
            sum([math.exp(alpha) for alpha in new_alphas]) / sum([math.exp(alpha) for alpha in old_alphas])
        )

        for idx in involved_idx:
            self.ap_path_alpha.data[idx] -= offset


class ProxylessNasMutator(BaseMutator):
    """
    This mutator initializes and operates all the LayerChoices of the input model.
    It is for the corresponding trainer to control the training process of LayerChoices,
    coordinating with the whole training process.
    """
    def __init__(self, model):
        """
        Init a MixedOp instance for each mutable, i.e., LayerChoice,
        and register the instantiated MixedOp in the corresponding LayerChoice.
        If it is not registered in the LayerChoice, DataParallel does not work,
        because architecture weights are not included in the DataParallel model.
        When MixedOps are registered, we use ```requires_grad``` to control
        whether to calculate gradients of architecture weights.

        Parameters
        ----------
        model : pytorch model
            The model that users want to tune; it includes the search space defined with nni nas apis
        """
        super(ProxylessNasMutator, self).__init__(model)
        self._unused_modules = None
        self.mutable_list = []
        for mutable in self.undedup_mutables:
            self.mutable_list.append(mutable)
            mutable.registered_module = MixedOp(mutable)

    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        Callback of layer choice forward. This function defines the forward
        logic of the input mutable. So the mutable is only an interface; its real
        implementation is defined in the mutator.

        Parameters
        ----------
        mutable: LayerChoice
            forward logic of this input mutable
        args: list of torch.Tensor
            inputs of this mutable
        kwargs: dict
            inputs of this mutable

        Returns
        -------
        torch.Tensor
            output of this mutable, i.e., LayerChoice
        int
            index of the chosen op
        """
        # FIXME: return mask, to be consistent with other algorithms
        idx = mutable.registered_module.active_op_index
        return mutable.registered_module(mutable, *args, **kwargs), idx

    def reset_binary_gates(self):
        """
        For each LayerChoice, binarize binary weights
        based on alpha to only activate one op.
        It traverses all the mutables in the model to do this.
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.binarize(mutable)

    def set_chosen_op_active(self):
        """
        For each LayerChoice, set the op with highest alpha as the chosen op.
        Usually used for validation.
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.set_chosen_op_active()

    def num_arch_params(self):
        """
        The number of mutables, i.e., LayerChoice

        Returns
        -------
        int
            the number of LayerChoice in user model
        """
        return len(self.mutable_list)

    def set_arch_param_grad(self):
        """
        For each LayerChoice, calculate gradients for architecture weights, i.e., alpha
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.set_arch_param_grad(mutable)

    def get_architecture_parameters(self):
        """
        Get all the architecture parameters.

        yield
        -----
        PyTorch Parameter
            Return ap_path_alpha of the traversed mutable
        """
        for mutable in self.undedup_mutables:
            yield mutable.registered_module.get_ap_path_alpha()

    def change_forward_mode(self, mode):
        """
        Update forward mode of MixedOps, as training architecture weights and
        model weights use different forward modes.
        """
        MixedOp.forward_mode = mode

    def get_forward_mode(self):
        """
        Get forward mode of MixedOp

        Returns
        -------
        string
            the current forward mode of MixedOp
        """
        return MixedOp.forward_mode

    def rescale_updated_arch_param(self):
        """
        Rescale architecture weights in 'two' mode.
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.rescale_updated_arch_param()

    def unused_modules_off(self):
        """
        Remove unused modules for each mutable.
        The removed modules are kept in ```self._unused_modules``` to be resumed later.
        """
        self._unused_modules = []
        for mutable in self.undedup_mutables:
            mixed_op = mutable.registered_module
            unused = {}
            if self.get_forward_mode() in ['full', 'two', 'full_v2']:
                involved_index = mixed_op.active_index + mixed_op.inactive_index
            else:
                involved_index = mixed_op.active_index
            for i in range(mixed_op.n_choices):
                if i not in involved_index:
                    unused[i] = mutable[i]
                    mutable[i] = None
            self._unused_modules.append(unused)

    def unused_modules_back(self):
        """
        Resume the removed modules.
        """
        if self._unused_modules is None:
            return
        for m, unused in zip(self.mutable_list, self._unused_modules):
            for i in unused:
                m[i] = unused[i]
        self._unused_modules = None

    def arch_requires_grad(self):
        """
        Make architecture weights require gradient
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.to_requires_grad()

    def arch_disable_grad(self):
        """
        Disable gradient of architecture weights, i.e., do not
        calculate gradient for them.
        """
        for mutable in self.undedup_mutables:
            mutable.registered_module.to_disable_grad()

    def sample_final(self):
        """
        Generate the final chosen architecture.

        Returns
        -------
        dict
            the choice of each mutable, i.e., LayerChoice
        """
        result = dict()
        for mutable in self.undedup_mutables:
            assert isinstance(mutable, LayerChoice)
            index, _ = mutable.registered_module.chosen_index
            # pylint: disable=not-callable
            result[mutable.key] = F.one_hot(torch.tensor(index), num_classes=len(mutable)).view(-1).bool()
        return result
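The methods above are meant to be driven by ProxylessNasTrainer (whose diff is collapsed below). As a rough, hypothetical sketch of one architecture-weight update, assuming model, mutator, a validation batch (val_x, val_y), a loss criterion, and an arch_optimizer built over mutator.get_architecture_parameters():

    mutator.change_forward_mode('full_v2')   # architecture-training forward mode
    mutator.arch_requires_grad()
    mutator.reset_binary_gates()             # sample one active op per LayerChoice
    mutator.unused_modules_off()             # stash ops not involved in this step
    loss = criterion(model(val_x), val_y)
    loss.backward()                          # fills ap_path_wb.grad via ArchGradientFunction
    mutator.set_arch_param_grad()            # convert binary-gate grads into alpha grads
    arch_optimizer.step()
    # in 'two' mode, mutator.rescale_updated_arch_param() would also be called here
    mutator.unused_modules_back()
    mutator.arch_disable_grad()
    mutator.change_forward_mode(None)        # back to plain weight training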
nni/algorithms/nas/pytorch/proxylessnas/trainer.py
deleted 100644 → 0
(This diff is collapsed in the page view.)
nni/algorithms/nas/pytorch/proxylessnas/utils.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch
import torch.nn as nn


def detach_variable(inputs):
    """
    Detach variables

    Parameters
    ----------
    inputs : pytorch tensors
        pytorch tensors
    """
    if isinstance(inputs, tuple):
        return tuple([detach_variable(x) for x in inputs])
    else:
        x = inputs.detach()
        x.requires_grad = inputs.requires_grad
        return x


def cross_entropy_with_label_smoothing(pred, target, label_smoothing=0.1):
    """
    Parameters
    ----------
    pred : pytorch tensor
        predicted value
    target : pytorch tensor
        label
    label_smoothing : float
        the degree of label smoothing

    Returns
    -------
    pytorch tensor
        cross entropy
    """
    logsoftmax = nn.LogSoftmax()
    n_classes = pred.size(1)
    # convert to one-hot
    target = torch.unsqueeze(target, 1)
    soft_target = torch.zeros_like(pred)
    soft_target.scatter_(1, target, 1)
    # label smoothing
    soft_target = soft_target * (1 - label_smoothing) + label_smoothing / n_classes
    return torch.mean(torch.sum(-soft_target * logsoftmax(pred), 1))


def accuracy(output, target, topk=(1,)):
    """
    Computes the precision@k for the specified values of k

    Parameters
    ----------
    output : pytorch tensor
        output, e.g., predicted value
    target : pytorch tensor
        label
    topk : tuple
        specify top1 and top5

    Returns
    -------
    list
        accuracy of top1 and top5
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
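A quick self-contained check of the two helpers above (illustration only, not part of the diff):

    import torch

    logits = torch.randn(8, 10)              # batch of 8 samples, 10 classes
    labels = torch.randint(0, 10, (8,))

    loss = cross_entropy_with_label_smoothing(logits, labels, label_smoothing=0.1)
    top1, top5 = accuracy(logits, labels, topk=(1, 5))
    print(loss.item(), top1.item(), top5.item())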
nni/algorithms/nas/pytorch/random/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .mutator import RandomMutator
nni/algorithms/nas/pytorch/random/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import torch
import torch.nn.functional as F

from nni.nas.pytorch.mutator import Mutator
from nni.nas.pytorch.mutables import LayerChoice, InputChoice


class RandomMutator(Mutator):
    """
    Random mutator that samples a random candidate in the search space each time ``reset()``.
    It uses the random functions in PyTorch, so users can set a seed in PyTorch to ensure deterministic behavior.
    """

    def sample_search(self):
        """
        Sample a random candidate.
        """
        result = dict()
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):
                gen_index = torch.randint(high=len(mutable), size=(1, ))
                result[mutable.key] = F.one_hot(gen_index, num_classes=len(mutable)).view(-1).bool()
            elif isinstance(mutable, InputChoice):
                if mutable.n_chosen is None:
                    result[mutable.key] = torch.randint(high=2, size=(mutable.n_candidates,)).view(-1).bool()
                else:
                    perm = torch.randperm(mutable.n_candidates)
                    mask = [i in perm[:mutable.n_chosen] for i in range(mutable.n_candidates)]
                    result[mutable.key] = torch.tensor(mask, dtype=torch.bool)  # pylint: disable=not-callable
        return result

    def sample_final(self):
        """
        Same as :meth:`sample_search`.
        """
        return self.sample_search()
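A minimal sketch of how this mutator is typically used (not part of the diff); model is assumed to be a user network containing LayerChoice/InputChoice mutables:

    from nni.algorithms.nas.pytorch.random import RandomMutator

    mutator = RandomMutator(model)   # `model` is an assumed user network with mutables
    mutator.reset()                  # samples a fresh random architecture and applies it
    arch = mutator.sample_final()    # dict: mutable key -> boolean mask of chosen candidates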
nni/algorithms/nas/pytorch/spos/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .evolution import SPOSEvolution
from .mutator import SPOSSupernetTrainingMutator
from .trainer import SPOSSupernetTrainer
nni/algorithms/nas/pytorch/spos/evolution.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import logging
import os
import re
from collections import deque

import numpy as np
from nni.tuner import Tuner
from nni.algorithms.nas.pytorch.classic_nas.mutator import LAYER_CHOICE, INPUT_CHOICE

_logger = logging.getLogger(__name__)


class SPOSEvolution(Tuner):
    """
    SPOS evolution tuner.

    Parameters
    ----------
    max_epochs : int
        Maximum number of epochs to run.
    num_select : int
        Number of survival candidates of each epoch.
    num_population : int
        Number of candidates at the start of each epoch. If candidates generated by
        crossover and mutation are not enough, the rest will be filled with random
        candidates.
    m_prob : float
        The probability of mutation.
    num_crossover : int
        Number of candidates generated by crossover in each epoch.
    num_mutation : int
        Number of candidates generated by mutation in each epoch.
    """

    def __init__(self, max_epochs=20, num_select=10, num_population=50, m_prob=0.1,
                 num_crossover=25, num_mutation=25):
        assert num_population >= num_select
        self.max_epochs = max_epochs
        self.num_select = num_select
        self.num_population = num_population
        self.m_prob = m_prob
        self.num_crossover = num_crossover
        self.num_mutation = num_mutation
        self.epoch = 0
        self.candidates = []
        self.search_space = None
        self.random_state = np.random.RandomState(0)

        # async status
        self._to_evaluate_queue = deque()
        self._sending_parameter_queue = deque()
        self._pending_result_ids = set()
        self._reward_dict = dict()
        self._id2candidate = dict()
        self._st_callback = None

    def update_search_space(self, search_space):
        """
        Handle the initialization/update event of search space.
        """
        self._search_space = search_space
        self._next_round()

    def _next_round(self):
        _logger.info("Epoch %d, generating...", self.epoch)
        if self.epoch == 0:
            self._get_random_population()
            self.export_results(self.candidates)
        else:
            best_candidates = self._select_top_candidates()
            self.export_results(best_candidates)
            if self.epoch >= self.max_epochs:
                return
            self.candidates = self._get_mutation(best_candidates) + self._get_crossover(best_candidates)
            self._get_random_population()
        self.epoch += 1

    def _random_candidate(self):
        chosen_arch = dict()
        for key, val in self._search_space.items():
            if val["_type"] == LAYER_CHOICE:
                choices = val["_value"]
                index = self.random_state.randint(len(choices))
                chosen_arch[key] = {"_value": choices[index], "_idx": index}
            elif val["_type"] == INPUT_CHOICE:
                raise NotImplementedError("Input choice is not implemented yet.")
        return chosen_arch

    def _add_to_evaluate_queue(self, cand):
        _logger.info("Generate candidate %s, adding to eval queue.", self._get_architecture_repr(cand))
        self._reward_dict[self._hashcode(cand)] = 0.
        self._to_evaluate_queue.append(cand)

    def _get_random_population(self):
        while len(self.candidates) < self.num_population:
            cand = self._random_candidate()
            if self._is_legal(cand):
                _logger.info("Random candidate generated.")
                self._add_to_evaluate_queue(cand)
                self.candidates.append(cand)

    def _get_crossover(self, best):
        result = []
        for _ in range(10 * self.num_crossover):
            cand_p1 = best[self.random_state.randint(len(best))]
            cand_p2 = best[self.random_state.randint(len(best))]
            assert cand_p1.keys() == cand_p2.keys()
            cand = {k: cand_p1[k] if self.random_state.randint(2) == 0 else cand_p2[k]
                    for k in cand_p1.keys()}
            if self._is_legal(cand):
                result.append(cand)
                self._add_to_evaluate_queue(cand)
            if len(result) >= self.num_crossover:
                break
        _logger.info("Found %d architectures with crossover.", len(result))
        return result

    def _get_mutation(self, best):
        result = []
        for _ in range(10 * self.num_mutation):
            cand = best[self.random_state.randint(len(best))].copy()
            mutation_sample = np.random.random_sample(len(cand))
            for s, k in zip(mutation_sample, cand):
                if s < self.m_prob:
                    choices = self._search_space[k]["_value"]
                    index = self.random_state.randint(len(choices))
                    cand[k] = {"_value": choices[index], "_idx": index}
            if self._is_legal(cand):
                result.append(cand)
                self._add_to_evaluate_queue(cand)
            if len(result) >= self.num_mutation:
                break
        _logger.info("Found %d architectures with mutation.", len(result))
        return result

    def _get_architecture_repr(self, cand):
        return re.sub(r"\".*?\": \{\"_idx\": (\d+), \"_value\": \".*?\"\}", r"\1",
                      self._hashcode(cand))

    def _is_legal(self, cand):
        if self._hashcode(cand) in self._reward_dict:
            return False
        return True

    def _select_top_candidates(self):
        reward_query = lambda cand: self._reward_dict[self._hashcode(cand)]
        _logger.info("All candidate rewards: %s", list(map(reward_query, self.candidates)))
        result = sorted(self.candidates, key=reward_query, reverse=True)[:self.num_select]
        _logger.info("Best candidate rewards: %s", list(map(reward_query, result)))
        return result

    @staticmethod
    def _hashcode(d):
        return json.dumps(d, sort_keys=True)

    def _bind_and_send_parameters(self):
        """
        There are two types of resources: parameter ids and candidates. This function is called at
        necessary times to bind these resources to send new trials with st_callback.
        """
        result = []
        while self._sending_parameter_queue and self._to_evaluate_queue:
            parameter_id = self._sending_parameter_queue.popleft()
            parameters = self._to_evaluate_queue.popleft()
            self._id2candidate[parameter_id] = parameters
            result.append(parameters)
            self._pending_result_ids.add(parameter_id)
            self._st_callback(parameter_id, parameters)
            _logger.info("Send parameter [%d] %s.", parameter_id, self._get_architecture_repr(parameters))
        return result

    def generate_multiple_parameters(self, parameter_id_list, **kwargs):
        """
        Callback function necessary to implement a tuner. This will put more parameter ids into the
        parameter id queue.
        """
        if "st_callback" in kwargs and self._st_callback is None:
            self._st_callback = kwargs["st_callback"]
        for parameter_id in parameter_id_list:
            self._sending_parameter_queue.append(parameter_id)
        self._bind_and_send_parameters()
        return []  # always not use this. might induce problem of over-sending

    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        """
        Callback function. Receive a trial result.
        """
        _logger.info("Candidate %d, reported reward %f", parameter_id, value)
        self._reward_dict[self._hashcode(self._id2candidate[parameter_id])] = value

    def trial_end(self, parameter_id, success, **kwargs):
        """
        Callback function when a trial is ended and resource is released.
        """
        self._pending_result_ids.remove(parameter_id)
        if not self._pending_result_ids and not self._to_evaluate_queue:
            # a new epoch now
            self._next_round()
            assert self._st_callback is not None
            self._bind_and_send_parameters()

    def export_results(self, result):
        """
        Export a number of candidates to `checkpoints` dir.

        Parameters
        ----------
        result : dict
            Chosen architectures to be exported.
        """
        os.makedirs("checkpoints", exist_ok=True)
        for i, cand in enumerate(result):
            converted = dict()
            for cand_key, cand_val in cand.items():
                onehot = [k == cand_val["_idx"] for k in range(len(self._search_space[cand_key]["_value"]))]
                converted[cand_key] = onehot
            with open(os.path.join("checkpoints", "%03d_%03d.json" % (self.epoch, i)), "w") as fp:
                json.dump(converted, fp)
nni/algorithms/nas/pytorch/spos/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

import numpy as np
from nni.algorithms.nas.pytorch.random import RandomMutator

_logger = logging.getLogger(__name__)


class SPOSSupernetTrainingMutator(RandomMutator):
    """
    A random mutator with a flops limit.

    Parameters
    ----------
    model : nn.Module
        PyTorch model.
    flops_func : callable
        Callable that takes a candidate from `sample_search` and returns its flops. When `flops_func`
        is None, functions related to flops will be deactivated.
    flops_lb : number
        Lower bound of flops.
    flops_ub : number
        Upper bound of flops.
    flops_bin_num : number
        Number of bins the flops interval is divided into, to ensure uniformity. A bigger number gives more
        uniform sampling, but the sampling will be slower.
    flops_sample_timeout : int
        Maximum number of attempts to sample before giving up and using a random candidate.
    """
    def __init__(self, model, flops_func=None, flops_lb=None, flops_ub=None,
                 flops_bin_num=7, flops_sample_timeout=500):
        super().__init__(model)
        self._flops_func = flops_func
        if self._flops_func is not None:
            self._flops_bin_num = flops_bin_num
            self._flops_bins = [flops_lb + (flops_ub - flops_lb) / flops_bin_num * i for i in range(flops_bin_num + 1)]
            self._flops_sample_timeout = flops_sample_timeout

    def sample_search(self):
        """
        Sample a candidate for training. When `flops_func` is not None, candidates will be sampled uniformly
        relative to flops.

        Returns
        -------
        dict
        """
        if self._flops_func is not None:
            for times in range(self._flops_sample_timeout):
                idx = np.random.randint(self._flops_bin_num)
                cand = super().sample_search()
                if self._flops_bins[idx] <= self._flops_func(cand) <= self._flops_bins[idx + 1]:
                    _logger.debug("Sampled candidate flops %f in %d times.", cand, times)
                    return cand
            _logger.warning("Failed to sample a flops-valid candidate within %d tries.", self._flops_sample_timeout)
        return super().sample_search()

    def sample_final(self):
        """
        Implemented only to satisfy the interface of Mutator.
        """
        return self.sample_search()
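A hypothetical example of the FLOPs-constrained sampling above (not part of the diff), assuming a supernet model and a helper lookup_flops that maps a sampled candidate dict to its FLOPs:

    mutator = SPOSSupernetTrainingMutator(model,
                                          flops_func=lookup_flops,  # assumed: candidate dict -> FLOPs
                                          flops_lb=290e6,
                                          flops_ub=360e6)
    cand = mutator.sample_search()   # sampled roughly uniformly across the FLOPs bins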
nni/algorithms/nas/pytorch/spos/trainer.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

import torch

from nni.nas.pytorch.trainer import Trainer
from nni.nas.pytorch.utils import AverageMeterGroup

from .mutator import SPOSSupernetTrainingMutator

logger = logging.getLogger(__name__)


class SPOSSupernetTrainer(Trainer):
    """
    This trainer trains a supernet that can be used for evolution search.

    Parameters
    ----------
    model : nn.Module
        Model with mutables.
    mutator : nni.nas.pytorch.mutator.Mutator
        A mutator object that has been initialized with the model.
    loss : callable
        Called with logits and targets. Returns a loss tensor.
    metrics : callable
        Returns a dict that maps metrics keys to metrics data.
    optimizer : Optimizer
        Optimizer that optimizes the model.
    num_epochs : int
        Number of epochs of training.
    train_loader : iterable
        Data loader of training. Raises ``StopIteration`` when one epoch is exhausted.
    valid_loader : iterable
        Data loader of validation. Raises ``StopIteration`` when one epoch is exhausted.
    batch_size : int
        Batch size.
    workers : int
        Number of threads for data preprocessing. Not used for this trainer. May be removed in the future.
    device : torch.device
        Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, the trainer
        automatically detects GPU and selects GPU first.
    log_frequency : int
        Number of mini-batches to log metrics.
    callbacks : list of Callback
        Callbacks to plug into the trainer. See Callbacks.
    """

    def __init__(self, model, loss, metrics,
                 optimizer, num_epochs, train_loader, valid_loader,
                 mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
                 callbacks=None):
        assert torch.cuda.is_available()
        super().__init__(model, mutator if mutator is not None else SPOSSupernetTrainingMutator(model),
                         loss, metrics, optimizer, num_epochs, None, None,
                         batch_size, workers, device, log_frequency, callbacks)

        self.train_loader = train_loader
        self.valid_loader = valid_loader

    def train_one_epoch(self, epoch):
        self.model.train()
        meters = AverageMeterGroup()
        for step, (x, y) in enumerate(self.train_loader):
            x, y = x.to(self.device), y.to(self.device)
            self.optimizer.zero_grad()
            self.mutator.reset()
            logits = self.model(x)
            loss = self.loss(logits, y)
            loss.backward()
            self.optimizer.step()

            metrics = self.metrics(logits, y)
            metrics["loss"] = loss.item()
            meters.update(metrics)
            if self.log_frequency is not None and step % self.log_frequency == 0:
                logger.info("Epoch [%s/%s] Step [%s/%s]  %s", epoch + 1,
                            self.num_epochs, step + 1, len(self.train_loader), meters)

    def validate_one_epoch(self, epoch):
        self.model.eval()
        meters = AverageMeterGroup()
        with torch.no_grad():
            for step, (x, y) in enumerate(self.valid_loader):
                x, y = x.to(self.device), y.to(self.device)
                self.mutator.reset()
                logits = self.model(x)
                loss = self.loss(logits, y)
                metrics = self.metrics(logits, y)
                metrics["loss"] = loss.item()
                meters.update(metrics)
                if self.log_frequency is not None and step % self.log_frequency == 0:
                    logger.info("Epoch [%s/%s] Validation Step [%s/%s]  %s", epoch + 1,
                                self.num_epochs, step + 1, len(self.valid_loader), meters)
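A sketch of wiring the trainer above together (not part of the diff); supernet, train_loader, and valid_loader are assumed to exist, and metrics follows the callable contract documented above:

    import torch
    from torch import nn

    optimizer = torch.optim.SGD(supernet.parameters(), lr=0.1, momentum=0.9)
    trainer = SPOSSupernetTrainer(supernet,
                                  loss=nn.CrossEntropyLoss(),
                                  metrics=lambda logits, y: {"acc": (logits.argmax(1) == y).float().mean().item()},
                                  optimizer=optimizer,
                                  num_epochs=120,
                                  train_loader=train_loader,
                                  valid_loader=valid_loader)
    trainer.train()   # the base Trainer is assumed to drive train_one_epoch / validate_one_epoch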
nni/algorithms/nas/tensorflow/__init__.py
deleted 100644 → 0
(empty file)
nni/algorithms/nas/tensorflow/classic_nas/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .mutator import get_and_apply_next_architecture
nni/algorithms/nas/tensorflow/classic_nas/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# pylint: skip-file

import json
import logging
import os
import sys

import tensorflow as tf

import nni
from nni.runtime.env_vars import trial_env_vars
from nni.nas.tensorflow.mutables import LayerChoice, InputChoice, MutableScope
from nni.nas.tensorflow.mutator import Mutator

logger = logging.getLogger(__name__)

NNI_GEN_SEARCH_SPACE = "NNI_GEN_SEARCH_SPACE"
LAYER_CHOICE = "layer_choice"
INPUT_CHOICE = "input_choice"


def get_and_apply_next_architecture(model):
    """
    Wrapper of :class:`~nni.nas.tensorflow.classic_nas.mutator.ClassicMutator` to make it more meaningful,
    similar to ``get_next_parameter`` for HPO.

    It will generate the search space based on ``model``.
    If the env ``NNI_GEN_SEARCH_SPACE`` exists, this is in dry run mode for
    generating the search space for the experiment.
    If not, there are still two modes: one is nni experiment mode where users
    use ``nnictl`` to start an experiment. The other is standalone mode
    where users directly run the trial command; this mode chooses the first
    one(s) for each LayerChoice and InputChoice.

    Parameters
    ----------
    model : nn.Module
        User's model with search space (e.g., LayerChoice, InputChoice) embedded in it.
    """
    ClassicMutator(model)


class ClassicMutator(Mutator):
    """
    This mutator applies the architecture chosen from the tuner.
    It implements the forward function of LayerChoice and InputChoice,
    to only activate the chosen ones.

    Parameters
    ----------
    model : nn.Module
        User's model with search space (e.g., LayerChoice, InputChoice) embedded in it.
    """

    def __init__(self, model):
        super(ClassicMutator, self).__init__(model)
        self._chosen_arch = {}
        self._search_space = self._generate_search_space()
        if NNI_GEN_SEARCH_SPACE in os.environ:
            # dry run for only generating search space
            self._dump_search_space(os.environ[NNI_GEN_SEARCH_SPACE])
            sys.exit(0)

        if trial_env_vars.NNI_PLATFORM is None:
            logger.warning("This is in standalone mode, the chosen are the first one(s).")
            self._chosen_arch = self._standalone_generate_chosen()
        else:
            # get chosen arch from tuner
            self._chosen_arch = nni.get_next_parameter()
            if self._chosen_arch is None:
                if trial_env_vars.NNI_PLATFORM == "unittest":
                    # happens if NNI_PLATFORM is intentionally set, e.g., in UT
                    logger.warning("`NNI_PLATFORM` is set but `param` is None. Falling back to standalone mode.")
                    self._chosen_arch = self._standalone_generate_chosen()
                else:
                    raise RuntimeError("Chosen architecture is None. This may be a platform error.")
        self.reset()

    def _sample_layer_choice(self, mutable, idx, value, search_space_item):
        """
        Convert layer choice to tensor representation.

        Parameters
        ----------
        mutable : Mutable
        idx : int
            Number `idx` of the list will be selected.
        value : str
            The verbose representation of the selected value.
        search_space_item : list
            The list for the corresponding search space.
        """
        # doesn't support multihot for layer choice yet
        assert 0 <= idx < len(mutable) and search_space_item[idx] == value, \
            "Index '{}' in search space '{}' is not '{}'".format(idx, search_space_item, value)
        mask = tf.one_hot(idx, len(mutable))
        return tf.cast(tf.reshape(mask, [-1]), tf.bool)

    def _sample_input_choice(self, mutable, idx, value, search_space_item):
        """
        Convert input choice to tensor representation.

        Parameters
        ----------
        mutable : Mutable
        idx : int
            Number `idx` of the list will be selected.
        value : str
            The verbose representation of the selected value.
        search_space_item : list
            The list for the corresponding search space.
        """
        candidate_repr = search_space_item["candidates"]
        multihot_list = [False] * mutable.n_candidates
        for i, v in zip(idx, value):
            assert 0 <= i < mutable.n_candidates and candidate_repr[i] == v, \
                "Index '{}' in search space '{}' is not '{}'".format(i, candidate_repr, v)
            assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx)
            multihot_list[i] = True
        return tf.cast(multihot_list, tf.bool)  # pylint: disable=not-callable

    def sample_search(self):
        """
        See :meth:`sample_final`.
        """
        return self.sample_final()

    def sample_final(self):
        """
        Convert the chosen arch and apply it on the model.
        """
        assert set(self._chosen_arch.keys()) == set(self._search_space.keys()), \
            "Unmatched keys, expected keys '{}' from search space, found '{}'.".format(self._search_space.keys(),
                                                                                       self._chosen_arch.keys())
        result = dict()
        for mutable in self.mutables:
            if isinstance(mutable, (LayerChoice, InputChoice)):
                assert mutable.key in self._chosen_arch, \
                    "Expected '{}' in chosen arch, but not found.".format(mutable.key)
                data = self._chosen_arch[mutable.key]
                assert isinstance(data, dict) and "_value" in data and "_idx" in data, \
                    "'{}' is not a valid choice.".format(data)
            if isinstance(mutable, LayerChoice):
                result[mutable.key] = self._sample_layer_choice(mutable, data["_idx"], data["_value"],
                                                                self._search_space[mutable.key]["_value"])
            elif isinstance(mutable, InputChoice):
                result[mutable.key] = self._sample_input_choice(mutable, data["_idx"], data["_value"],
                                                                self._search_space[mutable.key]["_value"])
            elif isinstance(mutable, MutableScope):
                logger.info("Mutable scope '%s' is skipped during parsing choices.", mutable.key)
            else:
                raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
        return result

    def _standalone_generate_chosen(self):
        """
        Generate the chosen architecture for standalone mode,
        i.e., choose the first one(s) for LayerChoice and InputChoice.
        ::
            { key_name: {"_value": "conv1",
                         "_idx": 0} }

            { key_name: {"_value": ["in1"],
                         "_idx": [0]} }

        Returns
        -------
        dict
            the chosen architecture
        """
        chosen_arch = {}
        for key, val in self._search_space.items():
            if val["_type"] == LAYER_CHOICE:
                choices = val["_value"]
                chosen_arch[key] = {"_value": choices[0], "_idx": 0}
            elif val["_type"] == INPUT_CHOICE:
                choices = val["_value"]["candidates"]
                n_chosen = val["_value"]["n_chosen"]
                if n_chosen is None:
                    n_chosen = len(choices)
                chosen_arch[key] = {"_value": choices[:n_chosen], "_idx": list(range(n_chosen))}
            else:
                raise ValueError("Unknown key '%s' and value '%s'." % (key, val))
        return chosen_arch

    def _generate_search_space(self):
        """
        Generate search space from mutables.
        Here is the search space format:
        ::
            { key_name: {"_type": "layer_choice",
                         "_value": ["conv1", "conv2"]} }

            { key_name: {"_type": "input_choice",
                         "_value": {"candidates": ["in1", "in2"],
                                    "n_chosen": 1}} }

        Returns
        -------
        dict
            the generated search space
        """
        search_space = {}
        for mutable in self.mutables:
            # for now we only generate flattened search space
            if isinstance(mutable, LayerChoice):
                key = mutable.key
                val = mutable.names
                search_space[key] = {"_type": LAYER_CHOICE, "_value": val}
            elif isinstance(mutable, InputChoice):
                key = mutable.key
                search_space[key] = {"_type": INPUT_CHOICE,
                                     "_value": {"candidates": mutable.choose_from,
                                                "n_chosen": mutable.n_chosen}}
            elif isinstance(mutable, MutableScope):
                logger.info("Mutable scope '%s' is skipped during generating search space.", mutable.key)
            else:
                raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
        return search_space

    def _dump_search_space(self, file_path):
        with open(file_path, "w") as ss_file:
            json.dump(self._search_space, ss_file, sort_keys=True, indent=2)
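Typical trial-side usage of get_and_apply_next_architecture (sketch, not part of the diff), where build_model() is an assumed function that returns a Keras model whose search space uses LayerChoice/InputChoice:

    from nni.algorithms.nas.tensorflow.classic_nas import get_and_apply_next_architecture

    model = build_model()                     # assumed: user model with mutables
    get_and_apply_next_architecture(model)    # fixes the architecture chosen by the tuner
    # ... continue with ordinary training / evaluation of the now-fixed model,
    # reporting the final metric back to NNI as in a normal trial ...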
nni/algorithms/nas/tensorflow/enas/__init__.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .mutator import EnasMutator
from .trainer import EnasTrainer
nni/algorithms/nas/tensorflow/enas/mutator.py
deleted 100644 → 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# pylint: skip-file

import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, LSTMCell, RNN
from tensorflow.keras.losses import SparseCategoricalCrossentropy, Reduction

from nni.nas.tensorflow.mutator import Mutator
from nni.nas.tensorflow.mutables import LayerChoice, InputChoice, MutableScope


class EnasMutator(Mutator):
    def __init__(self, model,
                 lstm_size=64,
                 lstm_num_layers=1,
                 tanh_constant=1.5,
                 cell_exit_extra_step=False,
                 skip_target=0.4,
                 temperature=None,
                 branch_bias=0.25,
                 entropy_reduction='sum'):
        super().__init__(model)
        self.tanh_constant = tanh_constant
        self.temperature = temperature
        self.cell_exit_extra_step = cell_exit_extra_step

        cells = [LSTMCell(units=lstm_size, use_bias=False) for _ in range(lstm_num_layers)]
        self.lstm = RNN(cells, stateful=True)
        self.g_emb = tf.random.normal((1, 1, lstm_size)) * 0.1
        self.skip_targets = tf.constant([1.0 - skip_target, skip_target])

        self.max_layer_choice = 0
        self.bias_dict = {}
        for mutable in self.mutables:
            if isinstance(mutable, LayerChoice):
                if self.max_layer_choice == 0:
                    self.max_layer_choice = len(mutable)
                assert self.max_layer_choice == len(mutable), \
                    "ENAS mutator requires all layer choice have the same number of candidates."
                if 'reduce' in mutable.key:
                    bias = []
                    for choice in mutable.choices:
                        if 'conv' in str(type(choice)).lower():
                            bias.append(branch_bias)
                        else:
                            bias.append(-branch_bias)
                    self.bias_dict[mutable.key] = tf.constant(bias)

        # exposed for trainer
        self.sample_log_prob = 0
        self.sample_entropy = 0
        self.sample_skip_penalty = 0

        # internal nn layers
        self.embedding = Embedding(self.max_layer_choice + 1, lstm_size)
        self.soft = Dense(self.max_layer_choice, use_bias=False)
        self.attn_anchor = Dense(lstm_size, use_bias=False)
        self.attn_query = Dense(lstm_size, use_bias=False)
        self.v_attn = Dense(1, use_bias=False)
        assert entropy_reduction in ['sum', 'mean'], 'Entropy reduction must be one of sum and mean.'
        self.entropy_reduction = tf.reduce_sum if entropy_reduction == 'sum' else tf.reduce_mean
        self.cross_entropy_loss = SparseCategoricalCrossentropy(from_logits=True, reduction=Reduction.NONE)

        self._first_sample = True

    def sample_search(self):
        self._initialize()
        self._sample(self.mutables)
        self._first_sample = False
        return self._choices

    def sample_final(self):
        return self.sample_search()

    def _sample(self, tree):
        mutable = tree.mutable
        if isinstance(mutable, LayerChoice) and mutable.key not in self._choices:
            self._choices[mutable.key] = self._sample_layer_choice(mutable)
        elif isinstance(mutable, InputChoice) and mutable.key not in self._choices:
            self._choices[mutable.key] = self._sample_input_choice(mutable)
        for child in tree.children:
            self._sample(child)
        if self.cell_exit_extra_step and isinstance(mutable, MutableScope) and mutable.key not in self._anchors_hid:
            self._anchors_hid[mutable.key] = self.lstm(self._inputs, 1)

    def _initialize(self):
        self._choices = {}
        self._anchors_hid = {}
        self._inputs = self.g_emb
        # seems the `input_shape` parameter of RNN does not work
        # work around it by omitting `reset_states` for the first run
        if not self._first_sample:
            self.lstm.reset_states()
        self.sample_log_prob = 0
        self.sample_entropy = 0
        self.sample_skip_penalty = 0

    def _sample_layer_choice(self, mutable):
        logit = self.soft(self.lstm(self._inputs))
        if self.temperature is not None:
            logit /= self.temperature
        if self.tanh_constant is not None:
            logit = self.tanh_constant * tf.tanh(logit)
        if mutable.key in self.bias_dict:
            logit += self.bias_dict[mutable.key]
        softmax_logit = tf.math.log(tf.nn.softmax(logit, axis=-1))
        branch_id = tf.reshape(tf.random.categorical(softmax_logit, num_samples=1), [1])
        log_prob = self.cross_entropy_loss(branch_id, logit)
        self.sample_log_prob += self.entropy_reduction(log_prob)
        entropy = log_prob * tf.math.exp(-log_prob)
        self.sample_entropy += self.entropy_reduction(entropy)
        self._inputs = tf.reshape(self.embedding(branch_id), [1, 1, -1])
        mask = tf.one_hot(branch_id, self.max_layer_choice)
        return tf.cast(tf.reshape(mask, [-1]), tf.bool)

    def _sample_input_choice(self, mutable):
        query, anchors = [], []
        for label in mutable.choose_from:
            if label not in self._anchors_hid:
                self._anchors_hid[label] = self.lstm(self._inputs)
            query.append(self.attn_anchor(self._anchors_hid[label]))
            anchors.append(self._anchors_hid[label])
        query = tf.concat(query, axis=0)
        query = tf.tanh(query + self.attn_query(anchors[-1]))
        query = self.v_attn(query)

        if self.temperature is not None:
            query /= self.temperature
        if self.tanh_constant is not None:
            query = self.tanh_constant * tf.tanh(query)

        if mutable.n_chosen is None:
            logit = tf.concat([-query, query], axis=1)
            softmax_logit = tf.math.log(tf.nn.softmax(logit, axis=-1))
            skip = tf.reshape(tf.random.categorical(softmax_logit, num_samples=1), [-1])
            skip_prob = tf.math.sigmoid(logit)
            kl = tf.reduce_sum(skip_prob * tf.math.log(skip_prob / self.skip_targets))
            self.sample_skip_penalty += kl
            log_prob = self.cross_entropy_loss(skip, logit)

            skip = tf.cast(skip, tf.float32)
            inputs = tf.tensordot(skip, tf.concat(anchors, 0), 1) / (1. + tf.reduce_sum(skip))
            self._inputs = tf.reshape(inputs, [1, 1, -1])

        else:
            assert mutable.n_chosen == 1, "Input choice must select exactly one or any in ENAS."
            logit = tf.reshape(query, [1, -1])
            softmax_logit = tf.math.log(tf.nn.softmax(logit, axis=-1))
            index = tf.reshape(tf.random.categorical(softmax_logit, num_samples=1), [-1])
            skip = tf.reshape(tf.one_hot(index, mutable.n_candidates), [-1])
            # when the size is 1, tf does not accept a tensor here, complaining the shape is wrong
            # but using a numpy array seems fine
            log_prob = self.cross_entropy_loss(logit, query.numpy())
            self._inputs = tf.reshape(anchors[index.numpy()[0]], [1, 1, -1])

        self.sample_log_prob += self.entropy_reduction(log_prob)
        entropy = log_prob * tf.exp(-log_prob)
        self.sample_entropy += self.entropy_reduction(entropy)
        assert len(skip) == mutable.n_candidates, (skip, mutable.n_candidates, mutable.n_chosen)
        return tf.cast(skip, tf.bool)
nni/algorithms/nas/tensorflow/enas/trainer.py
deleted 100644 → 0
(This diff is collapsed in the page view.)
nni/retiarii/evaluator/functional.py → nni/nas/evaluator/functional.py
File moved