OpenDAS / nni / Commits / aa51e79c

Unverified commit aa51e79c, authored Nov 26, 2019 by QuanluZhang, committed by GitHub Nov 26, 2019

support classic nas mode: each chosen arch as a separate trial job (#1775)

parent 6c1fe5c8

Showing 10 changed files with 529 additions and 105 deletions (+529 -105)
- docs/en_US/Tutorial/Nnictl.md  (+32 -0)
- examples/nas/classic_nas/config_nas.yml  (+18 -0)
- examples/nas/classic_nas/config_ppo.yml  (+18 -0)
- examples/nas/classic_nas/mnist.py  (+181 -0)
- examples/tuners/random_nas_tuner/random_nas_tuner.py  (+21 -30)
- src/sdk/pynni/nni/nas/pytorch/classic_nas/__init__.py  (+1 -0)
- src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py  (+192 -0)
- src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py  (+43 -74)
- tools/nni_cmd/nnictl.py  (+9 -1)
- tools/nni_cmd/nnictl_utils.py  (+14 -0)
docs/en_US/Tutorial/Nnictl.md (+32 -0)

In the command index (hunk `@@ -22,6 +22,7 @@ nnictl support commands:`), a link to the new command is added between `nnictl package` and `nnictl --version`:

```diff
 * [nnictl webui](#webui)
 * [nnictl tensorboard](#tensorboard)
 * [nnictl package](#package)
+* [nnictl ss_gen](#ss_gen)
 * [nnictl --version](#version)

 ### Manage an experiment
```

In the command reference (hunk `@@ -733,6 +734,37 @@ Debug mode will disable version check function in Trialkeeper.`), the following section is inserted after the `nnictl package show` example and before the `Check NNI version` (`<a name="version"></a>`) section; every line below is new:

<a name="ss_gen"></a>

`Generate search space`

* __nnictl ss_gen__

  * Description

    Generate search space from user trial code which uses NNI NAS APIs.

  * Usage

    ```bash
    nnictl ss_gen [OPTIONS]
    ```

  * Options

    |Name, shorthand|Required|Default|Description|
    |------|------|------|------|
    |--trial_command| True| |The command of the trial code|
    |--trial_dir| False| ./ |The directory of the trial code|
    |--file| False| nni_auto_gen_search_space.json |The file for storing generated search space|

  * Example

    > Generate a search space

    ```bash
    nnictl ss_gen --trial_command="python3 mnist.py" --trial_dir=./ --file=ss.json
    ```
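For orientation, the file written by `nnictl ss_gen` follows the flattened format produced by `ClassicMutator._generate_search_space` (added in mutator.py further down in this commit). A rough sketch of what it could contain for the mnist.py example below is shown here; the keys come from the `LayerChoice`/`InputChoice` keys, while the candidate strings are `repr()`s of the candidate modules, so the exact text is illustrative only:

```python
# Illustrative sketch of nni_auto_gen_search_space.json contents for the classic NAS
# mnist.py example in this commit; the candidate strings and InputChoice candidate
# names are placeholders, not captured output.
example_search_space = {
    "first_conv": {
        "_type": "layer_choice",
        "_value": ["Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))",
                   "Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1))"],
    },
    "skip": {
        "_type": "input_choice",
        "_value": {"candidates": ["cand_0", "cand_1"], "n_chosen": 1},
    },
}
```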
examples/nas/classic_nas/config_nas.yml (new file, +18 -0)

```yaml
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
#please use `nnictl ss_gen` to generate search space file first
searchSpacePath: <the_generated_search_space_path>
useAnnotation: False
tuner:
  codeDir: ../../tuners/random_nas_tuner
  classFileName: random_nas_tuner.py
  className: RandomNASTuner
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
```
examples/nas/classic_nas/config_ppo.yml (new file, +18 -0)

```yaml
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 100h
maxTrialNum: 1000
#choice: local, remote, pai
trainingServicePlatform: local
#please use `nnictl ss_gen` to generate search space file first
searchSpacePath: <the_generated_search_space_path>
useAnnotation: False
tuner:
  builtinTunerName: PPOTuner
  classArgs:
    optimize_mode: maximize
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
```
examples/nas/classic_nas/mnist.py (new file, +181 -0)

```python
"""
A deep MNIST classifier using convolutional layers.
This file is a modification of the official pytorch mnist example:
https://github.com/pytorch/examples/blob/master/mnist/main.py
"""

import os
import argparse
import logging

import nni
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from nni.nas.pytorch.mutables import LayerChoice, InputChoice
from nni.nas.pytorch.classic_nas import get_and_apply_next_architecture

logger = logging.getLogger('mnist_AutoML')


class Net(nn.Module):
    def __init__(self, hidden_size):
        super(Net, self).__init__()
        # two options of conv1
        self.conv1 = LayerChoice([nn.Conv2d(1, 20, 5, 1),
                                  nn.Conv2d(1, 20, 3, 1)],
                                 key='first_conv')
        # two options of mid_conv
        self.mid_conv = LayerChoice([nn.Conv2d(20, 20, 3, 1, padding=1),
                                     nn.Conv2d(20, 20, 5, 1, padding=2)],
                                    key='mid_conv')
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 10)
        # skip connection over mid_conv
        self.input_switch = InputChoice(n_candidates=2,
                                        n_chosen=1,
                                        key='skip')

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        old_x = x
        x = F.relu(self.mid_conv(x))
        zero_x = torch.zeros_like(old_x)
        skip_x = self.input_switch([zero_x, old_x])
        x = torch.add(x, skip_x)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args['log_interval'] == 0:
            logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

    return accuracy


def main(args):
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()

    torch.manual_seed(args['seed'])

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    #data_dir = os.path.join(args['data_dir'], nni.get_trial_id())
    data_dir = os.path.join(args['data_dir'], 'data')

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args['batch_size'], shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=1000, shuffle=True, **kwargs)

    hidden_size = args['hidden_size']

    model = Net(hidden_size=hidden_size).to(device)
    get_and_apply_next_architecture(model)
    optimizer = optim.SGD(model.parameters(), lr=args['lr'],
                          momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        if epoch < args['epochs']:
            # report intermediate result
            nni.report_intermediate_result(test_acc)
            logger.debug('test accuracy %g', test_acc)
            logger.debug('Pipe send intermediate result done.')
        else:
            # report final result
            nni.report_final_result(test_acc)
            logger.debug('Final result is %g', test_acc)
            logger.debug('Send final result done.')


def get_params():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument("--data_dir", type=str,
                        default='/tmp/tensorflow/mnist/input_data', help="data directory")
    parser.add_argument('--batch_size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument("--hidden_size", type=int, default=512, metavar='N',
                        help='hidden layer size (default: 512)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--log_interval', type=int, default=1000, metavar='N',
                        help='how many batches to wait before logging training status')

    args, _ = parser.parse_known_args()
    return args


if __name__ == '__main__':
    try:
        params = vars(get_params())
        main(params)
    except Exception as exception:
        logger.exception(exception)
        raise
```
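To make the trial/tuner handshake concrete: under classic NAS mode, `get_and_apply_next_architecture(model)` asks the tuner for one architecture per trial through `nni.get_next_parameter()`. For the model above, the received parameter is shaped like the sketch below (the format follows `ClassicMutator`'s docstrings later in this commit; the `_value` strings are `repr()`s of the candidate modules, so the exact text is illustrative):

```python
# Illustrative shape of the architecture one trial receives for the Net defined above.
# '_idx' picks the candidate; '_value' is used only for consistency checks.
chosen_arch = {
    "first_conv": {"_value": "Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1))", "_idx": 1},
    "mid_conv": {"_value": "Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))", "_idx": 0},
    "skip": {"_value": ["cand_0"], "_idx": [0]},  # InputChoice: lists of chosen candidates/indices
}
```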
examples/tuners/random_nas_tuner/random_nas_tuner.py (+21 -30)

`random_archi_generator` is rewritten to consume the flattened `layer_choice`/`input_choice` search space instead of the old `mutable_layer` format:

```diff
@@ -6,36 +6,27 @@ from nni.tuner import Tuner
 def random_archi_generator(nas_ss, random_state):
     '''random
     '''
-    chosen_archi = {}
-    for block_name, block_value in nas_ss.items():
-        assert block_value['_type'] == "mutable_layer", \
-            "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
-        block = block_value['_value']
-        tmp_block = {}
-        for layer_name, layer in block.items():
-            tmp_layer = {}
-            for key, value in layer.items():
-                if key == 'layer_choice':
-                    index = random_state.randint(len(value))
-                    tmp_layer['chosen_layer'] = value[index]
-                elif key == 'optional_inputs':
-                    tmp_layer['chosen_inputs'] = []
-                    if layer['optional_inputs']:
-                        if isinstance(layer['optional_input_size'], int):
-                            choice_num = layer['optional_input_size']
-                        else:
-                            choice_range = layer['optional_input_size']
-                            choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
-                        for _ in range(choice_num):
-                            index = random_state.randint(len(layer['optional_inputs']))
-                            tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
-                elif key == 'optional_input_size':
-                    pass
-                else:
-                    raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
-            tmp_block[layer_name] = tmp_layer
-        chosen_archi[block_name] = tmp_block
-    return chosen_archi
+    chosen_arch = {}
+    for key, val in nas_ss.items():
+        assert val['_type'] in ['layer_choice', 'input_choice'], \
+            "Random NAS Tuner only receives NAS search space whose _type is 'layer_choice' or 'input_choice'"
+        if val['_type'] == 'layer_choice':
+            choices = val['_value']
+            index = random_state.randint(len(choices))
+            chosen_arch[key] = {'_value': choices[index], '_idx': index}
+        elif val['_type'] == 'input_choice':
+            choices = val['_value']['candidates']
+            n_chosen = val['_value']['n_chosen']
+            chosen = []
+            idxs = []
+            for _ in range(n_chosen):
+                index = random_state.randint(len(choices))
+                chosen.append(choices[index])
+                idxs.append(index)
+            chosen_arch[key] = {'_value': chosen, '_idx': idxs}
+        else:
+            raise ValueError('Unknown key %s and value %s' % (key, val))
+    return chosen_arch

 class RandomNASTuner(Tuner):
```
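A quick way to sanity-check the rewritten generator is to feed it a hand-written flattened search space (a sketch; it assumes `random_state` behaves like `numpy.random.RandomState`, i.e. `randint(n)` draws uniformly from `[0, n)`, and that `random_archi_generator` is importable from the module above):

```python
import numpy as np

# Minimal sketch: one layer_choice and one input_choice with n_chosen = 1.
nas_ss = {
    "first_conv": {"_type": "layer_choice", "_value": ["conv5x5", "conv3x3"]},
    "skip": {"_type": "input_choice",
             "_value": {"candidates": ["zero", "identity"], "n_chosen": 1}},
}
arch = random_archi_generator(nas_ss, np.random.RandomState(0))
# arch has the form {'first_conv': {'_value': <one candidate>, '_idx': <its index>},
#                    'skip': {'_value': [<one candidate>], '_idx': [<its index>]}}
print(arch)
```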
src/sdk/pynni/nni/nas/pytorch/classic_nas/__init__.py (new file, +1 -0)

```python
from .mutator import get_and_apply_next_architecture
```
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py (new file, +192 -0)

```python
import os
import sys
import json
import logging

import torch

import nni
from nni.env_vars import trial_env_vars
from nni.nas.pytorch.base_mutator import BaseMutator
from nni.nas.pytorch.mutables import LayerChoice, InputChoice

logger = logging.getLogger(__name__)


def get_and_apply_next_architecture(model):
    """
    Wrapper of ClassicMutator to make it more meaningful,
    similar to ```get_next_parameter``` for HPO.

    Parameters
    ----------
    model : pytorch model
        user's model with search space (e.g., LayerChoice, InputChoice) embedded in it
    """
    ClassicMutator(model)


class ClassicMutator(BaseMutator):
    """
    This mutator is to apply the architecture chosen from tuner.
    It implements the forward function of LayerChoice and InputChoice,
    to only activate the chosen ones
    """

    def __init__(self, model):
        """
        Generate search space based on ```model```.
        If env ```NNI_GEN_SEARCH_SPACE``` exists, this is in dry run mode for
        generating search space for the experiment.
        If not, there are still two mode, one is nni experiment mode where users
        use ```nnictl``` to start an experiment. The other is standalone mode
        where users directly run the trial command, this mode chooses the first
        one(s) for each LayerChoice and InputChoice.

        Parameters
        ----------
        model : pytorch model
            user's model with search space (e.g., LayerChoice, InputChoice) embedded in it
        """
        super(ClassicMutator, self).__init__(model)
        self.chosen_arch = {}
        self.search_space = self._generate_search_space()
        if 'NNI_GEN_SEARCH_SPACE' in os.environ:
            # dry run for only generating search space
            self._dump_search_space(self.search_space, os.environ.get('NNI_GEN_SEARCH_SPACE'))
            sys.exit(0)
        # get chosen arch from tuner
        self.chosen_arch = nni.get_next_parameter()
        if not self.chosen_arch and trial_env_vars.NNI_PLATFORM is None:
            logger.warning('This is in standalone mode, the chosen are the first one(s)')
            self.chosen_arch = self._standalone_generate_chosen()
        self._validate_chosen_arch()

    def _validate_chosen_arch(self):
        pass

    def _standalone_generate_chosen(self):
        """
        Generate the chosen architecture for standalone mode,
        i.e., choose the first one(s) for LayerChoice and InputChoice

        { key_name: {'_value': "conv1",
                     '_idx': 0} }

        { key_name: {'_value': ["in1"],
                     '_idx': [0]} }

        Returns
        -------
        dict
            the chosen architecture
        """
        chosen_arch = {}
        for key, val in self.search_space.items():
            if val['_type'] == 'layer_choice':
                choices = val['_value']
                chosen_arch[key] = {'_value': choices[0], '_idx': 0}
            elif val['_type'] == 'input_choice':
                choices = val['_value']['candidates']
                n_chosen = val['_value']['n_chosen']
                chosen_arch[key] = {'_value': choices[:n_chosen], '_idx': list(range(n_chosen))}
            else:
                raise ValueError('Unknown key %s and value %s' % (key, val))
        return chosen_arch

    def _generate_search_space(self):
        """
        Generate search space from mutables.
        Here is the search space format:

        { key_name: {'_type': 'layer_choice',
                     '_value': ["conv1", "conv2"]} }

        { key_name: {'_type': 'input_choice',
                     '_value': {'candidates': ["in1", "in2"],
                                'n_chosen': 1}} }

        Returns
        -------
        dict
            the generated search space
        """
        search_space = {}
        for mutable in self.mutables:
            # for now we only generate flattened search space
            if isinstance(mutable, LayerChoice):
                key = mutable.key
                val = [repr(choice) for choice in mutable.choices]
                search_space[key] = {"_type": "layer_choice", "_value": val}
            elif isinstance(mutable, InputChoice):
                key = mutable.key
                search_space[key] = {"_type": "input_choice",
                                     "_value": {"candidates": mutable.choose_from,
                                                "n_chosen": mutable.n_chosen}}
            else:
                raise TypeError('Unsupported mutable type: %s.' % type(mutable))
        return search_space

    def _dump_search_space(self, search_space, file_path):
        with open(file_path, 'w') as ss_file:
            json.dump(search_space, ss_file)

    def _tensor_reduction(self, reduction_type, tensor_list):
        if tensor_list == "none":
            return tensor_list
        if not tensor_list:
            return None  # empty. return None for now
        if len(tensor_list) == 1:
            return tensor_list[0]
        if reduction_type == "sum":
            return sum(tensor_list)
        if reduction_type == "mean":
            return sum(tensor_list) / len(tensor_list)
        if reduction_type == "concat":
            return torch.cat(tensor_list, dim=1)
        raise ValueError("Unrecognized reduction policy: \"{}\"".format(reduction_type))

    def on_forward_layer_choice(self, mutable, *inputs):
        """
        Implement the forward of LayerChoice

        Parameters
        ----------
        mutable: LayerChoice
        inputs: list of torch.Tensor

        Returns
        -------
        tuple
            return of the chosen op, the index of the chosen op
        """
        assert mutable.key in self.chosen_arch
        val = self.chosen_arch[mutable.key]
        assert isinstance(val, dict)
        idx = val['_idx']
        assert self.search_space[mutable.key]['_value'][idx] == val['_value']
        return mutable.choices[idx](*inputs), idx

    def on_forward_input_choice(self, mutable, tensor_list):
        """
        Implement the forward of InputChoice

        Parameters
        ----------
        mutable: InputChoice
        tensor_list: list of torch.Tensor
        tags: list of string

        Returns
        -------
        tuple of torch.Tensor and list
            reduced tensor, mask list
        """
        assert mutable.key in self.chosen_arch
        val = self.chosen_arch[mutable.key]
        assert isinstance(val, dict)
        mask = [0 for _ in range(mutable.n_candidates)]
        out = []
        for i, idx in enumerate(val['_idx']):
            # check whether idx matches the chosen candidate name
            assert self.search_space[mutable.key]['_value']['candidates'][idx] == val['_value'][i]
            out.append(tensor_list[idx])
            mask[idx] = 1
        return self._tensor_reduction(mutable.reduction, out), mask
```
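For readers trying the new mutator outside of an experiment, a minimal standalone run looks like the sketch below (assuming this commit's `nni` package is installed and the `Net` class from the mnist.py example above is importable). Without `NNI_GEN_SEARCH_SPACE` set and outside `nnictl`, `nni.get_next_parameter()` returns nothing, so `ClassicMutator` logs a warning and activates the first candidate of every `LayerChoice`/`InputChoice`, after which the model behaves like an ordinary `nn.Module`:

```python
import torch
from nni.nas.pytorch.classic_nas import get_and_apply_next_architecture
from mnist import Net  # the example model from examples/nas/classic_nas/mnist.py above

model = Net(hidden_size=512)
get_and_apply_next_architecture(model)  # standalone mode: first candidates are chosen
out = model(torch.randn(1, 1, 28, 28))  # now runs like a plain nn.Module
print(out.shape)                        # expected: torch.Size([1, 10])
```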
src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py (+43 -74)

`PPOTuner` is updated to consume the flattened `layer_choice`/`input_choice` search space (the per-block `_process_one_nas_space` helper is removed) and to emit configurations in the `{'_value': ..., '_idx': ...}` format; several debug log statements are also added:

```diff
@@ -351,74 +351,33 @@ class PPOTuner(Tuner):
         self.send_trial_callback = None
         logger.info('Finished PPOTuner initialization')

-    def _process_one_nas_space(self, block_name, block_space):
-        """
-        Process nas space to determine observation space and action space
-
-        Parameters
-        ----------
-        block_name : str
-            The name of the mutable block
-        block_space : dict
-            Search space of this mutable block
-
-        Returns
-        -------
-        actions_spaces : list
-            List of the space of each action
-        actions_to_config : list
-            The mapping from action to generated configuration
-        """
-        actions_spaces = []
-        actions_to_config = []
-
-        block_arch_temp = {}
-        for l_name, layer in block_space.items():
-            chosen_layer_temp = {}
-
-            if len(layer['layer_choice']) > 1:
-                actions_spaces.append(layer['layer_choice'])
-                actions_to_config.append((block_name, l_name, 'chosen_layer'))
-                chosen_layer_temp['chosen_layer'] = None
-            else:
-                assert len(layer['layer_choice']) == 1
-                chosen_layer_temp['chosen_layer'] = layer['layer_choice'][0]
-
-            if layer['optional_input_size'] not in [0, 1, [0, 1]]:
-                raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the pecified one is %s' % (layer['optional_input_size']))
-            if isinstance(layer['optional_input_size'], list):
-                actions_spaces.append(["None", *layer['optional_inputs']])
-                actions_to_config.append((block_name, l_name, 'chosen_inputs'))
-                chosen_layer_temp['chosen_inputs'] = None
-            elif layer['optional_input_size'] == 1:
-                actions_spaces.append(layer['optional_inputs'])
-                actions_to_config.append((block_name, l_name, 'chosen_inputs'))
-                chosen_layer_temp['chosen_inputs'] = None
-            elif layer['optional_input_size'] == 0:
-                chosen_layer_temp['chosen_inputs'] = []
-            else:
-                raise ValueError('invalid type and value of optional_input_size')
-
-            block_arch_temp[l_name] = chosen_layer_temp
-
-        self.chosen_arch_template[block_name] = block_arch_temp
-
-        return actions_spaces, actions_to_config
-
     def _process_nas_space(self, search_space):
+        """
+        Process nas search space to get action/observation space
+        """
         actions_spaces = []
         actions_to_config = []
-        for b_name, block in search_space.items():
-            if block['_type'] != 'mutable_layer':
-                raise ValueError('PPOTuner only accept mutable_layer type in search space, but the current one is %s' % (block['_type']))
-            block = block['_value']
-            act, act_map = self._process_one_nas_space(b_name, block)
-            actions_spaces.extend(act)
-            actions_to_config.extend(act_map)
+        for key, val in search_space.items():
+            if val['_type'] == 'layer_choice':
+                actions_to_config.append((key, 'layer_choice'))
+                actions_spaces.append(val['_value'])
+                self.chosen_arch_template[key] = None
+            elif val['_type'] == 'input_choice':
+                candidates = val['_value']['candidates']
+                n_chosen = val['_value']['n_chosen']
+                if n_chosen not in [0, 1, [0, 1]]:
+                    raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the pecified one is %s' % (n_chosen))
+                if isinstance(n_chosen, list):
+                    actions_to_config.append((key, 'input_choice'))
+                    # FIXME: risk, candidates might also have None
+                    actions_spaces.append(['None', *candidates])
+                    self.chosen_arch_template[key] = None
+                elif n_chosen == 1:
+                    actions_to_config.append((key, 'input_choice'))
+                    actions_spaces.append(candidates)
+                    self.chosen_arch_template[key] = None
+                elif n_chosen == 0:
+                    self.chosen_arch_template[key] = []
+            else:
+                raise ValueError('Unsupported search space type: %s' % (val['_type']))

         # calculate observation space
         dedup = {}
@@ -428,7 +387,6 @@ class PPOTuner(Tuner):
         full_act_space = [act for act, _ in dedup.items()]
         assert len(full_act_space) == len(dedup)
         observation_space = len(full_act_space)
         nsteps = len(actions_spaces)

         return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps
@@ -470,7 +428,7 @@ class PPOTuner(Tuner):
             Search space for NAS
             the format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
-        logger.info('=== update search space %s', search_space)
+        logger.info('update search space %s', search_space)
         assert self.search_space is None
         self.search_space = search_space
@@ -496,16 +454,19 @@ class PPOTuner(Tuner):
         chosen_arch = copy.deepcopy(self.chosen_arch_template)
         for cnt, act in enumerate(actions):
             act_name = self.full_act_space[act]
-            (block_name, layer_name, key) = self.actions_to_config[cnt]
-            if key == 'chosen_inputs':
+            (_key, _type) = self.actions_to_config[cnt]
+            if _type == 'input_choice':
                 if act_name == 'None':
-                    chosen_arch[block_name][layer_name][key] = []
+                    chosen_arch[_key] = {'_value': [], '_idx': []}
                 else:
-                    chosen_arch[block_name][layer_name][key] = [act_name]
-            elif key == 'chosen_layer':
-                chosen_arch[block_name][layer_name][key] = act_name
+                    candidates = self.search_space[_key]['_value']['candidates']
+                    idx = candidates.index(act_name)
+                    chosen_arch[_key] = {'_value': [act_name], '_idx': [idx]}
+            elif _type == 'layer_choice':
+                idx = self.search_space[_key]['_value'].index(act_name)
+                chosen_arch[_key] = {'_value': act_name, '_idx': idx}
             else:
-                raise ValueError('unrecognized key: {0}'.format(key))
+                raise ValueError('unrecognized key: {0}'.format(_type))
         return chosen_arch

     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
@@ -561,6 +522,7 @@ class PPOTuner(Tuner):
             trial_info_idx, actions = self.trials_info.get_next()
             if trial_info_idx is None:
+                logger.debug('Credit added by one in parameters request')
                 self.credit += 1
                 self.param_ids.append(parameter_id)
                 raise nni.NoMoreTrialError('no more parameters now.')
@@ -573,6 +535,7 @@ class PPOTuner(Tuner):
         """
        Run a inference to generate next batch of configurations
         """
+        logger.debug('Start next round inference...')
         self.finished_trials = 0
         self.model.compute_rewards(self.trials_info, self.trials_result)
         self.model.train(self.trials_info, self.inf_batch_size)
@@ -584,6 +547,7 @@ class PPOTuner(Tuner):
                                      mb_values, mb_neglogpacs,
                                      mb_dones, last_values,
                                      self.inf_batch_size)
+        logger.debug('Next round inference complete.')
         # check credit and submit new trials
         for _ in range(self.credit):
             trial_info_idx, actions = self.trials_info.get_next()
@@ -596,6 +560,7 @@ class PPOTuner(Tuner):
             new_config = self._actions_to_config(actions)
             self.send_trial_callback(param_id, new_config)
             self.credit -= 1
+            logger.debug('Send new trial (%d, %s) for reducing credit', param_id, new_config)

     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
         """
@@ -621,7 +586,10 @@ class PPOTuner(Tuner):
         self.trials_result[trial_info_idx] = value
         self.finished_trials += 1
+        logger.debug('receive_trial_result, parameter_id %d, trial_info_idx %d, finished_trials %d, inf_batch_size %d',
+                     parameter_id, trial_info_idx, self.finished_trials, self.inf_batch_size)
         if self.finished_trials == self.inf_batch_size:
+            logger.debug('Start next round inference in receive_trial_result')
             self._next_round_inference()

     def trial_end(self, parameter_id, success, **kwargs):
@@ -650,6 +618,7 @@ class PPOTuner(Tuner):
             self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
             self.finished_trials += 1
             if self.finished_trials == self.inf_batch_size:
+                logger.debug('Start next round inference in trial_end')
                 self._next_round_inference()

     def import_data(self, data):
```
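To see what the rewrite changes in practice: for a flattened classic NAS search space like the one sketched earlier, the new `_process_nas_space` derives roughly the following action spaces and configuration template (a hand-worked illustration based on the code above, not output captured from the tuner):

```python
# Hand-worked illustration of the mapping implemented by the new _process_nas_space.
search_space = {
    "first_conv": {"_type": "layer_choice", "_value": ["conv5x5", "conv3x3"]},
    "skip": {"_type": "input_choice",
             "_value": {"candidates": ["zero", "identity"], "n_chosen": 1}},
}
# actions_to_config    -> [("first_conv", "layer_choice"), ("skip", "input_choice")]
# actions_spaces       -> [["conv5x5", "conv3x3"], ["zero", "identity"]]
# chosen_arch_template -> {"first_conv": None, "skip": None}
# _actions_to_config later fills the template from the PPO policy's actions, e.g.
# {"first_conv": {"_value": "conv3x3", "_idx": 1},
#  "skip": {"_value": ["identity"], "_idx": [1]}}
```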
tools/nni_cmd/nnictl.py (+9 -1)

The CLI gains an `ss_gen` subcommand wired to the new `search_space_auto_gen` helper:

```diff
@@ -10,7 +10,8 @@ from .launcher import create_experiment, resume_experiment, view_experiment
 from .updater import update_searchspace, update_concurrency, update_duration, update_trialnum, import_data
 from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status, \
                           log_trial, experiment_clean, platform_clean, experiment_list, \
-                          monitor_experiment, export_trials_data, trial_codegen, webui_url, get_config, log_stdout, log_stderr
+                          monitor_experiment, export_trials_data, trial_codegen, webui_url, \
+                          get_config, log_stdout, log_stderr, search_space_auto_gen
 from .package_management import package_install, package_show
 from .constants import DEFAULT_REST_PORT
 from .tensorboard_utils import start_tensorboard, stop_tensorboard
@@ -38,6 +39,13 @@ def parse_args():
     # create subparsers for args with sub values
     subparsers = parser.add_subparsers()

+    # parse the command of auto generating search space
+    parser_start = subparsers.add_parser('ss_gen', help='automatically generate search space file from trial code')
+    parser_start.add_argument('--trial_command', '-t', required=True, dest='trial_command', help='the command for running trial code')
+    parser_start.add_argument('--trial_dir', '-d', default='./', dest='trial_dir', help='the directory for running the command')
+    parser_start.add_argument('--file', '-f', default='nni_auto_gen_search_space.json', dest='file', help='the path of search space file')
+    parser_start.set_defaults(func=search_space_auto_gen)
+
     # parse start command
     parser_start = subparsers.add_parser('create', help='create a new experiment')
     parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
```
tools/nni_cmd/nnictl_utils.py (+14 -0)

The `search_space_auto_gen` helper dry-runs the trial command with `NNI_GEN_SEARCH_SPACE` set so that `ClassicMutator` dumps the search space file and exits:

```diff
@@ -9,6 +9,7 @@ import re
 import shutil
 from datetime import datetime, timezone
 from pathlib import Path
+from subprocess import Popen
 from pyhdfs import HdfsClient
 from nni_annotation import expand_annotations
 from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
@@ -675,3 +676,16 @@ def export_trials_data(args):
             print_error('Export failed...')
     else:
         print_error('Restful server is not Running')
+
+def search_space_auto_gen(args):
+    '''dry run trial code to generate search space file'''
+    trial_dir = os.path.expanduser(args.trial_dir)
+    file_path = os.path.expanduser(args.file)
+    if not os.path.isabs(file_path):
+        abs_file_path = os.path.join(os.getcwd(), file_path)
+    assert os.path.exists(trial_dir)
+    if os.path.exists(abs_file_path):
+        print_warning('%s already exits, will be over written' % abs_file_path)
+    print_normal('Dry run to generate search space...')
+    Popen(args.trial_command, cwd=trial_dir, env=dict(os.environ, NNI_GEN_SEARCH_SPACE=abs_file_path), shell=True).wait()
+    print_normal('Dry run to generate search space, Done')
\ No newline at end of file
```