OpenDAS / nni · Commits

Commit 55f48d27 (unverified)
Authored Sep 16, 2019 by QuanluZhang; committed by GitHub on Sep 16, 2019
Parent: 7246593f

Merge dev-nas-tuner back to master (#1531)

* PPO tuner for NAS, supports NNI's NAS interface (#1380)
Changes

Showing 20 changed files with 2209 additions and 36 deletions (+2209 -36)
- examples/trials/nas_cifar10/src/cifar10_flags.py (+45 -0)
- examples/trials/nas_cifar10/src/common_ops.py (+255 -0)
- examples/trials/nas_cifar10/src/utils.py (+262 -0)
- examples/tuners/random_nas_tuner/random_nas_tuner.py (+5 -4)
- src/nni_manager/rest_server/restValidationSchemas.ts (+1 -1)
- src/sdk/pynni/nni/constants.py (+3 -1)
- src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py (+2 -0)
- src/sdk/pynni/nni/msg_dispatcher.py (+6 -1)
- src/sdk/pynni/nni/nas_utils.py (+151 -20)
- src/sdk/pynni/nni/ppo_tuner/__init__.py (+0 -0)
- src/sdk/pynni/nni/ppo_tuner/distri.py (+198 -0)
- src/sdk/pynni/nni/ppo_tuner/model.py (+166 -0)
- src/sdk/pynni/nni/ppo_tuner/policy.py (+219 -0)
- src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py (+599 -0)
- src/sdk/pynni/nni/ppo_tuner/requirements.txt (+3 -0)
- src/sdk/pynni/nni/ppo_tuner/util.py (+266 -0)
- src/sdk/pynni/nni/tuner.py (+1 -2)
- tools/nni_annotation/.gitignore (+1 -0)
- tools/nni_annotation/test_annotation.py (+8 -7)
- tools/nni_cmd/config_schema.py (+18 -0)
examples/trials/nas_cifar10/src/cifar10_flags.py (new file, mode 100644)

```python
import tensorflow as tf

from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string

flags = tf.app.flags
FLAGS = flags.FLAGS

DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCHW'")
DEFINE_string("search_for", None, "Must be [macro|micro]")

DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")

DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
```
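For orientation (not part of the commit): these `DEFINE_*` wrappers come from `src/utils.py` below and delegate to `tf.app.flags`, so trial code reads every value through the shared `FLAGS` object. A minimal standalone sketch, assuming TensorFlow 1.x where `tf.app.flags` parses `sys.argv`:

```python
# Minimal usage sketch (assumes TensorFlow 1.x; standalone, not importing the files above).
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_integer("batch_size", 32, "")
FLAGS = flags.FLAGS

def main(_):
    # Prints 32, or whatever was passed on the command line as --batch_size=...
    print(FLAGS.batch_size)

if __name__ == "__main__":
    tf.app.run()  # parses the flags, then calls main
```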
examples/trials/nas_cifar10/src/common_ops.py (new file, mode 100644)

```python
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages


def lstm(x, prev_c, prev_h, w):
    ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
    i, f, o, g = tf.split(ifog, 4, axis=1)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    next_c = i * g + f * prev_c
    next_h = o * tf.tanh(next_c)
    return next_c, next_h


def stack_lstm(x, prev_c, prev_h, w):
    next_c, next_h = [], []
    for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
        inputs = x if layer_id == 0 else next_h[-1]
        curr_c, curr_h = lstm(inputs, _c, _h, _w)
        next_c.append(curr_c)
        next_h.append(curr_h)
    return next_c, next_h


def create_weight(name, shape, initializer=None, trainable=True, seed=None):
    if initializer is None:
        initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
    return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)


def create_bias(name, shape, initializer=None):
    if initializer is None:
        initializer = tf.constant_initializer(0.0, dtype=tf.float32)
    return tf.get_variable(name, shape, initializer=initializer)


def conv_op(inputs, filter_size, is_training, count, out_filters,
            data_format, ch_mul=1, start_idx=None, separable=False):
    """
    Args:
      start_idx: where to start taking the output channels. if None, assuming
        fixed_arc mode
      count: how many output_channels to take.
    """
    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value

    with tf.variable_scope("inp_conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)

    with tf.variable_scope("out_conv_{}".format(filter_size)):
        if start_idx is None:
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                w_point = create_weight(
                    "w_point", [1, 1, out_filters * ch_mul, count])
                x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
                                           padding="SAME", data_format=data_format)
                x = batch_norm(x, is_training, data_format=data_format)
            else:
                w = create_weight("w", [filter_size, filter_size, inp_c, count])
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
                x = batch_norm(x, is_training, data_format=data_format)
        else:
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                #test_depth = w_depth
                w_point = create_weight("w_point", [out_filters, out_filters * ch_mul])
                w_point = w_point[start_idx:start_idx + count, :]
                w_point = tf.transpose(w_point, [1, 0])
                w_point = tf.reshape(w_point, [1, 1, out_filters * ch_mul, count])

                x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
                                           padding="SAME", data_format=data_format)
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters, data_format=data_format)
            else:
                w = create_weight(
                    "w", [filter_size, filter_size, out_filters, out_filters])
                w = tf.transpose(w, [3, 0, 1, 2])
                w = w[start_idx:start_idx + count, :, :, :]
                w = tf.transpose(w, [1, 2, 3, 0])
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters, data_format=data_format)
    x = tf.nn.relu(x)
    return x


def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format,
            start_idx=None):
    """
    Args:
      start_idx: where to start taking the output channels. if None, assuming
        fixed_arc mode
      count: how many output_channels to take.
    """
    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value

    with tf.variable_scope("conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)

    with tf.variable_scope("pool"):
        if data_format == "NHWC":
            actual_data_format = "channels_last"
        elif data_format == "NCHW":
            actual_data_format = "channels_first"

        if avg_or_max == "avg":
            x = tf.layers.average_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        elif avg_or_max == "max":
            x = tf.layers.max_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        else:
            raise ValueError("Unknown pool {}".format(avg_or_max))

    if start_idx is not None:
        if data_format == "NHWC":
            x = x[:, :, :, start_idx:start_idx + count]
        elif data_format == "NCHW":
            x = x[:, start_idx:start_idx + count, :, :]

    return x


def global_avg_pool(x, data_format="NHWC"):
    if data_format == "NHWC":
        x = tf.reduce_mean(x, [1, 2])
    elif data_format == "NCHW":
        x = tf.reduce_mean(x, [2, 3])
    else:
        raise NotImplementedError("Unknown data_format {}".format(data_format))
    return x


def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
               data_format="NHWC"):
    if data_format == "NHWC":
        shape = [x.get_shape()[3]]
    elif data_format == "NCHW":
        shape = [x.get_shape()[1]]
    else:
        raise NotImplementedError("Unknown data_format {}".format(data_format))

    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))

        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            update_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            update_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            x, _, _ = tf.nn.fused_batch_norm(
                x, scale, offset, mean=moving_mean, variance=moving_variance,
                epsilon=epsilon, data_format=data_format, is_training=False)
    return x


def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                         decay=0.9, epsilon=1e-3, data_format="NHWC"):
    shape = [num_channels]
    indices = tf.where(mask)
    indices = tf.to_int32(indices)
    indices = tf.reshape(indices, [-1])

    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        offset = tf.boolean_mask(offset, mask)
        scale = tf.boolean_mask(scale, mask)

        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))

        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
            variance = (1.0 - decay) * \
                (tf.boolean_mask(moving_variance, mask) - variance)
            update_mean = tf.scatter_sub(
                moving_mean, indices, mean, use_locking=True)
            update_variance = tf.scatter_sub(
                moving_variance, indices, variance, use_locking=True)
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            masked_moving_mean = tf.boolean_mask(moving_mean, mask)
            masked_moving_variance = tf.boolean_mask(moving_variance, mask)
            x, _, _ = tf.nn.fused_batch_norm(
                x, scale, offset, mean=masked_moving_mean,
                variance=masked_moving_variance, epsilon=epsilon,
                data_format=data_format, is_training=False)
    return x
```
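The `lstm` helper above packs all four LSTM gates into one fused matmul over `[x, prev_h]` and then splits the result; `stack_lstm` chains cells by feeding each layer's hidden state into the next. As a plain-NumPy restatement of the same gate math (illustrative only, not part of the commit):

```python
# Illustrative NumPy re-statement of the gate math in `lstm` above (not part of this commit).
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_np(x, prev_c, prev_h, w):
    # w has shape [x_dim + hidden, 4 * hidden], matching the fused matmul in `lstm`
    ifog = np.concatenate([x, prev_h], axis=1) @ w
    i, f, o, g = np.split(ifog, 4, axis=1)
    i, f, o, g = sigmoid(i), sigmoid(f), sigmoid(o), np.tanh(g)
    next_c = i * g + f * prev_c    # new cell state
    next_h = o * np.tanh(next_c)   # new hidden state
    return next_c, next_h

hidden = 8
x = np.random.randn(2, 4)                       # batch of 2, input dim 4
w = np.random.randn(4 + hidden, 4 * hidden)
c = np.zeros((2, hidden))
h = np.zeros((2, hidden))
next_c, next_h = lstm_np(x, c, h, w)
assert next_h.shape == (2, hidden)
```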
examples/trials/nas_cifar10/src/utils.py (new file, mode 100644)

```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import numpy as np
import tensorflow as tf

user_flags = []


def DEFINE_string(name, default_value, doc_string):
    tf.app.flags.DEFINE_string(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_integer(name, default_value, doc_string):
    tf.app.flags.DEFINE_integer(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_float(name, default_value, doc_string):
    tf.app.flags.DEFINE_float(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_boolean(name, default_value, doc_string):
    tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def print_user_flags(line_limit=80):
    print("-" * 80)

    global user_flags
    FLAGS = tf.app.flags.FLAGS

    for flag_name in sorted(user_flags):
        value = "{}".format(getattr(FLAGS, flag_name))
        log_string = flag_name
        log_string += "." * (line_limit - len(flag_name) - len(value))
        log_string += value
        print(log_string)


def get_C(x, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    if data_format == "NHWC":
        return x.get_shape()[3].value
    elif data_format == "NCHW":
        return x.get_shape()[1].value
    else:
        raise ValueError("Unknown data_format '{0}'".format(data_format))


def get_HW(x, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    return x.get_shape()[2].value


def get_strides(stride, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    if data_format == "NHWC":
        return [1, stride, stride, 1]
    elif data_format == "NCHW":
        return [1, 1, stride, stride]
    else:
        raise ValueError("Unknown data_format '{0}'".format(data_format))


class TextColors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


class Logger(object):
    def __init__(self, output_file):
        self.terminal = sys.stdout
        self.log = open(output_file, "a")

    def write(self, message):
        self.terminal.write(message)
        self.terminal.flush()
        self.log.write(message)
        self.log.flush()


def count_model_params(tf_variables):
    """
    Args:
      tf_variables: list of all model variables
    """
    num_vars = 0
    for var in tf_variables:
        num_vars += np.prod([dim.value for dim in var.get_shape()])
    return num_vars


def get_train_ops(
        loss,
        tf_variables,
        train_step,
        clip_mode=None,
        grad_bound=None,
        l2_reg=1e-4,
        lr_warmup_val=None,
        lr_warmup_steps=100,
        lr_init=0.1,
        lr_dec_start=0,
        lr_dec_every=10000,
        lr_dec_rate=0.1,
        lr_dec_min=None,
        lr_cosine=False,
        lr_max=None,
        lr_min=None,
        lr_T_0=None,
        lr_T_mul=None,
        num_train_batches=None,
        optim_algo=None,
        sync_replicas=False,
        num_aggregate=None,
        num_replicas=None,
        get_grad_norms=False,
        moving_average=None):
    """
    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters
    """
    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var ** 2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(g.indices, c_g)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)  # keep the clipped gradient
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset, curr_epoch, use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(
            tf.greater_equal(T_curr, T_i), _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
            lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(
            learning_rate, 0.9, use_locking=True, use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate, use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(
        zip(grads, tf_variables), global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
```
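The `lr_cosine` branch of `get_train_ops` implements an SGDR-style schedule, `lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * T_curr / T_i))`, restarting when `T_curr` reaches `T_i` and multiplying the period by `lr_T_mul`. A plain-Python paraphrase of that schedule, stepped per epoch (illustrative only, not part of the commit):

```python
# Plain-Python paraphrase of the cosine schedule built in `get_train_ops` (illustrative only).
import math

def simulate_cosine_lr(num_epochs, lr_min, lr_max, T_0, T_mul):
    last_reset, T_i, lrs = 0, T_0, []
    for epoch in range(num_epochs):
        T_curr = epoch - last_reset
        if T_curr >= T_i:                 # mirrors the tf.cond(_update) branch
            last_reset, T_i = epoch, T_i * T_mul
            T_curr = 0
        rate = T_curr / T_i * 3.1415926
        lrs.append(lr_min + 0.5 * (lr_max - lr_min) * (1.0 + math.cos(rate)))
    return lrs

lrs = simulate_cosine_lr(30, lr_min=0.001, lr_max=0.05, T_0=10, T_mul=2)
# lrs[0] == 0.05 (the peak); the first restart happens at epoch 10,
# after which the period doubles to 20 epochs.
```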
examples/tuners/random_nas_tuner/random_nas_tuner.py (modified)

```diff
@@ -2,13 +2,14 @@ import numpy as np
 from nni.tuner import Tuner


 def random_archi_generator(nas_ss, random_state):
     '''random
     '''
     chosen_archi = {}
+    print("zql: nas search space: ", nas_ss)
     for block_name, block_value in nas_ss.items():
-        assert block_value['_type'] == "mutable_layer", "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
+        assert block_value['_type'] == "mutable_layer", \
+            "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
         block = block_value['_value']
         tmp_block = {}
         for layer_name, layer in block.items():
@@ -19,13 +20,12 @@ def random_archi_generator(nas_ss, random_state):
                 tmp_layer['chosen_layer'] = value[index]
             elif key == 'optional_inputs':
                 tmp_layer['chosen_inputs'] = []
+                print("zql: optional_inputs", layer['optional_inputs'])
                 if layer['optional_inputs']:
                     if isinstance(layer['optional_input_size'], int):
                         choice_num = layer['optional_input_size']
                     else:
                         choice_range = layer['optional_input_size']
                         choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
                     for _ in range(choice_num):
                         index = random_state.randint(len(layer['optional_inputs']))
                         tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
@@ -37,6 +37,7 @@ def random_archi_generator(nas_ss, random_state):
         chosen_archi[block_name] = tmp_block
     return chosen_archi


 class RandomNASTuner(Tuner):
     '''RandomNASTuner
     '''
```
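For reference, given a NAS search space with one `mutable_layer` block, `random_archi_generator` returns a nested dict of chosen layers and inputs. The example below is hand-constructed to match the code above; all names are hypothetical:

```python
# Hypothetical input/output pair for random_archi_generator (names are illustrative).
nas_ss = {
    "mutable_block_1": {
        "_type": "mutable_layer",
        "_value": {
            "mutable_layer_1": {
                "layer_choice": ["conv3x3", "conv5x5", "maxpool"],
                "optional_inputs": ["image", "prev_out"],
                "optional_input_size": 1,
            }
        }
    }
}

# One possible result of random_archi_generator(nas_ss, random_state):
chosen_archi = {
    "mutable_block_1": {
        "mutable_layer_1": {
            "chosen_layer": "conv5x5",
            "chosen_inputs": ["image"],
        }
    }
}
```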
src/nni_manager/rest_server/restValidationSchemas.ts (modified)

```diff
@@ -174,7 +174,7 @@ export namespace ValidationSchemas {
             checkpointDir: joi.string().allow('')
         }),
         tuner: joi.object({
-            builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner'),
+            builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner', 'PPOTuner'),
             codeDir: joi.string(),
             classFileName: joi.string(),
             className: joi.string(),
```
src/sdk/pynni/nni/constants.py (modified)

```diff
@@ -30,7 +30,8 @@ ModuleName = {
     'NetworkMorphism': 'nni.networkmorphism_tuner.networkmorphism_tuner',
     'Curvefitting': 'nni.curvefitting_assessor.curvefitting_assessor',
     'MetisTuner': 'nni.metis_tuner.metis_tuner',
-    'GPTuner': 'nni.gp_tuner.gp_tuner'
+    'GPTuner': 'nni.gp_tuner.gp_tuner',
+    'PPOTuner': 'nni.ppo_tuner.ppo_tuner'
 }

 ClassName = {
@@ -44,6 +45,7 @@ ClassName = {
     'NetworkMorphism': 'NetworkMorphismTuner',
     'MetisTuner': 'MetisTuner',
     'GPTuner': 'GPTuner',
+    'PPOTuner': 'PPOTuner',

     'Medianstop': 'MedianstopAssessor',
     'Curvefitting': 'CurvefittingAssessor'
```
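These two tables are what make the string `'PPOTuner'` resolvable from an experiment configuration: `ModuleName` supplies the module path and `ClassName` the class to instantiate. A sketch of how such a lookup could be wired up with `importlib` (illustrative; this is not NNI's actual loader code):

```python
# Illustrative sketch of resolving a builtin tuner name via these tables
# (hypothetical helper; NNI's real loading logic lives elsewhere in the SDK).
import importlib

ModuleName = {'PPOTuner': 'nni.ppo_tuner.ppo_tuner'}
ClassName = {'PPOTuner': 'PPOTuner'}

def create_builtin_tuner(name, **class_args):
    module = importlib.import_module(ModuleName[name])  # e.g. nni.ppo_tuner.ppo_tuner
    cls = getattr(module, ClassName[name])              # e.g. PPOTuner
    return cls(**class_args)
```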
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py (modified)

```diff
@@ -27,6 +27,7 @@ import logging
 import hyperopt as hp
 import numpy as np
 from nni.tuner import Tuner
+from nni.nas_utils import rewrite_nas_space
 from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index

 logger = logging.getLogger('hyperopt_AutoML')
@@ -240,6 +241,7 @@ class HyperoptTuner(Tuner):
             return hp.anneal.suggest
         raise RuntimeError('Not support tuner algorithm in hyperopt.')

+    @rewrite_nas_space
     def update_search_space(self, search_space):
         """
         Update search space definition in tuner by search_space in parameters.
```
src/sdk/pynni/nni/msg_dispatcher.py (modified)

```diff
@@ -101,11 +101,16 @@ class MsgDispatcher(MsgDispatcherBase):
         self.tuner.update_search_space(data)
         send(CommandType.Initialized, '')

+    def send_trial_callback(self, id, params):
+        """For tuner to issue trial config when the config is generated
+        """
+        send(CommandType.NewTrialJob, _pack_parameter(id, params))
+
     def handle_request_trial_jobs(self, data):
         # data: number or trial jobs
         ids = [_create_parameter_id() for _ in range(data)]
         _logger.debug("requesting for generating params of {}".format(ids))
-        params_list = self.tuner.generate_multiple_parameters(ids)
+        params_list = self.tuner.generate_multiple_parameters(ids, st_callback=self.send_trial_callback)
         for i, _ in enumerate(params_list):
             send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
```
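The new `st_callback` argument lets a tuner emit each trial configuration through `send_trial_callback` as soon as it is generated, rather than waiting for the whole batch to be returned; anything the tuner still returns is sent by the loop above as before. A hypothetical tuner-side sketch of using the callback:

```python
# Hypothetical tuner-side use of st_callback (illustrative; not the PPOTuner implementation).
def generate_multiple_parameters(self, parameter_id_list, st_callback=None):
    leftover = []
    for pid in parameter_id_list:
        params = self._sample_one_config(pid)  # hypothetical helper
        if st_callback is not None:
            st_callback(pid, params)           # dispatcher sends NewTrialJob immediately
        else:
            leftover.append(params)
    return leftover                            # only configs not yet sent go back to the loop
```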
src/sdk/pynni/nni/nas_utils.py (modified)

```diff
@@ -17,10 +17,16 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
 # OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 # ==================================================================================================
+import functools
+import logging

 from . import trial

+_logger = logging.getLogger(__name__)
+
+_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer"
+

 def classic_mode(
         mutable_id,
         mutable_layer_id,
@@ -34,13 +40,11 @@ def classic_mode(
     without touching the full model graph.'''
     if trial.get_current_parameter() is None:
         trial.get_next_parameter()
-    mutable_block = trial.get_current_parameter(mutable_id)
-    chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
-    chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
+    chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
+                                                                   list(optional_inputs.keys()))
     real_chosen_inputs = [optional_inputs[input_name]
                           for input_name in chosen_inputs]
-    layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
+    layer_out = funcs[chosen_layer](
+        [fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])

     return layer_out
@@ -173,20 +177,44 @@ def reload_tensorflow_variables(tf, session):
     tf: tensorflow module
     '''
     subgraph_from_tuner = trial.get_next_parameter()
-    for mutable_id, mutable_block in subgraph_from_tuner.items():
-        if mutable_id not in name_space:
-            continue
-        for mutable_layer_id, mutable_layer in mutable_block.items():
-            name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
-            # get optional inputs names
-            optional_inputs = name_space[name_prefix]['optional_inputs']
-            # extract layer information from the subgraph sampled by tuner
-            chosen_layer = name_space[name_prefix]['funcs'].index(
-                mutable_layer["chosen_layer"])
-            chosen_inputs = [1 if inp in mutable_layer["chosen_inputs"]
-                             else 0 for inp in name_space[name_prefix]['optional_inputs']]
-            # load these information into pre-defined tensorflow variables
-            tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
-            tf_variables[name_prefix]['optional_inputs'].load(
-                chosen_inputs, session)
+    mutable_layers = set()
+    for subgraph_key in subgraph_from_tuner:
+        if "/" in subgraph_key:
+            # has to remove the last, could be layer_choice or whatever
+            mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")])
+            if mutable_id is not None:
+                mutable_layers.add((mutable_id, mutable_layer_id))
+    mutable_layers = sorted(list(mutable_layers))
+    for mutable_id, mutable_layer_id in mutable_layers:
+        if mutable_id not in name_space:
+            _logger.warning("{} not found in name space".format(mutable_id))
+            continue
+        name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
+        # get optional inputs names
+        optional_inputs = name_space[name_prefix]['optional_inputs']
+        # extract layer information from the subgraph sampled by tuner
+        chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
+                                                                       optional_inputs)
+        chosen_layer = name_space[name_prefix]['funcs'].index(chosen_layer)
+        chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs]
+        # load these information into pre-defined tensorflow variables
+        tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
+        tf_variables[name_prefix]['optional_inputs'].load(
+            chosen_inputs, session)
+
+def _construct_general_key(mutable_id, mutable_layer_id):
+    # Mutable layer key in a general (search space) format
+    # that is, prefix/mutable_id/mutable_layer_id
+    return _MUTABLE_LAYER_SPACE_PREFIX + "/" + mutable_id + "/" + mutable_layer_id
+
+def _decompose_general_key(key):
+    # inverse operation of above
+    if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX):
+        return None, None
+    else:
+        _, mutable_id, mutable_layer_id = key.split("/", maxsplit=2)
+        return mutable_id, mutable_layer_id

 def darts_training(tf, session, loss, feed_dict):
@@ -205,4 +233,107 @@ def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None):
     if nas_mode == 'darts_mode':
         darts_training(tf, session, loss, feed_dict)
     elif nas_mode == 'enas_mode':
-        reload_tensorflow_variables(tf, session)
\ No newline at end of file
+        reload_tensorflow_variables(tf, session)
+
+def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs):
+    # optional_inputs should be name(key)s of the optional inputs
+    try:
+        mutable_block = trial.get_current_parameter(mutable_id)
+        # There is a NAS tuner
+        chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
+        chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
+    except KeyError:
+        # Try to find converted NAS parameters
+        params = trial.get_current_parameter()
+        expected_prefix = _construct_general_key(mutable_id, mutable_layer_id)
+        chosen_layer = params[expected_prefix + "/layer_choice"]
+        # find how many to choose
+        optional_input_size = int(params[expected_prefix + "/optional_input_size"])  # convert uniform to randint
+        # find who to choose, can duplicate
+        optional_input_state = params[expected_prefix + "/optional_input_chosen_state"]
+        chosen_inputs = []
+        # make sure dict -> list produce stable result by sorting
+        optional_inputs_keys = sorted(optional_inputs)
+        for i in range(optional_input_size):
+            chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)])
+            optional_input_state //= len(optional_inputs)
+    _logger.info("%s_%s: layer: %s, optional inputs: %s" % (mutable_id, mutable_layer_id,
+                                                            chosen_layer, chosen_inputs))
+    return chosen_layer, chosen_inputs
+
+def convert_nas_search_space(search_space):
+    """
+    Args:
+        param search_space: raw search space
+        return: the new search space, mutable_layers will be converted into choice
+    """
+    ret = dict()
+    for k, v in search_space.items():
+        if "_type" not in v:
+            # this should not happen
+            _logger.warning("There is no _type in one of your search space values with key '%s'"
+                            ". Please check your search space" % k)
+            ret[k] = v
+        elif v["_type"] != "mutable_layer":
+            ret[k] = v
+        else:
+            _logger.info("Converting mutable_layer search space with key '%s'" % k)
+            # v["_value"] looks like {'mutable_layer_1': {'layer_choice': ...} ...}
+            values = v["_value"]
+            for layer_name, layer_data in values.items():
+                # there should be at most layer_choice, optional_inputs, optional_input_size in layer_data
+                # add "_mutable_layer" as prefix so that they can be recovered later
+                layer_key = _construct_general_key(k, layer_name)
+                if layer_data.get("layer_choice"):  # filter out empty choice and no choice
+                    layer_choice = layer_data["layer_choice"]
+                else:
+                    raise ValueError("No layer choice found in %s" % layer_key)
+                if layer_data.get("optional_input_size"):
+                    input_size = layer_data["optional_input_size"]
+                    if isinstance(input_size, int):
+                        input_size = [input_size, input_size]
+                    if input_size[0] > input_size[1] or input_size[0] < 0:
+                        _logger.error("Might not be able to handle optional_input_size < 0, please double check")
+                    input_size[1] += 1
+                else:
+                    _logger.info("Optional input choices are set to empty by default in %s" % layer_key)
+                    input_size = [0, 1]
+                if layer_data.get("optional_inputs"):
+                    total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1)
+                else:
+                    _logger.info("Optional inputs not found in %s" % layer_key)
+                    total_state_size = 1
+                converted = {
+                    layer_key + "/layer_choice": {
+                        "_type": "choice", "_value": layer_choice
+                    },
+                    layer_key + "/optional_input_size": {
+                        "_type": "randint", "_value": input_size
+                    },
+                    layer_key + "/optional_input_chosen_state": {
+                        "_type": "randint", "_value": [0, total_state_size]
+                    }
+                }
+                _logger.info(converted)
+                ret.update(converted)
+    return ret
+
+def rewrite_nas_space(func):
+    @functools.wraps(func)
+    def wrap(self, search_space):
+        search_space = convert_nas_search_space(search_space)
+        return func(self, search_space)
+    return wrap
```
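A hand-worked example of `convert_nas_search_space` may help. With three candidate ops, two optional inputs, and `optional_input_size: 2`, the int is first widened to `[2, 2]`, the upper bound is bumped to 3 for `randint`, and `total_state_size = 2 ** (3 - 1) = 4`; `_get_layer_and_inputs_from_tuner` later decodes `optional_input_chosen_state` as a base-2 number to recover the chosen inputs. All key names below are hypothetical:

```python
# Hand-worked input/output example for convert_nas_search_space (hypothetical names).
raw = {
    "block1": {
        "_type": "mutable_layer",
        "_value": {
            "layer1": {
                "layer_choice": ["conv3x3", "conv5x5", "maxpool"],
                "optional_inputs": ["imageA", "imageB"],
                "optional_input_size": 2,
            }
        }
    }
}

# convert_nas_search_space(raw) produces:
expected = {
    "_mutable_layer/block1/layer1/layer_choice": {
        "_type": "choice", "_value": ["conv3x3", "conv5x5", "maxpool"]
    },
    # int 2 -> [2, 2], then the upper bound is incremented for randint
    "_mutable_layer/block1/layer1/optional_input_size": {
        "_type": "randint", "_value": [2, 3]
    },
    # total_state_size = 2 ** (3 - 1) = 4 possible input combinations
    "_mutable_layer/block1/layer1/optional_input_chosen_state": {
        "_type": "randint", "_value": [0, 4]
    },
}
```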
src/sdk/pynni/nni/ppo_tuner/__init__.py (new file, mode 100644, empty)
src/sdk/pynni/nni/ppo_tuner/distri.py (new file, mode 100644)

```python
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
functions for sampling from hidden state
"""

import tensorflow as tf

from .util import fc


class Pd:
    """
    A particular probability distribution
    """
    def flatparam(self):
        raise NotImplementedError

    def mode(self):
        raise NotImplementedError

    def neglogp(self, x):
        # Usually it's easier to define the negative logprob
        raise NotImplementedError

    def kl(self, other):
        raise NotImplementedError

    def entropy(self):
        raise NotImplementedError

    def sample(self):
        raise NotImplementedError

    def logp(self, x):
        return - self.neglogp(x)

    def get_shape(self):
        return self.flatparam().shape

    @property
    def shape(self):
        return self.get_shape()

    def __getitem__(self, idx):
        return self.__class__(self.flatparam()[idx])


class PdType:
    """
    Parametrized family of probability distributions
    """
    def pdclass(self):
        raise NotImplementedError

    def pdfromflat(self, flat, mask, nsteps, size, is_act_model):
        return self.pdclass()(flat, mask, nsteps, size, is_act_model)

    def pdfromlatent(self, latent_vector, init_scale, init_bias):
        raise NotImplementedError

    def param_shape(self):
        raise NotImplementedError

    def sample_shape(self):
        raise NotImplementedError

    def sample_dtype(self):
        raise NotImplementedError

    def param_placeholder(self, prepend_shape, name=None):
        return tf.placeholder(dtype=tf.float32,
                              shape=prepend_shape + self.param_shape(), name=name)

    def sample_placeholder(self, prepend_shape, name=None):
        return tf.placeholder(dtype=self.sample_dtype(),
                              shape=prepend_shape + self.sample_shape(), name=name)


class CategoricalPd(Pd):
    """
    categorical probability distribution
    """
    def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
        self.logits = logits
        self.mask_npinf = mask_npinf
        self.nsteps = nsteps
        self.size = size
        self.is_act_model = is_act_model

    def flatparam(self):
        return self.logits

    def mode(self):
        return tf.argmax(self.logits, axis=-1)

    @property
    def mean(self):
        return tf.nn.softmax(self.logits)

    def neglogp(self, x):
        """
        return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
        Note: we can't use sparse_softmax_cross_entropy_with_logits because
              the implementation does not allow second-order derivatives...
        """
        if x.dtype in {tf.uint8, tf.int32, tf.int64}:
            # one-hot encoding
            x_shape_list = x.shape.as_list()
            logits_shape_list = self.logits.get_shape().as_list()[:-1]
            for xs, ls in zip(x_shape_list, logits_shape_list):
                if xs is not None and ls is not None:
                    assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
            x = tf.one_hot(x, self.logits.get_shape().as_list()[-1])
        else:
            # already encoded
            assert x.shape.as_list() == self.logits.shape.as_list()
        return tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=x)

    def kl(self, other):
        """kl"""
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        ea1 = tf.exp(a1)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)

    def entropy(self):
        """compute entropy"""
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)

    def sample(self):
        """sample from logits"""
        if not self.is_act_model:
            re_res = tf.reshape(self.logits, [-1, self.nsteps, self.size])
            masked_res = tf.math.add(re_res, self.mask_npinf)
            re_masked_res = tf.reshape(masked_res, [-1, self.size])

            u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
            return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
        else:
            u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
            return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)

    @classmethod
    def fromflat(cls, flat):
        return cls(flat)


class CategoricalPdType(PdType):
    """
    to create CategoricalPd
    """
    def __init__(self, ncat, nsteps, np_mask, is_act_model):
        self.ncat = ncat
        self.nsteps = nsteps
        self.np_mask = np_mask
        self.is_act_model = is_act_model

    def pdclass(self):
        return CategoricalPd

    def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
        """add fc and create CategoricalPd"""
        pdparam, mask, mask_npinf = _matching_fc(latent_vector, 'pi', self.ncat, self.nsteps,
                                                 init_scale=init_scale, init_bias=init_bias,
                                                 np_mask=self.np_mask,
                                                 is_act_model=self.is_act_model)
        return self.pdfromflat(pdparam, mask_npinf, self.nsteps, self.ncat,
                               self.is_act_model), pdparam, mask, mask_npinf

    def param_shape(self):
        return [self.ncat]

    def sample_shape(self):
        return []

    def sample_dtype(self):
        return tf.int32


def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
    """
    add fc op, and add mask op when not in action mode
    """
    if tensor.shape[-1] == size:
        assert False
        return tensor
    else:
        mask = tf.get_variable("act_mask", dtype=tf.float32,
                               initializer=np_mask[0], trainable=False)
        mask_npinf = tf.get_variable("act_mask_npinf", dtype=tf.float32,
                                     initializer=np_mask[1], trainable=False)
        res = fc(tensor, name, size, init_scale=init_scale, init_bias=init_bias)
        if not is_act_model:
            re_res = tf.reshape(res, [-1, nsteps, size])
            masked_res = tf.math.multiply(re_res, mask)
            re_masked_res = tf.reshape(masked_res, [-1, size])
            return re_masked_res, mask, mask_npinf
        else:
            return res, mask, mask_npinf
```
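`CategoricalPd.sample` uses the Gumbel-max trick: `argmax(logits - log(-log(u)))` with `u ~ Uniform(0, 1)` is an exact draw from the softmax of the logits, and adding `mask_npinf` (which, as the name suggests, holds very negative values at invalid positions) keeps masked actions from ever winning the argmax. A NumPy sanity check of the trick (illustrative, not part of the commit):

```python
# NumPy sanity check of the Gumbel-max sampling used by CategoricalPd.sample (illustrative).
import numpy as np

rng = np.random.default_rng(0)
logits = np.array([1.0, 2.0, 0.5])
probs = np.exp(logits) / np.exp(logits).sum()   # softmax target distribution

u = rng.uniform(size=(200000, 3))
samples = np.argmax(logits - np.log(-np.log(u)), axis=1)
empirical = np.bincount(samples, minlength=3) / samples.size

# empirical should closely track probs, both near [0.23, 0.63, 0.14]
print(probs, empirical)
```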
src/sdk/pynni/nni/ppo_tuner/model.py (new file, mode 100644)

```python
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
the main model of policy/value network
"""

import tensorflow as tf

from .util import initialize, get_session


class Model:
    """
    We use this object to :
    __init__:
    - Creates the step_model
    - Creates the train_model
    train():
    - Make the training part (feedforward and retropropagation of gradients)
    save/load():
    - Save load the model
    """
    def __init__(self, *, policy, nbatch_act, nbatch_train,
                 nsteps, ent_coef, vf_coef, max_grad_norm,
                 microbatch_size=None, np_mask=None):
        """
        init
        """
        self.sess = sess = get_session()

        with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
            # CREATE OUR TWO MODELS
            # act_model that is used for sampling
            act_model = policy(nbatch_act, 1, sess, np_mask=np_mask, is_act_model=True)

            # Train model for training
            if microbatch_size is None:
                train_model = policy(nbatch_train, nsteps, sess, np_mask=np_mask,
                                     is_act_model=False)
            else:
                train_model = policy(microbatch_size, nsteps, sess, np_mask=np_mask,
                                     is_act_model=False)

        # CREATE THE PLACEHOLDERS
        self.A = A = train_model.pdtype.sample_placeholder([None])
        self.ADV = ADV = tf.placeholder(tf.float32, [None])
        self.R = R = tf.placeholder(tf.float32, [None])
        # Keep track of old actor
        self.OLDNEGLOGPAC = OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
        # Keep track of old critic
        self.OLDVPRED = OLDVPRED = tf.placeholder(tf.float32, [None])
        self.LR = LR = tf.placeholder(tf.float32, [])
        # Cliprange
        self.CLIPRANGE = CLIPRANGE = tf.placeholder(tf.float32, [])

        neglogpac = train_model.pd.neglogp(A)

        # Calculate the entropy
        # Entropy is used to improve exploration by limiting the premature convergence to suboptimal policy.
        entropy = tf.reduce_mean(train_model.pd.entropy())

        # CALCULATE THE LOSS
        # Total loss = Policy gradient loss - entropy * entropy coefficient + Value coefficient * value loss

        # Clip the value to reduce variability during Critic training
        # Get the predicted value
        vpred = train_model.vf
        vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED,
                                                   - CLIPRANGE, CLIPRANGE)
        # Unclipped value
        vf_losses1 = tf.square(vpred - R)
        # Clipped value
        vf_losses2 = tf.square(vpredclipped - R)

        vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))

        # Calculate ratio (pi current policy / pi old policy)
        ratio = tf.exp(OLDNEGLOGPAC - neglogpac)

        # Defining Loss = - J is equivalent to max J
        pg_losses = -ADV * ratio
        pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE, 1.0 + CLIPRANGE)

        # Final PG loss
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
        approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
        clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))

        # Total loss
        loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

        # UPDATE THE PARAMETERS USING LOSS
        # 1. Get the model parameters
        params = tf.trainable_variables('ppo2_model')

        # 2. Build our trainer
        self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)

        # 3. Calculate the gradients
        grads_and_var = self.trainer.compute_gradients(loss, params)
        grads, var = zip(*grads_and_var)

        if max_grad_norm is not None:
            # Clip the gradients (normalize)
            grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads_and_var = list(zip(grads, var))
        # zip aggregate each gradient with parameters associated
        # For instance zip(ABCD, xyza) => Ax, By, Cz, Da

        self.grads = grads
        self.var = var
        self._train_op = self.trainer.apply_gradients(grads_and_var)
        self.loss_names = ['policy_loss', 'value_loss', 'policy_entropy',
                           'approxkl', 'clipfrac']
        self.stats_list = [pg_loss, vf_loss, entropy, approxkl, clipfrac]

        self.train_model = train_model
        self.act_model = act_model
        self.step = act_model.step
        self.value = act_model.value
        self.initial_state = act_model.initial_state

        initialize()

    def train(self, lr, cliprange, obs, returns, masks, actions, values,
              neglogpacs, states=None):
        """
        train the model.
        Here we calculate advantage A(s,a) = R + yV(s') - V(s)
        Returns = R + yV(s')
        """
        advs = returns - values

        # Normalize the advantages
        advs = (advs - advs.mean()) / (advs.std() + 1e-8)

        td_map = {
            self.train_model.X: obs,
            self.A: actions,
            self.ADV: advs,
            self.R: returns,
            self.LR: lr,
            self.CLIPRANGE: cliprange,
            self.OLDNEGLOGPAC: neglogpacs,
            self.OLDVPRED: values
        }
        if states is not None:
            td_map[self.train_model.S] = states
            td_map[self.train_model.M] = masks

        return self.sess.run(self.stats_list + [self._train_op], td_map)[:-1]
```
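The loss built in `Model.__init__` is the standard PPO clipped surrogate: with ratio `r = exp(OLDNEGLOGPAC - neglogpac)`, the policy term is `mean(max(-ADV * r, -ADV * clip(r, 1 - CLIPRANGE, 1 + CLIPRANGE)))`. A tiny NumPy illustration with hand-made numbers, showing how the clip caps the incentive once the ratio leaves the trust region:

```python
# NumPy illustration of the clipped PPO policy loss from Model.__init__ (hand-made numbers).
import numpy as np

adv = np.array([1.0, -1.0, 0.5])     # advantages
ratio = np.array([1.5, 0.5, 1.05])   # pi_new / pi_old per action
cliprange = 0.2

pg_losses = -adv * ratio
pg_losses2 = -adv * np.clip(ratio, 1.0 - cliprange, 1.0 + cliprange)
pg_loss = np.mean(np.maximum(pg_losses, pg_losses2))

# For the first sample the unclipped term would give -1.5, but the clipped term -1.2
# wins under max(): once the ratio leaves [0.8, 1.2] the objective stops rewarding
# further movement of the policy.
print(pg_loss)
```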
src/sdk/pynni/nni/ppo_tuner/policy.py
0 → 100644
View file @
55f48d27
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
build policy/value network from model
"""
import
tensorflow
as
tf
from
.distri
import
CategoricalPdType
from
.util
import
lstm_model
,
fc
,
observation_placeholder
,
adjust_shape
class
PolicyWithValue
:
"""
Encapsulates fields and methods for RL policy and value function estimation with shared parameters
"""
def
__init__
(
self
,
env
,
observations
,
latent
,
estimate_q
=
False
,
vf_latent
=
None
,
sess
=
None
,
np_mask
=
None
,
is_act_model
=
False
,
**
tensors
):
"""
Parameters:
----------
env: RL environment
observations: tensorflow placeholder in which the observations will be fed
latent: latent state from which policy distribution parameters should be inferred
vf_latent: latent state from which value function should be inferred (if None, then latent is used)
sess: tensorflow session to run calculations in (if None, default session is used)
**tensors: tensorflow tensors for additional attributes such as state or mask
"""
self
.
X
=
observations
self
.
state
=
tf
.
constant
([])
self
.
initial_state
=
None
self
.
__dict__
.
update
(
tensors
)
vf_latent
=
vf_latent
if
vf_latent
is
not
None
else
latent
vf_latent
=
tf
.
layers
.
flatten
(
vf_latent
)
latent
=
tf
.
layers
.
flatten
(
latent
)
# Based on the action space, will select what probability distribution type
self
.
np_mask
=
np_mask
self
.
pdtype
=
CategoricalPdType
(
env
.
action_space
.
n
,
env
.
nsteps
,
np_mask
,
is_act_model
)
self
.
act_latent
=
latent
self
.
nh
=
env
.
action_space
.
n
self
.
pd
,
self
.
pi
,
self
.
mask
,
self
.
mask_npinf
=
self
.
pdtype
.
pdfromlatent
(
latent
,
init_scale
=
0.01
)
# Take an action
self
.
action
=
self
.
pd
.
sample
()
# Calculate the neg log of our probability
self
.
neglogp
=
self
.
pd
.
neglogp
(
self
.
action
)
self
.
sess
=
sess
or
tf
.
get_default_session
()
assert
estimate_q
is
False
self
.
vf
=
fc
(
vf_latent
,
'vf'
,
1
)
self
.
vf
=
self
.
vf
[:,
0
]
if
is_act_model
:
self
.
_build_model_for_step
()
def
_evaluate
(
self
,
variables
,
observation
,
**
extra_feed
):
sess
=
self
.
sess
feed_dict
=
{
self
.
X
:
adjust_shape
(
self
.
X
,
observation
)}
for
inpt_name
,
data
in
extra_feed
.
items
():
if
inpt_name
in
self
.
__dict__
.
keys
():
inpt
=
self
.
__dict__
[
inpt_name
]
if
isinstance
(
inpt
,
tf
.
Tensor
)
and
inpt
.
_op
.
type
==
'Placeholder'
:
feed_dict
[
inpt
]
=
adjust_shape
(
inpt
,
data
)
return
sess
.
run
(
variables
,
feed_dict
)
def
_build_model_for_step
(
self
):
# multiply with weight and apply mask on self.act_latent to generate
self
.
act_step
=
step
=
tf
.
placeholder
(
shape
=
(),
dtype
=
tf
.
int64
,
name
=
'act_step'
)
with
tf
.
variable_scope
(
'pi'
,
reuse
=
tf
.
AUTO_REUSE
):
from
.util
import
ortho_init
nin
=
self
.
act_latent
.
get_shape
()[
1
].
value
w
=
tf
.
get_variable
(
"w"
,
[
nin
,
self
.
nh
],
initializer
=
ortho_init
(
0.01
))
b
=
tf
.
get_variable
(
"b"
,
[
self
.
nh
],
initializer
=
tf
.
constant_initializer
(
0.0
))
logits
=
tf
.
matmul
(
self
.
act_latent
,
w
)
+
b
piece
=
tf
.
slice
(
self
.
mask
,
[
step
,
0
],
[
1
,
self
.
nh
])
re_piece
=
tf
.
reshape
(
piece
,
[
-
1
])
masked_logits
=
tf
.
math
.
multiply
(
logits
,
re_piece
)
npinf_piece
=
tf
.
slice
(
self
.
mask_npinf
,
[
step
,
0
],
[
1
,
self
.
nh
])
re_npinf_piece
=
tf
.
reshape
(
npinf_piece
,
[
-
1
])
def
sample
(
logits
,
mask_npinf
):
new_logits
=
tf
.
math
.
add
(
logits
,
mask_npinf
)
u
=
tf
.
random_uniform
(
tf
.
shape
(
new_logits
),
dtype
=
logits
.
dtype
)
return
tf
.
argmax
(
new_logits
-
tf
.
log
(
-
tf
.
log
(
u
)),
axis
=-
1
)
def
neglogp
(
logits
,
x
):
# return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
# Note: we can't use sparse_softmax_cross_entropy_with_logits because
# the implementation does not allow second-order derivatives...
if
x
.
dtype
in
{
tf
.
uint8
,
tf
.
int32
,
tf
.
int64
}:
# one-hot encoding
x_shape_list
=
x
.
shape
.
as_list
()
logits_shape_list
=
logits
.
get_shape
().
as_list
()[:
-
1
]
for
xs
,
ls
in
zip
(
x_shape_list
,
logits_shape_list
):
if
xs
is
not
None
and
ls
is
not
None
:
assert
xs
==
ls
,
'shape mismatch: {} in x vs {} in logits'
.
format
(
xs
,
ls
)
x
=
tf
.
one_hot
(
x
,
logits
.
get_shape
().
as_list
()[
-
1
])
else
:
# already encoded
assert
x
.
shape
.
as_list
()
==
logits
.
shape
.
as_list
()
return
tf
.
nn
.
softmax_cross_entropy_with_logits_v2
(
logits
=
logits
,
labels
=
x
)
self
.
act_action
=
sample
(
masked_logits
,
re_npinf_piece
)
self
.
act_neglogp
=
neglogp
(
masked_logits
,
self
.
act_action
)
    def step(self, step, observation, **extra_feed):
        """
        Compute the next action(s) given the observation(s)

        Parameters:
        ----------
        observation: observation data (either a single one or a batch)
        **extra_feed: additional data such as state or mask (the names of the arguments should match the ones in the constructor, see __init__)

        Returns:
        -------
        (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
        """
        extra_feed['act_step'] = step
        a, v, state, neglogp = self._evaluate([self.act_action, self.vf, self.state, self.act_neglogp], observation, **extra_feed)
        if state.size == 0:
            state = None
        return a, v, state, neglogp

    def value(self, ob, *args, **kwargs):
        """
        Compute value estimate(s) given the observation(s)

        Parameters:
        ----------
        observation: observation data (either a single one or a batch)
        **extra_feed: additional data such as state or mask (the names of the arguments should match the ones in the constructor, see __init__)

        Returns:
        -------
        value estimate
        """
        return self._evaluate(self.vf, ob, *args, **kwargs)
def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
    """
    Build the LSTM policy and value network; they share the same LSTM network,
    whose parameters all use their default values.
    """
    policy_network = lstm_model(**policy_kwargs)

    def policy_fn(nbatch=None, nsteps=None, sess=None, observ_placeholder=None, np_mask=None, is_act_model=False):
        ob_space = model_config.observation_space

        X = observ_placeholder if observ_placeholder is not None else observation_placeholder(ob_space, batch_size=nbatch)

        extra_tensors = {}

        # encode_observation is not necessary anymore as we use embedding_lookup
        encoded_x = X

        with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
            policy_latent = policy_network(encoded_x, 1, model_config.observation_space.n)
            if isinstance(policy_latent, tuple):
                policy_latent, recurrent_tensors = policy_latent

                if recurrent_tensors is not None:
                    # recurrent architecture, need a few more steps
                    nenv = nbatch // nsteps
                    assert nenv > 0, 'Bad input for recurrent policy: batch size {} smaller than nsteps {}'.format(nbatch, nsteps)
                    policy_latent, recurrent_tensors = policy_network(encoded_x, nenv, model_config.observation_space.n)
                    extra_tensors.update(recurrent_tensors)

        _v_net = value_network
        assert _v_net is None or _v_net == 'shared'
        vf_latent = policy_latent

        policy = PolicyWithValue(
            env=model_config,
            observations=X,
            latent=policy_latent,
            vf_latent=vf_latent,
            sess=sess,
            estimate_q=estimate_q,
            np_mask=np_mask,
            is_act_model=is_act_model,
            **extra_tensors
        )
        return policy

    return policy_fn
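For orientation, a comment-level sketch of how this factory is consumed (it mirrors PPOModel.__init__ in ppo_tuner.py later in this commit; the two-model detail is inferred from the is_act_model flag and the comments there):

# Sketch: how build_lstm_policy is consumed by the tuner.
#
#   policy = build_lstm_policy(model_config)           # returns policy_fn
#   model = Model(policy=policy, nbatch_act=nenvs, nbatch_train=nbatch_train, ...)
#
# Model builds an act model (is_act_model=True), queried one step at a time during
# inference, and a train model; tf.AUTO_REUSE in the 'pi' scope lets them share weights.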
src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
0 → 100644
View file @ 55f48d27
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
ppo_tuner.py including:
class PPOTuner
"""
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import copy
import logging
import numpy as np
import json_tricks
from gym import spaces

import nni
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward

from .model import Model
from .util import set_global_seeds
from .policy import build_lstm_policy


logger = logging.getLogger('ppo_tuner_AutoML')


def constfn(val):
    """Wrap a constant as a function."""
    def f(_):
        return val
    return f
class ModelConfig:
    """
    Configurations of the PPO model
    """
    def __init__(self):
        self.observation_space = None
        self.action_space = None
        self.num_envs = 0
        self.nsteps = 0

        self.ent_coef = 0.0
        self.lr = 3e-4
        self.vf_coef = 0.5
        self.max_grad_norm = 0.5
        self.gamma = 0.99
        self.lam = 0.95
        self.cliprange = 0.2

        self.embedding_size = None   # the embedding is for each action
        self.noptepochs = 4          # number of training epochs per update
        self.total_timesteps = 5000  # number of timesteps (i.e. number of actions taken in the environment)
        self.nminibatches = 4        # number of training minibatches per update. For recurrent policies,
                                     # it should be no larger than the number of environments run in parallel.
class TrialsInfo:
    """
    Information of each trial from one model inference
    """
    def __init__(self, obs, actions, values, neglogpacs, dones, last_value, inf_batch_size):
        self.iter = 0
        self.obs = obs
        self.actions = actions
        self.values = values
        self.neglogpacs = neglogpacs
        self.dones = dones
        self.last_value = last_value

        self.rewards = None
        self.returns = None

        self.inf_batch_size = inf_batch_size
        #self.states = None

    def get_next(self):
        """
        Get the actions of the next trial
        """
        if self.iter >= self.inf_batch_size:
            return None, None
        actions = []
        for step in self.actions:
            actions.append(step[self.iter])
        self.iter += 1
        return self.iter - 1, actions

    def update_rewards(self, rewards, returns):
        """
        After the trial is finished, its reward and return are updated
        """
        self.rewards = rewards
        self.returns = returns

    def convert_shape(self):
        """
        Convert shape: swap the step and trial axes, then flatten
        """
        def sf01(arr):
            """
            swap and then flatten axes 0 and 1
            """
            s = arr.shape
            return arr.swapaxes(0, 1).reshape(s[0] * s[1], *s[2:])
        self.obs = sf01(self.obs)
        self.returns = sf01(self.returns)
        self.dones = sf01(self.dones)
        self.actions = sf01(self.actions)
        self.values = sf01(self.values)
        self.neglogpacs = sf01(self.neglogpacs)
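convert_shape turns the per-step layout produced by inference, shape (nsteps, inf_batch_size), into the per-trial flat layout the trainer indexes, shape (inf_batch_size * nsteps,). A quick NumPy illustration with toy sizes:

import numpy as np

nsteps, ntrials = 3, 2
arr = np.arange(nsteps * ntrials).reshape(nsteps, ntrials)
# arr[step, trial]:
# [[0 1]
#  [2 3]
#  [4 5]]
flat = arr.swapaxes(0, 1).reshape(nsteps * ntrials)
# [0 2 4 1 3 5]  -- all steps of trial 0, then all steps of trial 1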
class PPOModel:
    """
    PPO Model
    """
    def __init__(self, model_config, mask):
        self.model_config = model_config
        self.states = None    # initial state of lstm in policy/value network
        self.nupdates = None  # number of times train has been invoked, used to tune lr and cliprange
        self.cur_update = 1   # record the current update
        self.np_mask = mask   # record the mask of each action within one trial

        set_global_seeds(None)
        assert isinstance(self.model_config.lr, float)
        self.lr = constfn(self.model_config.lr)
        assert isinstance(self.model_config.cliprange, float)
        self.cliprange = constfn(self.model_config.cliprange)

        # build lstm policy network, value shares the same network
        policy = build_lstm_policy(model_config)

        # Get the nb of env
        nenvs = model_config.num_envs

        # Calculate the batch_size
        self.nbatch = nbatch = nenvs * model_config.nsteps    # num of records per update
        nbatch_train = nbatch // model_config.nminibatches    # get batch size
        # self.nupdates is used to tune lr and cliprange
        self.nupdates = self.model_config.total_timesteps // self.nbatch

        # Instantiate the model object (that creates act_model and train_model)
        self.model = Model(policy=policy, nbatch_act=nenvs, nbatch_train=nbatch_train,
                           nsteps=model_config.nsteps, ent_coef=model_config.ent_coef,
                           vf_coef=model_config.vf_coef, max_grad_norm=model_config.max_grad_norm,
                           np_mask=self.np_mask)
        self.states = self.model.initial_state

        logger.info('=== finished PPOModel initialization')
    def inference(self, num):
        """
        Generate actions along with related info from the policy network.
        The observation is the action of the last step.

        Parameters:
        ----------
        num: the number of trials to generate
        """
        # Here, we init the lists that will contain the mb of experiences
        mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
        # initial observation
        # use the (n+1)th embedding to represent the first step action
        first_step_ob = self.model_config.action_space.n
        obs = [first_step_ob for _ in range(num)]
        dones = [True for _ in range(num)]
        states = self.states
        # For n in range number of steps
        for cur_step in range(self.model_config.nsteps):
            # Given observations, get action value and neglogpacs
            # We already have self.obs because Runner superclass run self.obs[:] = env.reset() on init
            actions, values, states, neglogpacs = self.model.step(cur_step, obs, S=states, M=dones)
            mb_obs.append(obs.copy())
            mb_actions.append(actions)
            mb_values.append(values)
            mb_neglogpacs.append(neglogpacs)
            mb_dones.append(dones)

            # Take actions in env and look at the results
            # Infos contains a ton of useful information
            obs[:] = actions
            if cur_step == self.model_config.nsteps - 1:
                dones = [True for _ in range(num)]
            else:
                dones = [False for _ in range(num)]

        #batch of steps to batch of rollouts
        np_obs = np.asarray(obs)
        mb_obs = np.asarray(mb_obs, dtype=np_obs.dtype)
        mb_actions = np.asarray(mb_actions)
        mb_values = np.asarray(mb_values, dtype=np.float32)
        mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
        mb_dones = np.asarray(mb_dones, dtype=np.bool)
        last_values = self.model.value(np_obs, S=states, M=dones)

        return mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values
    def compute_rewards(self, trials_info, trials_result):
        """
        Compute the rewards of the trials in trials_info based on trials_result,
        and update the rewards in trials_info

        Parameters:
        ----------
        trials_info: info of the generated trials
        trials_result: final results (e.g., acc) of the generated trials
        """
        mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
        # discount/bootstrap off value fn
        mb_returns = np.zeros_like(mb_rewards)
        mb_advs = np.zeros_like(mb_rewards)
        lastgaelam = 0
        last_dones = np.asarray([True for _ in trials_result], dtype=np.bool)  # ugly
        for t in reversed(range(self.model_config.nsteps)):
            if t == self.model_config.nsteps - 1:
                nextnonterminal = 1.0 - last_dones
                nextvalues = trials_info.last_value
            else:
                nextnonterminal = 1.0 - trials_info.dones[t+1]
                nextvalues = trials_info.values[t+1]
            delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
            mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
        mb_returns = mb_advs + trials_info.values

        trials_info.update_rewards(mb_rewards, mb_returns)
        trials_info.convert_shape()
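The backward loop above is Generalized Advantage Estimation (GAE); here a trial's scalar final result is broadcast to every step as its reward. A minimal NumPy sketch of the same recursion, with hypothetical values:

import numpy as np

gamma, lam = 0.99, 0.95
rewards = np.array([0.9, 0.9, 0.9])  # hypothetical: final accuracy 0.9 copied to each step
values = np.array([0.5, 0.6, 0.7])   # hypothetical value predictions
last_value, last_done = 0.0, True    # the episode ends after the last step

advs = np.zeros_like(rewards)
lastgaelam = 0.0
for t in reversed(range(len(rewards))):
    if t == len(rewards) - 1:
        nextnonterminal, nextvalues = 1.0 - last_done, last_value
    else:
        nextnonterminal, nextvalues = 1.0, values[t + 1]  # dones are False mid-trial
    delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
    advs[t] = lastgaelam = delta + gamma * lam * nextnonterminal * lastgaelam
returns = advs + values  # targets for the value function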
    def train(self, trials_info, nenvs):
        """
        Train the policy/value network using trials_info

        Parameters:
        ----------
        trials_info: complete info of the generated trials from the previous inference
        nenvs: the batch size of the (previous) inference
        """
        # keep frac decay for future optimization
        if self.cur_update <= self.nupdates:
            frac = 1.0 - (self.cur_update - 1.0) / self.nupdates
        else:
            logger.warning('current update (self.cur_update) %d has exceeded total updates (self.nupdates) %d',
                           self.cur_update, self.nupdates)
            frac = 1.0 - (self.nupdates - 1.0) / self.nupdates
        lrnow = self.lr(frac)
        cliprangenow = self.cliprange(frac)
        self.cur_update += 1

        states = self.states

        assert states is not None  # recurrent version
        assert nenvs % self.model_config.nminibatches == 0
        envsperbatch = nenvs // self.model_config.nminibatches
        envinds = np.arange(nenvs)
        flatinds = np.arange(nenvs * self.model_config.nsteps).reshape(nenvs, self.model_config.nsteps)
        for _ in range(self.model_config.noptepochs):
            np.random.shuffle(envinds)
            for start in range(0, nenvs, envsperbatch):
                end = start + envsperbatch
                mbenvinds = envinds[start:end]
                mbflatinds = flatinds[mbenvinds].ravel()
                slices = (arr[mbflatinds] for arr in (trials_info.obs, trials_info.returns, trials_info.dones,
                                                      trials_info.actions, trials_info.values, trials_info.neglogpacs))
                mbstates = states[mbenvinds]
                self.model.train(lrnow, cliprangenow, *slices, mbstates)
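Because the policy is recurrent, minibatches are drawn per environment (per trial), not per step: a trial's whole action sequence must stay contiguous so the LSTM state lines up. A small sketch of the index bookkeeping, with made-up sizes:

import numpy as np

nenvs, nsteps, nminibatches = 4, 3, 2
envinds = np.arange(nenvs)                           # one index per trial
flatinds = np.arange(nenvs * nsteps).reshape(nenvs, nsteps)

np.random.shuffle(envinds)
envsperbatch = nenvs // nminibatches
for start in range(0, nenvs, envsperbatch):
    mbenvinds = envinds[start:start + envsperbatch]  # e.g. trials [2, 0]
    mbflatinds = flatinds[mbenvinds].ravel()         # all steps of those trials, in order
    # mbflatinds indexes the flattened (nenvs * nsteps) arrays produced by convert_shape()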
class PPOTuner(Tuner):
    """
    PPOTuner
    """
    def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
                 ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
        """
        Initialization. The PPO model is not initialized here because the search space has not been received yet.

        Parameters:
        ----------
        optimize_mode: maximize or minimize
        trials_per_update: number of trials to have for each model update
        epochs_per_update: number of epochs to run for each model update
        minibatch_size: minibatch size (number of trials) for the update
        ent_coef: policy entropy coefficient in the optimization objective
        lr: learning rate of the model (lstm network), constant
        vf_coef: value function loss coefficient in the optimization objective
        max_grad_norm: gradient norm clipping coefficient
        gamma: discounting factor
        lam: advantage estimation discounting factor (lambda in the paper)
        cliprange: cliprange in the PPO algorithm, constant
        """
        self.optimize_mode = OptimizeMode(optimize_mode)
        self.model_config = ModelConfig()
        self.model = None
        self.search_space = None
        self.running_trials = {}                 # key: parameter_id, value: actions/states/etc.
        self.inf_batch_size = trials_per_update  # number of trials to generate in one inference
        self.first_inf = True                    # indicate whether it is the first inference of new trials
        self.trials_result = [None for _ in range(self.inf_batch_size)]  # results of finished trials

        self.credit = 0  # record the unsatisfied trial requests
        self.param_ids = []
        self.finished_trials = 0
        self.chosen_arch_template = {}

        self.actions_spaces = None
        self.actions_to_config = None
        self.full_act_space = None
        self.trials_info = None

        self.all_trials = {}  # used to dedup the same trial, key: config, value: final result

        self.model_config.num_envs = self.inf_batch_size
        self.model_config.noptepochs = epochs_per_update
        self.model_config.nminibatches = minibatch_size

        self.send_trial_callback = None
        logger.info('=== finished PPOTuner initialization')
    def _process_one_nas_space(self, block_name, block_space):
        """
        Process one NAS space to determine the observation space and action space

        Parameters:
        ----------
        block_name: the name of the mutable block
        block_space: search space of this mutable block

        Returns:
        ----------
        actions_spaces: list of the space of each action
        actions_to_config: the mapping from action to generated configuration
        """
        actions_spaces = []
        actions_to_config = []

        block_arch_temp = {}
        for l_name, layer in block_space.items():
            chosen_layer_temp = {}

            if len(layer['layer_choice']) > 1:
                actions_spaces.append(layer['layer_choice'])
                actions_to_config.append((block_name, l_name, 'chosen_layer'))
                chosen_layer_temp['chosen_layer'] = None
            else:
                assert len(layer['layer_choice']) == 1
                chosen_layer_temp['chosen_layer'] = layer['layer_choice'][0]

            if layer['optional_input_size'] not in [0, 1, [0, 1]]:
                raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the specified one is %s'
                                 % (layer['optional_input_size']))
            if isinstance(layer['optional_input_size'], list):
                actions_spaces.append(["None", *layer['optional_inputs']])
                actions_to_config.append((block_name, l_name, 'chosen_inputs'))
                chosen_layer_temp['chosen_inputs'] = None
            elif layer['optional_input_size'] == 1:
                actions_spaces.append(layer['optional_inputs'])
                actions_to_config.append((block_name, l_name, 'chosen_inputs'))
                chosen_layer_temp['chosen_inputs'] = None
            elif layer['optional_input_size'] == 0:
                chosen_layer_temp['chosen_inputs'] = []
            else:
                raise ValueError('invalid type and value of optional_input_size')

            block_arch_temp[l_name] = chosen_layer_temp

        self.chosen_arch_template[block_name] = block_arch_temp

        return actions_spaces, actions_to_config
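For intuition, a hypothetical mutable_layer block and the action spaces it would produce (all names here are invented for illustration):

block_space = {
    'layer_0': {
        'layer_choice': ['conv3x3', 'conv5x5', 'maxpool'],  # >1 choice -> one action
        'optional_inputs': ['in_0', 'in_1'],
        'optional_input_size': [0, 1],                      # 0 or 1 input -> one action incl. "None"
    },
}
# _process_one_nas_space('block_a', block_space) would yield roughly:
# actions_spaces    == [['conv3x3', 'conv5x5', 'maxpool'], ['None', 'in_0', 'in_1']]
# actions_to_config == [('block_a', 'layer_0', 'chosen_layer'), ('block_a', 'layer_0', 'chosen_inputs')]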
    def _process_nas_space(self, search_space):
        """
        Process the NAS search space to get the action/observation space
        """
        actions_spaces = []
        actions_to_config = []
        for b_name, block in search_space.items():
            if block['_type'] != 'mutable_layer':
                raise ValueError('PPOTuner only accepts the mutable_layer type in the search space, but the current one is %s'
                                 % (block['_type']))
            block = block['_value']
            act, act_map = self._process_one_nas_space(b_name, block)
            actions_spaces.extend(act)
            actions_to_config.extend(act_map)

        # calculate observation space
        dedup = {}
        for step in actions_spaces:
            for action in step:
                dedup[action] = 1
        full_act_space = [act for act, _ in dedup.items()]
        assert len(full_act_space) == len(dedup)
        observation_space = len(full_act_space)
        nsteps = len(actions_spaces)

        return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps
    def _generate_action_mask(self):
        """
        Different steps could have different action spaces. To deal with this case, we merge all the
        possible actions into one action space, and use a mask to indicate the available actions for each step
        """
        two_masks = []

        mask = []
        for acts in self.actions_spaces:
            one_mask = [0 for _ in range(len(self.full_act_space))]
            for act in acts:
                idx = self.full_act_space.index(act)
                one_mask[idx] = 1
            mask.append(one_mask)
        two_masks.append(mask)

        mask = []
        for acts in self.actions_spaces:
            one_mask = [-np.inf for _ in range(len(self.full_act_space))]
            for act in acts:
                idx = self.full_act_space.index(act)
                one_mask[idx] = 0
            mask.append(one_mask)
        two_masks.append(mask)

        return np.asarray(two_masks, dtype=np.float32)
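Continuing the hypothetical two-step space sketched after _process_one_nas_space, the merged action space has six actions and the two masks come out as follows:

# full_act_space == ['conv3x3', 'conv5x5', 'maxpool', 'None', 'in_0', 'in_1']
# multiplicative 0/1 mask (two_masks[0]) -- zeroes the logits of unavailable actions:
#   step 0: [1, 1, 1, 0, 0, 0]
#   step 1: [0, 0, 0, 1, 1, 1]
# additive mask (two_masks[1]) -- pushes unavailable logits to -inf before sampling:
#   step 0: [0, 0, 0, -inf, -inf, -inf]
#   step 1: [-inf, -inf, -inf, 0, 0, 0]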
    def update_search_space(self, search_space):
        """
        Get the search space; currently the space only includes that for NAS

        Parameters:
        ----------
        search_space: search space for NAS

        Returns:
        -------
        no return
        """
        logger.info('=== update search space %s', search_space)
        assert self.search_space is None
        self.search_space = search_space

        assert self.model_config.observation_space is None
        assert self.model_config.action_space is None

        self.actions_spaces, self.actions_to_config, self.full_act_space, obs_space, nsteps = self._process_nas_space(search_space)

        self.model_config.observation_space = spaces.Discrete(obs_space)
        self.model_config.action_space = spaces.Discrete(obs_space)
        self.model_config.nsteps = nsteps

        # generate mask in numpy
        mask = self._generate_action_mask()

        assert self.model is None
        self.model = PPOModel(self.model_config, mask)
    def _actions_to_config(self, actions):
        """
        Given actions, generate the corresponding trial configuration
        """
        chosen_arch = copy.deepcopy(self.chosen_arch_template)
        for cnt, act in enumerate(actions):
            act_name = self.full_act_space[act]
            (block_name, layer_name, key) = self.actions_to_config[cnt]
            if key == 'chosen_inputs':
                if act_name == 'None':
                    chosen_arch[block_name][layer_name][key] = []
                else:
                    chosen_arch[block_name][layer_name][key] = [act_name]
            elif key == 'chosen_layer':
                chosen_arch[block_name][layer_name][key] = act_name
            else:
                raise ValueError('unrecognized key: {0}'.format(key))
        return chosen_arch
    def generate_multiple_parameters(self, parameter_id_list, **kwargs):
        """
        Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
        """
        result = []
        self.send_trial_callback = kwargs['st_callback']
        for parameter_id in parameter_id_list:
            had_exception = False
            try:
                logger.debug("generating param for %s", parameter_id)
                res = self.generate_parameters(parameter_id, **kwargs)
            except nni.NoMoreTrialError:
                had_exception = True
            if not had_exception:
                result.append(res)
        return result
    def generate_parameters(self, parameter_id, **kwargs):
        """
        Generate parameters. If there is no trial configuration for now, increase self.credit by 1
        so that the config can be sent later.
        """
        if self.first_inf:
            self.trials_result = [None for _ in range(self.inf_batch_size)]
            mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
            self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
                                          mb_dones, last_values, self.inf_batch_size)
            self.first_inf = False

        trial_info_idx, actions = self.trials_info.get_next()
        if trial_info_idx is None:
            self.credit += 1
            self.param_ids.append(parameter_id)
            raise nni.NoMoreTrialError('no more parameters now.')

        self.running_trials[parameter_id] = trial_info_idx
        new_config = self._actions_to_config(actions)
        return new_config
    def _next_round_inference(self):
        """
        Train the model with the complete batch of finished trials, then run a new
        round of inference and dispatch pending trial requests.
        """
        self.finished_trials = 0
        self.model.compute_rewards(self.trials_info, self.trials_result)
        self.model.train(self.trials_info, self.inf_batch_size)
        self.running_trials = {}
        # generate new trials
        self.trials_result = [None for _ in range(self.inf_batch_size)]
        mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
        self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
                                      mb_dones, last_values, self.inf_batch_size)
        # check credit and submit new trials
        for _ in range(self.credit):
            trial_info_idx, actions = self.trials_info.get_next()
            if trial_info_idx is None:
                logger.warning('Not enough trial configs; trials_per_update is suggested to be larger than trialConcurrency')
                break
            assert self.param_ids
            param_id = self.param_ids.pop()
            self.running_trials[param_id] = trial_info_idx
            new_config = self._actions_to_config(actions)
            self.send_trial_callback(param_id, new_config)
            self.credit -= 1
    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        """
        Receive a trial's result. If the number of finished trials equals self.inf_batch_size,
        start the next update to train the model.
        """
        trial_info_idx = self.running_trials.pop(parameter_id, None)
        assert trial_info_idx is not None

        value = extract_scalar_reward(value)
        if self.optimize_mode == OptimizeMode.Minimize:
            value = -value

        self.trials_result[trial_info_idx] = value
        self.finished_trials += 1

        if self.finished_trials == self.inf_batch_size:
            self._next_round_inference()
    def trial_end(self, parameter_id, success, **kwargs):
        """
        Deal with trial failure
        """
        if not success:
            if parameter_id not in self.running_trials:
                logger.warning('The trial failed, but self.running_trials does not have this trial')
                return
            trial_info_idx = self.running_trials.pop(parameter_id, None)
            assert trial_info_idx is not None
            # use the mean of finished trials as the result of this failed trial
            values = [val for val in self.trials_result if val is not None]
            logger.warning('values of finished trials: {0}'.format(values))
            self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
            self.finished_trials += 1
            if self.finished_trials == self.inf_batch_size:
                self._next_round_inference()
    def import_data(self, data):
        """
        Import additional data for tuning

        Parameters
        ----------
        data: a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
        """
        logger.warning('PPOTuner cannot leverage imported data.')
src/sdk/pynni/nni/ppo_tuner/requirements.txt
0 → 100644
View file @ 55f48d27
enum34
gym
tensorflow
\ No newline at end of file
src/sdk/pynni/nni/ppo_tuner/util.py
0 → 100644
View file @ 55f48d27
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
util functions
"""
import os
import random
import multiprocessing
import numpy as np
import tensorflow as tf
from gym.spaces import Discrete, Box, MultiDiscrete


def set_global_seeds(i):
    """set global seeds"""
    rank = 0
    myseed = i + 1000 * rank if i is not None else None
    tf.set_random_seed(myseed)
    np.random.seed(myseed)
    random.seed(myseed)
def batch_to_seq(h, nbatch, nsteps, flat=False):
    """convert from batch to sequence"""
    if flat:
        h = tf.reshape(h, [nbatch, nsteps])
    else:
        h = tf.reshape(h, [nbatch, nsteps, -1])
    return [tf.squeeze(v, [1]) for v in tf.split(axis=1, num_or_size_splits=nsteps, value=h)]


def seq_to_batch(h, flat=False):
    """convert from sequence to batch"""
    shape = h[0].get_shape().as_list()
    if not flat:
        assert len(shape) > 1
        nh = h[0].get_shape()[-1].value
        return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])
    else:
        return tf.reshape(tf.stack(values=h, axis=1), [-1])
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell"""
    nbatch, nin = [v.value for v in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
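For reference, each iteration of the loop above is the standard LSTM recurrence; the mask $m_t$ (the done flag of the previous step) resets the carried state at episode boundaries:

$$
\begin{aligned}
c_{t-1} &\leftarrow c_{t-1}(1 - m_t), \qquad h_{t-1} \leftarrow h_{t-1}(1 - m_t) \\
(z_i, z_f, z_o, z_u) &= x_t W_x + h_{t-1} W_h + b \\
c_t &= \sigma(z_f)\, c_{t-1} + \sigma(z_i)\, \tanh(z_u) \\
h_t &= \sigma(z_o)\, \tanh(c_t)
\end{aligned}
$$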
def lstm_model(nlstm=128, layer_norm=False):
    """
    Builds an LSTM (Long Short-Term Memory) network to be used in a policy.
    Note that the resulting function returns not only the output of the LSTM
    (i.e. the hidden state of the lstm for each step in the sequence), but also a dictionary
    with auxiliary tensors to be set as policy attributes.

    Specifically,
        S is a placeholder to feed the current state (the LSTM state has to be managed outside the policy)
        M is a placeholder for the mask (used to mask out observations after the end of the episode, but can be used for other purposes too)
        initial_state is a numpy array containing the initial lstm state (usually zeros)
        state is the output LSTM state (to be fed into S at the next call)

    An example of usage of an lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example

    Parameters:
    ----------
    nlstm: int          LSTM hidden state size
    layer_norm: bool    if True, a layer-normalized version of the LSTM is used

    Returns:
    -------
    function that builds an LSTM with a given input tensor / placeholder
    """
    def network_fn(X, nenv=1, obs_size=-1):
        with tf.variable_scope("emb", reuse=tf.AUTO_REUSE):
            w_emb = tf.get_variable("w_emb", [obs_size+1, 32])
            X = tf.nn.embedding_lookup(w_emb, X)

        nbatch = X.shape[0]
        nsteps = nbatch // nenv

        h = tf.layers.flatten(X)
        M = tf.placeholder(tf.float32, [nbatch])         #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, 2*nlstm])  #states

        xs = batch_to_seq(h, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)

        assert not layer_norm
        h5, snew = lstm(xs, ms, S, scope='lstm', nh=nlstm)

        h = seq_to_batch(h5)
        initial_state = np.zeros(S.shape.as_list(), dtype=float)

        return h, {'S': S, 'M': M, 'state': snew, 'initial_state': initial_state}

    return network_fn
def ortho_init(scale=1.0):
    """init approach"""
    def _ortho_init(shape, dtype, partition_info=None):
        #lasagne ortho init for tf
        shape = tuple(shape)
        if len(shape) == 2:
            flat_shape = shape
        elif len(shape) == 4:
            # assumes NHWC
            flat_shape = (np.prod(shape[:-1]), shape[-1])
        else:
            raise NotImplementedError
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v  # pick the one with the correct shape
        q = q.reshape(shape)
        return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
    return _ortho_init
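A quick NumPy check of what this initializer produces, assuming ortho_init from above is in scope: the SVD factor it keeps has orthonormal rows, so W W^T is (up to the scale factor) the identity.

import numpy as np

init = ortho_init(scale=1.0)
w = init((32, 64), np.float32)  # same call signature TF uses for a [nin, nh] weight
print(w.shape)                                       # (32, 64)
print(np.allclose(w @ w.T, np.eye(32), atol=1e-5))   # True: rows are orthonormal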
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0):
    """fully connected op"""
    with tf.variable_scope(scope):
        nin = x.get_shape()[1].value
        w = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh], initializer=tf.constant_initializer(init_bias))
        return tf.matmul(x, w) + b
def _check_shape(placeholder_shape, data_shape):
    """
    check if two shapes are compatible (i.e. differ only by dimensions of size 1, or by the batch dimension)
    """
    return True  # NOTE: currently a stub that accepts any shape, so the assert in adjust_shape never fires


# ================================================================
# Shape adjustment for feeding into tf placeholders
# ================================================================
def adjust_shape(placeholder, data):
    """
    Adjust the shape of the data to the shape of the placeholder if possible.
    If the shape is incompatible, an AssertionError is thrown

    Parameters:
        placeholder: tensorflow input placeholder
        data: input data to be (potentially) reshaped to be fed into placeholder

    Returns:
        reshaped data
    """
    if not isinstance(data, np.ndarray) and not isinstance(data, list):
        return data
    if isinstance(data, list):
        data = np.array(data)

    placeholder_shape = [x or -1 for x in placeholder.shape.as_list()]

    assert _check_shape(placeholder_shape, data.shape), \
        'Shape of data {} is not compatible with shape of the placeholder {}'.format(data.shape, placeholder_shape)

    return np.reshape(data, placeholder_shape)
# ================================================================
# Global session
# ================================================================
def get_session(config=None):
    """Get the default session or create one with a given config"""
    sess = tf.get_default_session()
    if sess is None:
        sess = make_session(config=config, make_default=True)
    return sess


def make_session(config=None, num_cpu=None, make_default=False, graph=None):
    """Returns a session that will use <num_cpu> CPUs only"""
    if num_cpu is None:
        num_cpu = int(os.getenv('RCALL_NUM_CPU', multiprocessing.cpu_count()))
    if config is None:
        config = tf.ConfigProto(
            allow_soft_placement=True,
            inter_op_parallelism_threads=num_cpu,
            intra_op_parallelism_threads=num_cpu)
        config.gpu_options.allow_growth = True

    if make_default:
        return tf.InteractiveSession(config=config, graph=graph)
    else:
        return tf.Session(config=config, graph=graph)
ALREADY_INITIALIZED = set()

def initialize():
    """Initialize all the uninitialized variables in the global scope."""
    new_variables = set(tf.global_variables()) - ALREADY_INITIALIZED
    get_session().run(tf.variables_initializer(new_variables))
    ALREADY_INITIALIZED.update(new_variables)
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    """
    Create a placeholder to feed observations into, of the size appropriate to the observation space

    Parameters:
    ----------
    ob_space: gym.Space     observation space
    batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
    name: str               name of the placeholder

    Returns:
    -------
    tensorflow placeholder tensor
    """
    assert isinstance(ob_space, (Discrete, Box, MultiDiscrete)), \
        'Can only deal with Discrete and Box observation spaces for now'

    dtype = ob_space.dtype
    if dtype == np.int8:
        dtype = np.uint8

    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=dtype, name=name)
def explained_variance(ypred, y):
    """
    Computes the fraction of variance that ypred explains about y.
    Returns 1 - Var[y-ypred] / Var[y]

    Interpretation:
        ev=0  =>  might as well have predicted zero
        ev=1  =>  perfect prediction
        ev<0  =>  worse than just predicting zero
    """
    assert y.ndim == 1 and ypred.ndim == 1
    vary = np.var(y)
    return np.nan if vary == 0 else 1 - np.var(y - ypred) / vary
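A quick sanity check of the metric on made-up data, assuming explained_variance from above is in scope:

import numpy as np

y = np.array([1.0, 2.0, 3.0, 4.0])
print(explained_variance(y, y))                 # 1.0: perfect prediction
print(explained_variance(np.zeros_like(y), y))  # 0.0: as good as predicting zero

noisy = y + np.random.normal(0, 0.1, size=y.shape)
print(explained_variance(noisy, y))             # close to 1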
src/sdk/pynni/nni/tuner.py
View file @ 55f48d27
...
@@ -17,11 +17,10 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging

import nni
from .recoverable import Recoverable

_logger = logging.getLogger(__name__)
...
tools/nni_annotation/.gitignore
0 → 100644
View file @ 55f48d27
_generated
tools/nni_annotation/test_annotation.py
View file @ 55f48d27
...
@@ -39,17 +39,18 @@ class AnnotationTestCase(TestCase):
         shutil.rmtree('_generated')

     def test_search_space_generator(self):
-        search_space = generate_search_space('testcase/annotated')
+        shutil.copytree('testcase/annotated', '_generated/annotated')
+        search_space = generate_search_space('_generated/annotated')
         with open('testcase/searchspace.json') as f:
             self.assertEqual(search_space, json.load(f))

     def test_code_generator(self):
-        code_dir = expand_annotations('testcase/usercode', '_generated', nas_mode='classic_mode')
-        self.assertEqual(code_dir, '_generated')
-        self._assert_source_equal('testcase/annotated/nas.py', '_generated/nas.py')
-        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
-        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
-        with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
+        code_dir = expand_annotations('testcase/usercode', '_generated/usercode', nas_mode='classic_mode')
+        self.assertEqual(code_dir, '_generated/usercode')
+        self._assert_source_equal('testcase/annotated/nas.py', '_generated/usercode/nas.py')
+        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/usercode/mnist.py')
+        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/usercode/dir/simple.py')
+        with open('testcase/usercode/nonpy.txt') as src, open('_generated/usercode/nonpy.txt') as dst:
             assert src.read() == dst.read()

     def test_annotation_detecting(self):
...
tools/nni_cmd/config_schema.py
View file @ 55f48d27
...
@@ -142,6 +142,24 @@ tuner_schema_dict = {
         Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
         Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
     },
+    'PPOTuner': {
+        'builtinTunerName': 'PPOTuner',
+        'classArgs': {
+            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
+            Optional('trials_per_update'): setNumberRange('trials_per_update', int, 0, 99999),
+            Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999),
+            Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999),
+            Optional('ent_coef'): setType('ent_coef', float),
+            Optional('lr'): setType('lr', float),
+            Optional('vf_coef'): setType('vf_coef', float),
+            Optional('max_grad_norm'): setType('max_grad_norm', float),
+            Optional('gamma'): setType('gamma', float),
+            Optional('lam'): setType('lam', float),
+            Optional('cliprange'): setType('cliprange', float),
+        },
+        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
+        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
+    },
     'customized': {
         'codeDir': setPathCheck('codeDir'),
         'classFileName': setType('classFileName', str),
...
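For orientation, this schema admits an experiment config whose tuner section parses to roughly the following dict (a hypothetical sketch; the classArgs values mirror the PPOTuner defaults defined earlier in this commit):

# Parsed form of the 'tuner' section of an experiment config.yml selecting the new tuner
tuner_section = {
    'builtinTunerName': 'PPOTuner',
    'classArgs': {
        'optimize_mode': 'maximize',
        'trials_per_update': 20,
        'epochs_per_update': 4,
        'minibatch_size': 4,
    },
}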