"src/vscode:/vscode.git/clone" did not exist on "aabb14ff017a68217515e044eebb53b6ddc3b103"
Unverified commit 55f48d27 authored by QuanluZhang, committed by GitHub

Merge dev-nas-tuner back to master (#1531)

* PPO tuner for NAS, supports NNI's NAS interface (#1380)
parent 7246593f
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string
flags = tf.app.flags
FLAGS = flags.FLAGS
DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCWH'")
DEFINE_string("search_for", None, "Must be [macro|micro]")
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
def lstm(x, prev_c, prev_h, w):
ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
i, f, o, g = tf.split(ifog, 4, axis=1)
i = tf.sigmoid(i)
f = tf.sigmoid(f)
o = tf.sigmoid(o)
g = tf.tanh(g)
next_c = i * g + f * prev_c
next_h = o * tf.tanh(next_c)
return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
next_c, next_h = [], []
for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
inputs = x if layer_id == 0 else next_h[-1]
curr_c, curr_h = lstm(inputs, _c, _h, _w)
next_c.append(curr_c)
next_h.append(curr_h)
return next_c, next_h
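# Illustrative usage sketch (added for this write-up, not part of the commit), assuming
# TensorFlow 1.x graph mode: each LSTM layer carries its own (c, h) state and a weight of
# shape [2 * hidden, 4 * hidden], matching the concat/split layout in lstm() above.
hidden, num_layers = 64, 2
x_in = tf.placeholder(tf.float32, [1, hidden])                   # input already projected to `hidden`
prev_c = [tf.zeros([1, hidden]) for _ in range(num_layers)]      # per-layer cell states
prev_h = [tf.zeros([1, hidden]) for _ in range(num_layers)]      # per-layer hidden states
w_lstm = [tf.get_variable("w_lstm_{}".format(i), [2 * hidden, 4 * hidden])
          for i in range(num_layers)]
next_c_out, next_h_out = stack_lstm(x_in, prev_c, prev_h, w_lstm)  # lists of [1, hidden] tensors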
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
if initializer is None:
initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
def create_bias(name, shape, initializer=None):
if initializer is None:
initializer = tf.constant_initializer(0.0, dtype=tf.float32)
return tf.get_variable(name, shape, initializer=initializer)
def conv_op(inputs, filter_size, is_training, count, out_filters,
data_format, ch_mul=1, start_idx=None, separable=False):
"""
Args:
start_idx: where to start taking the output channels. if None, assuming
fixed_arc mode
count: how many output_channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("inp_conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("out_conv_{}".format(filter_size)):
if start_idx is None:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
w_point = create_weight(
"w_point", [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, inp_c, count])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
#test_depth = w_depth
w_point = create_weight(
"w_point", [out_filters, out_filters * ch_mul])
w_point = w_point[start_idx:start_idx+count, :]
w_point = tf.transpose(w_point, [1, 0])
w_point = tf.reshape(
w_point, [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, out_filters, out_filters])
w = tf.transpose(w, [3, 0, 1, 2])
w = w[start_idx:start_idx+count, :, :, :]
w = tf.transpose(w, [1, 2, 3, 0])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
x = tf.nn.relu(x)
return x
def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None):
"""
Args:
start_idx: where to start taking the output channels. if None, assuming
fixed_arc mode
count: how many output_channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("pool"):
if data_format == "NHWC":
actual_data_format = "channels_last"
elif data_format == "NCHW":
actual_data_format = "channels_first"
if avg_or_max == "avg":
x = tf.layers.average_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
elif avg_or_max == "max":
x = tf.layers.max_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
else:
raise ValueError("Unknown pool {}".format(avg_or_max))
if start_idx is not None:
if data_format == "NHWC":
x = x[:, :, :, start_idx: start_idx+count]
elif data_format == "NCHW":
x = x[:, start_idx: start_idx+count, :, :]
return x
def global_avg_pool(x, data_format="NHWC"):
if data_format == "NHWC":
x = tf.reduce_mean(x, [1, 2])
elif data_format == "NCHW":
x = tf.reduce_mean(x, [2, 3])
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
return x
def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
data_format="NHWC"):
if data_format == "NHWC":
shape = [x.get_shape()[3]]
elif data_format == "NCHW":
shape = [x.get_shape()[1]]
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
update_mean = moving_averages.assign_moving_average(
moving_mean, mean, decay)
update_variance = moving_averages.assign_moving_average(
moving_variance, variance, decay)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean,
variance=moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
decay=0.9, epsilon=1e-3, data_format="NHWC"):
shape = [num_channels]
indices = tf.where(mask)
indices = tf.to_int32(indices)
indices = tf.reshape(indices, [-1])
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
offset = tf.boolean_mask(offset, mask)
scale = tf.boolean_mask(scale, mask)
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
variance = (1.0 - decay) * \
(tf.boolean_mask(moving_variance, mask) - variance)
update_mean = tf.scatter_sub(
moving_mean, indices, mean, use_locking=True)
update_variance = tf.scatter_sub(
moving_variance, indices, variance, use_locking=True)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
masked_moving_mean = tf.boolean_mask(moving_mean, mask)
masked_moving_variance = tf.boolean_mask(moving_variance, mask)
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
mean=masked_moving_mean,
variance=masked_moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
user_flags = []
def DEFINE_string(name, default_value, doc_string):
tf.app.flags.DEFINE_string(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_integer(name, default_value, doc_string):
tf.app.flags.DEFINE_integer(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_float(name, default_value, doc_string):
tf.app.flags.DEFINE_float(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_boolean(name, default_value, doc_string):
tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def print_user_flags(line_limit=80):
print("-" * 80)
global user_flags
FLAGS = tf.app.flags.FLAGS
for flag_name in sorted(user_flags):
value = "{}".format(getattr(FLAGS, flag_name))
log_string = flag_name
log_string += "." * (line_limit - len(flag_name) - len(value))
log_string += value
print(log_string)
def get_C(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return x.get_shape()[3].value
elif data_format == "NCHW":
return x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
def get_HW(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
return x.get_shape()[2].value
def get_strides(stride, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return [1, stride, stride, 1]
elif data_format == "NCHW":
return [1, 1, stride, stride]
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
class TextColors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class Logger(object):
def __init__(self, output_file):
self.terminal = sys.stdout
self.log = open(output_file, "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def count_model_params(tf_variables):
"""
Args:
tf_variables: list of all model variables
"""
num_vars = 0
for var in tf_variables:
num_vars += np.prod([dim.value for dim in var.get_shape()])
return num_vars
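# Small illustrative check (added for this write-up, not part of the commit):
# count_model_params sums the element counts of the given TF 1.x variables.
with tf.variable_scope("toy_scope"):
    toy_w1 = tf.get_variable("w1", [3, 3, 16, 32])   # 3*3*16*32 = 4608 parameters
    toy_b1 = tf.get_variable("b1", [32])             # 32 parameters
assert count_model_params([toy_w1, toy_b1]) == 4640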
def get_train_ops(
loss,
tf_variables,
train_step,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_warmup_val=None,
lr_warmup_steps=100,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_dec_min=None,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
num_train_batches=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
get_grad_norms=False,
moving_average=None):
"""
Args:
clip_mode: "global", "norm", or None.
moving_average: store the moving average of parameters
"""
if l2_reg > 0:
l2_losses = []
for var in tf_variables:
l2_losses.append(tf.reduce_sum(var ** 2))
l2_loss = tf.add_n(l2_losses)
loss += l2_reg * l2_loss
grads = tf.gradients(loss, tf_variables)
grad_norm = tf.global_norm(grads)
grad_norms = {}
for v, g in zip(tf_variables, grads):
if v is None or g is None:
continue
if isinstance(g, tf.IndexedSlices):
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
else:
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))
if clip_mode is not None:
assert grad_bound is not None, "Need grad_bound to clip gradients."
if clip_mode == "global":
grads, _ = tf.clip_by_global_norm(grads, grad_bound)
elif clip_mode == "norm":
clipped = []
for g in grads:
if isinstance(g, tf.IndexedSlices):
c_g = tf.clip_by_norm(g.values, grad_bound)
c_g = tf.IndexedSlices(c_g, g.indices)  # clipped values first, then the original indices
else:
c_g = tf.clip_by_norm(g, grad_bound)
clipped.append(c_g)
grads = clipped
else:
raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))
if lr_cosine:
assert lr_max is not None, "Need lr_max to use lr_cosine"
assert lr_min is not None, "Need lr_min to use lr_cosine"
assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
assert num_train_batches is not None, ("Need num_train_batches to use"
" lr_cosine")
curr_epoch = train_step // num_train_batches
last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
name="last_reset")
T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
T_curr = curr_epoch - last_reset
def _update():
update_last_reset = tf.assign(
last_reset, curr_epoch, use_locking=True)
update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
with tf.control_dependencies([update_last_reset, update_T_i]):
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
def _no_update():
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
learning_rate = tf.cond(
tf.greater_equal(T_curr, T_i), _update, _no_update)
else:
learning_rate = tf.train.exponential_decay(
lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
lr_dec_rate, staircase=True)
if lr_dec_min is not None:
learning_rate = tf.maximum(learning_rate, lr_dec_min)
if lr_warmup_val is not None:
learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
lambda: lr_warmup_val, lambda: learning_rate)
if optim_algo == "momentum":
opt = tf.train.MomentumOptimizer(
learning_rate, 0.9, use_locking=True, use_nesterov=True)
elif optim_algo == "sgd":
opt = tf.train.GradientDescentOptimizer(
learning_rate, use_locking=True)
elif optim_algo == "adam":
opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
use_locking=True)
else:
raise ValueError("Unknown optim_algo {}".format(optim_algo))
if sync_replicas:
assert num_aggregate is not None, "Need num_aggregate to sync."
assert num_replicas is not None, "Need num_replicas to sync."
opt = tf.train.SyncReplicasOptimizer(
opt,
replicas_to_aggregate=num_aggregate,
total_num_replicas=num_replicas,
use_locking=True)
if moving_average is not None:
opt = tf.contrib.opt.MovingAverageOptimizer(
opt, average_decay=moving_average)
train_op = opt.apply_gradients(
zip(grads, tf_variables), global_step=train_step)
if get_grad_norms:
return train_op, learning_rate, grad_norm, opt, grad_norms
else:
return train_op, learning_rate, grad_norm, opt
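# Illustrative usage sketch of get_train_ops on a toy loss (added for this write-up, not part
# of the commit); the toy_* names are made up and the values mirror the child_* flag defaults.
toy_w = tf.get_variable("toy_w", [10], initializer=tf.zeros_initializer())
toy_loss = tf.reduce_sum(tf.square(toy_w - 1.0))
toy_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="toy_step")
toy_train_op, toy_lr, toy_grad_norm, toy_opt = get_train_ops(
    toy_loss, [toy_w], toy_step,
    clip_mode="norm", grad_bound=5.0,   # cf. child_grad_bound
    l2_reg=1e-4,                        # cf. child_l2_reg
    lr_init=0.1, lr_dec_start=0, lr_dec_every=10000, lr_dec_rate=0.1,
    optim_algo="momentum")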
@@ -2,13 +2,14 @@ import numpy as np
from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state):
'''random
'''
chosen_archi = {}
-print("zql: nas search space: ", nas_ss)
for block_name, block_value in nas_ss.items():
-assert block_value['_type'] == "mutable_layer", "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
+assert block_value['_type'] == "mutable_layer", \
+"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block = block_value['_value']
tmp_block = {}
for layer_name, layer in block.items():
@@ -19,13 +20,12 @@ def random_archi_generator(nas_ss, random_state):
tmp_layer['chosen_layer'] = value[index]
elif key == 'optional_inputs':
tmp_layer['chosen_inputs'] = []
-print("zql: optional_inputs", layer['optional_inputs'])
if layer['optional_inputs']:
if isinstance(layer['optional_input_size'], int):
choice_num = layer['optional_input_size']
else:
choice_range = layer['optional_input_size']
-choice_num = random_state.randint(choice_range[0], choice_range[1]+1)
+choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
for _ in range(choice_num):
index = random_state.randint(len(layer['optional_inputs']))
tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
@@ -37,6 +37,7 @@ def random_archi_generator(nas_ss, random_state):
chosen_archi[block_name] = tmp_block
return chosen_archi
class RandomNASTuner(Tuner):
'''RandomNASTuner
'''
...
@@ -174,7 +174,7 @@ export namespace ValidationSchemas {
checkpointDir: joi.string().allow('')
}),
tuner: joi.object({
-builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner'),
+builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner', 'PPOTuner'),
codeDir: joi.string(),
classFileName: joi.string(),
className: joi.string(),
...
@@ -30,7 +30,8 @@ ModuleName = {
'NetworkMorphism': 'nni.networkmorphism_tuner.networkmorphism_tuner',
'Curvefitting': 'nni.curvefitting_assessor.curvefitting_assessor',
'MetisTuner': 'nni.metis_tuner.metis_tuner',
-'GPTuner': 'nni.gp_tuner.gp_tuner'
+'GPTuner': 'nni.gp_tuner.gp_tuner',
+'PPOTuner': 'nni.ppo_tuner.ppo_tuner'
}
ClassName = {
@@ -44,6 +45,7 @@ ClassName = {
'NetworkMorphism':'NetworkMorphismTuner',
'MetisTuner':'MetisTuner',
'GPTuner':'GPTuner',
+'PPOTuner': 'PPOTuner',
'Medianstop': 'MedianstopAssessor',
'Curvefitting': 'CurvefittingAssessor'
...
@@ -27,6 +27,7 @@ import logging
import hyperopt as hp
import numpy as np
from nni.tuner import Tuner
+from nni.nas_utils import rewrite_nas_space
from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index
logger = logging.getLogger('hyperopt_AutoML')
@@ -240,6 +241,7 @@ class HyperoptTuner(Tuner):
return hp.anneal.suggest
raise RuntimeError('Not support tuner algorithm in hyperopt.')
+@rewrite_nas_space
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in parameters.
...
@@ -101,11 +101,16 @@ class MsgDispatcher(MsgDispatcherBase):
self.tuner.update_search_space(data)
send(CommandType.Initialized, '')
+def send_trial_callback(self, id, params):
+"""For tuner to issue trial config when the config is generated
+"""
+send(CommandType.NewTrialJob, _pack_parameter(id, params))
def handle_request_trial_jobs(self, data):
# data: number or trial jobs
ids = [_create_parameter_id() for _ in range(data)]
_logger.debug("requesting for generating params of {}".format(ids))
-params_list = self.tuner.generate_multiple_parameters(ids)
+params_list = self.tuner.generate_multiple_parameters(ids, st_callback=self.send_trial_callback)
for i, _ in enumerate(params_list):
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
...
@@ -17,10 +17,16 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
+import functools
+import logging
from . import trial
+_logger = logging.getLogger(__name__)
+_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer"
def classic_mode(
mutable_id,
mutable_layer_id,
@@ -34,13 +40,11 @@ def classic_mode(
without touching the full model graph.'''
if trial.get_current_parameter() is None:
trial.get_next_parameter()
-mutable_block = trial.get_current_parameter(mutable_id)
-chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
-chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
-real_chosen_inputs = [optional_inputs[input_name]
-for input_name in chosen_inputs]
-layer_out = funcs[chosen_layer](
-[fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
+chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
+list(optional_inputs.keys()))
+real_chosen_inputs = [optional_inputs[input_name] for input_name in chosen_inputs]
+layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
return layer_out
@@ -173,20 +177,44 @@ def reload_tensorflow_variables(tf, session):
tf: tensorflow module
'''
subgraph_from_tuner = trial.get_next_parameter()
-for mutable_id, mutable_block in subgraph_from_tuner.items():
+mutable_layers = set()
+for subgraph_key in subgraph_from_tuner:
+if "/" in subgraph_key:
+# has to remove the last, could be layer_choice or whatever
+mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")])
+if mutable_id is not None:
+mutable_layers.add((mutable_id, mutable_layer_id))
+mutable_layers = sorted(list(mutable_layers))
+for mutable_id, mutable_layer_id in mutable_layers:
if mutable_id not in name_space:
+_logger.warning("{} not found in name space".format(mutable_id))
continue
-for mutable_layer_id, mutable_layer in mutable_block.items():
-name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
-# extract layer information from the subgraph sampled by tuner
-chosen_layer = name_space[name_prefix]['funcs'].index(
-mutable_layer["chosen_layer"])
-chosen_inputs = [1 if inp in mutable_layer["chosen_inputs"]
-else 0 for inp in name_space[name_prefix]['optional_inputs']]
+name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
+# get optional inputs names
+optional_inputs = name_space[name_prefix]['optional_inputs']
+# extract layer information from the subgraph sampled by tuner
+chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs)
+chosen_layer = name_space[name_prefix]['funcs'].index(chosen_layer)
+chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs]
# load these information into pre-defined tensorflow variables
tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
tf_variables[name_prefix]['optional_inputs'].load(
chosen_inputs, session)
+def _construct_general_key(mutable_id, mutable_layer_id):
+# Mutable layer key in a general (search space) format
+# that is, prefix/mutable_id/mutable_layer_id
+return _MUTABLE_LAYER_SPACE_PREFIX + "/" + mutable_id + "/" + mutable_layer_id
+def _decompose_general_key(key):
+# inverse operation of above
+if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX):
+return None, None
+else:
+_, mutable_id, mutable_layer_id = key.split("/", maxsplit=2)
+return mutable_id, mutable_layer_id
def darts_training(tf, session, loss, feed_dict):
@@ -205,4 +233,107 @@ def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None):
if nas_mode == 'darts_mode':
darts_training(tf, session, loss, feed_dict)
elif nas_mode == 'enas_mode':
reload_tensorflow_variables(tf, session)
\ No newline at end of file
def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs):
# optional_inputs should be name(key)s of the optional inputs
try:
mutable_block = trial.get_current_parameter(mutable_id)
# There is a NAS tuner
chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
except KeyError:
# Try to find converted NAS parameters
params = trial.get_current_parameter()
expected_prefix = _construct_general_key(mutable_id, mutable_layer_id)
chosen_layer = params[expected_prefix + "/layer_choice"]
# find how many to choose
optional_input_size = int(params[expected_prefix + "/optional_input_size"]) # convert uniform to randint
# find who to choose, can duplicate
optional_input_state = params[expected_prefix + "/optional_input_chosen_state"]
chosen_inputs = []
# make sure dict -> list produce stable result by sorting
optional_inputs_keys = sorted(optional_inputs)
for i in range(optional_input_size):
chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)])
optional_input_state //= len(optional_inputs)
_logger.info("%s_%s: layer: %s, optional inputs: %s" % (mutable_id, mutable_layer_id,
chosen_layer, chosen_inputs))
return chosen_layer, chosen_inputs
def convert_nas_search_space(search_space):
"""
Args:
param search_space: raw search space
return: the new search space, mutable_layers will be converted into choice
"""
ret = dict()
for k, v in search_space.items():
if "_type" not in v:
# this should not happen
_logger.warning("There is no _type in one of your search space values with key '%s'"
". Please check your search space" % k)
ret[k] = v
elif v["_type"] != "mutable_layer":
ret[k] = v
else:
_logger.info("Converting mutable_layer search space with key '%s'" % k)
# v["_value"] looks like {'mutable_layer_1': {'layer_choice': ...} ...}
values = v["_value"]
for layer_name, layer_data in values.items():
# there should be at most layer_choice, optional_inputs, optional_input_size in layer_data
# add "_mutable_layer" as prefix so that they can be recovered later
layer_key = _construct_general_key(k, layer_name)
if layer_data.get("layer_choice"): # filter out empty choice and no choice
layer_choice = layer_data["layer_choice"]
else:
raise ValueError("No layer choice found in %s" % layer_key)
if layer_data.get("optional_input_size"):
input_size = layer_data["optional_input_size"]
if isinstance(input_size, int):
input_size = [input_size, input_size]
if input_size[0] > input_size[1] or input_size[0] < 0:
_logger.error("Might not be able to handle optional_input_size < 0, please double check")
input_size[1] += 1
else:
_logger.info("Optional input choices are set to empty by default in %s" % layer_key)
input_size = [0, 1]
if layer_data.get("optional_inputs"):
total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1)
else:
_logger.info("Optional inputs not found in %s" % layer_key)
total_state_size = 1
converted = {
layer_key + "/layer_choice": {
"_type": "choice", "_value": layer_choice
},
layer_key + "/optional_input_size": {
"_type": "randint", "_value": input_size
},
layer_key + "/optional_input_chosen_state": {
"_type": "randint", "_value": [0, total_state_size]
}
}
_logger.info(converted)
ret.update(converted)
return ret
def rewrite_nas_space(func):
@functools.wraps(func)
def wrap(self, search_space):
search_space = convert_nas_search_space(search_space)
return func(self, search_space)
return wrap
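# Illustrative example (added for this write-up, not part of the commit): how a "mutable_layer"
# entry is rewritten by convert_nas_search_space into plain choice/randint parameters that any
# HPO tuner can handle. The block/layer/choice names below are made up.
example_space = {
    "mutable_block_1": {
        "_type": "mutable_layer",
        "_value": {
            "mutable_layer_1": {
                "layer_choice": ["conv3x3", "conv5x5"],
                "optional_inputs": ["out1", "out2"],
                "optional_input_size": 1,
            }
        }
    }
}
converted = convert_nas_search_space(example_space)
# Expected keys, all prefixed with "_mutable_layer/mutable_block_1/mutable_layer_1":
#   .../layer_choice                -> {"_type": "choice",  "_value": ["conv3x3", "conv5x5"]}
#   .../optional_input_size         -> {"_type": "randint", "_value": [1, 2]}
#   .../optional_input_chosen_state -> {"_type": "randint", "_value": [0, 2]}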
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
functions for sampling from hidden state
"""
import tensorflow as tf
from .util import fc
class Pd:
"""
A particular probability distribution
"""
def flatparam(self):
raise NotImplementedError
def mode(self):
raise NotImplementedError
def neglogp(self, x):
# Usually it's easier to define the negative logprob
raise NotImplementedError
def kl(self, other):
raise NotImplementedError
def entropy(self):
raise NotImplementedError
def sample(self):
raise NotImplementedError
def logp(self, x):
return - self.neglogp(x)
def get_shape(self):
return self.flatparam().shape
@property
def shape(self):
return self.get_shape()
def __getitem__(self, idx):
return self.__class__(self.flatparam()[idx])
class PdType:
"""
Parametrized family of probability distributions
"""
def pdclass(self):
raise NotImplementedError
def pdfromflat(self, flat, mask, nsteps, size, is_act_model):
return self.pdclass()(flat, mask, nsteps, size, is_act_model)
def pdfromlatent(self, latent_vector, init_scale, init_bias):
raise NotImplementedError
def param_shape(self):
raise NotImplementedError
def sample_shape(self):
raise NotImplementedError
def sample_dtype(self):
raise NotImplementedError
def param_placeholder(self, prepend_shape, name=None):
return tf.placeholder(dtype=tf.float32, shape=prepend_shape+self.param_shape(), name=name)
def sample_placeholder(self, prepend_shape, name=None):
return tf.placeholder(dtype=self.sample_dtype(), shape=prepend_shape+self.sample_shape(), name=name)
class CategoricalPd(Pd):
"""
categorical probability distribution
"""
def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
self.logits = logits
self.mask_npinf = mask_npinf
self.nsteps = nsteps
self.size = size
self.is_act_model = is_act_model
def flatparam(self):
return self.logits
def mode(self):
return tf.argmax(self.logits, axis=-1)
@property
def mean(self):
return tf.nn.softmax(self.logits)
def neglogp(self, x):
"""
return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
Note: we can't use sparse_softmax_cross_entropy_with_logits because
the implementation does not allow second-order derivatives...
"""
if x.dtype in {tf.uint8, tf.int32, tf.int64}:
# one-hot encoding
x_shape_list = x.shape.as_list()
logits_shape_list = self.logits.get_shape().as_list()[:-1]
for xs, ls in zip(x_shape_list, logits_shape_list):
if xs is not None and ls is not None:
assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
x = tf.one_hot(x, self.logits.get_shape().as_list()[-1])
else:
# already encoded
assert x.shape.as_list() == self.logits.shape.as_list()
return tf.nn.softmax_cross_entropy_with_logits_v2(
logits=self.logits,
labels=x)
def kl(self, other):
"""kl"""
a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
ea0 = tf.exp(a0)
ea1 = tf.exp(a1)
z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
p0 = ea0 / z0
return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
def entropy(self):
"""compute entropy"""
a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
ea0 = tf.exp(a0)
z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
p0 = ea0 / z0
return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
def sample(self):
"""sample from logits"""
if not self.is_act_model:
re_res = tf.reshape(self.logits, [-1, self.nsteps, self.size])
masked_res = tf.math.add(re_res, self.mask_npinf)
re_masked_res = tf.reshape(masked_res, [-1, self.size])
u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
else:
u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
@classmethod
def fromflat(cls, flat):
return cls(flat)
class CategoricalPdType(PdType):
"""
to create CategoricalPd
"""
def __init__(self, ncat, nsteps, np_mask, is_act_model):
self.ncat = ncat
self.nsteps = nsteps
self.np_mask = np_mask
self.is_act_model = is_act_model
def pdclass(self):
return CategoricalPd
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
"""add fc and create CategoricalPd"""
pdparam, mask, mask_npinf = _matching_fc(latent_vector, 'pi', self.ncat, self.nsteps,
init_scale=init_scale, init_bias=init_bias,
np_mask=self.np_mask, is_act_model=self.is_act_model)
return self.pdfromflat(pdparam, mask_npinf, self.nsteps, self.ncat, self.is_act_model), pdparam, mask, mask_npinf
def param_shape(self):
return [self.ncat]
def sample_shape(self):
return []
def sample_dtype(self):
return tf.int32
def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
"""
add fc op, and add mask op when not in action mode
"""
if tensor.shape[-1] == size:
assert False
return tensor
else:
mask = tf.get_variable("act_mask", dtype=tf.float32, initializer=np_mask[0], trainable=False)
mask_npinf = tf.get_variable("act_mask_npinf", dtype=tf.float32, initializer=np_mask[1], trainable=False)
res = fc(tensor, name, size, init_scale=init_scale, init_bias=init_bias)
if not is_act_model:
re_res = tf.reshape(res, [-1, nsteps, size])
masked_res = tf.math.multiply(re_res, mask)
re_masked_res = tf.reshape(masked_res, [-1, size])
return re_masked_res, mask, mask_npinf
else:
return res, mask, mask_npinf
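# Note (added for this write-up, not part of the commit): CategoricalPd.sample() above relies on
# the Gumbel-max trick: argmax(logits - log(-log(u))) with u ~ Uniform(0, 1) draws an index with
# probability softmax(logits). A small self-contained numpy check of that fact:
import numpy as np

_rng = np.random.default_rng(0)
_logits = np.array([1.0, 2.0, 0.5])
_probs = np.exp(_logits) / np.exp(_logits).sum()
_draws = [np.argmax(_logits - np.log(-np.log(_rng.uniform(size=3)))) for _ in range(100000)]
# the empirical frequencies approach _probs
print(np.bincount(_draws, minlength=3) / len(_draws), _probs)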
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
the main model of policy/value network
"""
import tensorflow as tf
from .util import initialize, get_session
class Model:
"""
We use this object to :
__init__:
- Creates the step_model
- Creates the train_model
train():
- Make the training part (feedforward and backpropagation of gradients)
save/load():
- Save load the model
"""
def __init__(self, *, policy, nbatch_act, nbatch_train,
nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None):
"""
init
"""
self.sess = sess = get_session()
with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
# CREATE OUR TWO MODELS
# act_model that is used for sampling
act_model = policy(nbatch_act, 1, sess, np_mask=np_mask, is_act_model=True)
# Train model for training
if microbatch_size is None:
train_model = policy(nbatch_train, nsteps, sess, np_mask=np_mask, is_act_model=False)
else:
train_model = policy(microbatch_size, nsteps, sess, np_mask=np_mask, is_act_model=False)
# CREATE THE PLACEHOLDERS
self.A = A = train_model.pdtype.sample_placeholder([None])
self.ADV = ADV = tf.placeholder(tf.float32, [None])
self.R = R = tf.placeholder(tf.float32, [None])
# Keep track of old actor
self.OLDNEGLOGPAC = OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
# Keep track of old critic
self.OLDVPRED = OLDVPRED = tf.placeholder(tf.float32, [None])
self.LR = LR = tf.placeholder(tf.float32, [])
# Cliprange
self.CLIPRANGE = CLIPRANGE = tf.placeholder(tf.float32, [])
neglogpac = train_model.pd.neglogp(A)
# Calculate the entropy
# Entropy is used to improve exploration by limiting premature convergence to a suboptimal policy.
entropy = tf.reduce_mean(train_model.pd.entropy())
# CALCULATE THE LOSS
# Total loss = Policy gradient loss - entropy * entropy coefficient + Value coefficient * value loss
# Clip the value to reduce variability during Critic training
# Get the predicted value
vpred = train_model.vf
vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED, - CLIPRANGE, CLIPRANGE)
# Unclipped value
vf_losses1 = tf.square(vpred - R)
# Clipped value
vf_losses2 = tf.square(vpredclipped - R)
vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))
# Calculate ratio (pi current policy / pi old policy)
ratio = tf.exp(OLDNEGLOGPAC - neglogpac)
# Defining Loss = - J is equivalent to max J
pg_losses = -ADV * ratio
pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE, 1.0 + CLIPRANGE)
# Final PG loss
pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))
# Total loss
loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef
# UPDATE THE PARAMETERS USING LOSS
# 1. Get the model parameters
params = tf.trainable_variables('ppo2_model')
# 2. Build our trainer
self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
# 3. Calculate the gradients
grads_and_var = self.trainer.compute_gradients(loss, params)
grads, var = zip(*grads_and_var)
if max_grad_norm is not None:
# Clip the gradients (normalize)
grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
grads_and_var = list(zip(grads, var))
# zip aggregate each gradient with parameters associated
# For instance zip(ABCD, xyza) => Ax, By, Cz, Da
self.grads = grads
self.var = var
self._train_op = self.trainer.apply_gradients(grads_and_var)
self.loss_names = ['policy_loss', 'value_loss', 'policy_entropy', 'approxkl', 'clipfrac']
self.stats_list = [pg_loss, vf_loss, entropy, approxkl, clipfrac]
self.train_model = train_model
self.act_model = act_model
self.step = act_model.step
self.value = act_model.value
self.initial_state = act_model.initial_state
initialize()
def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):
"""
train the model.
Here we calculate advantage A(s,a) = R + gamma * V(s') - V(s)
Returns = R + gamma * V(s')
"""
advs = returns - values
# Normalize the advantages
advs = (advs - advs.mean()) / (advs.std() + 1e-8)
td_map = {
self.train_model.X : obs,
self.A : actions,
self.ADV : advs,
self.R : returns,
self.LR : lr,
self.CLIPRANGE : cliprange,
self.OLDNEGLOGPAC : neglogpacs,
self.OLDVPRED : values
}
if states is not None:
td_map[self.train_model.S] = states
td_map[self.train_model.M] = masks
return self.sess.run(
self.stats_list + [self._train_op],
td_map
)[:-1]
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
build policy/value network from model
"""
import tensorflow as tf
from .distri import CategoricalPdType
from .util import lstm_model, fc, observation_placeholder, adjust_shape
class PolicyWithValue:
"""
Encapsulates fields and methods for RL policy and value function estimation with shared parameters
"""
def __init__(self, env, observations, latent, estimate_q=False, vf_latent=None, sess=None, np_mask=None, is_act_model=False, **tensors):
"""
Parameters:
----------
env: RL environment
observations: tensorflow placeholder in which the observations will be fed
latent: latent state from which policy distribution parameters should be inferred
vf_latent: latent state from which value function should be inferred (if None, then latent is used)
sess: tensorflow session to run calculations in (if None, default session is used)
**tensors: tensorflow tensors for additional attributes such as state or mask
"""
self.X = observations
self.state = tf.constant([])
self.initial_state = None
self.__dict__.update(tensors)
vf_latent = vf_latent if vf_latent is not None else latent
vf_latent = tf.layers.flatten(vf_latent)
latent = tf.layers.flatten(latent)
# Based on the action space, will select what probability distribution type
self.np_mask = np_mask
self.pdtype = CategoricalPdType(env.action_space.n, env.nsteps, np_mask, is_act_model)
self.act_latent = latent
self.nh = env.action_space.n
self.pd, self.pi, self.mask, self.mask_npinf = self.pdtype.pdfromlatent(latent, init_scale=0.01)
# Take an action
self.action = self.pd.sample()
# Calculate the neg log of our probability
self.neglogp = self.pd.neglogp(self.action)
self.sess = sess or tf.get_default_session()
assert estimate_q is False
self.vf = fc(vf_latent, 'vf', 1)
self.vf = self.vf[:, 0]
if is_act_model:
self._build_model_for_step()
def _evaluate(self, variables, observation, **extra_feed):
sess = self.sess
feed_dict = {self.X: adjust_shape(self.X, observation)}
for inpt_name, data in extra_feed.items():
if inpt_name in self.__dict__.keys():
inpt = self.__dict__[inpt_name]
if isinstance(inpt, tf.Tensor) and inpt._op.type == 'Placeholder':
feed_dict[inpt] = adjust_shape(inpt, data)
return sess.run(variables, feed_dict)
def _build_model_for_step(self):
# multiply with weight and apply mask on self.act_latent to generate
self.act_step = step = tf.placeholder(shape=(), dtype=tf.int64, name='act_step')
with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
from .util import ortho_init
nin = self.act_latent.get_shape()[1].value
w = tf.get_variable("w", [nin, self.nh], initializer=ortho_init(0.01))
b = tf.get_variable("b", [self.nh], initializer=tf.constant_initializer(0.0))
logits = tf.matmul(self.act_latent, w)+b
piece = tf.slice(self.mask, [step, 0], [1, self.nh])
re_piece = tf.reshape(piece, [-1])
masked_logits = tf.math.multiply(logits, re_piece)
npinf_piece = tf.slice(self.mask_npinf, [step, 0], [1, self.nh])
re_npinf_piece = tf.reshape(npinf_piece, [-1])
def sample(logits, mask_npinf):
new_logits = tf.math.add(logits, mask_npinf)
u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)
def neglogp(logits, x):
# return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
# Note: we can't use sparse_softmax_cross_entropy_with_logits because
# the implementation does not allow second-order derivatives...
if x.dtype in {tf.uint8, tf.int32, tf.int64}:
# one-hot encoding
x_shape_list = x.shape.as_list()
logits_shape_list = logits.get_shape().as_list()[:-1]
for xs, ls in zip(x_shape_list, logits_shape_list):
if xs is not None and ls is not None:
assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
x = tf.one_hot(x, logits.get_shape().as_list()[-1])
else:
# already encoded
assert x.shape.as_list() == logits.shape.as_list()
return tf.nn.softmax_cross_entropy_with_logits_v2(
logits=logits,
labels=x)
self.act_action = sample(masked_logits, re_npinf_piece)
self.act_neglogp = neglogp(masked_logits, self.act_action)
def step(self, step, observation, **extra_feed):
"""
Compute next action(s) given the observation(s)
Parameters:
----------
observation: observation data (either single or a batch)
**extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
Returns:
-------
(action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
"""
extra_feed['act_step'] = step
a, v, state, neglogp = self._evaluate([self.act_action, self.vf, self.state, self.act_neglogp], observation, **extra_feed)
if state.size == 0:
state = None
return a, v, state, neglogp
def value(self, ob, *args, **kwargs):
"""
Compute value estimate(s) given the observation(s)
Parameters:
----------
observation: observation data (either single or a batch)
**extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
Returns:
-------
value estimate
"""
return self._evaluate(self.vf, ob, *args, **kwargs)
def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
"""
build lstm policy and value network, they share the same lstm network.
the parameters all use their default values.
"""
policy_network = lstm_model(**policy_kwargs)
def policy_fn(nbatch=None, nsteps=None, sess=None, observ_placeholder=None, np_mask=None, is_act_model=False):
ob_space = model_config.observation_space
X = observ_placeholder if observ_placeholder is not None else observation_placeholder(ob_space, batch_size=nbatch)
extra_tensors = {}
# encode_observation is not necessary anymore as we use embedding_lookup
encoded_x = X
with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
policy_latent = policy_network(encoded_x, 1, model_config.observation_space.n)
if isinstance(policy_latent, tuple):
policy_latent, recurrent_tensors = policy_latent
if recurrent_tensors is not None:
# recurrent architecture, need a few more steps
nenv = nbatch // nsteps
assert nenv > 0, 'Bad input for recurrent policy: batch size {} smaller than nsteps {}'.format(nbatch, nsteps)
policy_latent, recurrent_tensors = policy_network(encoded_x, nenv, model_config.observation_space.n)
extra_tensors.update(recurrent_tensors)
_v_net = value_network
assert _v_net is None or _v_net == 'shared'
vf_latent = policy_latent
policy = PolicyWithValue(
env=model_config,
observations=X,
latent=policy_latent,
vf_latent=vf_latent,
sess=sess,
estimate_q=estimate_q,
np_mask=np_mask,
is_act_model=is_act_model,
**extra_tensors
)
return policy
return policy_fn
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
ppo_tuner.py including:
class PPOTuner
"""
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import copy
import logging
import numpy as np
import json_tricks
from gym import spaces
import nni
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward
from .model import Model
from .util import set_global_seeds
from .policy import build_lstm_policy
logger = logging.getLogger('ppo_tuner_AutoML')
def constfn(val):
"""wrap as function"""
def f(_):
return val
return f
class ModelConfig:
"""
Configurations of the PPO model
"""
def __init__(self):
self.observation_space = None
self.action_space = None
self.num_envs = 0
self.nsteps = 0
self.ent_coef = 0.0
self.lr = 3e-4
self.vf_coef = 0.5
self.max_grad_norm = 0.5
self.gamma = 0.99
self.lam = 0.95
self.cliprange = 0.2
self.embedding_size = None # the embedding is for each action
self.noptepochs = 4 # number of training epochs per update
self.total_timesteps = 5000 # number of timesteps (i.e. number of actions taken in the environment)
self.nminibatches = 4 # number of training minibatches per update. For recurrent policies,
# should be smaller or equal than number of environments run in parallel.
class TrialsInfo:
"""
Information about each trial from one model inference
"""
def __init__(self, obs, actions, values, neglogpacs, dones, last_value, inf_batch_size):
self.iter = 0
self.obs = obs
self.actions = actions
self.values = values
self.neglogpacs = neglogpacs
self.dones = dones
self.last_value = last_value
self.rewards = None
self.returns = None
self.inf_batch_size = inf_batch_size
#self.states = None
def get_next(self):
"""
get actions of the next trial
"""
if self.iter >= self.inf_batch_size:
return None, None
actions = []
for step in self.actions:
actions.append(step[self.iter])
self.iter += 1
return self.iter - 1, actions
def update_rewards(self, rewards, returns):
"""
after the trial is finished, the reward and return of this trial are updated
"""
self.rewards = rewards
self.returns = returns
def convert_shape(self):
"""
convert shape
"""
def sf01(arr):
"""
swap and then flatten axes 0 and 1
"""
s = arr.shape
return arr.swapaxes(0, 1).reshape(s[0] * s[1], *s[2:])
self.obs = sf01(self.obs)
self.returns = sf01(self.returns)
self.dones = sf01(self.dones)
self.actions = sf01(self.actions)
self.values = sf01(self.values)
self.neglogpacs = sf01(self.neglogpacs)
class PPOModel:
"""
PPO Model
"""
def __init__(self, model_config, mask):
self.model_config = model_config
self.states = None # initial state of lstm in policy/value network
self.nupdates = None # the number of times train() is invoked, used to tune lr and cliprange
self.cur_update = 1 # record the current update
self.np_mask = mask # record the mask of each action within one trial
set_global_seeds(None)
assert isinstance(self.model_config.lr, float)
self.lr = constfn(self.model_config.lr)
assert isinstance(self.model_config.cliprange, float)
self.cliprange = constfn(self.model_config.cliprange)
# build lstm policy network, value share the same network
policy = build_lstm_policy(model_config)
# Get the nb of env
nenvs = model_config.num_envs
# Calculate the batch_size
self.nbatch = nbatch = nenvs * model_config.nsteps # num of record per update
nbatch_train = nbatch // model_config.nminibatches # get batch size
# self.nupdates is used to tune lr and cliprange
self.nupdates = self.model_config.total_timesteps // self.nbatch
# Instantiate the model object (that creates act_model and train_model)
self.model = Model(policy=policy, nbatch_act=nenvs, nbatch_train=nbatch_train,
nsteps=model_config.nsteps, ent_coef=model_config.ent_coef, vf_coef=model_config.vf_coef,
max_grad_norm=model_config.max_grad_norm, np_mask=self.np_mask)
self.states = self.model.initial_state
logger.info('=== finished PPOModel initialization')
def inference(self, num):
"""
generate actions along with related info from policy network.
observation is the action of the last step.
Parameters:
----------
num: the number of trials to generate
"""
# Here, we init the lists that will contain the mb of experiences
mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
# initial observation
# use the (n+1)th embedding to represent the first step action
first_step_ob = self.model_config.action_space.n
obs = [first_step_ob for _ in range(num)]
dones = [True for _ in range(num)]
states = self.states
# For n in range number of steps
for cur_step in range(self.model_config.nsteps):
# Given the observations, get actions, values, and neglogpacs from the policy network
actions, values, states, neglogpacs = self.model.step(cur_step, obs, S=states, M=dones)
mb_obs.append(obs.copy())
mb_actions.append(actions)
mb_values.append(values)
mb_neglogpacs.append(neglogpacs)
mb_dones.append(dones)
# There is no real environment here: the chosen actions become the observations of the next step
obs[:] = actions
if cur_step == self.model_config.nsteps - 1:
dones = [True for _ in range(num)]
else:
dones = [False for _ in range(num)]
#batch of steps to batch of rollouts
np_obs = np.asarray(obs)
mb_obs = np.asarray(mb_obs, dtype=np_obs.dtype)
mb_actions = np.asarray(mb_actions)
mb_values = np.asarray(mb_values, dtype=np.float32)
mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
mb_dones = np.asarray(mb_dones, dtype=np.bool)
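# each mb_* array now has shape (nsteps, num, ...): one entry per decision step per generated trial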
last_values = self.model.value(np_obs, S=states, M=dones)
return mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values
def compute_rewards(self, trials_info, trials_result):
"""
compute the rewards of the trials in trials_info based on trials_result,
and update the rewards in trials_info
Parameters:
----------
trials_info: info of the generated trials
trials_result: final results (e.g., acc) of the generated trials
"""
mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
# discount/bootstrap off value fn
mb_returns = np.zeros_like(mb_rewards)
mb_advs = np.zeros_like(mb_rewards)
lastgaelam = 0
last_dones = np.asarray([True for _ in trials_result], dtype=np.bool) # ugly
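# generalized advantage estimation (GAE), matching the recursion below:
#   delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t)
#   A_t     = delta_t + gamma * lam * (1 - done_{t+1}) * A_{t+1}
# returns are the advantages plus the value baseline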
for t in reversed(range(self.model_config.nsteps)):
if t == self.model_config.nsteps - 1:
nextnonterminal = 1.0 - last_dones
nextvalues = trials_info.last_value
else:
nextnonterminal = 1.0 - trials_info.dones[t+1]
nextvalues = trials_info.values[t+1]
delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
mb_returns = mb_advs + trials_info.values
trials_info.update_rewards(mb_rewards, mb_returns)
trials_info.convert_shape()
def train(self, trials_info, nenvs):
"""
train the policy/value network using trials_info
Parameters:
----------
trials_info: complete info of the generated trials from the previous inference
nenvs: the batch size of the (previous) inference
"""
# keep frac decay for future optimization
if self.cur_update <= self.nupdates:
frac = 1.0 - (self.cur_update - 1.0) / self.nupdates
else:
logger.warning('current update (self.cur_update) %d has exceeded total updates (self.nupdates) %d',
self.cur_update, self.nupdates)
frac = 1.0 - (self.nupdates - 1.0) / self.nupdates
lrnow = self.lr(frac)
cliprangenow = self.cliprange(frac)
self.cur_update += 1
states = self.states
assert states is not None # recurrent version
assert nenvs % self.model_config.nminibatches == 0
envsperbatch = nenvs // self.model_config.nminibatches
envinds = np.arange(nenvs)
flatinds = np.arange(nenvs * self.model_config.nsteps).reshape(nenvs, self.model_config.nsteps)
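# recurrent minibatching: sample whole environments (complete rollouts) rather than individual
# steps so that the per-environment lstm states passed below stay aligned with their sequences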
for _ in range(self.model_config.noptepochs):
np.random.shuffle(envinds)
for start in range(0, nenvs, envsperbatch):
end = start + envsperbatch
mbenvinds = envinds[start:end]
mbflatinds = flatinds[mbenvinds].ravel()
slices = (arr[mbflatinds] for arr in (trials_info.obs, trials_info.returns, trials_info.dones,
trials_info.actions, trials_info.values, trials_info.neglogpacs))
mbstates = states[mbenvinds]
self.model.train(lrnow, cliprangenow, *slices, mbstates)
class PPOTuner(Tuner):
"""
PPOTuner
"""
def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
"""
initialization. The PPO model is not initialized here because the search space has not been received yet.
Parameters:
----------
optimize_mode: maximize or minimize
trials_per_update: number of trials to have for each model update
epochs_per_update: number of epochs to run for each model update
minibatch_size: minibatch size (number of trials) for the update
ent_coef: policy entropy coefficient in the optimization objective
lr: learning rate of the model (lstm network), constant
vf_coef: value function loss coefficient in the optimization objective
max_grad_norm: gradient norm clipping coefficient
gamma: discounting factor
lam: advantage estimation discounting factor (lambda in the paper)
cliprange: cliprange in the PPO algorithm, constant
"""
self.optimize_mode = OptimizeMode(optimize_mode)
self.model_config = ModelConfig()
self.model = None
self.search_space = None
self.running_trials = {} # key: parameter_id, value: actions/states/etc.
self.inf_batch_size = trials_per_update # number of trials to generate in one inference
self.first_inf = True # indicates whether this is the first time inference is run to generate new trials
self.trials_result = [None for _ in range(self.inf_batch_size)] # results of finished trials
self.credit = 0 # record the unsatisfied trial requests
self.param_ids = []
self.finished_trials = 0
self.chosen_arch_template = {}
self.actions_spaces = None
self.actions_to_config = None
self.full_act_space = None
self.trials_info = None
self.all_trials = {} # used to dedup the same trial, key: config, value: final result
self.model_config.num_envs = self.inf_batch_size
self.model_config.noptepochs = epochs_per_update
self.model_config.nminibatches = minibatch_size
self.send_trial_callback = None
logger.info('=== finished PPOTuner initialization')
def _process_one_nas_space(self, block_name, block_space):
"""
process nas space to determine observation space and action space
Parameters:
----------
block_name: the name of the mutable block
block_space: search space of this mutable block
Returns:
----------
actions_spaces: list of the space of each action
actions_to_config: the mapping from action to generated configuration
"""
actions_spaces = []
actions_to_config = []
block_arch_temp = {}
for l_name, layer in block_space.items():
chosen_layer_temp = {}
if len(layer['layer_choice']) > 1:
actions_spaces.append(layer['layer_choice'])
actions_to_config.append((block_name, l_name, 'chosen_layer'))
chosen_layer_temp['chosen_layer'] = None
else:
assert len(layer['layer_choice']) == 1
chosen_layer_temp['chosen_layer'] = layer['layer_choice'][0]
if layer['optional_input_size'] not in [0, 1, [0, 1]]:
raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the specified one is %s'
% (layer['optional_input_size']))
if isinstance(layer['optional_input_size'], list):
actions_spaces.append(["None", *layer['optional_inputs']])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 1:
actions_spaces.append(layer['optional_inputs'])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 0:
chosen_layer_temp['chosen_inputs'] = []
else:
raise ValueError('invalid type and value of optional_input_size')
block_arch_temp[l_name] = chosen_layer_temp
self.chosen_arch_template[block_name] = block_arch_temp
return actions_spaces, actions_to_config
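# Illustrative (hypothetical) block_space entry:
#   {'conv1': {'layer_choice': ['conv3x3', 'conv5x5'],
#              'optional_inputs': ['pool1'], 'optional_input_size': [0, 1]}}
# would yield two actions: one choosing the layer, one choosing 'None' or 'pool1' as input.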
def _process_nas_space(self, search_space):
"""
process nas search space to get action/observation space
"""
actions_spaces = []
actions_to_config = []
for b_name, block in search_space.items():
if block['_type'] != 'mutable_layer':
raise ValueError('PPOTuner only accepts mutable_layer type in the search space, but the current one is %s'%(block['_type']))
block = block['_value']
act, act_map = self._process_one_nas_space(b_name, block)
actions_spaces.extend(act)
actions_to_config.extend(act_map)
# calculate observation space
dedup = {}
for step in actions_spaces:
for action in step:
dedup[action] = 1
full_act_space = [act for act, _ in dedup.items()]
assert len(full_act_space) == len(dedup)
observation_space = len(full_act_space)
nsteps = len(actions_spaces)
return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps
def _generate_action_mask(self):
"""
different steps could have different action spaces. To deal with this, we merge all the
possible actions into one action space and use masks to indicate the available actions at each step
"""
two_masks = []
mask = []
for acts in self.actions_spaces:
one_mask = [0 for _ in range(len(self.full_act_space))]
for act in acts:
idx = self.full_act_space.index(act)
one_mask[idx] = 1
mask.append(one_mask)
two_masks.append(mask)
mask = []
for acts in self.actions_spaces:
one_mask = [-np.inf for _ in range(len(self.full_act_space))]
for act in acts:
idx = self.full_act_space.index(act)
one_mask[idx] = 0
mask.append(one_mask)
two_masks.append(mask)
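# two_masks[0] holds 0/1 validity masks and two_masks[1] holds additive -inf/0 masks,
# which are presumably added to the policy logits so that unavailable actions get zero probability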
return np.asarray(two_masks, dtype=np.float32)
def update_search_space(self, search_space):
"""
get search space, currently the space only includes that for NAS
Parameters:
----------
search_space: search space for NAS
Returns:
-------
no return
"""
logger.info('=== update search space %s', search_space)
assert self.search_space is None
self.search_space = search_space
assert self.model_config.observation_space is None
assert self.model_config.action_space is None
self.actions_spaces, self.actions_to_config, self.full_act_space, obs_space, nsteps = self._process_nas_space(search_space)
self.model_config.observation_space = spaces.Discrete(obs_space)
self.model_config.action_space = spaces.Discrete(obs_space)
self.model_config.nsteps = nsteps
# generate mask in numpy
mask = self._generate_action_mask()
assert self.model is None
self.model = PPOModel(self.model_config, mask)
def _actions_to_config(self, actions):
"""
given actions, generate the corresponding trial configuration
"""
chosen_arch = copy.deepcopy(self.chosen_arch_template)
for cnt, act in enumerate(actions):
act_name = self.full_act_space[act]
(block_name, layer_name, key) = self.actions_to_config[cnt]
if key == 'chosen_inputs':
if act_name == 'None':
chosen_arch[block_name][layer_name][key] = []
else:
chosen_arch[block_name][layer_name][key] = [act_name]
elif key == 'chosen_layer':
chosen_arch[block_name][layer_name][key] = act_name
else:
raise ValueError('unrecognized key: {0}'.format(key))
return chosen_arch
def generate_multiple_parameters(self, parameter_id_list, **kwargs):
"""
Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
"""
result = []
self.send_trial_callback = kwargs['st_callback']
for parameter_id in parameter_id_list:
had_exception = False
try:
logger.debug("generating param for %s", parameter_id)
res = self.generate_parameters(parameter_id, **kwargs)
except nni.NoMoreTrialError:
had_exception = True
if not had_exception:
result.append(res)
return result
def generate_parameters(self, parameter_id, **kwargs):
"""
generate parameters; if no trial configuration is available now, increase self.credit by 1 so the config can be sent later
"""
if self.first_inf:
self.trials_result = [None for _ in range(self.inf_batch_size)]
mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
mb_dones, last_values, self.inf_batch_size)
self.first_inf = False
trial_info_idx, actions = self.trials_info.get_next()
if trial_info_idx is None:
self.credit += 1
self.param_ids.append(parameter_id)
raise nni.NoMoreTrialError('no more parameters now.')
self.running_trials[parameter_id] = trial_info_idx
new_config = self._actions_to_config(actions)
return new_config
def _next_round_inference(self):
"""
"""
self.finished_trials = 0
self.model.compute_rewards(self.trials_info, self.trials_result)
self.model.train(self.trials_info, self.inf_batch_size)
self.running_trials = {}
# generate new trials
self.trials_result = [None for _ in range(self.inf_batch_size)]
mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
mb_dones, last_values, self.inf_batch_size)
# check credit and submit new trials
for _ in range(self.credit):
trial_info_idx, actions = self.trials_info.get_next()
if trial_info_idx is None:
logger.warning('Not enough trial configs; trials_per_update is suggested to be larger than trialConcurrency')
break
assert self.param_ids
param_id = self.param_ids.pop()
self.running_trials[param_id] = trial_info_idx
new_config = self._actions_to_config(actions)
self.send_trial_callback(param_id, new_config)
self.credit -= 1
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""
receive a trial's result. If the number of finished trials equals self.inf_batch_size, start the next update to
train the model
"""
trial_info_idx = self.running_trials.pop(parameter_id, None)
assert trial_info_idx is not None
value = extract_scalar_reward(value)
if self.optimize_mode == OptimizeMode.Minimize:
value = -value
self.trials_result[trial_info_idx] = value
self.finished_trials += 1
if self.finished_trials == self.inf_batch_size:
self._next_round_inference()
def trial_end(self, parameter_id, success, **kwargs):
"""
to deal with trial failure
"""
if not success:
if parameter_id not in self.running_trials:
logger.warning('The trial failed, but self.running_trials does not contain it')
return
trial_info_idx = self.running_trials.pop(parameter_id, None)
assert trial_info_idx is not None
# use mean of finished trials as the result of this failed trial
values = [val for val in self.trials_result if val is not None]
logger.warning('values of finished trials: %s', values)
self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
self.finished_trials += 1
if self.finished_trials == self.inf_batch_size:
self._next_round_inference()
def import_data(self, data):
"""
Import additional data for tuning
Parameters
----------
data: a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
"""
logger.warning('PPOTuner cannot leverage imported data.')
enum34
gym
tensorflow
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
util functions
"""
import os
import random
import multiprocessing
import numpy as np
import tensorflow as tf
from gym.spaces import Discrete, Box, MultiDiscrete
def set_global_seeds(i):
"""set global seeds"""
rank = 0
myseed = i + 1000 * rank if i is not None else None
tf.set_random_seed(myseed)
np.random.seed(myseed)
random.seed(myseed)
def batch_to_seq(h, nbatch, nsteps, flat=False):
"""convert from batch to sequence"""
if flat:
h = tf.reshape(h, [nbatch, nsteps])
else:
h = tf.reshape(h, [nbatch, nsteps, -1])
return [tf.squeeze(v, [1]) for v in tf.split(axis=1, num_or_size_splits=nsteps, value=h)]
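# e.g. an input of shape (nbatch * nsteps, features) is reshaped to (nbatch, nsteps, features)
# and split into a list of nsteps tensors, each of shape (nbatch, features)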
def seq_to_batch(h, flat=False):
"""convert from sequence to batch"""
shape = h[0].get_shape().as_list()
if not flat:
assert len(shape) > 1
nh = h[0].get_shape()[-1].value
return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])
else:
return tf.reshape(tf.stack(values=h, axis=1), [-1])
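# inverse of batch_to_seq: the nsteps tensors of shape (nbatch, nh) are merged back into
# a single batch of shape (nbatch * nsteps, nh)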
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
"""lstm cell"""
nbatch, nin = [v.value for v in xs[0].get_shape()]
with tf.variable_scope(scope):
wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))
c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
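# m is the done mask from the previous step: when m == 1 the cell and hidden states are
# reset to zero before that step is processed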
for idx, (x, m) in enumerate(zip(xs, ms)):
c = c*(1-m)
h = h*(1-m)
z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
i = tf.nn.sigmoid(i)
f = tf.nn.sigmoid(f)
o = tf.nn.sigmoid(o)
u = tf.tanh(u)
c = f*c + i*u
h = o*tf.tanh(c)
xs[idx] = h
s = tf.concat(axis=1, values=[c, h])
return xs, s
def lstm_model(nlstm=128, layer_norm=False):
"""
Builds an LSTM (Long Short-Term Memory) network to be used in a policy.
Note that the resulting function returns not only the output of the LSTM
(i.e. hidden state of lstm for each step in the sequence), but also a dictionary
with auxiliary tensors to be set as policy attributes.
Specifically,
S is a placeholder to feed current state (LSTM state has to be managed outside policy)
M is a placeholder for the mask (used to mask out observations after the end of the episode, but can be used for other purposes too)
initial_state is a numpy array containing initial lstm state (usually zeros)
state is the output LSTM state (to be fed into S at the next call)
An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example
Parameters:
----------
nlstm: int LSTM hidden state size
layer_norm: bool if True, layer-normalized version of LSTM is used
Returns:
-------
function that builds LSTM with a given input tensor / placeholder
"""
def network_fn(X, nenv=1, obs_size=-1):
with tf.variable_scope("emb", reuse=tf.AUTO_REUSE):
w_emb = tf.get_variable("w_emb", [obs_size+1, 32])
X = tf.nn.embedding_lookup(w_emb, X)
nbatch = X.shape[0]
nsteps = nbatch // nenv
h = tf.layers.flatten(X)
M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
S = tf.placeholder(tf.float32, [nenv, 2*nlstm]) #states
xs = batch_to_seq(h, nenv, nsteps)
ms = batch_to_seq(M, nenv, nsteps)
assert not layer_norm
h5, snew = lstm(xs, ms, S, scope='lstm', nh=nlstm)
h = seq_to_batch(h5)
initial_state = np.zeros(S.shape.as_list(), dtype=float)
return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
return network_fn
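# Usage sketch (hypothetical variable names):
#   network_fn = lstm_model(nlstm=128)
#   h, extra = network_fn(X, nenv=20, obs_size=num_actions)
# feed extra['initial_state'] into extra['S'] on the first call and extra['state'] afterwards,
# and feed the done flags of the previous step into extra['M']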
def ortho_init(scale=1.0):
"""init approach"""
def _ortho_init(shape, dtype, partition_info=None):
#lasagne ortho init for tf
shape = tuple(shape)
if len(shape) == 2:
flat_shape = shape
elif len(shape) == 4: # assumes NHWC
flat_shape = (np.prod(shape[:-1]), shape[-1])
else:
raise NotImplementedError
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
q = u if u.shape == flat_shape else v # pick the one with the correct shape
q = q.reshape(shape)
return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
return _ortho_init
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0):
"""fully connected op"""
with tf.variable_scope(scope):
nin = x.get_shape()[1].value
w = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
b = tf.get_variable("b", [nh], initializer=tf.constant_initializer(init_bias))
return tf.matmul(x, w)+b
def _check_shape(placeholder_shape, data_shape):
"""
check if two shapes are compatible (i.e. differ only by dimensions of size 1, or by the batch dimension); currently always returns True
"""
return True
# ================================================================
# Shape adjustment for feeding into tf placeholders
# ================================================================
def adjust_shape(placeholder, data):
"""
adjust shape of the data to the shape of the placeholder if possible.
If shape is incompatible, AssertionError is thrown
Parameters:
placeholder: tensorflow input placeholder
data: input data to be (potentially) reshaped to be fed into placeholder
Returns:
reshaped data
"""
if not isinstance(data, np.ndarray) and not isinstance(data, list):
return data
if isinstance(data, list):
data = np.array(data)
placeholder_shape = [x or -1 for x in placeholder.shape.as_list()]
assert _check_shape(placeholder_shape, data.shape), \
'Shape of data {} is not compatible with shape of the placeholder {}'.format(data.shape, placeholder_shape)
return np.reshape(data, placeholder_shape)
# ================================================================
# Global session
# ================================================================
def get_session(config=None):
"""Get default session or create one with a given config"""
sess = tf.get_default_session()
if sess is None:
sess = make_session(config=config, make_default=True)
return sess
def make_session(config=None, num_cpu=None, make_default=False, graph=None):
"""Returns a session that will use <num_cpu> CPU's only"""
if num_cpu is None:
num_cpu = int(os.getenv('RCALL_NUM_CPU', multiprocessing.cpu_count()))
if config is None:
config = tf.ConfigProto(
allow_soft_placement=True,
inter_op_parallelism_threads=num_cpu,
intra_op_parallelism_threads=num_cpu)
config.gpu_options.allow_growth = True
if make_default:
return tf.InteractiveSession(config=config, graph=graph)
else:
return tf.Session(config=config, graph=graph)
ALREADY_INITIALIZED = set()
def initialize():
"""Initialize all the uninitialized variables in the global scope."""
new_variables = set(tf.global_variables()) - ALREADY_INITIALIZED
get_session().run(tf.variables_initializer(new_variables))
ALREADY_INITIALIZED.update(new_variables)
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
"""
Create placeholder to feed observations into of the size appropriate to the observation space
Parameters:
----------
ob_space: gym.Space observation space
batch_size: int size of the batch to be fed into input. Can be left None in most cases.
name: str name of the placeholder
Returns:
-------
tensorflow placeholder tensor
"""
assert isinstance(ob_space, (Discrete, Box, MultiDiscrete)), \
'Can only deal with Discrete, Box and MultiDiscrete observation spaces for now'
dtype = ob_space.dtype
if dtype == np.int8:
dtype = np.uint8
return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=dtype, name=name)
def explained_variance(ypred, y):
"""
Computes fraction of variance that ypred explains about y.
Returns 1 - Var[y-ypred] / Var[y]
interpretation:
ev=0 => might as well have predicted zero
ev=1 => perfect prediction
ev<0 => worse than just predicting zero
"""
assert y.ndim == 1 and ypred.ndim == 1
vary = np.var(y)
return np.nan if vary == 0 else 1 - np.var(y-ypred)/vary
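# e.g. explained_variance(np.array([1., 2., 3.]), np.array([1., 2., 3.])) returns 1.0 (perfect prediction)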
@@ -17,11 +17,10 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging
import nni
from .recoverable import Recoverable
_logger = logging.getLogger(__name__)
...
@@ -39,17 +39,18 @@ class AnnotationTestCase(TestCase):
         shutil.rmtree('_generated')
     def test_search_space_generator(self):
-        search_space = generate_search_space('testcase/annotated')
+        shutil.copytree('testcase/annotated', '_generated/annotated')
+        search_space = generate_search_space('_generated/annotated')
         with open('testcase/searchspace.json') as f:
             self.assertEqual(search_space, json.load(f))
     def test_code_generator(self):
-        code_dir = expand_annotations('testcase/usercode', '_generated', nas_mode='classic_mode')
-        self.assertEqual(code_dir, '_generated')
-        self._assert_source_equal('testcase/annotated/nas.py', '_generated/nas.py')
-        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
-        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
-        with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
+        code_dir = expand_annotations('testcase/usercode', '_generated/usercode', nas_mode='classic_mode')
+        self.assertEqual(code_dir, '_generated/usercode')
+        self._assert_source_equal('testcase/annotated/nas.py', '_generated/usercode/nas.py')
+        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/usercode/mnist.py')
+        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/usercode/dir/simple.py')
+        with open('testcase/usercode/nonpy.txt') as src, open('_generated/usercode/nonpy.txt') as dst:
             assert src.read() == dst.read()
     def test_annotation_detecting(self):
...
@@ -142,6 +142,24 @@ tuner_schema_dict = {
         Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
         Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
     },
+    'PPOTuner': {
+        'builtinTunerName': 'PPOTuner',
+        'classArgs': {
+            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
+            Optional('trials_per_update'): setNumberRange('trials_per_update', int, 0, 99999),
+            Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999),
+            Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999),
+            Optional('ent_coef'): setType('ent_coef', float),
+            Optional('lr'): setType('lr', float),
+            Optional('vf_coef'): setType('vf_coef', float),
+            Optional('max_grad_norm'): setType('max_grad_norm', float),
+            Optional('gamma'): setType('gamma', float),
+            Optional('lam'): setType('lam', float),
+            Optional('cliprange'): setType('cliprange', float),
+        },
+        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
+        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
+    },
     'customized': {
         'codeDir': setPathCheck('codeDir'),
         'classFileName': setType('classFileName', str),
...
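# For reference, a tuner section in an NNI experiment config.yml accepted by the schema
# above might look like the following sketch (field values are only examples):
#   tuner:
#     builtinTunerName: PPOTuner
#     classArgs:
#       optimize_mode: maximize
#       trials_per_update: 20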