Unverified Commit 55f48d27 authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

Merge dev-nas-tuner back to master (#1531)

* PPO tuner for NAS, supports NNI's NAS interface (#1380)
parent 7246593f
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string
# tf.app.flags-based configuration. The DEFINE_* helpers (imported from
# src.utils) also record every flag name so print_user_flags can list them.
flags = tf.app.flags
FLAGS = flags.FLAGS

# --- experiment / data flags ---
DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
# BUGFIX: help text said 'NCWH'; the two valid formats are NHWC and NCHW.
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCHW'")
DEFINE_string("search_for", None, "Must be [macro|micro]")

# --- child network training flags ---
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")

# --- logging flags ---
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
def lstm(x, prev_c, prev_h, w):
    """One LSTM step: combine input x with state (prev_c, prev_h) using weight w."""
    gates = tf.matmul(tf.concat([x, prev_h], axis=1), w)
    in_gate, forget_gate, out_gate, cell_in = tf.split(gates, 4, axis=1)
    in_gate = tf.sigmoid(in_gate)
    forget_gate = tf.sigmoid(forget_gate)
    out_gate = tf.sigmoid(out_gate)
    cell_in = tf.tanh(cell_in)
    next_c = in_gate * cell_in + forget_gate * prev_c
    next_h = out_gate * tf.tanh(next_c)
    return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
    """Run a stack of LSTM layers: layer 0 consumes x, later layers consume
    the hidden state of the layer below. Returns the new (cells, hiddens) lists."""
    next_c = []
    next_h = []
    for layer_id, (cell, hidden, weight) in enumerate(zip(prev_c, prev_h, w)):
        layer_input = x if layer_id == 0 else next_h[-1]
        new_c, new_h = lstm(layer_input, cell, hidden, weight)
        next_c.append(new_c)
        next_h.append(new_h)
    return next_c, next_h
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
    """Create (or reuse) a weight variable; defaults to He-normal initialization."""
    init = (tf.contrib.keras.initializers.he_normal(seed=seed)
            if initializer is None else initializer)
    return tf.get_variable(name, shape, initializer=init, trainable=trainable)
def create_bias(name, shape, initializer=None):
    """Create (or reuse) a bias variable; defaults to zero initialization."""
    init = (tf.constant_initializer(0.0, dtype=tf.float32)
            if initializer is None else initializer)
    return tf.get_variable(name, shape, initializer=init)
def conv_op(inputs, filter_size, is_training, count, out_filters,
            data_format, ch_mul=1, start_idx=None, separable=False):
    """Convolution branch: 1x1 "stem" conv + BN + relu, then a (separable) conv.

    Args:
        inputs: input feature map, layout per `data_format`.
        filter_size: spatial size of the main conv kernel.
        is_training: python bool selecting train/eval batch-norm behavior.
        count: how many output_channels to take.
        out_filters: channel count produced by the 1x1 stem conv.
        data_format: "NHWC" or "NCHW".
        ch_mul: depth multiplier for the separable conv.
        start_idx: where to start taking the output channels. if None, assuming
            fixed_arc mode.
        separable: use a depthwise-separable conv instead of a plain conv.
    """
    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value
    # 1x1 conv brings the input up to out_filters channels.
    with tf.variable_scope("inp_conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
                         "SAME", data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)
    with tf.variable_scope("out_conv_{}".format(filter_size)):
        if start_idx is None:
            # fixed-arc mode: produce exactly `count` channels directly.
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                w_point = create_weight(
                    "w_point", [1, 1, out_filters * ch_mul, count])
                x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
                                           padding="SAME", data_format=data_format)
                x = batch_norm(
                    x, is_training, data_format=data_format)
            else:
                # NOTE(review): x has out_filters channels at this point (after
                # inp_conv_1), yet this kernel is declared with inp_c input
                # channels -- it only works when inp_c == out_filters; confirm
                # upstream shapes.
                w = create_weight(
                    "w", [filter_size, filter_size, inp_c, count])
                x = tf.nn.conv2d(
                    x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
                x = batch_norm(
                    x, is_training, data_format=data_format)
        else:
            # search mode: allocate full-size weights, then slice out the
            # [start_idx, start_idx+count) output channels so weights are shared
            # across architectures.
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                #test_depth = w_depth
                w_point = create_weight(
                    "w_point", [out_filters, out_filters * ch_mul])
                w_point = w_point[start_idx:start_idx+count, :]
                w_point = tf.transpose(w_point, [1, 0])
                w_point = tf.reshape(
                    w_point, [1, 1, out_filters * ch_mul, count])
                x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
                                           padding="SAME", data_format=data_format)
                # boolean channel mask so only the selected channels' BN
                # statistics are updated
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(
                    start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters, data_format=data_format)
            else:
                w = create_weight(
                    "w", [filter_size, filter_size, out_filters, out_filters])
                # slice the selected output channels out of the shared kernel
                w = tf.transpose(w, [3, 0, 1, 2])
                w = w[start_idx:start_idx+count, :, :, :]
                w = tf.transpose(w, [1, 2, 3, 0])
                x = tf.nn.conv2d(
                    x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(
                    start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters, data_format=data_format)
    x = tf.nn.relu(x)
    return x
def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None):
    """Pooling branch: 1x1 conv + BN + relu, then 3x3 stride-1 avg/max pooling.

    Args:
        avg_or_max: "avg" or "max", selecting the pooling type.
        start_idx: where to start taking the output channels. if None, assuming
            fixed_arc mode.
        count: how many output_channels to take.
    """
    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value
    # 1x1 conv brings the input up to out_filters channels.
    with tf.variable_scope("conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
                         "SAME", data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)
    with tf.variable_scope("pool"):
        # tf.layers pooling uses "channels_last"/"channels_first" naming
        if data_format == "NHWC":
            actual_data_format = "channels_last"
        elif data_format == "NCHW":
            actual_data_format = "channels_first"
        if avg_or_max == "avg":
            x = tf.layers.average_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        elif avg_or_max == "max":
            x = tf.layers.max_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        else:
            raise ValueError("Unknown pool {}".format(avg_or_max))
    if start_idx is not None:
        # search mode: keep only the selected slice of channels
        if data_format == "NHWC":
            x = x[:, :, :, start_idx: start_idx+count]
        elif data_format == "NCHW":
            x = x[:, start_idx: start_idx+count, :, :]
    return x
def global_avg_pool(x, data_format="NHWC"):
    """Average over the two spatial dimensions, leaving a [N, C] tensor."""
    if data_format == "NHWC":
        spatial_axes = [1, 2]
    elif data_format == "NCHW":
        spatial_axes = [2, 3]
    else:
        raise NotImplementedError("Unknown data_format {}".format(data_format))
    return tf.reduce_mean(x, spatial_axes)
def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
               data_format="NHWC"):
    """Batch normalization with manually-managed moving statistics.

    In training mode, batch statistics are used and the moving mean/variance
    variables are updated with an exponential moving average (rate `decay`).
    In eval mode, the stored moving statistics are used and variables are
    reused (reuse=True) so both graphs share parameters.
    """
    if data_format == "NHWC":
        shape = [x.get_shape()[3]]
    elif data_format == "NCHW":
        shape = [x.get_shape()[1]]
    else:
        raise NotImplementedError("Unknown data_format {}".format(data_format))
    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        # moving statistics are updated manually below, hence not trainable
        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            update_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            update_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
            # tie the updates to the output so they run whenever x is consumed
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean,
                                             variance=moving_variance,
                                             epsilon=epsilon, data_format=data_format,
                                             is_training=False)
    return x
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                         decay=0.9, epsilon=1e-3, data_format="NHWC"):
    """Batch normalization over a masked subset of channels.

    Only the channels selected by the boolean `mask` (length `num_channels`)
    participate: offset/scale are sliced with the mask, and in training mode
    only the masked entries of the moving statistics are updated (scatter_sub
    at the masked indices).
    """
    shape = [num_channels]
    # integer indices of the channels selected by the boolean mask
    indices = tf.where(mask)
    indices = tf.to_int32(indices)
    indices = tf.reshape(indices, [-1])
    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        # slice parameters down to the active channels
        offset = tf.boolean_mask(offset, mask)
        scale = tf.boolean_mask(scale, mask)
        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            # moving_stat -= (1 - decay) * (moving_stat - batch_stat),
            # applied only at the masked indices via scatter_sub
            mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
            variance = (1.0 - decay) * \
                (tf.boolean_mask(moving_variance, mask) - variance)
            update_mean = tf.scatter_sub(
                moving_mean, indices, mean, use_locking=True)
            update_variance = tf.scatter_sub(
                moving_variance, indices, variance, use_locking=True)
            # tie the updates to the output so they run whenever x is consumed
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            masked_moving_mean = tf.boolean_mask(moving_mean, mask)
            masked_moving_variance = tf.boolean_mask(moving_variance, mask)
            x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
                                             mean=masked_moving_mean,
                                             variance=masked_moving_variance,
                                             epsilon=epsilon, data_format=data_format,
                                             is_training=False)
    return x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
user_flags = []
def DEFINE_string(name, default_value, doc_string):
    """Declare a string flag and record its name for print_user_flags."""
    tf.app.flags.DEFINE_string(name, default_value, doc_string)
    user_flags.append(name)
def DEFINE_integer(name, default_value, doc_string):
    """Declare an integer flag and record its name for print_user_flags."""
    tf.app.flags.DEFINE_integer(name, default_value, doc_string)
    user_flags.append(name)
def DEFINE_float(name, default_value, doc_string):
    """Declare a float flag and record its name for print_user_flags."""
    tf.app.flags.DEFINE_float(name, default_value, doc_string)
    user_flags.append(name)
def DEFINE_boolean(name, default_value, doc_string):
    """Declare a boolean flag and record its name for print_user_flags."""
    tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
    user_flags.append(name)
def print_user_flags(line_limit=80):
    """Print every registered flag as 'name....value', dot-padded to line_limit."""
    print("-" * 80)
    FLAGS = tf.app.flags.FLAGS
    for flag_name in sorted(user_flags):
        value = str(getattr(FLAGS, flag_name))
        # '.' * negative yields '' when name+value exceed the limit
        padding = "." * (line_limit - len(flag_name) - len(value))
        print(flag_name + padding + value)
def get_C(x, data_format):
    """Return the channel count of x.

    Args:
        x: tensor of shape [N, H, W, C] or [N, C, H, W]
        data_format: "NHWC" or "NCHW"
    """
    channel_axis = {"NHWC": 3, "NCHW": 1}.get(data_format)
    if channel_axis is None:
        raise ValueError(
            "Unknown data_format '{0}'".format(data_format))
    return x.get_shape()[channel_axis].value
def get_HW(x, data_format):
    """Return the spatial size of x.

    Args:
        x: tensor of shape [N, H, W, C] or [N, C, H, W]

    NOTE(review): `data_format` is ignored and dim 2 is returned regardless --
    that is W for NHWC but H for NCHW. This presumably assumes square feature
    maps (H == W); confirm against callers.
    """
    return x.get_shape()[2].value
def get_strides(stride, data_format):
    """Build the 4-element strides vector for tf conv/pool ops.

    Args:
        stride: spatial stride, applied to both H and W.
        data_format: "NHWC" or "NCHW".
    """
    if data_format == "NHWC":
        return [1, stride, stride, 1]
    if data_format == "NCHW":
        return [1, 1, stride, stride]
    raise ValueError(
        "Unknown data_format '{0}'".format(data_format))
class TextColors:
    """ANSI escape codes for colorized / styled terminal output."""
    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
class Logger(object):
    """Tee-style writer: echoes every message to stdout and appends it to a file.

    Intended to replace sys.stdout so all prints are also logged.
    """
    def __init__(self, output_file):
        # keep a handle to the real stdout so messages still reach the terminal
        self.terminal = sys.stdout
        # NOTE: file is opened in append mode and never closed explicitly
        self.log = open(output_file, "a")
    def write(self, message):
        # flush both targets immediately so output is not lost on crash
        self.terminal.write(message)
        self.terminal.flush()
        self.log.write(message)
        self.log.flush()
def count_model_params(tf_variables):
    """Total number of scalar parameters across the given variables.

    Args:
        tf_variables: list of all model variables
    """
    return sum(np.prod([dim.value for dim in var.get_shape()])
               for var in tf_variables)
def get_train_ops(
        loss,
        tf_variables,
        train_step,
        clip_mode=None,
        grad_bound=None,
        l2_reg=1e-4,
        lr_warmup_val=None,
        lr_warmup_steps=100,
        lr_init=0.1,
        lr_dec_start=0,
        lr_dec_every=10000,
        lr_dec_rate=0.1,
        lr_dec_min=None,
        lr_cosine=False,
        lr_max=None,
        lr_min=None,
        lr_T_0=None,
        lr_T_mul=None,
        num_train_batches=None,
        optim_algo=None,
        sync_replicas=False,
        num_aggregate=None,
        num_replicas=None,
        get_grad_norms=False,
        moving_average=None):
    """Build the training op: L2 regularization, gradient clipping, the
    learning-rate schedule and the optimizer.

    Args:
        clip_mode: "global", "norm", or None.
        moving_average: store the moving average of parameters
    Returns:
        (train_op, learning_rate, grad_norm, opt) and, if get_grad_norms,
        also a {variable_name: gradient_norm} dict.
    """
    # add L2 regularization on all trainable variables to the loss
    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var ** 2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    # per-variable gradient norms, for monitoring
    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    # BUGFIX: tf.IndexedSlices takes (values, indices); the
                    # original passed them swapped.
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # BUGFIX: the original appended the unclipped gradient `g`,
                # so "norm" clipping silently had no effect.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        # cosine learning-rate schedule with warm restarts: each period lasts
        # T_i epochs and is stretched by lr_T_mul after every restart
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches
        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            # restart: record the epoch and stretch the next period by lr_T_mul
            update_last_reset = tf.assign(
                last_reset, curr_epoch, use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(
            tf.greater_equal(T_curr, T_i), _update, _no_update)
    else:
        # staircase exponential decay, optionally floored at lr_dec_min
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
            lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        # constant warmup learning rate for the first lr_warmup_steps steps
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(
            learning_rate, 0.9, use_locking=True, use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(
            learning_rate, use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(
        zip(grads, tf_variables), global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    return train_op, learning_rate, grad_norm, opt
......@@ -2,13 +2,14 @@ import numpy as np
from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state):
'''random
'''
chosen_archi = {}
print("zql: nas search space: ", nas_ss)
for block_name, block_value in nas_ss.items():
assert block_value['_type'] == "mutable_layer", "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
assert block_value['_type'] == "mutable_layer", \
"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block = block_value['_value']
tmp_block = {}
for layer_name, layer in block.items():
......@@ -19,13 +20,12 @@ def random_archi_generator(nas_ss, random_state):
tmp_layer['chosen_layer'] = value[index]
elif key == 'optional_inputs':
tmp_layer['chosen_inputs'] = []
print("zql: optional_inputs", layer['optional_inputs'])
if layer['optional_inputs']:
if isinstance(layer['optional_input_size'], int):
choice_num = layer['optional_input_size']
else:
choice_range = layer['optional_input_size']
choice_num = random_state.randint(choice_range[0], choice_range[1]+1)
choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
for _ in range(choice_num):
index = random_state.randint(len(layer['optional_inputs']))
tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
......@@ -37,6 +37,7 @@ def random_archi_generator(nas_ss, random_state):
chosen_archi[block_name] = tmp_block
return chosen_archi
class RandomNASTuner(Tuner):
'''RandomNASTuner
'''
......
......@@ -174,7 +174,7 @@ export namespace ValidationSchemas {
checkpointDir: joi.string().allow('')
}),
tuner: joi.object({
builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner'),
builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner', 'PPOTuner'),
codeDir: joi.string(),
classFileName: joi.string(),
className: joi.string(),
......
......@@ -30,7 +30,8 @@ ModuleName = {
'NetworkMorphism': 'nni.networkmorphism_tuner.networkmorphism_tuner',
'Curvefitting': 'nni.curvefitting_assessor.curvefitting_assessor',
'MetisTuner': 'nni.metis_tuner.metis_tuner',
'GPTuner': 'nni.gp_tuner.gp_tuner'
'GPTuner': 'nni.gp_tuner.gp_tuner',
'PPOTuner': 'nni.ppo_tuner.ppo_tuner'
}
ClassName = {
......@@ -44,6 +45,7 @@ ClassName = {
'NetworkMorphism':'NetworkMorphismTuner',
'MetisTuner':'MetisTuner',
'GPTuner':'GPTuner',
'PPOTuner': 'PPOTuner',
'Medianstop': 'MedianstopAssessor',
'Curvefitting': 'CurvefittingAssessor'
......
......@@ -27,6 +27,7 @@ import logging
import hyperopt as hp
import numpy as np
from nni.tuner import Tuner
from nni.nas_utils import rewrite_nas_space
from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index
logger = logging.getLogger('hyperopt_AutoML')
......@@ -240,6 +241,7 @@ class HyperoptTuner(Tuner):
return hp.anneal.suggest
raise RuntimeError('Not support tuner algorithm in hyperopt.')
@rewrite_nas_space
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in parameters.
......
......@@ -101,11 +101,16 @@ class MsgDispatcher(MsgDispatcherBase):
self.tuner.update_search_space(data)
send(CommandType.Initialized, '')
def send_trial_callback(self, id, params):
"""For tuner to issue trial config when the config is generated
"""
send(CommandType.NewTrialJob, _pack_parameter(id, params))
def handle_request_trial_jobs(self, data):
# data: number or trial jobs
ids = [_create_parameter_id() for _ in range(data)]
_logger.debug("requesting for generating params of {}".format(ids))
params_list = self.tuner.generate_multiple_parameters(ids)
params_list = self.tuner.generate_multiple_parameters(ids, st_callback=self.send_trial_callback)
for i, _ in enumerate(params_list):
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
......
......@@ -17,10 +17,16 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import functools
import logging
from . import trial
_logger = logging.getLogger(__name__)
_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer"
def classic_mode(
mutable_id,
mutable_layer_id,
......@@ -34,13 +40,11 @@ def classic_mode(
without touching the full model graph.'''
if trial.get_current_parameter() is None:
trial.get_next_parameter()
mutable_block = trial.get_current_parameter(mutable_id)
chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
real_chosen_inputs = [optional_inputs[input_name]
for input_name in chosen_inputs]
layer_out = funcs[chosen_layer](
[fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
list(optional_inputs.keys()))
real_chosen_inputs = [optional_inputs[input_name] for input_name in chosen_inputs]
layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
return layer_out
......@@ -173,20 +177,44 @@ def reload_tensorflow_variables(tf, session):
tf: tensorflow module
'''
subgraph_from_tuner = trial.get_next_parameter()
for mutable_id, mutable_block in subgraph_from_tuner.items():
mutable_layers = set()
for subgraph_key in subgraph_from_tuner:
if "/" in subgraph_key:
# has to remove the last, could be layer_choice or whatever
mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")])
if mutable_id is not None:
mutable_layers.add((mutable_id, mutable_layer_id))
mutable_layers = sorted(list(mutable_layers))
for mutable_id, mutable_layer_id in mutable_layers:
if mutable_id not in name_space:
_logger.warning("{} not found in name space".format(mutable_id))
continue
for mutable_layer_id, mutable_layer in mutable_block.items():
name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
# extract layer information from the subgraph sampled by tuner
chosen_layer = name_space[name_prefix]['funcs'].index(
mutable_layer["chosen_layer"])
chosen_inputs = [1 if inp in mutable_layer["chosen_inputs"]
else 0 for inp in name_space[name_prefix]['optional_inputs']]
# load these information into pre-defined tensorflow variables
tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
tf_variables[name_prefix]['optional_inputs'].load(
chosen_inputs, session)
name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
# get optional inputs names
optional_inputs = name_space[name_prefix]['optional_inputs']
# extract layer information from the subgraph sampled by tuner
chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs)
chosen_layer = name_space[name_prefix]['funcs'].index(chosen_layer)
chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs]
# load these information into pre-defined tensorflow variables
tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
tf_variables[name_prefix]['optional_inputs'].load(
chosen_inputs, session)
def _construct_general_key(mutable_id, mutable_layer_id):
    """Mutable-layer key in the general (search space) format,
    i.e. prefix/mutable_id/mutable_layer_id."""
    return "/".join([_MUTABLE_LAYER_SPACE_PREFIX, mutable_id, mutable_layer_id])
def _decompose_general_key(key):
    """Inverse of _construct_general_key.

    Returns (mutable_id, mutable_layer_id), or (None, None) when the key is
    not in the general mutable-layer format.
    """
    if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX):
        return None, None
    _, mutable_id, mutable_layer_id = key.split("/", maxsplit=2)
    return mutable_id, mutable_layer_id
def darts_training(tf, session, loss, feed_dict):
......@@ -205,4 +233,107 @@ def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None):
if nas_mode == 'darts_mode':
darts_training(tf, session, loss, feed_dict)
elif nas_mode == 'enas_mode':
reload_tensorflow_variables(tf, session)
\ No newline at end of file
reload_tensorflow_variables(tf, session)
def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs):
    """Resolve the tuner's decision (chosen layer + chosen inputs) for one
    mutable layer.

    Args:
        mutable_id: id of the mutable block.
        mutable_layer_id: id of the layer inside the block.
        optional_inputs: name(key)s of the optional inputs.
    Returns:
        (chosen_layer, chosen_inputs) tuple.
    """
    try:
        mutable_block = trial.get_current_parameter(mutable_id)
        # There is a NAS tuner: parameters already come in NAS format
        chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
        chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
    except KeyError:
        # Try to find converted NAS parameters (produced by a regular HPO
        # tuner from the space built in convert_nas_search_space)
        params = trial.get_current_parameter()
        expected_prefix = _construct_general_key(mutable_id, mutable_layer_id)
        chosen_layer = params[expected_prefix + "/layer_choice"]
        # find how many to choose
        optional_input_size = int(params[expected_prefix + "/optional_input_size"])  # convert uniform to randint
        # find who to choose, can duplicate
        optional_input_state = params[expected_prefix + "/optional_input_chosen_state"]
        chosen_inputs = []
        # make sure dict -> list produce stable result by sorting
        optional_inputs_keys = sorted(optional_inputs)
        # decode the chosen-state integer as base-len(optional_inputs) digits,
        # one digit per chosen input (duplicates possible)
        for i in range(optional_input_size):
            chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)])
            optional_input_state //= len(optional_inputs)
    _logger.info("%s_%s: layer: %s, optional inputs: %s" % (mutable_id, mutable_layer_id,
                                                            chosen_layer, chosen_inputs))
    return chosen_layer, chosen_inputs
def convert_nas_search_space(search_space):
    """Expand NAS `mutable_layer` entries of a raw search space into plain HPO
    primitives (choice/randint) so regular tuners can handle them.

    Args:
        search_space: raw search space (dict); values whose "_type" is
            "mutable_layer" are converted, everything else is passed through.
    Returns:
        A new search space dict in which every mutable layer becomes three
        entries: ".../layer_choice", ".../optional_input_size" and
        ".../optional_input_chosen_state".
    """
    ret = dict()
    for k, v in search_space.items():
        if "_type" not in v:
            # this should not happen
            _logger.warning("There is no _type in one of your search space values with key '%s'"
                            ". Please check your search space" % k)
            ret[k] = v
        elif v["_type"] != "mutable_layer":
            # ordinary HPO entry: pass through untouched
            ret[k] = v
        else:
            _logger.info("Converting mutable_layer search space with key '%s'" % k)
            # v["_value"] looks like {'mutable_layer_1': {'layer_choice': ...} ...}
            values = v["_value"]
            for layer_name, layer_data in values.items():
                # there should be at most layer_choice, optional_inputs,
                # optional_input_size in layer_data; add the mutable-layer
                # prefix so the keys can be recovered later
                layer_key = _construct_general_key(k, layer_name)

                if layer_data.get("layer_choice"):  # filter out empty choice and no choice
                    layer_choice = layer_data["layer_choice"]
                else:
                    raise ValueError("No layer choice found in %s" % layer_key)

                if layer_data.get("optional_input_size"):
                    input_size = layer_data["optional_input_size"]
                    if isinstance(input_size, int):
                        input_size = [input_size, input_size]
                    else:
                        # BUGFIX: copy before mutating -- the original modified
                        # the caller's search_space in place, so converting the
                        # same space twice kept widening the randint range.
                        input_size = list(input_size)
                    if input_size[0] > input_size[1] or input_size[0] < 0:
                        _logger.error("Might not be able to handle optional_input_size < 0, please double check")
                    # randint upper bound is exclusive
                    input_size[1] += 1
                else:
                    _logger.info("Optional input choices are set to empty by default in %s" % layer_key)
                    input_size = [0, 1]

                if layer_data.get("optional_inputs"):
                    # enough states to encode any (max-size) multiset of inputs
                    total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1)
                else:
                    _logger.info("Optional inputs not found in %s" % layer_key)
                    total_state_size = 1

                converted = {
                    layer_key + "/layer_choice": {
                        "_type": "choice", "_value": layer_choice
                    },
                    layer_key + "/optional_input_size": {
                        "_type": "randint", "_value": input_size
                    },
                    layer_key + "/optional_input_chosen_state": {
                        "_type": "randint", "_value": [0, total_state_size]
                    }
                }
                _logger.info(converted)
                ret.update(converted)
    return ret
def rewrite_nas_space(func):
    """Decorator: convert NAS mutable_layer entries of the incoming search
    space into plain HPO primitives before invoking the wrapped method."""
    @functools.wraps(func)
    def wrap(self, search_space):
        return func(self, convert_nas_search_space(search_space))
    return wrap
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
functions for sampling from hidden state
"""
import tensorflow as tf
from .util import fc
class Pd:
    """
    A particular probability distribution.

    Abstract interface: subclasses implement flatparam / mode / neglogp /
    kl / entropy / sample; logp and the shape helpers are derived from them.
    """
    def flatparam(self):
        # flat tensor of the distribution's parameters
        raise NotImplementedError
    def mode(self):
        # most likely value
        raise NotImplementedError
    def neglogp(self, x):
        # Usually it's easier to define the negative logprob
        raise NotImplementedError
    def kl(self, other):
        # KL divergence to another distribution of the same family
        raise NotImplementedError
    def entropy(self):
        raise NotImplementedError
    def sample(self):
        raise NotImplementedError
    def logp(self, x):
        # log-probability, derived from the subclass's neglogp
        return - self.neglogp(x)
    def get_shape(self):
        return self.flatparam().shape
    @property
    def shape(self):
        return self.get_shape()
    def __getitem__(self, idx):
        # slice the distribution by slicing its flat parameters
        return self.__class__(self.flatparam()[idx])
class PdType:
    """
    Parametrized family of probability distributions.

    Subclasses define the concrete Pd class plus its parameter/sample shapes
    and dtypes; this base provides the generic construction helpers.
    """
    def pdclass(self):
        # concrete Pd subclass produced by this family
        raise NotImplementedError
    def pdfromflat(self, flat, mask, nsteps, size, is_act_model):
        # build a distribution directly from flat parameters
        return self.pdclass()(flat, mask, nsteps, size, is_act_model)
    def pdfromlatent(self, latent_vector, init_scale, init_bias):
        # build a distribution from a latent vector (adds its own head)
        raise NotImplementedError
    def param_shape(self):
        raise NotImplementedError
    def sample_shape(self):
        raise NotImplementedError
    def sample_dtype(self):
        raise NotImplementedError
    def param_placeholder(self, prepend_shape, name=None):
        # placeholder for distribution parameters with a leading batch shape
        return tf.placeholder(dtype=tf.float32, shape=prepend_shape+self.param_shape(), name=name)
    def sample_placeholder(self, prepend_shape, name=None):
        # placeholder for samples with a leading batch shape
        return tf.placeholder(dtype=self.sample_dtype(), shape=prepend_shape+self.sample_shape(), name=name)
class CategoricalPd(Pd):
    """
    Categorical probability distribution over `size` discrete choices,
    parametrized by logits.
    """
    def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
        # logits: unnormalized log-probabilities
        # mask_npinf: additive mask applied to logits in sample() (name
        #   suggests -inf entries to forbid choices -- confirm at call site)
        # nsteps / size: used to reshape flattened logits when masking
        # is_act_model: when True, sample() skips the masking path
        self.logits = logits
        self.mask_npinf = mask_npinf
        self.nsteps = nsteps
        self.size = size
        self.is_act_model = is_act_model
    def flatparam(self):
        return self.logits
    def mode(self):
        # most likely choice
        return tf.argmax(self.logits, axis=-1)
    @property
    def mean(self):
        return tf.nn.softmax(self.logits)
    def neglogp(self, x):
        """
        Negative log-probability of x.

        Equivalent to
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x),
        but we can't use sparse_softmax_cross_entropy_with_logits because
        the implementation does not allow second-order derivatives, so x is
        one-hot encoded and the dense version is used instead.
        """
        if x.dtype in {tf.uint8, tf.int32, tf.int64}:
            # one-hot encoding
            x_shape_list = x.shape.as_list()
            logits_shape_list = self.logits.get_shape().as_list()[:-1]
            for xs, ls in zip(x_shape_list, logits_shape_list):
                if xs is not None and ls is not None:
                    assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
            x = tf.one_hot(x, self.logits.get_shape().as_list()[-1])
        else:
            # already encoded
            assert x.shape.as_list() == self.logits.shape.as_list()
        return tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.logits,
            labels=x)
    def kl(self, other):
        """KL divergence KL(self || other), in a numerically stable form
        (logits shifted by their max before exponentiation)."""
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        ea1 = tf.exp(a1)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
    def entropy(self):
        """Entropy of the distribution (same stable shifted-logits form)."""
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
    def sample(self):
        """Sample a choice via the Gumbel-max trick on the (masked) logits."""
        if not self.is_act_model:
            # reshape to [-1, nsteps, size], add the mask, then flatten back
            re_res = tf.reshape(self.logits, [-1, self.nsteps, self.size])
            masked_res = tf.math.add(re_res, self.mask_npinf)
            re_masked_res = tf.reshape(masked_res, [-1, self.size])
            u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
            return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
        else:
            u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
            return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
    @classmethod
    def fromflat(cls, flat):
        # NOTE(review): cls(flat) does not match __init__'s five required
        # parameters; calling fromflat as-is would raise TypeError -- confirm
        # whether this method is ever used.
        return cls(flat)
class CategoricalPdType(PdType):
    """
    Factory for ``CategoricalPd`` distributions.
    """
    def __init__(self, ncat, nsteps, np_mask, is_act_model):
        self.ncat = ncat
        self.nsteps = nsteps
        self.np_mask = np_mask
        self.is_act_model = is_act_model

    def pdclass(self):
        return CategoricalPd

    def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
        """Project the latent vector through a masked fc layer and build the pd."""
        pdparam, mask, mask_npinf = _matching_fc(
            latent_vector, 'pi', self.ncat, self.nsteps,
            init_scale=init_scale, init_bias=init_bias,
            np_mask=self.np_mask, is_act_model=self.is_act_model)
        pd = self.pdfromflat(pdparam, mask_npinf, self.nsteps, self.ncat, self.is_act_model)
        return pd, pdparam, mask, mask_npinf

    def param_shape(self):
        return [self.ncat]

    def sample_shape(self):
        return []

    def sample_dtype(self):
        return tf.int32
def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
    """
    Add a fully connected projection on top of ``tensor`` together with the
    (non-trainable) mask variables, applying the multiplicative mask when
    building the train model.

    Parameters
    ----------
    tensor: input latent tensor
    name: variable scope name for the fc layer
    size: output size (number of actions)
    nsteps: number of timesteps per rollout segment (train model only)
    init_scale, init_bias: fc initializer parameters
    np_mask: pair (multiplicative mask, -inf additive mask) as numpy arrays
    is_act_model: if True, skip the per-step mask application

    Returns
    -------
    (output tensor, mask variable, -inf mask variable)
    """
    if tensor.shape[-1] == size:
        # The latent size is never expected to equal the action count here.
        # The original code used a bare ``assert False`` followed by an
        # unreachable ``return tensor`` — under ``python -O`` the assert is
        # stripped and the un-masked latent would be returned silently.
        # Raise explicitly so the guard always fires.
        raise AssertionError("latent dimension unexpectedly equals output size")
    mask = tf.get_variable("act_mask", dtype=tf.float32, initializer=np_mask[0], trainable=False)
    mask_npinf = tf.get_variable("act_mask_npinf", dtype=tf.float32, initializer=np_mask[1], trainable=False)
    res = fc(tensor, name, size, init_scale=init_scale, init_bias=init_bias)
    if not is_act_model:
        # reshape to (batch, nsteps, size) so the per-step mask lines up
        re_res = tf.reshape(res, [-1, nsteps, size])
        masked_res = tf.math.multiply(re_res, mask)
        re_masked_res = tf.reshape(masked_res, [-1, size])
        return re_masked_res, mask, mask_npinf
    return res, mask, mask_npinf
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
the main model of policy/value network
"""
import tensorflow as tf
from .util import initialize, get_session
class Model:
    """
    We use this object to :
    __init__:
        - Creates the step_model
        - Creates the train_model
    train():
        - Make the training part (feedforward and retropropagation of gradients)
    save/load():
        - Save load the model
    """
    def __init__(self, *, policy, nbatch_act, nbatch_train,
                 nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None):
        """
        Build the act (sampling) and train policy networks plus the PPO
        clipped-surrogate loss and Adam update op.

        Parameters
        ----------
        policy: factory ``(nbatch, nsteps, sess, np_mask, is_act_model) -> policy``
        nbatch_act: batch size for the act model
        nbatch_train: batch size for the train model
        nsteps: timesteps per rollout segment
        ent_coef: entropy bonus coefficient in the total loss
        vf_coef: value-function loss coefficient in the total loss
        max_grad_norm: global-norm gradient clip threshold (None disables clipping)
        microbatch_size: if set, build the train model with this batch size instead
        np_mask: numpy masks forwarded to the policy for masking invalid actions
        """
        self.sess = sess = get_session()

        with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
            # CREATE OUR TWO MODELS
            # act_model that is used for sampling
            act_model = policy(nbatch_act, 1, sess, np_mask=np_mask, is_act_model=True)

            # Train model for training
            if microbatch_size is None:
                train_model = policy(nbatch_train, nsteps, sess, np_mask=np_mask, is_act_model=False)
            else:
                train_model = policy(microbatch_size, nsteps, sess, np_mask=np_mask, is_act_model=False)

        # CREATE THE PLACEHOLDERS
        self.A = A = train_model.pdtype.sample_placeholder([None])
        self.ADV = ADV = tf.placeholder(tf.float32, [None])
        self.R = R = tf.placeholder(tf.float32, [None])
        # Keep track of old actor (negative log-probs from the rollout policy)
        self.OLDNEGLOGPAC = OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
        # Keep track of old critic (value predictions from the rollout policy)
        self.OLDVPRED = OLDVPRED = tf.placeholder(tf.float32, [None])
        self.LR = LR = tf.placeholder(tf.float32, [])
        # Cliprange
        self.CLIPRANGE = CLIPRANGE = tf.placeholder(tf.float32, [])

        neglogpac = train_model.pd.neglogp(A)

        # Calculate the entropy
        # Entropy is used to improve exploration by limiting the premature convergence to suboptimal policy.
        entropy = tf.reduce_mean(train_model.pd.entropy())

        # CALCULATE THE LOSS
        # Total loss = Policy gradient loss - entropy * entropy coefficient + Value coefficient * value loss

        # Clip the value to reduce variability during Critic training
        # Get the predicted value
        vpred = train_model.vf
        vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED, - CLIPRANGE, CLIPRANGE)
        # Unclipped value
        vf_losses1 = tf.square(vpred - R)
        # Clipped value
        vf_losses2 = tf.square(vpredclipped - R)

        vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))

        # Calculate ratio (pi current policy / pi old policy)
        ratio = tf.exp(OLDNEGLOGPAC - neglogpac)

        # Defining Loss = - J is equivalent to max J
        pg_losses = -ADV * ratio

        pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE, 1.0 + CLIPRANGE)

        # Final PG loss
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
        # cheap monitoring estimate of KL between old and new policy
        approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
        # fraction of samples whose ratio was clipped
        clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))

        # Total loss
        loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

        # UPDATE THE PARAMETERS USING LOSS
        # 1. Get the model parameters
        params = tf.trainable_variables('ppo2_model')
        # 2. Build our trainer
        self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
        # 3. Calculate the gradients
        grads_and_var = self.trainer.compute_gradients(loss, params)
        grads, var = zip(*grads_and_var)

        if max_grad_norm is not None:
            # Clip the gradients (normalize)
            grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads_and_var = list(zip(grads, var))
        # zip aggregate each gradient with parameters associated
        # For instance zip(ABCD, xyza) => Ax, By, Cz, Da

        self.grads = grads
        self.var = var
        self._train_op = self.trainer.apply_gradients(grads_and_var)
        self.loss_names = ['policy_loss', 'value_loss', 'policy_entropy', 'approxkl', 'clipfrac']
        self.stats_list = [pg_loss, vf_loss, entropy, approxkl, clipfrac]

        self.train_model = train_model
        self.act_model = act_model
        self.step = act_model.step
        self.value = act_model.value
        self.initial_state = act_model.initial_state

        initialize()

    def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):
        """
        train the model.
        Here we calculate advantage A(s,a) = R + yV(s') - V(s)
        Returns = R + yV(s')

        Runs one optimizer step over the given minibatch and returns the
        evaluated stats named in ``self.loss_names``.
        """
        advs = returns - values
        # Normalize the advantages
        advs = (advs - advs.mean()) / (advs.std() + 1e-8)
        td_map = {
            self.train_model.X : obs,
            self.A : actions,
            self.ADV : advs,
            self.R : returns,
            self.LR : lr,
            self.CLIPRANGE : cliprange,
            self.OLDNEGLOGPAC : neglogpacs,
            self.OLDVPRED : values
        }
        if states is not None:
            # recurrent policy: feed the LSTM state and done-masks as well
            td_map[self.train_model.S] = states
            td_map[self.train_model.M] = masks
        # run the train op together with the stats, then drop the train op's
        # (None) result so only the stats are returned
        return self.sess.run(
            self.stats_list + [self._train_op],
            td_map
        )[:-1]
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
build policy/value network from model
"""
import tensorflow as tf
from .distri import CategoricalPdType
from .util import lstm_model, fc, observation_placeholder, adjust_shape
class PolicyWithValue:
    """
    Encapsulates fields and methods for RL policy and value function estimation with shared parameters
    """

    def __init__(self, env, observations, latent, estimate_q=False, vf_latent=None, sess=None, np_mask=None, is_act_model=False, **tensors):
        """
        Parameters:
        ----------
        env: RL environment
        observations: tensorflow placeholder in which the observations will be fed
        latent: latent state from which policy distribution parameters should be inferred
        vf_latent: latent state from which value function should be inferred (if None, then latent is used)
        sess: tensorflow session to run calculations in (if None, default session is used)
        np_mask: numpy masks for invalid actions, forwarded to the pd type
        is_act_model: if True, additionally build the per-step sampling graph
        **tensors: tensorflow tensors for additional attributes such as state or mask
        """
        self.X = observations
        self.state = tf.constant([])
        self.initial_state = None
        self.__dict__.update(tensors)

        vf_latent = vf_latent if vf_latent is not None else latent

        vf_latent = tf.layers.flatten(vf_latent)
        latent = tf.layers.flatten(latent)

        # Based on the action space, will select what probability distribution type
        self.np_mask = np_mask
        self.pdtype = CategoricalPdType(env.action_space.n, env.nsteps, np_mask, is_act_model)

        self.act_latent = latent
        self.nh = env.action_space.n

        self.pd, self.pi, self.mask, self.mask_npinf = self.pdtype.pdfromlatent(latent, init_scale=0.01)

        # Take an action
        self.action = self.pd.sample()

        # Calculate the neg log of our probability
        self.neglogp = self.pd.neglogp(self.action)
        self.sess = sess or tf.get_default_session()

        # only a plain value head is supported (no Q estimation)
        assert estimate_q is False
        # value head: fc to a single scalar, then squeeze the singleton dim
        self.vf = fc(vf_latent, 'vf', 1)
        self.vf = self.vf[:, 0]

        if is_act_model:
            self._build_model_for_step()

    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` in the session, feeding the observation and any
        extra placeholders named in ``extra_feed`` that exist as attributes."""
        sess = self.sess
        feed_dict = {self.X: adjust_shape(self.X, observation)}
        for inpt_name, data in extra_feed.items():
            if inpt_name in self.__dict__.keys():
                inpt = self.__dict__[inpt_name]
                # only feed real placeholders; other attributes are ignored
                if isinstance(inpt, tf.Tensor) and inpt._op.type == 'Placeholder':
                    feed_dict[inpt] = adjust_shape(inpt, data)
        return sess.run(variables, feed_dict)

    def _build_model_for_step(self):
        # multiply with weight and apply mask on self.act_latent to generate
        # the logits for a single step (selected by the act_step placeholder)
        self.act_step = step = tf.placeholder(shape=(), dtype=tf.int64, name='act_step')
        with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
            from .util import ortho_init
            nin = self.act_latent.get_shape()[1].value
            # reuse-by-name of the 'pi' fc weights created by pdfromlatent
            w = tf.get_variable("w", [nin, self.nh], initializer=ortho_init(0.01))
            b = tf.get_variable("b", [self.nh], initializer=tf.constant_initializer(0.0))
            logits = tf.matmul(self.act_latent, w)+b

            # slice out this step's row of the multiplicative mask
            piece = tf.slice(self.mask, [step, 0], [1, self.nh])
            re_piece = tf.reshape(piece, [-1])
            masked_logits = tf.math.multiply(logits, re_piece)

            # this step's row of the additive -inf mask
            npinf_piece = tf.slice(self.mask_npinf, [step, 0], [1, self.nh])
            re_npinf_piece = tf.reshape(npinf_piece, [-1])

            def sample(logits, mask_npinf):
                # Gumbel-max sampling over the -inf-masked logits
                new_logits = tf.math.add(logits, mask_npinf)
                u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
                return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)

            def neglogp(logits, x):
                # return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
                # Note: we can't use sparse_softmax_cross_entropy_with_logits because
                # the implementation does not allow second-order derivatives...
                if x.dtype in {tf.uint8, tf.int32, tf.int64}:
                    # one-hot encoding
                    x_shape_list = x.shape.as_list()
                    logits_shape_list = logits.get_shape().as_list()[:-1]
                    for xs, ls in zip(x_shape_list, logits_shape_list):
                        if xs is not None and ls is not None:
                            assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
                    x = tf.one_hot(x, logits.get_shape().as_list()[-1])
                else:
                    # already encoded
                    assert x.shape.as_list() == logits.shape.as_list()
                return tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits,
                    labels=x)

            self.act_action = sample(masked_logits, re_npinf_piece)
            self.act_neglogp = neglogp(masked_logits, self.act_action)

    def step(self, step, observation, **extra_feed):
        """
        Compute next action(s) given the observation(s)
        Parameters:
        ----------
        observation: observation data (either single or a batch)
        **extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
        Returns:
        -------
        (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
        """
        extra_feed['act_step'] = step
        a, v, state, neglogp = self._evaluate([self.act_action, self.vf, self.state, self.act_neglogp], observation, **extra_feed)
        # self.state is an empty constant for non-recurrent use; report None
        if state.size == 0:
            state = None
        return a, v, state, neglogp

    def value(self, ob, *args, **kwargs):
        """
        Compute value estimate(s) given the observation(s)
        Parameters:
        ----------
        observation: observation data (either single or a batch)
        **extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
        Returns:
        -------
        value estimate
        """
        return self._evaluate(self.vf, ob, *args, **kwargs)
def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
    """
    build lstm policy and value network, they share the same lstm network.
    the parameters all use their default values.

    Returns a ``policy_fn(nbatch, nsteps, sess, observ_placeholder, np_mask,
    is_act_model)`` factory producing a ``PolicyWithValue`` instance.
    """
    policy_network = lstm_model(**policy_kwargs)

    def policy_fn(nbatch=None, nsteps=None, sess=None, observ_placeholder=None, np_mask=None, is_act_model=False):
        # build one policy/value network instance
        ob_space = model_config.observation_space

        X = observ_placeholder if observ_placeholder is not None else observation_placeholder(ob_space, batch_size=nbatch)

        extra_tensors = {}

        # encode_observation is not necessary anymore as we use embedding_lookup
        encoded_x = X

        with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
            policy_latent = policy_network(encoded_x, 1, model_config.observation_space.n)
            if isinstance(policy_latent, tuple):
                policy_latent, recurrent_tensors = policy_latent

                if recurrent_tensors is not None:
                    # recurrent architecture, need a few more steps
                    nenv = nbatch // nsteps
                    assert nenv > 0, 'Bad input for recurrent policy: batch size {} smaller than nsteps {}'.format(nbatch, nsteps)
                    # rebuild the network with the correct number of envs
                    policy_latent, recurrent_tensors = policy_network(encoded_x, nenv, model_config.observation_space.n)
                    extra_tensors.update(recurrent_tensors)

        _v_net = value_network
        # only a shared policy/value network is supported
        assert _v_net is None or _v_net == 'shared'
        vf_latent = policy_latent

        policy = PolicyWithValue(
            env=model_config,
            observations=X,
            latent=policy_latent,
            vf_latent=vf_latent,
            sess=sess,
            estimate_q=estimate_q,
            np_mask=np_mask,
            is_act_model=is_act_model,
            **extra_tensors
        )
        return policy

    return policy_fn
This diff is collapsed.
enum34
gym
tensorflow
\ No newline at end of file
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
util functions
"""
import os
import random
import multiprocessing
import numpy as np
import tensorflow as tf
from gym.spaces import Discrete, Box, MultiDiscrete
def set_global_seeds(i):
    """Seed TensorFlow, NumPy and the stdlib RNG (None leaves them unseeded)."""
    rank = 0
    if i is None:
        myseed = None
    else:
        # offset the seed by rank (single-process here, so rank is 0)
        myseed = i + 1000 * rank
    for seeder in (tf.set_random_seed, np.random.seed, random.seed):
        seeder(myseed)
def batch_to_seq(h, nbatch, nsteps, flat=False):
    """Split a flat batch tensor into a list of ``nsteps`` per-step tensors."""
    target_shape = [nbatch, nsteps] if flat else [nbatch, nsteps, -1]
    reshaped = tf.reshape(h, target_shape)
    steps = tf.split(axis=1, num_or_size_splits=nsteps, value=reshaped)
    # drop the singleton time axis of each split
    return [tf.squeeze(step, [1]) for step in steps]
def seq_to_batch(h, flat=False):
    """Concatenate a list of per-step tensors back into one flat batch tensor."""
    if flat:
        return tf.reshape(tf.stack(values=h, axis=1), [-1])
    shape = h[0].get_shape().as_list()
    assert len(shape) > 1
    nh = h[0].get_shape()[-1].value
    return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell

    Unrolls a standard LSTM over the per-step tensors in ``xs``.

    xs: list of per-step input tensors; NOTE: mutated in place — each entry
        is replaced with the corresponding per-step hidden state
    ms: list of per-step "done" masks used to reset the state between episodes
    s:  concatenated [c, h] state tensor of shape [nenv, 2*nh]
    scope: variable scope name for the LSTM weights
    nh: hidden state size
    init_scale: scale for the orthogonal weight initializer
    Returns the (mutated) xs list and the new concatenated [c, h] state.
    """
    nbatch, nin = [v.value for v in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        # reset the recurrent state where the previous step ended an episode (m == 1)
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        # input / forget / output gates and candidate cell update
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
def lstm_model(nlstm=128, layer_norm=False):
    """
    Builds LSTM (Long-Short Term Memory) network to be used in a policy.
    Note that the resulting function returns not only the output of the LSTM
    (i.e. hidden state of lstm for each step in the sequence), but also a dictionary
    with auxiliary tensors to be set as policy attributes.

    Specifically,
        S is a placeholder to feed current state (LSTM state has to be managed outside policy)
        M is a placeholder for the mask (used to mask out observations after the end of the episode, but can be used for other purposes too)
        initial_state is a numpy array containing initial lstm state (usually zeros)
        state is the output LSTM state (to be fed into S at the next call)

    An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example

    Parameters:
    ----------
    nlstm: int          LSTM hidden state size
    layer_norm: bool    if True, layer-normalized version of LSTM is used
                        (not supported here — asserted False below)

    Returns:
    -------
    function that builds LSTM with a given input tensor / placeholder
    """
    def network_fn(X, nenv=1, obs_size=-1):
        # embed integer observations into 32-dim vectors before the LSTM
        with tf.variable_scope("emb", reuse=tf.AUTO_REUSE):
            w_emb = tf.get_variable("w_emb", [obs_size+1, 32])
            X = tf.nn.embedding_lookup(w_emb, X)

        nbatch = X.shape[0]
        nsteps = nbatch // nenv

        h = tf.layers.flatten(X)

        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, 2*nlstm]) #states

        # split the flat batch into per-step tensors for the unrolled LSTM
        xs = batch_to_seq(h, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)

        # layer-normalized LSTM is not implemented in this port
        assert not layer_norm
        h5, snew = lstm(xs, ms, S, scope='lstm', nh=nlstm)

        h = seq_to_batch(h5)
        initial_state = np.zeros(S.shape.as_list(), dtype=float)

        return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
    return network_fn
def ortho_init(scale=1.0):
    """Return a TF-style initializer producing (scaled) orthogonal weights."""
    def _ortho_init(shape, dtype, partition_info=None):
        # lasagne-style orthogonal init via SVD of a Gaussian matrix
        dims = tuple(shape)
        ndim = len(dims)
        if ndim == 2:
            flat_shape = dims
        elif ndim == 4:
            # conv kernel, assumes NHWC layout: fold all but the last dim
            flat_shape = (np.prod(dims[:-1]), dims[-1])
        else:
            raise NotImplementedError
        gaussian = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(gaussian, full_matrices=False)
        # pick whichever factor has the requested flat shape
        basis = u if u.shape == flat_shape else v
        weights = basis.reshape(dims)
        return (scale * weights[:dims[0], :dims[1]]).astype(np.float32)
    return _ortho_init
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0):
    """Fully connected layer: ``x @ w + b`` built inside the given variable scope."""
    with tf.variable_scope(scope):
        nin = x.get_shape()[1].value
        weight = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
        bias = tf.get_variable("b", [nh], initializer=tf.constant_initializer(init_bias))
        linear = tf.matmul(x, weight)
        return linear + bias
def _check_shape(placeholder_shape, data_shape):
    """
    check if two shapes are compatible (i.e. differ only by dimensions of size 1, or by the batch dimension)

    NOTE(review): the check is currently a stub that always returns True —
    both arguments are ignored. np.reshape in adjust_shape will still raise
    on truly incompatible sizes, so this appears intentional; confirm before
    implementing a real check.
    """
    return True
# ================================================================
# Shape adjustment for feeding into tf placeholders
# ================================================================
def adjust_shape(placeholder, data):
    """
    adjust shape of the data to the shape of the placeholder if possible.
    If shape is incompatible, AssertionError is thrown

    Parameters:
        placeholder: tensorflow input placeholder
        data: input data to be (potentially) reshaped to be fed into placeholder

    Returns:
        reshaped data (non-array, non-list inputs are returned unchanged)
    """
    # pass through anything that is not array-like
    if not isinstance(data, (np.ndarray, list)):
        return data
    if isinstance(data, list):
        data = np.array(data)

    # unknown (None) placeholder dims become -1 so np.reshape can infer them
    placeholder_shape = [dim or -1 for dim in placeholder.shape.as_list()]

    assert _check_shape(placeholder_shape, data.shape), \
        'Shape of data {} is not compatible with shape of the placeholder {}'.format(data.shape, placeholder_shape)

    return np.reshape(data, placeholder_shape)
# ================================================================
# Global session
# ================================================================
def get_session(config=None):
    """Get default session or create one with a given config"""
    existing = tf.get_default_session()
    if existing is not None:
        return existing
    # no default session yet: create one and install it as the default
    return make_session(config=config, make_default=True)
def make_session(config=None, num_cpu=None, make_default=False, graph=None):
    """Returns a session that will use <num_cpu> CPU's only"""
    if num_cpu is None:
        num_cpu = int(os.getenv('RCALL_NUM_CPU', multiprocessing.cpu_count()))
    if config is None:
        config = tf.ConfigProto(
            allow_soft_placement=True,
            inter_op_parallelism_threads=num_cpu,
            intra_op_parallelism_threads=num_cpu)
        # grow GPU memory on demand instead of grabbing it all up front
        config.gpu_options.allow_growth = True
    # InteractiveSession installs itself as the default session
    session_cls = tf.InteractiveSession if make_default else tf.Session
    return session_cls(config=config, graph=graph)
# variables that have already been run through variables_initializer
ALREADY_INITIALIZED = set()

def initialize():
    """Initialize all the uninitialized variables in the global scope."""
    fresh = set(tf.global_variables()).difference(ALREADY_INITIALIZED)
    get_session().run(tf.variables_initializer(fresh))
    ALREADY_INITIALIZED.update(fresh)
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    """
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------
    ob_space: gym.Space     observation space
    batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
    name: str               name of the placeholder

    Returns:
    -------
    tensorflow placeholder tensor
    """
    assert isinstance(ob_space, (Discrete, Box, MultiDiscrete)), \
        'Can only deal with Discrete and Box observation spaces for now'

    dtype = ob_space.dtype
    if dtype == np.int8:
        # promote int8 to uint8 for byte-valued observations
        dtype = np.uint8

    full_shape = (batch_size,) + ob_space.shape
    return tf.placeholder(shape=full_shape, dtype=dtype, name=name)
def explained_variance(ypred, y):
    """
    Fraction of the variance of ``y`` explained by ``ypred``:
    1 - Var[y - ypred] / Var[y].

    interpretation:
        ev=0  =>  might as well have predicted zero
        ev=1  =>  perfect prediction
        ev<0  =>  worse than just predicting zero
    Returns NaN when Var[y] is zero.
    """
    assert y.ndim == 1 and ypred.ndim == 1
    vary = np.var(y)
    if vary == 0:
        return np.nan
    return 1 - np.var(y - ypred) / vary
......@@ -17,11 +17,10 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging
import nni
from .recoverable import Recoverable
_logger = logging.getLogger(__name__)
......
......@@ -39,17 +39,18 @@ class AnnotationTestCase(TestCase):
shutil.rmtree('_generated')
def test_search_space_generator(self):
search_space = generate_search_space('testcase/annotated')
shutil.copytree('testcase/annotated', '_generated/annotated')
search_space = generate_search_space('_generated/annotated')
with open('testcase/searchspace.json') as f:
self.assertEqual(search_space, json.load(f))
def test_code_generator(self):
code_dir = expand_annotations('testcase/usercode', '_generated', nas_mode='classic_mode')
self.assertEqual(code_dir, '_generated')
self._assert_source_equal('testcase/annotated/nas.py', '_generated/nas.py')
self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
code_dir = expand_annotations('testcase/usercode', '_generated/usercode', nas_mode='classic_mode')
self.assertEqual(code_dir, '_generated/usercode')
self._assert_source_equal('testcase/annotated/nas.py', '_generated/usercode/nas.py')
self._assert_source_equal('testcase/annotated/mnist.py', '_generated/usercode/mnist.py')
self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/usercode/dir/simple.py')
with open('testcase/usercode/nonpy.txt') as src, open('_generated/usercode/nonpy.txt') as dst:
assert src.read() == dst.read()
def test_annotation_detecting(self):
......
......@@ -142,6 +142,24 @@ tuner_schema_dict = {
Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
},
'PPOTuner': {
'builtinTunerName': 'PPOTuner',
'classArgs': {
'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
Optional('trials_per_update'): setNumberRange('trials_per_update', int, 0, 99999),
Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999),
Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999),
Optional('ent_coef'): setType('ent_coef', float),
Optional('lr'): setType('lr', float),
Optional('vf_coef'): setType('vf_coef', float),
Optional('max_grad_norm'): setType('max_grad_norm', float),
Optional('gamma'): setType('gamma', float),
Optional('lam'): setType('lam', float),
Optional('cliprange'): setType('cliprange', float),
},
Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
},
'customized': {
'codeDir': setPathCheck('codeDir'),
'classFileName': setType('classFileName', str),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment