"src/vscode:/vscode.git/clone" did not exist on "aabb14ff017a68217515e044eebb53b6ddc3b103"
Unverified commit 55f48d27 authored by QuanluZhang, committed by GitHub

Merge dev-nas-tuner back to master (#1531)

* PPO tuner for NAS, supports NNI's NAS interface (#1380)
parent 7246593f
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string
flags = tf.app.flags
FLAGS = flags.FLAGS
DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCWH'")
DEFINE_string("search_for", None, "Must be [macro|micro]")
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
def lstm(x, prev_c, prev_h, w):
ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
i, f, o, g = tf.split(ifog, 4, axis=1)
i = tf.sigmoid(i)
f = tf.sigmoid(f)
o = tf.sigmoid(o)
g = tf.tanh(g)
next_c = i * g + f * prev_c
next_h = o * tf.tanh(next_c)
return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
next_c, next_h = [], []
for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
inputs = x if layer_id == 0 else next_h[-1]
curr_c, curr_h = lstm(inputs, _c, _h, _w)
next_c.append(curr_c)
next_h.append(curr_h)
return next_c, next_h
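# Illustrative usage sketch (added for this write-up, not part of the commit), assuming
# TensorFlow 1.x graph mode: each LSTM layer carries its own (c, h) state and a weight of
# shape [2 * hidden, 4 * hidden], matching the concat/split layout in lstm() above.
hidden, num_layers = 64, 2
x_in = tf.placeholder(tf.float32, [1, hidden])                   # input already projected to `hidden`
prev_c = [tf.zeros([1, hidden]) for _ in range(num_layers)]      # per-layer cell states
prev_h = [tf.zeros([1, hidden]) for _ in range(num_layers)]      # per-layer hidden states
w_lstm = [tf.get_variable("w_lstm_{}".format(i), [2 * hidden, 4 * hidden])
          for i in range(num_layers)]
next_c_out, next_h_out = stack_lstm(x_in, prev_c, prev_h, w_lstm)  # lists of [1, hidden] tensors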
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
if initializer is None:
initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
def create_bias(name, shape, initializer=None):
if initializer is None:
initializer = tf.constant_initializer(0.0, dtype=tf.float32)
return tf.get_variable(name, shape, initializer=initializer)
def conv_op(inputs, filter_size, is_training, count, out_filters,
data_format, ch_mul=1, start_idx=None, separable=False):
"""
Args:
start_idx: where to start taking the output channels. if None, assuming
fixed_arc mode
count: how many output_channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("inp_conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("out_conv_{}".format(filter_size)):
if start_idx is None:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
w_point = create_weight(
"w_point", [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, inp_c, count])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
#test_depth = w_depth
w_point = create_weight(
"w_point", [out_filters, out_filters * ch_mul])
w_point = w_point[start_idx:start_idx+count, :]
w_point = tf.transpose(w_point, [1, 0])
w_point = tf.reshape(
w_point, [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, out_filters, out_filters])
w = tf.transpose(w, [3, 0, 1, 2])
w = w[start_idx:start_idx+count, :, :, :]
w = tf.transpose(w, [1, 2, 3, 0])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
x = tf.nn.relu(x)
return x
def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None):
"""
Args:
start_idx: where to start taking the output channels. if None, assuming
fixed_arc mode
count: how many output_channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("pool"):
if data_format == "NHWC":
actual_data_format = "channels_last"
elif data_format == "NCHW":
actual_data_format = "channels_first"
if avg_or_max == "avg":
x = tf.layers.average_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
elif avg_or_max == "max":
x = tf.layers.max_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
else:
raise ValueError("Unknown pool {}".format(avg_or_max))
if start_idx is not None:
if data_format == "NHWC":
x = x[:, :, :, start_idx: start_idx+count]
elif data_format == "NCHW":
x = x[:, start_idx: start_idx+count, :, :]
return x
def global_avg_pool(x, data_format="NHWC"):
if data_format == "NHWC":
x = tf.reduce_mean(x, [1, 2])
elif data_format == "NCHW":
x = tf.reduce_mean(x, [2, 3])
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
return x
def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
data_format="NHWC"):
if data_format == "NHWC":
shape = [x.get_shape()[3]]
elif data_format == "NCHW":
shape = [x.get_shape()[1]]
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
update_mean = moving_averages.assign_moving_average(
moving_mean, mean, decay)
update_variance = moving_averages.assign_moving_average(
moving_variance, variance, decay)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean,
variance=moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
decay=0.9, epsilon=1e-3, data_format="NHWC"):
shape = [num_channels]
indices = tf.where(mask)
indices = tf.to_int32(indices)
indices = tf.reshape(indices, [-1])
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
offset = tf.boolean_mask(offset, mask)
scale = tf.boolean_mask(scale, mask)
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
variance = (1.0 - decay) * \
(tf.boolean_mask(moving_variance, mask) - variance)
update_mean = tf.scatter_sub(
moving_mean, indices, mean, use_locking=True)
update_variance = tf.scatter_sub(
moving_variance, indices, variance, use_locking=True)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
masked_moving_mean = tf.boolean_mask(moving_mean, mask)
masked_moving_variance = tf.boolean_mask(moving_variance, mask)
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
mean=masked_moving_mean,
variance=masked_moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
user_flags = []
def DEFINE_string(name, default_value, doc_string):
tf.app.flags.DEFINE_string(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_integer(name, default_value, doc_string):
tf.app.flags.DEFINE_integer(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_float(name, default_value, doc_string):
tf.app.flags.DEFINE_float(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_boolean(name, default_value, doc_string):
tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def print_user_flags(line_limit=80):
print("-" * 80)
global user_flags
FLAGS = tf.app.flags.FLAGS
for flag_name in sorted(user_flags):
value = "{}".format(getattr(FLAGS, flag_name))
log_string = flag_name
log_string += "." * (line_limit - len(flag_name) - len(value))
log_string += value
print(log_string)
def get_C(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return x.get_shape()[3].value
elif data_format == "NCHW":
return x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
def get_HW(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
return x.get_shape()[2].value
def get_strides(stride, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return [1, stride, stride, 1]
elif data_format == "NCHW":
return [1, 1, stride, stride]
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
class TextColors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class Logger(object):
def __init__(self, output_file):
self.terminal = sys.stdout
self.log = open(output_file, "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def count_model_params(tf_variables):
"""
Args:
tf_variables: list of all model variables
"""
num_vars = 0
for var in tf_variables:
num_vars += np.prod([dim.value for dim in var.get_shape()])
return num_vars
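# Small illustrative check (added for this write-up, not part of the commit):
# count_model_params sums the element counts of the given TF 1.x variables.
with tf.variable_scope("toy_scope"):
    toy_w1 = tf.get_variable("w1", [3, 3, 16, 32])   # 3*3*16*32 = 4608 parameters
    toy_b1 = tf.get_variable("b1", [32])             # 32 parameters
assert count_model_params([toy_w1, toy_b1]) == 4640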
def get_train_ops(
loss,
tf_variables,
train_step,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_warmup_val=None,
lr_warmup_steps=100,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_dec_min=None,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
num_train_batches=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
get_grad_norms=False,
moving_average=None):
"""
Args:
clip_mode: "global", "norm", or None.
moving_average: store the moving average of parameters
"""
if l2_reg > 0:
l2_losses = []
for var in tf_variables:
l2_losses.append(tf.reduce_sum(var ** 2))
l2_loss = tf.add_n(l2_losses)
loss += l2_reg * l2_loss
grads = tf.gradients(loss, tf_variables)
grad_norm = tf.global_norm(grads)
grad_norms = {}
for v, g in zip(tf_variables, grads):
if v is None or g is None:
continue
if isinstance(g, tf.IndexedSlices):
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
else:
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))
if clip_mode is not None:
assert grad_bound is not None, "Need grad_bound to clip gradients."
if clip_mode == "global":
grads, _ = tf.clip_by_global_norm(grads, grad_bound)
elif clip_mode == "norm":
clipped = []
for g in grads:
if isinstance(g, tf.IndexedSlices):
c_g = tf.clip_by_norm(g.values, grad_bound)
c_g = tf.IndexedSlices(c_g, g.indices)  # clipped values first, then the original indices
else:
c_g = tf.clip_by_norm(g, grad_bound)
clipped.append(c_g)
grads = clipped
else:
raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))
if lr_cosine:
assert lr_max is not None, "Need lr_max to use lr_cosine"
assert lr_min is not None, "Need lr_min to use lr_cosine"
assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
assert num_train_batches is not None, ("Need num_train_batches to use"
" lr_cosine")
curr_epoch = train_step // num_train_batches
last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
name="last_reset")
T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
T_curr = curr_epoch - last_reset
def _update():
update_last_reset = tf.assign(
last_reset, curr_epoch, use_locking=True)
update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
with tf.control_dependencies([update_last_reset, update_T_i]):
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
def _no_update():
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
learning_rate = tf.cond(
tf.greater_equal(T_curr, T_i), _update, _no_update)
else:
learning_rate = tf.train.exponential_decay(
lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
lr_dec_rate, staircase=True)
if lr_dec_min is not None:
learning_rate = tf.maximum(learning_rate, lr_dec_min)
if lr_warmup_val is not None:
learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
lambda: lr_warmup_val, lambda: learning_rate)
if optim_algo == "momentum":
opt = tf.train.MomentumOptimizer(
learning_rate, 0.9, use_locking=True, use_nesterov=True)
elif optim_algo == "sgd":
opt = tf.train.GradientDescentOptimizer(
learning_rate, use_locking=True)
elif optim_algo == "adam":
opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
use_locking=True)
else:
raise ValueError("Unknown optim_algo {}".format(optim_algo))
if sync_replicas:
assert num_aggregate is not None, "Need num_aggregate to sync."
assert num_replicas is not None, "Need num_replicas to sync."
opt = tf.train.SyncReplicasOptimizer(
opt,
replicas_to_aggregate=num_aggregate,
total_num_replicas=num_replicas,
use_locking=True)
if moving_average is not None:
opt = tf.contrib.opt.MovingAverageOptimizer(
opt, average_decay=moving_average)
train_op = opt.apply_gradients(
zip(grads, tf_variables), global_step=train_step)
if get_grad_norms:
return train_op, learning_rate, grad_norm, opt, grad_norms
else:
return train_op, learning_rate, grad_norm, opt
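# Illustrative usage sketch of get_train_ops on a toy loss (added for this write-up, not part
# of the commit); the toy_* names are made up and the values mirror the child_* flag defaults.
toy_w = tf.get_variable("toy_w", [10], initializer=tf.zeros_initializer())
toy_loss = tf.reduce_sum(tf.square(toy_w - 1.0))
toy_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="toy_step")
toy_train_op, toy_lr, toy_grad_norm, toy_opt = get_train_ops(
    toy_loss, [toy_w], toy_step,
    clip_mode="norm", grad_bound=5.0,   # cf. child_grad_bound
    l2_reg=1e-4,                        # cf. child_l2_reg
    lr_init=0.1, lr_dec_start=0, lr_dec_every=10000, lr_dec_rate=0.1,
    optim_algo="momentum")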
@@ -2,13 +2,14 @@ import numpy as np
from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state):
'''random
'''
chosen_archi = {}
-print("zql: nas search space: ", nas_ss)
for block_name, block_value in nas_ss.items():
-assert block_value['_type'] == "mutable_layer", "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
+assert block_value['_type'] == "mutable_layer", \
+"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block = block_value['_value']
tmp_block = {}
for layer_name, layer in block.items():
@@ -19,13 +20,12 @@ def random_archi_generator(nas_ss, random_state):
tmp_layer['chosen_layer'] = value[index]
elif key == 'optional_inputs':
tmp_layer['chosen_inputs'] = []
-print("zql: optional_inputs", layer['optional_inputs'])
if layer['optional_inputs']:
if isinstance(layer['optional_input_size'], int):
choice_num = layer['optional_input_size']
else:
choice_range = layer['optional_input_size']
-choice_num = random_state.randint(choice_range[0], choice_range[1]+1)
+choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
for _ in range(choice_num):
index = random_state.randint(len(layer['optional_inputs']))
tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
@@ -37,6 +37,7 @@ def random_archi_generator(nas_ss, random_state):
chosen_archi[block_name] = tmp_block
return chosen_archi
class RandomNASTuner(Tuner):
'''RandomNASTuner
'''
...
@@ -174,7 +174,7 @@ export namespace ValidationSchemas {
checkpointDir: joi.string().allow('')
}),
tuner: joi.object({
-builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner'),
+builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner', 'PPOTuner'),
codeDir: joi.string(),
classFileName: joi.string(),
className: joi.string(),
...
@@ -30,7 +30,8 @@ ModuleName = {
'NetworkMorphism': 'nni.networkmorphism_tuner.networkmorphism_tuner',
'Curvefitting': 'nni.curvefitting_assessor.curvefitting_assessor',
'MetisTuner': 'nni.metis_tuner.metis_tuner',
-'GPTuner': 'nni.gp_tuner.gp_tuner'
+'GPTuner': 'nni.gp_tuner.gp_tuner',
+'PPOTuner': 'nni.ppo_tuner.ppo_tuner'
}
ClassName = {
@@ -44,6 +45,7 @@ ClassName = {
'NetworkMorphism':'NetworkMorphismTuner',
'MetisTuner':'MetisTuner',
'GPTuner':'GPTuner',
+'PPOTuner': 'PPOTuner',
'Medianstop': 'MedianstopAssessor',
'Curvefitting': 'CurvefittingAssessor'
...
@@ -27,6 +27,7 @@ import logging
import hyperopt as hp
import numpy as np
from nni.tuner import Tuner
+from nni.nas_utils import rewrite_nas_space
from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index
logger = logging.getLogger('hyperopt_AutoML')
@@ -240,6 +241,7 @@ class HyperoptTuner(Tuner):
return hp.anneal.suggest
raise RuntimeError('Not support tuner algorithm in hyperopt.')
+@rewrite_nas_space
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in parameters.
...
@@ -101,11 +101,16 @@ class MsgDispatcher(MsgDispatcherBase):
self.tuner.update_search_space(data)
send(CommandType.Initialized, '')
+def send_trial_callback(self, id, params):
+"""For tuner to issue trial config when the config is generated
+"""
+send(CommandType.NewTrialJob, _pack_parameter(id, params))
def handle_request_trial_jobs(self, data):
# data: number or trial jobs
ids = [_create_parameter_id() for _ in range(data)]
_logger.debug("requesting for generating params of {}".format(ids))
-params_list = self.tuner.generate_multiple_parameters(ids)
+params_list = self.tuner.generate_multiple_parameters(ids, st_callback=self.send_trial_callback)
for i, _ in enumerate(params_list):
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
...
@@ -17,10 +17,16 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
+import functools
+import logging
from . import trial
+_logger = logging.getLogger(__name__)
+_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer"
def classic_mode(
mutable_id,
mutable_layer_id,
@@ -34,13 +40,11 @@ def classic_mode(
without touching the full model graph.'''
if trial.get_current_parameter() is None:
trial.get_next_parameter()
-mutable_block = trial.get_current_parameter(mutable_id)
-chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
-chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
-real_chosen_inputs = [optional_inputs[input_name]
-for input_name in chosen_inputs]
-layer_out = funcs[chosen_layer](
-[fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
+chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id,
+list(optional_inputs.keys()))
+real_chosen_inputs = [optional_inputs[input_name] for input_name in chosen_inputs]
+layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer])
return layer_out
@@ -173,20 +177,44 @@ def reload_tensorflow_variables(tf, session):
tf: tensorflow module
'''
subgraph_from_tuner = trial.get_next_parameter()
-for mutable_id, mutable_block in subgraph_from_tuner.items():
+mutable_layers = set()
+for subgraph_key in subgraph_from_tuner:
+if "/" in subgraph_key:
+# has to remove the last, could be layer_choice or whatever
+mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")])
+if mutable_id is not None:
+mutable_layers.add((mutable_id, mutable_layer_id))
+mutable_layers = sorted(list(mutable_layers))
+for mutable_id, mutable_layer_id in mutable_layers:
if mutable_id not in name_space:
+_logger.warning("{} not found in name space".format(mutable_id))
continue
-for mutable_layer_id, mutable_layer in mutable_block.items():
-name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
-# extract layer information from the subgraph sampled by tuner
-chosen_layer = name_space[name_prefix]['funcs'].index(
-mutable_layer["chosen_layer"])
-chosen_inputs = [1 if inp in mutable_layer["chosen_inputs"]
-else 0 for inp in name_space[name_prefix]['optional_inputs']]
+name_prefix = "{}_{}".format(mutable_id, mutable_layer_id)
+# get optional inputs names
+optional_inputs = name_space[name_prefix]['optional_inputs']
+# extract layer information from the subgraph sampled by tuner
+chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs)
+chosen_layer = name_space[name_prefix]['funcs'].index(chosen_layer)
+chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs]
# load these information into pre-defined tensorflow variables
tf_variables[name_prefix]['funcs'].load(chosen_layer, session)
tf_variables[name_prefix]['optional_inputs'].load(
chosen_inputs, session)
+def _construct_general_key(mutable_id, mutable_layer_id):
+# Mutable layer key in a general (search space) format
+# that is, prefix/mutable_id/mutable_layer_id
+return _MUTABLE_LAYER_SPACE_PREFIX + "/" + mutable_id + "/" + mutable_layer_id
+def _decompose_general_key(key):
+# inverse operation of above
+if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX):
+return None, None
+else:
+_, mutable_id, mutable_layer_id = key.split("/", maxsplit=2)
+return mutable_id, mutable_layer_id
def darts_training(tf, session, loss, feed_dict):
@@ -205,4 +233,107 @@ def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None):
if nas_mode == 'darts_mode':
darts_training(tf, session, loss, feed_dict)
elif nas_mode == 'enas_mode':
reload_tensorflow_variables(tf, session)
\ No newline at end of file
def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs):
# optional_inputs should be name(key)s of the optional inputs
try:
mutable_block = trial.get_current_parameter(mutable_id)
# There is a NAS tuner
chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"]
chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"]
except KeyError:
# Try to find converted NAS parameters
params = trial.get_current_parameter()
expected_prefix = _construct_general_key(mutable_id, mutable_layer_id)
chosen_layer = params[expected_prefix + "/layer_choice"]
# find how many to choose
optional_input_size = int(params[expected_prefix + "/optional_input_size"]) # convert uniform to randint
# find who to choose, can duplicate
optional_input_state = params[expected_prefix + "/optional_input_chosen_state"]
chosen_inputs = []
# make sure dict -> list produce stable result by sorting
optional_inputs_keys = sorted(optional_inputs)
for i in range(optional_input_size):
chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)])
optional_input_state //= len(optional_inputs)
_logger.info("%s_%s: layer: %s, optional inputs: %s" % (mutable_id, mutable_layer_id,
chosen_layer, chosen_inputs))
return chosen_layer, chosen_inputs
def convert_nas_search_space(search_space):
"""
Args:
param search_space: raw search space
return: the new search space, mutable_layers will be converted into choice
"""
ret = dict()
for k, v in search_space.items():
if "_type" not in v:
# this should not happen
_logger.warning("There is no _type in one of your search space values with key '%s'"
". Please check your search space" % k)
ret[k] = v
elif v["_type"] != "mutable_layer":
ret[k] = v
else:
_logger.info("Converting mutable_layer search space with key '%s'" % k)
# v["_value"] looks like {'mutable_layer_1': {'layer_choice': ...} ...}
values = v["_value"]
for layer_name, layer_data in values.items():
# there should be at most layer_choice, optional_inputs, optional_input_size in layer_data
# add "_mutable_layer" as prefix so that they can be recovered later
layer_key = _construct_general_key(k, layer_name)
if layer_data.get("layer_choice"): # filter out empty choice and no choice
layer_choice = layer_data["layer_choice"]
else:
raise ValueError("No layer choice found in %s" % layer_key)
if layer_data.get("optional_input_size"):
input_size = layer_data["optional_input_size"]
if isinstance(input_size, int):
input_size = [input_size, input_size]
if input_size[0] > input_size[1] or input_size[0] < 0:
_logger.error("Might not be able to handle optional_input_size < 0, please double check")
input_size[1] += 1
else:
_logger.info("Optional input choices are set to empty by default in %s" % layer_key)
input_size = [0, 1]
if layer_data.get("optional_inputs"):
total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1)
else:
_logger.info("Optional inputs not found in %s" % layer_key)
total_state_size = 1
converted = {
layer_key + "/layer_choice": {
"_type": "choice", "_value": layer_choice
},
layer_key + "/optional_input_size": {
"_type": "randint", "_value": input_size
},
layer_key + "/optional_input_chosen_state": {
"_type": "randint", "_value": [0, total_state_size]
}
}
_logger.info(converted)
ret.update(converted)
return ret
def rewrite_nas_space(func):
@functools.wraps(func)
def wrap(self, search_space):
search_space = convert_nas_search_space(search_space)
return func(self, search_space)
return wrap
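# Illustrative example (added for this write-up, not part of the commit): how a "mutable_layer"
# entry is rewritten by convert_nas_search_space into plain choice/randint parameters that any
# HPO tuner can handle. The block/layer/choice names below are made up.
example_space = {
    "mutable_block_1": {
        "_type": "mutable_layer",
        "_value": {
            "mutable_layer_1": {
                "layer_choice": ["conv3x3", "conv5x5"],
                "optional_inputs": ["out1", "out2"],
                "optional_input_size": 1,
            }
        }
    }
}
converted = convert_nas_search_space(example_space)
# Expected keys, all prefixed with "_mutable_layer/mutable_block_1/mutable_layer_1":
#   .../layer_choice                -> {"_type": "choice",  "_value": ["conv3x3", "conv5x5"]}
#   .../optional_input_size         -> {"_type": "randint", "_value": [1, 2]}
#   .../optional_input_chosen_state -> {"_type": "randint", "_value": [0, 2]}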
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
functions for sampling from hidden state
"""
import tensorflow as tf
from .util import fc
class Pd:
"""
A particular probability distribution
"""
def flatparam(self):
raise NotImplementedError
def mode(self):
raise NotImplementedError
def neglogp(self, x):
# Usually it's easier to define the negative logprob
raise NotImplementedError
def kl(self, other):
raise NotImplementedError
def entropy(self):
raise NotImplementedError
def sample(self):
raise NotImplementedError
def logp(self, x):
return - self.neglogp(x)
def get_shape(self):
return self.flatparam().shape
@property
def shape(self):
return self.get_shape()
def __getitem__(self, idx):
return self.__class__(self.flatparam()[idx])
class PdType:
"""
Parametrized family of probability distributions
"""
def pdclass(self):
raise NotImplementedError
def pdfromflat(self, flat, mask, nsteps, size, is_act_model):
return self.pdclass()(flat, mask, nsteps, size, is_act_model)
def pdfromlatent(self, latent_vector, init_scale, init_bias):
raise NotImplementedError
def param_shape(self):
raise NotImplementedError
def sample_shape(self):
raise NotImplementedError
def sample_dtype(self):
raise NotImplementedError
def param_placeholder(self, prepend_shape, name=None):
return tf.placeholder(dtype=tf.float32, shape=prepend_shape+self.param_shape(), name=name)
def sample_placeholder(self, prepend_shape, name=None):
return tf.placeholder(dtype=self.sample_dtype(), shape=prepend_shape+self.sample_shape(), name=name)
class CategoricalPd(Pd):
"""
categorical probability distribution
"""
def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
self.logits = logits
self.mask_npinf = mask_npinf
self.nsteps = nsteps
self.size = size
self.is_act_model = is_act_model
def flatparam(self):
return self.logits
def mode(self):
return tf.argmax(self.logits, axis=-1)
@property
def mean(self):
return tf.nn.softmax(self.logits)
def neglogp(self, x):
"""
return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
Note: we can't use sparse_softmax_cross_entropy_with_logits because
the implementation does not allow second-order derivatives...
"""
if x.dtype in {tf.uint8, tf.int32, tf.int64}:
# one-hot encoding
x_shape_list = x.shape.as_list()
logits_shape_list = self.logits.get_shape().as_list()[:-1]
for xs, ls in zip(x_shape_list, logits_shape_list):
if xs is not None and ls is not None:
assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
x = tf.one_hot(x, self.logits.get_shape().as_list()[-1])
else:
# already encoded
assert x.shape.as_list() == self.logits.shape.as_list()
return tf.nn.softmax_cross_entropy_with_logits_v2(
logits=self.logits,
labels=x)
def kl(self, other):
"""kl"""
a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
ea0 = tf.exp(a0)
ea1 = tf.exp(a1)
z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
p0 = ea0 / z0
return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
def entropy(self):
"""compute entropy"""
a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
ea0 = tf.exp(a0)
z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
p0 = ea0 / z0
return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
def sample(self):
"""sample from logits"""
if not self.is_act_model:
re_res = tf.reshape(self.logits, [-1, self.nsteps, self.size])
masked_res = tf.math.add(re_res, self.mask_npinf)
re_masked_res = tf.reshape(masked_res, [-1, self.size])
u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
else:
u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
@classmethod
def fromflat(cls, flat):
return cls(flat)
class CategoricalPdType(PdType):
"""
to create CategoricalPd
"""
def __init__(self, ncat, nsteps, np_mask, is_act_model):
self.ncat = ncat
self.nsteps = nsteps
self.np_mask = np_mask
self.is_act_model = is_act_model
def pdclass(self):
return CategoricalPd
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
"""add fc and create CategoricalPd"""
pdparam, mask, mask_npinf = _matching_fc(latent_vector, 'pi', self.ncat, self.nsteps,
init_scale=init_scale, init_bias=init_bias,
np_mask=self.np_mask, is_act_model=self.is_act_model)
return self.pdfromflat(pdparam, mask_npinf, self.nsteps, self.ncat, self.is_act_model), pdparam, mask, mask_npinf
def param_shape(self):
return [self.ncat]
def sample_shape(self):
return []
def sample_dtype(self):
return tf.int32
def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
"""
add fc op, and add mask op when not in action mode
"""
if tensor.shape[-1] == size:
assert False
return tensor
else:
mask = tf.get_variable("act_mask", dtype=tf.float32, initializer=np_mask[0], trainable=False)
mask_npinf = tf.get_variable("act_mask_npinf", dtype=tf.float32, initializer=np_mask[1], trainable=False)
res = fc(tensor, name, size, init_scale=init_scale, init_bias=init_bias)
if not is_act_model:
re_res = tf.reshape(res, [-1, nsteps, size])
masked_res = tf.math.multiply(re_res, mask)
re_masked_res = tf.reshape(masked_res, [-1, size])
return re_masked_res, mask, mask_npinf
else:
return res, mask, mask_npinf
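# Note (added for this write-up, not part of the commit): CategoricalPd.sample() above relies on
# the Gumbel-max trick: argmax(logits - log(-log(u))) with u ~ Uniform(0, 1) draws an index with
# probability softmax(logits). A small self-contained numpy check of that fact:
import numpy as np

_rng = np.random.default_rng(0)
_logits = np.array([1.0, 2.0, 0.5])
_probs = np.exp(_logits) / np.exp(_logits).sum()
_draws = [np.argmax(_logits - np.log(-np.log(_rng.uniform(size=3)))) for _ in range(100000)]
# the empirical frequencies approach _probs
print(np.bincount(_draws, minlength=3) / len(_draws), _probs)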
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
the main model of policy/value network
"""
import tensorflow as tf
from .util import initialize, get_session
class Model:
"""
We use this object to :
__init__:
- Creates the step_model
- Creates the train_model
train():
- Make the training part (feedforward and backpropagation of gradients)
save/load():
- Save load the model
"""
def __init__(self, *, policy, nbatch_act, nbatch_train,
nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None):
"""
init
"""
self.sess = sess = get_session()
with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
# CREATE OUR TWO MODELS
# act_model that is used for sampling
act_model = policy(nbatch_act, 1, sess, np_mask=np_mask, is_act_model=True)
# Train model for training
if microbatch_size is None:
train_model = policy(nbatch_train, nsteps, sess, np_mask=np_mask, is_act_model=False)
else:
train_model = policy(microbatch_size, nsteps, sess, np_mask=np_mask, is_act_model=False)
# CREATE THE PLACEHOLDERS
self.A = A = train_model.pdtype.sample_placeholder([None])
self.ADV = ADV = tf.placeholder(tf.float32, [None])
self.R = R = tf.placeholder(tf.float32, [None])
# Keep track of old actor
self.OLDNEGLOGPAC = OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
# Keep track of old critic
self.OLDVPRED = OLDVPRED = tf.placeholder(tf.float32, [None])
self.LR = LR = tf.placeholder(tf.float32, [])
# Cliprange
self.CLIPRANGE = CLIPRANGE = tf.placeholder(tf.float32, [])
neglogpac = train_model.pd.neglogp(A)
# Calculate the entropy
# Entropy is used to improve exploration by limiting premature convergence to a suboptimal policy.
entropy = tf.reduce_mean(train_model.pd.entropy())
# CALCULATE THE LOSS
# Total loss = Policy gradient loss - entropy * entropy coefficient + Value coefficient * value loss
# Clip the value to reduce variability during Critic training
# Get the predicted value
vpred = train_model.vf
vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED, - CLIPRANGE, CLIPRANGE)
# Unclipped value
vf_losses1 = tf.square(vpred - R)
# Clipped value
vf_losses2 = tf.square(vpredclipped - R)
vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))
# Calculate ratio (pi current policy / pi old policy)
ratio = tf.exp(OLDNEGLOGPAC - neglogpac)
# Defining Loss = - J is equivalent to max J
pg_losses = -ADV * ratio
pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE, 1.0 + CLIPRANGE)
# Final PG loss
pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
clipfrac = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))
# Total loss
loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef
# UPDATE THE PARAMETERS USING LOSS
# 1. Get the model parameters
params = tf.trainable_variables('ppo2_model')
# 2. Build our trainer
self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
# 3. Calculate the gradients
grads_and_var = self.trainer.compute_gradients(loss, params)
grads, var = zip(*grads_and_var)
if max_grad_norm is not None:
# Clip the gradients (normalize)
grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
grads_and_var = list(zip(grads, var))
# zip aggregate each gradient with parameters associated
# For instance zip(ABCD, xyza) => Ax, By, Cz, Da
self.grads = grads
self.var = var
self._train_op = self.trainer.apply_gradients(grads_and_var)
self.loss_names = ['policy_loss', 'value_loss', 'policy_entropy', 'approxkl', 'clipfrac']
self.stats_list = [pg_loss, vf_loss, entropy, approxkl, clipfrac]
self.train_model = train_model
self.act_model = act_model
self.step = act_model.step
self.value = act_model.value
self.initial_state = act_model.initial_state
initialize()
def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):
"""
train the model.
Here we calculate advantage A(s,a) = R + gamma * V(s') - V(s)
Returns = R + gamma * V(s')
"""
advs = returns - values
# Normalize the advantages
advs = (advs - advs.mean()) / (advs.std() + 1e-8)
td_map = {
self.train_model.X : obs,
self.A : actions,
self.ADV : advs,
self.R : returns,
self.LR : lr,
self.CLIPRANGE : cliprange,
self.OLDNEGLOGPAC : neglogpacs,
self.OLDVPRED : values
}
if states is not None:
td_map[self.train_model.S] = states
td_map[self.train_model.M] = masks
return self.sess.run(
self.stats_list + [self._train_op],
td_map
)[:-1]
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
build policy/value network from model
"""
import tensorflow as tf
from .distri import CategoricalPdType
from .util import lstm_model, fc, observation_placeholder, adjust_shape
class PolicyWithValue:
"""
Encapsulates fields and methods for RL policy and value function estimation with shared parameters
"""
def __init__(self, env, observations, latent, estimate_q=False, vf_latent=None, sess=None, np_mask=None, is_act_model=False, **tensors):
"""
Parameters:
----------
env: RL environment
observations: tensorflow placeholder in which the observations will be fed
latent: latent state from which policy distribution parameters should be inferred
vf_latent: latent state from which value function should be inferred (if None, then latent is used)
sess: tensorflow session to run calculations in (if None, default session is used)
**tensors: tensorflow tensors for additional attributes such as state or mask
"""
self.X = observations
self.state = tf.constant([])
self.initial_state = None
self.__dict__.update(tensors)
vf_latent = vf_latent if vf_latent is not None else latent
vf_latent = tf.layers.flatten(vf_latent)
latent = tf.layers.flatten(latent)
# Based on the action space, will select what probability distribution type
self.np_mask = np_mask
self.pdtype = CategoricalPdType(env.action_space.n, env.nsteps, np_mask, is_act_model)
self.act_latent = latent
self.nh = env.action_space.n
self.pd, self.pi, self.mask, self.mask_npinf = self.pdtype.pdfromlatent(latent, init_scale=0.01)
# Take an action
self.action = self.pd.sample()
# Calculate the neg log of our probability
self.neglogp = self.pd.neglogp(self.action)
self.sess = sess or tf.get_default_session()
assert estimate_q is False
self.vf = fc(vf_latent, 'vf', 1)
self.vf = self.vf[:, 0]
if is_act_model:
self._build_model_for_step()
def _evaluate(self, variables, observation, **extra_feed):
sess = self.sess
feed_dict = {self.X: adjust_shape(self.X, observation)}
for inpt_name, data in extra_feed.items():
if inpt_name in self.__dict__.keys():
inpt = self.__dict__[inpt_name]
if isinstance(inpt, tf.Tensor) and inpt._op.type == 'Placeholder':
feed_dict[inpt] = adjust_shape(inpt, data)
return sess.run(variables, feed_dict)
def _build_model_for_step(self):
# multiply with weight and apply mask on self.act_latent to generate
self.act_step = step = tf.placeholder(shape=(), dtype=tf.int64, name='act_step')
with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
from .util import ortho_init
nin = self.act_latent.get_shape()[1].value
w = tf.get_variable("w", [nin, self.nh], initializer=ortho_init(0.01))
b = tf.get_variable("b", [self.nh], initializer=tf.constant_initializer(0.0))
logits = tf.matmul(self.act_latent, w)+b
piece = tf.slice(self.mask, [step, 0], [1, self.nh])
re_piece = tf.reshape(piece, [-1])
masked_logits = tf.math.multiply(logits, re_piece)
npinf_piece = tf.slice(self.mask_npinf, [step, 0], [1, self.nh])
re_npinf_piece = tf.reshape(npinf_piece, [-1])
def sample(logits, mask_npinf):
new_logits = tf.math.add(logits, mask_npinf)
u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)
def neglogp(logits, x):
# return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
# Note: we can't use sparse_softmax_cross_entropy_with_logits because
# the implementation does not allow second-order derivatives...
if x.dtype in {tf.uint8, tf.int32, tf.int64}:
# one-hot encoding
x_shape_list = x.shape.as_list()
logits_shape_list = logits.get_shape().as_list()[:-1]
for xs, ls in zip(x_shape_list, logits_shape_list):
if xs is not None and ls is not None:
assert xs == ls, 'shape mismatch: {} in x vs {} in logits'.format(xs, ls)
x = tf.one_hot(x, logits.get_shape().as_list()[-1])
else:
# already encoded
assert x.shape.as_list() == logits.shape.as_list()
return tf.nn.softmax_cross_entropy_with_logits_v2(
logits=logits,
labels=x)
self.act_action = sample(masked_logits, re_npinf_piece)
self.act_neglogp = neglogp(masked_logits, self.act_action)
def step(self, step, observation, **extra_feed):
"""
Compute next action(s) given the observation(s)
Parameters:
----------
observation: observation data (either single or a batch)
**extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
Returns:
-------
(action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
"""
extra_feed['act_step'] = step
a, v, state, neglogp = self._evaluate([self.act_action, self.vf, self.state, self.act_neglogp], observation, **extra_feed)
if state.size == 0:
state = None
return a, v, state, neglogp
def value(self, ob, *args, **kwargs):
"""
Compute value estimate(s) given the observation(s)
Parameters:
----------
observation: observation data (either single or a batch)
**extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
Returns:
-------
value estimate
"""
return self._evaluate(self.vf, ob, *args, **kwargs)
def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
"""
build lstm policy and value network, they share the same lstm network.
the parameters all use their default values.
"""
policy_network = lstm_model(**policy_kwargs)
def policy_fn(nbatch=None, nsteps=None, sess=None, observ_placeholder=None, np_mask=None, is_act_model=False):
ob_space = model_config.observation_space
X = observ_placeholder if observ_placeholder is not None else observation_placeholder(ob_space, batch_size=nbatch)
extra_tensors = {}
# encode_observation is not necessary anymore as we use embedding_lookup
encoded_x = X
with tf.variable_scope('pi', reuse=tf.AUTO_REUSE):
policy_latent = policy_network(encoded_x, 1, model_config.observation_space.n)
if isinstance(policy_latent, tuple):
policy_latent, recurrent_tensors = policy_latent
if recurrent_tensors is not None:
# recurrent architecture, need a few more steps
nenv = nbatch // nsteps
assert nenv > 0, 'Bad input for recurrent policy: batch size {} smaller than nsteps {}'.format(nbatch, nsteps)
policy_latent, recurrent_tensors = policy_network(encoded_x, nenv, model_config.observation_space.n)
extra_tensors.update(recurrent_tensors)
_v_net = value_network
assert _v_net is None or _v_net == 'shared'
vf_latent = policy_latent
policy = PolicyWithValue(
env=model_config,
observations=X,
latent=policy_latent,
vf_latent=vf_latent,
sess=sess,
estimate_q=estimate_q,
np_mask=np_mask,
is_act_model=is_act_model,
**extra_tensors
)
return policy
return policy_fn
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
ppo_tuner.py including:
class PPOTuner
"""
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import copy
import logging
import numpy as np
import json_tricks
from gym import spaces
import nni
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward
from .model import Model
from .util import set_global_seeds
from .policy import build_lstm_policy
logger = logging.getLogger('ppo_tuner_AutoML')
def constfn(val):
"""wrap as function"""
def f(_):
return val
return f
class ModelConfig:
"""
Configurations of the PPO model
"""
def __init__(self):
self.observation_space = None
self.action_space = None
self.num_envs = 0
self.nsteps = 0
self.ent_coef = 0.0
self.lr = 3e-4
self.vf_coef = 0.5
self.max_grad_norm = 0.5
self.gamma = 0.99
self.lam = 0.95
self.cliprange = 0.2
self.embedding_size = None # the embedding is for each action
self.noptepochs = 4 # number of training epochs per update
self.total_timesteps = 5000 # number of timesteps (i.e. number of actions taken in the environment)
self.nminibatches = 4 # number of training minibatches per update. For recurrent policies,
# should be smaller or equal than number of environments run in parallel.
class TrialsInfo:
"""
Information about each trial from one model inference
"""
def __init__(self, obs, actions, values, neglogpacs, dones, last_value, inf_batch_size):
self.iter = 0
self.obs = obs
self.actions = actions
self.values = values
self.neglogpacs = neglogpacs
self.dones = dones
self.last_value = last_value
self.rewards = None
self.returns = None
self.inf_batch_size = inf_batch_size
#self.states = None
def get_next(self):
"""
get actions of the next trial
"""
if self.iter >= self.inf_batch_size:
return None, None
actions = []
for step in self.actions:
actions.append(step[self.iter])
self.iter += 1
return self.iter - 1, actions
def update_rewards(self, rewards, returns):
"""
after the trial is finished, the reward and return of this trial are updated
"""
self.rewards = rewards
self.returns = returns
def convert_shape(self):
"""
convert shape
"""
def sf01(arr):
"""
swap and then flatten axes 0 and 1
"""
s = arr.shape
return arr.swapaxes(0, 1).reshape(s[0] * s[1], *s[2:])
self.obs = sf01(self.obs)
self.returns = sf01(self.returns)
self.dones = sf01(self.dones)
self.actions = sf01(self.actions)
self.values = sf01(self.values)
self.neglogpacs = sf01(self.neglogpacs)
class PPOModel:
"""
PPO Model
"""
def __init__(self, model_config, mask):
self.model_config = model_config
self.states = None # initial state of lstm in policy/value network
self.nupdates = None # the number of times train() is invoked, used to tune lr and cliprange
self.cur_update = 1 # record the current update
self.np_mask = mask # record the mask of each action within one trial
set_global_seeds(None)
assert isinstance(self.model_config.lr, float)
self.lr = constfn(self.model_config.lr)
assert isinstance(self.model_config.cliprange, float)
self.cliprange = constfn(self.model_config.cliprange)
# build lstm policy network, value share the same network
policy = build_lstm_policy(model_config)
# Get the nb of env
nenvs = model_config.num_envs
# Calculate the batch_size
self.nbatch = nbatch = nenvs * model_config.nsteps # num of record per update
nbatch_train = nbatch // model_config.nminibatches # get batch size
# self.nupdates is used to tune lr and cliprange
self.nupdates = self.model_config.total_timesteps // self.nbatch
# Instantiate the model object (that creates act_model and train_model)
self.model = Model(policy=policy, nbatch_act=nenvs, nbatch_train=nbatch_train,
nsteps=model_config.nsteps, ent_coef=model_config.ent_coef, vf_coef=model_config.vf_coef,
max_grad_norm=model_config.max_grad_norm, np_mask=self.np_mask)
self.states = self.model.initial_state
logger.info('=== finished PPOModel initialization')
def inference(self, num):
"""
generate actions along with related info from policy network.
observation is the action of the last step.
Parameters:
----------
num: the number of trials to generate
"""
# Here, we init the lists that will contain the mb of experiences
mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
# initial observation
# use the (n+1)th embedding to represent the first step action
first_step_ob = self.model_config.action_space.n
obs = [first_step_ob for _ in range(num)]
dones = [True for _ in range(num)]
states = self.states
# For n in range number of steps
for cur_step in range(self.model_config.nsteps):
# Given the observations, get actions, values, and neglogpacs from the policy network
actions, values, states, neglogpacs = self.model.step(cur_step, obs, S=states, M=dones)
mb_obs.append(obs.copy())
mb_actions.append(actions)
mb_values.append(values)
mb_neglogpacs.append(neglogpacs)
mb_dones.append(dones)
# There is no real environment here: the chosen actions become the observations of the next step
obs[:] = actions
if cur_step == self.model_config.nsteps - 1:
dones = [True for _ in range(num)]
else:
dones = [False for _ in range(num)]
#batch of steps to batch of rollouts
np_obs = np.asarray(obs)
mb_obs = np.asarray(mb_obs, dtype=np_obs.dtype)
mb_actions = np.asarray(mb_actions)
mb_values = np.asarray(mb_values, dtype=np.float32)
mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
mb_dones = np.asarray(mb_dones, dtype=np.bool)
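# each mb_* array now has shape (nsteps, num, ...): one entry per decision step per generated trial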
last_values = self.model.value(np_obs, S=states, M=dones)
return mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values
def compute_rewards(self, trials_info, trials_result):
"""
compute the rewards of the trials in trials_info based on trials_result,
and update the rewards in trials_info
Parameters:
----------
trials_info: info of the generated trials
trials_result: final results (e.g., acc) of the generated trials
"""
mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
# discount/bootstrap off value fn
mb_returns = np.zeros_like(mb_rewards)
mb_advs = np.zeros_like(mb_rewards)
lastgaelam = 0
last_dones = np.asarray([True for _ in trials_result], dtype=np.bool) # ugly
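# generalized advantage estimation (GAE), matching the recursion below:
#   delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t)
#   A_t     = delta_t + gamma * lam * (1 - done_{t+1}) * A_{t+1}
# returns are the advantages plus the value baseline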
for t in reversed(range(self.model_config.nsteps)):
if t == self.model_config.nsteps - 1:
nextnonterminal = 1.0 - last_dones
nextvalues = trials_info.last_value
else:
nextnonterminal = 1.0 - trials_info.dones[t+1]
nextvalues = trials_info.values[t+1]
delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
mb_returns = mb_advs + trials_info.values
trials_info.update_rewards(mb_rewards, mb_returns)
trials_info.convert_shape()
def train(self, trials_info, nenvs):
"""
train the policy/value network using trials_info
Parameters:
----------
trials_info: complete info of the generated trials from the previous inference
nenvs: the batch size of the (previous) inference
"""
# keep frac decay for future optimization
if self.cur_update <= self.nupdates:
frac = 1.0 - (self.cur_update - 1.0) / self.nupdates
else:
logger.warning('current update (self.cur_update) %d has exceeded total updates (self.nupdates) %d',
self.cur_update, self.nupdates)
frac = 1.0 - (self.nupdates - 1.0) / self.nupdates
lrnow = self.lr(frac)
cliprangenow = self.cliprange(frac)
self.cur_update += 1
states = self.states
assert states is not None # recurrent version
assert nenvs % self.model_config.nminibatches == 0
envsperbatch = nenvs // self.model_config.nminibatches
envinds = np.arange(nenvs)
flatinds = np.arange(nenvs * self.model_config.nsteps).reshape(nenvs, self.model_config.nsteps)
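# recurrent minibatching: sample whole environments (complete rollouts) rather than individual
# steps so that the per-environment lstm states passed below stay aligned with their sequences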
for _ in range(self.model_config.noptepochs):
np.random.shuffle(envinds)
for start in range(0, nenvs, envsperbatch):
end = start + envsperbatch
mbenvinds = envinds[start:end]
mbflatinds = flatinds[mbenvinds].ravel()
slices = (arr[mbflatinds] for arr in (trials_info.obs, trials_info.returns, trials_info.dones,
trials_info.actions, trials_info.values, trials_info.neglogpacs))
mbstates = states[mbenvinds]
self.model.train(lrnow, cliprangenow, *slices, mbstates)
class PPOTuner(Tuner):
"""
PPOTuner
"""
def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
"""
initialization. The PPO model is not initialized here because the search space has not been received yet.
Parameters:
----------
optimize_mode: maximize or minimize
trials_per_update: number of trials to have for each model update
epochs_per_update: number of epochs to run for each model update
minibatch_size: minibatch size (number of trials) for the update
ent_coef: policy entropy coefficient in the optimization objective
lr: learning rate of the model (lstm network), constant
vf_coef: value function loss coefficient in the optimization objective
max_grad_norm: gradient norm clipping coefficient
gamma: discounting factor
lam: advantage estimation discounting factor (lambda in the paper)
cliprange: cliprange in the PPO algorithm, constant
"""
self.optimize_mode = OptimizeMode(optimize_mode)
self.model_config = ModelConfig()
self.model = None
self.search_space = None
self.running_trials = {} # key: parameter_id, value: actions/states/etc.
self.inf_batch_size = trials_per_update # number of trials to generate in one inference
self.first_inf = True # indicates whether this is the first time inference is run to generate new trials
self.trials_result = [None for _ in range(self.inf_batch_size)] # results of finished trials
self.credit = 0 # record the unsatisfied trial requests
self.param_ids = []
self.finished_trials = 0
self.chosen_arch_template = {}
self.actions_spaces = None
self.actions_to_config = None
self.full_act_space = None
self.trials_info = None
self.all_trials = {} # used to dedup the same trial, key: config, value: final result
self.model_config.num_envs = self.inf_batch_size
self.model_config.noptepochs = epochs_per_update
self.model_config.nminibatches = minibatch_size
self.send_trial_callback = None
logger.info('=== finished PPOTuner initialization')
def _process_one_nas_space(self, block_name, block_space):
"""
process nas space to determine observation space and action space
Parameters:
----------
block_name: the name of the mutable block
block_space: search space of this mutable block
Returns:
----------
actions_spaces: list of the space of each action
actions_to_config: the mapping from action to generated configuration
"""
actions_spaces = []
actions_to_config = []
block_arch_temp = {}
for l_name, layer in block_space.items():
chosen_layer_temp = {}
if len(layer['layer_choice']) > 1:
actions_spaces.append(layer['layer_choice'])
actions_to_config.append((block_name, l_name, 'chosen_layer'))
chosen_layer_temp['chosen_layer'] = None
else:
assert len(layer['layer_choice']) == 1
chosen_layer_temp['chosen_layer'] = layer['layer_choice'][0]
if layer['optional_input_size'] not in [0, 1, [0, 1]]:
raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the specified one is %s'
% (layer['optional_input_size']))
if isinstance(layer['optional_input_size'], list):
actions_spaces.append(["None", *layer['optional_inputs']])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 1:
actions_spaces.append(layer['optional_inputs'])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 0:
chosen_layer_temp['chosen_inputs'] = []
else:
raise ValueError('invalid type and value of optional_input_size')
block_arch_temp[l_name] = chosen_layer_temp
self.chosen_arch_template[block_name] = block_arch_temp
return actions_spaces, actions_to_config
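# Illustrative (hypothetical) block_space entry:
#   {'conv1': {'layer_choice': ['conv3x3', 'conv5x5'],
#              'optional_inputs': ['pool1'], 'optional_input_size': [0, 1]}}
# would yield two actions: one choosing the layer, one choosing 'None' or 'pool1' as input.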
def _process_nas_space(self, search_space):
"""
process nas search space to get action/observation space
"""
actions_spaces = []
actions_to_config = []
for b_name, block in search_space.items():
if block['_type'] != 'mutable_layer':
raise ValueError('PPOTuner only accepts mutable_layer type in the search space, but the current one is %s'%(block['_type']))
block = block['_value']
act, act_map = self._process_one_nas_space(b_name, block)
actions_spaces.extend(act)
actions_to_config.extend(act_map)
# calculate observation space
dedup = {}
for step in actions_spaces:
for action in step:
dedup[action] = 1
full_act_space = [act for act, _ in dedup.items()]
assert len(full_act_space) == len(dedup)
observation_space = len(full_act_space)
nsteps = len(actions_spaces)
return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps
def _generate_action_mask(self):
"""
different steps could have different action spaces. To deal with this, we merge all the
possible actions into one action space and use masks to indicate the available actions at each step
"""
two_masks = []
mask = []
for acts in self.actions_spaces:
one_mask = [0 for _ in range(len(self.full_act_space))]
for act in acts:
idx = self.full_act_space.index(act)
one_mask[idx] = 1
mask.append(one_mask)
two_masks.append(mask)
mask = []
for acts in self.actions_spaces:
one_mask = [-np.inf for _ in range(len(self.full_act_space))]
for act in acts:
idx = self.full_act_space.index(act)
one_mask[idx] = 0
mask.append(one_mask)
two_masks.append(mask)
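# two_masks[0] holds 0/1 validity masks and two_masks[1] holds additive -inf/0 masks,
# which are presumably added to the policy logits so that unavailable actions get zero probability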
return np.asarray(two_masks, dtype=np.float32)
def update_search_space(self, search_space):
"""
get search space, currently the space only includes that for NAS
Parameters:
----------
search_space: search space for NAS
Returns:
-------
no return
"""
logger.info('=== update search space %s', search_space)
assert self.search_space is None
self.search_space = search_space
assert self.model_config.observation_space is None
assert self.model_config.action_space is None
self.actions_spaces, self.actions_to_config, self.full_act_space, obs_space, nsteps = self._process_nas_space(search_space)
self.model_config.observation_space = spaces.Discrete(obs_space)
self.model_config.action_space = spaces.Discrete(obs_space)
self.model_config.nsteps = nsteps
# generate mask in numpy
mask = self._generate_action_mask()
assert self.model is None
self.model = PPOModel(self.model_config, mask)
def _actions_to_config(self, actions):
"""
given actions, generate the corresponding trial configuration
"""
chosen_arch = copy.deepcopy(self.chosen_arch_template)
for cnt, act in enumerate(actions):
act_name = self.full_act_space[act]
(block_name, layer_name, key) = self.actions_to_config[cnt]
if key == 'chosen_inputs':
if act_name == 'None':
chosen_arch[block_name][layer_name][key] = []
else:
chosen_arch[block_name][layer_name][key] = [act_name]
elif key == 'chosen_layer':
chosen_arch[block_name][layer_name][key] = act_name
else:
raise ValueError('unrecognized key: {0}'.format(key))
return chosen_arch
def generate_multiple_parameters(self, parameter_id_list, **kwargs):
"""
Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
"""
result = []
self.send_trial_callback = kwargs['st_callback']
for parameter_id in parameter_id_list:
had_exception = False
try:
logger.debug("generating param for %s", parameter_id)
res = self.generate_parameters(parameter_id, **kwargs)
except nni.NoMoreTrialError:
had_exception = True
if not had_exception:
result.append(res)
return result
def generate_parameters(self, parameter_id, **kwargs):
"""
generate parameters; if no trial configuration is available now, increase self.credit by 1 so the config can be sent later
"""
if self.first_inf:
self.trials_result = [None for _ in range(self.inf_batch_size)]
mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
mb_dones, last_values, self.inf_batch_size)
self.first_inf = False
trial_info_idx, actions = self.trials_info.get_next()
if trial_info_idx is None:
self.credit += 1
self.param_ids.append(parameter_id)
raise nni.NoMoreTrialError('no more parameters now.')
self.running_trials[parameter_id] = trial_info_idx
new_config = self._actions_to_config(actions)
return new_config
def _next_round_inference(self):
"""
"""
self.finished_trials = 0
self.model.compute_rewards(self.trials_info, self.trials_result)
self.model.train(self.trials_info, self.inf_batch_size)
self.running_trials = {}
# generate new trials
self.trials_result = [None for _ in range(self.inf_batch_size)]
mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
mb_dones, last_values, self.inf_batch_size)
# check credit and submit new trials
for _ in range(self.credit):
trial_info_idx, actions = self.trials_info.get_next()
if trial_info_idx is None:
logger.warning('Not enough trial configs; trials_per_update is suggested to be larger than trialConcurrency')
break
assert self.param_ids
param_id = self.param_ids.pop()
self.running_trials[param_id] = trial_info_idx
new_config = self._actions_to_config(actions)
self.send_trial_callback(param_id, new_config)
self.credit -= 1
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""
receive a trial's result. If the number of finished trials equals self.inf_batch_size, start the next update to
train the model
"""
trial_info_idx = self.running_trials.pop(parameter_id, None)
assert trial_info_idx is not None
value = extract_scalar_reward(value)
if self.optimize_mode == OptimizeMode.Minimize:
value = -value
self.trials_result[trial_info_idx] = value
self.finished_trials += 1
if self.finished_trials == self.inf_batch_size:
self._next_round_inference()
def trial_end(self, parameter_id, success, **kwargs):
"""
to deal with trial failure
"""
if not success:
if parameter_id not in self.running_trials:
logger.warning('The trial failed, but self.running_trials does not contain it')
return
trial_info_idx = self.running_trials.pop(parameter_id, None)
assert trial_info_idx is not None
# use mean of finished trials as the result of this failed trial
values = [val for val in self.trials_result if val is not None]
logger.warning('values of finished trials: %s', values)
self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
self.finished_trials += 1
if self.finished_trials == self.inf_batch_size:
self._next_round_inference()
def import_data(self, data):
"""
Import additional data for tuning
Parameters
----------
data: a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
"""
logger.warning('PPOTuner cannot leverage imported data.')
enum34
gym
tensorflow
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
util functions
"""
import os
import random
import multiprocessing
import numpy as np
import tensorflow as tf
from gym.spaces import Discrete, Box, MultiDiscrete
def set_global_seeds(i):
"""set global seeds"""
rank = 0
myseed = i + 1000 * rank if i is not None else None
tf.set_random_seed(myseed)
np.random.seed(myseed)
random.seed(myseed)
def batch_to_seq(h, nbatch, nsteps, flat=False):
"""convert from batch to sequence"""
if flat:
h = tf.reshape(h, [nbatch, nsteps])
else:
h = tf.reshape(h, [nbatch, nsteps, -1])
return [tf.squeeze(v, [1]) for v in tf.split(axis=1, num_or_size_splits=nsteps, value=h)]
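# e.g. an input of shape (nbatch * nsteps, features) is reshaped to (nbatch, nsteps, features)
# and split into a list of nsteps tensors, each of shape (nbatch, features)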
def seq_to_batch(h, flat=False):
"""convert from sequence to batch"""
shape = h[0].get_shape().as_list()
if not flat:
assert len(shape) > 1
nh = h[0].get_shape()[-1].value
return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])
else:
return tf.reshape(tf.stack(values=h, axis=1), [-1])
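# inverse of batch_to_seq: the nsteps tensors of shape (nbatch, nh) are merged back into
# a single batch of shape (nbatch * nsteps, nh)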
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
"""lstm cell"""
nbatch, nin = [v.value for v in xs[0].get_shape()]
with tf.variable_scope(scope):
wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))
c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
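# m is the done mask from the previous step: when m == 1 the cell and hidden states are
# reset to zero before that step is processed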
for idx, (x, m) in enumerate(zip(xs, ms)):
c = c*(1-m)
h = h*(1-m)
z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
i = tf.nn.sigmoid(i)
f = tf.nn.sigmoid(f)
o = tf.nn.sigmoid(o)
u = tf.tanh(u)
c = f*c + i*u
h = o*tf.tanh(c)
xs[idx] = h
s = tf.concat(axis=1, values=[c, h])
return xs, s
def lstm_model(nlstm=128, layer_norm=False):
"""
Builds an LSTM (Long Short-Term Memory) network to be used in a policy.
Note that the resulting function returns not only the output of the LSTM
(i.e. hidden state of lstm for each step in the sequence), but also a dictionary
with auxiliary tensors to be set as policy attributes.
Specifically,
S is a placeholder to feed current state (LSTM state has to be managed outside policy)
M is a placeholder for the mask (used to mask out observations after the end of the episode, but can be used for other purposes too)
initial_state is a numpy array containing initial lstm state (usually zeros)
state is the output LSTM state (to be fed into S at the next call)
An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example
Parameters:
----------
nlstm: int LSTM hidden state size
layer_norm: bool if True, layer-normalized version of LSTM is used
Returns:
-------
function that builds LSTM with a given input tensor / placeholder
"""
def network_fn(X, nenv=1, obs_size=-1):
with tf.variable_scope("emb", reuse=tf.AUTO_REUSE):
w_emb = tf.get_variable("w_emb", [obs_size+1, 32])
X = tf.nn.embedding_lookup(w_emb, X)
nbatch = X.shape[0]
nsteps = nbatch // nenv
h = tf.layers.flatten(X)
M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
S = tf.placeholder(tf.float32, [nenv, 2*nlstm]) #states
xs = batch_to_seq(h, nenv, nsteps)
ms = batch_to_seq(M, nenv, nsteps)
assert not layer_norm
h5, snew = lstm(xs, ms, S, scope='lstm', nh=nlstm)
h = seq_to_batch(h5)
initial_state = np.zeros(S.shape.as_list(), dtype=float)
return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
return network_fn
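# Usage sketch (hypothetical variable names):
#   network_fn = lstm_model(nlstm=128)
#   h, extra = network_fn(X, nenv=20, obs_size=num_actions)
# feed extra['initial_state'] into extra['S'] on the first call and extra['state'] afterwards,
# and feed the done flags of the previous step into extra['M']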
def ortho_init(scale=1.0):
"""init approach"""
def _ortho_init(shape, dtype, partition_info=None):
#lasagne ortho init for tf
shape = tuple(shape)
if len(shape) == 2:
flat_shape = shape
elif len(shape) == 4: # assumes NHWC
flat_shape = (np.prod(shape[:-1]), shape[-1])
else:
raise NotImplementedError
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
q = u if u.shape == flat_shape else v # pick the one with the correct shape
q = q.reshape(shape)
return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
return _ortho_init
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0):
"""fully connected op"""
with tf.variable_scope(scope):
nin = x.get_shape()[1].value
w = tf.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
b = tf.get_variable("b", [nh], initializer=tf.constant_initializer(init_bias))
return tf.matmul(x, w)+b
def _check_shape(placeholder_shape, data_shape):
"""
check if two shapes are compatible (i.e. differ only by dimensions of size 1, or by the batch dimension); currently always returns True
"""
return True
# ================================================================
# Shape adjustment for feeding into tf placeholders
# ================================================================
def adjust_shape(placeholder, data):
"""
adjust shape of the data to the shape of the placeholder if possible.
If shape is incompatible, AssertionError is thrown
Parameters:
placeholder: tensorflow input placeholder
data: input data to be (potentially) reshaped to be fed into placeholder
Returns:
reshaped data
"""
if not isinstance(data, np.ndarray) and not isinstance(data, list):
return data
if isinstance(data, list):
data = np.array(data)
placeholder_shape = [x or -1 for x in placeholder.shape.as_list()]
assert _check_shape(placeholder_shape, data.shape), \
'Shape of data {} is not compatible with shape of the placeholder {}'.format(data.shape, placeholder_shape)
return np.reshape(data, placeholder_shape)
# ================================================================
# Global session
# ================================================================
def get_session(config=None):
"""Get default session or create one with a given config"""
sess = tf.get_default_session()
if sess is None:
sess = make_session(config=config, make_default=True)
return sess
def make_session(config=None, num_cpu=None, make_default=False, graph=None):
"""Returns a session that will use <num_cpu> CPU's only"""
if num_cpu is None:
num_cpu = int(os.getenv('RCALL_NUM_CPU', multiprocessing.cpu_count()))
if config is None:
config = tf.ConfigProto(
allow_soft_placement=True,
inter_op_parallelism_threads=num_cpu,
intra_op_parallelism_threads=num_cpu)
config.gpu_options.allow_growth = True
if make_default:
return tf.InteractiveSession(config=config, graph=graph)
else:
return tf.Session(config=config, graph=graph)
ALREADY_INITIALIZED = set()
def initialize():
"""Initialize all the uninitialized variables in the global scope."""
new_variables = set(tf.global_variables()) - ALREADY_INITIALIZED
get_session().run(tf.variables_initializer(new_variables))
ALREADY_INITIALIZED.update(new_variables)
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
"""
Create placeholder to feed observations into of the size appropriate to the observation space
Parameters:
----------
ob_space: gym.Space observation space
batch_size: int size of the batch to be fed into input. Can be left None in most cases.
name: str name of the placeholder
Returns:
-------
tensorflow placeholder tensor
"""
assert isinstance(ob_space, (Discrete, Box, MultiDiscrete)), \
'Can only deal with Discrete, Box and MultiDiscrete observation spaces for now'
dtype = ob_space.dtype
if dtype == np.int8:
dtype = np.uint8
return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=dtype, name=name)
def explained_variance(ypred, y):
"""
Computes fraction of variance that ypred explains about y.
Returns 1 - Var[y-ypred] / Var[y]
interpretation:
ev=0 => might as well have predicted zero
ev=1 => perfect prediction
ev<0 => worse than just predicting zero
"""
assert y.ndim == 1 and ypred.ndim == 1
vary = np.var(y)
return np.nan if vary == 0 else 1 - np.var(y-ypred)/vary
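# e.g. explained_variance(np.array([1., 2., 3.]), np.array([1., 2., 3.])) returns 1.0 (perfect prediction)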
@@ -17,11 +17,10 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging
import nni
from .recoverable import Recoverable
_logger = logging.getLogger(__name__)
...
@@ -39,17 +39,18 @@ class AnnotationTestCase(TestCase):
         shutil.rmtree('_generated')
     def test_search_space_generator(self):
-        search_space = generate_search_space('testcase/annotated')
+        shutil.copytree('testcase/annotated', '_generated/annotated')
+        search_space = generate_search_space('_generated/annotated')
         with open('testcase/searchspace.json') as f:
             self.assertEqual(search_space, json.load(f))
     def test_code_generator(self):
-        code_dir = expand_annotations('testcase/usercode', '_generated', nas_mode='classic_mode')
-        self.assertEqual(code_dir, '_generated')
-        self._assert_source_equal('testcase/annotated/nas.py', '_generated/nas.py')
-        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
-        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
-        with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
+        code_dir = expand_annotations('testcase/usercode', '_generated/usercode', nas_mode='classic_mode')
+        self.assertEqual(code_dir, '_generated/usercode')
+        self._assert_source_equal('testcase/annotated/nas.py', '_generated/usercode/nas.py')
+        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/usercode/mnist.py')
+        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/usercode/dir/simple.py')
+        with open('testcase/usercode/nonpy.txt') as src, open('_generated/usercode/nonpy.txt') as dst:
             assert src.read() == dst.read()
     def test_annotation_detecting(self):
...
@@ -142,6 +142,24 @@ tuner_schema_dict = {
         Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
         Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
     },
+    'PPOTuner': {
+        'builtinTunerName': 'PPOTuner',
+        'classArgs': {
+            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
+            Optional('trials_per_update'): setNumberRange('trials_per_update', int, 0, 99999),
+            Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999),
+            Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999),
+            Optional('ent_coef'): setType('ent_coef', float),
+            Optional('lr'): setType('lr', float),
+            Optional('vf_coef'): setType('vf_coef', float),
+            Optional('max_grad_norm'): setType('max_grad_norm', float),
+            Optional('gamma'): setType('gamma', float),
+            Optional('lam'): setType('lam', float),
+            Optional('cliprange'): setType('cliprange', float),
+        },
+        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
+        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
+    },
     'customized': {
         'codeDir': setPathCheck('codeDir'),
         'classFileName': setType('classFileName', str),
...
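# For reference, a tuner section in an NNI experiment config.yml accepted by the schema
# above might look like the following sketch (field values are only examples):
#   tuner:
#     builtinTunerName: PPOTuner
#     classArgs:
#       optimize_mode: maximize
#       trials_per_update: 20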