"src/vscode:/vscode.git/clone" did not exist on "313b0f67bf9900ad9366df9ad62fd96656b92b48"
Unverified commit 67287997, authored by SparkSnail, committed by GitHub

Merge pull request #241 from microsoft/master

merge master
parents b4773e1e f8d42a33
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
paiYarnConfig:
userName: your_account
passWord: your_passwd
host: 0.0.0.0
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: pai
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
userName: your_account
token: your_token
host: 0.0.0.0
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 4
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: local
#choice: true, false
useAnnotation: true
multiPhase: false
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 12
minibatch_size: 10
#could use the No. 0 gpu for this tuner
#if want to specify multiple gpus, here is an example of specifying three gpus: 0,1,2
gpuIndices: 0
trial:
command: sh ./macro_cifar10.sh
codeDir: ./
gpuNum: 1
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar xzf cifar-10-python.tar.gz && mv cifar-10-batches-py cifar10
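# Note: the trial scripts below pass --data_path="data/cifar10", so the
# extracted cifar10/ folder is expected to end up at data/cifar10 relative to
# the trial's working directory.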
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"
python3 src/cifar10/nni_child_cifar10.py \
--data_format="NCHW" \
--search_for="macro" \
--reset_output_dir \
--data_path="data/cifar10" \
--output_dir="outputs" \
--train_data_size=45000 \
--batch_size=100 \
--num_epochs=8 \
--log_every=50 \
--eval_every_epochs=1 \
--child_use_aux_heads \
--child_num_layers=12 \
--child_out_filters=36 \
--child_l2_reg=0.0002 \
--child_num_branches=6 \
--child_num_cell_layers=5 \
--child_keep_prob=0.50 \
--child_drop_path_keep_prob=0.60 \
--child_lr_cosine \
--child_lr_max=0.05 \
--child_lr_min=0.001 \
--child_lr_T_0=10 \
--child_lr_T_mul=2 \
--child_mode="subgraph" \
"$@"
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"
python3 src/cifar10/nni_child_cifar10.py \
--data_format="NCHW" \
--search_for="macro" \
--reset_output_dir \
--data_path="data/cifar10" \
--output_dir="outputs" \
--train_data_size=45000 \
--batch_size=100 \
--num_epochs=30 \
--log_every=50 \
--eval_every_epochs=1 \
--child_use_aux_heads \
--child_num_layers=12 \
--child_out_filters=36 \
--child_l2_reg=0.0002 \
--child_num_branches=6 \
--child_num_cell_layers=5 \
--child_keep_prob=0.50 \
--child_drop_path_keep_prob=0.60 \
--child_lr_cosine \
--child_lr_max=0.05 \
--child_lr_min=0.001 \
--child_lr_T_0=10 \
--child_lr_T_mul=2 \
--child_mode="subgraph" \
"$@"
import os
import sys
import pickle
import numpy as np
import tensorflow as tf
def _read_data(data_path, train_files):
"""Reads CIFAR-10 format data. Always returns NHWC format.
Returns:
images: np tensor of size [N, H, W, C]
labels: np tensor of size [N]
"""
images, labels = [], []
for file_name in train_files:
print(file_name)
full_name = os.path.join(data_path, file_name)
with open(full_name, "rb") as finp:
data = pickle.load(finp, encoding='latin1')
batch_images = data["data"].astype(np.float32) / 255.0
batch_labels = np.array(data["labels"], dtype=np.int32)
images.append(batch_images)
labels.append(batch_labels)
images = np.concatenate(images, axis=0)
labels = np.concatenate(labels, axis=0)
images = np.reshape(images, [-1, 3, 32, 32])
images = np.transpose(images, [0, 2, 3, 1])
return images, labels
def read_data(data_path, num_valids=5000):
print("-" * 80)
print("Reading data")
images, labels = {}, {}
train_files = [
"data_batch_1",
"data_batch_2",
"data_batch_3",
"data_batch_4",
"data_batch_5",
]
test_file = [
"test_batch",
]
images["train"], labels["train"] = _read_data(data_path, train_files)
if num_valids:
images["valid"] = images["train"][-num_valids:]
labels["valid"] = labels["train"][-num_valids:]
images["train"] = images["train"][:-num_valids]
labels["train"] = labels["train"][:-num_valids]
else:
images["valid"], labels["valid"] = None, None
images["test"], labels["test"] = _read_data(data_path, test_file)
print("Prepropcess: [subtract mean], [divide std]")
mean = np.mean(images["train"], axis=(0, 1, 2), keepdims=True)
std = np.std(images["train"], axis=(0, 1, 2), keepdims=True)
print("mean: {}".format(np.reshape(mean * 255.0, [-1])))
print("std: {}".format(np.reshape(std * 255.0, [-1])))
images["train"] = (images["train"] - mean) / std
if num_valids:
images["valid"] = (images["valid"] - mean) / std
images["test"] = (images["test"] - mean) / std
return images, labels
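# Usage sketch: assuming the CIFAR-10 python batches were extracted to
# data/cifar10 (see the download commands above), read_data returns whitened
# NHWC float arrays split into train/valid/test.
if __name__ == "__main__":
    imgs, lbls = read_data("data/cifar10", num_valids=5000)
    # expected shapes: (45000, 32, 32, 3) train, (5000, 32, 32, 3) valid,
    # (10000, 32, 32, 3) test
    print(imgs["train"].shape, imgs["valid"].shape, imgs["test"].shape)
    print(lbls["train"].shape, lbls["valid"].shape, lbls["test"].shape)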
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from src.common_ops import create_weight, batch_norm, batch_norm_with_mask, global_avg_pool, conv_op, pool_op
from src.utils import count_model_params, get_train_ops, get_C, get_strides
from src.cifar10.models import Model
class GeneralChild(Model):
def __init__(self,
images,
labels,
cutout_size=None,
fixed_arc=None,
out_filters_scale=1,
num_layers=2,
num_branches=6,
out_filters=24,
keep_prob=1.0,
batch_size=32,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
data_format="NHWC",
name="child",
mode="subgraph",
*args,
**kwargs
):
super(self.__class__, self).__init__(
images,
labels,
cutout_size=cutout_size,
batch_size=batch_size,
clip_mode=clip_mode,
grad_bound=grad_bound,
l2_reg=l2_reg,
lr_init=lr_init,
lr_dec_start=lr_dec_start,
lr_dec_every=lr_dec_every,
lr_dec_rate=lr_dec_rate,
keep_prob=keep_prob,
optim_algo=optim_algo,
sync_replicas=sync_replicas,
num_aggregate=num_aggregate,
num_replicas=num_replicas,
data_format=data_format,
name=name)
self.lr_cosine = lr_cosine
self.lr_max = lr_max
self.lr_min = lr_min
self.lr_T_0 = lr_T_0
self.lr_T_mul = lr_T_mul
self.out_filters = out_filters * out_filters_scale
self.num_layers = num_layers
self.mode = mode
self.num_branches = num_branches
self.fixed_arc = fixed_arc
self.out_filters_scale = out_filters_scale
pool_distance = self.num_layers // 3
self.pool_layers = [pool_distance - 1, 2 * pool_distance - 1]
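        # With the 12-layer setting passed by the run scripts (--child_num_layers=12),
        # pool_distance is 4 and pool_layers is [3, 7]; these are the points at which
        # _model below inserts its stride-2 pooling stages via add_fixed_pooling_layer.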
def _factorized_reduction(self, x, out_filters, stride, is_training):
"""Reduces the shape of x without information loss due to striding."""
assert out_filters % 2 == 0, (
"Need even number of filters when using this factorized reduction.")
if stride == 1:
with tf.variable_scope("path_conv"):
inp_c = get_C(x, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
x = batch_norm(x, is_training, data_format=self.data_format)
return x
stride_spec = get_strides(stride, self.data_format)
# Skip path 1
path1 = tf.nn.avg_pool(
x, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
with tf.variable_scope("path1_conv"):
inp_c = get_C(path1, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters // 2])
path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
# Skip path 2
# First pad with 0"s on the right and bottom, then shift the filter to
# include those 0"s that were added.
if self.data_format == "NHWC":
pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]]
path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :]
concat_axis = 3
else:
pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]]
path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:]
concat_axis = 1
path2 = tf.nn.avg_pool(
path2, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
with tf.variable_scope("path2_conv"):
inp_c = get_C(path2, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters // 2])
path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
# Concat and apply BN
final_path = tf.concat(values=[path1, path2], axis=concat_axis)
final_path = batch_norm(final_path, is_training,
data_format=self.data_format)
return final_path
def _model(self, images, is_training, reuse=False):
'''Build model'''
with tf.variable_scope(self.name, reuse=reuse):
layers = []
out_filters = self.out_filters
with tf.variable_scope("stem_conv"):
w = create_weight("w", [3, 3, 3, out_filters])
x = tf.nn.conv2d(
images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
x = batch_norm(x, is_training, data_format=self.data_format)
layers.append(x)
def add_fixed_pooling_layer(layer_id, layers, out_filters, is_training):
'''Add a fixed pooling layer every four layers'''
out_filters *= 2
with tf.variable_scope("pool_at_{0}".format(layer_id)):
pooled_layers = []
for i, layer in enumerate(layers):
with tf.variable_scope("from_{0}".format(i)):
x = self._factorized_reduction(
layer, out_filters, 2, is_training)
pooled_layers.append(x)
return pooled_layers, out_filters
def post_process_out(out, optional_inputs):
'''Form skip connection and perform batch norm'''
with tf.variable_scope("skip"):
inputs = layers[-1]
if self.data_format == "NHWC":
inp_h = inputs.get_shape()[1].value
inp_w = inputs.get_shape()[2].value
inp_c = inputs.get_shape()[3].value
out.set_shape([None, inp_h, inp_w, out_filters])
elif self.data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
inp_h = inputs.get_shape()[2].value
inp_w = inputs.get_shape()[3].value
out.set_shape([None, out_filters, inp_h, inp_w])
optional_inputs.append(out)
pout = tf.add_n(optional_inputs)
out = batch_norm(pout, is_training,
data_format=self.data_format)
layers.append(out)
return out
global layer_id
layer_id = -1
def get_layer_id():
global layer_id
layer_id += 1
return 'layer_' + str(layer_id)
def conv3(inputs):
# res_layers is pre_layers that are chosen to form skip connection
# layers[-1] is always the latest input
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_0'):
out = conv_op(
inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def conv3_sep(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_1'):
out = conv_op(
inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True)
out = post_process_out(out, inputs[1])
return out
def conv5(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_2'):
out = conv_op(
inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def conv5_sep(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_3'):
out = conv_op(
inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True)
out = post_process_out(out, inputs[1])
return out
def avg_pool(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_4'):
out = pool_op(
inputs[0][0], is_training, out_filters, out_filters, "avg", self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def max_pool(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_5'):
out = pool_op(
inputs[0][0], is_training, out_filters, out_filters, "max", self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[x],
layer_output: layer_0_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_0_out],
optional_inputs: [layer_0_out],
optional_input_size: [0, 1],
layer_output: layer_1_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_1_out],
optional_inputs: [layer_0_out, layer_1_out],
optional_input_size: [0, 1],
layer_output: layer_2_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_2_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
optional_input_size: [0, 1],
layer_output: layer_3_out
}
)"""
layers, out_filters = add_fixed_pooling_layer(
3, layers, out_filters, is_training)
layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-4:]
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_3_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
optional_input_size: [0, 1],
layer_output: layer_4_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_4_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
optional_input_size: [0, 1],
layer_output: layer_5_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_5_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
optional_input_size: [0, 1],
layer_output: layer_6_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_6_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
optional_input_size: [0, 1],
layer_output: layer_7_out
}
)"""
layers, out_filters = add_fixed_pooling_layer(
7, layers, out_filters, is_training)
layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out = layers[
-8:]
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_7_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
optional_input_size: [0, 1],
layer_output: layer_8_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_8_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
optional_input_size: [0, 1],
layer_output: layer_9_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_9_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
optional_input_size: [0, 1],
layer_output: layer_10_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_10_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
optional_input_size: [0, 1],
layer_output: layer_11_out
}
)"""
x = global_avg_pool(layer_11_out, data_format=self.data_format)
if is_training:
x = tf.nn.dropout(x, self.keep_prob)
with tf.variable_scope("fc"):
if self.data_format == "NHWC":
inp_c = x.get_shape()[3].value
elif self.data_format == "NCHW":
inp_c = x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format {0}".format(self.data_format))
w = create_weight("w", [inp_c, 10])
x = tf.matmul(x, w)
return x
# override
def _build_train(self):
print("-" * 80)
print("Build train graph")
logits = self._model(self.x_train, is_training=True)
log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=self.y_train)
self.loss = tf.reduce_mean(log_probs)
self.train_preds = tf.argmax(logits, axis=1)
self.train_preds = tf.to_int32(self.train_preds)
self.train_acc = tf.equal(self.train_preds, self.y_train)
self.train_acc = tf.to_int32(self.train_acc)
self.train_acc = tf.reduce_sum(self.train_acc)
tf_variables = [var
for var in tf.trainable_variables() if var.name.startswith(self.name)]
self.num_vars = count_model_params(tf_variables)
print("Model has {} params".format(self.num_vars))
self.global_step = tf.Variable(
0, dtype=tf.int32, trainable=False, name="global_step")
self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
self.loss,
tf_variables,
self.global_step,
clip_mode=self.clip_mode,
grad_bound=self.grad_bound,
l2_reg=self.l2_reg,
lr_init=self.lr_init,
lr_dec_start=self.lr_dec_start,
lr_dec_every=self.lr_dec_every,
lr_dec_rate=self.lr_dec_rate,
lr_cosine=self.lr_cosine,
lr_max=self.lr_max,
lr_min=self.lr_min,
lr_T_0=self.lr_T_0,
lr_T_mul=self.lr_T_mul,
num_train_batches=self.num_train_batches,
optim_algo=self.optim_algo,
sync_replicas=False,
num_aggregate=self.num_aggregate,
num_replicas=self.num_replicas)
# override
def _build_valid(self):
if self.x_valid is not None:
print("-" * 80)
print("Build valid graph")
logits = self._model(self.x_valid, False, reuse=True)
self.valid_preds = tf.argmax(logits, axis=1)
self.valid_preds = tf.to_int32(self.valid_preds)
self.valid_acc = tf.equal(self.valid_preds, self.y_valid)
self.valid_acc = tf.to_int32(self.valid_acc)
self.valid_acc = tf.reduce_sum(self.valid_acc)
# override
def _build_test(self):
print("-" * 80)
print("Build test graph")
logits = self._model(self.x_test, False, reuse=True)
self.test_preds = tf.argmax(logits, axis=1)
self.test_preds = tf.to_int32(self.test_preds)
self.test_acc = tf.equal(self.test_preds, self.y_test)
self.test_acc = tf.to_int32(self.test_acc)
self.test_acc = tf.reduce_sum(self.test_acc)
def build_model(self):
self._build_train()
self._build_valid()
self._build_test()
import os
import sys
import numpy as np
import tensorflow as tf
class Model(object):
def __init__(self,
images,
labels,
cutout_size=None,
batch_size=32,
eval_batch_size=100,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=100,
lr_dec_rate=0.1,
keep_prob=1.0,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
data_format="NHWC",
name="generic_model",
seed=None,
):
"""
Args:
lr_dec_every: number of epochs to decay
"""
print("-" * 80)
print("Build model {}".format(name))
self.cutout_size = cutout_size
self.batch_size = batch_size
self.eval_batch_size = eval_batch_size
self.clip_mode = clip_mode
self.grad_bound = grad_bound
self.l2_reg = l2_reg
self.lr_init = lr_init
self.lr_dec_start = lr_dec_start
self.lr_dec_rate = lr_dec_rate
self.keep_prob = keep_prob
self.optim_algo = optim_algo
self.sync_replicas = sync_replicas
self.num_aggregate = num_aggregate
self.num_replicas = num_replicas
self.data_format = data_format
self.name = name
self.seed = seed
self.global_step = None
self.valid_acc = None
self.test_acc = None
print("Build data ops")
with tf.device("/cpu:0"):
# training data
self.num_train_examples = np.shape(images["train"])[0]
self.num_train_batches = (
self.num_train_examples + self.batch_size - 1) // self.batch_size
x_train, y_train = tf.train.shuffle_batch(
[images["train"], labels["train"]],
batch_size=self.batch_size,
capacity=50000,
enqueue_many=True,
min_after_dequeue=0,
num_threads=16,
seed=self.seed,
allow_smaller_final_batch=True,
)
self.lr_dec_every = lr_dec_every * self.num_train_batches
def _pre_process(x):
x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
x = tf.image.random_flip_left_right(x, seed=self.seed)
if self.cutout_size is not None:
mask = tf.ones(
[self.cutout_size, self.cutout_size], dtype=tf.int32)
start = tf.random_uniform(
[2], minval=0, maxval=32, dtype=tf.int32)
mask = tf.pad(mask, [[self.cutout_size + start[0], 32 - start[0]],
[self.cutout_size + start[1], 32 - start[1]]])
mask = mask[self.cutout_size: self.cutout_size + 32,
self.cutout_size: self.cutout_size + 32]
mask = tf.reshape(mask, [32, 32, 1])
mask = tf.tile(mask, [1, 1, 3])
x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x))
if self.data_format == "NCHW":
x = tf.transpose(x, [2, 0, 1])
return x
self.x_train = tf.map_fn(_pre_process, x_train, back_prop=False)
self.y_train = y_train
# valid data
self.x_valid, self.y_valid = None, None
if images["valid"] is not None:
images["valid_original"] = np.copy(images["valid"])
labels["valid_original"] = np.copy(labels["valid"])
if self.data_format == "NCHW":
images["valid"] = tf.transpose(
images["valid"], [0, 3, 1, 2])
self.num_valid_examples = np.shape(images["valid"])[0]
self.num_valid_batches = (
(self.num_valid_examples + self.eval_batch_size - 1)
// self.eval_batch_size)
self.x_valid, self.y_valid = tf.train.batch(
[images["valid"], labels["valid"]],
batch_size=self.eval_batch_size,
capacity=5000,
enqueue_many=True,
num_threads=1,
allow_smaller_final_batch=True,
)
# test data
if self.data_format == "NCHW":
images["test"] = tf.transpose(images["test"], [0, 3, 1, 2])
self.num_test_examples = np.shape(images["test"])[0]
self.num_test_batches = (
(self.num_test_examples + self.eval_batch_size - 1)
// self.eval_batch_size)
self.x_test, self.y_test = tf.train.batch(
[images["test"], labels["test"]],
batch_size=self.eval_batch_size,
capacity=10000,
enqueue_many=True,
num_threads=1,
allow_smaller_final_batch=True,
)
# cache images and labels
self.images = images
self.labels = labels
def eval_once(self, sess, eval_set, child_model, verbose=False):
"""Expects self.acc and self.global_step to be defined.
Args:
      sess: tf.Session() or one of its wrappers.
      eval_set: "valid" or "test"
"""
assert self.global_step is not None
global_step = sess.run(self.global_step)
print("Eval at {}".format(global_step))
if eval_set == "valid":
assert self.x_valid is not None
assert self.valid_acc is not None
num_examples = self.num_valid_examples
num_batches = self.num_valid_batches
acc_op = self.valid_acc
elif eval_set == "test":
assert self.test_acc is not None
num_examples = self.num_test_examples
num_batches = self.num_test_batches
acc_op = self.test_acc
else:
raise NotImplementedError("Unknown eval_set '{}'".format(eval_set))
total_acc = 0
total_exp = 0
for batch_id in range(num_batches):
acc = sess.run(acc_op)
total_acc += acc
total_exp += self.eval_batch_size
if verbose:
sys.stdout.write(
"\r{:<5d}/{:>5d}".format(total_acc, total_exp))
if verbose:
print("")
print("{}_accuracy: {:<6.4f}".format(
eval_set, float(total_acc) / total_exp))
return float(total_acc) / total_exp
def _model(self, images, is_training, reuse=None):
raise NotImplementedError("Abstract method")
def _build_train(self):
raise NotImplementedError("Abstract method")
def _build_valid(self):
raise NotImplementedError("Abstract method")
def _build_test(self):
raise NotImplementedError("Abstract method")
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import logging
import tensorflow as tf
from src.cifar10.data_utils import read_data
from src.cifar10.general_child import GeneralChild
import src.cifar10_flags
from src.cifar10_flags import FLAGS
def build_logger(log_name):
logger = logging.getLogger(log_name)
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler(log_name+'.log')
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)
return logger
logger = build_logger("nni_child_cifar10")
def build_trial(images, labels, ChildClass):
'''Build child class'''
child_model = ChildClass(
images,
labels,
use_aux_heads=FLAGS.child_use_aux_heads,
cutout_size=FLAGS.child_cutout_size,
num_layers=FLAGS.child_num_layers,
num_cells=FLAGS.child_num_cells,
num_branches=FLAGS.child_num_branches,
fixed_arc=FLAGS.child_fixed_arc,
out_filters_scale=FLAGS.child_out_filters_scale,
out_filters=FLAGS.child_out_filters,
keep_prob=FLAGS.child_keep_prob,
drop_path_keep_prob=FLAGS.child_drop_path_keep_prob,
num_epochs=FLAGS.num_epochs,
l2_reg=FLAGS.child_l2_reg,
data_format=FLAGS.data_format,
batch_size=FLAGS.batch_size,
clip_mode="norm",
grad_bound=FLAGS.child_grad_bound,
lr_init=FLAGS.child_lr,
lr_dec_every=FLAGS.child_lr_dec_every,
lr_dec_rate=FLAGS.child_lr_dec_rate,
lr_cosine=FLAGS.child_lr_cosine,
lr_max=FLAGS.child_lr_max,
lr_min=FLAGS.child_lr_min,
lr_T_0=FLAGS.child_lr_T_0,
lr_T_mul=FLAGS.child_lr_T_mul,
optim_algo="momentum",
sync_replicas=FLAGS.child_sync_replicas,
num_aggregate=FLAGS.child_num_aggregate,
num_replicas=FLAGS.child_num_replicas
)
return child_model
def get_child_ops(child_model):
'''Assemble child op to a dict'''
child_ops = {
"global_step": child_model.global_step,
"loss": child_model.loss,
"train_op": child_model.train_op,
"lr": child_model.lr,
"grad_norm": child_model.grad_norm,
"train_acc": child_model.train_acc,
"optimizer": child_model.optimizer,
"num_train_batches": child_model.num_train_batches,
"eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs,
"eval_func": child_model.eval_once,
}
return child_ops
class NASTrial():
def __init__(self):
images, labels = read_data(FLAGS.data_path, num_valids=0)
self.output_dir = os.path.join(os.getenv('NNI_OUTPUT_DIR'), '../..')
self.file_path = os.path.join(
self.output_dir, 'trainable_variable.txt')
self.graph = tf.Graph()
with self.graph.as_default():
self.child_model = build_trial(images, labels, GeneralChild)
self.total_data = {}
self.child_model.build_model()
self.child_ops = get_child_ops(self.child_model)
config = tf.ConfigProto(
intra_op_parallelism_threads=0,
inter_op_parallelism_threads=0,
allow_soft_placement=True)
self.sess = tf.train.SingularMonitoredSession(config=config)
        logger.debug('NASTrial initialization done.')
def run_one_step(self):
'''Run this model on a batch of data'''
run_ops = [
self.child_ops["loss"],
self.child_ops["lr"],
self.child_ops["grad_norm"],
self.child_ops["train_acc"],
self.child_ops["train_op"],
]
loss, lr, gn, tr_acc, _ = self.sess.run(run_ops)
global_step = self.sess.run(self.child_ops["global_step"])
log_string = ""
log_string += "ch_step={:<6d}".format(global_step)
log_string += " loss={:<8.6f}".format(loss)
log_string += " lr={:<8.4f}".format(lr)
log_string += " |g|={:<8.4f}".format(gn)
log_string += " tr_acc={:<3d}/{:>3d}".format(tr_acc, FLAGS.batch_size)
if int(global_step) % FLAGS.log_every == 0:
logger.debug(log_string)
return loss, global_step
def run(self):
        '''Run this model for the number of epochs set in FLAGS.'''
max_acc = 0
while True:
_, global_step = self.run_one_step()
if global_step % self.child_ops['num_train_batches'] == 0:
acc = self.child_ops["eval_func"](
self.sess, "test", self.child_model)
max_acc = max(max_acc, acc)
'''@nni.report_intermediate_result(acc)'''
if global_step / self.child_ops['num_train_batches'] >= FLAGS.num_epochs:
'''@nni.report_final_result(max_acc)'''
break
def main(_):
logger.debug("-" * 80)
if not os.path.isdir(FLAGS.output_dir):
logger.debug(
"Path {} does not exist. Creating.".format(FLAGS.output_dir))
os.makedirs(FLAGS.output_dir)
elif FLAGS.reset_output_dir:
logger.debug(
"Path {} exists. Remove and remake.".format(FLAGS.output_dir))
shutil.rmtree(FLAGS.output_dir)
os.makedirs(FLAGS.output_dir)
logger.debug("-" * 80)
trial = NASTrial()
trial.run()
if __name__ == "__main__":
tf.app.run()
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string
flags = tf.app.flags
FLAGS = flags.FLAGS
DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCWH'")
DEFINE_string("search_for", None, "Must be [macro|micro]")
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
def lstm(x, prev_c, prev_h, w):
ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
i, f, o, g = tf.split(ifog, 4, axis=1)
i = tf.sigmoid(i)
f = tf.sigmoid(f)
o = tf.sigmoid(o)
g = tf.tanh(g)
next_c = i * g + f * prev_c
next_h = o * tf.tanh(next_c)
return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
next_c, next_h = [], []
for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
inputs = x if layer_id == 0 else next_h[-1]
curr_c, curr_h = lstm(inputs, _c, _h, _w)
next_c.append(curr_c)
next_h.append(curr_h)
return next_c, next_h
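# Shape sketch for the LSTM helpers above, using placeholder dimensions: each
# layer's weight is a single matrix of shape [input_dim + hidden_dim, 4 * hidden_dim]
# (the concatenated [x, prev_h] is multiplied by it and split into the i/f/o/g gates);
# layers after the first consume the previous layer's hidden state as their input.
def _stack_lstm_shape_example(batch=2, input_dim=8, hidden=16, num_layers=2):
    x = tf.zeros([batch, input_dim])
    prev_c = [tf.zeros([batch, hidden]) for _ in range(num_layers)]
    prev_h = [tf.zeros([batch, hidden]) for _ in range(num_layers)]
    w = [create_weight("w_lstm_{}".format(i),
                       [(input_dim if i == 0 else hidden) + hidden, 4 * hidden])
         for i in range(num_layers)]
    next_c, next_h = stack_lstm(x, prev_c, prev_h, w)
    return next_c, next_h  # each tensor has shape [batch, hidden]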
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
if initializer is None:
initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
def create_bias(name, shape, initializer=None):
if initializer is None:
initializer = tf.constant_initializer(0.0, dtype=tf.float32)
return tf.get_variable(name, shape, initializer=initializer)
def conv_op(inputs, filter_size, is_training, count, out_filters,
data_format, ch_mul=1, start_idx=None, separable=False):
"""
Args:
    start_idx: where to start taking the output channels. If None, fixed_arc
        mode is assumed.
    count: how many output channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("inp_conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("out_conv_{}".format(filter_size)):
if start_idx is None:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
w_point = create_weight(
"w_point", [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, inp_c, count])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
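            # Weight-sharing path (start_idx given): slice `count` output channels
            # starting at start_idx out of a weight that spans all `out_filters`
            # channels, and batch-normalize only those channels via the mask below.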
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
#test_depth = w_depth
w_point = create_weight(
"w_point", [out_filters, out_filters * ch_mul])
w_point = w_point[start_idx:start_idx+count, :]
w_point = tf.transpose(w_point, [1, 0])
w_point = tf.reshape(
w_point, [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, out_filters, out_filters])
w = tf.transpose(w, [3, 0, 1, 2])
w = w[start_idx:start_idx+count, :, :, :]
w = tf.transpose(w, [1, 2, 3, 0])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
x = tf.nn.relu(x)
return x
def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None):
"""
Args:
    start_idx: where to start taking the output channels. If None, fixed_arc
        mode is assumed.
    count: how many output channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("pool"):
if data_format == "NHWC":
actual_data_format = "channels_last"
elif data_format == "NCHW":
actual_data_format = "channels_first"
if avg_or_max == "avg":
x = tf.layers.average_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
elif avg_or_max == "max":
x = tf.layers.max_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
else:
raise ValueError("Unknown pool {}".format(avg_or_max))
if start_idx is not None:
if data_format == "NHWC":
x = x[:, :, :, start_idx: start_idx+count]
elif data_format == "NCHW":
x = x[:, start_idx: start_idx+count, :, :]
return x
def global_avg_pool(x, data_format="NHWC"):
if data_format == "NHWC":
x = tf.reduce_mean(x, [1, 2])
elif data_format == "NCHW":
x = tf.reduce_mean(x, [2, 3])
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
return x
def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
data_format="NHWC"):
if data_format == "NHWC":
shape = [x.get_shape()[3]]
elif data_format == "NCHW":
shape = [x.get_shape()[1]]
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
update_mean = moving_averages.assign_moving_average(
moving_mean, mean, decay)
update_variance = moving_averages.assign_moving_average(
moving_variance, variance, decay)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean,
variance=moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
decay=0.9, epsilon=1e-3, data_format="NHWC"):
shape = [num_channels]
indices = tf.where(mask)
indices = tf.to_int32(indices)
indices = tf.reshape(indices, [-1])
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
offset = tf.boolean_mask(offset, mask)
scale = tf.boolean_mask(scale, mask)
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
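            # Exponential moving average restricted to the masked channels:
            # scatter_sub applies moving <- moving - (1 - decay) * (moving - batch_stat)
            # only at the selected channel indices.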
mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
variance = (1.0 - decay) * \
(tf.boolean_mask(moving_variance, mask) - variance)
update_mean = tf.scatter_sub(
moving_mean, indices, mean, use_locking=True)
update_variance = tf.scatter_sub(
moving_variance, indices, variance, use_locking=True)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
masked_moving_mean = tf.boolean_mask(moving_mean, mask)
masked_moving_variance = tf.boolean_mask(moving_variance, mask)
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
mean=masked_moving_mean,
variance=masked_moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
user_flags = []
def DEFINE_string(name, default_value, doc_string):
tf.app.flags.DEFINE_string(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_integer(name, default_value, doc_string):
tf.app.flags.DEFINE_integer(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_float(name, default_value, doc_string):
tf.app.flags.DEFINE_float(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_boolean(name, default_value, doc_string):
tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def print_user_flags(line_limit=80):
print("-" * 80)
global user_flags
FLAGS = tf.app.flags.FLAGS
for flag_name in sorted(user_flags):
value = "{}".format(getattr(FLAGS, flag_name))
log_string = flag_name
log_string += "." * (line_limit - len(flag_name) - len(value))
log_string += value
print(log_string)
def get_C(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return x.get_shape()[3].value
elif data_format == "NCHW":
return x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
def get_HW(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
return x.get_shape()[2].value
def get_strides(stride, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return [1, stride, stride, 1]
elif data_format == "NCHW":
return [1, 1, stride, stride]
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
class TextColors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class Logger(object):
def __init__(self, output_file):
self.terminal = sys.stdout
self.log = open(output_file, "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def count_model_params(tf_variables):
"""
Args:
tf_variables: list of all model variables
"""
num_vars = 0
for var in tf_variables:
num_vars += np.prod([dim.value for dim in var.get_shape()])
return num_vars
def get_train_ops(
loss,
tf_variables,
train_step,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_warmup_val=None,
lr_warmup_steps=100,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_dec_min=None,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
num_train_batches=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
get_grad_norms=False,
moving_average=None):
"""
Args:
clip_mode: "global", "norm", or None.
moving_average: store the moving average of parameters
"""
if l2_reg > 0:
l2_losses = []
for var in tf_variables:
l2_losses.append(tf.reduce_sum(var ** 2))
l2_loss = tf.add_n(l2_losses)
loss += l2_reg * l2_loss
grads = tf.gradients(loss, tf_variables)
grad_norm = tf.global_norm(grads)
grad_norms = {}
for v, g in zip(tf_variables, grads):
if v is None or g is None:
continue
if isinstance(g, tf.IndexedSlices):
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
else:
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))
if clip_mode is not None:
assert grad_bound is not None, "Need grad_bound to clip gradients."
if clip_mode == "global":
grads, _ = tf.clip_by_global_norm(grads, grad_bound)
elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)
            grads = clipped
else:
raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))
if lr_cosine:
assert lr_max is not None, "Need lr_max to use lr_cosine"
assert lr_min is not None, "Need lr_min to use lr_cosine"
assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
assert num_train_batches is not None, ("Need num_train_batches to use"
" lr_cosine")
curr_epoch = train_step // num_train_batches
last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
name="last_reset")
T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
T_curr = curr_epoch - last_reset
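        # Cosine annealing with warm restarts (SGDR): within a period of T_i epochs,
        # lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * T_curr / T_i));
        # once T_curr reaches T_i the period restarts and T_i is scaled by lr_T_mul.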
def _update():
update_last_reset = tf.assign(
last_reset, curr_epoch, use_locking=True)
update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
with tf.control_dependencies([update_last_reset, update_T_i]):
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
def _no_update():
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
learning_rate = tf.cond(
tf.greater_equal(T_curr, T_i), _update, _no_update)
else:
learning_rate = tf.train.exponential_decay(
lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
lr_dec_rate, staircase=True)
if lr_dec_min is not None:
learning_rate = tf.maximum(learning_rate, lr_dec_min)
if lr_warmup_val is not None:
learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
lambda: lr_warmup_val, lambda: learning_rate)
if optim_algo == "momentum":
opt = tf.train.MomentumOptimizer(
learning_rate, 0.9, use_locking=True, use_nesterov=True)
elif optim_algo == "sgd":
opt = tf.train.GradientDescentOptimizer(
learning_rate, use_locking=True)
elif optim_algo == "adam":
opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
use_locking=True)
else:
raise ValueError("Unknown optim_algo {}".format(optim_algo))
if sync_replicas:
assert num_aggregate is not None, "Need num_aggregate to sync."
assert num_replicas is not None, "Need num_replicas to sync."
opt = tf.train.SyncReplicasOptimizer(
opt,
replicas_to_aggregate=num_aggregate,
total_num_replicas=num_replicas,
use_locking=True)
if moving_average is not None:
opt = tf.contrib.opt.MovingAverageOptimizer(
opt, average_decay=moving_average)
train_op = opt.apply_gradients(
zip(grads, tf_variables), global_step=train_step)
if get_grad_norms:
return train_op, learning_rate, grad_norm, opt, grad_norms
else:
return train_op, learning_rate, grad_norm, opt
**Run ENAS in NNI**
===
Thanks to our contributors, there is now an ENAS example, [enas-nni](https://github.com/countif/enas_nni), that runs in NNI.
We welcome more people to join us!
**Run ENAS in NNI**
===
The [enas-nni](https://github.com/countif/enas_nni) example from our contributors runs in NNI. Many thanks!
More volunteers are welcome to join us!
@@ -48,9 +48,9 @@
 "chai": "^4.1.2",
 "eslint": "^6.7.2",
 "glob": "^7.1.3",
-"mocha": "^5.2.0",
+"mocha": "^7.1.1",
 "npx": "^10.2.0",
-"nyc": "^13.1.0",
+"nyc": "^15.0.0",
 "request": "^2.87.0",
 "rmdir": "^1.2.0",
 "tmp": "^0.0.33",
@@ -59,7 +59,6 @@
 },
 "resolutions": {
 "mem": "^4.0.0",
-"handlebars": "^4.5.3",
 "lodash": "^4.17.13",
 "lodash.merge": "^4.6.2",
 "node.extend": "^1.1.7",
......
@@ -69,7 +69,7 @@ export async function execMkdir(directory: string, share: boolean = false): Prom
 */
 export async function execCopydir(source: string, destination: string): Promise<void> {
 if (process.platform === 'win32') {
-await cpp.exec(`powershell.exe Copy-Item "${source}" -Destination "${destination}" -Recurse`);
+await cpp.exec(`powershell.exe Copy-Item "${source}\\*" -Destination "${destination}" -Recurse`);
 } else {
 await cpp.exec(`cp -r '${source}/.' '${destination}'`);
 }
......