"src/vscode:/vscode.git/clone" did not exist on "313b0f67bf9900ad9366df9ad62fd96656b92b48"
Unverified commit 67287997, authored by SparkSnail, committed by GitHub

Merge pull request #241 from microsoft/master

merge master
parents b4773e1e f8d42a33
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
paiYarnConfig:
userName: your_account
passWord: your_passwd
host: 0.0.0.0
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: pai
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
userName: your_account
token: your_token
host: 0.0.0.0
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 4
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: local
#choice: true, false
useAnnotation: true
multiPhase: false
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 12
minibatch_size: 10
#could use the No. 0 gpu for this tuner
#if want to specify multiple gpus, here is an example of specifying three gpus: 0,1,2
gpuIndices: 0
trial:
command: sh ./macro_cifar10.sh
codeDir: ./
gpuNum: 1
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar xzf cifar-10-python.tar.gz && mv cifar-10-batches-py cifar10
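# Note: the trial scripts below pass --data_path="data/cifar10", so the
# extracted cifar10/ folder is expected to end up at data/cifar10 relative to
# the trial's working directory.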
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"
python3 src/cifar10/nni_child_cifar10.py \
--data_format="NCHW" \
--search_for="macro" \
--reset_output_dir \
--data_path="data/cifar10" \
--output_dir="outputs" \
--train_data_size=45000 \
--batch_size=100 \
--num_epochs=8 \
--log_every=50 \
--eval_every_epochs=1 \
--child_use_aux_heads \
--child_num_layers=12 \
--child_out_filters=36 \
--child_l2_reg=0.0002 \
--child_num_branches=6 \
--child_num_cell_layers=5 \
--child_keep_prob=0.50 \
--child_drop_path_keep_prob=0.60 \
--child_lr_cosine \
--child_lr_max=0.05 \
--child_lr_min=0.001 \
--child_lr_T_0=10 \
--child_lr_T_mul=2 \
--child_mode="subgraph" \
"$@"
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"
python3 src/cifar10/nni_child_cifar10.py \
--data_format="NCHW" \
--search_for="macro" \
--reset_output_dir \
--data_path="data/cifar10" \
--output_dir="outputs" \
--train_data_size=45000 \
--batch_size=100 \
--num_epochs=30 \
--log_every=50 \
--eval_every_epochs=1 \
--child_use_aux_heads \
--child_num_layers=12 \
--child_out_filters=36 \
--child_l2_reg=0.0002 \
--child_num_branches=6 \
--child_num_cell_layers=5 \
--child_keep_prob=0.50 \
--child_drop_path_keep_prob=0.60 \
--child_lr_cosine \
--child_lr_max=0.05 \
--child_lr_min=0.001 \
--child_lr_T_0=10 \
--child_lr_T_mul=2 \
--child_mode="subgraph" \
"$@"
import os
import sys
import pickle
import numpy as np
import tensorflow as tf
def _read_data(data_path, train_files):
"""Reads CIFAR-10 format data. Always returns NHWC format.
Returns:
images: np tensor of size [N, H, W, C]
labels: np tensor of size [N]
"""
images, labels = [], []
for file_name in train_files:
print(file_name)
full_name = os.path.join(data_path, file_name)
with open(full_name, "rb") as finp:
data = pickle.load(finp, encoding='latin1')
batch_images = data["data"].astype(np.float32) / 255.0
batch_labels = np.array(data["labels"], dtype=np.int32)
images.append(batch_images)
labels.append(batch_labels)
images = np.concatenate(images, axis=0)
labels = np.concatenate(labels, axis=0)
images = np.reshape(images, [-1, 3, 32, 32])
images = np.transpose(images, [0, 2, 3, 1])
return images, labels
def read_data(data_path, num_valids=5000):
print("-" * 80)
print("Reading data")
images, labels = {}, {}
train_files = [
"data_batch_1",
"data_batch_2",
"data_batch_3",
"data_batch_4",
"data_batch_5",
]
test_file = [
"test_batch",
]
images["train"], labels["train"] = _read_data(data_path, train_files)
if num_valids:
images["valid"] = images["train"][-num_valids:]
labels["valid"] = labels["train"][-num_valids:]
images["train"] = images["train"][:-num_valids]
labels["train"] = labels["train"][:-num_valids]
else:
images["valid"], labels["valid"] = None, None
images["test"], labels["test"] = _read_data(data_path, test_file)
print("Prepropcess: [subtract mean], [divide std]")
mean = np.mean(images["train"], axis=(0, 1, 2), keepdims=True)
std = np.std(images["train"], axis=(0, 1, 2), keepdims=True)
print("mean: {}".format(np.reshape(mean * 255.0, [-1])))
print("std: {}".format(np.reshape(std * 255.0, [-1])))
images["train"] = (images["train"] - mean) / std
if num_valids:
images["valid"] = (images["valid"] - mean) / std
images["test"] = (images["test"] - mean) / std
return images, labels
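# Usage sketch: assuming the CIFAR-10 python batches were extracted to
# data/cifar10 (see the download commands above), read_data returns whitened
# NHWC float arrays split into train/valid/test.
if __name__ == "__main__":
    imgs, lbls = read_data("data/cifar10", num_valids=5000)
    # expected shapes: (45000, 32, 32, 3) train, (5000, 32, 32, 3) valid,
    # (10000, 32, 32, 3) test
    print(imgs["train"].shape, imgs["valid"].shape, imgs["test"].shape)
    print(lbls["train"].shape, lbls["valid"].shape, lbls["test"].shape)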
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from src.common_ops import create_weight, batch_norm, batch_norm_with_mask, global_avg_pool, conv_op, pool_op
from src.utils import count_model_params, get_train_ops, get_C, get_strides
from src.cifar10.models import Model
class GeneralChild(Model):
def __init__(self,
images,
labels,
cutout_size=None,
fixed_arc=None,
out_filters_scale=1,
num_layers=2,
num_branches=6,
out_filters=24,
keep_prob=1.0,
batch_size=32,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
data_format="NHWC",
name="child",
mode="subgraph",
*args,
**kwargs
):
super(self.__class__, self).__init__(
images,
labels,
cutout_size=cutout_size,
batch_size=batch_size,
clip_mode=clip_mode,
grad_bound=grad_bound,
l2_reg=l2_reg,
lr_init=lr_init,
lr_dec_start=lr_dec_start,
lr_dec_every=lr_dec_every,
lr_dec_rate=lr_dec_rate,
keep_prob=keep_prob,
optim_algo=optim_algo,
sync_replicas=sync_replicas,
num_aggregate=num_aggregate,
num_replicas=num_replicas,
data_format=data_format,
name=name)
self.lr_cosine = lr_cosine
self.lr_max = lr_max
self.lr_min = lr_min
self.lr_T_0 = lr_T_0
self.lr_T_mul = lr_T_mul
self.out_filters = out_filters * out_filters_scale
self.num_layers = num_layers
self.mode = mode
self.num_branches = num_branches
self.fixed_arc = fixed_arc
self.out_filters_scale = out_filters_scale
pool_distance = self.num_layers // 3
self.pool_layers = [pool_distance - 1, 2 * pool_distance - 1]
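        # With the 12-layer setting passed by the run scripts (--child_num_layers=12),
        # pool_distance is 4 and pool_layers is [3, 7]; these are the points at which
        # _model below inserts its stride-2 pooling stages via add_fixed_pooling_layer.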
def _factorized_reduction(self, x, out_filters, stride, is_training):
"""Reduces the shape of x without information loss due to striding."""
assert out_filters % 2 == 0, (
"Need even number of filters when using this factorized reduction.")
if stride == 1:
with tf.variable_scope("path_conv"):
inp_c = get_C(x, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
x = batch_norm(x, is_training, data_format=self.data_format)
return x
stride_spec = get_strides(stride, self.data_format)
# Skip path 1
path1 = tf.nn.avg_pool(
x, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
with tf.variable_scope("path1_conv"):
inp_c = get_C(path1, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters // 2])
path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
# Skip path 2
# First pad with 0"s on the right and bottom, then shift the filter to
# include those 0"s that were added.
if self.data_format == "NHWC":
pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]]
path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :]
concat_axis = 3
else:
pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]]
path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:]
concat_axis = 1
path2 = tf.nn.avg_pool(
path2, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
with tf.variable_scope("path2_conv"):
inp_c = get_C(path2, self.data_format)
w = create_weight("w", [1, 1, inp_c, out_filters // 2])
path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "SAME",
data_format=self.data_format)
# Concat and apply BN
final_path = tf.concat(values=[path1, path2], axis=concat_axis)
final_path = batch_norm(final_path, is_training,
data_format=self.data_format)
return final_path
def _model(self, images, is_training, reuse=False):
'''Build model'''
with tf.variable_scope(self.name, reuse=reuse):
layers = []
out_filters = self.out_filters
with tf.variable_scope("stem_conv"):
w = create_weight("w", [3, 3, 3, out_filters])
x = tf.nn.conv2d(
images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
x = batch_norm(x, is_training, data_format=self.data_format)
layers.append(x)
def add_fixed_pooling_layer(layer_id, layers, out_filters, is_training):
'''Add a fixed pooling layer every four layers'''
out_filters *= 2
with tf.variable_scope("pool_at_{0}".format(layer_id)):
pooled_layers = []
for i, layer in enumerate(layers):
with tf.variable_scope("from_{0}".format(i)):
x = self._factorized_reduction(
layer, out_filters, 2, is_training)
pooled_layers.append(x)
return pooled_layers, out_filters
def post_process_out(out, optional_inputs):
'''Form skip connection and perform batch norm'''
with tf.variable_scope("skip"):
inputs = layers[-1]
if self.data_format == "NHWC":
inp_h = inputs.get_shape()[1].value
inp_w = inputs.get_shape()[2].value
inp_c = inputs.get_shape()[3].value
out.set_shape([None, inp_h, inp_w, out_filters])
elif self.data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
inp_h = inputs.get_shape()[2].value
inp_w = inputs.get_shape()[3].value
out.set_shape([None, out_filters, inp_h, inp_w])
optional_inputs.append(out)
pout = tf.add_n(optional_inputs)
out = batch_norm(pout, is_training,
data_format=self.data_format)
layers.append(out)
return out
global layer_id
layer_id = -1
def get_layer_id():
global layer_id
layer_id += 1
return 'layer_' + str(layer_id)
def conv3(inputs):
# res_layers is pre_layers that are chosen to form skip connection
# layers[-1] is always the latest input
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_0'):
out = conv_op(
inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def conv3_sep(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_1'):
out = conv_op(
inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True)
out = post_process_out(out, inputs[1])
return out
def conv5(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_2'):
out = conv_op(
inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def conv5_sep(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_3'):
out = conv_op(
inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True)
out = post_process_out(out, inputs[1])
return out
def avg_pool(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_4'):
out = pool_op(
inputs[0][0], is_training, out_filters, out_filters, "avg", self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
def max_pool(inputs):
with tf.variable_scope(get_layer_id()):
with tf.variable_scope('branch_5'):
out = pool_op(
inputs[0][0], is_training, out_filters, out_filters, "max", self.data_format, start_idx=None)
out = post_process_out(out, inputs[1])
return out
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[x],
layer_output: layer_0_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_0_out],
optional_inputs: [layer_0_out],
optional_input_size: [0, 1],
layer_output: layer_1_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_1_out],
optional_inputs: [layer_0_out, layer_1_out],
optional_input_size: [0, 1],
layer_output: layer_2_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_2_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
optional_input_size: [0, 1],
layer_output: layer_3_out
}
)"""
layers, out_filters = add_fixed_pooling_layer(
3, layers, out_filters, is_training)
layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-4:]
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_3_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
optional_input_size: [0, 1],
layer_output: layer_4_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_4_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
optional_input_size: [0, 1],
layer_output: layer_5_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_5_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
optional_input_size: [0, 1],
layer_output: layer_6_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_6_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
optional_input_size: [0, 1],
layer_output: layer_7_out
}
)"""
layers, out_filters = add_fixed_pooling_layer(
7, layers, out_filters, is_training)
layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out = layers[
-8:]
"""@nni.mutable_layers(
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_7_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
optional_input_size: [0, 1],
layer_output: layer_8_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_8_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
optional_input_size: [0, 1],
layer_output: layer_9_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs: [layer_9_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
optional_input_size: [0, 1],
layer_output: layer_10_out
},
{
layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
fixed_inputs:[layer_10_out],
optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
optional_input_size: [0, 1],
layer_output: layer_11_out
}
)"""
x = global_avg_pool(layer_11_out, data_format=self.data_format)
if is_training:
x = tf.nn.dropout(x, self.keep_prob)
with tf.variable_scope("fc"):
if self.data_format == "NHWC":
inp_c = x.get_shape()[3].value
elif self.data_format == "NCHW":
inp_c = x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format {0}".format(self.data_format))
w = create_weight("w", [inp_c, 10])
x = tf.matmul(x, w)
return x
# override
def _build_train(self):
print("-" * 80)
print("Build train graph")
logits = self._model(self.x_train, is_training=True)
log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=self.y_train)
self.loss = tf.reduce_mean(log_probs)
self.train_preds = tf.argmax(logits, axis=1)
self.train_preds = tf.to_int32(self.train_preds)
self.train_acc = tf.equal(self.train_preds, self.y_train)
self.train_acc = tf.to_int32(self.train_acc)
self.train_acc = tf.reduce_sum(self.train_acc)
tf_variables = [var
for var in tf.trainable_variables() if var.name.startswith(self.name)]
self.num_vars = count_model_params(tf_variables)
print("Model has {} params".format(self.num_vars))
self.global_step = tf.Variable(
0, dtype=tf.int32, trainable=False, name="global_step")
self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
self.loss,
tf_variables,
self.global_step,
clip_mode=self.clip_mode,
grad_bound=self.grad_bound,
l2_reg=self.l2_reg,
lr_init=self.lr_init,
lr_dec_start=self.lr_dec_start,
lr_dec_every=self.lr_dec_every,
lr_dec_rate=self.lr_dec_rate,
lr_cosine=self.lr_cosine,
lr_max=self.lr_max,
lr_min=self.lr_min,
lr_T_0=self.lr_T_0,
lr_T_mul=self.lr_T_mul,
num_train_batches=self.num_train_batches,
optim_algo=self.optim_algo,
sync_replicas=False,
num_aggregate=self.num_aggregate,
num_replicas=self.num_replicas)
# override
def _build_valid(self):
if self.x_valid is not None:
print("-" * 80)
print("Build valid graph")
logits = self._model(self.x_valid, False, reuse=True)
self.valid_preds = tf.argmax(logits, axis=1)
self.valid_preds = tf.to_int32(self.valid_preds)
self.valid_acc = tf.equal(self.valid_preds, self.y_valid)
self.valid_acc = tf.to_int32(self.valid_acc)
self.valid_acc = tf.reduce_sum(self.valid_acc)
# override
def _build_test(self):
print("-" * 80)
print("Build test graph")
logits = self._model(self.x_test, False, reuse=True)
self.test_preds = tf.argmax(logits, axis=1)
self.test_preds = tf.to_int32(self.test_preds)
self.test_acc = tf.equal(self.test_preds, self.y_test)
self.test_acc = tf.to_int32(self.test_acc)
self.test_acc = tf.reduce_sum(self.test_acc)
def build_model(self):
self._build_train()
self._build_valid()
self._build_test()
import os
import sys
import numpy as np
import tensorflow as tf
class Model(object):
def __init__(self,
images,
labels,
cutout_size=None,
batch_size=32,
eval_batch_size=100,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=100,
lr_dec_rate=0.1,
keep_prob=1.0,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
data_format="NHWC",
name="generic_model",
seed=None,
):
"""
Args:
lr_dec_every: number of epochs to decay
"""
print("-" * 80)
print("Build model {}".format(name))
self.cutout_size = cutout_size
self.batch_size = batch_size
self.eval_batch_size = eval_batch_size
self.clip_mode = clip_mode
self.grad_bound = grad_bound
self.l2_reg = l2_reg
self.lr_init = lr_init
self.lr_dec_start = lr_dec_start
self.lr_dec_rate = lr_dec_rate
self.keep_prob = keep_prob
self.optim_algo = optim_algo
self.sync_replicas = sync_replicas
self.num_aggregate = num_aggregate
self.num_replicas = num_replicas
self.data_format = data_format
self.name = name
self.seed = seed
self.global_step = None
self.valid_acc = None
self.test_acc = None
print("Build data ops")
with tf.device("/cpu:0"):
# training data
self.num_train_examples = np.shape(images["train"])[0]
self.num_train_batches = (
self.num_train_examples + self.batch_size - 1) // self.batch_size
x_train, y_train = tf.train.shuffle_batch(
[images["train"], labels["train"]],
batch_size=self.batch_size,
capacity=50000,
enqueue_many=True,
min_after_dequeue=0,
num_threads=16,
seed=self.seed,
allow_smaller_final_batch=True,
)
self.lr_dec_every = lr_dec_every * self.num_train_batches
def _pre_process(x):
x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
x = tf.image.random_flip_left_right(x, seed=self.seed)
if self.cutout_size is not None:
mask = tf.ones(
[self.cutout_size, self.cutout_size], dtype=tf.int32)
start = tf.random_uniform(
[2], minval=0, maxval=32, dtype=tf.int32)
mask = tf.pad(mask, [[self.cutout_size + start[0], 32 - start[0]],
[self.cutout_size + start[1], 32 - start[1]]])
mask = mask[self.cutout_size: self.cutout_size + 32,
self.cutout_size: self.cutout_size + 32]
mask = tf.reshape(mask, [32, 32, 1])
mask = tf.tile(mask, [1, 1, 3])
x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x))
if self.data_format == "NCHW":
x = tf.transpose(x, [2, 0, 1])
return x
self.x_train = tf.map_fn(_pre_process, x_train, back_prop=False)
self.y_train = y_train
# valid data
self.x_valid, self.y_valid = None, None
if images["valid"] is not None:
images["valid_original"] = np.copy(images["valid"])
labels["valid_original"] = np.copy(labels["valid"])
if self.data_format == "NCHW":
images["valid"] = tf.transpose(
images["valid"], [0, 3, 1, 2])
self.num_valid_examples = np.shape(images["valid"])[0]
self.num_valid_batches = (
(self.num_valid_examples + self.eval_batch_size - 1)
// self.eval_batch_size)
self.x_valid, self.y_valid = tf.train.batch(
[images["valid"], labels["valid"]],
batch_size=self.eval_batch_size,
capacity=5000,
enqueue_many=True,
num_threads=1,
allow_smaller_final_batch=True,
)
# test data
if self.data_format == "NCHW":
images["test"] = tf.transpose(images["test"], [0, 3, 1, 2])
self.num_test_examples = np.shape(images["test"])[0]
self.num_test_batches = (
(self.num_test_examples + self.eval_batch_size - 1)
// self.eval_batch_size)
self.x_test, self.y_test = tf.train.batch(
[images["test"], labels["test"]],
batch_size=self.eval_batch_size,
capacity=10000,
enqueue_many=True,
num_threads=1,
allow_smaller_final_batch=True,
)
# cache images and labels
self.images = images
self.labels = labels
def eval_once(self, sess, eval_set, child_model, verbose=False):
"""Expects self.acc and self.global_step to be defined.
Args:
      sess: tf.Session() or one of its wrappers.
      eval_set: "valid" or "test"
"""
assert self.global_step is not None
global_step = sess.run(self.global_step)
print("Eval at {}".format(global_step))
if eval_set == "valid":
assert self.x_valid is not None
assert self.valid_acc is not None
num_examples = self.num_valid_examples
num_batches = self.num_valid_batches
acc_op = self.valid_acc
elif eval_set == "test":
assert self.test_acc is not None
num_examples = self.num_test_examples
num_batches = self.num_test_batches
acc_op = self.test_acc
else:
raise NotImplementedError("Unknown eval_set '{}'".format(eval_set))
total_acc = 0
total_exp = 0
for batch_id in range(num_batches):
acc = sess.run(acc_op)
total_acc += acc
total_exp += self.eval_batch_size
if verbose:
sys.stdout.write(
"\r{:<5d}/{:>5d}".format(total_acc, total_exp))
if verbose:
print("")
print("{}_accuracy: {:<6.4f}".format(
eval_set, float(total_acc) / total_exp))
return float(total_acc) / total_exp
def _model(self, images, is_training, reuse=None):
raise NotImplementedError("Abstract method")
def _build_train(self):
raise NotImplementedError("Abstract method")
def _build_valid(self):
raise NotImplementedError("Abstract method")
def _build_test(self):
raise NotImplementedError("Abstract method")
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import logging
import tensorflow as tf
from src.cifar10.data_utils import read_data
from src.cifar10.general_child import GeneralChild
import src.cifar10_flags
from src.cifar10_flags import FLAGS
def build_logger(log_name):
logger = logging.getLogger(log_name)
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler(log_name+'.log')
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)
return logger
logger = build_logger("nni_child_cifar10")
def build_trial(images, labels, ChildClass):
'''Build child class'''
child_model = ChildClass(
images,
labels,
use_aux_heads=FLAGS.child_use_aux_heads,
cutout_size=FLAGS.child_cutout_size,
num_layers=FLAGS.child_num_layers,
num_cells=FLAGS.child_num_cells,
num_branches=FLAGS.child_num_branches,
fixed_arc=FLAGS.child_fixed_arc,
out_filters_scale=FLAGS.child_out_filters_scale,
out_filters=FLAGS.child_out_filters,
keep_prob=FLAGS.child_keep_prob,
drop_path_keep_prob=FLAGS.child_drop_path_keep_prob,
num_epochs=FLAGS.num_epochs,
l2_reg=FLAGS.child_l2_reg,
data_format=FLAGS.data_format,
batch_size=FLAGS.batch_size,
clip_mode="norm",
grad_bound=FLAGS.child_grad_bound,
lr_init=FLAGS.child_lr,
lr_dec_every=FLAGS.child_lr_dec_every,
lr_dec_rate=FLAGS.child_lr_dec_rate,
lr_cosine=FLAGS.child_lr_cosine,
lr_max=FLAGS.child_lr_max,
lr_min=FLAGS.child_lr_min,
lr_T_0=FLAGS.child_lr_T_0,
lr_T_mul=FLAGS.child_lr_T_mul,
optim_algo="momentum",
sync_replicas=FLAGS.child_sync_replicas,
num_aggregate=FLAGS.child_num_aggregate,
num_replicas=FLAGS.child_num_replicas
)
return child_model
def get_child_ops(child_model):
'''Assemble child op to a dict'''
child_ops = {
"global_step": child_model.global_step,
"loss": child_model.loss,
"train_op": child_model.train_op,
"lr": child_model.lr,
"grad_norm": child_model.grad_norm,
"train_acc": child_model.train_acc,
"optimizer": child_model.optimizer,
"num_train_batches": child_model.num_train_batches,
"eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs,
"eval_func": child_model.eval_once,
}
return child_ops
class NASTrial():
def __init__(self):
images, labels = read_data(FLAGS.data_path, num_valids=0)
self.output_dir = os.path.join(os.getenv('NNI_OUTPUT_DIR'), '../..')
self.file_path = os.path.join(
self.output_dir, 'trainable_variable.txt')
self.graph = tf.Graph()
with self.graph.as_default():
self.child_model = build_trial(images, labels, GeneralChild)
self.total_data = {}
self.child_model.build_model()
self.child_ops = get_child_ops(self.child_model)
config = tf.ConfigProto(
intra_op_parallelism_threads=0,
inter_op_parallelism_threads=0,
allow_soft_placement=True)
self.sess = tf.train.SingularMonitoredSession(config=config)
        logger.debug('NASTrial initialization done.')
def run_one_step(self):
'''Run this model on a batch of data'''
run_ops = [
self.child_ops["loss"],
self.child_ops["lr"],
self.child_ops["grad_norm"],
self.child_ops["train_acc"],
self.child_ops["train_op"],
]
loss, lr, gn, tr_acc, _ = self.sess.run(run_ops)
global_step = self.sess.run(self.child_ops["global_step"])
log_string = ""
log_string += "ch_step={:<6d}".format(global_step)
log_string += " loss={:<8.6f}".format(loss)
log_string += " lr={:<8.4f}".format(lr)
log_string += " |g|={:<8.4f}".format(gn)
log_string += " tr_acc={:<3d}/{:>3d}".format(tr_acc, FLAGS.batch_size)
if int(global_step) % FLAGS.log_every == 0:
logger.debug(log_string)
return loss, global_step
def run(self):
        '''Run this model for the number of epochs set in FLAGS.'''
max_acc = 0
while True:
_, global_step = self.run_one_step()
if global_step % self.child_ops['num_train_batches'] == 0:
acc = self.child_ops["eval_func"](
self.sess, "test", self.child_model)
max_acc = max(max_acc, acc)
'''@nni.report_intermediate_result(acc)'''
if global_step / self.child_ops['num_train_batches'] >= FLAGS.num_epochs:
'''@nni.report_final_result(max_acc)'''
break
def main(_):
logger.debug("-" * 80)
if not os.path.isdir(FLAGS.output_dir):
logger.debug(
"Path {} does not exist. Creating.".format(FLAGS.output_dir))
os.makedirs(FLAGS.output_dir)
elif FLAGS.reset_output_dir:
logger.debug(
"Path {} exists. Remove and remake.".format(FLAGS.output_dir))
shutil.rmtree(FLAGS.output_dir)
os.makedirs(FLAGS.output_dir)
logger.debug("-" * 80)
trial = NASTrial()
trial.run()
if __name__ == "__main__":
tf.app.run()
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string
flags = tf.app.flags
FLAGS = flags.FLAGS
DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCWH'")
DEFINE_string("search_for", None, "Must be [macro|micro]")
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
def lstm(x, prev_c, prev_h, w):
ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
i, f, o, g = tf.split(ifog, 4, axis=1)
i = tf.sigmoid(i)
f = tf.sigmoid(f)
o = tf.sigmoid(o)
g = tf.tanh(g)
next_c = i * g + f * prev_c
next_h = o * tf.tanh(next_c)
return next_c, next_h
def stack_lstm(x, prev_c, prev_h, w):
next_c, next_h = [], []
for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
inputs = x if layer_id == 0 else next_h[-1]
curr_c, curr_h = lstm(inputs, _c, _h, _w)
next_c.append(curr_c)
next_h.append(curr_h)
return next_c, next_h
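# Shape sketch for the LSTM helpers above, using placeholder dimensions: each
# layer's weight is a single matrix of shape [input_dim + hidden_dim, 4 * hidden_dim]
# (the concatenated [x, prev_h] is multiplied by it and split into the i/f/o/g gates);
# layers after the first consume the previous layer's hidden state as their input.
def _stack_lstm_shape_example(batch=2, input_dim=8, hidden=16, num_layers=2):
    x = tf.zeros([batch, input_dim])
    prev_c = [tf.zeros([batch, hidden]) for _ in range(num_layers)]
    prev_h = [tf.zeros([batch, hidden]) for _ in range(num_layers)]
    w = [create_weight("w_lstm_{}".format(i),
                       [(input_dim if i == 0 else hidden) + hidden, 4 * hidden])
         for i in range(num_layers)]
    next_c, next_h = stack_lstm(x, prev_c, prev_h, w)
    return next_c, next_h  # each tensor has shape [batch, hidden]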
def create_weight(name, shape, initializer=None, trainable=True, seed=None):
if initializer is None:
initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
def create_bias(name, shape, initializer=None):
if initializer is None:
initializer = tf.constant_initializer(0.0, dtype=tf.float32)
return tf.get_variable(name, shape, initializer=initializer)
def conv_op(inputs, filter_size, is_training, count, out_filters,
data_format, ch_mul=1, start_idx=None, separable=False):
"""
Args:
    start_idx: where to start taking the output channels. If None, fixed_arc
        mode is assumed.
    count: how many output channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("inp_conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("out_conv_{}".format(filter_size)):
if start_idx is None:
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
w_point = create_weight(
"w_point", [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, inp_c, count])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
x = batch_norm(
x, is_training, data_format=data_format)
else:
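            # Weight-sharing path (start_idx given): slice `count` output channels
            # starting at start_idx out of a weight that spans all `out_filters`
            # channels, and batch-normalize only those channels via the mask below.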
if separable:
w_depth = create_weight(
"w_depth", [filter_size, filter_size, out_filters, ch_mul])
#test_depth = w_depth
w_point = create_weight(
"w_point", [out_filters, out_filters * ch_mul])
w_point = w_point[start_idx:start_idx+count, :]
w_point = tf.transpose(w_point, [1, 0])
w_point = tf.reshape(
w_point, [1, 1, out_filters * ch_mul, count])
x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
padding="SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
else:
w = create_weight(
"w", [filter_size, filter_size, out_filters, out_filters])
w = tf.transpose(w, [3, 0, 1, 2])
w = w[start_idx:start_idx+count, :, :, :]
w = tf.transpose(w, [1, 2, 3, 0])
x = tf.nn.conv2d(
x, w, [1, 1, 1, 1], "SAME", data_format=data_format)
mask = tf.range(0, out_filters, dtype=tf.int32)
mask = tf.logical_and(
start_idx <= mask, mask < start_idx + count)
x = batch_norm_with_mask(
x, is_training, mask, out_filters, data_format=data_format)
x = tf.nn.relu(x)
return x
def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None):
"""
Args:
    start_idx: where to start taking the output channels. If None, fixed_arc
        mode is assumed.
    count: how many output channels to take.
"""
if data_format == "NHWC":
inp_c = inputs.get_shape()[3].value
elif data_format == "NCHW":
inp_c = inputs.get_shape()[1].value
with tf.variable_scope("conv_1"):
w = create_weight("w", [1, 1, inp_c, out_filters])
x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1],
"SAME", data_format=data_format)
x = batch_norm(x, is_training, data_format=data_format)
x = tf.nn.relu(x)
with tf.variable_scope("pool"):
if data_format == "NHWC":
actual_data_format = "channels_last"
elif data_format == "NCHW":
actual_data_format = "channels_first"
if avg_or_max == "avg":
x = tf.layers.average_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
elif avg_or_max == "max":
x = tf.layers.max_pooling2d(
x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
else:
raise ValueError("Unknown pool {}".format(avg_or_max))
if start_idx is not None:
if data_format == "NHWC":
x = x[:, :, :, start_idx: start_idx+count]
elif data_format == "NCHW":
x = x[:, start_idx: start_idx+count, :, :]
return x
def global_avg_pool(x, data_format="NHWC"):
if data_format == "NHWC":
x = tf.reduce_mean(x, [1, 2])
elif data_format == "NCHW":
x = tf.reduce_mean(x, [2, 3])
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
return x
def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
data_format="NHWC"):
if data_format == "NHWC":
shape = [x.get_shape()[3]]
elif data_format == "NCHW":
shape = [x.get_shape()[1]]
else:
raise NotImplementedError("Unknown data_format {}".format(data_format))
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
update_mean = moving_averages.assign_moving_average(
moving_mean, mean, decay)
update_variance = moving_averages.assign_moving_average(
moving_variance, variance, decay)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean,
variance=moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
decay=0.9, epsilon=1e-3, data_format="NHWC"):
shape = [num_channels]
indices = tf.where(mask)
indices = tf.to_int32(indices)
indices = tf.reshape(indices, [-1])
with tf.variable_scope(name, reuse=None if is_training else True):
offset = tf.get_variable(
"offset", shape,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
scale = tf.get_variable(
"scale", shape,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
offset = tf.boolean_mask(offset, mask)
scale = tf.boolean_mask(scale, mask)
moving_mean = tf.get_variable(
"moving_mean", shape, trainable=False,
initializer=tf.constant_initializer(0.0, dtype=tf.float32))
moving_variance = tf.get_variable(
"moving_variance", shape, trainable=False,
initializer=tf.constant_initializer(1.0, dtype=tf.float32))
if is_training:
x, mean, variance = tf.nn.fused_batch_norm(
x, scale, offset, epsilon=epsilon, data_format=data_format,
is_training=True)
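            # Exponential moving average restricted to the masked channels:
            # scatter_sub applies moving <- moving - (1 - decay) * (moving - batch_stat)
            # only at the selected channel indices.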
mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
variance = (1.0 - decay) * \
(tf.boolean_mask(moving_variance, mask) - variance)
update_mean = tf.scatter_sub(
moving_mean, indices, mean, use_locking=True)
update_variance = tf.scatter_sub(
moving_variance, indices, variance, use_locking=True)
with tf.control_dependencies([update_mean, update_variance]):
x = tf.identity(x)
else:
masked_moving_mean = tf.boolean_mask(moving_mean, mask)
masked_moving_variance = tf.boolean_mask(moving_variance, mask)
x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
mean=masked_moving_mean,
variance=masked_moving_variance,
epsilon=epsilon, data_format=data_format,
is_training=False)
return x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
user_flags = []
def DEFINE_string(name, default_value, doc_string):
tf.app.flags.DEFINE_string(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_integer(name, default_value, doc_string):
tf.app.flags.DEFINE_integer(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_float(name, default_value, doc_string):
tf.app.flags.DEFINE_float(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def DEFINE_boolean(name, default_value, doc_string):
tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
global user_flags
user_flags.append(name)
def print_user_flags(line_limit=80):
print("-" * 80)
global user_flags
FLAGS = tf.app.flags.FLAGS
for flag_name in sorted(user_flags):
value = "{}".format(getattr(FLAGS, flag_name))
log_string = flag_name
log_string += "." * (line_limit - len(flag_name) - len(value))
log_string += value
print(log_string)
def get_C(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return x.get_shape()[3].value
elif data_format == "NCHW":
return x.get_shape()[1].value
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
def get_HW(x, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
return x.get_shape()[2].value
def get_strides(stride, data_format):
"""
Args:
x: tensor of shape [N, H, W, C] or [N, C, H, W]
"""
if data_format == "NHWC":
return [1, stride, stride, 1]
elif data_format == "NCHW":
return [1, 1, stride, stride]
else:
raise ValueError(
"Unknown data_format '{0}'".format(data_format))
class TextColors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class Logger(object):
def __init__(self, output_file):
self.terminal = sys.stdout
self.log = open(output_file, "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def count_model_params(tf_variables):
"""
Args:
tf_variables: list of all model variables
"""
num_vars = 0
for var in tf_variables:
num_vars += np.prod([dim.value for dim in var.get_shape()])
return num_vars
def get_train_ops(
loss,
tf_variables,
train_step,
clip_mode=None,
grad_bound=None,
l2_reg=1e-4,
lr_warmup_val=None,
lr_warmup_steps=100,
lr_init=0.1,
lr_dec_start=0,
lr_dec_every=10000,
lr_dec_rate=0.1,
lr_dec_min=None,
lr_cosine=False,
lr_max=None,
lr_min=None,
lr_T_0=None,
lr_T_mul=None,
num_train_batches=None,
optim_algo=None,
sync_replicas=False,
num_aggregate=None,
num_replicas=None,
get_grad_norms=False,
moving_average=None):
"""
Args:
clip_mode: "global", "norm", or None.
moving_average: store the moving average of parameters
"""
if l2_reg > 0:
l2_losses = []
for var in tf_variables:
l2_losses.append(tf.reduce_sum(var ** 2))
l2_loss = tf.add_n(l2_losses)
loss += l2_reg * l2_loss
grads = tf.gradients(loss, tf_variables)
grad_norm = tf.global_norm(grads)
grad_norms = {}
for v, g in zip(tf_variables, grads):
if v is None or g is None:
continue
if isinstance(g, tf.IndexedSlices):
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
else:
grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))
if clip_mode is not None:
assert grad_bound is not None, "Need grad_bound to clip gradients."
if clip_mode == "global":
grads, _ = tf.clip_by_global_norm(grads, grad_bound)
elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)
            grads = clipped
else:
raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))
if lr_cosine:
assert lr_max is not None, "Need lr_max to use lr_cosine"
assert lr_min is not None, "Need lr_min to use lr_cosine"
assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
assert num_train_batches is not None, ("Need num_train_batches to use"
" lr_cosine")
curr_epoch = train_step // num_train_batches
last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
name="last_reset")
T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
T_curr = curr_epoch - last_reset
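        # Cosine annealing with warm restarts (SGDR): within a period of T_i epochs,
        # lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * T_curr / T_i));
        # once T_curr reaches T_i the period restarts and T_i is scaled by lr_T_mul.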
def _update():
update_last_reset = tf.assign(
last_reset, curr_epoch, use_locking=True)
update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
with tf.control_dependencies([update_last_reset, update_T_i]):
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
def _no_update():
rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
return lr
learning_rate = tf.cond(
tf.greater_equal(T_curr, T_i), _update, _no_update)
else:
learning_rate = tf.train.exponential_decay(
lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
lr_dec_rate, staircase=True)
if lr_dec_min is not None:
learning_rate = tf.maximum(learning_rate, lr_dec_min)
if lr_warmup_val is not None:
learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
lambda: lr_warmup_val, lambda: learning_rate)
if optim_algo == "momentum":
opt = tf.train.MomentumOptimizer(
learning_rate, 0.9, use_locking=True, use_nesterov=True)
elif optim_algo == "sgd":
opt = tf.train.GradientDescentOptimizer(
learning_rate, use_locking=True)
elif optim_algo == "adam":
opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
use_locking=True)
else:
raise ValueError("Unknown optim_algo {}".format(optim_algo))
if sync_replicas:
assert num_aggregate is not None, "Need num_aggregate to sync."
assert num_replicas is not None, "Need num_replicas to sync."
opt = tf.train.SyncReplicasOptimizer(
opt,
replicas_to_aggregate=num_aggregate,
total_num_replicas=num_replicas,
use_locking=True)
if moving_average is not None:
opt = tf.contrib.opt.MovingAverageOptimizer(
opt, average_decay=moving_average)
train_op = opt.apply_gradients(
zip(grads, tf_variables), global_step=train_step)
if get_grad_norms:
return train_op, learning_rate, grad_norm, opt, grad_norms
else:
return train_op, learning_rate, grad_norm, opt
**Run ENAS in NNI**
===
Thanks to our contributors, there is now an ENAS example, [enas-nni](https://github.com/countif/enas_nni), that runs in NNI.
We welcome more people to join us!
**Run ENAS in NNI**
===
The [enas-nni](https://github.com/countif/enas_nni) example from our contributors runs in NNI. Many thanks!
More volunteers are welcome to join us!
@@ -48,9 +48,9 @@
 "chai": "^4.1.2",
 "eslint": "^6.7.2",
 "glob": "^7.1.3",
-"mocha": "^5.2.0",
+"mocha": "^7.1.1",
 "npx": "^10.2.0",
-"nyc": "^13.1.0",
+"nyc": "^15.0.0",
 "request": "^2.87.0",
 "rmdir": "^1.2.0",
 "tmp": "^0.0.33",
@@ -59,7 +59,6 @@
 },
 "resolutions": {
 "mem": "^4.0.0",
-"handlebars": "^4.5.3",
 "lodash": "^4.17.13",
 "lodash.merge": "^4.6.2",
 "node.extend": "^1.1.7",
......
@@ -69,7 +69,7 @@ export async function execMkdir(directory: string, share: boolean = false): Prom
 */
 export async function execCopydir(source: string, destination: string): Promise<void> {
 if (process.platform === 'win32') {
-await cpp.exec(`powershell.exe Copy-Item "${source}" -Destination "${destination}" -Recurse`);
+await cpp.exec(`powershell.exe Copy-Item "${source}\\*" -Destination "${destination}" -Recurse`);
 } else {
 await cpp.exec(`cp -r '${source}/.' '${destination}'`);
 }
......