"vscode:/vscode.git/clone" did not exist on "454248c7c5f0b643d96042c7a008f3cf6efe781b"
Unverified commit a8233663, authored by QuanluZhang and committed by GitHub

add nas example using general nas programming interface (#1130)

update searchspace_generator, add example, update NAS example
parent 2653f2d6
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
#choice: true, false
useAnnotation: true
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
#codeDir: ~/nni/nni/examples/tuners/random_nas_tuner
codeDir: ../../tuners/random_nas_tuner
classFileName: random_nas_tuner.py
className: RandomNASTuner
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
"""A deep MNIST classifier using convolutional layers."""
import argparse
import logging
import math
import tempfile
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
import operators as op
FLAGS = None
logger = logging.getLogger('mnist_AutoML')
class MnistNetwork(object):
'''
    MnistNetwork initializes and builds a basic convolutional network for MNIST, together with its NAS search space.
'''
def __init__(self,
channel_1_num,
channel_2_num,
conv_size,
hidden_size,
pool_size,
learning_rate,
x_dim=784,
y_dim=10):
self.channel_1_num = channel_1_num
self.channel_2_num = channel_2_num
self.conv_size = conv_size
self.hidden_size = hidden_size
self.pool_size = pool_size
self.learning_rate = learning_rate
self.x_dim = x_dim
self.y_dim = y_dim
self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
self.train_step = None
self.accuracy = None
def build_network(self):
'''
        Build the network for MNIST while specifying its neural architecture search space.
'''
# Reshape to use within a convolutional neural net.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
with tf.name_scope('reshape'):
try:
input_dim = int(math.sqrt(self.x_dim))
            except Exception:
                print(
                    'input dim cannot be reshaped into a square image. input dim: ' + str(self.x_dim))
                logger.debug(
                    'input dim cannot be reshaped into a square image. input dim: %s', str(self.x_dim))
raise
x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])
"""@nni.mutable_layers(
{
layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num),
op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num),
op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num),
op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num),
op.dilated_conv(in_ch=1, out_ch=self.channel_1_num),
op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num),
op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num),
op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)],
fixed_inputs: [x_image],
layer_output: conv1_out
},
{
layer_choice: [op.post_process(ch_size=self.channel_1_num)],
fixed_inputs: [conv1_out],
layer_output: post1_out
},
{
layer_choice: [op.max_pool(size=3),
op.max_pool(size=5),
op.max_pool(size=7),
op.avg_pool(size=3),
op.avg_pool(size=5),
op.avg_pool(size=7)],
fixed_inputs: [post1_out],
layer_output: pool1_out
},
{
layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)],
fixed_inputs: [pool1_out],
optional_inputs: [post1_out],
optional_input_size: [0, 1],
layer_output: conv2_out
},
{
layer_choice: [op.post_process(ch_size=self.channel_2_num)],
fixed_inputs: [conv2_out],
layer_output: post2_out
},
{
layer_choice: [op.max_pool(size=3),
op.max_pool(size=5),
op.max_pool(size=7),
op.avg_pool(size=3),
op.avg_pool(size=5),
op.avg_pool(size=7)],
fixed_inputs: [post2_out],
optional_inputs: [post1_out, pool1_out],
optional_input_size: [0, 1],
layer_output: pool2_out
}
)"""
        # Fully connected layer 1 -- after two rounds of downsampling, map the
        # remaining feature maps to hidden_size features. The spatial size is
        # computed dynamically below because it depends on the chosen pooling ops.
last_dim_list = pool2_out.get_shape().as_list()
assert(last_dim_list[1] == last_dim_list[2])
last_dim = last_dim_list[1]
with tf.name_scope('fc1'):
w_fc1 = op.weight_variable(
[last_dim * last_dim * self.channel_2_num, self.hidden_size])
b_fc1 = op.bias_variable([self.hidden_size])
h_pool2_flat = tf.reshape(
pool2_out, [-1, last_dim * last_dim * self.channel_2_num])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
# Dropout - controls the complexity of the model, prevents co-adaptation of features.
with tf.name_scope('dropout'):
h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # Map the hidden features to 10 classes, one for each digit
with tf.name_scope('fc2'):
w_fc2 = op.weight_variable([self.hidden_size, self.y_dim])
b_fc2 = op.bias_variable([self.y_dim])
y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
with tf.name_scope('loss'):
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
with tf.name_scope('adam_optimizer'):
self.train_step = tf.train.AdamOptimizer(
self.learning_rate).minimize(cross_entropy)
with tf.name_scope('accuracy'):
correct_prediction = tf.equal(
tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
self.accuracy = tf.reduce_mean(
tf.cast(correct_prediction, tf.float32))
def download_mnist_retry(data_dir, max_num_retries=20):
"""Try to download mnist dataset and avoid errors"""
for _ in range(max_num_retries):
try:
return input_data.read_data_sets(data_dir, one_hot=True)
except tf.errors.AlreadyExistsError:
time.sleep(1)
raise Exception("Failed to download MNIST.")
def main(params):
'''
    Main function: build the MNIST network, train it, and report results to NNI.
'''
# Import data
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
# Create the model
# Build the graph for the deep net
mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
channel_2_num=params['channel_2_num'],
conv_size=params['conv_size'],
hidden_size=params['hidden_size'],
pool_size=params['pool_size'],
learning_rate=params['learning_rate'])
mnist_network.build_network()
logger.debug('Mnist build network done.')
# Write log
graph_location = tempfile.mkdtemp()
logger.debug('Saving graph to: %s', graph_location)
train_writer = tf.summary.FileWriter(graph_location)
train_writer.add_graph(tf.get_default_graph())
test_acc = 0.0
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(params['batch_num']):
batch = mnist.train.next_batch(params['batch_size'])
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
mnist_network.labels: batch[1],
mnist_network.keep_prob: 1 - params['dropout_rate']}
)
if i % 100 == 0:
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
nni.report_intermediate_result(test_acc)
logger.debug('test accuracy %g', test_acc)
logger.debug('Pipe send intermediate result done.')
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
nni.report_final_result(test_acc)
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')
def get_params():
''' Get parameters from command line '''
parser = argparse.ArgumentParser()
parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
parser.add_argument("--channel_1_num", type=int, default=32)
parser.add_argument("--channel_2_num", type=int, default=64)
parser.add_argument("--conv_size", type=int, default=5)
parser.add_argument("--pool_size", type=int, default=2)
parser.add_argument("--hidden_size", type=int, default=1024)
parser.add_argument("--learning_rate", type=float, default=1e-4)
parser.add_argument("--batch_num", type=int, default=2000)
parser.add_argument("--batch_size", type=int, default=32)
args, _ = parser.parse_known_args()
return args
if __name__ == '__main__':
try:
params = vars(get_params())
main(params)
except Exception as exception:
logger.exception(exception)
raise
import tensorflow as tf
import math
def weight_variable(shape):
"""weight_variable generates a weight variable of a given shape."""
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
"""bias_variable generates a bias variable of a given shape."""
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def sum_op(inputs):
"""sum_op"""
fixed_input = inputs[0][0]
optional_input = inputs[1][0]
fixed_shape = fixed_input.get_shape().as_list()
optional_shape = optional_input.get_shape().as_list()
assert fixed_shape[1] == fixed_shape[2]
assert optional_shape[1] == optional_shape[2]
pool_size = math.ceil(optional_shape[1] / fixed_shape[1])
pool_out = tf.nn.avg_pool(optional_input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME')
conv_matrix = weight_variable([1, 1, optional_shape[3], fixed_shape[3]])
conv_out = tf.nn.conv2d(pool_out, conv_matrix, strides=[1, 1, 1, 1], padding='SAME')
return fixed_input + conv_out
def conv2d(inputs, size=-1, in_ch=-1, out_ch=-1):
"""conv2d returns a 2d convolution layer with full stride."""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size in [1, 3]:
w_matrix = weight_variable([size, size, in_ch, out_ch])
return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def twice_conv2d(inputs, size=-1, in_ch=-1, out_ch=-1):
"""twice_conv2d"""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size in [3, 7]:
w_matrix1 = weight_variable([1, size, in_ch, int(out_ch/2)])
out = tf.nn.conv2d(x_input, w_matrix1, strides=[1, 1, 1, 1], padding='SAME')
w_matrix2 = weight_variable([size, 1, int(out_ch/2), out_ch])
return tf.nn.conv2d(out, w_matrix2, strides=[1, 1, 1, 1], padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def dilated_conv(inputs, size=3, in_ch=-1, out_ch=-1):
"""dilated_conv"""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size == 3:
w_matrix = weight_variable([size, size, in_ch, out_ch])
return tf.nn.atrous_conv2d(x_input, w_matrix, rate=2, padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def separable_conv(inputs, size=-1, in_ch=-1, out_ch=-1):
"""separable_conv"""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size in [3, 5, 7]:
depth_matrix = weight_variable([size, size, in_ch, 1])
point_matrix = weight_variable([1, 1, 1*in_ch, out_ch])
return tf.nn.separable_conv2d(x_input, depth_matrix, point_matrix, strides=[1, 1, 1, 1], padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def avg_pool(inputs, size=-1):
"""avg_pool downsamples a feature map."""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size in [3, 5, 7]:
return tf.nn.avg_pool(x_input, ksize=[1, size, size, 1], strides=[1, size, size, 1], padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def max_pool(inputs, size=-1):
"""max_pool downsamples a feature map."""
if not inputs[1]:
x_input = inputs[0][0]
else:
x_input = sum_op(inputs)
if size in [3, 5, 7]:
return tf.nn.max_pool(x_input, ksize=[1, size, size, 1], strides=[1, size, size, 1], padding='SAME')
else:
raise Exception("Unknown filter size: %d." % size)
def post_process(inputs, ch_size=-1):
"""post_process"""
x_input = inputs[0][0]
bias_matrix = bias_variable([ch_size])
return tf.nn.relu(x_input + bias_matrix)
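All of the operators above share one calling convention imposed by the NAS annotation machinery: inputs is a pair of lists, with inputs[0] holding the fixed input tensors and inputs[1] holding the chosen optional inputs (possibly empty). The following is a minimal sketch of calling them directly, assuming TensorFlow 1.x and that the file above is importable as operators; the shapes and channel counts are illustrative only.

import tensorflow as tf
import operators as op

# A batch of 28x28 grayscale images.
x_image = tf.placeholder(tf.float32, [None, 28, 28, 1])

# Fixed-input-only calls: each operator reads inputs[0][0] and ignores the
# empty optional-input list.
conv1 = op.conv2d([[x_image], []], size=3, in_ch=1, out_ch=32)   # 28x28x32
pool1 = op.max_pool([[conv1], []], size=3)                       # 10x10x32

# Call with one chosen optional input: sum_op average-pools conv1 down to
# pool1's spatial size, projects it to pool1's channel count with a 1x1
# convolution, adds the two, and the sum feeds this convolution.
conv2 = op.conv2d([[pool1], [conv1]], size=3, in_ch=32, out_ch=64)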
**Run Neural Network Architecture Search in NNI**
===
We now have a NAS example, [NNI-NAS-Example](https://github.com/Crysple/NNI-NAS-Example), that runs in NNI using the NAS interface and was contributed by our community.
Thanks to our lovely contributors.
And we welcome more people to join us!
import numpy as np
from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state):
    '''Randomly sample an architecture from the NAS search space.'''
chosen_archi = {}
print("zql: nas search space: ", nas_ss)
for block_name, block in nas_ss.items():
tmp_block = {}
for layer_name, layer in block.items():
tmp_layer = {}
for key, value in layer.items():
if key == 'layer_choice':
index = random_state.randint(len(value))
tmp_layer['chosen_layer'] = value[index]
elif key == 'optional_inputs':
tmp_layer['chosen_inputs'] = []
print("zql: optional_inputs", layer['optional_inputs'])
if layer['optional_inputs']:
if isinstance(layer['optional_input_size'], int):
choice_num = layer['optional_input_size']
else:
choice_range = layer['optional_input_size']
choice_num = random_state.randint(choice_range[0], choice_range[1]+1)
for _ in range(choice_num):
index = random_state.randint(len(layer['optional_inputs']))
tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
elif key == 'optional_input_size':
pass
else:
raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
tmp_block[layer_name] = tmp_layer
chosen_archi[block_name] = tmp_block
return chosen_archi
class RandomNASTuner(Tuner):
    '''A tuner that samples architectures uniformly at random from the NAS search space.'''
def __init__(self):
self.searchspace_json = None
self.random_state = None
def update_search_space(self, search_space):
        '''Update the NAS search space received from NNI.'''
self.searchspace_json = search_space
self.random_state = np.random.RandomState()
def generate_parameters(self, parameter_id):
        '''Generate a randomly sampled architecture for the given parameter id.'''
return random_archi_generator(self.searchspace_json, self.random_state)
def receive_trial_result(self, parameter_id, parameters, value):
        '''This random tuner ignores trial results.'''
pass
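To make the traversal in random_archi_generator concrete, here is a small sketch of the search-space shape the tuner expects and the kind of choice it returns. The block and layer names and the candidate strings below are made up for illustration (the real keys are produced by NNI's search-space generator, see the hunks that follow), and the snippet assumes random_nas_tuner.py is importable.

import numpy as np
from random_nas_tuner import random_archi_generator

# A toy search space in the block -> layer -> fields shape the tuner walks.
toy_search_space = {
    'mnist/mutable_block_1': {
        'mutable_layer_0': {
            'layer_choice': ['op.conv2d(size=3)', 'op.max_pool(size=3)'],
            'optional_inputs': [],
            'optional_input_size': 0,
        },
        'mutable_layer_1': {
            'layer_choice': ['op.conv2d(size=1)', 'op.avg_pool(size=5)'],
            'optional_inputs': ['layer_0_out'],
            'optional_input_size': [0, 1],  # choose between 0 and 1 extra inputs
        },
    },
}

choice = random_archi_generator(toy_search_space, np.random.RandomState(0))
# 'choice' mirrors the search-space structure, with 'chosen_layer' and
# 'chosen_inputs' filled in for every mutable layer.
print(choice)

The diff hunks below adapt NNI's annotation tooling (the code generator and the search-space generator) to this interface.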
@@ -124,7 +124,7 @@ else:
funcs_args,
fixed_inputs,
optional_inputs,
optional_input_size=0):
optional_input_size):
'''execute the chosen function and inputs.
Below is an example of chosen function and inputs:
{
@@ -79,7 +79,7 @@ def parse_annotation_mutable_layers(code, lineno):
fields['optional_inputs'] = True
elif k.id == 'optional_input_size':
assert not fields['optional_input_size'], 'Duplicated field: optional_input_size'
assert type(value) is ast.Num, 'Value of optional_input_size should be a number'
assert type(value) is ast.Num or type(value) is ast.List, 'Value of optional_input_size should be a number or list'
optional_input_size = value
fields['optional_input_size'] = True
elif k.id == 'layer_output':
@@ -102,13 +102,14 @@ def parse_annotation_mutable_layers(code, lineno):
if fields['fixed_inputs']:
target_call_args.append(fixed_inputs)
else:
target_call_args.append(ast.NameConstant(value=None))
target_call_args.append(ast.List(elts=[]))
if fields['optional_inputs']:
target_call_args.append(optional_inputs)
assert fields['optional_input_size'], 'optional_input_size must exist when optional_inputs exists'
target_call_args.append(optional_input_size)
else:
target_call_args.append(ast.NameConstant(value=None))
target_call_args.append(ast.Dict(keys=[], values=[]))
target_call_args.append(ast.Num(n=0))
target_call = ast.Call(func=target_call_attr, args=target_call_args, keywords=[])
node = ast.Assign(targets=[layer_output], value=target_call)
nodes.append(node)
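Taken together, these hunks mean that a mutable layer with no fixed or no optional inputs now receives empty containers (and size 0) instead of None in the generated call, that the runtime helper no longer defaults optional_input_size, and that optional_input_size in the annotation may be either a single number or a two-element range. As a sketch, using the same docstring syntax as mnist.py above with made-up tensor names, both of the following forms are accepted after this change:

"""@nni.mutable_layers(
{
    layer_choice: [op.max_pool(size=3), op.avg_pool(size=3)],
    fixed_inputs: [post1_out],
    optional_inputs: [conv1_out],
    optional_input_size: 1,
    layer_output: pool1_out
},
{
    layer_choice: [op.max_pool(size=3), op.avg_pool(size=3)],
    fixed_inputs: [post2_out],
    optional_inputs: [conv1_out, pool1_out],
    optional_input_size: [0, 1],
    layer_output: pool2_out
}
)"""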
@@ -54,12 +54,14 @@ class SearchSpaceGenerator(ast.NodeTransformer):
def generate_mutable_layer_search_space(self, args):
mutable_block = args[0].s
mutable_layer = args[1].s
if mutable_block not in self.search_space:
self.search_space[mutable_block] = dict()
self.search_space[mutable_block][mutable_layer] = {
'layer_choice': [key.s for key in args[2].keys],
'optional_inputs': [key.s for key in args[5].keys],
'optional_input_size': args[6].n
key = self.module_name + '/' + mutable_block
args[0].s = key
if key not in self.search_space:
self.search_space[key] = dict()
self.search_space[key][mutable_layer] = {
'layer_choice': [k.s for k in args[2].keys],
'optional_inputs': [k.s for k in args[5].keys],
'optional_input_size': args[6].n if isinstance(args[6], ast.Num) else [args[6].elts[0].n, args[6].elts[1].n]
}
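The net effect of this hunk is that search-space keys are now namespaced by the module that declared the block, and that optional_input_size may be recorded as a [min, max] range rather than only a single number. A sketch of the entry generated for one mutable layer, before and after the change; the module, block, layer, and choice names are illustrative:

# Before: keyed by the bare block name, single optional_input_size only.
old_entry = {
    'mutable_block_1': {
        'mutable_layer_0': {
            'layer_choice': ['op.conv2d(size=3)', 'op.max_pool(size=3)'],
            'optional_inputs': ['conv1_out'],
            'optional_input_size': 1,
        },
    },
}

# After: keyed by module/block, and a two-element annotation list becomes a range.
new_entry = {
    'mnist/mutable_block_1': {
        'mutable_layer_0': {
            'layer_choice': ['op.conv2d(size=3)', 'op.max_pool(size=3)'],
            'optional_inputs': ['conv1_out'],
            'optional_input_size': [0, 1],
        },
    },
}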