"""A deep MNIST classifier using convolutional layers."""

import argparse
import logging
import math
import tempfile
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import operators as op

FLAGS = None

logger = logging.getLogger('mnist_AutoML')


class MnistNetwork(object):
    '''
    MnistNetwork is for initializing and building basic network for mnist.
    '''
    def __init__(self,
                 channel_1_num,
                 channel_2_num,
                 conv_size,
                 hidden_size,
                 pool_size,
                 learning_rate,
                 x_dim=784,
                 y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        self.conv_size = conv_size
        self.hidden_size = hidden_size
        self.pool_size = pool_size
        self.learning_rate = learning_rate
        self.x_dim = x_dim
        self.y_dim = y_dim

        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        self.train_step = None
        self.accuracy = None

    def build_network(self):
        '''
        Building network for mnist, meanwhile specifying its neural architecture search space
        '''

        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print(
                    'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])

        """@nni.mutable_layers(
            {
                layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num),
                               op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num),
                               op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num),
                               op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num),
                               op.dilated_conv(in_ch=1, out_ch=self.channel_1_num),
                               op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num),
                               op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num),
                               op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)],
                fixed_inputs: [x_image],
                layer_output: conv1_out
            },
            {
                layer_choice: [op.post_process(ch_size=self.channel_1_num)],
                fixed_inputs: [conv1_out],
                layer_output: post1_out
            },
            {
                layer_choice: [op.max_pool(size=3),
                               op.max_pool(size=5),
                               op.max_pool(size=7),
                               op.avg_pool(size=3),
                               op.avg_pool(size=5),
                               op.avg_pool(size=7)],
                fixed_inputs: [post1_out],
                layer_output: pool1_out
            },
            {
                layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num),
                               op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)],
                fixed_inputs: [pool1_out],
                optional_inputs: [post1_out],
                optional_input_size: [0, 1],
                layer_output: conv2_out
            },
            {
                layer_choice: [op.post_process(ch_size=self.channel_2_num)],
                fixed_inputs: [conv2_out],
                layer_output: post2_out
            },
            {
                layer_choice: [op.max_pool(size=3),
                               op.max_pool(size=5),
                               op.max_pool(size=7),
                               op.avg_pool(size=3),
                               op.avg_pool(size=5),
                               op.avg_pool(size=7)],
                fixed_inputs: [post2_out],
                optional_inputs: [post1_out, pool1_out],
                optional_input_size: [0, 1],
                layer_output: pool2_out
            }
        )"""

        # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim_list = pool2_out.get_shape().as_list()
        assert(last_dim_list[1] == last_dim_list[2])
        last_dim = last_dim_list[1]
        with tf.name_scope('fc1'):
            w_fc1 = op.weight_variable(
                [last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = op.bias_variable([self.hidden_size])

        h_pool2_flat = tf.reshape(
            pool2_out, [-1, last_dim * last_dim * self.channel_2_num])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            w_fc2 = op.weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = op.bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(cross_entropy)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))


def download_mnist_retry(data_dir, max_num_retries=20):
    """Try to download mnist dataset and avoid errors"""
    for _ in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError:
            time.sleep(1)
    raise Exception("Failed to download MNIST.")

def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')

    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(params['batch_num']):
            batch = mnist.train.next_batch(params['batch_size'])
            mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
                                                    mnist_network.labels: batch[1],
                                                    mnist_network.keep_prob: 1 - params['dropout_rate']}
                                        )

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={mnist_network.images: mnist.test.images,
                               mnist_network.labels: mnist.test.labels,
                               mnist_network.keep_prob: 1.0})

                """@nni.report_intermediate_result(test_acc)"""
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(
            feed_dict={mnist_network.images: mnist.test.images,
                       mnist_network.labels: mnist.test.labels,
                       mnist_network.keep_prob: 1.0})

        """@nni.report_final_result(test_acc)"""
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')

def get_params():
    ''' Get parameters from command line '''
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
    parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
    parser.add_argument("--channel_1_num", type=int, default=32)
    parser.add_argument("--channel_2_num", type=int, default=64)
    parser.add_argument("--conv_size", type=int, default=5)
    parser.add_argument("--pool_size", type=int, default=2)
    parser.add_argument("--hidden_size", type=int, default=1024)
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--batch_num", type=int, default=2000)
    parser.add_argument("--batch_size", type=int, default=32)

    args, _ = parser.parse_known_args()
    return args

if __name__ == '__main__':
    try:
        params = vars(get_params())
        main(params)
    except Exception as exception:
        logger.exception(exception)
        raise