Merge pull request #138 from Microsoft/master

merge master

Merge pull request #138 from Microsoft/master
merge master
21165b53 · SparkSnail · GitHub · 41a9a598 · f10c3311 · 21165b53
Unverified Commit 21165b53 authored Mar 07, 2019 by SparkSnail Committed by GitHub Mar 07, 2019
17 changed files
--- a/examples/trials/mnist-distributed/dist_mnist.py
+++ b/examples/trials/mnist-distributed/dist_mnist.py
@@ -54,16 +54,19 @@ import nni
 flags = tf.app.flags
 flags.DEFINE_string("data_dir", "/tmp/mnist-data",
                    "Directory for storing mnist data")
-flags.DEFINE_boolean("download_only", False,
+flags.DEFINE_boolean(
+    "download_only", False,
    "Only perform downloading of data; Do not proceed to "
    "session preparation, model definition or training")
-flags.DEFINE_integer("task_index", None,
-                     "Worker task index, should be >= 0. task_index=0 is "
+flags.DEFINE_integer(
+    "task_index", None, "Worker task index, should be >= 0. task_index=0 is "
    "the master worker task the performs the variable "
    "initialization ")
-flags.DEFINE_integer("num_gpus", 1, "Total number of gpus for each machine."
+flags.DEFINE_integer(
+    "num_gpus", 1, "Total number of gpus for each machine."
    "If you don't use GPU, please set it to '0'")
-flags.DEFINE_integer("replicas_to_aggregate", None,
+flags.DEFINE_integer(
+    "replicas_to_aggregate", None,
    "Number of replicas to aggregate before parameter update"
    "is applied (For sync_replicas mode only; default: "
    "num_workers)")
@@ -96,6 +99,7 @@ IMAGE_PIXELS = 28
 #       {'cluster': cluster,
 #        'task': {'type': 'worker', 'index': 1}})

+
 def generate_default_params():
    '''
    Generate default hyper parameters
@@ -106,6 +110,15 @@ def generate_default_params():
        'hidden_units': 100,
    }

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")
+
 def main(unused_argv):
    # Receive NNI hyper parameter and update it onto default params
    RECEIVED_PARAMS = nni.get_next_parameter()
@@ -124,7 +137,7 @@ def main(unused_argv):
    FLAGS.job_name = task_type
    FLAGS.task_index = task_index

-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+    mnist = download_mnist_retry(FLAGS.data_dir)
    if FLAGS.download_only:
        sys.exit(0)

@@ -206,7 +219,8 @@ def main(unused_argv):
        hid = tf.nn.relu(hid_lin)

        y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
-    cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
+        cross_entropy = -tf.reduce_sum(
+            y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

        opt = tf.train.AdamOptimizer(PARAMS['learning_rate'])

@@ -258,8 +272,9 @@ def main(unused_argv):
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
-        device_filters=["/job:ps",
-                        "/job:worker/task:%d" % FLAGS.task_index])
+            device_filters=[
+                "/job:ps", "/job:worker/task:%d" % FLAGS.task_index
+            ])

        # The chief worker (task_index==0) session will prepare the session,
        # while the remaining workers will wait for the preparation to complete.
@@ -273,9 +288,11 @@ def main(unused_argv):
            server_grpc_url = "grpc://" + worker_spec[FLAGS.task_index]
            print("Using existing server at: %s" % server_grpc_url)

-      sess = sv.prepare_or_wait_for_session(server_grpc_url, config=sess_config)
+            sess = sv.prepare_or_wait_for_session(
+                server_grpc_url, config=sess_config)
        else:
-      sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
+            sess = sv.prepare_or_wait_for_session(
+                server.target, config=sess_config)

        print("Worker %d: Session initialization complete." % FLAGS.task_index)

@@ -302,9 +319,14 @@ def main(unused_argv):
                  (now, FLAGS.task_index, local_step, step))

            if step > 0 and step % 5000 == 0 and is_chief:
-        val_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
+                val_feed = {
+                    x: mnist.validation.images,
+                    y_: mnist.validation.labels
+                }
                interim_val_xent = sess.run(cross_entropy, feed_dict=val_feed)
-        print("After %d training step(s), validation cross entropy = %g" % (step, interim_val_xent))
+                print(
+                    "After %d training step(s), validation cross entropy = %g"
+                    % (step, interim_val_xent))

                # Only chief worker can report intermediate metrics
                nni.report_intermediate_result(interim_val_xent)

--- a/examples/trials/mnist-hyperband/mnist.py
+++ b/examples/trials/mnist-hyperband/mnist.py
@@ -3,8 +3,9 @@
 import logging
 import math
 import tempfile
-import tensorflow as tf
+import time

+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

 import nni
@@ -142,13 +143,21 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')


--- a/examples/trials/mnist/mnist.py
+++ b/examples/trials/mnist/mnist.py
@@ -4,8 +4,9 @@ import argparse
 import logging
 import math
 import tempfile
-import tensorflow as tf
+import time

+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

 import nni
@@ -143,13 +144,21 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')


--- a/examples/trials/mnist/mnist_before.py
+++ b/examples/trials/mnist/mnist_before.py
@@ -3,8 +3,9 @@ import argparse
 import logging
 import math
 import tempfile
-import tensorflow as tf
+import time

+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

 FLAGS = None
@@ -143,13 +144,21 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')


--- a/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py
+++ b/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py
@@ -57,8 +57,8 @@ class CurvefittingAssessor(Assessor):
        self.threshold = threshold
        # Record the number of gap
        self.gap = gap
-        # Record the number of times of judgments
-        self.judgment_num = 0
+        # Record, per trial job id, the number of intermediate results at its latest judgment
+        self.last_judgment_num = dict()
        # Record the best performance
        self.set_best_performance = False
        self.completed_best_performance = None
@@ -112,9 +112,10 @@ class CurvefittingAssessor(Assessor):
        curr_step = len(trial_history)
        if curr_step < self.start_step:
            return AssessResult.Good
-        if (curr_step - self.start_step) // self.gap <= self.judgment_num:
+        
+        if trial_job_id in self.last_judgment_num.keys() and curr_step - self.last_judgment_num[trial_job_id] < self.gap:
            return AssessResult.Good
-        self.judgment_num = (curr_step - self.start_step) // self.gap
+        self.last_judgment_num[trial_job_id] = curr_step

        try:
            start_time = datetime.datetime.now()

--- a/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py
+++ b/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py
@@ -39,7 +39,7 @@ def _outlierDetection_threaded(inputs):
    outlier = None

    # Create a diagnostic regression model which removes the sample that we want to evaluate
-    diagnostic_regressor_gp = gp_create_model.createModel(\
+    diagnostic_regressor_gp = gp_create_model.create_model(\
                                    samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
                                    samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
    mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
@@ -81,7 +81,7 @@ def outlierDetection(samples_x, samples_y_aggregation):
    for samples_idx in range(0, len(samples_x)):
        #sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
        #  \ % (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
-        diagnostic_regressor_gp = gp_create_model.createModel(\
+        diagnostic_regressor_gp = gp_create_model.create_model(\
                                        samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
                                        samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
        mu, sigma = gp_prediction.predict(samples_x[samples_idx],
@@ -95,5 +95,3 @@ def outlierDetection(samples_x, samples_y_aggregation):

    outliers = None if len(outliers) == 0 else outliers
    return outliers
-
-    
\ No newline at end of file
--- a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
+++ b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
@@ -24,22 +24,20 @@ import os
 import random
 import statistics
 import sys
-
-import numpy as np
-
 from enum import Enum, unique
 from multiprocessing.dummy import Pool as ThreadPool

-from nni.tuner import Tuner
+import numpy as np

-import nni.metis_tuner.lib_data as lib_data
 import nni.metis_tuner.lib_constraint_summation as lib_constraint_summation
-import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
-import nni.metis_tuner.Regression_GP.Selection as gp_selection
-import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
-import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
+import nni.metis_tuner.lib_data as lib_data
 import nni.metis_tuner.Regression_GMM.CreateModel as gmm_create_model
 import nni.metis_tuner.Regression_GMM.Selection as gmm_selection
+import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
+import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
+import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
+import nni.metis_tuner.Regression_GP.Selection as gp_selection
+from nni.tuner import Tuner

 logger = logging.getLogger("Metis_Tuner_AutoML")

@@ -67,7 +65,7 @@ class MetisTuner(Tuner):
    """

    def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=True,
-                 selection_num_starting_points=10, cold_start_num=10):
+                 selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.1):
        """
        Parameters
        ----------
@@ -89,11 +87,15 @@ class MetisTuner(Tuner):
        cold_start_num: int
            Metis needs some trial results to get a cold start. When the number of trial results is less than
        cold_start_num, Metis will randomly sample hyper-parameters for the trial.
+
+        exploration_probability: float
+            The probability that Metis selects a parameter from exploration instead of exploitation.
        """

        self.samples_x = []
        self.samples_y = []
        self.samples_y_aggregation = []
+        self.history_parameters = []
        self.space = None
        self.no_resampling = no_resampling
        self.no_candidates = no_candidates
@@ -101,6 +103,7 @@ class MetisTuner(Tuner):
        self.key_order = []
        self.cold_start_num = cold_start_num
        self.selection_num_starting_points = selection_num_starting_points
+        self.exploration_probability = exploration_probability
        self.minimize_constraints_fun = None
        self.minimize_starting_points = None

@@ -128,7 +131,7 @@ class MetisTuner(Tuner):
                except Exception as ex:
                    logger.exception(ex)
                    raise RuntimeError("The format search space contains \
-                                        some key that didn't define in key_order.")
+                                        some key that didn't define in key_order."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  )

                if key_type == 'quniform':
                    if key_range[2] == 1:
@@ -200,6 +203,8 @@ class MetisTuner(Tuner):
            init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
            results = self._pack_output(init_parameter)
        else:
+            self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
+                                                       self.selection_num_starting_points)
            results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
                                      self.x_bounds, self.x_types,
                                      threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
@@ -245,7 +250,7 @@ class MetisTuner(Tuner):

            # calculate y aggregation
            median = get_median(temp_y)
-            self.samples_y_aggregation[idx] = median
+            self.samples_y_aggregation[idx] = [median]
        else:
            self.samples_x.append(sample_x)
            self.samples_y.append([value])
@@ -268,8 +273,12 @@ class MetisTuner(Tuner):
        # ===== STEP 1: Compute the current optimum =====
        #sys.stderr.write("[%s] Predicting the optimal configuration from the current training dataset...\n" % (os.path.basename(__file__)))
        gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
-        lm_current = gp_selection.selection("lm", samples_y_aggregation, x_bounds,
-                                                       x_types, gp_model['model'],
+        lm_current = gp_selection.selection(
+            "lm",
+            samples_y_aggregation,
+            x_bounds,
+            x_types,
+            gp_model['model'],
            minimize_starting_points,
            minimize_constraints_fun=minimize_constraints_fun)
        if not lm_current:
@@ -284,8 +293,12 @@ class MetisTuner(Tuner):
            # ===== STEP 2: Get recommended configurations for exploration =====
            #sys.stderr.write("[%s] Getting candidates for exploration...\n"
            #% \(os.path.basename(__file__)))
-            results_exploration = gp_selection.selection("lc", samples_y_aggregation,
-                                                                    x_bounds, x_types, gp_model['model'],
+            results_exploration = gp_selection.selection(
+                "lc",
+                samples_y_aggregation,
+                x_bounds,
+                x_types,
+                gp_model['model'],
                minimize_starting_points,
                minimize_constraints_fun=minimize_constraints_fun)

@@ -308,7 +321,8 @@ class MetisTuner(Tuner):
                print("Getting candidates for exploitation...\n")
                try:
                    gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
-                    results_exploitation = gmm_selection.selection(x_bounds,
+                    results_exploitation = gmm_selection.selection(
+                        x_bounds,
                        x_types,
                        gmm['clusteringmodel_good'],
                        gmm['clusteringmodel_bad'],
@@ -340,8 +354,6 @@ class MetisTuner(Tuner):
                results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)

                if results_outliers is not None:
-                    #temp = len(candidates)
-
                    for results_outlier in results_outliers:
                        if _num_past_samples(samples_x[results_outlier['samples_idx']], samples_x, samples_y) < max_resampling_per_x:
                            candidates.append({'hyperparameter': samples_x[results_outlier['samples_idx']],\
@@ -357,7 +369,10 @@ class MetisTuner(Tuner):
                logger.info("Evaluating information gain of %d candidates...\n")
                next_improvement = 0

-                threads_inputs = [[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] for candidate in candidates]
+                threads_inputs = [[
+                    candidate, samples_x, samples_y, x_bounds, x_types,
+                    minimize_constraints_fun, minimize_starting_points
+                ] for candidate in candidates]
                threads_pool = ThreadPool(4)
                # Evaluate what would happen if we actually sample each candidate
                threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
@@ -370,19 +385,21 @@ class MetisTuner(Tuner):
                        temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']

                        if next_improvement > temp_improvement:
-                            # logger.info("DEBUG: \"next_candidate\" changed: \
-                            #                 lowest mu might reduce from %f (%s) to %f (%s), %s\n" %\
-                            #                 lm_current['expected_mu'], str(lm_current['hyperparameter']),\
-                            #                 threads_result['expected_lowest_mu'],\
-                            #                 str(threads_result['candidate']['hyperparameter']),\
-                            #                 threads_result['candidate']['reason'])
+                            logger.info("DEBUG: \"next_candidate\" changed: \
+                                            lowest mu might reduce from %f (%s) to %f (%s), %s\n"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          %\
+                                            lm_current['expected_mu'], str(lm_current['hyperparameter']),\
+                                            threads_result['expected_lowest_mu'],\
+                                            str(threads_result['candidate']['hyperparameter']),\
+                                            threads_result['candidate']['reason'])

                            next_improvement = temp_improvement
                            next_candidate = threads_result['candidate']
            else:
                # ===== STEP 6: If we have no candidates, randomly pick one =====
-                logger.info("DEBUG: No candidates from exploration, exploitation,\
-                                 and resampling. We will random a candidate for next_candidate\n")
+                logger.info(
+                    "DEBUG: No candidates from exploration, exploitation,\
+                                 and resampling. We will random a candidate for next_candidate\n"
+                )

                next_candidate = _rand_with_constraints(x_bounds, x_types) \
                                    if minimize_starting_points is None else minimize_starting_points[0]
@@ -391,7 +408,16 @@ class MetisTuner(Tuner):
                next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
                                  'expected_mu': expected_mu, 'expected_sigma': expected_sigma}

+        # ===== STEP 7: If the current optimal hyperparameter already occurs in the history, or a uniform random draw is at or below exploration_probability, take the next candidate (or a random point if there is none) as an exploration step =====
        outputs = self._pack_output(lm_current['hyperparameter'])
+        ap = random.uniform(0, 1)
+        if outputs in self.history_parameters or ap<=self.exploration_probability:
+            if next_candidate is not None:
+                outputs = self._pack_output(next_candidate['hyperparameter'])
+            else:
+                random_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
+                outputs = self._pack_output(random_parameter)
+        self.history_parameters.append(outputs)
        return outputs


@@ -437,8 +463,12 @@ def _calculate_lowest_mu_threaded(inputs):
        # Aggregates multiple observation of the sample sampling points
        temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
        temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
-        temp_results = gp_selection.selection("lm", temp_y_aggregation,
-                                                         x_bounds, x_types, temp_gp['model'],
+        temp_results = gp_selection.selection(
+            "lm",
+            temp_y_aggregation,
+            x_bounds,
+            x_types,
+            temp_gp['model'],
            minimize_starting_points,
            minimize_constraints_fun=minimize_constraints_fun)


--- a/src/webui/src/components/TrialsDetail.tsx
+++ b/src/webui/src/components/TrialsDetail.tsx
@@ -92,7 +92,7 @@ class TrialsDetail extends React.Component<{}, TrialDetailState> {
                            formatter: function (data: TooltipForAccuracy) {
                                const result = '<div class="tooldetailAccuracy">' +
                                    '<div>Trial No: ' + data.data[0] + '</div>' +
-                                    '<div>Default Metrc: ' + data.data[1] + '</div>' +
+                                    '<div>Default Metric: ' + data.data[1] + '</div>' +
                                    '<div>Parameters: ' +
                                    '<pre>' + JSON.stringify(data.data[2], null, 4) + '</pre>' +
                                    '</div>' +

--- a/test/config_test/examples/mnist-annotation.test.yml
+++ b/test/config_test/examples/mnist-annotation.test.yml
 authorName: nni
 experimentName: default_test
 maxExecDuration: 5m
-maxTrialNum: 2
-trialConcurrency: 1
+maxTrialNum: 4
+trialConcurrency: 2

 tuner:
  builtinTunerName: Random

--- a/test/config_test/examples/mnist.test.yml
+++ b/test/config_test/examples/mnist.test.yml
 authorName: nni
 experimentName: default_test
 maxExecDuration: 5m
-maxTrialNum: 2
-trialConcurrency: 1
+maxTrialNum: 4
+trialConcurrency: 2
 searchSpacePath: ./mnist_search_space.json

 tuner:

--- a/test/config_test/multi_phase/multi_phase.test.yml
+++ b/test/config_test/multi_phase/multi_phase.test.yml
 authorName: nni
 experimentName: default_test
 maxExecDuration: 5m
-maxTrialNum: 16
-trialConcurrency: 8
+maxTrialNum: 8
+trialConcurrency: 4
 searchSpacePath: ./search_space.json

 tuner:

--- a/test/pipelines-it-kubeflow.yml
+++ b/test/pipelines-it-kubeflow.yml
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge,
+# to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
 jobs:
 - job: 'integration_test_kubeflow'
  timeoutInMinutes: 0
-  pool: 'NNI CI KUBE CLI'
-
-  variables:
-    new_docker_img: msranni/nni.it.kb:latest

  steps:
  - script: python3 -m pip install --upgrade pip setuptools --user
@@ -19,6 +35,12 @@ jobs:
    displayName: 'build nni bdsit_wheel'

  - script: |
+      source install.sh
+    displayName: 'Install nni toolkit via source code'
+
+  - script: |
+      if [ $(build_docker_img) = 'true' ]
+      then
        cd deployment/pypi
        docker login -u $(docker_hub_user) -p $(docker_hub_pwd)

@@ -26,20 +48,11 @@ jobs:
        # update Dockerfile to install NNI in docker image from whl file built in last step
        sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
        cat ../docker/Dockerfile
-      echo $IMG_TAG
-      docker build -f ../docker/Dockerfile -t $(new_docker_img) .
-      docker push $(new_docker_img)
-    condition: eq( variables['build_docker_img'], 'true' )
-    displayName: 'build and upload nni docker image'
-
-  - script: |
-      source install.sh
-    displayName: 'Install nni toolkit via source code'
-
-  - script: |
-      if [ $(build_docker_img) = 'true' ]
-      then
-        export TEST_IMG=$(new_docker_img)
+        export IMG_TAG=`date -u +%y%m%d%H%M`
+        docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
+        docker push $(test_docker_img_name):$IMG_TAG
+        export TEST_IMG=$(test_docker_img_name):$IMG_TAG
+        cd ../../
      else
        export TEST_IMG=$(existing_docker_img)
      fi

--- a/test/pipelines-it-pai.yml
+++ b/test/pipelines-it-pai.yml
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge,
+# to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
 jobs:
 - job: 'integration_test_pai'
  timeoutInMinutes: 0
-  pool: 'NNI CI PAI CLI'
-
-  variables:
-    new_docker_img: msranni/nni.it.pai:latest

  steps:
  - script: python3 -m pip install --upgrade pip setuptools --user
@@ -19,30 +35,30 @@ jobs:
    displayName: 'build nni bdsit_wheel'

  - script: |
+      source install.sh
+    displayName: 'Install nni toolkit via source code'
+
+  - script: |
+      if [ $(build_docker_img) = 'true' ]
+      then
        cd deployment/pypi
        docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
-
        echo 'updating docker file for installing nni from local...'
        # update Dockerfile to install NNI in docker image from whl file built in last step
        sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
        cat ../docker/Dockerfile
-      echo $IMG_TAG
-      docker build -f ../docker/Dockerfile -t $(new_docker_img) .
-      docker push $(new_docker_img)
-    condition: eq( variables['build_docker_img'], 'true' )
-    displayName: 'build and upload nni docker image'
+        export IMG_TAG=`date -u +%y%m%d%H%M`

-  - script: |
-      source install.sh
-    displayName: 'Install nni toolkit via source code'
+        echo 'build and upload docker image'
+        docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
+        docker push $(test_docker_img_name):$IMG_TAG

-  - script: |
-      if [ $(build_docker_img) = 'true' ]
-      then
-        export TEST_IMG=$(new_docker_img)
+        export TEST_IMG=$(test_docker_img_name):$IMG_TAG
+        cd ../../
      else
        export TEST_IMG=$(existing_docker_img)
      fi
+
      echo "TEST_IMG:$TEST_IMG"
      cd test
      python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) \

--- a/tools/nni_annotation/README_zh_CN.md
+++ b/tools/nni_annotation/README_zh_CN.md
-# NNI Annotation 介绍
+# NNI Annotation

-为了获得良好的用户体验并减少用户负担，NNI 设计了通过注释来使用的语法。
+## 概述

-使用 NNI 时，只需要:
+为了获得良好的用户体验并减少对已有代码的影响，NNI 设计了通过 Annotation（标记）来使用的语法。 通过 Annotation，只需要在代码中加入一些注释字符串，就能启用 NNI，完全不影响代码原先的执行逻辑。

-1. 在超参变量前加上如下标记：
+样例如下：

-    '''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
+```python
+'''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
+learning_rate = 0.1
+```

-2. 在中间结果前加上：
+此样例中，NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation，是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息，来给这一行的变量赋上相应的值。

-    '''@nni.report_intermediate_result(test_acc)'''
+通过这种方式，不需要修改任何代码，代码既可以直接运行，又可以使用 NNI 来调参。

-3. 在输出结果前加上：
+## Annotation 的类型：

-    '''@nni.report_final_result(test_acc)'''
+NNI 中，有 4 种类型的 Annotation：

-4. 在代码中使用函数 `function_choice`：
+### 1. 变量

-    '''@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)'''
+`'''@nni.variable(sampling_algo, name)'''`

-通过这种方法，能够轻松的在 NNI 中实现自动调参。
+`@nni.variable` 用来标记变量。

-`@nni.variable`, `nni.choice` 为搜索空间的类型，通过以下 10 种方法来定义搜索空间：
+**参数**

-1. `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)`  
-    变量值是选项中的一种，这些变量可以是任意的表达式。
+- **sampling_algo**: 指定搜索空间的采样算法。 可将其换成 NNI 支持的其它采样函数，函数要以 `nni.` 开头。例如，`choice` 或 `uniform`，详见 [SearchSpaceSpec](https://nni.readthedocs.io/zh/latest/SearchSpaceSpec.html)。 
+- **name**: 将被赋值的变量名称。 注意，此参数应该与下面一行等号左边的值相同。

-2. `@nni.variable(nni.randint(upper),name=variable)`  
-    变量可以是范围 [0, upper) 中的任意整数。
+NNI 支持如下 10 种类型来表示搜索空间：

-3. `@nni.variable(nni.uniform(low, high),name=variable)`  
-    变量值会是 low 和 high 之间均匀分布的某个值。
+- `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)` 变量值是选项中的一种，这些变量可以是任意的表达式。
+- `@nni.variable(nni.randint(upper),name=variable)` 变量可以是范围 [0, upper) 中的任意整数。
+- `@nni.variable(nni.uniform(low, high),name=variable)` 变量值会是 low 和 high 之间均匀分布的某个值。
+- `@nni.variable(nni.quniform(low, high, q),name=variable)` 变量值会是 low 和 high 之间均匀分布的某个值，公式为：round(uniform(low, high) / q) * q
+- `@nni.variable(nni.loguniform(low, high),name=variable)` 变量值是 exp(uniform(low, high)) 的点，数值以对数均匀分布。
+- `@nni.variable(nni.qloguniform(low, high, q),name=variable)` 变量值分布的公式为： round(exp(uniform(low, high)) / q) * q
+- `@nni.variable(nni.normal(mu, sigma),name=variable)` 变量值为正态分布的实数值，平均值为 mu，标准差为 sigma。
+- `@nni.variable(nni.qnormal(mu, sigma, q),name=variable)` 变量值分布的公式为： round(normal(mu, sigma) / q) * q
+- `@nni.variable(nni.lognormal(mu, sigma),name=variable)` 变量值分布的公式为： exp(normal(mu, sigma))
+- `@nni.variable(nni.qlognormal(mu, sigma, q),name=variable)` 变量值分布的公式为： round(exp(normal(mu, sigma)) / q) * q

-4. `@nni.variable(nni.quniform(low, high, q),name=variable)`  
-    变量值会是 low 和 high 之间均匀分布的某个值，公式为：round(uniform(low, high) / q) * q
+样例如下：

-5. `@nni.variable(nni.loguniform(low, high),name=variable)`  
-    变量值是 exp(uniform(low, high)) 的点，数值以对数均匀分布。
+```python
+'''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
+learning_rate = 0.1
+```

-6. `@nni.variable(nni.qloguniform(low, high, q),name=variable)`  
-    变量值会是 low 和 high 之间均匀分布的某个值，公式为：round(exp(uniform(low, high)) / q) * q
+### 2. 函数

-7. `@nni.variable(nni.normal(label, mu, sigma),name=variable)`  
-    变量值为正态分布的实数值，平均值为 mu，标准方差为 sigma。
+`'''@nni.function_choice(*functions, name)'''`

-8. `@nni.variable(nni.qnormal(label, mu, sigma, q),name=variable)`  
-    变量值分布的公式为： round(normal(mu, sigma) / q) * q
+`@nni.function_choice` 可以从几个函数中选择一个来执行。

-9. `@nni.variable(nni.lognormal(label, mu, sigma),name=variable)`  
-    变量值分布的公式为： exp(normal(mu, sigma))
+**参数**

-10. `@nni.variable(nni.qlognormal(label, mu, sigma, q),name=variable)`  
-    变量值分布的公式为： round(exp(normal(mu, sigma)) / q) * q
\ No newline at end of file
+- **functions**: 可选择的函数。 注意，必须是包括参数的完整函数调用。 例如 `max_pool(hidden_layer, pool_size)`。
+- **name**: 将被替换的函数名称。
+
+例如：
+
+```python
+"""@nni.function_choice(max_pool(hidden_layer, pool_size), avg_pool(hidden_layer, pool_size), name=max_pool)"""
+h_pooling = max_pool(hidden_layer, pool_size)
+```
+
+### 3. 中间结果
+
+`'''@nni.report_intermediate_result(metrics)'''`
+
+`@nni.report_intermediate_result` 用来返回中间结果，这和 [Trials.md](https://nni.readthedocs.io/zh/latest/Trials.html) 中的 `nni.report_intermediate_result` 用法一样。
+
+### 4. 最终结果
+
+`'''@nni.report_final_result(metrics)'''`
+
+`@nni.report_final_result` 用来返回当前 Trial 的最终结果，这和 [Trials.md](https://nni.readthedocs.io/zh/latest/Trials.html) 中的 `nni.report_final_result` 用法一样。
\ No newline at end of file
--- a/tools/nni_annotation/examples/mnist_generated.py
+++ b/tools/nni_annotation/examples/mnist_generated.py
-import nni
 """A deep MNIST classifier using convolutional layers."""
 import logging
 import math
 import tempfile
+import time
+
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
+
+import nni
+
 FLAGS = None
 logger = logging.getLogger('mnist_AutoML')

@@ -123,12 +127,23 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    """
    Main function, build mnist network, run and send result to NNI.
    """
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+
+def main(params):
+    # Import data
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],

--- a/tools/nni_annotation/examples/mnist_with_annotation.py
+++ b/tools/nni_annotation/examples/mnist_with_annotation.py
@@ -21,8 +21,9 @@
 import logging
 import math
 import tempfile
-import tensorflow as tf
+import time

+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

 FLAGS = None
@@ -168,13 +169,21 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')


--- a/tools/nni_annotation/examples/mnist_without_annotation.py
+++ b/tools/nni_annotation/examples/mnist_without_annotation.py
@@ -21,8 +21,9 @@
 import logging
 import math
 import tempfile
-import tensorflow as tf
+import time

+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data

 import nni
@@ -172,13 +173,21 @@ def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

+def download_mnist_retry(data_dir, max_num_retries=20):
+    """Try to download mnist dataset and avoid errors"""
+    for _ in range(max_num_retries):
+        try:
+            return input_data.read_data_sets(data_dir, one_hot=True)
+        except tf.errors.AlreadyExistsError:
+            time.sleep(1)
+    raise Exception("Failed to download MNIST.")

 def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
-    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
+    mnist = download_mnist_retry(params['data_dir'])
    print('Mnist download data done.')
    logger.debug('Mnist download data done.')