Unverified Commit 7620e7c5 authored by SparkSnail, committed by GitHub

Merge pull request #214 from microsoft/master

merge master
parents c037a7c1 187494aa
......@@ -17,10 +17,10 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
"""
gridsearch_tuner.py including:
class GridSearchTuner
'''
"""
import copy
import logging
......@@ -37,29 +37,40 @@ VALUE = '_value'
logger = logging.getLogger('grid_search_AutoML')
class GridSearchTuner(Tuner):
'''
"""
GridSearchTuner will search all the possible configurations that the user defines in the search space.
The only acceptable types of search space are 'choice', 'quniform', 'randint'
The only acceptable types of search space are ``choice``, ``quniform``, ``randint``
Type 'choice' will select one of the options. Note that it can also be nested.
Type ``choice`` will select one of the options. Note that it can also be nested.
Type 'quniform' will receive three values [low, high, q], where [low, high] specifies a range and 'q' specifies the interval
It will be sampled in a way that the first sampled value is 'low',
Type ``quniform`` will receive three values [``low``, ``high``, ``q``],
where [``low``, ``high``] specifies a range and ``q`` specifies the interval.
It will be sampled in a way that the first sampled value is ``low``,
and each of the following values is 'interval' larger than the value in front of it.
Type 'randint' gives all possible intergers in range[low, high). Note that 'high' is not included.
'''
Type ``randint`` gives all possible integers in range [``low``, ``high``). Note that ``high`` is not included.
"""
def __init__(self):
self.count = -1
self.expanded_search_space = []
self.supplement_data = dict()
def json2parameter(self, ss_spec):
'''
generate all possible configs for hyperparameters from hyperparameter space.
ss_spec: hyperparameter space
'''
def _json2parameter(self, ss_spec):
"""
Generate all possible configs for hyperparameters from hyperparameter space.
Parameters
----------
ss_spec : dict or list
Hyperparameter space or the ``_value`` of a hyperparameter
Returns
-------
list or dict
All the candidate choices of hyperparameters. For a single hyperparameter, chosen_params
is a list. For multiple hyperparameters (e.g., a search space), chosen_params is a dict.
"""
if isinstance(ss_spec, dict):
if '_type' in ss_spec.keys():
_type = ss_spec['_type']
......@@ -67,7 +78,7 @@ class GridSearchTuner(Tuner):
chosen_params = list()
if _type == 'choice':
for value in _value:
choice = self.json2parameter(value)
choice = self._json2parameter(value)
if isinstance(choice, list):
chosen_params.extend(choice)
else:
......@@ -81,12 +92,12 @@ class GridSearchTuner(Tuner):
else:
chosen_params = dict()
for key in ss_spec.keys():
chosen_params[key] = self.json2parameter(ss_spec[key])
return self.expand_parameters(chosen_params)
chosen_params[key] = self._json2parameter(ss_spec[key])
return self._expand_parameters(chosen_params)
elif isinstance(ss_spec, list):
chosen_params = list()
for subspec in ss_spec[1:]:
choice = self.json2parameter(subspec)
choice = self._json2parameter(subspec)
if isinstance(choice, list):
chosen_params.extend(choice)
else:
......@@ -97,27 +108,39 @@ class GridSearchTuner(Tuner):
return chosen_params
def _parse_quniform(self, param_value):
'''parse type of quniform parameter and return a list'''
"""
Parse type of quniform parameter and return a list
"""
low, high, q = param_value[0], param_value[1], param_value[2]
return np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
def _parse_randint(self, param_value):
'''parse type of randint parameter and return a list'''
"""
Parse type of randint parameter and return a list
"""
return np.arange(param_value[0], param_value[1]).tolist()
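A hedged, standalone sketch (the helper names below are re-declared outside the class purely for illustration) of the values these two parsers produce, including the clipping that pulls the first ``quniform`` grid point back to ``low``:

import numpy as np

def parse_quniform(param_value):
    # Same expression as _parse_quniform above: multiples of q, clipped to [low, high].
    low, high, q = param_value
    return np.clip(np.arange(np.round(low / q), np.round(high / q) + 1) * q, low, high)

def parse_randint(param_value):
    # Same as _parse_randint above: every integer in [low, high), high excluded.
    return np.arange(param_value[0], param_value[1]).tolist()

print(parse_quniform([1, 10, 3]))   # [1. 3. 6. 9.] -- the 0 candidate is clipped up to low=1
print(parse_randint([3, 7]))        # [3, 4, 5, 6]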
def expand_parameters(self, para):
'''
def _expand_parameters(self, para):
"""
Enumerate all possible combinations of all parameters
para: {key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
return: {{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
'''
Parameters
----------
para : dict
{key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
Returns
-------
list of dict
{{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
"""
if len(para) == 1:
for key, values in para.items():
return list(map(lambda v: {key: v}, values))
key = list(para)[0]
values = para.pop(key)
rest_para = self.expand_parameters(para)
rest_para = self._expand_parameters(para)
ret_para = list()
for val in values:
for config in rest_para:
......@@ -126,12 +149,37 @@ class GridSearchTuner(Tuner):
return ret_para
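A minimal standalone sketch (a hypothetical ``expand_grid`` helper, equivalent in effect to ``_expand_parameters`` above) of the Cartesian expansion described in the docstring:

import itertools

def expand_grid(para):
    # Cartesian product over the per-key candidate lists.
    keys = list(para)
    return [dict(zip(keys, combo))
            for combo in itertools.product(*(para[k] for k in keys))]

print(expand_grid({"lr": [0.01, 0.1], "momentum": [0.5, 0.9]}))
# [{'lr': 0.01, 'momentum': 0.5}, {'lr': 0.01, 'momentum': 0.9},
#  {'lr': 0.1, 'momentum': 0.5}, {'lr': 0.1, 'momentum': 0.9}]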
def update_search_space(self, search_space):
'''
Check if the search space is valid and expand it: support only 'choice', 'quniform', randint'
'''
self.expanded_search_space = self.json2parameter(search_space)
"""
Check if the search space is valid and expand it: support only ``choice``, ``quniform``, ``randint``.
Parameters
----------
search_space : dict
The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
"""
self.expanded_search_space = self._json2parameter(search_space)
def generate_parameters(self, parameter_id, **kwargs):
"""
Generate parameters for one trial.
Parameters
----------
parameter_id : int
The id for the generated hyperparameter
**kwargs
Not used
Returns
-------
dict
One configuration from the expanded search space.
Raises
------
NoMoreTrialError
If all the configurations have been sent, raise :class:`~nni.NoMoreTrialError`.
"""
self.count += 1
while self.count <= len(self.expanded_search_space) - 1:
_params_tuple = convert_dict2tuple(self.expanded_search_space[self.count])
......@@ -142,15 +190,20 @@ class GridSearchTuner(Tuner):
raise nni.NoMoreTrialError('no more parameters now.')
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""
Receive a trial's final performance result reported through :func:`~nni.report_final_result` by the trial.
GridSearchTuner does not need trial's results.
"""
pass
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
list
A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
"""
_completed_num = 0
for trial_info in data:
......
......@@ -422,7 +422,8 @@ class HyperoptTuner(Tuner):
misc_by_id[tid]['vals'][key] = [val]
def get_suggestion(self, random_search=False):
"""get suggestion from hyperopt
"""
Get a suggestion from hyperopt
Parameters
----------
......@@ -473,7 +474,8 @@ class HyperoptTuner(Tuner):
return total_params
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
......
......@@ -27,21 +27,21 @@ class MedianstopAssessor(Assessor):
Parameters
----------
optimize_mode: str
optimize_mode : str
optimize mode, 'maximize' or 'minimize'
start_step: int
start_step : int
only after receiving start_step number of reported intermediate results
"""
def __init__(self, optimize_mode='maximize', start_step=0):
self.start_step = start_step
self.running_history = dict()
self.completed_avg_history = dict()
self._start_step = start_step
self._running_history = dict()
self._completed_avg_history = dict()
if optimize_mode == 'maximize':
self.high_better = True
self._high_better = True
elif optimize_mode == 'minimize':
self.high_better = False
self._high_better = False
else:
self.high_better = True
self._high_better = True
logger.warning('unrecognized optimize_mode %s', optimize_mode)
def _update_data(self, trial_job_id, trial_history):
......@@ -49,35 +49,35 @@ class MedianstopAssessor(Assessor):
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
trial_history: list
trial_history : list
The history performance matrix of each trial
"""
if trial_job_id not in self.running_history:
self.running_history[trial_job_id] = []
self.running_history[trial_job_id].extend(trial_history[len(self.running_history[trial_job_id]):])
if trial_job_id not in self._running_history:
self._running_history[trial_job_id] = []
self._running_history[trial_job_id].extend(trial_history[len(self._running_history[trial_job_id]):])
def trial_end(self, trial_job_id, success):
"""trial_end
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
success: bool
success : bool
True if the trial successfully finished, False otherwise
"""
if trial_job_id in self.running_history:
if trial_job_id in self._running_history:
if success:
cnt = 0
history_sum = 0
self.completed_avg_history[trial_job_id] = []
for each in self.running_history[trial_job_id]:
self._completed_avg_history[trial_job_id] = []
for each in self._running_history[trial_job_id]:
cnt += 1
history_sum += each
self.completed_avg_history[trial_job_id].append(history_sum / cnt)
self.running_history.pop(trial_job_id)
self._completed_avg_history[trial_job_id].append(history_sum / cnt)
self._running_history.pop(trial_job_id)
else:
logger.warning('trial_end: trial_job_id does not exist in running_history')
......@@ -86,9 +86,9 @@ class MedianstopAssessor(Assessor):
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
trial_history: list
trial_history : list
The history performance matrix of each trial
Returns
......@@ -102,7 +102,7 @@ class MedianstopAssessor(Assessor):
unrecognized exception in medianstop_assessor
"""
curr_step = len(trial_history)
if curr_step < self.start_step:
if curr_step < self._start_step:
return AssessResult.Good
try:
......@@ -115,18 +115,18 @@ class MedianstopAssessor(Assessor):
logger.exception(error)
self._update_data(trial_job_id, num_trial_history)
if self.high_better:
if self._high_better:
best_history = max(trial_history)
else:
best_history = min(trial_history)
avg_array = []
for id_ in self.completed_avg_history:
if len(self.completed_avg_history[id_]) >= curr_step:
avg_array.append(self.completed_avg_history[id_][curr_step - 1])
for id_ in self._completed_avg_history:
if len(self._completed_avg_history[id_]) >= curr_step:
avg_array.append(self._completed_avg_history[id_][curr_step - 1])
if avg_array:
avg_array.sort()
if self.high_better:
if self._high_better:
median = avg_array[(len(avg_array)-1) // 2]
return AssessResult.Bad if best_history < median else AssessResult.Good
else:
......
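As a hedged sketch of the median-stop decision implemented above (all numbers are made up): the trial's best intermediate result so far is compared against the median of the completed trials' running averages at the same step, assuming higher is better.

# Hypothetical running averages of completed trials at the current step,
# and the current trial's best intermediate result so far.
completed_avg_at_step = [0.62, 0.70, 0.55, 0.74]
best_history = 0.58

avg_array = sorted(completed_avg_at_step)
median = avg_array[(len(avg_array) - 1) // 2]        # same index rule as above
print("Bad" if best_history < median else "Good")    # Bad, since 0.58 < 0.62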
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import sys
......@@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
'''
Create the Gaussian Mixture Model
'''
samples = [samples_x[i] + [samples_y_aggregation[i]] for i in range(0, len(samples_x))]
samples = [samples_x[i] + [samples_y_aggregation[i]]
for i in range(0, len(samples_x))]
# Sorts so that we can get the top samples
samples = sorted(samples, key=itemgetter(-1))
......@@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
samples_goodbatch = samples[0:samples_goodbatch_size]
samples_badbatch = samples[samples_goodbatch_size:]
samples_x_goodbatch = [sample_goodbatch[0:-1] for sample_goodbatch in samples_goodbatch]
samples_x_goodbatch = [sample_goodbatch[0:-1]
for sample_goodbatch in samples_goodbatch]
#samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch]
samples_x_badbatch = [sample_badbatch[0:-1] for sample_badbatch in samples_badbatch]
samples_x_badbatch = [sample_badbatch[0:-1]
for sample_badbatch in samples_badbatch]
# === Trains GMM clustering models === #
#sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__)))
bgmm_goodbatch = mm.BayesianGaussianMixture(n_components=max(1, samples_goodbatch_size - 1))
bgmm_goodbatch = mm.BayesianGaussianMixture(
n_components=max(1, samples_goodbatch_size - 1))
bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1)
bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components)
bgmm_goodbatch.fit(samples_x_goodbatch)
......@@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
model['clusteringmodel_good'] = bgmm_goodbatch
model['clusteringmodel_bad'] = bgmm_badbatch
return model
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import random
......@@ -33,14 +34,17 @@ CONSTRAINT_UPPERBOUND = None
CONSTRAINT_PARAMS_IDX = []
def _ratio_scores(parameters_value, clusteringmodel_gmm_good, clusteringmodel_gmm_bad):
def _ratio_scores(parameters_value, clusteringmodel_gmm_good,
clusteringmodel_gmm_bad):
'''
The smaller the ratio, the better
'''
ratio = clusteringmodel_gmm_good.score([parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value])
ratio = clusteringmodel_gmm_good.score(
[parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value])
sigma = 0
return ratio, sigma
def selection_r(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -60,6 +64,7 @@ def selection_r(x_bounds,
return outputs
def selection(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -69,13 +74,14 @@ def selection(x_bounds,
'''
Select the lowest mu value
'''
results = lib_acquisition_function.next_hyperparameter_lowest_mu(\
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],\
x_bounds, x_types, minimize_starting_points, \
results = lib_acquisition_function.next_hyperparameter_lowest_mu(
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],
x_bounds, x_types, minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
return results
def _rand_with_constraints(x_bounds, x_types):
'''
Random generate the variable with constraints
......@@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types):
outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1])
return outputs
def _minimize_constraints_fun_summation(x):
'''
Minimize constraints fun summation
......
......@@ -17,7 +17,9 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
OutlierDectection.py
"""
import os
import sys
......@@ -30,19 +32,21 @@ sys.path.insert(1, os.path.join(sys.path[0], '..'))
def _outlierDetection_threaded(inputs):
'''
"""
Detect the outlier
'''
"""
[samples_idx, samples_x, samples_y_aggregation] = inputs
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"\
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"
% (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
# Create a diagnostic regression model which removes the sample that we
# want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
mu, sigma = gp_prediction.predict(
samples_x[samples_idx], diagnostic_regressor_gp['model'])
# 2.33 is the z-score for 98% confidence level
if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma):
......@@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs):
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}
return outlier
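A brief standalone sketch of the outlier test used above, with made-up numbers: a sample is flagged when its observed value deviates from the leave-one-out GP prediction by more than 2.33 standard deviations (per the comment above, the z-score for a 98% confidence level).

# Hypothetical leave-one-out prediction for one sample.
observed = 0.91
mu, sigma = 0.75, 0.05

is_outlier = abs(observed - mu) > 2.33 * sigma
difference = abs(observed - mu) - 2.33 * sigma
print(is_outlier, round(difference, 4))   # True 0.0435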
def outlierDetection_threaded(samples_x, samples_y_aggregation):
'''
"""
Use Multi-thread to detect the outlier
'''
"""
outliers = []
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]\
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]
for samples_idx in range(0, len(samples_x))]
threads_pool = ThreadPool(min(4, len(threads_inputs)))
threads_results = threads_pool.map(_outlierDetection_threaded, threads_inputs)
threads_results = threads_pool.map(
_outlierDetection_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
......@@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation):
if threads_result is not None:
outliers.append(threads_result)
else:
print("error here.")
print("Error: threads_result is None.")
outliers = outliers if outliers else None
return outliers
def outlierDetection(samples_x, samples_y_aggregation):
'''
TODO
'''
outliers = []
for samples_idx, _ in enumerate(samples_x):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
......@@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers.append({"samples_idx": samples_idx,
"expected_mu": mu,
"expected_sigma": sigma,
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
"difference": \
abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
outliers = outliers if outliers else None
return outliers
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_acquisition_function.py
"""
import sys
import numpy
......@@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction,
samples_y_aggregation,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Expected Improvement" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction,
return outputs
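The exact expression evaluated by ``_expected_improvement`` below falls outside the visible hunk, so as a hedged reference only, the standard Expected Improvement formula for minimization looks like this:

from scipy.stats import norm

def expected_improvement(mu, sigma, loss_optimum):
    # Expected amount by which a point with prediction (mu, sigma) improves
    # on the incumbent best observation loss_optimum.
    if sigma == 0:
        return 0.0
    z = (loss_optimum - mu) / sigma
    return (loss_optimum - mu) * norm.cdf(z) + sigma * norm.pdf(z)

print(expected_improvement(mu=0.4, sigma=0.1, loss_optimum=0.5))   # ~0.108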
def _expected_improvement(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, samples_y_aggregation,
minimize_constraints_fun):
......@@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
x = lib_data.match_val_type(x, x_bounds, x_types)
expected_improvement = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
loss_optimum = min(samples_y_aggregation)
......@@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Confidence" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_types,
minimize_constraints_fun))
if (best_acquisition_value) is None or (res.fun < best_acquisition_value):
if (best_acquisition_value) is None or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
'expected_sigma': sigma, 'acquisition_func': "lc"}
return outputs
def _lowest_confidence(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun):
# This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types)
ci = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
ci = (sigma * 1.96 * 2) / mu
# We want ci to be as large as possible
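A short standalone sketch (made-up mu and sigma) of the confidence-interval width that ``_lowest_confidence`` scores, matching the expression above; 1.96 is the two-sided 95% z-value.

# Hypothetical GP prediction at a candidate point.
mu, sigma = 2.0, 0.3

ci = (sigma * 1.96 * 2) / mu    # width of the 95% interval, normalized by the mean
print(ci)                       # ~0.588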
......@@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Mu" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x0=starting_point.reshape(1, -1),
bounds=x_bounds_minmax,
method="L-BFGS-B",
args=(fun_prediction, fun_prediction_args, \
args=(fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun))
if (best_acquisition_value is None) or (res.fun < best_acquisition_value):
if (best_acquisition_value is None) or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction,
def _lowest_mu(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun):
'''
"""
Calculate the lowest mu
'''
"""
# This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types)
mu = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, _ = fun_prediction(x, *fun_prediction_args)
return mu
\ No newline at end of file
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_constraint_summation.py
"""
import math
import random
......@@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound):
return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \
(x_bounds_lowerbound <= upperbound <= x_bounds_upperbound)
def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
'''
The key idea is that we try to move towards the upperbound by randomly choosing one
......@@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
if x_types[i] == "discrete_int":
x_idx_sorted.append([i, len(x_bounds[i])])
elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"):
x_idx_sorted.append([i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted.append(
[i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1))
for _ in range(max_retries):
......@@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
temp.append(j)
# Randomly pick a number from the integer array
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(x_bounds[x_idx][0],
min(x_bounds[x_idx][-1], budget_max))
outputs[x_idx] = random.randint(
x_bounds[x_idx][0], min(x_bounds[x_idx][-1], budget_max))
else:
# The last x that we need to assign a random number
......@@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
# This check:
# is our smallest possible value going to overflow the available budget space,
# and is our largest possible value going to underflow the lower bound
# and is our largest possible value going to underflow the
# lower bound
if (x_bounds[x_idx][0] <= budget_max) and \
(x_bounds[x_idx][-1] >= randint_lowerbound):
if x_types[x_idx] == "discrete_int":
temp = []
for j in x_bounds[x_idx]:
# if (j <= budget_max) and (j >= randint_lowerbound):
# if (j <= budget_max) and (j >=
# randint_lowerbound):
if randint_lowerbound <= j <= budget_max:
temp.append(j)
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(randint_lowerbound,
min(x_bounds[x_idx][1], budget_max))
outputs[x_idx] = random.randint(
randint_lowerbound, min(
x_bounds[x_idx][1], budget_max))
if outputs[x_idx] is None:
break
else:
budget_allocated += outputs[x_idx]
if None not in outputs:
break
return outputs
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import math
import random
......@@ -56,7 +57,7 @@ def rand(x_bounds, x_types):
temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
outputs.append(temp)
elif x_types[i] == "range_int":
temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1)
temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
outputs.append(temp)
elif x_types[i] == "range_continuous":
temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
......
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
metis_tuner.py
"""
import copy
import logging
......@@ -51,10 +55,45 @@ class MetisTuner(Tuner):
More information about the algorithm can be found here:
https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
Attributes
----------
optimize_mode : str
optimize_mode is a string that includes two modes: "maximize" and "minimize"
no_resampling : bool
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimum in the search space.
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis needs some trial results to warm up (cold start).
When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for trials.
exploration_probability : float
The probability that Metis selects parameters by exploration instead of exploitation.
"""
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=False,
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.9):
def __init__(
self,
optimize_mode="maximize",
no_resampling=True,
no_candidates=False,
selection_num_starting_points=600,
cold_start_num=10,
exploration_probability=0.9):
"""
Parameters
----------
......@@ -62,23 +101,34 @@ class MetisTuner(Tuner):
optimize_mode is a string that includes two modes: "maximize" and "minimize"
no_resampling : bool
True or False. Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free, then you do not need re-sampling.
no_candidates: bool
True or False. Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks, Metis can skip this step.
selection_num_starting_points: int
how many times Metis should try to find the global optimal in the search space?
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimum in the search space.
The higher the number, the longer it takes to output the solution.
cold_start_num: int
Metis need some trial result to get cold start. when the number of trial result is less than
cold_start_num : int
Metis needs some trial results to warm up (cold start).
When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for trials.
exploration_probability: float
exploration_probability : float
The probability that Metis selects parameters by exploration instead of exploitation.
x_bounds : list
The constraints of the parameters.
x_types : list
The type of parameters.
"""
self.samples_x = []
......@@ -101,7 +151,8 @@ class MetisTuner(Tuner):
def update_search_space(self, search_space):
"""Update the self.x_bounds and self.x_types by the search_space.json
"""
Update the self.x_bounds and self.x_types by the search_space.json
Parameters
----------
......@@ -120,12 +171,20 @@ class MetisTuner(Tuner):
key_range = search_space[key]['_value']
idx = self.key_order.index(key)
if key_type == 'quniform':
if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1]+1]
if key_range[2] == 1 and key_range[0].is_integer(
) and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1] + 1]
self.x_types[idx] = 'range_int'
else:
low, high, q = key_range
bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
bounds = np.clip(
np.arange(
np.round(
low / q),
np.round(
high / q) + 1) * q,
low,
high)
self.x_bounds[idx] = bounds
self.x_types[idx] = 'discrete_int'
elif key_type == 'randint':
......@@ -139,22 +198,28 @@ class MetisTuner(Tuner):
for key_value in key_range:
if not isinstance(key_value, (int, float)):
raise RuntimeError("Metis Tuner only support numerical choice.")
raise RuntimeError(
"Metis Tuner only support numerical choice.")
self.x_types[idx] = 'discrete_int'
else:
logger.info("Metis Tuner doesn't support this kind of variable: %s", key_type)
raise RuntimeError("Metis Tuner doesn't support this kind of variable: " + str(key_type))
logger.info(
"Metis Tuner doesn't support this kind of variable: %s",
str(key_type))
raise RuntimeError(
"Metis Tuner doesn't support this kind of variable: %s" %
str(key_type))
else:
logger.info("The format of search space is not a dict.")
raise RuntimeError("The format of search space is not a dict.")
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
def _pack_output(self, init_parameter):
"""Pack the output
"""
Pack the output
Parameters
----------
......@@ -167,14 +232,18 @@ class MetisTuner(Tuner):
output = {}
for i, param in enumerate(init_parameter):
output[self.key_order[i]] = param
return output
def generate_parameters(self, parameter_id, **kwargs):
"""Generate next parameter for trial
"""
Generate next parameter for trial
If the number of trial result is lower than cold start number,
metis will first random generate some parameters.
Otherwise, metis will choose the parameters by the Gussian Process Model and the Gussian Mixture Model.
Otherwise, Metis will choose the parameters by
the Gaussian Process Model and the Gaussian Mixture Model.
Parameters
----------
......@@ -188,26 +257,34 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter)
else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
self.x_bounds, self.x_types,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
results = self._selection(
self.samples_x,
self.samples_y_aggregation,
self.samples_y,
self.x_bounds,
self.x_types,
threshold_samplessize_resampling=(
None if self.no_resampling is True else 50),
no_candidates=self.no_candidates,
minimize_starting_points=self.minimize_starting_points,
minimize_constraints_fun=self.minimize_constraints_fun)
logger.info("Generate paramageters:\n%s", results)
logger.info("Generate paramageters: \n%s", str(results))
return results
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""Tuner receive result from trial.
"""
Tuner receive result from trial.
Parameters
----------
parameter_id : int
The id of parameters, generated by nni manager.
parameters : dict
A group of parameters that trial has tried.
value : dict/float
If value is a dict, it should have a "default" key.
"""
......@@ -216,8 +293,8 @@ class MetisTuner(Tuner):
value = -value
logger.info("Received trial result.")
logger.info("value is :%s", value)
logger.info("parameter is : %s", parameters)
logger.info("value is : %s", str(value))
logger.info("parameter is : %s", str(parameters))
# parse parameter to sample_x
sample_x = [0 for i in range(len(self.key_order))]
......@@ -244,11 +321,19 @@ class MetisTuner(Tuner):
self.samples_y_aggregation.append([value])
def _selection(self, samples_x, samples_y_aggregation, samples_y,
x_bounds, x_types, max_resampling_per_x=3,
def _selection(
self,
samples_x,
samples_y_aggregation,
samples_y,
x_bounds,
x_types,
max_resampling_per_x=3,
threshold_samplessize_exploitation=12,
threshold_samplessize_resampling=50, no_candidates=False,
minimize_starting_points=None, minimize_constraints_fun=None):
threshold_samplessize_resampling=50,
no_candidates=False,
minimize_starting_points=None,
minimize_constraints_fun=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
......@@ -259,7 +344,8 @@ class MetisTuner(Tuner):
samples_size_unique = len(samples_y)
# ===== STEP 1: Compute the current optimum =====
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
gp_model = gp_create_model.create_model(
samples_x, samples_y_aggregation)
lm_current = gp_selection.selection(
"lm",
samples_y_aggregation,
......@@ -278,7 +364,7 @@ class MetisTuner(Tuner):
})
if no_candidates is False:
# ===== STEP 2: Get recommended configurations for exploration =====
# ===== STEP 2: Get recommended configurations for exploration ====
results_exploration = gp_selection.selection(
"lc",
samples_y_aggregation,
......@@ -303,21 +389,27 @@ class MetisTuner(Tuner):
else:
logger.info("DEBUG: No suitable exploration candidates were")
# ===== STEP 3: Get recommended configurations for exploitation =====
# ===== STEP 3: Get recommended configurations for exploitation ===
if samples_size_all >= threshold_samplessize_exploitation:
logger.info("Getting candidates for exploitation...\n")
try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
gmm = gmm_create_model.create_model(
samples_x, samples_y_aggregation)
if ("discrete_int" in x_types) or ("range_int" in x_types):
results_exploitation = gmm_selection.selection(x_bounds, x_types,
results_exploitation = gmm_selection.selection(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
else:
# If all parameters are of "range_continuous", let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(x_bounds, x_types,
# If all parameters are of "range_continuous",
# let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
num_starting_points=self.selection_num_starting_points,
......@@ -335,24 +427,30 @@ class MetisTuner(Tuner):
}
candidates.append(temp_candidate)
logger.info("DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(
"DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n")
logger.info(
"DEBUG: No suitable exploitation_gmm candidates were found\n")
except ValueError as exception:
# The exception: ValueError: Fitting the mixture model failed
# because some components have ill-defined empirical covariance
# (for instance caused by singleton or collapsed samples).
# Try to decrease the number of components, or increase reg_covar.
logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.")
# Try to decrease the number of components, or increase
# reg_covar.
logger.info(
"DEBUG: No suitable exploitation_gmm \
candidates were found due to exception.")
logger.info(exception)
# ===== STEP 4: Get a list of outliers =====
if (threshold_samplessize_resampling is not None) and \
(samples_size_unique >= threshold_samplessize_resampling):
logger.info("Getting candidates for re-sampling...\n")
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)
results_outliers = gp_outlier_detection.outlierDetection_threaded(
samples_x, samples_y_aggregation)
if results_outliers is not None:
for results_outlier in results_outliers: # pylint: disable=not-an-iterable
......@@ -365,11 +463,13 @@ class MetisTuner(Tuner):
logger.info("DEBUG: %d re-sampling candidates selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable resampling candidates were found\n")
logger.info(
"DEBUG: No suitable resampling candidates were found\n")
if candidates:
# ===== STEP 5: Compute the information gain of each candidate towards the optimum =====
logger.info("Evaluating information gain of %d candidates...\n")
# ===== STEP 5: Compute the information gain of each candidate
logger.info(
"Evaluating information gain of %d candidates...\n")
next_improvement = 0
threads_inputs = [[
......@@ -377,36 +477,45 @@ class MetisTuner(Tuner):
minimize_constraints_fun, minimize_starting_points
] for candidate in candidates]
threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
# Evaluate what would happen if we actually sample each
# candidate
threads_results = threads_pool.map(
_calculate_lowest_mu_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
for threads_result in threads_results:
if threads_result['expected_lowest_mu'] < lm_current['expected_mu']:
# Information gain
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']
temp_improvement = threads_result['expected_lowest_mu'] - \
lm_current['expected_mu']
if next_improvement > temp_improvement:
next_improvement = temp_improvement
next_candidate = threads_result['candidate']
else:
# ===== STEP 6: If we have no candidates, randomly pick one =====
# ===== STEP 6: If we have no candidates, randomly pick one ===
logger.info(
"DEBUG: No candidates from exploration, exploitation,\
and resampling. We will pick a random candidate for next_candidate\n"
)
next_candidate = _rand_with_constraints(x_bounds, x_types) \
if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model'])
next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
'expected_mu': expected_mu, 'expected_sigma': expected_sigma}
# ===== STEP 7 =====
# If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold,
# take next config as exploration step
next_candidate = _rand_with_constraints(
x_bounds,
x_types) if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(
next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(
next_candidate, gp_model['model'])
next_candidate = {
'hyperparameter': next_candidate,
'reason': "random",
'expected_mu': expected_mu,
'expected_sigma': expected_sigma}
# STEP 7: If current optimal hyperparameter occurs in the history
# or exploration probability is less than the threshold, take next
# config as exploration step
outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1)
if outputs in self.total_data or ap <= self.exploration_probability:
......@@ -419,11 +528,13 @@ class MetisTuner(Tuner):
return outputs
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
data : a list of dict
each of which has at least two keys: 'parameter' and 'value'.
"""
_completed_num = 0
for trial_info in data:
......@@ -437,18 +548,26 @@ class MetisTuner(Tuner):
logger.info("Useless trial data, value is %s, skip this trial data.", _value)
continue
self.supplement_data_num += 1
_parameter_id = '_'.join(["ImportData", str(self.supplement_data_num)])
_parameter_id = '_'.join(
["ImportData", str(self.supplement_data_num)])
self.total_data.append(_params)
self.receive_trial_result(parameter_id=_parameter_id, parameters=_params, value=_value)
self.receive_trial_result(
parameter_id=_parameter_id,
parameters=_params,
value=_value)
logger.info("Successfully import data to metis tuner.")
def _rand_with_constraints(x_bounds, x_types):
outputs = None
x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX]
x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX]
x_val_withconstraints = lib_constraint_summation.rand(x_bounds_withconstraints,\
x_types_withconstraints, CONSTRAINT_LOWERBOUND, CONSTRAINT_UPPERBOUND)
x_val_withconstraints = lib_constraint_summation.rand(
x_bounds_withconstraints,
x_types_withconstraints,
CONSTRAINT_LOWERBOUND,
CONSTRAINT_UPPERBOUND)
if not x_val_withconstraints:
outputs = [None] * len(x_bounds)
......@@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types):
def _calculate_lowest_mu_threaded(inputs):
[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] = inputs
[candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points] = inputs
outputs = {"candidate": candidate, "expected_lowest_mu": None}
for expected_mu in [candidate['expected_mu'] + 1.96 * candidate['expected_sigma'],
candidate['expected_mu'] - 1.96 * candidate['expected_sigma']]:
for expected_mu in [
candidate['expected_mu'] +
1.96 *
candidate['expected_sigma'],
candidate['expected_mu'] -
1.96 *
candidate['expected_sigma']]:
temp_samples_x = copy.deepcopy(samples_x)
temp_samples_y = copy.deepcopy(samples_y)
......@@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs):
temp_samples_y.append([expected_mu])
# Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
temp_y_aggregation = [statistics.median(
temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(
temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection(
"lm",
temp_y_aggregation,
......@@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs):
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if outputs["expected_lowest_mu"] is None or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
if outputs["expected_lowest_mu"] is None \
or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
outputs["expected_lowest_mu"] = temp_results['expected_mu']
return outputs
......@@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points):
'''
Random sample some init seed within bounds.
'''
return [lib_data.rand(x_bounds, x_types) for i \
return [lib_data.rand(x_bounds, x_types) for i
in range(0, selection_num_starting_points)]
def get_median(temp_list):
"""Return median
"""
Return median
"""
num = len(temp_list)
temp_list.sort()
print(temp_list)
if num % 2 == 0:
median = (temp_list[int(num/2)] + temp_list[int(num/2) - 1]) / 2
median = (temp_list[int(num / 2)] + temp_list[int(num / 2) - 1]) / 2
else:
median = temp_list[int(num/2)]
median = temp_list[int(num / 2)]
return median
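For reference, two hedged usage examples of ``get_median`` as defined above (note that it sorts its argument in place and also prints the sorted list via the debug print):

assert get_median([3, 1, 2]) == 2
assert get_median([4.0, 1.0, 3.0, 2.0]) == 2.5   # mean of the two middle values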
......@@ -38,7 +38,7 @@ from nni.networkmorphism_tuner.layers import is_layer
def layer_distance(a, b):
"""The distance between two layers."""
# pylint: disable=unidiomatic-typecheck
if type(a) != type(b):
if not isinstance(a, type(b)):
return 1.0
if is_layer(a, "Conv"):
att_diff = [
......@@ -96,7 +96,8 @@ def skip_connection_distance(a, b):
return 1.0
len_a = abs(a[1] - a[0])
len_b = abs(b[1] - b[0])
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b))
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / \
(max(a[0], b[0]) + max(len_a, len_b))
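A small worked example (made-up skip connections, described only by their start/end layer indices) of the distance formula above:

# Hypothetical skip connections a and b, as (start, end) index pairs.
a, b = (2, 5), (3, 5)
len_a, len_b = abs(a[1] - a[0]), abs(b[1] - b[0])    # lengths 3 and 2
d = (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b))
print(d)   # (1 + 1) / (3 + 3) = 0.333...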
def skip_connections_distance(list_a, list_b):
......@@ -161,7 +162,8 @@ class IncrementalGaussianProcess:
def incremental_fit(self, train_x, train_y):
""" Incrementally fit the regressor. """
if not self._first_fitted:
raise ValueError("The first_fit function needs to be called first.")
raise ValueError(
"The first_fit function needs to be called first.")
train_x, train_y = np.array(train_x), np.array(train_y)
......@@ -174,7 +176,7 @@ class IncrementalGaussianProcess:
temp_distance_matrix = np.concatenate((up_k, down_k), axis=0)
k_matrix = bourgain_embedding_matrix(temp_distance_matrix)
diagonal = np.diag_indices_from(k_matrix)
diagonal = (diagonal[0][-len(train_x) :], diagonal[1][-len(train_x) :])
diagonal = (diagonal[0][-len(train_x):], diagonal[1][-len(train_x):])
k_matrix[diagonal] += self.alpha
try:
......@@ -186,7 +188,8 @@ class IncrementalGaussianProcess:
self._y = np.concatenate((self._y, train_y), axis=0)
self._distance_matrix = temp_distance_matrix
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
return self
......@@ -209,7 +212,8 @@ class IncrementalGaussianProcess:
self._l_matrix = cholesky(k_matrix, lower=True) # Line 2
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
self._first_fitted = True
return self
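For context on the ``cholesky``/``cho_solve`` pair above, a hedged standalone sketch with a toy kernel matrix: the Cholesky factor is used to solve K * alpha = y without explicitly inverting K.

import numpy as np
from scipy.linalg import cho_solve, cholesky

k_matrix = np.array([[2.0, 0.5],
                     [0.5, 1.0]])               # toy positive-definite kernel matrix
y = np.array([1.0, 2.0])

l_matrix = cholesky(k_matrix, lower=True)        # mirrors "Line 2" above
alpha_vector = cho_solve((l_matrix, True), y)    # mirrors "Line 3": solves k_matrix @ alpha = y
print(np.allclose(k_matrix @ alpha_vector, y))   # True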
......@@ -227,7 +231,9 @@ class IncrementalGaussianProcess:
# compute inverse K_inv of K based on its Cholesky
# decomposition L and its inverse L_inv
l_inv = solve_triangular(self._l_matrix.T, np.eye(self._l_matrix.shape[0]))
l_inv = solve_triangular(
self._l_matrix.T, np.eye(
self._l_matrix.shape[0]))
k_inv = l_inv.dot(l_inv.T)
# Compute variance of predictive distribution
y_var = np.ones(len(train_x), dtype=np.float)
......@@ -378,7 +384,11 @@ class BayesianOptimizer:
continue
temp_acq_value = self.acq(temp_graph)
pq.put(elem_class(temp_acq_value, elem.father_id, temp_graph))
pq.put(
elem_class(
temp_acq_value,
elem.father_id,
temp_graph))
descriptors.append(temp_graph.extract_descriptor())
if self._accept_new_acq_value(opt_acq, temp_acq_value):
opt_acq = temp_acq_value
......
......@@ -249,7 +249,8 @@ class Graph:
self.reverse_adj_list[v_id].remove(edge_tuple)
break
self.reverse_adj_list[new_v_id].append((u_id, layer_id))
for index, value in enumerate(self.layer_id_to_output_node_ids[layer_id]):
for index, value in enumerate(
self.layer_id_to_output_node_ids[layer_id]):
if value == v_id:
self.layer_id_to_output_node_ids[layer_id][index] = new_v_id
break
......@@ -350,7 +351,8 @@ class Graph:
self._replace_layer(layer_id, new_layer)
elif is_layer(layer, "BatchNormalization"):
new_layer = wider_bn(layer, start_dim, total_dim, n_add, self.weighted)
new_layer = wider_bn(
layer, start_dim, total_dim, n_add, self.weighted)
self._replace_layer(layer_id, new_layer)
self._search(v, start_dim, total_dim, n_add)
......@@ -405,7 +407,8 @@ class Graph:
target_id: A convolutional layer ID. The new block should be inserted after the block.
new_layer: An instance of StubLayer subclasses.
"""
self.operation_history.append(("to_deeper_model", target_id, new_layer))
self.operation_history.append(
("to_deeper_model", target_id, new_layer))
input_id = self.layer_id_to_input_node_ids[target_id][0]
output_id = self.layer_id_to_output_node_ids[target_id][0]
if self.weighted:
......@@ -478,14 +481,20 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
# Add the conv layer
new_conv_layer = get_conv_class(self.n_dim)(filters_start, filters_end, 1)
new_conv_layer = get_conv_class(
self.n_dim)(
filters_start,
filters_end,
1)
skip_output_id = self.add_layer(new_conv_layer, skip_output_id)
# Add the add layer.
add_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
add_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
add_layer = StubAdd()
self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id)
......@@ -504,7 +513,8 @@ class Graph:
weights = np.zeros((filters_end, filters_start) + filter_shape)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def to_concat_skip_model(self, start_id, end_id):
......@@ -513,7 +523,8 @@ class Graph:
start_id: The convolutional layer ID, after which to start the skip-connection.
end_id: The convolutional layer ID, after which to end the skip-connection.
"""
self.operation_history.append(("to_concat_skip_model", start_id, end_id))
self.operation_history.append(
("to_concat_skip_model", start_id, end_id))
filters_end = self.layer_list[end_id].output.shape[-1]
filters_start = self.layer_list[start_id].output.shape[-1]
start_node_id = self.layer_id_to_output_node_ids[start_id][0]
......@@ -521,9 +532,11 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
concat_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
concat_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id)
concat_layer = StubConcatenate()
......@@ -532,7 +545,10 @@ class Graph:
self.node_list[skip_output_id],
]
concat_output_node_id = self._add_node(Node(concat_layer.output_shape))
self._add_edge(concat_layer, concat_input_node_id, concat_output_node_id)
self._add_edge(
concat_layer,
concat_input_node_id,
concat_output_node_id)
self._add_edge(concat_layer, skip_output_id, concat_output_node_id)
concat_layer.output = self.node_list[concat_output_node_id]
self.node_list[concat_output_node_id].shape = concat_layer.output_shape
......@@ -559,7 +575,8 @@ class Graph:
)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def _insert_pooling_layer_chain(self, start_node_id, end_node_id):
......@@ -568,7 +585,8 @@ class Graph:
new_layer = deepcopy(layer)
if is_layer(new_layer, "Conv"):
filters = self.node_list[start_node_id].shape[-1]
new_layer = get_conv_class(self.n_dim)(filters, filters, 1, layer.stride)
new_layer = get_conv_class(self.n_dim)(
filters, filters, 1, layer.stride)
if self.weighted:
init_conv_weight(new_layer)
else:
......@@ -601,8 +619,10 @@ class Graph:
temp_v = v
temp_layer_id = layer_id
skip_type = None
while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(self.layer_list[temp_layer_id], "Concatenate"):
while not (
temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(
self.layer_list[temp_layer_id], "Concatenate"):
skip_type = NetworkDescriptor.CONCAT_CONNECT
if is_layer(self.layer_list[temp_layer_id], "Add"):
skip_type = NetworkDescriptor.ADD_CONNECT
......@@ -711,7 +731,8 @@ class Graph:
def wide_layer_ids(self):
return (
self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1]
self._conv_layer_ids_in_order(
)[:-1] + self._dense_layer_ids_in_order()[:-1]
)
def skip_connection_layer_ids(self):
......@@ -810,7 +831,8 @@ class KerasModel:
topo_node_list = self.graph.topological_order
output_id = topo_node_list[-1]
input_id = topo_node_list[0]
input_tensor = keras.layers.Input(shape=graph.node_list[input_id].shape)
input_tensor = keras.layers.Input(
shape=graph.node_list[input_id].shape)
node_list = deepcopy(self.graph.node_list)
node_list[input_id] = input_tensor
......@@ -838,7 +860,8 @@ class KerasModel:
output_tensor = keras.layers.Activation("softmax", name="activation_add")(
output_tensor
)
self.model = keras.models.Model(inputs=input_tensor, outputs=output_tensor)
self.model = keras.models.Model(
inputs=input_tensor, outputs=output_tensor)
if graph.weighted:
for index, layer in enumerate(self.layers):
......@@ -892,7 +915,8 @@ class JSONModel:
for layer_id, item in enumerate(graph.layer_list):
layer = graph.layer_list[layer_id]
layer_information = layer_description_extractor(layer, graph.node_to_id)
layer_information = layer_description_extractor(
layer, graph.node_to_id)
layer_list.append((layer_id, layer_information))
data["node_list"] = node_list
......@@ -938,7 +962,8 @@ def json_to_graph(json_model: str):
graph.input_shape = input_shape
vis = json_model["vis"]
graph.vis = {tuple(item): True for item in vis} if vis is not None else None
graph.vis = {
tuple(item): True for item in vis} if vis is not None else None
graph.weighted = json_model["weighted"]
layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"]
graph.layer_id_to_input_node_ids = {
......
......@@ -40,7 +40,8 @@ def to_wider_graph(graph):
'''
weighted_layer_ids = graph.wide_layer_ids()
weighted_layer_ids = list(
filter(lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
filter(
lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
)
wider_layers = sample(weighted_layer_ids, 1)
......@@ -58,12 +59,14 @@ def to_wider_graph(graph):
def to_skip_connection_graph(graph):
''' skip connection graph
'''
# The last conv layer cannot be widen since wider operator cannot be done over the two sides of flatten.
# The last conv layer cannot be widened since the wider operator cannot be
# applied across the two sides of flatten.
weighted_layer_ids = graph.skip_connection_layer_ids()
valid_connection = []
for skip_type in sorted([NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for skip_type in sorted(
[NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for index_a in range(len(weighted_layer_ids)):
for index_b in range(len(weighted_layer_ids))[index_a + 1 :]:
for index_b in range(len(weighted_layer_ids))[index_a + 1:]:
valid_connection.append((index_a, index_b, skip_type))
if not valid_connection:
......@@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim):
input_shape = layer.output.shape
dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU]
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim),
StubReLU]
if is_layer(layer, "ReLU"):
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim)]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim)]
dense_deeper_classes = [StubDense, get_dropout_class(n_dim)]
elif is_layer(layer, "Dropout"):
dense_deeper_classes = [StubDense, StubReLU]
......
......@@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True):
if weighted:
new_conv_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
new_weights = [
add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
......@@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True):
new_dense_layer = StubDense(units, units)
if weighted:
new_dense_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
return [StubReLU(), new_dense_layer]
......@@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True):
teacher_index = rand[i]
new_weight = teacher_w[teacher_index, :]
new_weight = new_weight[np.newaxis, :]
student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b))
student_w = np.concatenate(
(student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(
student_b, add_noise(
teacher_b[teacher_index], student_b))
new_pre_layer = StubDense(layer.input_units, n_units2 + n_add)
new_pre_layer.set_weights((student_w, student_b))
......@@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
student_w[:, : start_dim * n_units_each_channel],
add_noise(new_weight, student_w),
student_w[
:, start_dim * n_units_each_channel : total_dim * n_units_each_channel
:, start_dim * n_units_each_channel: total_dim * n_units_each_channel
],
),
axis=1,
......@@ -225,7 +230,8 @@ def add_noise(weights, other_weights):
'''
w_range = np.ptp(other_weights.flatten())
noise_range = NOISE_RATIO * w_range
noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape)
noise = np.random.uniform(-noise_range / 2.0,
noise_range / 2.0, weights.shape)
return np.add(noise, weights)
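For reference, the re-wrapped call above does not change behaviour: the noise amplitude is a fixed fraction of the peak-to-peak range of the reference tensor. A minimal, self-contained sketch of that perturbation (NOISE_RATIO is assumed to be 1e-4 here purely for illustration; the module takes the real constant from its utils):
import numpy as np

NOISE_RATIO = 1e-4  # assumption for illustration only; the module defines the real value

def add_noise(weights, other_weights):
    # The peak-to-peak range of the reference weights sets the noise scale.
    w_range = np.ptp(other_weights.flatten())
    noise_range = NOISE_RATIO * w_range
    noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape)
    return np.add(noise, weights)

w = np.zeros((2, 3), dtype=np.float32)
perturbed = add_noise(w, np.array([0, 1]))          # reference range is 1
assert np.all(np.abs(perturbed) <= NOISE_RATIO / 2)  # noise stays within half the range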
......@@ -236,7 +242,8 @@ def init_dense_weight(layer):
weight = np.eye(units)
bias = np.zeros(units)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
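The identity/zero initialisation above is what makes the morphism function-preserving: the inserted dense layer starts out as (approximately) the identity map, so the child network initially computes the same function as its parent. A small numeric check of that intuition (whether the stub applies x @ W or x @ W.T is an assumption here, but with an identity matrix both conventions agree):
import numpy as np

units = 4
weight = np.eye(units)    # identity weight matrix
bias = np.zeros(units)    # zero bias
x = np.random.rand(units)
assert np.allclose(x @ weight + bias, x)  # the new layer passes inputs through unchanged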
......@@ -256,7 +263,8 @@ def init_conv_weight(layer):
bias = np.zeros(n_filters)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
......
......@@ -28,8 +28,10 @@ from nni.networkmorphism_tuner.utils import Constant
class AvgPool(nn.Module):
'''AvgPool Module.
'''
"""
AvgPool Module.
"""
def __init__(self):
super().__init__()
......@@ -39,8 +41,10 @@ class AvgPool(nn.Module):
class GlobalAvgPool1d(AvgPool):
'''GlobalAvgPool1d Module.
'''
"""
GlobalAvgPool1d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -48,8 +52,10 @@ class GlobalAvgPool1d(AvgPool):
class GlobalAvgPool2d(AvgPool):
'''GlobalAvgPool2d Module.
'''
"""
GlobalAvgPool2d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -57,8 +63,10 @@ class GlobalAvgPool2d(AvgPool):
class GlobalAvgPool3d(AvgPool):
'''GlobalAvgPool3d Module.
'''
"""
GlobalAvgPool3d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -66,70 +74,86 @@ class GlobalAvgPool3d(AvgPool):
class StubLayer:
'''StubLayer Module. Base Module.
'''
"""
StubLayer Module. Base Module.
"""
def __init__(self, input_node=None, output_node=None):
self.input = input_node
self.output = output_node
self.weights = None
def build(self, shape):
'''build shape.
'''
"""
build shape.
"""
def set_weights(self, weights):
'''set weights.
'''
"""
set weights.
"""
self.weights = weights
def import_weights(self, torch_layer):
'''import weights.
'''
"""
import weights.
"""
def import_weights_keras(self, keras_layer):
'''import weights from keras layer.
'''
"""
import weights from keras layer.
"""
def export_weights(self, torch_layer):
'''export weights.
'''
"""
export weights.
"""
def export_weights_keras(self, keras_layer):
'''export weights to keras layer.
'''
"""
export weights to keras layer.
"""
def get_weights(self):
'''get weights.
'''
"""
get weights.
"""
return self.weights
def size(self):
'''size().
'''
"""
size().
"""
return 0
@property
def output_shape(self):
'''output shape.
'''
"""
output shape.
"""
return self.input.shape
def to_real_layer(self):
'''to real layer.
'''
"""
to real layer.
"""
def __str__(self):
'''str() function to print.
'''
"""
str() function to print.
"""
return type(self).__name__[4:]
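One detail that is easy to miss among the docstring conversions: __str__ returns the class name with the 4-character "Stub" prefix stripped, so printing a stub layer yields the bare layer type. A quick illustrative sketch (import path assumed):
from nni.networkmorphism_tuner.layers import StubReLU, StubDropout2d  # assumed import path

print(StubReLU())          # -> ReLU
print(StubDropout2d(0.5))  # -> Dropout2d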
class StubWeightBiasLayer(StubLayer):
'''StubWeightBiasLayer Module to set the bias.
'''
"""
StubWeightBiasLayer Module to set the bias.
"""
def import_weights(self, torch_layer):
self.set_weights(
(torch_layer.weight.data.cpu().numpy(), torch_layer.bias.data.cpu().numpy())
(torch_layer.weight.data.cpu().numpy(),
torch_layer.bias.data.cpu().numpy())
)
def import_weights_keras(self, keras_layer):
......@@ -144,8 +168,10 @@ class StubWeightBiasLayer(StubLayer):
class StubBatchNormalization(StubWeightBiasLayer):
'''StubBatchNormalization Module. Batch Norm.
'''
"""
StubBatchNormalization Module. Batch Norm.
"""
def __init__(self, num_features, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.num_features = num_features
......@@ -175,29 +201,37 @@ class StubBatchNormalization(StubWeightBiasLayer):
class StubBatchNormalization1d(StubBatchNormalization):
'''StubBatchNormalization1d Module.
'''
"""
StubBatchNormalization1d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm1d(self.num_features)
class StubBatchNormalization2d(StubBatchNormalization):
'''StubBatchNormalization2d Module.
'''
"""
StubBatchNormalization2d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm2d(self.num_features)
class StubBatchNormalization3d(StubBatchNormalization):
'''StubBatchNormalization3d Module.
'''
"""
StubBatchNormalization3d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm3d(self.num_features)
class StubDense(StubWeightBiasLayer):
'''StubDense Module. Linear.
'''
"""
StubDense Module. Linear.
"""
def __init__(self, input_units, units, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_units = input_units
......@@ -208,7 +242,9 @@ class StubDense(StubWeightBiasLayer):
return (self.units,)
def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1]))
self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1]))
......@@ -221,9 +257,12 @@ class StubDense(StubWeightBiasLayer):
class StubConv(StubWeightBiasLayer):
'''StubConv Module. Conv.
'''
def __init__(self, input_channel, filters, kernel_size, stride=1, input_node=None, output_node=None):
"""
StubConv Module. Conv.
"""
def __init__(self, input_channel, filters, kernel_size,
stride=1, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_channel = input_channel
self.filters = filters
......@@ -242,13 +281,16 @@ class StubConv(StubWeightBiasLayer):
return tuple(ret)
def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1]))
self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1]))
def size(self):
return (self.input_channel * self.kernel_size * self.kernel_size + 1) * self.filters
return (self.input_channel * self.kernel_size *
self.kernel_size + 1) * self.filters
@abstractmethod
def to_real_layer(self):
......@@ -272,8 +314,10 @@ class StubConv(StubWeightBiasLayer):
class StubConv1d(StubConv):
'''StubConv1d Module.
'''
"""
StubConv1d Module.
"""
def to_real_layer(self):
return torch.nn.Conv1d(
self.input_channel,
......@@ -285,8 +329,10 @@ class StubConv1d(StubConv):
class StubConv2d(StubConv):
'''StubConv2d Module.
'''
"""
StubConv2d Module.
"""
def to_real_layer(self):
return torch.nn.Conv2d(
self.input_channel,
......@@ -298,8 +344,10 @@ class StubConv2d(StubConv):
class StubConv3d(StubConv):
'''StubConv3d Module.
'''
"""
StubConv3d Module.
"""
def to_real_layer(self):
return torch.nn.Conv3d(
self.input_channel,
......@@ -311,8 +359,10 @@ class StubConv3d(StubConv):
class StubAggregateLayer(StubLayer):
'''StubAggregateLayer Module.
'''
"""
StubAggregateLayer Module.
"""
def __init__(self, input_nodes=None, output_node=None):
if input_nodes is None:
input_nodes = []
......@@ -320,8 +370,8 @@ class StubAggregateLayer(StubLayer):
class StubConcatenate(StubAggregateLayer):
'''StubConcatenate Module.
'''
"""StubConcatenate Module.
"""
@property
def output_shape(self):
ret = 0
......@@ -335,8 +385,9 @@ class StubConcatenate(StubAggregateLayer):
class StubAdd(StubAggregateLayer):
'''StubAdd Module.
'''
"""
StubAdd Module.
"""
@property
def output_shape(self):
return self.input[0].shape
......@@ -346,8 +397,9 @@ class StubAdd(StubAggregateLayer):
class StubFlatten(StubLayer):
'''StubFlatten Module.
'''
"""
StubFlatten Module.
"""
@property
def output_shape(self):
ret = 1
......@@ -360,22 +412,28 @@ class StubFlatten(StubLayer):
class StubReLU(StubLayer):
'''StubReLU Module.
'''
"""
StubReLU Module.
"""
def to_real_layer(self):
return torch.nn.ReLU()
class StubSoftmax(StubLayer):
'''StubSoftmax Module.
'''
"""
StubSoftmax Module.
"""
def to_real_layer(self):
return torch.nn.LogSoftmax(dim=1)
class StubDropout(StubLayer):
'''StubDropout Module.
'''
"""
StubDropout Module.
"""
def __init__(self, rate, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.rate = rate
......@@ -386,36 +444,45 @@ class StubDropout(StubLayer):
class StubDropout1d(StubDropout):
'''StubDropout1d Module.
'''
"""
StubDropout1d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout(self.rate)
class StubDropout2d(StubDropout):
'''StubDropout2d Module.
'''
"""
StubDropout2d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout2d(self.rate)
class StubDropout3d(StubDropout):
'''StubDropout3d Module.
'''
"""
StubDropout3d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout3d(self.rate)
class StubInput(StubLayer):
'''StubInput Module.
'''
"""
StubInput Module.
"""
def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node)
class StubPooling(StubLayer):
'''StubPooling Module.
'''
"""
StubPooling Module.
"""
def __init__(self,
kernel_size=None,
......@@ -444,30 +511,37 @@ class StubPooling(StubLayer):
class StubPooling1d(StubPooling):
'''StubPooling1d Module.
'''
"""
StubPooling1d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride)
class StubPooling2d(StubPooling):
'''StubPooling2d Module.
'''
"""
StubPooling2d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride)
class StubPooling3d(StubPooling):
'''StubPooling3d Module.
'''
"""
StubPooling3d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride)
class StubGlobalPooling(StubLayer):
'''StubGlobalPooling Module.
'''
"""
StubGlobalPooling Module.
"""
def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node)
......@@ -481,49 +555,63 @@ class StubGlobalPooling(StubLayer):
class StubGlobalPooling1d(StubGlobalPooling):
'''StubGlobalPooling1d Module.
'''
"""
StubGlobalPooling1d Module.
"""
def to_real_layer(self):
return GlobalAvgPool1d()
class StubGlobalPooling2d(StubGlobalPooling):
'''StubGlobalPooling2d Module.
'''
"""
StubGlobalPooling2d Module.
"""
def to_real_layer(self):
return GlobalAvgPool2d()
class StubGlobalPooling3d(StubGlobalPooling):
'''StubGlobalPooling3d Module.
'''
"""
StubGlobalPooling3d Module.
"""
def to_real_layer(self):
return GlobalAvgPool3d()
class TorchConcatenate(nn.Module):
'''TorchConcatenate Module.
'''
"""
TorchConcatenate Module.
"""
def forward(self, input_list):
return torch.cat(input_list, dim=1)
class TorchAdd(nn.Module):
'''TorchAdd Module.
'''
"""
TorchAdd Module.
"""
def forward(self, input_list):
return input_list[0] + input_list[1]
class TorchFlatten(nn.Module):
'''TorchFlatten Module.
'''
"""
TorchFlatten Module.
"""
def forward(self, input_tensor):
return input_tensor.view(input_tensor.size(0), -1)
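TorchFlatten keeps the batch dimension and collapses everything else, so an (N, C, H, W) activation becomes (N, C*H*W). A minimal sketch (the class is defined in this file; the import path below is an assumption):
import torch
from nni.networkmorphism_tuner.layers import TorchFlatten  # assumed import path

x = torch.zeros(8, 16, 4, 4)
print(TorchFlatten()(x).shape)  # torch.Size([8, 256])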
def keras_dropout(layer, rate):
'''keras dropout layer.
'''
"""
Keras dropout layer.
"""
from keras import layers
......@@ -539,8 +627,9 @@ def keras_dropout(layer, rate):
def to_real_keras_layer(layer):
''' real keras layer.
'''
"""
Real keras layer.
"""
from keras import layers
if is_layer(layer, "Dense"):
......@@ -574,10 +663,14 @@ def to_real_keras_layer(layer):
def is_layer(layer, layer_type):
'''judge the layer type.
Returns:
"""
Judge the layer type.
Returns
-------
bool
boolean -- True or False
'''
"""
if layer_type == "Input":
return isinstance(layer, StubInput)
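For context, is_layer dispatches on a type-name string instead of scattering isinstance checks across call sites; only the "Input" branch is visible before the truncation. A hedged usage sketch (import path assumed; the "Conv" branch is assumed to match any StubConv subclass, as the calls in graph.py suggest):
from nni.networkmorphism_tuner.layers import StubConv2d, StubReLU, is_layer  # assumed path

conv = StubConv2d(input_channel=3, filters=16, kernel_size=3)
assert is_layer(conv, "Conv")        # StubConv2d is a StubConv subclass
assert not is_layer(conv, "ReLU")
assert is_layer(StubReLU(), "ReLU")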
......@@ -607,8 +700,9 @@ def is_layer(layer, layer_type):
def layer_description_extractor(layer, node_to_id):
'''get layer description.
'''
"""
Get layer description.
"""
layer_input = layer.input
layer_output = layer.output
......@@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id):
layer.units,
]
elif isinstance(layer, (StubBatchNormalization,)):
return (type(layer).__name__, layer_input, layer_output, layer.num_features)
return (type(layer).__name__, layer_input,
layer_output, layer.num_features)
elif isinstance(layer, (StubDropout,)):
return (type(layer).__name__, layer_input, layer_output, layer.rate)
elif isinstance(layer, StubPooling):
......@@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id):
def layer_description_builder(layer_information, id_to_node):
'''build layer from description.
'''
"""build layer from description.
"""
layer_type = layer_information[0]
layer_input_ids = layer_information[1]
......@@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node):
def layer_width(layer):
'''get layer width.
'''
"""
Get layer width.
"""
if is_layer(layer, "Dense"):
return layer.units
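Only the Dense branch of layer_width survives the truncation above; for a dense layer the "width" is simply its output unit count. Illustrative use (import path assumed):
from nni.networkmorphism_tuner.layers import StubDense, layer_width  # assumed import path

assert layer_width(StubDense(input_units=128, units=64)) == 64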
......
......@@ -17,11 +17,13 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
"""
networkmorphism_tuner.py
"""
import logging
import os
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward
from nni.networkmorphism_tuner.bayesian import BayesianOptimizer
......@@ -34,7 +36,35 @@ logger = logging.getLogger("NetworkMorphism_AutoML")
class NetworkMorphismTuner(Tuner):
"""NetworkMorphismTuner is a tuner which using network morphism techniques."""
"""
NetworkMorphismTuner is a tuner which uses network morphism techniques.
Attributes
----------
n_classes : int
The class number or output node number (default: ``10``)
input_shape : tuple
A tuple including: (input_width, input_width, input_channel)
t_min : float
The minimum temperature for simulated annealing. (default: ``Constant.T_MIN``)
beta : float
The beta in acquisition function. (default: ``Constant.BETA``)
algorithm_name : str
Algorithm name used in the network morphism (default: ``"Bayesian"``)
optimize_mode : str
Optimize mode, "minimize" or "maximize" (default: ``"minimize"``)
verbose : bool
Whether to print verbose logs (default: ``True``)
bo : BayesianOptimizer
The optimizer used in the network morphism tuner.
max_model_size : int
Max model size of the graph (default: ``Constant.MAX_MODEL_SIZE``)
default_model_len : int
Default model length (default: ``Constant.MODEL_LEN``)
default_model_width : int
Default model width (default: ``Constant.MODEL_WIDTH``)
search_space : dict
"""
def __init__(
self,
......@@ -52,36 +82,8 @@ class NetworkMorphismTuner(Tuner):
default_model_len=Constant.MODEL_LEN,
default_model_width=Constant.MODEL_WIDTH,
):
""" initilizer of the NetworkMorphismTuner.
Parameters
----------
task : str
task mode, such as "cv","common" etc. (default: {"cv"})
input_width : int
input sample shape (default: {32})
input_channel : int
input sample shape (default: {3})
n_output_node : int
output node number (default: {10})
algorithm_name : str
algorithm name used in the network morphism (default: {"Bayesian"})
optimize_mode : str
optimize mode "minimize" or "maximize" (default: {"minimize"})
path : str
default mode path to save the model file (default: {"model_path"})
verbose : bool
verbose to print the log (default: {True})
beta : float
The beta in acquisition function. (default: {Constant.BETA})
t_min : float
The minimum temperature for simulated annealing. (default: {Constant.T_MIN})
max_model_size : int
max model size to the graph (default: {Constant.MAX_MODEL_SIZE})
default_model_len : int
default model length (default: {Constant.MODEL_LEN})
default_model_width : int
default model width (default: {Constant.MODEL_WIDTH})
"""
Initializer of the NetworkMorphismTuner.
"""
if not os.path.exists(path):
......@@ -92,7 +94,8 @@ class NetworkMorphismTuner(Tuner):
elif task == "common":
self.generators = [MlpGenerator]
else:
raise NotImplementedError('{} task not supported in List ["cv","common"]')
raise NotImplementedError(
'{} task not supported in List ["cv","common"]'.format(task))
self.n_classes = n_output_node
self.input_shape = (input_width, input_width, input_channel)
......@@ -106,7 +109,8 @@ class NetworkMorphismTuner(Tuner):
self.verbose = verbose
self.model_count = 0
self.bo = BayesianOptimizer(self, self.t_min, self.optimize_mode, self.beta)
self.bo = BayesianOptimizer(
self, self.t_min, self.optimize_mode, self.beta)
self.training_queue = []
self.descriptors = []
self.history = []
......@@ -117,6 +121,7 @@ class NetworkMorphismTuner(Tuner):
self.search_space = dict()
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in neural architecture.
......@@ -140,7 +145,8 @@ class NetworkMorphismTuner(Tuner):
new_father_id, generated_graph = self.generate()
new_model_id = self.model_count
self.model_count += 1
self.training_queue.append((generated_graph, new_father_id, new_model_id))
self.training_queue.append(
(generated_graph, new_father_id, new_model_id))
self.descriptors.append(generated_graph.extract_descriptor())
graph, father_id, model_id = self.training_queue.pop(0)
......@@ -153,12 +159,15 @@ class NetworkMorphismTuner(Tuner):
return json_out
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
""" Record an observation of the objective function.
"""
Record an observation of the objective function.
Parameters
----------
parameter_id : int
the id of a group of parameters generated by the nni manager.
parameters : dict
A group of parameters.
value : dict/float
If value is a dict, it should have a "default" key.
"""
......@@ -175,8 +184,11 @@ class NetworkMorphismTuner(Tuner):
self.add_model(reward, model_id)
self.update(father_id, graph, reward, model_id)
def init_search(self):
"""Call the generators to generate the initial architectures for the search."""
"""
Call the generators to generate the initial architectures for the search.
"""
if self.verbose:
logger.info("Initializing search.")
for generator in self.generators:
......@@ -191,14 +203,16 @@ class NetworkMorphismTuner(Tuner):
if self.verbose:
logger.info("Initialization finished.")
def generate(self):
"""Generate the next neural architecture.
"""
Generate the next neural architecture.
Returns
-------
other_info: any object
other_info : any object
Anything to be saved in the training queue together with the architecture.
generated_graph: Graph
generated_graph : Graph
An instance of Graph.
"""
generated_graph, new_father_id = self.bo.generate(self.descriptors)
......@@ -211,7 +225,8 @@ class NetworkMorphismTuner(Tuner):
return new_father_id, generated_graph
def update(self, other_info, graph, metric_value, model_id):
""" Update the controller with evaluation result of a neural architecture.
"""
Update the controller with evaluation result of a neural architecture.
Parameters
----------
......@@ -228,7 +243,8 @@ class NetworkMorphismTuner(Tuner):
self.bo.add_child(father_id, model_id)
def add_model(self, metric_value, model_id):
""" Add model to the history, x_queue and y_queue
"""
Add model to the history, x_queue and y_queue
Parameters
----------
......@@ -252,16 +268,21 @@ class NetworkMorphismTuner(Tuner):
file.close()
return ret
def get_best_model_id(self):
""" Get the best model_id from history using the metric value
"""
Get the best model_id from history using the metric value
"""
if self.optimize_mode is OptimizeMode.Maximize:
return max(self.history, key=lambda x: x["metric_value"])["model_id"]
return max(self.history, key=lambda x: x["metric_value"])[
"model_id"]
return min(self.history, key=lambda x: x["metric_value"])["model_id"]
def load_model_by_id(self, model_id):
"""Get the model by model_id
"""
Get the model by model_id
Parameters
----------
......@@ -281,7 +302,8 @@ class NetworkMorphismTuner(Tuner):
return load_model
def load_best_model(self):
""" Get the best model by model id
"""
Get the best model by model id
Returns
-------
......@@ -291,7 +313,8 @@ class NetworkMorphismTuner(Tuner):
return self.load_model_by_id(self.get_best_model_id())
def get_metric_value_by_id(self, model_id):
""" Get the model metric valud by its model_id
"""
Get the model metric value by its model_id
Parameters
----------
......
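Taken together, the bookkeeping methods above let callers recover the best architecture once trials report back. A hedged sketch of that flow (module path and constructor arguments assumed from the docstrings in this file):
from nni.networkmorphism_tuner.networkmorphism_tuner import NetworkMorphismTuner  # assumed path

tuner = NetworkMorphismTuner(task="cv", n_output_node=10, optimize_mode="maximize")
# ... the NNI manager drives the trial loop and calls receive_trial_result() as trials finish ...
best_id = tuner.get_best_model_id()             # arg-max over history by metric_value
best_graph = tuner.load_model_by_id(best_id)    # equivalent to tuner.load_best_model()
print(tuner.get_metric_value_by_id(best_id))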
......@@ -92,17 +92,25 @@ class CnnGenerator(NetworkGenerator):
for i in range(model_len):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
output_node_id = graph.add_layer(
self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id
self.batch_norm(
graph.node_list[output_node_id].shape[-1]), output_node_id
)
output_node_id = graph.add_layer(
self.conv(temp_input_channel, model_width, kernel_size=3, stride=stride),
self.conv(
temp_input_channel,
model_width,
kernel_size=3,
stride=stride),
output_node_id,
)
temp_input_channel = model_width
if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(self.pooling(), output_node_id)
if pooling_len == 0 or (
(i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(
self.pooling(), output_node_id)
output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id
)
......@@ -111,7 +119,11 @@ class CnnGenerator(NetworkGenerator):
output_node_id,
)
output_node_id = graph.add_layer(StubReLU(), output_node_id)
graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
model_width,
self.n_output_node),
output_node_id)
return graph
......@@ -145,7 +157,8 @@ class MlpGenerator(NetworkGenerator):
if model_width is None:
model_width = Constant.MODEL_WIDTH
if isinstance(model_width, list) and not len(model_width) == model_len:
raise ValueError("The length of 'model_width' does not match 'model_len'")
raise ValueError(
"The length of 'model_width' does not match 'model_len'")
elif isinstance(model_width, int):
model_width = [model_width] * model_len
......@@ -162,5 +175,9 @@ class MlpGenerator(NetworkGenerator):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
n_nodes_prev_layer = width
graph.add_layer(StubDense(n_nodes_prev_layer, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
n_nodes_prev_layer,
self.n_output_node),
output_node_id)
return graph
......@@ -59,9 +59,12 @@ class NetworkMorphismTestCase(TestCase):
graph_recover.layer_id_to_input_node_ids,
)
self.assertEqual(graph_init.adj_list, graph_recover.adj_list)
self.assertEqual(graph_init.reverse_adj_list, graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(graph_recover.operation_history)
graph_init.reverse_adj_list,
graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(
graph_recover.operation_history)
)
self.assertEqual(graph_init.n_dim, graph_recover.n_dim)
self.assertEqual(graph_init.conv, graph_recover.conv)
......@@ -71,7 +74,8 @@ class NetworkMorphismTestCase(TestCase):
node_list_init = [node.shape for node in graph_init.node_list]
node_list_recover = [node.shape for node in graph_recover.node_list]
self.assertEqual(node_list_init, node_list_recover)
self.assertEqual(len(graph_init.node_to_id), len(graph_recover.node_to_id))
self.assertEqual(len(graph_init.node_to_id),
len(graph_recover.node_to_id))
layer_list_init = [
layer_description_extractor(item, graph_init.node_to_id)
for item in graph_init.layer_list
......@@ -82,7 +86,8 @@ class NetworkMorphismTestCase(TestCase):
]
self.assertEqual(layer_list_init, layer_list_recover)
node_to_id_init = [graph_init.node_to_id[node] for node in graph_init.node_list]
node_to_id_init = [graph_init.node_to_id[node]
for node in graph_init.node_list]
node_to_id_recover = [
graph_recover.node_to_id[node] for node in graph_recover.node_list
]
......
......@@ -77,7 +77,7 @@ class PdType:
class CategoricalPd(Pd):
"""
categorical prossibility distribution
Categorical probability distribution
"""
def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
self.logits = logits
......@@ -154,7 +154,7 @@ class CategoricalPd(Pd):
class CategoricalPdType(PdType):
"""
to create CategoricalPd
To create CategoricalPd
"""
def __init__(self, ncat, nsteps, np_mask, is_act_model):
self.ncat = ncat
......@@ -180,7 +180,7 @@ class CategoricalPdType(PdType):
def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
"""
add fc op, and add mask op when not in action mode
Add fc op, and add mask op when not in action mode
"""
if tensor.shape[-1] == size:
assert False
......