Unverified commit 7620e7c5, authored by SparkSnail, committed by GitHub

Merge pull request #214 from microsoft/master

merge master
parents c037a7c1 187494aa
@@ -28,21 +28,18 @@ from .util import initialize, get_session
 class Model:
     """
     We use this object to:
     __init__:
     - Creates the step_model
     - Creates the train_model
     train():
     - Runs the training part (feedforward and backpropagation of gradients)
     save/load():
     - Saves / loads the model
     """
     def __init__(self, *, policy, nbatch_act, nbatch_train,
                  nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None):
-        """
-        init
-        """
         self.sess = sess = get_session()
         with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
@@ -137,9 +134,13 @@ class Model:
     def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):
         """
-        train the model.
+        Train the model.
         Here we calculate advantage A(s,a) = R + yV(s') - V(s)
-        Returns = R + yV(s')
+        Returns
+        -------
+        obj
+            = R + yV(s')
         """
         advs = returns - values
......
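The docstring above defines the advantage as A(s,a) = R + yV(s') - V(s) with Returns = R + yV(s'), so the body computes it with a plain subtraction. A minimal NumPy sketch of that identity, with made-up values for illustration only:

```python
import numpy as np

# `returns` already equal R + y * V(s'), per the docstring, so the
# advantage reduces to a subtraction; the values below are made up.
returns = np.array([1.2, 0.8, 1.5], dtype=np.float32)  # R + y * V(s')
values = np.array([1.0, 1.0, 1.0], dtype=np.float32)   # V(s)

advs = returns - values  # A(s, a) = R + y * V(s') - V(s)
# ppo2-style trainers usually also normalize advantages per minibatch:
advs = (advs - advs.mean()) / (advs.std() + 1e-8)
```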
@@ -34,14 +34,20 @@ class PolicyWithValue:
     def __init__(self, env, observations, latent, estimate_q=False, vf_latent=None, sess=None, np_mask=None, is_act_model=False, **tensors):
         """
-        Parameters:
+        Parameters
         ----------
-        env: RL environment
-        observations: tensorflow placeholder in which the observations will be fed
-        latent: latent state from which policy distribution parameters should be inferred
-        vf_latent: latent state from which value function should be inferred (if None, then latent is used)
-        sess: tensorflow session to run calculations in (if None, default session is used)
-        **tensors: tensorflow tensors for additional attributes such as state or mask
+        env : obj
+            RL environment
+        observations : tensorflow placeholder
+            Tensorflow placeholder in which the observations will be fed
+        latent : tensor
+            Latent state from which policy distribution parameters should be inferred
+        vf_latent : tensor
+            Latent state from which value function should be inferred (if None, then latent is used)
+        sess : tensorflow session
+            Tensorflow session to run calculations in (if None, default session is used)
+        **tensors
+            Tensorflow tensors for additional attributes such as state or mask
         """
         self.X = observations
@@ -138,12 +144,14 @@ class PolicyWithValue:
         """
         Compute next action(s) given the observation(s)
-        Parameters:
+        Parameters
         ----------
-        observation: observation data (either single or a batch)
-        **extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
+        observation : np array
+            Observation data (either single or a batch)
+        **extra_feed
+            Additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
-        Returns:
+        Returns
         -------
         (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
         """
@@ -157,22 +165,40 @@ class PolicyWithValue:
         """
         Compute value estimate(s) given the observation(s)
-        Parameters:
+        Parameters
         ----------
-        observation: observation data (either single or a batch)
-        **extra_feed: additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
+        observation : np array
+            Observation data (either single or a batch)
+        **extra_feed
+            Additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
-        Returns:
+        Returns
         -------
-        value estimate
+        Value estimate
         """
         return self._evaluate(self.vf, ob, *args, **kwargs)
 def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
     """
-    build lstm policy and value network, they share the same lstm network.
+    Build lstm policy and value network; they share the same lstm network.
     The parameters all use their default values.
+    Parameters
+    ----------
+    model_config : obj
+        Configurations of the model
+    value_network : obj
+        The network for value function
+    estimate_q : bool
+        Whether to estimate ``q``
+    **policy_kwargs
+        The kwargs for policy network, i.e., lstm model
+    Returns
+    -------
+    func
+        The policy network
     """
     policy_network = lstm_model(**policy_kwargs)
......
@@ -38,8 +38,10 @@ from .policy import build_lstm_policy
 logger = logging.getLogger('ppo_tuner_AutoML')
-def constfn(val):
-    """wrap as function"""
+def _constfn(val):
+    """
+    Wrap as function
+    """
     def f(_):
         return val
     return f
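The helper wraps a constant so that constant hyper-parameters and schedules share the same callable interface; the wrapped function ignores its argument (typically the training progress fraction). A self-contained usage sketch mirroring the code above:

```python
def _constfn(val):
    """Wrap a constant as a function of training progress."""
    def f(_):
        return val
    return f

lr = _constfn(3e-4)
assert lr(0.0) == lr(1.0) == 3e-4  # same value at any progress fraction
```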
@@ -90,7 +92,7 @@ class TrialsInfo:
     def get_next(self):
         """
-        get actions of the next trial
+        Get actions of the next trial
         """
         if self.iter >= self.inf_batch_size:
             return None, None
@@ -102,14 +104,14 @@ class TrialsInfo:
     def update_rewards(self, rewards, returns):
         """
-        after the trial is finished, reward and return of this trial is updated
+        After the trial is finished, the reward and return of this trial are updated
         """
         self.rewards = rewards
         self.returns = returns
     def convert_shape(self):
         """
-        convert shape
+        Convert shape
         """
         def sf01(arr):
             """
@@ -138,9 +140,9 @@ class PPOModel:
         set_global_seeds(None)
         assert isinstance(self.model_config.lr, float)
-        self.lr = constfn(self.model_config.lr)
+        self.lr = _constfn(self.model_config.lr)
         assert isinstance(self.model_config.cliprange, float)
-        self.cliprange = constfn(self.model_config.cliprange)
+        self.cliprange = _constfn(self.model_config.cliprange)
         # build lstm policy network, value share the same network
         policy = build_lstm_policy(model_config)
@@ -165,12 +167,28 @@ class PPOModel:
     def inference(self, num):
         """
-        generate actions along with related info from policy network.
+        Generate actions along with related info from policy network.
         The observation is the action of the last step.
-        Parameters:
+        Parameters
         ----------
-        num: the number of trials to generate
+        num : int
+            The number of trials to generate
+        Returns
+        -------
+        mb_obs : list
+            Observations of the ``num`` configurations
+        mb_actions : list
+            Actions of the ``num`` configurations
+        mb_values : list
+            Values from the value function of the ``num`` configurations
+        mb_neglogpacs : list
+            ``neglogp`` of the ``num`` configurations
+        mb_dones : list
+            Whether the play is done, always ``True``
+        last_values : tensorflow tensor
+            The last values of the ``num`` configurations, obtained with a session run
         """
         # Here, we init the lists that will contain the mb of experiences
         mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
@@ -212,13 +230,15 @@ class PPOModel:
     def compute_rewards(self, trials_info, trials_result):
         """
-        compute the rewards of the trials in trials_info based on trials_result,
+        Compute the rewards of the trials in trials_info based on trials_result,
         and update the rewards in trials_info
-        Parameters:
+        Parameters
         ----------
-        trials_info: info of the generated trials
-        trials_result: final results (e.g., acc) of the generated trials
+        trials_info : TrialsInfo
+            Info of the generated trials
+        trials_result : list
+            Final results (e.g., acc) of the generated trials
         """
         mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
         # discount/bootstrap off value fn
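The `# discount/bootstrap off value fn` step is the generalized advantage estimation (GAE) recursion driven by the `gamma` and `lam` parameters documented further below. A generic sketch of that recursion; the tuner's actual loop may differ, e.g. in how done flags are handled:

```python
import numpy as np

def gae_sketch(rewards, values, last_value, gamma=0.99, lam=0.95):
    """Backward GAE recursion: delta_t = r_t + gamma*V(s_{t+1}) - V(s_t),
    adv_t = delta_t + gamma*lam*adv_{t+1}; returns = advantages + values."""
    nsteps = len(rewards)
    advs = np.zeros(nsteps, dtype=np.float32)
    lastgaelam = 0.0
    for t in reversed(range(nsteps)):
        next_value = last_value if t == nsteps - 1 else values[t + 1]
        delta = rewards[t] + gamma * next_value - values[t]
        lastgaelam = delta + gamma * lam * lastgaelam
        advs[t] = lastgaelam
    return advs, advs + values

advs, rets = gae_sketch(np.array([1.0, 0.0, 1.0]),
                        np.array([0.5, 0.5, 0.5]), last_value=0.5)
```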
@@ -243,12 +263,14 @@ class PPOModel:
     def train(self, trials_info, nenvs):
         """
-        train the policy/value network using trials_info
+        Train the policy/value network using trials_info
-        Parameters:
+        Parameters
         ----------
-        trials_info: complete info of the generated trials from the previous inference
-        nenvs: the batch size of the (previous) inference
+        trials_info : TrialsInfo
+            Complete info of the generated trials from the previous inference
+        nenvs : int
+            The batch size of the (previous) inference
         """
         # keep frac decay for future optimization
         if self.cur_update <= self.nupdates:
@@ -282,27 +304,40 @@ class PPOModel:
 class PPOTuner(Tuner):
     """
-    PPOTuner
+    PPOTuner, whose implementation inherits the main logic of
+    [ppo2 from openai](https://github.com/openai/baselines/tree/master/baselines/ppo2) and is adapted for the NAS scenario.
+    It uses ``lstm`` for its policy network and value network; policy and value share the same network.
     """
     def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
                  ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
         """
-        initialization, PPO model is not initialized here as search space is not received yet.
+        Initialization. The PPO model is not initialized here because the search space has not been received yet.
-        Parameters:
+        Parameters
         ----------
-        optimize_mode: maximize or minimize
-        trials_per_update: number of trials to have for each model update
-        epochs_per_update: number of epochs to run for each model update
-        minibatch_size: minibatch size (number of trials) for the update
-        ent_coef: policy entropy coefficient in the optimization objective
-        lr: learning rate of the model (lstm network), constant
-        vf_coef: value function loss coefficient in the optimization objective
-        max_grad_norm: gradient norm clipping coefficient
-        gamma: discounting factor
-        lam: advantage estimation discounting factor (lambda in the paper)
-        cliprange: cliprange in the PPO algorithm, constant
+        optimize_mode : str
+            'maximize' or 'minimize'
+        trials_per_update : int
+            Number of trials to have for each model update
+        epochs_per_update : int
+            Number of epochs to run for each model update
+        minibatch_size : int
+            Minibatch size (number of trials) for the update
+        ent_coef : float
+            Policy entropy coefficient in the optimization objective
+        lr : float
+            Learning rate of the model (lstm network), constant
+        vf_coef : float
+            Value function loss coefficient in the optimization objective
+        max_grad_norm : float
+            Gradient norm clipping coefficient
+        gamma : float
+            Discounting factor
+        lam : float
+            Advantage estimation discounting factor (lambda in the paper)
+        cliprange : float
+            Cliprange in the PPO algorithm, constant
         """
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.model_config = ModelConfig()
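For reference, a standalone instantiation with the defaults documented above would look like the following; this is only an illustration, since in practice NNI constructs the tuner from the experiment configuration rather than by direct construction:

```python
tuner = PPOTuner(
    optimize_mode='maximize',  # or 'minimize'
    trials_per_update=20,      # trials collected per model update
    epochs_per_update=4,
    minibatch_size=4,
    lr=3e-4,
    gamma=0.99,
    lam=0.95,
    cliprange=0.2,
)
```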
@@ -330,21 +365,25 @@ class PPOTuner(Tuner):
         self.model_config.nminibatches = minibatch_size
         self.send_trial_callback = None
-        logger.info('=== finished PPOTuner initialization')
+        logger.info('Finished PPOTuner initialization')
     def _process_one_nas_space(self, block_name, block_space):
         """
-        process nas space to determine observation space and action space
+        Process nas space to determine observation space and action space
-        Parameters:
+        Parameters
         ----------
-        block_name: the name of the mutable block
-        block_space: search space of this mutable block
+        block_name : str
+            The name of the mutable block
+        block_space : dict
+            Search space of this mutable block
-        Returns:
-        ----------
+        Returns
+        -------
-        actions_spaces: list of the space of each action
-        actions_to_config: the mapping from action to generated configuration
+        actions_spaces : list
+            List of the space of each action
+        actions_to_config : list
+            The mapping from action to generated configuration
         """
         actions_spaces = []
         actions_to_config = []
@@ -385,7 +424,7 @@ class PPOTuner(Tuner):
     def _process_nas_space(self, search_space):
         """
-        process nas search space to get action/observation space
+        Process nas search space to get action/observation space
         """
         actions_spaces = []
         actions_to_config = []
@@ -412,7 +451,7 @@ class PPOTuner(Tuner):
     def _generate_action_mask(self):
         """
-        different step could have different action space. to deal with this case, we merge all the
+        Different steps could have different action spaces. To deal with this case, we merge all the
         possible actions into one action space, and use a mask to indicate available actions for each step
         """
         two_masks = []
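To illustrate the merging described above with hypothetical sizes: if step 0 chooses among 3 candidate actions and step 1 among 5, the merged space has size 5 and a per-step mask marks which entries are valid:

```python
import numpy as np

step_space_sizes = [3, 5]            # hypothetical per-step action counts
merged_size = max(step_space_sizes)  # one merged action space of size 5

mask = np.zeros((len(step_space_sizes), merged_size), dtype=np.float32)
for step, size in enumerate(step_space_sizes):
    mask[step, :size] = 1.0          # 1 = action available at this step
# mask[0] == [1, 1, 1, 0, 0]; masked-out logits are suppressed at sampling
```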
@@ -439,15 +478,13 @@ class PPOTuner(Tuner):
     def update_search_space(self, search_space):
         """
-        get search space, currently the space only includes that for NAS
+        Get search space; currently the space only includes that for NAS
-        Parameters:
+        Parameters
         ----------
-        search_space: search space for NAS
-        Returns:
-        -------
-        no return
+        search_space : dict
+            Search space for NAS
+            For the format, refer to the search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
         logger.info('=== update search space %s', search_space)
         assert self.search_space is None
@@ -470,7 +507,7 @@ class PPOTuner(Tuner):
     def _actions_to_config(self, actions):
         """
-        given actions, to generate the corresponding trial configuration
+        Given actions, generate the corresponding trial configuration
         """
         chosen_arch = copy.deepcopy(self.chosen_arch_template)
         for cnt, act in enumerate(actions):
@@ -490,6 +527,19 @@ class PPOTuner(Tuner):
     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
         """
         Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
+        Parameters
+        ----------
+        parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+        Returns
+        -------
+        list
+            A list of newly generated configurations
         """
         result = []
         self.send_trial_callback = kwargs['st_callback']
@@ -506,7 +556,17 @@ class PPOTuner(Tuner):
     def generate_parameters(self, parameter_id, **kwargs):
         """
-        generate parameters, if no trial configration for now, self.credit plus 1 to send the config later
+        Generate parameters. If there is no trial configuration for now, self.credit is increased by 1 so the config can be sent later.
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+        Returns
+        -------
+        dict
+            One newly generated configuration
         """
         if self.first_inf:
             self.trials_result = [None for _ in range(self.inf_batch_size)]
@@ -527,6 +587,7 @@ class PPOTuner(Tuner):
     def _next_round_inference(self):
         """
+        Run an inference to generate the next batch of configurations
         """
         self.finished_trials = 0
         self.model.compute_rewards(self.trials_info, self.trials_result)
@@ -554,8 +615,17 @@ class PPOTuner(Tuner):
     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
         """
-        receive trial's result. if the number of finished trials equals self.inf_batch_size, start the next update to
-        train the model
+        Receive a trial's result. If the number of finished trials equals self.inf_batch_size, start the next update to
+        train the model.
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
+        parameters : dict
+            Hyper-parameters generated by :meth:`generate_parameters`.
+        value : dict
+            Result from trial (the return value of :func:`nni.report_final_result`).
         """
         trial_info_idx = self.running_trials.pop(parameter_id, None)
         assert trial_info_idx is not None
@@ -572,7 +642,17 @@ class PPOTuner(Tuner):
     def trial_end(self, parameter_id, success, **kwargs):
         """
-        to deal with trial failure
+        Deal with trial failure. If a trial fails, it is popped out from ``self.running_trials``,
+        and the final result of this trial is assigned the average of the finished trials.
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier for hyper-parameters used by this trial.
+        success : bool
+            True if the trial successfully completed; False if failed or terminated.
+        **kwargs
+            Not used
         """
         if not success:
             if parameter_id not in self.running_trials:
@@ -582,7 +662,7 @@ class PPOTuner(Tuner):
             assert trial_info_idx is not None
             # use mean of finished trials as the result of this failed trial
             values = [val for val in self.trials_result if val is not None]
-            logger.warning('zql values: %s', values)
+            logger.warning('In trial_end, values: %s', values)
             self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
             self.finished_trials += 1
             if self.finished_trials == self.inf_batch_size:
@@ -590,10 +670,11 @@ class PPOTuner(Tuner):
     def import_data(self, data):
         """
-        Import additional data for tuning
+        Import additional data for tuning; not supported yet.
         Parameters
         ----------
-        data: a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
+        data : list
+            A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
         """
         logger.warning('PPOTuner cannot leverage imported data.')
@@ -94,12 +94,14 @@ def lstm_model(nlstm=128, layer_norm=False):
     An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example
-    Parameters:
+    Parameters
     ----------
-    nlstm: int LSTM hidden state size
-    layer_norm: bool if True, layer-normalized version of LSTM is used
+    nlstm : int
+        LSTM hidden state size
+    layer_norm : bool
+        If True, a layer-normalized version of LSTM is used
-    Returns:
+    Returns
     -------
     function that builds LSTM with a given input tensor / placeholder
     """
@@ -171,11 +173,15 @@ def adjust_shape(placeholder, data):
     Adjust the shape of the data to the shape of the placeholder if possible.
     If the shape is incompatible, an AssertionError is thrown
-    Parameters:
-    placeholder: tensorflow input placeholder
-    data: input data to be (potentially) reshaped to be fed into placeholder
+    Parameters
+    ----------
+    placeholder
+        tensorflow input placeholder
+    data
+        input data to be (potentially) reshaped to be fed into placeholder
-    Returns:
+    Returns
+    -------
     reshaped data
     """
     if not isinstance(data, np.ndarray) and not isinstance(data, list):
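A minimal sketch of the reshape logic this docstring describes, under a hypothetical helper name; the real function also validates compatibility and raises AssertionError on mismatch:

```python
import numpy as np

def _adjust_shape_sketch(placeholder_shape, data):
    """Reshape data to the placeholder shape, treating None as a free axis
    (hypothetical simplification of adjust_shape)."""
    data = np.asarray(data)
    target = [s if s is not None else -1 for s in placeholder_shape]
    return data.reshape(target)

obs = np.zeros(4)  # a single observation fed to a batched placeholder
assert _adjust_shape_sketch((None, 4), obs).shape == (1, 4)
```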
@@ -230,13 +236,16 @@ def observation_placeholder(ob_space, batch_size=None, name='Ob'):
     """
     Create placeholder to feed observations into of the size appropriate to the observation space
-    Parameters:
+    Parameters
     ----------
-    ob_space: gym.Space observation space
-    batch_size: int size of the batch to be fed into input. Can be left None in most cases.
-    name: str name of the placeholder
+    ob_space : gym.Space
+        observation space
+    batch_size : int
+        size of the batch to be fed into input. Can be left None in most cases.
+    name : str
+        name of the placeholder
-    Returns:
+    Returns
     -------
     tensorflow placeholder tensor
     """
......
@@ -24,11 +24,14 @@ import numpy as np
 def get_json_content(file_path):
-    """Load json file content
+    """
+    Load json file content
     Parameters
     ----------
     file_path:
         path to the file
     Raises
     ------
     TypeError
@@ -43,7 +46,8 @@ def get_json_content(file_path):
 def generate_pcs(nni_search_space_content):
-    """Generate the Parameter Configuration Space (PCS) which defines the
+    """
+    Generate the Parameter Configuration Space (PCS) which defines the
     legal ranges of the parameters to be optimized and their default values.
     Generally, the format is:
     # parameter_name categorical {value_1, ..., value_N} [default value]
@@ -53,14 +57,17 @@ def generate_pcs(nni_search_space_content):
     # parameter_name real [min_value, max_value] [default value]
     # parameter_name real [min_value, max_value] [default value] log
     Reference: https://automl.github.io/SMAC3/stable/options.html
     Parameters
     ----------
     nni_search_space_content: search_space
         The search space in this experiment in nni
     Returns
     -------
     Parameter Configuration Space (PCS)
         the legal ranges of the parameters to be optimized and their default values
     Raises
     ------
     RuntimeError
@@ -122,7 +129,8 @@ def generate_pcs(nni_search_space_content):
 def generate_scenario(ss_content):
-    """Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
+    """
+    Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
     can be constructed either by providing an actual scenario-object, or by specifying the options in a scenario file.
     Reference: https://automl.github.io/SMAC3/stable/options.html
     The format of the scenario file is one option per line:
@@ -191,6 +199,7 @@ def generate_scenario(ss_content):
     wallclock_limit: int
         Maximum amount of wallclock-time used for optimization. Default: inf.
         Use default because this is controlled by nni
     Returns
     -------
     Scenario:
......
@@ -76,10 +76,12 @@ class Tuner(Recoverable):
     Builtin tuners:
     :class:`~nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner`
     :class:`~nni.evolution_tuner.evolution_tuner.EvolutionTuner`
-    :class:`~nni.smac_tuner.smac_tuner.SMACTuner`
-    :class:`~nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner`
+    :class:`~nni.smac_tuner.SMACTuner`
+    :class:`~nni.gridsearch_tuner.GridSearchTuner`
     :class:`~nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner`
     :class:`~nni.metis_tuner.metis_tuner.MetisTuner`
+    :class:`~nni.ppo_tuner.PPOTuner`
+    :class:`~nni.gp_tuner.gp_tuner.GPTuner`
     """
     def generate_parameters(self, parameter_id, **kwargs):
......
@@ -114,7 +114,14 @@ class CompressorTestCase(TestCase):
     def test_torch_quantizer(self):
         model = TorchMnist()
-        torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress()
+        configure_list = [{
+            'quant_types': ['weight'],
+            'quant_bits': {
+                'weight': 8,
+            },
+            'op_types': ['Conv2d', 'Linear']
+        }]
+        torch_compressor.NaiveQuantizer(model, configure_list).compress()
 if __name__ == '__main__':
......
@@ -66,7 +66,7 @@ EXPERIMENT_INFORMATION_FORMAT = '-----------------------------------------------
                                 '%s\n' \
                                 '----------------------------------------------------------------------------------------\n'
-EXPERIMENT_DETAIL_FORMAT = 'Id: %s Status: %s Port: %s Platform: %s StartTime: %s EndTime: %s \n'
+EXPERIMENT_DETAIL_FORMAT = 'Id: %s Name: %s Status: %s Port: %s Platform: %s StartTime: %s EndTime: %s\n'
 EXPERIMENT_MONITOR_INFO = 'Id: %s Status: %s Port: %s Platform: %s \n' \
                           'StartTime: %s Duration: %s'
......