[Doc] Tuners: DNGO, PBT, PPO (#4589)

e9fc8f07 · Yuge Zhang · GitHub · 297a1e2e · e9fc8f07 · e9fc8f07
Unverified Commit e9fc8f07 authored Mar 19, 2022 by Yuge Zhang Committed by GitHub Mar 19, 2022
3 changed files
--- a/nni/algorithms/hpo/dngo_tuner.py
+++ b/nni/algorithms/hpo/dngo_tuner.py
@@ -44,7 +44,20 @@ def _random_config(search_space, random_state):
 class DNGOTuner(Tuner):
+    """
+    Use neural networks as an alternative to Gaussian processes (GPs) to model distributions over functions in Bayesian optimization.
+    Parameters
+    ----------
+    optimize_mode : maximize | minimize, default = maximize
+        If 'maximize', the tuner will target to maximize metrics. If 'minimize', the tuner will target to minimize metrics.
+    sample_size : int, default = 1000
+        Number of samples to select in each iteration. The best one will be picked from the samples as the next trial.
+    trials_per_update : int, default = 20
+        Number of trials to collect before updating the model.
+    num_epochs_per_training : int, default = 500
+        Number of epochs to train DNGO model.
+    """
    def __init__(self, optimize_mode='maximize', sample_size=1000, trials_per_update=20, num_epochs_per_training=500):
        self.searchspace_json = None
        self.random_state = None

--- a/nni/algorithms/hpo/pbt_tuner.py
+++ b/nni/algorithms/hpo/pbt_tuner.py
@@ -170,26 +170,91 @@ class PBTClassArgsValidator(ClassArgsValidator):
        }).validate(kwargs)
 class PBTTuner(Tuner):
+    """
+    Population Based Training (PBT) comes from `Population Based Training of Neural Networks <https://arxiv.org/abs/1711.09846v1>`__.
+    It's a simple asynchronous optimization algorithm which effectively utilizes a fixed computational budget to jointly optimize
+    a population of models and their hyperparameters to maximize performance.
+    Importantly, PBT discovers a schedule of hyperparameter settings rather than following the generally sub-optimal strategy of
+    trying to find a single fixed set to use for the whole course of training.
+    .. image:: ../../img/pbt.jpg
+    PBTTuner initializes a population with several trials (i.e., ``population_size``).
+    There are four steps in the above figure; each trial runs only one step at a time. The length of one step is controlled by the trial code,
+    e.g., one epoch. When a trial starts, it loads a checkpoint specified by PBTTuner and continues to run one step,
+    then saves checkpoint to a directory specified by PBTTuner and exits.
+    The trials in a population run steps synchronously, that is, after all the trials finish the ``i``-th step,
+    the ``(i+1)``-th step can be started. Exploitation and exploration of PBT are executed between two consecutive steps.
+    Two important steps to follow if you are trying to use PBTTuner:
+    1. **Provide checkpoint directory**. Since some trials need to load other trials' checkpoints,
+       users should provide a directory (i.e., ``all_checkpoint_dir``) which is accessible by every trial.
+       This is easy in local mode: users can directly use the default directory or specify any directory on the local machine.
+       For other training services, users should follow :doc:`the document of those training services <../experiment/training_service>`
+       to provide a directory in a shared storage, such as NFS, Azure storage.
+    2. **Modify your trial code**. Before running a step, a trial needs to load a checkpoint,
+       the checkpoint directory is specified in hyper-parameter configuration generated by PBTTuner,
+       i.e., ``params['load_checkpoint_dir']``. Similarly, the directory for saving checkpoint is also included in the configuration,
+       i.e., ``params['save_checkpoint_dir']``. Here, ``all_checkpoint_dir`` is the base folder of ``load_checkpoint_dir``
+       and ``save_checkpoint_dir`` whose format is ``all_checkpoint_dir/<population-id>/<step>``.
+       .. code-block:: python
+        params = nni.get_next_parameter()
+        # the path of the checkpoint to load
+        load_path = os.path.join(params['load_checkpoint_dir'], 'model.pth')
+        # load checkpoint from `load_path`
+        ...
+        # run one step
+        ...
+        # the path for saving a checkpoint
+        save_path = os.path.join(params['save_checkpoint_dir'], 'model.pth')
+        # save checkpoint to `save_path`
+        ...
+    The complete example code can be found :githublink:`here <examples/trials/mnist-pbt-tuner-pytorch>`.
+    Parameters
+    ----------
+    optimize_mode : ``maximize`` or ``minimize``, default: ``maximize``
+        If ``maximize``, the tuner will target to maximize metrics. If ``minimize``, the tuner will target to minimize metrics.
+    all_checkpoint_dir : str
+        Directory for trials to load and save checkpoint.
+        If not specified, the directory would be ``~/nni/checkpoint/``.
+        Note that if the experiment is not local mode,
+        users should provide a path in a shared storage which can be accessed by all the trials.
+    population_size : int, default = 10
+        Number of trials in a population. Each step has this number of trials.
+        In our implementation, one step means running each trial for a specific number of training epochs set by users.
+    factor : float, default = 0.2
+        Factors for perturbation of hyperparameters.
+    resample_probability : float, default = 0.25
+        Probability for resampling.
+    fraction : float, default = 0.2
+        Fraction for selecting bottom and top trials.
+    Examples
+    --------
+    Below is an example of PBTTuner configuration in experiment config file.
+    .. code-block:: yaml
+        tuner:
+          name: PBTTuner
+          classArgs:
+            optimize_mode: maximize
+            all_checkpoint_dir: /the/path/to/store/checkpoints
+            population_size: 10
+    Notes
+    -----
+    Assessor is not allowed if PBTTuner is used.
+    """
    def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population_size=10, factor=0.2,
                 resample_probability=0.25, fraction=0.2):
-        """
-        Initialization
-        Parameters
-        ----------
-        optimize_mode : str
-            maximize or minimize
-        all_checkpoint_dir : str
-            directory to store training model checkpoint
-        population_size : int
-            number of trials for each epoch
-        factor : float
-            factor for perturbation
-        resample_probability : float
-            probability for resampling
-        fraction : float
-            fraction for selecting bottom and top trials
-        """
        self.optimize_mode = OptimizeMode(optimize_mode)
        if all_checkpoint_dir is None:
            all_checkpoint_dir = os.getenv('NNI_CHECKPOINT_DIRECTORY')

--- a/nni/algorithms/hpo/ppo_tuner/ppo_tuner.py
+++ b/nni/algorithms/hpo/ppo_tuner/ppo_tuner.py
@@ -306,40 +306,37 @@ class PPOClassArgsValidator(ClassArgsValidator):
 class PPOTuner(Tuner):
    """
    PPOTuner, the implementation inherits the main logic of the implementation
-    [ppo2 from openai](https://github.com/openai/baselines/tree/master/baselines/ppo2), and is adapted for NAS scenario.
+    `ppo2 from openai <https://github.com/openai/baselines/tree/master/baselines/ppo2>`__ and is adapted for NAS scenario.
    It uses ``lstm`` for its policy network and value network, policy and value share the same network.
+    Parameters
+    ----------
+    optimize_mode : str
+        maximize or minimize
+    trials_per_update : int
+        Number of trials to have for each model update
+    epochs_per_update : int
+        Number of epochs to run for each model update
+    minibatch_size : int
+        Minibatch size (number of trials) for the update
+    ent_coef : float
+        Policy entropy coefficient in the optimization objective
+    lr : float
+        Learning rate of the model (lstm network), constant
+    vf_coef : float
+        Value function loss coefficient in the optimization objective
+    max_grad_norm : float
+        Gradient norm clipping coefficient
+    gamma : float
+        Discounting factor
+    lam : float
+        Advantage estimation discounting factor (lambda in the paper)
+    cliprange : float
+        Cliprange in the PPO algorithm, constant
    """
    def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
                 ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
-        """
-        Initialization, PPO model is not initialized here as search space is not received yet.
-        Parameters
-        ----------
-        optimize_mode : str
-            maximize or minimize
-        trials_per_update : int
-            Number of trials to have for each model update
-        epochs_per_update : int
-            Number of epochs to run for each model update
-        minibatch_size : int
-            Minibatch size (number of trials) for the update
-        ent_coef : float
-            Policy entropy coefficient in the optimization objective
-        lr : float
-            Learning rate of the model (lstm network), constant
-        vf_coef : float
-            Value function loss coefficient in the optimization objective
-        max_grad_norm : float
-            Gradient norm clipping coefficient
-        gamma : float
-            Discounting factor
-        lam : float
-            Advantage estimation discounting factor (lambda in the paper)
-        cliprange : float
-            Cliprange in the PPO algorithm, constant
-        """
        self.optimize_mode = OptimizeMode(optimize_mode)
        self.model_config = ModelConfig()
        self.model = None