"vscode:/vscode.git/clone" did not exist on "17fafc1b3026d910a83eb8052fdf811ba56be0b1"
Unverified Commit b52f7756 authored by liuzhe-lz, committed by GitHub

HPO doc (#4579)

parent 88ffe908
......@@ -161,6 +161,58 @@ Tutorials
.. _sphx_glr_tutorials_hpo_quickstart_tensorflow:
.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="The tutorial consists of 4 steps: ">

.. only:: html

    .. figure:: /tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_main_thumb.png
        :alt: NNI HPO Quickstart with TensorFlow

        :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_main.py`

.. raw:: html

    </div>

.. toctree::
    :hidden:

    /tutorials/hpo_quickstart_tensorflow/main

.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="It can be run directly and will have the exact same result as original version.">

.. only:: html

    .. figure:: /tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_model_thumb.png
        :alt: Port TensorFlow Quickstart to NNI

        :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_model.py`

.. raw:: html

    </div>

.. toctree::
    :hidden:

    /tutorials/hpo_quickstart_tensorflow/model

.. raw:: html

    <div class="sphx-glr-clear"></div>

.. only:: html

    .. rst-class:: sphx-glr-signature
......
"""
NNI HPO Quickstart with TensorFlow
==================================
This tutorial optimizes the model in `official TensorFlow quickstart`_ with auto-tuning.
The tutorial consists of 4 steps:
1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.
.. _official TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner
"""
# %%
# Step 1: Prepare the model
# -------------------------
# In the first step, you need to prepare the model to be tuned.
#
# The model should be put in a separate script.
# It will be evaluated many times concurrently,
# and possibly will be trained on distributed platforms.
#
# In this tutorial, the model is defined in :doc:`model.py <model>`.
#
# Please make sure you understand the model code before continuing to the next step.
# %%
# Step 2: Define search space
# ---------------------------
# In the model code, we have prepared 4 hyperparameters to be tuned:
# *dense_units*, *activation_type*, *dropout_rate*, and *learning_rate*.
#
# Here we need to define their *search space* so the tuning algorithm can sample them in the desired ranges.
#
# Assume we have the following prior knowledge about these hyperparameters:
#
# 1. *dense_units* should be one of 64, 128, 256.
# 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None.
# 3. *dropout_rate* should be a float between 0.5 and 0.9.
# 4. *learning_rate* should be a float between 0.0001 and 0.1, and it follows exponential distribution.
#
# In NNI, the space of *dense_units* and *activation_type* is called ``choice``;
# the space of *dropout_rate* is called ``uniform``;
# and the space of *learning_rate* is called ``loguniform``.
# As you may have noticed, these names are derived from ``numpy.random``.
#
# For the full specification of search spaces, check :doc:`the reference </hpo/search_space>`.
#
# Now we can define the search space as follows:
search_space = {
'dense_units': {'_type': 'choice', '_value': [64, 128, 256]},
'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]},
'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]},
'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
}
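# %%
# As an illustration only (this is *not* how NNI tuners are implemented),
# one random sample from this space could be drawn with ``numpy.random``.
# Note how each ``_type`` maps to a ``numpy.random`` distribution;
# ``loguniform`` is a uniform sample in log space, exponentiated back.
import numpy as np
_rng = np.random.default_rng()
_example_sample = {
    'dense_units': int(_rng.choice([64, 128, 256])),
    'activation_type': ['relu', 'tanh', 'swish', None][_rng.integers(4)],
    'dropout_rate': float(_rng.uniform(0.5, 0.9)),
    'learning_rate': float(np.exp(_rng.uniform(np.log(0.0001), np.log(0.1)))),
}
print(_example_sample)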
# %%
# Step 3: Configure the experiment
# --------------------------------
# NNI uses an *experiment* to manage the HPO process.
# The *experiment config* defines how to train the models and how to explore the search space.
#
# In this tutorial we use a *local* mode experiment,
# which means models will be trained on the local machine, without using any special training platform.
from nni.experiment import Experiment
experiment = Experiment('local')
# %%
# Now we start to configure the experiment.
#
# Firstly, specify the model code.
# In NNI, the evaluation of each set of hyperparameters is called a *trial*,
# so the model script is called the *trial code*.
#
# If you are using a Linux system without Conda, you may need to change ``python`` to ``python3``.
#
# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``, as sketched below.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
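# %%
# For example, the ``Path(__file__).parent`` variant mentioned above would be
# (a hedged alternative, commented out because it is not needed for this quickstart):
#
#     from pathlib import Path
#     experiment.config.trial_code_directory = Path(__file__).parent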
# %%
# Then specify the search space we defined above:
experiment.config.search_space = search_space
# %%
# Choose a tuning algorithm.
# Here we use the :doc:`TPE tuner </hpo/tuners>`.
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
# %%
# Specify how many trials to run.
# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time.
#
# Please note that ``max_trial_number`` here is set low merely for a quick example.
# With the default config, the TPE tuner requires 20 trials to warm up.
# In the real world, the max trial number is commonly set to 100+.
#
# You can also set ``max_experiment_duration = '1h'`` to limit the running time
# (shown as a commented example below).
#
# Alternatively, you can skip this part and set no limit at all.
# The experiment will run forever until you press Ctrl-C.
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 4
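# %%
# For example, the duration limit mentioned above would be configured as follows
# (commented out so it does not constrain this quickstart):
#
#     experiment.config.max_experiment_duration = '1h'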
# %%
# Step 4: Run the experiment
# --------------------------
# Now the experiment is ready. Choose a port and launch it.
#
# You can use the web portal to view experiment status: http://localhost:8080.
experiment.run(8080)
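# %%
# ``experiment.run()`` blocks until the experiment finishes or a limit is reached.
# A hedged sketch of a graceful shutdown, based on the NNI experiment API
# (keeps the web portal alive until you confirm):
#
#     input('Press enter to stop the experiment...')
#     experiment.stop()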
"""
Port TensorFlow Quickstart to NNI
=================================
This is a modified version of the `TensorFlow quickstart`_.
It can be run directly and will produce exactly the same result as the original version.
Furthermore, it enables auto-tuning via an NNI *experiment*, which will be discussed later.
For now, we recommend running this script directly first, to verify the environment.
There are only 3 key differences from the original version:
1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.
2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization.
3. In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set.
.. _TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner
"""
# %%
import nni
import tensorflow as tf
# %%
# Hyperparameters to be tuned
# ---------------------------
params = {
'dense_units': 128,
'activation_type': 'relu',
'dropout_rate': 0.2,
'learning_rate': 0.001,
}
# %%
# Get optimized hyperparameters
# -----------------------------
# If run directly, ``nni.get_next_parameter()`` is a no-op and returns an empty dict.
# But in an NNI *experiment*, it receives optimized hyperparameters from the tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
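# %%
# For illustration, a received parameter set might look like the following
# (hypothetical values, sampled from the search space defined in ``main.py``):
#
#     {'dense_units': 256, 'activation_type': 'tanh', 'dropout_rate': 0.6, 'learning_rate': 0.01}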
# %%
# Load dataset
# ------------
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# %%
# Build model with hyperparameters
# --------------------------------
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(params['dense_units'], activation=params['activation_type']),
tf.keras.layers.Dropout(params['dropout_rate']),
tf.keras.layers.Dense(10)
])
adam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy'])
# %%
# (Optional) Report intermediate results
# --------------------------------------
# The callback reports per-epoch accuracy, to show the learning curve in the NNI web portal.
# In :doc:`/hpo/assessors`, you will see how to leverage these metrics for early stopping.
#
# You can safely skip this and the experiment will work fine.
callback = tf.keras.callbacks.LambdaCallback(
on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy'])
)
# %%
# Train and evaluate the model
# ---------------------------
model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback])
loss, accuracy = model.evaluate(x_test, y_test, verbose=2)
# %%
# Report final result
# -------------------
# Report the final accuracy to NNI so the tuning algorithm can suggest better hyperparameter sets.
nni.report_final_result(accuracy)
......@@ -7,6 +7,8 @@ Naive random tuner for hyper-parameter optimization.
You can specify an integer seed to make the random results reproducible.
"""
from __future__ import annotations
__all__ = ['RandomTuner', 'suggest', 'suggest_parameter']
import logging
......@@ -21,7 +23,26 @@ from nni.tuner import Tuner
_logger = logging.getLogger('nni.tuner.random')
class RandomTuner(Tuner):
def __init__(self, seed=None):
"""
A naive tuner that generates fully random hyperparameters.
Examples
--------
.. code-block::
config.tuner.name = 'Random'
config.tuner.class_args = {
'seed': 100
}
Parameters
----------
seed
The random seed.
"""
def __init__(self, seed: int | None = None):
self.space = None
if seed is None: # explicitly generate a seed to make the experiment reproducible
seed = np.random.default_rng().integers(2 ** 31)
......
......@@ -10,18 +10,21 @@ Official code: https://github.com/hyperopt/hyperopt/blob/master/hyperopt/tpe.py
This is a slightly modified re-implementation of the algorithm.
"""
from __future__ import annotations
__all__ = ['TpeTuner', 'TpeArguments', 'suggest', 'suggest_parameter']
from collections import defaultdict
import logging
import math
from typing import NamedTuple, Optional, Union
from typing import Any, NamedTuple
import numpy as np
from scipy.special import erf # pylint: disable=no-name-in-module
from nni.tuner import Tuner
from nni.common.hpo_utils import OptimizeMode, format_search_space, deformat_parameters, format_parameters
from nni.tuner import Tuner
from nni.typehint import Literal
from nni.utils import extract_scalar_reward
from . import random_tuner
......@@ -32,11 +35,11 @@ _logger = logging.getLogger('nni.tuner.tpe')
class TpeArguments(NamedTuple):
"""
These are the hyper-parameters of TPE algorithm itself.
To avoid confusing with trials' hyper-parameters, they are called "arguments" in this code.
To avoid confusion with trials' hyper-parameters, they are called "arguments" in the TPE source code.
Parameters
==========
constant_liar_type: 'best' | 'worst' | 'mean' | None (default: 'best')
----------
constant_liar_type
The TPE algorithm itself does not support parallel tuning.
This parameter specifies how to optimize for trial_concurrency > 1.
......@@ -44,20 +47,21 @@ class TpeArguments(NamedTuple):
How each liar works is explained in the paper's section 6.1.
In general, "best" suits a small trial number and "worst" suits a large trial number.
(:doc:`experiment result </CommunitySharings/ParallelizingTpeSearch>`)
n_startup_jobs: int (default: 20)
n_startup_jobs
The first N hyper-parameters are generated fully randomly for warming up.
If the search space is large, you can increase this value.
Or if max_trial_number is small, you may want to decrease it.
n_ei_candidates: int (default: 24)
n_ei_candidates
For each iteration, TPE samples EI for N sets of parameters and chooses the best one (loosely speaking).
linear_forgetting: int (default: 25)
linear_forgetting
TPE will lower the weights of old trials.
This controls how many iterations it takes before a trial's weight starts to decay.
prior_weight: float (default: 1.0)
prior_weight
TPE treats the user-provided search space as a prior.
When generating new trials, it also incorporates the prior in trial history by transforming the search space to
one trial configuration (i.e., each parameter of this configuration chooses the mean of its candidate range).
......@@ -66,11 +70,11 @@ class TpeArguments(NamedTuple):
With prior weight 1.0, the search space is treated as one good trial.
For example, "normal(0, 1)" effectly equals to a trial with x = 0 which has yielded good result.
gamma: float (default: 0.25)
gamma
Controls how many trials are considered "good".
The number is calculated as "min(gamma * sqrt(N), linear_forgetting)".
"""
constant_liar_type: Optional[str] = 'best'
constant_liar_type: Literal['best', 'worst', 'mean'] | None = 'best'
n_startup_jobs: int = 20
n_ei_candidates: int = 24
linear_forgetting: int = 25
......@@ -79,18 +83,61 @@ class TpeArguments(NamedTuple):
class TpeTuner(Tuner):
"""
Tree-structured Parzen Estimator (TPE) is an SMBO tuner.
TPE models P(x|y) and P(y) where x represents hyperparameters and y the associated evaluation metric.
P(x|y) is modeled by transforming the generative process of hyperparameters,
replacing the distributions of the configuration prior with non-parametric densities.
TPE is described in detail in *Algorithms for Hyper-Parameter Optimization*. (`paper`_)
.. _paper: https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf
Examples
--------
.. code-block::
## minimal config ##
config.tuner.name = 'TPE'
config.tuner.class_args = {
'optimize_mode': 'minimize'
}
.. code-block::
## advanced config ##
config.tuner.name = 'TPE'
config.tuner.class_args = {
'optimize_mode': 'maximize',
'seed': 12345,
'tpe_args': {
'constant_liar_type': 'mean',
'n_startup_jobs': 10,
'n_ei_candidates': 20,
'linear_forgetting': 100,
'prior_weight': 0,
'gamma': 0.5
}
}
Parameters
==========
optimze_mode: 'minimize' | 'maximize' (default: 'minimize')
----------
optimize_mode
Whether to minimize or maximize the trial result.
seed: int | None
seed
The random seed.
tpe_args: dict[string, Any] | None
tpe_args
Advanced users can use this to customize TPE tuner.
See `TpeArguments` for details.
"""
def __init__(self, optimize_mode='minimize', seed=None, tpe_args=None):
def __init__(self,
optimize_mode: Literal['minimize', 'maximize'] = 'minimize',
seed: int | None = None,
tpe_args: dict[str, Any] | None = None):
self.optimize_mode = OptimizeMode(optimize_mode)
self.args = TpeArguments(**(tpe_args or {}))
self.space = None
......@@ -183,7 +230,7 @@ def suggest_parameter(args, rng, spec, parameter_history):
## Utilities part ##
class Record(NamedTuple):
param: Union[int, float]
param: int | float
loss: float
class BestLiar:  # assume running trials have the best result; this accelerates convergence
......@@ -305,7 +352,7 @@ def adaptive_parzen_normal(args, history_mus, prior_mu, prior_sigma):
this function is used for everything other than "choice" and "randint".
Parameters
==========
----------
args: TpeArguments
Algorithm arguments.
history_mus: 1-d array of float
......@@ -317,7 +364,7 @@ def adaptive_parzen_normal(args, history_mus, prior_mu, prior_sigma):
σ value of normal search space.
Returns
=======
-------
Tuple of three 1-d float arrays: (weight, µ, σ).
The tuple represents N+1 "vicinity of observations" and each one's weight,
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from packaging.version import Version
import torch
import torch.nn as nn
......@@ -8,7 +9,6 @@ from nni.retiarii.serializer import basic_unit
from .api import LayerChoice
from .utils import generate_new_label
from ...utils import version_larger_equal
__all__ = ['AutoActivation']
......@@ -99,7 +99,7 @@ class UnaryTanh(nn.Module):
def forward(self, x):
return torch.tanh(x)
if not version_larger_equal(torch.__version__, TorchVersion):
if not Version(torch.__version__) >= Version(TorchVersion):
@basic_unit
class UnaryAsinh(nn.Module):
def forward(self, x):
......@@ -110,7 +110,7 @@ class UnaryAtan(nn.Module):
def forward(self, x):
return torch.atan(x)
if not version_larger_equal(torch.__version__, TorchVersion):
if not Version(torch.__version__) >= Version(TorchVersion):
@basic_unit
class UnarySinc(nn.Module):
def forward(self, x):
......@@ -151,7 +151,7 @@ unary_modules = ['UnaryIdentity', 'UnaryNegative', 'UnaryAbs', 'UnarySquare', 'U
'UnarySinh', 'UnaryCosh', 'UnaryTanh', 'UnaryAtan', 'UnaryMax',
'UnaryMin', 'UnarySigmoid', 'UnaryLogExp', 'UnaryExpSquare', 'UnaryErf']
if not version_larger_equal(torch.__version__, TorchVersion):
if not Version(torch.__version__) >= Version(TorchVersion):
unary_modules.append('UnaryAsinh')
unary_modules.append('UnarySinc')
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from packaging.version import Version
import torch
import torch.nn as nn
from ...serializer import basic_unit
from ...utils import version_larger_equal
# NOTE: support pytorch version >= 1.5.0
......@@ -31,10 +31,10 @@ __all__ = [
'Flatten', 'Hardsigmoid'
]
if version_larger_equal(torch.__version__, '1.6.0'):
if Version(torch.__version__) >= Version('1.6.0'):
__all__.append('Hardswish')
if version_larger_equal(torch.__version__, '1.7.0'):
if Version(torch.__version__) >= Version('1.7.0'):
__all__.extend(['Unflatten', 'SiLU', 'TripletMarginWithDistanceLoss'])
......@@ -149,10 +149,10 @@ Transformer = basic_unit(nn.Transformer)
Flatten = basic_unit(nn.Flatten)
Hardsigmoid = basic_unit(nn.Hardsigmoid)
if version_larger_equal(torch.__version__, '1.6.0'):
if Version(torch.__version__) >= Version('1.6.0'):
Hardswish = basic_unit(nn.Hardswish)
if version_larger_equal(torch.__version__, '1.7.0'):
if Version(torch.__version__) >= Version('1.7.0'):
SiLU = basic_unit(nn.SiLU)
Unflatten = basic_unit(nn.Unflatten)
TripletMarginWithDistanceLoss = basic_unit(nn.TripletMarginWithDistanceLoss)
......@@ -18,13 +18,6 @@ def import_(target: str, allow_none: bool = False) -> Any:
return getattr(module, identifier)
def version_larger_equal(a: str, b: str) -> bool:
# TODO: refactor later
a = a.split('+')[0]
b = b.split('+')[0]
return tuple(map(int, a.split('.'))) >= tuple(map(int, b.split('.')))
_last_uid = defaultdict(int)
_DEFAULT_MODEL_NAMESPACE = 'model'
......
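For context, a minimal sketch of why ``packaging.version.Version`` is more robust than the removed integer-tuple comparison (the motivation is assumed here, not stated in the commit):

    from packaging.version import Version

    # The removed helper evaluated tuple(map(int, '1.8.0a0'.split('.'))),
    # which raises ValueError on the '0a0' component.
    # Version parses PEP 440 strings, pre-releases included:
    assert Version('1.8.0a0') < Version('1.8.0')
    assert Version('1.10.0') > Version('1.9.0')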
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
import typing
if typing.TYPE_CHECKING or sys.version_info >= (3, 8):
Literal = typing.Literal
else:
Literal = typing.Any
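# Usage sketch (hypothetical function, mirroring how TpeTuner uses this shim):
#
#     def set_mode(mode: Literal['minimize', 'maximize']) -> None: ...
#
# Static type checkers follow the TYPE_CHECKING branch and see ``typing.Literal``,
# while at runtime on Python < 3.8 the annotation degrades to ``Any``.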