Unverified commit fbee0df1 authored by liuzhe-lz, committed by GitHub

Fix HPO doc fixme (#4661)

parent f886ae5d
@@ -210,7 +210,7 @@ Tutorials
.. raw:: html
-    <div class="sphx-glr-thumbcontainer" tooltip="The tutorial consists of 4 steps: ">
+    <div class="sphx-glr-thumbcontainer" tooltip="There is also a TensorFlow version<../hpo_quickstart_tensorflow/main> if you prefer it.">
.. only:: html
......
@@ -3,12 +3,14 @@ NNI HPO Quickstart with PyTorch
===============================
This tutorial optimizes the model in `official PyTorch quickstart`_ with auto-tuning.
+There is also a :doc:`TensorFlow version<../hpo_quickstart_tensorflow/main>` if you prefer it.
The tutorial consists of 4 steps:
1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.
.. _official PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
@@ -16,7 +18,7 @@ The tutorial consists of 4 steps:
# %%
# Step 1: Prepare the model
# -------------------------
-# In first step, you need to prepare the model to be tuned.
+# In the first step, we need to prepare the model to be tuned.
#
# The model should be put in a separate script.
# It will be evaluated many times concurrently,
@@ -24,6 +26,12 @@ The tutorial consists of 4 steps:
#
# In this tutorial, the model is defined in :doc:`model.py <model>`.
#
+# In short, it is a PyTorch model with 3 additional API calls (a minimal sketch follows below):
+#
+# 1. Use :func:`nni.get_next_parameter` to fetch the hyperparameters to be evaluated.
+# 2. Use :func:`nni.report_intermediate_result` to report per-epoch accuracy metrics.
+# 3. Use :func:`nni.report_final_result` to report the final accuracy.
+#
# Please understand the model code before continuing to the next step.
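For context, a minimal trial script built around these three calls could look like the sketch below. The training loop is a stand-in so the snippet stays runnable; it is not the actual contents of model.py:

    import nni

    # Default hyperparameters; values received from the tuner override them.
    params = {'features': 512, 'lr': 0.001, 'momentum': 0}
    params.update(nni.get_next_parameter())        # 1. fetch hyperparameters to evaluate

    accuracy = 0.0
    for epoch in range(5):
        # Placeholder for one epoch of real training and evaluation.
        accuracy = 0.8 + 0.01 * epoch
        nni.report_intermediate_result(accuracy)   # 2. report per-epoch metric

    nni.report_final_result(accuracy)              # 3. report the final metric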
# %%
@@ -36,9 +44,9 @@ The tutorial consists of 4 steps:
#
# Assuming we have the following prior knowledge for these hyperparameters:
#
# 1. *features* should be one of 128, 256, 512, 1024.
# 2. *lr* should be a float between 0.0001 and 0.1, and it follows an exponential distribution.
# 3. *momentum* should be a float between 0 and 1.
#
# In NNI, the space of *features* is called ``choice``;
# the space of *lr* is called ``loguniform``;
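The hunk is truncated here. Based on the description above, a search space matching these priors would be written in NNI's ``_type``/``_value`` dictionary format roughly as follows (a sketch, not necessarily the file's exact definition):

    search_space = {
        'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
        'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
        'momentum': {'_type': 'uniform', '_value': [0, 1]},
    }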
@@ -69,46 +77,81 @@ experiment = Experiment('local')
# %%
# Now we start to configure the experiment.
#
-# Firstly, specify the model code.
+# Configure trial code
+# ^^^^^^^^^^^^^^^^^^^^
# In NNI, evaluation of each hyperparameter set is called a *trial*.
# So the model script is called *trial code*.
-#
-# If you are using Linux system without Conda, you many need to change ``python`` to ``python3``.
-#
-# When ``trial_code_directory`` is a relative path, it relates to current working directory.
-# To run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
+# %%
+# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
+# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``, as sketched below.
+# (`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
+# is only available in standard Python, not in Jupyter Notebook.)
+#
+# .. attention::
+#
+#     If you are using a Linux system without Conda,
+#     you may need to change ``"python model.py"`` to ``"python3 model.py"``.
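For example, if you want the experiment to be launchable from any working directory, the two settings could instead be written like this (a sketch; Path comes from pathlib):

    from pathlib import Path

    # Resolve the trial code directory relative to this script rather than the CWD.
    experiment.config.trial_command = 'python model.py'
    experiment.config.trial_code_directory = Path(__file__).parent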
# %%
-# Then specify the search space we defined above:
+# Configure search space
+# ^^^^^^^^^^^^^^^^^^^^^^
experiment.config.search_space = search_space
# %%
-# Choose a tuning algorithm.
+# Configure tuning algorithm
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^
# Here we use the :doc:`TPE tuner </hpo/tuners>`.
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
# %%
-# Specify how many trials to run.
-# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time.
+# Configure how many trials to run
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 2 sets at a time.
+experiment.config.max_trial_number = 10
+experiment.config.trial_concurrency = 2
+# %%
+# .. note::
#
-# Please note that ``max_trial_number`` here is merely for a quick example.
-# With default config TPE tuner requires 20 trials to warm up.
-# In real world max trial number is commonly set to 100+.
+#     ``max_trial_number`` is set to 10 here for a fast example.
+#     In the real world it should be set to a larger number.
+#     With the default config, the TPE tuner requires 20 trials to warm up.
#
-# You can also set ``max_experiment_duration = '1h'`` to limit running time.
+#     You may also set ``max_experiment_duration = '1h'`` to limit the running time, as sketched below.
#
-# And alternatively, you can skip this part and set no limit at all.
-# The experiment will run forever until you press Ctrl-C.
+#     If neither ``max_trial_number`` nor ``max_experiment_duration`` is set,
+#     the experiment will run forever until you press Ctrl-C.
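If you prefer a wall-clock limit instead of (or in addition to) a trial count, the option named in the note above would be set like this (a sketch):

    # Stop the experiment after one hour, whichever limit is reached first.
    experiment.config.max_experiment_duration = '1h'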
-experiment.config.max_trial_number = 10
-experiment.config.trial_concurrency = 4
# %%
# Step 4: Run the experiment
# --------------------------
-# Now the experiment is ready. Choose a port and launch it.
+# Now the experiment is ready. Choose a port and launch it. (Here we use port 8080.)
#
# You can use the web portal to view experiment status: http://localhost:8080.
experiment.run(8080)
+# %%
+# After the experiment is done
+# ----------------------------
+# Everything is done and it is safe to exit now. The following are optional.
+#
+# If you are using standard Python instead of Jupyter Notebook,
+# you can add ``input()`` or ``signal.pause()`` to prevent Python from exiting,
+# allowing you to view the web portal after the experiment is done.
+# input('Press enter to quit')
+experiment.stop()
+# %%
+# :meth:`nni.experiment.Experiment.stop` is automatically invoked when Python exits,
+# so it can be omitted in your code.
+#
+# After the experiment is stopped, you can run :meth:`nni.experiment.Experiment.view` to restart the web portal.
+#
+# .. tip::
+#
+#     This example uses the :doc:`Python API </reference/experiment>` to create the experiment.
+#
+#     You can also create and manage experiments with the :doc:`command line tool </reference/nnictl>`.
@@ -5,14 +5,14 @@ This is a modified version of `PyTorch quickstart`_.
It can be run directly and will have the exact same result as the original version.
-Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later.
+Furthermore, it enables the ability of auto tuning with an NNI *experiment*, which will be detailed later.
-For now, we recommend to run this script directly to verify the environment.
+It is recommended to run this script directly first to verify the environment.
-There are only 2 key differences from the original version:
+There are 2 key differences from the original version:
-1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.
+1. In the `Get optimized hyperparameters`_ part, it receives generated hyperparameters.
-2. In `Train the model and report accuracy`_ part, it reports accuracy metrics for tuner to generate next hyperparameter set.
+2. In the `Train model and report accuracy`_ part, it reports accuracy metrics to NNI.
.. _PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
@@ -28,6 +28,7 @@ from torchvision.transforms import ToTensor
# %%
# Hyperparameters to be tuned
# ---------------------------
+# These are the hyperparameters that will be tuned.
params = {
    'features': 512,
    'lr': 0.001,
@@ -37,7 +38,7 @@ params = {
# %%
# Get optimized hyperparameters
# -----------------------------
-# If run directly, ``nni.get_next_parameters()`` is a no-op and returns an empty dict.
+# If run directly, :func:`nni.get_next_parameter` is a no-op and returns an empty dict.
# But with an NNI *experiment*, it will receive optimized hyperparameters from the tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
@@ -83,8 +84,8 @@ loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])
# %%
-# Define train() and test()
-# -------------------------
+# Define train and test
+# ---------------------
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
@@ -112,9 +113,9 @@ def test(dataloader, model, loss_fn):
    return correct
# %%
-# Train the model and report accuracy
-# -----------------------------------
+# Train model and report accuracy
+# -------------------------------
-# Report accuracy to NNI so the tuning algorithm can predict best hyperparameters.
+# Report accuracy metrics to NNI so the tuning algorithm can suggest better hyperparameters.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
......
@@ -5,10 +5,10 @@ This tutorial optimizes the model in `official TensorFlow quickstart`_ with auto
The tutorial consists of 4 steps:
1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.
.. _official TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner
"""
@@ -16,7 +16,7 @@ The tutorial consists of 4 steps:
# %%
# Step 1: Prepare the model
# -------------------------
-# In first step, you need to prepare the model to be tuned.
+# In the first step, we need to prepare the model to be tuned.
#
# The model should be put in a separate script.
# It will be evaluated many times concurrently,
@@ -24,6 +24,12 @@ The tutorial consists of 4 steps:
#
# In this tutorial, the model is defined in :doc:`model.py <model>`.
#
+# In short, it is a TensorFlow model with 3 additional API calls:
+#
+# 1. Use :func:`nni.get_next_parameter` to fetch the hyperparameters to be evaluated.
+# 2. Use :func:`nni.report_intermediate_result` to report per-epoch accuracy metrics.
+# 3. Use :func:`nni.report_final_result` to report the final accuracy.
+#
# Please understand the model code before continuing to the next step.
# %%
@@ -36,10 +42,10 @@ The tutorial consists of 4 steps:
#
# Assuming we have the following prior knowledge for these hyperparameters:
#
# 1. *dense_units* should be one of 64, 128, 256.
# 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None.
# 3. *dropout_rate* should be a float between 0.5 and 0.9.
# 4. *learning_rate* should be a float between 0.0001 and 0.1, and it follows an exponential distribution.
#
# In NNI, the space of *dense_units* and *activation_type* is called ``choice``;
# the space of *dropout_rate* is called ``uniform``;
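As with the PyTorch version, the hunk is truncated here; a search space matching these priors would look roughly like this in NNI's ``_type``/``_value`` format (a sketch, not necessarily the file's exact definition):

    search_space = {
        'dense_units': {'_type': 'choice', '_value': [64, 128, 256]},
        'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]},
        'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]},
        'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    }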
@@ -71,46 +77,81 @@ experiment = Experiment('local')
# %%
# Now we start to configure the experiment.
#
-# Firstly, specify the model code.
+# Configure trial code
+# ^^^^^^^^^^^^^^^^^^^^
# In NNI, evaluation of each hyperparameter set is called a *trial*.
# So the model script is called *trial code*.
-#
-# If you are using Linux system without Conda, you many need to change ``python`` to ``python3``.
-#
-# When ``trial_code_directory`` is a relative path, it relates to current working directory.
-# To run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
+# %%
+# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
+# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``.
+# (`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
+# is only available in standard Python, not in Jupyter Notebook.)
+#
+# .. attention::
+#
+#     If you are using a Linux system without Conda,
+#     you may need to change ``"python model.py"`` to ``"python3 model.py"``.
# %%
-# Then specify the search space we defined above:
+# Configure search space
+# ^^^^^^^^^^^^^^^^^^^^^^
experiment.config.search_space = search_space
# %%
-# Choose a tuning algorithm.
+# Configure tuning algorithm
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^
# Here we use the :doc:`TPE tuner </hpo/tuners>`.
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
# %%
-# Specify how many trials to run.
-# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time.
+# Configure how many trials to run
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 2 sets at a time.
+experiment.config.max_trial_number = 10
+experiment.config.trial_concurrency = 2
+# %%
+# .. note::
#
-# Please note that ``max_trial_number`` here is merely for a quick example.
-# With default config TPE tuner requires 20 trials to warm up.
-# In real world max trial number is commonly set to 100+.
+#     ``max_trial_number`` is set to 10 here for a fast example.
+#     In the real world it should be set to a larger number.
+#     With the default config, the TPE tuner requires 20 trials to warm up.
#
-# You can also set ``max_experiment_duration = '1h'`` to limit running time.
+#     You may also set ``max_experiment_duration = '1h'`` to limit the running time.
#
-# And alternatively, you can skip this part and set no limit at all.
-# The experiment will run forever until you press Ctrl-C.
+#     If neither ``max_trial_number`` nor ``max_experiment_duration`` is set,
+#     the experiment will run forever until you press Ctrl-C.
-experiment.config.max_trial_number = 10
-experiment.config.trial_concurrency = 4
# %%
# Step 4: Run the experiment
# --------------------------
-# Now the experiment is ready. Choose a port and launch it.
+# Now the experiment is ready. Choose a port and launch it. (Here we use port 8080.)
#
# You can use the web portal to view experiment status: http://localhost:8080.
experiment.run(8080)
+# %%
+# After the experiment is done
+# ----------------------------
+# Everything is done and it is safe to exit now. The following are optional.
+#
+# If you are using standard Python instead of Jupyter Notebook,
+# you can add ``input()`` or ``signal.pause()`` to prevent Python from exiting,
+# allowing you to view the web portal after the experiment is done.
+# input('Press enter to quit')
+experiment.stop()
+# %%
+# :meth:`nni.experiment.Experiment.stop` is automatically invoked when Python exits,
+# so it can be omitted in your code.
+#
+# After the experiment is stopped, you can run :meth:`nni.experiment.Experiment.view` to restart the web portal.
+#
+# .. tip::
+#
+#     This example uses the :doc:`Python API </reference/experiment>` to create the experiment.
+#
+#     You can also create and manage experiments with the :doc:`command line tool </reference/nnictl>`.
@@ -5,15 +5,15 @@ This is a modified version of `TensorFlow quickstart`_.
It can be run directly and will have the exact same result as the original version.
-Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later.
+Furthermore, it enables the ability of auto tuning with an NNI *experiment*, which will be detailed later.
-For now, we recommend to run this script directly to verify the environment.
+It is recommended to run this script directly first to verify the environment.
-There are only 3 key differences from the original version:
+There are 3 key differences from the original version:
-1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.
+1. In the `Get optimized hyperparameters`_ part, it receives generated hyperparameters.
-2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization.
+2. In the `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy metrics.
-3. In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set.
+3. In the `Report final result`_ part, it reports the final accuracy.
.. _TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner
"""
@@ -25,6 +25,7 @@ import tensorflow as tf
# %%
# Hyperparameters to be tuned
# ---------------------------
+# These are the hyperparameters that will be tuned later.
params = {
    'dense_units': 128,
    'activation_type': 'relu',
@@ -35,10 +36,11 @@ params = {
# %%
# Get optimized hyperparameters
# -----------------------------
-# If run directly, ``nni.get_next_parameters()`` is a no-op and returns an empty dict.
+# If run directly, :func:`nni.get_next_parameter` is a no-op and returns an empty dict.
# But with an NNI *experiment*, it will receive optimized hyperparameters from the tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
+print(params)
# %%
# Load dataset
@@ -59,18 +61,16 @@ model = tf.keras.models.Sequential([
])
adam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy'])
# %%
# (Optional) Report intermediate results
# --------------------------------------
-# The callback reports per-epoch accuracy to show learning curve in NNI web portal.
+# The callback reports per-epoch accuracy to show the learning curve in the web portal.
-# And in :doc:`/hpo/assessors`, you will see how to leverage the metrics for early stopping.
+# You can also leverage the metrics for early stopping with :doc:`NNI assessors </hpo/assessors>`.
#
-# You can safely skip this and the experiment will work fine.
+# This part can be safely skipped and the experiment will work fine.
callback = tf.keras.callbacks.LambdaCallback(
    on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy'])
)
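The callback only takes effect when it is passed to training; in the elided part of the file the fit call presumably resembles this sketch (the epoch count and the x_train/y_train variable names are assumptions taken from the quickstart style):

    # Pass the callback so each epoch's accuracy is reported to NNI.
    model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback])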
@@ -84,5 +84,5 @@ loss, accuracy = model.evaluate(x_test, y_test, verbose=2)
# %%
# Report final result
# -------------------
-# Report final accuracy to NNI so the tuning algorithm can predict best hyperparameters.
+# Report final accuracy to NNI so the tuning algorithm can suggest better hyperparameters.
nni.report_final_result(accuracy)
@@ -10,6 +10,7 @@ from .runtime.log import init_logger
init_logger()
from .common.serializer import trace, dump, load
+from .experiment import Experiment
from .runtime.env_vars import dispatcher_env_vars
from .utils import ClassArgsValidator
......
@@ -25,11 +25,15 @@ class CurvefittingAssessor(Assessor):
"""
CurvefittingAssessor uses a learning curve fitting algorithm to predict the learning curve performance in the future.
-The intermediate result **must** be accuracy.
+The intermediate result **must** be accuracy. Curve fitting does not support minimizing loss.
+Curve fitting assessor is an LPA (learning, predicting, assessing) algorithm.
It stops a pending trial X at step S if the trial's forecast result at the target step has converged and is lower than the
best performance in the history.
+Paper: `Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves
+<https://ml.informatik.uni-freiburg.de/wp-content/uploads/papers/15-IJCAI-Extrapolation_of_Learning_Curves.pdf>`__
Examples
--------
@@ -46,12 +50,24 @@ class CurvefittingAssessor(Assessor):
Parameters
----------
epoch_num : int
-    The total number of epoch
+    The total number of epochs.
+    We need to know the number of epochs to determine which points we need to predict.
start_step : int
-    only after receiving start_step number of reported intermediate results
+    A trial is judged for early stopping only after it has reported start_step intermediate results.
threshold : float
-    The threshold that we decide to early stop the worse performance curve.
+    The threshold that we use to decide to early stop the worst-performing curve.
+    For example: if threshold = 0.95, and the best performance in the history is 0.9,
+    then we will stop the trial whose predicted value is lower than 0.95 * 0.9 = 0.855.
gap : int
+    The gap interval between assessor judgements.
+    For example: if gap = 2 and start_step = 6,
+    then we will assess the result when we receive 6, 8, 10, 12, ... intermediate results.
"""
def __init__(self, epoch_num=20, start_step=6, threshold=0.95, gap=1):
......
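For reference, enabling this assessor with the parameters documented above would look roughly like this in an experiment config (the builtin assessor name 'Curvefitting' is an assumption here; the values shown are the defaults):

    experiment.config.assessor.name = 'Curvefitting'
    experiment.config.assessor.class_args = {
        'epoch_num': 20,
        'start_step': 6,
        'threshold': 0.95,
        'gap': 1,
    }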
@@ -2,14 +2,10 @@
# Licensed under the MIT license.
"""
-Grid search tuner for hyper-parameter optimization.
+Grid search tuner.
For categorical parameters this tuner fully explores all combinations.
For numerical parameters it samples them at progressively decreased intervals.
-Use this tuner if you have abundant resource and want to find strictly optimal parameters.
-Grid search tuner has no argument.
"""
__all__ = ['GridSearchTuner']
@@ -64,16 +60,22 @@ _logger = logging.getLogger('nni.tuner.gridsearch')
class GridSearchTuner(Tuner):
"""
+Grid search tuner divides the search space into an evenly spaced grid, and performs a brute-force traverse.
+Recommended when the search space is small, or if you want to find strictly optimal hyperparameters.
+**Implementation**
The original grid search approach performs an exhaustive search through a space consisting of ``choice`` and ``randint``.
-This implementation extends grid search to support all NNI search spaces.
+NNI's implementation extends grid search to support all search space types.
When the search space contains continuous parameters like ``normal`` and ``loguniform``,
grid search tuner works in the following steps:
1. Divide the search space into a grid.
-2. Perform an exhaustive searth throught the grid.
+2. Perform an exhaustive search through the grid.
-3. Subdivide the grid into a finer-grained one.
+3. Subdivide the grid into a finer-grained new grid.
4. Go to step 2, until the experiment ends.
As a deterministic algorithm, grid search has no argument.
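For instance, selecting it in an experiment config is a one-liner (the builtin tuner name 'GridSearch' is assumed here; since the tuner takes no argument, no class_args are needed):

    experiment.config.tuner.name = 'GridSearch'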
......
@@ -26,8 +26,8 @@ class MedianstopAssessor(Assessor):
if the trial’s best objective value by step S is strictly worse than the median value
of the running averages of all completed trials’ objectives reported up to step S
-The algorithm is mentioned in *Google Vizer: A Service for Black-Box Optimization*.
-(`paper <https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46180.pdf>`__)
+Paper: `Google Vizier: A Service for Black-Box Optimization
+<https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46180.pdf>`__
Examples
--------
@@ -36,7 +36,8 @@ class MedianstopAssessor(Assessor):
config.assessor.name = 'Medianstop'
config.assessor.class_args = {
-    'optimize_mode': 'maximize'
+    'optimize_mode': 'maximize',
+    'start_step': 5
}
Parameters
@@ -44,7 +45,8 @@ class MedianstopAssessor(Assessor):
optimize_mode
    Whether to minimize or maximize the trial result.
start_step
-    Only after receiving start_step number of reported intermediate results.
+    A trial is determined to be stopped or not
+    only after receiving start_step number of reported intermediate results.
"""
def __init__(self, optimize_mode: Literal['minimize', 'maximize'] = 'maximize', start_step: int = 0):
......
@@ -2,14 +2,14 @@
# Licensed under the MIT license.
"""
-Naive random tuner for hyper-parameter optimization.
+Naive random tuner.
You can specify an integer seed to determine the random result.
"""
from __future__ import annotations
-__all__ = ['RandomTuner', 'suggest', 'suggest_parameter']
+__all__ = ['RandomTuner']
import logging
......
@@ -13,7 +13,7 @@ This is a slightly modified re-implementation of the algorithm.
from __future__ import annotations
-__all__ = ['TpeTuner', 'TpeArguments', 'suggest', 'suggest_parameter']
+__all__ = ['TpeTuner', 'TpeArguments']
from collections import defaultdict
import logging
@@ -52,7 +52,7 @@ class TpeArguments(NamedTuple):
(:doc:`experiment result </CommunitySharings/ParallelizingTpeSearch>`)
n_startup_jobs
-    The first N hyper-parameters are generated fully randomly for warming up.
+    The first N hyperparameters are generated fully randomly for warming up.
    If the search space is large, you can increase this value.
    Or if max_trial_number is small, you may want to decrease it.
@@ -87,13 +87,20 @@ class TpeTuner(Tuner):
"""
Tree-structured Parzen Estimator (TPE) tuner.
+TPE is a lightweight tuner that has no extra dependency and supports all search space types,
+designed to be the default tuner.
+It has the drawback that TPE cannot discover relationships between different hyperparameters.
+**Implementation**
TPE is an SMBO algorithm.
It models P(x|y) and P(y) where x represents hyperparameters and y the evaluation result.
P(x|y) is modeled by transforming the generative process of hyperparameters,
replacing the distributions of the configuration prior with non-parametric densities.
-Paper: :footcite:`bergstra2011algorithms`.
-(`PDF <https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf>`__)
+Paper: `Algorithms for Hyper-Parameter Optimization
+<https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf>`__
Examples
--------
@@ -127,13 +134,13 @@ class TpeTuner(Tuner):
Parameters
----------
-optimze_mode
+optimize_mode: Literal['minimize', 'maximize']
    Whether to minimize or maximize the trial result.
seed
    The random seed.
tpe_args
    Advanced users can use this to customize the TPE tuner.
-    See `TpeArguments` for details.
+    See :class:`TpeArguments` for details.
def __init__(self,
......
@@ -138,7 +138,7 @@ class Experiment:
if interface.family == socket.AF_INET:
ips.append(interface.address)
ips = [f'http://{ip}:{port}' for ip in ips if ip]
-msg = 'Web UI URLs: ' + colorama.Fore.CYAN + ' '.join(ips) + colorama.Style.RESET_ALL
+msg = 'Web portal URLs: ' + colorama.Fore.CYAN + ' '.join(ips) + colorama.Style.RESET_ALL
_logger.info(msg)
def stop(self) -> None:
@@ -184,7 +184,6 @@ class Experiment:
return False
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
-finally:
self.stop()
@classmethod
......
@@ -61,6 +61,8 @@ def init_logger_for_command_line() -> None:
_cli_log_initialized = True
colorful_formatter = Formatter(log_format, time_format)
colorful_formatter.format = _colorful_format
+if '_default_' not in handlers:  # this happens when building sphinx gallery
+    _register_handler(StreamHandler(sys.stdout), logging.INFO)
handlers['_default_'].setFormatter(colorful_formatter)
def start_experiment_log(experiment_id: str, log_directory: Path, debug: bool) -> None:
......
@@ -23,10 +23,10 @@ def get_next_parameter():
warning_message = ''.join([
    colorama.Style.BRIGHT,
    colorama.Fore.RED,
-    'Running NNI code without runtime. ',
+    'Running trial code without runtime. ',
-    'Check the following tutorial if you are new to NNI: ',
+    'Please check the tutorial if you are new to NNI: ',
    colorama.Fore.YELLOW,
-    'https://nni.readthedocs.io/en/stable/Tutorial/QuickStart.html#id1',
+    'https://nni.readthedocs.io/en/stable/tutorials/hpo_quickstart_pytorch/main.html',
    colorama.Style.RESET_ALL
])
warnings.warn(warning_message, RuntimeWarning)
......