Unverified commit d1c6bbae, authored by liuzhe-lz, committed by GitHub

HPO PyTorch Tutorial (#4653)

parent 22ee2ac4
@@ -203,6 +203,58 @@ Tutorials
.. _sphx_glr_tutorials_hpo_quickstart_pytorch:

.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="The tutorial consists of 4 steps: ">

.. only:: html

    .. figure:: /tutorials/hpo_quickstart_pytorch/images/thumb/sphx_glr_main_thumb.png
        :alt: NNI HPO Quickstart with PyTorch

        :ref:`sphx_glr_tutorials_hpo_quickstart_pytorch_main.py`

.. raw:: html

    </div>

.. toctree::
    :hidden:

    /tutorials/hpo_quickstart_pytorch/main

.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="It can be run directly and will have the exact same result as original version.">

.. only:: html

    .. figure:: /tutorials/hpo_quickstart_pytorch/images/thumb/sphx_glr_model_thumb.png
        :alt: Port PyTorch Quickstart to NNI

        :ref:`sphx_glr_tutorials_hpo_quickstart_pytorch_model.py`

.. raw:: html

    </div>

.. toctree::
    :hidden:

    /tutorials/hpo_quickstart_pytorch/model

.. raw:: html

    <div class="sphx-glr-clear"></div>

.. _sphx_glr_tutorials_hpo_quickstart_tensorflow:
...
"""
NNI HPO Quickstart with PyTorch
===============================

This tutorial optimizes the model in `official PyTorch quickstart`_ with auto-tuning.

The tutorial consists of 4 steps:

1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.

.. _official PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
# %%
# Step 1: Prepare the model
# -------------------------
# In the first step, you need to prepare the model to be tuned.
#
# The model should be put in a separate script.
# It will be evaluated many times concurrently,
# and possibly will be trained on distributed platforms.
#
# In this tutorial, the model is defined in :doc:`model.py <model>`.
#
# Please understand the model code before continuing to the next step;
# a sketch of the contract it follows is shown below.
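# %%
# As a rough sketch, a trial script follows this contract
# (``train_and_evaluate`` is a hypothetical placeholder for your own training logic)::
#
#     import nni
#
#     params = {'features': 512, 'lr': 0.001, 'momentum': 0}  # defaults
#     params.update(nni.get_next_parameter())  # hyperparameters from the tuner
#     accuracy = train_and_evaluate(params)    # your training and evaluation code
#     nni.report_final_result(accuracy)        # report the metric back to NNI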
# %%
# Step 2: Define search space
# ---------------------------
# In the model code, we have prepared 3 hyperparameters to be tuned:
# *features*, *lr*, and *momentum*.
#
# Here we need to define their *search space* so the tuning algorithm can sample them in the desired ranges.
#
# Assume we have the following prior knowledge about these hyperparameters:
#
# 1. *features* should be one of 128, 256, 512, or 1024.
# 2. *lr* should be a float between 0.0001 and 0.1, following an exponential distribution.
# 3. *momentum* should be a float between 0 and 1.
#
# In NNI, the space of *features* is called ``choice``;
# the space of *lr* is called ``loguniform``;
# and the space of *momentum* is called ``uniform``.
# You may have noticed that these names are derived from ``numpy.random``.
#
# For the full specification of search spaces, check :doc:`the reference </hpo/search_space>`.
#
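# %%
# As a loose illustration of that correspondence (a sketch, not NNI's actual
# sampling code), the three spaces behave roughly like these ``numpy.random`` calls::
#
#     import numpy as np
#
#     features = np.random.choice([128, 256, 512, 1024])           # choice
#     lr = np.exp(np.random.uniform(np.log(0.0001), np.log(0.1)))  # loguniform
#     momentum = np.random.uniform(0, 1)                           # uniform
# %%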
# Now we can define the search space as follows:
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}
# %%
# Step 3: Configure the experiment
# --------------------------------
# NNI uses an *experiment* to manage the HPO process.
# The *experiment config* defines how to train the models and how to explore the search space.
#
# In this tutorial we use a *local* mode experiment,
# which means the models will be trained on the local machine, without using any special training platform.
from nni.experiment import Experiment
experiment = Experiment('local')
# %%
# Now we start to configure the experiment.
#
# Firstly, specify the model code.
# In NNI, the evaluation of each set of hyperparameters is called a *trial*,
# so the model script is called the *trial code*.
#
# If you are using a Linux system without Conda, you may need to change ``python`` to ``python3``.
#
# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``,
# as sketched after the code below.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
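# %%
# For example, to make the experiment runnable from any working directory
# (a sketch, assuming ``model.py`` sits next to this script)::
#
#     from pathlib import Path
#     experiment.config.trial_code_directory = Path(__file__).parent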
# %%
# Then specify the search space we defined above:
experiment.config.search_space = search_space
# %%
# Choose a tuning algorithm.
# Here we use the :doc:`TPE tuner </hpo/tuners>`.
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
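# %%
# Other built-in tuners are selected the same way; for instance, the ``Random``
# tuner used by the ``nnictl hello`` example is configured without any ``class_args`` (a sketch)::
#
#     experiment.config.tuner.name = 'Random'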
# %%
# Specify how many trials to run.
# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time.
#
# Please note that ``max_trial_number`` is set this low merely for a quick example.
# With the default config, the TPE tuner requires 20 trials to warm up.
# In the real world, the max trial number is commonly set to 100+.
#
# You can also set ``max_experiment_duration = '1h'`` to limit the running time, as sketched below.
#
# Alternatively, you can skip this part and set no limit at all.
# The experiment will run forever until you press Ctrl-C.
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 4
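# %%
# For example, to cap the wall-clock time instead of the trial count (a sketch)::
#
#     experiment.config.max_experiment_duration = '1h'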
# %%
# Step 4: Run the experiment
# --------------------------
# Now the experiment is ready. Choose a port and launch it.
#
# You can use the web portal to view the experiment status: http://localhost:8080.
experiment.run(8080)
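# %%
# After ``run()`` returns, you can stop the experiment to release the port
# (a sketch; ``Experiment.stop()`` shuts the experiment down)::
#
#     experiment.stop()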
"""
Port PyTorch Quickstart to NNI
==============================

This is a modified version of the `PyTorch quickstart`_.

It can be run directly and will produce exactly the same result as the original version.
Furthermore, it enables auto-tuning with an NNI *experiment*, which will be discussed later.

For now, we recommend running this script directly to verify the environment.

There are only 2 key differences from the original version:

1. In the `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.
2. In the `Train the model and report accuracy`_ part, it reports the accuracy metric for the tuner to generate the next set of hyperparameters.

.. _PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
# %%
import nni
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# %%
# Hyperparameters to be tuned
# ---------------------------
params = {
    'features': 512,
    'lr': 0.001,
    'momentum': 0,
}
# %%
# Get optimized hyperparameters
# -----------------------------
# If run directly, ``nni.get_next_parameter()`` is a no-op and returns an empty dict.
# But with an NNI *experiment*, it will receive optimized hyperparameters from the tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
print(params)
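# %%
# When this script is run standalone, ``optimized_params`` is an empty dict,
# so the ``print`` above shows the defaults::
#
#     {'features': 512, 'lr': 0.001, 'momentum': 0}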
# %%
# Load dataset
# ------------
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
batch_size = 64
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
# %%
# Build model with hyperparameters
# --------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, params['features']),
            nn.ReLU(),
            nn.Linear(params['features'], params['features']),
            nn.ReLU(),
            nn.Linear(params['features'], 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])
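# %%
# As a quick sanity check of the architecture (a sketch; FashionMNIST images
# are 1x28x28 and there are 10 classes)::
#
#     dummy = torch.zeros(1, 1, 28, 28, device=device)
#     assert model(dummy).shape == (1, 10)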
# %%
# Define train() and test()
# -------------------------
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return correct
# %%
# Train the model and report accuracy
# -----------------------------------
# Report accuracy to NNI so the tuning algorithm can predict the best hyperparameters.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    accuracy = test(test_dataloader, model, loss_fn)
    nni.report_intermediate_result(accuracy)
nni.report_final_result(accuracy)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Provide the ``nnictl hello`` command to generate a quickstart example.
"""
from pathlib import Path
import shutil
from colorama import Fore
import nni_assets
def create_example(_args):
    example_path = Path(nni_assets.__path__[0], 'hello_hpo')
    try:
        shutil.copytree(example_path, 'nni_hello_hpo')
    except PermissionError:
        print(Fore.RED + 'Permission denied. Please run the command in a writable directory.' + Fore.RESET)
        exit(1)
    except FileExistsError:
        print('File exists. Please run "python nni_hello_hpo/main.py" to start the example.')
        exit(1)
    print('A hyperparameter optimization example has been created at "nni_hello_hpo" directory.')
    print('Please run "python nni_hello_hpo/main.py" to try it out.')
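# Example shell session (a sketch, assuming NNI is installed):
#
#     $ nnictl hello
#     A hyperparameter optimization example has been created at "nni_hello_hpo" directory.
#     Please run "python nni_hello_hpo/main.py" to try it out.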
@@ -16,7 +16,8 @@ from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment
     save_experiment, load_experiment
 from .algo_management import algo_reg, algo_unreg, algo_show, algo_list
 from .constants import DEFAULT_REST_PORT
-from .import ts_management
+from . import hello
+from . import ts_management

 init(autoreset=True)
@@ -483,6 +484,10 @@ def get_parser():
     jupyter_uninstall_parser = jupyter_subparsers.add_parser('uninstall', description='Uninstall JupyterLab extension.')
     jupyter_uninstall_parser.set_defaults(func=_jupyter_uninstall)

+    # hello command
+    parser_hello = subparsers.add_parser('hello', description='Create "hello nni" example in current directory.')
+    parser_hello.set_defaults(func=hello.create_example)
+
     return parser
...
"""
NNI hyperparameter optimization example.
Check the online tutorial for details:
https://nni.readthedocs.io/en/stable/tutorials/hpo_quickstart_pytorch/main.html
"""
from pathlib import Path
import signal
from nni.experiment import Experiment
# Define search space
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}
# Configure experiment
experiment = Experiment('local')
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = Path(__file__).parent
experiment.config.search_space = search_space
experiment.config.tuner.name = 'Random'
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 2
# Run it!
experiment.run(port=8080, wait_completion=False)
print('Experiment is running. Press Ctrl-C to quit.')
signal.pause()  # wait here until interrupted (signal.pause() is Unix-only)
"""
Run main.py to start.
This script is modified from PyTorch quickstart:
https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
import nni
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Get optimized hyperparameters
params = {'features': 512, 'lr': 0.001, 'momentum': 0}
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
# Load dataset
training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# Build model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28, params['features']),
    nn.ReLU(),
    nn.Linear(params['features'], params['features']),
    nn.ReLU(),
    nn.Linear(params['features'], 10)
).to(device)
# Training functions
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])
def train(dataloader, model, loss_fn, optimizer):
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test(dataloader, model, loss_fn):
    model.eval()
    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    return correct / len(dataloader.dataset)
# Train the model
epochs = 5
for t in range(epochs):
    train(train_dataloader, model, loss_fn, optimizer)
    accuracy = test(test_dataloader, model, loss_fn)
    nni.report_intermediate_result(accuracy)
nni.report_final_result(accuracy)
@@ -112,6 +112,7 @@ def _setup():
         packages = _find_python_packages(),
         package_data = {
             'nni': _find_requirements_txt() + _find_default_config(),  # setuptools issue #1806
+            'nni_assets': _find_asset_files(),
             'nni_node': _find_node_files()  # note: this does not work before building
         },
@@ -165,6 +166,14 @@ def _find_requirements_txt():
 def _find_default_config():
     return ['runtime/default_config/' + name for name in os.listdir('nni/runtime/default_config')]

+def _find_asset_files():
+    files = []
+    for dirpath, dirnames, filenames in os.walk('nni_assets'):
+        for filename in filenames:
+            if os.path.splitext(filename)[1] == '.py':
+                files.append(os.path.join(dirpath[len('nni_assets/'):], filename))
+    return sorted(files)
+
 def _find_node_files():
     if not os.path.exists('nni_node'):
         if release and 'build_ts' not in sys.argv and 'clean' not in sys.argv:
@@ -279,7 +288,10 @@ _temp_files = [
     'test/model_path/',
     'test/temp.json',
     'test/ut/sdk/*.pth',
-    'test/ut/tools/annotation/_generated/'
+    'test/ut/tools/annotation/_generated/',
+
+    # example
+    'nni_assets/**/data/',
 ]
...