Unverified commit 51d261e7 authored by J-shang, committed by GitHub

Merge pull request #4668 from microsoft/doc-refactor

parents d63a2ea3 b469e1c1
.. 19ce4f2ee1d3c4f1be277ab09ba40092
.. e973987e22c5e2d43f325d6f29717ecb
:orphan:
Reference
==================
@@ -8,11 +10,5 @@
nnictl Commands <reference/nnictl>
Experiment Configuration <reference/experiment_config>
Experiment Configuration (Legacy) <Tutorial/ExperimentConfig>
Search Space <Tutorial/SearchSpaceSpec>
NNI Annotation <Tutorial/AnnotationSpec>
SDK API Reference <sdk_reference>
API Reference <reference/python_api_ref>
Supported Frameworks and Libraries <SupportedFramework_Library>
Launch an Experiment from Python <Tutorial/HowToLaunchFromPython>
Shared Storage <Tutorial/HowToUseSharedStorage>
TensorBoard <Tutorial/Tensorboard>
/* HPO */
@article{bergstra2011algorithms,
title={Algorithms for hyper-parameter optimization},
author={Bergstra, James and Bardenet, R{\'e}mi and Bengio, Yoshua and K{\'e}gl, Bal{\'a}zs},
journal={Advances in neural information processing systems},
volume={24},
year={2011}
}
@inproceedings{li2018metis,
title={Metis: Robustly tuning tail latencies of cloud systems},
author={Li, Zhao Lucis and Liang, Chieh-Jan Mike and He, Wenjia and Zhu, Lianjie and Dai, Wenjun and Jiang, Jin and Sun, Guangzhong},
booktitle={2018 USENIX Annual Technical Conference (USENIX ATC 18)},
pages={981--992},
year={2018}
}
@inproceedings{hutter2011sequential,
title={Sequential model-based optimization for general algorithm configuration},
author={Hutter, Frank and Hoos, Holger H and Leyton-Brown, Kevin},
booktitle={International conference on learning and intelligent optimization},
pages={507--523},
year={2011},
organization={Springer}
}
@article{li2017hyperband,
title={Hyperband: A novel bandit-based approach to hyperparameter optimization},
author={Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
journal={The Journal of Machine Learning Research},
volume={18},
number={1},
pages={6765--6816},
year={2017},
publisher={JMLR.org}
}
@inproceedings{falkner2018bohb,
title={BOHB: Robust and efficient hyperparameter optimization at scale},
author={Falkner, Stefan and Klein, Aaron and Hutter, Frank},
booktitle={International Conference on Machine Learning},
pages={1437--1446},
year={2018},
organization={PMLR}
}
/* NAS */
@inproceedings{zoph2017neural,
title={Neural Architecture Search with Reinforcement Learning},
author={Zoph, Barret and Le, Quoc V},
booktitle={International Conference on Learning Representations},
year={2017}
}
@inproceedings{zoph2018learning,
title={Learning transferable architectures for scalable image recognition},
author={Zoph, Barret and Vasudevan, Vijay and Shlens, Jonathon and Le, Quoc V},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={8697--8710},
year={2018}
}
@inproceedings{liu2018darts,
title={DARTS: Differentiable Architecture Search},
author={Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
booktitle={International Conference on Learning Representations},
year={2019}
}
@inproceedings{cai2018proxylessnas,
title={ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware},
author={Cai, Han and Zhu, Ligeng and Han, Song},
booktitle={International Conference on Learning Representations},
year={2019}
}
@inproceedings{xie2018snas,
title={SNAS: stochastic neural architecture search},
author={Xie, Sirui and Zheng, Hehui and Liu, Chunxiao and Lin, Liang},
booktitle={International Conference on Learning Representations},
year={2019}
}
@inproceedings{pham2018efficient,
title={Efficient neural architecture search via parameters sharing},
author={Pham, Hieu and Guan, Melody and Zoph, Barret and Le, Quoc and Dean, Jeff},
booktitle={International conference on machine learning},
pages={4095--4104},
year={2018},
organization={PMLR}
}
@inproceedings{radosavovic2019network,
title={On network design spaces for visual recognition},
author={Radosavovic, Ilija and Johnson, Justin and Xie, Saining and Lo, Wan-Yen and Doll{\'a}r, Piotr},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={1882--1890},
year={2019}
}
@inproceedings{ying2019bench,
title={NAS-Bench-101: Towards reproducible neural architecture search},
author={Ying, Chris and Klein, Aaron and Christiansen, Eric and Real, Esteban and Murphy, Kevin and Hutter, Frank},
booktitle={International Conference on Machine Learning},
pages={7105--7114},
year={2019},
organization={PMLR}
}
@inproceedings{dong2019bench,
title={NAS-Bench-201: Extending the Scope of Reproducible Neural Architecture Search},
author={Dong, Xuanyi and Yang, Yi},
booktitle={International Conference on Learning Representations},
year={2020}
}
####################
Python API Reference
####################
.. toctree::
:maxdepth: 1
Auto Tune <autotune_ref>
NAS <NAS/ApiReference>
Compression <Compression/CompressionReference>
Python API <Tutorial/HowToLaunchFromPython>
\ No newline at end of file
.. 60cb924d0ec522b7709acf4f8cff3f16
####################
Python API Reference
####################
.. toctree::
:maxdepth: 1
Auto Tune <autotune_ref>
NAS <NAS/ApiReference>
Model Compression <Compression/CompressionReference>
Python API <Tutorial/HowToLaunchFromPython>
\ No newline at end of file
.. 4e054d96c7d211dc514c99d673415d8e
Training Services Supported by NNI
=====================================
.. toctree::
Overview <./TrainingService/Overview>
Local <./TrainingService/LocalMode>
Remote <./TrainingService/RemoteMachineMode>
OpenPAI <./TrainingService/PaiMode>
Kubeflow <./TrainingService/KubeflowMode>
AdaptDL <./TrainingService/AdaptDLMode>
FrameworkController <./TrainingService/FrameworkControllerMode>
DLTS <./TrainingService/DLTSMode>
AML <./TrainingService/AMLMode>
PAI-DLC <./TrainingService/DLCMode>
Hybrid <./TrainingService/HybridMode>
@@ -8,7 +8,12 @@ Tutorials
:hidden:
tutorials/nni_experiment
tutorials/nas_quick_start_mnist
tutorials/hello_nas
tutorials/nasbench_as_dataset
tutorials/pruning_quick_start_mnist
tutorials/pruning_speed_up
tutorials/quantization_quick_start_mnist
tutorials/quantization_speed_up
.. ----------------------
@@ -20,9 +25,63 @@ Tutorials
:tags: Experiment/HPO
.. cardlinkitem::
:header: HPO Quickstart with PyTorch
:description: Use HPO to tune a PyTorch FashionMNIST model
:link: tutorials/hpo_quickstart_pytorch/main.html
:image: ../img/thumbnails/overview-33.png
:tags: HPO
.. cardlinkitem::
:header: HPO Quickstart with TensorFlow
:description: Use HPO to tune a TensorFlow MNIST model
:link: tutorials/hpo_quickstart_tensorflow/main.html
:image: ../img/thumbnails/overview-33.png
:tags: HPO
.. cardlinkitem::
:header: Hello, NAS!
:description: A beginner's NAS tutorial on searching for neural architectures for the MNIST dataset.
:link: tutorials/hello_nas.html
:image: ../img/thumbnails/overview-30.png
:background: cyan
:tags: NAS
.. cardlinkitem::
:header: Use NAS Benchmarks as Datasets
:description: Query data from popular NAS benchmarks in our preprocessed benchmark database.
:link: tutorials/nasbench_as_dataset.html
:image: ../img/thumbnails/overview-30.png
:background: pink
:tags: NAS
.. cardlinkitem::
:header: Get Started with Model Pruning on MNIST
:description: Familiarize yourself with pruning to compress your model
:link: tutorials/pruning_quick_start_mnist.html
:image: ../img/thumbnails/overview-29.png
:background: cyan
:tags: Compression
.. cardlinkitem::
:header: Get Started with Model Quantization on MNIST
:description: Familiarize yourself with quantization to compress your model
:link: tutorials/quantization_quick_start_mnist.html
:image: ../img/thumbnails/overview-29.png
:background: cyan
:tags: Compression
.. cardlinkitem::
:header: Speed Up Model with Mask
:description: Make your model smaller and faster by applying speed-up after it has been pruned by a pruner
:link: tutorials/pruning_speed_up.html
:image: ../img/thumbnails/overview-29.png
:background: cyan
:tags: Compression
.. cardlinkitem::
:header: Speed Up Model with Calibration Config
:description: Make your model smaller and faster by applying speed-up after it has been quantized by a quantizer
:link: tutorials/quantization_speed_up.html
:image: ../img/thumbnails/overview-29.png
:background: cyan
:tags: Compression
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Hello, NAS!\n\nThis is the 101 tutorial of Neural Architecture Search (NAS) on NNI.\nIn this tutorial, we will search for a neural architecture on MNIST dataset with the help of NAS framework of NNI, i.e., *Retiarii*.\nWe use multi-trial NAS as an example to show how to construct and explore a model space.\n\nThere are mainly three crucial components for a neural architecture search task, namely,\n\n* Model search space that defines a set of models to explore.\n* A proper strategy as the method to explore this model space.\n* A model evaluator that reports the performance of every model in the space.\n\nCurrently, PyTorch is the only supported framework by Retiarii, and we have only tested **PyTorch 1.7 to 1.10**.\nThis tutorial assumes PyTorch context but it should also apply to other frameworks, which is in our future plan.\n\n## Define your Model Space\n\nModel space is defined by users to express a set of models that users want to explore, which contains potentially good-performing models.\nIn this framework, a model space is defined with two parts: a base model and possible mutations on the base model.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define Base Model\n\nDefining a base model is almost the same as defining a PyTorch (or TensorFlow) model.\nUsually, you only need to replace the code ``import torch.nn as nn`` with\n``import nni.retiarii.nn.pytorch as nn`` to use our wrapped PyTorch modules.\n\nBelow is a very simple example of defining a base model.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import torch\nimport torch.nn.functional as F\nimport nni.retiarii.nn.pytorch as nn\nfrom nni.retiarii import model_wrapper\n\n\n@model_wrapper # this decorator should be put on the out most\nclass Net(nn.Module):\n def __init__(self):\n super().__init__()\n self.conv1 = nn.Conv2d(1, 32, 3, 1)\n self.conv2 = nn.Conv2d(32, 64, 3, 1)\n self.dropout1 = nn.Dropout(0.25)\n self.dropout2 = nn.Dropout(0.5)\n self.fc1 = nn.Linear(9216, 128)\n self.fc2 = nn.Linear(128, 10)\n\n def forward(self, x):\n x = F.relu(self.conv1(x))\n x = F.max_pool2d(self.conv2(x), 2)\n x = torch.flatten(self.dropout1(x), 1)\n x = self.fc2(self.dropout2(F.relu(self.fc1(x))))\n output = F.log_softmax(x, dim=1)\n return output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
".. tip:: Always keep in mind that you should use ``import nni.retiarii.nn.pytorch as nn`` and :meth:`nni.retiarii.model_wrapper`.\n Many mistakes are a result of forgetting one of those.\n Also, please use ``torch.nn`` for submodules of ``nn.init``, e.g., ``torch.nn.init`` instead of ``nn.init``.\n\n### Define Model Mutations\n\nA base model is only one concrete model not a model space. We provide :doc:`API and Primitives </nas/construct_space>`\nfor users to express how the base model can be mutated. That is, to build a model space which includes many models.\n\nBased on the above base model, we can define a model space as below.\n\n.. code-block:: diff\n\n @model_wrapper\n class Net(nn.Module):\n def __init__(self):\n super().__init__()\n self.conv1 = nn.Conv2d(1, 32, 3, 1)\n - self.conv2 = nn.Conv2d(32, 64, 3, 1)\n + self.conv2 = nn.LayerChoice([\n + nn.Conv2d(32, 64, 3, 1),\n + DepthwiseSeparableConv(32, 64)\n + ])\n - self.dropout1 = nn.Dropout(0.25)\n + self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))\n self.dropout2 = nn.Dropout(0.5)\n - self.fc1 = nn.Linear(9216, 128)\n - self.fc2 = nn.Linear(128, 10)\n + feature = nn.ValueChoice([64, 128, 256])\n + self.fc1 = nn.Linear(9216, feature)\n + self.fc2 = nn.Linear(feature, 10)\n\n def forward(self, x):\n x = F.relu(self.conv1(x))\n x = F.max_pool2d(self.conv2(x), 2)\n x = torch.flatten(self.dropout1(x), 1)\n x = self.fc2(self.dropout2(F.relu(self.fc1(x))))\n output = F.log_softmax(x, dim=1)\n return output\n\nThis results in the following code:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class DepthwiseSeparableConv(nn.Module):\n def __init__(self, in_ch, out_ch):\n super().__init__()\n self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, groups=in_ch)\n self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1)\n\n def forward(self, x):\n return self.pointwise(self.depthwise(x))\n\n\n@model_wrapper\nclass ModelSpace(nn.Module):\n def __init__(self):\n super().__init__()\n self.conv1 = nn.Conv2d(1, 32, 3, 1)\n # LayerChoice is used to select a layer between Conv2d and DwConv.\n self.conv2 = nn.LayerChoice([\n nn.Conv2d(32, 64, 3, 1),\n DepthwiseSeparableConv(32, 64)\n ])\n # ValueChoice is used to select a dropout rate.\n # ValueChoice can be used as parameter of modules wrapped in `nni.retiarii.nn.pytorch`\n # or customized modules wrapped with `@basic_unit`.\n self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75])) # choose dropout rate from 0.25, 0.5 and 0.75\n self.dropout2 = nn.Dropout(0.5)\n feature = nn.ValueChoice([64, 128, 256])\n self.fc1 = nn.Linear(9216, feature)\n self.fc2 = nn.Linear(feature, 10)\n\n def forward(self, x):\n x = F.relu(self.conv1(x))\n x = F.max_pool2d(self.conv2(x), 2)\n x = torch.flatten(self.dropout1(x), 1)\n x = self.fc2(self.dropout2(F.relu(self.fc1(x))))\n output = F.log_softmax(x, dim=1)\n return output\n\n\nmodel_space = ModelSpace()\nmodel_space"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This example uses two mutation APIs, ``nn.LayerChoice`` and ``nn.ValueChoice``.\n``nn.LayerChoice`` takes a list of candidate modules (two in this example), one will be chosen for each sampled model.\nIt can be used like normal PyTorch module.\n``nn.ValueChoice`` takes a list of candidate values, one will be chosen to take effect for each sampled model.\n\nMore detailed API description and usage can be found :doc:`here </nas/construct_space>`.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>We are actively enriching the mutation APIs, to facilitate easy construction of model space.\n If the currently supported mutation APIs cannot express your model space,\n please refer to :doc:`this doc </nas/mutator>` for customizing mutators.</p></div>\n\n## Explore the Defined Model Space\n\nThere are basically two exploration approaches: (1) search by evaluating each sampled model independently,\nwhich is the search approach in `multi-trial NAS <multi-trial-nas>`\nand (2) one-shot weight-sharing based search, which is used in one-shot NAS.\nWe demonstrate the first approach in this tutorial. Users can refer to `here <one-shot-nas>` for the second approach.\n\nFirst, users need to pick a proper exploration strategy to explore the defined model space.\nSecond, users need to pick or customize a model evaluator to evaluate the performance of each explored model.\n\n### Pick an exploration strategy\n\nRetiarii supports many :doc:`exploration strategies </nas/exploration_strategy>`.\n\nSimply choosing (i.e., instantiate) an exploration strategy as below.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import nni.retiarii.strategy as strategy\nsearch_strategy = strategy.Random(dedup=True) # dedup=False if deduplication is not wanted"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pick or customize a model evaluator\n\nIn the exploration process, the exploration strategy repeatedly generates new models. A model evaluator is for training\nand validating each generated model to obtain the model's performance.\nThe performance is sent to the exploration strategy for the strategy to generate better models.\n\nRetiarii has provided :doc:`built-in model evaluators </nas/evaluator>`, but to start with,\nit is recommended to use ``FunctionalEvaluator``, that is, to wrap your own training and evaluation code with one single function.\nThis function should receive one single model class and uses ``nni.report_final_result`` to report the final score of this model.\n\nAn example here creates a simple evaluator that runs on MNIST dataset, trains for 2 epochs, and reports its validation accuracy.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import nni\n\nfrom torchvision import transforms\nfrom torchvision.datasets import MNIST\nfrom torch.utils.data import DataLoader\n\n\ndef train_epoch(model, device, train_loader, optimizer, epoch):\n loss_fn = torch.nn.CrossEntropyLoss()\n model.train()\n for batch_idx, (data, target) in enumerate(train_loader):\n data, target = data.to(device), target.to(device)\n optimizer.zero_grad()\n output = model(data)\n loss = loss_fn(output, target)\n loss.backward()\n optimizer.step()\n if batch_idx % 10 == 0:\n print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n epoch, batch_idx * len(data), len(train_loader.dataset),\n 100. * batch_idx / len(train_loader), loss.item()))\n\n\ndef test_epoch(model, device, test_loader):\n model.eval()\n test_loss = 0\n correct = 0\n with torch.no_grad():\n for data, target in test_loader:\n data, target = data.to(device), target.to(device)\n output = model(data)\n pred = output.argmax(dim=1, keepdim=True)\n correct += pred.eq(target.view_as(pred)).sum().item()\n\n test_loss /= len(test_loader.dataset)\n accuracy = 100. * correct / len(test_loader.dataset)\n\n print('\\nTest set: Accuracy: {}/{} ({:.0f}%)\\n'.format(\n correct, len(test_loader.dataset), accuracy))\n\n return accuracy\n\n\ndef evaluate_model(model_cls):\n # \"model_cls\" is a class, need to instantiate\n model = model_cls()\n\n device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n model.to(device)\n\n optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n transf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])\n train_loader = DataLoader(MNIST('data/mnist', download=True, transform=transf), batch_size=64, shuffle=True)\n test_loader = DataLoader(MNIST('data/mnist', download=True, train=False, transform=transf), batch_size=64)\n\n for epoch in range(3):\n # train the model for one epoch\n train_epoch(model, device, train_loader, optimizer, epoch)\n # test the model for one epoch\n accuracy = test_epoch(model, device, test_loader)\n # call report intermediate result. Result can be float or dict\n nni.report_intermediate_result(accuracy)\n\n # report final test result\n nni.report_final_result(accuracy)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the evaluator\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from nni.retiarii.evaluator import FunctionalEvaluator\nevaluator = FunctionalEvaluator(evaluate_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The ``train_epoch`` and ``test_epoch`` here can be any customized function, where users can write their own training recipe.\n\nIt is recommended that the :doc:``evaluate_model`` here accepts no additional arguments other than ``model_cls``.\nHowever, in the `advanced tutorial </nas/evaluator>`, we will show how to use additional arguments in case you actually need those.\nIn future, we will support mutation on the arguments of evaluators, which is commonly called \"Hyper-parmeter tuning\".\n\n## Launch an Experiment\n\nAfter all the above are prepared, it is time to start an experiment to do the model search. An example is shown below.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig\nexp = RetiariiExperiment(model_space, evaluator, [], search_strategy)\nexp_config = RetiariiExeConfig('local')\nexp_config.experiment_name = 'mnist_search'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following configurations are useful to control how many trials to run at most / at the same time.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"exp_config.max_trial_number = 4 # spawn 4 trials at most\nexp_config.trial_concurrency = 2 # will run two trials concurrently"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remember to set the following config if you want to GPU.\n``use_active_gpu`` should be set true if you wish to use an occupied GPU (possibly running a GUI).\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"exp_config.trial_gpu_number = 1\nexp_config.training_service.use_active_gpu = True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Launch the experiment. The experiment should take several minutes to finish on a workstation with 2 GPUs.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"exp.run(exp_config, 8081)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Users can also run Retiarii Experiment with :doc:`different training services </experiment/training_service>`\nbesides ``local`` training service.\n\n## Visualize the Experiment\n\nUsers can visualize their experiment in the same way as visualizing a normal hyper-parameter tuning experiment.\nFor example, open ``localhost:8081`` in your browser, 8081 is the port that you set in ``exp.run``.\nPlease refer to :doc:`here </experiment/webui>` for details.\n\nWe support visualizing models with 3rd-party visualization engines (like `Netron <https://netron.app/>`__).\nThis can be used by clicking ``Visualization`` in detail panel for each trial.\nNote that current visualization is based on `onnx <https://onnx.ai/>`__ ,\nthus visualization is not feasible if the model cannot be exported into onnx.\n\nBuilt-in evaluators (e.g., Classification) will automatically export the model into a file.\nFor your own evaluator, you need to save your file into ``$NNI_OUTPUT_DIR/model.onnx`` to make this work.\nFor instance,\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\nfrom pathlib import Path\n\n\ndef evaluate_model_with_visualization(model_cls):\n model = model_cls()\n # dump the model into an onnx\n if 'NNI_OUTPUT_DIR' in os.environ:\n dummy_input = torch.zeros(1, 3, 32, 32)\n torch.onnx.export(model, (dummy_input, ),\n Path(os.environ['NNI_OUTPUT_DIR']) / 'model.onnx')\n evaluate_model(model_cls)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Relaunch the experiment, and a button is shown on WebUI.\n\n<img src=\"file://../../img/netron_entrance_webui.png\">\n\n## Export Top Models\n\nUsers can export top models after the exploration is done using ``export_top_models``.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for model_dict in exp.export_top_models(formatter='dict'):\n print(model_dict)\n\n# The output is `json` object which records the mutation actions of the top model.\n# If users want to output source code of the top model, they can use graph-based execution engine for the experiment,\n# by simply adding the following two lines.\n#\n# .. code-block:: python\n#\n# exp_config.execution_engine = 'base'\n# export_formatter = 'code'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
"""
Hello, NAS!
===========
This is the 101 tutorial of Neural Architecture Search (NAS) on NNI.
In this tutorial, we will search for a neural architecture on the MNIST dataset with the help of the NAS framework of NNI, i.e., *Retiarii*.
We use multi-trial NAS as an example to show how to construct and explore a model space.
There are three crucial components for a neural architecture search task, namely,
* Model search space that defines a set of models to explore.
* A proper strategy as the method to explore this model space.
* A model evaluator that reports the performance of every model in the space.
Currently, PyTorch is the only framework supported by Retiarii, and we have only tested **PyTorch 1.7 to 1.10**.
This tutorial assumes a PyTorch context, but supporting other frameworks is in our future plan.
Define your Model Space
-----------------------
A model space is defined by users to express the set of models they want to explore, which contains potentially well-performing models.
In this framework, a model space is defined with two parts: a base model and possible mutations on the base model.
"""
# %%
#
# Define Base Model
# ^^^^^^^^^^^^^^^^^
#
# Defining a base model is almost the same as defining a PyTorch (or TensorFlow) model.
# Usually, you only need to replace the code ``import torch.nn as nn`` with
# ``import nni.retiarii.nn.pytorch as nn`` to use our wrapped PyTorch modules.
#
# Below is a very simple example of defining a base model.
import torch
import torch.nn.functional as F
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
@model_wrapper # this decorator should be put on the outermost class
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(self.conv2(x), 2)
x = torch.flatten(self.dropout1(x), 1)
x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
output = F.log_softmax(x, dim=1)
return output
# %%
# .. tip:: Always keep in mind that you should use ``import nni.retiarii.nn.pytorch as nn`` and :meth:`nni.retiarii.model_wrapper`.
# Many mistakes are a result of forgetting one of those.
# Also, please use ``torch.nn`` for submodules of ``nn.init``, e.g., ``torch.nn.init`` instead of ``nn.init``.
#
# Define Model Mutations
# ^^^^^^^^^^^^^^^^^^^^^^
#
# A base model is only one concrete model, not a model space. We provide :doc:`API and Primitives </nas/construct_space>`
# for users to express how the base model can be mutated. That is, to build a model space which includes many models.
#
# Based on the above base model, we can define a model space as below.
#
# .. code-block:: diff
#
# @model_wrapper
# class Net(nn.Module):
# def __init__(self):
# super().__init__()
# self.conv1 = nn.Conv2d(1, 32, 3, 1)
# - self.conv2 = nn.Conv2d(32, 64, 3, 1)
# + self.conv2 = nn.LayerChoice([
# + nn.Conv2d(32, 64, 3, 1),
# + DepthwiseSeparableConv(32, 64)
# + ])
# - self.dropout1 = nn.Dropout(0.25)
# + self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
# self.dropout2 = nn.Dropout(0.5)
# - self.fc1 = nn.Linear(9216, 128)
# - self.fc2 = nn.Linear(128, 10)
# + feature = nn.ValueChoice([64, 128, 256])
# + self.fc1 = nn.Linear(9216, feature)
# + self.fc2 = nn.Linear(feature, 10)
#
# def forward(self, x):
# x = F.relu(self.conv1(x))
# x = F.max_pool2d(self.conv2(x), 2)
# x = torch.flatten(self.dropout1(x), 1)
# x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
# output = F.log_softmax(x, dim=1)
# return output
#
# This results in the following code:
class DepthwiseSeparableConv(nn.Module):
def __init__(self, in_ch, out_ch):
super().__init__()
self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, groups=in_ch)
self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1)
def forward(self, x):
return self.pointwise(self.depthwise(x))
@model_wrapper
class ModelSpace(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
# LayerChoice is used to select a layer between Conv2d and DwConv.
self.conv2 = nn.LayerChoice([
nn.Conv2d(32, 64, 3, 1),
DepthwiseSeparableConv(32, 64)
])
# ValueChoice is used to select a dropout rate.
# ValueChoice can be used as parameter of modules wrapped in `nni.retiarii.nn.pytorch`
# or customized modules wrapped with `@basic_unit`.
self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75])) # choose dropout rate from 0.25, 0.5 and 0.75
self.dropout2 = nn.Dropout(0.5)
feature = nn.ValueChoice([64, 128, 256])
self.fc1 = nn.Linear(9216, feature)
self.fc2 = nn.Linear(feature, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(self.conv2(x), 2)
x = torch.flatten(self.dropout1(x), 1)
x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
output = F.log_softmax(x, dim=1)
return output
model_space = ModelSpace()
model_space
# %%
# This example uses two mutation APIs, ``nn.LayerChoice`` and ``nn.ValueChoice``.
# ``nn.LayerChoice`` takes a list of candidate modules (two in this example); one will be chosen for each sampled model.
# It can be used like a normal PyTorch module.
# ``nn.ValueChoice`` takes a list of candidate values; one will be chosen to take effect for each sampled model.
#
# More detailed API description and usage can be found :doc:`here </nas/construct_space>`.
#
# .. note::
#
# We are actively enriching the mutation APIs, to facilitate easy construction of model space.
# If the currently supported mutation APIs cannot express your model space,
# please refer to :doc:`this doc </nas/mutator>` for customizing mutators.
#
# Explore the Defined Model Space
# -------------------------------
#
# There are basically two exploration approaches: (1) search by evaluating each sampled model independently,
# which is the search approach in :ref:`multi-trial NAS <multi-trial-nas>`,
# and (2) one-shot weight-sharing based search, which is used in one-shot NAS.
# We demonstrate the first approach in this tutorial. Users can refer to :ref:`here <one-shot-nas>` for the second approach.
#
# First, users need to pick a proper exploration strategy to explore the defined model space.
# Second, users need to pick or customize a model evaluator to evaluate the performance of each explored model.
#
# Pick an exploration strategy
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Retiarii supports many :doc:`exploration strategies </nas/exploration_strategy>`.
#
# Simply choose (i.e., instantiate) an exploration strategy as below.
import nni.retiarii.strategy as strategy
search_strategy = strategy.Random(dedup=True) # dedup=False if deduplication is not wanted
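# %%
# As a hedged aside (not part of the original tutorial): other strategies are
# instantiated in the same way. The names below are assumptions about what this
# NNI version ships; see :doc:`exploration strategies </nas/exploration_strategy>`
# for the authoritative list.
#
# .. code-block:: python
#
#    search_strategy = strategy.GridSearch()            # exhaustive alternative
#    search_strategy = strategy.RegularizedEvolution()  # evolutionary alternative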
# %%
# Pick or customize a model evaluator
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# In the exploration process, the exploration strategy repeatedly generates new models. A model evaluator is for training
# and validating each generated model to obtain the model's performance.
# The performance is sent to the exploration strategy for the strategy to generate better models.
#
# Retiarii has provided :doc:`built-in model evaluators </nas/evaluator>`, but to start with,
# it is recommended to use ``FunctionalEvaluator``, that is, to wrap your own training and evaluation code with one single function.
# This function should receive a single model class and use ``nni.report_final_result`` to report the final score of this model.
#
# An example here creates a simple evaluator that runs on the MNIST dataset, trains for 3 epochs, and reports its validation accuracy.
import nni
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
def train_epoch(model, device, train_loader, optimizer, epoch):
loss_fn = torch.nn.CrossEntropyLoss()
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test_epoch(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
correct, len(test_loader.dataset), accuracy))
return accuracy
def evaluate_model(model_cls):
# "model_cls" is a class, need to instantiate
model = model_cls()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
transf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_loader = DataLoader(MNIST('data/mnist', download=True, transform=transf), batch_size=64, shuffle=True)
test_loader = DataLoader(MNIST('data/mnist', download=True, train=False, transform=transf), batch_size=64)
for epoch in range(3):
# train the model for one epoch
train_epoch(model, device, train_loader, optimizer, epoch)
# test the model for one epoch
accuracy = test_epoch(model, device, test_loader)
# call report intermediate result. Result can be float or dict
nni.report_intermediate_result(accuracy)
# report final test result
nni.report_final_result(accuracy)
# %%
# Create the evaluator
from nni.retiarii.evaluator import FunctionalEvaluator
evaluator = FunctionalEvaluator(evaluate_model)
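# %%
# As a hedged aside (not part of the original tutorial): if your evaluation
# function needs extra knobs, one pattern is to bind them as keyword arguments.
# That ``FunctionalEvaluator`` forwards extra keyword arguments to the function
# is an assumption about this NNI version, and ``num_epochs`` is a hypothetical
# parameter shown only for illustration.
#
# .. code-block:: python
#
#    def evaluate_model_flexible(model_cls, num_epochs=3):
#        ...  # same body as ``evaluate_model``, looping ``num_epochs`` times
#
#    evaluator = FunctionalEvaluator(evaluate_model_flexible, num_epochs=2)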
# %%
#
# The ``train_epoch`` and ``test_epoch`` here can be any customized function, where users can write their own training recipe.
#
# It is recommended that the ``evaluate_model`` here accepts no additional arguments other than ``model_cls``.
# However, in the :doc:`advanced tutorial </nas/evaluator>`, we will show how to use additional arguments in case you actually need those.
# In the future, we will support mutation on the arguments of evaluators, which is commonly called "hyper-parameter tuning".
#
# Launch an Experiment
# --------------------
#
# After all the above are prepared, it is time to start an experiment to do the model search. An example is shown below.
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
exp = RetiariiExperiment(model_space, evaluator, [], search_strategy)
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'mnist_search'
# %%
# The following configurations are useful to control how many trials to run at most / at the same time.
exp_config.max_trial_number = 4 # spawn 4 trials at most
exp_config.trial_concurrency = 2 # will run two trials concurrently
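# %%
# As a hedged aside (not part of the original tutorial): recent NNI releases
# also accept a wall-clock budget on the config object (assumption:
# ``max_experiment_duration`` exists on your version).
#
# .. code-block:: python
#
#    exp_config.max_experiment_duration = '1h'  # stop the experiment after one hour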
# %%
# Remember to set the following config if you want to use GPU.
# ``use_active_gpu`` should be set to true if you wish to use an occupied GPU (possibly one running a GUI).
exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True
# %%
# Launch the experiment. The experiment should take several minutes to finish on a workstation with 2 GPUs.
exp.run(exp_config, 8081)
# %%
# Users can also run the Retiarii experiment with :doc:`different training services </experiment/training_service>`
# besides the ``local`` training service.
#
# Visualize the Experiment
# ------------------------
#
# Users can visualize their experiment in the same way as visualizing a normal hyper-parameter tuning experiment.
# For example, open ``localhost:8081`` in your browser; 8081 is the port that you set in ``exp.run``.
# Please refer to :doc:`here </experiment/webui>` for details.
#
# We support visualizing models with 3rd-party visualization engines (like `Netron <https://netron.app/>`__).
# This can be used by clicking ``Visualization`` in the detail panel for each trial.
# Note that the current visualization is based on `onnx <https://onnx.ai/>`__,
# thus visualization is not feasible if the model cannot be exported into onnx.
#
# Built-in evaluators (e.g., Classification) will automatically export the model into a file.
# For your own evaluator, you need to save your file into ``$NNI_OUTPUT_DIR/model.onnx`` to make this work.
# For instance,
import os
from pathlib import Path
def evaluate_model_with_visualization(model_cls):
model = model_cls()
# dump the model into an onnx
if 'NNI_OUTPUT_DIR' in os.environ:
dummy_input = torch.zeros(1, 3, 32, 32)
torch.onnx.export(model, (dummy_input, ),
Path(os.environ['NNI_OUTPUT_DIR']) / 'model.onnx')
evaluate_model(model_cls)
# %%
# Relaunch the experiment, and a button will appear on the WebUI.
#
# .. image:: ../../img/netron_entrance_webui.png
#
# Export Top Models
# -----------------
#
# Users can export top models after the exploration is done, using ``export_top_models``.
for model_dict in exp.export_top_models(formatter='dict'):
print(model_dict)
# The output is a JSON object which records the mutation actions of the top model.
# If users want to output source code of the top model, they can use the graph-based execution engine for the experiment,
# by simply adding the following two lines.
#
# .. code-block:: python
#
# exp_config.execution_engine = 'base'
# export_formatter = 'code'
6b66fe7afb47bb8f9a4124c8083e2930
\ No newline at end of file
.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "tutorials/hello_nas.py"
.. LINE NUMBERS ARE GIVEN BELOW.
.. only:: html
.. note::
:class: sphx-glr-download-link-note
Click :ref:`here <sphx_glr_download_tutorials_hello_nas.py>`
to download the full example code
.. rst-class:: sphx-glr-example-title
.. _sphx_glr_tutorials_hello_nas.py:
Hello, NAS!
===========
This is the 101 tutorial of Neural Architecture Search (NAS) on NNI.
In this tutorial, we will search for a neural architecture on the MNIST dataset with the help of the NAS framework of NNI, i.e., *Retiarii*.
We use multi-trial NAS as an example to show how to construct and explore a model space.
There are three crucial components for a neural architecture search task, namely,
* Model search space that defines a set of models to explore.
* A proper strategy as the method to explore this model space.
* A model evaluator that reports the performance of every model in the space.
Currently, PyTorch is the only framework supported by Retiarii, and we have only tested **PyTorch 1.7 to 1.10**.
This tutorial assumes a PyTorch context, but supporting other frameworks is in our future plan.
Define your Model Space
-----------------------
A model space is defined by users to express the set of models they want to explore, which contains potentially well-performing models.
In this framework, a model space is defined with two parts: a base model and possible mutations on the base model.
.. GENERATED FROM PYTHON SOURCE LINES 26-34
Define Base Model
^^^^^^^^^^^^^^^^^
Defining a base model is almost the same as defining a PyTorch (or TensorFlow) model.
Usually, you only need to replace the code ``import torch.nn as nn`` with
``import nni.retiarii.nn.pytorch as nn`` to use our wrapped PyTorch modules.
Below is a very simple example of defining a base model.
.. GENERATED FROM PYTHON SOURCE LINES 35-61
.. code-block:: default
import torch
import torch.nn.functional as F
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper
@model_wrapper # this decorator should be put on the outermost class
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(self.conv2(x), 2)
x = torch.flatten(self.dropout1(x), 1)
x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
output = F.log_softmax(x, dim=1)
return output
.. GENERATED FROM PYTHON SOURCE LINES 62-104
.. tip:: Always keep in mind that you should use ``import nni.retiarii.nn.pytorch as nn`` and :meth:`nni.retiarii.model_wrapper`.
Many mistakes are a result of forgetting one of those.
Also, please use ``torch.nn`` for submodules of ``nn.init``, e.g., ``torch.nn.init`` instead of ``nn.init``.
Define Model Mutations
^^^^^^^^^^^^^^^^^^^^^^
A base model is only one concrete model, not a model space. We provide :doc:`API and Primitives </nas/construct_space>`
for users to express how the base model can be mutated. That is, to build a model space which includes many models.
Based on the above base model, we can define a model space as below.
.. code-block:: diff
@model_wrapper
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
- self.conv2 = nn.Conv2d(32, 64, 3, 1)
+ self.conv2 = nn.LayerChoice([
+ nn.Conv2d(32, 64, 3, 1),
+ DepthwiseSeparableConv(32, 64)
+ ])
- self.dropout1 = nn.Dropout(0.25)
+ self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
self.dropout2 = nn.Dropout(0.5)
- self.fc1 = nn.Linear(9216, 128)
- self.fc2 = nn.Linear(128, 10)
+ feature = nn.ValueChoice([64, 128, 256])
+ self.fc1 = nn.Linear(9216, feature)
+ self.fc2 = nn.Linear(feature, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(self.conv2(x), 2)
x = torch.flatten(self.dropout1(x), 1)
x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
output = F.log_softmax(x, dim=1)
return output
This results in the following code:
.. GENERATED FROM PYTHON SOURCE LINES 104-147
.. code-block:: default
class DepthwiseSeparableConv(nn.Module):
def __init__(self, in_ch, out_ch):
super().__init__()
self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, groups=in_ch)
self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1)
def forward(self, x):
return self.pointwise(self.depthwise(x))
@model_wrapper
class ModelSpace(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
# LayerChoice is used to select a layer between Conv2d and DwConv.
self.conv2 = nn.LayerChoice([
nn.Conv2d(32, 64, 3, 1),
DepthwiseSeparableConv(32, 64)
])
# ValueChoice is used to select a dropout rate.
# ValueChoice can be used as parameter of modules wrapped in `nni.retiarii.nn.pytorch`
# or customized modules wrapped with `@basic_unit`.
self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75])) # choose dropout rate from 0.25, 0.5 and 0.75
self.dropout2 = nn.Dropout(0.5)
feature = nn.ValueChoice([64, 128, 256])
self.fc1 = nn.Linear(9216, feature)
self.fc2 = nn.Linear(feature, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(self.conv2(x), 2)
x = torch.flatten(self.dropout1(x), 1)
x = self.fc2(self.dropout2(F.relu(self.fc1(x))))
output = F.log_softmax(x, dim=1)
return output
model_space = ModelSpace()
model_space
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
ModelSpace(
(conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
(conv2): LayerChoice([Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1)), DepthwiseSeparableConv(
(depthwise): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32)
(pointwise): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
)], label='model_1')
(dropout1): Dropout(p=0.25, inplace=False)
(dropout2): Dropout(p=0.5, inplace=False)
(fc1): Linear(in_features=9216, out_features=64, bias=True)
(fc2): Linear(in_features=64, out_features=10, bias=True)
)
.. GENERATED FROM PYTHON SOURCE LINES 148-178
This example uses two mutation APIs, ``nn.LayerChoice`` and ``nn.ValueChoice``.
``nn.LayerChoice`` takes a list of candidate modules (two in this example); one will be chosen for each sampled model.
It can be used like a normal PyTorch module.
``nn.ValueChoice`` takes a list of candidate values; one will be chosen to take effect for each sampled model.
More detailed API description and usage can be found :doc:`here </nas/construct_space>`.
.. note::
We are actively enriching the mutation APIs, to facilitate easy construction of model space.
If the currently supported mutation APIs cannot express your model space,
please refer to :doc:`this doc </nas/mutator>` for customizing mutators.
Explore the Defined Model Space
-------------------------------
There are basically two exploration approaches: (1) search by evaluating each sampled model independently,
which is the search approach in :ref:`multi-trial NAS <multi-trial-nas>`,
and (2) one-shot weight-sharing based search, which is used in one-shot NAS.
We demonstrate the first approach in this tutorial. Users can refer to :ref:`here <one-shot-nas>` for the second approach.
First, users need to pick a proper exploration strategy to explore the defined model space.
Second, users need to pick or customize a model evaluator to evaluate the performance of each explored model.
Pick an exploration strategy
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Retiarii supports many :doc:`exploration strategies </nas/exploration_strategy>`.
Simply choose (i.e., instantiate) an exploration strategy as below.
.. GENERATED FROM PYTHON SOURCE LINES 178-182
.. code-block:: default
import nni.retiarii.strategy as strategy
search_strategy = strategy.Random(dedup=True) # dedup=False if deduplication is not wanted
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
[2022-02-28 14:01:11] INFO (hyperopt.utils/MainThread) Failed to load dill, try installing dill via "pip install dill" for enhanced pickling support.
[2022-02-28 14:01:11] INFO (hyperopt.fmin/MainThread) Failed to load dill, try installing dill via "pip install dill" for enhanced pickling support.
/home/yugzhan/miniconda3/envs/cu102/lib/python3.8/site-packages/ray/autoscaler/_private/cli_logger.py:57: FutureWarning: Not all Ray CLI dependencies were found. In Ray 1.4+, the Ray CLI, autoscaler, and dashboard will only be usable via `pip install 'ray[default]'`. Please update your install command.
warnings.warn(
.. GENERATED FROM PYTHON SOURCE LINES 183-195
Pick or customize a model evaluator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In the exploration process, the exploration strategy repeatedly generates new models. A model evaluator is for training
and validating each generated model to obtain the model's performance.
The performance is sent to the exploration strategy for the strategy to generate better models.
Retiarii has provided :doc:`built-in model evaluators </nas/evaluator>`, but to start with,
it is recommended to use ``FunctionalEvaluator``, that is, to wrap your own training and evaluation code with one single function.
This function should receive a single model class and use ``nni.report_final_result`` to report the final score of this model.
An example here creates a simple evaluator that runs on the MNIST dataset, trains for 3 epochs, and reports its validation accuracy.
.. GENERATED FROM PYTHON SOURCE LINES 195-263
.. code-block:: default
import nni
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
def train_epoch(model, device, train_loader, optimizer, epoch):
loss_fn = torch.nn.CrossEntropyLoss()
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test_epoch(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
correct, len(test_loader.dataset), accuracy))
return accuracy
def evaluate_model(model_cls):
# "model_cls" is a class, need to instantiate
model = model_cls()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
transf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_loader = DataLoader(MNIST('data/mnist', download=True, transform=transf), batch_size=64, shuffle=True)
test_loader = DataLoader(MNIST('data/mnist', download=True, train=False, transform=transf), batch_size=64)
for epoch in range(3):
# train the model for one epoch
train_epoch(model, device, train_loader, optimizer, epoch)
# test the model for one epoch
accuracy = test_epoch(model, device, test_loader)
# call report intermediate result. Result can be float or dict
nni.report_intermediate_result(accuracy)
# report final test result
nni.report_final_result(accuracy)
.. GENERATED FROM PYTHON SOURCE LINES 264-265
Create the evaluator
.. GENERATED FROM PYTHON SOURCE LINES 265-269
.. code-block:: default
from nni.retiarii.evaluator import FunctionalEvaluator
evaluator = FunctionalEvaluator(evaluate_model)
.. GENERATED FROM PYTHON SOURCE LINES 270-280
The ``train_epoch`` and ``test_epoch`` here can be any customized function, where users can write their own training recipe.
It is recommended that the ``evaluate_model`` here accepts no additional arguments other than ``model_cls``.
However, in the :doc:`advanced tutorial </nas/evaluator>`, we will show how to use additional arguments in case you actually need those.
In the future, we will support mutation on the arguments of evaluators, which is commonly called "hyper-parameter tuning".
Launch an Experiment
--------------------
After all the above are prepared, it is time to start an experiment to do the model search. An example is shown below.
.. GENERATED FROM PYTHON SOURCE LINES 281-287
.. code-block:: default
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
exp = RetiariiExperiment(model_space, evaluator, [], search_strategy)
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'mnist_search'
.. GENERATED FROM PYTHON SOURCE LINES 288-289
The following configurations are useful to control how many trials to run at most / at the same time.
.. GENERATED FROM PYTHON SOURCE LINES 289-293
.. code-block:: default
exp_config.max_trial_number = 4 # spawn 4 trials at most
exp_config.trial_concurrency = 2 # will run two trials concurrently
.. GENERATED FROM PYTHON SOURCE LINES 294-296
Remember to set the following config if you want to use GPU.
``use_active_gpu`` should be set to true if you wish to use an occupied GPU (possibly one running a GUI).
.. GENERATED FROM PYTHON SOURCE LINES 296-300
.. code-block:: default
exp_config.trial_gpu_number = 1
exp_config.training_service.use_active_gpu = True
.. GENERATED FROM PYTHON SOURCE LINES 301-302
Launch the experiment. The experiment should take several minutes to finish on a workstation with 2 GPUs.
.. GENERATED FROM PYTHON SOURCE LINES 302-305
.. code-block:: default
exp.run(exp_config, 8081)
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
[2022-02-28 14:01:13] INFO (nni.experiment/MainThread) Creating experiment, Experiment ID: dt84p16a
[2022-02-28 14:01:13] INFO (nni.experiment/MainThread) Connecting IPC pipe...
[2022-02-28 14:01:14] INFO (nni.experiment/MainThread) Starting web server...
[2022-02-28 14:01:15] INFO (nni.experiment/MainThread) Setting up...
[2022-02-28 14:01:15] INFO (nni.runtime.msg_dispatcher_base/Thread-3) Dispatcher started
[2022-02-28 14:01:15] INFO (nni.retiarii.experiment.pytorch/MainThread) Web UI URLs: http://127.0.0.1:8081 http://10.190.172.35:8081 http://192.168.49.1:8081 http://172.17.0.1:8081
[2022-02-28 14:01:15] INFO (nni.retiarii.experiment.pytorch/MainThread) Start strategy...
[2022-02-28 14:01:15] INFO (root/MainThread) Successfully update searchSpace.
[2022-02-28 14:01:15] INFO (nni.retiarii.strategy.bruteforce/MainThread) Random search running in fixed size mode. Dedup: on.
[2022-02-28 14:05:16] INFO (nni.retiarii.experiment.pytorch/Thread-4) Stopping experiment, please wait...
[2022-02-28 14:05:16] INFO (nni.retiarii.experiment.pytorch/MainThread) Strategy exit
[2022-02-28 14:05:16] INFO (nni.retiarii.experiment.pytorch/MainThread) Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...
[2022-02-28 14:05:17] INFO (nni.runtime.msg_dispatcher_base/Thread-3) Dispatcher exiting...
[2022-02-28 14:05:17] INFO (nni.retiarii.experiment.pytorch/Thread-4) Experiment stopped
.. GENERATED FROM PYTHON SOURCE LINES 306-324
Users can also run the Retiarii experiment with :doc:`different training services </experiment/training_service>`
besides the ``local`` training service.
Visualize the Experiment
------------------------
Users can visualize their experiment in the same way as visualizing a normal hyper-parameter tuning experiment.
For example, open ``localhost:8081`` in your browser; 8081 is the port that you set in ``exp.run``.
Please refer to :doc:`here </experiment/webui>` for details.
We support visualizing models with 3rd-party visualization engines (like `Netron <https://netron.app/>`__).
This can be used by clicking ``Visualization`` in the detail panel for each trial.
Note that the current visualization is based on `onnx <https://onnx.ai/>`__,
thus visualization is not feasible if the model cannot be exported into onnx.
Built-in evaluators (e.g., Classification) will automatically export the model into a file.
For your own evaluator, you need to save your file into ``$NNI_OUTPUT_DIR/model.onnx`` to make this work.
For instance,
.. GENERATED FROM PYTHON SOURCE LINES 324-338
.. code-block:: default
import os
from pathlib import Path
def evaluate_model_with_visualization(model_cls):
model = model_cls()
# dump the model into an onnx
if 'NNI_OUTPUT_DIR' in os.environ:
dummy_input = torch.zeros(1, 3, 32, 32)
torch.onnx.export(model, (dummy_input, ),
Path(os.environ['NNI_OUTPUT_DIR']) / 'model.onnx')
evaluate_model(model_cls)
.. GENERATED FROM PYTHON SOURCE LINES 339-347
Relaunch the experiment, and a button will appear on the web portal.
.. image:: ../../img/netron_entrance_webui.png
Export Top Models
-----------------
Users can export top models after the exploration is done, using ``export_top_models``.
.. GENERATED FROM PYTHON SOURCE LINES 347-359
.. code-block:: default
for model_dict in exp.export_top_models(formatter='dict'):
print(model_dict)
# The output is a JSON object which records the mutation actions of the top model.
# If users want to output source code of the top model, they can use the graph-based execution engine for the experiment,
# by simply adding the following two lines.
#
# .. code-block:: python
#
# exp_config.execution_engine = 'base'
# export_formatter = 'code'
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
{'model_1': '0', 'model_2': 0.25, 'model_3': 128}
.. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 4 minutes 6.818 seconds)
.. _sphx_glr_download_tutorials_hello_nas.py:
.. only :: html
.. container:: sphx-glr-footer
:class: sphx-glr-footer-example
.. container:: sphx-glr-download sphx-glr-download-python
:download:`Download Python source code: hello_nas.py <hello_nas.py>`
.. container:: sphx-glr-download sphx-glr-download-jupyter
:download:`Download Jupyter notebook: hello_nas.ipynb <hello_nas.ipynb>`
.. only:: html
.. rst-class:: sphx-glr-signature
`Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# NNI HPO Quickstart with PyTorch\nThis tutorial optimizes the model in `official PyTorch quickstart`_ with auto-tuning.\n\nThere is also a :doc:`TensorFlow version<../hpo_quickstart_tensorflow/main>` if you prefer it.\n\nThe tutorial consists of 4 steps: \n\n1. Modify the model for auto-tuning.\n2. Define hyperparameters' search space.\n3. Configure the experiment.\n4. Run the experiment.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Prepare the model\nIn first step, we need to prepare the model to be tuned.\n\nThe model should be put in a separate script.\nIt will be evaluated many times concurrently,\nand possibly will be trained on distributed platforms.\n\nIn this tutorial, the model is defined in :doc:`model.py <model>`.\n\nIn short, it is a PyTorch model with 3 additional API calls:\n\n1. Use :func:`nni.get_next_parameter` to fetch the hyperparameters to be evalutated.\n2. Use :func:`nni.report_intermediate_result` to report per-epoch accuracy metrics.\n3. Use :func:`nni.report_final_result` to report final accuracy.\n\nPlease understand the model code before continue to next step.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Define search space\nIn model code, we have prepared 3 hyperparameters to be tuned:\n*features*, *lr*, and *momentum*.\n\nHere we need to define their *search space* so the tuning algorithm can sample them in desired range.\n\nAssuming we have following prior knowledge for these hyperparameters:\n\n1. *features* should be one of 128, 256, 512, 1024.\n2. *lr* should be a float between 0.0001 and 0.1, and it follows exponential distribution.\n3. *momentum* should be a float between 0 and 1.\n\nIn NNI, the space of *features* is called ``choice``;\nthe space of *lr* is called ``loguniform``;\nand the space of *momentum* is called ``uniform``.\nYou may have noticed, these names are derived from ``numpy.random``.\n\nFor full specification of search space, check :doc:`the reference </hpo/search_space>`.\n\nNow we can define the search space as follow:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"search_space = {\n 'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},\n 'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},\n 'momentum': {'_type': 'uniform', '_value': [0, 1]},\n}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Configure the experiment\nNNI uses an *experiment* to manage the HPO process.\nThe *experiment config* defines how to train the models and how to explore the search space.\n\nIn this tutorial we use a *local* mode experiment,\nwhich means models will be trained on local machine, without using any special training platform.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from nni.experiment import Experiment\nexperiment = Experiment('local')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we start to configure the experiment.\n\n### Configure trial code\nIn NNI evaluation of each hyperparameter set is called a *trial*.\nSo the model script is called *trial code*.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"experiment.config.trial_command = 'python model.py'\nexperiment.config.trial_code_directory = '.'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When ``trial_code_directory`` is a relative path, it relates to current working directory.\nTo run ``main.py`` in a different path, you can set trial code directory to ``Path(__file__).parent``.\n(`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__\nis only available in standard Python, not in Jupyter Notebook.)\n\n.. attention::\n\n If you are using Linux system without Conda,\n you may need to change ``\"python model.py\"`` to ``\"python3 model.py\"``.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure search space\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"experiment.config.search_space = search_space"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure tuning algorithm\nHere we use :doc:`TPE tuner </hpo/tuners>`.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"experiment.config.tuner.name = 'TPE'\nexperiment.config.tuner.class_args['optimize_mode'] = 'maximize'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure how many trials to run\nHere we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 2 sets at a time.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"experiment.config.max_trial_number = 10\nexperiment.config.trial_concurrency = 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<div class=\"alert alert-info\"><h4>Note</h4><p>``max_trial_number`` is set to 10 here for a fast example.\n In real world it should be set to a larger number.\n With default config TPE tuner requires 20 trials to warm up.</p></div>\n\nYou may also set ``max_experiment_duration = '1h'`` to limit running time.\n\nIf neither ``max_trial_number`` nor ``max_experiment_duration`` are set,\nthe experiment will run forever until you press Ctrl-C.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Run the experiment\nNow the experiment is ready. Choose a port and launch it. (Here we use port 8080.)\n\nYou can use the web portal to view experiment status: http://localhost:8080.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"experiment.run(8080)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## After the experiment is done\nEverything is done and it is safe to exit now. The following are optional.\n\nIf you are using standard Python instead of Jupyter Notebook,\nyou can add ``input()`` or ``signal.pause()`` to prevent Python from exiting,\nallowing you to view the web portal after the experiment is done.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# input('Press enter to quit')\nexperiment.stop()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
":meth:`nni.experiment.Experiment.stop` is automatically invoked when Python exits,\nso it can be omitted in your code.\n\nAfter the experiment is stopped, you can run :meth:`nni.experiment.Experiment.view` to restart web portal.\n\n.. tip::\n\n This example uses :doc:`Python API </reference/experiment>` to create experiment.\n\n You can also create and manage experiments with :doc:`command line tool </reference/nnictl>`.\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
"""
NNI HPO Quickstart with PyTorch
===============================
This tutorial optimizes the model in `official PyTorch quickstart`_ with auto-tuning.
There is also a :doc:`TensorFlow version<../hpo_quickstart_tensorflow/main>` if you prefer it.
The tutorial consists of 4 steps:
1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.
.. _official PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
# %%
# Step 1: Prepare the model
# -------------------------
# In the first step, we need to prepare the model to be tuned.
#
# The model should be put in a separate script.
# It will be evaluated many times concurrently,
# and possibly will be trained on distributed platforms.
#
# In this tutorial, the model is defined in :doc:`model.py <model>`.
#
# In short, it is a PyTorch model with 3 additional API calls:
#
# 1. Use :func:`nni.get_next_parameter` to fetch the hyperparameters to be evaluated.
# 2. Use :func:`nni.report_intermediate_result` to report per-epoch accuracy metrics.
# 3. Use :func:`nni.report_final_result` to report final accuracy.
#
# Please make sure you understand the model code before continuing to the next step.
# %%
# Step 2: Define search space
# ---------------------------
# In the model code, we have prepared 3 hyperparameters to be tuned:
# *features*, *lr*, and *momentum*.
#
# Here we need to define their *search space* so the tuning algorithm can sample them in the desired range.
#
# Assume we have the following prior knowledge for these hyperparameters:
#
# 1. *features* should be one of 128, 256, 512, 1024.
# 2. *lr* should be a float between 0.0001 and 0.1, and it follows an exponential distribution.
# 3. *momentum* should be a float between 0 and 1.
#
# In NNI, the space of *features* is called ``choice``;
# the space of *lr* is called ``loguniform``;
# and the space of *momentum* is called ``uniform``.
# As you may have noticed, these names are derived from ``numpy.random``.
#
# For the full specification of search space, check :doc:`the reference </hpo/search_space>`.
#
# Now we can define the search space as follows:
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}
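# %%
# As a side note, the mapping from these space types to ``numpy.random`` draws
# can be sketched as follows (our illustration only; this is not how NNI's
# tuners actually sample):

import numpy as np

_rng = np.random.default_rng(0)
_example = {
    'features': int(_rng.choice([128, 256, 512, 1024])),             # choice
    'lr': float(np.exp(_rng.uniform(np.log(0.0001), np.log(0.1)))),  # loguniform
    'momentum': float(_rng.uniform(0, 1)),                           # uniform
}
print(_example)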
# %%
# Step 3: Configure the experiment
# --------------------------------
# NNI uses an *experiment* to manage the HPO process.
# The *experiment config* defines how to train the models and how to explore the search space.
#
# In this tutorial we use a *local* mode experiment,
# which means models will be trained on the local machine, without using any special training platform.
from nni.experiment import Experiment
experiment = Experiment('local')
# %%
# Now we start to configure the experiment.
#
# Configure trial code
# ^^^^^^^^^^^^^^^^^^^^
# In NNI, the evaluation of each set of hyperparameters is called a *trial*.
# So the model script is called *trial code*.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
# %%
# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``, as sketched below.
# (`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
# is only available in standard Python, not in Jupyter Notebook.)
#
# .. attention::
#
#    If you are using a Linux system without Conda,
# you may need to change ``"python model.py"`` to ``"python3 model.py"``.
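#
# For example, to make the trial code location-independent (our illustration;
# works in standard Python only, since ``__file__`` is unavailable in Jupyter)::
#
#     from pathlib import Path
#     experiment.config.trial_code_directory = Path(__file__).parent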
# %%
# Configure search space
# ^^^^^^^^^^^^^^^^^^^^^^
experiment.config.search_space = search_space
# %%
# Configure tuning algorithm
# ^^^^^^^^^^^^^^^^^^^^^^^^^^
# Here we use :doc:`TPE tuner </hpo/tuners>`.
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
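# %%
# Other built-in tuners can be selected the same way; for instance, to try the
# Random tuner instead (a sketch, shown commented out so this tutorial keeps
# using TPE)::
#
#     experiment.config.tuner.name = 'Random'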
# %%
# Configure how many trials to run
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 2 sets at a time.
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 2
# %%
# .. note::
#
# ``max_trial_number`` is set to 10 here for a fast example.
#    In real-world scenarios it should be set to a larger number.
#    With the default config, the TPE tuner requires 20 trials to warm up.
#
# You may also set ``max_experiment_duration = '1h'`` to limit the running time.
#
# If neither ``max_trial_number`` nor ``max_experiment_duration`` is set,
# the experiment will run forever until you press Ctrl-C.
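#
# For example (a sketch; commented out so this tutorial still stops after 10 trials)::
#
#     experiment.config.max_experiment_duration = '1h'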
# %%
# Step 4: Run the experiment
# --------------------------
# Now the experiment is ready. Choose a port and launch it. (Here we use port 8080.)
#
# You can use the web portal to view experiment status: http://localhost:8080.
experiment.run(8080)
# %%
# After the experiment is done
# ----------------------------
# Everything is done and it is safe to exit now. The following are optional.
#
# If you are using standard Python instead of Jupyter Notebook,
# you can add ``input()`` or ``signal.pause()`` to prevent Python from exiting,
# allowing you to view the web portal after the experiment is done.
# input('Press enter to quit')
experiment.stop()
# %%
# :meth:`nni.experiment.Experiment.stop` is automatically invoked when Python exits,
# so it can be omitted in your code.
#
# After the experiment is stopped, you can run :meth:`nni.experiment.Experiment.view` to restart the web portal.
#
# .. tip::
#
#    This example uses the :doc:`Python API </reference/experiment>` to create the experiment.
#
#    You can also create and manage experiments with the :doc:`command line tool </reference/nnictl>`.
f3498812ae89cde34b6f0f54216012fd
\ No newline at end of file
:orphan:
.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "tutorials/hpo_quickstart_pytorch/main.py"
.. LINE NUMBERS ARE GIVEN BELOW.
.. only:: html
.. note::
:class: sphx-glr-download-link-note
Click :ref:`here <sphx_glr_download_tutorials_hpo_quickstart_pytorch_main.py>`
to download the full example code
.. rst-class:: sphx-glr-example-title
.. _sphx_glr_tutorials_hpo_quickstart_pytorch_main.py:
NNI HPO Quickstart with PyTorch
===============================
This tutorial optimizes the model in `official PyTorch quickstart`_ with auto-tuning.
There is also a :doc:`TensorFlow version<../hpo_quickstart_tensorflow/main>` if you prefer it.
The tutorial consists of 4 steps:
1. Modify the model for auto-tuning.
2. Define hyperparameters' search space.
3. Configure the experiment.
4. Run the experiment.
.. _official PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
.. GENERATED FROM PYTHON SOURCE LINES 19-36
Step 1: Prepare the model
-------------------------
In the first step, we need to prepare the model to be tuned.
The model should be put in a separate script.
It will be evaluated many times concurrently,
and possibly will be trained on distributed platforms.
In this tutorial, the model is defined in :doc:`model.py <model>`.
In short, it is a PyTorch model with 3 additional API calls:
1. Use :func:`nni.get_next_parameter` to fetch the hyperparameters to be evaluated.
2. Use :func:`nni.report_intermediate_result` to report per-epoch accuracy metrics.
3. Use :func:`nni.report_final_result` to report final accuracy.
Please make sure you understand the model code before continuing to the next step.
.. GENERATED FROM PYTHON SOURCE LINES 38-59
Step 2: Define search space
---------------------------
In the model code, we have prepared 3 hyperparameters to be tuned:
*features*, *lr*, and *momentum*.
Here we need to define their *search space* so the tuning algorithm can sample them in the desired range.
Assume we have the following prior knowledge for these hyperparameters:
1. *features* should be one of 128, 256, 512, 1024.
2. *lr* should be a float between 0.0001 and 0.1, and it follows an exponential distribution.
3. *momentum* should be a float between 0 and 1.
In NNI, the space of *features* is called ``choice``;
the space of *lr* is called ``loguniform``;
and the space of *momentum* is called ``uniform``.
As you may have noticed, these names are derived from ``numpy.random``.
For the full specification of search space, check :doc:`the reference </hpo/search_space>`.
Now we can define the search space as follows:
.. GENERATED FROM PYTHON SOURCE LINES 59-66
.. code-block:: default
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}
.. GENERATED FROM PYTHON SOURCE LINES 67-74
Step 3: Configure the experiment
--------------------------------
NNI uses an *experiment* to manage the HPO process.
The *experiment config* defines how to train the models and how to explore the search space.
In this tutorial we use a *local* mode experiment,
which means models will be trained on the local machine, without using any special training platform.
.. GENERATED FROM PYTHON SOURCE LINES 74-77
.. code-block:: default
from nni.experiment import Experiment
experiment = Experiment('local')
.. GENERATED FROM PYTHON SOURCE LINES 78-84
Now we start to configure the experiment.
Configure trial code
^^^^^^^^^^^^^^^^^^^^
In NNI, the evaluation of each set of hyperparameters is called a *trial*.
So the model script is called *trial code*.
.. GENERATED FROM PYTHON SOURCE LINES 84-86
.. code-block:: default
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
.. GENERATED FROM PYTHON SOURCE LINES 87-96
When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``.
(`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
is only available in standard Python, not in Jupyter Notebook.)
.. attention::
If you are using a Linux system without Conda,
you may need to change ``"python model.py"`` to ``"python3 model.py"``.
.. GENERATED FROM PYTHON SOURCE LINES 98-100
Configure search space
^^^^^^^^^^^^^^^^^^^^^^
.. GENERATED FROM PYTHON SOURCE LINES 100-102
.. code-block:: default
experiment.config.search_space = search_space
.. GENERATED FROM PYTHON SOURCE LINES 103-106
Configure tuning algorithm
^^^^^^^^^^^^^^^^^^^^^^^^^^
Here we use :doc:`TPE tuner </hpo/tuners>`.
.. GENERATED FROM PYTHON SOURCE LINES 106-109
.. code-block:: default
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
.. GENERATED FROM PYTHON SOURCE LINES 110-113
Configure how many trials to run
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 2 sets at a time.
.. GENERATED FROM PYTHON SOURCE LINES 113-115
.. code-block:: default
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 2
.. GENERATED FROM PYTHON SOURCE LINES 116-126
.. note::
``max_trial_number`` is set to 10 here for a fast example.
In real-world scenarios it should be set to a larger number.
With the default config, the TPE tuner requires 20 trials to warm up.
You may also set ``max_experiment_duration = '1h'`` to limit the running time.
If neither ``max_trial_number`` nor ``max_experiment_duration`` is set,
the experiment will run forever until you press Ctrl-C.
.. GENERATED FROM PYTHON SOURCE LINES 128-133
Step 4: Run the experiment
--------------------------
Now the experiment is ready. Choose a port and launch it. (Here we use port 8080.)
You can use the web portal to view experiment status: http://localhost:8080.
.. GENERATED FROM PYTHON SOURCE LINES 133-135
.. code-block:: default
experiment.run(8080)
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
[2022-03-20 21:07:36] Creating experiment, Experiment ID: p43ny6ew
[2022-03-20 21:07:36] Starting web server...
[2022-03-20 21:07:37] Setting up...
[2022-03-20 21:07:37] Web portal URLs: http://127.0.0.1:8080 http://192.168.100.103:8080
True
.. GENERATED FROM PYTHON SOURCE LINES 136-143
After the experiment is done
----------------------------
Everything is done and it is safe to exit now. The following are optional.
If you are using standard Python instead of Jupyter Notebook,
you can add ``input()`` or ``signal.pause()`` to prevent Python from exiting,
allowing you to view the web portal after the experiment is done.
.. GENERATED FROM PYTHON SOURCE LINES 143-147
.. code-block:: default
# input('Press enter to quit')
experiment.stop()
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
[2022-03-20 21:08:57] Stopping experiment, please wait...
[2022-03-20 21:09:00] Experiment stopped
.. GENERATED FROM PYTHON SOURCE LINES 148-158
:meth:`nni.experiment.Experiment.stop` is automatically invoked when Python exits,
so it can be omitted in your code.
After the experiment is stopped, you can run :meth:`nni.experiment.Experiment.view` to restart the web portal.
.. tip::
This example uses the :doc:`Python API </reference/experiment>` to create the experiment.
You can also create and manage experiments with the :doc:`command line tool </reference/nnictl>`.
.. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 1 minutes 24.393 seconds)
.. _sphx_glr_download_tutorials_hpo_quickstart_pytorch_main.py:
.. only :: html
.. container:: sphx-glr-footer
:class: sphx-glr-footer-example
.. container:: sphx-glr-download sphx-glr-download-python
:download:`Download Python source code: main.py <main.py>`
.. container:: sphx-glr-download sphx-glr-download-jupyter
:download:`Download Jupyter notebook: main.ipynb <main.ipynb>`
.. only:: html
.. rst-class:: sphx-glr-signature
`Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Port PyTorch Quickstart to NNI\nThis is a modified version of `PyTorch quickstart`_.\n\nIt can be run directly and will have the exact same result as original version.\n\nFurthermore, it enables the ability of auto tuning with an NNI *experiment*, which will be detailed later.\n\nIt is recommended to run this script directly first to verify the environment.\n\nThere are 2 key differences from the original version:\n\n1. In `Get optimized hyperparameters`_ part, it receives generated hyperparameters.\n2. In `Train model and report accuracy`_ part, it reports accuracy metrics to NNI.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import nni\nimport torch\nfrom torch import nn\nfrom torch.utils.data import DataLoader\nfrom torchvision import datasets\nfrom torchvision.transforms import ToTensor"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hyperparameters to be tuned\nThese are the hyperparameters that will be tuned.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"params = {\n 'features': 512,\n 'lr': 0.001,\n 'momentum': 0,\n}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get optimized hyperparameters\nIf run directly, :func:`nni.get_next_parameter` is a no-op and returns an empty dict.\nBut with an NNI *experiment*, it will receive optimized hyperparameters from tuning algorithm.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"optimized_params = nni.get_next_parameter()\nparams.update(optimized_params)\nprint(params)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load dataset\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"training_data = datasets.FashionMNIST(root=\"data\", train=True, download=True, transform=ToTensor())\ntest_data = datasets.FashionMNIST(root=\"data\", train=False, download=True, transform=ToTensor())\n\nbatch_size = 64\n\ntrain_dataloader = DataLoader(training_data, batch_size=batch_size)\ntest_dataloader = DataLoader(test_data, batch_size=batch_size)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build model with hyperparameters\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\nprint(f\"Using {device} device\")\n\nclass NeuralNetwork(nn.Module):\n def __init__(self):\n super(NeuralNetwork, self).__init__()\n self.flatten = nn.Flatten()\n self.linear_relu_stack = nn.Sequential(\n nn.Linear(28*28, params['features']),\n nn.ReLU(),\n nn.Linear(params['features'], params['features']),\n nn.ReLU(),\n nn.Linear(params['features'], 10)\n )\n\n def forward(self, x):\n x = self.flatten(x)\n logits = self.linear_relu_stack(x)\n return logits\n\nmodel = NeuralNetwork().to(device)\n\nloss_fn = nn.CrossEntropyLoss()\noptimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define train and test\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def train(dataloader, model, loss_fn, optimizer):\n size = len(dataloader.dataset)\n model.train()\n for batch, (X, y) in enumerate(dataloader):\n X, y = X.to(device), y.to(device)\n pred = model(X)\n loss = loss_fn(pred, y)\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n\ndef test(dataloader, model, loss_fn):\n size = len(dataloader.dataset)\n num_batches = len(dataloader)\n model.eval()\n test_loss, correct = 0, 0\n with torch.no_grad():\n for X, y in dataloader:\n X, y = X.to(device), y.to(device)\n pred = model(X)\n test_loss += loss_fn(pred, y).item()\n correct += (pred.argmax(1) == y).type(torch.float).sum().item()\n test_loss /= num_batches\n correct /= size\n return correct"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train model and report accuracy\nReport accuracy metrics to NNI so the tuning algorithm can suggest better hyperparameters.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"epochs = 5\nfor t in range(epochs):\n print(f\"Epoch {t+1}\\n-------------------------------\")\n train(train_dataloader, model, loss_fn, optimizer)\n accuracy = test(test_dataloader, model, loss_fn)\n nni.report_intermediate_result(accuracy)\nnni.report_final_result(accuracy)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
"""
Port PyTorch Quickstart to NNI
==============================
This is a modified version of `PyTorch quickstart`_.
It can be run directly and will have the exact same result as the original version.
Furthermore, it enables auto-tuning with an NNI *experiment*, which will be detailed later.
It is recommended to run this script directly first to verify the environment.
There are 2 key differences from the original version:
1. In the `Get optimized hyperparameters`_ part, it receives generated hyperparameters.
2. In the `Train model and report accuracy`_ part, it reports accuracy metrics to NNI.
.. _PyTorch quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""
# %%
import nni
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# %%
# Hyperparameters to be tuned
# ---------------------------
# These are the hyperparameters that will be tuned.
params = {
    'features': 512,
    'lr': 0.001,
    'momentum': 0,
}
# %%
# Get optimized hyperparameters
# -----------------------------
# If run directly, :func:`nni.get_next_parameter` is a no-op and returns an empty dict.
# But with an NNI *experiment*, it will receive optimized hyperparameters from the tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)
print(params)
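# %%
# When this script is run directly (not inside an NNI experiment),
# :func:`nni.get_next_parameter` returns an empty dict, so the line above
# simply prints the defaults:
#
#     {'features': 512, 'lr': 0.001, 'momentum': 0}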
# %%
# Load dataset
# ------------
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())
batch_size = 64
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
# %%
# Build model with hyperparameters
# --------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, params['features']),
            nn.ReLU(),
            nn.Linear(params['features'], params['features']),
            nn.ReLU(),
            nn.Linear(params['features'], 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'])
# %%
# Define train and test
# ---------------------
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return correct
# %%
# Train model and report accuracy
# -------------------------------
# Report accuracy metrics to NNI so the tuning algorithm can suggest better hyperparameters.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    accuracy = test(test_dataloader, model, loss_fn)
    nni.report_intermediate_result(accuracy)
nni.report_final_result(accuracy)