Unverified commit 6e09c2c1 authored by J-shang, committed by GitHub

[Doc] update compression tutorials (#4646)

parent a4d8a4ea
...@@ -5,7 +5,11 @@ import re ...@@ -5,7 +5,11 @@ import re
cp_list = { cp_list = {
'tutorials/hello_nas.rst': 'tutorials/cp_hello_nas_quickstart.rst' 'tutorials/hello_nas.rst': 'tutorials/cp_hello_nas_quickstart.rst',
'tutorials/pruning_quick_start_mnist.rst': 'tutorials/cp_pruning_quick_start_mnist.rst',
'tutorials/pruning_speed_up.rst': 'tutorials/cp_pruning_speed_up.rst',
'tutorials/quantization_quick_start_mnist.rst': 'tutorials/cp_quantization_quick_start_mnist.rst',
'tutorials/quantization_speed_up.rst': 'tutorials/cp_quantization_speed_up.rst',
} }
HEADER = """.. THIS FILE IS A COPY OF {} WITH MODIFICATIONS. HEADER = """.. THIS FILE IS A COPY OF {} WITH MODIFICATIONS.
......
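The ``cp_list`` mapping above drives a small docs-build step that duplicates each tutorial page under a ``cp_`` name with the notice ``HEADER`` prepended. The body of the script is truncated in this diff, so the loop below is only a hedged sketch of what such a copy step typically looks like, not the actual implementation:

.. code-block:: python

    # illustrative sketch only; the real script body (including its use of `re`) is not shown in this diff
    for src, dst in cp_list.items():
        with open(src) as fin:
            content = fin.read()
        with open(dst, 'w') as fout:
            # HEADER carries a '{}' placeholder for the path of the source file it was copied from
            fout.write(HEADER.format(src) + content)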
...@@ -10,12 +10,6 @@ Model Compression with NNI ...@@ -10,12 +10,6 @@ Model Compression with NNI
Config Specification <compression_config_list> Config Specification <compression_config_list>
Advanced Usage <advanced_usage> Advanced Usage <advanced_usage>
.. attention::
NNI's model pruning framework has been upgraded to a more powerful version (named pruning v2 before nni v2.6).
The old version (`named pruning before nni v2.6 <https://nni.readthedocs.io/en/v2.6/Compression/pruning.html>`_) is no longer maintained. If for some reason you have to use the old pruning framework,
nni v2.6 is the last version that supports it.
.. Using rubric to prevent the section heading from being included in the toc .. Using rubric to prevent the section heading from being included in the toc
.. rubric:: Overview .. rubric:: Overview
...@@ -147,4 +141,10 @@ The following figure shows how NNI prunes and speeds up your models. ...@@ -147,4 +141,10 @@ The following figure shows how NNI prunes and speeds up your models.
:alt: :alt:
The detailed tutorial of Speed Up Model with Mask can be found :doc:`here <../tutorials/pruning_speed_up>`. The detailed tutorial of Speed Up Model with Mask can be found :doc:`here <../tutorials/pruning_speed_up>`.
The detailed tutorial of Speed Up Model with Calibration Config can be found :doc:`here <../tutorials/quantization_speed_up>`. The detailed tutorial of Speed Up Model with Calibration Config can be found :doc:`here <../tutorials/quantization_speed_up>`.
\ No newline at end of file
.. attention::
NNI's model pruning framework has been upgraded to a more powerful version (named pruning v2 before nni v2.6).
The old version (`named pruning before nni v2.6 <https://nni.readthedocs.io/en/v2.6/Compression/pruning.html>`_) is no longer maintained. If for some reason you have to use the old pruning framework,
nni v2.6 is the last version that supports it.
.. 1d14b9d13cdd660f8e9dcb2abed0b185 .. cacd7e0a78bfacc867ee868c07c1d700
模型压缩 模型压缩
======== ========
......
...@@ -103,6 +103,6 @@ In the dependency-aware mode, the pruner will provide a better speed gain from t ...@@ -103,6 +103,6 @@ In the dependency-aware mode, the pruner will provide a better speed gain from t
:hidden: :hidden:
:maxdepth: 2 :maxdepth: 2
Quickstart <../tutorials/pruning_quick_start_mnist> Quickstart <../tutorials/cp_pruning_quick_start_mnist>
Pruner Reference <pruner> Pruner <pruner>
Speed Up <../tutorials/pruning_speed_up> Speed Up <../tutorials/cp_pruning_speed_up>
...@@ -14,6 +14,6 @@ create your own quantizer using NNI model compression interface. ...@@ -14,6 +14,6 @@ create your own quantizer using NNI model compression interface.
:hidden: :hidden:
:maxdepth: 2 :maxdepth: 2
Quickstart <../tutorials/quantization_quick_start_mnist> Quickstart <../tutorials/cp_quantization_quick_start_mnist>
Quantizer Reference <quantizer> Quantizer <quantizer>
Speed Up <../tutorials/quantization_speed_up> Speed Up <../tutorials/cp_quantization_speed_up>
...@@ -53,14 +53,14 @@ Tutorials ...@@ -53,14 +53,14 @@ Tutorials
.. raw:: html .. raw:: html
<div class="sphx-glr-thumbcontainer" tooltip="Model pruning is a technique to reduce the model size and computation by reducing model weight ..."> <div class="sphx-glr-thumbcontainer" tooltip="Quantization reduces model size and speeds up inference time by reducing the number of bits req...">
.. only:: html .. only:: html
.. figure:: /tutorials/images/thumb/sphx_glr_pruning_quick_start_mnist_thumb.png .. figure:: /tutorials/images/thumb/sphx_glr_quantization_quick_start_mnist_thumb.png
:alt: Pruning Quickstart :alt: Quantization Quickstart
:ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py`
.. raw:: html .. raw:: html
...@@ -70,18 +70,18 @@ Tutorials ...@@ -70,18 +70,18 @@ Tutorials
.. toctree:: .. toctree::
:hidden: :hidden:
/tutorials/pruning_quick_start_mnist /tutorials/quantization_quick_start_mnist
.. raw:: html .. raw:: html
<div class="sphx-glr-thumbcontainer" tooltip="Quantization reduces model size and speeds up inference time by reducing the number of bits req..."> <div class="sphx-glr-thumbcontainer" tooltip=" Introduction ------------">
.. only:: html .. only:: html
.. figure:: /tutorials/images/thumb/sphx_glr_quantization_quick_start_mnist_thumb.png .. figure:: /tutorials/images/thumb/sphx_glr_quantization_speed_up_thumb.png
:alt: Quantization Quickstart :alt: Speed Up Model with Calibration Config
:ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` :ref:`sphx_glr_tutorials_quantization_speed_up.py`
.. raw:: html .. raw:: html
...@@ -91,18 +91,18 @@ Tutorials ...@@ -91,18 +91,18 @@ Tutorials
.. toctree:: .. toctree::
:hidden: :hidden:
/tutorials/quantization_quick_start_mnist /tutorials/quantization_speed_up
.. raw:: html .. raw:: html
<div class="sphx-glr-thumbcontainer" tooltip=" Introduction ------------"> <div class="sphx-glr-thumbcontainer" tooltip="Model pruning is a technique to reduce the model size and computation by reducing model weight ...">
.. only:: html .. only:: html
.. figure:: /tutorials/images/thumb/sphx_glr_quantization_speed_up_thumb.png .. figure:: /tutorials/images/thumb/sphx_glr_pruning_quick_start_mnist_thumb.png
:alt: Speed Up Model with Calibration Config :alt: Pruning Quickstart
:ref:`sphx_glr_tutorials_quantization_speed_up.py` :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py`
.. raw:: html .. raw:: html
...@@ -112,7 +112,7 @@ Tutorials ...@@ -112,7 +112,7 @@ Tutorials
.. toctree:: .. toctree::
:hidden: :hidden:
/tutorials/quantization_speed_up /tutorials/pruning_quick_start_mnist
.. raw:: html .. raw:: html
......
...@@ -33,7 +33,18 @@ ...@@ -33,7 +33,18 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import torch\nimport torch.nn.functional as F\nfrom torch.optim import SGD\n\nfrom scripts.compression_mnist_model import TorchModel, trainer, evaluator, device\n\n# define the model\nmodel = TorchModel().to(device)\n\n# define the optimizer and criterion for pre-training\n\noptimizer = SGD(model.parameters(), 1e-2)\ncriterion = F.nll_loss\n\n# pre-train and evaluate the model on MNIST dataset\nfor epoch in range(3):\n trainer(model, optimizer, criterion)\n evaluator(model)" "import torch\nimport torch.nn.functional as F\nfrom torch.optim import SGD\n\nfrom scripts.compression_mnist_model import TorchModel, trainer, evaluator, device\n\n# define the model\nmodel = TorchModel().to(device)\n\n# show the model structure, note that pruner will wrap the model layer.\nprint(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# define the optimizer and criterion for pre-training\n\noptimizer = SGD(model.parameters(), 1e-2)\ncriterion = F.nll_loss\n\n# pre-train and evaluate the model on MNIST dataset\nfor epoch in range(3):\n trainer(model, optimizer, criterion)\n evaluator(model)"
] ]
}, },
{ {
...@@ -69,7 +80,18 @@ ...@@ -69,7 +80,18 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner\n\npruner = L1NormPruner(model, config_list)\n# show the wrapped model structure\nprint(model)\n# compress the model and generate the masks\n_, masks = pruner.compress()\n# show the masks sparsity\nfor name, mask in masks.items():\n print(name, ' sparsity: ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))" "from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner\npruner = L1NormPruner(model, config_list)\n\n# show the wrapped model structure, `PrunerModuleWrapper` have wrapped the layers that configured in the config_list.\nprint(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# compress the model and generate the masks\n_, masks = pruner.compress()\n# show the masks sparsity\nfor name, mask in masks.items():\n print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))"
] ]
}, },
{ {
......
...@@ -29,6 +29,11 @@ from scripts.compression_mnist_model import TorchModel, trainer, evaluator, devi ...@@ -29,6 +29,11 @@ from scripts.compression_mnist_model import TorchModel, trainer, evaluator, devi
# define the model # define the model
model = TorchModel().to(device) model = TorchModel().to(device)
# show the model structure; note that the pruner will wrap the model layers.
print(model)
# %%
# define the optimizer and criterion for pre-training # define the optimizer and criterion for pre-training
optimizer = SGD(model.parameters(), 1e-2) optimizer = SGD(model.parameters(), 1e-2)
...@@ -63,15 +68,18 @@ config_list = [{ ...@@ -63,15 +68,18 @@ config_list = [{
# Pruners usually require `model` and `config_list` as input arguments. # Pruners usually require `model` and `config_list` as input arguments.
from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list) pruner = L1NormPruner(model, config_list)
# show the wrapped model structure
# show the wrapped model structure; `PrunerModuleWrapper` has wrapped the layers configured in the config_list.
print(model) print(model)
# %%
# compress the model and generate the masks # compress the model and generate the masks
_, masks = pruner.compress() _, masks = pruner.compress()
# show the masks sparsity # show the masks sparsity
for name, mask in masks.items(): for name, mask in masks.items():
print(name, ' sparsity: ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel())) print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
# %% # %%
# Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model. # Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
......
775624e7a28ae5c6eb2027eace7fff67 c87607b7befe8496829a8cb5a8632019
\ No newline at end of file \ No newline at end of file
...@@ -39,7 +39,7 @@ Preparation ...@@ -39,7 +39,7 @@ Preparation
In this tutorial, we use a simple model and pre-train on MNIST dataset. In this tutorial, we use a simple model and pre-train on MNIST dataset.
If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_. If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_.
.. GENERATED FROM PYTHON SOURCE LINES 22-42 .. GENERATED FROM PYTHON SOURCE LINES 22-35
.. code-block:: default .. code-block:: default
...@@ -53,6 +53,35 @@ If you are familiar with defining a model and training in pytorch, you can skip ...@@ -53,6 +53,35 @@ If you are familiar with defining a model and training in pytorch, you can skip
# define the model # define the model
model = TorchModel().to(device) model = TorchModel().to(device)
# show the model structure; note that the pruner will wrap the model layers.
print(model)
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
TorchModel(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=256, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
.. GENERATED FROM PYTHON SOURCE LINES 36-47
.. code-block:: default
# define the optimizer and criterion for pre-training # define the optimizer and criterion for pre-training
optimizer = SGD(model.parameters(), 1e-2) optimizer = SGD(model.parameters(), 1e-2)
...@@ -73,14 +102,14 @@ If you are familiar with defining a model and training in pytorch, you can skip ...@@ -73,14 +102,14 @@ If you are familiar with defining a model and training in pytorch, you can skip
.. code-block:: none .. code-block:: none
Average test loss: 1.8381, Accuracy: 5939/10000 (59%) Average test loss: 0.5603, Accuracy: 8270/10000 (83%)
Average test loss: 0.3143, Accuracy: 9045/10000 (90%) Average test loss: 0.2395, Accuracy: 9289/10000 (93%)
Average test loss: 0.1928, Accuracy: 9387/10000 (94%) Average test loss: 0.1660, Accuracy: 9527/10000 (95%)
.. GENERATED FROM PYTHON SOURCE LINES 43-53 .. GENERATED FROM PYTHON SOURCE LINES 48-58
Pruning Model Pruning Model
------------- -------------
...@@ -93,7 +122,7 @@ This `config_list` means all layers whose type is `Linear` or `Conv2d` will be p ...@@ -93,7 +122,7 @@ This `config_list` means all layers whose type is `Linear` or `Conv2d` will be p
except the layer named `fc3`, because `fc3` is `exclude`. except the layer named `fc3`, because `fc3` is `exclude`.
The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned. The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned.
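The ``config_list`` definition itself is elided from the hunk below; reconstructed from the description above, it looks roughly like this (a sketch — the exact key name, e.g. ``sparsity_per_layer``, is an assumption based on the NNI v2 config format, not copied from this commit):

.. code-block:: python

    config_list = [{
        'sparsity_per_layer': 0.5,          # prune 50% of the weights of every matched layer
        'op_types': ['Linear', 'Conv2d'],   # match all Linear and Conv2d layers ...
    }, {
        'exclude': True,                    # ... except the layer named fc3
        'op_names': ['fc3'],
    }]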
.. GENERATED FROM PYTHON SOURCE LINES 53-62 .. GENERATED FROM PYTHON SOURCE LINES 58-67
.. code-block:: default .. code-block:: default
...@@ -113,25 +142,20 @@ The final sparsity ratio for each layer is 50%. The layer named `fc3` will not b ...@@ -113,25 +142,20 @@ The final sparsity ratio for each layer is 50%. The layer named `fc3` will not b
.. GENERATED FROM PYTHON SOURCE LINES 63-64 .. GENERATED FROM PYTHON SOURCE LINES 68-69
Pruners usually require `model` and `config_list` as input arguments. Pruners usually require `model` and `config_list` as input arguments.
.. GENERATED FROM PYTHON SOURCE LINES 64-76 .. GENERATED FROM PYTHON SOURCE LINES 69-76
.. code-block:: default .. code-block:: default
from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list) pruner = L1NormPruner(model, config_list)
# show the wrapped model structure
# show the wrapped model structure; `PrunerModuleWrapper` has wrapped the layers configured in the config_list.
print(model) print(model)
# compress the model and generate the masks
_, masks = pruner.compress()
# show the masks sparsity
for name, mask in masks.items():
print(name, ' sparsity: ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
...@@ -158,21 +182,46 @@ Pruners usually require `model` and `config_list` as input arguments. ...@@ -158,21 +182,46 @@ Pruners usually require `model` and `config_list` as input arguments.
) )
(fc3): Linear(in_features=84, out_features=10, bias=True) (fc3): Linear(in_features=84, out_features=10, bias=True)
) )
conv1 sparsity: 0.5
conv2 sparsity: 0.5
fc1 sparsity: 0.5
fc2 sparsity: 0.5
.. GENERATED FROM PYTHON SOURCE LINES 77-80 .. GENERATED FROM PYTHON SOURCE LINES 77-84
.. code-block:: default
# compress the model and generate the masks
_, masks = pruner.compress()
# show the masks sparsity
for name, mask in masks.items():
print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
conv1 sparsity : 0.5
conv2 sparsity : 0.5
fc1 sparsity : 0.5
fc2 sparsity : 0.5
.. GENERATED FROM PYTHON SOURCE LINES 85-88
Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model. Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
The model becomes smaller after speed-up, The model becomes smaller after speed-up,
and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the masks across layers. and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the masks across layers.
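The speed-up code itself is elided from the hunk below; a minimal sketch consistent with the surrounding quickstart (the import path follows the ``nni.compression.pytorch.ModelSpeedup`` reference elsewhere in this commit, and the dummy-input shape is an assumption based on the MNIST model used above):

.. code-block:: python

    import torch
    from nni.compression.pytorch import ModelSpeedup

    # the pruner must release its wrappers before speed-up, since ModelSpeedup expects an unwrapped model
    pruner._unwrap_model()

    # replace the masked layers with physically smaller dense ones, propagating masks across layers
    ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks).speedup_model()
    print(model)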
.. GENERATED FROM PYTHON SOURCE LINES 80-89 .. GENERATED FROM PYTHON SOURCE LINES 88-97
.. code-block:: default .. code-block:: default
...@@ -195,24 +244,19 @@ and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the ma ...@@ -195,24 +244,19 @@ and reaches a higher sparsity ratio because `ModelSpeedup` will propagate the ma
.. code-block:: none .. code-block:: none
/home/ningshang/nni/nni/compression/pytorch/utils/mask_conflict.py:124: UserWarning: This overload of nonzero is deprecated: aten::log_softmax is not Supported! Please report an issue at https://github.com/microsoft/nni. Thanks~
nonzero() Note: .aten::log_softmax.12 does not have corresponding mask inference object
Consider using one of the following signatures instead: /home/ningshang/anaconda3/envs/nni-dev/lib/python3.8/site-packages/torch/_tensor.py:1013: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:417.)
nonzero(*, bool as_tuple) (Triggered internally at /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.) return self._grad
all_ones = (w_mask.flatten(1).sum(-1) == count).nonzero().squeeze(1).tolist()
/home/ningshang/nni/nni/compression/pytorch/speedup/infer_mask.py:262: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the gradient for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations.
if isinstance(self.output, torch.Tensor) and self.output.grad is not None:
/home/ningshang/nni/nni/compression/pytorch/speedup/compressor.py:282: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the gradient for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations.
if last_output.grad is not None and tin.grad is not None:
.. GENERATED FROM PYTHON SOURCE LINES 90-91 .. GENERATED FROM PYTHON SOURCE LINES 98-99
the model will become physically smaller after speed up
.. GENERATED FROM PYTHON SOURCE LINES 91-93 .. GENERATED FROM PYTHON SOURCE LINES 99-101
.. code-block:: default .. code-block:: default
...@@ -239,14 +283,14 @@ the model will become real smaller after speed up ...@@ -239,14 +283,14 @@ the model will become real smaller after speed up
.. GENERATED FROM PYTHON SOURCE LINES 94-98 .. GENERATED FROM PYTHON SOURCE LINES 102-106
Fine-tuning Compacted Model Fine-tuning Compacted Model
--------------------------- ---------------------------
Note that if the model has been sped up, you need to re-initialize a new optimizer for fine-tuning. Note that if the model has been sped up, you need to re-initialize a new optimizer for fine-tuning.
Because speed up will replace the masked big layers with dense small ones. Because speed up will replace the masked big layers with dense small ones.
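The fine-tuning cell is likewise elided below; a short sketch, reusing the ``trainer``/``evaluator`` helpers and the optimizer setup shown earlier in this commit (the epoch count is illustrative):

.. code-block:: python

    # a fresh optimizer is needed because speed-up replaced the wrapped layers with new, smaller modules
    optimizer = SGD(model.parameters(), 1e-2)
    for epoch in range(3):
        trainer(model, optimizer, criterion)
    evaluator(model)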
.. GENERATED FROM PYTHON SOURCE LINES 98-102 .. GENERATED FROM PYTHON SOURCE LINES 106-110
.. code-block:: default .. code-block:: default
...@@ -264,7 +308,7 @@ Because speed up will replace the masked big layers with dense small ones. ...@@ -264,7 +308,7 @@ Because speed up will replace the masked big layers with dense small ones.
.. rst-class:: sphx-glr-timing .. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 1 minutes 15.845 seconds) **Total running time of the script:** ( 1 minutes 38.705 seconds)
.. _sphx_glr_download_tutorials_pruning_quick_start_mnist.py: .. _sphx_glr_download_tutorials_pruning_quick_start_mnist.py:
......
...@@ -112,7 +112,7 @@ ...@@ -112,7 +112,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"For combining usage of ``Pruner`` masks generation with ``ModelSpeedup``,\nplease refer to `Pruning Quick Start <./pruning_quick_start_mnist.html>`__.\n\nNOTE: The current implementation supports PyTorch 1.3.1 or newer.\n\n## Limitations\n\nFor PyTorch we can only replace modules, if functions in ``forward`` should be replaced,\nour current implementation does not work. One workaround is make the function a PyTorch module.\n\nIf you want to speed up your own model which cannot supported by the current implementation,\nyou need implement the replace function for module replacement, welcome to contribute.\n\n## Speedup Results of Examples\n\nThe code of these experiments can be found :githublink:`here <examples/model_compress/pruning/speedup/model_speedup.py>`.\n\nThese result are tested on the `legacy pruning framework <../comporession/pruning_legacy>`__, new results will coming soon.\n\n### slim pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01197\n - 0.005107\n * - 2\n - 0.02019\n - 0.008769\n * - 4\n - 0.02733\n - 0.014809\n * - 8\n - 0.04310\n - 0.027441\n * - 16\n - 0.07731\n - 0.05008\n * - 32\n - 0.14464\n - 0.10027\n\n### fpgm pruner example\n\non cpu,\ninput tensor: ``torch.randn(64, 1, 28, 28)``\\ ,\ntoo large variance\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01383\n - 0.01839\n * - 2\n - 0.01167\n - 0.003558\n * - 4\n - 0.01636\n - 0.01088\n * - 40\n - 0.14412\n - 0.08268\n * - 40\n - 1.29385\n - 0.14408\n * - 40\n - 0.41035\n - 0.46162\n * - 400\n - 6.29020\n - 5.82143\n\n### l1filter pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01026\n - 0.003677\n * - 2\n - 0.01657\n - 0.008161\n * - 4\n - 0.02458\n - 0.020018\n * - 8\n - 0.03498\n - 0.025504\n * - 16\n - 0.06757\n - 0.047523\n * - 32\n - 0.10487\n - 0.086442\n\n### APoZ pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01389\n - 0.004208\n * - 2\n - 0.01628\n - 0.008310\n * - 4\n - 0.02521\n - 0.014008\n * - 8\n - 0.03386\n - 0.023923\n * - 16\n - 0.06042\n - 0.046183\n * - 32\n - 0.12421\n - 0.087113\n\n### SimulatedAnnealing pruner example\n\nIn this experiment, we use SimulatedAnnealing pruner to prune the resnet18 on the cifar10 dataset.\nWe measure the latencies and accuracies of the pruned model under different sparsity ratios, as shown in the following figure.\nThe latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``.\n\n<img src=\"file://../../img/SA_latency_accuracy.png\">\n\n### User configuration for ModelSpeedup\n\n**PyTorch**\n\n.. autoclass:: nni.compression.pytorch.ModelSpeedup\n\n" "For combining usage of ``Pruner`` masks generation with ``ModelSpeedup``,\nplease refer to `Pruning Quick Start <./pruning_quick_start_mnist.html>`__.\n\nNOTE: The current implementation supports PyTorch 1.3.1 or newer.\n\n## Limitations\n\nFor PyTorch we can only replace modules, if functions in ``forward`` should be replaced,\nour current implementation does not work. 
One workaround is make the function a PyTorch module.\n\nIf you want to speed up your own model which cannot supported by the current implementation,\nyou need implement the replace function for module replacement, welcome to contribute.\n\n## Speedup Results of Examples\n\nThe code of these experiments can be found :githublink:`here <examples/model_compress/pruning/speedup/model_speedup.py>`.\n\nThese result are tested on the `legacy pruning framework <../comporession/pruning_legacy>`__, new results will coming soon.\n\n### slim pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01197\n - 0.005107\n * - 2\n - 0.02019\n - 0.008769\n * - 4\n - 0.02733\n - 0.014809\n * - 8\n - 0.04310\n - 0.027441\n * - 16\n - 0.07731\n - 0.05008\n * - 32\n - 0.14464\n - 0.10027\n\n### fpgm pruner example\n\non cpu,\ninput tensor: ``torch.randn(64, 1, 28, 28)``\\ ,\ntoo large variance\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01383\n - 0.01839\n * - 2\n - 0.01167\n - 0.003558\n * - 4\n - 0.01636\n - 0.01088\n * - 40\n - 0.14412\n - 0.08268\n * - 40\n - 1.29385\n - 0.14408\n * - 40\n - 0.41035\n - 0.46162\n * - 400\n - 6.29020\n - 5.82143\n\n### l1filter pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01026\n - 0.003677\n * - 2\n - 0.01657\n - 0.008161\n * - 4\n - 0.02458\n - 0.020018\n * - 8\n - 0.03498\n - 0.025504\n * - 16\n - 0.06757\n - 0.047523\n * - 32\n - 0.10487\n - 0.086442\n\n### APoZ pruner example\n\non one V100 GPU,\ninput tensor: ``torch.randn(64, 3, 32, 32)``\n\n.. list-table::\n :header-rows: 1\n :widths: auto\n\n * - Times\n - Mask Latency\n - Speedup Latency\n * - 1\n - 0.01389\n - 0.004208\n * - 2\n - 0.01628\n - 0.008310\n * - 4\n - 0.02521\n - 0.014008\n * - 8\n - 0.03386\n - 0.023923\n * - 16\n - 0.06042\n - 0.046183\n * - 32\n - 0.12421\n - 0.087113\n\n### SimulatedAnnealing pruner example\n\nIn this experiment, we use SimulatedAnnealing pruner to prune the resnet18 on the cifar10 dataset.\nWe measure the latencies and accuracies of the pruned model under different sparsity ratios, as shown in the following figure.\nThe latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``.\n\n<img src=\"file://../../img/SA_latency_accuracy.png\">\n\n"
] ]
} }
], ],
......
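On the workaround mentioned in the notebook cell above (only modules can be replaced, so a function called in ``forward`` has to be turned into a PyTorch module): a minimal, hypothetical illustration of that pattern, not code from this commit:

.. code-block:: python

    import torch.nn as nn
    import torch.nn.functional as F

    class LogSoftmax(nn.Module):
        # wraps F.log_softmax in a module so a speed-up pass can track and replace it
        def __init__(self, dim=1):
            super().__init__()
            self.dim = dim

        def forward(self, x):
            return F.log_softmax(x, dim=self.dim)

    # in the model definition: self.log_softmax = LogSoftmax(dim=1), then call it from forward();
    # for this particular op the built-in nn.LogSoftmax(dim=1) works just as well.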
...@@ -237,10 +237,3 @@ print('Speedup Model - Elapsed Time : ', time.time() - start) ...@@ -237,10 +237,3 @@ print('Speedup Model - Elapsed Time : ', time.time() - start)
# The latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``. # The latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``.
# #
# .. image:: ../../img/SA_latency_accuracy.png # .. image:: ../../img/SA_latency_accuracy.png
#
# User configuration for ModelSpeedup
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# **PyTorch**
#
# .. autoclass:: nni.compression.pytorch.ModelSpeedup
5bcdee7241d8daf931bd76f435167a58 e7a923e9f98f16e2eb4f3c29c2940f49
\ No newline at end of file \ No newline at end of file
...@@ -136,7 +136,7 @@ Roughly test the original model inference speed. ...@@ -136,7 +136,7 @@ Roughly test the original model inference speed.
.. code-block:: none .. code-block:: none
Original Model - Elapsed Time : 0.035959720611572266 Original Model - Elapsed Time : 0.10696005821228027
...@@ -163,60 +163,10 @@ Speed up the model and show the model structure after speed up. ...@@ -163,60 +163,10 @@ Speed up the model and show the model structure after speed up.
.. code-block:: none .. code-block:: none
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) start to speed up the model aten::log_softmax is not Supported! Please report an issue at https://github.com/microsoft/nni. Thanks~
[2022-02-28 13:29:56] INFO (FixMaskConflict/MainThread) {'conv1': 1, 'conv2': 1} Note: .aten::log_softmax.12 does not have corresponding mask inference object
[2022-02-28 13:29:56] INFO (FixMaskConflict/MainThread) dim0 sparsity: 0.500000
[2022-02-28 13:29:56] INFO (FixMaskConflict/MainThread) dim1 sparsity: 0.000000
[2022-02-28 13:29:56] INFO (FixMaskConflict/MainThread) Dectected conv prune dim" 0
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) infer module masks...
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for conv1
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::relu.5
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::max_pool2d.6
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for conv2
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::relu.7
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::max_pool2d.8
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::flatten.9
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for fc1
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::relu.10
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for fc2
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::relu.11
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for fc3
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for .aten::log_softmax.12
[2022-02-28 13:29:56] ERROR (nni.compression.pytorch.speedup.jit_translate/MainThread) aten::log_softmax is not Supported! Please report an issue at https://github.com/microsoft/nni. Thanks~
[2022-02-28 13:29:56] WARNING (nni.compression.pytorch.speedup.compressor/MainThread) Note: .aten::log_softmax.12 does not have corresponding mask inference object
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the fc3
/home/ningshang/anaconda3/envs/nni-dev/lib/python3.8/site-packages/torch/_tensor.py:1013: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:417.) /home/ningshang/anaconda3/envs/nni-dev/lib/python3.8/site-packages/torch/_tensor.py:1013: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:417.)
return self._grad return self._grad
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::relu.11
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the fc2
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::relu.10
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the fc1
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::flatten.9
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::max_pool2d.8
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::relu.7
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the conv2
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::max_pool2d.6
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::relu.5
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the conv1
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) resolve the mask conflict
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace compressed modules...
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace module (name: conv1, op_type: Conv2d)
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::relu.5, op_type: aten::relu) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::max_pool2d.6, op_type: aten::max_pool2d) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace module (name: conv2, op_type: Conv2d)
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::relu.7, op_type: aten::relu) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::max_pool2d.8, op_type: aten::max_pool2d) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::flatten.9, op_type: aten::flatten) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace module (name: fc1, op_type: Linear)
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compress_modules/MainThread) replace linear with new in_features: 256, out_features: 120
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::relu.10, op_type: aten::relu) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace module (name: fc2, op_type: Linear)
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compress_modules/MainThread) replace linear with new in_features: 120, out_features: 84
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::relu.11, op_type: aten::relu) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) replace module (name: fc3, op_type: Linear)
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compress_modules/MainThread) replace linear with new in_features: 84, out_features: 10
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Warning: cannot replace (name: .aten::log_softmax.12, op_type: aten::log_softmax) which is func type
[2022-02-28 13:29:56] INFO (nni.compression.pytorch.speedup.compressor/MainThread) speedup done
TorchModel( TorchModel(
(conv1): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1)) (conv1): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1)) (conv2): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
...@@ -250,12 +200,12 @@ Roughly test the model after speed-up inference speed. ...@@ -250,12 +200,12 @@ Roughly test the model after speed-up inference speed.
.. code-block:: none .. code-block:: none
Speedup Model - Elapsed Time : 0.003432035446166992 Speedup Model - Elapsed Time : 0.002137899398803711
.. GENERATED FROM PYTHON SOURCE LINES 79-247 .. GENERATED FROM PYTHON SOURCE LINES 79-240
For combining usage of ``Pruner`` masks generation with ``ModelSpeedup``, For combining usage of ``Pruner`` masks generation with ``ModelSpeedup``,
please refer to `Pruning Quick Start <./pruning_quick_start_mnist.html>`__. please refer to `Pruning Quick Start <./pruning_quick_start_mnist.html>`__.
...@@ -419,17 +369,10 @@ The latency is measured on one V100 GPU and the input tensor is ``torch.randn(1 ...@@ -419,17 +369,10 @@ The latency is measured on one V100 GPU and the input tensor is ``torch.randn(1
.. image:: ../../img/SA_latency_accuracy.png .. image:: ../../img/SA_latency_accuracy.png
User configuration for ModelSpeedup
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**PyTorch**
.. autoclass:: nni.compression.pytorch.ModelSpeedup
.. rst-class:: sphx-glr-timing .. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 0 minutes 8.409 seconds) **Total running time of the script:** ( 0 minutes 9.859 seconds)
.. _sphx_glr_download_tutorials_pruning_speed_up.py: .. _sphx_glr_download_tutorials_pruning_speed_up.py:
......
...@@ -5,10 +5,12 @@ ...@@ -5,10 +5,12 @@
Computation times Computation times
================= =================
**00:01.175** total execution time for **tutorials** files: **01:48.564** total execution time for **tutorials** files:
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:01.175 | 0.0 MB | | :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:38.705 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_speed_up.py` (``pruning_speed_up.py``) | 00:09.859 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
...@@ -16,9 +18,7 @@ Computation times ...@@ -16,9 +18,7 @@ Computation times
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_nni_experiment.py` (``nni_experiment.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_nni_experiment.py` (``nni_experiment.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_speed_up.py` (``pruning_speed_up.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
......
...@@ -29,6 +29,11 @@ from scripts.compression_mnist_model import TorchModel, trainer, evaluator, devi ...@@ -29,6 +29,11 @@ from scripts.compression_mnist_model import TorchModel, trainer, evaluator, devi
# define the model # define the model
model = TorchModel().to(device) model = TorchModel().to(device)
# show the model structure; note that the pruner will wrap the model layers.
print(model)
# %%
# define the optimizer and criterion for pre-training # define the optimizer and criterion for pre-training
optimizer = SGD(model.parameters(), 1e-2) optimizer = SGD(model.parameters(), 1e-2)
...@@ -63,15 +68,18 @@ config_list = [{ ...@@ -63,15 +68,18 @@ config_list = [{
# Pruners usually require `model` and `config_list` as input arguments. # Pruners usually require `model` and `config_list` as input arguments.
from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list) pruner = L1NormPruner(model, config_list)
# show the wrapped model structure
# show the wrapped model structure; `PrunerModuleWrapper` has wrapped the layers configured in the config_list.
print(model) print(model)
# %%
# compress the model and generate the masks # compress the model and generate the masks
_, masks = pruner.compress() _, masks = pruner.compress()
# show the masks sparsity # show the masks sparsity
for name, mask in masks.items(): for name, mask in masks.items():
print(name, ' sparsity: ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel())) print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))
# %% # %%
# Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model. # Speed up the original model with masks, note that `ModelSpeedup` requires an unwrapped model.
......
...@@ -237,10 +237,3 @@ print('Speedup Model - Elapsed Time : ', time.time() - start) ...@@ -237,10 +237,3 @@ print('Speedup Model - Elapsed Time : ', time.time() - start)
# The latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``. # The latency is measured on one V100 GPU and the input tensor is ``torch.randn(128, 3, 32, 32)``.
# #
# .. image:: ../../img/SA_latency_accuracy.png # .. image:: ../../img/SA_latency_accuracy.png
#
# User configuration for ModelSpeedup
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# **PyTorch**
#
# .. autoclass:: nni.compression.pytorch.ModelSpeedup
...@@ -25,7 +25,7 @@ class BNNQuantizer(Quantizer): ...@@ -25,7 +25,7 @@ class BNNQuantizer(Quantizer):
r""" r"""
Binarized Neural Networks, as defined in: Binarized Neural Networks, as defined in:
`Binarized Neural Networks: Training Deep Neural Networks with Weights and `Binarized Neural Networks: Training Deep Neural Networks with Weights and
Activations Constrained to +1 or -1 <https://arxiv.org/abs/1602.02830>`__\ , Activations Constrained to +1 or -1 <https://arxiv.org/abs/1602.02830>`__,
.. ..
......