Unverified Commit 1a016e3d authored by J-shang's avatar J-shang Committed by GitHub
Browse files

[Bugbash] compression doc quick fix (#4718)

parent 611ed639
...@@ -5,14 +5,12 @@ ...@@ -5,14 +5,12 @@
Computation times Computation times
================= =================
**02:34.670** total execution time for **tutorials** files: **03:10.992** total execution time for **tutorials** files:
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:26.953 | 0.0 MB | | :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 01:46.015 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:55.231 | 0.0 MB | | :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:24.976 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:12.486 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
...@@ -20,7 +18,9 @@ Computation times ...@@ -20,7 +18,9 @@ Computation times
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 00:00.000 | 0.0 MB | | :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+ +-----------------------------------------------------------------------------------------------------+-----------+--------+
...@@ -277,7 +277,7 @@ if __name__ == '__main__': ...@@ -277,7 +277,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5, parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall target sparsity') help='target overall target sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False, parser.add_argument('--dependency-aware', action='store_true', default=False,
help='toggle dependency aware mode') help='toggle dependency-aware mode')
# finetuning # finetuning
parser.add_argument('--finetune-epochs', type=int, default=5, parser.add_argument('--finetune-epochs', type=int, default=5,
......
...@@ -356,7 +356,7 @@ if __name__ == '__main__': ...@@ -356,7 +356,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5, parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall target sparsity') help='target overall target sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False, parser.add_argument('--dependency-aware', action='store_true', default=False,
help='toggle dependency aware mode') help='toggle dependency-aware mode')
parser.add_argument('--global-sort', action='store_true', default=False, parser.add_argument('--global-sort', action='store_true', default=False,
help='toggle global sort mode') help='toggle global sort mode')
parser.add_argument('--pruner', type=str, default='l1filter', parser.add_argument('--pruner', type=str, default='l1filter',
......
...@@ -3,21 +3,21 @@ Pruning Quickstart ...@@ -3,21 +3,21 @@ Pruning Quickstart
================== ==================
Model pruning is a technique to reduce the model size and computation by reducing model weight size or intermediate state size. Model pruning is a technique to reduce the model size and computation by reducing model weight size or intermediate state size.
It usually has following paths: There are three common practices for pruning a DNN model:
#. Pre-training a model -> Pruning the model -> Fine-tuning the model #. Pre-training a model -> Pruning the model -> Fine-tuning the pruned model
#. Pruning the model aware training -> Fine-tuning the model #. Pruning a model during training (i.e., pruning aware training) -> Fine-tuning the pruned model
#. Pruning the model -> Pre-training the compact model #. Pruning a model -> Training the pruned model from scratch
NNI supports the above three modes and mainly focuses on the pruning stage. NNI supports all of the above pruning practices by working on the key pruning stage.
Follow this tutorial for a quick look at how to use NNI to prune a model in a common practice. Following this tutorial for a quick look at how to use NNI to prune a model in a common practice.
""" """
# %% # %%
# Preparation # Preparation
# ----------- # -----------
# #
# In this tutorial, we use a simple model and pre-train on MNIST dataset. # In this tutorial, we use a simple model and pre-trained on MNIST dataset.
# If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_. # If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_.
import torch import torch
...@@ -48,11 +48,11 @@ for epoch in range(3): ...@@ -48,11 +48,11 @@ for epoch in range(3):
# Pruning Model # Pruning Model
# ------------- # -------------
# #
# Using L1NormPruner pruning the model and generating the masks. # Using L1NormPruner to prune the model and generate the masks.
# Usually, pruners require original model and ``config_list`` as parameters. # Usually, a pruner requires original model and ``config_list`` as its inputs.
# For details about how to write ``config_list``, please refer to :doc:`compression config specification <../compression/compression_config_list>`. # For details about how to write ``config_list``, please refer to :doc:`compression config specification <../compression/compression_config_list>`.
# #
# This `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned, # The following `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned,
# except the layer named `fc3`, because `fc3` is marked as `exclude`. # except the layer named `fc3`, because `fc3` is marked as `exclude`.
# The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned. # The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned.
......
...@@ -65,6 +65,12 @@ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer ...@@ -65,6 +65,12 @@ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
dummy_input = torch.rand(32, 1, 28, 28).to(device) dummy_input = torch.rand(32, 1, 28, 28).to(device)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input) quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
quantizer.compress() quantizer.compress()
# %%
# The model has now been wrapped, and quantization targets ('quant_types' setting in `config_list`)
# will be quantized & dequantized for simulated quantization in the wrapped layers.
# QAT is a training-aware quantizer; it updates the scale and zero point during training.
for epoch in range(3): for epoch in range(3):
trainer(model, optimizer, criterion) trainer(model, optimizer, criterion)
evaluator(model) evaluator(model)
......
...@@ -35,7 +35,7 @@ class DependencyAwarePruner(Pruner): ...@@ -35,7 +35,7 @@ class DependencyAwarePruner(Pruner):
if self.dependency_aware: if self.dependency_aware:
if not self._supported_dependency_aware(): if not self._supported_dependency_aware():
raise ValueError('This pruner does not support dependency aware!') raise ValueError('This pruner does not support dependency-aware!')
errmsg = "When dependency_aware is set, the dummy_input should not be None" errmsg = "When dependency_aware is set, the dummy_input should not be None"
assert self.dummy_input is not None, errmsg assert self.dummy_input is not None, errmsg
......
...@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator): ...@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator):
# make up for balance_gran # make up for balance_gran
balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
for i, j in zip(metric.shape, balance_gran): for i, j in zip(metric.shape, balance_gran):
assert i % j == 0, 'Length of {} weight is not \ assert i % j == 0, 'Length of {} weight is not aligned with balance granularity'.format(name)
aligned with balance granularity'.format(name)
mask = torch.zeros(metric.shape).type_as(metric) mask = torch.zeros(metric.shape).type_as(metric)
loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)] loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)]
...@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator): ...@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
class Conv2dDependencyAwareAllocator(SparsityAllocator): class Conv2dDependencyAwareAllocator(SparsityAllocator):
""" """
A specify allocator for Conv2d with dependency aware. A specify allocator for Conv2d with dependency-aware.
""" """
def __init__(self, pruner: Pruner, dim: int, dummy_input: Any): def __init__(self, pruner: Pruner, dim: int, dummy_input: Any):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment