Unverified Commit 1a016e3d authored by J-shang, committed by GitHub

[Bugbash] compression doc quick fix (#4718)

parent 611ed639
......@@ -5,14 +5,12 @@
Computation times
=================
-**02:34.670** total execution time for **tutorials** files:
+**03:10.992** total execution time for **tutorials** files:
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:26.953 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 01:46.015 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:55.231 | 0.0 MB |
-+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:12.486 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:24.976 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
......@@ -20,7 +18,9 @@ Computation times
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:00.000 | 0.0 MB |
++-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 00:00.000 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
......@@ -277,7 +277,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
-help='toggle dependency aware mode')
+help='toggle dependency-aware mode')
# finetuning
parser.add_argument('--finetune-epochs', type=int, default=5,
......
......@@ -356,7 +356,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
-help='toggle dependency aware mode')
+help='toggle dependency-aware mode')
parser.add_argument('--global-sort', action='store_true', default=False,
help='toggle global sort mode')
parser.add_argument('--pruner', type=str, default='l1filter',
......
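For context, the parsed flags above typically end up as pruner constructor arguments. A minimal sketch, assuming the legacy NNI compression API; ``build_pruner``, ``model``, ``config_list``, and ``device`` are illustrative stand-ins for parts of the script elided from this diff:

args = parser.parse_args()
pruner_kwargs = {}
if args.dependency_aware:
    # dependency-aware mode needs a dummy input to trace channel dependencies
    pruner_kwargs['dependency_aware'] = True
    pruner_kwargs['dummy_input'] = torch.rand(1, 3, 224, 224).to(device)
if args.global_sort:
    pruner_kwargs['global_sort'] = True
# build_pruner is a hypothetical helper that maps args.pruner (e.g. 'l1filter')
# to the corresponding pruner class
pruner = build_pruner(args.pruner, model, config_list, **pruner_kwargs)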
......@@ -3,21 +3,21 @@ Pruning Quickstart
==================
Model pruning is a technique to reduce the model size and computation by reducing model weight size or intermediate state size.
-It usually has following paths:
+There are three common practices for pruning a DNN model:
-#. Pre-training a model -> Pruning the model -> Fine-tuning the model
-#. Pruning the model aware training -> Fine-tuning the model
-#. Pruning the model -> Pre-training the compact model
+#. Pre-training a model -> Pruning the model -> Fine-tuning the pruned model
+#. Pruning a model during training (i.e., pruning-aware training) -> Fine-tuning the pruned model
+#. Pruning a model -> Training the pruned model from scratch
-NNI supports the above three modes and mainly focuses on the pruning stage.
-Follow this tutorial for a quick look at how to use NNI to prune a model in a common practice.
+NNI supports all of the above practices, focusing on the key pruning stage.
+Follow this tutorial for a quick look at how to use NNI to prune a model in a common practice.
"""
# %%
# Preparation
# -----------
#
-# In this tutorial, we use a simple model and pre-train on MNIST dataset.
+# In this tutorial, we use a simple model and pre-train it on the MNIST dataset.
# If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_.
import torch
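The model definition and training loop are elided in this diff. For orientation, a minimal sketch of a LeNet-style MNIST model of the kind this tutorial uses; the tutorial's real model may differ in detail, but it does end with a linear layer named ``fc3``, which the pruning config below refers to:

import torch.nn.functional as F

class TorchModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # two conv layers followed by three linear layers; `fc3` is the classifier head
        self.conv1 = torch.nn.Conv2d(1, 6, 5)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        self.fc1 = torch.nn.Linear(16 * 4 * 4, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # 28x28 -> 24x24 -> 12x12
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # 12x12 -> 8x8 -> 4x4
        x = torch.flatten(x, 1)                     # 16 * 4 * 4 = 256 features
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)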
......@@ -48,11 +48,11 @@ for epoch in range(3):
# Pruning Model
# -------------
#
-# Using L1NormPruner pruning the model and generating the masks.
-# Usually, pruners require original model and ``config_list`` as parameters.
+# We use L1NormPruner to prune the model and generate the masks.
+# Usually, a pruner requires the original model and a ``config_list`` as its inputs.
# For details about how to write a ``config_list``, please refer to :doc:`compression config specification <../compression/compression_config_list>`.
#
-# This `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned,
+# The following `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned,
# except the layer named `fc3`, because `fc3` is set to `exclude`.
# The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned.
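The ``config_list`` itself is elided from this diff; a minimal sketch matching the description above, assuming the NNI 2.x pruning API (key names such as ``sparsity_per_layer`` follow that API):

config_list = [{
    'sparsity_per_layer': 0.5,         # prune 50% of the weights in each matched layer
    'op_types': ['Linear', 'Conv2d'],  # match every Linear and Conv2d layer...
}, {
    'exclude': True,                   # ...except the classifier head
    'op_names': ['fc3'],
}]

from nni.compression.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list)
_, masks = pruner.compress()           # wraps the model and returns the pruning masks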
......
......@@ -65,6 +65,12 @@ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
dummy_input = torch.rand(32, 1, 28, 28).to(device)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
quantizer.compress()
+# %%
+# The model has now been wrapped, and quantization targets (the 'quant_types' setting in `config_list`)
+# will be quantized and dequantized for simulated quantization in the wrapped layers.
+# QAT is a training-aware quantizer; it updates the scale and zero point during training.
for epoch in range(3):
trainer(model, optimizer, criterion)
evaluator(model)
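After the short QAT run, the quantization parameters are usually exported for later deployment or speedup. A hedged sketch; the paths are illustrative, and ``export_model`` is the exporter as I understand the NNI quantizer API:

model_path = './log/mnist_model.pth'
calibration_path = './log/mnist_calibration.pth'
# saves the trained weights and the calibration config (per-layer scales and
# zero points gathered during quantization-aware training)
calibration_config = quantizer.export_model(model_path, calibration_path)
print(f'calibration config: {calibration_config}')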
......
......@@ -35,7 +35,7 @@ class DependencyAwarePruner(Pruner):
if self.dependency_aware:
if not self._supported_dependency_aware():
-raise ValueError('This pruner does not support dependency aware!')
+raise ValueError('This pruner does not support dependency-aware mode!')
errmsg = "When dependency_aware is set, the dummy_input should not be None"
assert self.dummy_input is not None, errmsg
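For reference, a minimal sketch of how a pruner built on this class is typically constructed in dependency-aware mode, assuming the legacy ``nni.algorithms`` API; the input shape is illustrative:

import torch
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner

# dummy_input is traced to find channel dependencies (e.g. layers joined by a
# residual add), so dependent Conv2d layers are pruned consistently
dummy_input = torch.rand(1, 3, 224, 224)
pruner = L1FilterPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
pruner.compress()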
......
......@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator):
# make up for balance_gran
balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
for i, j in zip(metric.shape, balance_gran):
-assert i % j == 0, 'Length of {} weight is not \
-aligned with balance granularity'.format(name)
+assert i % j == 0, 'Length of {} weight is not aligned with balance granularity'.format(name)
mask = torch.zeros(metric.shape).type_as(metric)
loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)]
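``balance_gran`` describes the bank shape over which sparsity is kept uniform. A hedged sketch of how it typically reaches this allocator, assuming ``LevelPruner``'s balanced mode in the 2.x API:

from nni.compression.pytorch.pruning import LevelPruner

# with balance_gran=[4], every bank of 4 consecutive weights along the last
# dimension gets the same sparsity; each weight dimension must be divisible by
# its granularity, which is exactly what the assert above enforces
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[4])
_, masks = pruner.compress()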
......@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
class Conv2dDependencyAwareAllocator(SparsityAllocator):
"""
-A specify allocator for Conv2d with dependency aware.
+A specialized allocator for Conv2d in dependency-aware mode.
"""
def __init__(self, pruner: Pruner, dim: int, dummy_input: Any):
......
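This allocator is what backs dependency-aware pruning of Conv2d layers in the 2.x pruners; a hedged usage sketch (``mode='dependency_aware'`` and ``dummy_input`` follow the 2.x API as I understand it, and the input shape is illustrative):

import torch
from nni.compression.pytorch.pruning import L1NormPruner

# the pruner traces dummy_input to group Conv2d layers whose output channels
# must keep consistent masks, then prunes each group jointly
pruner = L1NormPruner(model, config_list, mode='dependency_aware',
                      dummy_input=torch.rand(8, 1, 28, 28))
_, masks = pruner.compress()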