Unverified Commit 1a016e3d authored by J-shang, committed by GitHub

[Bugbash] compression doc quick fix (#4718)

parent 611ed639
......@@ -5,14 +5,12 @@
Computation times
=================
-**02:34.670** total execution time for **tutorials** files:
+**03:10.992** total execution time for **tutorials** files:
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:26.953 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 01:46.015 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:55.231 | 0.0 MB |
-+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:12.486 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_pruning_quick_start_mnist.py` (``pruning_quick_start_mnist.py``) | 01:24.976 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_hello_nas.py` (``hello_nas.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
......@@ -20,7 +18,9 @@ Computation times
+-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_pruning_customize.py` (``pruning_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorials_pruning_speedup.py` (``pruning_speedup.py``) | 00:00.000 | 0.0 MB |
++-----------------------------------------------------------------------------------------------------+-----------+--------+
| :ref:`sphx_glr_tutorials_quantization_customize.py` (``quantization_customize.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorials_quantization_quick_start_mnist.py` (``quantization_quick_start_mnist.py``) | 00:00.000 | 0.0 MB |
+| :ref:`sphx_glr_tutorials_quantization_speedup.py` (``quantization_speedup.py``) | 00:00.000 | 0.0 MB |
+-----------------------------------------------------------------------------------------------------+-----------+--------+
......@@ -277,7 +277,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
-help='toggle dependency aware mode')
+help='toggle dependency-aware mode')
# finetuning
parser.add_argument('--finetune-epochs', type=int, default=5,
......
......@@ -356,7 +356,7 @@ if __name__ == '__main__':
parser.add_argument('--sparsity', type=float, default=0.5,
help='target overall sparsity')
parser.add_argument('--dependency-aware', action='store_true', default=False,
-help='toggle dependency aware mode')
+help='toggle dependency-aware mode')
parser.add_argument('--global-sort', action='store_true', default=False,
help='toggle global sort mode')
parser.add_argument('--pruner', type=str, default='l1filter',
......
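For context, the parsed flags above typically end up as pruner constructor arguments. A minimal sketch, assuming the legacy NNI compression API; ``build_pruner``, ``model``, ``config_list``, and ``device`` are illustrative stand-ins for parts of the script elided from this diff:

args = parser.parse_args()
pruner_kwargs = {}
if args.dependency_aware:
    # dependency-aware mode needs a dummy input to trace channel dependencies
    pruner_kwargs['dependency_aware'] = True
    pruner_kwargs['dummy_input'] = torch.rand(1, 3, 224, 224).to(device)
if args.global_sort:
    pruner_kwargs['global_sort'] = True
# build_pruner is a hypothetical helper that maps args.pruner (e.g. 'l1filter')
# to the corresponding pruner class
pruner = build_pruner(args.pruner, model, config_list, **pruner_kwargs)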
......@@ -3,21 +3,21 @@ Pruning Quickstart
==================
Model pruning is a technique to reduce the model size and computation by reducing model weight size or intermediate state size.
-It usually has following paths:
+There are three common practices for pruning a DNN model:
-#. Pre-training a model -> Pruning the model -> Fine-tuning the model
-#. Pruning the model aware training -> Fine-tuning the model
-#. Pruning the model -> Pre-training the compact model
+#. Pre-training a model -> Pruning the model -> Fine-tuning the pruned model
+#. Pruning a model during training (i.e., pruning-aware training) -> Fine-tuning the pruned model
+#. Pruning a model -> Training the pruned model from scratch
-NNI supports the above three modes and mainly focuses on the pruning stage.
-Follow this tutorial for a quick look at how to use NNI to prune a model in a common practice.
+NNI supports all of the above practices, focusing on the key pruning stage.
+Follow this tutorial for a quick look at how to use NNI to prune a model in a common practice.
"""
# %%
# Preparation
# -----------
#
-# In this tutorial, we use a simple model and pre-train on MNIST dataset.
+# In this tutorial, we use a simple model and pre-train it on the MNIST dataset.
# If you are familiar with defining a model and training in pytorch, you can skip directly to `Pruning Model`_.
import torch
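The model definition and training loop are elided in this diff. For orientation, a minimal sketch of a LeNet-style MNIST model of the kind this tutorial uses; the tutorial's real model may differ in detail, but it does end with a linear layer named ``fc3``, which the pruning config below refers to:

import torch.nn.functional as F

class TorchModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # two conv layers followed by three linear layers; `fc3` is the classifier head
        self.conv1 = torch.nn.Conv2d(1, 6, 5)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        self.fc1 = torch.nn.Linear(16 * 4 * 4, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # 28x28 -> 24x24 -> 12x12
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # 12x12 -> 8x8 -> 4x4
        x = torch.flatten(x, 1)                     # 16 * 4 * 4 = 256 features
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)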
......@@ -48,11 +48,11 @@ for epoch in range(3):
# Pruning Model
# -------------
#
-# Using L1NormPruner pruning the model and generating the masks.
-# Usually, pruners require original model and ``config_list`` as parameters.
+# We use L1NormPruner to prune the model and generate the masks.
+# Usually, a pruner requires the original model and a ``config_list`` as its inputs.
# For details about how to write a ``config_list``, please refer to :doc:`compression config specification <../compression/compression_config_list>`.
#
-# This `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned,
+# The following `config_list` means all layers whose type is `Linear` or `Conv2d` will be pruned,
# except the layer named `fc3`, because `fc3` is set to `exclude`.
# The final sparsity ratio for each layer is 50%. The layer named `fc3` will not be pruned.
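The ``config_list`` itself is elided from this diff; a minimal sketch matching the description above, assuming the NNI 2.x pruning API (key names such as ``sparsity_per_layer`` follow that API):

config_list = [{
    'sparsity_per_layer': 0.5,         # prune 50% of the weights in each matched layer
    'op_types': ['Linear', 'Conv2d'],  # match every Linear and Conv2d layer...
}, {
    'exclude': True,                   # ...except the classifier head
    'op_names': ['fc3'],
}]

from nni.compression.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list)
_, masks = pruner.compress()           # wraps the model and returns the pruning masks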
......
......@@ -65,6 +65,12 @@ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
dummy_input = torch.rand(32, 1, 28, 28).to(device)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
quantizer.compress()
+# %%
+# The model has now been wrapped, and quantization targets (the 'quant_types' setting in `config_list`)
+# will be quantized and dequantized for simulated quantization in the wrapped layers.
+# QAT is a training-aware quantizer; it updates the scale and zero point during training.
for epoch in range(3):
trainer(model, optimizer, criterion)
evaluator(model)
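After the short QAT run, the quantization parameters are usually exported for later deployment or speedup. A hedged sketch; the paths are illustrative, and ``export_model`` is the exporter as I understand the NNI quantizer API:

model_path = './log/mnist_model.pth'
calibration_path = './log/mnist_calibration.pth'
# saves the trained weights and the calibration config (per-layer scales and
# zero points gathered during quantization-aware training)
calibration_config = quantizer.export_model(model_path, calibration_path)
print(f'calibration config: {calibration_config}')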
......
......@@ -35,7 +35,7 @@ class DependencyAwarePruner(Pruner):
if self.dependency_aware:
if not self._supported_dependency_aware():
-raise ValueError('This pruner does not support dependency aware!')
+raise ValueError('This pruner does not support dependency-aware mode!')
errmsg = "When dependency_aware is set, the dummy_input should not be None"
assert self.dummy_input is not None, errmsg
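For reference, a minimal sketch of how a pruner built on this class is typically constructed in dependency-aware mode, assuming the legacy ``nni.algorithms`` API; the input shape is illustrative:

import torch
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner

# dummy_input is traced to find channel dependencies (e.g. layers joined by a
# residual add), so dependent Conv2d layers are pruned consistently
dummy_input = torch.rand(1, 3, 224, 224)
pruner = L1FilterPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
pruner.compress()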
......
......@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator):
# make up for balance_gran
balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
for i, j in zip(metric.shape, balance_gran):
-assert i % j == 0, 'Length of {} weight is not \
-aligned with balance granularity'.format(name)
+assert i % j == 0, 'Length of {} weight is not aligned with balance granularity'.format(name)
mask = torch.zeros(metric.shape).type_as(metric)
loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)]
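``balance_gran`` describes the bank shape over which sparsity is kept uniform. A hedged sketch of how it typically reaches this allocator, assuming ``LevelPruner``'s balanced mode in the 2.x API:

from nni.compression.pytorch.pruning import LevelPruner

# with balance_gran=[4], every bank of 4 consecutive weights along the last
# dimension gets the same sparsity; each weight dimension must be divisible by
# its granularity, which is exactly what the assert above enforces
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[4])
_, masks = pruner.compress()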
......@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
class Conv2dDependencyAwareAllocator(SparsityAllocator):
"""
-A specify allocator for Conv2d with dependency aware.
+A specialized allocator for Conv2d in dependency-aware mode.
"""
def __init__(self, pruner: Pruner, dim: int, dummy_input: Any):
......
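This allocator is what backs dependency-aware pruning of Conv2d layers in the 2.x pruners; a hedged usage sketch (``mode='dependency_aware'`` and ``dummy_input`` follow the 2.x API as I understand it, and the input shape is illustrative):

import torch
from nni.compression.pytorch.pruning import L1NormPruner

# the pruner traces dummy_input to group Conv2d layers whose output channels
# must keep consistent masks, then prunes each group jointly
pruner = L1NormPruner(model, config_list, mode='dependency_aware',
                      dummy_input=torch.rand(8, 1, 28, 28))
_, masks = pruner.compress()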