Unverified commit 6e09c2c1 authored by J-shang, committed by GitHub

[Doc] update compression tutorials (#4646)

parent a4d8a4ea
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
class DoReFaQuantizer(Quantizer):
r"""
Quantizer using the DoReFa scheme, as defined in:
`DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients <https://arxiv.org/abs/1606.06160>`__\ ,
`DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients <https://arxiv.org/abs/1606.06160>`__,
in which authors Shuchang Zhou and Yuxin Wu provide an algorithm named DoReFa to quantize the weights, activations and gradients during training.
Parameters
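For orientation, a minimal usage sketch of DoReFaQuantizer follows; it is not part of this commit, and the import path and the quant_types/quant_bits/op_types config keys are assumptions based on the NNI compression API of this release.

import torch
import torch.nn as nn

# Assumed import path for this NNI release; adjust to your installed version.
from nni.algorithms.compression.pytorch.quantization import DoReFaQuantizer

model = nn.Sequential(nn.Conv2d(1, 8, 3), nn.ReLU(), nn.Flatten(), nn.Linear(8 * 26 * 26, 10))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Quantize the weights of every Conv2d layer to 8 bits
# (config schema assumed from the NNI compression config format).
config_list = [{
    'quant_types': ['weight'],
    'quant_bits': {'weight': 8},
    'op_types': ['Conv2d'],
}]

quantizer = DoReFaQuantizer(model, config_list, optimizer)
quantizer.compress()  # wraps the selected layers; train the model afterwards as usual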
@@ -109,8 +109,8 @@ def update_ema(biased_ema, value, decay):
class QAT_Quantizer(Quantizer):
r"""
Quantizer defined in:
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
`Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
<http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf>`__
Authors Benoit Jacob and Skirmantas Kligys provide an algorithm to quantize the model during training.
@@ -124,6 +124,7 @@ class QAT_Quantizer(Quantizer):
* Weights are quantized before they are convolved with the input. If batch normalization (see [17]) is used for the layer,
the batch normalization parameters are “folded into” the weights before quantization.
* Activations are quantized at points where they would be during inference,
e.g. after the activation function is applied to a convolutional or fully connected layer’s output,
or after a bypass connection adds or concatenates the outputs of several layers together such as in ResNets.
@@ -184,7 +185,7 @@ class QAT_Quantizer(Quantizer):
dummy_input = torch.randn(1, 1, 28, 28)
# pass the dummy_input to the quantizer
quantizer = QAT_Quantizer(model, config_list, dummy_input=dummy_input)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input=dummy_input)
The quantizer will automatically detect Conv-BN patterns and simulate the batch normalization folding process in the training
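To place the snippet above in context, here is a minimal end-to-end sketch that is not part of this commit; the import path, the config keys and the export_model call are assumptions based on the NNI QAT quantizer API and should be checked against the installed release.

import torch
import torch.nn as nn

# Assumed import path for this NNI release; adjust to your installed version.
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

model = nn.Sequential(nn.Conv2d(1, 8, 3), nn.BatchNorm2d(8), nn.ReLU(),
                      nn.Flatten(), nn.Linear(8 * 26 * 26, 10))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Quantize both weights and layer outputs to 8 bits (schema assumed from the NNI config format).
config_list = [{
    'quant_types': ['weight', 'output'],
    'quant_bits': {'weight': 8, 'output': 8},
    'op_types': ['Conv2d'],
}]

# dummy_input lets the quantizer trace the model and detect Conv-BN patterns for folding.
dummy_input = torch.randn(1, 1, 28, 28)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input=dummy_input)
quantizer.compress()

# ... run the usual training loop here so the quantization parameters are calibrated ...

# Export the quantized model and its calibration data (method assumed from the quantizer base API).
quantizer.export_model('qat_model.pth', 'qat_calibration.pth')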
@@ -148,8 +148,8 @@ class LevelPruner(BasicPruner):
operation as an example, the weight tensor will be split into sub-blocks whose shape is aligned to
balance_gran. Then fine-grained pruning will be applied inside each sub-block. This sparsity
pattern has a better chance of achieving a good trade-off between model performance and hardware
acceleration. Please refer to the related paper 'Balanced Sparsity for
Efficient DNN Inference on GPU' (https://arxiv.org/pdf/1811.00206.pdf) for further information.
acceleration. Please refer to the related paper `Balanced Sparsity for
Efficient DNN Inference on GPU <https://arxiv.org/pdf/1811.00206.pdf>`__ for further information.
balance_gran : list
Balance_gran is for the special balanced-sparsity pattern. The default value is None, which means pruning
without balance awareness, namely normal fine-grained pruning.
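As a concrete illustration of the balanced-sparsity mode described above (not part of this commit), a hedged sketch follows; the import path and the mode/balance_gran keyword arguments are assumptions drawn from the quoted docstring and the NNI v2 pruning API.

import torch.nn as nn

# Assumed import path for the v2 pruners of this release; adjust to your installed version.
from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner

model = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 10))

# Prune 50% of the weights in every Linear layer.
config_list = [{'sparsity': 0.5, 'op_types': ['Linear']}]

# In the assumed 'balance' mode, balance_gran=[1, 4] asks the pruner to keep the same number of
# nonzeros inside every 1 x 4 sub-block of the weight tensor, i.e. the balanced sparsity
# pattern described in the docstring above.
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[1, 4])
pruned_model, masks = pruner.compress()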
@@ -290,7 +290,7 @@ class L1NormPruner(NormPruner):
i.e., compute the l1 norm of the filters in a convolution layer as metric values,
and compute the l1 norm of each row of the weight in a linear layer as metric values.
For more details, please refer to `PRUNING FILTERS FOR EFFICIENT CONVNETS <https://arxiv.org/abs/1608.08710>`__\.
For more details, please refer to `PRUNING FILTERS FOR EFFICIENT CONVNETS <https://arxiv.org/abs/1608.08710>`__.
In addition, L1 norm pruner also supports dependency-aware mode.
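A matching L1NormPruner sketch, under the same API assumptions as the LevelPruner example above and likewise not part of this commit:

import torch.nn as nn

from nni.algorithms.compression.v2.pytorch.pruning import L1NormPruner  # assumed import path

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU(), nn.Conv2d(16, 32, 3))

# Remove 50% of the filters in every Conv2d layer, ranked by the L1 norm of each filter.
config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]

pruner = L1NormPruner(model, config_list)
pruned_model, masks = pruner.compress()

# Each mask has the same shape as the corresponding weight; zeros mark pruned elements.
for name, mask in masks.items():
    kept = int(mask['weight'].sum().item())
    print(name, 'kept weight elements:', kept)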