Unverified Commit 22165cea authored by J-shang, committed by GitHub

[Doc] update compression reference (#4667)

parent de6662a4
......@@ -631,6 +631,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize weight.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
wrapper : QuantizerModuleWrapper
......@@ -642,6 +643,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize output.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
output : Tensor
......@@ -655,6 +657,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize input.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
inputs : Tensor
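These three hooks share one pattern: intercept a tensor at forward time and return its quantized version. As a rough sketch (not NNI's actual implementation; the wrapper argument is assumed to expose module.weight as in the docstrings above), a quantize_weight override typically does something like:

import torch

def quantize_weight_sketch(wrapper, bits=8):
    # Hedged sketch of a typical quantize_weight override: symmetric
    # per-tensor fake-quantization of the wrapped module's weight.
    weight = wrapper.module.weight.data
    qmax = 2 ** (bits - 1) - 1
    scale = weight.abs().max() / qmax               # per-tensor scale
    q = torch.clamp(torch.round(weight / scale), -qmax - 1, qmax)
    wrapper.module.weight.data = q * scale          # dequantize back ("fake quant")
    return wrapper.module.weight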
......@@ -908,6 +911,7 @@ class QuantGrad(torch.autograd.Function):
def _quantize(cls, x, scale, zero_point):
"""
Reference function for quantizing x -- non-clamped.
Parameters
----------
x : Tensor
......@@ -916,6 +920,7 @@ class QuantGrad(torch.autograd.Function):
scale for quantizing x
zero_point : Tensor
zero_point for quantizing x
Returns
-------
tensor
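In affine terms, the non-clamped reference maps x to round(x / scale + zero_point). A minimal sketch under that assumption (verify against the actual _quantize source):

import torch

def quantize_reference_sketch(x, scale, zero_point):
    # Non-clamped affine quantization: scale x into integer grid units
    # and shift by zero_point; clamping to [qmin, qmax] happens elsewhere.
    return torch.round(x / scale + zero_point)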
......@@ -927,12 +932,14 @@ class QuantGrad(torch.autograd.Function):
def get_bits_length(cls, config, quant_type):
"""
Get the bit width from the quantization config
Parameters
----------
config : Dict
the configuration for quantization
quant_type : str
the quantization type, e.g. 'weight', 'input' or 'output'
Returns
-------
int
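A minimal sketch of this lookup, assuming the NNI config convention where 'quant_bits' is either a single int applied to every quant type or a dict keyed by quant type:

def get_bits_length_sketch(config, quant_type):
    # 'quant_bits' may be a plain int (one bit width for everything)
    # or a dict such as {'weight': 8, 'output': 8} keyed by quant type.
    quant_bits = config['quant_bits']
    if isinstance(quant_bits, int):
        return quant_bits
    return quant_bits.get(quant_type)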
......@@ -948,6 +955,7 @@ class QuantGrad(torch.autograd.Function):
"""
This method should be overridden by subclasses to provide a customized backward function;
the default implementation is the Straight-Through Estimator.
Parameters
----------
tensor : Tensor
......@@ -963,6 +971,7 @@ class QuantGrad(torch.autograd.Function):
quant_min for quantizing tensor
qmax : Tensor
quant_max for quantizing tensor
Returns
-------
tensor
......
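The Straight-Through Estimator treats the non-differentiable rounding step as the identity, so the default backward passes the upstream gradient through unchanged. A sketch of the plain estimator and of a clipped variant (the clipped form is an illustration, not necessarily NNI's default):

import torch

def ste_backward_sketch(tensor, grad_output, scale, zero_point, qmin, qmax):
    # Plain STE: pretend quantization was the identity function.
    return grad_output

def clipped_ste_backward_sketch(tensor, grad_output, scale, zero_point, qmin, qmax):
    # Clipped STE: no gradient where the quantized value saturated.
    q = torch.round(tensor / scale + zero_point)
    mask = (q >= qmin) & (q <= qmax)
    return grad_output * mask.to(grad_output.dtype)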
......@@ -228,40 +228,41 @@ def build_engine(model_file, config=None, extra_layer_bits=32, strict_datatype=F
return engine
class ModelSpeedupTensorRT(BaseModelSpeedup):
r"""
Parameters
----------
model : pytorch model
The model to speed up by quantization.
input_shape : tuple
The input shape of the model; it will be passed to torch.onnx.export.
config : dict
Config recording the bit number and the name of each layer.
onnx_path : str
The path where the user wants to store the ONNX model converted from the PyTorch model.
extra_layer_bits : int
Layers that are not in the config will be quantized to this bit number.
strict_datatype : bool
Whether to constrain layer bits to the numbers given in the config. If true, all layers
will be set strictly to the given bits. Otherwise, these layers will be set automatically
by TensorRT.
calibrate_type : tensorrt.tensorrt.CalibrationAlgoType
The calibration algorithm. Please refer to
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html for details.
calib_data_loader : numpy array
The data used to calibrate the quantized model.
calibration_cache : str
The path where the user wants to store the calibration cache file.
batchsize : int
The batch size for calibration and inference.
input_names : list
Input names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
output_names : list
Output names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
"""
def __init__(self, model, input_shape, config=None, onnx_path="default_model.onnx", extra_layer_bits=32, strict_datatype=True,
calibrate_type=CalibrateType.ENTROPY2, calib_data_loader=None, calibration_cache = "calibration.cache", batchsize=1,
input_names=["actual_input_1"], output_names=["output1"]):
"""
Parameters
----------
model : pytorch model
The model to speed up by quantization.
input_shape : tuple
The input shape of the model; it will be passed to torch.onnx.export.
config : dict
Config recording the bit number and the name of each layer.
onnx_path : str
The path where the user wants to store the ONNX model converted from the PyTorch model.
extra_layer_bits : int
Layers that are not in the config will be quantized to this bit number.
strict_datatype : bool
Whether to constrain layer bits to the numbers given in the config. If true, all layers
will be set strictly to the given bits. Otherwise, these layers will be set automatically
by TensorRT.
calibrate_type : tensorrt.tensorrt.CalibrationAlgoType
The calibration algorithm. Please refer to
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html for details.
calib_data_loader : numpy array
The data used to calibrate the quantized model.
calibration_cache : str
The path where the user wants to store the calibration cache file.
batchsize : int
The batch size for calibration and inference.
input_names : list
Input names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
output_names : list
Output names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
"""
super().__init__(model, config)
self.model = model
self.onnx_path = onnx_path
......
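A hedged usage sketch, following the pattern of NNI's quantization speedup examples; the compress/inference method names and the variables model, calibration_config, and test_data are assumptions here:

# model: a trained PyTorch model; calibration_config: exported by a quantizer.
engine = ModelSpeedupTensorRT(model, input_shape=(32, 1, 28, 28),
                              config=calibration_config, batchsize=32)
engine.compress()                               # export ONNX and build the TensorRT engine
output, latency = engine.inference(test_data)   # run low-bit inference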
......@@ -388,6 +388,9 @@ class ModelSpeedup:
def replace_submodule(self, unique_name, reindex_dim=None, reindex=None):
"""
Replace the submodule according to the inferred sparsity.
Parameters
----------
unique_name: str
The unique_name of the submodule to replace.
reindex_dim: int
......
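replace_submodule is normally driven by speedup_model() rather than called directly. A usage sketch (the mask file path and dummy input are placeholders):

import torch
from nni.compression.pytorch import ModelSpeedup

# After pruning, speedup_model() propagates masks through the graph and
# replaces each shrunken submodule with a smaller dense one.
ms = ModelSpeedup(model, dummy_input=torch.ones(1, 3, 224, 224), masks_file='mask.pth')
ms.speedup_model()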
......@@ -81,23 +81,23 @@ class MaskFix:
class GroupMaskConflict(MaskFix):
"""
GroupMaskConflict fixes the mask conflicts between layers that
have group dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced model of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
def __init__(self, masks, model, dummy_input, traced=None):
"""
GroupMaskConflict fixes the mask conflicts between layers that
have group dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced model of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
super(GroupMaskConflict, self).__init__(
masks, model, dummy_input, traced)
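A usage sketch, assuming a fix_mask method on the MaskFix interface that returns the corrected mask dict:

# masks: dict mapping layer names to mask tensors produced by a pruner.
fixer = GroupMaskConflict(masks, model, dummy_input)
masks = fixer.fix_mask()   # masks now respect conv group boundaries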
......@@ -168,23 +168,24 @@ class GroupMaskConflict(MaskFix):
class ChannelMaskConflict(MaskFix):
"""
ChannelMaskConflict fixes the mask conflicts between layers that
have channel dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced graph of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
def __init__(self, masks, model, dummy_input, traced=None):
"""
ChannelMaskConflict fixes the mask conflicts between layers that
have channel dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced graph of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
super(ChannelMaskConflict, self).__init__(
masks, model, dummy_input, traced)
self.conv_prune_dim = detect_mask_prune_dim(masks, model)
......
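In practice both fixes are usually applied through a module-level helper rather than instantiated directly; a sketch assuming the fix_mask_conflict entry point in nni.compression.pytorch.utils.mask_conflict:

from nni.compression.pytorch.utils.mask_conflict import fix_mask_conflict

# Applies the group and channel fixes in sequence so the resulting
# masks are consistent across dependent layers.
fixed_masks = fix_mask_conflict('mask.pth', model, dummy_input)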
......@@ -18,51 +18,52 @@ logger.setLevel(logging.INFO)
class SensitivityAnalysis:
def __init__(self, model, val_func, sparsities=None, prune_type='l1', early_stop_mode=None, early_stop_value=None):
"""
Perform sensitivity analysis for this model.
Parameters
----------
model : torch.nn.Module
the model to perform sensitivity analysis
val_func : function
validation function for the model. Since different models
may need different datasets/criteria, the user needs to
cover this part themselves.
In val_func, the model should be tested on the validation dataset,
and the validation accuracy/loss should be returned as the output of val_func.
There are no restrictions on the input parameters of val_func.
Users can use the val_args and val_kwargs parameters in analysis
to pass all the parameters that val_func needs.
sparsities : list
The sparsity list provided by users. This parameter is set when the user
only wants to test some specific sparsities. In the sparsity list, each element
is a sparsity value which means how much weight the pruner should prune. Take
[0.25, 0.5, 0.75] as an example: SensitivityAnalysis will gradually prune 25%, 50%,
and 75% of the weights for each layer.
prune_type : str
The pruner type used to prune the conv layers; the default is 'l1',
and 'l2' and 'fine-grained' are also supported.
early_stop_mode : str
If this flag is set, the sensitivity analysis
for a conv layer will stop early when the validation metric (for
example, accuracy/loss) has already met the threshold. We
support four different early stop modes: minimize, maximize, dropped,
raised. The default value is None, which means the analysis won't stop
until all given sparsities are tested. This option should be used
together with early_stop_value.
minimize: The analysis stops when the validation metric returned by val_func is
lower than early_stop_value.
maximize: The analysis stops when the validation metric returned by val_func is
larger than early_stop_value.
dropped: The analysis stops when the validation metric has dropped by early_stop_value.
raised: The analysis stops when the validation metric has risen by early_stop_value.
early_stop_value : float
This value is used as the threshold for the different early stop modes.
This value is effective only when early_stop_mode is set.
"""
"""
Perform sensitivity analysis for this model.
Parameters
----------
model : torch.nn.Module
the model to perform sensitivity analysis
val_func : function
validation function for the model. Since different models
may need different datasets/criteria, the user needs to
cover this part themselves.
In val_func, the model should be tested on the validation dataset,
and the validation accuracy/loss should be returned as the output of val_func.
There are no restrictions on the input parameters of val_func.
Users can use the val_args and val_kwargs parameters in analysis
to pass all the parameters that val_func needs.
sparsities : list
The sparsity list provided by users. This parameter is set when the user
only wants to test some specific sparsities. In the sparsity list, each element
is a sparsity value which means how much weight the pruner should prune. Take
[0.25, 0.5, 0.75] as an example: SensitivityAnalysis will gradually prune 25%, 50%,
and 75% of the weights for each layer.
prune_type : str
The pruner type used to prune the conv layers; the default is 'l1',
and 'l2' and 'fine-grained' are also supported.
early_stop_mode : str
If this flag is set, the sensitivity analysis
for a conv layer will stop early when the validation metric (for
example, accuracy/loss) has already met the threshold. We
support four different early stop modes: minimize, maximize, dropped,
raised. The default value is None, which means the analysis won't stop
until all given sparsities are tested. This option should be used
together with early_stop_value.
minimize: The analysis stops when the validation metric returned by val_func is
lower than early_stop_value.
maximize: The analysis stops when the validation metric returned by val_func is
larger than early_stop_value.
dropped: The analysis stops when the validation metric has dropped by early_stop_value.
raised: The analysis stops when the validation metric has risen by early_stop_value.
early_stop_value : float
This value is used as the threshold for the different early stop modes.
This value is effective only when early_stop_mode is set.
"""
def __init__(self, model, val_func, sparsities=None, prune_type='l1', early_stop_mode=None, early_stop_value=None):
from nni.algorithms.compression.pytorch.pruning.constants_pruner import PRUNER_DICT
self.model = model
......
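A usage sketch; the evaluation helper and data loader are placeholders, and the analysis/export calls follow NNI's sensitivity-analysis example:

def val_func(model):
    # User-supplied evaluation: return a scalar validation metric.
    return evaluate_accuracy(model, val_loader)   # hypothetical helper

analyzer = SensitivityAnalysis(model, val_func, sparsities=[0.25, 0.5, 0.75],
                               early_stop_mode='dropped', early_stop_value=0.05)
sensitivity = analyzer.analysis(val_args=[model])   # per-layer metric under each sparsity
analyzer.export('sensitivity.csv')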
......@@ -91,24 +91,26 @@ def reshape_break_channel_dependency(op_node):
class ChannelDependency(Dependency):
"""
This class analyzes the channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
prune_type : str
This parameter indicates the channel pruning type: 1) `Filter`:
prune the filters of the convolution layer to prune the corresponding
channels; 2) `Batchnorm`: prune the channels in the batchnorm layer.
"""
def __init__(self, model, dummy_input, traced_model=None, prune_type='Filter'):
"""
This class analyzes the channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
prune_type : str
This parameter indicates the channel pruning type: 1) `Filter`:
prune the filters of the convolution layer to prune the corresponding
channels; 2) `Batchnorm`: prune the channels in the batchnorm layer.
"""
self.prune_type = prune_type
self.target_types = []
if self.prune_type == 'Filter':
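A usage sketch; the dependency_sets property and export method are assumed from the NNI dependency utilities:

import torch

dep = ChannelDependency(model, dummy_input=torch.ones(1, 3, 224, 224))
for dep_set in dep.dependency_sets:
    # Layers in one set must share a single channel-pruning mask.
    print(dep_set)
dep.export('channel_dependency.csv')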
......@@ -271,6 +273,7 @@ class InputChannelDependency(ChannelDependency):
"""
This class analyzes the input channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
......@@ -329,20 +332,22 @@ class InputChannelDependency(ChannelDependency):
class GroupDependency(Dependency):
"""
This class analyzes the group dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
"""
def __init__(self, model, dummy_input, traced_model=None):
"""
This class analyzes the group dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
"""
self.min_groups = {}
super(GroupDependency, self).__init__(model, dummy_input, traced_model)
......
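A usage sketch; the dependency attribute, assumed to map each conv layer name to its required group count, tells a pruner how to keep pruned channels aligned with conv groups:

import torch

group_dep = GroupDependency(model, dummy_input=torch.ones(1, 3, 224, 224))
# A conv with groups=g can only keep channels in multiples that leave
# all g groups balanced; 'dependency' records that constraint per layer.
print(group_dep.dependency)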