Unverified Commit 22165cea authored by J-shang, committed by GitHub

[Doc] update compression reference (#4667)

parent de6662a4
......@@ -631,6 +631,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize weight.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
wrapper : QuantizerModuleWrapper
......@@ -642,6 +643,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize output.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
output : Tensor
......@@ -655,6 +657,7 @@ class Quantizer(Compressor):
"""
Quantizer subclasses should override this method to quantize input.
This method is effectively hooked to :meth:`forward` of the model.
Parameters
----------
inputs : Tensor
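These three hooks share one pattern: intercept a tensor at forward time and return its quantized version. As a rough sketch (not NNI's actual implementation; the wrapper argument is assumed to expose module.weight as in the docstrings above), a quantize_weight override typically does something like:

import torch

def quantize_weight_sketch(wrapper, bits=8):
    # Hedged sketch of a typical quantize_weight override: symmetric
    # per-tensor fake-quantization of the wrapped module's weight.
    weight = wrapper.module.weight.data
    qmax = 2 ** (bits - 1) - 1
    scale = weight.abs().max() / qmax               # per-tensor scale
    q = torch.clamp(torch.round(weight / scale), -qmax - 1, qmax)
    wrapper.module.weight.data = q * scale          # dequantize back ("fake quant")
    return wrapper.module.weight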
......@@ -908,6 +911,7 @@ class QuantGrad(torch.autograd.Function):
def _quantize(cls, x, scale, zero_point):
"""
Reference function for quantizing x -- non-clamped.
Parameters
----------
x : Tensor
......@@ -916,6 +920,7 @@ class QuantGrad(torch.autograd.Function):
scale for quantizing x
zero_point : Tensor
zero_point for quantizing x
Returns
-------
tensor
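In affine terms, the non-clamped reference maps x to round(x / scale + zero_point). A minimal sketch under that assumption (verify against the actual _quantize source):

import torch

def quantize_reference_sketch(x, scale, zero_point):
    # Non-clamped affine quantization: scale x into integer grid units
    # and shift by zero_point; clamping to [qmin, qmax] happens elsewhere.
    return torch.round(x / scale + zero_point)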
......@@ -927,12 +932,14 @@ class QuantGrad(torch.autograd.Function):
def get_bits_length(cls, config, quant_type):
"""
Get the bit width from the quantization config
Parameters
----------
config : Dict
the configuration for quantization
quant_type : str
the quantization type, e.g. 'weight', 'input' or 'output'
Returns
-------
int
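A minimal sketch of this lookup, assuming the NNI config convention where 'quant_bits' is either a single int applied to every quant type or a dict keyed by quant type:

def get_bits_length_sketch(config, quant_type):
    # 'quant_bits' may be a plain int (one bit width for everything)
    # or a dict such as {'weight': 8, 'output': 8} keyed by quant type.
    quant_bits = config['quant_bits']
    if isinstance(quant_bits, int):
        return quant_bits
    return quant_bits.get(quant_type)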
......@@ -948,6 +955,7 @@ class QuantGrad(torch.autograd.Function):
"""
This method should be overridden by subclasses to provide a customized backward function;
the default implementation is the Straight-Through Estimator.
Parameters
----------
tensor : Tensor
......@@ -963,6 +971,7 @@ class QuantGrad(torch.autograd.Function):
quant_min for quantizing tensor
qmax : Tensor
quant_max for quantizing tensor
Returns
-------
tensor
......
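The Straight-Through Estimator treats the non-differentiable rounding step as the identity, so the default backward passes the upstream gradient through unchanged. A sketch of the plain estimator and of a clipped variant (the clipped form is an illustration, not necessarily NNI's default):

import torch

def ste_backward_sketch(tensor, grad_output, scale, zero_point, qmin, qmax):
    # Plain STE: pretend quantization was the identity function.
    return grad_output

def clipped_ste_backward_sketch(tensor, grad_output, scale, zero_point, qmin, qmax):
    # Clipped STE: no gradient where the quantized value saturated.
    q = torch.round(tensor / scale + zero_point)
    mask = (q >= qmin) & (q <= qmax)
    return grad_output * mask.to(grad_output.dtype)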
......@@ -228,40 +228,41 @@ def build_engine(model_file, config=None, extra_layer_bits=32, strict_datatype=F
return engine
class ModelSpeedupTensorRT(BaseModelSpeedup):
r"""
Parameters
----------
model : pytorch model
The model to speed up by quantization.
input_shape : tuple
The input shape of the model; it will be passed to torch.onnx.export.
config : dict
Config recording the bit number and the name of each layer.
onnx_path : str
The path where the user wants to store the ONNX model converted from the PyTorch model.
extra_layer_bits : int
Layers that are not in the config will be quantized to this bit number.
strict_datatype : bool
Whether to constrain layer bits to the numbers given in the config. If true, all layers
will be set strictly to the given bits. Otherwise, these layers will be set automatically
by TensorRT.
calibrate_type : tensorrt.tensorrt.CalibrationAlgoType
The calibration algorithm. Please refer to
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html for details.
calib_data_loader : numpy array
The data used to calibrate the quantized model.
calibration_cache : str
The path where the user wants to store the calibration cache file.
batchsize : int
The batch size for calibration and inference.
input_names : list
Input names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
output_names : list
Output names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
"""
def __init__(self, model, input_shape, config=None, onnx_path="default_model.onnx", extra_layer_bits=32, strict_datatype=True,
calibrate_type=CalibrateType.ENTROPY2, calib_data_loader=None, calibration_cache = "calibration.cache", batchsize=1,
input_names=["actual_input_1"], output_names=["output1"]):
"""
Parameters
----------
model : pytorch model
The model to speed up by quantization.
input_shape : tuple
The input shape of the model; it will be passed to torch.onnx.export.
config : dict
Config recording the bit number and the name of each layer.
onnx_path : str
The path where the user wants to store the ONNX model converted from the PyTorch model.
extra_layer_bits : int
Layers that are not in the config will be quantized to this bit number.
strict_datatype : bool
Whether to constrain layer bits to the numbers given in the config. If true, all layers
will be set strictly to the given bits. Otherwise, these layers will be set automatically
by TensorRT.
calibrate_type : tensorrt.tensorrt.CalibrationAlgoType
The calibration algorithm. Please refer to
https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html for details.
calib_data_loader : numpy array
The data used to calibrate the quantized model.
calibration_cache : str
The path where the user wants to store the calibration cache file.
batchsize : int
The batch size for calibration and inference.
input_names : list
Input names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
output_names : list
Output names of the ONNX model, passed to torch.onnx.export to generate the ONNX model.
"""
super().__init__(model, config)
self.model = model
self.onnx_path = onnx_path
......
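A hedged usage sketch, following the pattern of NNI's quantization speedup examples; the compress/inference method names and the variables model, calibration_config, and test_data are assumptions here:

# model: a trained PyTorch model; calibration_config: exported by a quantizer.
engine = ModelSpeedupTensorRT(model, input_shape=(32, 1, 28, 28),
                              config=calibration_config, batchsize=32)
engine.compress()                               # export ONNX and build the TensorRT engine
output, latency = engine.inference(test_data)   # run low-bit inference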
......@@ -388,6 +388,9 @@ class ModelSpeedup:
def replace_submodule(self, unique_name, reindex_dim=None, reindex=None):
"""
Replace the submodule according to the inferred sparsity.
Parameters
----------
unique_name: str
The unique_name of the submodule to replace.
reindex_dim: int
......
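replace_submodule is normally driven by speedup_model() rather than called directly. A usage sketch (the mask file path and dummy input are placeholders):

import torch
from nni.compression.pytorch import ModelSpeedup

# After pruning, speedup_model() propagates masks through the graph and
# replaces each shrunken submodule with a smaller dense one.
ms = ModelSpeedup(model, dummy_input=torch.ones(1, 3, 224, 224), masks_file='mask.pth')
ms.speedup_model()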
......@@ -81,23 +81,23 @@ class MaskFix:
class GroupMaskConflict(MaskFix):
"""
GroupMaskConflict fixes the mask conflicts between layers that
have group dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced model of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
def __init__(self, masks, model, dummy_input, traced=None):
"""
GroupMaskConflict fixes the mask conflicts between layers that
have group dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced model of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
super(GroupMaskConflict, self).__init__(
masks, model, dummy_input, traced)
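A usage sketch, assuming a fix_mask method on the MaskFix interface that returns the corrected mask dict:

# masks: dict mapping layer names to mask tensors produced by a pruner.
fixer = GroupMaskConflict(masks, model, dummy_input)
masks = fixer.fix_mask()   # masks now respect conv group boundaries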
......@@ -168,23 +168,24 @@ class GroupMaskConflict(MaskFix):
class ChannelMaskConflict(MaskFix):
"""
ChannelMaskConflict fixes the mask conflicts between layers that
have channel dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced graph of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
def __init__(self, masks, model, dummy_input, traced=None):
"""
ChannelMaskConflict fixes the mask conflicts between layers that
have channel dependencies with each other.
Parameters
----------
masks : dict
a dict object that stores the masks
model : torch.nn.Module
the model whose mask conflicts will be fixed
dummy_input : torch.Tensor
input example to trace the model
traced : torch._C.torch.jit.TopLevelTracedModule
the traced graph of the target model; if this parameter is not None,
we do not use the model and dummy_input to get the trace graph.
"""
super(ChannelMaskConflict, self).__init__(
masks, model, dummy_input, traced)
self.conv_prune_dim = detect_mask_prune_dim(masks, model)
......
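In practice both fixes are usually applied through a module-level helper rather than instantiated directly; a sketch assuming the fix_mask_conflict entry point in nni.compression.pytorch.utils.mask_conflict:

from nni.compression.pytorch.utils.mask_conflict import fix_mask_conflict

# Applies the group and channel fixes in sequence so the resulting
# masks are consistent across dependent layers.
fixed_masks = fix_mask_conflict('mask.pth', model, dummy_input)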
......@@ -18,51 +18,52 @@ logger.setLevel(logging.INFO)
class SensitivityAnalysis:
def __init__(self, model, val_func, sparsities=None, prune_type='l1', early_stop_mode=None, early_stop_value=None):
"""
Perform sensitivity analysis for this model.
Parameters
----------
model : torch.nn.Module
the model to perform sensitivity analysis
val_func : function
validation function for the model. Since different models
may need different datasets/criteria, the user needs to
cover this part themselves.
In val_func, the model should be tested on the validation dataset,
and the validation accuracy/loss should be returned as the output of val_func.
There are no restrictions on the input parameters of val_func.
Users can use the val_args and val_kwargs parameters in analysis
to pass all the parameters that val_func needs.
sparsities : list
The sparsity list provided by users. This parameter is set when the user
only wants to test some specific sparsities. In the sparsity list, each element
is a sparsity value which means how much weight the pruner should prune. Take
[0.25, 0.5, 0.75] as an example: SensitivityAnalysis will gradually prune 25%, 50%,
and 75% of the weights for each layer.
prune_type : str
The pruner type used to prune the conv layers; the default is 'l1',
and 'l2' and 'fine-grained' are also supported.
early_stop_mode : str
If this flag is set, the sensitivity analysis
for a conv layer will stop early when the validation metric (for
example, accuracy/loss) has already met the threshold. We
support four different early stop modes: minimize, maximize, dropped,
raised. The default value is None, which means the analysis won't stop
until all given sparsities are tested. This option should be used
together with early_stop_value.
minimize: The analysis stops when the validation metric returned by val_func is
lower than early_stop_value.
maximize: The analysis stops when the validation metric returned by val_func is
larger than early_stop_value.
dropped: The analysis stops when the validation metric has dropped by early_stop_value.
raised: The analysis stops when the validation metric has risen by early_stop_value.
early_stop_value : float
This value is used as the threshold for the different early stop modes.
This value is effective only when early_stop_mode is set.
"""
"""
Perform sensitivity analysis for this model.
Parameters
----------
model : torch.nn.Module
the model to perform sensitivity analysis
val_func : function
validation function for the model. Since different models
may need different datasets/criteria, the user needs to
cover this part themselves.
In val_func, the model should be tested on the validation dataset,
and the validation accuracy/loss should be returned as the output of val_func.
There are no restrictions on the input parameters of val_func.
Users can use the val_args and val_kwargs parameters in analysis
to pass all the parameters that val_func needs.
sparsities : list
The sparsity list provided by users. This parameter is set when the user
only wants to test some specific sparsities. In the sparsity list, each element
is a sparsity value which means how much weight the pruner should prune. Take
[0.25, 0.5, 0.75] as an example: SensitivityAnalysis will gradually prune 25%, 50%,
and 75% of the weights for each layer.
prune_type : str
The pruner type used to prune the conv layers; the default is 'l1',
and 'l2' and 'fine-grained' are also supported.
early_stop_mode : str
If this flag is set, the sensitivity analysis
for a conv layer will stop early when the validation metric (for
example, accuracy/loss) has already met the threshold. We
support four different early stop modes: minimize, maximize, dropped,
raised. The default value is None, which means the analysis won't stop
until all given sparsities are tested. This option should be used
together with early_stop_value.
minimize: The analysis stops when the validation metric returned by val_func is
lower than early_stop_value.
maximize: The analysis stops when the validation metric returned by val_func is
larger than early_stop_value.
dropped: The analysis stops when the validation metric has dropped by early_stop_value.
raised: The analysis stops when the validation metric has risen by early_stop_value.
early_stop_value : float
This value is used as the threshold for the different early stop modes.
This value is effective only when early_stop_mode is set.
"""
def __init__(self, model, val_func, sparsities=None, prune_type='l1', early_stop_mode=None, early_stop_value=None):
from nni.algorithms.compression.pytorch.pruning.constants_pruner import PRUNER_DICT
self.model = model
......
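A usage sketch; the evaluation helper and data loader are placeholders, and the analysis/export calls follow NNI's sensitivity-analysis example:

def val_func(model):
    # User-supplied evaluation: return a scalar validation metric.
    return evaluate_accuracy(model, val_loader)   # hypothetical helper

analyzer = SensitivityAnalysis(model, val_func, sparsities=[0.25, 0.5, 0.75],
                               early_stop_mode='dropped', early_stop_value=0.05)
sensitivity = analyzer.analysis(val_args=[model])   # per-layer metric under each sparsity
analyzer.export('sensitivity.csv')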
......@@ -91,24 +91,26 @@ def reshape_break_channel_dependency(op_node):
class ChannelDependency(Dependency):
"""
This class analyzes the channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
prune_type : str
This parameter indicates the channel pruning type: 1) `Filter`:
prune the filters of the convolution layer to prune the corresponding
channels; 2) `Batchnorm`: prune the channels in the batchnorm layer.
"""
def __init__(self, model, dummy_input, traced_model=None, prune_type='Filter'):
"""
This class analyzes the channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
prune_type : str
This parameter indicates the channel pruning type: 1) `Filter`:
prune the filters of the convolution layer to prune the corresponding
channels; 2) `Batchnorm`: prune the channels in the batchnorm layer.
"""
self.prune_type = prune_type
self.target_types = []
if self.prune_type == 'Filter':
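A usage sketch; the dependency_sets property and export method are assumed from the NNI dependency utilities:

import torch

dep = ChannelDependency(model, dummy_input=torch.ones(1, 3, 224, 224))
for dep_set in dep.dependency_sets:
    # Layers in one set must share a single channel-pruning mask.
    print(dep_set)
dep.export('channel_dependency.csv')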
......@@ -271,6 +273,7 @@ class InputChannelDependency(ChannelDependency):
"""
This class analyzes the input channel dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
......@@ -329,20 +332,22 @@ class InputChannelDependency(ChannelDependency):
class GroupDependency(Dependency):
"""
This class analyzes the group dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
"""
def __init__(self, model, dummy_input, traced_model=None):
"""
This class analyzes the group dependencies between the conv
layers in a model.
Parameters
----------
model : torch.nn.Module
The model to be analyzed.
dummy_input : torch.Tensor
The example input data to trace the network architecture.
traced_model : torch._C.Graph
if we already have the traced graph of the target model, we do not
need to trace the model again.
"""
self.min_groups = {}
super(GroupDependency, self).__init__(model, dummy_input, traced_model)
......
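A usage sketch; the dependency attribute, assumed to map each conv layer name to its required group count, tells a pruner how to keep pruned channels aligned with conv groups:

import torch

group_dep = GroupDependency(model, dummy_input=torch.ones(1, 3, 224, 224))
# A conv with groups=g can only keep channels in multiples that leave
# all g groups balanced; 'dependency' records that constraint per layer.
print(group_dep.dependency)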