Unverified Commit db3130d7 authored by J-shang's avatar J-shang Committed by GitHub

[Doc] Compression (#4574)

parent cef9babd
...@@ -51,7 +51,16 @@ class AutoCompressTaskGenerator(LotteryTicketTaskGenerator): ...@@ -51,7 +51,16 @@ class AutoCompressTaskGenerator(LotteryTicketTaskGenerator):
class AutoCompressPruner(IterativePruner): class AutoCompressPruner(IterativePruner):
""" r"""
For total iteration number :math:`N`, AutoCompressPruner prunes the parts of the model that survived the previous iteration with a fixed sparsity ratio (e.g., :math:`1-{(1-0.8)}^{(1/N)}`) to achieve the overall sparsity (e.g., :math:`0.8`):
1. Generate a sparsity distribution using SimulatedAnnealingPruner.
2. Perform ADMM-based pruning to generate the pruning result for the next iteration.
For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates <https://arxiv.org/abs/1907.03141>`__.
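A minimal sketch of how such a fixed per-iteration ratio relates to the overall sparsity (illustrative helper only, not part of the pruner's API):

.. code-block:: python

    # Illustrative: derive the fixed per-iteration pruning ratio from the
    # overall target sparsity and the total iteration number N.
    def per_iteration_ratio(overall_sparsity: float, total_iteration: int) -> float:
        return 1 - (1 - overall_sparsity) ** (1 / total_iteration)

    ratio = per_iteration_ratio(0.8, 4)   # ratio applied in each of N = 4 iterations
    remaining = (1 - ratio) ** 4          # ~0.2 of the weights remain, i.e. 0.8 overall sparsity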
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -70,7 +79,7 @@ class AutoCompressPruner(IterativePruner): ...@@ -70,7 +79,7 @@ class AutoCompressPruner(IterativePruner):
The model will be trained or inferenced `training_epochs` epochs. The model will be trained or inferenced `training_epochs` epochs.
- traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) - traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
- criterion : Callable[[Tensor, Tensor], Tensor]. - criterion : Callable[[Tensor, Tensor], Tensor].
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
- iterations : int. - iterations : int.
...@@ -107,6 +116,34 @@ class AutoCompressPruner(IterativePruner): ...@@ -107,6 +116,34 @@ class AutoCompressPruner(IterativePruner):
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speed up the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speed_up` is True, `dummy_input` is required for tracing the model in speed up.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import AutoCompressPruner
>>> model = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> evaluator = ...
>>> finetuner = ...
>>> admm_params = {
>>> 'trainer': trainer,
>>> 'traced_optimizer': traced_optimizer,
>>> 'criterion': criterion,
>>> 'iterations': 10,
>>> 'training_epochs': 1
>>> }
>>> sa_params = {
>>> 'evaluator': evaluator
>>> }
>>> pruner = AutoCompressPruner(model, config_list, 10, admm_params, sa_params, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
The full script can be found :githublink:`here <examples/model_compress/pruning/v2/auto_compress_pruner.py>`.
""" """
def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict, def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict,
......
...@@ -125,11 +125,14 @@ class BasicPruner(Pruner): ...@@ -125,11 +125,14 @@ class BasicPruner(Pruner):
class LevelPruner(BasicPruner): class LevelPruner(BasicPruner):
""" r"""
This is a basic pruner; in some papers it is called magnitude pruning or fine-grained pruning.
It masks the smallest-magnitude weights in each specified layer according to the sparsity ratio configured in the config list.
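Conceptually, the mask zeros out the smallest-magnitude entries of each weight tensor; a minimal sketch of this idea for a single tensor (not the pruner's actual implementation):

.. code-block:: python

    import torch

    def level_mask(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
        # keep the largest-magnitude entries, zero out the rest
        num_prune = int(weight.numel() * sparsity)
        if num_prune == 0:
            return torch.ones_like(weight)
        threshold = weight.abs().flatten().kthvalue(num_prune).values
        return (weight.abs() > threshold).type_as(weight)

    mask = level_mask(torch.randn(64, 64), sparsity=0.8)   # roughly 80% zeros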
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -181,7 +184,16 @@ class LevelPruner(BasicPruner): ...@@ -181,7 +184,16 @@ class LevelPruner(BasicPruner):
pruning result: Weight tensor whose shape is [64, 64] will be split into 4 [32, 32] sub blocks. pruning result: Weight tensor whose shape is [64, 64] will be split into 4 [32, 32] sub blocks.
Each sub block will be pruned 256 values. Each sub block will be pruned 256 values.
Examples
--------
>>> model = ...
>>> from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
>>> pruner = LevelPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/level_pruning_torch.py <examples/model_compress/pruning/v2/level_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None): def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None):
...@@ -215,7 +227,7 @@ class NormPruner(BasicPruner): ...@@ -215,7 +227,7 @@ class NormPruner(BasicPruner):
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -272,11 +284,20 @@ class NormPruner(BasicPruner): ...@@ -272,11 +284,20 @@ class NormPruner(BasicPruner):
class L1NormPruner(NormPruner): class L1NormPruner(NormPruner):
""" r"""
L1 norm pruner computes the L1 norm of the layer weight along the first dimension,
then prunes the weight blocks on this dimension with the smallest L1 norm values.
That is, it uses the L1 norm of each filter in a convolution layer, or of each row of the weight in a linear layer, as the metric value.
For more details, please refer to `Pruning Filters for Efficient ConvNets <https://arxiv.org/abs/1608.08710>`__\.
In addition, L1 norm pruner also supports dependency-aware mode.
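For intuition, the metric for a convolution layer could be computed as follows (a sketch, not NNI's internal code):

.. code-block:: python

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(16, 32, kernel_size=3)
    # L1 norm of each filter, i.e. along the first (output channel) dimension
    l1_metric = conv.weight.detach().abs().sum(dim=[1, 2, 3])   # shape: [32]
    # filters with the smallest L1 norm are the pruning candidates
    candidates = l1_metric.argsort()[: int(32 * 0.5)]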
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -305,16 +326,21 @@ class L1NormPruner(NormPruner): ...@@ -305,16 +326,21 @@ class L1NormPruner(NormPruner):
class L2NormPruner(NormPruner): class L2NormPruner(NormPruner):
""" r"""
L2 norm pruner is a variant of L1 norm pruner.
The only difference between the L2 norm pruner and the L1 norm pruner is that the L2 norm pruner ranks and prunes the weight blocks by their L2 norm instead of their L1 norm.
L2 norm pruner also supports dependency-aware mode.
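The only change relative to the L1 metric is the norm used to rank the blocks; a sketch:

.. code-block:: python

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(16, 32, kernel_size=3)
    filters = conv.weight.detach().flatten(start_dim=1)   # shape: [32, 144]
    l2_metric = torch.norm(filters, p=2, dim=1)           # L2 norm per filter
    candidates = l2_metric.argsort()[: int(32 * 0.5)]     # smallest L2 norms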
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity. - sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L1NormPruner. - op_types : Conv2d and Linear are supported in L2NormPruner.
- op_names : Operation names to be pruned. - op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI. - op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning. - exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
...@@ -330,6 +356,16 @@ class L2NormPruner(NormPruner): ...@@ -330,6 +356,16 @@ class L2NormPruner(NormPruner):
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model. should on the same device with the model.
Examples
--------
>>> model = ...
>>> from nni.algorithms.compression.v2.pytorch.pruning import L2NormPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = L2NormPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/norm_pruning_torch.py <examples/model_compress/pruning/v2/norm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], def __init__(self, model: Module, config_list: List[Dict],
...@@ -338,11 +374,18 @@ class L2NormPruner(NormPruner): ...@@ -338,11 +374,18 @@ class L2NormPruner(NormPruner):
class FPGMPruner(BasicPruner): class FPGMPruner(BasicPruner):
""" r"""
FPGM pruner prunes the weight blocks on the first dimension that are closest to the geometric median of all blocks in the same layer.
In other words, FPGM chooses the weight blocks whose contribution is the most replaceable.
For more details, please refer to `Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration <https://arxiv.org/abs/1811.00250>`__.
FPGM pruner also supports dependency-aware mode.
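A rough sketch of the metric: for each filter, sum its Euclidean distances to all other filters; the filters with the smallest total distance are closest to the geometric median and therefore the most replaceable (illustrative, not the exact implementation):

.. code-block:: python

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(16, 32, kernel_size=3)
    filters = conv.weight.detach().flatten(start_dim=1)   # shape: [32, 144]
    distances = torch.cdist(filters, filters, p=2)        # pairwise distances, [32, 32]
    fpgm_metric = distances.sum(dim=1)                    # total distance per filter
    candidates = fpgm_metric.argsort()[: int(32 * 0.3)]   # most replaceable filters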
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -363,6 +406,16 @@ class FPGMPruner(BasicPruner): ...@@ -363,6 +406,16 @@ class FPGMPruner(BasicPruner):
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model. should on the same device with the model.
Examples
--------
>>> model = ...
>>> from nni.algorithms.compression.v2.pytorch.pruning import FPGMPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = FPGMPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/fpgm_pruning_torch.py <examples/model_compress/pruning/v2/fpgm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], def __init__(self, model: Module, config_list: List[Dict],
...@@ -396,11 +449,16 @@ class FPGMPruner(BasicPruner): ...@@ -396,11 +449,16 @@ class FPGMPruner(BasicPruner):
class SlimPruner(BasicPruner): class SlimPruner(BasicPruner):
""" r"""
Slim pruner adds sparsity regularization on the scaling factors of batch normalization (BN) layers during training to identify unimportant channels.
The channels with small scaling factor values will be pruned.
For more details, please refer to `Learning Efficient Convolutional Networks through Network Slimming <https://arxiv.org/abs/1708.06519>`__\.
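In practice, the sparsity regularization amounts to adding an L1 penalty on the BN scaling factors to the training loss; a hedged sketch of such a regularized loss (the penalty coefficient is illustrative):

.. code-block:: python

    import torch
    import torch.nn as nn

    def slim_regularized_loss(model: nn.Module, base_loss: torch.Tensor,
                              scale: float = 1e-4) -> torch.Tensor:
        # L1 penalty on the scaling factors (gamma) of all BatchNorm2d layers
        penalty = sum(m.weight.abs().sum() for m in model.modules()
                      if isinstance(m, nn.BatchNorm2d))
        return base_loss + scale * penalty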
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -432,7 +490,7 @@ class SlimPruner(BasicPruner): ...@@ -432,7 +490,7 @@ class SlimPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs : int training_epochs : int
...@@ -444,6 +502,21 @@ class SlimPruner(BasicPruner): ...@@ -444,6 +502,21 @@ class SlimPruner(BasicPruner):
If prune the model in a global way, all layer weights with same config will be considered uniformly. If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config, That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting. but the total pruned weights meet the sparsity setting.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import SlimPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
>>> pruner = SlimPruner(model, config_list, trainer, traced_optimizer, criterion, training_epochs=1)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/slim_pruning_torch.py <examples/model_compress/pruning/v2/slim_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
...@@ -507,7 +580,7 @@ class ActivationPruner(BasicPruner): ...@@ -507,7 +580,7 @@ class ActivationPruner(BasicPruner):
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -537,7 +610,7 @@ class ActivationPruner(BasicPruner): ...@@ -537,7 +610,7 @@ class ActivationPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches training_batches
...@@ -627,6 +700,82 @@ class ActivationPruner(BasicPruner): ...@@ -627,6 +700,82 @@ class ActivationPruner(BasicPruner):
class ActivationAPoZRankPruner(ActivationPruner): class ActivationAPoZRankPruner(ActivationPruner):
r"""
Activation APoZ rank pruner prunes the weight blocks on the first dimension with the smallest importance scores,
where the importance criterion ``APoZ`` (Average Percentage of Zeros) is calculated from the output activations of convolution layers, to achieve a preset level of network sparsity.
The pruning criterion ``APoZ`` is explained in the paper `Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures <https://arxiv.org/abs/1607.03250>`__.
The APoZ is defined as:
:math:`APoZ_{c}^{(i)} = APoZ\left(O_{c}^{(i)}\right)=\frac{\sum_{k}^{N} \sum_{j}^{M} f\left(O_{c, j}^{(i)}(k)=0\right)}{N \times M}`
Activation APoZ rank pruner also supports dependency-aware mode.
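For intuition, APoZ for one convolution layer can be estimated from a batch of its (ReLU) output activations as the fraction of zero entries per output channel (a sketch, not NNI's internal code):

.. code-block:: python

    import torch

    # activations: output of a ReLU following a conv layer, shape [N, C, H, W]
    activations = torch.relu(torch.randn(8, 32, 14, 14))
    zeros = (activations == 0).float()
    apoz_per_channel = zeros.mean(dim=[0, 2, 3])   # shape: [C]
    # channels with the highest APoZ are considered the least important
    candidates = apoz_per_channel.argsort(descending=True)[: int(32 * 0.5)]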
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationAPoZRankPruner.
- op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import ActivationAPoZRankPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ActivationAPoZRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/activation_pruning_torch.py <examples/model_compress/pruning/v2/activation_pruning_torch.py>`
"""
def _activation_trans(self, output: Tensor) -> Tensor: def _activation_trans(self, output: Tensor) -> Tensor:
# return a matrix that the position of zero in `output` is one, others is zero. # return a matrix that the position of zero in `output` is one, others is zero.
return torch.eq(self._activation(output.detach()), torch.zeros_like(output)).type_as(output) return torch.eq(self._activation(output.detach()), torch.zeros_like(output)).type_as(output)
...@@ -636,6 +785,80 @@ class ActivationAPoZRankPruner(ActivationPruner): ...@@ -636,6 +785,80 @@ class ActivationAPoZRankPruner(ActivationPruner):
class ActivationMeanRankPruner(ActivationPruner): class ActivationMeanRankPruner(ActivationPruner):
r"""
Activation mean rank pruner prunes the weight blocks on the first dimension with the smallest importance scores,
where the importance criterion ``mean activation`` is calculated from the output activations of convolution layers, to achieve a preset level of network sparsity.
The pruning criterion ``mean activation`` is explained in section 2.2 of the paper `Pruning Convolutional Neural Networks for Resource Efficient Inference <https://arxiv.org/abs/1611.06440>`__.
Activation mean rank pruner also supports dependency-aware mode.
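The metric here is simply the mean activation per output channel, averaged over the collected batches; a sketch:

.. code-block:: python

    import torch

    # activations collected from a conv layer's output, shape [N, C, H, W]
    activations = torch.relu(torch.randn(8, 32, 14, 14))
    mean_activation = activations.mean(dim=[0, 2, 3])          # shape: [C]
    candidates = mean_activation.argsort()[: int(32 * 0.5)]    # smallest mean activation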
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationMeanRankPruner.
- op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import ActivationMeanRankPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/activation_pruning_torch.py <examples/model_compress/pruning/v2/activation_pruning_torch.py>`
"""
def _activation_trans(self, output: Tensor) -> Tensor: def _activation_trans(self, output: Tensor) -> Tensor:
# return the activation of `output` directly. # return the activation of `output` directly.
return self._activation(output.detach()) return self._activation(output.detach())
...@@ -645,11 +868,21 @@ class ActivationMeanRankPruner(ActivationPruner): ...@@ -645,11 +868,21 @@ class ActivationMeanRankPruner(ActivationPruner):
class TaylorFOWeightPruner(BasicPruner): class TaylorFOWeightPruner(BasicPruner):
""" r"""
Taylor FO weight pruner prunes the weight blocks on the first dimension,
based on estimated importance calculated from the first-order Taylor expansion on the weights, to achieve a preset level of network sparsity.
The estimated importance is defined in the paper `Importance Estimation for Neural Network Pruning <http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf>`__ as:
:math:`\widehat{\mathcal{I}}_{\mathcal{S}}^{(1)}(\mathbf{W}) \triangleq \sum_{s \in \mathcal{S}} \mathcal{I}_{s}^{(1)}(\mathbf{W})=\sum_{s \in \mathcal{S}}\left(g_{s} w_{s}\right)^{2}`
Taylor FO weight pruner also supports dependency-aware mode.
What's more, we provide a global-sort mode for this pruner which is aligned with paper implementation.
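For intuition, the per-filter importance could be accumulated after a backward pass as below (a sketch; the real pruner collects this over ``training_batches`` batches):

.. code-block:: python

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(3, 32, kernel_size=3)
    x, target = torch.randn(4, 3, 32, 32), torch.randn(4, 32, 30, 30)
    loss = nn.functional.mse_loss(conv(x), target)
    loss.backward()

    # (g_s * w_s)^2 summed within each filter -> importance per output channel
    importance = (conv.weight.grad * conv.weight.detach()) ** 2
    importance = importance.sum(dim=[1, 2, 3])             # shape: [32]
    candidates = importance.argsort()[: int(32 * 0.5)]     # least important filters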
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -681,7 +914,7 @@ class TaylorFOWeightPruner(BasicPruner): ...@@ -681,7 +914,7 @@ class TaylorFOWeightPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches : int training_batches : int
...@@ -703,6 +936,21 @@ class TaylorFOWeightPruner(BasicPruner): ...@@ -703,6 +936,21 @@ class TaylorFOWeightPruner(BasicPruner):
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model. should on the same device with the model.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import TaylorFOWeightPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = TaylorFOWeightPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/taylorfo_pruning_torch.py <examples/model_compress/pruning/v2/taylorfo_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
...@@ -772,13 +1020,17 @@ class TaylorFOWeightPruner(BasicPruner): ...@@ -772,13 +1020,17 @@ class TaylorFOWeightPruner(BasicPruner):
class ADMMPruner(BasicPruner): class ADMMPruner(BasicPruner):
""" r"""
Alternating Direction Method of Multipliers (ADMM) is a mathematical optimization technique
that decomposes the original nonconvex problem into two subproblems which can be solved iteratively.
In the weight pruning problem, these two subproblems are solved via 1) the gradient descent algorithm and 2) Euclidean projection, respectively.
During the process of solving these two subproblems, the weights of the original model will be changed.
Then a fine-grained pruning will be applied to prune the model according to the given config list.
This solution framework applies both to non-structured pruning and to different variations of structured pruning schemes.
For more details, please refer to `A Systematic DNN Weight Pruning Framework using Alternating Direction Method of Multipliers <https://arxiv.org/abs/1804.03294>`__.
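The Euclidean projection subproblem amounts to keeping the largest-magnitude entries that satisfy the sparsity constraint; a minimal sketch of that projection for one weight tensor (illustrative, not the pruner's code):

.. code-block:: python

    import torch

    def euclidean_projection(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
        # project onto the set of tensors with at most (1 - sparsity) * numel nonzeros
        num_prune = int(weight.numel() * sparsity)
        if num_prune == 0:
            return weight.clone()
        threshold = weight.abs().flatten().kthvalue(num_prune).values
        return weight * (weight.abs() > threshold).type_as(weight)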
Parameters Parameters
---------- ----------
...@@ -814,13 +1066,28 @@ class ADMMPruner(BasicPruner): ...@@ -814,13 +1066,28 @@ class ADMMPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
iterations : int iterations : int
The total iteration number in admm pruning algorithm. The total iteration number in admm pruning algorithm.
training_epochs : int training_epochs : int
The epoch number for training model in each iteration. The epoch number for training model in each iteration.
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import ADMMPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ADMMPruner(model, config_list, trainer, traced_optimizer, criterion, iterations=10, training_epochs=1)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/admm_pruning_torch.py <examples/model_compress/pruning/v2/admm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
......
...@@ -70,7 +70,11 @@ class IterativePruner(PruningScheduler): ...@@ -70,7 +70,11 @@ class IterativePruner(PruningScheduler):
class LinearPruner(IterativePruner): class LinearPruner(IterativePruner):
""" r"""
Linear pruner is an iterative pruner that increases the sparsity evenly from zero during the iterations.
For example, if the final sparsity is set to 0.5 and the iteration number is 5, the sparsities used in the iterations are ``[0, 0.1, 0.2, 0.3, 0.4, 0.5]``.
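A sketch of the resulting schedule (illustrative helper, not part of the API):

.. code-block:: python

    # Illustrative: evenly spaced sparsity schedule across the iterations.
    def linear_schedule(final_sparsity: float, total_iteration: int) -> list:
        return [final_sparsity * i / total_iteration for i in range(total_iteration + 1)]

    linear_schedule(0.5, 5)   # [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]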
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -98,6 +102,17 @@ class LinearPruner(IterativePruner): ...@@ -98,6 +102,17 @@ class LinearPruner(IterativePruner):
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.algorithms.compression.v2.pytorch.pruning import LinearPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = LinearPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/iterative_pruning_torch.py <examples/model_compress/pruning/v2/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
...@@ -117,7 +132,14 @@ class LinearPruner(IterativePruner): ...@@ -117,7 +132,14 @@ class LinearPruner(IterativePruner):
class AGPPruner(IterativePruner): class AGPPruner(IterativePruner):
""" r"""
This is an iterative pruner, in which the sparsity is increased from an initial sparsity value :math:`s_{i}` (usually 0) to a final sparsity value :math:`s_{f}` over a span of :math:`n` pruning iterations,
starting at training step :math:`t_{0}` and with pruning frequency :math:`\Delta t`:
:math:`s_{t}=s_{f}+\left(s_{i}-s_{f}\right)\left(1-\frac{t-t_{0}}{n \Delta t}\right)^{3} \text { for } t \in\left\{t_{0}, t_{0}+\Delta t, \ldots, t_{0} + n \Delta t\right\}`
For more details please refer to `To prune, or not to prune: exploring the efficacy of pruning for model compression <https://arxiv.org/abs/1710.01878>`__\.
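A sketch of this schedule as a plain function (illustrative helper):

.. code-block:: python

    # Illustrative: AGP sparsity at training step t.
    def agp_sparsity(t, s_i, s_f, t0, n, delta_t):
        progress = min(max((t - t0) / (n * delta_t), 0.0), 1.0)
        return s_f + (s_i - s_f) * (1 - progress) ** 3

    agp_sparsity(t=500, s_i=0.0, s_f=0.8, t0=0, n=10, delta_t=100)   # 0.7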
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -145,6 +167,17 @@ class AGPPruner(IterativePruner): ...@@ -145,6 +167,17 @@ class AGPPruner(IterativePruner):
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.algorithms.compression.v2.pytorch.pruning import AGPPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = AGPPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/iterative_pruning_torch.py <examples/model_compress/pruning/v2/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
...@@ -164,7 +197,25 @@ class AGPPruner(IterativePruner): ...@@ -164,7 +197,25 @@ class AGPPruner(IterativePruner):
class LotteryTicketPruner(IterativePruner): class LotteryTicketPruner(IterativePruner):
""" r"""
`The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks <https://arxiv.org/abs/1803.03635>`__\ ,
authored by Jonathan Frankle and Michael Carbin, provides comprehensive measurement and analysis,
and articulates the *lottery ticket hypothesis*\ : dense, randomly-initialized, feed-forward networks contain subnetworks (*winning tickets*\ ) that
-- when trained in isolation -- reach test accuracy comparable to the original network in a similar number of iterations.
In this paper, the authors use the following process to prune a model, called *iterative pruning*\ :
#. Randomly initialize a neural network :math:`f(x; \theta_0)` (where :math:`\theta_0 \sim \mathcal{D}_{\theta}`).
#. Train the network for :math:`j` iterations, arriving at parameters :math:`\theta_j`.
#. Prune :math:`p\%` of the parameters in :math:`\theta_j`, creating a mask :math:`m`.
#. Reset the remaining parameters to their values in :math:`\theta_0`, creating the winning ticket :math:`f(x; m \odot \theta_0)`.
#. Repeat steps 2, 3, and 4.
If the configured final sparsity is :math:`P` (e.g., 0.8) and there are :math:`n` pruning iterations,
each iteration prunes :math:`1-(1-P)^{1/n}` of the weights that survived the previous round.
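A sketch of how the cumulative sparsity evolves across the rounds under this rule (illustrative helper):

.. code-block:: python

    # Illustrative: cumulative sparsity after each of n iterative pruning rounds.
    def cumulative_sparsity(P: float, n: int) -> list:
        ratio = 1 - (1 - P) ** (1 / n)
        return [1 - (1 - ratio) ** i for i in range(1, n + 1)]

    cumulative_sparsity(0.8, 5)   # last entry is ~0.8, the configured final sparsity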
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -194,6 +245,18 @@ class LotteryTicketPruner(IterativePruner): ...@@ -194,6 +245,18 @@ class LotteryTicketPruner(IterativePruner):
If set True, the model weight will reset to the original model weight at the end of each iteration step. If set True, the model weight will reset to the original model weight at the end of each iteration step.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.algorithms.compression.v2.pytorch.pruning import LotteryTicketPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = LotteryTicketPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner, reset_weight=True)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/iterative_pruning_torch.py <examples/model_compress/pruning/v2/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
...@@ -215,6 +278,19 @@ class LotteryTicketPruner(IterativePruner): ...@@ -215,6 +278,19 @@ class LotteryTicketPruner(IterativePruner):
class SimulatedAnnealingPruner(IterativePruner): class SimulatedAnnealingPruner(IterativePruner):
""" """
We implement a guided heuristic search method, the Simulated Annealing (SA) algorithm. As mentioned in the paper, this method is enhanced with guided search based on prior experience.
The enhanced SA technique is based on the observation that a DNN layer with a larger number of weights often has a higher degree of model compression with less impact on overall accuracy.
* Randomly initialize a pruning rate distribution (sparsities).
* While current_temperature > stop_temperature:
  #. Generate a perturbation of the current distribution.
  #. Perform a fast evaluation on the perturbed distribution.
  #. Accept the perturbation according to the performance and an acceptance probability; if not accepted, return to step 1.
  #. Cool down: current_temperature <- current_temperature * cool_down_rate.
For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates <https://arxiv.org/abs/1907.03141>`__.
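A schematic of the annealing loop described above (pseudocode-level sketch; ``evaluate`` and ``perturb`` stand in for the pruner's internal steps, and higher evaluation scores are assumed to be better):

.. code-block:: python

    import math
    import random

    def simulated_annealing(sparsities, evaluate, perturb, start_temperature=100,
                            stop_temperature=20, cool_down_rate=0.9):
        current_score = evaluate(sparsities)
        temperature = start_temperature
        while temperature > stop_temperature:
            candidate = perturb(sparsities)
            score = evaluate(candidate)
            delta = score - current_score
            # accept better candidates, and worse ones with a temperature-dependent probability
            if delta > 0 or random.random() < math.exp(delta / temperature):
                sparsities, current_score = candidate, score
            temperature *= cool_down_rate
        return sparsities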
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -246,6 +322,19 @@ class SimulatedAnnealingPruner(IterativePruner): ...@@ -246,6 +322,19 @@ class SimulatedAnnealingPruner(IterativePruner):
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speed up the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speed_up` is True, `dummy_input` is required for tracing the model in speed up.
Examples
--------
>>> from nni.algorithms.compression.v2.pytorch.pruning import SimulatedAnnealingPruner
>>> model = ...
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> evaluator = ...
>>> finetuner = ...
>>> pruner = SimulatedAnnealingPruner(model, config_list, pruning_algorithm='l1', evaluator=evaluator, cool_down_rate=0.9, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/simulated_anealing_pruning_torch.py <examples/model_compress/pruning/v2/simulated_anealing_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], evaluator: Callable[[Module], float], start_temperature: float = 100, def __init__(self, model: Module, config_list: List[Dict], evaluator: Callable[[Module], float], start_temperature: float = 100,
......
...@@ -124,7 +124,21 @@ class WeightScoreTrainerBasedDataCollector(TrainerBasedDataCollector): ...@@ -124,7 +124,21 @@ class WeightScoreTrainerBasedDataCollector(TrainerBasedDataCollector):
class MovementPruner(BasicPruner): class MovementPruner(BasicPruner):
""" r"""
Movement pruner is an implementation of movement pruning.
This is a "fine-pruning" algorithm, which means the masks may change during each fine-tuning step.
Each weight element will be scored by the negative of the sum of the products of the weight and its gradient over the training steps.
This means that weight elements moving towards zero accumulate negative scores, while weight elements moving away from zero accumulate positive scores.
The weight elements with low scores will be masked during inference.
The following figure from the paper shows the weight pruning by movement pruning.
.. image:: ../../img/movement_pruning.png
:target: ../../img/movement_pruning.png
:alt:
For more details, please refer to `Movement Pruning: Adaptive Sparsity by Fine-Tuning <https://arxiv.org/abs/2005.07683>`__.
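For intuition, the movement score for one weight tensor could be accumulated over training steps like this (a sketch, not the pruner's internal bookkeeping):

.. code-block:: python

    import torch
    import torch.nn as nn

    linear = nn.Linear(16, 4)
    movement_score = torch.zeros_like(linear.weight)

    x, target = torch.randn(8, 16), torch.randn(8, 4)
    loss = nn.functional.mse_loss(linear(x), target)
    loss.backward()

    with torch.no_grad():
        # weights moving towards zero accumulate negative scores,
        # weights moving away from zero accumulate positive scores
        movement_score += -linear.weight.grad * linear.weight

    # entries of `movement_score` below the sparsity threshold would be masked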
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
...@@ -158,7 +172,7 @@ class MovementPruner(BasicPruner): ...@@ -158,7 +172,7 @@ class MovementPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs : int training_epochs : int
...@@ -171,6 +185,21 @@ class MovementPruner(BasicPruner): ...@@ -171,6 +185,21 @@ class MovementPruner(BasicPruner):
The number of steps at which sparsity stops growing, note that the sparsity stop growing doesn't mean masks not changed. The number of steps at which sparsity stops growing, note that the sparsity stop growing doesn't mean masks not changed.
The sparsity after each `optimizer.step()` is: The sparsity after each `optimizer.step()` is:
total_sparsity * (1 - (1 - (current_step - warm_up_step) / (cool_down_beginning_step - warm_up_step)) ** 3). total_sparsity * (1 - (1 - (current_step - warm_up_step) / (cool_down_beginning_step - warm_up_step)) ** 3).
Examples
--------
>>> import nni
>>> from nni.algorithms.compression.v2.pytorch.pruning import MovementPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = MovementPruner(model, config_list, trainer, traced_optimizer, criterion, 10, 3000, 27000)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/v2/movement_pruning_glue.py <examples/model_compress/pruning/v2/movement_pruning_glue.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], training_epochs: int, warm_up_step: int, traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], training_epochs: int, warm_up_step: int,
......