"docs/archive_en_US/Tutorial/SearchSpaceSpec.md" did not exist on "171ae9181cb7061378080343cb333957f4ce7f29"
Unverified Commit b8d19e45 authored by colorjam, committed by GitHub

fix activation collection and add gradient pruners (#2187)

parent 4e2c0aad
......@@ -13,6 +13,8 @@ Index of supported pruning algorithms
* [Filter Pruners with Activation Rank](#activationrankfilterpruner)
* [APoZ Rank Pruner](#activationapozrankfilterpruner)
* [Activation Mean Rank Pruner](#activationmeanrankfilterpruner)
* [Filter Pruners with Gradient Rank](#gradientrankfilterpruner)
* [Taylor FO On Weight Pruner](#taylorfoweightfilterpruner)
## Level Pruner
......@@ -281,7 +283,7 @@ pruner.compress()
- **op_types:** Only Conv1d and Conv2d are supported in L2Filter Pruner
## ActivationRankFilterPruner
ActivationRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the output activations of convolution layers to achieve a preset level of network sparsity
ActivationRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the output activations of convolution layers to achieve a preset level of network sparsity.
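As a rough illustration of the activation-based criterion (a minimal sketch, not the pruners' actual implementation), the APoZ score used by ActivationAPoZRankFilterPruner can be computed per filter from a batch of post-activation outputs; filters with the largest average percentage of zeros are the pruning candidates:

```python
import torch

def apoz_per_filter(activation):
    # activation: post-ReLU output of a conv layer, shape (N, C, H, W).
    # APoZ of a filter is the fraction of zero entries in its activation maps,
    # averaged over the batch; filters with the largest APoZ are pruned.
    zeros = (activation == 0).float()
    return zeros.mean(dim=(0, 2, 3))  # one score per filter, shape (C,)
```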
### ActivationAPoZRankFilterPruner
......@@ -341,4 +343,42 @@ You can view example for more information
#### User configuration for ActivationMeanRankFilterPruner
- **sparsity:** The percentage of convolutional filters to be pruned.
- **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner
- **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner.
## GradientRankFilterPruner
GradientRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the gradients of convolution layers to achieve a preset level of network sparsity.
### TaylorFOWeightFilterPruner
We implemented it as a one-shot pruner; it prunes convolutional layers based on the first-order Taylor expansion on the weights. The estimated importance of filters is defined in the paper [Importance Estimation for Neural Network Pruning](http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf). Other pruning criteria mentioned in this paper will be supported in a future release.
![](../../img/importance_estimation_sum.png)
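For the weight-based criterion added in this commit, the per-filter importance in the figure above reduces to the squared product of each weight and its gradient, summed over the filter's elements and accumulated over training batches. A minimal sketch of that computation (mirroring the `calc_contributions` code in this commit, not a drop-in replacement for it):

```python
import torch

def taylor_fo_importance(weight, grad):
    # weight, grad: tensors of a conv layer, shape (filters, in_channels, k, k).
    # Importance of each filter = sum over its elements of (weight * gradient)^2.
    filters = weight.size(0)
    return (weight * grad).pow(2).view(filters, -1).sum(dim=1)
```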
#### Usage
PyTorch code
```python
from nni.compression.torch import TaylorFOWeightFilterPruner
config_list = [{
'sparsity': 0.5,
'op_types': ['Conv2d']
}]
pruner = TaylorFOWeightFilterPruner(model, config_list, optimizer)
pruner.compress()
```
You can view the example for more information.
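Note that the pruner patches `optimizer.step()` to accumulate the per-filter contributions, so at least `statistics_batch_num` training batches (forward pass, backward pass and `optimizer.step()`) are needed before `calc_mask` produces a non-trivial mask. A hedged sketch of such a loop, where `train_loader` and `criterion` are placeholders rather than part of the NNI API:

```python
for data, target in train_loader:
    optimizer.zero_grad()
    loss = criterion(model(data), target)
    loss.backward()
    optimizer.step()  # patched by the pruner to accumulate (weight * grad)^2 per filter
```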
#### User configuration for TaylorFOWeightFilterPruner
- **sparsity:** The percentage of convolutional filters to be pruned.
- **op_types:** Currently only Conv2d is supported in TaylorFOWeightFilterPruner.
 
......@@ -7,3 +7,4 @@ from .weight_rank_filter_pruners import *
from .activation_rank_filter_pruners import *
from .quantizers import *
from .apply_compression import apply_compression_results
from .gradient_rank_filter_pruners import *
......@@ -37,13 +37,9 @@ class ActivationRankFilterPruner(Pruner):
super().__init__(model, config_list, optimizer)
self.set_wrappers_attribute("if_calculated", False)
self.set_wrappers_attribute("collected_activation", [])
self.statistics_batch_num = statistics_batch_num
self.hook_id = self._add_activation_collector()
def collector(module_, input_, output):
if len(module_.collected_activation) < self.statistics_batch_num:
module_.collected_activation.append(self.activation(output.detach().cpu()))
self.add_activation_collector(collector)
assert activation in ['relu', 'relu6']
if activation == 'relu':
self.activation = torch.nn.functional.relu
......@@ -52,6 +48,21 @@ class ActivationRankFilterPruner(Pruner):
else:
self.activation = None
def _add_activation_collector(self):
def collector(collected_activation):
def hook(module_, input_, output):
collected_activation.append(self.activation(output.detach().cpu()))
return hook
self.collected_activation = {}
self._fwd_hook_id += 1
self._fwd_hook_handles[self._fwd_hook_id] = []
for wrapper_idx, wrapper in enumerate(self.get_modules_wrapper()):
self.collected_activation[wrapper_idx] = []
handle = wrapper.register_forward_hook(collector(self.collected_activation[wrapper_idx]))
self._fwd_hook_handles[self._fwd_hook_id].append(handle)
return self._fwd_hook_id
def validate_config(self, model, config_list):
"""
Parameters
......@@ -73,24 +84,21 @@ class ActivationRankFilterPruner(Pruner):
def get_mask(self, base_mask, activations, num_prune):
raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))
def calc_mask(self, wrapper, **kwargs):
def calc_mask(self, wrapper, wrapper_idx, **kwargs):
"""
Calculate the mask of given layer.
Filters with the smallest importance criterion which is calculated from the activation are masked.
Parameters
----------
layer : LayerInfo
wrapper : Module
the layer to instrument the compression operation
config : dict
layer's pruning config
Returns
-------
dict
dictionary for storing masks
"""
weight = wrapper.module.weight.data
op_type = wrapper.type
config = wrapper.config
......@@ -100,21 +108,27 @@ class ActivationRankFilterPruner(Pruner):
if wrapper.if_calculated:
return None
mask_weight = torch.ones(weight.size()).type_as(weight).detach()
if hasattr(wrapper.module, 'bias') and wrapper.module.bias is not None:
mask_bias = torch.ones(wrapper.module.bias.size()).type_as(wrapper.module.bias).detach()
else:
mask_bias = None
mask = {'weight_mask': mask_weight, 'bias_mask': mask_bias}
try:
filters = weight.size(0)
num_prune = int(filters * config.get('sparsity'))
if filters < 2 or num_prune < 1 or len(wrapper.collected_activation) < self.statistics_batch_num:
acts = self.collected_activation[wrapper_idx]
if filters < 2 or num_prune < 1 or len(acts) < self.statistics_batch_num:
return mask
mask = self.get_mask(mask, wrapper.collected_activation, num_prune)
mask = self.get_mask(mask, acts, num_prune)
finally:
if len(wrapper.collected_activation) == self.statistics_batch_num:
if len(acts) >= self.statistics_batch_num:
wrapper.if_calculated = True
if self.hook_id in self._fwd_hook_handles:
self.remove_activation_collector(self.hook_id)
return mask
......@@ -148,7 +162,7 @@ class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
def get_mask(self, base_mask, activations, num_prune):
"""
Calculate the mask of given layer.
Filters with the smallest APoZ(average percentage of zeros) of output activations are masked.
Filters with the largest APoZ(average percentage of zeros) of output activations are masked.
Parameters
----------
......
......@@ -314,8 +314,8 @@ class Pruner(Compressor):
return self.bound_model
def update_mask(self):
for wrapper in self.get_modules_wrapper():
masks = self.calc_mask(wrapper)
for wrapper_idx, wrapper in enumerate(self.get_modules_wrapper()):
masks = self.calc_mask(wrapper, wrapper_idx=wrapper_idx)
if masks is not None:
for k in masks:
assert hasattr(wrapper, k), "there is no attribute '%s' in wrapper" % k
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import torch
from .compressor import Pruner
__all__ = ['TaylorFOWeightFilterPruner']
logger = logging.getLogger('torch gradient rank filter pruners')
class GradientRankFilterPruner(Pruner):
"""
A structured pruning base class that prunes the filters with the smallest
importance criterion in convolution layers (using gradient values)
to achieve a preset level of network sparsity.
"""
def __init__(self, model, config_list, optimizer, statistics_batch_num=1):
"""
Parameters
----------
model : torch.nn.module
Model to be pruned
config_list : list
support key for each list item:
- sparsity: percentage of convolutional filters to be pruned.
optimizer: torch.optim.Optimizer
Optimizer used to train model
statistics_batch_num : int
Num of batches for calculating contribution
"""
super().__init__(model, config_list, optimizer)
self.set_wrappers_attribute("if_calculated", False)
self.set_wrappers_attribute("contribution", None)
self.statistics_batch_num = statistics_batch_num
self.iterations = 0
self.old_step = self.optimizer.step
self.patch_optimizer(self.calc_contributions)
def calc_contributions(self):
raise NotImplementedError('{} calc_contributions is not implemented'.format(self.__class__.__name__))
def get_mask(self, base_mask, contribution, num_prune):
raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))
def calc_mask(self, wrapper, **kwargs):
"""
Calculate the mask of given layer.
Filters with the smallest importance criterion, which is calculated from the weights and their gradients, are masked.
Parameters
----------
wrapper : Module
the layer to instrument the compression operation
Returns
-------
dict
dictionary for storing masks
"""
weight = wrapper.module.weight.data
op_type = wrapper.type
config = wrapper.config
assert 0 <= config.get('sparsity') < 1, "sparsity must be in the range [0, 1)"
assert op_type in config.get('op_types')
if wrapper.if_calculated:
return None
mask_weight = torch.ones(weight.size()).type_as(weight).detach()
if hasattr(wrapper.module, 'bias') and wrapper.module.bias is not None:
mask_bias = torch.ones(wrapper.module.bias.size()).type_as(wrapper.module.bias).detach()
else:
mask_bias = None
mask = {'weight_mask': mask_weight, 'bias_mask': mask_bias}
try:
filters = weight.size(0)
num_prune = int(filters * config.get('sparsity'))
if filters < 2 or num_prune < 1 or self.iterations < self.statistics_batch_num:
return mask
mask = self.get_mask(mask, wrapper.contribution, num_prune)
finally:
if self.iterations >= self.statistics_batch_num:
wrapper.if_calculated = True
return mask
class TaylorFOWeightFilterPruner(GradientRankFilterPruner):
"""
A structured pruning algorithm that prunes the filters with the smallest
importance approximations based on the first order taylor expansion on the weight.
Molchanov, Pavlo and Mallya, Arun and Tyree, Stephen and Frosio, Iuri and Kautz, Jan,
"Importance Estimation for Neural Network Pruning", CVPR 2019.
http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf
"""
def __init__(self, model, config_list, optimizer, statistics_batch_num=1):
"""
Parameters
----------
model : torch.nn.module
Model to be pruned
config_list : list
support key for each list item:
- sparsity: percentage of convolutional filters to be pruned.
optimizer: torch.optim.Optimizer
Optimizer used to train model
statistics_batch_num : int
Num of batches for calculating contribution
"""
super().__init__(model, config_list, optimizer, statistics_batch_num)
def get_mask(self, base_mask, contribution, num_prune):
"""
Calculate the mask of given layer.
Filters with the smallest importance approximations are masked.
Parameters
----------
base_mask : dict
The basic mask with the same shape as the weight, where every item is 1.
contribution : torch.Tensor
Layer's importance approximations
num_prune : int
Num of filters to prune
Returns
-------
dict
dictionary for storing masks
"""
prune_indices = torch.argsort(contribution)[:num_prune]
for idx in prune_indices:
base_mask['weight_mask'][idx] = 0.
if base_mask['bias_mask'] is not None:
base_mask['bias_mask'][idx] = 0.
return base_mask
def calc_contributions(self):
"""
Calculate the estimated importance of filters as a sum of individual contribution
based on the first order taylor expansion.
"""
if self.iterations >= self.statistics_batch_num:
return
for wrapper in self.get_modules_wrapper():
filters = wrapper.module.weight.size(0)
contribution = (wrapper.module.weight*wrapper.module.weight.grad).data.pow(2).view(filters, -1).sum(dim=1)
if wrapper.contribution is None:
wrapper.contribution = contribution
else:
wrapper.contribution += contribution
self.iterations += 1
......@@ -60,10 +60,8 @@ class WeightRankFilterPruner(Pruner):
Filters with the smallest importance criterion of the kernel weights are masked.
Parameters
----------
layer : LayerInfo
the layer to instrument the compression operation
config : dict
layer's pruning config
wrapper : Module
the module to instrument the compression operation
Returns
-------
dict
......
......@@ -228,6 +228,52 @@ class CompressorTestCase(TestCase):
assert all(mask1['bias_mask'].numpy() == np.array([0., 0., 0., 1., 1.]))
assert all(mask2['bias_mask'].numpy() == np.array([0., 0., 0., 1., 1.]))
def test_torch_taylorFOweight_pruner(self):
"""
Filters with the minimum importance approximation based on the first-order
Taylor expansion on the weights (w*grad)**2 are pruned, as described in the paper:
Importance Estimation for Neural Network Pruning,
http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf
So if sparsity of conv1 is 0.2, the expected masks should mask out filter 0, this can be verified through:
`all(torch.sum(mask1['weight_mask'], (1, 2, 3)).numpy() == np.array([0., 25., 25., 25., 25.]))`
If sparsity of conv2 is 0.6, the expected masks should mask out filters 4, 5, 6, 7, 8 and 9; this can be verified through:
`all(torch.sum(mask2['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 125., 125., 0., 0., 0., 0., 0., 0., ]))`
"""
w1 = np.array([np.zeros((1, 5, 5)), np.ones((1, 5, 5)), np.ones((1, 5, 5)) * 2,
np.ones((1, 5, 5)) * 3, np.ones((1, 5, 5)) * 4])
w2 = np.array([[[[i + 1] * 5] * 5] * 5 for i in range(10)[::-1]])
grad1 = np.array([np.ones((1, 5, 5)) * -1, np.ones((1, 5, 5)) * 1, np.ones((1, 5, 5)) * -1,
np.ones((1, 5, 5)) * 1, np.ones((1, 5, 5)) * -1])
grad2 = np.array([[[[(-1)**i] * 5] * 5] * 5 for i in range(10)])
config_list = [{'sparsity': 0.2, 'op_types': ['Conv2d'], 'op_names': ['conv1']},
{'sparsity': 0.6, 'op_types': ['Conv2d'], 'op_names': ['conv2']}]
model = TorchModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
pruner = torch_compressor.TaylorFOWeightFilterPruner(model, config_list, optimizer, statistics_batch_num=1)
x = torch.rand((1, 1, 28, 28), requires_grad=True)
model.conv1.module.weight.data = torch.tensor(w1).float()
model.conv2.module.weight.data = torch.tensor(w2).float()
y = model(x)
y.backward(torch.ones_like(y))
model.conv1.module.weight.grad.data = torch.tensor(grad1).float()
model.conv2.module.weight.grad.data = torch.tensor(grad2).float()
optimizer.step()
mask1 = pruner.calc_mask(model.conv1)
mask2 = pruner.calc_mask(model.conv2)
assert all(torch.sum(mask1['weight_mask'], (1, 2, 3)).numpy() == np.array([0., 25., 25., 25., 25.]))
assert all(torch.sum(mask2['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 125., 125., 0., 0., 0., 0., 0., 0., ]))
def test_torch_QAT_quantizer(self):
model = TorchModel()
config_list = [{
......