"tools/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "83684388fd1efc433e6d64207a24e451e6f4697e"
Unverified commit b8d19e45 authored by colorjam, committed by GitHub

fix activation collection and add gradient pruners (#2187)

parent 4e2c0aad
@@ -13,6 +13,8 @@ Index of supported pruning algorithms
* [Filter Pruners with Activation Rank](#activationrankfilterpruner)
  * [APoZ Rank Pruner](#activationapozrankfilterpruner)
  * [Activation Mean Rank Pruner](#activationmeanrankfilterpruner)
* [Filter Pruners with Gradient Rank](#gradientrankfilterpruner)
  * [Taylor FO On Weight Pruner](#taylorfoweightfilterpruner)
## Level Pruner

@@ -281,7 +283,7 @@ pruner.compress()
- **op_types:** Only Conv1d and Conv2d are supported in L2Filter Pruner
## ActivationRankFilterPruner

ActivationRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the output activations of convolution layers to achieve a preset level of network sparsity.
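A minimal usage sketch, using `ActivationAPoZRankFilterPruner` as an example (the import path and the `statistics_batch_num` argument are assumed from the constructor shown in the code diff below; `model` and `optimizer` are your own training objects):

```python
from nni.compression.torch import ActivationAPoZRankFilterPruner

config_list = [{
    'sparsity': 0.5,
    'op_types': ['Conv2d']
}]
# statistics_batch_num controls how many batches of activations are collected
# before the pruning masks are computed
pruner = ActivationAPoZRankFilterPruner(model, config_list, optimizer, statistics_batch_num=1)
pruner.compress()
```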
### ActivationAPoZRankFilterPruner

@@ -341,4 +343,42 @@ You can view example for more information

#### User configuration for ActivationMeanRankFilterPruner
- **sparsity:** The percentage of convolutional filters to be pruned.
- **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner.
## GradientRankFilterPruner
GradientRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the gradients of convolution layers to achieve a preset level of network sparsity.
### TaylorFOWeightFilterPruner
We implemented it as a one-shot pruner; it prunes convolutional layers based on a first-order Taylor expansion on the weights. The estimated importance of filters is defined as in the paper [Importance Estimation for Neural Network Pruning](http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf). Other pruning criteria mentioned in this paper will be supported in a future release.
![](../../img/importance_estimation_sum.png)
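The criterion implemented by this commit (see `calc_contributions` in the new pruner file in the diff below) scores each filter by the accumulated squared products of its weights and their gradients, roughly:

$$\hat{I}_f = \sum_{w \in \mathcal{W}_f} \left( w \cdot \frac{\partial \mathcal{L}}{\partial w} \right)^2$$

accumulated over `statistics_batch_num` training batches; the filters with the smallest $\hat{I}_f$ are masked.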
#### Usage
PyTorch code
```python
from nni.compression.torch import TaylorFOWeightFilterPruner
config_list = [{
    'sparsity': 0.5,
    'op_types': ['Conv2d']
}]
pruner = TaylorFOWeightFilterPruner(model, config_list, optimizer)
pruner.compress()
```
You can view the example for more information.

#### User configuration for TaylorFOWeightFilterPruner
- **sparsity:** The percentage of convolutional filters to be pruned.
- **op_types:** Currently only Conv2d is supported in TaylorFOWeightFilterPruner.
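Because the importance contributions are accumulated from weight gradients through the patched optimizer, the masks only take effect after `statistics_batch_num` training batches. A minimal training sketch under that assumption (`train_loader` and `criterion` are placeholder names, not part of NNI):

```python
pruner = TaylorFOWeightFilterPruner(model, config_list, optimizer, statistics_batch_num=1)
model = pruner.compress()

# Each optimizer.step() also triggers the pruner's contribution collection
# (the pruner patches the optimizer, as shown in the code diff below).
for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    loss = criterion(model(data), target)
    loss.backward()
    optimizer.step()
    if batch_idx + 1 >= 1:  # one batch is enough when statistics_batch_num=1
        break
```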
 
@@ -7,3 +7,4 @@ from .weight_rank_filter_pruners import *
from .activation_rank_filter_pruners import *
from .quantizers import *
from .apply_compression import apply_compression_results
from .gradient_rank_filter_pruners import *
@@ -37,13 +37,9 @@ class ActivationRankFilterPruner(Pruner):
        super().__init__(model, config_list, optimizer)
        self.set_wrappers_attribute("if_calculated", False)
        self.set_wrappers_attribute("collected_activation", [])
        self.statistics_batch_num = statistics_batch_num
        self.hook_id = self._add_activation_collector()

        def collector(module_, input_, output):
            if len(module_.collected_activation) < self.statistics_batch_num:
                module_.collected_activation.append(self.activation(output.detach().cpu()))
        self.add_activation_collector(collector)
        assert activation in ['relu', 'relu6']
        if activation == 'relu':
            self.activation = torch.nn.functional.relu
@@ -52,6 +48,21 @@ class ActivationRankFilterPruner(Pruner):
        else:
            self.activation = None

    def _add_activation_collector(self):
        def collector(collected_activation):
            def hook(module_, input_, output):
                collected_activation.append(self.activation(output.detach().cpu()))
            return hook
        self.collected_activation = {}
        self._fwd_hook_id += 1
        self._fwd_hook_handles[self._fwd_hook_id] = []
        for wrapper_idx, wrapper in enumerate(self.get_modules_wrapper()):
            self.collected_activation[wrapper_idx] = []
            handle = wrapper.register_forward_hook(collector(self.collected_activation[wrapper_idx]))
            self._fwd_hook_handles[self._fwd_hook_id].append(handle)
        return self._fwd_hook_id

    def validate_config(self, model, config_list):
        """
        Parameters
@@ -73,24 +84,21 @@ class ActivationRankFilterPruner(Pruner):
    def get_mask(self, base_mask, activations, num_prune):
        raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))

    def calc_mask(self, wrapper, wrapper_idx, **kwargs):
        """
        Calculate the mask of given layer.
        Filters with the smallest importance criterion which is calculated from the activation are masked.

        Parameters
        ----------
        wrapper : Module
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = wrapper.module.weight.data
        op_type = wrapper.type
        config = wrapper.config
@@ -100,21 +108,27 @@ class ActivationRankFilterPruner(Pruner):
        if wrapper.if_calculated:
            return None

        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
        if hasattr(wrapper.module, 'bias') and wrapper.module.bias is not None:
            mask_bias = torch.ones(wrapper.module.bias.size()).type_as(wrapper.module.bias).detach()
        else:
            mask_bias = None
        mask = {'weight_mask': mask_weight, 'bias_mask': mask_bias}

        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            acts = self.collected_activation[wrapper_idx]
            if filters < 2 or num_prune < 1 or len(acts) < self.statistics_batch_num:
                return mask
            mask = self.get_mask(mask, acts, num_prune)
        finally:
            if len(acts) >= self.statistics_batch_num:
                wrapper.if_calculated = True
                if self.hook_id in self._fwd_hook_handles:
                    self.remove_activation_collector(self.hook_id)

        return mask
@@ -148,7 +162,7 @@ class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
    def get_mask(self, base_mask, activations, num_prune):
        """
        Calculate the mask of given layer.
        Filters with the largest APoZ (average percentage of zeros) of output activations are masked.

        Parameters
        ----------
......
@@ -314,8 +314,8 @@ class Pruner(Compressor):
        return self.bound_model

    def update_mask(self):
        for wrapper_idx, wrapper in enumerate(self.get_modules_wrapper()):
            masks = self.calc_mask(wrapper, wrapper_idx=wrapper_idx)
            if masks is not None:
                for k in masks:
                    assert hasattr(wrapper, k), "there is no attribute '%s' in wrapper" % k
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import torch
from .compressor import Pruner

__all__ = ['TaylorFOWeightFilterPruner']

logger = logging.getLogger('torch gradient rank filter pruners')


class GradientRankFilterPruner(Pruner):
    """
    A structured pruning base class that prunes the filters with the smallest
    importance criterion in convolution layers (using gradient values)
    to achieve a preset level of network sparsity.
    """

    def __init__(self, model, config_list, optimizer, statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        optimizer: torch.optim.Optimizer
            Optimizer used to train model
        statistics_batch_num : int
            Num of batches for calculating contribution
        """
        super().__init__(model, config_list, optimizer)
        self.set_wrappers_attribute("if_calculated", False)
        self.set_wrappers_attribute("contribution", None)
        self.statistics_batch_num = statistics_batch_num
        self.iterations = 0
        self.old_step = self.optimizer.step
        self.patch_optimizer(self.calc_contributions)

    def calc_contributions(self):
        raise NotImplementedError('{} calc_contributions is not implemented'.format(self.__class__.__name__))

    def get_mask(self, base_mask, contribution, num_prune):
        raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))

    def calc_mask(self, wrapper, **kwargs):
        """
        Calculate the mask of given layer.
        Filters with the smallest importance criterion which is calculated from the gradients are masked.

        Parameters
        ----------
        wrapper : Module
            the layer to instrument the compression operation

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = wrapper.module.weight.data
        op_type = wrapper.type
        config = wrapper.config
        assert 0 <= config.get('sparsity') < 1, "sparsity must be in the range [0, 1)"
        assert op_type in config.get('op_types')

        if wrapper.if_calculated:
            return None

        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
        if hasattr(wrapper.module, 'bias') and wrapper.module.bias is not None:
            mask_bias = torch.ones(wrapper.module.bias.size()).type_as(wrapper.module.bias).detach()
        else:
            mask_bias = None
        mask = {'weight_mask': mask_weight, 'bias_mask': mask_bias}

        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            if filters < 2 or num_prune < 1 or self.iterations < self.statistics_batch_num:
                return mask
            mask = self.get_mask(mask, wrapper.contribution, num_prune)
        finally:
            if self.iterations >= self.statistics_batch_num:
                wrapper.if_calculated = True

        return mask


class TaylorFOWeightFilterPruner(GradientRankFilterPruner):
    """
    A structured pruning algorithm that prunes the filters with the smallest
    importance approximations based on the first-order Taylor expansion on the weights.
    Molchanov, Pavlo and Mallya, Arun and Tyree, Stephen and Frosio, Iuri and Kautz, Jan,
    "Importance Estimation for Neural Network Pruning", CVPR 2019.
    http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf
    """

    def __init__(self, model, config_list, optimizer, statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        optimizer: torch.optim.Optimizer
            Optimizer used to train model
        statistics_batch_num : int
            Num of batches for calculating contribution
        """
        super().__init__(model, config_list, optimizer, statistics_batch_num)

    def get_mask(self, base_mask, contribution, num_prune):
        """
        Calculate the mask of given layer.
        Filters with the smallest importance approximations are masked.

        Parameters
        ----------
        base_mask : dict
            The basic mask with the same shape of weight, all items in the basic mask are 1.
        contribution : torch.Tensor
            Layer's importance approximations
        num_prune : int
            Num of filters to prune

        Returns
        -------
        dict
            dictionary for storing masks
        """
        prune_indices = torch.argsort(contribution)[:num_prune]
        for idx in prune_indices:
            base_mask['weight_mask'][idx] = 0.
            if base_mask['bias_mask'] is not None:
                base_mask['bias_mask'][idx] = 0.
        return base_mask

    def calc_contributions(self):
        """
        Calculate the estimated importance of filters as a sum of individual contributions
        based on the first-order Taylor expansion.
        """
        if self.iterations >= self.statistics_batch_num:
            return

        for wrapper in self.get_modules_wrapper():
            filters = wrapper.module.weight.size(0)
            # Per-filter contribution: sum of (weight * gradient)^2 over all weights in the filter
            contribution = (wrapper.module.weight * wrapper.module.weight.grad).data.pow(2).view(filters, -1).sum(dim=1)
            if wrapper.contribution is None:
                wrapper.contribution = contribution
            else:
                wrapper.contribution += contribution

        self.iterations += 1
@@ -60,10 +60,8 @@ class WeightRankFilterPruner(Pruner):
        Filters with the smallest importance criterion of the kernel weights are masked.

        Parameters
        ----------
        wrapper : Module
            the module to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
......
@@ -228,6 +228,52 @@ class CompressorTestCase(TestCase):
        assert all(mask1['bias_mask'].numpy() == np.array([0., 0., 0., 1., 1.]))
        assert all(mask2['bias_mask'].numpy() == np.array([0., 0., 0., 1., 1.]))

    def test_torch_taylorFOweight_pruner(self):
        """
        Filters with the minimum importance approximation based on the first-order
        Taylor expansion on the weights, (w*grad)**2, are pruned in this paper:
        Importance Estimation for Neural Network Pruning,
        http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf

        So if sparsity of conv1 is 0.2, the expected masks should mask out filter 0; this can be verified through:
        `all(torch.sum(mask1['weight_mask'], (1, 2, 3)).numpy() == np.array([0., 25., 25., 25., 25.]))`

        If sparsity of conv2 is 0.6, the expected masks should mask out filters 4, 5, 6, 7, 8, 9; this can be verified through:
        `all(torch.sum(mask2['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 125., 125., 0., 0., 0., 0., 0., 0.]))`
        """
        w1 = np.array([np.zeros((1, 5, 5)), np.ones((1, 5, 5)), np.ones((1, 5, 5)) * 2,
                       np.ones((1, 5, 5)) * 3, np.ones((1, 5, 5)) * 4])
        w2 = np.array([[[[i + 1] * 5] * 5] * 5 for i in range(10)[::-1]])
        grad1 = np.array([np.ones((1, 5, 5)) * -1, np.ones((1, 5, 5)) * 1, np.ones((1, 5, 5)) * -1,
                          np.ones((1, 5, 5)) * 1, np.ones((1, 5, 5)) * -1])
        grad2 = np.array([[[[(-1)**i] * 5] * 5] * 5 for i in range(10)])
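        # Added for exposition (not part of the original test): per the (w*grad)**2 criterion,
        # conv1 filter i has 25 weights of value i with gradients of magnitude 1, so its
        # contribution is 25 * i**2 -> [0, 25, 100, 225, 400]; sparsity 0.2 prunes only filter 0.
        # conv2 filter j has 125 weights of value (10 - j) with gradients of magnitude 1, so its
        # contribution is 125 * (10 - j)**2, decreasing in j; sparsity 0.6 prunes filters 4-9.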
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2d'], 'op_names': ['conv1']},
                       {'sparsity': 0.6, 'op_types': ['Conv2d'], 'op_names': ['conv2']}]

        model = TorchModel()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
        pruner = torch_compressor.TaylorFOWeightFilterPruner(model, config_list, optimizer, statistics_batch_num=1)

        x = torch.rand((1, 1, 28, 28), requires_grad=True)
        model.conv1.module.weight.data = torch.tensor(w1).float()
        model.conv2.module.weight.data = torch.tensor(w2).float()

        y = model(x)
        y.backward(torch.ones_like(y))

        model.conv1.module.weight.grad.data = torch.tensor(grad1).float()
        model.conv2.module.weight.grad.data = torch.tensor(grad2).float()
        optimizer.step()

        mask1 = pruner.calc_mask(model.conv1)
        mask2 = pruner.calc_mask(model.conv2)
        assert all(torch.sum(mask1['weight_mask'], (1, 2, 3)).numpy() == np.array([0., 25., 25., 25., 25.]))
        assert all(torch.sum(mask2['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 125., 125., 0., 0., 0., 0., 0., 0.]))

    def test_torch_QAT_quantizer(self):
        model = TorchModel()
        config_list = [{
......