Unverified Commit e483aa01 authored by lin bin, committed by GitHub

[Model Compression] Add bank pruning for level pruner (#4481)

parent b8b7ed0e
......@@ -36,6 +36,7 @@ from .tools import (
from .tools import (
SparsityAllocator,
NormalSparsityAllocator,
BankSparsityAllocator,
GlobalSparsityAllocator,
Conv2dDependencyAwareAllocator
)
......@@ -137,9 +138,55 @@ class LevelPruner(BasicPruner):
- op_names : Operation names to be pruned.
- op_partial_names : Operation partial names to be pruned; will be autocompleted by NNI.
- exclude : If True, the layers set by op_types and op_names will be excluded from pruning.
mode : str
    'normal' or 'balance'.
    If set to 'normal', the target tensor will be pruned with fine-grained pruning.
    If set to 'balance', a special sparse pattern will be chosen by the pruner. Taking a linear
    operation as an example, the weight tensor will be split into sub-blocks whose shapes are
    aligned with balance_gran, and fine-grained pruning will then be applied inside each
    sub-block. This sparsity pattern has a better chance of achieving a good trade-off between
    model performance and hardware acceleration. Please refer to the related paper 'Balanced
    Sparsity for Efficient DNN Inference on GPU' (https://arxiv.org/pdf/1811.00206.pdf) for
    further information.
balance_gran : list
    balance_gran describes the tile size of the balanced-sparsity pattern. The default value is
    None, which means pruning without balance awareness, i.e. normal fine-grained pruning.
    If a list of int is passed, LevelPruner will prune the model at the granularity of
    multi-dimensional blocks. Note that the length of balance_gran must not exceed the dimension
    of the pruned tensor. For instance, in a Linear operation the length of balance_gran should
    be at most two, since the pruned weight has two dimensions. With balance_gran = [5, 5] and
    sparsity = 0.6, the pruner divides the parameters into blocks with tile size (5, 5); each
    bank then holds 5 * 5 = 25 values, of which 10 are kept after pruning. Fine-grained pruning
    is applied at the granularity of blocks, so every block keeps the same number of non-zero
    values after pruning. This "balances" the non-zero values across the tensor, which creates
    opportunities for better hardware acceleration.
    Note: if the length of balance_gran is smaller than the dimension of the pruned tensor, it
    is padded with ones on the left, i.e. right-aligned (see example 1).
example 1:
operation: Linear
pruning tensor: weight
pruning tensor shape: [32, 32]
sparsity: 50%
balance_gran: [4]
pruning result: The [32, 32] weight tensor will be split into 256 sub-blocks of shape [1, 4];
two values will be pruned in each sub-block.
example 2:
operation: Linear
pruning tensor: weight
pruning tensor shape: [64, 64]
sparsity: 25%
balance_gran: [32, 32]
pruning result: The [64, 64] weight tensor will be split into 4 sub-blocks of shape [32, 32];
256 values will be pruned in each sub-block.
"""
    def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None):
        self.mode = mode
        self.balance_gran = balance_gran
        super().__init__(model, config_list)
def _validate_config_before_canonical(self, model: Module, config_list: List[Dict]):
......@@ -155,8 +202,13 @@ class LevelPruner(BasicPruner):
        if self.metrics_calculator is None:
            self.metrics_calculator = NormMetricsCalculator()
        if self.sparsity_allocator is None:
            if self.mode == "normal":
                self.sparsity_allocator = NormalSparsityAllocator(self)
            elif self.mode == "balance":
                assert self.balance_gran is not None, 'balance_gran should be passed as a parameter in balance mode'
                self.sparsity_allocator = BankSparsityAllocator(self, self.balance_gran)
            else:
                raise NotImplementedError('Only modes `normal` and `balance` are supported')
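For reference, a minimal usage sketch of the new balance mode, mirroring the unit test added in this commit (the TorchModel and the v2 import path are assumptions based on NNI's test setup, not part of this diff):

import torch
from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner

model = TorchModel()  # hypothetical model whose Conv2d kernels are 5x5, as in the tests
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.7}]
# balance_gran=[5] is right-aligned to the 4-D Conv2d weight, so each bank is a [1, 1, 1, 5] tile
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[5])
pruned_model, masks = pruner.compress()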
class NormPruner(BasicPruner):
"""
......
......@@ -20,6 +20,7 @@ from .metrics_calculator import (
)
from .sparsity_allocator import (
NormalSparsityAllocator,
BankSparsityAllocator,
GlobalSparsityAllocator,
Conv2dDependencyAwareAllocator
)
......
......@@ -2,6 +2,7 @@
# Licensed under the MIT license.
import math
import itertools
from typing import Any, Dict, List, Tuple, Union
import numpy as np
......@@ -40,6 +41,60 @@ class NormalSparsityAllocator(SparsityAllocator):
masks[name]['weight'] *= wrapper.weight_mask
return masks
class BankSparsityAllocator(SparsityAllocator):
"""
    In the bank pruner, all values in a weight are divided into sub-blocks whose shapes are
    aligned with balance_gran. Each sub-block has the same sparsity, equal to the overall
    sparsity. This allocator prunes the weight at the granularity of blocks.
"""
    def __init__(self, pruner: Pruner, balance_gran: list):
        super().__init__(pruner)
        self.balance_gran = balance_gran
        for gran in self.balance_gran:
            assert isinstance(gran, int) and gran > 0, 'All values in balance_gran should be positive integers'
    def generate_sparsity(self, metrics: Dict[str, Tensor]) -> Dict[str, Dict[str, Tensor]]:
        masks = {}
        for name, wrapper in self.pruner.get_modules_wrapper().items():
            sparsity_rate = wrapper.config['total_sparsity']
            assert name in metrics, 'Metric of {} is not calculated.'.format(name)
            # We assume the metric values are all positive for now.
            metric = metrics[name]
            if self.continuous_mask:
                metric *= self._compress_mask(wrapper.weight_mask)
            n_dim = len(metric.shape)
            assert n_dim >= len(self.balance_gran), 'Length of balance_gran should not exceed the dimension of the metric'
            # pad balance_gran with ones on the left so that it is right-aligned with the metric shape
            balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
            for i, j in zip(metric.shape, balance_gran):
                assert i % j == 0, 'Weight shape of {} is not aligned with the balance granularity'.format(name)
            mask = torch.zeros(metric.shape).type_as(metric)
            loop_iters = [range(i // j) for i, j in zip(metric.shape, balance_gran)]
            for iter_params in itertools.product(*loop_iters):
                # slice selecting the current sub-block in every dimension
                index = tuple(slice(iter_param * gran, (iter_param + 1) * gran)
                              for iter_param, gran in zip(iter_params, balance_gran))
                metric_bank = metric[index]
                prune_num = int(sparsity_rate * metric_bank.numel())
                if prune_num == 0:
                    # nothing to prune in this bank; pick a threshold below the minimum so all values are kept
                    threshold = metric_bank.min() - 1
                else:
                    # the prune_num-th smallest metric value inside the bank
                    threshold = torch.topk(metric_bank.reshape(-1), prune_num, largest=False)[0].max()
                mask[index] = torch.gt(metric_bank, threshold).type_as(metric_bank)
            masks[name] = self._expand_mask(name, mask)
            if self.continuous_mask:
                masks[name]['weight'] *= wrapper.weight_mask
        return masks
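To make the per-bank thresholding concrete, here is a self-contained sketch (hypothetical helper name bank_prune_mask, plain torch, no NNI dependencies) that reproduces the allocator's logic and checks example 1 from the docstring above:

import itertools
import torch

def bank_prune_mask(metric: torch.Tensor, balance_gran: list, sparsity: float) -> torch.Tensor:
    # right-align balance_gran by padding with ones on the left
    gran = [1] * (metric.dim() - len(balance_gran)) + balance_gran
    mask = torch.zeros_like(metric)
    loop_iters = [range(s // g) for s, g in zip(metric.shape, gran)]
    for iter_params in itertools.product(*loop_iters):
        index = tuple(slice(p * g, (p + 1) * g) for p, g in zip(iter_params, gran))
        bank = metric[index]
        prune_num = int(sparsity * bank.numel())
        if prune_num == 0:
            mask[index] = 1.  # keep the whole bank
            continue
        # threshold at the prune_num-th smallest value; keep everything strictly above it
        threshold = torch.topk(bank.reshape(-1), prune_num, largest=False)[0].max()
        mask[index] = torch.gt(bank, threshold).float()
    return mask

# example 1: every [1, 4] bank of a [32, 32] metric keeps exactly 2 of its 4 values
metric = torch.rand(32, 32)
mask = bank_prune_mask(metric, balance_gran=[4], sparsity=0.5)
assert all(int(mask[i, j:j + 4].sum()) == 2 for i in range(32) for j in range(0, 32, 4))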
class GlobalSparsityAllocator(SparsityAllocator):
"""
......
......@@ -72,6 +72,16 @@ class PrunerTestCase(unittest.TestCase):
sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82
def test_level_pruner_bank(self):
model = TorchModel()
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.7}]
pruner = LevelPruner(model=model, config_list=config_list, mode='balance', balance_gran=[5])
pruned_model, masks = pruner.compress()
pruner._unwrap_model()
sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
        # the pruned count is rounded down per bank, so the achieved sparsity is lower than 0.7 (see the check below)
assert sparsity_list[0]['total_sparsity'] == 0.6
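The asserted value follows directly from the per-bank rounding; a quick check of the arithmetic:

bank_size = 5
prune_num = int(0.7 * bank_size)            # int(3.5) == 3 values pruned per bank
achieved_sparsity = prune_num / bank_size   # 3 / 5 == 0.6
assert achieved_sparsity == 0.6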
def test_l1_norm_pruner(self):
model = TorchModel()
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
......