Unverified Commit e483aa01 authored by lin bin, committed by GitHub

[Model Compression] Add bank pruning for level pruner (#4481)

parent b8b7ed0e
......@@ -36,6 +36,7 @@ from .tools import (
from .tools import (
SparsityAllocator,
NormalSparsityAllocator,
BankSparsityAllocator,
GlobalSparsityAllocator,
Conv2dDependencyAwareAllocator
)
......@@ -137,9 +138,55 @@ class LevelPruner(BasicPruner):
- op_names : Operation names to be pruned.
- op_partial_names : Operation partial names to be pruned; will be autocompleted by NNI.
- exclude : If True, the layers set by op_types and op_names will be excluded from pruning.
mode : str
    'normal' or 'balance'.
    If set to 'normal', the target tensor will be pruned with fine-grained pruning.
    If set to 'balance', a special sparse pattern will be chosen by the pruner. Taking a linear
    operation as an example, the weight tensor will be split into sub-blocks whose shapes are
    aligned with balance_gran, and fine-grained pruning will then be applied inside each
    sub-block. This sparsity pattern has a better chance of achieving a good trade-off between
    model performance and hardware acceleration. Please refer to the related paper 'Balanced
    Sparsity for Efficient DNN Inference on GPU' (https://arxiv.org/pdf/1811.00206.pdf) for
    further information.
balance_gran : list
    balance_gran describes the tile size of the balanced-sparsity pattern. The default value is
    None, which means pruning without balance awareness, i.e. normal fine-grained pruning.
    If a list of int is passed, LevelPruner will prune the model at the granularity of
    multi-dimensional blocks. Note that the length of balance_gran must not exceed the dimension
    of the pruned tensor. For instance, in a Linear operation the length of balance_gran should
    be at most two, since the pruned weight has two dimensions. With balance_gran = [5, 5] and
    sparsity = 0.6, the pruner divides the parameters into blocks with tile size (5, 5); each
    bank then holds 5 * 5 = 25 values, of which 10 are kept after pruning. Fine-grained pruning
    is applied at the granularity of blocks, so every block keeps the same number of non-zero
    values after pruning. This "balances" the non-zero values across the tensor, which creates
    opportunities for better hardware acceleration.
    Note: if the length of balance_gran is smaller than the dimension of the pruned tensor, it
    is padded with ones on the left, i.e. right-aligned (see example 1).
example 1:
operation: Linear
pruning tensor: weight
pruning tensor shape: [32, 32]
sparsity: 50%
balance_gran: [4]
pruning result: The [32, 32] weight tensor will be split into 256 sub-blocks of shape [1, 4];
two values will be pruned in each sub-block.
example 2:
operation: Linear
pruning tensor: weight
pruning tensor shape: [64, 64]
sparsity: 25%
balance_gran: [32, 32]
pruning result: The [64, 64] weight tensor will be split into 4 sub-blocks of shape [32, 32];
256 values will be pruned in each sub-block.
"""
    def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None):
        self.mode = mode
        self.balance_gran = balance_gran
        super().__init__(model, config_list)
def _validate_config_before_canonical(self, model: Module, config_list: List[Dict]):
......@@ -155,8 +202,13 @@ class LevelPruner(BasicPruner):
        if self.metrics_calculator is None:
            self.metrics_calculator = NormMetricsCalculator()
        if self.sparsity_allocator is None:
            if self.mode == "normal":
                self.sparsity_allocator = NormalSparsityAllocator(self)
            elif self.mode == "balance":
                assert self.balance_gran is not None, 'balance_gran should be passed as a parameter in balance mode'
                self.sparsity_allocator = BankSparsityAllocator(self, self.balance_gran)
            else:
                raise NotImplementedError('Only modes `normal` and `balance` are supported')
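For reference, a minimal usage sketch of the new balance mode, mirroring the unit test added in this commit (the TorchModel and the v2 import path are assumptions based on NNI's test setup, not part of this diff):

import torch
from nni.algorithms.compression.v2.pytorch.pruning import LevelPruner

model = TorchModel()  # hypothetical model whose Conv2d kernels are 5x5, as in the tests
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.7}]
# balance_gran=[5] is right-aligned to the 4-D Conv2d weight, so each bank is a [1, 1, 1, 5] tile
pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[5])
pruned_model, masks = pruner.compress()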
class NormPruner(BasicPruner):
"""
......
......@@ -20,6 +20,7 @@ from .metrics_calculator import (
)
from .sparsity_allocator import (
NormalSparsityAllocator,
BankSparsityAllocator,
GlobalSparsityAllocator,
Conv2dDependencyAwareAllocator
)
......
......@@ -2,6 +2,7 @@
# Licensed under the MIT license.
import math
import itertools
from typing import Any, Dict, List, Tuple, Union
import numpy as np
......@@ -40,6 +41,60 @@ class NormalSparsityAllocator(SparsityAllocator):
masks[name]['weight'] *= wrapper.weight_mask
return masks
class BankSparsityAllocator(SparsityAllocator):
"""
    In the bank pruner, all values in a weight are divided into sub-blocks whose shapes are
    aligned with balance_gran. Each sub-block has the same sparsity, equal to the overall
    sparsity. This allocator prunes the weight at the granularity of blocks.
"""
    def __init__(self, pruner: Pruner, balance_gran: list):
        super().__init__(pruner)
        self.balance_gran = balance_gran
        for gran in self.balance_gran:
            assert isinstance(gran, int) and gran > 0, 'All values in balance_gran should be positive integers'
    def generate_sparsity(self, metrics: Dict[str, Tensor]) -> Dict[str, Dict[str, Tensor]]:
        masks = {}
        for name, wrapper in self.pruner.get_modules_wrapper().items():
            sparsity_rate = wrapper.config['total_sparsity']
            assert name in metrics, 'Metric of {} is not calculated.'.format(name)
            # We assume the metric values are all positive for now.
            metric = metrics[name]
            if self.continuous_mask:
                metric *= self._compress_mask(wrapper.weight_mask)
            n_dim = len(metric.shape)
            assert n_dim >= len(self.balance_gran), 'Length of balance_gran should not exceed the dimension of the metric'
            # pad balance_gran with ones on the left so that it is right-aligned with the metric shape
            balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
            for i, j in zip(metric.shape, balance_gran):
                assert i % j == 0, 'Weight shape of {} is not aligned with the balance granularity'.format(name)
            mask = torch.zeros(metric.shape).type_as(metric)
            loop_iters = [range(i // j) for i, j in zip(metric.shape, balance_gran)]
            for iter_params in itertools.product(*loop_iters):
                # slice selecting the current sub-block in every dimension
                index = tuple(slice(iter_param * gran, (iter_param + 1) * gran)
                              for iter_param, gran in zip(iter_params, balance_gran))
                metric_bank = metric[index]
                prune_num = int(sparsity_rate * metric_bank.numel())
                if prune_num == 0:
                    # nothing to prune in this bank; pick a threshold below the minimum so all values are kept
                    threshold = metric_bank.min() - 1
                else:
                    # the prune_num-th smallest metric value inside the bank
                    threshold = torch.topk(metric_bank.reshape(-1), prune_num, largest=False)[0].max()
                mask[index] = torch.gt(metric_bank, threshold).type_as(metric_bank)
            masks[name] = self._expand_mask(name, mask)
            if self.continuous_mask:
                masks[name]['weight'] *= wrapper.weight_mask
        return masks
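To make the per-bank thresholding concrete, here is a self-contained sketch (hypothetical helper name bank_prune_mask, plain torch, no NNI dependencies) that reproduces the allocator's logic and checks example 1 from the docstring above:

import itertools
import torch

def bank_prune_mask(metric: torch.Tensor, balance_gran: list, sparsity: float) -> torch.Tensor:
    # right-align balance_gran by padding with ones on the left
    gran = [1] * (metric.dim() - len(balance_gran)) + balance_gran
    mask = torch.zeros_like(metric)
    loop_iters = [range(s // g) for s, g in zip(metric.shape, gran)]
    for iter_params in itertools.product(*loop_iters):
        index = tuple(slice(p * g, (p + 1) * g) for p, g in zip(iter_params, gran))
        bank = metric[index]
        prune_num = int(sparsity * bank.numel())
        if prune_num == 0:
            mask[index] = 1.  # keep the whole bank
            continue
        # threshold at the prune_num-th smallest value; keep everything strictly above it
        threshold = torch.topk(bank.reshape(-1), prune_num, largest=False)[0].max()
        mask[index] = torch.gt(bank, threshold).float()
    return mask

# example 1: every [1, 4] bank of a [32, 32] metric keeps exactly 2 of its 4 values
metric = torch.rand(32, 32)
mask = bank_prune_mask(metric, balance_gran=[4], sparsity=0.5)
assert all(int(mask[i, j:j + 4].sum()) == 2 for i in range(32) for j in range(0, 32, 4))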
class GlobalSparsityAllocator(SparsityAllocator):
"""
......
......@@ -72,6 +72,16 @@ class PrunerTestCase(unittest.TestCase):
sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82
def test_level_pruner_bank(self):
model = TorchModel()
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.7}]
pruner = LevelPruner(model=model, config_list=config_list, mode='balance', balance_gran=[5])
pruned_model, masks = pruner.compress()
pruner._unwrap_model()
sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list)
        # the pruned count is rounded down per bank, so the achieved sparsity is lower than 0.7 (see the check below)
assert sparsity_list[0]['total_sparsity'] == 0.6
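The asserted value follows directly from the per-bank rounding; a quick check of the arithmetic:

bank_size = 5
prune_num = int(0.7 * bank_size)            # int(3.5) == 3 values pruned per bank
achieved_sparsity = prune_num / bank_size   # 3 / 5 == 0.6
assert achieved_sparsity == 0.6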
def test_l1_norm_pruner(self):
model = TorchModel()
config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}]
......