OpenDAS / nni · Commits · f24dc27b

Unverified commit f24dc27b, authored Jun 30, 2022 by J-shang, committed by GitHub on Jun 30, 2022.

[Compression] block sparse refactor (#4932)

parent 00e4debb

Showing 10 changed files with 619 additions and 477 deletions (+619 -477).
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py              +24   -23
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py             +1    -1
nni/algorithms/compression/v2/pytorch/pruning/tools/base.py               +112  -149
nni/algorithms/compression/v2/pytorch/pruning/tools/metrics_calculator.py  +58  -111
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py +156  -187
nni/algorithms/compression/v2/pytorch/utils/__init__.py                     +6    -1
nni/algorithms/compression/v2/pytorch/utils/attr.py                        +32    -0
nni/algorithms/compression/v2/pytorch/utils/scaling.py                    +195    -0
test/algo/compression/v2/test_pruning_tools_torch.py                        +6    -5
test/algo/compression/v2/test_scaling.py                                   +29    -0
nni/algorithms/compression/v2/pytorch/pruning/basic_pruner.py

@@ -13,8 +13,7 @@ from torch.nn import Module
 from torch.optim import Optimizer

 from nni.common.serializer import Traceable
-from nni.algorithms.compression.v2.pytorch.base.pruner import Pruner
-from nni.algorithms.compression.v2.pytorch.utils import CompressorSchema, config_list_canonical, OptimizerConstructHelper
+from ..base import Pruner
 from .tools import (
     DataCollector,

@@ -38,9 +37,11 @@ from .tools import (
     NormalSparsityAllocator,
     BankSparsityAllocator,
     GlobalSparsityAllocator,
-    Conv2dDependencyAwareAllocator
+    DependencyAwareAllocator
 )
+from ..utils import CompressorSchema, config_list_canonical, OptimizerConstructHelper, Scaling

 _logger = logging.getLogger(__name__)

 __all__ = ['LevelPruner', 'L1NormPruner', 'L2NormPruner', 'FPGMPruner', 'SlimPruner', 'ActivationPruner',

@@ -275,12 +276,12 @@ class NormPruner(BasicPruner):
         else:
             self.data_collector.reset()
         if self.metrics_calculator is None:
-            self.metrics_calculator = NormMetricsCalculator(p=self.p, dim=0)
+            self.metrics_calculator = NormMetricsCalculator(p=self.p, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

@@ -440,12 +441,12 @@ class FPGMPruner(BasicPruner):
         else:
             self.data_collector.reset()
         if self.metrics_calculator is None:
-            self.metrics_calculator = DistMetricsCalculator(p=2, dim=0)
+            self.metrics_calculator = DistMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

@@ -688,16 +689,16 @@ class ActivationPruner(BasicPruner):
         else:
             self.data_collector.reset(collector_infos=[collector_info])  # type: ignore
         if self.metrics_calculator is None:
-            self.metrics_calculator = self._get_metrics_calculator()
+            self.metrics_calculator = self._create_metrics_calculator()
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal` and `dependency_aware`')

-    def _get_metrics_calculator(self) -> MetricsCalculator:
+    def _create_metrics_calculator(self) -> MetricsCalculator:
         raise NotImplementedError()

@@ -782,8 +783,8 @@ class ActivationAPoZRankPruner(ActivationPruner):
         # return a matrix that the position of zero in `output` is one, others is zero.
         return torch.eq(self._activation(output.detach()), torch.zeros_like(output)).type_as(output)

-    def _get_metrics_calculator(self) -> MetricsCalculator:
-        return APoZRankMetricsCalculator(dim=1)
+    def _create_metrics_calculator(self) -> MetricsCalculator:
+        return APoZRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))


 class ActivationMeanRankPruner(ActivationPruner):

@@ -865,8 +866,8 @@ class ActivationMeanRankPruner(ActivationPruner):
         # return the activation of `output` directly.
         return self._activation(output.detach())

-    def _get_metrics_calculator(self) -> MetricsCalculator:
-        return MeanRankMetricsCalculator(dim=1)
+    def _create_metrics_calculator(self) -> MetricsCalculator:
+        return MeanRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))


 class TaylorFOWeightPruner(BasicPruner):

@@ -1009,14 +1010,14 @@ class TaylorFOWeightPruner(BasicPruner):
         else:
             self.data_collector.reset(collector_infos=[collector_info])  # type: ignore
         if self.metrics_calculator is None:
-            self.metrics_calculator = MultiDataNormMetricsCalculator(p=1, dim=0)
+            self.metrics_calculator = MultiDataNormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.mode == 'normal':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'global':
-                self.sparsity_allocator = GlobalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = GlobalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             elif self.mode == 'dependency_aware':
-                self.sparsity_allocator = Conv2dDependencyAwareAllocator(self, 0, self.dummy_input)
+                self.sparsity_allocator = DependencyAwareAllocator(self, self.dummy_input, Scaling(kernel_size=[1], kernel_padding_mode='back'))
             else:
                 raise NotImplementedError('Only support mode `normal`, `global` and `dependency_aware`')

@@ -1146,12 +1147,12 @@ class ADMMPruner(BasicPruner):
             if self.granularity == 'fine-grained':
                 self.metrics_calculator = NormMetricsCalculator(p=1)
             elif self.granularity == 'coarse-grained':
-                self.metrics_calculator = NormMetricsCalculator(dim=0, p=1)
+                self.metrics_calculator = NormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         if self.sparsity_allocator is None:
             if self.granularity == 'fine-grained':
                 self.sparsity_allocator = NormalSparsityAllocator(self)
             elif self.granularity == 'coarse-grained':
-                self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
+                self.sparsity_allocator = NormalSparsityAllocator(self, Scaling(kernel_size=[1], kernel_padding_mode='back'))

     def compress(self) -> Tuple[Module, Dict]:
         """
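For reference, a minimal sketch of why `Scaling(kernel_size=[1], kernel_padding_mode='back')` stands in for the old `dim=0` argument in the calls above. This assumes the nni code from this commit is importable; the (32, 16, 3, 3) weight shape is illustrative, borrowed from the old docstring example rather than from any concrete model.

    # Sketch: the new per-filter scaler reproduces the old `dim=0` metric.
    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    weight = torch.randn(32, 16, 3, 3)  # (out_channels, in_channels, kernel_h, kernel_w)

    # Old behaviour: keep dim 0, reduce every other dim with an L2 norm.
    old_metric = weight.reshape(weight.shape[0], -1).norm(p=2, dim=-1)

    # New behaviour: kernel_size=[1] with 'back' padding canonicalizes to [1, -1, -1, -1],
    # i.e. one metric value per output channel, all remaining dims flattened and reduced.
    scaler = Scaling(kernel_size=[1], kernel_padding_mode='back')
    new_metric = scaler.shrink(weight, reduce_func=lambda t: t.norm(p=2, dim=-1))

    assert new_metric.shape == old_metric.shape == (32,)
    assert torch.allclose(new_metric, old_metric)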
nni/algorithms/compression/v2/pytorch/pruning/tools/__init__.py

@@ -25,7 +25,7 @@ from .sparsity_allocator import (
     NormalSparsityAllocator,
     BankSparsityAllocator,
     GlobalSparsityAllocator,
-    Conv2dDependencyAwareAllocator
+    DependencyAwareAllocator
 )
 from .task_generator import (
     AGPTaskGenerator,
nni/algorithms/compression/v2/pytorch/pruning/tools/base.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+from __future__ import annotations
+
 from datetime import datetime
 import logging
 from pathlib import Path

@@ -13,12 +14,24 @@ from torch import Tensor
 from torch.nn import Module
 from torch.optim import Optimizer

-from nni.algorithms.compression.v2.pytorch.base import Pruner, LayerInfo, Task, TaskResult
-from nni.algorithms.compression.v2.pytorch.utils import OptimizerConstructHelper
+from ...base import Pruner, LayerInfo, Task, TaskResult
+from ...utils import OptimizerConstructHelper, Scaling

 _logger = logging.getLogger(__name__)


+def _get_scaler(scalers: Dict[str, Dict[str, Scaling]] | None, module_name: str, target_name: str) -> Scaling | None:
+    # Get scaler for the specific target in the specific module. Return None if don't find it.
+    # `module_name` is not used in current nni version, will support different modules using different scalers in the future.
+    if scalers:
+        default_module_scalers = scalers.get('_default', {})
+        default_target_scaler = default_module_scalers.get(target_name, default_module_scalers.get('_default', None))
+        module_scalers = scalers.get(module_name, {})
+        return module_scalers.get(target_name, module_scalers.get('_default', default_target_scaler))
+    else:
+        return None
+
+
 class DataCollector:
     """
     An abstract class for collect the data needed by the compressor.

@@ -245,49 +258,21 @@ class MetricsCalculator:
     Parameters
     ----------
-    dim
-        The dimensions that corresponding to the under pruning weight dimensions in collected data.
-        None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-        Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-        Example:
-
-        If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-        Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-            Case 1: Directly collect the conv module weight as data to calculate the metric.
-            Then the data has size (32, 16, 3, 3).
-            Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-            So in this case, `dim=0` will set in `__init__`.
-
-            Case 2: Use the output of the conv module as data to calculate the metric.
-            Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-            Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-            So in this case, `dim=1` will set in `__init__`.
-
-        In both of these two case, the metric of this module has size (32,).
-    block_sparse_size
-        This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
-        Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
-
-        Example:
-
-        The under pruning weight size is (768, 768), and you want to apply a block sparse on dim=[0] with block size [64, 768],
-        then you can set block_sparse_size=[64]. The final metric size is (12,).
+    scalers
+        Scaler is used to scale the metrics' size. It scaling metric to the same size as the shrinked mask in the sparsity allocator.
+        If you want to use different scalers for different pruning targets in different modules, please use a dict `{module_name: {target_name: scaler}}`.
+        If allocator meets an unspecified module name, it will try to use `scalers['_default'][target_name]` to scale its mask.
+        If allocator meets an unspecified target name, it will try to use `scalers[module_name]['_default']` to scale its mask.
+        Passing in a scaler instead of a `dict` of scalers will be treated as passed in `{'_default': {'_default': scalers}}`.
+        Passing in `None` means no need to scale.
     """

-    def __init__(self, dim: Optional[Union[int, List[int]]] = None,
-                 block_sparse_size: Optional[Union[int, List[int]]] = None):
-        self.dim = dim if not isinstance(dim, int) else [dim]
-        self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
-        if self.block_sparse_size is not None:
-            assert all(i >= 1 for i in self.block_sparse_size)
-        elif self.dim is not None:
-            self.block_sparse_size = [1] * len(self.dim)
-        if self.dim is not None:
-            assert all(i >= 0 for i in self.dim)
-            self.dim, self.block_sparse_size = (list(t) for t in zip(*sorted(zip(self.dim, self.block_sparse_size))))  # type: ignore
+    def __init__(self, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        self.scalers: Dict[str, Dict[str, Scaling]] | None = scalers if isinstance(scalers, (dict, type(None))) else {'_default': {'_default': scalers}}  # type: ignore
+
+    def _get_scaler(self, module_name: str, target_name: str) -> Scaling:
+        scaler = _get_scaler(self.scalers, module_name, target_name)
+        return scaler if scaler else Scaling([1])

     def calculate_metrics(self, data: Dict) -> Dict[str, Tensor]:
         """

@@ -307,142 +292,120 @@ class MetricsCalculator:
 class SparsityAllocator:
     """
-    An abstract class for allocate mask based on metrics.
+    A base class for allocating mask based on metrics.

     Parameters
     ----------
     pruner
         The pruner that binded with this `SparsityAllocator`.
-    dim
-        The under pruning weight dimensions, which metric size should equal to the under pruning weight size on these dimensions.
-        None means one-to-one correspondence between pruned dimensions and metric, which equal to set `dim` as all under pruning weight dimensions.
-        The mask will expand to the weight size depend on `dim`.
-
-        Example:
-
-        The under pruning weight has size (2, 3, 4), and `dim=1` means the under pruning weight dimension is 1.
-        Then the metric should have a size (3,), i.e., `metric=[0.9, 0.1, 0.8]`.
-        Assuming by some kind of `SparsityAllocator` get the mask on weight dimension 1 `mask=[1, 0, 1]`,
-        then the dimension mask will expand to the final mask `[[[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]]]`.
-    block_sparse_size
-        This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
-        Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
-
-        Example:
-
-        The metric size is (12,), and block_sparse_size=[64], then the mask will expand to (768,) at first before expand with `dim`.
+    scalers
+        Scaler is used to scale the masks' size. It shrinks the mask of the same size as the pruning target to the same size as the metric,
+        or expands the mask of the same size as the metric to the same size as the pruning target.
+        If you want to use different scalers for different pruning targets in different modules, please use a dict `{module_name: {target_name: scaler}}`.
+        If allocator meets an unspecified module name, it will try to use `scalers['_default'][target_name]` to scale its mask.
+        If allocator meets an unspecified target name, it will try to use `scalers[module_name]['_default']` to scale its mask.
+        Passing in a scaler instead of a `dict` of scalers will be treated as passed in `{'_default': {'_default': scalers}}`.
+        Passing in `None` means no need to scale.
     continuous_mask
-        Inherit the mask already in the wrapper if set True.
+        If set True, the part that has been masked will be masked first.
+        If set False, the part that has been masked may be unmasked due to the increase of its corresponding metric.
     """

-    def __init__(self, pruner: Pruner, dim: Optional[Union[int, List[int]]] = None,
-                 block_sparse_size: Optional[Union[int, List[int]]] = None, continuous_mask: bool = True):
+    def __init__(self, pruner: Pruner, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None,
+                 continuous_mask: bool = True):
         self.pruner = pruner
-        self.dim = dim if not isinstance(dim, int) else [dim]
-        self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
-        if self.block_sparse_size is not None:
-            assert all(i >= 1 for i in self.block_sparse_size)
-        elif self.dim is not None:
-            self.block_sparse_size = [1] * len(self.dim)
-        if self.dim is not None:
-            assert all(i >= 0 for i in self.dim)
-            self.dim, self.block_sparse_size = (list(t) for t in zip(*sorted(zip(self.dim, self.block_sparse_size))))  # type: ignore
+        self.scalers: Dict[str, Dict[str, Scaling]] | None = scalers if isinstance(scalers, (dict, type(None))) else {'_default': {'_default': scalers}}  # type: ignore
         self.continuous_mask = continuous_mask

-    def generate_sparsity(self, metrics: Dict) -> Dict[str, Dict[str, Tensor]]:
-        """
-        Parameters
-        ----------
-        metrics
-            A metric dict. The key is the name of layer, the value is its metric.
-        """
-        raise NotImplementedError()
-
-    def _expand_mask(self, name: str, mask: Tensor) -> Dict[str, Tensor]:
-        """
-        Parameters
-        ----------
-        name
-            The masked module name.
-        mask
-            The reduced mask with `self.dim` and `self.block_sparse_size`.
-
-        Returns
-        -------
-        Dict[str, Tensor]
-            The key is `weight` or `bias`, value is the final mask.
-        """
-        weight_mask = mask.clone()
-        if self.block_sparse_size is not None:
-            # expend mask with block_sparse_size
-            expand_size = list(weight_mask.size())
-            reshape_size = list(weight_mask.size())
-            for i, block_width in reversed(list(enumerate(self.block_sparse_size))):
-                weight_mask = weight_mask.unsqueeze(i + 1)
-                expand_size.insert(i + 1, block_width)
-                reshape_size[i] *= block_width
-            weight_mask = weight_mask.expand(expand_size).reshape(reshape_size)
-        wrapper = self.pruner.get_modules_wrapper()[name]
-        weight_size = wrapper.weight.data.size()  # type: ignore
-        if self.dim is None:
-            assert weight_mask.size() == weight_size
-            expand_mask = {'weight': weight_mask}
-        else:
-            # expand mask to weight size with dim
-            assert len(weight_mask.size()) == len(self.dim)
-            assert all(weight_size[j] == weight_mask.size(i) for i, j in enumerate(self.dim))
-            idxs = list(range(len(weight_size)))
-            [idxs.pop(i) for i in reversed(self.dim)]
-            for i in idxs:
-                weight_mask = weight_mask.unsqueeze(i)
-            expand_mask = {'weight': weight_mask.expand(weight_size).clone()}
-            # NOTE: assume we only mask output, so the mask and bias have a one-to-one correspondence.
-            # If we support more kind of masks, this place need refactor.
-            if wrapper.bias_mask is not None and weight_mask.size() == wrapper.bias_mask.size():  # type: ignore
-                expand_mask['bias'] = weight_mask.clone()
-        return expand_mask
-
-    def _compress_mask(self, mask: Tensor) -> Tensor:
-        """
-        This function will reduce the mask with `self.dim` and `self.block_sparse_size`.
-        e.g., a mask tensor with size [50, 60, 70], self.dim is (0, 1), self.block_sparse_size is [10, 10].
-        Then, the reduced mask size is [50 / 10, 60 / 10] => [5, 6].
-
-        Parameters
-        ----------
-        name
-            The masked module name.
-        mask
-            The entire mask has the same size with weight.
-
-        Returns
-        -------
-        Tensor
-            Reduced mask.
-        """
-        if self.dim is None or len(mask.size()) == 1:
-            mask = mask.clone()
-        else:
-            mask_dim = list(range(len(mask.size())))
-            for dim in self.dim:
-                mask_dim.remove(dim)
-            mask = torch.sum(mask, dim=mask_dim)
-        if self.block_sparse_size is not None:
-            # operation like pooling
-            lower_case_letters = 'abcdefghijklmnopqrstuvwxyz'
-            ein_expression = ''
-            for i, step in enumerate(self.block_sparse_size):
-                mask = mask.unfold(i, step, step)
-                ein_expression += lower_case_letters[i]
-            ein_expression = '...{},{}'.format(ein_expression, ein_expression)
-            mask = torch.einsum(ein_expression, mask, torch.ones(self.block_sparse_size).to(mask.device))
-        return (mask != 0).type_as(mask)
+    def _get_scaler(self, module_name: str, target_name: str) -> Scaling | None:
+        return _get_scaler(self.scalers, module_name, target_name)
+
+    def _expand_mask(self, module_name: str, target_name: str, mask: Tensor) -> Tensor:
+        # Expand the shrinked mask to the pruning target size.
+        scaler = self._get_scaler(module_name=module_name, target_name=target_name)
+        if scaler:
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            return scaler.expand(mask, getattr(wrapper, f'{target_name}_mask').shape)
+        else:
+            return mask.clone()
+
+    def _shrink_mask(self, module_name: str, target_name: str, mask: Tensor) -> Tensor:
+        # Shrink the mask by scaler, shrinked mask usually has the same size with metric.
+        scaler = self._get_scaler(module_name=module_name, target_name=target_name)
+        if scaler:
+            mask = (scaler.shrink(mask) != 0).type_as(mask)
+        return mask
+
+    def _continuous_mask(self, new_masks: Dict[str, Dict[str, Tensor]]) -> Dict[str, Dict[str, Tensor]]:
+        # Set the already masked part in the metric to the minimum value.
+        target_name = 'weight'
+        for module_name, target_mask in new_masks.items():
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            old_target_mask = getattr(wrapper, f'{target_name}_mask', None)
+            if old_target_mask is not None:
+                new_masks[module_name][target_name] = torch.min(target_mask[target_name], old_target_mask)
+        return new_masks
+
+    def common_target_masks_generation(self, metrics: Dict[str, Tensor]) -> Dict[str, Dict[str, Tensor]]:
+        """
+        Generate masks for metrics-dependent targets.
+
+        Parameters
+        ----------
+        metrics
+            The format is {module_name: weight_metric}.
+            The metric of `weight` usually has the same size with shrinked mask.
+
+        Return
+        ------
+        Dict[str, Dict[str, Tensor]]
+            The format is {module_name: {target_name: mask}}.
+            Return the masks of the same size as its target.
+        """
+        raise NotImplementedError()
+
+    def special_target_masks_generation(self, masks: Dict[str, Dict[str, Tensor]]) -> Dict[str, Dict[str, Tensor]]:
+        """
+        Some pruning targets' mask generation depends on other targets, i.e., bias mask depends on weight mask.
+        This function is used to generate these masks, and it be called at the end of `generate_sparsity`.
+
+        Parameters
+        ----------
+        masks
+            The format is {module_name: {target_name: mask}}.
+            It is usually the return value of `common_target_masks_generation`.
+        """
+        for module_name, module_masks in masks.items():
+            # generate bias mask, this may move to wrapper in the future
+            weight_mask = module_masks.get('weight', None)
+            wrapper = self.pruner.get_modules_wrapper()[module_name]
+            old_bias_mask = getattr(wrapper, 'bias_mask', None)
+            if weight_mask is not None and old_bias_mask is not None and weight_mask.shape[0] == old_bias_mask.shape[0]:
+                # keep dim 0 and reduce all other dims by sum
+                reduce_dims = [reduce_dim for reduce_dim in range(1, len(weight_mask.shape))]
+                # count unmasked number of values on dim 0 (output channel) of weight
+                unmasked_num_on_dim0 = weight_mask.sum(reduce_dims) if reduce_dims else weight_mask
+                module_masks['bias'] = (unmasked_num_on_dim0 != 0).type_as(old_bias_mask)
+        return masks
+
+    def generate_sparsity(self, metrics: Dict) -> Dict[str, Dict[str, Tensor]]:
+        """
+        The main function of `SparsityAllocator`, generate a set of masks based on the given metrics.
+
+        Parameters
+        ----------
+        metrics
+            A metric dict with format {module_name: weight_metric}
+
+        Returns
+        -------
+        Dict[str, Dict[str, Tensor]]
+            The masks format is {module_name: {target_name: mask}}.
+        """
+        masks = self.common_target_masks_generation(metrics)
+        masks = self.special_target_masks_generation(masks)
+        if self.continuous_mask:
+            masks = self._continuous_mask(masks)
+        return masks


 class TaskGenerator:
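The `scalers` argument documented above accepts either a single `Scaling` or a nested dict. A small sketch of the lookup order implemented by the module-level `_get_scaler` helper in this file; the module and target names ('conv1', 'weight') are purely illustrative.

    # Sketch of the scaler lookup order; runnable as-is against this commit's code.
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    per_filter = Scaling(kernel_size=[1], kernel_padding_mode='back')    # one value per output channel
    per_block64 = Scaling(kernel_size=[64], kernel_padding_mode='back')  # 64-wide blocks on dim 0

    scalers = {
        '_default': {'_default': per_filter},   # fallback for any unspecified module / target
        'conv1': {'weight': per_block64},       # hypothetical override for one module's weight
    }

    # Lookup order for a (module_name, target_name) pair:
    #   scalers[module_name][target_name]
    #   -> scalers[module_name]['_default']
    #   -> scalers['_default'][target_name]
    #   -> scalers['_default']['_default']
    # So ('conv1', 'weight') resolves to per_block64, while e.g. ('conv2', 'weight')
    # falls back to per_filter.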
nni/algorithms/compression/v2/pytorch/pruning/tools/metrics_calculator.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

-from typing import Dict, List, Optional, Union
+from __future__ import annotations
+
+from typing import Dict, List

 import torch
 from torch import Tensor

 from .base import MetricsCalculator
+from ...utils import Scaling

 __all__ = ['NormMetricsCalculator', 'MultiDataNormMetricsCalculator', 'DistMetricsCalculator',
            'APoZRankMetricsCalculator', 'MeanRankMetricsCalculator', 'StraightMetricsCalculator']

@@ -28,49 +31,28 @@ class NormMetricsCalculator(MetricsCalculator):
     """
     Calculate the specify norm for each tensor in data.
     L1, L2, Level, Slim pruner use this to calculate metric.
+
+    Parameters
+    ----------
+    p
+        The order of norm. None means Frobenius norm.
+    scalers
+        Please view the base class `MetricsCalculator` docstring.
     """

-    def __init__(self, dim: Optional[Union[int, List[int]]] = None,
-                 p: Optional[Union[int, float]] = None):
-        """
-        Parameters
-        ----------
-        dim
-            The dimensions that corresponding to the under pruning weight dimensions in collected data.
-            None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-            Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-            Example:
-
-            If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-            Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-                Case 1: Directly collect the conv module weight as data to calculate the metric.
-                Then the data has size (32, 16, 3, 3).
-                Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=0` will set in `__init__`.
-
-                Case 2: Use the output of the conv module as data to calculate the metric.
-                Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-                Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=1` will set in `__init__`.
-
-            In both of these two case, the metric of this module has size (32,).
-        p
-            The order of norm. None means Frobenius norm.
-        """
-        super().__init__(dim=dim)
+    def __init__(self, p: int | float | None = None, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        super().__init__(scalers=scalers)
         self.p = p if p is not None else 'fro'

     def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return t.norm(p=self.p, dim=-1)  # type: ignore
+
         metrics = {}
-        for name, tensor in data.items():
-            keeped_dim = list(range(len(tensor.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(tensor.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            if len(across_dim) == 0:
-                metrics[name] = tensor.abs()
-            else:
-                metrics[name] = tensor.norm(p=self.p, dim=across_dim)  # type: ignore
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -90,66 +72,32 @@ class DistMetricsCalculator(MetricsCalculator):
     """
     Calculate the sum of specify distance for each element with all other elements in specify `dim` in each tensor in data.
     FPGM pruner uses this to calculate metric.
+
+    Parameters
+    ----------
+    p
+        The order of norm. None means Frobenius norm.
+    scalers
+        Please view the base class `MetricsCalculator` docstring.
     """

-    def __init__(self, p: float, dim: Union[int, List[int]]):
-        """
-        Parameters
-        ----------
-        dim
-            The dimensions that corresponding to the under pruning weight dimensions in collected data.
-            None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
-            Only these `dim` will be kept and other dimensions of the data will be reduced.
-
-            Example:
-
-            If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
-            Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
-
-                Case 1: Directly collect the conv module weight as data to calculate the metric.
-                Then the data has size (32, 16, 3, 3).
-                Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=0` will set in `__init__`.
-
-                Case 2: Use the output of the conv module as data to calculate the metric.
-                Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
-                Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
-                So in this case, `dim=1` will set in `__init__`.
-
-            In both of these two case, the metric of this module has size (32,).
-        p
-            The order of norm.
-        """
-        super().__init__(dim=dim)
-        self.p = p
+    def __init__(self, p: int | float | None = None, scalers: Dict[str, Dict[str, Scaling]] | Scaling | None = None):
+        super().__init__(scalers=scalers)
+        self.p = p if p is not None else 'fro'

     def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            reshape_data = t.reshape(-1, t.shape[-1])
+            metric = torch.zeros(reshape_data.shape[0], device=reshape_data.device)
+            for i in range(reshape_data.shape[0]):
+                metric[i] = (reshape_data - reshape_data[i]).norm(p=self.p, dim=-1).sum()  # type: ignore
+            return metric.reshape(t.shape[:-1])
+
         metrics = {}
-        for name, tensor in data.items():
-            keeped_dim = list(range(len(tensor.size()))) if self.dim is None else self.dim
-            reorder_dim = list(keeped_dim)
-            reorder_dim.extend([i for i in range(len(tensor.size())) if i not in keeped_dim])
-            reorder_tensor = tensor.permute(*reorder_dim).clone()
-            metric = torch.ones(*reorder_tensor.size()[:len(keeped_dim)], device=reorder_tensor.device)
-            across_dim = list(range(len(keeped_dim), len(reorder_dim)))
-            idxs = metric.nonzero(as_tuple=False)
-            for idx in idxs:
-                other = reorder_tensor
-                for i in idx:
-                    other = other[i]
-                other = other.clone()
-                if len(across_dim) == 0:
-                    dist_sum = torch.abs(reorder_tensor - other).sum()
-                else:
-                    dist_sum = torch.norm((reorder_tensor - other), p=self.p, dim=across_dim).sum()  # type: ignore
-                # NOTE: this place need refactor when support layer level pruning.
-                tmp_metric = metric
-                for i in idx[:-1]:
-                    tmp_metric = tmp_metric[i]
-                tmp_metric[idx[-1]] = dist_sum
-            metrics[name] = metric
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -161,19 +109,15 @@ class APoZRankMetricsCalculator(MetricsCalculator):
     APoZRank pruner uses this to calculate metric.
     """

     def calculate_metrics(self, data: Dict[str, List]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return 1 - t.mean(dim=-1)
+
         metrics = {}
-        for name, (num, zero_counts) in data.items():
-            keeped_dim = list(range(len(zero_counts.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(zero_counts.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            # The element number on each keeped_dim in zero_counts
-            total_size = num
-            for dim, dim_size in enumerate(zero_counts.size()):
-                if dim not in keeped_dim:
-                    total_size *= dim_size
-            _apoz = torch.sum(zero_counts, dim=across_dim).type_as(zero_counts) / total_size
-            # NOTE: the metric is (1 - apoz) because we assume the smaller metric value is more needed to be pruned.
-            metrics[name] = torch.ones_like(_apoz) - _apoz
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            target_data = target_data[1] / target_data[0]
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics

@@ -183,11 +127,14 @@ class MeanRankMetricsCalculator(MetricsCalculator):
     This metric simply calculate the average on `self.dim`, then divide by the batch_number.
     MeanRank pruner uses this to calculate metric.
     """

-    def calculate_metrics(self, data: Dict[str, List[Tensor]]) -> Dict[str, Tensor]:
+    def calculate_metrics(self, data: Dict[str, List]) -> Dict[str, Tensor]:
+        def reduce_func(t: Tensor) -> Tensor:
+            return t.mean(dim=-1)
+
         metrics = {}
-        for name, (num, activation_sum) in data.items():
-            keeped_dim = list(range(len(activation_sum.size()))) if self.dim is None else self.dim
-            across_dim = list(range(len(activation_sum.size())))
-            [across_dim.pop(i) for i in reversed(keeped_dim)]
-            metrics[name] = torch.mean(activation_sum, across_dim) / num
+        target_name = 'weight'
+        for module_name, target_data in data.items():
+            target_data = target_data[1] / target_data[0]
+            scaler = self._get_scaler(module_name, target_name)
+            metrics[module_name] = scaler.shrink(target_data, reduce_func)
         return metrics
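The rank calculators above now feed `target_data[1] / target_data[0]` through `scaler.shrink(...)`. A hedged sketch with the same toy data as the unit test further down, showing how `kernel_size=[-1, 1]` with 'back' padding collapses the first (batch-like) dimension and keeps the second, mirroring the old `dim=1` behaviour; the shapes here are illustrative, not taken from a real collector.

    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    num, activation_sum = 2, torch.tensor([[0., 1.], [1., 0.]])   # as in the MeanRank test data below
    scaler = Scaling(kernel_size=[-1, 1], kernel_padding_mode='back')

    # -1 covers the whole first dimension; 1 keeps the second dimension one-to-one.
    metric = scaler.shrink(activation_sum / num, reduce_func=lambda t: t.mean(dim=-1))
    assert metric.shape == (2,)                                    # one value per kept position
    assert torch.allclose(metric, torch.tensor([0.25, 0.25]))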
nni/algorithms/compression/v2/pytorch/pruning/tools/sparsity_allocator.py  (+156 -187)
This diff is collapsed.
nni/algorithms/compression/v2/pytorch/utils/__init__.py

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+from .attr import (
+    get_nested_attr,
+    set_nested_attr
+)
 from .config_validation import CompressorSchema
+from .constructor_helper import *
 from .pruning import (
     config_list_canonical,
     unfold_config_list,

@@ -12,4 +17,4 @@ from .pruning import (
     get_model_weights_numel,
     get_module_by_name
 )
-from .constructor_helper import *
+from .scaling import Scaling
nni/algorithms/compression/v2/pytorch/utils/attr.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from functools import reduce
from typing import Any, overload


@overload
def get_nested_attr(__o: object, __name: str) -> Any:
    ...


@overload
def get_nested_attr(__o: object, __name: str, __default: Any) -> Any:
    ...


def get_nested_attr(__o: object, __name: str, *args) -> Any:
    """
    Get a nested named attribute from an object by a `.` separated name.
    rgetattr(x, 'y.z') is equivalent to getattr(getattr(x, 'y'), 'z') and x.y.z.
    """
    def _getattr(__o, __name):
        return getattr(__o, __name, *args)
    return reduce(_getattr, [__o] + __name.split('.'))  # type: ignore


def set_nested_attr(__obj: object, __name: str, __value: Any):
    """
    Set the nested named attribute on the given object to the specified value by a `.` separated name.
    set_nested_attr(x, 'y.z', v) is equivalent to setattr(getattr(x, 'y'), 'z', v) and x.y.z = v.
    """
    pre, _, post = __name.rpartition('.')
    return setattr(get_nested_attr(__obj, pre) if pre else __obj, post, __value)
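A short usage sketch for the two helpers above; the nested object is a stand-in, not part of this commit.

    from types import SimpleNamespace
    from nni.algorithms.compression.v2.pytorch.utils.attr import get_nested_attr, set_nested_attr

    cfg = SimpleNamespace(pruner=SimpleNamespace(mode='normal'))

    assert get_nested_attr(cfg, 'pruner.mode') == 'normal'       # same as cfg.pruner.mode
    set_nested_attr(cfg, 'pruner.mode', 'dependency_aware')      # same as cfg.pruner.mode = ...
    assert cfg.pruner.mode == 'dependency_aware'
    assert get_nested_attr(cfg, 'pruner.missing', None) is None  # default value, like getattr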
nni/algorithms/compression/v2/pytorch/utils/scaling.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from __future__ import annotations

from functools import reduce
from typing import Callable, List, overload

from typing_extensions import Literal

import torch
from torch import Tensor


class Scaling:
    """
    In the process of generating masks, a large number of operations like pooling or upsampling are involved.
    This class provides tensor-related scaling functions for a given scaling kernel.
    Similar to the concept of convolutional kernel, the scaling kernel also moves over the tensor and does operations.
    The scaling kernel in this class is defined by two parts, kernel size and scaling function (shrink and expand).

    Parameters
    ----------
    kernel_size
        kernel_size is the scale, which determines how large a range in a tensor should shrink to a value,
        or how large a value in a tensor should expand.
        `-1` can be used to indicate that it is a full step in this dimension,
        and the dimension where -1 is located will be reduced or unsqueezed during scaling.

        Example::

            kernel_size = [2, -1]
            # For a given 2D-tensor with size (4, 3),
            [[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9],
             [10, 11, 12]]
            # shrinking it by shrink function, its size becomes (2,) after shrinking:
            [shrink([[1, 2, 3], [4, 5, 6]]), shrink([[7, 8, 9], [10, 11, 12]])]
            # expanding it by expand function with a given expand size,
            # if the expand function is repeating the values, and the expand size is (4, 6, 2):
            [[[1, 1],
              [1, 1],
              [2, 2],
              [2, 2],
              [3, 3],
              [3, 3]],
              ...
              [9, 9]]]
            # note that the original tensor with size (4, 3) will unsqueeze to size (4, 3, 1) at first
            # for the `-1` in kernel_size, then expand size (4, 3, 1) to size (4, 6, 2).
    kernel_padding_mode
        'front' or 'back', default is 'front'.
        If set 'front', for a given tensor when shrinking, padding `1` at front of kernel_size until `len(tensor.shape) == len(kernel_size)`;
        for a given expand size when expanding, padding `1` at front of kernel_size until `len(expand_size) == len(kernel_size)`.
        If set 'back', for a given tensor when shrinking, padding `-1` at back of kernel_size until `len(tensor.shape) == len(kernel_size)`;
        for a given expand size when expanding, padding `-1` at back of kernel_size until `len(expand_size) == len(kernel_size)`.
    """

    def __init__(self, kernel_size: List[int], kernel_padding_mode: Literal['front', 'back'] = 'front') -> None:
        self.kernel_size = kernel_size
        assert kernel_padding_mode in ['front', 'back'], \
            f"kernel_padding_mode should be one of ['front', 'back'], but get kernel_padding_mode={kernel_padding_mode}."
        self.kernel_padding_mode = kernel_padding_mode

    def _padding(self, _list: List[int], length: int, padding_value: int = -1,
                 padding_mode: Literal['front', 'back'] = 'back') -> List[int]:
        """
        Padding the `_list` to a specific length with `padding_value`.

        Parameters
        ----------
        _list
            The list of int value to be padding.
        length
            The length to pad to.
        padding_value
            Padding value, should be a int.
        padding_mode
            If `padding_mode` is `'front'`, then the padding applied on the front of the size list.
            If `padding_mode` is `'back'`, then the padding applied on the back of the size list.

        Returns
        -------
        List[int]
            The padded list.
        """
        assert len(_list) <= length
        padding = [padding_value for _ in range(length - len(_list))]
        if padding_mode == 'front':
            new_list = padding + list(_list)
        elif padding_mode == 'back':
            new_list = list(_list) + padding
        else:
            raise ValueError(f'Unsupported padding mode: {padding_mode}.')
        return new_list

    def _shrink(self, target: Tensor, kernel_size: List[int], reduce_func: Callable[[Tensor], Tensor] | None = None) -> Tensor:
        """
        Main logic about how to shrink target. Subclass could override this function to customize.
        Sum all values covered by the kernel as a simple implementation.
        """
        # step 1: put the part covered by the kernel to the end of the converted target.
        # e.g., target size is [10, 20], kernel_size is [2, 4], then new_target size is [5, 5, 8].
        reshape_size = []
        final_size = []
        reduced_dims = []
        for (dim, step) in enumerate(kernel_size):
            if step == -1:
                step = target.shape[dim]
                reduced_dims.insert(0, dim)
            assert target.shape[dim] % step == 0
            reshape_size.append(target.shape[dim] // step)
            final_size.append(target.shape[dim] // step)
            reshape_size.append(step)
        permute_dims = [2 * _ for _ in range(len(kernel_size))] + [2 * _ + 1 for _ in range(len(kernel_size))]
        converted_target = target.reshape(reshape_size).permute(permute_dims).reshape(final_size + [-1])
        # step 2: reduce the converted_target last dim with a certain way, by default is converted_target.sum(-1).
        result = reduce_func(converted_target) if reduce_func else converted_target.sum(-1)
        # step 3: reduce the dims where kernel_size is -1.
        # e.g., target size is [10, 40], kernel_size is [-1, 4], result size is [1, 10], then reduce result to size [10].
        result = reduce(lambda t, dim: t.squeeze(dim), [result] + reduced_dims)  # type: ignore
        return result

    def _expand(self, target: Tensor, kernel_size: List[int], expand_size: List[int]) -> Tensor:
        """
        Main logic about how to expand target to a specific size. Subclass could override this function to customize.
        Repeat each value to reach the kernel size as a simple implementation.
        """
        # step 1: unsqueeze the target tensor where -1 is located in kernel_size.
        unsqueezed_dims = [dim for (dim, step) in enumerate(kernel_size) if step == -1]
        new_target: Tensor = reduce(lambda t, dim: t.unsqueeze(dim), [target] + unsqueezed_dims)  # type: ignore
        # step 2: build the _expand_size and unsqueeze target tensor on each dim
        _expand_size = []
        for a, b in zip(kernel_size, expand_size):
            if a == -1:
                _expand_size.append(1)
                _expand_size.append(b)
            else:
                assert b % a == 0, f'Can not expand tensor with {target.shape} to {expand_size} with kernel size {kernel_size}.'
                _expand_size.append(b // a)
                _expand_size.append(a)
        new_target: Tensor = reduce(lambda t, dim: t.unsqueeze(dim), [new_target] + [2 * _ + 1 for _ in range(len(expand_size))])  # type: ignore
        # step 3: expanding the new target to _expand_size and reshape to expand_size.
        # Note that we can also give an interface for how to expand the tensor, like `reduce_func` in `_shrink`, currently we don't have that need.
        result = new_target.expand(_expand_size).reshape(expand_size).clone()
        return result

    def shrink(self, target: Tensor, reduce_func: Callable[[Tensor], Tensor] | None = None) -> Tensor:
        # Canonicalize kernel_size to target size length at first.
        # If kernel_padding_mode is 'front', padding 1 at the front of `self.kernel_size`.
        # e.g., padding kernel_size [2, 2] to [1, 2, 2] when target size length is 3.
        # If kernel_padding_mode is 'back', padding -1 at the back of `self.kernel_size`.
        # e.g., padding kernel_size [1] to [1, -1, -1] when target size length is 3.
        if self.kernel_padding_mode == 'front':
            kernel_size = self._padding(self.kernel_size, len(target.shape), 1, 'front')
        elif self.kernel_padding_mode == 'back':
            kernel_size = self._padding(self.kernel_size, len(target.shape), -1, 'back')
        else:
            raise ValueError(f'Unsupported kernel padding mode: {self.kernel_padding_mode}.')
        return self._shrink(target, kernel_size, reduce_func)

    def expand(self, target: Tensor, expand_size: List[int]):
        # Similar with `self.shrink`, canonicalize kernel_size to expand_size length at first.
        if self.kernel_padding_mode == 'front':
            kernel_size = self._padding(self.kernel_size, len(expand_size), 1, 'front')
        elif self.kernel_padding_mode == 'back':
            kernel_size = self._padding(self.kernel_size, len(expand_size), -1, 'back')
        else:
            raise ValueError(f'Unsupported kernel padding mode: {self.kernel_padding_mode}.')
        return self._expand(target, kernel_size, expand_size)

    @overload
    def validate(self, target: List[int]):
        ...

    @overload
    def validate(self, target: Tensor):
        ...

    def validate(self, target: List[int] | Tensor):
        """
        Validate the target tensor can be shape-lossless scaling.
        That means the shape will not change after `shrink` then `expand`.
        """
        target = target if isinstance(target, Tensor) else torch.rand(target)
        if self.expand((self.shrink(target)), list(target.shape)).shape != target.shape:
            raise ValueError(f'The tensor with shape {target.shape}, can not shape-lossless scaling with ' +
                             f'kernel size is {self.kernel_size} and kernel_padding_mode is {self.kernel_padding_mode}.')
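A worked instance of the `kernel_size = [2, -1]` example from the `Scaling` docstring above, as a hedged sketch assuming this commit's module path; the default shrink sums each kernel block and the default expand repeats values.

    import torch
    from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling

    t = torch.arange(1, 13).reshape(4, 3)      # [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    scaler = Scaling(kernel_size=[2, -1])       # kernel_padding_mode defaults to 'front'

    shrunk = scaler.shrink(t)                   # default reduce: sum over each (2, 3) block
    assert shrunk.tolist() == [21, 57]          # 1+2+...+6 and 7+8+...+12

    expanded = scaler.expand(t, [4, 6, 2])      # repeat values, unsqueezing for the -1 dim
    assert expanded.shape == (4, 6, 2)
    assert expanded[0].tolist() == [[1, 1], [1, 1], [2, 2], [2, 2], [3, 3], [3, 3]]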
test/algo/compression/v2/test_pruning_tools_torch.py

@@ -26,6 +26,7 @@ from nni.algorithms.compression.v2.pytorch.pruning.tools import (
 )
 from nni.algorithms.compression.v2.pytorch.pruning.tools.base import HookCollectorInfo
 from nni.algorithms.compression.v2.pytorch.utils import get_module_by_name
+from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling
 from nni.algorithms.compression.v2.pytorch.utils.constructor_helper import OptimizerConstructHelper

@@ -112,7 +113,7 @@ class PruningToolsTestCase(unittest.TestCase):
     def test_metrics_calculator(self):
         # Test NormMetricsCalculator
-        metrics_calculator = NormMetricsCalculator(dim=0, p=2)
+        metrics_calculator = NormMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': torch.ones(3, 3, 3),
             '2': torch.ones(4, 4) * 2

@@ -125,7 +126,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test DistMetricsCalculator
-        metrics_calculator = DistMetricsCalculator(dim=0, p=2)
+        metrics_calculator = DistMetricsCalculator(p=2, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': torch.tensor([[1, 2], [4, 6]], dtype=torch.float32),
             '2': torch.tensor([[0, 0], [1, 1]], dtype=torch.float32)

@@ -138,7 +139,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test MultiDataNormMetricsCalculator
-        metrics_calculator = MultiDataNormMetricsCalculator(dim=0, p=1)
+        metrics_calculator = MultiDataNormMetricsCalculator(p=1, scalers=Scaling(kernel_size=[1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.ones(3, 3, 3) * 2],
             '2': [2, torch.ones(4, 4) * 2]

@@ -151,7 +152,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test APoZRankMetricsCalculator
-        metrics_calculator = APoZRankMetricsCalculator(dim=1)
+        metrics_calculator = APoZRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.tensor([[1, 1], [1, 1]], dtype=torch.float32)],
             '2': [2, torch.tensor([[0, 0, 1], [0, 0, 0]], dtype=torch.float32)]

@@ -164,7 +165,7 @@ class PruningToolsTestCase(unittest.TestCase):
         assert all(torch.equal(result[k], v) for k, v in metrics.items())

         # Test MeanRankMetricsCalculator
-        metrics_calculator = MeanRankMetricsCalculator(dim=1)
+        metrics_calculator = MeanRankMetricsCalculator(Scaling(kernel_size=[-1, 1], kernel_padding_mode='back'))
         data = {
             '1': [2, torch.tensor([[0, 1], [1, 0]], dtype=torch.float32)],
             '2': [2, torch.tensor([[0, 0, 1], [0, 0, 0]], dtype=torch.float32)]
test/algo/compression/v2/test_scaling.py  (new file, 0 → 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pytest
import torch

from nni.algorithms.compression.v2.pytorch.utils.scaling import Scaling


def test_scaling():
    data = torch.tensor([_ for _ in range(100)]).reshape(10, 10)

    scaler = Scaling([5], kernel_padding_mode='front')
    shrinked_data = scaler.shrink(data)
    assert list(shrinked_data.shape) == [10, 2]
    expanded_data = scaler.expand(data, [10, 50])
    assert list(expanded_data.shape) == [10, 50]

    scaler = Scaling([5, 5], kernel_padding_mode='back')
    shrinked_data = scaler.shrink(data)
    assert list(shrinked_data.shape) == [2, 2]
    expanded_data = scaler.expand(data, [50, 50, 10])
    assert list(expanded_data.shape) == [50, 50, 10]
    scaler.validate([10, 10, 10])


if __name__ == '__main__':
    test_scaling()