Unverified commit a911b856, authored by Yuge Zhang, committed by GitHub

Resolve conflicts for #4760 (#4762)

parent 14d2966b
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import tensorflow as tf
from nni.compression.tensorflow import Pruner
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .compressor import Compressor, LayerInfo
from .pruner import Pruner, PrunerModuleWrapper
from .scheduler import BasePruningScheduler, Task, TaskResult
...@@ -35,17 +35,16 @@ def _setattr(model: Module, name: str, module: Module):
class Compressor:
"""
The abstract base pytorch compressor.
Parameters
----------
model
The model to be compressed.
config_list
The config list used by the compressor, usually specifying the 'op_types' or 'op_names' to be compressed.
""" """
def __init__(self, model: Optional[Module], config_list: Optional[List[Dict]]): def __init__(self, model: Optional[Module], config_list: Optional[List[Dict]]):
"""
Parameters
----------
model
The model under compressed.
config_list
The config list used by compressor, usually specifies the 'op_types' or 'op_names' that want to compress.
"""
self.is_wrapped = False
if model is not None:
self.reset(model=model, config_list=config_list)
...@@ -258,14 +257,7 @@ class Compressor:
Dict[str, str]
Return a dict `{original_model_parameter_name: wrapped_model_parameter_name}`
"""
raise NotImplementedError()
def _wrap_modules(self, layer: LayerInfo, config: Dict):
"""
......
...@@ -6,9 +6,9 @@ from typing import Dict, List, Optional, Tuple
import torch
from torch import Tensor
from torch.nn import Module, Parameter
from .compressor import Compressor, LayerInfo, _setattr
_logger = logging.getLogger(__name__)
...@@ -16,41 +16,68 @@ __all__ = ['Pruner']
class PrunerModuleWrapper(Module):
"""
Wrap a module to enable data parallel, forward method customization and buffer registration.
Parameters
----------
module
The module user wants to compress.
config
The configurations that users specify for compression.
module_name
The name of the module to compress, the wrapper module shares the same name.
"""
def __init__(self, module: Module, module_name: str, config: Dict):
super().__init__()
# origin layer information
self.module = module
self.name = module_name
# config information
self.config = config
self.weight = Parameter(torch.empty(self.module.weight.size()))
# register buffer for mask
self.register_buffer("weight_mask", torch.ones(self.module.weight.shape))
if hasattr(self.module, 'bias') and self.module.bias is not None:
self.register_buffer("bias_mask", torch.ones(self.module.bias.shape))
self.bias = Parameter(torch.empty(self.module.bias.size()))
else:
self.register_buffer("bias_mask", None)
def _weight2buffer(self):
"""
When using this wrapper for inference, call `_weight2buffer()` to make the original weight untrainable.
The best place to call this function is in `Pruner._wrap_model()`.
"""
self.weight.data = self.module.weight.data
delattr(self.module, 'weight')
self.module.register_buffer('weight', self.weight.data)
if hasattr(self.module, 'bias') and self.module.bias is not None:
self.bias.data = self.module.bias.data
delattr(self.module, 'bias')
self.module.register_buffer('bias', self.bias.data)
def _weight2parameter(self):
"""
When there is no need to record the score, or when exporting the model, call `_weight2parameter()` to make the original weight trainable.
The best place to call this function is in `Pruner._unwrap_model()`.
"""
delattr(self.module, 'weight')
self.module.weight = Parameter(torch.empty(self.weight.size()))
self.module.weight.data = torch.mul(self.weight, self.weight_mask)
if hasattr(self.module, 'bias') and self.module.bias is not None:
delattr(self.module, 'bias')
self.module.bias = Parameter(torch.empty(self.bias.size()))
self.module.bias.data = torch.mul(self.bias, self.bias_mask)
def forward(self, *inputs):
# apply mask to weight, bias
self.module.weight = torch.mul(self.weight, self.weight_mask)
if hasattr(self.module, 'bias') and self.module.bias is not None:
self.module.bias = torch.mul(self.bias, self.bias_mask)
return self.module(*inputs)
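A rough standalone illustration of what this forward pass does (the layer below is hypothetical, not part of this change): masking is an element-wise multiply applied before the wrapped module runs.
import torch
from torch.nn import Linear
from torch.nn import functional as F

layer = Linear(4, 2)
weight_mask = torch.ones_like(layer.weight)
weight_mask[0] = 0.  # prune the first output channel
masked_weight = torch.mul(layer.weight, weight_mask)
output = F.linear(torch.rand(1, 4), masked_weight, layer.bias)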
...@@ -74,12 +101,58 @@ class Pruner(Compressor):
The configuration for generating the mask.
"""
_logger.debug("Module detected to compress : %s.", layer.name)
wrapper = PrunerModuleWrapper(layer.module, layer.name, config)
assert hasattr(layer.module, 'weight'), "module %s does not have 'weight' attribute" % layer.name
# move newly registered buffers to the same device of weight
wrapper.to(layer.module.weight.device)
return wrapper
# The following `_wrap_model`, `_unwrap_model`, `get_origin2wrapped_parameter_name_map` can be merged into `Compressor`
# if the quantizer uses a similar wrapper structure.
def _wrap_model(self):
"""
Wrap all modules that need to be compressed.
Different from the parent method, `wrapper._weight2buffer()` is called after the original module is replaced with the wrapper.
"""
if not self.is_wrapped:
for _, wrapper in reversed(self.get_modules_wrapper().items()):
_setattr(self.bound_model, wrapper.name, wrapper)
wrapper._weight2buffer()
self.is_wrapped = True
def _unwrap_model(self):
"""
Unwrap all modules that need to be compressed.
Different from the parent method, `wrapper._weight2parameter()` is called after the wrapper is replaced with the original module.
"""
if self.is_wrapped:
for _, wrapper in self.get_modules_wrapper().items():
_setattr(self.bound_model, wrapper.name, wrapper.module)
wrapper._weight2parameter()
self.is_wrapped = False
def get_origin2wrapped_parameter_name_map(self) -> Dict[str, str]:
"""
Get the name mapping of parameters from original model to wrapped model.
Returns
-------
Dict[str, str]
Return a dict `{original_model_parameter_name: wrapped_model_parameter_name}`
"""
if self.is_wrapped:
wrapped_param_names = {id(param): name for name, param in self.bound_model.named_parameters()}
self._unwrap_model()
parameter_name_map = {}
for name, param in self.bound_model.named_parameters():
# If the parameter name under the wrapped module is `xxx.weight` or `xxx.bias`, the name does not change after wrapping.
# Otherwise, the parameter name `xxx.param` changes to `xxx.module.param` after wrapping.
parameter_name_map[name] = wrapped_param_names[id(param)] if id(param) in wrapped_param_names else name
self._wrap_model()
return parameter_name_map
else:
raise Exception('The parameter_name_map can only be obtained when the model is wrapped.')
def load_masks(self, masks: Dict[str, Dict[str, Tensor]]):
"""
Load existing masks onto the wrappers. You can train the model with existing masks after loading them.
......
...@@ -20,7 +20,7 @@ class Task:
_reference_counter = {}
def __init__(self, task_id: int, model_path: str, masks_path: str, config_list_path: str,
speedup: Optional[bool] = True, finetune: Optional[bool] = True, evaluate: Optional[bool] = True):
"""
Parameters
----------
...@@ -32,8 +32,8 @@ class Task:
The path of the masks applied on the model before pruning.
config_list_path
The path of the config list used in this task.
speedup
Control if this task needs speedup, True means use the scheduler default value, False means no speedup.
finetune
Control if this task needs finetune, True means use the scheduler default value, False means no finetune.
evaluate
...@@ -44,7 +44,7 @@ class Task:
self.masks_path = masks_path
self.config_list_path = config_list_path
self.speedup = speedup
self.finetune = finetune
self.evaluate = evaluate
...@@ -65,7 +65,7 @@ class Task:
'model_path': str(self.model_path),
'masks_path': str(self.masks_path),
'config_list_path': str(self.config_list_path),
'speedup': self.speedup,
'finetune': self.finetune,
'evaluate': self.evaluate,
'status': self.status,
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .basic_pruner import *
from .basic_scheduler import PruningScheduler
from .iterative_pruner import *
......
...@@ -12,7 +12,7 @@ from torch.nn import Module
from nni.algorithms.compression.v2.pytorch.base import Task, TaskResult
from nni.algorithms.compression.v2.pytorch.utils import compute_sparsity, config_list_canonical
from nni.compression.pytorch.utils import count_flops_params
from .iterative_pruner import IterativePruner, PRUNER_DICT
from .tools import TaskGenerator
...@@ -160,9 +160,13 @@ class AMCTaskGenerator(TaskGenerator):
class AMCPruner(IterativePruner):
r"""
AMC pruner leverages reinforcement learning to provide the model compression policy.
According to the authors, this learning-based compression policy outperforms the conventional rule-based compression policy by achieving a higher compression ratio,
better preserving the accuracy and freeing human labor.
For more details, please refer to `AMC: AutoML for Model Compression and Acceleration on Mobile Devices <https://arxiv.org/pdf/1802.03494.pdf>`__.
It is suggested to configure the same value for all `total_sparsity` entries in `config_list`.
AMC pruner will treat the first sparsity in `config_list` as the global sparsity.
...@@ -181,7 +185,7 @@ class AMCPruner(IterativePruner):
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
dummy_input : torch.Tensor
`dummy_input` is required for speedup and tracing the model in the RL environment.
evaluator : Callable[[Module], float]
Evaluate the pruned model and give a score.
pruning_algorithm : str
...@@ -216,6 +220,18 @@ class AMCPruner(IterativePruner):
target : str
'flops' or 'params'. Note that the sparsity in other pruners always refers to parameter sparsity, but in AMC you can choose FLOPs sparsity.
This parameter is used to explain what the sparsity setting in config_list refers to.
Examples
--------
>>> from nni.compression.pytorch.pruning import AMCPruner
>>> config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.5, 'max_sparsity_per_layer': 0.8}]
>>> dummy_input = torch.rand(...).to(device)
>>> evaluator = ...
>>> finetuner = ...
>>> pruner = AMCPruner(400, model, config_list, dummy_input, evaluator, finetuner=finetuner)
>>> pruner.compress()
The full script can be found :githublink:`here <examples/model_compress/pruning/amc_pruning_torch.py>`.
""" """
def __init__(self, total_episode: int, model: Module, config_list: List[Dict], dummy_input: Tensor, def __init__(self, total_episode: int, model: Module, config_list: List[Dict], dummy_input: Tensor,
...@@ -233,5 +249,5 @@ class AMCPruner(IterativePruner): ...@@ -233,5 +249,5 @@ class AMCPruner(IterativePruner):
ddpg_params=ddpg_params, ddpg_params=ddpg_params,
target=target) target=target)
pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params) pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=True, dummy_input=dummy_input, super().__init__(pruner, task_generator, finetuner=finetuner, speedup=True, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=False) evaluator=evaluator, reset_weight=False)
...@@ -51,7 +51,16 @@ class AutoCompressTaskGenerator(LotteryTicketTaskGenerator):
class AutoCompressPruner(IterativePruner):
r"""
For a total iteration number :math:`N`, AutoCompressPruner prunes the model that survives the previous iteration with a fixed sparsity ratio (e.g., :math:`1-{(1-0.8)}^{(1/N)}`) to achieve the overall sparsity (e.g., :math:`0.8`):
.. code-block:: bash
1. Generate sparsities distribution using SimulatedAnnealingPruner
2. Perform ADMM-based pruning to generate pruning result for the next iteration.
For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates <https://arxiv.org/abs/1907.03141>`__.
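For example, with :math:`N = 4` iterations and an overall sparsity of :math:`0.8`, each iteration prunes roughly one third of the surviving weights (a back-of-the-envelope check, not part of this change):
>>> 1 - (1 - 0.8) ** (1 / 4)  # per-iteration sparsity ratio, about 0.33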
Parameters
----------
model : Module
...@@ -70,7 +79,7 @@ class AutoCompressPruner(IterativePruner):
The model will be trained or inferenced `training_epochs` epochs.
- traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
- criterion : Callable[[Tensor, Tensor], Tensor].
The criterion function used in trainer. Take model output and target value as input, and return the loss.
- iterations : int.
...@@ -103,15 +112,43 @@ class AutoCompressPruner(IterativePruner):
finetuner : Optional[Callable[[Module], None]]
The finetuner handles all finetune logic, takes a pytorch module as input.
It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration.
speedup : bool
If set True, speed up the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor]
If `speedup` is True, `dummy_input` is required for tracing the model during speedup.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import AutoCompressPruner
>>> model = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> evaluator = ...
>>> finetuner = ...
>>> admm_params = {
>>> 'trainer': trainer,
>>> 'traced_optimizer': traced_optimizer,
>>> 'criterion': criterion,
>>> 'iterations': 10,
>>> 'training_epochs': 1
>>> }
>>> sa_params = {
>>> 'evaluator': evaluator
>>> }
>>> pruner = AutoCompressPruner(model, config_list, 10, admm_params, sa_params, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
The full script can be found :githublink:`here <examples/model_compress/pruning/auto_compress_pruner.py>`.
""" """
def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict, def __init__(self, model: Module, config_list: List[Dict], total_iteration: int, admm_params: Dict,
sa_params: Dict, log_dir: str = '.', keep_intermediate_result: bool = False, sa_params: Dict, log_dir: str = '.', keep_intermediate_result: bool = False,
finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False, finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False,
dummy_input: Optional[Tensor] = None, evaluator: Callable[[Module], float] = None): dummy_input: Optional[Tensor] = None, evaluator: Callable[[Module], float] = None):
task_generator = AutoCompressTaskGenerator(total_iteration=total_iteration, task_generator = AutoCompressTaskGenerator(total_iteration=total_iteration,
origin_model=model, origin_model=model,
...@@ -121,6 +158,21 @@ class AutoCompressPruner(IterativePruner): ...@@ -121,6 +158,21 @@ class AutoCompressPruner(IterativePruner):
keep_intermediate_result=keep_intermediate_result) keep_intermediate_result=keep_intermediate_result)
if 'traced_optimizer' in admm_params: if 'traced_optimizer' in admm_params:
admm_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, admm_params['traced_optimizer']) admm_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, admm_params['traced_optimizer'])
# granularity in the ADMM stage will align with the SA stage if 'granularity' is not specified
if 'granularity' not in admm_params:
# fine-grained admm pruning will be used in auto-compress only if level pruning or fine-grained admm pruning is used in the SA stage
if 'pruning_algorithm' in sa_params:
sa_algo = sa_params['pruning_algorithm']
sa_algo_params = sa_params.get('pruning_params')
if sa_algo in ['level']:
admm_params['granularity'] = 'fine-grained'
elif sa_algo in ['admm'] and (sa_algo_params is not None) and not (sa_algo_params.get('granularity') == 'coarse-grained'):
admm_params['granularity'] = 'fine-grained'
else:
admm_params['granularity'] = 'coarse-grained'
else:
admm_params['granularity'] = 'fine-grained'
pruner = ADMMPruner(None, None, **admm_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=False)
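A rough sketch of how the granularity alignment above plays out for a few assumed `sa_params` values (illustrative only, not part of this change):
# {'pruning_algorithm': 'level'}                                                      -> admm granularity 'fine-grained'
# {'pruning_algorithm': 'admm', 'pruning_params': {}}                                 -> 'fine-grained'
# {'pruning_algorithm': 'admm', 'pruning_params': {'granularity': 'coarse-grained'}}  -> 'coarse-grained'
# {'pruning_algorithm': 'l1'}                                                         -> 'coarse-grained'
# sa_params without 'pruning_algorithm'                                               -> 'fine-grained'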
...@@ -125,11 +125,14 @@ class BasicPruner(Pruner):
class LevelPruner(BasicPruner):
r"""
This is a basic pruner, and in some papers it is called magnitude pruning or fine-grained pruning.
It will mask the smallest magnitude weights in each specified layer by a sparsity ratio configured in the config list.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -145,18 +148,18 @@ class LevelPruner(BasicPruner):
operation an example, weight tensor will be split into sub block whose shape is aligned to
balance_gran. Then fine-grained pruning will be applied inside each sub block. This sparsity
pattern has more chance to achieve a better trade-off between model performance and hardware
acceleration. Please refer to the related paper for further information `Balanced Sparsity for
Efficient DNN Inference on GPU <https://arxiv.org/pdf/1811.00206.pdf>`__.
balance_gran : list
balance_gran is for the special sparse pattern balanced sparsity. Default value is None, which means pruning
without awareness of balance, namely normal fine-grained pruning.
If passing a list of int, LevelPruner will prune the model in the granularity of multi-dimension blocks.
Note that the length of balance_gran should not be larger than the tensor dimension.
For instance, in a Linear operation, the length of balance_gran should be equal to or smaller than two since
the dimension of the pruning weight is two. If setting balance_gran = [5, 5] and sparsity = 0.6, the pruner will
divide the pruning parameters into multiple blocks with tile size (5, 5); each bank has 5 * 5 values
and 10 values would be kept after pruning. Fine-grained pruning is applied in the granularity of blocks
so that each block keeps the same number of non-zero values after pruning. Such a pruning method "balances"
the non-zero values in the tensor, which creates a chance for better hardware acceleration.
Note: If the length of the given balance_gran is smaller than the length of the pruning tensor shape, it will be made up
...@@ -181,7 +184,16 @@ class LevelPruner(BasicPruner):
pruning result: Weight tensor whose shape is [64, 64] will be split into 4 [32, 32] sub blocks.
Each sub block will be pruned 256 values.
Examples
--------
>>> model = ...
>>> from nni.compression.pytorch.pruning import LevelPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
>>> pruner = LevelPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
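A balanced-sparsity variant is sketched below (assuming the 'balance' mode and the balance_gran parameter described above; treat the exact values as illustrative):
>>> balanced_pruner = LevelPruner(model, config_list, mode='balance', balance_gran=[5, 5])
>>> masked_model, masks = balanced_pruner.compress()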
For detailed example please refer to :githublink:`examples/model_compress/pruning/level_pruning_torch.py <examples/model_compress/pruning/level_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None): def __init__(self, model: Module, config_list: List[Dict], mode: str = "normal", balance_gran: Optional[List] = None):
...@@ -215,7 +227,7 @@ class NormPruner(BasicPruner): ...@@ -215,7 +227,7 @@ class NormPruner(BasicPruner):
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -272,11 +284,20 @@ class NormPruner(BasicPruner): ...@@ -272,11 +284,20 @@ class NormPruner(BasicPruner):
class L1NormPruner(NormPruner): class L1NormPruner(NormPruner):
""" r"""
L1 norm pruner computes the l1 norm of the layer weight on the first dimension,
then prune the weight blocks on this dimension with smaller l1 norm values.
i.e., compute the l1 norm of the filters in convolution layer as metric values,
compute the l1 norm of the weight by rows in linear layer as metric values.
For more details, please refer to `PRUNING FILTERS FOR EFFICIENT CONVNETS <https://arxiv.org/abs/1608.08710>`__.
In addition, L1 norm pruner also supports dependency-aware mode.
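As a rough illustration of the metric (not part of this change), the per-filter l1 norm of a convolution weight can be computed as:
>>> import torch
>>> conv_weight = torch.randn(8, 4, 3, 3)  # (out_channels, in_channels, kernel_h, kernel_w)
>>> conv_weight.abs().sum(dim=(1, 2, 3)).shape  # one l1 norm per filter
torch.Size([8])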
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -305,16 +326,21 @@ class L1NormPruner(NormPruner):
class L2NormPruner(NormPruner):
r"""
L2 norm pruner is a variant of L1 norm pruner.
The only difference between the L2 norm pruner and the L1 norm pruner is that the L2 norm pruner prunes the weights with the smallest L2 norm.
L2 norm pruner also supports dependency-aware mode.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in L2NormPruner.
- op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
...@@ -330,6 +356,16 @@ class L2NormPruner(NormPruner):
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should be on the same device as the model.
Examples
--------
>>> model = ...
>>> from nni.compression.pytorch.pruning import L2NormPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = L2NormPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/norm_pruning_torch.py <examples/model_compress/pruning/norm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], def __init__(self, model: Module, config_list: List[Dict],
...@@ -338,11 +374,18 @@ class L2NormPruner(NormPruner): ...@@ -338,11 +374,18 @@ class L2NormPruner(NormPruner):
class FPGMPruner(BasicPruner): class FPGMPruner(BasicPruner):
""" r"""
FPGM pruner prunes the blocks of the weight on the first dimension with the smallest geometric median.
FPGM chooses the weight blocks with the most replaceable contribution.
For more details, please refer to `Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration <https://arxiv.org/abs/1811.00250>`__.
FPGM pruner also supports dependency-aware mode.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -363,6 +406,16 @@ class FPGMPruner(BasicPruner):
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should be on the same device as the model.
Examples
--------
>>> model = ...
>>> from nni.compression.pytorch.pruning import FPGMPruner
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = FPGMPruner(model, config_list)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/fpgm_pruning_torch.py <examples/model_compress/pruning/fpgm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], def __init__(self, model: Module, config_list: List[Dict],
...@@ -396,11 +449,16 @@ class FPGMPruner(BasicPruner): ...@@ -396,11 +449,16 @@ class FPGMPruner(BasicPruner):
class SlimPruner(BasicPruner): class SlimPruner(BasicPruner):
""" r"""
Slim pruner adds sparsity regularization on the scaling factors of batch normalization (BN) layers during training to identify unimportant channels.
The channels with small scaling factor values will be pruned.
For more details, please refer to `Learning Efficient Convolutional Networks through Network Slimming <https://arxiv.org/abs/1708.06519>`__\.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -432,7 +490,7 @@ class SlimPruner(BasicPruner):
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs : int
...@@ -444,6 +502,21 @@ class SlimPruner(BasicPruner):
If prune the model in a global way, all layer weights with same config will be considered uniformly.
That means a single layer may not reach or exceed the sparsity setting in config,
but the total pruned weights meet the sparsity setting.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import SlimPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
>>> pruner = SlimPruner(model, config_list, trainer, traced_optimizer, criterion, training_epochs=1)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/slim_pruning_torch.py <examples/model_compress/pruning/slim_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
...@@ -507,7 +580,7 @@ class ActivationPruner(BasicPruner): ...@@ -507,7 +580,7 @@ class ActivationPruner(BasicPruner):
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
Model to be pruned Model to be pruned.
config_list : List[Dict] config_list : List[Dict]
Supported keys: Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed. - sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -537,7 +610,7 @@ class ActivationPruner(BasicPruner): ...@@ -537,7 +610,7 @@ class ActivationPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
...@@ -627,6 +700,82 @@ class ActivationPruner(BasicPruner):
class ActivationAPoZRankPruner(ActivationPruner):
r"""
Activation APoZ rank pruner is a pruner which prunes on the first weight dimension,
with the smallest importance criterion ``APoZ`` calculated from the output activations of convolution layers to achieve a preset level of network sparsity.
The pruning criterion ``APoZ`` is explained in the paper `Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures <https://arxiv.org/abs/1607.03250>`__.
The APoZ is defined as:
:math:`APoZ_{c}^{(i)} = APoZ\left(O_{c}^{(i)}\right)=\frac{\sum_{k}^{N} \sum_{j}^{M} f\left(O_{c, j}^{(i)}(k)=0\right)}{N \times M}`
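As a rough numeric illustration (not part of this change), the APoZ of one channel is simply the fraction of zero activations over all samples and positions:
>>> import torch
>>> activations = torch.tensor([[0., 1., 0., 2.], [3., 0., 0., 0.]])  # N=2 samples, M=4 positions
>>> torch.eq(activations, 0.).float().mean()
tensor(0.6250)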
Activation APoZ rank pruner also supports dependency-aware mode.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationAPoZRankPruner.
- op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should be on the same device as the model.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import ActivationAPoZRankPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ActivationAPoZRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/activation_pruning_torch.py <examples/model_compress/pruning/activation_pruning_torch.py>`
"""
def _activation_trans(self, output: Tensor) -> Tensor:
# return a matrix where the positions of zeros in `output` are one, and the others are zero.
return torch.eq(self._activation(output.detach()), torch.zeros_like(output)).type_as(output)
...@@ -636,6 +785,80 @@ class ActivationAPoZRankPruner(ActivationPruner):
class ActivationMeanRankPruner(ActivationPruner):
r"""
Activation mean rank pruner is a pruner which prunes on the first weight dimension,
with the smallest importance criterion ``mean activation`` calculated from the output activations of convolution layers to achieve a preset level of network sparsity.
The pruning criterion ``mean activation`` is explained in section 2.2 of the paper `Pruning Convolutional Neural Networks for Resource Efficient Inference <https://arxiv.org/abs/1611.06440>`__.
Activation mean rank pruner also supports dependency-aware mode.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
- sparsity_per_layer : Equals to sparsity.
- op_types : Conv2d and Linear are supported in ActivationPruner.
- op_names : Operation names to be pruned.
- op_partial_names: Operation partial names to be pruned, will be autocompleted by NNI.
- exclude : Set True then the layers setting by op_types and op_names will be excluded from pruning.
trainer : Callable[[Module, Optimizer, Callable], None]
A callable function used to train model or just inference. Take model, optimizer, criterion as input.
The model will be trained or inferenced `training_epochs` epochs.
Example::
def trainer(model: Module, optimizer: Optimizer, criterion: Callable[[Tensor, Tensor], Tensor]):
training = model.training
model.train(mode=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
# If you don't want to update the model, you can skip `optimizer.step()`, and set train mode False.
optimizer.step()
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches
The batch number used to collect activations.
mode : str
'normal' or 'dependency_aware'.
If prune the model in a dependency-aware way, this pruner will
prune the model according to the activation-based metrics and the channel-dependency or
group-dependency of the model. In this way, the pruner will force the conv layers
that have dependencies to prune the same channels, so the speedup module can better
harvest the speed benefit from the pruned model. Note that, if set 'dependency_aware'
, the dummy_input cannot be None, because the pruner needs a dummy input to trace the
dependency between the conv layers.
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should be on the same device as the model.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import ActivationMeanRankPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/activation_pruning_torch.py <examples/model_compress/pruning/activation_pruning_torch.py>`
"""
def _activation_trans(self, output: Tensor) -> Tensor:
# return the activation of `output` directly.
return self._activation(output.detach())
...@@ -645,11 +868,21 @@ class ActivationMeanRankPruner(ActivationPruner):
class TaylorFOWeightPruner(BasicPruner):
r"""
Taylor FO weight pruner is a pruner which prunes on the first weight dimension,
based on estimated importance calculated from the first order taylor expansion on weights to achieve a preset level of network sparsity.
The estimated importance is defined in the paper `Importance Estimation for Neural Network Pruning <http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf>`__.
:math:`\widehat{\mathcal{I}}_{\mathcal{S}}^{(1)}(\mathbf{W}) \triangleq \sum_{s \in \mathcal{S}} \mathcal{I}_{s}^{(1)}(\mathbf{W})=\sum_{s \in \mathcal{S}}\left(g_{s} w_{s}\right)^{2}`
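As a rough numeric illustration (not part of this change), the importance is the squared product of weight and gradient, aggregated over the pruned dimension:
>>> import torch
>>> weight = torch.tensor([[0.5, -1.0], [2.0, 0.1]])
>>> grad = torch.tensor([[0.2, 0.3], [-0.1, 1.0]])
>>> (weight * grad).pow(2).sum(dim=1)  # one importance score per output channel
tensor([0.1000, 0.0500])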
Taylor FO weight pruner also supports dependency-aware mode.
What's more, we provide a global-sort mode for this pruner which is aligned with paper implementation.
Parameters
----------
model : torch.nn.Module
Model to be pruned.
config_list : List[Dict]
Supported keys:
- sparsity : This is to specify the sparsity for each layer in this config to be compressed.
...@@ -681,7 +914,7 @@ class TaylorFOWeightPruner(BasicPruner):
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_batches : int
...@@ -703,6 +936,21 @@ class TaylorFOWeightPruner(BasicPruner):
dummy_input : Optional[torch.Tensor]
The dummy input to analyze the topology constraints. Note that, the dummy_input
should be on the same device as the model.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import TaylorFOWeightPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = TaylorFOWeightPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/taylorfo_pruning_torch.py <examples/model_compress/pruning/taylorfo_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
...@@ -751,7 +999,7 @@ class TaylorFOWeightPruner(BasicPruner): ...@@ -751,7 +999,7 @@ class TaylorFOWeightPruner(BasicPruner):
return (weight_tensor.detach() * grad.detach()).data.pow(2) return (weight_tensor.detach() * grad.detach()).data.pow(2)
def reset_tools(self): def reset_tools(self):
hook_targets = {layer_info.name: layer_info.module.weight for layer_info, _ in self._detect_modules_to_compress()} hook_targets = {name: wrapper.weight for name, wrapper in self.get_modules_wrapper().items()}
collector_info = HookCollectorInfo(hook_targets, 'tensor', self._collector) collector_info = HookCollectorInfo(hook_targets, 'tensor', self._collector)
if self.data_collector is None: if self.data_collector is None:
self.data_collector = SingleHookTrainerBasedDataCollector(self, self.trainer, self.optimizer_helper, self.criterion, self.data_collector = SingleHookTrainerBasedDataCollector(self, self.trainer, self.optimizer_helper, self.criterion,
...@@ -772,13 +1020,17 @@ class TaylorFOWeightPruner(BasicPruner): ...@@ -772,13 +1020,17 @@ class TaylorFOWeightPruner(BasicPruner):
class ADMMPruner(BasicPruner):
r"""
Alternating Direction Method of Multipliers (ADMM) is a mathematical optimization technique
that decomposes the original nonconvex problem into two subproblems which can be solved iteratively.
In the weight pruning problem, these two subproblems are solved via 1) a gradient descent algorithm and 2) a Euclidean projection, respectively.
During the process of solving these two subproblems, the weights of the original model will be changed.
Then a fine-grained pruning will be applied to prune the model according to the given config list.
This solution framework applies both to non-structured and different variations of structured pruning schemes.
For more details, please refer to `A Systematic DNN Weight Pruning Framework using Alternating Direction Method of Multipliers <https://arxiv.org/abs/1804.03294>`__.
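A minimal sketch of the Euclidean-projection step (keeping the largest-magnitude weights for a target sparsity; the helper name is hypothetical and assumes fine-grained granularity):
>>> import torch
>>> def project_to_sparsity(weight, sparsity):
...     # zero out the smallest-magnitude entries so roughly `sparsity` of the weights become zero
...     k = int(weight.numel() * sparsity)
...     if k == 0:
...         return weight.clone()
...     threshold = weight.abs().flatten().kthvalue(k).values
...     return weight * (weight.abs() > threshold)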
Parameters
----------
...@@ -814,17 +1066,38 @@ class ADMMPruner(BasicPruner):
model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss.
iterations : int
The total iteration number in admm pruning algorithm.
training_epochs : int
The epoch number for training model in each iteration.
granularity : str
'fine-grained' or 'coarse-grained'.
If 'coarse-grained' is set, ADMM pruner will generate masks channel-wise on the output dimension.
In the original admm pruning paper, the authors implemented fine-grained admm pruning.
In the auto-compress paper, the authors used coarse-grained admm pruning.
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import ADMMPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = ADMMPruner(model, config_list, trainer, traced_optimizer, criterion, iterations=10, training_epochs=1)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/admm_pruning_torch.py <examples/model_compress/pruning/admm_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], iterations: int, training_epochs: int): traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], iterations: int,
training_epochs: int, granularity: str = 'fine-grained'):
self.trainer = trainer self.trainer = trainer
if isinstance(traced_optimizer, OptimizerConstructHelper): if isinstance(traced_optimizer, OptimizerConstructHelper):
self.optimizer_helper = traced_optimizer self.optimizer_helper = traced_optimizer
...@@ -833,6 +1106,8 @@ class ADMMPruner(BasicPruner): ...@@ -833,6 +1106,8 @@ class ADMMPruner(BasicPruner):
self.criterion = criterion self.criterion = criterion
self.iterations = iterations self.iterations = iterations
self.training_epochs = training_epochs self.training_epochs = training_epochs
assert granularity in ['fine-grained', 'coarse-grained']
self.granularity = granularity
super().__init__(model, config_list) super().__init__(model, config_list)
def reset(self, model: Optional[Module], config_list: Optional[List[Dict]]): def reset(self, model: Optional[Module], config_list: Optional[List[Dict]]):
...@@ -864,9 +1139,15 @@ class ADMMPruner(BasicPruner): ...@@ -864,9 +1139,15 @@ class ADMMPruner(BasicPruner):
else: else:
self.data_collector.reset() self.data_collector.reset()
if self.metrics_calculator is None: if self.metrics_calculator is None:
self.metrics_calculator = NormMetricsCalculator() if self.granularity == 'fine-grained':
self.metrics_calculator = NormMetricsCalculator(p=1)
elif self.granularity == 'coarse-grained':
self.metrics_calculator = NormMetricsCalculator(dim=0, p=1)
if self.sparsity_allocator is None: if self.sparsity_allocator is None:
self.sparsity_allocator = NormalSparsityAllocator(self) if self.granularity == 'fine-grained':
self.sparsity_allocator = NormalSparsityAllocator(self)
elif self.granularity == 'coarse-grained':
self.sparsity_allocator = NormalSparsityAllocator(self, dim=0)
def compress(self) -> Tuple[Module, Dict]: def compress(self) -> Tuple[Module, Dict]:
""" """
......
...@@ -26,10 +26,10 @@ class PruningScheduler(BasePruningScheduler): ...@@ -26,10 +26,10 @@ class PruningScheduler(BasePruningScheduler):
finetuner finetuner
The finetuner handled all finetune logic, use a pytorch module as input. The finetuner handled all finetune logic, use a pytorch module as input.
It will be called at the end of each iteration if reset_weight is False, will be called at the beginning of each iteration otherwise. It will be called at the end of each iteration if reset_weight is False, will be called at the beginning of each iteration otherwise.
speed_up speedup
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speedup the model at the end of each iteration to make the pruned model compact.
dummy_input dummy_input
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
evaluator evaluator
Evaluate the pruned model and give a score. Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
...@@ -37,12 +37,12 @@ class PruningScheduler(BasePruningScheduler): ...@@ -37,12 +37,12 @@ class PruningScheduler(BasePruningScheduler):
If set True, the model weight will reset to the origin model weight at the end of each iteration step. If set True, the model weight will reset to the origin model weight at the end of each iteration step.
""" """
def __init__(self, pruner: Pruner, task_generator: TaskGenerator, finetuner: Callable[[Module], None] = None, def __init__(self, pruner: Pruner, task_generator: TaskGenerator, finetuner: Callable[[Module], None] = None,
speed_up: bool = False, dummy_input: Tensor = None, evaluator: Optional[Callable[[Module], float]] = None, speedup: bool = False, dummy_input: Tensor = None, evaluator: Optional[Callable[[Module], float]] = None,
reset_weight: bool = False): reset_weight: bool = False):
self.pruner = pruner self.pruner = pruner
self.task_generator = task_generator self.task_generator = task_generator
self.finetuner = finetuner self.finetuner = finetuner
self.speed_up = speed_up self.speedup = speedup
self.dummy_input = dummy_input self.dummy_input = dummy_input
self.evaluator = evaluator self.evaluator = evaluator
self.reset_weight = reset_weight self.reset_weight = reset_weight
...@@ -58,7 +58,7 @@ class PruningScheduler(BasePruningScheduler): ...@@ -58,7 +58,7 @@ class PruningScheduler(BasePruningScheduler):
def pruning_one_step_normal(self, task: Task) -> TaskResult: def pruning_one_step_normal(self, task: Task) -> TaskResult:
""" """
generate masks -> speed up -> finetune -> evaluate generate masks -> speedup -> finetune -> evaluate
""" """
model, masks, config_list = task.load_data() model, masks, config_list = task.load_data()
self.pruner.reset(model, config_list) self.pruner.reset(model, config_list)
...@@ -72,14 +72,14 @@ class PruningScheduler(BasePruningScheduler): ...@@ -72,14 +72,14 @@ class PruningScheduler(BasePruningScheduler):
self.pruner.show_pruned_weights() self.pruner.show_pruned_weights()
self.pruner._unwrap_model() self.pruner._unwrap_model()
# speed up # speedup
if self.speed_up and task.speed_up: if self.speedup and task.speedup:
ModelSpeedup(compact_model, self.dummy_input, pruner_generated_masks).speedup_model() ModelSpeedup(compact_model, self.dummy_input, pruner_generated_masks).speedup_model()
compact_model_masks = {} compact_model_masks = {}
# finetune # finetune
if self.finetuner is not None and task.finetune: if self.finetuner is not None and task.finetune:
if self.speed_up: if self.speedup:
self.finetuner(compact_model) self.finetuner(compact_model)
else: else:
self.pruner._wrap_model() self.pruner._wrap_model()
...@@ -88,7 +88,7 @@ class PruningScheduler(BasePruningScheduler): ...@@ -88,7 +88,7 @@ class PruningScheduler(BasePruningScheduler):
# evaluate # evaluate
if self.evaluator is not None and task.evaluate: if self.evaluator is not None and task.evaluate:
if self.speed_up: if self.speedup:
score = self.evaluator(compact_model) score = self.evaluator(compact_model)
else: else:
self.pruner._wrap_model() self.pruner._wrap_model()
...@@ -104,7 +104,7 @@ class PruningScheduler(BasePruningScheduler): ...@@ -104,7 +104,7 @@ class PruningScheduler(BasePruningScheduler):
def pruning_one_step_reset_weight(self, task: Task) -> TaskResult: def pruning_one_step_reset_weight(self, task: Task) -> TaskResult:
""" """
finetune -> generate masks -> reset weight -> speed up -> evaluate finetune -> generate masks -> reset weight -> speedup -> evaluate
""" """
model, masks, config_list = task.load_data() model, masks, config_list = task.load_data()
checkpoint = deepcopy(model.state_dict()) checkpoint = deepcopy(model.state_dict())
...@@ -126,14 +126,14 @@ class PruningScheduler(BasePruningScheduler): ...@@ -126,14 +126,14 @@ class PruningScheduler(BasePruningScheduler):
# reset model weight # reset model weight
compact_model.load_state_dict(checkpoint) compact_model.load_state_dict(checkpoint)
# speed up # speedup
if self.speed_up and task.speed_up: if self.speedup and task.speedup:
ModelSpeedup(compact_model, self.dummy_input, pruner_generated_masks).speedup_model() ModelSpeedup(compact_model, self.dummy_input, pruner_generated_masks).speedup_model()
compact_model_masks = {} compact_model_masks = {}
# evaluate # evaluate
if self.evaluator is not None and task.evaluate: if self.evaluator is not None and task.evaluate:
if self.speed_up: if self.speedup:
score = self.evaluator(compact_model) score = self.evaluator(compact_model)
else: else:
self.pruner._wrap_model() self.pruner._wrap_model()
......
...@@ -70,7 +70,11 @@ class IterativePruner(PruningScheduler): ...@@ -70,7 +70,11 @@ class IterativePruner(PruningScheduler):
class LinearPruner(IterativePruner): class LinearPruner(IterativePruner):
""" r"""
Linear pruner is an iterative pruner; it increases the sparsity evenly from zero over the iterations.
For example, if the final sparsity is set to 0.5 and the iteration number is 5, the sparsities used in each iteration are ``[0, 0.1, 0.2, 0.3, 0.4, 0.5]``.
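The per-iteration sparsities above can be reproduced with plain Python (illustrative only):

>>> final_sparsity, total_iteration = 0.5, 5
>>> [round(final_sparsity * i / total_iteration, 2) for i in range(total_iteration + 1)]
[0.0, 0.1, 0.2, 0.3, 0.4, 0.5]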
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -89,20 +93,31 @@ class LinearPruner(IterativePruner): ...@@ -89,20 +93,31 @@ class LinearPruner(IterativePruner):
finetuner : Optional[Callable[[Module], None]] finetuner : Optional[Callable[[Module], None]]
The finetuner handled all finetune logic, use a pytorch module as input. The finetuner handled all finetune logic, use a pytorch module as input.
It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration. It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration.
speed_up : bool speedup : bool
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speedup the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
evaluator : Optional[Callable[[Module], float]] evaluator : Optional[Callable[[Module], float]]
Evaluate the pruned model and give a score. Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.compression.pytorch.pruning import LinearPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = LinearPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/iterative_pruning_torch.py <examples/model_compress/pruning/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False, total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False,
finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False, dummy_input: Optional[Tensor] = None, finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False, dummy_input: Optional[Tensor] = None,
evaluator: Optional[Callable[[Module], float]] = None, pruning_params: Dict = {}): evaluator: Optional[Callable[[Module], float]] = None, pruning_params: Dict = {}):
task_generator = LinearTaskGenerator(total_iteration=total_iteration, task_generator = LinearTaskGenerator(total_iteration=total_iteration,
origin_model=model, origin_model=model,
...@@ -112,12 +127,19 @@ class LinearPruner(IterativePruner): ...@@ -112,12 +127,19 @@ class LinearPruner(IterativePruner):
if 'traced_optimizer' in pruning_params: if 'traced_optimizer' in pruning_params:
pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer']) pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer'])
pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params) pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input, super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=False) evaluator=evaluator, reset_weight=False)
class AGPPruner(IterativePruner): class AGPPruner(IterativePruner):
""" r"""
This is an iterative pruner, in which the sparsity is increased from an initial sparsity value :math:`s_{i}` (usually 0) to a final sparsity value :math:`s_{f}` over a span of :math:`n` pruning iterations,
starting at training step :math:`t_{0}` and with pruning frequency :math:`\Delta t`:
:math:`s_{t}=s_{f}+\left(s_{i}-s_{f}\right)\left(1-\frac{t-t_{0}}{n \Delta t}\right)^{3} \text { for } t \in\left\{t_{0}, t_{0}+\Delta t, \ldots, t_{0} + n \Delta t\right\}`
For more details please refer to `To prune, or not to prune: exploring the efficacy of pruning for model compression <https://arxiv.org/abs/1710.01878>`__\.
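A quick numeric check of this schedule (illustrative; assuming :math:`s_{i}=0`, :math:`s_{f}=0.8`, :math:`t_{0}=0`, :math:`\Delta t=1`, :math:`n=10`)::

    si, sf, t0, dt, n = 0.0, 0.8, 0, 1, 10
    schedule = [sf + (si - sf) * (1 - (t - t0) / (n * dt)) ** 3 for t in range(t0, t0 + n * dt + 1, dt)]
    # roughly [0.0, 0.217, 0.390, 0.526, 0.627, 0.700, 0.749, 0.778, 0.794, 0.799, 0.8]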
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -136,20 +158,31 @@ class AGPPruner(IterativePruner): ...@@ -136,20 +158,31 @@ class AGPPruner(IterativePruner):
finetuner : Optional[Callable[[Module], None]] finetuner : Optional[Callable[[Module], None]]
The finetuner handled all finetune logic, use a pytorch module as input. The finetuner handled all finetune logic, use a pytorch module as input.
It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration. It will be called at the end of each iteration, usually for neutralizing the accuracy loss brought by the pruning in this iteration.
speed_up : bool speedup : bool
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speedup the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
evaluator : Optional[Callable[[Module], float]] evaluator : Optional[Callable[[Module], float]]
Evaluate the pruned model and give a score. Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.compression.pytorch.pruning import AGPPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = AGPPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/iterative_pruning_torch.py <examples/model_compress/pruning/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False, total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False,
finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False, dummy_input: Optional[Tensor] = None, finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False, dummy_input: Optional[Tensor] = None,
evaluator: Optional[Callable[[Module], float]] = None, pruning_params: Dict = {}): evaluator: Optional[Callable[[Module], float]] = None, pruning_params: Dict = {}):
task_generator = AGPTaskGenerator(total_iteration=total_iteration, task_generator = AGPTaskGenerator(total_iteration=total_iteration,
origin_model=model, origin_model=model,
...@@ -159,12 +192,30 @@ class AGPPruner(IterativePruner): ...@@ -159,12 +192,30 @@ class AGPPruner(IterativePruner):
if 'traced_optimizer' in pruning_params: if 'traced_optimizer' in pruning_params:
pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer']) pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer'])
pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params) pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input, super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=False) evaluator=evaluator, reset_weight=False)
class LotteryTicketPruner(IterativePruner): class LotteryTicketPruner(IterativePruner):
""" r"""
`The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks <https://arxiv.org/abs/1803.03635>`__\ ,
by Jonathan Frankle and Michael Carbin, provides comprehensive measurement and analysis,
and articulates the *lottery ticket hypothesis*\ : dense, randomly-initialized, feed-forward networks contain subnetworks (*winning tickets*\ ) that
-- when trained in isolation -- reach test accuracy comparable to the original network in a similar number of iterations.
In this paper, the authors use the following process to prune a model, called *iterative pruning*\ :
#. Randomly initialize a neural network :math:`f(x; \theta_0)` (where :math:`\theta_0 \sim \mathcal{D}_{\theta}`).
#. Train the network for :math:`j` iterations, arriving at parameters :math:`\theta_j`.
#. Prune :math:`p\%` of the parameters in :math:`\theta_j`, creating a mask :math:`m`.
#. Reset the remaining parameters to their values in :math:`\theta_0`, creating the winning ticket :math:`f(x; m \odot \theta_0)`.
#. Repeat steps 2, 3, and 4.
If the configured final sparsity is :math:`P` (e.g., 0.8) and there are :math:`n` pruning iterations,
each iteration prunes :math:`1-(1-P)^{1/n}` of the weights that survived the previous round.
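For example, with :math:`P = 0.8` and :math:`n = 5`, each round prunes roughly 27.5% of the surviving weights (plain Python, illustrative only):

>>> P, n = 0.8, 5
>>> round(1 - (1 - P) ** (1 / n), 4)
0.2752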
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -183,10 +234,10 @@ class LotteryTicketPruner(IterativePruner): ...@@ -183,10 +234,10 @@ class LotteryTicketPruner(IterativePruner):
finetuner : Optional[Callable[[Module], None]] finetuner : Optional[Callable[[Module], None]]
The finetuner handled all finetune logic, use a pytorch module as input. The finetuner handled all finetune logic, use a pytorch module as input.
It will be called at the end of each iteration if reset_weight is False, will be called at the beginning of each iteration otherwise. It will be called at the end of each iteration if reset_weight is False, will be called at the beginning of each iteration otherwise.
speed_up : bool speedup : bool
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speedup the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
evaluator : Optional[Callable[[Module], float]] evaluator : Optional[Callable[[Module], float]]
Evaluate the pruned model and give a score. Evaluate the pruned model and give a score.
If evaluator is None, the best result refers to the latest result. If evaluator is None, the best result refers to the latest result.
...@@ -194,11 +245,23 @@ class LotteryTicketPruner(IterativePruner): ...@@ -194,11 +245,23 @@ class LotteryTicketPruner(IterativePruner):
If set True, the model weight will reset to the original model weight at the end of each iteration step. If set True, the model weight will reset to the original model weight at the end of each iteration step.
pruning_params : Dict pruning_params : Dict
If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in. If the chosen pruning_algorithm has extra parameters, put them as a dict to pass in.
Examples
--------
>>> from nni.compression.pytorch.pruning import LotteryTicketPruner
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> finetuner = ...
>>> pruner = LotteryTicketPruner(model, config_list, pruning_algorithm='l1', total_iteration=10, finetuner=finetuner, reset_weight=True)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/iterative_pruning_torch.py <examples/model_compress/pruning/iterative_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str, def __init__(self, model: Module, config_list: List[Dict], pruning_algorithm: str,
total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False, total_iteration: int, log_dir: str = '.', keep_intermediate_result: bool = False,
finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False, dummy_input: Optional[Tensor] = None, finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False, dummy_input: Optional[Tensor] = None,
evaluator: Optional[Callable[[Module], float]] = None, reset_weight: bool = True, evaluator: Optional[Callable[[Module], float]] = None, reset_weight: bool = True,
pruning_params: Dict = {}): pruning_params: Dict = {}):
task_generator = LotteryTicketTaskGenerator(total_iteration=total_iteration, task_generator = LotteryTicketTaskGenerator(total_iteration=total_iteration,
...@@ -209,12 +272,25 @@ class LotteryTicketPruner(IterativePruner): ...@@ -209,12 +272,25 @@ class LotteryTicketPruner(IterativePruner):
if 'traced_optimizer' in pruning_params: if 'traced_optimizer' in pruning_params:
pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer']) pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer'])
pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params) pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input, super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=reset_weight) evaluator=evaluator, reset_weight=reset_weight)
class SimulatedAnnealingPruner(IterativePruner): class SimulatedAnnealingPruner(IterativePruner):
""" """
We implement a guided heuristic search method, the Simulated Annealing (SA) algorithm. As mentioned in the paper, this method is an enhanced guided search based on prior experience.
The enhanced SA technique is based on the observation that a DNN layer with a larger number of weights often tolerates a higher degree of compression with less impact on overall accuracy.
* Randomly initialize a pruning rate distribution (sparsities).
* While current_temperature > stop_temperature:
#. Generate a perturbation of the current distribution.
#. Perform a fast evaluation on the perturbed distribution.
#. Accept the perturbation according to the performance and probability; if not accepted, return to step 1.
#. Cool down: current_temperature <- current_temperature * cool_down_rate.
For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates <https://arxiv.org/abs/1907.03141>`__.
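A minimal, self-contained sketch of this annealing loop over layer sparsities (illustrative only; ``fast_evaluate`` is a made-up stand-in for pruning with the candidate sparsities and evaluating the result, and the constants mirror the default parameters below)::

    import math, random

    def fast_evaluate(sparsities):
        # stand-in for "prune with these sparsities, then evaluate"; here: prefer pruning later layers
        return -sum(s * w for s, w in zip(sparsities, [3.0, 2.0, 1.0]))

    current = [random.uniform(0.1, 0.9) for _ in range(3)]   # randomly initialized sparsities
    current_score = fast_evaluate(current)
    temperature, stop_temperature, cool_down_rate, magnitude = 100.0, 20.0, 0.9, 0.35

    while temperature > stop_temperature:
        while True:
            perturbed = [min(max(s + random.uniform(-magnitude, magnitude), 0.01), 0.99) for s in current]
            score = fast_evaluate(perturbed)
            delta = score - current_score
            # accept better results always, worse results with a temperature-dependent probability
            if delta > 0 or random.random() < math.exp(delta / temperature):
                current, current_score = perturbed, score
                break
        temperature *= cool_down_rate   # cool down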
Parameters Parameters
---------- ----------
model : Module model : Module
...@@ -242,16 +318,29 @@ class SimulatedAnnealingPruner(IterativePruner): ...@@ -242,16 +318,29 @@ class SimulatedAnnealingPruner(IterativePruner):
If keeping the intermediate result, including intermediate model and masks during each iteration. If keeping the intermediate result, including intermediate model and masks during each iteration.
finetuner : Optional[Callable[[Module], None]] finetuner : Optional[Callable[[Module], None]]
The finetuner handled all finetune logic, use a pytorch module as input, will be called in each iteration. The finetuner handled all finetune logic, use a pytorch module as input, will be called in each iteration.
speed_up : bool speedup : bool
If set True, speed up the model at the end of each iteration to make the pruned model compact. If set True, speedup the model at the end of each iteration to make the pruned model compact.
dummy_input : Optional[torch.Tensor] dummy_input : Optional[torch.Tensor]
If `speed_up` is True, `dummy_input` is required for tracing the model in speed up. If `speedup` is True, `dummy_input` is required for tracing the model in speedup.
Examples
--------
>>> from nni.compression.pytorch.pruning import SimulatedAnnealingPruner
>>> model = ...
>>> config_list = [{'sparsity': 0.8, 'op_types': ['Conv2d']}]
>>> evaluator = ...
>>> finetuner = ...
>>> pruner = SimulatedAnnealingPruner(model, config_list, pruning_algorithm='l1', evaluator=evaluator, cool_down_rate=0.9, finetuner=finetuner)
>>> pruner.compress()
>>> _, model, masks, _, _ = pruner.get_best_result()
For detailed example please refer to :githublink:`examples/model_compress/pruning/simulated_anealing_pruning_torch.py <examples/model_compress/pruning/simulated_anealing_pruning_torch.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], evaluator: Callable[[Module], float], start_temperature: float = 100, def __init__(self, model: Module, config_list: List[Dict], evaluator: Callable[[Module], float], start_temperature: float = 100,
stop_temperature: float = 20, cool_down_rate: float = 0.9, perturbation_magnitude: float = 0.35, stop_temperature: float = 20, cool_down_rate: float = 0.9, perturbation_magnitude: float = 0.35,
pruning_algorithm: str = 'level', pruning_params: Dict = {}, log_dir: str = '.', keep_intermediate_result: bool = False, pruning_algorithm: str = 'level', pruning_params: Dict = {}, log_dir: str = '.', keep_intermediate_result: bool = False,
finetuner: Optional[Callable[[Module], None]] = None, speed_up: bool = False, dummy_input: Optional[Tensor] = None): finetuner: Optional[Callable[[Module], None]] = None, speedup: bool = False, dummy_input: Optional[Tensor] = None):
task_generator = SimulatedAnnealingTaskGenerator(origin_model=model, task_generator = SimulatedAnnealingTaskGenerator(origin_model=model,
origin_config_list=config_list, origin_config_list=config_list,
start_temperature=start_temperature, start_temperature=start_temperature,
...@@ -263,5 +352,5 @@ class SimulatedAnnealingPruner(IterativePruner): ...@@ -263,5 +352,5 @@ class SimulatedAnnealingPruner(IterativePruner):
if 'traced_optimizer' in pruning_params: if 'traced_optimizer' in pruning_params:
pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer']) pruning_params['traced_optimizer'] = OptimizerConstructHelper.from_trace(model, pruning_params['traced_optimizer'])
pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params) pruner = PRUNER_DICT[pruning_algorithm](None, None, **pruning_params)
super().__init__(pruner, task_generator, finetuner=finetuner, speed_up=speed_up, dummy_input=dummy_input, super().__init__(pruner, task_generator, finetuner=finetuner, speedup=speedup, dummy_input=dummy_input,
evaluator=evaluator, reset_weight=False) evaluator=evaluator, reset_weight=False)
...@@ -10,7 +10,7 @@ from torch import autograd, Tensor ...@@ -10,7 +10,7 @@ from torch import autograd, Tensor
from torch.nn import Module, Parameter from torch.nn import Module, Parameter
from torch.optim import Optimizer, Adam from torch.optim import Optimizer, Adam
from nni.algorithms.compression.v2.pytorch.base.compressor import Compressor, _setattr, LayerInfo from nni.algorithms.compression.v2.pytorch.base import PrunerModuleWrapper, LayerInfo
from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import BasicPruner, NORMAL_SCHEMA, EXCLUDE_SCHEMA, INTERNAL_SCHEMA from nni.algorithms.compression.v2.pytorch.pruning.basic_pruner import BasicPruner, NORMAL_SCHEMA, EXCLUDE_SCHEMA, INTERNAL_SCHEMA
from nni.algorithms.compression.v2.pytorch.utils import CompressorSchema, OptimizerConstructHelper from nni.algorithms.compression.v2.pytorch.utils import CompressorSchema, OptimizerConstructHelper
from nni.common.serializer import Traceable from nni.common.serializer import Traceable
...@@ -25,7 +25,7 @@ from .tools import ( ...@@ -25,7 +25,7 @@ from .tools import (
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class PrunerScoredModuleWrapper(Module): class PrunerScoredModuleWrapper(PrunerModuleWrapper):
""" """
Wrap a module to enable data parallel, forward method customization and buffer registeration. Wrap a module to enable data parallel, forward method customization and buffer registeration.
Different from `PrunerModuleWrapper`, `PrunerScoredModuleWrapper` will record the gradient. Different from `PrunerModuleWrapper`, `PrunerScoredModuleWrapper` will record the gradient.
...@@ -38,59 +38,16 @@ class PrunerScoredModuleWrapper(Module): ...@@ -38,59 +38,16 @@ class PrunerScoredModuleWrapper(Module):
The configurations that users specify for compression. The configurations that users specify for compression.
module_name module_name
The name of the module to compress, wrapper module shares same name. The name of the module to compress, wrapper module shares same name.
pruner
The pruner used to calculate mask.
""" """
def __init__(self, module: Module, module_name: str, config: Dict, pruner: Compressor): def __init__(self, module: Module, module_name: str, config: Dict):
super().__init__() super().__init__(module, module_name, config)
# origin layer information
self.module = module
self.name = module_name
# config and pruner
self.config = config
self.pruner = pruner
self.weight = Parameter(torch.empty(self.module.weight.size()))
self.weight_score = Parameter(torch.empty(self.weight.size())) self.weight_score = Parameter(torch.empty(self.weight.size()))
torch.nn.init.constant_(self.weight_score, val=0.0) torch.nn.init.constant_(self.weight_score, val=0.0)
# register buffer for mask
self.register_buffer("weight_mask", torch.ones(self.module.weight.shape))
if hasattr(self.module, 'bias') and self.module.bias is not None:
self.register_buffer("bias_mask", torch.ones(self.module.bias.shape))
self.bias = Parameter(torch.empty(self.module.bias.size()))
else:
self.register_buffer("bias_mask", None)
def _weight2buffer(self):
"""
When using this wrapper to inference, call `_weight2buffer()` to make original weight untrainable.
The best place to call this function is in `Pruner._wrap_model()`.
"""
self.weight.data = self.module.weight.data
delattr(self.module, 'weight')
self.module.register_buffer('weight', self.weight.data)
if hasattr(self.module, 'bias') and self.module.bias is not None:
self.bias.data = self.module.bias.data
delattr(self.module, 'bias')
self.module.register_buffer('bias', self.bias.data)
def _weight2parameter(self):
"""
When don't need to record score or need to export the model, call `_weight2parameter()` to make the original weight trainable.
The best place to call this function is in `Pruner._unwrap_model()`.
"""
delattr(self.module, 'weight')
self.module.weight = Parameter(torch.empty(self.weight.size()))
self.module.weight.data = torch.mul(self.weight, self.weight_mask)
if hasattr(self.module, 'bias') and self.module.bias is not None:
delattr(self.module, 'bias')
self.module.bias = Parameter(torch.empty(self.bias.size()))
self.module.bias.data = torch.mul(self.bias, self.bias_mask)
def forward(self, *inputs): def forward(self, *inputs):
# apply mask to weight, bias # apply mask to weight, bias
self.module.weight = torch.mul(self.weight, _StraightThrough.apply(self.weight_score, self.weight_mask)) # NOTE: training gets slower and slower if `self.weight_mask` is used without `detach_()`; the cause is unclear
self.module.weight = torch.mul(self.weight, _StraightThrough.apply(self.weight_score, self.weight_mask.detach_()))
if hasattr(self.module, 'bias') and self.module.bias is not None: if hasattr(self.module, 'bias') and self.module.bias is not None:
self.module.bias = torch.mul(self.bias, self.bias_mask) self.module.bias = torch.mul(self.bias, self.bias_mask)
return self.module(*inputs) return self.module(*inputs)
...@@ -119,12 +76,26 @@ class WeightScoreTrainerBasedDataCollector(TrainerBasedDataCollector): ...@@ -119,12 +76,26 @@ class WeightScoreTrainerBasedDataCollector(TrainerBasedDataCollector):
data = {} data = {}
for _, wrapper in self.compressor.get_modules_wrapper().items(): for _, wrapper in self.compressor.get_modules_wrapper().items():
data[wrapper.name] = wrapper.weight_score.data.clone().detach() data[wrapper.name] = wrapper.weight_score.data
return data return data
class MovementPruner(BasicPruner): class MovementPruner(BasicPruner):
""" r"""
Movement pruner is an implementation of movement pruning.
This is a "fine-pruning" algorithm, which means the masks may change during each fine-tuning step.
Each weight element is scored by the negative of the accumulated sum of the products of the weight and its gradient at each step.
This means the weight elements moving towards zero accumulate negative scores, while the weight elements moving away from zero accumulate positive scores.
The weight elements with low scores will be masked during inference.
The following figure from the paper shows the weight pruning by movement pruning.
.. image:: ../../../img/movement_pruning.png
:target: ../../../img/movement_pruning.png
:alt:
For more details, please refer to `Movement Pruning: Adaptive Sparsity by Fine-Tuning <https://arxiv.org/abs/2005.07683>`__.
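A rough, self-contained sketch of the score accumulation (illustrative only; the toy loss and learning rate are made up and this is not the NNI wrapper logic)::

    import torch

    weight = torch.nn.Parameter(torch.randn(8, 8))
    score = torch.zeros_like(weight)

    for step in range(100):
        loss = (weight ** 2).sum()           # stand-in for a real training loss
        grad, = torch.autograd.grad(loss, weight)
        score -= weight.detach() * grad      # weights moving towards zero accumulate negative scores
        with torch.no_grad():
            weight -= 0.01 * grad            # plain SGD update

    # keep the weights with the top ~20% scores, mask the rest
    threshold = score.flatten().kthvalue(int(0.8 * score.numel())).values
    mask = (score >= threshold).float()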
Parameters Parameters
---------- ----------
model : torch.nn.Module model : torch.nn.Module
...@@ -158,7 +129,7 @@ class MovementPruner(BasicPruner): ...@@ -158,7 +129,7 @@ class MovementPruner(BasicPruner):
model.train(mode=training) model.train(mode=training)
traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer) traced_optimizer : nni.common.serializer.Traceable(torch.optim.Optimizer)
The traced optimizer instance which the optimizer class is wrapped by nni.trace. The traced optimizer instance which the optimizer class is wrapped by nni.trace.
E.g. traced_optimizer = nni.trace(torch.nn.Adam)(model.parameters()). E.g. ``traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())``.
criterion : Callable[[Tensor, Tensor], Tensor] criterion : Callable[[Tensor, Tensor], Tensor]
The criterion function used in trainer. Take model output and target value as input, and return the loss. The criterion function used in trainer. Take model output and target value as input, and return the loss.
training_epochs : int training_epochs : int
...@@ -171,6 +142,21 @@ class MovementPruner(BasicPruner): ...@@ -171,6 +142,21 @@ class MovementPruner(BasicPruner):
The number of steps at which sparsity stops growing, note that the sparsity stop growing doesn't mean masks not changed. The number of steps at which sparsity stops growing, note that the sparsity stop growing doesn't mean masks not changed.
The sparsity after each `optimizer.step()` is: The sparsity after each `optimizer.step()` is:
total_sparsity * (1 - (1 - (current_step - warm_up_step) / (cool_down_beginning_step - warm_up_step)) ** 3). total_sparsity * (1 - (1 - (current_step - warm_up_step) / (cool_down_beginning_step - warm_up_step)) ** 3).
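For example, with total_sparsity = 0.8, warm_up_step = 3000 and cool_down_beginning_step = 27000, the sparsity after step 15000 is (plain Python, illustrative only):

>>> round(0.8 * (1 - (1 - (15000 - 3000) / (27000 - 3000)) ** 3), 4)
0.7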
Examples
--------
>>> import nni
>>> from nni.compression.pytorch.pruning import MovementPruner
>>> model = ...
>>> # make sure you have used nni.trace to wrap the optimizer class before initialize
>>> traced_optimizer = nni.trace(torch.optim.Adam)(model.parameters())
>>> trainer = ...
>>> criterion = ...
>>> config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
>>> pruner = MovementPruner(model, config_list, trainer, traced_optimizer, criterion, 10, 3000, 27000)
>>> masked_model, masks = pruner.compress()
For detailed example please refer to :githublink:`examples/model_compress/pruning/movement_pruning_glue.py <examples/model_compress/pruning/movement_pruning_glue.py>`
""" """
def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None], def __init__(self, model: Module, config_list: List[Dict], trainer: Callable[[Module, Optimizer, Callable], None],
traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], training_epochs: int, warm_up_step: int, traced_optimizer: Traceable, criterion: Callable[[Tensor, Tensor], Tensor], training_epochs: int, warm_up_step: int,
...@@ -230,28 +216,6 @@ class MovementPruner(BasicPruner): ...@@ -230,28 +216,6 @@ class MovementPruner(BasicPruner):
else: else:
self.data_collector.reset() self.data_collector.reset()
def _wrap_model(self):
"""
Wrap all modules that needed to be compressed.
Different from the parent function, call `wrapper._weight2buffer()` after replace the origin module to wrapper.
"""
if not self.is_wrapped:
for _, wrapper in reversed(self.get_modules_wrapper().items()):
_setattr(self.bound_model, wrapper.name, wrapper)
wrapper._weight2buffer()
self.is_wrapped = True
def _unwrap_model(self):
"""
Unwrap all modules that needed to be compressed.
Different from the parent function, call `wrapper._weight2parameter()` after replace the wrapper to origin module.
"""
if self.is_wrapped:
for _, wrapper in self.get_modules_wrapper().items():
_setattr(self.bound_model, wrapper.name, wrapper.module)
wrapper._weight2parameter()
self.is_wrapped = False
def _wrap_modules(self, layer: LayerInfo, config: Dict): def _wrap_modules(self, layer: LayerInfo, config: Dict):
""" """
Create a wrapper module to replace the original one. Create a wrapper module to replace the original one.
...@@ -265,21 +229,12 @@ class MovementPruner(BasicPruner): ...@@ -265,21 +229,12 @@ class MovementPruner(BasicPruner):
The configuration for generating the mask. The configuration for generating the mask.
""" """
_logger.debug("Module detected to compress : %s.", layer.name) _logger.debug("Module detected to compress : %s.", layer.name)
wrapper = PrunerScoredModuleWrapper(layer.module, layer.name, config, self) wrapper = PrunerScoredModuleWrapper(layer.module, layer.name, config)
assert hasattr(layer.module, 'weight'), "module %s does not have 'weight' attribute" % layer.name assert hasattr(layer.module, 'weight'), "module %s does not have 'weight' attribute" % layer.name
# move newly registered buffers to the same device of weight # move newly registered buffers to the same device of weight
wrapper.to(layer.module.weight.device) wrapper.to(layer.module.weight.device)
return wrapper return wrapper
def get_origin2wrapped_parameter_name_map(self) -> Dict[str, str]:
if self.is_wrapped:
self._unwrap_model()
parameter_name_map = {name: name for name, _ in self.bound_model.named_parameters()}
self._wrap_model()
return parameter_name_map
else:
raise Exception('When only the model is wrapped can get the parameter_name_map.')
def compress(self) -> Tuple[Module, Dict]: def compress(self) -> Tuple[Module, Dict]:
# sparsity grow from 0 # sparsity grow from 0
for _, wrapper in self.get_modules_wrapper().items(): for _, wrapper in self.get_modules_wrapper().items():
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .base import ( from .base import (
HookCollectorInfo, HookCollectorInfo,
DataCollector, DataCollector,
......
...@@ -22,15 +22,14 @@ _logger = logging.getLogger(__name__) ...@@ -22,15 +22,14 @@ _logger = logging.getLogger(__name__)
class DataCollector: class DataCollector:
""" """
An abstract class for collecting the data needed by the compressor. An abstract class for collecting the data needed by the compressor.
Parameters
----------
compressor
The compressor bound to this DataCollector.
""" """
def __init__(self, compressor: Compressor): def __init__(self, compressor: Compressor):
"""
Parameters
----------
compressor
The compressor binded with this DataCollector.
"""
self.compressor = compressor self.compressor = compressor
def reset(self): def reset(self):
...@@ -242,42 +241,43 @@ class TrainerBasedDataCollector(DataCollector): ...@@ -242,42 +241,43 @@ class TrainerBasedDataCollector(DataCollector):
class MetricsCalculator: class MetricsCalculator:
""" """
An abstract class for calculate a kind of metrics of the given data. An abstract class for calculate a kind of metrics of the given data.
"""
def __init__(self, dim: Optional[Union[int, List[int]]] = None,
block_sparse_size: Optional[Union[int, List[int]]] = None):
"""
Parameters
----------
dim
The dimensions that corresponding to the under pruning weight dimensions in collected data.
None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
Only these `dim` will be kept and other dimensions of the data will be reduced.
Example: Parameters
----------
dim
The dimensions that corresponding to the under pruning weight dimensions in collected data.
None means one-to-one correspondence between pruned dimensions and data, which equal to set `dim` as all data dimensions.
Only these `dim` will be kept and other dimensions of the data will be reduced.
If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2]. Example:
Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
Case 1: Directly collect the conv module weight as data to calculate the metric. If you want to prune the Conv2d weight in filter level, and the weight size is (32, 16, 3, 3) [out-channel, in-channel, kernal-size-1, kernal-size-2].
Then the data has size (32, 16, 3, 3). Then the under pruning dimensions is [0], which means you want to prune the filter or out-channel.
Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
So in this case, `dim=0` will set in `__init__`.
Case 2: Use the output of the conv module as data to calculate the metric. Case 1: Directly collect the conv module weight as data to calculate the metric.
Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2). Then the data has size (32, 16, 3, 3).
Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0. Mention that the dimension 0 of the data is corresponding to the under pruning weight dimension 0.
So in this case, `dim=1` will set in `__init__`. So in this case, `dim=0` will set in `__init__`.
In both of these two case, the metric of this module has size (32,). Case 2: Use the output of the conv module as data to calculate the metric.
block_sparse_size Then the data has size (batch_num, 32, feature_map_size_1, feature_map_size_2).
This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)). Mention that the dimension 1 of the data is corresponding to the under pruning weight dimension 0.
Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim. So in this case, `dim=1` will set in `__init__`.
Example: In both of these two case, the metric of this module has size (32,).
The under pruning weight size is (768, 768), and you want to apply a block sparse on dim=[0] with block size [64, 768], block_sparse_size
then you can set block_sparse_size=[64]. The final metric size is (12,). This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
""" Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
Example:
The under pruning weight size is (768, 768), and you want to apply a block sparse on dim=[0] with block size [64, 768],
then you can set block_sparse_size=[64]. The final metric size is (12,).
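A quick plain-PyTorch illustration of Case 1 above (illustrative only):

>>> import torch
>>> weight = torch.randn(32, 16, 3, 3)
>>> weight.abs().sum(dim=[1, 2, 3]).shape  # keep dim 0, reduce the rest
torch.Size([32])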
"""
def __init__(self, dim: Optional[Union[int, List[int]]] = None,
block_sparse_size: Optional[Union[int, List[int]]] = None):
self.dim = dim if not isinstance(dim, int) else [dim] self.dim = dim if not isinstance(dim, int) else [dim]
self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size] self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
if self.block_sparse_size is not None: if self.block_sparse_size is not None:
...@@ -307,36 +307,35 @@ class MetricsCalculator: ...@@ -307,36 +307,35 @@ class MetricsCalculator:
class SparsityAllocator: class SparsityAllocator:
""" """
An abstract class for allocating masks based on metrics. An abstract class for allocating masks based on metrics.
Parameters
----------
pruner
The pruner that is bound to this `SparsityAllocator`.
dim
The dimensions of the weight under pruning; the metric size should equal the weight size on these dimensions.
None means a one-to-one correspondence between pruned dimensions and the metric, which is equivalent to setting `dim` to all dimensions of the weight under pruning.
The mask will be expanded to the weight size depending on `dim`.
Example:
The under pruning weight has size (2, 3, 4), and `dim=1` means the under pruning weight dimension is 1.
Then the metric should have a size (3,), i.e., `metric=[0.9, 0.1, 0.8]`.
Assuming by some kind of `SparsityAllocator` get the mask on weight dimension 1 `mask=[1, 0, 1]`,
then the dimension mask will expand to the final mask `[[[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]]]`.
block_sparse_size
This is used to describe the block size that one metric value represents. By default, None means the block size is ones(len(dim)).
Make sure len(dim) == len(block_sparse_size), and the dimension positions in block_sparse_size correspond to dim.
Example:
The metric size is (12,), and block_sparse_size=[64], then the mask will expand to (768,) at first before expand with `dim`.
continuous_mask
Inherit the mask already in the wrapper if set True.
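A minimal plain-PyTorch illustration of the mask expansion described under `dim` above (illustrative only):

>>> import torch
>>> dim_mask = torch.tensor([1., 0., 1.])  # mask on weight dimension 1
>>> dim_mask.view(1, 3, 1).expand(2, 3, 4).shape
torch.Size([2, 3, 4])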
""" """
def __init__(self, pruner: Compressor, dim: Optional[Union[int, List[int]]] = None, def __init__(self, pruner: Compressor, dim: Optional[Union[int, List[int]]] = None,
block_sparse_size: Optional[Union[int, List[int]]] = None, continuous_mask: bool = True): block_sparse_size: Optional[Union[int, List[int]]] = None, continuous_mask: bool = True):
"""
Parameters
----------
pruner
The pruner that binded with this `SparsityAllocator`.
dim
The under pruning weight dimensions, which metric size should equal to the under pruning weight size on these dimensions.
None means one-to-one correspondence between pruned dimensions and metric, which equal to set `dim` as all under pruning weight dimensions.
The mask will expand to the weight size depend on `dim`.
Example:
The under pruning weight has size (2, 3, 4), and `dim=1` means the under pruning weight dimension is 1.
Then the metric should have a size (3,), i.e., `metric=[0.9, 0.1, 0.8]`.
Assuming by some kind of `SparsityAllocator` get the mask on weight dimension 1 `mask=[1, 0, 1]`,
then the dimension mask will expand to the final mask `[[[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0], [1, 1, 1, 1]]]`.
block_sparse_size
This used to describe the block size a metric value represented. By default, None means the block size is ones(len(dim)).
Make sure len(dim) == len(block_sparse_size), and the block_sparse_size dimension position is corresponding to dim.
Example:
The metric size is (12,), and block_sparse_size=[64], then the mask will expand to (768,) at first before expand with `dim`.
continuous_mask
Inherit the mask already in the wrapper if set True.
"""
self.pruner = pruner self.pruner = pruner
self.dim = dim if not isinstance(dim, int) else [dim] self.dim = dim if not isinstance(dim, int) else [dim]
self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size] self.block_sparse_size = block_sparse_size if not isinstance(block_sparse_size, int) else [block_sparse_size]
...@@ -385,7 +384,7 @@ class SparsityAllocator: ...@@ -385,7 +384,7 @@ class SparsityAllocator:
weight_mask = weight_mask.expand(expand_size).reshape(reshape_size) weight_mask = weight_mask.expand(expand_size).reshape(reshape_size)
wrapper = self.pruner.get_modules_wrapper()[name] wrapper = self.pruner.get_modules_wrapper()[name]
weight_size = wrapper.module.weight.data.size() weight_size = wrapper.weight.data.size()
if self.dim is None: if self.dim is None:
assert weight_mask.size() == weight_size assert weight_mask.size() == weight_size
......
...@@ -24,7 +24,7 @@ class WeightDataCollector(DataCollector): ...@@ -24,7 +24,7 @@ class WeightDataCollector(DataCollector):
def collect(self) -> Dict[str, Tensor]: def collect(self) -> Dict[str, Tensor]:
data = {} data = {}
for _, wrapper in self.compressor.get_modules_wrapper().items(): for _, wrapper in self.compressor.get_modules_wrapper().items():
data[wrapper.name] = wrapper.module.weight.data data[wrapper.name] = wrapper.weight.data
return data return data
...@@ -39,7 +39,7 @@ class WeightTrainerBasedDataCollector(TrainerBasedDataCollector): ...@@ -39,7 +39,7 @@ class WeightTrainerBasedDataCollector(TrainerBasedDataCollector):
data = {} data = {}
for _, wrapper in self.compressor.get_modules_wrapper().items(): for _, wrapper in self.compressor.get_modules_wrapper().items():
data[wrapper.name] = wrapper.module.weight.data data[wrapper.name] = wrapper.weight.data
return data return data
......
...@@ -19,7 +19,8 @@ class StraightMetricsCalculator(MetricsCalculator): ...@@ -19,7 +19,8 @@ class StraightMetricsCalculator(MetricsCalculator):
def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]: def calculate_metrics(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
metrics = {} metrics = {}
for name, tensor in data.items(): for name, tensor in data.items():
metrics[name] = tensor.clone().detach() # use inplace detach `detach_` here to avoid creating a new tensor
metrics[name] = tensor.clone().detach_()
return metrics return metrics
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .agent import DDPG from .agent import DDPG
from .amc_env import AMCEnv from .amc_env import AMCEnv
...@@ -11,7 +11,7 @@ from torch import Tensor ...@@ -11,7 +11,7 @@ from torch import Tensor
from torch.nn import Module from torch.nn import Module
from nni.algorithms.compression.v2.pytorch.utils import config_list_canonical from nni.algorithms.compression.v2.pytorch.utils import config_list_canonical
from nni.compression.pytorch.utils.counter import count_flops_params from nni.compression.pytorch.utils import count_flops_params
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
......
...@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator): ...@@ -70,8 +70,7 @@ class BankSparsityAllocator(SparsityAllocator):
# make up for balance_gran # make up for balance_gran
balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran balance_gran = [1] * (n_dim - len(self.balance_gran)) + self.balance_gran
for i, j in zip(metric.shape, balance_gran): for i, j in zip(metric.shape, balance_gran):
assert i % j == 0, 'Length of {} weight is not \ assert i % j == 0, 'Length of {} weight is not aligned with balance granularity'.format(name)
aligned with balance granularity'.format(name)
mask = torch.zeros(metric.shape).type_as(metric) mask = torch.zeros(metric.shape).type_as(metric)
loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)] loop_iters = [range(int(i / j)) for i, j in zip(metric.shape, balance_gran)]
...@@ -132,7 +131,7 @@ class GlobalSparsityAllocator(SparsityAllocator): ...@@ -132,7 +131,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
if self.continuous_mask: if self.continuous_mask:
metric = metric * self._compress_mask(wrapper.weight_mask) metric = metric * self._compress_mask(wrapper.weight_mask)
layer_weight_num = wrapper.module.weight.data.numel() layer_weight_num = wrapper.weight.data.numel()
total_weight_num += layer_weight_num total_weight_num += layer_weight_num
expend_times = int(layer_weight_num / metric.numel()) expend_times = int(layer_weight_num / metric.numel())
...@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator): ...@@ -160,7 +159,7 @@ class GlobalSparsityAllocator(SparsityAllocator):
class Conv2dDependencyAwareAllocator(SparsityAllocator): class Conv2dDependencyAwareAllocator(SparsityAllocator):
""" """
A specify allocator for Conv2d with dependency aware. A specific allocator for Conv2d with dependency awareness.
""" """
def __init__(self, pruner: Pruner, dim: int, dummy_input: Any): def __init__(self, pruner: Pruner, dim: int, dummy_input: Any):
......
...@@ -239,7 +239,7 @@ class SimulatedAnnealingTaskGenerator(TaskGenerator): ...@@ -239,7 +239,7 @@ class SimulatedAnnealingTaskGenerator(TaskGenerator):
low_limit = 0 low_limit = 0
while True: while True:
# This is to speed up finding the legal sparsity. # This is to speedup finding the legal sparsity.
low_limit = (1 - low_limit) * 0.05 + low_limit low_limit = (1 - low_limit) * 0.05 + low_limit
random_sparsity = sorted(np.random.uniform(low_limit, 1, len(op_names))) random_sparsity = sorted(np.random.uniform(low_limit, 1, len(op_names)))
rescaled_sparsity = self._rescale_sparsity(random_sparsity, target_sparsity, op_names) rescaled_sparsity = self._rescale_sparsity(random_sparsity, target_sparsity, op_names)
......