Unverified Commit 358bdb18 authored by QuanluZhang, committed by GitHub

Dev compress refactor (#1690)

* refactor
parent 025e0b46
@@ -9,13 +9,13 @@ You can easily compress a model with NNI compression. Take pruning for example,
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```
The 'default' op_type stands for the module types defined in [default_layers.py](https://github.com/microsoft/nni/blob/master/src/sdk/pynni/nni/compression/torch/default_layers.py) for PyTorch.
Therefore ```{ 'sparsity': 0.8, 'op_types': ['default'] }``` means that **all layers with the specified op_types will be compressed with the same 0.8 sparsity**. When ```pruner.compress()``` is called, the model is compressed with masks; after that you can fine-tune the model normally, and the **pruned weights that have been masked won't be updated**.
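For concreteness, here is a minimal fine-tuning sketch; the loop, `optimizer`, and `train_loader` are placeholders from a typical PyTorch training script, not part of NNI:

```python
import torch.nn.functional as F

pruner = LevelPruner(model, config_list)
pruner.compress()  # masks are inserted into the model's forward pass
for data, target in train_loader:
    optimizer.zero_grad()
    loss = F.cross_entropy(model(data), target)
    loss.backward()
    optimizer.step()  # masked weights are re-masked on the next forward pass
```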
## Then, make this automatic
@@ -84,9 +84,9 @@ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity,
                   {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity,
                    'start_epoch': 0, 'end_epoch': 3,
                    'frequency': 1, 'op_name': 'conv1' },]
PRUNERS = {'level': LevelPruner(model, config_list_level), 'agp': AGP_Pruner(model, config_list_agp)}
pruner = PRUNERS[params['prune_method']['_name']]
pruner.compress()
... # fine tuning
acc = evaluate(model) # evaluation
nni.report_final_result(acc)
...
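For reference, a sketch of how `params` could be produced; the search-space entry below is an illustrative assumption, not taken from this commit:

```python
import nni

# Hypothetical search space entry (normally written in search_space.json):
# {"prune_method": {"_type": "choice",
#                   "_value": [{"_name": "level"}, {"_name": "agp"}]}}
params = nni.get_next_parameter()  # e.g. {'prune_method': {'_name': 'level'}}
pruner = PRUNERS[params['prune_method']['_name']]
```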
@@ -25,8 +25,8 @@ Tensorflow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(tf.get_default_graph(), config_list)
pruner.compress()
```
PyTorch code
@@ -34,13 +34,13 @@ PyTorch code
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```
You can use other compression algorithms in the package of `nni.compression`. The algorithms are implemented in both PyTorch and TensorFlow, under `nni.compression.torch` and `nni.compression.tensorflow` respectively. You can refer to [Pruner](./Pruner.md) and [Quantizer](./Quantizer.md) for detailed descriptions of the supported algorithms.
The function call `pruner.compress()` modifies the user-defined model (in TensorFlow the model can be obtained with `tf.get_default_graph()`, while in PyTorch the model is the defined model class): masks are inserted into it, and when you then run the model, the masks take effect. The masks can be adjusted at runtime by the algorithms.
When instantiating a compression algorithm, a `config_list` is passed in. We describe how to write this config below.
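As a quick illustration (the exact keys supported vary per algorithm), a `config_list` is a list of dicts, each selecting ops and giving them settings; `op_name` selects a single op by name, as in the AGP example earlier in this commit:

```python
config_list = [
    { 'sparsity': 0.8, 'op_types': ['default'] },  # all supported layer types
    { 'sparsity': 0.6, 'op_name': 'conv1' },       # a specific op, by name
]
```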
@@ -111,20 +111,26 @@ If you want to write a new pruning algorithm, you can write a class that inherit
# nni.compression.tensorflow.Pruner with
# nni.compression.torch.Pruner
class YourPruner(nni.compression.tensorflow.Pruner):
    def __init__(self, model, config_list):
        """
        We suggest you use the NNI-defined spec for config
        """
        super().__init__(model, config_list)

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with ``mul()`` operation on the weight.
        This method is effectively hooked to ``forward()`` method of the model.

        Parameters
        ----------
        layer: LayerInfo
            calculate mask for ``layer``'s weight
        config: dict
            the configuration for generating the mask
        """
        return your_mask

    # note for pytorch version, there is no sess in input arguments
@@ -133,16 +139,18 @@ class YourPruner(nni.compression.tensorflow.Pruner):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
        """
        Can do some processing based on the model or weights bound
        in ``self.bound_model``
        """
        pass
```
For the simplest algorithm, you only need to override ``calc_mask``. It receives the to-be-compressed layers one by one along with their compression configuration. You generate the mask for a layer's weight in this function and return it. Then NNI applies the mask for you.
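As a minimal sketch (PyTorch flavor; it reads `layer.module.weight.data` the same way the built-in pruners in this commit do, but is not one of them):

```python
import torch

def calc_mask(self, layer, config):
    # Magnitude pruning: mask out the smallest `sparsity` fraction of weights.
    weight = layer.module.weight.data
    k = int(weight.numel() * config['sparsity'])
    if k == 0:
        return torch.ones_like(weight)
    threshold = torch.topk(weight.abs().view(-1), k, largest=False)[0].max()
    return torch.gt(weight.abs(), threshold).type_as(weight)
```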
Some algorithms generate masks based on training progress, i.e., the epoch number. We provide `update_epoch` for the pruner to be aware of the training progress. It should be called at the beginning of each epoch.
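A brief usage sketch (the loop and `train` are placeholders):

```python
for epoch in range(num_epochs):
    pruner.update_epoch(epoch)  # inform the pruner of training progress
    train(model)                # one epoch of your usual training code
```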
Some algorithms may want global information for generating masks, for example, all weights of the model (for statistics). You can use `self.bound_model` in the Pruner class to access the weights. If you also need the optimizer's information (for example in PyTorch), you could override `__init__` to receive more arguments such as the model's optimizer. Then `step` can process or update the information according to the algorithm. You can refer to [source code of built-in algorithms](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/compressors) for example implementations.
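For example, a pruner that also wants the optimizer could be sketched as follows; the extra `optimizer` argument is this example's assumption, not a required NNI signature:

```python
import torch
import nni.compression.torch

class StatefulPruner(nni.compression.torch.Pruner):
    def __init__(self, model, config_list, optimizer):
        super().__init__(model, config_list)
        self.optimizer = optimizer  # remembered for use in step()

    def calc_mask(self, layer, config):
        # no-op mask, just to keep the sketch complete
        return torch.ones_like(layer.module.weight.data)

    def step(self):
        # called every training step; may read self.bound_model and
        # self.optimizer to gather statistics for future masks
        pass
```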
### Quantization algorithm

@@ -154,20 +162,19 @@ The interface for customizing quantization algorithm is similar to that of pruni
# nni.compression.tensorflow.Quantizer with
# nni.compression.torch.Quantizer
class YourQuantizer(nni.compression.tensorflow.Quantizer):
    def __init__(self, model, config_list):
        """
        We suggest you use the NNI-defined spec for config
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        """
        weight is the target weight tensor
        config is the selected dict object in config_list for this layer
        kwargs contains op, op_types, and op_name
        design your quantizer and return the new weight
        """
        return new_weight

    # note for pytorch version, there is no sess in input arguments
@@ -176,8 +183,10 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
        """
        Can do some processing based on the model or weights bound
        in ``self.bound_model``
        """
        pass
```
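As a minimal sketch of `quantize_weight` (simple symmetric linear quantize-dequantize in PyTorch; illustrative only, not one of NNI's built-in quantizers):

```python
def quantize_weight(self, weight, config, **kwargs):
    # Quantize-dequantize to `q_bits` levels, keeping the tensor dtype.
    q_bits = config.get('q_bits', 8)
    scale = weight.abs().max() / (2 ** (q_bits - 1) - 1)
    if scale == 0:
        return weight
    return (weight / scale).round() * scale
```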
...
@@ -13,16 +13,16 @@ Tensorflow code
```
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model_graph, config_list)
pruner.compress()
```
PyTorch code
```
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```

#### User configuration for Level Pruner
@@ -53,8 +53,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
pruner = AGP_Pruner(tf.get_default_graph(), config_list)
pruner.compress()
```
PyTorch code
```python
@@ -67,8 +67,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
pruner = AGP_Pruner(model, config_list)
pruner.compress()
```
Second, you should add the code below to update the epoch number when you finish one epoch in your training code.
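The snippet itself is collapsed in this diff; a minimal sketch of the intended call (PyTorch flavor, loop names are placeholders) is:

```python
for epoch in range(num_epochs):
    train(model)                # one epoch of your training code
    pruner.update_epoch(epoch)  # let AGP recompute masks on its schedule
```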
...
@@ -8,11 +8,11 @@ We provide Naive Quantizer to quantize weight to default 8 bits, you can use it
### Usage
tensorflow
```python
nni.compressors.tensorflow.NaiveQuantizer(model_graph).compress()
```
pytorch
```python
nni.compressors.torch.NaiveQuantizer(model).compress()
```
***
@@ -32,15 +32,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
quantizer = QAT_Quantizer(tf.get_default_graph(), config_list)
quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
quantizer = QAT_Quantizer(model, config_list)
quantizer.compress()
```
You can view the example for more information
@@ -61,15 +61,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
quantizer = DoReFaQuantizer(tf.get_default_graph(), config_list)
quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
quantizer = DoReFaQuantizer(model, config_list)
quantizer.compress()
```
You can view the example for more information
...
@@ -93,15 +93,13 @@ def main():
        'frequency': 1,
        'op_types': ['default']
    }]
    pruner = AGP_Pruner(tf.get_default_graph(), configure_list)
    # if you want to load from yaml file
    # configure_file = nni.compressors.tf_compressor._nnimc_tf._tf_default_load_configure_file('configure_example.yaml','AGPruner')
    # configure_list = configure_file.get('config',[])
    # pruner.load_configure(configure_list)
    # you can also handle it yourself and input a configure list in json
    pruner.compress()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
...
@@ -83,8 +83,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(tf.get_default_graph())
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
    quantizer = QAT_Quantizer(tf.get_default_graph(), configure_list)
    quantizer.compress()
    # you can also use compress(model) or compress_default_graph()
    # method like QATquantizer(q_bits = 8).compress_default_graph()
...
@@ -79,8 +79,8 @@ def main():
        'op_types': ['default']
    }]
    pruner = AGP_Pruner(model, configure_list)
    pruner.compress()
    # you can also use compress(model) method
    # like that pruner.compress(model)
...
@@ -69,8 +69,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(model)
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
    quantizer = QAT_Quantizer(model, configure_list)
    quantizer.compress()
    # you can also use compress(model) method
    # like that quantizer.compress(model)
...
@@ -8,16 +8,18 @@ _logger = logging.getLogger(__name__)
class LevelPruner(Pruner):
    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - sparsity
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        if self.if_init_list.get(op_name, True):
            threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100)
            mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)
@@ -38,7 +40,7 @@ class AGP_Pruner(Pruner):
    https://arxiv.org/pdf/1710.01878.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - initial_sparsity
@@ -47,13 +49,15 @@ class AGP_Pruner(Pruner):
            - end_epoch: end epoch number stop update mask
            - frequency: if you want update every 2 epoch, you can set it 2
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}
        self.now_epoch = tf.Variable(0)
        self.assign_handler = []

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
...
@@ -10,8 +10,8 @@ _logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.layer_scale = {}

    def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
    Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.stop_gradient(tf.reduce_min(weight))
@@ -52,12 +52,12 @@ class DoReFaQuantizer(Quantizer):
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.math.tanh(weight)
...
@@ -6,67 +6,85 @@ _logger = logging.getLogger(__name__)
class LayerInfo:
    def __init__(self, op, weight, weight_op):
        self.op = op
        self.name = op.name
        self.type = op.type
        self.weight = weight
        self.weight_op = weight_op


class Compressor:
    """
    Abstract base TensorFlow compressor
    """

    def __init__(self, model, config_list):
        """
        Record necessary info in class members

        Parameters
        ----------
        model : tensorflow model
            the model user wants to compress
        config_list : list
            the configurations that users specify for compression
        """
        self.bound_model = model
        self.config_list = config_list
        self.modules_to_compress = []

    def compress(self):
        """
        Compress the model with algorithm implemented by subclass.

        The model will be instrumented and user should never edit it after calling this method.
        `self.modules_to_compress` records all the to-be-compressed layers
        """
        for op in self.bound_model.get_operations():
            weight_index = _detect_weight_index(op)
            if weight_index is None:
                _logger.warning('Failed to detect weight for layer %s', op.name)
                continue
            weight_op = op.inputs[weight_index].op
            weight = weight_op.inputs[0]
            layer = LayerInfo(op, weight, weight_op)
            config = self.select_config(layer)
            if config is not None:
                self._instrument_layer(layer, config)
                self.modules_to_compress.append((layer, config))
        return self.bound_model

    def get_modules_to_compress(self):
        """
        To obtain all the to-be-compressed layers.

        Returns
        -------
        self.modules_to_compress : list
            a list of the layers, each of which is a tuple (`layer`, `config`),
            `layer` is `LayerInfo`, `config` is a `dict`
        """
        return self.modules_to_compress

    def select_config(self, layer):
        """
        Find the configuration for `layer` by parsing `self.config_list`

        Parameters
        ----------
        layer : LayerInfo
            one layer

        Returns
        -------
        ret : config or None
            the retrieved configuration for this layer; if None, this layer should
            not be compressed
        """
        ret = None
        for config in self.config_list:
            op_types = config.get('op_types')
            if op_types == 'default':
                op_types = default_layers.op_weight_index.keys()
@@ -79,35 +97,72 @@ class Compressor:
            return None
        return ret

    def update_epoch(self, epoch, sess):
        """
        If the user wants to update the model every epoch, this method can be overridden.
        This method should be called at the beginning of each epoch

        Parameters
        ----------
        epoch : num
            the current epoch number
        """

    def step(self, sess):
        """
        If the user wants to update the mask every step, this method can be overridden
        """

    def _instrument_layer(self, layer, config):
        """
        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            the configuration for compressing this layer
        """
        raise NotImplementedError()


class Pruner(Compressor):
    """
    Abstract base TensorFlow pruner
    """

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with `mul()` operation on the weight.
        This method works as a subgraph which will be inserted into the bound model.

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        raise NotImplementedError("Pruners must overload calc_mask()")

    def _instrument_layer(self, layer, config):
        """
        Insert a mask-multiply operation and swap it in for the original weight.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the mask
        config : dict
            the configuration for generating the mask
        """
        mask = self.calc_mask(layer, config)
        new_weight = layer.weight * mask
        tf.contrib.graph_editor.swap_outputs(layer.weight_op, new_weight.op)


class Quantizer(Compressor):
@@ -133,7 +188,7 @@ def _detect_weight_index(layer):
    index = default_layers.op_weight_index.get(layer.type)
    if index is not None:
        return index
    weight_indices = [i for i, op in enumerate(layer.inputs) if op.name.endswith('Variable/read')]
    if len(weight_indices) == 1:
        return weight_indices[0]
    return None
@@ -11,16 +11,18 @@ class LevelPruner(Pruner):
    """Prune to an exact pruning level specification
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - sparsity
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data
        op_name = layer.name
        if self.if_init_list.get(op_name, True):
            w_abs = weight.abs()
            k = int(weight.numel() * config['sparsity'])
@@ -45,7 +47,7 @@ class AGP_Pruner(Pruner):
    https://arxiv.org/pdf/1710.01878.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - initial_sparsity
@@ -54,12 +56,14 @@ class AGP_Pruner(Pruner):
            - end_epoch: end epoch number stop update mask, you should make sure start_epoch <= end_epoch
            - frequency: if you want update every 2 epoch, you can set it 2
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.now_epoch = 0
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
...
@@ -10,8 +10,8 @@ logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.layer_scale = {}

    def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
    Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        if config['q_bits'] <= 1:
@@ -53,12 +53,12 @@ class DoReFaQuantizer(Quantizer):
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        out = weight.tanh()
...
@@ -15,49 +15,69 @@ class LayerInfo:
class Compressor:
    """
    Abstract base PyTorch compressor
    """

    def __init__(self, model, config_list):
        """
        Record necessary info in class members

        Parameters
        ----------
        model : pytorch model
            the model user wants to compress
        config_list : list
            the configurations that users specify for compression
        """
        self.bound_model = model
        self.config_list = config_list
        self.modules_to_compress = []

    def compress(self):
        """
        Compress the model with algorithm implemented by subclass.

        The model will be instrumented and user should never edit it after calling this method.
        `self.modules_to_compress` records all the to-be-compressed layers
        """
        for name, module in self.bound_model.named_modules():
            layer = LayerInfo(name, module)
            config = self.select_config(layer)
            if config is not None:
                self._instrument_layer(layer, config)
                self.modules_to_compress.append((layer, config))
        return self.bound_model

    def get_modules_to_compress(self):
        """
        To obtain all the to-be-compressed layers.

        Returns
        -------
        self.modules_to_compress : list
            a list of the layers, each of which is a tuple (`layer`, `config`),
            `layer` is `LayerInfo`, `config` is a `dict`
        """
        return self.modules_to_compress

    def select_config(self, layer):
        """
        Find the configuration for `layer` by parsing `self.config_list`

        Parameters
        ----------
        layer : LayerInfo
            one layer

        Returns
        -------
        ret : config or None
            the retrieved configuration for this layer; if None, this layer should
            not be compressed
        """
        ret = None
        for config in self.config_list:
            config['op_types'] = self._expand_config_op_types(config)
            if layer.type not in config['op_types']:
                continue
@@ -68,6 +88,35 @@ class Compressor:
            return None
        return ret

    def update_epoch(self, epoch):
        """
        If the user wants to update the model every epoch, this method can be overridden.
        This method should be called at the beginning of each epoch

        Parameters
        ----------
        epoch : num
            the current epoch number
        """

    def step(self):
        """
        If the user wants to update the model every step, this method can be overridden
        """

    def _instrument_layer(self, layer, config):
        """
        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            the configuration for compressing this layer
        """
        raise NotImplementedError()

    def _expand_config_op_types(self, config):
        if config is None:
            return []
@@ -84,17 +133,33 @@ class Pruner(Compressor):
    Abstract base PyTorch pruner
    """

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with `mul()` operation on the weight.
        This method is effectively hooked to `forward()` method of the model.

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        raise NotImplementedError("Pruners must overload calc_mask()")

    def _instrument_layer(self, layer, config):
        """
        Create a wrapper forward function to replace the original one.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the mask
        config : dict
            the configuration for generating the mask
        """
        assert layer._forward is None, 'Each model can only be compressed once'
        if not _check_weight(layer.module):
            _logger.warning('Module %s does not have parameter "weight"', layer.name)
@@ -104,12 +169,10 @@ class Pruner(Compressor):
        def new_forward(*inputs):
            # apply mask to weight
            old_weight = layer.module.weight.data
            mask = self.calc_mask(layer, config)
            layer.module.weight.data = old_weight.mul(mask)
            # calculate forward
            ret = layer._forward(*inputs)
            return ret

        layer.module.forward = new_forward
...
@@ -101,20 +101,20 @@ class CompressorTestCase(TestCase):
    def test_tf_pruner(self):
        model = TfMnist()
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        tf_compressor.LevelPruner(tf.get_default_graph(), configure_list).compress()

    def test_tf_quantizer(self):
        model = TfMnist()
        tf_compressor.NaiveQuantizer(tf.get_default_graph(), [{'op_types': ['default']}]).compress()

    def test_torch_pruner(self):
        model = TorchMnist()
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        torch_compressor.LevelPruner(model, configure_list).compress()

    def test_torch_quantizer(self):
        model = TorchMnist()
        torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress()


if __name__ == '__main__':
...