"vscode:/vscode.git/clone" did not exist on "b8240b7ae9387ba7143e6243b59069c3a04a12e9"
Unverified Commit 5623dbf3 authored by liuzhe-lz's avatar liuzhe-lz Committed by GitHub
Browse files

Compression for Tensorflow (#2755)

parent d654eff4
@@ -38,7 +38,7 @@ Tensorflow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model_graph, config_list)
pruner = LevelPruner(model, config_list)
pruner.compress()
```
@@ -117,17 +117,6 @@ FPGMPruner prunes filters with the smallest geometric median.
### Usage
Tensorflow code
```python
from nni.compression.tensorflow import FPGMPruner
config_list = [{
    'sparsity': 0.5,
    'op_types': ['Conv2D']
}]
pruner = FPGMPruner(model, config_list)
pruner.compress()
```
PyTorch code
```python
from nni.compression.torch import FPGMPruner
@@ -146,11 +135,6 @@ pruner.compress()
.. autoclass:: nni.compression.torch.FPGMPruner
```
##### Tensorflow
```eval_rst
.. autoclass:: nni.compression.tensorflow.FPGMPruner
```
## L1Filter Pruner
This is a one-shot pruner. In ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710), authors Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf propose pruning filters with the smallest L1 norm of their weights.
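A minimal usage sketch, assuming the same call pattern as the other pruners in this document (`model` is a pre-trained `torch.nn.Module`):

```python
from nni.compression.torch import L1FilterPruner
config_list = [{ 'sparsity': 0.5, 'op_types': ['Conv2d'] }]
pruner = L1FilterPruner(model, config_list)
pruner.compress()
```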
@@ -383,12 +367,6 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod
.. autoclass:: nni.compression.torch.AGPPruner
```
##### Tensorflow
```eval_rst
.. autoclass:: nni.compression.tensorflow.AGPPruner
```
***
## NetAdapt Pruner
@@ -620,4 +598,4 @@ pruner.compress(eval_args=[model], finetune_args=[model])
```eval_rst
.. autoclass:: nni.compression.torch.SensitivityPruner
```
\ No newline at end of file
```
@@ -37,7 +37,7 @@ TensorFlow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model_graph, config_list)
pruner = LevelPruner(model, config_list)
pruner.compress()
```
@@ -102,16 +102,6 @@ pruner.compress()
### Usage
TensorFlow code
```python
from nni.compression.tensorflow import FPGMPruner
config_list = [{
    'sparsity': 0.5,
    'op_types': ['Conv2D']
}]
pruner = FPGMPruner(model, config_list)
pruner.compress()
```
PyTorch code
```python
from nni.compression.torch import FPGMPruner
......
import argparse

import tensorflow as tf

import nni.compression.tensorflow

prune_config = {
    'level': {
        'dataset_name': 'mnist',
        'model_name': 'naive',
        'pruner_class': nni.compression.tensorflow.LevelPruner,
        'config_list': [{
            'sparsity': 0.9,
            'op_types': ['default'],
        }]
    },
}


def get_dataset(dataset_name='mnist'):
    assert dataset_name == 'mnist'
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train[..., tf.newaxis] / 255.0
    x_test = x_test[..., tf.newaxis] / 255.0
    return (x_train, y_train), (x_test, y_test)


def create_model(model_name='naive'):
    assert model_name == 'naive'
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(filters=20, kernel_size=5),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(pool_size=2),
        tf.keras.layers.Conv2D(filters=20, kernel_size=5),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPool2D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=500),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dense(units=10),
        tf.keras.layers.Softmax()
    ])


def create_pruner(model, pruner_name):
    pruner_class = prune_config[pruner_name]['pruner_class']
    config_list = prune_config[pruner_name]['config_list']
    return pruner_class(model, config_list)


def main(args):
    model_name = prune_config[args.pruner_name]['model_name']
    dataset_name = prune_config[args.pruner_name]['dataset_name']
    train_set, test_set = get_dataset(dataset_name)
    model = create_model(model_name)

    # Pre-train the model before pruning.
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9, decay=1e-4)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    print('start training')
    model.fit(train_set[0], train_set[1], batch_size=args.batch_size, epochs=args.pretrain_epochs, validation_data=test_set)

    # Prune the pre-trained model, then fine-tune it with a smaller learning rate.
    print('start model pruning')
    optimizer_finetune = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, decay=1e-4)
    pruner = create_pruner(model, args.pruner_name)
    model = pruner.compress()
    model.compile(optimizer=optimizer_finetune, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_set[0], train_set[1], batch_size=args.batch_size, epochs=args.prune_epochs, validation_data=test_set)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pruner_name', type=str, default='level')
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--pretrain_epochs', type=int, default=10)
    parser.add_argument('--prune_epochs', type=int, default=10)
    args = parser.parse_args()
    main(args)
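# Example invocation (an assumption: the script name and flags below match the
# argparse definitions above, if the example is saved as model_prune_tf.py):
#
#     python model_prune_tf.py --pruner_name level --pretrain_epochs 10 --prune_epochs 10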
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Removed by this commit:
from .compressor import LayerInfo, Compressor, Pruner, Quantizer
from .builtin_pruners import *
from .builtin_quantizers import *

# Added by this commit:
from .compressor import Compressor, Pruner
from .pruning import *
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

import numpy as np
import tensorflow as tf

from .compressor import Pruner

__all__ = ['LevelPruner', 'AGPPruner', 'FPGMPruner']

_logger = logging.getLogger(__name__)


class LevelPruner(Pruner):
    """
    Parameters
    ----------
    model : tensorflow model
        Model to be pruned
    config_list : list
        Supported keys:
            - sparsity : This is to specify the sparsity that the operations should be compressed to.
            - op_types : Operation types to prune.
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        if self.if_init_list.get(op_name, True):
            # Keep only the (1 - sparsity) fraction of weights with the largest magnitude.
            threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100)
            mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)
            self.mask_list.update({op_name: mask})
            self.if_init_list.update({op_name: False})
        else:
            mask = self.mask_list[op_name]
        return mask
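# Illustrative sketch (not part of this diff): the same percentile-threshold
# masking written with numpy, since tf.contrib.distributions (used above)
# exists only in TF 1.x. Assumes an eager tensor `weight`.
def _level_mask_sketch(weight, sparsity):
    threshold = np.percentile(np.abs(weight.numpy()), sparsity * 100)
    return tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)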
class AGPPruner(Pruner):
    """
    Parameters
    ----------
    model : tensorflow model
        Model to be pruned.
    config_list : list
        Supported keys:
            - initial_sparsity: This is to specify the sparsity when the compressor starts to compress.
            - final_sparsity: This is to specify the sparsity when the compressor finishes compressing.
            - start_epoch: This is to specify the epoch number when the compressor starts compressing, default is 0.
            - end_epoch: This is to specify the epoch number when the compressor finishes compressing.
            - frequency: This is to specify that the compressor compresses once every *frequency* epochs, default is 1.
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}
        self.now_epoch = tf.Variable(0)
        self.assign_handler = []

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
                self.now_epoch - start_epoch) % freq == 0:
            target_sparsity = self.compute_target_sparsity(config)
            threshold = tf.contrib.distributions.percentile(tf.abs(weight), target_sparsity * 100)
            # stop gradient in case gradient changes the mask
            mask = tf.stop_gradient(tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype))
            self.assign_handler.append(tf.assign(weight, weight * mask))
            self.mask_list.update({op_name: tf.constant(mask)})
            self.if_init_list.update({op_name: False})
        else:
            mask = self.mask_list[op_name]
        return mask

    def compute_target_sparsity(self, config):
        end_epoch = config.get('end_epoch', 1)
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        final_sparsity = config.get('final_sparsity', 0)
        initial_sparsity = config.get('initial_sparsity', 0)
        if end_epoch <= start_epoch or initial_sparsity >= final_sparsity:
            _logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity')
            return final_sparsity

        now_epoch = tf.minimum(self.now_epoch, tf.constant(end_epoch))
        span = int(((end_epoch - start_epoch - 1) // freq) * freq)
        assert span > 0
        base = tf.cast(now_epoch - start_epoch, tf.float32) / span
        # Cubic schedule from the AGP paper (Zhu & Gupta): sparsity rises
        # quickly at first, then levels off as it approaches final_sparsity.
        target_sparsity = (final_sparsity +
                           (initial_sparsity - final_sparsity) *
                           (tf.pow(1.0 - base, 3)))
        return target_sparsity

    def update_epoch(self, epoch, sess):
        sess.run(self.assign_handler)
        sess.run(tf.assign(self.now_epoch, int(epoch)))
        for k in self.if_init_list:
            self.if_init_list[k] = True
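# Illustrative sketch (not part of this diff): the cubic AGP schedule above,
# evaluated in plain Python for intuition. With initial 0.0, final 0.9 and
# end epoch 10, sparsity is 0.0 at epoch 0, ~0.59 at epoch 3, 0.9 at epoch 10.
# (Simplified: ignores the frequency-quantized span used above.)
def _agp_sparsity_sketch(epoch, initial=0.0, final=0.9, start=0, end=10):
    base = min(max(epoch - start, 0), end - start) / (end - start)
    return final + (initial - final) * (1.0 - base) ** 3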
class FPGMPruner(Pruner):
    """
    Parameters
    ----------
    model : tensorflow model
        Model to be pruned
    config_list : list
        Supported keys:
            - sparsity : percentage of convolutional filters to be pruned.
            - op_types : Only Conv2D is supported in FPGM Pruner.
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.mask_dict = {}
        self.assign_handler = []
        self.epoch_pruned_layers = set()

    def calc_mask(self, layer, config):
        """
        Supports Conv1D, Conv2D.

        Filter dimensions for Conv1D:
            LEN: filter length
            IN: number of input channels
            OUT: number of output channels

        Filter dimensions for Conv2D:
            H: filter height
            W: filter width
            IN: number of input channels
            OUT: number of output channels

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        weight = layer.weight
        op_type = layer.type
        op_name = layer.name
        assert 0 <= config.get('sparsity') < 1
        assert op_type in ['Conv1D', 'Conv2D']
        assert op_type in config['op_types']

        if layer.name in self.epoch_pruned_layers:
            assert layer.name in self.mask_dict
            return self.mask_dict.get(layer.name)

        try:
            # Flatten each filter to one row: shape (OUT, H*W*IN).
            w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0]))
            masks = np.ones(w.shape)
            num_filters = w.shape[0]
            num_prune = int(num_filters * config.get('sparsity'))
            if num_filters < 2 or num_prune < 1:
                return masks
            min_gm_idx = self._get_min_gm_kernel_idx(w, num_prune)
            for idx in min_gm_idx:
                masks[idx] = 0.
        finally:
            masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape)
            masks = tf.Variable(masks)
            self.mask_dict.update({op_name: masks})
            self.epoch_pruned_layers.add(layer.name)
        return masks

    def _get_min_gm_kernel_idx(self, weight, n):
        # Rank filters by the sum of distances to all other filters; the n
        # smallest sums are the filters closest to the geometric median.
        dist_list = []
        for out_i in range(weight.shape[0]):
            dist_sum = self._get_distance_sum(weight, out_i)
            dist_list.append((dist_sum, out_i))
        min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n]
        return [x[1] for x in min_gm_kernels]

    def _get_distance_sum(self, weight, out_idx):
        anchor_w = tf.tile(tf.expand_dims(weight[out_idx], 0), [weight.shape[0], 1])
        x = weight - anchor_w
        x = tf.math.reduce_sum((x * x), -1)
        x = tf.math.sqrt(x)
        return tf.math.reduce_sum(x)

    def update_epoch(self, epoch):
        self.epoch_pruned_layers = set()
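# Illustrative sketch (not part of this diff): the FPGM criterion above in
# plain numpy — rank each filter by the sum of its distances to all other
# filters and prune those with the smallest sums.
def _fpgm_prune_indices_sketch(filters_2d, num_prune):
    # filters_2d: array of shape (num_filters, filter_size)
    diffs = filters_2d[:, None, :] - filters_2d[None, :, :]
    dist_sums = np.sqrt((diffs ** 2).sum(-1)).sum(-1)
    return np.argsort(dist_sums)[:num_prune]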
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

import tensorflow as tf

from .compressor import Quantizer

__all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer']

_logger = logging.getLogger(__name__)


class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.layer_scale = {}

    def quantize_weight(self, weight, config, op_name, **kwargs):
        # Track the largest scale seen so far for each layer.
        new_scale = tf.reduce_max(tf.abs(weight)) / 127
        scale = tf.maximum(self.layer_scale.get(op_name, tf.constant(0.0)), new_scale)
        self.layer_scale[op_name] = scale
        orig_type = weight.dtype
        return tf.cast(tf.cast(weight / scale, tf.int8), orig_type) * scale
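# Illustrative sketch (not part of this diff): the symmetric 8-bit scheme
# above in plain numpy. With max|w| = 0.5 the scale is 0.5/127, so a weight
# of 0.25 is stored as int 63 and dequantized to roughly 0.248.
def _naive_quantize_sketch(w):
    import numpy as np
    scale = np.abs(w).max() / 127
    return (w / scale).astype(np.int8).astype(w.dtype) * scale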
class QAT_Quantizer(Quantizer):
    """Quantizer using the Quantization and Training scheme, as defined in:
    Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.stop_gradient(tf.reduce_min(weight))
        b = tf.stop_gradient(tf.reduce_max(weight))
        n = tf.cast(2 ** config['q_bits'], tf.float32)
        scale = (b - a) / (n - 1)
        # use gradient_override_map to change round to identity for gradient
        with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}):
            qw = tf.round((weight - a) / scale) * scale + a
        return qw


class DoReFaQuantizer(Quantizer):
    """Quantizer using the DoReFa scheme, as defined in:
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.math.tanh(weight)
        # normalize by the largest |tanh(w)| so b lies in [0, 1]
        b = a / (2 * tf.reduce_max(tf.abs(a))) + 0.5
        scale = pow(2, config['q_bits'] - 1)
        # use gradient_override_map to change round to identity for gradient
        with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}):
            qw = tf.round(b * scale) / scale
        r_qw = 2 * qw - 1
        return r_qw
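# Illustrative sketch (not part of this diff): the DoReFa weight round-trip in
# plain numpy — squash weights into [0, 1] via tanh, round on a uniform grid
# of 2^(q_bits - 1) steps, then map back to [-1, 1].
def _dorefa_quantize_sketch(w, q_bits):
    import numpy as np
    a = np.tanh(w)
    b = a / (2 * np.abs(a).max()) + 0.5
    scale = 2 ** (q_bits - 1)
    return 2 * np.round(b * scale) / scale - 1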
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Abstract base classes for TensorFlow model compression.
"""

import logging

import tensorflow as tf

from . import default_layers

tf.config.experimental_run_functions_eagerly(True)

_logger = logging.getLogger(__name__)


# --- LayerInfo before this commit: wraps the Keras layer directly ---

class LayerInfo:
    def __init__(self, keras_layer):
        self.keras_layer = keras_layer
        self.name = keras_layer.name
        self.type = default_layers.get_op_type(type(keras_layer))
        self.weight_index = default_layers.get_weight_index(self.type)
        if self.weight_index is not None:
            self.weight = keras_layer.weights[self.weight_index]
        self._call = None


# --- LayerInfo after this commit: records how to reach the layer from the model ---

class LayerInfo:
    """
    This structure contains all information needed to compress a TensorFlow ``Layer``.

    Attributes
    ----------
    layer : tf.keras.layers.Layer
        The layer.
    name : str
        The layer's name. Note that it's local to sub-model and may differ from its attribute name.
    type : str
        Name of the layer's class.
    path : list of str/int
        The layer object's and its parents' attribute name / list index.
        For example, if the path is `['cells', 2, 'conv']`, then the layer can be accessed as `model.cells[2].conv`.
    config : JSON object
        Selected configuration for this layer. The format is detailed in tutorial.

    Parameters
    ----------
    layer : tf.keras.layers.Layer
        See attributes section.
    path : list of str/int
        See attributes section.
    """

    def __init__(self, layer, path=None):
        self.layer = layer
        self.name = layer.name
        self.type = type(layer).__name__
        self.path = path
        self.config = None


# --- Compressor before this commit ---

class Compressor:
    """
    Abstract base TensorFlow compressor
    """

    def __init__(self, model, config_list):
        """
        Record necessary info in class members

        Parameters
        ----------
        model : keras model
            the model user wants to compress
        config_list : list
            the configurations that users specify for compression
        """
        self.bound_model = model
        self.config_list = config_list
        self.modules_to_compress = []

    def detect_modules_to_compress(self):
        """
        detect all modules that should be compressed, and save the result in `self.modules_to_compress`.
        The model will be instrumented and the user should never edit it after calling this method.
        """
        if self.modules_to_compress is None:
            self.modules_to_compress = []
            for keras_layer in self.bound_model.layers:
                layer = LayerInfo(keras_layer)
                config = self.select_config(layer)
                if config is not None:
                    self.modules_to_compress.append((layer, config))
        return self.modules_to_compress

    def compress(self):
        """
        Compress the model with the algorithm implemented by the subclass.
        The model will be instrumented and the user should never edit it after calling this method.
        `self.modules_to_compress` records all the to-be-compressed layers.
        """
        modules_to_compress = self.detect_modules_to_compress()
        for layer, config in modules_to_compress:
            self._instrument_layer(layer, config)
        return self.bound_model

    def get_modules_to_compress(self):
        """
        To obtain all the to-be-compressed layers.

        Returns
        -------
        self.modules_to_compress : list
            a list of the layers, each of which is a tuple (`layer`, `config`),
            `layer` is `LayerInfo`, `config` is a `dict`
        """
        return self.modules_to_compress

    def select_config(self, layer):
        """
        Find the configuration for `layer` by parsing `self.config_list`

        Parameters
        ----------
        layer : LayerInfo
            one layer

        Returns
        -------
        ret : config or None
            the retrieved configuration for this layer; if None, this layer should
            not be compressed
        """
        ret = None
        if layer.type is None:
            return None
        for config in self.config_list:
            config = config.copy()
            config['op_types'] = self._expand_config_op_types(config)
            if layer.type not in config['op_types']:
                continue
            if config.get('op_names') and layer.name not in config['op_names']:
                continue
            ret = config
        if ret is None or ret.get('exclude'):
            return None
        return ret

    def update_epoch(self, epoch):
        """
        If the user wants to update the model every epoch, they can override this method.
        This method should be called at the beginning of each epoch.

        Parameters
        ----------
        epoch : num
            the current epoch number
        """

    def step(self):
        """
        If the user wants to update the mask every step, they can override this method.
        """

    def _instrument_layer(self, layer, config):
        """
        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            the configuration for compressing this layer
        """
        raise NotImplementedError()

    def _expand_config_op_types(self, config):
        if config is None:
            return []
        op_types = []
        for op_type in config.get('op_types', []):
            if op_type == 'default':
                op_types.extend(default_layers.default_layers)
            else:
                op_types.append(op_type)
        return op_types


# --- Compressor after this commit ---

class Compressor:
    """
    Common base class for all compressors.

    This class is designed for other base classes.
    Algorithms should inherit ``Pruner`` or ``Quantizer`` instead.

    Attributes
    ----------
    bound_model : tf.keras.Model
        Compressed user model.
    wrappers : list of tf.keras.Model
        A wrapper is an instrumented TF ``Layer``, in ``Model`` format.
        The list is ordered by preorder traversal.

    Parameters
    ----------
    LayerWrapperClass : a class derived from Model
        The class used to instrument layers.
    model : tf.keras.Model
        The user model to be compressed.
    config_list : list of JSON object
        User configuration. The format is detailed in tutorial.
    """

    def __init__(self, LayerWrapperClass, model, config_list):
        assert isinstance(model, tf.keras.Model)
        self.validate_config(model, config_list)

        self.bound_model = model
        self.wrappers = []

        for layer_info in _detect_layers_to_compress(model, config_list):
            self.wrappers.append(LayerWrapperClass(layer_info, self))
        if not self.wrappers:
            _logger.warning('Nothing is configured to compress, please check your model and config list')

        _instrument_model(model, self.wrappers)

    def set_wrappers_attribute(self, name, value):
        """
        Call ``setattr`` on all wrappers.
        """
        for wrapper in self.wrappers:
            setattr(wrapper, name, value)


# --- Pruner before this commit ---

class Pruner(Compressor):
    """
    Abstract base TensorFlow pruner
    """

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide a mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with `mul()` operation on the weight.
        This method is effectively hooked to the `forward()` method of the model.

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        raise NotImplementedError("Pruners must overload calc_mask()")

    def _instrument_layer(self, layer, config):
        """
        Create a wrapper forward function to replace the original one.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the mask
        config : dict
            the configuration for generating the mask
        """
        layer._call = layer.keras_layer.call

        def new_call(*inputs):
            weights = [x.numpy() for x in layer.keras_layer.weights]
            mask = self.calc_mask(layer, config)
            weights[layer.weight_index] = weights[layer.weight_index] * mask
            layer.keras_layer.set_weights(weights)
            ret = layer._call(*inputs)
            return ret

        layer.keras_layer.call = new_call


# --- Pruner after this commit ---

class Pruner(Compressor):
    """
    Base class for pruning algorithms.

    End users should use ``compress`` and callback APIs (WIP) to prune their models.

    The underlying model is instrumented upon initialization of the pruner object.
    So if you want to pre-train the model, train it before creating the pruner object.

    The compressed model can only execute in eager mode.

    Algorithm developers should override the ``calc_masks`` method to specify the pruning strategy.

    Parameters
    ----------
    model : tf.keras.Model
        The user model to prune.
    config_list : list of JSON object
        User configuration. The format is detailed in tutorial.
    """

    def __init__(self, model, config_list):
        super().__init__(PrunerLayerWrapper, model, config_list)
        #self.callback = PrunerCallback(self)

    def compress(self):
        """
        Apply compression on a pre-trained model.

        If you want to prune the model during training, use callback API (WIP) instead.

        Returns
        -------
        tf.keras.Model
            The compressed model, for convenience. This is exactly the same object as the constructor argument.
        """
        self._update_mask()
        return self.bound_model

    def calc_masks(self, wrapper, **kwargs):
        """
        Abstract method to be overridden by the algorithm. End users should ignore it.

        If the callback is set up, this method will be invoked at the end of each training minibatch.
        If not, it will only be called when the end user invokes ``compress``.

        Parameters
        ----------
        wrapper : PrunerLayerWrapper
            The instrumented layer.
        **kwargs
            Reserved for forward compatibility.

        Returns
        -------
        dict of (str, tf.Tensor), or None
            The key is a weight ``Variable``'s name. The value is a mask ``Tensor`` of the weight's shape and dtype.
            If a weight's key does not appear in the return value, that weight will not be pruned.
            Returning ``None`` means the masks are not changed since last time.
            Weight names are globally unique, e.g. `model/conv_1/kernel:0`.
        """
        # TODO: maybe it should be able to calc on weight-granularity, beside from layer-granularity
        raise NotImplementedError("Pruners must overload calc_masks()")

    def _update_mask(self):
        for wrapper_idx, wrapper in enumerate(self.wrappers):
            masks = self.calc_masks(wrapper, wrapper_idx=wrapper_idx)
            if masks is not None:
                wrapper.masks = masks


# --- Quantizer before this commit (dropped for now; see the new __init__.py) ---

class Quantizer(Compressor):
    """
    Abstract base TensorFlow quantizer
    """

    def quantize_weight(self, weight, config, op, op_type, op_name):
        raise NotImplementedError("Quantizer must overload quantize_weight()")


# --- Added by this commit ---

class PrunerLayerWrapper(tf.keras.Model):
    """
    Instrumented TF layer.

    Wrappers will be passed to the pruner's ``calc_masks`` API,
    and the pruning algorithm should use the wrapper's attributes to calculate masks.

    Once instrumented, the underlying layer's weights will get **modified** by masks before the forward pass.

    Attributes
    ----------
    layer_info : LayerInfo
        All static information of the original layer.
    layer : tf.keras.layers.Layer
        The original layer.
    config : JSON object
        Selected configuration. The format is detailed in tutorial.
    pruner : Pruner
        Bound pruner object.
    masks : dict of (str, tf.Tensor)
        Current masks. The key is the weight's name and the value is the mask tensor.
        On initialization, `masks` is an empty dict, which means no weight is pruned.
        Afterwards, `masks` is the last return value of ``Pruner.calc_masks``.
        See ``Pruner.calc_masks`` for details.
    """

    def __init__(self, layer_info, pruner):
        super().__init__()
        self.layer_info = layer_info
        self.layer = layer_info.layer
        self.config = layer_info.config
        self.pruner = pruner
        self.masks = {}
        _logger.info('Layer detected to compress: %s', self.layer.name)

    def call(self, *inputs):
        # Apply the current masks to the wrapped layer's weights, then delegate.
        new_weights = []
        for weight in self.layer.weights:
            mask = self.masks.get(weight.name)
            if mask is not None:
                new_weights.append(tf.math.multiply(weight, mask).numpy())
            else:
                new_weights.append(weight.numpy())
        self.layer.set_weights(new_weights)
        return self.layer(*inputs)


# TODO: designed to replace `patch_optimizer`
#class PrunerCallback(tf.keras.callbacks.Callback):
#    def __init__(self, pruner):
#        super().__init__()
#        self._pruner = pruner
#
#    def on_train_batch_end(self, batch, logs=None):
#        self._pruner.update_mask()


def _detect_layers_to_compress(model, config_list):
    # Returns list of LayerInfo.
    located_layers = _locate_layers(model)
    ret = []
    for layer in model.layers:
        config = _select_config(LayerInfo(layer), config_list)
        if config is not None:
            if id(layer) not in located_layers:
                _logger.error('Failed to locate layer %s in model. The layer will not be compressed. '
                              'This is a bug in NNI, feel free to fire an issue.', layer.name)
                continue
            layer_info = located_layers[id(layer)]
            layer_info.config = config
            ret.append(layer_info)
    return ret


def _locate_layers(model, cur_path=[]):
    # Find out how to access layers from model object.
    # Returns dict of (layer's object ID, LayerInfo).
    # This function is required because TF framework does not track layer's attribute name,
    # and to my knowledge `Layer.name` is only useful for read-only access.
    # `cur_path`'s format is documented in `LayerInfo.path`.
    # TODO: it can only find layers in `Model` and `list` for now.
    ret = {}
    if isinstance(model, tf.keras.Model):
        for key, value in model.__dict__.items():
            if isinstance(value, tf.keras.Model):
                ret.update(_locate_layers(value, cur_path + [key]))
            elif isinstance(value, list):
                ret.update(_locate_layers(value, cur_path + [key]))
            elif isinstance(value, tf.keras.layers.Layer):
                ret[id(value)] = LayerInfo(value, cur_path + [key])
    elif isinstance(model, list):
        for i, item in enumerate(model):
            if isinstance(item, tf.keras.Model):
                ret.update(_locate_layers(item, cur_path + [i]))
            elif isinstance(item, tf.keras.layers.Layer):
                ret[id(item)] = LayerInfo(item, cur_path + [i])
    else:
        raise ValueError('Unexpected model type: {}'.format(type(model)))
    return ret


def _select_config(layer_info, config_list):
    # Find the last matching config block for given layer.
    # Returns None if the layer should not be compressed.
    ret = None
    for config in config_list:
        if 'op_types' in config:
            match = layer_info.type in config['op_types']
            match_default = 'default' in config['op_types'] and layer_info.type in default_layers.weighted_modules
            if not match and not match_default:
                continue
        if 'op_names' in config and layer_info.name not in config['op_names']:
            continue
        ret = config
    if ret is None or 'exclude' in ret:
        return None
    return ret


def _instrument_model(model, wrappers):
    # Replace layers with wrappers.
    for wrapper in reversed(wrappers):
        cur = model
        for key in wrapper.layer_info.path[:-1]:
            if isinstance(key, int):
                cur = cur[key]
            else:
                cur = getattr(cur, key)
        key = wrapper.layer_info.path[-1]
        if isinstance(key, int):
            cur[key] = wrapper
        else:
            setattr(cur, key, wrapper)
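# Illustrative sketch (not part of this diff): a minimal custom pruner on top
# of the new API — subclass Pruner and return a dict of masks from calc_masks().
# The random criterion and the class name are made up for demonstration.
class _RandomPrunerSketch(Pruner):
    def validate_config(self, model, config_list):
        pass  # Compressor.__init__ calls this hook; a real pruner would check the schema.

    def calc_masks(self, wrapper, **kwargs):
        masks = {}
        for weight in wrapper.layer.weights:
            if 'bias' in weight.name:
                continue
            keep = tf.random.uniform(weight.shape) >= wrapper.config['sparsity']
            masks[weight.name] = tf.cast(keep, weight.dtype)
        return masks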
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from tensorflow import keras

# Removed by this commit: the type-name / weight-index lookup tables.
supported_layers = {
    keras.layers.Conv1D: ('Conv1D', 0),
    keras.layers.Conv2D: ('Conv2D', 0),
    keras.layers.Conv2DTranspose: ('Conv2DTranspose', 0),
    keras.layers.Conv3D: ('Conv3D', 0),
    keras.layers.Conv3DTranspose: ('Conv3DTranspose', 0),
    keras.layers.ConvLSTM2D: ('ConvLSTM2D', 0),
    keras.layers.Dense: ('Dense', 0),
    keras.layers.Embedding: ('Embedding', 0),
    keras.layers.GRU: ('GRU', 0),
    keras.layers.LSTM: ('LSTM', 0),
}

default_layers = [x[0] for x in supported_layers.values()]

def get_op_type(layer_type):
    if layer_type in supported_layers:
        return supported_layers[layer_type][0]
    else:
        return None

def get_weight_index(op_type):
    for k in supported_layers:
        if supported_layers[k][0] == op_type:
            return supported_layers[k][1]
    return None

# Added by this commit: layer types matched by 'default' in a config list.
weighted_modules = [
    'Conv1D', 'Conv2D', 'Conv3D', 'Conv1DTranspose', 'Conv2DTranspose', 'Conv3DTranspose',
    'Dense',
    'PReLU',
    'Embedding',
]
import tensorflow as tf

from ..compressor import Pruner

__all__ = [
    'OneshotPruner',
    'LevelPruner',
]


class OneshotPruner(Pruner):
    def __init__(self, model, config_list, pruning_algorithm='level', **algo_kwargs):
        super().__init__(model, config_list)
        self.set_wrappers_attribute('calculated', False)
        self.masker = MASKER_DICT[pruning_algorithm](model, self, **algo_kwargs)

    def validate_config(self, model, config_list):
        pass  # TODO

    def calc_masks(self, wrapper, wrapper_idx=None):
        if wrapper.calculated:
            # One-shot: each layer is masked only once.
            return None
        sparsity = wrapper.config['sparsity']
        masks = self.masker.calc_masks(sparsity, wrapper, wrapper_idx)
        if masks is not None:
            wrapper.calculated = True
        return masks


class LevelPruner(OneshotPruner):
    def __init__(self, model, config_list):
        super().__init__(model, config_list, pruning_algorithm='level')


class WeightMasker:
    def __init__(self, model, pruner, **kwargs):
        self.model = model
        self.pruner = pruner

    def calc_masks(self, sparsity, wrapper, wrapper_idx=None):
        raise NotImplementedError()


class LevelPrunerMasker(WeightMasker):
    def calc_masks(self, sparsity, wrapper, wrapper_idx=None):
        masks = {}
        for weight_variable in wrapper.layer.weights:
            if 'bias' in weight_variable.name:
                continue

            k = int(tf.size(weight_variable).numpy() * sparsity)
            if k == 0:
                continue

            weight = weight_variable.read_value()
            if wrapper.masks.get(weight_variable.name) is not None:
                weight = tf.math.multiply(weight, wrapper.masks[weight_variable.name])

            w_abs = tf.math.abs(weight)
            # Prune the k weights with the smallest magnitude: threshold at the
            # k-th smallest |w| (top_k on the negated tensor finds the minima),
            # keeping the mask in the same shape as the weight.
            threshold = -tf.math.top_k(-tf.reshape(w_abs, [-1]), k)[0][-1]
            mask = tf.cast(tf.math.greater(w_abs, threshold), weight.dtype)
            masks[weight_variable.name] = mask
        return masks


MASKER_DICT = {
    'level': LevelPrunerMasker,
}
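# Illustrative sketch (not part of this diff): end-to-end use of the new
# LevelPruner defined above, mirroring the documentation snippet earlier in
# this commit. Assumes the package re-exports LevelPruner (see __init__.py).
#
#     import tensorflow as tf
#     from nni.compression.tensorflow import LevelPruner
#
#     model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(4,))])
#     pruner = LevelPruner(model, [{'sparsity': 0.8, 'op_types': ['default']}])
#     model = pruner.compress()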
@@ -3,33 +3,12 @@
from unittest import TestCase, main

import numpy as np
import tensorflow as tf
import torch
import torch.nn.functional as F
import schema

import nni.compression.torch as torch_compressor
import math

if tf.__version__ >= '2.0':
    import nni.compression.tensorflow as tf_compressor


def get_tf_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=5, kernel_size=7, input_shape=[28, 28, 1], activation='relu', padding="SAME"),
        tf.keras.layers.MaxPooling2D(pool_size=2),
        tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu', padding="SAME"),
        tf.keras.layers.MaxPooling2D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=tf.keras.optimizers.SGD(lr=1e-3),
                  metrics=["accuracy"])
    return model


class TorchModel(torch.nn.Module):
    def __init__(self):
@@ -52,13 +31,6 @@ class TorchModel(torch.nn.Module):
        return F.log_softmax(x, dim=1)


def tf2(func):
    # Skip the wrapped test unless TensorFlow 2.x is available.
    def test_tf2_func(*args):
        if tf.__version__ >= '2.0':
            func(*args)
    return test_tf2_func


class CompressorTestCase(TestCase):
    def test_torch_quantizer_modules_detection(self):
        # test if modules can be detected
@@ -92,11 +64,6 @@ class CompressorTestCase(TestCase):
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        torch_compressor.LevelPruner(model, configure_list, optimizer).compress()

    @tf2
    def test_tf_level_pruner(self):
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        tf_compressor.LevelPruner(get_tf_model(), configure_list).compress()

    def test_torch_naive_quantizer(self):
        model = TorchModel()
        configure_list = [{
@@ -108,10 +75,6 @@ }]
        }]
        torch_compressor.NaiveQuantizer(model, configure_list).compress()

    @tf2
    def test_tf_naive_quantizer(self):
        tf_compressor.NaiveQuantizer(get_tf_model(), [{'op_types': ['default']}]).compress()

    def test_torch_fpgm_pruner(self):
        """
        With filters(kernels) weights defined as above (w), it is obvious that w[4] and w[5] are the Geometric Median
@@ -141,23 +104,7 @@ masks = pruner.calc_mask(model.conv2)
        masks = pruner.calc_mask(model.conv2)
        assert all(torch.sum(masks['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 0., 0., 0., 0., 0., 0., 125., 125.]))

    @tf2
    def test_tf_fpgm_pruner(self):
        w = np.array([np.ones((5, 3, 3)) * (i + 1) for i in range(10)]).astype(np.float32)
        model = get_tf_model()
        config_list = [{'sparsity': 0.2, 'op_types': ['Conv2D']}]

        pruner = tf_compressor.FPGMPruner(model, config_list)

        weights = model.layers[2].weights
        weights[0] = np.array(w).astype(np.float32).transpose([2, 3, 0, 1]).transpose([0, 1, 3, 2])
        model.layers[2].set_weights([weights[0], weights[1].numpy()])

        layer = tf_compressor.compressor.LayerInfo(model.layers[2])
        masks = pruner.calc_mask(layer, config_list[0]).numpy()
        masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0])
        assert all(masks.sum((1)) == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.]))

    def test_torch_l1filter_pruner(self):
        """
        Filters with the minimum sum of the weights' L1 norm are pruned in this paper:
......