Unverified Commit 358bdb18 authored by QuanluZhang, committed by GitHub

Dev compress refactor (#1690)

* refactor
parent 025e0b46
@@ -9,13 +9,13 @@ You can easily compress a model with NNI compression. Take pruning for example,
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```
The 'default' op_type stands for the module types defined in [default_layers.py](https://github.com/microsoft/nni/blob/master/src/sdk/pynni/nni/compression/torch/default_layers.py) for PyTorch.
Therefore ```{ 'sparsity': 0.8, 'op_types': ['default'] }``` means that **all layers with the specified op_types will be compressed with the same 0.8 sparsity**. When ```pruner.compress()``` is called, the model is compressed with masks; after that you can fine-tune the model normally, and the **pruned weights that have been masked won't be updated**.
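For concreteness, here is a minimal fine-tuning sketch; the loop, `optimizer`, and `train_loader` are placeholders from a typical PyTorch training script, not part of NNI:

```python
import torch.nn.functional as F

pruner = LevelPruner(model, config_list)
pruner.compress()  # masks are inserted into the model's forward pass
for data, target in train_loader:
    optimizer.zero_grad()
    loss = F.cross_entropy(model(data), target)
    loss.backward()
    optimizer.step()  # masked weights are re-masked on the next forward pass
```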
## Then, make this automatic
@@ -84,9 +84,9 @@ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity,
                   {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity,
                    'start_epoch': 0, 'end_epoch': 3,
                    'frequency': 1, 'op_name': 'conv1' },]
PRUNERS = {'level': LevelPruner(model, config_list_level), 'agp': AGP_Pruner(model, config_list_agp)}
pruner = PRUNERS[params['prune_method']['_name']]
pruner.compress()
... # fine tuning
acc = evaluate(model) # evaluation
nni.report_final_result(acc)
...
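For reference, a sketch of how `params` could be produced; the search-space entry below is an illustrative assumption, not taken from this commit:

```python
import nni

# Hypothetical search space entry (normally written in search_space.json):
# {"prune_method": {"_type": "choice",
#                   "_value": [{"_name": "level"}, {"_name": "agp"}]}}
params = nni.get_next_parameter()  # e.g. {'prune_method': {'_name': 'level'}}
pruner = PRUNERS[params['prune_method']['_name']]
```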
@@ -25,8 +25,8 @@ Tensorflow code
```python
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(tf.get_default_graph(), config_list)
pruner.compress()
```
PyTorch code
@@ -34,13 +34,13 @@ PyTorch code
```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```
You can use other compression algorithms in the package of `nni.compression`. The algorithms are implemented in both PyTorch and TensorFlow, under `nni.compression.torch` and `nni.compression.tensorflow` respectively. You can refer to [Pruner](./Pruner.md) and [Quantizer](./Quantizer.md) for detailed descriptions of the supported algorithms.
The function call `pruner.compress()` modifies the user-defined model (in TensorFlow the model can be obtained with `tf.get_default_graph()`, while in PyTorch the model is the defined model class): masks are inserted into it, and when you then run the model, the masks take effect. The masks can be adjusted at runtime by the algorithms.
When instantiating a compression algorithm, a `config_list` is passed in. We describe how to write this config below.
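As a quick illustration (the exact keys supported vary per algorithm), a `config_list` is a list of dicts, each selecting ops and giving them settings; `op_name` selects a single op by name, as in the AGP example earlier in this commit:

```python
config_list = [
    { 'sparsity': 0.8, 'op_types': ['default'] },  # all supported layer types
    { 'sparsity': 0.6, 'op_name': 'conv1' },       # a specific op, by name
]
```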
@@ -111,20 +111,26 @@ If you want to write a new pruning algorithm, you can write a class that inherit
# nni.compression.tensorflow.Pruner with
# nni.compression.torch.Pruner
class YourPruner(nni.compression.tensorflow.Pruner):
    def __init__(self, model, config_list):
        """
        We suggest you use the NNI-defined spec for config
        """
        super().__init__(model, config_list)

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with ``mul()`` operation on the weight.
        This method is effectively hooked to ``forward()`` method of the model.

        Parameters
        ----------
        layer: LayerInfo
            calculate mask for ``layer``'s weight
        config: dict
            the configuration for generating the mask
        """
        return your_mask

    # note for pytorch version, there is no sess in input arguments
@@ -133,16 +139,18 @@ class YourPruner(nni.compression.tensorflow.Pruner):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
        """
        Can do some processing based on the model or weights bound
        in ``self.bound_model``
        """
        pass
```
For the simplest algorithm, you only need to override ``calc_mask``. It receives the to-be-compressed layers one by one along with their compression configuration. You generate the mask for a layer's weight in this function and return it. Then NNI applies the mask for you.
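As a minimal sketch (PyTorch flavor; it reads `layer.module.weight.data` the same way the built-in pruners in this commit do, but is not one of them):

```python
import torch

def calc_mask(self, layer, config):
    # Magnitude pruning: mask out the smallest `sparsity` fraction of weights.
    weight = layer.module.weight.data
    k = int(weight.numel() * config['sparsity'])
    if k == 0:
        return torch.ones_like(weight)
    threshold = torch.topk(weight.abs().view(-1), k, largest=False)[0].max()
    return torch.gt(weight.abs(), threshold).type_as(weight)
```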
Some algorithms generate masks based on training progress, i.e., the epoch number. We provide `update_epoch` for the pruner to be aware of the training progress. It should be called at the beginning of each epoch.
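A brief usage sketch (the loop and `train` are placeholders):

```python
for epoch in range(num_epochs):
    pruner.update_epoch(epoch)  # inform the pruner of training progress
    train(model)                # one epoch of your usual training code
```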
Some algorithms may want global information for generating masks, for example, all weights of the model (for statistics). You can use `self.bound_model` in the Pruner class to access the weights. If you also need the optimizer's information (for example in PyTorch), you could override `__init__` to receive more arguments such as the model's optimizer. Then `step` can process or update the information according to the algorithm. You can refer to [source code of built-in algorithms](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/compressors) for example implementations.
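For example, a pruner that also wants the optimizer could be sketched as follows; the extra `optimizer` argument is this example's assumption, not a required NNI signature:

```python
import torch
import nni.compression.torch

class StatefulPruner(nni.compression.torch.Pruner):
    def __init__(self, model, config_list, optimizer):
        super().__init__(model, config_list)
        self.optimizer = optimizer  # remembered for use in step()

    def calc_mask(self, layer, config):
        # no-op mask, just to keep the sketch complete
        return torch.ones_like(layer.module.weight.data)

    def step(self):
        # called every training step; may read self.bound_model and
        # self.optimizer to gather statistics for future masks
        pass
```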
### Quantization algorithm

@@ -154,20 +162,19 @@ The interface for customizing quantization algorithm is similar to that of pruni
# nni.compression.tensorflow.Quantizer with
# nni.compression.torch.Quantizer
class YourQuantizer(nni.compression.tensorflow.Quantizer):
    def __init__(self, model, config_list):
        """
        We suggest you use the NNI-defined spec for config
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        """
        weight is the target weight tensor
        config is the selected dict object in config_list for this layer
        kwargs contains op, op_types, and op_name
        design your quantizer and return the new weight
        """
        return new_weight

    # note for pytorch version, there is no sess in input arguments
@@ -176,8 +183,10 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
    # note for pytorch version, there is no sess in input arguments
    def step(self, sess):
        """
        Can do some processing based on the model or weights bound
        in ``self.bound_model``
        """
        pass
```
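As a minimal sketch of `quantize_weight` (simple symmetric linear quantize-dequantize in PyTorch; illustrative only, not one of NNI's built-in quantizers):

```python
def quantize_weight(self, weight, config, **kwargs):
    # Quantize-dequantize to `q_bits` levels, keeping the tensor dtype.
    q_bits = config.get('q_bits', 8)
    scale = weight.abs().max() / (2 ** (q_bits - 1) - 1)
    if scale == 0:
        return weight
    return (weight / scale).round() * scale
```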
...
@@ -13,16 +13,16 @@ Tensorflow code
```
from nni.compression.tensorflow import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model_graph, config_list)
pruner.compress()
```
PyTorch code
```
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
pruner = LevelPruner(model, config_list)
pruner.compress()
```

#### User configuration for Level Pruner
@@ -53,8 +53,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
pruner = AGP_Pruner(tf.get_default_graph(), config_list)
pruner.compress()
```
PyTorch code
```python
@@ -67,8 +67,8 @@ config_list = [{
    'frequency': 1,
    'op_types': 'default'
}]
pruner = AGP_Pruner(model, config_list)
pruner.compress()
```
Second, you should add the code below to update the epoch number when you finish one epoch in your training code.
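The snippet itself is collapsed in this diff; a minimal sketch of the intended call (PyTorch flavor, loop names are placeholders) is:

```python
for epoch in range(num_epochs):
    train(model)                # one epoch of your training code
    pruner.update_epoch(epoch)  # let AGP recompute masks on its schedule
```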
...
@@ -8,11 +8,11 @@ We provide Naive Quantizer to quantize weight to default 8 bits, you can use it
### Usage
tensorflow
```python
nni.compressors.tensorflow.NaiveQuantizer(model_graph).compress()
```
pytorch
```python
nni.compressors.torch.NaiveQuantizer(model).compress()
```
***
@@ -32,15 +32,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
quantizer = QAT_Quantizer(tf.get_default_graph(), config_list)
quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import QAT_Quantizer
config_list = [{ 'q_bits': 8, 'op_types': ['default'] }]
quantizer = QAT_Quantizer(model, config_list)
quantizer.compress()
```
You can view the example for more information
@@ -61,15 +61,15 @@ Tensorflow code
```python
from nni.compressors.tensorflow import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
quantizer = DoReFaQuantizer(tf.get_default_graph(), config_list)
quantizer.compress()
```
PyTorch code
```python
from nni.compressors.torch import DoReFaQuantizer
config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
quantizer = DoReFaQuantizer(model, config_list)
quantizer.compress()
```
You can view the example for more information
...
@@ -93,15 +93,13 @@ def main():
        'frequency': 1,
        'op_types': ['default']
    }]
    pruner = AGP_Pruner(tf.get_default_graph(), configure_list)
    # if you want to load from yaml file
    # configure_file = nni.compressors.tf_compressor._nnimc_tf._tf_default_load_configure_file('configure_example.yaml','AGPruner')
    # configure_list = configure_file.get('config',[])
    # pruner.load_configure(configure_list)
    # you can also handle it yourself and input a configure list in json
    pruner.compress()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
...
@@ -83,8 +83,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(tf.get_default_graph())
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
    quantizer = QAT_Quantizer(tf.get_default_graph(), configure_list)
    quantizer.compress()
    # you can also use compress(model) or compress_default_graph()
    # method like QATquantizer(q_bits = 8).compress_default_graph()
...
@@ -79,8 +79,8 @@ def main():
        'op_types': ['default']
    }]
    pruner = AGP_Pruner(model, configure_list)
    pruner.compress()
    # you can also use compress(model) method
    # like that pruner.compress(model)
...
@@ -69,8 +69,8 @@ def main():
    DoReFaQuantizer(configure_list).compress(model)
    '''
    configure_list = [{'q_bits':8, 'op_types':['default']}]
    quantizer = QAT_Quantizer(model, configure_list)
    quantizer.compress()
    # you can also use compress(model) method
    # like that quantizer.compress(model)
...
@@ -8,16 +8,18 @@ _logger = logging.getLogger(__name__)
class LevelPruner(Pruner):
    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - sparsity
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        if self.if_init_list.get(op_name, True):
            threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100)
            mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype)
@@ -38,7 +40,7 @@ class AGP_Pruner(Pruner):
    https://arxiv.org/pdf/1710.01878.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - initial_sparsity
@@ -47,13 +49,15 @@ class AGP_Pruner(Pruner):
            - end_epoch: end epoch number stop update mask
            - frequency: if you want update every 2 epoch, you can set it 2
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}
        self.now_epoch = tf.Variable(0)
        self.assign_handler = []

    def calc_mask(self, layer, config):
        weight = layer.weight
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
...
@@ -10,8 +10,8 @@ _logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.layer_scale = {}

    def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
    Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.stop_gradient(tf.reduce_min(weight))
@@ -52,12 +52,12 @@ class DoReFaQuantizer(Quantizer):
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        a = tf.math.tanh(weight)
...
@@ -6,67 +6,85 @@ _logger = logging.getLogger(__name__)
class LayerInfo:
    def __init__(self, op, weight, weight_op):
        self.op = op
        self.name = op.name
        self.type = op.type
        self.weight = weight
        self.weight_op = weight_op


class Compressor:
    """
    Abstract base TensorFlow compressor
    """

    def __init__(self, model, config_list):
        """
        Record necessary info in class members

        Parameters
        ----------
        model : tensorflow model
            the model user wants to compress
        config_list : list
            the configurations that users specify for compression
        """
        self.bound_model = model
        self.config_list = config_list
        self.modules_to_compress = []

    def compress(self):
        """
        Compress the model with algorithm implemented by subclass.

        The model will be instrumented and user should never edit it after calling this method.
        `self.modules_to_compress` records all the to-be-compressed layers
        """
        for op in self.bound_model.get_operations():
            weight_index = _detect_weight_index(op)
            if weight_index is None:
                _logger.warning('Failed to detect weight for layer %s', op.name)
                continue
            weight_op = op.inputs[weight_index].op
            weight = weight_op.inputs[0]
            layer = LayerInfo(op, weight, weight_op)
            config = self.select_config(layer)
            if config is not None:
                self._instrument_layer(layer, config)
                self.modules_to_compress.append((layer, config))
        return self.bound_model

    def get_modules_to_compress(self):
        """
        To obtain all the to-be-compressed layers.

        Returns
        -------
        self.modules_to_compress : list
            a list of the layers, each of which is a tuple (`layer`, `config`),
            `layer` is `LayerInfo`, `config` is a `dict`
        """
        return self.modules_to_compress

    def select_config(self, layer):
        """
        Find the configuration for `layer` by parsing `self.config_list`

        Parameters
        ----------
        layer : LayerInfo
            one layer

        Returns
        -------
        ret : config or None
            the retrieved configuration for this layer; if None, this layer should
            not be compressed
        """
        ret = None
        for config in self.config_list:
            op_types = config.get('op_types')
            if op_types == 'default':
                op_types = default_layers.op_weight_index.keys()
@@ -79,35 +97,72 @@ class Compressor:
            return None
        return ret

    def update_epoch(self, epoch, sess):
        """
        If the user wants to update the model every epoch, this method can be overridden.
        This method should be called at the beginning of each epoch

        Parameters
        ----------
        epoch : num
            the current epoch number
        """

    def step(self, sess):
        """
        If the user wants to update the mask every step, this method can be overridden
        """

    def _instrument_layer(self, layer, config):
        """
        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            the configuration for compressing this layer
        """
        raise NotImplementedError()


class Pruner(Compressor):
    """
    Abstract base TensorFlow pruner
    """

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with `mul()` operation on the weight.
        This method works as a subgraph which will be inserted into the bound model.

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        raise NotImplementedError("Pruners must overload calc_mask()")

    def _instrument_layer(self, layer, config):
        """
        Insert a mask-multiply operation and swap it in for the original weight.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the mask
        config : dict
            the configuration for generating the mask
        """
        mask = self.calc_mask(layer, config)
        new_weight = layer.weight * mask
        tf.contrib.graph_editor.swap_outputs(layer.weight_op, new_weight.op)


class Quantizer(Compressor):
@@ -133,7 +188,7 @@ def _detect_weight_index(layer):
    index = default_layers.op_weight_index.get(layer.type)
    if index is not None:
        return index
    weight_indices = [i for i, op in enumerate(layer.inputs) if op.name.endswith('Variable/read')]
    if len(weight_indices) == 1:
        return weight_indices[0]
    return None
@@ -11,16 +11,18 @@ class LevelPruner(Pruner):
    """Prune to an exact pruning level specification
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - sparsity
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data
        op_name = layer.name
        if self.if_init_list.get(op_name, True):
            w_abs = weight.abs()
            k = int(weight.numel() * config['sparsity'])
@@ -45,7 +47,7 @@ class AGP_Pruner(Pruner):
    https://arxiv.org/pdf/1710.01878.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - initial_sparsity
@@ -54,12 +56,14 @@ class AGP_Pruner(Pruner):
            - end_epoch: end epoch number stop update mask, you should make sure start_epoch <= end_epoch
            - frequency: if you want update every 2 epoch, you can set it 2
        """
        super().__init__(model, config_list)
        self.mask_list = {}
        self.now_epoch = 0
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        weight = layer.module.weight.data
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and (
...
@@ -10,8 +10,8 @@ logger = logging.getLogger(__name__)
class NaiveQuantizer(Quantizer):
    """quantize weight to 8 bits
    """

    def __init__(self, model, config_list):
        super().__init__(model, config_list)
        self.layer_scale = {}

    def quantize_weight(self, weight, config, op_name, **kwargs):
@@ -27,12 +27,12 @@ class QAT_Quantizer(Quantizer):
    Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        if config['q_bits'] <= 1:
@@ -53,12 +53,12 @@ class DoReFaQuantizer(Quantizer):
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """

    def __init__(self, model, config_list):
        """
        config_list: supported keys:
            - q_bits
        """
        super().__init__(model, config_list)

    def quantize_weight(self, weight, config, **kwargs):
        out = weight.tanh()
...
@@ -15,49 +15,69 @@ class LayerInfo:
class Compressor:
    """
    Abstract base PyTorch compressor
    """

    def __init__(self, model, config_list):
        """
        Record necessary info in class members

        Parameters
        ----------
        model : pytorch model
            the model user wants to compress
        config_list : list
            the configurations that users specify for compression
        """
        self.bound_model = model
        self.config_list = config_list
        self.modules_to_compress = []

    def compress(self):
        """
        Compress the model with algorithm implemented by subclass.

        The model will be instrumented and user should never edit it after calling this method.
        `self.modules_to_compress` records all the to-be-compressed layers
        """
        for name, module in self.bound_model.named_modules():
            layer = LayerInfo(name, module)
            config = self.select_config(layer)
            if config is not None:
                self._instrument_layer(layer, config)
                self.modules_to_compress.append((layer, config))
        return self.bound_model

    def get_modules_to_compress(self):
        """
        To obtain all the to-be-compressed layers.

        Returns
        -------
        self.modules_to_compress : list
            a list of the layers, each of which is a tuple (`layer`, `config`),
            `layer` is `LayerInfo`, `config` is a `dict`
        """
        return self.modules_to_compress

    def select_config(self, layer):
        """
        Find the configuration for `layer` by parsing `self.config_list`

        Parameters
        ----------
        layer : LayerInfo
            one layer

        Returns
        -------
        ret : config or None
            the retrieved configuration for this layer; if None, this layer should
            not be compressed
        """
        ret = None
        for config in self.config_list:
            config['op_types'] = self._expand_config_op_types(config)
            if layer.type not in config['op_types']:
                continue
@@ -68,6 +88,35 @@ class Compressor:
            return None
        return ret

    def update_epoch(self, epoch):
        """
        If the user wants to update the model every epoch, this method can be overridden.
        This method should be called at the beginning of each epoch

        Parameters
        ----------
        epoch : num
            the current epoch number
        """

    def step(self):
        """
        If the user wants to update the model every step, this method can be overridden
        """

    def _instrument_layer(self, layer, config):
        """
        This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            the configuration for compressing this layer
        """
        raise NotImplementedError()

    def _expand_config_op_types(self, config):
        if config is None:
            return []
@@ -84,17 +133,33 @@ class Pruner(Compressor):
    Abstract base PyTorch pruner
    """

    def calc_mask(self, layer, config):
        """
        Pruners should overload this method to provide mask for weight tensors.
        The mask must have the same shape and type as the weight.
        It will be applied with `mul()` operation on the weight.
        This method is effectively hooked to `forward()` method of the model.

        Parameters
        ----------
        layer : LayerInfo
            calculate mask for `layer`'s weight
        config : dict
            the configuration for generating the mask
        """
        raise NotImplementedError("Pruners must overload calc_mask()")

    def _instrument_layer(self, layer, config):
        """
        Create a wrapper forward function to replace the original one.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the mask
        config : dict
            the configuration for generating the mask
        """
        assert layer._forward is None, 'Each model can only be compressed once'
        if not _check_weight(layer.module):
            _logger.warning('Module %s does not have parameter "weight"', layer.name)
@@ -104,12 +169,10 @@ class Pruner(Compressor):
        def new_forward(*inputs):
            # apply mask to weight
            old_weight = layer.module.weight.data
            mask = self.calc_mask(layer, config)
            layer.module.weight.data = old_weight.mul(mask)
            # calculate forward
            ret = layer._forward(*inputs)
            return ret

        layer.module.forward = new_forward
...
@@ -101,20 +101,20 @@ class CompressorTestCase(TestCase):
    def test_tf_pruner(self):
        model = TfMnist()
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        tf_compressor.LevelPruner(tf.get_default_graph(), configure_list).compress()

    def test_tf_quantizer(self):
        model = TfMnist()
        tf_compressor.NaiveQuantizer(tf.get_default_graph(), [{'op_types': ['default']}]).compress()

    def test_torch_pruner(self):
        model = TorchMnist()
        configure_list = [{'sparsity': 0.8, 'op_types': ['default']}]
        torch_compressor.LevelPruner(model, configure_list).compress()

    def test_torch_quantizer(self):
        model = TorchMnist()
        torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress()


if __name__ == '__main__':
...