OpenDAS / nni · commit 81fcff86
authored Nov 12, 2019 by Cjkkkk, committed by QuanluZhang on Nov 12, 2019
Api refactor (#1728)

API refactor for compression; in particular, the quantization APIs.
parent 7c4e81b5
Showing 3 changed files with 147 additions and 26 deletions (+147 -26):
- docs/en_US/Compressor/Overview.md (+46 -6)
- src/sdk/pynni/nni/compression/torch/compressor.py (+93 -19)
- src/sdk/pynni/tests/test_compressor.py (+8 -1)
docs/en_US/Compressor/Overview.md

````diff
@@ -180,13 +180,55 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
     def quantize_weight(self, weight, config, **kwargs):
         """
-        weight is the target weight tensor
-        config is the selected dict object in config_list for this layer
-        kwargs contains op, op_types, and op_name
-        design your quantizer and return new weight
+        quantize should overload this method to quantize weight tensors.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        weight : Tensor
+            weight that needs to be quantized
+        config : dict
+            the configuration for weight quantization
         """
+        # Put your code to generate `new_weight` here
         return new_weight
 
+    def quantize_output(self, output, config, **kwargs):
+        """
+        quantize should overload this method to quantize output.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        output : Tensor
+            output that needs to be quantized
+        config : dict
+            the configuration for output quantization
+        """
+        # Put your code to generate `new_output` here
+        return new_output
+
+    def quantize_input(self, *inputs, config, **kwargs):
+        """
+        quantize should overload this method to quantize input.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        inputs : Tensor
+            inputs that needs to be quantized
+        config : dict
+            the configuration for inputs quantization
+        """
+        # Put your code to generate `new_input` here
+        return new_input
+
     # note for pytorch version, there is no sess in input arguments
     def update_epoch(self, epoch_num, sess):
         pass
@@ -200,8 +242,6 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
         pass
 ```
 
-__[TODO]__ Will add another member function `quantize_layer_output`, as some quantization algorithms also quantize layers' output.
-
 ### Usage of user customized compression algorithm
 
 __[TODO]__ ...
````
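Under the refactored API, a custom quantizer overrides whichever of the three hooks it needs. Below is a minimal sketch of a weight-only quantizer; the class name `RoundQuantizer` and its rounding scheme are illustrative, not part of this commit, and the PyTorch base class is assumed to live at `nni.compression.torch.Quantizer`, mirroring the TensorFlow path above.

```python
import torch
from nni.compression.torch import Quantizer  # PyTorch counterpart; import path assumed

class RoundQuantizer(Quantizer):
    """Illustrative quantizer: symmetric rounding to the configured bit width."""

    def quantize_weight(self, weight, config, **kwargs):
        bits = config.get('quant_bits', {}).get('weight', 8)
        # map the largest weight magnitude to the top of the signed grid
        scale = weight.abs().max() / (2 ** (bits - 1) - 1)
        scale = torch.clamp(scale, min=1e-8)  # guard against all-zero weights
        # round onto the quantization grid, then scale back to float
        return torch.round(weight / scale) * scale
```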
src/sdk/pynni/nni/compression/torch/compressor.py

```diff
@@ -32,21 +32,34 @@ class Compressor:
         """
         self.bound_model = model
         self.config_list = config_list
-        self.modules_to_compress = []
+        self.modules_to_compress = None
 
-    def compress(self):
+    def detect_modules_to_compress(self):
         """
-        Compress the model with algorithm implemented by subclass.
+        detect all modules should be compressed, and save the result in `self.modules_to_compress`.
         The model will be instrumented and user should never edit it after calling this method.
-        `self.modules_to_compress` records all the to-be-compressed layers
         """
-        for name, module in self.bound_model.named_modules():
-            layer = LayerInfo(name, module)
-            config = self.select_config(layer)
-            if config is not None:
-                self._instrument_layer(layer, config)
-                self.modules_to_compress.append((layer, config))
+        if self.modules_to_compress is None:
+            self.modules_to_compress = []
+            for name, module in self.bound_model.named_modules():
+                layer = LayerInfo(name, module)
+                config = self.select_config(layer)
+                if config is not None:
+                    self.modules_to_compress.append((layer, config))
+        return self.modules_to_compress
+
+    def compress(self):
+        """
+        Compress the model with algorithm implemented by subclass.
+        The model will be instrumented and user should never edit it after calling this method.
+        `self.modules_to_compress` records all the to-be-compressed layers
+        """
+        modules_to_compress = self.detect_modules_to_compress()
+        for layer, config in modules_to_compress:
+            self._instrument_layer(layer, config)
         return self.bound_model
 
     def get_modules_to_compress(self):
```
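Splitting detection from instrumentation lets callers inspect which layers were selected for compression. A usage sketch with the new-style config follows; the toy model and bit widths are illustrative, and `NaiveQuantizer` with the `torch_compressor` import alias follows the updated test file shown further below.

```python
import torch.nn as nn
import nni.compression.torch as torch_compressor  # import alias assumed, as in the tests

model = nn.Sequential(nn.Conv2d(1, 4, 3), nn.Flatten(), nn.Linear(4 * 26 * 26, 10))
config_list = [{
    'quant_types': ['weight'],
    'quant_bits': {'weight': 8},
    'op_types': ['Conv2d', 'Linear']
}]

quantizer = torch_compressor.NaiveQuantizer(model, config_list)
model = quantizer.compress()  # detect_modules_to_compress() caches, then each layer is instrumented

# the cached (LayerInfo, config) pairs are now queryable
for layer, config in quantizer.get_modules_to_compress():
    print(layer.name, config['quant_types'])
```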
```diff
@@ -55,7 +68,7 @@ class Compressor:
         Returns
         -------
-        list
+        self.modules_to_compress : list
             a list of the layers, each of which is a tuple (`layer`, `config`),
             `layer` is `LayerInfo`, `config` is a `dict`
         """
@@ -72,7 +85,7 @@ class Compressor:
         Returns
         -------
-        config or None
+        ret : config or None
             the retrieved configuration for this layer, if None, this layer should
             not be compressed
         """
```
```diff
@@ -240,26 +253,87 @@ class Quantizer(Compressor):
     """
     def quantize_weight(self, weight, config, op, op_type, op_name):
-        """user should know where dequantize goes and implement it in quantize method
-        we now do not provide dequantize method
+        """
+        quantize should overload this method to quantize weight.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        weight : Tensor
+            weight that needs to be quantized
+        config : dict
+            the configuration for weight quantization
         """
         raise NotImplementedError("Quantizer must overload quantize_weight()")
 
+    def quantize_output(self, output, config, op, op_type, op_name):
+        """
+        quantize should overload this method to quantize output.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        output : Tensor
+            output that needs to be quantized
+        config : dict
+            the configuration for output quantization
+        """
+        raise NotImplementedError("Quantizer must overload quantize_output()")
+
+    def quantize_input(self, *inputs, config, op, op_type, op_name):
+        """
+        quantize should overload this method to quantize input.
+        This method is effectively hooked to :meth:`forward` of the model.
+        Parameters
+        ----------
+        inputs : Tensor
+            inputs that needs to be quantized
+        config : dict
+            the configuration for inputs quantization
+        """
+        raise NotImplementedError("Quantizer must overload quantize_input()")
+
     def _instrument_layer(self, layer, config):
+        """
+        Create a wrapper forward function to replace the original one.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the mask
+        config : dict
+            the configuration for quantization
+        """
         assert layer._forward is None, 'Each model can only be compressed once'
-        if not _check_weight(layer.module):
-            _logger.warning('Module %s does not have parameter "weight"', layer.name)
-            return
+        assert "quant_types" in config, 'must provide quant_types in config'
+        assert isinstance(config["quant_types"], list), 'quant_types must be list type'
+
+        if 'weight' in config["quant_types"]:
+            if not _check_weight(layer.module):
+                _logger.warning('Module %s does not have parameter "weight"', layer.name)
+                return
         layer._forward = layer.module.forward
 
         def new_forward(*inputs):
-            weight = layer.module.weight.data
-            new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
-            layer.module.weight.data = new_weight
-            return layer._forward(*inputs)
+            if 'input' in config["quant_types"]:
+                inputs = self.quantize_input(inputs, config=config, op=layer.module, op_type=layer.type, op_name=layer.name)
 
-        layer.module.forward = new_forward
+            if 'weight' in config["quant_types"] and _check_weight(layer.module):
+                weight = layer.module.weight.data
+                new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
+                layer.module.weight.data = new_weight
+                result = layer._forward(*inputs)
+                layer.module.weight.data = weight
+            else:
+                result = layer._forward(*inputs)
+
+            if 'output' in config["quant_types"]:
+                result = self.quantize_output(result, config, op=layer.module, op_type=layer.type, op_name=layer.name)
+            return result
+
+        layer.module.forward = new_forward
 
 def _check_weight(module):
     try:
```
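One behavioral change worth noting in the wrapper: `new_forward` now saves `weight.data`, swaps in the quantized tensor for the duration of `_forward`, and restores the original afterwards, so weight quantization is applied transiently per call rather than destructively. A self-contained sketch of the observable effect, assuming `NaiveQuantizer` behaves as exercised by the updated test below (model and shapes illustrative):

```python
import torch
import torch.nn as nn
import nni.compression.torch as torch_compressor  # import alias assumed, as in the tests

model = nn.Linear(8, 4)  # toy module, illustrative only
config_list = [{'quant_types': ['weight'], 'quant_bits': {'weight': 8}, 'op_types': ['Linear']}]
torch_compressor.NaiveQuantizer(model, config_list).compress()

before = model.weight.data.clone()
_ = model(torch.randn(2, 8))  # forward computes with the quantized weight...

# ...but the stored parameter is unchanged afterwards, because new_forward
# restores the saved weight.data once _forward returns.
assert torch.equal(model.weight.data, before)
```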
src/sdk/pynni/tests/test_compressor.py

```diff
@@ -114,7 +114,14 @@ class CompressorTestCase(TestCase):
     def test_torch_quantizer(self):
         model = TorchMnist()
-        torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress()
+        configure_list = [{
+            'quant_types': ['weight'],
+            'quant_bits': {
+                'weight': 8,
+            },
+            'op_types': ['Conv2d', 'Linear']
+        }]
+        torch_compressor.NaiveQuantizer(model, configure_list).compress()
 
 
 if __name__ == '__main__':
```
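The updated test exercises only `'weight'`. Under the refactored `_instrument_layer`, `quant_types` may name any subset of the three hooks; a quantizer configured with `'input'` or `'output'` must also override `quantize_input` / `quantize_output`, since the base class otherwise raises `NotImplementedError`. A config entry exercising all three might look like this (bit widths illustrative):

```python
configure_list = [{
    'quant_types': ['input', 'weight', 'output'],  # must be a list, per the new asserts
    'quant_bits': {'input': 8, 'weight': 8, 'output': 8},
    'op_types': ['Conv2d', 'Linear']
}]
```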