# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Quantization related ops for LSTM."""

from __future__ import absolute_import
from __future__ import division

import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.python.training import moving_averages


def _quant_var(
    name,
    initializer_val,
    vars_collection=tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
):
  """Create an var for storing the min/max quantization range."""
  return contrib_framework.model_variable(
      name,
      shape=[],
      initializer=tf.constant_initializer(initializer_val),
      collections=[vars_collection],
      trainable=False)
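

# Illustrative sketch of how a min/max pair from _quant_var is consumed
# (`inputs` below is a hypothetical float tensor): the pair feeds
# tf.fake_quant_with_min_max_vars, and because the variables live in
# MOVING_AVERAGE_VARIABLES rather than TRAINABLE_VARIABLES, gradient descent
# never touches the ranges; they are only updated by the EMA assigns below.
#
#   min_var = _quant_var('min', 0.0)
#   max_var = _quant_var('max', 6.0)
#   outputs = tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)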


def quantizable_concat(inputs,
                       axis,
                       is_training,
                       is_quantized=True,
                       default_min=0,
                       default_max=6,
                       ema_decay=0.999,
                       scope='quantized_concat'):
  """Concat replacement with quantization option.

  Allows concat inputs to share the same min/max ranges. Adapted from
  experimental/gazelle/synthetic/model/tpu/utils.py.

  Args:
    inputs: list of tensors to concatenate.
    axis: dimension along which to concatenate.
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from concatenation of the input tensors.
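
  Example (a minimal sketch; shapes below are placeholder assumptions):
    t1 = tf.placeholder(tf.float32, [2, 4, 4, 8])
    t2 = tf.placeholder(tf.float32, [2, 4, 4, 8])
    net = quantizable_concat([t1, t2], axis=3, is_training=True)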
  """
  if is_quantized:
    with tf.variable_scope(scope):
      tf.logging.info('inputs: {}'.format(inputs))
      for t in inputs:
        tf.logging.info(t)

      min_var = _quant_var('min', default_min)
      max_var = _quant_var('max', default_max)
      if not is_training:
        # If we are building an eval graph, just use the values in the variables.
        quant_inputs = [
            tf.fake_quant_with_min_max_vars(t, min_var, max_var) for t in inputs
        ]
        tf.logging.info('min_val: {}'.format(min_var))
        tf.logging.info('max_val: {}'.format(max_var))
      else:
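        # Concat once (unquantized) so that a single min/max range can be
        # measured across all of the inputs; the quantized concat happens
        # below.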
        concat_tensors = tf.concat(inputs, axis=axis)
        tf.logging.info('concat_tensors: {}'.format(concat_tensors))
        # TFLite requires that 0.0 is always in the [min; max] range.
        range_min = tf.minimum(
            tf.reduce_min(concat_tensors), 0.0, name='SafeQuantRangeMin')
        range_max = tf.maximum(
            tf.reduce_max(concat_tensors), 0.0, name='SafeQuantRangeMax')
        # Otherwise we need to keep track of the moving averages of the min
        # and max of the elements of the input tensor.
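        # assign_moving_average(var, value, decay) roughly computes
        #   var = decay * var + (1 - decay) * value
        # (modulo zero-debiasing), i.e. an exponential moving average of the
        # observed range extrema.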
        min_val = moving_averages.assign_moving_average(
            min_var,
            range_min,
            ema_decay,
            name='AssignMinEma')
        max_val = moving_averages.assign_moving_average(
            max_var,
            range_max,
            ema_decay,
            name='AssignMaxEma')
        tf.logging.info('min_val: {}'.format(min_val))
        tf.logging.info('max_val: {}'.format(max_val))
        quant_inputs = [
            tf.fake_quant_with_min_max_vars(t, min_val, max_val) for t in inputs
        ]
      tf.logging.info('quant_inputs: {}'.format(quant_inputs))
      outputs = tf.concat(quant_inputs, axis=axis)
      tf.logging.info('outputs: {}'.format(outputs))
  else:
    outputs = tf.concat(inputs, axis=axis)
  return outputs


def quantizable_separable_conv2d(inputs,
                                 num_outputs,
                                 kernel_size,
                                 is_quantized=True,
                                 depth_multiplier=1,
                                 stride=1,
                                 activation_fn=tf.nn.relu6,
                                 normalizer_fn=None,
                                 scope=None):
  """Quantization friendly backward compatible separable conv2d.

  This op has the same API as separable_conv2d. The main difference is that
  an additional BiasAdd is manually inserted after the depthwise conv, so
  that the depthwise bias does not have a name conflict with the pointwise
  bias. The motivation for this op is that the quantization script needs a
  BiasAdd in order to recognize the op, and a native call to separable_conv2d
  does not create one for the depthwise conv.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    num_outputs: The number of pointwise convolution output filters. If it is
      None, the pointwise convolution stage is skipped.
    kernel_size: A list of length 2: [kernel_height, kernel_width] of the
      filters. Can be an int if both values are the same.
    is_quantized: flag to enable/disable quantization.
    depth_multiplier: The number of depthwise convolution output channels for
      each input channel. The total number of depthwise convolution output
      channels will be equal to num_filters_in * depth_multiplier.
    stride: A list of length 2: [stride_height, stride_width], specifying the
      depthwise convolution stride. Can be an int if both strides are the same.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of biases.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from the separable convolution of the input tensor.
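
  Example (a minimal sketch; the input shape is a placeholder assumption):
    images = tf.placeholder(tf.float32, [1, 32, 32, 3])
    net = quantizable_separable_conv2d(
        images, num_outputs=64, kernel_size=3, scope='SepConv')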
  """
  if is_quantized:
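    # Depthwise stage: run with stride 1 and no bias or activation. The
    # stride and activation are applied by the pointwise (1x1) conv below,
    # and the depthwise bias is added explicitly so it gets its own BiasAdd.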
    outputs = contrib_layers.separable_conv2d(
        inputs,
        None,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=1,
        activation_fn=None,
        normalizer_fn=None,
        biases_initializer=None,
        scope=scope)
    outputs = contrib_layers.bias_add(
        outputs, trainable=True, scope='%s_bias' % scope)
    outputs = contrib_layers.conv2d(
        outputs,
        num_outputs, [1, 1],
        activation_fn=activation_fn,
        stride=stride,
        normalizer_fn=normalizer_fn,
        scope=scope)
  else:
    outputs = contrib_layers.separable_conv2d(
        inputs,
        num_outputs,
        kernel_size,
        depth_multiplier=depth_multiplier,
        stride=stride,
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        scope=scope)
  return outputs


def quantize_op(inputs,
                is_training=True,
                is_quantized=True,
                default_min=0,
                default_max=6,
                ema_decay=0.999,
                scope='quant'):
  """Inserts a fake quantization op after inputs.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels].
    is_training: true if the graph is a training graph.
    is_quantized: flag to enable/disable quantization.
    default_min: default min value for fake quant op.
    default_max: default max value for fake quant op.
    ema_decay: the moving average decay for the quantization variables.
    scope: Optional scope for variable_scope.

  Returns:
    Tensor resulting from quantizing the input tensor.
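
  Example (a minimal sketch; the input shape is a placeholder assumption):
    act = tf.placeholder(tf.float32, [1, 8, 8, 16])
    act = quantize_op(act, is_training=True, scope='act_quant')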
  """
  if is_quantized:
    with tf.variable_scope(scope):
      min_var = _quant_var('min', default_min)
      max_var = _quant_var('max', default_max)
      if is_training:
        # TFLite requires that 0.0 is always in the [min; max] range.
        range_min = tf.minimum(tf.reduce_min(inputs), 0.0, 'SafeQuantRangeMin')
        range_max = tf.maximum(tf.reduce_max(inputs), 0.0, 'SafeQuantRangeMax')
        min_val = moving_averages.assign_moving_average(
            min_var, range_min, ema_decay, name='AssignMinEma')
        max_val = moving_averages.assign_moving_average(
            max_var, range_max, ema_decay, name='AssignMaxEma')
        inputs = tf.fake_quant_with_min_max_vars(inputs, min_val, max_val)
      else:
        inputs = tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)
  return inputs