# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.

As described in http://arxiv.org/abs/1602.07261.

  Inception-v4, Inception-ResNet and the Impact of Residual Connections
    on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf
import tf_slim as slim

from nets import inception_utils


def block_inception_a(inputs, scope=None, reuse=None):
  """Builds Inception-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockInceptionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
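      # The four branches contribute 96 + 96 + 96 + 96 = 384 channels, so the
      # block preserves both the spatial size and the 384-channel depth of its
      # input in inception_v4_base.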
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def block_reduction_a(inputs, scope=None, reuse=None):
  """Builds Reduction-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockReductionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
                               scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
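      # With the 384-channel input used in inception_v4_base, the output depth
      # is 384 + 256 + 384 = 1024 at half the spatial resolution (35x35 -> 17x17).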
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_b(inputs, scope=None, reuse=None):
  """Builds Inception-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockInceptionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
        branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
        branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
        branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
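      # The four branches contribute 384 + 256 + 256 + 128 = 1024 channels,
      # matching the block's 1024-channel input in inception_v4_base.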
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def block_reduction_b(inputs, scope=None, reuse=None):
  """Builds Reduction-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockReductionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
        branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
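      # With the 1024-channel input used in inception_v4_base, the output depth
      # is 192 + 320 + 1024 = 1536 at half the spatial resolution (17x17 -> 8x8).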
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(
        scope, 'BlockInceptionC', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = tf.concat(axis=3, values=[
            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
        branch_2 = tf.concat(axis=3, values=[
            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
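      # The four branches contribute 256 + 512 + 512 + 256 = 1536 channels,
      # preserving the 1536-channel depth of the block's input in
      # inception_v4_base.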
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
  """Creates the Inception V4 network up to the given final endpoint.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
      'Mixed_7d']
    scope: Optional variable_scope.

  Returns:
    net: the output tensor of the network up to the given final_endpoint.
    end_points: the set of end_points from the inception model.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  end_points = {}

  def add_and_check_final(name, net):
    end_points[name] = net
    return name == final_endpoint

  with tf.variable_scope(scope, 'InceptionV4', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 299 x 299 x 3
      net = slim.conv2d(inputs, 32, [3, 3], stride=2,
                        padding='VALID', scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
      # 149 x 149 x 32
      net = slim.conv2d(net, 32, [3, 3], padding='VALID',
                        scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
      # 147 x 147 x 64
      with tf.variable_scope('Mixed_3a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_0a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_0a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_3a', net): return net, end_points

      # 73 x 73 x 160
      with tf.variable_scope('Mixed_4a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_4a', net): return net, end_points

      # 71 x 71 x 192
      with tf.variable_scope('Mixed_5a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_5a', net): return net, end_points

      # 35 x 35 x 384
      # 4 x Inception-A blocks
      for idx in range(4):
        block_scope = 'Mixed_5' + chr(ord('b') + idx)
        net = block_inception_a(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 35 x 35 x 384
      # Reduction-A block
      net = block_reduction_a(net, 'Mixed_6a')
      if add_and_check_final('Mixed_6a', net): return net, end_points

      # 17 x 17 x 1024
      # 7 x Inception-B blocks
      for idx in range(7):
        block_scope = 'Mixed_6' + chr(ord('b') + idx)
        net = block_inception_b(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 17 x 17 x 1024
      # Reduction-B block
      net = block_reduction_b(net, 'Mixed_7a')
      if add_and_check_final('Mixed_7a', net): return net, end_points

      # 8 x 8 x 1536
      # 3 x Inception-C blocks
      for idx in range(3):
        block_scope = 'Mixed_7' + chr(ord('b') + idx)
        net = block_inception_c(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)
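
# A minimal usage sketch for feature extraction, assuming a 299x299 input
# placeholder and the default arg scope from inception_utils (both are
# assumptions, not requirements of this function):
#
#   images = tf.placeholder(tf.float32, [None, 299, 299, 3])
#   with slim.arg_scope(inception_utils.inception_arg_scope()):
#     net, end_points = inception_v4_base(images, final_endpoint='Mixed_6h')
#   # `net` is the 17x17x1024 'Mixed_6h' feature map; every earlier endpoint
#   # is also available in `end_points`.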


def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
  """Creates the Inception V4 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the model is being trained.
    dropout_keep_prob: float, the fraction of activations to keep before the
      final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the input to the logits layer (before dropout)
      if num_classes is 0 or None.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}
  with tf.variable_scope(
      scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4_base(inputs, scope=scope)

      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # Auxiliary Head logits
        if create_aux_logits and num_classes:
          with tf.variable_scope('AuxLogits'):
            # 17 x 17 x 1024
            aux_logits = end_points['Mixed_6h']
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID',
                                         scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                     scope='Conv2d_1b_1x1')
            aux_logits = slim.conv2d(aux_logits, 768,
                                     aux_logits.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a')
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes,
                                              activation_fn=None,
                                              scope='Aux_logits')
            end_points['AuxLogits'] = aux_logits

        # Final pooling and prediction
        # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
        # can be set to False to disable pooling here (as in resnet_*()).
        with tf.variable_scope('Logits'):
          # 8 x 8 x 1536
          kernel_size = net.get_shape()[1:3]
          if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                  scope='AvgPool_1a')
          else:
            net = tf.reduce_mean(
                input_tensor=net,
                axis=[1, 2],
                keepdims=True,
                name='global_pool')
          end_points['global_pool'] = net
          if not num_classes:
            return net, end_points
          # 1 x 1 x 1536
          net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
          net = slim.flatten(net, scope='PreLogitsFlatten')
          end_points['PreLogitsFlatten'] = net
          # 1536
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
    return logits, end_points
inception_v4.default_image_size = 299


inception_v4_arg_scope = inception_utils.inception_arg_scope
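

# A minimal, illustrative smoke test: a sketch assuming the `nets` package is
# importable and that the tf.compat.v1 graph APIs used above are available
# (eager execution must be disabled for tf.placeholder).
if __name__ == '__main__':
  tf.disable_eager_execution()
  images = tf.placeholder(tf.float32, [1, 299, 299, 3], name='images')
  # inception_v4_arg_scope supplies the batch-norm and weight-decay defaults
  # that the layers above expect.
  with slim.arg_scope(inception_v4_arg_scope()):
    logits, end_points = inception_v4(images, num_classes=1001,
                                      is_training=False)
  print('logits shape:', logits.shape)                               # (1, 1001)
  print('pre-logits shape:', end_points['PreLogitsFlatten'].shape)   # (1, 1536)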