inception_v4.py 16.5 KB
Newer Older
Alex Kurakin's avatar
Alex Kurakin committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.

As described in http://arxiv.org/abs/1602.07261.

  Inception-v4, Inception-ResNet and the Impact of Residual Connections
    on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
28
from tensorflow.contrib import slim as contrib_slim
Alex Kurakin's avatar
Alex Kurakin committed
29
30
31

from nets import inception_utils

32
# Short module-level alias for tensorflow.contrib.slim; every layer call below
# (conv2d, pooling, arg_scope, ...) goes through this name.
slim = contrib_slim
Alex Kurakin's avatar
Alex Kurakin committed
33
34
35
36
37
38
39


def block_inception_a(inputs, scope=None, reuse=None):
  """Builds Inception-A block for Inception v4 network.

  Four parallel branches are applied to `inputs` and their outputs are
  concatenated along axis 3. All layers use stride 1 and SAME padding.

  Args:
    inputs: input tensor. The branches are concatenated along axis 3, so a
      4-D channels-last tensor is presumably expected.
    scope: Optional variable_scope; 'BlockInceptionA' is used as the default
      name when it is None.
    reuse: passed through to the enclosing variable_scope.

  Returns:
    The concatenation along axis 3 of the four branch outputs, which have
    96, 96, 96 and 96 output channels respectively.
  """
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionA', [inputs], reuse=reuse):
      # Branch 0: 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
      # Branch 1: 1x1 bottleneck followed by a 3x3 convolution.
      with tf.compat.v1.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      # Branch 2: 1x1 bottleneck followed by two stacked 3x3 convolutions.
      with tf.compat.v1.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
      # Branch 3: 3x3 average pooling followed by a 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
Alex Kurakin's avatar
Alex Kurakin committed
55
56
57
58
59
60
61


def block_reduction_a(inputs, scope=None, reuse=None):
  """Builds Reduction-A block for Inception v4 network.

  Three parallel branches are applied to `inputs`, each ending in a stride-2
  VALID-padded 3x3 operation, and their outputs are concatenated along axis 3.

  Args:
    inputs: input tensor. The branches are concatenated along axis 3, so a
      4-D channels-last tensor is presumably expected.
    scope: Optional variable_scope; 'BlockReductionA' is used as the default
      name when it is None.
    reuse: passed through to the enclosing variable_scope.

  Returns:
    The concatenation along axis 3 of a 384-channel convolution branch, a
    256-channel convolution branch and a max-pooled copy of `inputs`.
  """
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockReductionA', [inputs], reuse=reuse):
      # Branch 0: a single strided 3x3 convolution.
      with tf.compat.v1.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
                               scope='Conv2d_1a_3x3')
      # Branch 1: 1x1 -> 3x3 -> strided 3x3 convolutions.
      with tf.compat.v1.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      # Branch 2: strided 3x3 max pooling (keeps the input channel count).
      with tf.compat.v1.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
Alex Kurakin's avatar
Alex Kurakin committed
76
77
78
79
80
81
82


def block_inception_b(inputs, scope=None, reuse=None):
  """Builds Inception-B block for Inception v4 network.

  Four parallel branches, built from 1x1 convolutions, factorized 1x7 / 7x1
  convolutions and average pooling, are applied to `inputs` and concatenated
  along axis 3. All layers use stride 1 and SAME padding.

  Args:
    inputs: input tensor. The branches are concatenated along axis 3, so a
      4-D channels-last tensor is presumably expected.
    scope: Optional variable_scope; 'BlockInceptionB' is used as the default
      name when it is None.
    reuse: passed through to the enclosing variable_scope.

  Returns:
    The concatenation along axis 3 of the four branch outputs, which have
    384, 256, 256 and 128 output channels respectively.
  """
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionB', [inputs], reuse=reuse):
      # Branch 0: 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
      # Branch 1: 1x1 bottleneck, then a 1x7 / 7x1 pair.
      with tf.compat.v1.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
      # Branch 2: 1x1 bottleneck, then two alternating 7x1 / 1x7 pairs.
      with tf.compat.v1.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
        branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
        branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
        branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
      # Branch 3: 3x3 average pooling followed by a 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
Alex Kurakin's avatar
Alex Kurakin committed
101
102
103
104
105
106
107


def block_reduction_b(inputs, scope=None, reuse=None):
  """Builds Reduction-B block for Inception v4 network.

  Three parallel branches are applied to `inputs`, each ending in a stride-2
  VALID-padded 3x3 operation, and their outputs are concatenated along axis 3.

  Args:
    inputs: input tensor. The branches are concatenated along axis 3, so a
      4-D channels-last tensor is presumably expected.
    scope: Optional variable_scope; 'BlockReductionB' is used as the default
      name when it is None.
    reuse: passed through to the enclosing variable_scope.

  Returns:
    The concatenation along axis 3 of a 192-channel convolution branch, a
    320-channel convolution branch and a max-pooled copy of `inputs`.
  """
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockReductionB', [inputs], reuse=reuse):
      # Branch 0: 1x1 bottleneck followed by a strided 3x3 convolution.
      with tf.compat.v1.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      # Branch 1: 1x1 -> 1x7 -> 7x1 -> strided 3x3 convolutions.
      with tf.compat.v1.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
        branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      # Branch 2: strided 3x3 max pooling (keeps the input channel count).
      with tf.compat.v1.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
Alex Kurakin's avatar
Alex Kurakin committed
124
125
126
127
128
129
130


def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network.

  Four parallel branches are applied to `inputs` and concatenated along
  axis 3. Branches 1 and 2 each end by fanning out into a 1x3 and a 3x1
  convolution whose outputs are themselves concatenated. All layers use
  stride 1 and SAME padding.

  Args:
    inputs: input tensor. The branches are concatenated along axis 3, so a
      4-D channels-last tensor is presumably expected.
    scope: Optional variable_scope; 'BlockInceptionC' is used as the default
      name when it is None.
    reuse: passed through to the enclosing variable_scope.

  Returns:
    The concatenation along axis 3 of the four branch outputs, which have
    256, 512 (2 x 256), 512 (2 x 256) and 256 output channels respectively.
  """
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionC', [inputs], reuse=reuse):
      # Branch 0: 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      # Branch 1: 1x1 bottleneck feeding parallel 1x3 and 3x1 convolutions.
      with tf.compat.v1.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = tf.concat(axis=3, values=[
            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
      # Branch 2: 1x1 -> 3x1 -> 1x3, then parallel 1x3 and 3x1 convolutions.
      with tf.compat.v1.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
        branch_2 = tf.concat(axis=3, values=[
            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
      # Branch 3: 3x3 average pooling followed by a 1x1 convolution.
      with tf.compat.v1.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
Alex Kurakin's avatar
Alex Kurakin committed
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178


def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
  """Creates the Inception V4 network up to the given final endpoint.

  The network is built layer by layer; every intermediate activation is
  recorded in `end_points` under its endpoint name, and construction stops as
  soon as the endpoint named by `final_endpoint` has been built.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
      'Mixed_7d']
    scope: Optional variable_scope; 'InceptionV4' is used as the default name
      when it is None.

  Returns:
    net: the output tensor of the layer named by `final_endpoint`.
    end_points: a dict mapping endpoint names to their output tensors, for
      every endpoint built up to and including `final_endpoint`.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
      The check happens only after the whole network has been constructed.
  """
  end_points = {}

  def add_and_check_final(name, net):
    # Record the activation and report whether construction should stop here.
    end_points[name] = net
    return name == final_endpoint

  with tf.compat.v1.variable_scope(scope, 'InceptionV4', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 299 x 299 x 3
      net = slim.conv2d(inputs, 32, [3, 3], stride=2,
                        padding='VALID', scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
      # 149 x 149 x 32
      net = slim.conv2d(net, 32, [3, 3], padding='VALID',
                        scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
      # 147 x 147 x 64
      with tf.compat.v1.variable_scope('Mixed_3a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_0a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_0a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_3a', net): return net, end_points

      # 73 x 73 x 160
      with tf.compat.v1.variable_scope('Mixed_4a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_4a', net): return net, end_points

      # 71 x 71 x 192
      with tf.compat.v1.variable_scope('Mixed_5a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_5a', net): return net, end_points

      # 35 x 35 x 384
      # 4 x Inception-A blocks
      for idx in range(4):
        # Scopes are named Mixed_5b .. Mixed_5e.
        block_scope = 'Mixed_5' + chr(ord('b') + idx)
        net = block_inception_a(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 35 x 35 x 384
      # Reduction-A block
      net = block_reduction_a(net, 'Mixed_6a')
      if add_and_check_final('Mixed_6a', net): return net, end_points

      # 17 x 17 x 1024
      # 7 x Inception-B blocks
      for idx in range(7):
        # Scopes are named Mixed_6b .. Mixed_6h.
        block_scope = 'Mixed_6' + chr(ord('b') + idx)
        net = block_inception_b(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 17 x 17 x 1024
      # Reduction-B block
      net = block_reduction_b(net, 'Mixed_7a')
      if add_and_check_final('Mixed_7a', net): return net, end_points

      # 8 x 8 x 1536
      # 3 x Inception-C blocks
      for idx in range(3):
        # Scopes are named Mixed_7b .. Mixed_7d.
        block_scope = 'Mixed_7' + chr(ord('b') + idx)
        net = block_inception_c(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points
  # Reached only when no endpoint name matched final_endpoint.
  raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
  """Creates the Inception V4 model.

  Builds the full base network with `inception_v4_base`, optionally adds an
  auxiliary classifier on top of the 'Mixed_6h' endpoint, then applies global
  average pooling, dropout and a fully connected logits layer.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not. Applied to slim.batch_norm and
      slim.dropout via arg_scope.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits. They are only
      built when `num_classes` is also non-zero.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped input to the logits layer
      if num_classes is 0 or None.
    end_points: the set of end_points from the inception model. In addition
      to the base-network endpoints it holds 'global_pool' and, depending on
      the arguments, 'AuxLogits', 'PreLogitsFlatten', 'Logits' and
      'Predictions'.
  """
  end_points = {}
  with tf.compat.v1.variable_scope(
      scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      # The captured scope object is handed down so the base network is built
      # inside the scope opened above.
      net, end_points = inception_v4_base(inputs, scope=scope)

      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # Auxiliary Head logits
        if create_aux_logits and num_classes:
          with tf.compat.v1.variable_scope('AuxLogits'):
            # 17 x 17 x 1024
            aux_logits = end_points['Mixed_6h']
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID',
                                         scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                     scope='Conv2d_1b_1x1')
            # The kernel spans the remaining spatial extent, so this VALID
            # convolution collapses height and width to 1 x 1.
            aux_logits = slim.conv2d(aux_logits, 768,
                                     aux_logits.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a')
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes,
                                              activation_fn=None,
                                              scope='Aux_logits')
            end_points['AuxLogits'] = aux_logits

        # Final pooling and prediction
        # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
        # can be set to False to disable pooling here (as in resnet_*()).
        with tf.compat.v1.variable_scope('Logits'):
          # 8 x 8 x 1536
          kernel_size = net.get_shape()[1:3]
          if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                  scope='AvgPool_1a')
          else:
            # Spatial dimensions are not statically known; average over them
            # dynamically instead of using a fixed-size pooling kernel.
            net = tf.reduce_mean(
                input_tensor=net,
                axis=[1, 2],
                keepdims=True,
                name='global_pool')
          end_points['global_pool'] = net
          if not num_classes:
            # Feature-extractor mode: return the pooled features directly.
            return net, end_points
          # 1 x 1 x 1536
          net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
          net = slim.flatten(net, scope='PreLogitsFlatten')
          end_points['PreLogitsFlatten'] = net
          # 1536
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
    return logits, end_points
# Stored as an attribute on the model function itself; presumably the default
# input height/width (matches the "299 x 299 x 3" note in inception_v4_base).
inception_v4.default_image_size = 299


# Expose inception_utils.inception_arg_scope under a model-specific name.
inception_v4_arg_scope = inception_utils.inception_arg_scope