inception_v1.py 17 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v1 classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
22
from tensorflow.contrib import slim as contrib_slim
23

Alex Kurakin's avatar
Alex Kurakin committed
24
25
from nets import inception_utils

26
slim = contrib_slim
27
28
29
30

def trunc_normal(stddev):
  """Returns a truncated-normal initializer built from `0.0` and `stddev`.

  Args:
    stddev: forwarded as the second positional argument of
      `tf.compat.v1.truncated_normal_initializer`; the first is fixed at 0.0.

  Returns:
    The initializer object created by TensorFlow.
  """
  return tf.compat.v1.truncated_normal_initializer(0.0, stddev)
31
32
33
34


# Endpoints created by the optional root block, in construction order.
_ROOT_BLOCK_ENDPOINTS = (
    'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
    'MaxPool_3a_3x3')

# Endpoints created after the root block, in construction order.
_BODY_ENDPOINTS = (
    'Mixed_3b', 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
    'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
    'Mixed_5c')

# Kernel sizes of the stride-2 max-pool layers that sit between the modules.
_BODY_POOL_KERNELS = {
    'MaxPool_4a_3x3': (3, 3),
    'MaxPool_5a_2x2': (2, 2),
}

# Output depths of every inception module, laid out as
#   (Branch_0 1x1, (Branch_1 1x1, Branch_1 3x3),
#    (Branch_2 1x1, Branch_2 3x3), Branch_3 1x1).
_MIXED_DEPTHS = {
    'Mixed_3b': (64, (96, 128), (16, 32), 32),
    'Mixed_3c': (128, (128, 192), (32, 96), 64),
    'Mixed_4b': (192, (96, 208), (16, 48), 64),
    'Mixed_4c': (160, (112, 224), (24, 64), 64),
    'Mixed_4d': (128, (128, 256), (24, 64), 64),
    'Mixed_4e': (112, (144, 288), (32, 64), 64),
    'Mixed_4f': (256, (160, 320), (32, 128), 128),
    'Mixed_5b': (256, (160, 320), (32, 128), 128),
    'Mixed_5c': (384, (192, 384), (48, 128), 128),
}

# Mixed_5b names the 3x3 convolution of Branch_2 'Conv2d_0a_3x3' whereas every
# other module uses 'Conv2d_0b_3x3'. The irregular scope is reproduced exactly
# so the resulting scope names stay unchanged.
# NOTE(review): presumably retained so existing checkpoints keep loading --
# confirm before ever normalising it.
_BRANCH_2_SCOPE_OVERRIDES = {'Mixed_5b': 'Conv2d_0a_3x3'}


def _inception_module(net, end_point, depths,
                      branch_2_conv_scope='Conv2d_0b_3x3'):
  """Builds one four-branch inception module under scope `end_point`.

  Must be called inside the `slim.arg_scope` blocks opened by
  `inception_v1_base`; stride and padding are not passed explicitly here.

  Args:
    net: input tensor of the module.
    end_point: name of the variable scope wrapping the module.
    depths: `(d0, (d1a, d1b), (d2a, d2b), d3)` output depths of the
      convolutions in Branch_0 .. Branch_3.
    branch_2_conv_scope: scope name of the 3x3 convolution in Branch_2.

  Returns:
    The four branch outputs concatenated along axis 3.
  """
  depth_0, (depth_1a, depth_1b), (depth_2a, depth_2b), depth_3 = depths
  with tf.compat.v1.variable_scope(end_point):
    with tf.compat.v1.variable_scope('Branch_0'):
      branch_0 = slim.conv2d(net, depth_0, [1, 1], scope='Conv2d_0a_1x1')
    with tf.compat.v1.variable_scope('Branch_1'):
      branch_1 = slim.conv2d(net, depth_1a, [1, 1], scope='Conv2d_0a_1x1')
      branch_1 = slim.conv2d(
          branch_1, depth_1b, [3, 3], scope='Conv2d_0b_3x3')
    with tf.compat.v1.variable_scope('Branch_2'):
      branch_2 = slim.conv2d(net, depth_2a, [1, 1], scope='Conv2d_0a_1x1')
      branch_2 = slim.conv2d(
          branch_2, depth_2b, [3, 3], scope=branch_2_conv_scope)
    with tf.compat.v1.variable_scope('Branch_3'):
      branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
      branch_3 = slim.conv2d(
          branch_3, depth_3, [1, 1], scope='Conv2d_0b_1x1')
    return tf.concat(
        axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def inception_v1_base(inputs,
                      final_endpoint='Mixed_5c',
                      include_root_block=True,
                      scope='InceptionV1'):
  """Defines the Inception V1 base architecture.

  This architecture is defined in:
    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']. If
      include_root_block is False, ['Conv2d_1a_7x7', 'MaxPool_2a_3x3',
      'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will not be available.
    include_root_block: If True, include the convolution and max-pooling layers
      before the inception modules. If False, excludes those layers.
    scope: Optional variable_scope.

  Returns:
    A tuple `(net, end_points)`:
      net: the activation of the layer named by `final_endpoint`.
      end_points: a dictionary from the name of every layer built so far to
        the corresponding activation.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
      The check runs before any layer is created.
  """
  # Reject an unreachable endpoint up front so that a bad argument does not
  # first build the complete network and only then fail.
  valid_endpoints = _BODY_ENDPOINTS
  if include_root_block:
    valid_endpoints = _ROOT_BLOCK_ENDPOINTS + _BODY_ENDPOINTS
  if final_endpoint not in valid_endpoints:
    raise ValueError('Unknown final endpoint %s' % final_endpoint)

  end_points = {}
  with tf.compat.v1.variable_scope(scope, 'InceptionV1', [inputs]):
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_initializer=trunc_normal(0.01)):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                          stride=1, padding='SAME'):
        net = inputs
        if include_root_block:
          end_point = 'Conv2d_1a_7x7'
          net = slim.conv2d(net, 64, [7, 7], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
          end_point = 'MaxPool_2a_3x3'
          net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
          end_point = 'Conv2d_2b_1x1'
          net = slim.conv2d(net, 64, [1, 1], scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
          end_point = 'Conv2d_2c_3x3'
          net = slim.conv2d(net, 192, [3, 3], scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
          end_point = 'MaxPool_3a_3x3'
          net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

        # Inception modules interleaved with two stride-2 max-pool layers.
        for end_point in _BODY_ENDPOINTS:
          if end_point in _BODY_POOL_KERNELS:
            net = slim.max_pool2d(
                net, list(_BODY_POOL_KERNELS[end_point]), stride=2,
                scope=end_point)
          else:
            net = _inception_module(
                net, end_point, _MIXED_DEPTHS[end_point],
                branch_2_conv_scope=_BRANCH_2_SCOPE_OVERRIDES.get(
                    end_point, 'Conv2d_0b_3x3'))
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
    # Safeguard: a validated endpoint always returns from inside the blocks
    # above, so this line is not expected to run.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1',
                 global_pool=False):
  """Builds the full Inception V1 classifier on top of `inception_v1_base`.

  The architecture is described in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the pooled features (before dropout) are returned
      instead.
    is_training: whether is training or not; applied to `slim.batch_norm` and
      `slim.dropout` through an arg_scope.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits. It is called
      as `prediction_fn(logits, scope='Predictions')`.
    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
      shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed 7x7 window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation. It additionally holds the pooling output and, when logits
      are built, the 'Logits' and 'Predictions' entries.
  """
  with tf.compat.v1.variable_scope(
      scope, 'InceptionV1', [inputs], reuse=reuse) as net_scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      features, end_points = inception_v1_base(inputs, scope=net_scope)
      with tf.compat.v1.variable_scope('Logits'):
        # Spatial pooling: either a fixed 7x7 window or a mean over axes 1, 2.
        if not global_pool:
          pool_key = 'AvgPool_0a_7x7'
          pooled = slim.avg_pool2d(
              features, [7, 7], stride=1, scope=pool_key)
        else:
          pool_key = 'global_pool'
          pooled = tf.reduce_mean(
              input_tensor=features, axis=[1, 2], keepdims=True,
              name=pool_key)
        end_points[pool_key] = pooled

        # Without classes there is no classifier head to build.
        if not num_classes:
          return pooled, end_points

        dropped = slim.dropout(pooled, dropout_keep_prob, scope='Dropout_0b')
        # The classifier is a 1x1 convolution without activation or
        # normalizer, so its output is the raw logits.
        logits = slim.conv2d(
            dropped, num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
        return logits, end_points
# Image side length advertised for this network, stored as a function
# attribute on `inception_v1`.
inception_v1.default_image_size = 224

# Module-level alias for the arg_scope defined in `inception_utils`.
inception_v1_arg_scope = inception_utils.inception_arg_scope