resnet_model.py 12.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet50 model for Keras.

Adapted from tf.keras.applications.resnet50.ResNet50().
This is ResNet model version 1.5.

Related papers/blogs:
- https://arxiv.org/abs/1512.03385
- https://arxiv.org/pdf/1603.05027v2.pdf
- http://torch.ch/blog/2016/02/04/resnets.html

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

30
31
import tensorflow as tf

32
from tensorflow.python.keras import backend
Zongwei Zhou's avatar
Zongwei Zhou committed
33
from tensorflow.python.keras import initializers
34
from tensorflow.python.keras import layers as tf_python_keras_layers
35
36
from tensorflow.python.keras import models
from tensorflow.python.keras import regularizers
Hongkun Yu's avatar
Hongkun Yu committed
37
from official.vision.image_classification import imagenet_preprocessing
38
39
40
41
42

# Coefficient handed to regularizers.l2() by _gen_l2_regularizer().
L2_WEIGHT_DECAY = 1e-4
# Passed as `momentum` to the BatchNormalization layers built in this file.
BATCH_NORM_DECAY = 0.9
# Passed as `epsilon` to the BatchNormalization layers built in this file.
BATCH_NORM_EPSILON = 1e-5

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Layers namespace used by the model-building functions in this file.
# change_keras_layer() rebinds this module-level name at runtime.
layers = tf_python_keras_layers


def change_keras_layer(use_tf_keras_layers=False):
  """Rebinds the module-level `layers` alias to one of two Keras namespaces.

  The layer version exposed by tf.keras.layers depends on the TensorFlow
  version, whereas tf.python.keras.layers checks the TF2_BEHAVIOR environment
  variable. This is a temporary switch for opting in to tf.keras.layers:
  the tf v2 batchnorm layer is currently slower than the tf v1 batchnorm
  layer, so the switch is useful for tracking benchmark results for each
  version. It will be removed once tf.keras.layers is used by default.

  TODO(b/146939027): Remove this function when tf v2 batchnorm reaches training
  speed parity with tf v1 batchnorm.

  Args:
      use_tf_keras_layers: whether to use tf.keras.layers. When false, the
        alias is reset to tf.python.keras.layers.
  """
  global layers
  # Only the selected branch is evaluated, exactly like an if/else statement.
  layers = tf.keras.layers if use_tf_keras_layers else tf_python_keras_layers

68

Zongwei Zhou's avatar
Zongwei Zhou committed
69
70
71
72
73
74
75
76
77
78
def _gen_l2_regularizer(use_l2_regularizer=True):
  return regularizers.l2(L2_WEIGHT_DECAY) if use_l2_regularizer else None


def identity_block(input_tensor,
                   kernel_size,
                   filters,
                   stage,
                   block,
                   use_l2_regularizer=True):
  """Builds a residual block whose shortcut is the unmodified input.

  The main path is three Conv2D + BatchNormalization pairs: a 1x1 conv, a
  `kernel_size` conv with 'same' padding, and another 1x1 conv. ReLU follows
  the first two pairs. The block input is then added to the main-path output
  and a final ReLU is applied.

  Args:
    input_tensor: input tensor.
    kernel_size: the kernel size of the middle conv layer at main path.
    filters: sequence of three integers, the filters of the 3 conv layers at
      main path, in order.
    stage: integer, current stage label, used for generating layer names.
    block: 'a','b'..., current block label, used for generating layer names.
    use_l2_regularizer: whether to use L2 regularizer on Conv layer.

  Returns:
    Output tensor for the block.
  """
  n_first, n_middle, n_last = filters
  channel_axis = 3 if backend.image_data_format() == 'channels_last' else 1
  label = str(stage) + block
  conv_prefix = 'res' + label + '_branch'
  bn_prefix = 'bn' + label + '_branch'

  def _conv_bn(tensor, num_filters, size, suffix, **conv_kwargs):
    """Applies a bias-free Conv2D followed by BatchNormalization."""
    tensor = layers.Conv2D(
        num_filters,
        size,
        use_bias=False,
        kernel_initializer='he_normal',
        kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
        name=conv_prefix + suffix,
        **conv_kwargs)(tensor)
    return layers.BatchNormalization(
        axis=channel_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        name=bn_prefix + suffix)(tensor)

  x = _conv_bn(input_tensor, n_first, (1, 1), '2a')
  x = layers.Activation('relu')(x)

  x = _conv_bn(x, n_middle, kernel_size, '2b', padding='same')
  x = layers.Activation('relu')(x)

  # No activation here: ReLU is applied after the residual addition.
  x = _conv_bn(x, n_last, (1, 1), '2c')

  merged = layers.add([x, input_tensor])
  return layers.Activation('relu')(merged)


def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2),
               use_l2_regularizer=True):
  """Builds a residual block with a conv layer at the shortcut.

  The main path is three Conv2D + BatchNormalization pairs (1x1, `kernel_size`
  with 'same' padding, 1x1) with ReLU after the first two pairs. The shortcut
  is a 1x1 Conv2D + BatchNormalization applied to the block input. `strides`
  is used by both the second conv layer at main path and the shortcut conv,
  so the two branches keep matching spatial sizes before they are added.

  Note that from stage 3, the second conv layer at main path is with
  strides=(2, 2), and the shortcut has strides=(2, 2) as well.

  Args:
    input_tensor: input tensor.
    kernel_size: the kernel size of the middle conv layer at main path.
    filters: sequence of three integers, the filters of the 3 conv layers at
      main path, in order. The last one is also used for the shortcut conv.
    stage: integer, current stage label, used for generating layer names.
    block: 'a','b'..., current block label, used for generating layer names.
    strides: Strides for the second conv layer in the block and for the
      shortcut conv layer.
    use_l2_regularizer: whether to use L2 regularizer on Conv layer.

  Returns:
    Output tensor for the block.
  """
  n_first, n_middle, n_last = filters
  channel_axis = 3 if backend.image_data_format() == 'channels_last' else 1
  label = str(stage) + block
  conv_prefix = 'res' + label + '_branch'
  bn_prefix = 'bn' + label + '_branch'

  def _conv_bn(tensor, num_filters, size, suffix, **conv_kwargs):
    """Applies a bias-free Conv2D followed by BatchNormalization."""
    tensor = layers.Conv2D(
        num_filters,
        size,
        use_bias=False,
        kernel_initializer='he_normal',
        kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
        name=conv_prefix + suffix,
        **conv_kwargs)(tensor)
    return layers.BatchNormalization(
        axis=channel_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        name=bn_prefix + suffix)(tensor)

  # Main path.
  x = _conv_bn(input_tensor, n_first, (1, 1), '2a')
  x = layers.Activation('relu')(x)

  x = _conv_bn(
      x, n_middle, kernel_size, '2b', strides=strides, padding='same')
  x = layers.Activation('relu')(x)

  x = _conv_bn(x, n_last, (1, 1), '2c')

  # Projection shortcut, built after the main path to keep layer creation
  # order unchanged.
  shortcut = _conv_bn(input_tensor, n_last, (1, 1), '1', strides=strides)

  merged = layers.add([x, shortcut])
  return layers.Activation('relu')(merged)


Zongwei Zhou's avatar
Zongwei Zhou committed
251
252
def resnet50(num_classes,
             batch_size=None,
             use_l2_regularizer=True,
             rescale_inputs=False):
  """Instantiates the ResNet50 architecture.

  The model takes (224, 224, 3) inputs, runs a 7x7 stem convolution and max
  pooling, then four stages of residual blocks (3, 4, 6 and 3 blocks), a
  spatial mean, a dense layer and a float32 softmax.

  Args:
    num_classes: `int` number of classes for image classification.
    batch_size: Size of the batches for each step.
    use_l2_regularizer: whether to use L2 regularizer on Conv/Dense layer.
    rescale_inputs: whether to rescale inputs given in the range [0, 1]. When
      true, inputs are multiplied by 255 and
      imagenet_preprocessing.CHANNEL_MEANS is subtracted.

  Returns:
      A Keras model instance.
  """
  img_input = layers.Input(shape=(224, 224, 3), batch_size=batch_size)

  x = img_input
  if rescale_inputs:
    # Hub image modules expect inputs in the range [0, 1]. This rescales these
    # inputs to the range expected by the trained model.
    x = layers.Lambda(
        lambda x: x * 255.0 - backend.constant(
            imagenet_preprocessing.CHANNEL_MEANS,
            shape=[1, 1, 3],
            dtype=x.dtype),
        name='rescale')(x)

  channels_first = backend.image_data_format() == 'channels_first'
  if channels_first:
    # The input layer is declared channels-last, so move channels to axis 1.
    x = layers.Lambda(
        lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
        name='transpose')(x)
  bn_axis = 1 if channels_first else 3

  # Stem: explicit 3-pixel padding followed by a 'valid' 7x7/2 convolution.
  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
  stem_conv = layers.Conv2D(
      64, (7, 7),
      strides=(2, 2),
      padding='valid',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
      name='conv1')
  x = stem_conv(x)
  stem_bn = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name='bn_conv1')
  x = stem_bn(x)
  x = layers.Activation('relu')(x)
  x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

  # (stage, filters, block labels, extra kwargs for the leading conv_block).
  # The first label of each stage is a conv_block; the rest are identity
  # blocks. Only stage 2 overrides the conv_block strides.
  stage_specs = (
      (2, [64, 64, 256], 'abc', {'strides': (1, 1)}),
      (3, [128, 128, 512], 'abcd', {}),
      (4, [256, 256, 1024], 'abcdef', {}),
      (5, [512, 512, 2048], 'abc', {}),
  )
  for stage, stage_filters, block_labels, first_block_kwargs in stage_specs:
    x = conv_block(
        x,
        3,
        stage_filters,
        stage=stage,
        block=block_labels[0],
        use_l2_regularizer=use_l2_regularizer,
        **first_block_kwargs)
    for block_label in block_labels[1:]:
      x = identity_block(
          x,
          3,
          stage_filters,
          stage=stage,
          block=block_label,
          use_l2_regularizer=use_l2_regularizer)

  # Average over the two spatial axes, whichever positions they occupy.
  rm_axes = [2, 3] if channels_first else [1, 2]
  x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
  x = layers.Dense(
      num_classes,
      kernel_initializer=initializers.RandomNormal(stddev=0.01),
      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
      bias_regularizer=_gen_l2_regularizer(use_l2_regularizer),
      name='fc1000')(x)

  # A softmax that is followed by the model loss cannot be done in float16
  # due to numeric issues, so we pass dtype=float32.
  x = layers.Activation('softmax', dtype='float32')(x)

  # Create model.
  return models.Model(img_input, x, name='resnet50')