resnet_model.py 9.75 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet50 model for Keras.

Adapted from tf.keras.applications.resnet50.ResNet50().
Shining Sun's avatar
Shining Sun committed
18
This is ResNet model version 1.5.
19
20
21
22
23
24
25
26
27
28
29
30

Related papers/blogs:
- https://arxiv.org/abs/1512.03385
- https://arxiv.org/pdf/1603.05027v2.pdf
- http://torch.ch/blog/2016/02/04/resnets.html

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.keras import backend
31
from tensorflow.python.keras  import initializers
32
33
34
35
36
37
38
39
40
41
42
43
44
from tensorflow.python.keras import layers
from tensorflow.python.keras import models
from tensorflow.python.keras import regularizers


L2_WEIGHT_DECAY = 1e-4
BATCH_NORM_DECAY = 0.9
BATCH_NORM_EPSILON = 1e-5


def identity_block(input_tensor, kernel_size, filters, stage, block):
  """The identity block is the block that has no conv layer at shortcut.

45
46
47
48
49
50
  Args:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
51

52
53
  Returns:
    Output tensor for the block.
54
55
56
57
58
59
60
61
62
  """
  filters1, filters2, filters3 = filters
  if backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

63
  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
64
65
66
67
68
69
70
71
72
73
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters2, kernel_size,
74
                    padding='same', use_bias=False,
75
76
77
78
79
80
81
82
83
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
  x = layers.Activation('relu')(x)

84
  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)

  x = layers.add([x, input_tensor])
  x = layers.Activation('relu')(x)
  return x


def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
  """A block that has a conv layer at shortcut.

  Note that from stage 3,
  the second conv layer at main path is with strides=(2, 2)
  And the shortcut should have strides=(2, 2) as well
109
110
111
112
113
114
115
116
117
118
119

  Args:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
    strides: Strides for the second conv layer in the block.

  Returns:
    Output tensor for the block.
120
121
122
123
124
125
126
127
128
  """
  filters1, filters2, filters3 = filters
  if backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

129
130
  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
                    kernel_initializer='he_normal',
131
132
133
134
135
136
137
138
139
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
140
                    use_bias=False, kernel_initializer='he_normal',
141
142
143
144
145
146
147
148
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
  x = layers.Activation('relu')(x)

149
  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
150
151
152
153
154
155
156
157
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)

158
  shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, use_bias=False,
159
160
161
162
163
164
165
166
167
168
169
170
171
                           kernel_initializer='he_normal',
                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                           name=conv_name_base + '1')(input_tensor)
  shortcut = layers.BatchNormalization(axis=bn_axis,
                                       momentum=BATCH_NORM_DECAY,
                                       epsilon=BATCH_NORM_EPSILON,
                                       name=bn_name_base + '1')(shortcut)

  x = layers.add([x, shortcut])
  x = layers.Activation('relu')(x)
  return x


172
def resnet50(num_classes, dtype='float32', batch_size=None):
173
174
175
176
  """Instantiates the ResNet50 architecture.

  Args:
    num_classes: `int` number of classes for image classification.
177
178
    dtype: dtype to use float32 or float16 are most common.
    batch_size: Size of the batches for each step.
179
180
181
182

  Returns:
      A Keras model instance.
  """
183
  input_shape = (224, 224, 3)
184
185
  img_input = layers.Input(shape=input_shape, dtype=dtype,
                           batch_size=batch_size)
186

187
  if backend.image_data_format() == 'channels_first':
188
189
    x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
                      name='transpose')(img_input)
190
    bn_axis = 1
191
192
  else:  # channels_last
    x = img_input
193
194
    bn_axis = 3

195
  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
196
197
  x = layers.Conv2D(64, (7, 7),
                    strides=(2, 2),
198
                    padding='valid', use_bias=False,
199
200
201
202
203
204
205
206
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name='conv1')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name='bn_conv1')(x)
  x = layers.Activation('relu')(x)
207
  x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

229
230
  rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
  x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
231
  x = layers.Dense(
Reed's avatar
Reed committed
232
      num_classes,
233
      kernel_initializer=initializers.RandomNormal(stddev=0.01),
234
235
236
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name='fc1000')(x)
237

Reed's avatar
Reed committed
238
239
240
241
  # TODO(reedwm): Remove manual casts once mixed precision can be enabled with a
  # single line of code.
  x = backend.cast(x, 'float32')
  x = layers.Activation('softmax')(x)
242
243
244

  # Create model.
  return models.Model(img_input, x, name='resnet50')