overfeat.py 5.79 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the model definition for the OverFeat network.

The definition for the network was obtained from:
  OverFeat: Integrated Recognition, Localization and Detection using
  Convolutional Networks
  Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
  Yann LeCun, 2014
  http://arxiv.org/abs/1312.6229

Usage:
  with slim.arg_scope(overfeat.overfeat_arg_scope()):
    outputs, end_points = overfeat.overfeat(inputs)

@@overfeat
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def overfeat_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_regularizer=slim.l2_regularizer(weight_decay),
44
                      biases_initializer=tf.zeros_initializer()):
45
46
47
48
49
50
51
52
53
54
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
55
56
             scope='overfeat',
             global_pool=False):
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  """Contains the model definition for the OverFeat network.

  The definition for the network was obtained from:
    OverFeat: Integrated Recognition, Localization and Detection using
    Convolutional Networks
    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
    Yann LeCun, 2014
    http://arxiv.org/abs/1312.6229

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 231x231. To use in fully
        convolutional mode, set spatial_squeeze to false.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
72
73
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer are returned instead.
74
75
76
77
78
79
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
80
81
82
    global_pool: Optional boolean flag. If True, the input to the classification
      layer is avgpooled to size 1x1, for any input size. (This is not part
      of the original OverFeat.)
83
84

  Returns:
85
86
87
88
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the non-dropped-out input to the logits layer (if num_classes is 0 or
      None).
    end_points: a dict of tensors with intermediate activations.
89
90
  """
  with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:
91
    end_points_collection = sc.original_name_scope + '_end_points'
92
93
94
95
96
97
98
99
100
101
102
103
    # Collect outputs for conv2d, fully_connected and max_pool2d
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.conv2d(net, 512, [3, 3], scope='conv3')
      net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
      net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
104
105

      # Use conv2d instead of fully_connected layers.
106
107
108
109
110
111
112
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
        # Convert end_points_collection into a end_point dict.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        if global_pool:
          net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
          end_points['global_pool'] = net
        if num_classes:
          net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                             scope='dropout7')
          net = slim.conv2d(net, num_classes, [1, 1],
                            activation_fn=None,
                            normalizer_fn=None,
                            biases_initializer=tf.zeros_initializer(),
                            scope='fc8')
          if spatial_squeeze:
            net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
          end_points[sc.name + '/fc8'] = net
130
131
      return net, end_points
overfeat.default_image_size = 231