# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides flags that are common to scripts.

Common flags from train/eval/vis/export_model.py are collected in this script.
"""
import collections
import copy
import json
import tensorflow as tf

flags = tf.app.flags

# Flags for input preprocessing.

flags.DEFINE_integer('min_resize_value', None,
                     'Desired size of the smaller image side.')

flags.DEFINE_integer('max_resize_value', None,
                     'Maximum allowed size of the larger image side.')

flags.DEFINE_integer('resize_factor', None,
                     'Resized dimensions are multiple of factor plus one.')

# Model dependent flags.

flags.DEFINE_integer('logits_kernel_size', 1,
                     'The kernel size for the convolutional kernel that '
                     'generates logits.')

# When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None.
# When using 'xception_65' or 'resnet_v1' model variants, we set
# atrous_rates = [6, 12, 18] (output stride 16) and decoder_output_stride = 4.
# See core/feature_extractor.py for supported model variants.
flags.DEFINE_string('model_variant', 'mobilenet_v2', 'DeepLab model variant.')

flags.DEFINE_multi_float('image_pyramid', None,
                         'Input scales for multi-scale feature extraction.')

flags.DEFINE_boolean('add_image_level_feature', True,
                     'Add image level feature.')

flags.DEFINE_list(
    'image_pooling_crop_size', None,
    'Image pooling crop size [height, width] used in the ASPP module. When '
    'value is None, the model performs image pooling with "crop_size". This '
    'flag is useful when one wants to use different image pooling sizes.')
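# For example, one might pass --image_pooling_crop_size=513,513 (illustrative
# values) to pool over a fixed 513x513 region.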

flags.DEFINE_list(
    'image_pooling_stride', '1,1',
    'Image pooling stride [height, width] used in the ASPP image pooling. ')

flags.DEFINE_boolean('aspp_with_batch_norm', True,
                     'Use batch norm parameters for ASPP or not.')

flags.DEFINE_boolean('aspp_with_separable_conv', True,
                     'Use separable convolution for ASPP or not.')

# Defaults to None. Set multi_grid = [1, 2, 4] when using provided
# 'resnet_v1_{50,101}_beta' checkpoints.
flags.DEFINE_multi_integer('multi_grid', None,
                           'Employ a hierarchy of atrous rates for ResNet.')

flags.DEFINE_float('depth_multiplier', 1.0,
                   'Multiplier for the depth (number of channels) for all '
                   'convolution ops used in MobileNet.')

flags.DEFINE_integer('divisible_by', None,
                     'An integer that ensures the layer # channels are '
                     'divisible by this value. Used in MobileNet.')

# For `xception_65`, use decoder_output_stride = 4. For `mobilenet_v2`, use
# decoder_output_stride = None.
flags.DEFINE_list('decoder_output_stride', None,
                  'Comma-separated list of strings with the number specifying '
                  'output stride of low-level features at each network level. '
                  'Current semantic segmentation implementation assumes at '
                  'most one output stride (i.e., either None or a list with '
                  'only one element).')
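# For example, passing --decoder_output_stride=4 recovers the single-level
# decoder setting mentioned in the comment above (an illustrative value).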

flags.DEFINE_boolean('decoder_use_separable_conv', True,
                     'Employ separable convolution for decoder or not.')

flags.DEFINE_enum('merge_method', 'max', ['max', 'avg'],
                  'Scheme to merge multi scale features.')

flags.DEFINE_boolean(
    'prediction_with_upsampled_logits', True,
    'When performing prediction, there are two options: (1) bilinear '
    'upsampling the logits followed by argmax, or (2) argmax followed by '
    'nearest upsampling the predicted labels. The second option may introduce '
    'some "blocking effect", but it is more computationally efficient. '
    'Currently, prediction_with_upsampled_logits=False is only supported for '
    'single-scale inference.')

flags.DEFINE_string(
    'dense_prediction_cell_json',
    '',
    'A JSON file that specifies the dense prediction cell.')

flags.DEFINE_integer(
    'nas_stem_output_num_conv_filters', 20,
    'Number of filters of the stem output tensor in NAS models.')

flags.DEFINE_bool('use_bounded_activation', False,
                  'Whether or not to use bounded activations. Bounded '
                  'activations better lend themselves to quantized inference.')

FLAGS = flags.FLAGS

# Constants

# Perform semantic segmentation predictions.
OUTPUT_TYPE = 'semantic'

# Semantic segmentation item names.
LABELS_CLASS = 'labels_class'
IMAGE = 'image'
HEIGHT = 'height'
WIDTH = 'width'
IMAGE_NAME = 'image_name'
LABEL = 'label'
ORIGINAL_IMAGE = 'original_image'

# Test set name.
TEST_SET = 'test'
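
# Note: scripts such as train/eval/vis use these keys to index the sample
# dictionaries produced by the input pipeline, e.g. samples[IMAGE].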


class ModelOptions(
    collections.namedtuple('ModelOptions', [
        'outputs_to_num_classes',
        'crop_size',
        'atrous_rates',
        'output_stride',
        'preprocessed_images_dtype',
        'merge_method',
        'add_image_level_feature',
        'image_pooling_crop_size',
        'image_pooling_stride',
        'aspp_with_batch_norm',
        'aspp_with_separable_conv',
        'multi_grid',
        'decoder_output_stride',
        'decoder_use_separable_conv',
        'logits_kernel_size',
        'model_variant',
        'depth_multiplier',
        'divisible_by',
        'prediction_with_upsampled_logits',
        'dense_prediction_cell_config',
        'nas_stem_output_num_conv_filters',
        'use_bounded_activation'
    ])):
  """Immutable class to hold model options."""

  __slots__ = ()

  def __new__(cls,
              outputs_to_num_classes,
              crop_size=None,
              atrous_rates=None,
              output_stride=8,
              preprocessed_images_dtype=tf.float32):
    """Constructor to set default values.

    Args:
      outputs_to_num_classes: A dictionary from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have outputs_to_num_classes['semantic'] = 21.
      crop_size: A tuple [crop_height, crop_width].
      atrous_rates: A list of atrous convolution rates for ASPP.
      output_stride: The ratio of input to output spatial resolution.
      preprocessed_images_dtype: The type after the preprocessing function.

    Returns:
      A new ModelOptions instance.
    """
    dense_prediction_cell_config = None
    if FLAGS.dense_prediction_cell_json:
      with tf.gfile.Open(FLAGS.dense_prediction_cell_json, 'r') as f:
        dense_prediction_cell_config = json.load(f)
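
    # Convert the comma-separated decoder_output_stride flag to a list of
    # ints and check that the strides are given in descending order.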
    decoder_output_stride = None
    if FLAGS.decoder_output_stride:
      decoder_output_stride = [
          int(x) for x in FLAGS.decoder_output_stride]
      if sorted(decoder_output_stride, reverse=True) != decoder_output_stride:
        raise ValueError('Decoder output stride needs to be sorted in '
                         'descending order.')
    image_pooling_crop_size = None
    if FLAGS.image_pooling_crop_size:
      image_pooling_crop_size = [int(x) for x in FLAGS.image_pooling_crop_size]
    image_pooling_stride = [1, 1]
    if FLAGS.image_pooling_stride:
      image_pooling_stride = [int(x) for x in FLAGS.image_pooling_stride]
    return super(ModelOptions, cls).__new__(
        cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
        preprocessed_images_dtype, FLAGS.merge_method,
        FLAGS.add_image_level_feature,
        image_pooling_crop_size,
        image_pooling_stride,
        FLAGS.aspp_with_batch_norm,
        FLAGS.aspp_with_separable_conv, FLAGS.multi_grid, decoder_output_stride,
        FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
        FLAGS.model_variant, FLAGS.depth_multiplier, FLAGS.divisible_by,
        FLAGS.prediction_with_upsampled_logits, dense_prediction_cell_config,
        FLAGS.nas_stem_output_num_conv_filters, FLAGS.use_bounded_activation)

  def __deepcopy__(self, memo):
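    # A hand-written deepcopy is provided because the custom __new__ signature
    # differs from the generated namedtuple fields; only the mutable
    # outputs_to_num_classes dict needs a true deep copy.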
    return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
                        self.crop_size,
                        self.atrous_rates,
                        self.output_stride,
                        self.preprocessed_images_dtype)
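
# Example usage (a minimal sketch; the values below are illustrative,
# mirroring the xception_65 settings described in the flag comments above):
#
#   model_options = ModelOptions(
#       outputs_to_num_classes={OUTPUT_TYPE: 21},
#       crop_size=[513, 513],
#       atrous_rates=[6, 12, 18],
#       output_stride=16)
#   model_options_copy = copy.deepcopy(model_options)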