Commit a17394cc authored by Liang-Chieh Chen, committed by Hui Hui

PiperOrigin-RevId: 205684720

parent 7922c9eb
@@ -33,10 +33,10 @@ works:
* DeepLabv3+:
```
@article{deeplabv3plus2018,
@inproceedings{deeplabv3plus2018,
title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
journal={arXiv:1802.02611},
booktitle={ECCV},
year={2018}
}
```
@@ -45,7 +45,7 @@ works:
```
@inproceedings{mobilenetv22018,
title={Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation},
title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
booktitle={CVPR},
year={2018}
@@ -78,6 +78,7 @@ Some segmentation results on Flickr images:
* Liang-Chieh Chen, github: [aquariusjay](https://github.com/aquariusjay)
* YuKun Zhu, github: [yknzhu](https://github.com/YknZhu)
* George Papandreou, github: [gpapan](https://github.com/gpapan)
* Hui Hui, github: [huihui-personal](https://github.com/huihui-personal)
## Table of Contents
@@ -154,8 +155,8 @@ and Cityscapes.
[[link]](http://arxiv.org/abs/1706.05587). arXiv: 1706.05587, 2017.
4. **Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation**<br />
Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. arXiv: 1802.02611.<br />
[[link]](https://arxiv.org/abs/1802.02611). arXiv: 1802.02611, 2018.
Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.<br />
[[link]](https://arxiv.org/abs/1802.02611). In ECCV, 2018.
5. **ParseNet: Looking Wider to See Better**<br />
Wei Liu, Andrew Rabinovich, Alexander C Berg<br />
@@ -169,9 +170,9 @@ and Cityscapes.
Sergey Ioffe, Christian Szegedy <br />
[[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015.
8. **Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation**<br />
8. **MobileNetV2: Inverted Residuals and Linear Bottlenecks**<br />
Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen<br />
[[link]](https://arxiv.org/abs/1801.04381). arXiv:1801.04381, 2018.
[[link]](https://arxiv.org/abs/1801.04381). In CVPR, 2018.
9. **Xception: Deep Learning with Depthwise Separable Convolutions**<br />
François Chollet<br />
......
@@ -17,6 +17,7 @@
Common flags from train/eval/vis/export_model.py are collected in this script.
"""
import collections
import copy
import tensorflow as tf
@@ -51,6 +52,12 @@ flags.DEFINE_multi_float('image_pyramid', None,
flags.DEFINE_boolean('add_image_level_feature', True,
'Add image level feature.')
flags.DEFINE_multi_integer(
'image_pooling_crop_size', None,
'Image pooling crop size [height, width] used in the ASPP module. When '
'value is None, the model performs image pooling with "crop_size". This '
'flag is useful when one wants to use different image pooling sizes.')
flags.DEFINE_boolean('aspp_with_batch_norm', True,
'Use batch norm parameters for ASPP or not.')
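For intuition, the new `image_pooling_crop_size` flag feeds the ASPP pooling-window arithmetic in model.py. A minimal sketch of that arithmetic, assuming `scale_dimension` follows the repo's `(dim - 1) * scale + 1` convention and using hypothetical sizes:

```python
# Sketch of how image_pooling_crop_size turns into an ASPP pooling window.
# scale_dimension here mirrors model.py's integer branch; sizes are hypothetical.
def scale_dimension(dim, scale):
  return int((float(dim) - 1.0) * scale + 1.0)

output_stride = 16
image_pooling_crop_size = [257, 257]  # smaller than a 513x513 training crop

pool_height = scale_dimension(image_pooling_crop_size[0], 1.0 / output_stride)
pool_width = scale_dimension(image_pooling_crop_size[1], 1.0 / output_stride)
print(pool_height, pool_width)  # 17 17 -- the avg-pool kernel in feature space
```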
@@ -106,6 +113,7 @@ class ModelOptions(
'output_stride',
'merge_method',
'add_image_level_feature',
'image_pooling_crop_size',
'aspp_with_batch_norm',
'aspp_with_separable_conv',
'multi_grid',
@@ -140,7 +148,13 @@ class ModelOptions(
return super(ModelOptions, cls).__new__(
cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride,
FLAGS.merge_method, FLAGS.add_image_level_feature,
FLAGS.aspp_with_batch_norm, FLAGS.aspp_with_separable_conv,
FLAGS.multi_grid, FLAGS.decoder_output_stride,
FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
FLAGS.model_variant, FLAGS.depth_multiplier)
FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm,
FLAGS.aspp_with_separable_conv, FLAGS.multi_grid,
FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv,
FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier)
def __deepcopy__(self, memo):
return ModelOptions(copy.deepcopy(self.outputs_to_num_classes),
self.crop_size,
self.atrous_rates,
self.output_stride)
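Context for the `__deepcopy__` override: `ModelOptions` is a namedtuple subclass whose `__new__` takes fewer arguments than the tuple has fields (the rest come from FLAGS), so the default deepcopy path, which rebuilds the tuple from all of its fields, would raise a TypeError. A self-contained sketch of the same pattern with a made-up, smaller field set:

```python
import collections
import copy

class Options(collections.namedtuple(
    'Options', ['outputs_to_num_classes', 'crop_size', 'merge_method'])):

  def __new__(cls, outputs_to_num_classes, crop_size=None):
    # merge_method comes from a default, not from the caller -- so the
    # signature no longer matches the field list, and copy.deepcopy's
    # default tuple reconstruction (cls.__new__(cls, *all_fields)) would fail.
    return super(Options, cls).__new__(
        cls, outputs_to_num_classes, crop_size, 'max')

  def __deepcopy__(self, memo):
    # Rebuild through the real constructor; only the mutable dict needs
    # a deep copy, matching the ModelOptions change above.
    return Options(copy.deepcopy(self.outputs_to_num_classes), self.crop_size)

opts = Options({'semantic': 21})
clone = copy.deepcopy(opts)
clone.outputs_to_num_classes['semantic'] = 22
assert opts.outputs_to_num_classes['semantic'] == 21  # original is untouched
```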
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for common.py."""
import copy
import tensorflow as tf
@@ -29,6 +30,23 @@ class CommonTest(tf.test.TestCase):
self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
num_classes)
def testDeepcopy(self):
num_classes = 21
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: num_classes})
model_options_new = copy.deepcopy(model_options)
self.assertEqual((model_options_new.
outputs_to_num_classes[common.OUTPUT_TYPE]),
num_classes)
num_classes_new = 22
model_options_new.outputs_to_num_classes[common.OUTPUT_TYPE] = (
num_classes_new)
self.assertEqual(model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
num_classes)
self.assertEqual((model_options_new.
outputs_to_num_classes[common.OUTPUT_TYPE]),
num_classes_new)
if __name__ == '__main__':
tf.test.main()
@@ -294,13 +294,13 @@
" try:\n",
" f = urllib.request.urlopen(url)\n",
" jpeg_str = f.read()\n",
" original_im = Image.open(BytesIO(jpeg_str))\n",
" orignal_im = Image.open(BytesIO(jpeg_str))\n",
" except IOError:\n",
" print('Cannot retrieve image. Please check url: ' + url)\n",
" return\n",
"\n",
" print('running deeplab on image %s...' % url)\n",
" resized_im, seg_map = MODEL.run(original_im)\n",
" resized_im, seg_map = MODEL.run(orignal_im)\n",
"\n",
" vis_segmentation(resized_im, seg_map)\n",
"\n",
......
@@ -95,6 +95,7 @@ def preprocess_image_and_label(image,
original_image = tf.identity(processed_image)
# Data augmentation by randomly scaling the inputs.
if is_training:
scale = preprocess_utils.get_random_scale(
min_scale_factor, max_scale_factor, scale_factor_step_size)
processed_image, label = preprocess_utils.randomly_scale_image_and_label(
......
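As background for the augmentation step above: the scale factor is drawn between `min_scale_factor` and `max_scale_factor`, optionally snapped to a step grid. A rough NumPy sketch of that sampling logic (the real `preprocess_utils.get_random_scale` builds TF graph ops, so this is illustrative only):

```python
import numpy as np

def get_random_scale(min_scale, max_scale, step_size):
  # Illustrative re-implementation; mirrors the intended sampling, not
  # the exact TF ops in preprocess_utils.
  if min_scale == max_scale:
    return min_scale
  if step_size == 0:
    return float(np.random.uniform(min_scale, max_scale))
  # Discrete grid: min, min + step, ..., max.
  num_steps = int((max_scale - min_scale) / step_size + 1)
  return float(np.random.choice(np.linspace(min_scale, max_scale, num_steps)))

print(get_random_scale(0.5, 2.0, 0.25))  # one of 0.5, 0.75, ..., 2.0
```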
@@ -237,9 +237,6 @@ def multi_scale_logits(images,
# Setup default values.
if not image_pyramid:
image_pyramid = [1.0]
if model_options.crop_size is None and model_options.add_image_level_feature:
raise ValueError(
'Crop size must be specified for using image-level feature.')
crop_height = (
model_options.crop_size[0]
if model_options.crop_size else tf.shape(images)[1])
@@ -378,18 +375,39 @@ def extract_features(images,
branch_logits = []
if model_options.add_image_level_feature:
pool_height = scale_dimension(model_options.crop_size[0],
if model_options.crop_size is not None:
image_pooling_crop_size = model_options.image_pooling_crop_size
# If image_pooling_crop_size is not specified, use crop_size.
if image_pooling_crop_size is None:
image_pooling_crop_size = model_options.crop_size
pool_height = scale_dimension(image_pooling_crop_size[0],
1. / model_options.output_stride)
pool_width = scale_dimension(model_options.crop_size[1],
pool_width = scale_dimension(image_pooling_crop_size[1],
1. / model_options.output_stride)
image_feature = slim.avg_pool2d(
features, [pool_height, pool_width], [pool_height, pool_width],
padding='VALID')
features, [pool_height, pool_width], [1, 1], padding='VALID')
resize_height = scale_dimension(model_options.crop_size[0],
1. / model_options.output_stride)
resize_width = scale_dimension(model_options.crop_size[1],
1. / model_options.output_stride)
else:
# If crop_size is None, we simply do global pooling.
pool_height = tf.shape(features)[1]
pool_width = tf.shape(features)[2]
image_feature = tf.reduce_mean(features, axis=[1, 2])[:, tf.newaxis,
tf.newaxis]
resize_height = pool_height
resize_width = pool_width
image_feature = slim.conv2d(
image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
image_feature = tf.image.resize_bilinear(
image_feature, [pool_height, pool_width], align_corners=True)
image_feature.set_shape([None, pool_height, pool_width, depth])
image_feature, [resize_height, resize_width], align_corners=True)
# Set shape for resize_height/resize_width if they are not Tensor.
if isinstance(resize_height, tf.Tensor):
resize_height = None
if isinstance(resize_width, tf.Tensor):
resize_width = None
image_feature.set_shape([None, resize_height, resize_width, depth])
branch_logits.append(image_feature)
# Employ a 1x1 convolution.
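The behavioral change worth noting in this hunk: `avg_pool2d` previously used the window size as its stride, collapsing the feature map to a single image-level vector, while the new code pools with stride `[1, 1]`, so a window smaller than the feature map yields a grid of local averages. A shape-only sketch with assumed sizes:

```python
def valid_pool_out(size, kernel, stride):
  # Output length of a VALID-padded pooling op.
  return (size - kernel) // stride + 1

feat = 33            # e.g. a 513 crop at output_stride 16 -> 33x33 features
print(valid_pool_out(feat, kernel=33, stride=33))  # old: 1 (global average)
print(valid_pool_out(feat, kernel=17, stride=1))   # new: 17x17 local averages
```

Either result is then passed through the 1x1 convolution and bilinearly resized to the crop-derived 33x33 before joining the other ASPP branches.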
@@ -453,9 +471,14 @@ def _get_logits(images,
fine_tune_batch_norm=fine_tune_batch_norm)
if model_options.decoder_output_stride is not None:
decoder_height = scale_dimension(model_options.crop_size[0],
if model_options.crop_size is None:
height = tf.shape(images)[1]
width = tf.shape(images)[2]
else:
height, width = model_options.crop_size
decoder_height = scale_dimension(height,
1.0 / model_options.decoder_output_stride)
decoder_width = scale_dimension(model_options.crop_size[1],
decoder_width = scale_dimension(width,
1.0 / model_options.decoder_output_stride)
features = refine_by_decoder(
features,
@@ -557,8 +580,11 @@ def refine_by_decoder(features,
for j, feature in enumerate(decoder_features_list):
decoder_features_list[j] = tf.image.resize_bilinear(
feature, [decoder_height, decoder_width], align_corners=True)
decoder_features_list[j].set_shape(
[None, decoder_height, decoder_width, None])
h = (None if isinstance(decoder_height, tf.Tensor)
else decoder_height)
w = (None if isinstance(decoder_width, tf.Tensor)
else decoder_width)
decoder_features_list[j].set_shape([None, h, w, None])
decoder_depth = 256
if decoder_use_separable_conv:
decoder_features = split_separable_conv2d(
......
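A pattern repeated in both model.py hunks: a spatial dimension may now be either a Python int (crop_size known) or a tf.Tensor (derived from tf.shape when crop_size is None), and set_shape must receive None in the dynamic case. A minimal TF1-style sketch of the guard:

```python
import tensorflow as tf

def static_or_none(dim):
  # set_shape accepts only ints or None; a tf.Tensor dimension is not
  # known at graph-construction time, so it must become None.
  return None if isinstance(dim, tf.Tensor) else dim

images = tf.placeholder(tf.float32, [None, None, None, 256])
height = tf.shape(images)[1]  # dynamic
width = 65                    # static, e.g. from a known crop size

resized = tf.image.resize_bilinear(images, [height, width], align_corners=True)
resized.set_shape([None, static_or_none(height), static_or_none(width), 256])
print(resized.get_shape())  # (?, ?, 65, 256)
```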
@@ -21,6 +21,8 @@ defined by the different datasets. Supported colormaps are:
* Cityscapes dataset (https://www.cityscapes-dataset.com).
* Mapillary Vistas (https://research.mapillary.com).
* PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/).
"""
......
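For reference, the PASCAL palette this module exposes is generated bit-by-bit from the label index. A sketch of the standard algorithm (this mirrors what the DeepLab demo notebook ships; treat it as illustrative rather than the module's exact code):

```python
import numpy as np

def create_pascal_label_colormap():
  # Spread each label's bits across the high-order bits of R, G, B.
  colormap = np.zeros((256, 3), dtype=int)
  ind = np.arange(256, dtype=int)
  for shift in reversed(range(8)):
    for channel in range(3):
      colormap[:, channel] |= ((ind >> channel) & 1) << shift
    ind >>= 3
  return colormap

print(create_pascal_label_colormap()[15])  # person -> [192 128 128]
```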
@@ -112,10 +112,12 @@ def get_model_init_fn(train_logdir,
variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list)
if variables_to_restore:
return slim.assign_from_checkpoint_fn(
tf_initial_checkpoint,
variables_to_restore,
ignore_missing_vars=ignore_missing_vars)
return None
def get_model_gradient_multipliers(last_layers, last_layer_gradient_multiplier):
......
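Since get_model_init_fn can now return None (nothing to restore), call sites should tolerate that. A hedged usage sketch; slim.learning.train already accepts init_fn=None, and the argument names beyond train_logdir are assumptions for illustration:

```python
# Hypothetical call site (train.py-style); names other than train_logdir
# are assumptions, not confirmed by the hunk above.
init_fn = train_utils.get_model_init_fn(
    train_logdir,
    tf_initial_checkpoint,
    initialize_last_layer=True,
    last_layers=last_layers,
    ignore_missing_vars=True)

# slim.learning.train treats init_fn=None as "no checkpoint restore",
# so the new None return needs no extra branching here.
slim.learning.train(
    train_op,
    logdir=train_logdir,
    init_fn=init_fn)
```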