Commit f798e4b5 authored by aquariusjay, committed by GitHub

Merge pull request #4311 from YknZhu/master

PiperOrigin-RevId: 197225788
parents 9dec261e afb2a7dc
@@ -104,14 +104,33 @@ Misc:
To get help with issues you may encounter while using the DeepLab Tensorflow
implementation, create a new question on
[StackOverflow](https://stackoverflow.com/) with the tag "tensorflow".

Please report bugs (i.e., broken code, not usage questions) to the
tensorflow/models GitHub [issue
tracker](https://github.com/tensorflow/models/issues), prefixing the issue name
with "deeplab".
## Change Logs
### May 18, 2018
1. Added builders for ResNet-v1 and Xception model variants.
1. Added ADE20K support, including colormap and pretrained Xception_65 checkpoint.
1. Fixed a bug when using non-default depth_multiplier for MobileNet-v2.
### March 22, 2018
Released checkpoints using MobileNet-V2 as network backbone and pretrained on
PASCAL VOC 2012 and Cityscapes.
### March 5, 2018
First release of DeepLab in TensorFlow including deeper Xception network
backbone. Included checkpoints that have been pretrained on PASCAL VOC 2012
and Cityscapes.
## References

1. **Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs**<br />
...
@@ -95,6 +95,7 @@ ORIGINAL_IMAGE = 'original_image'
# Test set name.
TEST_SET = 'test'

class ModelOptions(
    collections.namedtuple('ModelOptions', [
        'outputs_to_num_classes',
@@ -109,7 +110,8 @@ class ModelOptions(
        'decoder_output_stride',
        'decoder_use_separable_conv',
        'logits_kernel_size',
        'model_variant',
        'depth_multiplier',
    ])):
  """Immutable class to hold model options."""
@@ -139,4 +141,4 @@ class ModelOptions(
        FLAGS.aspp_with_batch_norm, FLAGS.aspp_with_separable_conv,
        FLAGS.multi_grid, FLAGS.decoder_output_stride,
        FLAGS.decoder_use_separable_conv, FLAGS.logits_kernel_size,
        FLAGS.model_variant, FLAGS.depth_multiplier)
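For context, a minimal sketch of constructing the extended `ModelOptions` tuple with the new `depth_multiplier` field (the field list is abbreviated and the values are illustrative, not part of this diff):

```python
import collections

# Abbreviated stand-in for the full field list shown in the diff above.
ModelOptions = collections.namedtuple(
    'ModelOptions',
    ['outputs_to_num_classes', 'model_variant', 'depth_multiplier'])

options = ModelOptions(
    outputs_to_num_classes={'semantic': 21},  # e.g., PASCAL VOC 2012
    model_variant='mobilenet_v2',
    depth_multiplier=0.5)  # a thinner MobileNet-v2 backbone
assert options.depth_multiplier == 0.5
```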
@@ -17,8 +17,9 @@
import functools
import tensorflow as tf

from deeplab.core import resnet_v1_beta
from deeplab.core import xception
from tensorflow.contrib.slim.nets import resnet_utils
from nets.mobilenet import mobilenet_v2
@@ -56,10 +57,12 @@ def _mobilenet_v2(net,
  """
  with tf.variable_scope(
      scope, 'MobilenetV2', [net], reuse=reuse) as scope:
    return mobilenet_v2.mobilenet_base(
        net,
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=depth_multiplier,
        min_depth=8 if depth_multiplier == 1.0 else 1,
        divisible_by=8 if depth_multiplier == 1.0 else 1,
        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
        output_stride=output_stride,
        scope=scope)
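For intuition about the new `min_depth`/`divisible_by` arguments, here is a small sketch of the usual slim-style depth rounding they control (the helper below is illustrative; only the argument names come from the diff):

```python
def scaled_depth(depth, multiplier, min_depth, divisible_by):
  """Scale a layer depth, then round to a multiple of divisible_by."""
  scaled = int(depth * multiplier + divisible_by / 2)
  return max(min_depth, scaled // divisible_by * divisible_by)

print(scaled_depth(32, 1.0, min_depth=8, divisible_by=8))  # 32
print(scaled_depth(32, 0.5, min_depth=1, divisible_by=1))  # 16
```

With a non-default multiplier, forcing `min_depth=1` and `divisible_by=1` keeps the scaled depths exact instead of snapping them back up to multiples of 8, which is the bug the change log mentions.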
@@ -68,13 +71,25 @@ def _mobilenet_v2(net,
# A map from network name to network function.
networks_map = {
    'mobilenet_v2': _mobilenet_v2,
    'resnet_v1_50': resnet_v1_beta.resnet_v1_50,
    'resnet_v1_50_beta': resnet_v1_beta.resnet_v1_50_beta,
    'resnet_v1_101': resnet_v1_beta.resnet_v1_101,
    'resnet_v1_101_beta': resnet_v1_beta.resnet_v1_101_beta,
    'xception_41': xception.xception_41,
    'xception_65': xception.xception_65,
    'xception_71': xception.xception_71,
}

# A map from network name to network arg scope.
arg_scopes_map = {
    'mobilenet_v2': mobilenet_v2.training_scope,
    'resnet_v1_50': resnet_utils.resnet_arg_scope,
    'resnet_v1_50_beta': resnet_utils.resnet_arg_scope,
    'resnet_v1_101': resnet_utils.resnet_arg_scope,
    'resnet_v1_101_beta': resnet_utils.resnet_arg_scope,
    'xception_41': xception.xception_arg_scope,
    'xception_65': xception.xception_arg_scope,
    'xception_71': xception.xception_arg_scope,
}
# Names for end point features.
@@ -86,19 +101,49 @@ networks_to_feature_maps = {
        # The provided checkpoint does not include decoder module.
        DECODER_END_POINTS: None,
    },
    'resnet_v1_50': {
        DECODER_END_POINTS: ['block1/unit_2/bottleneck_v1/conv3'],
    },
    'resnet_v1_50_beta': {
        DECODER_END_POINTS: ['block1/unit_2/bottleneck_v1/conv3'],
    },
    'resnet_v1_101': {
        DECODER_END_POINTS: ['block1/unit_2/bottleneck_v1/conv3'],
    },
    'resnet_v1_101_beta': {
        DECODER_END_POINTS: ['block1/unit_2/bottleneck_v1/conv3'],
    },
    'xception_41': {
        DECODER_END_POINTS: [
            'entry_flow/block2/unit_1/xception_module/'
            'separable_conv2_pointwise',
        ],
    },
    'xception_65': {
        DECODER_END_POINTS: [
            'entry_flow/block2/unit_1/xception_module/'
            'separable_conv2_pointwise',
        ],
    },
    'xception_71': {
        DECODER_END_POINTS: [
            'entry_flow/block2/unit_1/xception_module/'
            'separable_conv2_pointwise',
        ],
    },
}
# A map from feature extractor name to the network name scope used in the
# ImageNet pretrained versions of these models.
name_scope = {
    'mobilenet_v2': 'MobilenetV2',
    'resnet_v1_50': 'resnet_v1_50',
    'resnet_v1_50_beta': 'resnet_v1_50',
    'resnet_v1_101': 'resnet_v1_101',
    'resnet_v1_101_beta': 'resnet_v1_101',
    'xception_41': 'xception_41',
    'xception_65': 'xception_65',
    'xception_71': 'xception_71',
}
# Mean pixel value.
@@ -118,7 +163,13 @@ def _preprocess_zero_mean_unit_range(inputs):
_PREPROCESS_FN = {
    'mobilenet_v2': _preprocess_zero_mean_unit_range,
    'resnet_v1_50': _preprocess_subtract_imagenet_mean,
    'resnet_v1_50_beta': _preprocess_zero_mean_unit_range,
    'resnet_v1_101': _preprocess_subtract_imagenet_mean,
    'resnet_v1_101_beta': _preprocess_zero_mean_unit_range,
    'xception_41': _preprocess_zero_mean_unit_range,
    'xception_65': _preprocess_zero_mean_unit_range,
    'xception_71': _preprocess_zero_mean_unit_range,
}
@@ -140,7 +191,8 @@ def mean_pixel(model_variant=None):
  Returns:
    Mean pixel value.
  """
  if model_variant in ['resnet_v1_50',
                       'resnet_v1_101'] or model_variant is None:
    return _MEAN_RGB
  else:
    return [127.5, 127.5, 127.5]
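The two preprocessing conventions behind `mean_pixel` can be sketched as follows (the function names are illustrative; the mean values mirror the `_MEAN_RGB` constant defined in this file):

```python
import tensorflow as tf

_MEAN_RGB = [123.15, 115.90, 103.06]  # ImageNet mean used by vanilla ResNets

def subtract_imagenet_mean(inputs):
  # resnet_v1_50/resnet_v1_101 checkpoints expect mean-subtracted inputs.
  return tf.to_float(inputs) - tf.reshape(_MEAN_RGB, [1, 1, 1, 3])

def zero_mean_unit_range(inputs):
  # The other backbones map [0, 255] to [-1, 1]; the corresponding
  # mean pixel is therefore 127.5.
  return (2.0 / 255.0) * tf.to_float(inputs) - 1.0
```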
@@ -159,7 +211,8 @@ def extract_features(images,
                     regularize_depthwise=False,
                     preprocess_images=True,
                     num_classes=None,
                     global_pool=False,
                     use_bounded_activations=False):
  """Extracts features by the particular model_variant.

  Args:
@@ -184,6 +237,8 @@ def extract_features(images,
      to None for dense prediction tasks.
    global_pool: Global pooling for image classification task. Defaults to
      False, since dense prediction tasks do not use this.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.

  Returns:
    features: A tensor of size [batch, feature_height, feature_width,
@@ -195,7 +250,25 @@ def extract_features(images,
  Raises:
    ValueError: Unrecognized model variant.
  """
  if 'resnet' in model_variant:
    arg_scope = arg_scopes_map[model_variant](
        weight_decay=weight_decay,
        batch_norm_decay=0.95,
        batch_norm_epsilon=1e-5,
        batch_norm_scale=True,
        activation_fn=tf.nn.relu6 if use_bounded_activations else tf.nn.relu)
    features, end_points = get_network(
        model_variant, preprocess_images, arg_scope)(
            inputs=images,
            num_classes=num_classes,
            is_training=(is_training and fine_tune_batch_norm),
            global_pool=global_pool,
            output_stride=output_stride,
            multi_grid=multi_grid,
            reuse=reuse,
            scope=name_scope[model_variant],
            use_bounded_activations=use_bounded_activations)
  elif 'xception' in model_variant:
    arg_scope = arg_scopes_map[model_variant](
        weight_decay=weight_decay,
        batch_norm_decay=0.9997,
...
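A hedged usage sketch for the new ResNet branch of `extract_features` above (keyword names follow the signature in the diff; the shapes are illustrative):

```python
import tensorflow as tf
from deeplab.core import feature_extractor

images = tf.placeholder(tf.float32, [None, 513, 513, 3])
features, end_points = feature_extractor.extract_features(
    images,
    output_stride=16,
    model_variant='resnet_v1_101_beta',
    use_bounded_activations=False)
# For 513x513 inputs and output_stride=16, features is [None, 33, 33, 2048].
```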
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet v1 model variants.
Code branched out from slim/nets/resnet_v1.py, and please refer to it for
more details.
The original version ResNets-v1 were proposed by:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
from tensorflow.contrib.slim.nets import resnet_utils
slim = tf.contrib.slim
_DEFAULT_MULTI_GRID = [1, 1, 1]
@slim.add_arg_scope
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               unit_rate=1,
               rate=1,
               outputs_collections=None,
               scope=None,
               use_bounded_activations=True):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    unit_rate: An integer, unit rate for atrous convolution.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = slim.conv2d(
          inputs,
          depth,
          [1, 1],
          stride=stride,
          activation_fn=tf.nn.relu6 if use_bounded_activations else None,
          scope='shortcut')
    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
                                        rate=rate*unit_rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    if use_bounded_activations:
      # Use clip_by_value to simulate bandpass activation.
      residual = tf.clip_by_value(residual, -6.0, 6.0)
      output = tf.nn.relu6(shortcut + residual)
    else:
      output = tf.nn.relu(shortcut + residual)

    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.name,
                                            output)
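As a quick numeric illustration of the bounded-activation path above: keeping values in a fixed [-6, 6] range lets a fixed-point quantizer assume a known scale instead of calibrating per-layer ranges (this snippet is illustrative, not part of the file):

```python
import tensorflow as tf

x = tf.constant([-8.0, -1.0, 3.0, 7.5])
bounded = tf.nn.relu6(x)                  # -> [0., 0., 3., 6.]
clipped = tf.clip_by_value(x, -6.0, 6.0)  # -> [-6., -1., 3., 6.]
with tf.Session() as sess:
  print(sess.run([bounded, clipped]))
```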
def root_block_fn_for_beta_variant(net):
  """Gets root_block_fn for beta variant.

  ResNet-v1 beta variant modifies the first original 7x7 convolution to three
  3x3 convolutions.

  Args:
    net: A tensor of size [batch, height, width, channels], input to the model.

  Returns:
    A tensor after three 3x3 convolutions.
  """
  net = resnet_utils.conv2d_same(net, 64, 3, stride=2, scope='conv1_1')
  net = resnet_utils.conv2d_same(net, 64, 3, stride=1, scope='conv1_2')
  net = resnet_utils.conv2d_same(net, 128, 3, stride=1, scope='conv1_3')
  return net
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   store_non_strided_activations=False,
                   use_bounded_activations=False,
                   reuse=None,
                   scope=None):
  """Generator for v1 ResNet models (beta variant).

  This function generates a family of modified ResNet v1 models. In particular,
  the first original 7x7 convolution is replaced with three 3x3 convolutions.
  See the resnet_v1_*() methods for specific model instantiations, obtained by
  selecting different block instantiations that produce ResNets of various
  depths.

  The code is modified from slim/nets/resnet_v1.py, and please refer to it for
  more details.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    root_block_fn: The function consisting of convolution operations applied to
      the root input. If root_block_fn is None, use the original setting of
      ResNet-v1, which is simply one convolution with 7x7 kernel and stride=2.
    store_non_strided_activations: If True, we compute non-strided (undecimated)
      activations at the last unit of each block and store them in the
      `outputs_collections` before subsampling them. This gives us access to
      higher resolution intermediate activations which are useful in some
      dense prediction problems but increases the computation and memory cost
      by 4x at the last unit of each block.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  if root_block_fn is None:
    root_block_fn = functools.partial(resnet_utils.conv2d_same,
                                      num_outputs=64,
                                      kernel_size=7,
                                      stride=2,
                                      scope='conv1')
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope(
          [bottleneck], use_bounded_activations=use_bounded_activations):
        if is_training is not None:
          arg_scope = slim.arg_scope([slim.batch_norm],
                                     is_training=is_training)
        else:
          arg_scope = slim.arg_scope([])
        with arg_scope:
          net = inputs
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride /= 4
          net = root_block_fn(net)
          net = slim.max_pool2d(net, 3, stride=2, padding='SAME', scope='pool1')
          net = resnet_utils.stack_blocks_dense(net, blocks, output_stride,
                                                store_non_strided_activations)

          if global_pool:
            # Global average pooling.
            net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
          if num_classes is not None:
            net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                              normalizer_fn=None, scope='logits')
          # Convert end_points_collection into a dictionary of end_points.
          end_points = slim.utils.convert_collection_to_dict(
              end_points_collection)
          if num_classes is not None:
            end_points['predictions'] = slim.softmax(net, scope='predictions')
          return net, end_points
def resnet_v1_beta_block(scope, base_depth, num_units, stride):
  """Helper function for creating a resnet_v1 beta variant bottleneck block.

  Args:
    scope: The scope of the block.
    base_depth: The depth of the bottleneck layer for each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.

  Returns:
    A resnet_v1 bottleneck block.
  """
  return resnet_utils.Block(scope, bottleneck, [{
      'depth': base_depth * 4,
      'depth_bottleneck': base_depth,
      'stride': 1,
      'unit_rate': 1
  }] * (num_units - 1) + [{
      'depth': base_depth * 4,
      'depth_bottleneck': base_depth,
      'stride': stride,
      'unit_rate': 1
  }])
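For reference, a sketch of what one helper call expands to (derived directly from the code above; `args` is the third field of `resnet_utils.Block`):

```python
block = resnet_v1_beta_block('block1', base_depth=64, num_units=3, stride=2)
# block.args == [
#     {'depth': 256, 'depth_bottleneck': 64, 'stride': 1, 'unit_rate': 1},
#     {'depth': 256, 'depth_bottleneck': 64, 'stride': 1, 'unit_rate': 1},
#     {'depth': 256, 'depth_bottleneck': 64, 'stride': 2, 'unit_rate': 1},
# ]  # only the last unit strides, matching the docstring
```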
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=None,
                 global_pool=False,
                 output_stride=None,
                 store_non_strided_activations=False,
                 multi_grid=None,
                 use_bounded_activations=False,
                 reuse=None,
                 scope='resnet_v1_50'):
  """Resnet v1 50.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    store_non_strided_activations: If True, we compute non-strided (undecimated)
      activations at the last unit of each block and store them in the
      `outputs_collections` before subsampling them. This gives us access to
      higher resolution intermediate activations which are useful in some
      dense prediction problems but increases the computation and memory cost
      by 4x at the last unit of each block.
    multi_grid: Employ a hierarchy of different atrous rates within network.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if multi_grid is not None and does not have length = 3.
  """
  if multi_grid is None:
    multi_grid = _DEFAULT_MULTI_GRID
  else:
    if len(multi_grid) != 3:
      raise ValueError('Expect multi_grid to have length 3.')

  blocks = [
      resnet_v1_beta_block(
          'block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_beta_block(
          'block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_beta_block(
          'block3', base_depth=256, num_units=6, stride=2),
      resnet_utils.Block('block4', bottleneck, [
          {'depth': 2048,
           'depth_bottleneck': 512,
           'stride': 1,
           'unit_rate': rate} for rate in multi_grid]),
  ]
  return resnet_v1_beta(
      inputs,
      blocks=blocks,
      num_classes=num_classes,
      is_training=is_training,
      global_pool=global_pool,
      output_stride=output_stride,
      store_non_strided_activations=store_non_strided_activations,
      reuse=reuse,
      scope=scope,
      use_bounded_activations=use_bounded_activations)
def resnet_v1_50_beta(inputs,
                      num_classes=None,
                      is_training=None,
                      global_pool=False,
                      output_stride=None,
                      store_non_strided_activations=False,
                      multi_grid=None,
                      use_bounded_activations=False,
                      reuse=None,
                      scope='resnet_v1_50'):
  """Resnet v1 50 beta variant.

  This variant modifies the first convolution layer of ResNet-v1-50. In
  particular, it changes the original one 7x7 convolution to three 3x3
  convolutions.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    store_non_strided_activations: If True, we compute non-strided (undecimated)
      activations at the last unit of each block and store them in the
      `outputs_collections` before subsampling them. This gives us access to
      higher resolution intermediate activations which are useful in some
      dense prediction problems but increases the computation and memory cost
      by 4x at the last unit of each block.
    multi_grid: Employ a hierarchy of different atrous rates within network.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if multi_grid is not None and does not have length = 3.
  """
  if multi_grid is None:
    multi_grid = _DEFAULT_MULTI_GRID
  else:
    if len(multi_grid) != 3:
      raise ValueError('Expect multi_grid to have length 3.')

  blocks = [
      resnet_v1_beta_block(
          'block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_beta_block(
          'block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_beta_block(
          'block3', base_depth=256, num_units=6, stride=2),
      resnet_utils.Block('block4', bottleneck, [
          {'depth': 2048,
           'depth_bottleneck': 512,
           'stride': 1,
           'unit_rate': rate} for rate in multi_grid]),
  ]
  return resnet_v1_beta(
      inputs,
      blocks=blocks,
      num_classes=num_classes,
      is_training=is_training,
      global_pool=global_pool,
      output_stride=output_stride,
      root_block_fn=functools.partial(root_block_fn_for_beta_variant),
      store_non_strided_activations=store_non_strided_activations,
      reuse=reuse,
      scope=scope,
      use_bounded_activations=use_bounded_activations)
def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=None,
                  global_pool=False,
                  output_stride=None,
                  store_non_strided_activations=False,
                  multi_grid=None,
                  use_bounded_activations=False,
                  reuse=None,
                  scope='resnet_v1_101'):
  """Resnet v1 101.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    store_non_strided_activations: If True, we compute non-strided (undecimated)
      activations at the last unit of each block and store them in the
      `outputs_collections` before subsampling them. This gives us access to
      higher resolution intermediate activations which are useful in some
      dense prediction problems but increases the computation and memory cost
      by 4x at the last unit of each block.
    multi_grid: Employ a hierarchy of different atrous rates within network.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if multi_grid is not None and does not have length = 3.
  """
  if multi_grid is None:
    multi_grid = _DEFAULT_MULTI_GRID
  else:
    if len(multi_grid) != 3:
      raise ValueError('Expect multi_grid to have length 3.')

  blocks = [
      resnet_v1_beta_block(
          'block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_beta_block(
          'block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_beta_block(
          'block3', base_depth=256, num_units=23, stride=2),
      resnet_utils.Block('block4', bottleneck, [
          {'depth': 2048,
           'depth_bottleneck': 512,
           'stride': 1,
           'unit_rate': rate} for rate in multi_grid]),
  ]
  return resnet_v1_beta(
      inputs,
      blocks=blocks,
      num_classes=num_classes,
      is_training=is_training,
      global_pool=global_pool,
      output_stride=output_stride,
      store_non_strided_activations=store_non_strided_activations,
      reuse=reuse,
      scope=scope,
      use_bounded_activations=use_bounded_activations)
def resnet_v1_101_beta(inputs,
                       num_classes=None,
                       is_training=None,
                       global_pool=False,
                       output_stride=None,
                       store_non_strided_activations=False,
                       multi_grid=None,
                       use_bounded_activations=False,
                       reuse=None,
                       scope='resnet_v1_101'):
  """Resnet v1 101 beta variant.

  This variant modifies the first convolution layer of ResNet-v1-101. In
  particular, it changes the original one 7x7 convolution to three 3x3
  convolutions.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    store_non_strided_activations: If True, we compute non-strided (undecimated)
      activations at the last unit of each block and store them in the
      `outputs_collections` before subsampling them. This gives us access to
      higher resolution intermediate activations which are useful in some
      dense prediction problems but increases the computation and memory cost
      by 4x at the last unit of each block.
    multi_grid: Employ a hierarchy of different atrous rates within network.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if multi_grid is not None and does not have length = 3.
  """
  if multi_grid is None:
    multi_grid = _DEFAULT_MULTI_GRID
  else:
    if len(multi_grid) != 3:
      raise ValueError('Expect multi_grid to have length 3.')

  blocks = [
      resnet_v1_beta_block(
          'block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_beta_block(
          'block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_beta_block(
          'block3', base_depth=256, num_units=23, stride=2),
      resnet_utils.Block('block4', bottleneck, [
          {'depth': 2048,
           'depth_bottleneck': 512,
           'stride': 1,
           'unit_rate': rate} for rate in multi_grid]),
  ]
  return resnet_v1_beta(
      inputs,
      blocks=blocks,
      num_classes=num_classes,
      is_training=is_training,
      global_pool=global_pool,
      output_stride=output_stride,
      root_block_fn=functools.partial(root_block_fn_for_beta_variant),
      store_non_strided_activations=store_non_strided_activations,
      use_bounded_activations=use_bounded_activations,
      reuse=reuse,
      scope=scope)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for resnet_v1_beta module."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import tensorflow as tf
from deeplab.core import resnet_v1_beta
from tensorflow.contrib.slim.nets import resnet_utils
slim = tf.contrib.slim
def create_test_input(batch, height, width, channels):
  """Create test input tensor."""
  if None in [batch, height, width, channels]:
    return tf.placeholder(tf.float32, (batch, height, width, channels))
  else:
    return tf.to_float(
        np.tile(
            np.reshape(
                np.reshape(np.arange(height), [height, 1]) +
                np.reshape(np.arange(width), [1, width]),
                [1, height, width, 1]),
            [batch, 1, 1, channels]))
class ResnetCompleteNetworkTest(tf.test.TestCase):
  """Tests with complete small ResNet v1 networks."""

  def _resnet_small(self,
                    inputs,
                    num_classes=None,
                    is_training=True,
                    global_pool=True,
                    output_stride=None,
                    store_non_strided_activations=False,
                    multi_grid=None,
                    reuse=None,
                    scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests."""
    if multi_grid is None:
      multi_grid = [1, 1, 1]
    else:
      if len(multi_grid) != 3:
        raise ValueError('Expect multi_grid to have length 3.')

    block = resnet_v1_beta.resnet_v1_beta_block
    blocks = [
        block('block1', base_depth=1, num_units=3, stride=2),
        block('block2', base_depth=2, num_units=3, stride=2),
        block('block3', base_depth=4, num_units=3, stride=2),
        resnet_utils.Block('block4', resnet_v1_beta.bottleneck, [
            {'depth': 32,
             'depth_bottleneck': 8,
             'stride': 1,
             'unit_rate': rate} for rate in multi_grid])]

    return resnet_v1_beta.resnet_v1_beta(
        inputs,
        blocks,
        num_classes=num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        root_block_fn=functools.partial(
            resnet_v1_beta.root_block_fn_for_beta_variant),
        store_non_strided_activations=store_non_strided_activations,
        reuse=reuse,
        scope=scope)
  def testClassificationEndPoints(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, end_points = self._resnet_small(inputs,
                                              num_classes,
                                              global_pool=global_pool,
                                              scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
    self.assertTrue('predictions' in end_points)
    self.assertListEqual(end_points['predictions'].get_shape().as_list(),
                         [2, 1, 1, num_classes])

  def testClassificationEndPointsWithMultigrid(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    multi_grid = [1, 2, 4]
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, end_points = self._resnet_small(inputs,
                                              num_classes,
                                              global_pool=global_pool,
                                              multi_grid=multi_grid,
                                              scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
    self.assertTrue('predictions' in end_points)
    self.assertListEqual(end_points['predictions'].get_shape().as_list(),
                         [2, 1, 1, num_classes])
  def testClassificationShapes(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs,
                                         num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/conv1_1': [2, 112, 112, 64],
          'resnet/conv1_2': [2, 112, 112, 64],
          'resnet/conv1_3': [2, 112, 112, 128],
          'resnet/block1': [2, 28, 28, 4],
          'resnet/block2': [2, 14, 14, 8],
          'resnet/block3': [2, 7, 7, 16],
          'resnet/block4': [2, 7, 7, 32]}
      # Use items() rather than Python-2-only iteritems().
      for endpoint, shape in endpoint_to_shape.items():
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
  def testFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs,
                                         num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/conv1_1': [2, 161, 161, 64],
          'resnet/conv1_2': [2, 161, 161, 64],
          'resnet/conv1_3': [2, 161, 161, 128],
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 21, 21, 8],
          'resnet/block3': [2, 11, 11, 16],
          'resnet/block4': [2, 11, 11, 32]}
      for endpoint, shape in endpoint_to_shape.items():
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
  def testAtrousFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    output_stride = 8
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs,
                                         num_classes,
                                         global_pool=global_pool,
                                         output_stride=output_stride,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/conv1_1': [2, 161, 161, 64],
          'resnet/conv1_2': [2, 161, 161, 64],
          'resnet/conv1_3': [2, 161, 161, 128],
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 41, 41, 8],
          'resnet/block3': [2, 41, 41, 16],
          'resnet/block4': [2, 41, 41, 32]}
      for endpoint, shape in endpoint_to_shape.items():
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
  def testAtrousFullyConvolutionalValues(self):
    """Verify dense feature extraction with atrous convolution."""
    nominal_stride = 32
    for output_stride in [4, 8, 16, 32, None]:
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with tf.Graph().as_default():
          with self.test_session() as sess:
            tf.set_random_seed(0)
            inputs = create_test_input(2, 81, 81, 3)
            # Dense feature extraction followed by subsampling.
            output, _ = self._resnet_small(inputs,
                                           None,
                                           is_training=False,
                                           global_pool=False,
                                           output_stride=output_stride)
            if output_stride is None:
              factor = 1
            else:
              factor = nominal_stride // output_stride
            output = resnet_utils.subsample(output, factor)
            # Make the two networks use the same weights.
            tf.get_variable_scope().reuse_variables()
            # Feature extraction at the nominal network rate.
            expected, _ = self._resnet_small(inputs,
                                             None,
                                             is_training=False,
                                             global_pool=False)
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(output.eval(), expected.eval(),
                                atol=1e-4, rtol=1e-4)
  def testUnknownBatchSize(self):
    batch = 2
    height, width = 65, 65
    global_pool = True
    num_classes = 10
    inputs = create_test_input(None, height, width, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, _ = self._resnet_small(inputs,
                                     num_classes,
                                     global_pool=global_pool,
                                     scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, 1, 1, num_classes])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch, 1, 1, num_classes))
  def testFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs,
                                     None,
                                     global_pool=global_pool)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch, 3, 3, 32))
  def testAtrousFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    output_stride = 8
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs,
                                     None,
                                     global_pool=global_pool,
                                     output_stride=output_stride)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch, 9, 9, 32))
if __name__ == '__main__':
  tf.test.main()
@@ -493,6 +493,73 @@ def xception_block(scope,
  }] * num_units)
def xception_41(inputs,
                num_classes=None,
                is_training=True,
                global_pool=True,
                keep_prob=0.5,
                output_stride=None,
                regularize_depthwise=False,
                multi_grid=None,
                reuse=None,
                scope='xception_41'):
  """Xception-41 model."""
  blocks = [
      xception_block('entry_flow/block1',
                     depth_list=[128, 128, 128],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('entry_flow/block2',
                     depth_list=[256, 256, 256],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('entry_flow/block3',
                     depth_list=[728, 728, 728],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('middle_flow/block1',
                     depth_list=[728, 728, 728],
                     skip_connection_type='sum',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=8,
                     stride=1),
      xception_block('exit_flow/block1',
                     depth_list=[728, 1024, 1024],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('exit_flow/block2',
                     depth_list=[1536, 1536, 2048],
                     skip_connection_type='none',
                     activation_fn_in_separable_conv=True,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=1,
                     unit_rate_list=multi_grid),
  ]
  return xception(inputs,
                  blocks=blocks,
                  num_classes=num_classes,
                  is_training=is_training,
                  global_pool=global_pool,
                  keep_prob=keep_prob,
                  output_stride=output_stride,
                  reuse=reuse,
                  scope=scope)
def xception_65(inputs,
                num_classes=None,
                is_training=True,
@@ -560,6 +627,87 @@ def xception_65(inputs,
                  scope=scope)
def xception_71(inputs,
                num_classes=None,
                is_training=True,
                global_pool=True,
                keep_prob=0.5,
                output_stride=None,
                regularize_depthwise=False,
                multi_grid=None,
                reuse=None,
                scope='xception_71'):
  """Xception-71 model."""
  blocks = [
      xception_block('entry_flow/block1',
                     depth_list=[128, 128, 128],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('entry_flow/block2',
                     depth_list=[256, 256, 256],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=1),
      xception_block('entry_flow/block3',
                     depth_list=[256, 256, 256],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('entry_flow/block4',
                     depth_list=[728, 728, 728],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=1),
      xception_block('entry_flow/block5',
                     depth_list=[728, 728, 728],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('middle_flow/block1',
                     depth_list=[728, 728, 728],
                     skip_connection_type='sum',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=16,
                     stride=1),
      xception_block('exit_flow/block1',
                     depth_list=[728, 1024, 1024],
                     skip_connection_type='conv',
                     activation_fn_in_separable_conv=False,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=2),
      xception_block('exit_flow/block2',
                     depth_list=[1536, 1536, 2048],
                     skip_connection_type='none',
                     activation_fn_in_separable_conv=True,
                     regularize_depthwise=regularize_depthwise,
                     num_units=1,
                     stride=1,
                     unit_rate_list=multi_grid),
  ]
  return xception(inputs,
                  blocks=blocks,
                  num_classes=num_classes,
                  is_training=is_training,
                  global_pool=global_pool,
                  keep_prob=keep_prob,
                  output_stride=output_stride,
                  reuse=reuse,
                  scope=scope)
def xception_arg_scope(weight_decay=0.00004,
                       batch_norm_decay=0.9997,
                       batch_norm_epsilon=0.001,
...
@@ -14,8 +14,8 @@
# ==============================================================================
"""Tests for xception.py."""

import numpy as np
import six
import tensorflow as tf

from deeplab.core import xception
...
@@ -13,10 +13,11 @@
# limitations under the License.
# ==============================================================================
"""Converts ADE20K data to TFRecord file format with Example protos."""

import math
import os
import random
import sys

import build_data
import tensorflow as tf
@@ -44,12 +45,13 @@ tf.app.flags.DEFINE_string(
tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted tfrecord of Tensorflow example')

_NUM_SHARDS = 4


def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20k dataset into tfrecord format.

  Args:
    dataset_split: Dataset split (e.g., train, val).
@@ -65,7 +67,7 @@ def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  seg_names = []
  for f in img_names:
    # get the filename without the extension
    basename = os.path.basename(f).split('.')[0]
    # cover its corresponding *_seg.png
    seg = os.path.join(dataset_label_dir, basename+'.png')
    seg_names.append(seg)
@@ -104,10 +106,13 @@ def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
    sys.stdout.write('\n')
    sys.stdout.flush()


def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset('val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
@@ -127,9 +127,10 @@ def _bytes_list_feature(values):
    A TF-Feature.
  """
  def norm2bytes(value):
    return value.encode() if isinstance(value, str) and six.PY3 else value

  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[norm2bytes(values)]))


def image_seg_to_tfexample(image_data, filename, height, width, seg_data):
...
@@ -24,7 +24,7 @@
#  - build_data.py
#  - build_ade20k_data.py
#  - download_and_convert_ade20k.sh
#  + ADE20K
#    + tfrecord
#    + ADEChallengeData2016
#      + annotations
...
@@ -69,7 +69,10 @@ _ITEMS_TO_DESCRIPTIONS = {
DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor',
    ['splits_to_sizes',  # Splits of the dataset into training, val, and test.
     'num_classes',      # Number of semantic classes, including the background
                         # class (if exists). For example, there are 20
                         # foreground classes + 1 background class in the
                         # PASCAL VOC 2012 dataset. Thus, we set num_classes=21.
     'ignore_label',     # Ignore label value.
    ]
)
@@ -96,12 +99,12 @@ _PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
# These numbers (i.e., 'train'/'test') seem to have to be hard coded.
# You are required to figure it out for your training/testing example.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
    },
    num_classes=151,
    ignore_label=0,
)
...
@@ -17,8 +17,8 @@
See model.py for more details and usage.
"""

import math
import six
import tensorflow as tf

from deeplab import common
from deeplab import model
...
@@ -13,8 +13,7 @@ convert ADE20K semantic segmentation dataset to TFRecord.
bash download_and_convert_ade20k.sh
```

The converted dataset will be saved at ./deeplab/datasets/ADE20K/tfrecord

## Recommended Directory Structure for Training and Evaluation
@@ -23,7 +22,7 @@ The converted dataset will be saved at
- build_data.py
- build_ade20k_data.py
- download_and_convert_ade20k.sh
+ ADE20K
  + tfrecord
  + exp
    + train_on_train_set
@@ -50,7 +49,7 @@ A local training job using `xception_65` can be run with the following command:
# From tensorflow/models/research/
python deeplab/train.py \
    --logtostderr \
    --training_number_of_steps=90000 \
    --train_split="train" \
    --model_variant="xception_65" \
    --atrous_rates=6 \
@@ -61,21 +60,16 @@ python deeplab/train.py \
    --train_crop_size=513 \
    --train_crop_size=513 \
    --train_batch_size=4 \
    --min_resize_value=513 \
    --max_resize_value=513 \
    --resize_factor=16 \
    --dataset="ade20k" \
    --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
    --train_logdir=${PATH_TO_TRAIN_DIR} \
    --dataset_dir=${PATH_TO_DATASET}
```

where ${PATH\_TO\_INITIAL\_CHECKPOINT} is the path to the initial checkpoint.
${PATH\_TO\_TRAIN\_DIR} is the directory in which training checkpoints and
events will be written to (it is recommended to set it to the
`train_on_train_set/train` above), and ${PATH\_TO\_DATASET} is the directory in
@@ -83,24 +77,22 @@ which the ADE20K dataset resides (the `tfrecord` above)
**Note that for train.py:**

1. In order to fine tune the BN layers, one needs to use a large batch size
   (> 12) and set fine_tune_batch_norm = True. Here, we simply use a small
   batch size during training for the purpose of demonstration. If the users
   have limited GPU memory at hand, please fine-tune from our provided
   checkpoints whose batch norm parameters have been trained, and use a
   smaller learning rate with fine_tune_batch_norm = False.

2. Users should fine-tune `min_resize_value` and `max_resize_value` to get
   better results. Note that `resize_factor` has to be equal to
   `output_stride`.

3. Users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
   setting output_stride=8.

4. Users could skip the flag `decoder_output_stride` if they do not want to
   use the decoder structure.

## Running Tensorboard

Progress for training and evaluation jobs can be inspected using Tensorboard. If
...
@@ -60,6 +60,12 @@ sh local_test_mobilenetv2.sh
First, make sure you could reproduce the results with our provided setting.
After that, you could start to make a new change one at a time to help debug.
___
Q8: What value of `eval_crop_size` should I use?

A: Our model uses whole-image inference, meaning that we need to set
`eval_crop_size` equal to `output_stride` * k + 1, where k is an integer, and
choose k so that the resulting `eval_crop_size` is slightly larger than the
largest image dimension in the dataset. For example, we use `eval_crop_size` =
513x513 for the PASCAL dataset, whose largest image dimension is 512.
Similarly, we set `eval_crop_size` = 1025x2049 for Cityscapes, whose images
are all of dimension 1024x2048.
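
A minimal sketch of this rule (a hypothetical helper, not part of the
codebase):

```python
def eval_crop_dim(image_dim, output_stride=16):
  """Returns the smallest output_stride * k + 1 that covers image_dim."""
  k = (image_dim - 1 + output_stride - 1) // output_stride  # ceiling division
  return output_stride * max(k, 1) + 1

assert eval_crop_dim(512) == 513    # PASCAL: largest image dimension is 512.
assert eval_crop_dim(1024) == 1025  # Cityscapes image height.
assert eval_crop_dim(2048) == 2049  # Cityscapes image width.
```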
___
## References
...
# TensorFlow DeepLab Model Zoo
We provide DeepLab models pretrained on several datasets, including (1) PASCAL
VOC 2012, (2) Cityscapes, and (3) ADE20K, for reproducing our results, as well
as some checkpoints that are only pretrained on ImageNet for training your own
models.
## DeepLab models trained on PASCAL VOC 2012
...@@ -69,6 +70,22 @@ Checkpoint name
[mobilenetv2_coco_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_mnv2_cityscapes_train_2018_02_05.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 21.27B <br> 433.24B | 0.8 <br> 51.12 | 70.71% (val) <br> 73.57% (val) | 23MB
[xception_cityscapes_trainfine](http://download.tensorflow.org/models/deeplabv3_cityscapes_train_2018_02_06.tar.gz) | 16 <br> 8 | [1.0] <br> [0.75:0.25:1.25] | No <br> Yes | 418.64B <br> 8677.92B | 5.0 <br> 422.8 | 78.79% (val) <br> 80.42% (val) | 439MB
## DeepLab models trained on ADE20K
### Model details
We provide some checkpoints that have been pretrained on the ADE20K training
set. Note that the model has only been pretrained on ImageNet, following the
dataset rule.
Checkpoint name | Network backbone | Pretrained dataset | ASPP | Decoder
------------------------------------- | :--------------: | :-------------------------------------: | :----------------------------------------------: | :-----:
xception_ade20k_train | Xception_65 | ImageNet <br> ADE20K training set | [6, 12, 18] for OS=16 <br> [12, 24, 36] for OS=8 | OS = 4
Checkpoint name | Eval OS | Eval scales | Left-right Flip | mIOU | Pixel-wise Accuracy | File Size
--------------------- | :-----: | :-------------: | :-------------: | :--------------: | :-----------------: | :-------:
[xception_ade20k_train](http://download.tensorflow.org/models/deeplabv3_xception_ade20k_train_2018_05_14.tar.gz) | 16 | [0.5:0.25:1.75] | Yes | 43.54% (val) | 81.74% (val) | 439MB
## Checkpoints pretrained on ImageNet
Un-tar'ed directory includes:
...@@ -84,15 +101,24 @@ one could use this for training your own models.
[MobileNet-V2](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet)
for details.
* xception_{41,65,71}: We adapt the original Xception model to the task of
  semantic segmentation with the following changes: (1) more layers, (2) all
  max pooling operations are replaced by strided (atrous) separable
  convolutions, and (3) extra batch-norm and ReLU after each 3x3 depthwise
  convolution are added. We provide three Xception model variants with
  different network depths.

* resnet_v1_{50,101}_beta: We modify the original ResNet-101 [10], similar to
  PSPNet [11], by replacing the first 7x7 convolution with three 3x3
  convolutions (a rough sketch follows this list). See resnet_v1_beta.py for
  more details.
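
As a rough sketch only (assumed scope names and padding details; the actual
implementation lives in resnet_v1_beta.py), the modified "beta" stem can be
written with tf.contrib.slim as:

```python
import tensorflow as tf

slim = tf.contrib.slim

def beta_root_block(inputs):
  """Replaces ResNet's 7x7/stride-2 stem with three 3x3 convolutions."""
  net = slim.conv2d(inputs, 64, [3, 3], stride=2, scope='conv1_1')
  net = slim.conv2d(net, 64, [3, 3], stride=1, scope='conv1_2')
  net = slim.conv2d(net, 128, [3, 3], stride=1, scope='conv1_3')
  return net
```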
Model name | File Size
-------------------------------------------------------------------------------------- | :-------:
[xception_41](http://download.tensorflow.org/models/xception_41_2018_05_09.tar.gz) | 288MB
[xception_65](http://download.tensorflow.org/models/deeplabv3_xception_2018_01_04.tar.gz) | 447MB
[xception_71](http://download.tensorflow.org/models/xception_71_2018_05_09.tar.gz) | 474MB
[resnet_v1_50_beta](http://download.tensorflow.org/models/resnet_v1_50_2018_05_04.tar.gz) | 274MB
[resnet_v1_101_beta](http://download.tensorflow.org/models/resnet_v1_101_2018_05_04.tar.gz) | 477MB
## References
...@@ -132,3 +158,16 @@ Model name
9. **ImageNet Large Scale Visual Recognition Challenge**<br />
Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, Li Fei-Fei<br />
[[link]](http://www.image-net.org/). IJCV, 2015.
10. **Deep Residual Learning for Image Recognition**<br />
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun<br />
[[link]](https://arxiv.org/abs/1512.03385). In CVPR, 2016.
11. **Pyramid Scene Parsing Network**<br />
Hengshuang Zhao, Jianping Shi, Xiaojuan Qi, Xiaogang Wang, Jiaya Jia<br />
[[link]](https://arxiv.org/abs/1612.01105). In CVPR, 2017.
12. **Scene Parsing through ADE20K Dataset**<br />
Bolei Zhou, Hang Zhao, Xavier Puig, Sanja Fidler, Adela Barriuso, Antonio Torralba<br />
[[link]](http://groups.csail.mit.edu/vision/datasets/ADE20K/). In CVPR, 2017.
...@@ -64,6 +64,10 @@ _CONCAT_PROJECTION_SCOPE = 'concat_projection'
_DECODER_SCOPE = 'decoder'
def get_merged_logits_scope():
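"""Returns the scope name of the merged logits."""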
return _MERGED_LOGITS_SCOPE
def get_extra_layer_scopes(last_layers_contain_logits_only=False):
"""Gets the scopes for extra layers. """Gets the scopes for extra layers.
...@@ -358,6 +362,7 @@ def _extract_features(images,
output_stride=model_options.output_stride,
multi_grid=model_options.multi_grid,
model_variant=model_options.model_variant,
depth_multiplier=model_options.depth_multiplier,
weight_decay=weight_decay,
reuse=reuse,
is_training=is_training,
...
...@@ -111,7 +111,7 @@ class DeeplabModelTest(tf.test.TestCase):
for output in outputs_to_num_classes:
scales_to_logits = outputs_to_scales_to_logits[output]
# Expect only one output.
self.assertEqual(len(scales_to_logits), 1)
for logits in scales_to_logits.values():
self.assertTrue(logits.any())
...
...@@ -68,7 +68,8 @@ flags.DEFINE_integer('save_summaries_secs', 600,
'How often, in seconds, we compute the summaries.')
flags.DEFINE_boolean('save_summaries_images', False,
                     'Save sample inputs, labels, and semantic predictions as '
                     'images to summary.')
# Settings for training strategy.
...@@ -184,9 +185,11 @@ def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
""" """
samples = inputs_queue.dequeue() samples = inputs_queue.dequeue()
# Add name to input and label nodes so we can add to summary.
samples[common.IMAGE] = tf.identity(
    samples[common.IMAGE], name=common.IMAGE)
samples[common.LABEL] = tf.identity(
    samples[common.LABEL], name=common.LABEL)
model_options = common.ModelOptions(
outputs_to_num_classes=outputs_to_num_classes,
...@@ -201,11 +204,11 @@ def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
is_training=True,
fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)
# Add name to graph node so we can add to summary.
output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
output_type_dict[model.get_merged_logits_scope()] = tf.identity(
    output_type_dict[model.get_merged_logits_scope()],
    name=common.OUTPUT_TYPE)
for output, num_classes in six.iteritems(outputs_to_num_classes):
train_utils.add_softmax_cross_entropy_loss_for_each_scale(
...@@ -234,7 +237,7 @@ def main(unused_argv):
assert FLAGS.train_batch_size % config.num_clones == 0, (
    'Training batch size not divisible by number of clones (GPUs).')
clone_batch_size = FLAGS.train_batch_size // config.num_clones
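# Floor division keeps clone_batch_size an integer under Python 3, where
# plain / would return a float.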
# Get dataset-dependent information.
dataset = segmentation_dataset.get_dataset(
...@@ -286,19 +289,27 @@ def main(unused_argv):
# Add summaries for images, labels, semantic predictions
if FLAGS.save_summaries_images:
  summary_image = graph.get_tensor_by_name(
      ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
  summaries.add(
      tf.summary.image('samples/%s' % common.IMAGE, summary_image))

  first_clone_label = graph.get_tensor_by_name(
      ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
  # Scale up summary image pixel values for better visualization.
  pixel_scaling = max(1, 255 // dataset.num_classes)
  summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
  summaries.add(
      tf.summary.image('samples/%s' % common.LABEL, summary_label))

  first_clone_output = graph.get_tensor_by_name(
      ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
  predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)
  summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
  summaries.add(
      tf.summary.image(
          'samples/%s' % common.OUTPUT_TYPE, summary_predictions))
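  # Illustrative numbers (not from the original file): with PASCAL VOC's 21
  # classes, pixel_scaling = max(1, 255 // 21) = 12, so the largest label id,
  # 20, is drawn at intensity 240 rather than a nearly invisible 20.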
# Add summaries for losses.
for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
...@@ -325,7 +336,8 @@ def main(unused_argv):
summaries.add(tf.summary.scalar('total_loss', total_loss))
# Modify the gradients for biases and last layer variables.
last_layers = model.get_extra_layer_scopes(
    FLAGS.last_layers_contain_logits_only)
grad_mult = train_utils.get_model_gradient_multipliers(
    last_layers, FLAGS.last_layer_gradient_multiplier)
if grad_mult:
...
...@@ -17,30 +17,196 @@
Visualizes the semantic segmentation results by the color map
defined by the different datasets. Supported colormaps are:

* ADE20K (http://groups.csail.mit.edu/vision/datasets/ADE20K/).

* Cityscapes dataset (https://www.cityscapes-dataset.com).

* PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/).
"""
import numpy as np
# Dataset names.
_ADE20K = 'ade20k'
_CITYSCAPES = 'cityscapes'
_PASCAL = 'pascal'
# Max number of entries in the colormap for each dataset.
_DATASET_MAX_ENTRIES = {
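# 151 = 150 ADE20K scene-parsing classes + 1 'background' label (id 0).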
_ADE20K: 151,
_CITYSCAPES: 19,
_PASCAL: 256,
}
def create_ade20k_label_colormap():
"""Creates a label colormap used in ADE20K segmentation benchmark.
Returns:
A colormap for visualizing segmentation results.
"""
return np.asarray([
[0, 0, 0],
[120, 120, 120],
[180, 120, 120],
[6, 230, 230],
[80, 50, 50],
[4, 200, 3],
[120, 120, 80],
[140, 140, 140],
[204, 5, 255],
[230, 230, 230],
[4, 250, 7],
[224, 5, 255],
[235, 255, 7],
[150, 5, 61],
[120, 120, 70],
[8, 255, 51],
[255, 6, 82],
[143, 255, 140],
[204, 255, 4],
[255, 51, 7],
[204, 70, 3],
[0, 102, 200],
[61, 230, 250],
[255, 6, 51],
[11, 102, 255],
[255, 7, 71],
[255, 9, 224],
[9, 7, 230],
[220, 220, 220],
[255, 9, 92],
[112, 9, 255],
[8, 255, 214],
[7, 255, 224],
[255, 184, 6],
[10, 255, 71],
[255, 41, 10],
[7, 255, 255],
[224, 255, 8],
[102, 8, 255],
[255, 61, 6],
[255, 194, 7],
[255, 122, 8],
[0, 255, 20],
[255, 8, 41],
[255, 5, 153],
[6, 51, 255],
[235, 12, 255],
[160, 150, 20],
[0, 163, 255],
[140, 140, 140],
[250, 10, 15],
[20, 255, 0],
[31, 255, 0],
[255, 31, 0],
[255, 224, 0],
[153, 255, 0],
[0, 0, 255],
[255, 71, 0],
[0, 235, 255],
[0, 173, 255],
[31, 0, 255],
[11, 200, 200],
[255, 82, 0],
[0, 255, 245],
[0, 61, 255],
[0, 255, 112],
[0, 255, 133],
[255, 0, 0],
[255, 163, 0],
[255, 102, 0],
[194, 255, 0],
[0, 143, 255],
[51, 255, 0],
[0, 82, 255],
[0, 255, 41],
[0, 255, 173],
[10, 0, 255],
[173, 255, 0],
[0, 255, 153],
[255, 92, 0],
[255, 0, 255],
[255, 0, 245],
[255, 0, 102],
[255, 173, 0],
[255, 0, 20],
[255, 184, 184],
[0, 31, 255],
[0, 255, 61],
[0, 71, 255],
[255, 0, 204],
[0, 255, 194],
[0, 255, 82],
[0, 10, 255],
[0, 112, 255],
[51, 0, 255],
[0, 194, 255],
[0, 122, 255],
[0, 255, 163],
[255, 153, 0],
[0, 255, 10],
[255, 112, 0],
[143, 255, 0],
[82, 0, 255],
[163, 255, 0],
[255, 235, 0],
[8, 184, 170],
[133, 0, 255],
[0, 255, 92],
[184, 0, 255],
[255, 0, 31],
[0, 184, 255],
[0, 214, 255],
[255, 0, 112],
[92, 255, 0],
[0, 224, 255],
[112, 224, 255],
[70, 184, 160],
[163, 0, 255],
[153, 0, 255],
[71, 255, 0],
[255, 0, 163],
[255, 204, 0],
[255, 0, 143],
[0, 255, 235],
[133, 255, 0],
[255, 0, 235],
[245, 0, 255],
[255, 0, 122],
[255, 245, 0],
[10, 190, 212],
[214, 255, 0],
[0, 204, 255],
[20, 0, 255],
[255, 255, 0],
[0, 153, 255],
[0, 41, 255],
[0, 255, 204],
[41, 0, 255],
[41, 255, 0],
[173, 0, 255],
[0, 245, 255],
[71, 0, 255],
[122, 0, 255],
[0, 255, 184],
[0, 92, 255],
[184, 255, 0],
[0, 133, 255],
[255, 214, 0],
[25, 194, 194],
[102, 255, 0],
[92, 0, 255],
])
def create_cityscapes_label_colormap():
"""Creates a label colormap used in CITYSCAPES segmentation benchmark.
Returns:
A colormap for visualizing segmentation results.
"""
return np.asarray([
[128, 64, 128],
[244, 35, 232],
[70, 70, 70],
...@@ -61,17 +227,37 @@ def create_cityscapes_label_colormap():
[0, 0, 230],
[119, 11, 32],
])
def create_pascal_label_colormap():
"""Creates a label colormap used in PASCAL VOC segmentation benchmark.
Returns:
A colormap for visualizing segmentation results.
"""
colormap = np.zeros((_DATASET_MAX_ENTRIES[_PASCAL], 3), dtype=int)
ind = np.arange(_DATASET_MAX_ENTRIES[_PASCAL], dtype=int)
for shift in reversed(range(8)):
for channel in range(3):
colormap[:, channel] |= bit_get(ind, channel) << shift
ind >>= 3
return colormap
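# Worked example (illustrative comment, not part of the original file):
# label 1 is binary 001, so only channel 0 collects bits; the first pass
# (shift=7) sets 1 << 7 = 128 and ind becomes 0, giving colormap[1] =
# [128, 0, 0]. Label 2 (binary 010) likewise maps to [0, 128, 0].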
def get_ade20k_name():
return _ADE20K
def get_cityscapes_name():
return _CITYSCAPES
def get_pascal_name():
return _PASCAL
def bit_get(val, idx):
"""Gets the bit value.
...@@ -85,23 +271,6 @@ def bit_get(val, idx):
return (val >> idx) & 1
def create_label_colormap(dataset=_PASCAL):
"""Creates a label colormap for the specified dataset.
...@@ -114,10 +283,12 @@ def create_label_colormap(dataset=_PASCAL):
Raises:
ValueError: If the dataset is not supported.
"""
if dataset == _ADE20K:
  return create_ade20k_label_colormap()
elif dataset == _CITYSCAPES:
  return create_cityscapes_label_colormap()
elif dataset == _PASCAL:
  return create_pascal_label_colormap()
else:
  raise ValueError('Unsupported dataset.')
...@@ -132,7 +303,7 @@ def label_to_color_image(label, dataset=_PASCAL):
Returns:
result: A 2D array with floating type. The element of the array
is the color indexed by the corresponding element in the input label
to the dataset color map.
Raises:
ValueError: If label is not of rank 2 or its value is larger than color
...
...@@ -70,6 +70,22 @@ class VisualizationUtilTest(tf.test.TestCase):
with self.assertRaises(ValueError):
get_dataset_colormap.create_label_colormap('unsupported_dataset')
def testUnExpectedLabelDimensionForLabelToADE20KColorImage(self):
label = np.array([250])
with self.assertRaises(ValueError):
get_dataset_colormap.label_to_color_image(
label, get_dataset_colormap.get_ade20k_name())
def testFirstColorInADE20KColorMap(self):
label = np.array([[1, 3], [10, 20]])
expected_result = np.array([
[[120, 120, 120], [6, 230, 230]],
[[4, 250, 7], [204, 70, 3]]
])
colored_label = get_dataset_colormap.label_to_color_image(
label, get_dataset_colormap.get_ade20k_name())
self.assertTrue(np.array_equal(colored_label, expected_result))
if __name__ == '__main__':
tf.test.main()