Commit 87d6459a authored by Yeqing Li's avatar Yeqing Li Committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 276516912
parent 88c864f7
@@ -36,14 +36,19 @@ def batch_norm_relu_generator(params):
   return _batch_norm_op


+def dropblock_generator(params):
+  return nn_ops.Dropblock(
+      dropblock_keep_prob=params.dropblock_keep_prob,
+      dropblock_size=params.dropblock_size)
+
+
 def backbone_generator(params):
   """Generator function for various backbone models."""
   if params.architecture.backbone == 'resnet':
     resnet_params = params.resnet
     backbone_fn = resnet.Resnet(
         resnet_depth=resnet_params.resnet_depth,
-        dropblock_keep_prob=resnet_params.dropblock.dropblock_keep_prob,
-        dropblock_size=resnet_params.dropblock.dropblock_size,
+        dropblock=dropblock_generator(resnet_params.dropblock),
         batch_norm_relu=batch_norm_relu_generator(resnet_params.batch_norm))
   else:
     raise ValueError('Backbone model %s is not supported.' %
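
The hunk above moves DropBlock configuration out of the Resnet constructor and behind a small factory. A minimal sketch of the new injection style, assuming a config object with attribute access (SimpleNamespace is a hypothetical stand-in for the real detection params):

from types import SimpleNamespace

# Hypothetical values; the real ones come from the detection config files.
dropblock_params = SimpleNamespace(dropblock_keep_prob=0.9, dropblock_size=3)
dropblock_op = dropblock_generator(dropblock_params)
# dropblock_op is now a plain callable that Resnet invokes as
# dropblock_op(net, is_training=is_training), so Resnet itself no longer
# carries DropBlock hyperparameters.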
@@ -508,7 +508,9 @@ class ShapemaskPriorHead(object):
     if self._shape_prior_path:
       if self._use_category_for_mask:
         fid = tf.io.gfile.GFile(self._shape_prior_path, 'rb')
-        class_tups = pickle.load(fid)
+        # The encoding='bytes' option works around the pickle
+        # incompatibility between Python 2 and Python 3.
+        class_tups = pickle.load(fid, encoding='bytes')
         max_class_id = class_tups[-1][0] + 1
         class_masks = np.zeros((max_class_id, self._num_clusters,
                                 self._mask_crop_size, self._mask_crop_size),
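
For context, a minimal sketch (not part of this commit) of the behavior the new encoding='bytes' argument works around, assuming a prior file that was pickled under Python 2 ('priors.pkl' is a hypothetical path):

import pickle

with open('priors.pkl', 'rb') as fid:
  try:
    # A pickle written by Python 2 often fails to load here because
    # Python 2 str objects cannot be decoded as ASCII by default.
    data = pickle.load(fid)
  except UnicodeDecodeError:
    fid.seek(0)
    # encoding='bytes' loads Python 2 str objects as bytes instead;
    # string keys may then need an explicit .decode('utf-8').
    data = pickle.load(fid, encoding='bytes')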
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Identity Fn that forwards the input features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Identity(object):
"""Identity function that forwards the input features."""
def __call__(self, features, is_training=False):
"""Only forwards the input features."""
return features
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+from absl import logging
 import tensorflow.compat.v2 as tf

 from tensorflow.python.keras import backend
@@ -82,3 +83,89 @@ class BatchNormRelu(tf.keras.layers.Layer):
     if self._use_relu:
       inputs = tf.nn.relu(inputs)
     return inputs
+
+
+class Dropblock(object):
+  """DropBlock: a regularization method for convolutional neural networks.
+
+  DropBlock is a form of structured dropout, where units in a contiguous
+  region of a feature map are dropped together. DropBlock works better than
+  dropout on convolutional layers due to the fact that activation units in
+  convolutional layers are spatially correlated.
+  See https://arxiv.org/pdf/1810.12890.pdf for details.
+  """
+
+  def __init__(self,
+               dropblock_keep_prob=None,
+               dropblock_size=None,
+               data_format='channels_last'):
+    self._dropblock_keep_prob = dropblock_keep_prob
+    self._dropblock_size = dropblock_size
+    self._data_format = data_format
+
+  def __call__(self, net, is_training=False):
+    """Builds Dropblock layer.
+
+    Args:
+      net: `Tensor` input tensor.
+      is_training: `bool` if True, the model is in training mode.
+
+    Returns:
+      A version of input tensor with DropBlock applied.
+    """
+    if not is_training or self._dropblock_keep_prob is None:
+      return net
+
+    logging.info('Applying DropBlock: dropblock_size {}, net.shape {}'.format(
+        self._dropblock_size, net.shape))
+
+    if self._data_format == 'channels_last':
+      _, height, width, _ = net.get_shape().as_list()
+    else:
+      _, _, height, width = net.get_shape().as_list()
+
+    total_size = width * height
+    dropblock_size = min(self._dropblock_size, min(width, height))
+    # seed_drop_rate is the gamma parameter of DropBlock.
+    seed_drop_rate = (
+        1.0 - self._dropblock_keep_prob) * total_size / dropblock_size**2 / (
+            (width - self._dropblock_size + 1) *
+            (height - self._dropblock_size + 1))
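+    # Equivalently, this is the gamma of Eq. (1) in the DropBlock paper,
+    #   gamma = (1 - keep_prob) / block_size**2
+    #           * feat_size**2 / (feat_size - block_size + 1)**2,
+    # generalized here to rectangular feature maps (width != height).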
+
+    # Forces the block to be inside the feature map.
+    w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
+    valid_block = tf.logical_and(
+        tf.logical_and(w_i >= int(dropblock_size // 2),
+                       w_i < width - (dropblock_size - 1) // 2),
+        tf.logical_and(h_i >= int(dropblock_size // 2),
+                       h_i < height - (dropblock_size - 1) // 2))
+
+    if self._data_format == 'channels_last':
+      valid_block = tf.reshape(valid_block, [1, height, width, 1])
+    else:
+      valid_block = tf.reshape(valid_block, [1, 1, height, width])
+
+    randnoise = tf.random.uniform(net.shape, dtype=tf.float32)
+    valid_block = tf.cast(valid_block, dtype=tf.float32)
+    seed_keep_rate = tf.cast(1 - seed_drop_rate, dtype=tf.float32)
+    block_pattern = (1 - valid_block + seed_keep_rate + randnoise) >= 1
+    block_pattern = tf.cast(block_pattern, dtype=tf.float32)
+
+    if self._data_format == 'channels_last':
+      ksize = [1, self._dropblock_size, self._dropblock_size, 1]
+    else:
+      ksize = [1, 1, self._dropblock_size, self._dropblock_size]
+    block_pattern = -tf.nn.max_pool2d(
+        -block_pattern,
+        ksize=ksize,
+        strides=[1, 1, 1, 1],
+        padding='SAME',
+        data_format='NHWC' if self._data_format == 'channels_last' else 'NCHW')
+
+    percent_ones = tf.cast(
+        tf.reduce_sum(input_tensor=block_pattern), tf.float32) / tf.cast(
+            tf.size(input=block_pattern), tf.float32)
+
+    net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
+        block_pattern, net.dtype)
+
+    return net
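
As a usage sketch (again not part of the diff), the standalone op applies directly to a feature map and is a no-op at evaluation time or when dropblock_keep_prob is None:

import tensorflow.compat.v2 as tf

dropblock = Dropblock(dropblock_keep_prob=0.9, dropblock_size=3)
features = tf.random.uniform([8, 32, 32, 64])  # NHWC feature map.
dropped = dropblock(features, is_training=True)  # Blocks zeroed, rest rescaled.
same = dropblock(features, is_training=False)    # Returns features unchanged.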
@@ -34,17 +34,14 @@ class Resnet(object):
   def __init__(self,
                resnet_depth,
-               dropblock_keep_prob=None,
-               dropblock_size=None,
+               dropblock=nn_ops.Dropblock(),
                batch_norm_relu=nn_ops.BatchNormRelu,
                data_format='channels_last'):
     """ResNet initialization function.

     Args:
       resnet_depth: `int` depth of ResNet backbone model.
-      dropblock_keep_prob: `float` or `Tensor` keep_prob parameter of DropBlock.
-        "None" means no DropBlock.
-      dropblock_size: `int` size parameter of DropBlock.
+      dropblock: a dropblock layer.
       batch_norm_relu: an operation that includes a batch normalization layer
         followed by a relu layer (optional).
       data_format: `str` either "channels_first" for `[batch, channels, height,
@@ -52,20 +49,40 @@ class Resnet(object):
"""
self._resnet_depth = resnet_depth
self._dropblock_keep_prob = dropblock_keep_prob
self._dropblock_size = dropblock_size
self._dropblock = dropblock
self._batch_norm_relu = batch_norm_relu
self._data_format = data_format
model_params = {
18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]},
34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]},
50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]},
101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]},
152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]},
200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]}
10: {
'block': self.residual_block,
'layers': [1, 1, 1, 1]
},
18: {
'block': self.residual_block,
'layers': [2, 2, 2, 2]
},
34: {
'block': self.residual_block,
'layers': [3, 4, 6, 3]
},
50: {
'block': self.bottleneck_block,
'layers': [3, 4, 6, 3]
},
101: {
'block': self.bottleneck_block,
'layers': [3, 4, 23, 3]
},
152: {
'block': self.bottleneck_block,
'layers': [3, 8, 36, 3]
},
200: {
'block': self.bottleneck_block,
'layers': [3, 24, 36, 3]
}
}
if resnet_depth not in model_params:
@@ -95,83 +112,6 @@ class Resnet(object):
     with tf.name_scope('resnet%s' % self._resnet_depth):
       return self._resnet_fn(inputs, is_training)

-  def dropblock(self, net, is_training=None):
-    """DropBlock: a regularization method for convolutional neural networks.
-
-    DropBlock is a form of structured dropout, where units in a contiguous
-    region of a feature map are dropped together. DropBlock works better than
-    dropout on convolutional layers due to the fact that activation units in
-    convolutional layers are spatially correlated.
-    See https://arxiv.org/pdf/1810.12890.pdf for details.
-
-    Args:
-      net: `Tensor` input tensor.
-      is_training: `bool` if True, the model is in training mode.
-
-    Returns:
-      A version of input tensor with DropBlock applied.
-
-    Raises:
-      if width and height of the input tensor are not equal.
-    """
-    if not is_training or self._dropblock_keep_prob is None:
-      return net
-
-    logging.info('Applying DropBlock: dropblock_size {}, net.shape {}'.format(
-        self._dropblock_size, net.shape))
-
-    if self._data_format == 'channels_last':
-      _, width, height, _ = net.get_shape().as_list()
-    else:
-      _, _, width, height = net.get_shape().as_list()
-
-    total_size = width * height
-    dropblock_size = min(self._dropblock_size, min(width, height))
-    # Seed_drop_rate is the gamma parameter of DropBlcok.
-    seed_drop_rate = (
-        1.0 - self._dropblock_keep_prob) * total_size / dropblock_size**2 / (
-            (width - self._dropblock_size + 1) *
-            (height - self._dropblock_size + 1))
-
-    # Forces the block to be inside the feature map.
-    w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
-    valid_block = tf.logical_and(
-        tf.logical_and(w_i >= int(dropblock_size // 2),
-                       w_i < width - (dropblock_size - 1) // 2),
-        tf.logical_and(h_i >= int(dropblock_size // 2),
-                       h_i < width - (dropblock_size - 1) // 2))
-
-    if self._data_format == 'channels_last':
-      valid_block = tf.reshape(valid_block, [1, height, width, 1])
-    else:
-      valid_block = tf.reshape(valid_block, [1, 1, height, width])
-
-    randnoise = tf.random.uniform(net.shape, dtype=tf.float32)
-    valid_block = tf.cast(valid_block, dtype=tf.float32)
-    seed_keep_rate = tf.cast(1 - seed_drop_rate, dtype=tf.float32)
-    block_pattern = (1 - valid_block + seed_keep_rate + randnoise) >= 1
-    block_pattern = tf.cast(block_pattern, dtype=tf.float32)
-
-    if dropblock_size == min(width, height):
-      block_pattern = tf.reduce_min(
-          input_tensor=block_pattern,
-          axis=[1, 2] if self._data_format == 'channels_last' else [2, 3],
-          keepdims=True)
-    else:
-      block_pattern = -tf.keras.layers.MaxPool2D(
-          pool_size=self._dropblock_size,
-          strides=1,
-          padding='SAME',
-          data_format=self._data_format)(-block_pattern)
-
-    percent_ones = tf.cast(
-        tf.reduce_sum(input_tensor=block_pattern), tf.float32) / tf.cast(
-            tf.size(input=block_pattern), tf.float32)
-
-    net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
-        block_pattern, net.dtype)
-
-    return net
-
   def fixed_padding(self, inputs, kernel_size):
     """Pads the input along the spatial dimensions independently of input size.
@@ -300,24 +240,24 @@ class Resnet(object):
           inputs=inputs, filters=filters_out, kernel_size=1, strides=strides)
       shortcut = self._batch_norm_relu(relu=False)(
          shortcut, is_training=is_training)
-      shortcut = self.dropblock(shortcut, is_training=is_training)
+      shortcut = self._dropblock(shortcut, is_training=is_training)

     inputs = self.conv2d_fixed_padding(
         inputs=inputs, filters=filters, kernel_size=1, strides=1)
     inputs = self._batch_norm_relu()(inputs, is_training=is_training)
-    inputs = self.dropblock(inputs, is_training=is_training)
+    inputs = self._dropblock(inputs, is_training=is_training)

     inputs = self.conv2d_fixed_padding(
         inputs=inputs, filters=filters, kernel_size=3, strides=strides)
     inputs = self._batch_norm_relu()(inputs, is_training=is_training)
-    inputs = self.dropblock(inputs, is_training=is_training)
+    inputs = self._dropblock(inputs, is_training=is_training)

     inputs = self.conv2d_fixed_padding(
         inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
     inputs = self._batch_norm_relu(
         relu=False, init_zero=True)(
             inputs, is_training=is_training)
-    inputs = self.dropblock(inputs, is_training=is_training)
+    inputs = self._dropblock(inputs, is_training=is_training)

     return tf.nn.relu(inputs + shortcut)
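
With the injection in place, a DropBlock-enabled backbone is assembled by passing the ops in. A rough sketch with illustrative values (`images` is an assumed NHWC float tensor; the call signature follows the diff's __call__ usage):

backbone = Resnet(
    resnet_depth=50,
    dropblock=nn_ops.Dropblock(dropblock_keep_prob=0.9, dropblock_size=3),
    batch_norm_relu=nn_ops.BatchNormRelu)
features = backbone(images, is_training=True)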