Commit 2e9bb539 authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

parents 7bae5317 8fba84f8
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding box utils."""
import math
import tensorflow as tf
def yxyx_to_xcycwh(box: tf.Tensor):
"""Converts boxes from ymin, xmin, ymax, xmax.
to x_center, y_center, width, height.
Args:
box: `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in ymin, xmin, ymax, xmax.
Returns:
`Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('yxyx_to_xcycwh'):
ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1)
x_center = (xmax + xmin) / 2
y_center = (ymax + ymin) / 2
width = xmax - xmin
height = ymax - ymin
box = tf.concat([x_center, y_center, width, height], axis=-1)
return box
def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False):
"""Converts boxes from x_center, y_center, width, height.
to ymin, xmin, ymax, xmax.
Args:
box: a `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in x_center, y_center, width, height.
split_min_max: bool, whether or not to split x, y min and max values.
Returns:
box: a `Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('xcycwh_to_yxyx'):
xy, wh = tf.split(box, 2, axis=-1)
xy_min = xy - wh / 2
xy_max = xy + wh / 2
x_min, y_min = tf.split(xy_min, 2, axis=-1)
x_max, y_max = tf.split(xy_max, 2, axis=-1)
box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
if split_min_max:
box = tf.split(box, 2, axis=-1)
return box
def xcycwh_to_xyxy(box: tf.Tensor, split_min_max: bool = False):
"""Converts boxes from x_center, y_center, width, height to.
xmin, ymin, xmax, ymax.
Args:
box: a `Tensor` whose shape is [..., 4] and represents the
coordinates of boxes in x_center, y_center, width, height.
split_min_max: bool, whether or not to split x, y min and max values.
Returns:
box: a `Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('xcycwh_to_xyxy'):
xy, wh = tf.split(box, 2, axis=-1)
xy_min = xy - wh / 2
xy_max = xy + wh / 2
box = (xy_min, xy_max)
if not split_min_max:
box = tf.concat(box, axis=-1)
return box
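# A minimal usage sketch (the `_demo_box_format_conversion` helper is an
# illustrative addition, not part of the original module). It shows that the
# conversions above are inverses of each other for a normalized box.
def _demo_box_format_conversion():
  box = tf.constant([[0.1, 0.2, 0.5, 0.8]])  # ymin, xmin, ymax, xmax
  xcycwh = yxyx_to_xcycwh(box)  # -> [[0.5, 0.3, 0.6, 0.4]]
  return xcycwh_to_yxyx(xcycwh)  # recovers [[0.1, 0.2, 0.5, 0.8]]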
def center_distance(center_1: tf.Tensor, center_2: tf.Tensor):
"""Calculates the squared distance between two points.
This function is mathematically equivalent to the following code, but has
smaller rounding errors.
tf.norm(center_1 - center_2, axis=-1)**2
Args:
center_1: a `Tensor` whose shape is [..., 2] and represents a point.
center_2: a `Tensor` whose shape is [..., 2] and represents a point.
Returns:
dist: a `Tensor` whose shape is [...] and value represents the squared
distance between center_1 and center_2.
Raises:
ValueError: If the last dimension of either center_1 or center_2 is not 2.
"""
with tf.name_scope('center_distance'):
dist = (center_1[..., 0] - center_2[..., 0])**2 + (center_1[..., 1] -
center_2[..., 1])**2
return dist
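# A minimal usage sketch (the `_demo_center_distance` helper is an
# illustrative addition, not part of the original module). It checks the
# docstring's claim on a 3-4-5 triangle: the result is the squared distance.
def _demo_center_distance():
  p1 = tf.constant([[0.0, 0.0]])
  p2 = tf.constant([[3.0, 4.0]])
  return center_distance(p1, p2)  # [25.0] == tf.norm(p1 - p2, axis=-1)**2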
def compute_iou(box1, box2, yxyx=False):
"""Calculates the intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
yxyx: `bool`, whether box1 and box2 are in yxyx format.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the intersection
over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
# Get box corners
with tf.name_scope('iou'):
if not yxyx:
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
b1mi, b1ma = tf.split(box1, 2, axis=-1)
b2mi, b2ma = tf.split(box2, 2, axis=-1)
intersect_mins = tf.math.maximum(b1mi, b2mi)
intersect_maxes = tf.math.minimum(b1ma, b2ma)
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = tf.reduce_prod(
intersect_wh, axis=-1) # intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1))
box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1))
union = box1_area + box2_area - intersection
iou = intersection / (union + 1e-7)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
return iou
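# A minimal usage sketch (the `_demo_compute_iou` helper is an illustrative
# addition, not part of the original module). Identical boxes give an IOU of
# ~1.0 and disjoint boxes give 0.0; inputs are in xcycwh format by default.
def _demo_compute_iou():
  box_a = tf.constant([[0.3, 0.3, 0.2, 0.2]])
  box_b = tf.constant([[0.8, 0.8, 0.1, 0.1]])
  return compute_iou(box_a, box_a), compute_iou(box_a, box_b)  # ~1.0, 0.0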
def compute_giou(box1, box2):
"""Calculates the generalized intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
giou: a `Tensor` whose shape is [...] and value represents the
generalized intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('giou'):
# get box corners
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
# compute IOU
intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(
tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
box2_area = tf.math.abs(
tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
union = box1_area + box2_area - intersection
iou = tf.math.divide_no_nan(intersection, union)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
# find the smallest box that encompasses both box1 and box2
c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
c = tf.math.abs(tf.reduce_prod(c_mins - c_maxes, axis=-1))
# compute giou
giou = iou - tf.math.divide_no_nan((c - union), c)
return iou, giou
def compute_diou(box1, box2):
"""Calculates the distance intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
diou: a `Tensor` whose shape is [...] and value represents the distance
intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('diou'):
# compute center distance
dist = center_distance(box1[..., 0:2], box2[..., 0:2])
# get box corners
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
# compute IOU
intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(
tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
box2_area = tf.math.abs(
tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
union = box1_area + box2_area - intersection
iou = tf.math.divide_no_nan(intersection, union)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
# compute the squared diagonal of the smallest enclosing box
c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)
regularization = tf.math.divide_no_nan(dist, diag_dist)
# DIOU subtracts the normalized center-distance penalty from the IOU
# (Zheng et al., 2020).
diou = iou - regularization
return iou, diou
def compute_ciou(box1, box2):
"""Calculates the complete intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
ciou: a `Tensor` whose shape is [...] and value represents the complete
intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('ciou'):
# compute DIOU and IOU
iou, diou = compute_diou(box1, box2)
# compute aspect ratio consistency
arcterm = (
tf.math.atan(tf.math.divide_no_nan(box1[..., 2], box1[..., 3])) -
tf.math.atan(tf.math.divide_no_nan(box2[..., 2], box2[..., 3])))**2
v = 4 * arcterm / (math.pi)**2
# compute IOU regularization
a = tf.math.divide_no_nan(v, ((1 - iou) + v))
# CIOU subtracts the weighted aspect-ratio penalty from the DIOU
# (Zheng et al., 2020).
ciou = diou - v * a
return iou, ciou
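# A minimal usage sketch (the `_demo_iou_variants` helper is an illustrative
# addition, not part of the original module). For identical boxes the
# enclosing-box, center-distance, and aspect-ratio penalties all vanish, so
# GIOU, DIOU, and CIOU each reduce to the plain IOU of ~1.0.
def _demo_iou_variants():
  box = tf.constant([[0.5, 0.5, 0.4, 0.4]])
  _, giou = compute_giou(box, box)
  _, diou = compute_diou(box, box)
  _, ciou = compute_ciou(box, box)
  return giou, diou, ciou  # each ~[1.0]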
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops
class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters((1), (4))
def test_box_conversions(self, num_boxes):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
expected_shape = np.array([num_boxes, 4])
xywh_box = box_ops.yxyx_to_xcycwh(boxes)
yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
xyxy_box = box_ops.xcycwh_to_xyxy(boxes)
self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
self.assertAllEqual(tf.shape(xyxy_box).numpy(), expected_shape)
@parameterized.parameters((1), (5), (7))
def test_ious(self, num_boxes):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
expected_shape = np.array([
num_boxes,
])
expected_iou = np.ones([
num_boxes,
])
iou = box_ops.compute_iou(boxes, boxes)
_, giou = box_ops.compute_giou(boxes, boxes)
_, ciou = box_ops.compute_ciou(boxes, boxes)
_, diou = box_ops.compute_diou(boxes, boxes)
self.assertAllEqual(tf.shape(iou).numpy(), expected_shape)
self.assertArrayNear(iou, expected_iou, 0.001)
self.assertArrayNear(giou, expected_iou, 0.001)
self.assertArrayNear(ciou, expected_iou, 0.001)
self.assertArrayNear(diou, expected_iou, 0.001)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Yolo preprocess ops."""
import tensorflow as tf
import tensorflow_addons as tfa
from official.vision.beta.projects.yolo.ops import box_ops
def resize_crop_filter(image, boxes, default_width, default_height,
target_width, target_height):
"""Apply zooming to the image and boxes.
Args:
image: a `Tensor` representing the image.
boxes: a `Tensor` representing the boxes.
default_width: a `Tensor` representing the width of the image.
default_height: a `Tensor` representing the height of the image.
target_width: a `Tensor` representing the desired width of the image.
target_height: a `Tensor` representing the desired height of the image.
Returns:
images: a `Tensor` representing the augmented image.
boxes: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('resize_crop_filter'):
image = tf.image.resize(image, (target_width, target_height))
image = tf.image.resize_with_crop_or_pad(image,
target_height=default_height,
target_width=default_width)
default_width = tf.cast(default_width, boxes.dtype)
default_height = tf.cast(default_height, boxes.dtype)
target_width = tf.cast(target_width, boxes.dtype)
target_height = tf.cast(target_height, boxes.dtype)
aspect_change_width = target_width / default_width
aspect_change_height = target_height / default_height
x, y, width, height = tf.split(boxes, 4, axis=-1)
x = (x - 0.5) * target_width / default_width + 0.5
y = (y - 0.5) * target_height / default_height + 0.5
width = width * aspect_change_width
height = height * aspect_change_height
boxes = tf.concat([x, y, width, height], axis=-1)
return image, boxes
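# A minimal usage sketch (the `_demo_resize_crop_filter` helper is an
# illustrative addition, not part of the original module). It zooms a
# 416x416 image down to 300x300, pads it back to 416x416, and rescales the
# normalized xcycwh boxes to match; the sizes are arbitrary example values.
def _demo_resize_crop_filter():
  image = tf.zeros([416, 416, 3])
  boxes = tf.constant([[0.5, 0.5, 0.2, 0.2]])
  return resize_crop_filter(image, boxes, 416, 416, 300, 300)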
def random_translate(image, box, t, seed=None):
"""Randomly translate the image and boxes.
Args:
image: a `Tensor` representing the image.
box: a `Tensor` representing the boxes.
t: a number giving the maximum translation offset; the actual offsets
are drawn uniformly from [-t, t].
seed: an optional seed for tf.random operations.
Returns:
image: a `Tensor` representing the augmented image.
box: a `Tensor` representing the augmented boxes.
"""
t_x = tf.random.uniform(minval=-t,
maxval=t,
shape=(),
dtype=tf.float32,
seed=seed)
t_y = tf.random.uniform(minval=-t,
maxval=t,
shape=(),
dtype=tf.float32,
seed=seed)
box = translate_boxes(box, t_x, t_y)
image = translate_image(image, t_x, t_y)
return image, box
def translate_boxes(box, translate_x, translate_y):
"""Randomly translate the boxes.
Args:
box: a `Tensor` represeitng the boxes.
translate_x: a `Tensor` represting the translation on the x-axis.
translate_y: a `Tensor` represting the translation on the y-axis.
Returns:
box: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('translate_boxes'):
x = box[..., 0] + translate_x
y = box[..., 1] + translate_y
box = tf.stack([x, y, box[..., 2], box[..., 3]], axis=-1)
box.set_shape([None, 4])
return box
def translate_image(image, translate_x, translate_y):
"""Randomly translate the image.
Args:
image: a `Tensor` representing the image.
translate_x: a `Tensor` represting the translation on the x-axis.
translate_y: a `Tensor` represting the translation on the y-axis.
Returns:
box: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('translate_image'):
if translate_x != 0 or translate_y != 0:
image_jitter = tf.convert_to_tensor([translate_x, translate_y])
image_jitter.set_shape([2])
image = tfa.image.translate(
image, image_jitter * tf.cast(tf.shape(image)[1], tf.float32))
return image
def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
"""Pads tensors to max number of instances."""
shape = tf.shape(value)
dim1 = shape[pad_axis]
take = tf.math.reduce_min([instances, dim1])
value, _ = tf.split(value, [take, -1],
axis=pad_axis) # value[:instances, ...]
pad = tf.convert_to_tensor([tf.math.reduce_max([instances - dim1, 0])])
nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0)
pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype))
value = tf.concat([value, pad_tensor], axis=pad_axis)
return value
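# A minimal usage sketch (the `_demo_pad_max_instances` helper is an
# illustrative addition, not part of the original module). A [2, 4] box
# tensor is zero-padded up to 5 instances along axis 0; a tensor with more
# than `instances` rows would be truncated instead.
def _demo_pad_max_instances():
  boxes = tf.ones([2, 4])
  return pad_max_instances(boxes, 5)  # shape [5, 4]; the last 3 rows are 0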
def fit_preserve_aspect_ratio(image,
boxes,
width=None,
height=None,
target_dim=None):
"""Resizes the image while peserving the image aspect ratio.
Args:
image: a `Tensor` representing the image.
boxes: a `Tensor` representing the boxes.
width: int for the image width.
height: int for the image height.
target_dim: scalar `int` or `Tensor` for the output height and width;
the output image is square.
Returns:
image: a `Tensor` representing the image.
box: a `Tensor` representing the boxes.
"""
if width is None or height is None:
shape = tf.shape(image)
if tf.shape(shape)[0] == 4:
width = shape[1]
height = shape[2]
else:
width = shape[0]
height = shape[1]
clipper = tf.math.maximum(width, height)
if target_dim is None:
target_dim = clipper
pad_width = clipper - width
pad_height = clipper - height
image = tf.image.pad_to_bounding_box(image, pad_width // 2, pad_height // 2,
clipper, clipper)
boxes = box_ops.yxyx_to_xcycwh(boxes)
x, y, w, h = tf.split(boxes, 4, axis=-1)
y *= tf.cast(width / clipper, tf.float32)
x *= tf.cast(height / clipper, tf.float32)
y += tf.cast((pad_width / clipper) / 2, tf.float32)
x += tf.cast((pad_height / clipper) / 2, tf.float32)
h *= tf.cast(width / clipper, tf.float32)
w *= tf.cast(height / clipper, tf.float32)
boxes = tf.concat([x, y, w, h], axis=-1)
boxes = box_ops.xcycwh_to_yxyx(boxes)
image = tf.image.resize(image, (target_dim, target_dim))
return image, boxes
def get_best_anchor(y_true, anchors, width=1, height=1):
"""Gets the correct anchor that is assoiciated with each box using IOU.
Args:
y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
anchors: list or tensor for the anchor boxes to be used in prediction,
found via k-means.
width: int for the image width.
height: int for the image height.
Returns:
tf.Tensor: y_true with the anchor associated with each ground truth
box known.
"""
with tf.name_scope('get_anchor'):
width = tf.cast(width, dtype=tf.float32)
height = tf.cast(height, dtype=tf.float32)
# split the boxes into center and width height
anchor_xy = y_true[..., 0:2]
# scale the boxes
anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
anchors_x = anchors[..., 0] / width
anchors_y = anchors[..., 1] / height
anchors = tf.stack([anchors_x, anchors_y], axis=-1)
k = tf.shape(anchors)[0]
# build a matrix of anchor boxes of shape [num_anchors, num_boxes, 4]
anchors = tf.transpose(anchors, perm=[1, 0])
anchor_xy = tf.tile(tf.expand_dims(anchor_xy, axis=-1),
[1, 1, tf.shape(anchors)[-1]])
anchors = tf.tile(tf.expand_dims(anchors, axis=0),
[tf.shape(anchor_xy)[0], 1, 1])
# stack the xy so that each anchor is associated once with each center
# from the ground truth input
anchors = tf.concat([anchor_xy, anchors], axis=1)
anchors = tf.transpose(anchors, perm=[2, 0, 1])
# copy the gt n times so that each anchor from above can be compared to
# input ground truth to shape: [num_anchors, num_boxes, 4]
truth_comp = tf.tile(tf.expand_dims(y_true[..., 0:4], axis=-1),
[1, 1, tf.shape(anchors)[0]])
truth_comp = tf.transpose(truth_comp, perm=[2, 0, 1])
# compute the intersection over union of the boxes, and take the argmax
# of the computed iou for each box, so that each box is associated with
# the largest intersection over union
iou_raw = box_ops.compute_iou(truth_comp, anchors)
values, indexes = tf.math.top_k(tf.transpose(iou_raw, perm=[1, 0]),
k=tf.cast(k, dtype=tf.int32),
sorted=True)
ind_mask = tf.cast(values > 0.213, dtype=indexes.dtype)
# pad the indexes such that all values less than the thresh are -1:
# add one, multiply by the mask to zero out all the bad locations, then
# subtract 1, making all the bad locations -1.
iou_index = tf.concat([
tf.keras.backend.expand_dims(indexes[..., 0], axis=-1),
((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1
],
axis=-1)
iou_index = iou_index[..., :6]
return tf.cast(iou_index, dtype=tf.float32)
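# A minimal usage sketch (the `_demo_get_best_anchor` helper is an
# illustrative addition, not part of the original module). One normalized
# ground truth box is ranked against three pixel-space anchors; the anchor
# sizes and the 416x416 image size are assumed example values.
def _demo_get_best_anchor():
  y_true = tf.constant([[0.5, 0.5, 0.1, 0.15]])  # xcycwh, normalized
  anchors = [[30., 60.], [60., 45.], [120., 90.]]
  return get_best_anchor(y_true, anchors, width=416, height=416)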
def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
"""Converts ground truth for use in loss functions.
Args:
y_true: tf.Tensor[] ground truth
[box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box]
mask: list of the anchor boxes corresponding to the output,
ex. [1, 2, 3] tells this layer to predict only the first 3
anchors in the total.
size: The dimensions of this output, for regular, it progresses
from 13, to 26, to 52.
dtype: The expected output dtype.
use_tie_breaker: boolean value for whether or not to use the tie_breaker.
Returns:
tf.Tensor[] of shape [size, size, num_anchors, 6], packing the box
coordinates (4), confidence (1), and class (1) for each cell.
"""
# unpack required components from the input ground truth
boxes = tf.cast(y_true['bbox'], dtype)
classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
anchors = tf.cast(y_true['best_anchors'], dtype)
# get the number of boxes in the ground truth
num_boxes = tf.shape(boxes)[0]
# get the number of anchor boxes used for this anchor scale
len_masks = tf.shape(mask)[0]
# init a fixed memory size grid for this prediction scale
# [size, size, # of anchors, 1 + 1 + number of anchors per scale]
full = tf.zeros([size, size, len_masks, 6], dtype=dtype)
# init a grid to use to track which locations have already
# been used before (for the tie breaker)
depth_track = tf.zeros((size, size, len_masks), dtype=tf.int32)
# rescale the x and y centers to the size of the grid [size, size]
x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)
# init all the TensorArrays to be used in storing the index
# and the values to be used to update both depth_track and full
update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
update = tf.TensorArray(dtype, size=0, dynamic_size=True)
# init constants and match data types before entering loop
i = 0
anchor_id = 0
const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
mask = tf.cast(mask, dtype=dtype)
rand_update = 0.0
for box_id in range(num_boxes):
# If the width or height of the box is zero, skip it.
# After preprocessing, if the box is no longer inside the image bounds,
# skip it.
if tf.keras.backend.all(tf.math.equal(
boxes[box_id, 2:4], 0)) or tf.keras.backend.any(
tf.math.less(boxes[box_id, 0:2], 0.0)) or tf.keras.backend.any(
tf.math.greater_equal(boxes[box_id, 0:2], 1.0)):
continue
if use_tie_breaker:
for anchor_id in range(tf.shape(anchors)[-1]):
index = tf.math.equal(anchors[box_id, anchor_id], mask)
if tf.keras.backend.any(index):
# using the boolean index mask to determine exactly which
# anchor box was used
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# determine if the index was used or not
used = depth_track[y[box_id], x[box_id], p]
# default used update value
uid = 1
# if anchor_id is 0, this is the best matched anchor for this box
# with the highest IOU
if anchor_id == 0:
# write the box to the update list
# create a random number to trigger a replacement if the cell
# is used already
if tf.math.equal(used, 1):
rand_update = tf.random.uniform([], maxval=1)
else:
rand_update = 1.0
if rand_update > 0.5:
# write the box to the update list
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]],
axis=-1)
update = update.write(i, value)
# if used is 2, this cell is filled with a non-optimal box
# if used is 0, the cell in the ground truth is not yet consumed
# in either case you can replace that cell with a new box, as long
# as it is not consumed by an optimal box with anchor_id = 0
elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
uid = 2
# write the box to the update list
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1)
update = update.write(i, value)
depth_track = tf.tensor_scatter_nd_update(
depth_track, [(y[box_id], x[box_id], p)], [uid])
i += 1
else:
index = tf.math.equal(anchors[box_id, 0], mask)
# if there is any index match
if tf.keras.backend.any(index):
# find the index
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# update the list of used boxes
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1)
update = update.write(i, value)
i += 1
# if the size of the update list is not 0, do an update; otherwise
# there are no boxes, so pass an empty grid
if tf.math.greater(update_index.size(), 0):
update_index = update_index.stack()
update = update.stack()
full = tf.tensor_scatter_nd_update(full, update_index, update)
return full
def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
"""Converts ground truth for use in loss functions.
Args:
y_true: tf.Tensor[] ground truth
[batch, box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box]
mask: list of the anchor boxes corresponding to the output,
ex. [1, 2, 3] tells this layer to predict only the first 3 anchors
in the total.
size: the dimensions of this output, for regular, it progresses from
13, to 26, to 52
dtype: expected output datatype
use_tie_breaker: boolean value for whether or not to use the tie
breaker.
Returns:
tf.Tensor[] of shape [batch, size, size, num_anchors, 6], packing the
box coordinates (4), confidence (1), and class (1) for each cell.
"""
# unpack required components from the input ground truth
boxes = tf.cast(y_true['bbox'], dtype)
classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
anchors = tf.cast(y_true['best_anchors'], dtype)
# get the batch size
batches = tf.shape(boxes)[0]
# get the number of boxes in the ground truth
num_boxes = tf.shape(boxes)[1]
# get the number of anchor boxes used for this anchor scale
len_masks = tf.shape(mask)[0]
# init a fixed memory size grid for this prediction scale
# [batch, size, size, # of anchors, 1 + 1 + number of anchors per scale]
full = tf.zeros([batches, size, size, len_masks, 1 + 4 + 1], dtype=dtype)
# init a grid to use to track which locations have already
# been used before (for the tie breaker)
depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32)
# rescale the x and y centers to the size of the grid [size, size]
x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)
# init all the TensorArrays to be used in storing the index and the values
# to be used to update both depth_track and full
update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
update = tf.TensorArray(dtype, size=0, dynamic_size=True)
# init constants and match data types before entering loop
i = 0
anchor_id = 0
const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
mask = tf.cast(mask, dtype=dtype)
rand_update = 0.0
for batch in range(batches):
for box_id in range(num_boxes):
# if the width or height of the box is zero, skip it
if tf.keras.backend.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)):
continue
# after preprocessing, if the box is no longer inside the image bounds,
# skip the box
if tf.keras.backend.any(tf.math.less(
boxes[batch, box_id, 0:2], 0.0)) or tf.keras.backend.any(
tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0)):
continue
if use_tie_breaker:
for anchor_id in range(tf.shape(anchors)[-1]):
index = tf.math.equal(anchors[batch, box_id, anchor_id], mask)
if tf.keras.backend.any(index):
# using the boolean index mask to determine exactly which anchor
# box was used
p = tf.cast(tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# determine if the index was used or not
used = depth_track[batch, y[batch, box_id], x[batch, box_id], p]
# default used update value
uid = 1
# if anchor_id is 0, this is the best matched anchor for this box
# with the highest IOU
if anchor_id == 0:
# create a random number to trigger a replacement if the cell
# is used already
if tf.math.equal(used, 1):
rand_update = tf.random.uniform([], maxval=1)
else:
rand_update = 1.0
if rand_update > 0.5:
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]],
axis=-1)
update = update.write(i, value)
# if used is 2, this cell is filled with a non-optimal box
# if used is 0, the cell in the ground truth is not yet consumed
# in either case you can replace that cell with a new box, as long
# as it is not consumed by an optimal box with anchor_id = 0
elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
uid = 2
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]], axis=-1)
update = update.write(i, value)
# update the used index for where and how the box was placed
depth_track = tf.tensor_scatter_nd_update(
depth_track, [(batch, y[batch, box_id], x[batch, box_id], p)],
[uid])
i += 1
else:
index = tf.math.equal(anchors[batch, box_id, 0], mask)
if tf.keras.backend.any(index):
# if there is any index match
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]], axis=-1)
update = update.write(i, value)
i += 1
# if the size of the update list is not 0, do an update; otherwise
# there are no boxes, so pass an empty grid
if tf.math.greater(update_index.size(), 0):
update_index = update_index.stack()
update = update.stack()
full = tf.tensor_scatter_nd_update(full, update_index, update)
return full
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import preprocess_ops
class PreprocessOpsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters((416, 416, 5, 300, 300), (100, 200, 6, 50, 50))
def test_resize_crop_filter(self, default_width, default_height, num_boxes,
target_width, target_height):
image = tf.convert_to_tensor(
np.random.rand(default_width, default_height, 3))
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
resized_image, resized_boxes = preprocess_ops.resize_crop_filter(
image, boxes, default_width, default_height, target_width,
target_height)
resized_image_shape = tf.shape(resized_image)
resized_boxes_shape = tf.shape(resized_boxes)
self.assertAllEqual([default_height, default_width, 3],
resized_image_shape.numpy())
self.assertAllEqual([num_boxes, 4], resized_boxes_shape.numpy())
@parameterized.parameters((7, 7., 5.), (25, 35., 45.))
def test_translate_boxes(self, num_boxes, translate_x, translate_y):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
translated_boxes = preprocess_ops.translate_boxes(
boxes, translate_x, translate_y)
translated_boxes_shape = tf.shape(translated_boxes)
self.assertAllEqual([num_boxes, 4], translated_boxes_shape.numpy())
@parameterized.parameters((100, 200, 75., 25.), (400, 600, 25., 75.))
def test_translate_image(self, image_height, image_width, translate_x,
translate_y):
image = tf.convert_to_tensor(np.random.rand(image_height, image_width, 4))
translated_image = preprocess_ops.translate_image(
image, translate_x, translate_y)
translated_image_shape = tf.shape(translated_image)
self.assertAllEqual([image_height, image_width, 4],
translated_image_shape.numpy())
@parameterized.parameters(([1, 2], 20, 0), ([13, 2, 4], 15, 0))
def test_pad_max_instances(self, input_shape, instances, pad_axis):
expected_output_shape = input_shape
expected_output_shape[pad_axis] = instances
output = preprocess_ops.pad_max_instances(
np.ones(input_shape), instances, pad_axis=pad_axis)
self.assertAllEqual(expected_output_shape, tf.shape(output).numpy())
if __name__ == '__main__':
tf.test.main()
@@ -70,8 +70,6 @@ class DetectionModule(export_base.ExportModule):
aug_scale_min=1.0,
aug_scale_max=1.0)
image_shape = image_info[1, :] # Shape of original image.
input_anchor = anchor.build_anchor_generator(
min_level=model_params.min_level,
max_level=model_params.max_level,
@@ -81,7 +79,7 @@ class DetectionModule(export_base.ExportModule):
anchor_boxes = input_anchor(image_size=(self._input_image_size[0],
self._input_image_size[1]))
return image, anchor_boxes, image_shape
return image, anchor_boxes, image_info
def _run_inference_on_image_tensors(self, images: tf.Tensor):
"""Cast image to float and run inference.
@@ -111,20 +109,22 @@ class DetectionModule(export_base.ExportModule):
dtype=tf.float32)
anchor_shapes.append((str(level), anchor_level_spec))
image_shape_spec = tf.TensorSpec(shape=[2,], dtype=tf.float32)
image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)
images, anchor_boxes, image_shape = tf.nest.map_structure(
images, anchor_boxes, image_info = tf.nest.map_structure(
tf.identity,
tf.map_fn(
self._build_inputs,
elems=images,
fn_output_signature=(images_spec, dict(anchor_shapes),
image_shape_spec),
image_info_spec),
parallel_iterations=32))
input_image_shape = image_info[:, 1, :]
detections = self._model.call(
images=images,
image_shape=image_shape,
image_shape=input_image_shape,
anchor_boxes=anchor_boxes,
training=False)
@@ -132,7 +132,8 @@ class DetectionModule(export_base.ExportModule):
'detection_boxes': detections['detection_boxes'],
'detection_scores': detections['detection_scores'],
'detection_classes': detections['detection_classes'],
'num_detections': detections['num_detections']
'num_detections': detections['num_detections'],
'image_info': image_info
}
if 'detection_masks' in detections.keys():
final_outputs['detection_masks'] = detections['detection_masks']
@@ -125,10 +125,11 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
images = self._get_dummy_input(input_type, batch_size, image_size)
processed_images, anchor_boxes, image_shape = module._build_inputs(
processed_images, anchor_boxes, image_info = module._build_inputs(
tf.zeros((224, 224, 3), dtype=tf.uint8))
processed_images = tf.expand_dims(processed_images, 0)
image_shape = image_info[1, :]
image_shape = tf.expand_dims(image_shape, 0)
processed_images = tf.expand_dims(processed_images, 0)
for l, l_boxes in anchor_boxes.items():
anchor_boxes[l] = tf.expand_dims(l_boxes, 0)
@@ -16,33 +16,30 @@
"""Base class for model export."""
import abc
import tensorflow as tf
def _decode_image(encoded_image_bytes):
image_tensor = tf.image.decode_image(encoded_image_bytes, channels=3)
image_tensor.set_shape((None, None, 3))
return image_tensor
from typing import Optional, Sequence, Mapping
import tensorflow as tf
def _decode_tf_example(tf_example_string_tensor):
keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
parsed_tensors = tf.io.parse_single_example(
serialized=tf_example_string_tensor, features=keys_to_features)
image_tensor = _decode_image(parsed_tensors['image/encoded'])
return image_tensor
from official.modeling.hyperparams import config_definitions as cfg
class ExportModule(tf.Module, metaclass=abc.ABCMeta):
"""Base Export Module."""
def __init__(self, params, batch_size, input_image_size, model=None):
def __init__(self,
params: cfg.ExperimentConfig,
batch_size: int,
input_image_size: Sequence[int],
num_channels: int = 3,
model: Optional[tf.keras.Model] = None):
"""Initializes a module for export.
Args:
params: Experiment params.
batch_size: Int or None.
input_image_size: List or Tuple of height, width of the input image.
batch_size: The batch size of the model input. Can be `int` or None.
input_image_size: List or Tuple of size of the input image. For 2D image,
it is [height, width].
num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported.
"""
@@ -50,48 +47,98 @@ class ExportModule(tf.Module, metaclass=abc.ABCMeta):
self._params = params
self._batch_size = batch_size
self._input_image_size = input_image_size
self._num_channels = num_channels
self._model = model
def _decode_image(self, encoded_image_bytes: str) -> tf.Tensor:
"""Decodes an image bytes to an image tensor.
Use `tf.image.decode_image` to decode an image if the input is expected to
be a 2D image; otherwise use `tf.io.decode_raw` to convert the raw bytes to
a tensor and reshape it to the desired shape.
Args:
encoded_image_bytes: An encoded image string to be decoded.
Returns:
A decoded image tensor.
"""
if len(self._input_image_size) == 2:
# Decode an image if 2D input is expected.
image_tensor = tf.image.decode_image(
encoded_image_bytes, channels=self._num_channels)
image_tensor.set_shape((None, None, self._num_channels))
else:
# Convert raw bytes into a tensor and reshape it, if not 2D input.
image_tensor = tf.io.decode_raw(encoded_image_bytes, out_type=tf.uint8)
image_tensor = tf.reshape(image_tensor,
self._input_image_size + [self._num_channels])
return image_tensor
def _decode_tf_example(
self, tf_example_string_tensor: tf.train.Example) -> tf.Tensor:
"""Decodes a TF Example to an image tensor.
Args:
tf_example_string_tensor: A tf.train.Example of encoded image and other
information.
Returns:
A decoded image tensor.
"""
keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
parsed_tensors = tf.io.parse_single_example(
serialized=tf_example_string_tensor, features=keys_to_features)
image_tensor = self._decode_image(parsed_tensors['image/encoded'])
return image_tensor
@abc.abstractmethod
def build_model(self):
def build_model(self, **kwargs):
"""Builds model and sets self._model."""
@abc.abstractmethod
def _run_inference_on_image_tensors(self, images):
def _run_inference_on_image_tensors(
self, images: tf.Tensor) -> Mapping[str, tf.Tensor]:
"""Runs inference on images."""
@tf.function
def inference_from_image_tensors(self, input_tensor):
def inference_from_image_tensors(
self, input_tensor: tf.Tensor) -> Mapping[str, tf.Tensor]:
return self._run_inference_on_image_tensors(input_tensor)
@tf.function
def inference_from_image_bytes(self, input_tensor):
def inference_from_image_bytes(self, input_tensor: str):
with tf.device('cpu:0'):
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
_decode_image,
self._decode_image,
elems=input_tensor,
fn_output_signature=tf.TensorSpec(
shape=[None, None, 3], dtype=tf.uint8),
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
parallel_iterations=32))
images = tf.stack(images)
return self._run_inference_on_image_tensors(images)
@tf.function
def inference_from_tf_example(self, input_tensor):
def inference_from_tf_example(
self, input_tensor: tf.train.Example) -> Mapping[str, tf.Tensor]:
with tf.device('cpu:0'):
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
_decode_tf_example,
self._decode_tf_example,
elems=input_tensor,
# Height/width of the shape of input images is unspecified (None)
# at the time of decoding the example, but the shape will
# be adjusted to conform to the input layer of the model,
# by _run_inference_on_image_tensors() below.
fn_output_signature=tf.TensorSpec(
shape=[None, None, 3], dtype=tf.uint8),
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
dtype=tf.uint8,
parallel_iterations=32))
images = tf.stack(images)
@@ -17,19 +17,30 @@ r"""Vision models export utility function for serving/inference."""
import os
from typing import Optional, List
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import train_utils
from official.vision.beta import configs
from official.vision.beta.serving import detection
from official.vision.beta.serving import export_base
from official.vision.beta.serving import image_classification
from official.vision.beta.serving import semantic_segmentation
def export_inference_graph(input_type, batch_size, input_image_size, params,
checkpoint_path, export_dir,
export_checkpoint_subdir=None,
export_saved_model_subdir=None):
def export_inference_graph(
input_type: str,
batch_size: Optional[int],
input_image_size: List[int],
params: cfg.ExperimentConfig,
checkpoint_path: str,
export_dir: str,
num_channels: Optional[int] = 3,
export_module: Optional[export_base.ExportModule] = None,
export_checkpoint_subdir: Optional[str] = None,
export_saved_model_subdir: Optional[str] = None):
"""Exports inference graph for the model specified in the exp config.
Saved model is stored at export_dir/saved_model, checkpoint is saved
@@ -42,6 +53,10 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
params: Experiment params.
checkpoint_path: Trained checkpoint path or directory.
export_dir: Export directory path.
num_channels: The number of input image channels.
export_module: Optional export module to be used instead of using params
to create one. If None, the params will be used to create an export
module.
export_checkpoint_subdir: Optional subdirectory under export_dir
to store checkpoint.
export_saved_model_subdir: Optional subdirectory under export_dir
@@ -60,21 +75,31 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
else:
output_saved_model_directory = export_dir
if isinstance(params.task,
configs.image_classification.ImageClassificationTask):
export_module = image_classification.ClassificationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
else:
raise ValueError('Export module not implemented for {} task.'.format(
type(params.task)))
if not export_module:
if isinstance(params.task,
configs.image_classification.ImageClassificationTask):
export_module = image_classification.ClassificationModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
else:
raise ValueError('Export module not implemented for {} task.'.format(
type(params.task)))
model = export_module.build_model()
@@ -87,7 +112,7 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
if input_type == 'image_tensor':
input_signature = tf.TensorSpec(
shape=[batch_size, None, None, 3],
shape=[batch_size] + [None] * len(input_image_size) + [num_channels],
dtype=tf.uint8)
signatures = {
'serving_default':
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A script to export the image classification as a TF-Hub SavedModel."""
# Import libraries
from absl import app
from absl import flags
import tensorflow as tf
from official.common import registry_imports # pylint: disable=unused-import
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.serving import image_classification
FLAGS = flags.FLAGS
flags.DEFINE_string(
'experiment', None, 'experiment type, e.g. resnet_imagenet')
flags.DEFINE_string(
'checkpoint_path', None, 'Checkpoint path.')
flags.DEFINE_string(
'export_path', None, 'The export directory.')
flags.DEFINE_multi_string(
'config_file',
None,
'A YAML/JSON files which specifies overrides. The override order '
'follows the order of args. Note that each file '
'can be used as an override template to override the default parameters '
'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.')
flags.DEFINE_string(
'params_override', '',
'The JSON/YAML file or string which specifies the parameter to be overridden'
' on top of `config_file` template.')
flags.DEFINE_integer(
'batch_size', None, 'The batch size.')
flags.DEFINE_string(
'input_image_size',
'224,224',
'The comma-separated string of two integers representing the height, width '
'of the input to the model.')
flags.DEFINE_boolean(
'skip_logits_layer',
False,
'Whether to skip the prediction layer and only output the feature vector.')
def export_model_to_tfhub(params,
batch_size,
input_image_size,
skip_logits_layer,
checkpoint_path,
export_path):
"""Export an image classification model to TF-Hub."""
export_module = image_classification.ClassificationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
model = export_module.build_model(skip_logits_layer=skip_logits_layer)
checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(checkpoint_path).assert_existing_objects_matched()
model.save(export_path, include_optimizer=False, save_format='tf')
def main(_):
params = exp_factory.get_exp_config(FLAGS.experiment)
for config_file in FLAGS.config_file or []:
params = hyperparams.override_params_dict(
params, config_file, is_strict=True)
if FLAGS.params_override:
params = hyperparams.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params.validate()
params.lock()
export_model_to_tfhub(
params=params,
batch_size=FLAGS.batch_size,
input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')],
skip_logits_layer=FLAGS.skip_logits_layer,
checkpoint_path=FLAGS.checkpoint_path,
export_path=FLAGS.export_path)
if __name__ == '__main__':
app.run(main)
@@ -29,14 +29,15 @@ STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class ClassificationModule(export_base.ExportModule):
"""classification Module."""
def build_model(self):
def build_model(self, skip_logits_layer=False):
input_specs = tf.keras.layers.InputSpec(
shape=[self._batch_size] + self._input_image_size + [3])
self._model = factory.build_classification_model(
input_specs=input_specs,
model_config=self._params.task.model,
l2_regularizer=None)
l2_regularizer=None,
skip_logits_layer=skip_logits_layer)
return self._model
@@ -16,13 +16,14 @@
"""Image classification task definition."""
from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import image_classification as exp_cfg
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import tfds_classification_decoders
from official.vision.beta.modeling import factory
@@ -93,9 +94,10 @@ class ImageClassificationTask(base_task.Task):
output_size=input_size[:2],
num_classes=num_classes,
aug_policy=params.aug_policy,
randaug_magnitude=params.randaug_magnitude,
dtype=params.dtype)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import maskrcnn as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tf_example_label_map_decoder
@@ -143,7 +143,7 @@ class MaskRCNNTask(base_task.Task):
include_mask=self._task_config.model.include_mask,
mask_crop_size=params.parser.mask_crop_size)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,10 +19,10 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision import keras_cv
from official.vision.beta.configs import retinanet as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import retinanet_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tfds_detection_decoders
@@ -122,7 +122,7 @@ class RetinaNetTask(base_task.Task):
skip_crowd_during_training=params.parser.skip_crowd_during_training,
max_num_instances=params.parser.max_num_instances)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import semantic_segmentation as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import segmentation_input
from official.vision.beta.dataloaders import tfds_segmentation_decoders
from official.vision.beta.evaluation import segmentation_metrics
@@ -104,7 +104,7 @@ class SemanticSegmentationTask(base_task.Task):
aug_rand_hflip=params.aug_rand_hflip,
dtype=params.dtype)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -143,15 +143,15 @@ class SemanticSegmentationTask(base_task.Task):
def build_metrics(self, training=True):
"""Gets streaming metrics for training/validation."""
metrics = []
if training:
if training and self.task_config.evaluation.report_train_mean_iou:
metrics.append(segmentation_metrics.MeanIoU(
name='mean_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=False,
dtype=tf.float32))
else:
self.miou_metric = segmentation_metrics.MeanIoU(
name='val_mean_iou',
self.iou_metric = segmentation_metrics.PerClassIoU(
name='per_class_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=not self.task_config.validation_data
.resize_eval_groundtruth,
@@ -243,7 +243,7 @@ class SemanticSegmentationTask(base_task.Task):
loss = 0
logs = {self.loss: loss}
logs.update({self.miou_metric.name: (labels, outputs)})
logs.update({self.iou_metric.name: (labels, outputs)})
if metrics:
self.process_metrics(metrics, labels, outputs)
@@ -257,11 +257,19 @@ class SemanticSegmentationTask(base_task.Task):
def aggregate_logs(self, state=None, step_outputs=None):
if state is None:
self.miou_metric.reset_states()
state = self.miou_metric
self.miou_metric.update_state(step_outputs[self.miou_metric.name][0],
step_outputs[self.miou_metric.name][1])
self.iou_metric.reset_states()
state = self.iou_metric
self.iou_metric.update_state(step_outputs[self.iou_metric.name][0],
step_outputs[self.iou_metric.name][1])
return state
def reduce_aggregated_logs(self, aggregated_logs):
return {self.miou_metric.name: self.miou_metric.result().numpy()}
result = {}
ious = self.iou_metric.result()
# TODO(arashwan): support loading class name from a label map file.
if self.task_config.evaluation.report_per_class_iou:
for i, value in enumerate(ious.numpy()):
result.update({'iou/{}'.format(i): value})
# Computes mean IoU
result.update({'mean_iou': tf.reduce_mean(ious).numpy()})
return result
@@ -17,10 +17,10 @@
from absl import logging
import tensorflow as tf
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import video_input
from official.vision.beta.modeling import factory_3d
@@ -74,7 +74,7 @@ class VideoClassificationTask(base_task.Task):
parser = video_input.Parser(input_params=params)
postprocess_fn = video_input.PostBatchProcessor(params)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=self._get_dataset_fn(params),
decoder_fn=self._get_decoder_fn(params),
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Normalization layer definitions."""
import tensorflow as tf
class L2Normalization(tf.keras.layers.Layer):
"""Normalization layer using L2 norm."""
def __init__(self):
"""Initialization of the L2Normalization layer."""
super(L2Normalization, self).__init__()
# A lower bound value for the norm.
self.eps = 1e-6
def call(self, x, axis=1):
"""Invokes the L2Normalization instance.
Args:
x: A Tensor.
axis: Dimension along which to normalize. A scalar or a vector of
integers.
Returns:
norm: A Tensor with the same shape as `x`.
"""
return tf.nn.l2_normalize(x, axis, epsilon=self.eps)
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,40 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.tools.build_docs."""
import os
import shutil
"""Tests for normalization layers."""
import tensorflow as tf
from official.utils.docs import build_docs
class BuildDocsTest(tf.test.TestCase):
from delf.python.normalization_layers import normalization
def setUp(self):
super(BuildDocsTest, self).setUp()
self.workdir = self.get_temp_dir()
if os.path.exists(self.workdir):
shutil.rmtree(self.workdir)
os.makedirs(self.workdir)
def test_api_gen(self):
build_docs.gen_api_docs(
code_url_prefix="http://official/nlp/modeling/",
site_path="tf_nlp_modeling/api_docs/python",
output_dir=self.workdir,
gen_report=False,
project_short_name="tf_nlp_modeling",
project_full_name="TensorFlow Modeling - NLP Library",
search_hints=True)
class NormalizationsTest(tf.test.TestCase):
# Check that the "defined in" section is working
with open(os.path.join(self.workdir, "tf_nlp_modeling.md")) as f:
content = f.read()
self.assertIn("__init__.py", content)
def testL2Normalization(self):
x = tf.constant([-4.0, 0.0, 4.0])
layer = normalization.L2Normalization()
# Run tested function.
result = layer(x, axis=0)
# Define expected result.
exp_output = [-0.70710677, 0.0, 0.70710677]
# Compare actual and expected.
self.assertAllClose(exp_output, result)
if __name__ == "__main__":
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pooling layers definitions."""
import tensorflow as tf
class MAC(tf.keras.layers.Layer):
"""Global max pooling (MAC) layer.
Maximum Activations of Convolutions (MAC) is constructed by max-pooling
over the spatial dimensions of each feature map. See
https://arxiv.org/abs/1511.05879 for a reference.
"""
def call(self, x, axis=None):
"""Invokes the MAC pooling instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return mac(x, axis=axis)
class SPoC(tf.keras.layers.Layer):
"""Average pooling (SPoC) layer.
Sum-pooled convolutional features (SPoC) is based on the sum pooling of the
deep features. See https://arxiv.org/pdf/1510.07493.pdf for a reference.
"""
def call(self, x, axis=None):
"""Invokes the SPoC instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return spoc(x, axis)
class GeM(tf.keras.layers.Layer):
"""Generalized mean pooling (GeM) layer.
Generalized Mean Pooling (GeM) computes the generalized mean of each
channel in a tensor. See https://arxiv.org/abs/1711.02512 for a reference.
"""
def __init__(self, power=3.):
"""Initialization of the generalized mean pooling (GeM) layer.
Args:
power: Float power > 0 is an inverse exponent parameter, used during the
generalized mean pooling computation. Setting this exponent as power > 1
increases the contrast of the pooled feature map and focuses on the
salient features of the image. GeM is a generalization of the average
pooling commonly used in classification networks (power = 1) and of the
spatial max-pooling layer (power = inf).
"""
super(GeM, self).__init__()
self.power = power
self.eps = 1e-6
def call(self, x, axis=None):
"""Invokes the GeM instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return gem(x, power=self.power, eps=self.eps, axis=axis)
def mac(x, axis=None):
"""Performs global max pooling (MAC).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return tf.reduce_max(x, axis=axis, keepdims=False)
def spoc(x, axis=None):
"""Performs average pooling (SPoC).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return tf.reduce_mean(x, axis=axis, keepdims=False)
def gem(x, axis=None, power=3., eps=1e-6):
"""Performs generalized mean pooling (GeM).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
power: Float, power > 0 is an inverse exponent parameter (GeM power).
eps: Float, parameter for numerical stability.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
tmp = tf.pow(tf.maximum(x, eps), power)
out = tf.pow(tf.reduce_mean(tmp, axis=axis, keepdims=False), 1. / power)
return out
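# A minimal usage sketch (the `_demo_pooling_layers` helper is an
# illustrative addition, not part of the original module). Each layer
# reduces the spatial dimensions of a [B, H, W, D] feature map, returning a
# [B, D] global descriptor.
def _demo_pooling_layers():
  x = tf.random.uniform([2, 7, 7, 64])
  return MAC()(x), SPoC()(x), GeM(power=3.)(x)  # each of shape [2, 64]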