Unverified Commit ca552843 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-segmentation

parents 7e2f7a35 6b90e134
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Yolo models."""
import tensorflow as tf
# Static base YOLO models that do not require configuration,
# similar to a backbone model id.
# This is done to greatly simplify the model config.
# The structure is as follows: the model version {v3, v4, v#, ... etc}
# and the model config type {regular, tiny, small, large, ... etc}.
YOLO_MODELS = {
"v4":
dict(
regular=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
csp=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=5,
fpn_depth=5,
path_process_len=6),
csp_large=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=7,
fpn_depth=7,
path_process_len=8,
fpn_filter_scale=2),
),
"v3":
dict(
regular=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
spp=dict(
embed_spp=True,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
),
}
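# Illustrative usage (a sketch, not part of the original file): a decoder
# config is a plain nested-dict lookup on model version and config type.
#   cfg = YOLO_MODELS['v4']['csp']
#   cfg['csp_stack']  # -> 5
#   cfg['use_fpn']    # -> True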
class Yolo(tf.keras.Model):
"""The YOLO model class."""
def __init__(self,
backbone=None,
decoder=None,
head=None,
detection_generator=None,
**kwargs):
"""Detection initialization function.
Args:
backbone: `tf.keras.Model` a backbone network.
decoder: `tf.keras.Model` a decoder network.
      head: `tf.keras.Model`, the YOLO detection head.
detection_generator: the detection generator.
**kwargs: keyword arguments to be passed.
"""
super(Yolo, self).__init__(**kwargs)
self._config_dict = {
"backbone": backbone,
"decoder": decoder,
"head": head,
"filter": detection_generator
}
# model components
self._backbone = backbone
self._decoder = decoder
self._head = head
self._filter = detection_generator
return
def call(self, inputs, training=False):
maps = self._backbone(inputs)
decoded_maps = self._decoder(maps)
raw_predictions = self._head(decoded_maps)
if training:
return {"raw_output": raw_predictions}
else:
# Post-processing.
predictions = self._filter(raw_predictions)
predictions.update({"raw_output": raw_predictions})
return predictions
@property
def backbone(self):
return self._backbone
@property
def decoder(self):
return self._decoder
@property
def head(self):
return self._head
@property
def filter(self):
return self._filter
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
def get_weight_groups(self, train_vars):
"""Sort the list of trainable variables into groups for optimization.
Args:
train_vars: a list of tf.Variables that need to get sorted into their
respective groups.
Returns:
weights: a list of tf.Variables for the weights.
bias: a list of tf.Variables for the bias.
other: a list of tf.Variables for the other operations.
"""
bias = []
weights = []
other = []
for var in train_vars:
if "bias" in var.name:
bias.append(var)
elif "beta" in var.name:
bias.append(var)
elif "kernel" in var.name or "weight" in var.name:
weights.append(var)
else:
other.append(var)
return weights, bias, other
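# Illustrative usage (a sketch, not part of the original file), where `model`
# is a constructed Yolo instance: split the trainable variables so an
# optimizer can, for example, apply weight decay to the kernels only.
#   weights, biases, other = model.get_weight_groups(model.trainable_variables)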
@@ -12,28 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Yolo box ops."""
import math
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import math_ops
def yxyx_to_xcycwh(box: tf.Tensor):
  """Converts boxes from yxyx to x_center, y_center, width, height.
  Args:
    box: any `Tensor` whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax.
  Returns:
    box: a `Tensor` whose shape is the same as `box` in new format.
  """
  with tf.name_scope('yxyx_to_xcycwh'):
    ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1)
@@ -45,22 +38,15 @@ def yxyx_to_xcycwh(box: tf.Tensor):
    return box
def xcycwh_to_yxyx(box: tf.Tensor):
  """Converts boxes from x_center, y_center, width, height to yxyx format.
  Args:
    box: any `Tensor` whose last dimension is 4 representing the coordinates of
      boxes in x_center, y_center, width, height.
  Returns:
    box: a `Tensor` whose shape is the same as `box` in new format.
  """
  with tf.name_scope('xcycwh_to_yxyx'):
    xy, wh = tf.split(box, 2, axis=-1)
@@ -69,229 +55,268 @@ def xcycwh_to_yxyx(box: tf.Tensor):
    x_min, y_min = tf.split(xy_min, 2, axis=-1)
    x_max, y_max = tf.split(xy_max, 2, axis=-1)
    box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
    return box
def intersect_and_union(box1, box2, yxyx=False):
  """Calculates the intersection and union between box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
  Returns:
    intersection: a `Tensor` that represents the intersection.
    union: a `Tensor` that represents the union.
  """
  if not yxyx:
    box1_area = tf.reduce_prod(tf.split(box1, 2, axis=-1)[-1], axis=-1)
    box2_area = tf.reduce_prod(tf.split(box2, 2, axis=-1)[-1], axis=-1)
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)
  b1mi, b1ma = tf.split(box1, 2, axis=-1)
  b2mi, b2ma = tf.split(box2, 2, axis=-1)
  intersect_mins = tf.math.maximum(b1mi, b2mi)
  intersect_maxes = tf.math.minimum(b1ma, b2ma)
  intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.0)
  intersection = tf.reduce_prod(intersect_wh, axis=-1)
  if yxyx:
    box1_area = tf.reduce_prod(b1ma - b1mi, axis=-1)
    box2_area = tf.reduce_prod(b2ma - b2mi, axis=-1)
  union = box1_area + box2_area - intersection
  return intersection, union
def smallest_encompassing_box(box1, box2, yxyx=False, clip=False):
  """Calculates the smallest box that encompasses box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
    clip: a `bool`, whether or not to clip boxes.
  Returns:
    bcmi: a `Tensor` holding the minimum corners of the encompassing boxes.
    bcma: a `Tensor` holding the maximum corners of the encompassing boxes.
    box_c: a `Tensor` whose last dimension is 4 representing the coordinates
      of boxes; the return format is y_min, x_min, y_max, x_max if yxyx is set
      to True. In other words, it will match the input format.
  """
  if not yxyx:
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)
  b1mi, b1ma = tf.split(box1, 2, axis=-1)
  b2mi, b2ma = tf.split(box2, 2, axis=-1)
  bcmi = tf.math.minimum(b1mi, b2mi)
  bcma = tf.math.maximum(b1ma, b2ma)
  box_c = tf.concat([bcmi, bcma], axis=-1)
  if not yxyx:
    box_c = yxyx_to_xcycwh(box_c)
  if clip:
    bca = tf.reduce_prod(bcma - bcmi, keepdims=True, axis=-1)
    box_c = tf.where(bca <= 0.0, tf.zeros_like(box_c), box_c)
  return bcmi, bcma, box_c
def compute_iou(box1, box2, yxyx=False):
  """Calculates the intersection over union between box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
  Returns:
    iou: a `Tensor` that represents the intersection over union.
  """
  with tf.name_scope('iou'):
    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
    iou = math_ops.divide_no_nan(intersection, union)
    return iou
def compute_giou(box1, box2, yxyx=False):
  """Calculates the generalized intersection over union between box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
  Returns:
    giou: a `Tensor` that represents the generalized intersection over union.
  """
  with tf.name_scope('giou'):
    if not yxyx:
      yxyx1 = xcycwh_to_yxyx(box1)
      yxyx2 = xcycwh_to_yxyx(box2)
    else:
      yxyx1, yxyx2 = box1, box2
    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
    iou = math_ops.divide_no_nan(intersection, union)
    bcwh = cma - cmi
    c = tf.math.reduce_prod(bcwh, axis=-1)
    regularization = math_ops.divide_no_nan((c - union), c)
    giou = iou - regularization
    return iou, giou
def compute_diou(box1, box2, beta=1.0, yxyx=False):
  """Calculates the distance intersection over union between box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    beta: a `float` indicating the amount to scale the distance iou
      regularization term.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
  Returns:
    diou: a `Tensor` that represents the distance intersection over union.
  """
  with tf.name_scope('diou'):
    # compute center distance
    if not yxyx:
      xycc1, xycc2 = box1, box2
      yxyx1 = xcycwh_to_yxyx(box1)
      yxyx2 = xcycwh_to_yxyx(box2)
    else:
      yxyx1, yxyx2 = box1, box2
      xycc1 = yxyx_to_xcycwh(box1)
      xycc2 = yxyx_to_xcycwh(box2)
    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
    iou = math_ops.divide_no_nan(intersection, union)
    b1xy, _ = tf.split(xycc1, 2, axis=-1)
    b2xy, _ = tf.split(xycc2, 2, axis=-1)
    bcwh = cma - cmi
    center_dist = tf.reduce_sum((b1xy - b2xy)**2, axis=-1)
    c_diag = tf.reduce_sum(bcwh**2, axis=-1)
    regularization = math_ops.divide_no_nan(center_dist, c_diag)
    diou = iou - regularization**beta
    return iou, diou
def compute_ciou(box1, box2, yxyx=False, darknet=False):
  """Calculates the complete intersection over union between box1 and box2.
  Args:
    box1: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    box2: any `Tensor` whose last dimension is 4 representing the coordinates
      of boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center,
      y_center, width, height or y_min, x_min, y_max, x_max.
    darknet: a `bool` indicating whether the calling function is the YOLO
      darknet loss.
  Returns:
    ciou: a `Tensor` that represents the complete intersection over union.
  """
  with tf.name_scope('ciou'):
    if not yxyx:
      xycc1, xycc2 = box1, box2
      yxyx1 = xcycwh_to_yxyx(box1)
      yxyx2 = xcycwh_to_yxyx(box2)
    else:
      yxyx1, yxyx2 = box1, box2
      xycc1 = yxyx_to_xcycwh(box1)
      xycc2 = yxyx_to_xcycwh(box2)
    # Build the smallest encompassing box.
    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
    iou = math_ops.divide_no_nan(intersection, union)
    b1xy, b1w, b1h = tf.split(xycc1, [2, 1, 1], axis=-1)
    b2xy, b2w, b2h = tf.split(xycc2, [2, 1, 1], axis=-1)
    bchw = cma - cmi
    # Center regularization.
    center_dist = tf.reduce_sum((b1xy - b2xy)**2, axis=-1)
    c_diag = tf.reduce_sum(bchw**2, axis=-1)
    regularization = math_ops.divide_no_nan(center_dist, c_diag)
    # Compute aspect ratio consistency.
    terma = math_ops.divide_no_nan(b1w, b1h)  # gt
    termb = math_ops.divide_no_nan(b2w, b2h)  # pred
    arcterm = tf.squeeze(
        tf.math.pow(tf.math.atan(termb) - tf.math.atan(terma), 2), axis=-1)
    v = (4 / math.pi**2) * arcterm
    # Compute the aspect ratio weight; should be treated as a constant.
    a = tf.stop_gradient(math_ops.divide_no_nan(v, 1 - iou + v))
    if darknet:
      grad_scale = tf.stop_gradient(tf.square(b2w) + tf.square(b2h))
      v *= tf.squeeze(grad_scale, axis=-1)
    ciou = iou - regularization - (v * a)
    return iou, ciou
def aggregated_comparitive_iou(boxes1, boxes2=None, iou_type=0, beta=0.6):
  """Calculates the IOU between two sets of boxes.
  Similar to bbox_overlap but far more versatile.
  Args:
    boxes1: a `Tensor` of shape [batch size, N, 4] representing the coordinates
      of boxes.
    boxes2: a `Tensor` of shape [batch size, N, 4] representing the coordinates
      of boxes.
    iou_type: `integer` representing the iou version to use: 0 is distance iou,
      1 is the general iou, 2 is the complete iou; any other number uses the
      standard iou.
    beta: `float` for the scaling quantity to apply to the distance iou
      regularization.
  Returns:
    iou: a `Tensor` that represents the intersection over union of the
      expected/input type.
  """
  boxes1 = tf.expand_dims(boxes1, axis=-2)
  if boxes2 is not None:
    boxes2 = tf.expand_dims(boxes2, axis=-3)
  else:
    boxes2 = tf.transpose(boxes1, perm=(0, 2, 1, 3))
  if iou_type == 0 or iou_type == 'diou':  # diou
    _, iou = compute_diou(boxes1, boxes2, beta=beta, yxyx=True)
  elif iou_type == 1 or iou_type == 'giou':  # giou
    _, iou = compute_giou(boxes1, boxes2, yxyx=True)
  elif iou_type == 2 or iou_type == 'ciou':  # ciou
    _, iou = compute_ciou(boxes1, boxes2, yxyx=True)
  else:
    iou = compute_iou(boxes1, boxes2, yxyx=True)
  return iou
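# Illustrative usage (a sketch, not part of the original file): pairwise DIOU
# between every pair of boxes in two batched sets of yxyx boxes.
#   boxes1 = tf.random.uniform([2, 8, 4])  # [batch, N, 4]
#   boxes2 = tf.random.uniform([2, 8, 4])
#   iou = aggregated_comparitive_iou(boxes1, boxes2, iou_type='diou')
#   iou.shape  # -> [2, 8, 8]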
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""box_ops tests."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
@@ -27,10 +28,8 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
    expected_shape = np.array([num_boxes, 4])
    xywh_box = box_ops.yxyx_to_xcycwh(boxes)
    yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
    self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
    self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
  @parameterized.parameters((1), (5), (7))
  def test_ious(self, num_boxes):
@@ -51,6 +50,5 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
    self.assertArrayNear(ciou, expected_iou, 0.001)
    self.assertArrayNear(diou, expected_iou, 0.001)
if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Yolo loss utility functions."""
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import math_ops
@tf.custom_gradient
def sigmoid_bce(y, x_prime, label_smoothing):
"""Applies the Sigmoid Cross Entropy Loss.
Implements the same derivative as that found in the Darknet C library.
The derivative of this method is not the same as the standard binary cross
entropy with logits function.
The BCE with logits function equation is as follows:
x = 1 / (1 + exp(-x_prime))
bce = -ylog(x) - (1 - y)log(1 - x)
The standard BCE with logits function derivative is as follows:
dloss = -y/x + (1-y)/(1-x)
dsigmoid = x * (1 - x)
dx = dloss * dsigmoid
This derivative can be reduced simply to:
dx = (-y + x)
This simplification is used by the darknet library in order to improve
training stability. The gradient is almost the same
as tf.keras.losses.binary_crossentropy but varies slightly and
yields different performance.
Args:
y: `Tensor` holding ground truth data.
x_prime: `Tensor` holding the predictions prior to application of the
sigmoid operation.
label_smoothing: float value between 0.0 and 1.0 indicating the amount of
smoothing to apply to the data.
Returns:
    bce: a `Tensor` holding the computed loss values.
delta: callable function indicating the custom gradient for this operation.
"""
eps = 1e-9
x = tf.math.sigmoid(x_prime)
y = tf.stop_gradient(y * (1 - label_smoothing) + 0.5 * label_smoothing)
bce = -y * tf.math.log(x + eps) - (1 - y) * tf.math.log(1 - x + eps)
def delta(dpass):
x = tf.math.sigmoid(x_prime)
dx = (-y + x) * dpass
dy = tf.zeros_like(y)
return dy, dx, 0.0
return bce, delta
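# Numerical sanity sketch (illustrative, not part of the original file): with
# label_smoothing=0.0 the custom gradient above reduces to
# sigmoid(x_prime) - y, which can be checked with a gradient tape:
#   y = tf.constant([1.0, 0.0])
#   x_prime = tf.Variable([0.3, -0.2])
#   with tf.GradientTape() as tape:
#     loss = tf.reduce_sum(sigmoid_bce(y, x_prime, 0.0))
#   tape.gradient(loss, x_prime)  # ~= tf.math.sigmoid(x_prime) - y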
def apply_mask(mask, x, value=0):
"""This function is used for gradient masking.
The YOLO loss function makes extensive use of dynamically shaped tensors.
  To allow this use case on the TPU while preserving the gradient correctly
  for back propagation, we use this masking function, which applies a
  tf.where operation to hard-set masked locations to a value, and gradient,
  of zero.
Args:
mask: A `Tensor` with the same shape as x used to select values of
importance.
x: A `Tensor` with the same shape as mask that will be getting masked.
    value: `float` constant fill value for masked locations.
Returns:
x: A masked `Tensor` with the same shape as x.
"""
mask = tf.cast(mask, tf.bool)
masked = tf.where(mask, x, tf.zeros_like(x) + value)
return masked
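# Illustrative example (a sketch, not part of the original file): masked
# locations are hard-set to the fill value, so their gradient is zero.
#   mask = tf.constant([1.0, 0.0, 1.0])
#   x = tf.constant([0.5, 0.7, -0.2])
#   apply_mask(mask, x)  # -> [0.5, 0.0, -0.2]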
def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
"""This function is used to broadcast elements into the output shape.
  This function broadcasts a list of truths into the correct index in the
  output shape. It is used for the ground truth map construction in the
  scaled loss and the classification map in the darknet loss.
Args:
indexes: A `Tensor` for the indexes
truths: A `Tensor` for the ground truth.
preds: A `Tensor` for the predictions.
ind_mask: A `Tensor` for the index masks.
update: A `bool` for updating the grid.
grid: A `Tensor` for the grid.
Returns:
grid: A `Tensor` representing the augmented grid.
"""
  # this function is used to broadcast all the indexes into the correct
  # ground truth mask, used for the iou detection map in the scaled loss
  # and the classification mask in the darknet loss
num_flatten = tf.shape(preds)[-1]
# is there a way to verify that we are not on the CPU?
ind_mask = tf.cast(ind_mask, indexes.dtype)
  # find all the batch indexes using the cumulative sum of a ones tensor;
  # cumsum(ones) - 1 yields the zero-indexed batches
bhep = tf.reduce_max(tf.ones_like(indexes), axis=-1, keepdims=True)
bhep = tf.math.cumsum(bhep, axis=0) - 1
  # concatenate the batch indexes onto the box indexes
indexes = tf.concat([bhep, indexes], axis=-1)
indexes = apply_mask(tf.cast(ind_mask, indexes.dtype), indexes)
indexes = (indexes + (ind_mask - 1))
# reshape the indexes into the correct shape for the loss,
# just flatten all indexes but the last
indexes = tf.reshape(indexes, [-1, 4])
# also flatten the ground truth value on all axis but the last
truths = tf.reshape(truths, [-1, num_flatten])
  # build a zero grid in the same shape as the predictions
if grid is None:
grid = tf.zeros_like(preds)
# remove invalid values from the truths that may have
# come up from computation, invalid = nan and inf
truths = math_ops.rm_nan_inf(truths)
# scatter update the zero grid
if update:
grid = tf.tensor_scatter_nd_update(grid, indexes, truths)
else:
grid = tf.tensor_scatter_nd_max(grid, indexes, truths)
# stop gradient and return to avoid TPU errors and save compute
# resources
return grid
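# Illustrative shapes (a sketch, not part of the original file): scatter two
# ground truths into a [height, width, anchors, channels] grid.
#   indexes = tf.constant([[[0, 0, 0], [1, 2, 1]]])  # [batch, num, (y, x, a)]
#   truths = tf.ones([1, 2, 5])
#   preds = tf.zeros([1, 4, 4, 3, 5])
#   ind_mask = tf.ones([1, 2, 1])
#   build_grid(indexes, truths, preds, ind_mask).shape  # -> [1, 4, 4, 3, 5]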
class GridGenerator:
"""Grid generator that generates anchor grids for box decoding."""
def __init__(self, anchors, masks=None, scale_anchors=None):
"""Initialize Grid Generator.
Args:
anchors: A `List[List[int]]` for the anchor boxes that are used in the
model at all levels.
      masks: A `List[int]` for the anchor indexes used at this specific model
        output level.
scale_anchors: An `int` for how much to scale this level to get the
original input shape.
"""
self.dtype = tf.keras.backend.floatx()
if masks is not None:
self._num = len(masks)
else:
self._num = tf.shape(anchors)[0]
if masks is not None:
anchors = [anchors[mask] for mask in masks]
self._scale_anchors = scale_anchors
self._anchors = tf.convert_to_tensor(anchors)
return
def _build_grid_points(self, lwidth, lheight, anchors, dtype):
"""Generate a grid of fixed grid edges for box center decoding."""
with tf.name_scope('center_grid'):
y = tf.range(0, lheight)
x = tf.range(0, lwidth)
num = tf.shape(anchors)[0]
x_left = tf.tile(
tf.transpose(tf.expand_dims(y, axis=-1), perm=[1, 0]), [lwidth, 1])
y_left = tf.tile(tf.expand_dims(x, axis=-1), [1, lheight])
x_y = tf.stack([x_left, y_left], axis=-1)
x_y = tf.cast(x_y, dtype=dtype)
x_y = tf.expand_dims(
tf.tile(tf.expand_dims(x_y, axis=-2), [1, 1, num, 1]), axis=0)
return x_y
def _build_anchor_grid(self, anchors, dtype):
"""Get the transformed anchor boxes for each dimention."""
with tf.name_scope('anchor_grid'):
num = tf.shape(anchors)[0]
anchors = tf.cast(anchors, dtype=dtype)
anchors = tf.reshape(anchors, [1, 1, 1, num, 2])
return anchors
def _extend_batch(self, grid, batch_size):
return tf.tile(grid, [batch_size, 1, 1, 1, 1])
def __call__(self, width, height, batch_size, dtype=None):
if dtype is None:
self.dtype = tf.keras.backend.floatx()
else:
self.dtype = dtype
grid_points = self._build_grid_points(width, height, self._anchors,
self.dtype)
anchor_grid = self._build_anchor_grid(
tf.cast(self._anchors, self.dtype) /
tf.cast(self._scale_anchors, self.dtype), self.dtype)
grid_points = self._extend_batch(grid_points, batch_size)
anchor_grid = self._extend_batch(anchor_grid, batch_size)
return grid_points, anchor_grid
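# Illustrative usage (a sketch with made-up anchors, not part of the original
# file): build the decoding grids for a 13x13 output level with a stride of 32.
#   gen = GridGenerator(
#       anchors=[[116, 90], [156, 198], [373, 326]], scale_anchors=32)
#   grid_points, anchor_grid = gen(13, 13, batch_size=2)
#   grid_points.shape  # -> [2, 13, 13, 3, 2]
#   anchor_grid.shape  # -> [2, 1, 1, 3, 2]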
TILE_SIZE = 50
class PairWiseSearch:
"""Apply a pairwise search between the ground truth and the labels.
The goal is to indicate the locations where the predictions overlap with
ground truth for dynamic ground truth associations.
"""
def __init__(self,
iou_type='iou',
any_match=True,
min_conf=0.0,
track_boxes=False,
track_classes=False):
"""Initialization of Pair Wise Search.
Args:
      iou_type: A `str` for the iou type to use.
      any_match: A `bool` for matching any anchor regardless of class
        agreement.
      min_conf: A `float` for the minimum confidence threshold.
      track_boxes: A `bool` for dynamic box assignment.
      track_classes: A `bool` for dynamic class assignment.
"""
self.iou_type = iou_type
self._any = any_match
self._min_conf = min_conf
self._track_boxes = track_boxes
self._track_classes = track_classes
return
def box_iou(self, true_box, pred_box):
# based on the type of loss, compute the iou loss for a box
    # compute_<name> indicates the type of iou to use
if self.iou_type == 'giou':
_, iou = box_ops.compute_giou(true_box, pred_box)
elif self.iou_type == 'ciou':
_, iou = box_ops.compute_ciou(true_box, pred_box)
else:
iou = box_ops.compute_iou(true_box, pred_box)
return iou
def _search_body(self, pred_box, pred_class, boxes, classes, running_boxes,
running_classes, max_iou, idx):
"""Main search fn."""
# capture the batch size to be used, and gather a slice of
# boxes from the ground truth. currently TILE_SIZE = 50, to
# save memory
batch_size = tf.shape(boxes)[0]
box_slice = tf.slice(boxes, [0, idx * TILE_SIZE, 0],
[batch_size, TILE_SIZE, 4])
    # match the dimensions of the slice to the model predictions
# shape: [batch_size, 1, 1, num, TILE_SIZE, 4]
box_slice = tf.expand_dims(box_slice, axis=1)
box_slice = tf.expand_dims(box_slice, axis=1)
box_slice = tf.expand_dims(box_slice, axis=1)
box_grid = tf.expand_dims(pred_box, axis=-2)
# capture the classes
class_slice = tf.slice(classes, [0, idx * TILE_SIZE],
[batch_size, TILE_SIZE])
class_slice = tf.expand_dims(class_slice, axis=1)
class_slice = tf.expand_dims(class_slice, axis=1)
class_slice = tf.expand_dims(class_slice, axis=1)
iou = self.box_iou(box_slice, box_grid)
if self._min_conf > 0.0:
if not self._any:
class_grid = tf.expand_dims(pred_class, axis=-2)
class_mask = tf.one_hot(
tf.cast(class_slice, tf.int32),
depth=tf.shape(pred_class)[-1],
dtype=pred_class.dtype)
class_mask = tf.reduce_any(tf.equal(class_mask, class_grid), axis=-1)
else:
class_mask = tf.reduce_max(pred_class, axis=-1, keepdims=True)
class_mask = tf.cast(class_mask, iou.dtype)
iou *= class_mask
max_iou_ = tf.concat([max_iou, iou], axis=-1)
max_iou = tf.reduce_max(max_iou_, axis=-1, keepdims=True)
ind = tf.expand_dims(tf.argmax(max_iou_, axis=-1), axis=-1)
if self._track_boxes:
running_boxes = tf.expand_dims(running_boxes, axis=-2)
box_slice = tf.zeros_like(running_boxes) + box_slice
box_slice = tf.concat([running_boxes, box_slice], axis=-2)
running_boxes = tf.gather_nd(box_slice, ind, batch_dims=4)
if self._track_classes:
running_classes = tf.expand_dims(running_classes, axis=-1)
class_slice = tf.zeros_like(running_classes) + class_slice
class_slice = tf.concat([running_classes, class_slice], axis=-1)
running_classes = tf.gather_nd(class_slice, ind, batch_dims=4)
return (pred_box, pred_class, boxes, classes, running_boxes,
running_classes, max_iou, idx + 1)
def __call__(self,
pred_boxes,
pred_classes,
boxes,
classes,
scale=None,
yxyx=True,
clip_thresh=0.0):
num_boxes = tf.shape(boxes)[-2]
num_tiles = (num_boxes // TILE_SIZE) - 1
if yxyx:
boxes = box_ops.yxyx_to_xcycwh(boxes)
if scale is not None:
boxes = boxes * tf.stop_gradient(scale)
if self._min_conf > 0.0:
pred_classes = tf.cast(pred_classes > self._min_conf, pred_classes.dtype)
def _loop_cond(unused_pred_box, unused_pred_class, boxes, unused_classes,
unused_running_boxes, unused_running_classes, unused_max_iou,
idx):
      # check that the slice still contains boxes that are not all zeros
batch_size = tf.shape(boxes)[0]
box_slice = tf.slice(boxes, [0, idx * TILE_SIZE, 0],
[batch_size, TILE_SIZE, 4])
return tf.logical_and(idx < num_tiles,
tf.math.greater(tf.reduce_sum(box_slice), 0))
running_boxes = tf.zeros_like(pred_boxes)
running_classes = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
max_iou = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
max_iou = tf.expand_dims(max_iou, axis=-1)
(pred_boxes, pred_classes, boxes, classes, running_boxes, running_classes,
max_iou, _) = tf.while_loop(_loop_cond, self._search_body, [
pred_boxes, pred_classes, boxes, classes, running_boxes,
running_classes, max_iou,
tf.constant(0)
])
mask = tf.cast(max_iou > clip_thresh, running_boxes.dtype)
running_boxes *= mask
running_classes *= tf.squeeze(mask, axis=-1)
max_iou *= mask
max_iou = tf.squeeze(max_iou, axis=-1)
mask = tf.squeeze(mask, axis=-1)
return (tf.stop_gradient(running_boxes), tf.stop_gradient(running_classes),
tf.stop_gradient(max_iou), tf.stop_gradient(mask))
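# Illustrative usage (a sketch with placeholder tensors, not part of the
# original file): find, for every predicted box, the best-overlapping ground
# truth box regardless of class agreement.
#   search = PairWiseSearch(iou_type='iou', any_match=True)
#   boxes, classes, best_iou, mask = search(
#       pred_boxes, pred_classes, gt_boxes, gt_classes, yxyx=True)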
def average_iou(iou):
"""Computes the average intersection over union without counting locations.
where the iou is zero.
Args:
iou: A `Tensor` representing the iou values.
Returns:
tf.stop_gradient(avg_iou): A `Tensor` representing average
intersection over union.
"""
iou_sum = tf.reduce_sum(iou, axis=tf.range(1, tf.shape(tf.shape(iou))[0]))
counts = tf.cast(
tf.math.count_nonzero(iou, axis=tf.range(1,
tf.shape(tf.shape(iou))[0])),
iou.dtype)
avg_iou = tf.reduce_mean(math_ops.divide_no_nan(iou_sum, counts))
return tf.stop_gradient(avg_iou)
def _scale_boxes(encoded_boxes, width, height, anchor_grid, grid_points,
scale_xy):
"""Decodes models boxes applying and exponential to width and height maps."""
# split the boxes
pred_xy = encoded_boxes[..., 0:2]
pred_wh = encoded_boxes[..., 2:4]
  # build a scaling tensor to get the offset of the box relative to the image
scaler = tf.convert_to_tensor([height, width, height, width])
scale_xy = tf.cast(scale_xy, encoded_boxes.dtype)
# apply the sigmoid
pred_xy = tf.math.sigmoid(pred_xy)
# scale the centers and find the offset of each box relative to
# their center pixel
pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
# scale the offsets and add them to the grid points or a tensor that is
  # the relative location of each pixel
box_xy = grid_points + pred_xy
  # scale the width and height of the predictions and correlate them
# to anchor boxes
box_wh = tf.math.exp(pred_wh) * anchor_grid
# build the final predicted box
scaled_box = tf.concat([box_xy, box_wh], axis=-1)
pred_box = scaled_box / scaler
# shift scaled boxes
scaled_box = tf.concat([pred_xy, box_wh], axis=-1)
return (scaler, scaled_box, pred_box)
@tf.custom_gradient
def _darknet_boxes(encoded_boxes, width, height, anchor_grid, grid_points,
max_delta, scale_xy):
"""Wrapper for _scale_boxes to implement a custom gradient."""
(scaler, scaled_box, pred_box) = _scale_boxes(encoded_boxes, width, height,
anchor_grid, grid_points,
scale_xy)
def delta(unused_dy_scaler, dy_scaled, dy):
dy_xy, dy_wh = tf.split(dy, 2, axis=-1)
dy_xy_, dy_wh_ = tf.split(dy_scaled, 2, axis=-1)
# add all the gradients that may have been applied to the
# boxes and those that have been applied to the width and height
dy_wh += dy_wh_
dy_xy += dy_xy_
# propagate the exponential applied to the width and height in
# order to ensure the gradient propagated is of the correct
# magnitude
pred_wh = encoded_boxes[..., 2:4]
dy_wh *= tf.math.exp(pred_wh)
dbox = tf.concat([dy_xy, dy_wh], axis=-1)
# apply the gradient clipping to xy and wh
dbox = math_ops.rm_nan_inf(dbox)
delta = tf.cast(max_delta, dbox.dtype)
dbox = tf.clip_by_value(dbox, -delta, delta)
return dbox, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
return (scaler, scaled_box, pred_box), delta
def _new_coord_scale_boxes(encoded_boxes, width, height, anchor_grid,
grid_points, scale_xy):
"""Decodes models boxes by squaring and scaling the width and height maps."""
# split the boxes
pred_xy = encoded_boxes[..., 0:2]
pred_wh = encoded_boxes[..., 2:4]
  # build a scaling tensor to get the offset of the box relative to the image
scaler = tf.convert_to_tensor([height, width, height, width])
scale_xy = tf.cast(scale_xy, pred_xy.dtype)
# apply the sigmoid
pred_xy = tf.math.sigmoid(pred_xy)
pred_wh = tf.math.sigmoid(pred_wh)
# scale the xy offset predictions according to the config
pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
# find the true offset from the grid points and the scaler
  # where the grid points are the relative offset of each pixel within
  # the image
box_xy = grid_points + pred_xy
  # decode the width and height of the boxes and correlate them
# to the anchor boxes
box_wh = (2 * pred_wh)**2 * anchor_grid
# build the final boxes
scaled_box = tf.concat([box_xy, box_wh], axis=-1)
pred_box = scaled_box / scaler
# shift scaled boxes
scaled_box = tf.concat([pred_xy, box_wh], axis=-1)
return (scaler, scaled_box, pred_box)
@tf.custom_gradient
def _darknet_new_coord_boxes(encoded_boxes, width, height, anchor_grid,
grid_points, max_delta, scale_xy):
"""Wrapper for _new_coord_scale_boxes to implement a custom gradient."""
(scaler, scaled_box,
pred_box) = _new_coord_scale_boxes(encoded_boxes, width, height, anchor_grid,
grid_points, scale_xy)
def delta(unused_dy_scaler, dy_scaled, dy):
dy_xy, dy_wh = tf.split(dy, 2, axis=-1)
dy_xy_, dy_wh_ = tf.split(dy_scaled, 2, axis=-1)
# add all the gradients that may have been applied to the
# boxes and those that have been applied to the width and height
dy_wh += dy_wh_
dy_xy += dy_xy_
dbox = tf.concat([dy_xy, dy_wh], axis=-1)
# apply the gradient clipping to xy and wh
dbox = math_ops.rm_nan_inf(dbox)
delta = tf.cast(max_delta, dbox.dtype)
dbox = tf.clip_by_value(dbox, -delta, delta)
return dbox, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
return (scaler, scaled_box, pred_box), delta
def _anchor_free_scale_boxes(encoded_boxes, width, height, stride, grid_points,
scale_xy):
"""Decode models boxes using FPN stride under anchor free conditions."""
# split the boxes
pred_xy = encoded_boxes[..., 0:2]
pred_wh = encoded_boxes[..., 2:4]
  # build a scaling tensor to get the offset of the box relative to the image
scaler = tf.convert_to_tensor([height, width, height, width])
scale_xy = tf.cast(scale_xy, encoded_boxes.dtype)
# scale the centers and find the offset of each box relative to
# their center pixel
pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
# scale the offsets and add them to the grid points or a tensor that is
  # the relative location of each pixel
box_xy = (grid_points + pred_xy) * stride
  # scale the width and height of the predictions by the stride,
  # which stands in for anchor boxes in the anchor-free setting
box_wh = tf.math.exp(pred_wh) * stride
# build the final predicted box
scaled_box = tf.concat([box_xy, box_wh], axis=-1)
pred_box = scaled_box / scaler
return (scaler, scaled_box, pred_box)
def get_predicted_box(width,
height,
encoded_boxes,
anchor_grid,
grid_points,
scale_xy,
stride,
darknet=False,
box_type='original',
max_delta=np.inf):
"""Decodes the predicted boxes from the model format to a usable format.
This function decodes the model outputs into the [x, y, w, h] format for
use in the loss function as well as for use within the detection generator.
Args:
width: A `float` scalar indicating the width of the prediction layer.
    height: A `float` scalar indicating the height of the prediction layer.
encoded_boxes: A `Tensor` of shape [..., height, width, 4] holding encoded
boxes.
anchor_grid: A `Tensor` of shape [..., 1, 1, 2] holding the anchor boxes
organized for box decoding, box width and height.
grid_points: A `Tensor` of shape [..., height, width, 2] holding the anchor
boxes for decoding the box centers.
    scale_xy: A `float` scalar used to indicate the range for each center
outside of its given [..., i, j, 4] index, where i and j are indexing
pixels along the width and height of the predicted output map.
    stride: An `int` defining the amount of down stride relative to the input
image.
darknet: A `bool` used to select between custom gradient and default
autograd.
    box_type: A `str` indicating the type of box encoding that is being used.
    max_delta: A `float` scalar used for gradient clipping in back propagation.
Returns:
scaler: A `Tensor` of shape [4] returned to allow the scaling of the ground
truth boxes to be of the same magnitude as the decoded predicted boxes.
scaled_box: A `Tensor` of shape [..., height, width, 4] with the predicted
boxes.
pred_box: A `Tensor` of shape [..., height, width, 4] with the predicted
boxes divided by the scaler parameter used to put all boxes in the [0, 1]
range.
"""
if box_type == 'anchor_free':
(scaler, scaled_box,
pred_box) = _anchor_free_scale_boxes(encoded_boxes, width, height, stride,
grid_points, scale_xy)
elif darknet:
# pylint:disable=unbalanced-tuple-unpacking
    # if we are using the darknet loss we should not propagate the
    # decoding of the box
if box_type == 'scaled':
(scaler, scaled_box,
pred_box) = _darknet_new_coord_boxes(encoded_boxes, width, height,
anchor_grid, grid_points, max_delta,
scale_xy)
else:
(scaler, scaled_box,
pred_box) = _darknet_boxes(encoded_boxes, width, height, anchor_grid,
grid_points, max_delta, scale_xy)
else:
# if we are using the scaled loss we should propagate the decoding of
# the boxes
if box_type == 'scaled':
(scaler, scaled_box,
pred_box) = _new_coord_scale_boxes(encoded_boxes, width, height,
anchor_grid, grid_points, scale_xy)
else:
(scaler, scaled_box, pred_box) = _scale_boxes(encoded_boxes, width,
height, anchor_grid,
grid_points, scale_xy)
return (scaler, scaled_box, pred_box)
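# Illustrative call (a sketch with placeholder tensors, not part of the
# original file), using the grids produced by GridGenerator above:
#   scaler, scaled_box, pred_box = get_predicted_box(
#       width, height, encoded_boxes, anchor_grid, grid_points,
#       scale_xy=1.05, stride=32, darknet=False, box_type='original')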
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A set of private math operations used to safely implement the YOLO loss."""
import tensorflow as tf
def rm_nan_inf(x, val=0.0):
"""Remove nan and infinity.
Args:
x: any `Tensor` of any type.
val: value to replace nan and infinity with.
Returns:
a `Tensor` with nan and infinity removed.
"""
cond = tf.math.logical_or(tf.math.is_nan(x), tf.math.is_inf(x))
val = tf.cast(val, dtype=x.dtype)
x = tf.where(cond, val, x)
return x
def rm_nan(x, val=0.0):
"""Remove nan and infinity.
Args:
x: any `Tensor` of any type.
val: value to replace nan.
Returns:
a `Tensor` with nan removed.
"""
cond = tf.math.is_nan(x)
val = tf.cast(val, dtype=x.dtype)
x = tf.where(cond, val, x)
return x
def divide_no_nan(a, b):
"""Nan safe divide operation built to allow model compilation in tflite.
Args:
a: any `Tensor` of any type.
b: any `Tensor` of any type with the same shape as tensor a.
Returns:
a `Tensor` representing a divided by b, with all nan values removed.
"""
return a / (b + 1e-9)
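# Illustrative behavior (a sketch, not part of the original file): the epsilon
# keeps 0 / 0 at 0.0 while avoiding the tf.math.divide_no_nan op for TFLite;
# note that dividing by an exact zero yields a large finite value, not 0.
#   divide_no_nan(tf.constant(0.0), tf.constant(0.0))  # -> 0.0
#   divide_no_nan(tf.constant(1.0), tf.constant(0.0))  # -> ~1e9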
@@ -194,11 +194,11 @@ def get_best_anchor(y_true, anchors, width=1, height=1):
  """Gets the correct anchor that is associated with each box using IOU.
  Args:
    y_true: `tf.Tensor[]` for the list of bounding boxes in the yolo format.
    anchors: list or tensor for the anchor boxes to be used in prediction,
      found via Kmeans.
    width: int for the image width.
    height: int for the image height.
  Returns:
    tf.Tensor: y_true with the anchor associated with each ground truth
@@ -263,7 +263,7 @@ def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
  Args:
    y_true: tf.Tensor[] ground truth
      [box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box].
    mask: list of the anchor boxes corresponding to the output,
      ex. [1, 2, 3] tells this layer to predict only the first 3
      anchors in the total.
@@ -273,7 +273,7 @@ def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
    use_tie_breaker: boolean value for whether or not to use the tie_breaker.
  Returns:
    tf.Tensor[] of shape [size, size, #of_anchors, 4, 1, num_classes].
  """
  # unpack required components from the input ground truth
  boxes = tf.cast(y_true['bbox'], dtype)
@@ -391,18 +391,18 @@ def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
  Args:
    y_true: tf.Tensor[] ground truth
      [batch, box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box].
    mask: list of the anchor boxes corresponding to the output,
      ex. [1, 2, 3] tells this layer to predict only the first 3 anchors
      in the total.
    size: the dimensions of this output; for regular, it progresses from
      13, to 26, to 52.
    dtype: expected output datatype.
    use_tie_breaker: boolean value for whether or not to use the tie
      breaker.
  Returns:
    tf.Tensor[] of shape [batch, size, size, #of_anchors, 4, 1, num_classes].
  """
  # unpack required components from the input ground truth
  boxes = tf.cast(y_true['bbox'], dtype)
@@ -521,4 +521,3 @@ def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
  update = update.stack()
  full = tf.tensor_scatter_nd_update(full, update_index, update)
  return full
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""preprocess_ops tests."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
...
@@ -36,8 +36,6 @@ class DetectionModule(export_base.ExportModule):
    if self._batch_size is None:
      raise ValueError('batch_size cannot be None for detection models.')
    input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
                                            self._input_image_size + [3])
...
@@ -125,13 +125,6 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
      detection.DetectionModule(
          params, batch_size=None, input_image_size=[640, 640])
if __name__ == '__main__':
  tf.test.main()
@@ -103,6 +103,10 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
      self, inputs: tf.Tensor) -> Mapping[str, tf.Tensor]:
    return self.serve(inputs)
  @tf.function
  def inference_for_tflite(self, inputs: tf.Tensor) -> Mapping[str, tf.Tensor]:
    return self.serve(inputs)
  @tf.function
  def inference_from_image_bytes(self, inputs: tf.Tensor):
    with tf.device('cpu:0'):
@@ -174,6 +178,13 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
        signatures[
            def_name] = self.inference_from_tf_example.get_concrete_function(
                input_signature)
      elif key == 'tflite':
        input_signature = tf.TensorSpec(
            shape=[self._batch_size] + self._input_image_size +
            [self._num_channels],
            dtype=tf.float32)
        signatures[def_name] = self.inference_for_tflite.get_concrete_function(
            input_signature)
      else:
        raise ValueError('Unrecognized `input_type`')
    return signatures
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Binary to convert a saved model to tflite model."""
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.common import registry_imports # pylint: disable=unused-import
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.serving import export_tflite_lib
FLAGS = flags.FLAGS
flags.DEFINE_string(
'experiment',
None,
'experiment type, e.g. retinanet_resnetfpn_coco',
required=True)
flags.DEFINE_multi_string(
'config_file',
default='',
help='YAML/JSON files which specifies overrides. The override order '
'follows the order of args. Note that each file '
'can be used as an override template to override the default parameters '
'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.')
flags.DEFINE_string(
'params_override', '',
'The JSON/YAML file or string which specifies the parameter to be overriden'
' on top of `config_file` template.')
flags.DEFINE_string(
'saved_model_dir', None, 'The directory to the saved model.', required=True)
flags.DEFINE_string(
'tflite_path', None, 'The path to the output tflite model.', required=True)
flags.DEFINE_string(
'quant_type',
default=None,
help='Post training quantization type. Support `int8`, `int8_full`, '
'`fp16`, and `default`. See '
'https://www.tensorflow.org/lite/performance/post_training_quantization '
'for more details.')
flags.DEFINE_integer('calibration_steps', 500,
'The number of calibration steps for integer model.')
def main(_) -> None:
params = exp_factory.get_exp_config(FLAGS.experiment)
if FLAGS.config_file is not None:
for config_file in FLAGS.config_file:
params = hyperparams.override_params_dict(
params, config_file, is_strict=True)
if FLAGS.params_override:
params = hyperparams.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params.validate()
params.lock()
logging.info('Converting SavedModel from %s to TFLite model...',
FLAGS.saved_model_dir)
tflite_model = export_tflite_lib.convert_tflite_model(
saved_model_dir=FLAGS.saved_model_dir,
quant_type=FLAGS.quant_type,
params=params,
calibration_steps=FLAGS.calibration_steps)
with tf.io.gfile.GFile(FLAGS.tflite_path, 'wb') as fw:
fw.write(tflite_model)
logging.info('TFLite model converted and saved to %s.', FLAGS.tflite_path)
if __name__ == '__main__':
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Library to facilitate TFLite model conversion."""
import functools
from typing import Iterator, List, Optional
from absl import logging
import tensorflow as tf
from official.core import config_definitions as cfg
from official.vision.beta import configs
from official.vision.beta.tasks import image_classification as img_cls_task
def create_representative_dataset(
params: cfg.ExperimentConfig) -> tf.data.Dataset:
"""Creates a tf.data.Dataset to load images for representative dataset.
Args:
params: An ExperimentConfig.
Returns:
A tf.data.Dataset instance.
Raises:
ValueError: If task is not supported.
"""
if isinstance(params.task,
configs.image_classification.ImageClassificationTask):
task = img_cls_task.ImageClassificationTask(params.task)
else:
raise ValueError('Task {} not supported.'.format(type(params.task)))
# Ensure batch size is 1 for TFLite model.
params.task.train_data.global_batch_size = 1
params.task.train_data.dtype = 'float32'
logging.info('Task config: %s', params.task.as_dict())
return task.build_inputs(params=params.task.train_data)
def representative_dataset(
params: cfg.ExperimentConfig,
calibration_steps: int = 2000) -> Iterator[List[tf.Tensor]]:
""""Creates representative dataset for input calibration.
Args:
params: An ExperimentConfig.
    calibration_steps: The number of steps to run calibration for.
Yields:
An input image tensor.
"""
dataset = create_representative_dataset(params=params)
for image, _ in dataset.take(calibration_steps):
# Skip images that do not have 3 channels.
if image.shape[-1] != 3:
continue
yield [image]
def convert_tflite_model(saved_model_dir: str,
quant_type: Optional[str] = None,
params: Optional[cfg.ExperimentConfig] = None,
calibration_steps: Optional[int] = 2000) -> bytes:
"""Converts and returns a TFLite model.
Args:
    saved_model_dir: The directory containing the SavedModel.
quant_type: The post training quantization (PTQ) method. It can be one of
      `default` (dynamic range), `fp16` (float16), `int8` (integer with float
fallback), `int8_full` (integer only) and None (no quantization).
params: An optional ExperimentConfig to load and preprocess input images to
do calibration for integer quantization.
    calibration_steps: The number of steps to run calibration for.
Returns:
A converted TFLite model with optional PTQ.
  Raises:
    ValueError: If `params` is not provided when integer quantization is
      requested.
"""
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
if quant_type:
if quant_type.startswith('int8'):
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = functools.partial(
representative_dataset,
params=params,
calibration_steps=calibration_steps)
if quant_type == 'int8_full':
converter.target_spec.supported_ops = [
tf.lite.OpsSet.TFLITE_BUILTINS_INT8
]
converter.inference_input_type = tf.uint8 # or tf.int8
converter.inference_output_type = tf.uint8 # or tf.int8
elif quant_type == 'fp16':
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
elif quant_type == 'default':
converter.optimizations = [tf.lite.Optimize.DEFAULT]
return converter.convert()
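The same conversion can be driven programmatically; a minimal sketch, assuming an image classification experiment whose train_data.input_path already points at real TFRecords (needed for `int8` calibration) and placeholder paths:

import tensorflow as tf

from official.core import exp_factory
from official.vision.beta.serving import export_tflite_lib

params = exp_factory.get_exp_config('mobilenet_imagenet')
# int8 PTQ needs `params` so representative_dataset() can feed calibration
# images through the converter; fp16 and default PTQ can pass params=None.
tflite_model = export_tflite_lib.convert_tflite_model(
    saved_model_dir='/tmp/saved_model',
    quant_type='int8',
    params=params,
    calibration_steps=100)
with tf.io.gfile.GFile('/tmp/model.tflite', 'wb') as fw:
  fw.write(tflite_model)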
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for export_tflite_lib."""
import os
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.common import registry_imports # pylint: disable=unused-import
from official.core import exp_factory
from official.vision.beta.dataloaders import tfexample_utils
from official.vision.beta.serving import export_tflite_lib
from official.vision.beta.serving import image_classification as image_classification_serving
class ExportTfliteLibTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super().setUp()
self._test_tfrecord_file = os.path.join(self.get_temp_dir(),
'test.tfrecord')
self._create_test_tfrecord(num_samples=50)
def _create_test_tfrecord(self, num_samples):
tfexample_utils.dump_to_tfrecord(self._test_tfrecord_file, [
tf.train.Example.FromString(
tfexample_utils.create_classification_example(
image_height=256, image_width=256)) for _ in range(num_samples)
])
def _export_from_module(self, module, input_type, saved_model_dir):
signatures = module.get_inference_signatures(
{input_type: 'serving_default'})
tf.saved_model.save(module, saved_model_dir, signatures=signatures)
@combinations.generate(
combinations.combine(
experiment=['mobilenet_imagenet'],
quant_type=[None, 'default', 'fp16', 'int8'],
input_image_size=[[224, 224]]))
def test_export_tflite(self, experiment, quant_type, input_image_size):
params = exp_factory.get_exp_config(experiment)
params.task.validation_data.input_path = self._test_tfrecord_file
params.task.train_data.input_path = self._test_tfrecord_file
temp_dir = self.get_temp_dir()
module = image_classification_serving.ClassificationModule(
params=params, batch_size=1, input_image_size=input_image_size)
self._export_from_module(
module=module,
input_type='tflite',
saved_model_dir=os.path.join(temp_dir, 'saved_model'))
tflite_model = export_tflite_lib.convert_tflite_model(
saved_model_dir=os.path.join(temp_dir, 'saved_model'),
quant_type=quant_type,
params=params,
calibration_steps=5)
self.assertIsInstance(tflite_model, bytes)
if __name__ == '__main__':
tf.test.main()
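Beyond assertIsInstance, one way to sanity-check the converted bytes (not part of this change) is to load them into a TFLite interpreter; a sketch using the standard tf.lite API:

interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
# The input details should report the [1, 224, 224, 3] signature that the
# module was exported with (uint8 rather than float32 under int8_full).
print(interpreter.get_input_details())
print(interpreter.get_output_details())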
@@ -62,11 +62,11 @@ class ImageClassificationTask(base_task.Task):
     # Restoring checkpoint.
     if self.task_config.init_checkpoint_modules == 'all':
       ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.assert_consumed()
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()
     elif self.task_config.init_checkpoint_modules == 'backbone':
       ckpt = tf.train.Checkpoint(backbone=model.backbone)
-      status = ckpt.restore(ckpt_dir_or_file)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
     else:
       raise ValueError(
...
@@ -96,15 +96,18 @@ class MaskRCNNTask(base_task.Task):
     # Restoring checkpoint.
     if self.task_config.init_checkpoint_modules == 'all':
       ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.assert_consumed()
-    elif self.task_config.init_checkpoint_modules == 'backbone':
-      ckpt = tf.train.Checkpoint(backbone=model.backbone)
-      status = ckpt.restore(ckpt_dir_or_file)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
     else:
-      raise ValueError(
-          "Only 'all' or 'backbone' can be used to initialize the model.")
+      ckpt_items = {}
+      if 'backbone' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(backbone=model.backbone)
+      if 'decoder' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(decoder=model.decoder)
+      ckpt = tf.train.Checkpoint(**ckpt_items)
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()
     logging.info('Finished loading pretrained checkpoint from %s',
                  ckpt_dir_or_file)
@@ -261,12 +264,15 @@ class MaskRCNNTask(base_task.Task):
       metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
     else:
-      if self._task_config.annotation_file:
+      if (not self._task_config.model.include_mask
+         ) or self._task_config.annotation_file:
         self.coco_metric = coco_evaluator.COCOEvaluator(
             annotation_file=self._task_config.annotation_file,
             include_mask=self._task_config.model.include_mask,
             per_category_metrics=self._task_config.per_category_metrics)
       else:
+        # Builds COCO-style annotation file if include_mask is True, and
+        # annotation_file isn't provided.
         annotation_path = os.path.join(self._logging_dir, 'annotation.json')
         if tf.io.gfile.exists(annotation_path):
           logging.info(
...
@@ -71,15 +71,18 @@ class RetinaNetTask(base_task.Task):
     # Restoring checkpoint.
     if self.task_config.init_checkpoint_modules == 'all':
       ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.assert_consumed()
-    elif self.task_config.init_checkpoint_modules == 'backbone':
-      ckpt = tf.train.Checkpoint(backbone=model.backbone)
-      status = ckpt.restore(ckpt_dir_or_file)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
     else:
-      raise ValueError(
-          "Only 'all' or 'backbone' can be used to initialize the model.")
+      ckpt_items = {}
+      if 'backbone' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(backbone=model.backbone)
+      if 'decoder' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(decoder=model.decoder)
+      ckpt = tf.train.Checkpoint(**ckpt_items)
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()
     logging.info('Finished loading pretrained checkpoint from %s',
                  ckpt_dir_or_file)
...
@@ -63,8 +63,8 @@ class SemanticSegmentationTask(base_task.Task):
     # Restoring checkpoint.
     if 'all' in self.task_config.init_checkpoint_modules:
       ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.assert_consumed()
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()
     else:
       ckpt_items = {}
       if 'backbone' in self.task_config.init_checkpoint_modules:
@@ -73,7 +73,7 @@ class SemanticSegmentationTask(base_task.Task):
       ckpt_items.update(decoder=model.decoder)
       ckpt = tf.train.Checkpoint(**ckpt_items)
-      status = ckpt.restore(ckpt_dir_or_file)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
     logging.info('Finished loading pretrained checkpoint from %s',
...
@@ -86,11 +86,11 @@ class VideoClassificationTask(base_task.Task):
     # Restoring checkpoint.
     if self.task_config.init_checkpoint_modules == 'all':
       ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.assert_consumed()
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()
     elif self.task_config.init_checkpoint_modules == 'backbone':
       ckpt = tf.train.Checkpoint(backbone=model.backbone)
-      status = ckpt.restore(ckpt_dir_or_file)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
     else:
       raise ValueError(
...
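The task diffs above all make the same checkpoint-loading change: Checkpoint.restore with assert_consumed() becomes Checkpoint.read with expect_partial().assert_existing_objects_matched(), which tolerates extra values in the checkpoint (such as optimizer slots) while still verifying that every object built in the model found a match. A minimal sketch of the new pattern, with `model` and `ckpt_dir_or_file` standing in for the task's actual objects:

ckpt = tf.train.Checkpoint(backbone=model.backbone)
status = ckpt.read(ckpt_dir_or_file)
# expect_partial() silences warnings about checkpoint values that have no
# matching object here; the assertion still checks the objects we do have.
status.expect_partial().assert_existing_objects_matched()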
@@ -424,7 +424,6 @@ def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig):
   return x
 
-@tf.keras.utils.register_keras_serializable(package='Vision')
 class EfficientNet(tf.keras.Model):
   """Wrapper class for an EfficientNet Keras model.
...
@@ -311,8 +311,7 @@ def resnet50(num_classes,
   x = layers.GlobalAveragePooling2D()(x)
   x = layers.Dense(
       num_classes,
-      kernel_initializer=tf.compat.v1.keras.initializers.random_normal(
-          stddev=0.01),
+      kernel_initializer=tf.initializers.random_normal(stddev=0.01),
       kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
       bias_regularizer=_gen_l2_regularizer(use_l2_regularizer),
       name='fc1000')(
...