nms ops used by detection generator

fb2b278d · Vishnu Banna · 0352c8f4 · fb2b278d · fb2b278d · fb2b278d
Commit fb2b278d authored Jun 03, 2021 by Vishnu Banna
4 changed files
--- a/official/vision/beta/projects/yolo/ops/box_ops.py
+++ b/official/vision/beta/projects/yolo/ops/box_ops.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Bounding box utils."""
-
-import math
-
 import tensorflow as tf
+import tensorflow.keras.backend as K
+from official.vision.beta.projects.yolo.ops import math_ops
+from typing import Tuple, Union
+import math


 def yxyx_to_xcycwh(box: tf.Tensor):
-  """Converts boxes from ymin, xmin, ymax, xmax.
-
-  to x_center, y_center, width, height.
+  """Converts boxes from ymin, xmin, ymax, xmax to x_center, y_center, width, 
+  height.

  Args:
-    box: `Tensor` whose shape is [..., 4] and represents the coordinates
-      of boxes in ymin, xmin, ymax, xmax.
+    box: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes in ymin, xmin, ymax, xmax.
  
  Returns:
-    `Tensor` whose shape is [..., 4] and contains the new format.
-
-  Raises:
-    ValueError: If the last dimension of box is not 4 or if box's dtype isn't
-      a floating point type.
+    box: a `Tensor` whose shape is the same as `box` in new format.
  """
  with tf.name_scope('yxyx_to_xcycwh'):
    ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1)
@@ -45,22 +26,10 @@ def yxyx_to_xcycwh(box: tf.Tensor):
  return box


-def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False):
-  """Converts boxes from x_center, y_center, width, height.
-
-  to ymin, xmin, ymax, xmax.
-
-  Args:
-    box: a `Tensor` whose shape is [..., 4] and represents the coordinates
-      of boxes in x_center, y_center, width, height.
-    split_min_max: bool, whether or not to split x, y min and max values.
-
-  Returns:
-    box: a `Tensor` whose shape is [..., 4] and contains the new format.
-
-  Raises:
-    ValueError: If the last dimension of box is not 4 or if box's dtype isn't
-      a floating point type.
+@tf.custom_gradient
+def _xcycwh_to_yxyx(box: tf.Tensor, scale):
+  """Private function called by xcycwh_to_yxyx to allow custom gradients
+  with defaults.
  """
  with tf.name_scope('xcycwh_to_yxyx'):
    xy, wh = tf.split(box, 2, axis=-1)
@@ -69,229 +38,299 @@ def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False):
    x_min, y_min = tf.split(xy_min, 2, axis=-1)
    x_max, y_max = tf.split(xy_max, 2, axis=-1)
    box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
-    if split_min_max:
-      box = tf.split(box, 2, axis=-1)
-  return box

+    def delta(dbox):
+      #y_min = top, x_min = left, y_max = bottom, x_max = right
+      dt, dl, db, dr = tf.split(dbox, 4, axis=-1)
+      dx = dl + dr
+      dy = dt + db
+      dw = (dr - dl) / scale
+      dh = (db - dt) / scale
+
+      dbox = tf.concat([dx, dy, dw, dh], axis=-1)
+      return dbox, 0.0
+
+  return box, delta

-def xcycwh_to_xyxy(box: tf.Tensor, split_min_max: bool = False):
-  """Converts boxes from x_center, y_center, width, height to.

-  xmin, ymin, xmax, ymax.
+def xcycwh_to_yxyx(box: tf.Tensor, darknet=False):
+  """Converts boxes from x_center, y_center, width, height to ymin, xmin, ymax, 
+  xmax.
  
  Args:
-    box: box: a `Tensor` whose shape is [..., 4] and represents the
-      coordinates of boxes in x_center, y_center, width, height.
-    split_min_max: bool, whether or not to split x, y min and max values.
+    box: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes in x_center, y_center, width, height.
  
  Returns:
-    box: a `Tensor` whose shape is [..., 4] and contains the new format.
-
-  Raises:
-    ValueError: If the last dimension of box is not 4 or if box's dtype isn't
-      a floating point type.
+    box: a `Tensor` whose shape is the same as `box` in new format.
  """
-  with tf.name_scope('xcycwh_to_yxyx'):
-    xy, wh = tf.split(box, 2, axis=-1)
-    xy_min = xy - wh / 2
-    xy_max = xy + wh / 2
-    box = (xy_min, xy_max)
-    if not split_min_max:
-      box = tf.concat(box, axis=-1)
+  if darknet:
+    scale = 1.0
+  else:
+    scale = 2.0
+  box = _xcycwh_to_yxyx(box, scale)
  return box


-def center_distance(center_1: tf.Tensor, center_2: tf.Tensor):
-  """Calculates the squared distance between two points.
-
-  This function is mathematically equivalent to the following code, but has
-  smaller rounding errors.
-
-  tf.norm(center_1 - center_2, axis=-1)**2
+# IOU
+def intersect_and_union(box1, box2, yxyx=False):
+  """Calculates the intersection and union between box1 and box2.

  Args:
-    center_1: a `Tensor` whose shape is [..., 2] and represents a point.
-    center_2: a `Tensor` whose shape is [..., 2] and represents a point.
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.

  Returns:
-    dist: a `Tensor` whose shape is [...] and value represents the squared
-      distance between center_1 and center_2.
-
-  Raises:
-    ValueError: If the last dimension of either center_1 or center_2 is not 2.
+    intersection: a `Tensor` who represents the intersection.
+    union: a `Tensor` who represents the union.
  """
-  with tf.name_scope('center_distance'):
-    dist = (center_1[..., 0] - center_2[..., 0])**2 + (center_1[..., 1] -
-                                                       center_2[..., 1])**2
-  return dist

+  if not yxyx:
+    box1 = xcycwh_to_yxyx(box1)
+    box2 = xcycwh_to_yxyx(box2)
+
+  b1mi, b1ma = tf.split(box1, 2, axis=-1)
+  b2mi, b2ma = tf.split(box2, 2, axis=-1)
+  intersect_mins = tf.math.maximum(b1mi, b2mi)
+  intersect_maxes = tf.math.minimum(b1ma, b2ma)
+  intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.0)
+  intersection = tf.reduce_prod(intersect_wh, axis=-1)
+
+  box1_area = tf.reduce_prod(b1ma - b1mi, axis=-1)
+  box2_area = tf.reduce_prod(b2ma - b2mi, axis=-1)
+  union = box1_area + box2_area - intersection
+  return intersection, union

-def compute_iou(box1, box2, yxyx=False):
-  """Calculates the intersection of union between box1 and box2.
+
+def smallest_encompassing_box(box1, box2, yxyx=False):
+  """Calculates the smallest box that encompasses both that encomapasses both
+  box1 and box2.

  Args:
-    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
-    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
-    yxyx: `bool`, whether or not box1, and box2 are in yxyx format.
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.

  Returns:
-    iou: a `Tensor` whose shape is [...] and value represents the intersection
-      over union.
-
-  Raises:
-    ValueError: If the last dimension of either box1 or box2 is not 4.
+    box_c: a `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes, the return format is y_min, x_min, y_max, x_max if yxyx is set to
+      to True. In other words it will match the input format.
  """
-  # Get box corners
-  with tf.name_scope('iou'):
  if not yxyx:
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)

  b1mi, b1ma = tf.split(box1, 2, axis=-1)
  b2mi, b2ma = tf.split(box2, 2, axis=-1)
-    intersect_mins = tf.math.maximum(b1mi, b2mi)
-    intersect_maxes = tf.math.minimum(b1ma, b2ma)
-    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
-                                   tf.zeros_like(intersect_mins))
-    intersection = tf.reduce_prod(
-        intersect_wh, axis=-1)  # intersect_wh[..., 0] * intersect_wh[..., 1]

-    box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1))
-    box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1))
-    union = box1_area + box2_area - intersection
+  bcmi = tf.math.minimum(b1mi, b2mi)
+  bcma = tf.math.maximum(b1ma, b2ma)

-    iou = intersection / (union + 1e-7)
-    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
-  return iou
+  bca = tf.reduce_prod(bcma - bcmi, keepdims=True, axis=-1)
+  box_c = tf.concat([bcmi, bcma], axis=-1)
+
+  if not yxyx:
+    box_c = yxyx_to_xcycwh(box_c)

+  box_c = tf.where(bca == 0.0, tf.zeros_like(box_c), box_c)
+  return box_c

-def compute_giou(box1, box2):
-  """Calculates the generalized intersection of union between box1 and box2.
+
+def compute_iou(box1, box2, yxyx=False):
+  """Calculates the intersection over union between box1 and box2.

  Args:
-    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
-    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.

  Returns:
-    iou: a `Tensor` whose shape is [...] and value represents the generalized
-      intersection over union.
-
-  Raises:
-    ValueError: If the last dimension of either box1 or box2 is not 4.
+    iou: a `Tensor` who represents the intersection over union.
  """
-  with tf.name_scope('giou'):
  # get box corners
-    box1 = xcycwh_to_yxyx(box1)
-    box2 = xcycwh_to_yxyx(box2)
+  with tf.name_scope('iou'):
+    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
+    iou = math_ops.divide_no_nan(intersection, union)
+    iou = math_ops.rm_nan_inf(iou, val=0.0)
+  return iou

-    # compute IOU
-    intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
-    intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
-    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
-                                   tf.zeros_like(intersect_mins))
-    intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
-
-    box1_area = tf.math.abs(
-        tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
-    box2_area = tf.math.abs(
-        tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
-    union = box1_area + box2_area - intersection

-    iou = tf.math.divide_no_nan(intersection, union)
-    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
+def compute_giou(box1, box2, yxyx=False, darknet=False):
+  """Calculates the General intersection over union between box1 and box2.
+
+  Args:
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.
+    darknet: a `bool` indicating whether the calling function is the yolo 
+      darknet loss.
+
+  Returns:
+    giou: a `Tensor` who represents the General intersection over union.
+  """
+  with tf.name_scope('giou'):
+    # get IOU
+    if not yxyx:
+      box1 = xcycwh_to_yxyx(box1, darknet=darknet)
+      box2 = xcycwh_to_yxyx(box2, darknet=darknet)
+      yxyx = True
+
+    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
+    iou = math_ops.divide_no_nan(intersection, union)
+    iou = math_ops.rm_nan_inf(iou, val=0.0)

    # find the smallest box to encompase both box1 and box2
-    c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
-    c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
-    c = tf.math.abs(tf.reduce_prod(c_mins - c_maxes, axis=-1))
+    boxc = smallest_encompassing_box(box1, box2, yxyx=yxyx)
+    if yxyx:
+      boxc = yxyx_to_xcycwh(boxc)
+    cxcy, cwch = tf.split(boxc, 2, axis=-1)
+    c = tf.math.reduce_prod(cwch, axis=-1)

    # compute giou
-    giou = iou - tf.math.divide_no_nan((c - union), c)
+    regularization = math_ops.divide_no_nan((c - union), c)
+    giou = iou - regularization
+    giou = tf.clip_by_value(giou, clip_value_min=-1.0, clip_value_max=1.0)
  return iou, giou


-def compute_diou(box1, box2):
-  """Calculates the distance intersection of union between box1 and box2.
+def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
+  """Calculates the distance intersection over union between box1 and box2.

  Args:
-    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
-    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    beta: a `float` indicating the amount to scale the distance iou 
+      regularization term. 
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.
+    darknet: a `bool` indicating whether the calling function is the yolo 
+      darknet loss.

  Returns:
-    iou: a `Tensor` whose shape is [...] and value represents the distance
-      intersection over union.
-
-  Raises:
-    ValueError: If the last dimension of either box1 or box2 is not 4.
+    diou: a `Tensor` who represents the distance intersection over union.
  """
  with tf.name_scope('diou'):
    # compute center distance
-    dist = center_distance(box1[..., 0:2], box2[..., 0:2])
-
-    # get box corners
-    box1 = xcycwh_to_yxyx(box1)
-    box2 = xcycwh_to_yxyx(box2)
-
-    # compute IOU
-    intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
-    intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
-    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
-                                   tf.zeros_like(intersect_mins))
-    intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
-
-    box1_area = tf.math.abs(
-        tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
-    box2_area = tf.math.abs(
-        tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
-    union = box1_area + box2_area - intersection
-
-    iou = tf.math.divide_no_nan(intersection, union)
-    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
-
-    # compute max diagnal of the smallest enclosing box
-    c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
-    c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
-
-    diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)
-
-    regularization = tf.math.divide_no_nan(dist, diag_dist)
-    diou = iou + regularization
+    if not yxyx:
+      box1 = xcycwh_to_yxyx(box1, darknet=darknet)
+      box2 = xcycwh_to_yxyx(box2, darknet=darknet)
+      yxyx = True
+
+    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
+    boxc = smallest_encompassing_box(box1, box2, yxyx=yxyx)
+
+    iou = math_ops.divide_no_nan(intersection, union)
+    iou = math_ops.rm_nan_inf(iou, val=0.0)
+    if yxyx:
+      boxc = yxyx_to_xcycwh(boxc)
+      box1 = yxyx_to_xcycwh(box1)
+      box2 = yxyx_to_xcycwh(box2)
+
+    b1xy, b1wh = tf.split(box1, 2, axis=-1)
+    b2xy, b2wh = tf.split(box2, 2, axis=-1)
+    bcxy, bcwh = tf.split(boxc, 2, axis=-1)
+
+    center_dist = tf.reduce_sum((b1xy - b2xy)**2, axis=-1)
+    c_diag = tf.reduce_sum(bcwh**2, axis=-1)
+
+    regularization = math_ops.divide_no_nan(center_dist, c_diag)
+    diou = iou - regularization**beta
+    diou = tf.clip_by_value(diou, clip_value_min=-1.0, clip_value_max=1.0)
  return iou, diou


-def compute_ciou(box1, box2):
-  """Calculates the complete intersection of union between box1 and box2.
+def compute_ciou(box1, box2, yxyx=False, darknet=False):
+  """Calculates the complete intersection over union between box1 and box2.

  Args:
-    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates
-      of boxes in x_center, y_center, width, height.
-    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
-      boxes in x_center, y_center, width, height.
+    box1: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    box2: any `Tensor` whose last dimension is 4 representing the coordinates of 
+      boxes.
+    yxyx: a `bool` indicating whether the input box is of the format x_center
+      y_center, width, height or y_min, x_min, y_max, x_max.
+    darknet: a `bool` indicating whether the calling function is the yolo 
+      darknet loss.

  Returns:
-    iou: a `Tensor` whose shape is [...] and value represents the complete
-      intersection over union.
-
-  Raises:
-    ValueError: If the last dimension of either box1 or box2 is not 4.
+    ciou: a `Tensor` who represents the complete intersection over union.
  """
  with tf.name_scope('ciou'):
    # compute DIOU and IOU
-    iou, diou = compute_diou(box1, box2)
+    iou, diou = compute_diou(box1, box2, yxyx=yxyx, darknet=darknet)
+
+    if yxyx:
+      box1 = yxyx_to_xcycwh(box1)
+      box2 = yxyx_to_xcycwh(box2)
+
+    b1x, b1y, b1w, b1h = tf.split(box1, 4, axis=-1)
+    b2x, b2y, b2w, b2h = tf.split(box1, 4, axis=-1)

    # computer aspect ratio consistency
-    arcterm = (
-        tf.math.atan(tf.math.divide_no_nan(box1[..., 2], box1[..., 3])) -
-        tf.math.atan(tf.math.divide_no_nan(box2[..., 2], box2[..., 3])))**2
-    v = 4 * arcterm / (math.pi)**2
-
-    # compute IOU regularization
-    a = tf.math.divide_no_nan(v, ((1 - iou) + v))
-    ciou = diou + v * a
+    terma = tf.cast(math_ops.divide_no_nan(b1w, b1h), tf.float32)
+    termb = tf.cast(math_ops.divide_no_nan(b2w, b2h), tf.float32)
+    arcterm = tf.square(tf.math.atan(terma) - tf.math.atan(termb))
+    v = tf.squeeze(4 * arcterm / (math.pi**2), axis=-1)
+    v = tf.cast(v, b1w.dtype)
+
+    a = tf.stop_gradient(math_ops.divide_no_nan(v, ((1 - iou) + v)))
+    ciou = diou - (v * a)
+    ciou = tf.clip_by_value(ciou, clip_value_min=-1.0, clip_value_max=1.0)
  return iou, ciou
+
+
+# equal to bbox_overlap but far more versitile
+def aggregated_comparitive_iou(boxes1,
+                               boxes2=None,
+                               iou_type=0,
+                               beta=0.6):
+  """Calculates the intersection over union between every box in boxes1 and 
+  every box in boxes2.
+
+  Args:
+    boxes1: a `Tensor` of shape [batch size, N, 4] representing the coordinates 
+      of boxes.
+    boxes2: a `Tensor` of shape [batch size, N, 4] representing the coordinates 
+      of boxes.
+    iou_type: `integer` representing the iou version to use, 0 is distance iou, 
+      1 is the general iou, 2 is the complete iou, any other number uses the 
+      standard iou.
+    beta: `float` for the scaling quantity to apply to distance iou 
+      regularization.
+    
+
+  Returns:
+    iou: a `Tensor` who represents the intersection over union in of the 
+      expected/input type.
+  """
+  boxes1 = tf.expand_dims(boxes1, axis=-2)
+
+  if boxes2 is not None:
+    boxes2 = tf.expand_dims(boxes2, axis=-3)
+  else:
+    boxes2 = tf.transpose(boxes1, perm=(0, 2, 1, 3))
+
+  if iou_type == 0:  #diou
+    _, iou = compute_diou(boxes1, boxes2, beta=beta, yxyx=True)
+  elif iou_type == 1:  #giou
+    _, iou = compute_giou(boxes1, boxes2, yxyx=True)
+  elif iou_type == 2:  #ciou
+    _, iou = compute_ciou(boxes1, boxes2, yxyx=True)
+  else:
+    iou = compute_iou(boxes1, boxes2, yxyx=True)
+  return iou
--- a/official/vision/beta/projects/yolo/ops/box_ops_test.py
+++ b/official/vision/beta/projects/yolo/ops/box_ops_test.py
@@ -27,10 +27,8 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
    expected_shape = np.array([num_boxes, 4])
    xywh_box = box_ops.yxyx_to_xcycwh(boxes)
    yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
-    xyxy_box = box_ops.xcycwh_to_xyxy(boxes)
    self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
    self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
-    self.assertAllEqual(tf.shape(xyxy_box).numpy(), expected_shape)

  @parameterized.parameters((1), (5), (7))
  def test_ious(self, num_boxes):

--- a/official/vision/beta/projects/yolo/ops/math_ops.py
+++ b/official/vision/beta/projects/yolo/ops/math_ops.py
+"""A set of private math operations used to safely implement the yolo loss"""
+import tensorflow as tf
+import tensorflow.keras.backend as K
+
+
+def rm_nan_inf(x, val=0.0):
+  """remove nan and infinity   
+
+  Args:
+    x: any `Tensor` of any type. 
+    val: value to replace nan and infinity with. 
+
+  Return:
+    a `Tensor` with nan and infinity removed.
+  """
+  cond = tf.math.logical_or(tf.math.is_nan(x), tf.math.is_inf(x))
+  val = tf.cast(val, dtype=x.dtype)
+  x = tf.where(cond, val, x)
+  return x
+
+
+def rm_nan(x, val=0.0):
+  """remove nan and infinity.   
+
+  Args:
+    x: any `Tensor` of any type. 
+    val: value to replace nan. 
+
+  Return:
+    a `Tensor` with nan removed.
+  """
+  cond = tf.math.is_nan(x)
+  val = tf.cast(val, dtype=x.dtype)
+  x = tf.where(cond, val, x)
+  return x
+
+
+def divide_no_nan(a, b):
+  """Nan safe divide operation built to allow model compilation in tflite. 
+
+  Args:
+    a: any `Tensor` of any type.
+    b: any `Tensor` of any type with the same shape as tensor a. 
+
+  Return:
+    a `Tensor` representing a divided by b, with all nan values removed. 
+  """
+  zero = tf.cast(0.0, b.dtype)
+  return tf.where(b == zero, zero, a / b)
+
+
+def mul_no_nan(x, y):
+  """Nan safe multiply operation built to allow model compilation in tflite and 
+  to allowing one tensor to mask another. Where ever x is zero the 
+  multiplication is not computed and the value is replaced with a zero. This is 
+  requred because 0 * nan = nan. This can make computation unstable in some 
+  cases where the intended behavior is for zero to mean ignore.  
+
+  Args:
+    x: any `Tensor` of any type. 
+    y: any `Tensor` of any type with the same shape as tensor x. 
+
+  Return:
+    a `Tensor` representing x times y, where x is used to safely mask the 
+    tensor y. 
+  """
+  return tf.where(x == 0, tf.cast(0, x.dtype), x * y)
--- a/official/vision/beta/projects/yolo/ops/nms_ops.py
+++ b/official/vision/beta/projects/yolo/ops/nms_ops.py
+import tensorflow as tf
+from official.vision.beta.ops import box_ops as box_utils
+from official.vision.beta.projects.yolo.ops import box_ops as box_ops
+
+NMS_TILE_SIZE = 512
+
+class TiledNMS():
+  IOU_TYPES = {'diou': 0, 'giou': 1, 'ciou': 2, 'iou': 3}
+
+  def __init__(self, iou_type='diou', beta=0.6):
+    '''initialization for all non max supression operations mainly used to 
+    select hyperperamters for the iou type and scaling.
+
+    Args: 
+      iou_type: `str` for the version of IOU to use {diou, giou, ciou, iou}.
+      beta: `float` for the amount to scale regualrization on distance iou.
+    
+    '''
+    self._iou_type = TiledNMS.IOU_TYPES[iou_type]
+    self._beta = beta
+
+  def _self_suppression(self, iou, _, iou_sum):
+    batch_size = tf.shape(iou)[0]
+    can_suppress_others = tf.cast(
+        tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]),
+        iou.dtype)
+    iou_suppressed = tf.reshape(
+        tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
+        [batch_size, -1, 1]) * iou
+    iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
+    return [
+        iou_suppressed,
+        tf.reduce_any(iou_sum - iou_sum_new > 0.5), iou_sum_new
+    ]
+
+  def _cross_suppression(self, boxes, box_slice, iou_threshold, inner_idx):
+    batch_size = tf.shape(boxes)[0]
+    new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
+                         [batch_size, NMS_TILE_SIZE, 4])
+    #iou = box_ops.bbox_overlap(new_slice, box_slice)
+    iou = box_ops.aggregated_comparitive_iou(
+        new_slice, box_slice, beta=self._beta, iou_type=self._iou_type)
+    ret_slice = tf.expand_dims(
+        tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
+        2) * box_slice
+    return boxes, ret_slice, iou_threshold, inner_idx + 1
+
+  def _suppression_loop_body(self, boxes, iou_threshold, output_size, idx):
+    """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).
+
+    Args:
+      boxes: a tensor with a shape of [batch_size, anchors, 4].
+      iou_threshold: a float representing the threshold for whether boxes
+        overlap too much with respect to IOU.
+      output_size: an int32 tensor of size [batch_size]. Representing the number
+        of selected boxes for each batch.
+      idx: an integer scalar representing induction variable.
+
+    Returns:
+      boxes: updated boxes.
+      iou_threshold: pass down iou_threshold to the next iteration.
+      output_size: the updated output_size.
+      idx: the updated induction variable.
+    """
+    num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
+    batch_size = tf.shape(boxes)[0]
+
+    # Iterates over tiles that can possibly suppress the current tile.
+    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
+                         [batch_size, NMS_TILE_SIZE, 4])
+    _, box_slice, _, _ = tf.while_loop(
+        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
+        self._cross_suppression,
+        [boxes, box_slice, iou_threshold,
+         tf.constant(0)])
+
+    # Iterates over the current tile to compute self-suppression.
+    # iou = box_ops.bbox_overlap(box_slice, box_slice)
+    iou = box_ops.aggregated_comparitive_iou(
+        box_slice, box_slice, beta=self._beta, iou_type=self._iou_type)
+    mask = tf.expand_dims(
+        tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
+            tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
+    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
+    suppressed_iou, _, _ = tf.while_loop(
+        lambda _iou, loop_condition, _iou_sum: loop_condition,
+        self._self_suppression,
+        [iou, tf.constant(True),
+         tf.reduce_sum(iou, [1, 2])])
+    suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
+    box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype),
+                                2)
+
+    # Uses box_slice to update the input boxes.
+    mask = tf.reshape(
+        tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
+    boxes = tf.tile(tf.expand_dims(
+        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
+            boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
+    boxes = tf.reshape(boxes, [batch_size, -1, 4])
+
+    # Updates output_size.
+    output_size += tf.reduce_sum(
+        tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
+    return boxes, iou_threshold, output_size, idx + 1
+
+  def _sorted_non_max_suppression_padded(self, scores, boxes, max_output_size,
+                                         iou_threshold):
+    """A wrapper that handles non-maximum suppression.
+
+    Assumption:
+      * The boxes are sorted by scores unless the box is a dot (all coordinates
+        are zero).
+      * Boxes with higher scores can be used to suppress boxes with lower 
+        scores.
+
+    The overal design of the algorithm is to handle boxes tile-by-tile:
+
+    boxes = boxes.pad_to_multiply_of(tile_size)
+    num_tiles = len(boxes) // tile_size
+    output_boxes = []
+    for i in range(num_tiles):
+      box_tile = boxes[i*tile_size : (i+1)*tile_size]
+      for j in range(i - 1):
+        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
+        iou = bbox_overlap(box_tile, suppressing_tile)
+        # if the box is suppressed in iou, clear it to a dot
+        box_tile *= _update_boxes(iou)
+      # Iteratively handle the diagnal tile.
+      iou = _box_overlap(box_tile, box_tile)
+      iou_changed = True
+      while iou_changed:
+        # boxes that are not suppressed by anything else
+        suppressing_boxes = _get_suppressing_boxes(iou)
+        # boxes that are suppressed by suppressing_boxes
+        suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
+        # clear iou to 0 for boxes that are suppressed, as they cannot be used
+        # to suppress other boxes any more
+        new_iou = _clear_iou(iou, suppressed_boxes)
+        iou_changed = (new_iou != iou)
+        iou = new_iou
+      # remaining boxes that can still suppress others, are selected boxes.
+      output_boxes.append(_get_suppressing_boxes(iou))
+      if len(output_boxes) >= max_output_size:
+        break
+
+    Args:
+      scores: a tensor with a shape of [batch_size, anchors].
+      boxes: a tensor with a shape of [batch_size, anchors, 4].
+      max_output_size: a scalar integer `Tensor` representing the maximum number
+        of boxes to be selected by non max suppression.
+      iou_threshold: a float representing the threshold for whether boxes
+        overlap too much with respect to IOU.
+
+    Returns:
+      nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
+        dtype as input scores.
+      nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
+        same dtype as input boxes.
+    """
+    batch_size = tf.shape(boxes)[0]
+    num_boxes = tf.shape(boxes)[1]
+    pad = tf.cast(
+        tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
+        tf.int32) * NMS_TILE_SIZE - num_boxes
+    boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
+    scores = tf.pad(
+        tf.cast(scores, tf.float32), [[0, 0], [0, pad]], constant_values=-1)
+    num_boxes += pad
+
+    def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
+      return tf.logical_and(
+          tf.reduce_min(output_size) < max_output_size,
+          idx < num_boxes // NMS_TILE_SIZE)
+
+    selected_boxes, _, output_size, _ = tf.while_loop(
+        _loop_cond, self._suppression_loop_body, [
+            boxes, iou_threshold,
+            tf.zeros([batch_size], tf.int32),
+            tf.constant(0)
+        ])
+    idx = num_boxes - tf.cast(
+        tf.nn.top_k(
+            tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
+            tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
+        tf.int32)
+    idx = tf.minimum(idx, num_boxes - 1)
+    idx = tf.reshape(
+        idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
+    boxes = tf.reshape(
+        tf.gather(tf.reshape(boxes, [-1, 4]), idx),
+        [batch_size, max_output_size, 4])
+    boxes = boxes * tf.cast(
+        tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
+            output_size, [-1, 1, 1]), boxes.dtype)
+    scores = tf.reshape(
+        tf.gather(tf.reshape(scores, [-1, 1]), idx),
+        [batch_size, max_output_size])
+    scores = scores * tf.cast(
+        tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
+            output_size, [-1, 1]), scores.dtype)
+
+    return scores, boxes
+
+  def _select_top_k_scores(self, scores_in, pre_nms_num_detections):
+    # batch_size, num_anchors, num_class = scores_in.get_shape().as_list()
+    scores_shape = scores_in.get_shape().as_list()  #tf.shape(scores_in)
+    batch_size, num_anchors, num_class = scores_shape[0], scores_shape[
+        1], scores_shape[2]
+    scores_trans = tf.transpose(scores_in, perm=[0, 2, 1])
+    scores_trans = tf.reshape(scores_trans, [-1, num_anchors])
+
+    top_k_scores, top_k_indices = tf.nn.top_k(
+        scores_trans, k=pre_nms_num_detections, sorted=True)
+
+    top_k_scores = tf.reshape(top_k_scores,
+                              [-1, num_class, pre_nms_num_detections])
+    top_k_indices = tf.reshape(top_k_indices,
+                               [-1, num_class, pre_nms_num_detections])
+
+    return tf.transpose(top_k_scores,
+                        [0, 2, 1]), tf.transpose(top_k_indices, [0, 2, 1])
+
+  def complete_nms(self,
+                   boxes,
+                   scores,
+                   pre_nms_top_k=5000,
+                   pre_nms_score_threshold=0.05,
+                   nms_iou_threshold=0.5,
+                   max_num_detections=100):
+    """Generate the final detections given the model outputs.
+
+    This implementation unrolls classes dimension while using the tf.while_loop
+    to implement the batched NMS, so that it can be parallelized at the batch
+    dimension. It should give better performance comparing to v1 implementation.
+    It is TPU compatible.
+
+    Args:
+      boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size,
+        N, 1, 4], which box predictions on all feature levels. The N is the 
+        number of total anchors on all levels.
+      scores: a tensor with shape [batch_size, N, num_classes], which stacks 
+        class probability on all feature levels. The N is the number of total 
+        anchors on all levels. The num_classes is the number of classes the 
+        model predicted. Note that the class_outputs here is the raw score.
+      pre_nms_top_k: an int number of top candidate detections per class
+        before NMS.
+      pre_nms_score_threshold: a float representing the threshold for deciding
+        when to remove boxes based on score.
+      nms_iou_threshold: a float representing the threshold for deciding whether
+        boxes overlap too much with respect to IOU.
+      max_num_detections: a scalar representing maximum number of boxes retained
+        over all classes.
+
+    Returns:
+      nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
+        representing top detected boxes in [y1, x1, y2, x2].
+      nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
+        representing sorted confidence scores for detected boxes. The values are
+        between [0, 1].
+      nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
+        representing classes for detected boxes.
+      valid_detections: `int` Tensor of shape [batch_size] only the top
+        `valid_detections` boxes are valid detections.
+    """
+    with tf.name_scope('nms'):
+      nmsed_boxes = []
+      nmsed_classes = []
+      nmsed_scores = []
+      valid_detections = []
+      boxes_shape = boxes.get_shape().as_list()
+      batch_size, _, num_classes_for_box, _ = (boxes_shape[0], boxes_shape[1],
+                                               boxes_shape[2], boxes_shape[3])
+
+      scores_shape = scores.get_shape().as_list()
+      _, total_anchors, num_classes = (scores_shape[0], scores_shape[1],
+                                       scores_shape[2])
+
+      scores, indices = self._select_top_k_scores(
+          scores, tf.math.minimum(total_anchors, pre_nms_top_k))
+
+      for i in range(num_classes):
+        boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
+        scores_i = scores[:, :, i]
+        # Obtains pre_nms_top_k before running NMS.
+        boxes_i = tf.gather(boxes_i, indices[:, :, i], batch_dims=1, axis=1)
+
+        # Filter out scores.
+        boxes_i, scores_i = box_utils.filter_boxes_by_scores(
+            boxes_i, scores_i, min_score_threshold=pre_nms_score_threshold)
+
+        (nmsed_scores_i,
+         nmsed_boxes_i) = self._sorted_non_max_suppression_padded(
+             tf.cast(scores_i, tf.float32),
+             tf.cast(boxes_i, tf.float32),
+             max_num_detections,
+             iou_threshold=nms_iou_threshold)
+        nmsed_classes_i = tf.ones_like(nmsed_scores_i, dtype=tf.int32) * i
+
+        #tf.fill([batch_size, max_num_detections], i)
+        nmsed_boxes.append(nmsed_boxes_i)
+        nmsed_scores.append(nmsed_scores_i)
+        nmsed_classes.append(nmsed_classes_i)
+
+    nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
+    nmsed_scores = tf.concat(nmsed_scores, axis=1)
+    nmsed_classes = tf.concat(nmsed_classes, axis=1)
+    nmsed_scores, indices = tf.nn.top_k(
+        nmsed_scores, k=max_num_detections, sorted=True)
+    nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
+    nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)
+    valid_detections = tf.reduce_sum(
+        input_tensor=tf.cast(tf.greater(nmsed_scores, -1), tf.int32), axis=1)
+
+    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
+
+
+BASE_NMS = TiledNMS(iou_type='iou', beta=0.6)
+
+
+def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
+                                      iou_threshold):
+  """wrapper function to match NMS found in official/vision/beta/ops/nms.py"""
+  return BASE_NMS._sorted_non_max_suppression_padded(scores, boxes,
+                                                     max_output_size,
+                                                     iou_threshold)
+
+
+def sort_drop(objectness, box, classificationsi, k):
+  """This function sorts and drops boxes such that there are only k boxes 
+  sorted by number the objectness or confidence 
+
+  Args: 
+    objectness: a `Tensor` of shape [batch size, N] that needs to be 
+      filtered.
+    box: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
+    classificationsi: a `Tensor` of shape [batch size, N, num_classes] that 
+      needs to be filtered.
+    k: a `integer` for the maximum number of boxes to keep after filtering
+  
+  Return:
+    objectness: filtered `Tensor` of shape [batch size, k] 
+    boxes: filtered `Tensor` of shape [batch size, k, 4] 
+    classifications: filtered `Tensor` of shape [batch size, k, num_classes] 
+  """
+  # find rhe indexes for the boxes based on the scores
+  objectness, ind = tf.math.top_k(objectness, k=k)
+
+  # build the indexes
+  ind_m = tf.ones_like(ind) * tf.expand_dims(
+      tf.range(0,
+               tf.shape(objectness)[0]), axis=-1)
+  bind = tf.stack([tf.reshape(ind_m, [-1]), tf.reshape(ind, [-1])], axis=-1)
+
+  # gather all the high confidence boxes and classes
+  box = tf.gather_nd(box, bind)
+  classifications = tf.gather_nd(classificationsi, bind)
+
+  # resize and clip the boxes
+  bsize = tf.shape(ind)[0]
+  box = tf.reshape(box, [bsize, k, -1])
+  classifications = tf.reshape(classifications, [bsize, k, -1])
+  return objectness, box, classifications
+
+
+def segment_nms(boxes, classes, confidence, k, iou_thresh):
+  """This is a quick nms that works on very well for small values of k, this 
+  was developed to operate for tflite models as the tiled NMS is far too slow 
+  and typically is not able to compile with tflite. This NMS does not account 
+  for classes, and only works to quickly filter boxes on phones. 
+
+  Args: 
+    boxes: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
+    classes: a `Tensor` of shape [batch size, N, num_classes] that needs to be 
+      filtered.
+    confidence: a `Tensor` of shape [batch size, N] that needs to be 
+      filtered.
+    k: a `integer` for the maximum number of boxes to keep after filtering
+    iou_thresh: a `float` for the value above which boxes are consdered to be 
+      too similar, the closer to 1.0 the less that gets though. 
+  
+  Return:
+    boxes: filtered `Tensor` of shape [batch size, k, 4]
+    classes: filtered `Tensor` of shape [batch size, k, num_classes] t
+    confidence: filtered `Tensor` of shape [batch size, k] 
+  """
+  mrange = tf.range(k)
+  mask_x = tf.tile(
+      tf.transpose(tf.expand_dims(mrange, axis=-1), perm=[1, 0]), [k, 1])
+  mask_y = tf.tile(tf.expand_dims(mrange, axis=-1), [1, k])
+  mask_diag = tf.expand_dims(mask_x > mask_y, axis=0)
+
+  iou = box_ops.aggregated_comparitive_iou(boxes, iou_type=0)
+
+  # duplicate boxes
+  iou_mask = iou >= iou_thresh
+  iou_mask = tf.logical_and(mask_diag, iou_mask)
+  iou *= tf.cast(iou_mask, iou.dtype)
+
+  can_suppress_others = 1 - tf.cast(
+      tf.reduce_any(iou_mask, axis=-2), boxes.dtype)
+
+  # build a mask of the boxes that need to exit
+  raw = tf.cast(can_suppress_others, boxes.dtype)
+
+  boxes *= tf.expand_dims(raw, axis=-1)
+  confidence *= tf.cast(raw, confidence.dtype)
+  classes *= tf.cast(tf.expand_dims(raw, axis=-1), classes.dtype)
+  return boxes, classes, confidence
+
+
+def nms(boxes,
+        classes,
+        confidence,
+        k,
+        pre_nms_thresh,
+        nms_thresh,
+        prenms_top_k=500):
+  """This is a quick nms that works on very well for small values of k, this 
+  was developed to operate for tflite models as the tiled NMS is far too slow 
+  and typically is not able to compile with tflite. This NMS does not account 
+  for classes, and only works to quickly filter boxes on phones. 
+
+  Args: 
+    boxes: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
+    classes: a `Tensor` of shape [batch size, N, num_classes] that needs to be 
+      filtered.
+    confidence: a `Tensor` of shape [batch size, N] that needs to be 
+      filtered.
+    k: a `integer` for the maximum number of boxes to keep after filtering
+    nms_thresh: a `float` for the value above which boxes are consdered to be 
+      too similar, the closer to 1.0 the less that gets though. 
+    pre_nms_top_k: an int number of top candidate detections per class
+      before NMS.
+
+  Return:
+    boxes: filtered `Tensor` of shape [batch size, k, 4]
+    classes: filtered `Tensor` of shape [batch size, k, num_classes]
+    confidence: filtered `Tensor` of shape [batch size, k] 
+  """
+
+  # sort the boxes
+  confidence = tf.reduce_max(classes, axis=-1)
+  confidence, boxes, classes = sort_drop(confidence, boxes, classes,
+                                         prenms_top_k)
+
+  # apply non max supression
+  boxes, classes, confidence = segment_nms(boxes, classes, confidence,
+                                           prenms_top_k, nms_thresh)
+
+  # sort the classes of the unspressed boxes
+  class_confidence, class_ind = tf.math.top_k(
+      classes, k=tf.shape(classes)[-1], sorted=True)
+
+  # set low confidence classes to zero
+  mask = tf.fill(
+      tf.shape(class_confidence),
+      tf.cast(pre_nms_thresh, dtype=class_confidence.dtype))
+  mask = tf.math.ceil(tf.nn.relu(class_confidence - mask))
+  class_confidence = tf.cast(class_confidence, mask.dtype) * mask
+  class_ind = tf.cast(class_ind, mask.dtype) * mask
+
+  # sort the classes and take the top_n as an short cut to doing a true
+  # per class NMS
+  top_n = tf.math.minimum(100, tf.shape(classes)[-1])
+  classes = class_ind[..., :top_n]
+  confidence = class_confidence[..., :top_n]
+
+  # reshape and map multiple classes to boxes
+  boxes = tf.expand_dims(boxes, axis=-2)
+  boxes = tf.tile(boxes, [1, 1, top_n, 1])
+
+  shape = tf.shape(boxes)
+  boxes = tf.reshape(boxes, [shape[0], -1, 4])
+  classes = tf.reshape(classes, [shape[0], -1])
+  confidence = tf.reshape(confidence, [shape[0], -1])
+
+  # drop all the low class confidence boxes again
+  confidence, boxes, classes = sort_drop(confidence, boxes, classes, k)
+
+  # mask the boxes classes and scores then toa final reshape before returning
+  mask = tf.fill(
+      tf.shape(confidence), tf.cast(pre_nms_thresh, dtype=confidence.dtype))
+  mask = tf.math.ceil(tf.nn.relu(confidence - mask))
+  confidence = confidence * mask
+  mask = tf.expand_dims(mask, axis=-1)
+  boxes = boxes * mask
+  classes = classes * mask
+
+  classes = tf.squeeze(classes, axis=-1)
+  return boxes, classes, confidence