Commit 5627de3d authored by anivegesana's avatar anivegesana

Add YOLO model

parent 34e39103
...@@ -17,9 +17,9 @@ repository.

## Description

YOLO v1, the original implementation, was released in 2015, providing a
groundbreaking algorithm that could quickly process images and locate objects
in a single pass through the detector. The original implementation used a
backbone derived from the state-of-the-art object classifiers of the time, like
[GoogLeNet](https://arxiv.org/abs/1409.4842) and
[VGG](https://arxiv.org/abs/1409.1556). More attention was given to the novel

...@@ -32,14 +32,14 @@ update and develop this model.

YOLO v3 and v4 serve as the most up-to-date and capable versions of the YOLO
network group. This model uses a custom backbone called Darknet53 that uses
knowledge gained from the ResNet paper to improve its predictions. The new
backbone also allows objects to be detected at multiple scales. As for the
new detection head, the model now predicts the bounding boxes using a set of
anchor box priors (Anchor Boxes) as suggestions. Multiscale predictions in
combination with anchor boxes allow the network to make up to 1000 object
predictions on a single image. Finally, the new loss function forces the network
to make better predictions by using Intersection over Union (IOU) to inform the
model's confidence rather than relying on the mean squared error of the entire
output.

...@@ -80,5 +80,5 @@ connected to a new, more powerful backbone if a person chose to.

[![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/)

DISCLAIMER: this YOLO implementation is still under development. No support
will be provided during the development phase.
"""Contains common building blocks for yolo neural networks."""
import tensorflow as tf
import tensorflow.keras as ks
import tensorflow.keras.backend as K
# from official.vision.beta.projects.yolo.ops import loss_utils
from official.vision.beta.projects.yolo.ops import box_ops
# from official.vision.beta.projects.yolo.losses import yolo_loss
# from official.vision.beta.projects.yolo.ops import nms_ops
@ks.utils.register_keras_serializable(package='yolo')
class YoloLayer(ks.Model):
def __init__(self,
masks,
anchors,
classes,
iou_thresh=0.0,
ignore_thresh=0.7,
truth_thresh=1.0,
nms_thresh=0.6,
max_delta=10.0,
loss_type='ciou',
use_tie_breaker=True,
iou_normalizer=1.0,
cls_normalizer=1.0,
obj_normalizer=1.0,
use_scaled_loss=False,
darknet=None,
pre_nms_points=5000,
label_smoothing=0.0,
max_boxes=200,
new_cords=False,
path_scale=None,
scale_xy=None,
nms_type='greedy',
objectness_smooth=False,
**kwargs):
"""
parameters for the loss functions used at each detection head output
Args:
classes: `int` for the number of classes
mask: `List[int]` for the output level that this specific model output
level
anchors: `List[List[int]]` for the anchor boxes that are used in the model
at all levels
scale_anchors: `int` for how much to scale this level to get the orginal
input shape
ignore_thresh: `float` for the IOU value over which the loss is not
propagated, and a detection is assumed to have been made
truth_thresh: `float` for the IOU value over which the loss is propagated
despite a detection being made
loss_type: `str` for the typeof iou loss to use with in {ciou, diou,
giou, iou}
iou_normalizer: `float` for how much to scale the loss on the IOU or the
boxes
cls_normalizer: `float` for how much to scale the loss on the classes
obj_normalizer: `float` for how much to scale loss on the detection map
objectness_smooth: `float` for how much to smooth the loss on the
detection map
use_scaled_loss: `bool` for whether to use the scaled loss
or the traditional loss
label_smoothing: `float` for how much to smooth the loss on the classes
new_cords: `bool` for which scaling type to use
scale_xy: dictionary `float` values inidcating how far each pixel can see
outside of its containment of 1.0. a value of 1.2 indicates there is a
20% extended radius around each pixel that this specific pixel can
predict values for a center at. the center can range from 0 - value/2
to 1 + value/2, this value is set in the yolo filter, and resused here.
there should be one value for scale_xy for each level from min_level to
max_level
max_delta: gradient clipping to apply to the box loss
nms_type: "greedy",
nms_thresh: 0.6,
iou_thresh: 0.213,
name=None,
Return:
loss: `float` for the actual loss
box_loss: `float` loss on the boxes used for metrics
conf_loss: `float` loss on the confidence used for metrics
class_loss: `float` loss on the classes used for metrics
avg_iou: `float` metric for the average iou between predictions
and ground truth
avg_obj: `float` metric for the average confidence of the model
for predictions
recall50: `float` metric for how accurate the model is
precision50: `float` metric for how precise the model is
"""
super().__init__(**kwargs)
self._masks = masks
self._anchors = anchors
self._thresh = iou_thresh
self._ignore_thresh = ignore_thresh
self._truth_thresh = truth_thresh
self._iou_normalizer = iou_normalizer
self._cls_normalizer = cls_normalizer
self._obj_normalizer = obj_normalizer
self._objectness_smooth = objectness_smooth
self._nms_thresh = nms_thresh
self._max_boxes = max_boxes
self._max_delta = max_delta
self._classes = classes
self._loss_type = loss_type
self._use_tie_breaker = use_tie_breaker
self._use_scaled_loss = use_scaled_loss
self._darknet = darknet
self._pre_nms_points = pre_nms_points
self._label_smoothing = label_smoothing
self._keys = list(masks.keys())
self._len_keys = len(self._keys)
self._new_cords = new_cords
self._path_scale = path_scale or {
key: 2**int(key) for key, _ in masks.items()
}
self._nms_types = {
'greedy': 1,
'iou': 2,
'giou': 3,
'ciou': 4,
'diou': 5,
'class_independent': 6,
'weighted_diou': 7
}
self._nms_type = self._nms_types[nms_type]
if self._nms_type >= 2 and self._nms_type <= 5:
self._nms = nms_ops.TiledNMS(iou_type=nms_type)
self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()}
self._generator = {}
self._len_mask = {}
for key in self._keys:
anchors = [self._anchors[mask] for mask in self._masks[key]]
self._generator[key] = self.get_generators(anchors, self._path_scale[key],
key)
self._len_mask[key] = len(self._masks[key])
return
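# A minimal construction sketch (hypothetical values; the anchor sizes below
# are illustrative, not tuned priors). With three output levels '3'-'5' and
# three anchor indices per level:
#
#   layer = YoloLayer(
#       masks={'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]},
#       anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
#                [59, 119], [116, 90], [156, 198], [373, 326]],
#       classes=80,
#       iou_thresh=0.25,
#       nms_thresh=0.6,
#       max_boxes=200,
#       nms_type='greedy')
#
# path_scale then defaults to {'3': 8, '4': 16, '5': 32} and scale_xy to 1.0
# at every level.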
def get_generators(self, anchors, path_scale, path_key):
# NOTE: the grid generator is stubbed out while loss_utils is under
# development; parse_prediction_path expects a callable here once the
# commented lines below are restored.
# anchor_generator = loss_utils.GridGenerator(
# anchors, scale_anchors=path_scale)
# return anchor_generator
return None
def rm_nan_inf(self, x, val=0.0):
x = tf.where(tf.math.is_nan(x), tf.cast(val, dtype=x.dtype), x)
x = tf.where(tf.math.is_inf(x), tf.cast(val, dtype=x.dtype), x)
return x
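# A sketch of the expected behavior:
#   rm_nan_inf(tf.constant([1.0, float('nan'), float('inf')]))
#   # -> [1.0, 0.0, 0.0]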
def parse_prediction_path(self, key, inputs):
shape_ = tf.shape(inputs)
shape = inputs.get_shape().as_list()
batchsize, height, width = shape_[0], shape[1], shape[2]
generator = self._generator[key]
len_mask = self._len_mask[key]
scale_xy = self._scale_xy[key]
# reshape the yolo output to (batchsize,
# height,
# width,
# number_anchors,
# remaining_points)
data = tf.reshape(inputs, [-1, height, width, len_mask, self._classes + 5])
# use the grid generator to get the formatted anchor boxes and grid points
# in shape [1, height, width, 2]
centers, anchors = generator(height, width, batchsize, dtype=data.dtype)
# # tempcode
# centers /= tf.cast([width, height], centers.dtype)
# anchors /= tf.cast([width, height], anchors.dtype)
# split the yolo detections into boxes, object score map, classes
boxes, obns_scores, class_scores = tf.split(
data, [4, 1, self._classes], axis=-1)
# determine the number of classes
classes = class_scores.get_shape().as_list()[
-1] #tf.shape(class_scores)[-1]
# # configurable to use the new coordinates in scaled Yolo v4 or not
# if not self._new_cords[key]:
# # coordinates from regular yolov3 - v4
# _, _, boxes = yolo_loss.get_predicted_box(
# tf.cast(height, data.dtype), tf.cast(width, data.dtype), boxes,
# anchors, centers, scale_xy)
# else:
# # coordinates from scaled yolov4
# _, _, boxes = yolo_loss.get_predicted_box_newcords(
# tf.cast(height, data.dtype), tf.cast(width, data.dtype), boxes,
# anchors, centers, scale_xy)
# NOTE: the decode step above is still stubbed out; until it is restored,
# the raw (x, y, w, h) head outputs pass through to the conversion below.
# convert boxes from yolo (x, y, w, h) to tensorflow (ymin, xmin, ymax, xmax)
boxes = box_ops.xcycwh_to_yxyx(boxes)
# apply a sigmoid activation to the detection map
obns_scores = tf.math.sigmoid(obns_scores)
# threshold the detection map
obns_mask = tf.cast(obns_scores > self._thresh, obns_scores.dtype)
# convert the detection map to class detection probabilities
class_scores = tf.math.sigmoid(class_scores) * obns_mask * obns_scores
class_scores *= tf.cast(class_scores > self._thresh, class_scores.dtype)
fill = height * width * len_mask
# flatten predictions to [batchsize, N, -1] for non-max suppression
boxes = tf.reshape(boxes, [-1, fill, 4])
class_scores = tf.reshape(class_scores, [-1, fill, classes])
obns_scores = tf.reshape(obns_scores, [-1, fill])
return obns_scores, boxes, class_scores
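# Shape sketch for one level (hypothetical numbers): with a 416x416 input,
# level '3' has stride 8 (path_scale default 2**3), so height = width = 52.
# With 3 anchors and 80 classes the head output [batch, 52, 52, 3 * 85] is
# reshaped above to [batch, 52, 52, 3, 85] and finally flattened to
# boxes [batch, 8112, 4], class_scores [batch, 8112, 80] and
# obns_scores [batch, 8112], where 8112 = 52 * 52 * 3.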
def call(self, inputs):
boxes = []
class_scores = []
object_scores = []
levels = list(inputs.keys())
min_level = int(min(levels))
max_level = int(max(levels))
# aggregate boxes over each scale
for i in range(min_level, max_level + 1):
key = str(i)
object_scores_, boxes_, class_scores_ = self.parse_prediction_path(
key, inputs[key])
boxes.append(boxes_)
class_scores.append(class_scores_)
object_scores.append(object_scores_)
# collate all predictions
boxes = tf.concat(boxes, axis=1)
object_scores = K.concatenate(object_scores, axis=1)
class_scores = K.concatenate(class_scores, axis=1)
# # apply nms
# if self._nms_type == 7:
# boxes, class_scores, object_scores = nms_ops.non_max_suppression2(
# boxes,
# class_scores,
# object_scores,
# self._max_boxes,
# pre_nms_thresh = self._thresh,
# nms_thresh = self._nms_thresh,
# prenms_top_k=self._pre_nms_points)
# elif self._nms_type == 6:
# boxes, class_scores, object_scores = nms_ops.nms(
# boxes,
# class_scores,
# object_scores,
# self._max_boxes,
# self._thresh,
# self._nms_thresh,
# prenms_top_k=self._pre_nms_points)
# elif self._nms_type == 1:
# # greedy NMS
# boxes = tf.cast(boxes, dtype=tf.float32)
# class_scores = tf.cast(class_scores, dtype=tf.float32)
# nms_items = tf.image.combined_non_max_suppression(
# tf.expand_dims(boxes, axis=-2),
# class_scores,
# self._pre_nms_points,
# self._max_boxes,
# iou_threshold=self._nms_thresh,
# score_threshold=self._thresh)
# # cast the boxes and predictions back to the original datatype
# boxes = tf.cast(nms_items.nmsed_boxes, object_scores.dtype)
# class_scores = tf.cast(nms_items.nmsed_classes, object_scores.dtype)
# object_scores = tf.cast(nms_items.nmsed_scores, object_scores.dtype)
#
# else:
# boxes = tf.cast(boxes, dtype=tf.float32)
# class_scores = tf.cast(class_scores, dtype=tf.float32)
# boxes, confidence, classes, valid = self._nms.complete_nms(
# tf.expand_dims(boxes, axis=-2),
# class_scores,
# pre_nms_top_k=self._pre_nms_points,
# max_num_detections=self._max_boxes,
# nms_iou_threshold=self._nms_thresh,
# pre_nms_score_threshold=self._thresh)
# boxes = tf.cast(boxes, object_scores.dtype)
# class_scores = tf.cast(classes, object_scores.dtype)
# object_scores = tf.cast(confidence, object_scores.dtype)
# compute the number of valid detections
num_detections = tf.math.reduce_sum(tf.math.ceil(object_scores), axis=-1)
# format and return
return {
'bbox': boxes,
'classes': class_scores,
'confidence': object_scores,
'num_detections': num_detections,
}
@property
def losses(self):
""" Generates a dictionary of losses to apply to each path
Done in the detection generator because all parameters are the same
across both loss and detection generator
"""
# loss_dict = {}
# for key in self._keys:
# loss_dict[key] = yolo_loss.Yolo_Loss(
# classes=self._classes,
# anchors=self._anchors,
# darknet=self._darknet,
# truth_thresh=self._truth_thresh[key],
# ignore_thresh=self._ignore_thresh[key],
# loss_type=self._loss_type[key],
# iou_normalizer=self._iou_normalizer[key],
# cls_normalizer=self._cls_normalizer[key],
# obj_normalizer=self._obj_normalizer[key],
# new_cords=self._new_cords[key],
# objectness_smooth=self._objectness_smooth[key],
# use_scaled_loss=self._use_scaled_loss,
# label_smoothing=self._label_smoothing,
# mask=self._masks[key],
# max_delta=self._max_delta[key],
# scale_anchors=self._path_scale[key],
# scale_x_y=self._scale_xy[key])
# return loss_dict
return None
def get_config(self):
# NOTE: only a subset of the constructor arguments is serialized here; the
# key names match the constructor so that deserialization can round-trip.
return {
'masks': dict(self._masks),
'anchors': [list(a) for a in self._anchors],
'iou_thresh': self._thresh,
'max_boxes': self._max_boxes,
}
from official.core import registry
import tensorflow as tf
import tensorflow.keras as ks
from typing import *
from yolo.configs import yolo
from official.vision.beta.modeling.backbones import factory
from yolo.modeling.backbones.darknet import build_darknet
from yolo.modeling.backbones.darknet import Darknet
from yolo.modeling.decoders.yolo_decoder import YoloDecoder
from yolo.modeling.heads.yolo_head import YoloHead
from yolo.modeling.layers.detection_generator import YoloLayer
# Static base Yolo models that do not require configuration,
# similar to a backbone model id.
# This is done to greatly simplify the model config.
# The structure is as follows: model version {v3, v4, v#, ... etc},
# then the model config type {regular, tiny, small, large, ... etc}.
YOLO_MODELS = {
"v4":
dict(
regular=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
csp=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=5,
fpn_depth=5,
path_process_len=6),
csp_large=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=7,
fpn_depth=7,
path_process_len=8,
fpn_filter_scale=2),
),
"v3":
dict(
regular=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
spp=dict(
embed_spp=True,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
),
}
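# For example, the base configuration of a tiny YOLO v4 is looked up as
# YOLO_MODELS['v4']['tiny'], which yields dict(embed_spp=False, use_fpn=False,
# max_level_process_len=2, path_process_len=1).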
class Yolo(ks.Model):
"""The YOLO model class."""
def __init__(self,
backbone=None,
decoder=None,
head=None,
filter=None,
**kwargs):
"""Detection initialization function.
Args:
backbone: `tf.keras.Model` a backbone network.
decoder: `tf.keras.Model` a decoder network.
head: `RetinaNetHead`, the RetinaNet head.
filter: the detection generator.
**kwargs: keyword arguments to be passed.
"""
super(Yolo, self).__init__(**kwargs)
self._config_dict = {
'backbone': backbone,
'decoder': decoder,
'head': head,
'filter': filter
}
# model components
self._backbone = backbone
self._decoder = decoder
self._head = head
self._filter = filter
return
def call(self, inputs, training=False):
maps = self._backbone(inputs)
decoded_maps = self._decoder(maps)
raw_predictions = self._head(decoded_maps)
if training:
return {"raw_output": raw_predictions}
else:
# Post-processing.
predictions = self._filter(raw_predictions)
predictions.update({"raw_output": raw_predictions})
return predictions
@property
def backbone(self):
return self._backbone
@property
def decoder(self):
return self._decoder
@property
def head(self):
return self._head
@property
def filter(self):
return self._filter
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
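# A wiring sketch (the component instances are assumed to be built elsewhere,
# e.g. via the backbone/decoder/head factories imported above; the names are
# illustrative):
#
#   model = Yolo(backbone=backbone, decoder=decoder, head=head, filter=filter)
#   outputs = model(images, training=False)
#   # outputs['bbox'], outputs['classes'], outputs['confidence'] and
#   # outputs['num_detections'] come from the YoloLayer filter, while
#   # outputs['raw_output'] holds the per-level head predictions.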
import tensorflow as tf
from official.vision.beta.ops import box_ops as box_utils
from official.vision.beta.projects.yolo.ops import box_ops
NMS_TILE_SIZE = 512
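# Candidate boxes are processed in tiles of this size; the fixed tile shape is
# what keeps the while-loop based NMS below free of data-dependent shapes and
# therefore TPU compatible.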
class TiledNMS():
IOU_TYPES = {'diou': 0, 'giou': 1, 'ciou': 2, 'iou': 3}
def __init__(self, iou_type='diou', beta=0.6):
'''Initialization for all non max suppression operations mainly used to
select hyperparameters for the iou type and scaling.
Args:
iou_type: `str` for the version of IOU to use {diou, giou, ciou, iou}.
beta: `float` for the amount to scale regularization on distance iou.
'''
self._iou_type = TiledNMS.IOU_TYPES[iou_type]
self._beta = beta
def _self_suppression(self, iou, _, iou_sum):
batch_size = tf.shape(iou)[0]
can_suppress_others = tf.cast(
tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]),
iou.dtype)
iou_suppressed = tf.reshape(
tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
[batch_size, -1, 1]) * iou
iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
return [
iou_suppressed,
tf.reduce_any(iou_sum - iou_sum_new > 0.5), iou_sum_new
]
def _cross_suppression(self, boxes, box_slice, iou_threshold, inner_idx):
batch_size = tf.shape(boxes)[0]
new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
[batch_size, NMS_TILE_SIZE, 4])
#iou = box_ops.bbox_overlap(new_slice, box_slice)
iou = box_ops.aggregated_comparitive_iou(
new_slice, box_slice, beta=self._beta, iou_type=self._iou_type)
ret_slice = tf.expand_dims(
tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
2) * box_slice
return boxes, ret_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(self, boxes, iou_threshold, output_size, idx):
"""Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).
Args:
boxes: a tensor with a shape of [batch_size, anchors, 4].
iou_threshold: a float representing the threshold for whether boxes
overlap too much with respect to IOU.
output_size: an int32 tensor of size [batch_size]. Representing the number
of selected boxes for each batch.
idx: an integer scalar representing an induction variable.
Returns:
boxes: updated boxes.
iou_threshold: pass down iou_threshold to the next iteration.
output_size: the updated output_size.
idx: the updated induction variable.
"""
num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
batch_size = tf.shape(boxes)[0]
# Iterates over tiles that can possibly suppress the current tile.
box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
[batch_size, NMS_TILE_SIZE, 4])
_, box_slice, _, _ = tf.while_loop(
lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
self._cross_suppression,
[boxes, box_slice, iou_threshold,
tf.constant(0)])
# Iterates over the current tile to compute self-suppression.
# iou = box_ops.bbox_overlap(box_slice, box_slice)
iou = box_ops.aggregated_comparitive_iou(
box_slice, box_slice, beta=self._beta, iou_type=self._iou_type)
mask = tf.expand_dims(
tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
suppressed_iou, _, _ = tf.while_loop(
lambda _iou, loop_condition, _iou_sum: loop_condition,
self._self_suppression,
[iou, tf.constant(True),
tf.reduce_sum(iou, [1, 2])])
suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype),
2)
# Uses box_slice to update the input boxes.
mask = tf.reshape(
tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
boxes = tf.tile(tf.expand_dims(
box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
boxes = tf.reshape(boxes, [batch_size, -1, 4])
# Updates output_size.
output_size += tf.reduce_sum(
tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
return boxes, iou_threshold, output_size, idx + 1
def _sorted_non_max_suppression_padded(self, scores, boxes, max_output_size,
iou_threshold):
"""A wrapper that handles non-maximum suppression.
Assumption:
* The boxes are sorted by scores unless the box is a dot (all coordinates
are zero).
* Boxes with higher scores can be used to suppress boxes with lower
scores.
The overall design of the algorithm is to handle boxes tile-by-tile:
boxes = boxes.pad_to_multiply_of(tile_size)
num_tiles = len(boxes) // tile_size
output_boxes = []
for i in range(num_tiles):
box_tile = boxes[i*tile_size : (i+1)*tile_size]
for j in range(i - 1):
suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
iou = bbox_overlap(box_tile, suppressing_tile)
# if the box is suppressed in iou, clear it to a dot
box_tile *= _update_boxes(iou)
# Iteratively handle the diagonal tile.
iou = _box_overlap(box_tile, box_tile)
iou_changed = True
while iou_changed:
# boxes that are not suppressed by anything else
suppressing_boxes = _get_suppressing_boxes(iou)
# boxes that are suppressed by suppressing_boxes
suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
# clear iou to 0 for boxes that are suppressed, as they cannot be used
# to suppress other boxes any more
new_iou = _clear_iou(iou, suppressed_boxes)
iou_changed = (new_iou != iou)
iou = new_iou
# remaining boxes that can still suppress others, are selected boxes.
output_boxes.append(_get_suppressing_boxes(iou))
if len(output_boxes) >= max_output_size:
break
Args:
scores: a tensor with a shape of [batch_size, anchors].
boxes: a tensor with a shape of [batch_size, anchors, 4].
max_output_size: a scalar integer `Tensor` representing the maximum number
of boxes to be selected by non max suppression.
iou_threshold: a float representing the threshold for whether boxes
overlap too much with respect to IOU.
Returns:
nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
dtype as input scores.
nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
same dtype as input boxes.
"""
batch_size = tf.shape(boxes)[0]
num_boxes = tf.shape(boxes)[1]
pad = tf.cast(
tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
tf.int32) * NMS_TILE_SIZE - num_boxes
boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
scores = tf.pad(
tf.cast(scores, tf.float32), [[0, 0], [0, pad]], constant_values=-1)
num_boxes += pad
def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
return tf.logical_and(
tf.reduce_min(output_size) < max_output_size,
idx < num_boxes // NMS_TILE_SIZE)
selected_boxes, _, output_size, _ = tf.while_loop(
_loop_cond, self._suppression_loop_body, [
boxes, iou_threshold,
tf.zeros([batch_size], tf.int32),
tf.constant(0)
])
idx = num_boxes - tf.cast(
tf.nn.top_k(
tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
tf.int32)
idx = tf.minimum(idx, num_boxes - 1)
idx = tf.reshape(
idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
boxes = tf.reshape(
tf.gather(tf.reshape(boxes, [-1, 4]), idx),
[batch_size, max_output_size, 4])
boxes = boxes * tf.cast(
tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
output_size, [-1, 1, 1]), boxes.dtype)
scores = tf.reshape(
tf.gather(tf.reshape(scores, [-1, 1]), idx),
[batch_size, max_output_size])
scores = scores * tf.cast(
tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
output_size, [-1, 1]), scores.dtype)
return scores, boxes
def _select_top_k_scores(self, scores_in, pre_nms_num_detections):
# batch_size, num_anchors, num_class = scores_in.get_shape().as_list()
scores_shape = scores_in.get_shape().as_list() #tf.shape(scores_in)
batch_size, num_anchors, num_class = scores_shape[0], scores_shape[
1], scores_shape[2]
scores_trans = tf.transpose(scores_in, perm=[0, 2, 1])
scores_trans = tf.reshape(scores_trans, [-1, num_anchors])
top_k_scores, top_k_indices = tf.nn.top_k(
scores_trans, k=pre_nms_num_detections, sorted=True)
top_k_scores = tf.reshape(top_k_scores,
[-1, num_class, pre_nms_num_detections])
top_k_indices = tf.reshape(top_k_indices,
[-1, num_class, pre_nms_num_detections])
return tf.transpose(top_k_scores,
[0, 2, 1]), tf.transpose(top_k_indices, [0, 2, 1])
def complete_nms(self,
boxes,
scores,
pre_nms_top_k=5000,
pre_nms_score_threshold=0.05,
nms_iou_threshold=0.5,
max_num_detections=100):
"""Generate the final detections given the model outputs.
This implementation unrolls classes dimension while using the tf.while_loop
to implement the batched NMS, so that it can be parallelized at the batch
dimension. It should give better performance compared to v1 implementation.
It is TPU compatible.
Args:
boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size,
N, 1, 4], which stacks box predictions on all feature levels. The N is the
number of total anchors on all levels.
scores: a tensor with shape [batch_size, N, num_classes], which stacks
class probability on all feature levels. The N is the number of total
anchors on all levels. The num_classes is the number of classes the
model predicted. Note that the class_outputs here is the raw score.
pre_nms_top_k: an int number of top candidate detections per class
before NMS.
pre_nms_score_threshold: a float representing the threshold for deciding
when to remove boxes based on score.
nms_iou_threshold: a float representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
max_num_detections: a scalar representing maximum number of boxes retained
over all classes.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
representing sorted confidence scores for detected boxes. The values are
between [0, 1].
nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
representing classes for detected boxes.
valid_detections: `int` Tensor of shape [batch_size] only the top
`valid_detections` boxes are valid detections.
"""
with tf.name_scope('nms'):
nmsed_boxes = []
nmsed_classes = []
nmsed_scores = []
valid_detections = []
boxes_shape = boxes.get_shape().as_list()
batch_size, _, num_classes_for_box, _ = (boxes_shape[0], boxes_shape[1],
boxes_shape[2], boxes_shape[3])
scores_shape = scores.get_shape().as_list()
_, total_anchors, num_classes = (scores_shape[0], scores_shape[1],
scores_shape[2])
scores, indices = self._select_top_k_scores(
scores, tf.math.minimum(total_anchors, pre_nms_top_k))
for i in range(num_classes):
boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
scores_i = scores[:, :, i]
# Obtains pre_nms_top_k before running NMS.
boxes_i = tf.gather(boxes_i, indices[:, :, i], batch_dims=1, axis=1)
# Filter out scores.
boxes_i, scores_i = box_utils.filter_boxes_by_scores(
boxes_i, scores_i, min_score_threshold=pre_nms_score_threshold)
(nmsed_scores_i,
nmsed_boxes_i) = self._sorted_non_max_suppression_padded(
tf.cast(scores_i, tf.float32),
tf.cast(boxes_i, tf.float32),
max_num_detections,
iou_threshold=nms_iou_threshold)
nmsed_classes_i = tf.ones_like(nmsed_scores_i, dtype=tf.int32) * i
#tf.fill([batch_size, max_num_detections], i)
nmsed_boxes.append(nmsed_boxes_i)
nmsed_scores.append(nmsed_scores_i)
nmsed_classes.append(nmsed_classes_i)
nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
nmsed_scores = tf.concat(nmsed_scores, axis=1)
nmsed_classes = tf.concat(nmsed_classes, axis=1)
nmsed_scores, indices = tf.nn.top_k(
nmsed_scores, k=max_num_detections, sorted=True)
nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)
valid_detections = tf.reduce_sum(
input_tensor=tf.cast(tf.greater(nmsed_scores, -1), tf.int32), axis=1)
return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
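# A usage sketch (hypothetical tensors, shapes only):
#
#   tiled = TiledNMS(iou_type='diou', beta=0.6)
#   boxes = tf.random.uniform([2, 1000, 1, 4])   # [batch, N, 1, 4]
#   scores = tf.random.uniform([2, 1000, 80])    # [batch, N, num_classes]
#   nms_boxes, nms_scores, nms_classes, valid = tiled.complete_nms(
#       boxes, scores, pre_nms_top_k=500, pre_nms_score_threshold=0.05,
#       nms_iou_threshold=0.5, max_num_detections=100)
#   # nms_boxes: [2, 100, 4]; nms_scores, nms_classes: [2, 100]; valid: [2]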
BASE_NMS = TiledNMS(iou_type='iou', beta=0.6)
def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
iou_threshold):
"""wrapper function to match NMS found in official/vision/beta/ops/nms.py"""
return BASE_NMS._sorted_non_max_suppression_padded(scores, boxes,
max_output_size,
iou_threshold)
def sort_drop(objectness, box, classificationsi, k):
"""This function sorts and drops boxes such that there are only k boxes
sorted by number the objectness or confidence
Args:
objectness: a `Tensor` of shape [batch size, N] that needs to be
filtered.
box: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
classificationsi: a `Tensor` of shape [batch size, N, num_classes] that
needs to be filtered.
k: a `integer` for the maximum number of boxes to keep after filtering
Return:
objectness: filtered `Tensor` of shape [batch size, k]
boxes: filtered `Tensor` of shape [batch size, k, 4]
classifications: filtered `Tensor` of shape [batch size, k, num_classes]
"""
# find the indices of the top-k boxes based on the scores
objectness, ind = tf.math.top_k(objectness, k=k)
# build the indexes
ind_m = tf.ones_like(ind) * tf.expand_dims(
tf.range(0,
tf.shape(objectness)[0]), axis=-1)
bind = tf.stack([tf.reshape(ind_m, [-1]), tf.reshape(ind, [-1])], axis=-1)
# gather all the high confidence boxes and classes
box = tf.gather_nd(box, bind)
classifications = tf.gather_nd(classificationsi, bind)
# resize and clip the boxes
bsize = tf.shape(ind)[0]
box = tf.reshape(box, [bsize, k, -1])
classifications = tf.reshape(classifications, [bsize, k, -1])
return objectness, box, classifications
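# For example (hypothetical shapes): with objectness [8, 10647],
# box [8, 10647, 4], classificationsi [8, 10647, 80] and k=200, the outputs
# are [8, 200], [8, 200, 4] and [8, 200, 80], sorted by descending objectness.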
def segment_nms(boxes, classes, confidence, k, iou_thresh):
"""This is a quick nms that works on very well for small values of k, this
was developed to operate for tflite models as the tiled NMS is far too slow
and typically is not able to compile with tflite. This NMS does not account
for classes, and only works to quickly filter boxes on phones.
Args:
boxes: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
classes: a `Tensor` of shape [batch size, N, num_classes] that needs to be
filtered.
confidence: a `Tensor` of shape [batch size, N] that needs to be
filtered.
k: a `integer` for the maximum number of boxes to keep after filtering
iou_thresh: a `float` for the value above which boxes are considered to be
too similar, the closer to 1.0 the less that gets through.
Return:
boxes: filtered `Tensor` of shape [batch size, k, 4].
classes: filtered `Tensor` of shape [batch size, k, num_classes].
confidence: filtered `Tensor` of shape [batch size, k].
"""
mrange = tf.range(k)
mask_x = tf.tile(
tf.transpose(tf.expand_dims(mrange, axis=-1), perm=[1, 0]), [k, 1])
mask_y = tf.tile(tf.expand_dims(mrange, axis=-1), [1, k])
mask_diag = tf.expand_dims(mask_x > mask_y, axis=0)
iou = box_ops.aggregated_comparitive_iou(boxes, iou_type=0)
# mark duplicate boxes: any box with high IOU to an earlier, higher-ranked box
iou_mask = iou >= iou_thresh
iou_mask = tf.logical_and(mask_diag, iou_mask)
iou *= tf.cast(iou_mask, iou.dtype)
can_suppress_others = 1 - tf.cast(
tf.reduce_any(iou_mask, axis=-2), boxes.dtype)
# build a mask of the boxes that survive suppression
raw = tf.cast(can_suppress_others, boxes.dtype)
boxes *= tf.expand_dims(raw, axis=-1)
confidence *= tf.cast(raw, confidence.dtype)
classes *= tf.cast(tf.expand_dims(raw, axis=-1), classes.dtype)
return boxes, classes, confidence
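# Note that mask_diag above is strictly upper triangular, so a box can only be
# suppressed by boxes ranked before it; the inputs are therefore assumed to be
# sorted by confidence (e.g. via sort_drop) before this function is called.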
def nms(boxes,
classes,
confidence,
k,
pre_nms_thresh,
nms_thresh,
prenms_top_k=500):
"""This is a quick nms that works on very well for small values of k, this
was developed to operate for tflite models as the tiled NMS is far too slow
and typically is not able to compile with tflite. This NMS does not account
for classes, and only works to quickly filter boxes on phones.
Args:
boxes: a `Tensor` of shape [batch size, N, 4] that needs to be filtered.
classes: a `Tensor` of shape [batch size, N, num_classes] that needs to be
filtered.
confidence: a `Tensor` of shape [batch size, N] that needs to be
filtered.
k: a `integer` for the maximum number of boxes to keep after filtering
nms_thresh: a `float` for the value above which boxes are considered to be
too similar, the closer to 1.0 the less that gets through.
pre_nms_top_k: an int number of top candidate detections per class
before NMS.
Return:
boxes: filtered `Tensor` of shape [batch size, k, 4].
classes: filtered `Tensor` of shape [batch size, k, num_classes].
confidence: filtered `Tensor` of shape [batch size, k].
"""
# recompute confidence as the max class score, then sort the boxes
confidence = tf.reduce_max(classes, axis=-1)
confidence, boxes, classes = sort_drop(confidence, boxes, classes,
prenms_top_k)
# apply non-max suppression
boxes, classes, confidence = segment_nms(boxes, classes, confidence,
prenms_top_k, nms_thresh)
# sort the classes of the unsuppressed boxes
class_confidence, class_ind = tf.math.top_k(
classes, k=tf.shape(classes)[-1], sorted=True)
# set low confidence classes to zero
mask = tf.fill(
tf.shape(class_confidence),
tf.cast(pre_nms_thresh, dtype=class_confidence.dtype))
mask = tf.math.ceil(tf.nn.relu(class_confidence - mask))
class_confidence = tf.cast(class_confidence, mask.dtype) * mask
class_ind = tf.cast(class_ind, mask.dtype) * mask
# sort the classes and take the top_n as a shortcut to doing a true
# per-class NMS
top_n = tf.math.minimum(100, tf.shape(classes)[-1])
classes = class_ind[..., :top_n]
confidence = class_confidence[..., :top_n]
# reshape and map multiple classes to boxes
boxes = tf.expand_dims(boxes, axis=-2)
boxes = tf.tile(boxes, [1, 1, top_n, 1])
shape = tf.shape(boxes)
boxes = tf.reshape(boxes, [shape[0], -1, 4])
classes = tf.reshape(classes, [shape[0], -1])
confidence = tf.reshape(confidence, [shape[0], -1])
# drop all the low class confidence boxes again
confidence, boxes, classes = sort_drop(confidence, boxes, classes, k)
# mask the boxes, classes and scores, then do a final reshape before returning
mask = tf.fill(
tf.shape(confidence), tf.cast(pre_nms_thresh, dtype=confidence.dtype))
mask = tf.math.ceil(tf.nn.relu(confidence - mask))
confidence = confidence * mask
mask = tf.expand_dims(mask, axis=-1)
boxes = boxes * mask
classes = classes * mask
classes = tf.squeeze(classes, axis=-1)
return boxes, classes, confidence
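# A usage sketch (hypothetical tensors): note that `confidence` is recomputed
# inside nms as the per-box maximum class score.
#
#   boxes = tf.random.uniform([1, 10647, 4])
#   class_scores = tf.random.uniform([1, 10647, 80])
#   confidence = tf.reduce_max(class_scores, axis=-1)
#   out_boxes, out_classes, out_conf = nms(
#       boxes, class_scores, confidence, k=200,
#       pre_nms_thresh=0.25, nms_thresh=0.6, prenms_top_k=500)
#   # out_boxes: [1, 200, 4]; out_classes, out_conf: [1, 200]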