# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.datasets.pascal_voc.load import load
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from keras_cv import bounding_box
def curry_map_function(bounding_box_format, img_size):
"""Mapping function to create batched image and bbox coordinates"""
if img_size is not None:
resizing = keras.layers.Resizing(
height=img_size[0], width=img_size[1], crop_to_aspect_ratio=False
)
# TODO(lukewood): update `keras.layers.Resizing` to support bounding boxes.
def apply(inputs):
# Support image size none.
if img_size is not None:
inputs["image"] = resizing(inputs["image"])
inputs["objects"]["bbox"] = bounding_box.convert_format(
inputs["objects"]["bbox"],
images=inputs["image"],
source="rel_yxyx",
target=bounding_box_format,
)
bounding_boxes = inputs["objects"]["bbox"]
labels = tf.cast(inputs["objects"]["label"], tf.float32)
labels = tf.expand_dims(labels, axis=-1)
bounding_boxes = tf.concat([bounding_boxes, labels], axis=-1)
return {"images": inputs["image"], "bounding_boxes": bounding_boxes}
return apply
def load(
split,
bounding_box_format,
batch_size=None,
shuffle_buffer=None,
shuffle_files=True,
img_size=None,
):
"""Loads the PascalVOC 2007 dataset.
Usage:
```python
dataset, ds_info = keras_cv.datasets.pascal_voc.load(
split="train", bounding_box_format="xywh", batch_size=9
)
```
Args:
split: the split string passed to the `tensorflow_datasets.load()` call. Should
be one of "train", "test", or "validation."
        bounding_box_format: the keras_cv bounding box format to load the boxes into.
            Refer [to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
            for more details on supported bounding box formats.
batch_size: (Optional) how many instances to include in batches after loading. If
not provided, no batching will occur.
shuffle_buffer: (Optional) the size of the buffer to use in shuffling.
shuffle_files: (Optional) whether or not to shuffle files, defaults to True.
        img_size: (Optional) size to resize the images to. By default, images are not
            resized, and `tf.RaggedTensor` batches are produced if batching occurs.
Returns:
tf.data.Dataset containing PascalVOC. Each entry is a dictionary containing
keys {"images": images, "bounding_boxes": bounding_boxes} where images is a
Tensor of shape [batch, H, W, 3] and bounding_boxes is a `tf.RaggedTensor` of
shape [batch, None, 5].
"""
dataset, dataset_info = tfds.load(
"voc/2007", split=split, shuffle_files=shuffle_files, with_info=True
)
dataset = dataset.map(
curry_map_function(bounding_box_format=bounding_box_format, img_size=img_size),
num_parallel_calls=tf.data.AUTOTUNE,
)
if shuffle_buffer:
dataset = dataset.shuffle(shuffle_buffer, reshuffle_each_iteration=True)
if batch_size is not None:
dataset = dataset.apply(
tf.data.experimental.dense_to_ragged_batch(batch_size=batch_size)
)
return dataset, dataset_info
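

# The snippet below is a minimal usage sketch (not part of the original module):
# it loads a batched "xywh" dataset and inspects one batch. It assumes the
# `voc/2007` TFDS dataset can be downloaded in the current environment.
if __name__ == "__main__":
    demo_dataset, demo_info = load(
        split="train", bounding_box_format="xywh", batch_size=8, img_size=(512, 512)
    )
    for example in demo_dataset.take(1):
        # With `img_size` set, images are dense; bounding boxes stay ragged
        # because each image has a different number of objects.
        print(example["images"].shape)          # (8, 512, 512, 3)
        print(example["bounding_boxes"].shape)  # (8, None, 5)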
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data loader for Pascal VOC 2012 segmentation dataset.
The image classification and object detection (bounding box) data is covered by existing
TF datasets in https://www.tensorflow.org/datasets/catalog/voc. The segmentation data
(both class segmentation and instance segmentation) is included in VOC 2012, but is not
offered by TF-DS yet. This module fills that gap until the TFDS team addresses the
feature (b/252870855, https://github.com/tensorflow/datasets/issues/27 and
https://github.com/tensorflow/datasets/pull/1198).
The schema design is similar to the existing design of TFDS, but trimmed to fit the needs
of KerasCV models.
This module contains the following functionality:
1. Download and unpack the original data from Pascal VOC.
2. Reprocess and build up a dataset that includes image, class label, object bounding
   boxes, and class and instance segmentation masks.
3. Produce tfrecords from the dataset.
4. Load existing tfrecords produced in step 3.
"""
import logging
import multiprocessing
import os.path
import tarfile
import xml
import tensorflow as tf
DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
# Note that this list doesn't contain the background class. In the classification use
# case, the label is 0-based (aeroplane -> 0), whereas in the segmentation use case, 0 is
# reserved for background, so aeroplane maps to 1.
CLASSES = [
"aeroplane",
"bicycle",
"bird",
"boat",
"bottle",
"bus",
"car",
"cat",
"chair",
"cow",
"diningtable",
"dog",
"horse",
"motorbike",
"person",
"pottedplant",
"sheep",
"sofa",
"train",
"tvmonitor",
]
# This is used to map between string class to index.
CLASS_TO_INDEX = {name: index for index, name in enumerate(CLASSES)}
# For the mask data in the PNG file, the encoded raw pixel value needs to be converted
# to the proper class index. In the following map, [0, 0, 0] is converted to 0,
# [128, 0, 0] is converted to 1, and so on. Also note that the mask classes are 1-based
# since class 0 is reserved for the background. The [128, 0, 0] (class 1) is mapped
# to `aeroplane`.
VOC_PNG_COLOR_VALUE = [
[0, 0, 0],
[128, 0, 0],
[0, 128, 0],
[128, 128, 0],
[0, 0, 128],
[128, 0, 128],
[0, 128, 128],
[128, 128, 128],
[64, 0, 0],
[192, 0, 0],
[64, 128, 0],
[192, 128, 0],
[64, 0, 128],
[192, 0, 128],
[64, 128, 128],
[192, 128, 128],
[0, 64, 0],
[128, 64, 0],
[0, 192, 0],
[128, 192, 0],
[0, 64, 128],
]
# Will be populated by _maybe_populate_voc_color_mapping() below.
VOC_PNG_COLOR_MAPPING = None
def _maybe_populate_voc_color_mapping():
# Lazy creation of VOC_PNG_COLOR_MAPPING, which could take 64M memory.
global VOC_PNG_COLOR_MAPPING
if VOC_PNG_COLOR_MAPPING is None:
VOC_PNG_COLOR_MAPPING = [0] * (256**3)
for i, colormap in enumerate(VOC_PNG_COLOR_VALUE):
VOC_PNG_COLOR_MAPPING[
(colormap[0] * 256 + colormap[1]) * 256 + colormap[2]
] = i
# There is a special mapping with [224, 224, 192] -> 255
VOC_PNG_COLOR_MAPPING[224 * 256 * 256 + 224 * 256 + 192] = 255
VOC_PNG_COLOR_MAPPING = tf.constant(VOC_PNG_COLOR_MAPPING)
return VOC_PNG_COLOR_MAPPING
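

def _example_color_to_class(color):
    """Illustrative helper (an assumption, not part of the original API).

    Shows how a single RGB triplet is flattened into the index used by
    `VOC_PNG_COLOR_MAPPING`: e.g. [128, 0, 0] -> 1 ("aeroplane"),
    [0, 0, 0] -> 0 (background), and [224, 224, 192] -> 255 (boundary).
    """
    mapping = _maybe_populate_voc_color_mapping()
    index = (color[0] * 256 + color[1]) * 256 + color[2]
    return int(mapping[index])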
def _download_pascal_voc_2012(data_url, local_dir_path=None, override_extract=False):
"""Fetch the original Pascal VOC 2012 from remote URL.
Args:
data_url: string, the URL for the Pascal VOC data, should be in a tar package.
        local_dir_path: string, the local directory path to save the data.
        override_extract: bool, whether to re-extract the data even if the extracted
            `VOCdevkit` folder already exists.
Returns:
the path to the folder of extracted Pascal VOC data.
"""
if not local_dir_path:
fname = "pascal_voc_2012/data.tar"
else:
# Make sure the directory exists
if not os.path.exists(local_dir_path):
os.makedirs(local_dir_path, exist_ok=True)
fname = os.path.join(local_dir_path, "data.tar")
data_file_path = tf.keras.utils.get_file(fname=fname, origin=data_url)
logging.info("Received data file from %s", data_file_path)
    # Extract the data into the same directory as the tar file.
data_directory = os.path.dirname(data_file_path)
# Note that the extracted data will be located in a folder `VOCdevkit` (from tar).
# If the folder is already there and `override_extract` is False, then we will skip
# extracting the folder again.
if override_extract or not os.path.exists(
os.path.join(data_directory, "VOCdevkit")
):
logging.info("Extract data into %s", data_directory)
with tarfile.open(data_file_path) as f:
f.extractall(data_directory)
return os.path.join(data_directory, "VOCdevkit", "VOC2012")
def _parse_annotation_data(annotation_file_path):
"""Parse the annotation XML file for the image.
The annotation contains the metadata, as well as the object bounding box information.
"""
with tf.io.gfile.GFile(annotation_file_path, "r") as f:
root = xml.etree.ElementTree.parse(f).getroot()
size = root.find("size")
width = int(size.find("width").text)
height = int(size.find("height").text)
objects = []
for obj in root.findall("object"):
# Get object's label name.
label = CLASS_TO_INDEX[obj.find("name").text.lower()]
        # Get the object's pose name.
pose = obj.find("pose").text.lower()
is_truncated = obj.find("truncated").text == "1"
is_difficult = obj.find("difficult").text == "1"
bndbox = obj.find("bndbox")
xmax = int(bndbox.find("xmax").text)
xmin = int(bndbox.find("xmin").text)
ymax = int(bndbox.find("ymax").text)
ymin = int(bndbox.find("ymin").text)
objects.append(
{
"label": label,
"pose": pose,
"bbox": [ymin, xmin, ymax, xmax],
"is_truncated": is_truncated,
"is_difficult": is_difficult,
}
)
return {"width": width, "height": height, "objects": objects}
def _get_image_ids(data_dir, split):
data_file_mapping = {"train": "train.txt", "eval": "val.txt", None: "trainval.txt"}
with tf.io.gfile.GFile(
os.path.join(data_dir, "ImageSets", "Segmentation", data_file_mapping[split]),
"r",
) as f:
image_ids = f.read().splitlines()
logging.info(f"Received {len(image_ids)} images for {split} dataset.")
return image_ids
def _parse_single_image(image_file_path):
data_dir, image_file_name = os.path.split(image_file_path)
data_dir = os.path.normpath(os.path.join(data_dir, os.path.pardir))
image_id, _ = os.path.splitext(image_file_name)
class_segmentation_file_path = os.path.join(
data_dir, "SegmentationClass", image_id + ".png"
)
object_segmentation_file_path = os.path.join(
data_dir, "SegmentationObject", image_id + ".png"
)
annotation_file_path = os.path.join(data_dir, "Annotations", image_id + ".xml")
image_annotations = _parse_annotation_data(annotation_file_path)
result = {
"image/filename": image_id + ".jpg",
"image/file_path": image_file_path,
"segmentation/class/file_path": class_segmentation_file_path,
"segmentation/object/file_path": object_segmentation_file_path,
}
result.update(image_annotations)
    # The labels field should be the same as the set of 'objects.label' values.
labels = list(set([o["label"] for o in result["objects"]]))
result["labels"] = sorted(labels)
return result
def _build_metadata(data_dir, image_ids):
# Parallel process all the images.
image_file_paths = [
os.path.join(data_dir, "JPEGImages", i + ".jpg") for i in image_ids
]
pool_size = 10 if len(image_ids) > 10 else len(image_ids)
with multiprocessing.Pool(pool_size) as p:
metadata = p.map(_parse_single_image, image_file_paths)
    # Transpose the metadata, converting from a list of dicts to a dict of lists.
keys = [
"image/filename",
"image/file_path",
"segmentation/class/file_path",
"segmentation/object/file_path",
"labels",
"width",
"height",
]
result = {}
for key in keys:
values = [value[key] for value in metadata]
result[key] = values
# The ragged objects need some special handling
for key in ["label", "pose", "bbox", "is_truncated", "is_difficult"]:
values = []
objects = [value["objects"] for value in metadata]
        for objects_in_image in objects:
            values.append([o[key] for o in objects_in_image])
result["objects/" + key] = values
return result
# With jit_compile=True, there is about 0.4 sec of compilation overhead, but it saves
# about 0.2 sec per 1000 images. See
# https://github.com/keras-team/keras-cv/pull/943#discussion_r1001092882 for more details.
@tf.function(jit_compile=True)
def _decode_png_mask(mask):
"""Decode the raw PNG image and convert it to 2D tensor with probably class."""
# Cast the mask to int32 since the original uint8 will overflow when multiple with 256
mask = tf.cast(mask, tf.int32)
mask = mask[:, :, 0] * 256 * 256 + mask[:, :, 1] * 256 + mask[:, :, 2]
mask = tf.expand_dims(tf.gather(VOC_PNG_COLOR_MAPPING, mask), -1)
mask = tf.cast(mask, tf.uint8)
return mask
def _load_images(example):
image_file_path = example.pop("image/file_path")
segmentation_class_file_path = example.pop("segmentation/class/file_path")
segmentation_object_file_path = example.pop("segmentation/object/file_path")
image = tf.io.read_file(image_file_path)
image = tf.image.decode_jpeg(image)
segmentation_class_mask = tf.io.read_file(segmentation_class_file_path)
segmentation_class_mask = tf.image.decode_png(segmentation_class_mask)
segmentation_class_mask = _decode_png_mask(segmentation_class_mask)
segmentation_object_mask = tf.io.read_file(segmentation_object_file_path)
segmentation_object_mask = tf.image.decode_png(segmentation_object_mask)
segmentation_object_mask = _decode_png_mask(segmentation_object_mask)
example.update(
{
"image": image,
"class_segmentation": segmentation_class_mask,
"object_segmentation": segmentation_object_mask,
}
)
return example
def _build_dataset_from_metadata(metadata):
# The objects need some manual conversion to ragged tensor.
metadata["labels"] = tf.ragged.constant(metadata["labels"])
metadata["objects/label"] = tf.ragged.constant(metadata["objects/label"])
metadata["objects/pose"] = tf.ragged.constant(metadata["objects/pose"])
metadata["objects/is_truncated"] = tf.ragged.constant(
metadata["objects/is_truncated"]
)
metadata["objects/is_difficult"] = tf.ragged.constant(
metadata["objects/is_difficult"]
)
metadata["objects/bbox"] = tf.ragged.constant(
metadata["objects/bbox"], ragged_rank=1
)
dataset = tf.data.Dataset.from_tensor_slices(metadata)
dataset = dataset.map(_load_images, num_parallel_calls=tf.data.AUTOTUNE)
return dataset
def load(
split="train",
data_dir=None,
):
"""Load the Pacal VOC 2012 dataset.
This function will download the data tar file from remote if needed, and untar to
the local `data_dir`, and build dataset from it.
Args:
split: string, can be 'train', 'eval', or None. When None, both train and eval data
will be loaded. Default to `train`
data_dir: string, local directory path for the loaded data. This will be used to
download the data file, and unzip. It will be used as a cach directory.
Default to None, and `~/.keras/pascal_voc_2012` will be used.
"""
supported_split_value = ["train", "eval", None]
if split not in supported_split_value:
raise ValueError(
f"The support value for `split` are {supported_split_value}. "
f"Got: {split}"
)
if data_dir is not None:
data_dir = os.path.expanduser(data_dir)
data_dir = _download_pascal_voc_2012(DATA_URL, local_dir_path=data_dir)
image_ids = _get_image_ids(data_dir, split)
metadata = _build_metadata(data_dir, image_ids)
_maybe_populate_voc_color_mapping()
dataset = _build_dataset_from_metadata(metadata)
return dataset
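

# Minimal usage sketch (not part of the original module): build the training
# split and look at one example. This assumes the full VOC 2012 tarball can be
# downloaded from DATA_URL in the current environment.
if __name__ == "__main__":
    train_ds = load(split="train")
    example = next(iter(train_ds.take(1)))
    print(example["image"].shape)               # (H, W, 3) uint8 image
    print(example["class_segmentation"].shape)  # (H, W, 1) class mask, 255 = boundary
    print(example["objects/bbox"])              # [num_objects, 4] boxes in yxyx order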
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pathlib
import sys
import tensorflow as tf
from absl import flags
from keras_cv.datasets.pascal_voc import segmentation
class PascalVocSegmentationDataTest(tf.test.TestCase):
def setUp(self):
super().setUp()
self.tempdir = self.get_tempdir()
        # Note that this will not work with bazel; it needs to be rewritten to rely on
        # FLAGS.test_srcdir.
self.test_data_tar_path = os.path.abspath(
os.path.join(
os.path.abspath(__file__), os.path.pardir, "test_data", "VOC_mini.tar"
)
)
def get_tempdir(self):
try:
flags.FLAGS.test_tmpdir
except flags.UnparsedFlagAccessError:
# Need to initialize flags when running `pytest`.
flags.FLAGS(sys.argv, known_only=True)
return self.create_tempdir().full_path
def test_download_data(self):
# Since the original data package is too large, we use a small package as a
# replacement.
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
test_data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
self.assertTrue(os.path.exists(test_data_dir))
# Make sure the data is unzipped correctly and populated with correct content
expected_subdirs = [
"Annotations",
"ImageSets",
"JPEGImages",
"SegmentationClass",
"SegmentationObject",
]
for sub_dir in expected_subdirs:
self.assertTrue(os.path.exists(os.path.join(test_data_dir, sub_dir)))
def test_skip_download_and_override(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
test_data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
        # Touch a file in the test_data_dir and make sure it still exists (is not
        # overridden) when _download_pascal_voc_2012 is invoked again.
os.makedirs(os.path.join(test_data_dir, "Annotations", "dummy_dir"))
segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
override_extract=False,
)
self.assertTrue(
os.path.exists(os.path.join(test_data_dir, "Annotations", "dummy_dir"))
)
def test_get_image_ids(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
train_ids = ["2007_000032", "2007_000039", "2007_000063"]
eval_ids = ["2007_000033"]
train_eval_ids = train_ids + eval_ids
self.assertEquals(segmentation._get_image_ids(data_dir, "train"), train_ids)
self.assertEquals(segmentation._get_image_ids(data_dir, "eval"), eval_ids)
self.assertEquals(segmentation._get_image_ids(data_dir, None), train_eval_ids)
def test_parse_annotation_file(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
        # One of the train files.
annotation_file = os.path.join(data_dir, "Annotations", "2007_000032.xml")
metadata = segmentation._parse_annotation_data(annotation_file)
expected_result = {
"height": 281,
"width": 500,
"objects": [
{
"label": 0,
"pose": "frontal",
"bbox": [78, 104, 183, 375],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 0,
"pose": "left",
"bbox": [88, 133, 123, 197],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 14,
"pose": "rear",
"bbox": [180, 195, 229, 213],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 14,
"pose": "rear",
"bbox": [189, 26, 238, 44],
"is_truncated": False,
"is_difficult": False,
},
],
}
self.assertEquals(metadata, expected_result)
def test_decode_png_mask(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
mask_file = os.path.join(data_dir, "SegmentationClass", "2007_000032.png")
mask = tf.io.decode_png(tf.io.read_file(mask_file))
segmentation._maybe_populate_voc_color_mapping()
mask = segmentation._decode_png_mask(mask)
self.assertEquals(mask.shape, (281, 500, 1))
self.assertEquals(tf.reduce_max(mask), 255) # The 255 value is for the boundary
self.assertEquals(tf.reduce_min(mask), 0) # The 0 value is for the background
# The mask contains two classes, 1 and 15, see the label section in the previous
# test case.
self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 1), tf.int32)), 4734)
self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 15), tf.int32)), 866)
def test_parse_single_image(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
image_file = os.path.join(data_dir, "JPEGImages", "2007_000032.jpg")
result_dict = segmentation._parse_single_image(image_file)
expected_result = {
"image/filename": "2007_000032.jpg",
"image/file_path": image_file,
"height": 281,
"width": 500,
"objects": [
{
"label": 0,
"pose": "frontal",
"bbox": [78, 104, 183, 375],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 0,
"pose": "left",
"bbox": [88, 133, 123, 197],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 14,
"pose": "rear",
"bbox": [180, 195, 229, 213],
"is_truncated": False,
"is_difficult": False,
},
{
"label": 14,
"pose": "rear",
"bbox": [189, 26, 238, 44],
"is_truncated": False,
"is_difficult": False,
},
],
"labels": [0, 14],
"segmentation/class/file_path": os.path.join(
data_dir, "SegmentationClass", "2007_000032.png"
),
"segmentation/object/file_path": os.path.join(
data_dir, "SegmentationObject", "2007_000032.png"
),
}
self.assertEquals(result_dict, expected_result)
def test_build_metadata(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
image_ids = segmentation._get_image_ids(data_dir, None)
metadata = segmentation._build_metadata(data_dir, image_ids)
self.assertEquals(
metadata["image/filename"],
[
"2007_000032.jpg",
"2007_000039.jpg",
"2007_000063.jpg",
"2007_000033.jpg",
],
)
expected_keys = [
"image/filename",
"image/file_path",
"segmentation/class/file_path",
"segmentation/object/file_path",
"labels",
"width",
"height",
"objects/label",
"objects/pose",
"objects/bbox",
"objects/is_truncated",
"objects/is_difficult",
]
for key in expected_keys:
self.assertLen(metadata[key], 4)
def test_build_dataset(self):
local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
data_dir = segmentation._download_pascal_voc_2012(
data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
local_dir_path=local_data_dir,
)
image_ids = segmentation._get_image_ids(data_dir, None)
metadata = segmentation._build_metadata(data_dir, image_ids)
segmentation._maybe_populate_voc_color_mapping()
dataset = segmentation._build_dataset_from_metadata(metadata)
entry = next(dataset.take(1).as_numpy_iterator())
self.assertEquals(entry["image/filename"], b"2007_000032.jpg")
expected_keys = [
"image",
"image/filename",
"labels",
"width",
"height",
"objects/label",
"objects/pose",
"objects/bbox",
"objects/is_truncated",
"objects/is_difficult",
"class_segmentation",
"object_segmentation",
]
for key in expected_keys:
self.assertIn(key, entry)
# Check the mask png content
png = entry["class_segmentation"]
self.assertEquals(png.shape, (281, 500, 1))
self.assertEquals(tf.reduce_max(png), 255) # The 255 value is for the boundary
self.assertEquals(tf.reduce_min(png), 0) # The 0 value is for the background
# The mask contains two classes, 1 and 15, see the label section in the previous
# test case.
self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 1), tf.int32)), 4734)
self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 15), tf.int32)), 866)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.keypoint.converters import convert_format
from keras_cv.keypoint.formats import REL_XY
from keras_cv.keypoint.formats import XY
from keras_cv.keypoint.utils import filter_out_of_image
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Converter functions for working with keypoints formats."""
import tensorflow as tf
# Internal exception
class _RequiresImagesException(Exception):
pass
def _rel_xy_to_xy(keypoints, images=None):
if images is None:
raise _RequiresImagesException()
shape = tf.cast(tf.shape(images), keypoints.dtype)
h, w = shape[1], shape[2]
x, y, rest = tf.split(keypoints, [1, 1, keypoints.shape[-1] - 2], axis=-1)
return tf.concat([x * w, y * h, rest], axis=-1)
def _xy_to_rel_xy(keypoints, images=None):
if images is None:
raise _RequiresImagesException()
shape = tf.cast(tf.shape(images), keypoints.dtype)
h, w = shape[1], shape[2]
x, y, rest = tf.split(keypoints, [1, 1, keypoints.shape[-1] - 2], axis=-1)
return tf.concat([x / w, y / h, rest], axis=-1)
def _xy_noop(keypoints, images=None):
return keypoints
TO_XY_CONVERTERS = {
"xy": _xy_noop,
"rel_xy": _rel_xy_to_xy,
}
FROM_XY_CONVERTERS = {
"xy": _xy_noop,
"rel_xy": _xy_to_rel_xy,
}
def convert_format(keypoints, source, target, images=None, dtype=None):
"""Converts keypoints from one format to another.
    Supported formats are:
    - `"xy"`, absolute pixel positions.
    - `"rel_xy"`, relative pixel positions.
    Formats are case insensitive.
    Relative formats, abbreviated `rel`, make use of the shapes of the
    `images` passed. In these formats, the coordinates are specified as
    fractions of the host image's width and height.
    `images` may be a ragged Tensor. Note that using a ragged Tensor
    for images may cause a substantial performance loss, as each image
    will need to be processed separately due to the mismatching image
    shapes.
Usage:
```python
images, keypoints = load_my_dataset()
keypoints_in_rel = keras_cv.keypoint.convert_format(
        keypoints,
source='xy',
target='rel_xy',
images=images,
)
```
Args:
keypoints: tf.Tensor or tf.RaggedTensor representing keypoints
in the format specified in the `source` parameter.
`keypoints` can optionally have extra dimensions stacked
on the final axis to store metadata. keypoints should
have a rank between 2 and 4, with the shape
`[num_boxes,*]`, `[batch_size, num_boxes, *]` or
`[batch_size, num_groups, num_keypoints,*]`.
        source: One of `"xy"` or `"rel_xy"`. Used to specify the original
            format of the `keypoints` parameter.
        target: One of `"xy"` or `"rel_xy"`. Used to specify the
            destination format of the `keypoints` parameter.
        images: (Optional) a batch of images aligned with `keypoints` on
            the first axis. Should be rank 3 (`HWC` format) or 4
            (`BHWC` format). Used in some converters to compute
            relative pixel values of the keypoint coordinates.
            Required when transforming between relative and absolute
            formats.
        dtype: the data type to use when transforming the keypoints.
            Defaults to None, i.e. the `keypoints` dtype.
"""
source = source.lower()
target = target.lower()
if source not in TO_XY_CONVERTERS:
raise ValueError(
f"convert_format() received an unsupported format for the argument "
f"`source`. `source` should be one of {TO_XY_CONVERTERS.keys()}. "
f"Got source={source}"
)
if target not in FROM_XY_CONVERTERS:
raise ValueError(
f"convert_format() received an unsupported format for the argument "
f"`target`. `target` should be one of {FROM_XY_CONVERTERS.keys()}. "
f"Got target={target}"
)
if dtype:
keypoints = tf.cast(keypoints, dtype)
if source == target:
return keypoints
keypoints, images, squeeze_axis = _format_inputs(keypoints, images)
try:
in_xy = TO_XY_CONVERTERS[source](keypoints, images=images)
result = FROM_XY_CONVERTERS[target](in_xy, images=images)
except _RequiresImagesException:
raise ValueError(
"convert_format() must receive `images` when transforming "
f"between relative and absolute formats. "
f"convert_format() received source=`{source}`, target=`{target}`, "
f"but images={images}"
)
return _format_outputs(result, squeeze_axis)
def _format_inputs(keypoints, images):
keypoints_rank = len(keypoints.shape)
if keypoints_rank > 4:
raise ValueError(
"Expected keypoints rank to be in [2, 4], got "
f"len(keypoints.shape)={keypoints_rank}."
)
keypoints_includes_batch = keypoints_rank > 2
keypoints_are_grouped = keypoints_rank == 4
if images is not None:
images_rank = len(images.shape)
if images_rank > 4 or images_rank < 3:
raise ValueError(
"Expected images rank to be 3 or 4, got "
f"len(images.shape)={images_rank}."
)
images_include_batch = images_rank == 4
if keypoints_includes_batch != images_include_batch:
raise ValueError(
"convert_format() expects both `keypoints` and `images` to be batched "
f"or both unbatched. Received len(keypoints.shape)={keypoints_rank}, "
f"len(images.shape)={images_rank}. Expected either "
"len(keypoints.shape)=2 and len(images.shape)=3, or "
"len(keypoints.shape)>=3 and len(images.shape)=4."
)
if not images_include_batch:
images = tf.expand_dims(images, axis=0)
squeeze_axis = []
if not keypoints_includes_batch:
keypoints = tf.expand_dims(keypoints, axis=0)
squeeze_axis.append(0)
if not keypoints_are_grouped:
keypoints = tf.expand_dims(keypoints, axis=1)
squeeze_axis.append(1)
return keypoints, images, squeeze_axis
def _format_outputs(result, squeeze_axis):
if len(squeeze_axis) == 0:
return result
return tf.squeeze(result, axis=squeeze_axis)
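

# Minimal sketch of `convert_format` (not part of the original module): a single
# set of "xy" keypoints on a 100x200 (height x width) image converted to
# "rel_xy"; x is divided by the image width and y by the image height.
if __name__ == "__main__":
    demo_image = tf.zeros([100, 200, 3])
    demo_points = tf.constant([[20.0, 50.0], [180.0, 90.0]])
    print(
        convert_format(demo_points, source="xy", target="rel_xy", images=demo_image)
    )
    # -> [[0.1, 0.5], [0.9, 0.9]]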
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import keypoint
xy_keypoints = tf.constant(
[[[10, 20], [110, 120], [210, 220]], [[20, 30], [120, 130], [220, 230]]],
dtype=tf.float32,
)
rel_xy_keypoints = tf.constant(
[
[[0.01, 0.04], [0.11, 0.24], [0.21, 0.44]],
[[0.02, 0.06], [0.12, 0.26], [0.22, 0.46]],
],
dtype=tf.float32,
)
images = tf.ones([2, 500, 1000, 3])
keypoints = {
"xy": xy_keypoints,
"rel_xy": rel_xy_keypoints,
}
test_cases = [
(f"{source}_{target}", source, target)
for (source, target) in itertools.permutations(keypoints.keys(), 2)
] + [("xy_xy", "xy", "xy")]
class ConvertersTestCase(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(*test_cases)
def test_converters(self, source, target):
source_keypoints = keypoints[source]
target_keypoints = keypoints[target]
self.assertAllClose(
keypoint.convert_format(
source_keypoints, source=source, target=target, images=images
),
target_keypoints,
)
@parameterized.named_parameters(*test_cases)
def test_converters_unbatched(self, source, target):
source_keypoints = keypoints[source][0]
target_keypoints = keypoints[target][0]
self.assertAllClose(
keypoint.convert_format(
source_keypoints, source=source, target=target, images=images[0]
),
target_keypoints,
)
@parameterized.named_parameters(*test_cases)
def test_converters_ragged_groups(self, source, target):
source_keypoints = keypoints[source]
target_keypoints = keypoints[target]
def create_ragged_group(ins):
res = []
for b, groups in zip(ins, [[1, 2], [0, 3]]):
res.append(tf.RaggedTensor.from_row_lengths(b, groups))
return tf.stack(res, axis=0)
source_keypoints = create_ragged_group(source_keypoints)
target_keypoints = create_ragged_group(target_keypoints)
self.assertAllClose(
keypoint.convert_format(
source_keypoints, source=source, target=target, images=images
),
target_keypoints,
)
@parameterized.named_parameters(*test_cases)
def test_converters_with_metadata(self, source, target):
source_keypoints = keypoints[source]
target_keypoints = keypoints[target]
def add_metadata(ins):
return tf.concat([ins, tf.ones([2, 3, 5])], axis=-1)
source_keypoints = add_metadata(source_keypoints)
target_keypoints = add_metadata(target_keypoints)
self.assertAllClose(
keypoint.convert_format(
source_keypoints, source=source, target=target, images=images
),
target_keypoints,
)
def test_raise_errors_when_missing_shape(self):
with self.assertRaises(ValueError) as e:
keypoint.convert_format(keypoints["xy"], source="xy", target="rel_xy")
self.assertEqual(
str(e.exception),
"convert_format() must receive `images` when transforming "
"between relative and absolute formats. convert_format() "
"received source=`xy`, target=`rel_xy`, but images=None",
)
@parameterized.named_parameters(
(
"keypoint_rank",
tf.ones([2, 3, 4, 2, 1]),
None,
"Expected keypoints rank to be in [2, 4], got len(keypoints.shape)=5.",
),
(
"images_rank",
tf.ones([4, 2]),
tf.ones([35, 35]),
"Expected images rank to be 3 or 4, got len(images.shape)=2.",
),
(
"batch_mismatch",
tf.ones([2, 4, 2]),
tf.ones([35, 35, 3]),
"convert_format() expects both `keypoints` and `images` to be batched or "
"both unbatched. Received len(keypoints.shape)=3, len(images.shape)=3. "
"Expected either len(keypoints.shape)=2 and len(images.shape)=3, or "
"len(keypoints.shape)>=3 and len(images.shape)=4.",
),
)
def test_input_format_exception(self, keypoints, images, expected):
with self.assertRaises(ValueError) as e:
keypoint.convert_format(
keypoints, source="xy", target="rel_xy", images=images
)
self.assertEqual(str(e.exception), expected)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
formats.py contains axis information for each supported format.
"""
class XY:
"""XY contains axis indices for the XY format.
All values in the XY format should be absolute pixel values.
The XY format consists of the following required indices:
- X: the width position
- Y: the height position
and the following optional indices, used in some KerasCV components:
- CLASS: class of the keypoints
- CONFIDENCE: confidence of the keypoints
"""
X = 0
Y = 1
CLASS = 2
CONFIDENCE = 3
class REL_XY:
"""REL_XY contains axis indices for the REL_XY format.
    REL_XY is like XY, but each value is relative to the width and height of the
    original image. Values are fractions of the original image's width and height,
    respectively.
The REL_XY format consists of the following required indices:
- X: the width position
- Y: the height position
and the following optional indices, used in some KerasCV components:
- CLASS: class of the keypoints
- CONFIDENCE: confidence of the keypoints
"""
X = 0
Y = 1
CLASS = 2
CONFIDENCE = 3
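

# Minimal sketch (not part of the original module): the classes above only
# carry axis indices, so they can be used to index the last dimension of a
# keypoint tensor.
if __name__ == "__main__":
    import tensorflow as tf

    demo_keypoints = tf.constant([[10.0, 20.0, 3.0, 0.9]])  # x, y, class, confidence
    print(float(demo_keypoints[0, XY.X]))           # 10.0
    print(float(demo_keypoints[0, XY.CONFIDENCE]))  # 0.9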
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for keypoint transformation."""
import tensorflow as tf
H_AXIS = -3
W_AXIS = -2
def filter_out_of_image(keypoints, image):
"""Discards keypoints if falling outside of the image.
Args:
keypoints: a, possibly ragged, 2D (ungrouped), 3D (grouped)
keypoint data in the 'xy' format.
image: a 3D tensor in the HWC format.
Returns:
tf.RaggedTensor: a 2D or 3D ragged tensor with at least one
ragged rank containing only keypoint in the image.
"""
image_shape = tf.cast(tf.shape(image), keypoints.dtype)
mask = tf.math.logical_and(
tf.math.logical_and(
keypoints[..., 0] >= 0, keypoints[..., 0] < image_shape[W_AXIS]
),
tf.math.logical_and(
keypoints[..., 1] >= 0, keypoints[..., 1] < image_shape[H_AXIS]
),
)
masked = tf.ragged.boolean_mask(keypoints, mask)
if isinstance(masked, tf.RaggedTensor):
return masked
return tf.RaggedTensor.from_tensor(masked)
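

# Minimal sketch (not part of the original module): keypoints outside a 50x50
# image are dropped and a ragged tensor is returned.
if __name__ == "__main__":
    demo_points = tf.constant([[10.0, 20.0], [60.0, 10.0]])
    demo_image = tf.zeros([50, 50, 3])
    print(filter_out_of_image(demo_points, demo_image))  # [[10.0, 20.0]]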
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.keypoint.utils import filter_out_of_image
class UtilsTestCase(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
(
"all inside",
tf.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
tf.zeros([100, 100, 3]),
tf.ragged.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
),
(
"some inside",
tf.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
tf.zeros([50, 50, 3]),
tf.ragged.constant([[10.0, 20.0], [30.0, 40.0]]),
),
(
"ragged input",
tf.RaggedTensor.from_row_lengths(
[[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]], [2, 1]
),
tf.zeros([50, 50, 3]),
tf.RaggedTensor.from_row_lengths([[10.0, 20.0], [30.0, 40.0]], [2, 0]),
),
(
"height - width confusion",
tf.constant([[[10.0, 20.0]], [[40.0, 30.0]], [[30.0, 40.0]]]),
tf.zeros((50, 40, 3)),
tf.ragged.constant([[[10.0, 20.0]], [], [[30.0, 40.0]]], ragged_rank=1),
),
)
def test_result(self, keypoints, image, expected):
self.assertAllClose(filter_out_of_image(keypoints, image), expected)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tensorflow.keras.layers import CenterCrop
from tensorflow.keras.layers import RandomBrightness
from tensorflow.keras.layers import RandomContrast
from tensorflow.keras.layers import RandomCrop
from tensorflow.keras.layers import RandomHeight
from tensorflow.keras.layers import RandomRotation
from tensorflow.keras.layers import RandomTranslation
from tensorflow.keras.layers import RandomWidth
from tensorflow.keras.layers import RandomZoom
from tensorflow.keras.layers import Rescaling
from tensorflow.keras.layers import Resizing
from keras_cv.layers.feature_pyramid import FeaturePyramid
from keras_cv.layers.object_detection.anchor_generator import AnchorGenerator
from keras_cv.layers.object_detection.nms_prediction_decoder import NmsPredictionDecoder
from keras_cv.layers.object_detection.non_max_suppression import NonMaxSuppression
from keras_cv.layers.object_detection.retina_net_label_encoder import (
RetinaNetLabelEncoder,
)
from keras_cv.layers.preprocessing.aug_mix import AugMix
from keras_cv.layers.preprocessing.augmenter import Augmenter
from keras_cv.layers.preprocessing.auto_contrast import AutoContrast
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle
from keras_cv.layers.preprocessing.cut_mix import CutMix
from keras_cv.layers.preprocessing.equalization import Equalization
from keras_cv.layers.preprocessing.fourier_mix import FourierMix
from keras_cv.layers.preprocessing.grayscale import Grayscale
from keras_cv.layers.preprocessing.grid_mask import GridMask
from keras_cv.layers.preprocessing.maybe_apply import MaybeApply
from keras_cv.layers.preprocessing.mix_up import MixUp
from keras_cv.layers.preprocessing.mosaic import Mosaic
from keras_cv.layers.preprocessing.posterization import Posterization
from keras_cv.layers.preprocessing.rand_augment import RandAugment
from keras_cv.layers.preprocessing.random_augmentation_pipeline import (
RandomAugmentationPipeline,
)
from keras_cv.layers.preprocessing.random_channel_shift import RandomChannelShift
from keras_cv.layers.preprocessing.random_choice import RandomChoice
from keras_cv.layers.preprocessing.random_color_degeneration import (
RandomColorDegeneration,
)
from keras_cv.layers.preprocessing.random_color_jitter import RandomColorJitter
from keras_cv.layers.preprocessing.random_crop_and_resize import RandomCropAndResize
from keras_cv.layers.preprocessing.random_cutout import RandomCutout
from keras_cv.layers.preprocessing.random_flip import RandomFlip
from keras_cv.layers.preprocessing.random_gaussian_blur import RandomGaussianBlur
from keras_cv.layers.preprocessing.random_hue import RandomHue
from keras_cv.layers.preprocessing.random_jpeg_quality import RandomJpegQuality
from keras_cv.layers.preprocessing.random_saturation import RandomSaturation
from keras_cv.layers.preprocessing.random_sharpness import RandomSharpness
from keras_cv.layers.preprocessing.random_shear import RandomShear
from keras_cv.layers.preprocessing.randomly_zoomed_crop import RandomlyZoomedCrop
from keras_cv.layers.preprocessing.solarization import Solarization
from keras_cv.layers.regularization.drop_path import DropPath
from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D
from keras_cv.layers.regularization.stochastic_depth import StochasticDepth
from keras_cv.layers.spatial_pyramid import SpatialPyramidPooling
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
# TODO(scottzhu): Register it later due to the conflict in the retina_net
# @tf.keras.utils.register_keras_serializable(package="keras_cv")
class FeaturePyramid(tf.keras.layers.Layer):
"""Implements a Feature Pyramid Network.
This implements the paper:
Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
Serge Belongie.
Feature Pyramid Networks for Object Detection.
(https://arxiv.org/pdf/1612.03144)
Feature Pyramid Networks (FPNs) are basic components that are added to an
existing feature extractor (CNN) to combine features at different scales. For the
basic FPN, the inputs are features `Ci` from different levels of a CNN, which is
usually the last block for each level, where the feature is scaled from the image
by a factor of `1/2^i`.
There is an output associated with each level in the basic FPN. The output Pi
at level `i` (corresponding to Ci) is given by performing a merge operation on
the outputs of:
1) a lateral operation on Ci (usually a conv2D layer with kernel = 1 and strides = 1)
2) a top-down upsampling operation from Pi+1 (except for the top most level)
The final output of each level will also have a conv2D operation
(usually with kernel = 3 and strides = 1).
    The inputs to the layer should be a dict with int keys that match the
    pyramid_levels, e.g. for `pyramid_levels` = [2,3,4,5], the expected input dict
    should be `{2:c2, 3:c3, 4:c4, 5:c5}`.
    The output of the layer will have the same structure as the inputs: a dict with
    int keys and one value for each level.
Args:
min_level: a python int for the lowest level of the pyramid for
feature extraction.
max_level: a python int for the highest level of the pyramid for
feature extraction.
num_channels: an integer representing the number of channels for the FPN
operations. Defaults to 256.
        lateral_layers: a python dict with int keys that match the pyramid
            levels. The values of the dict should be `keras.Layer`s, which will be
            called with the feature activation outputs from the backbone at each
            level. Defaults to None, in which case a `keras.Conv2D` layer with a 1x1
            kernel will be created for each pyramid level.
        output_layers: a python dict with int keys that match the pyramid
            levels. The values of the dict should be `keras.Layer`s, which will be
            called with the feature inputs and merged result from upstream levels.
            Defaults to None, in which case a `keras.Conv2D` layer with a 3x3 kernel
            will be created for each pyramid level.
Sample Usage:
```python
inp = tf.keras.layers.Input((384, 384, 3))
backbone = tf.keras.applications.EfficientNetB0(input_tensor=inp, include_top=False)
layer_names = ['block2b_add', 'block3b_add', 'block5c_add', 'top_activation']
backbone_outputs = {}
for i, layer_name in enumerate(layer_names):
backbone_outputs[i+2] = backbone.get_layer(layer_name).output
# output_dict is a dict with 2, 3, 4, 5 as keys
output_dict = keras_cv.layers.FeaturePyramid(min_level=2, max_level=5)(backbone_outputs)
```
"""
def __init__(
self,
min_level,
max_level,
num_channels=256,
lateral_layers=None,
output_layers=None,
**kwargs,
):
super().__init__(**kwargs)
self.min_level = min_level
self.max_level = max_level
self.pyramid_levels = list(range(min_level, max_level + 1))
self.num_channels = num_channels
# required for successful serialization
self.lateral_layers_passed = lateral_layers
self.output_layers_passed = output_layers
if not lateral_layers:
# populate self.lateral_ops with default FPN Conv2D 1X1 layers
self.lateral_layers = {}
for i in self.pyramid_levels:
self.lateral_layers[i] = tf.keras.layers.Conv2D(
self.num_channels,
kernel_size=1,
strides=1,
padding="same",
name=f"lateral_P{i}",
)
else:
self._validate_user_layers(lateral_layers, "lateral_layers")
self.lateral_layers = lateral_layers
# Output conv2d layers.
if not output_layers:
self.output_layers = {}
for i in self.pyramid_levels:
self.output_layers[i] = tf.keras.layers.Conv2D(
self.num_channels,
kernel_size=3,
strides=1,
padding="same",
name=f"output_P{i}",
)
else:
self._validate_user_layers(output_layers, "output_layers")
self.output_layers = output_layers
# the same upsampling layer is used for all levels
self.top_down_op = tf.keras.layers.UpSampling2D(size=2)
# the same merge layer is used for all levels
self.merge_op = tf.keras.layers.Add()
def _validate_user_layers(self, user_input, param_name):
if (
not isinstance(user_input, dict)
or sorted(user_input.keys()) != self.pyramid_levels
):
raise ValueError(
f"Expect {param_name} to be a dict with keys as "
f"{self.pyramid_levels}, got {user_input}"
)
def call(self, features):
        # Note that this assertion might not be true for all subclasses. It is
        # possible to have an FPN with higher levels than the backbone outputs provide.
if (
not isinstance(features, dict)
or sorted(features.keys()) != self.pyramid_levels
):
raise ValueError(
"FeaturePyramid expects input features to be a dict with int keys "
"that match the values provided in pyramid_levels. "
f"Expect feature keys: {self.pyramid_levels}, got: {features}"
)
return self.build_feature_pyramid(features)
def build_feature_pyramid(self, input_features):
# To illustrate the connection/topology, the basic flow for a FPN with level
# 3, 4, 5 is like below:
#
# input_l5 -> conv2d_1x1_l5 ----V---> conv2d_3x3_l5 -> output_l5
# V
# upsample2d
# V
# input_l4 -> conv2d_1x1_l4 -> Add -> conv2d_3x3_l4 -> output_l4
# V
# upsample2d
# V
# input_l3 -> conv2d_1x1_l3 -> Add -> conv2d_3x3_l3 -> output_l3
output_features = {}
reversed_levels = list(sorted(input_features.keys(), reverse=True))
top_level = reversed_levels[0]
for level in reversed_levels:
output = self.lateral_layers[level](input_features[level])
if level < top_level:
                # The topmost output doesn't need to be merged with any upstream
                # output; every other level merges with the upsampled level above it.
upstream_output = self.top_down_op(output_features[level + 1])
output = self.merge_op([output, upstream_output])
output_features[level] = output
        # Apply the output layers afterwards so that we don't leak them to the downstream levels.
for level in reversed_levels:
output_features[level] = self.output_layers[level](output_features[level])
return output_features
def get_config(self):
config = {
"min_level": self.min_level,
"max_level": self.max_level,
"num_channels": self.num_channels,
"lateral_layers": self.lateral_layers_passed,
"output_layers": self.output_layers_passed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
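

# Minimal sketch (not part of the original module): run the layer on dummy
# backbone features for levels 2-5; each output keeps its input's spatial size
# and has `num_channels` (256 by default) channels.
if __name__ == "__main__":
    fpn = FeaturePyramid(min_level=2, max_level=5)
    features = {
        2: tf.ones([1, 64, 64, 3]),
        3: tf.ones([1, 32, 32, 3]),
        4: tf.ones([1, 16, 16, 3]),
        5: tf.ones([1, 8, 8, 3]),
    }
    outputs = fpn(features)
    print({level: out.shape for level, out in outputs.items()})
    # -> {5: (1, 8, 8, 256), 4: (1, 16, 16, 256), 3: (1, 32, 32, 256), 2: (1, 64, 64, 256)}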
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import FeaturePyramid
class FeaturePyramidTest(tf.test.TestCase):
def test_return_type_dict(self):
layer = FeaturePyramid(min_level=2, max_level=5)
c2 = tf.ones([2, 64, 64, 3])
c3 = tf.ones([2, 32, 32, 3])
c4 = tf.ones([2, 16, 16, 3])
c5 = tf.ones([2, 8, 8, 3])
inputs = {2: c2, 3: c3, 4: c4, 5: c5}
output = layer(inputs)
self.assertTrue(isinstance(output, dict))
self.assertEquals(sorted(output.keys()), [2, 3, 4, 5])
def test_result_shapes(self):
layer = FeaturePyramid(min_level=2, max_level=5)
c2 = tf.ones([2, 64, 64, 3])
c3 = tf.ones([2, 32, 32, 3])
c4 = tf.ones([2, 16, 16, 3])
c5 = tf.ones([2, 8, 8, 3])
inputs = {2: c2, 3: c3, 4: c4, 5: c5}
output = layer(inputs)
for level in inputs.keys():
self.assertEquals(output[level].shape[1], inputs[level].shape[1])
self.assertEquals(output[level].shape[2], inputs[level].shape[2])
self.assertEquals(output[level].shape[3], layer.num_channels)
# Test with different resolution and channel size
c2 = tf.ones([2, 64, 128, 4])
c3 = tf.ones([2, 32, 64, 8])
c4 = tf.ones([2, 16, 32, 16])
c5 = tf.ones([2, 8, 16, 32])
inputs = {2: c2, 3: c3, 4: c4, 5: c5}
layer = FeaturePyramid(min_level=2, max_level=5)
output = layer(inputs)
for level in inputs.keys():
self.assertEquals(output[level].shape[1], inputs[level].shape[1])
self.assertEquals(output[level].shape[2], inputs[level].shape[2])
self.assertEquals(output[level].shape[3], layer.num_channels)
def test_with_keras_input_tensor(self):
# This mimic the model building with Backbone network
layer = FeaturePyramid(min_level=2, max_level=5)
c2 = tf.keras.layers.Input([64, 64, 3])
c3 = tf.keras.layers.Input([32, 32, 3])
c4 = tf.keras.layers.Input([16, 16, 3])
c5 = tf.keras.layers.Input([8, 8, 3])
inputs = {2: c2, 3: c3, 4: c4, 5: c5}
output = layer(inputs)
for level in inputs.keys():
self.assertEquals(output[level].shape[1], inputs[level].shape[1])
self.assertEquals(output[level].shape[2], inputs[level].shape[2])
self.assertEquals(output[level].shape[3], layer.num_channels)
def test_invalid_lateral_layers(self):
lateral_layers = [tf.keras.layers.Conv2D(256, 1)] * 3
with self.assertRaisesRegexp(ValueError, "Expect lateral_layers to be a dict"):
_ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers)
lateral_layers = {
2: tf.keras.layers.Conv2D(256, 1),
3: tf.keras.layers.Conv2D(256, 1),
4: tf.keras.layers.Conv2D(256, 1),
}
with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"):
_ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers)
def test_invalid_output_layers(self):
output_layers = [tf.keras.layers.Conv2D(256, 3)] * 3
with self.assertRaisesRegexp(ValueError, "Expect output_layers to be a dict"):
_ = FeaturePyramid(min_level=2, max_level=5, output_layers=output_layers)
output_layers = {
2: tf.keras.layers.Conv2D(256, 3),
3: tf.keras.layers.Conv2D(256, 3),
4: tf.keras.layers.Conv2D(256, 3),
}
with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"):
_ = FeaturePyramid(min_level=2, max_level=5, output_layers=output_layers)
def test_invalid_input_features(self):
layer = FeaturePyramid(min_level=2, max_level=5)
c2 = tf.ones([2, 64, 64, 3])
c3 = tf.ones([2, 32, 32, 3])
c4 = tf.ones([2, 16, 16, 3])
c5 = tf.ones([2, 8, 8, 3])
list_input = [c2, c3, c4, c5]
with self.assertRaisesRegexp(ValueError, "expects input features to be a dict"):
layer(list_input)
dict_input_with_missing_feature = {2: c2, 3: c3, 4: c4}
with self.assertRaisesRegexp(ValueError, "Expect feature keys.*[2, 3, 4, 5]"):
layer(dict_input_with_missing_feature)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow import keras
from keras_cv import bounding_box
class AnchorGenerator(keras.layers.Layer):
"""AnchorGenerator generates anchors for multiple feature maps.
AnchorGenerator takes multiple scales and generates anchor boxes based on the anchor
sizes, scales, aspect ratios, and strides provided. To invoke AnchorGenerator, call
it on the image that needs anchor boxes.
    `sizes` and `strides` must match structurally - they are pairs. Scales and
    aspect ratios can either be a list, which is then used for all of the sizes
    (aka levels), or a dictionary of the form `{'level_{number}': [parameters at scale...]}`.
Args:
bounding_box_format: The format of bounding boxes to generate. Refer
[to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
for more details on supported bounding box formats.
sizes: A list of integers that represent the anchor sizes for each level,
or a dictionary of integer lists with each key representing a level.
For each anchor size, anchor height will be `anchor_size / sqrt(aspect_ratio)`,
and anchor width will be `anchor_size * sqrt(aspect_ratio)`. This is repeated
for each scale and aspect ratio.
scales: A list of floats corresponding to multipliers that will be
multiplied by each `anchor_size` to generate a level.
aspect_ratios: A list of floats representing the ratio of anchor width to height.
strides: iterable of ints that represent the anchor stride size between
center of anchors at each scale.
clip_boxes: Whether or not to clip generated anchor boxes to the image size.
Defaults to `False`.
Usage:
```python
strides = [8, 16, 32]
scales = [1, 1.2599210498948732, 1.5874010519681994]
sizes = [32.0, 64.0, 128.0]
aspect_ratios = [0.5, 1.0, 2.0]
image = tf.random.uniform((512, 512, 3))
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="rel_yxyx",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
clip_boxes=True,
)
anchors = anchor_generator(image)
print(anchors)
# > {0: ..., 1: ..., 2: ...}
```
Input shape: an image with shape `[H, W, C]`
Output: a dictionary with integer keys corresponding to each level of the feature
pyramid. The size of the anchors at each level will be
`(H/strides[i] * W/strides[i] * len(scales) * len(aspect_ratios), 4)`.
"""
def __init__(
self,
bounding_box_format,
sizes,
scales,
aspect_ratios,
strides,
clip_boxes=False,
**kwargs,
):
super().__init__(**kwargs)
self.bounding_box_format = bounding_box_format
# aspect_ratio is a single list that is the same across all levels.
sizes, strides = self._format_sizes_and_strides(sizes, strides)
aspect_ratios = self._match_param_structure_to_sizes(aspect_ratios, sizes)
scales = self._match_param_structure_to_sizes(scales, sizes)
self.anchor_generators = {}
for k in sizes.keys():
self.anchor_generators[k] = _SingleAnchorGenerator(
bounding_box_format,
sizes[k],
scales[k],
aspect_ratios[k],
strides[k],
clip_boxes,
dtype=self.compute_dtype,
)
self.built = True
@staticmethod
def _format_sizes_and_strides(sizes, strides):
result_sizes = AnchorGenerator._ensure_param_is_levels_dict(sizes, "sizes")
result_strides = AnchorGenerator._ensure_param_is_levels_dict(
strides, "strides"
)
if sorted(result_strides.keys()) != sorted(result_sizes.keys()):
            raise ValueError(
                "Expected sizes and strides to be either lists of "
                "the same length, or dictionaries with the same keys. Received "
                f"sizes={sizes}, strides={strides}"
            )
return result_sizes, result_strides
@staticmethod
def _ensure_param_is_levels_dict(param, param_name):
"""Takes a param and its name, converts lists to dictionaries of levels.
For example, the list [1, 2] is converted to {0: 1, 1: 2}.
Raises:
ValueError: when param is not a dict, list or tuple.
"""
if isinstance(param, dict):
return param
if not isinstance(param, (list, tuple)):
raise ValueError(
f"Expected {param_name} to be a dict, list or tuple, received "
f"{param_name}={param}"
)
result = {}
for i in range(len(param)):
result[i] = param[i]
return result
@staticmethod
    def _match_param_structure_to_sizes(params, sizes):
        """Broadcast `params` to match the structure of `sizes`."""
if not isinstance(sizes, dict):
raise ValueError(
"the structure of `sizes` must be a dict, " f"received sizes={sizes}"
)
return tf.nest.map_structure(lambda _: params, sizes)
def __call__(self, image=None, image_shape=None):
if image is None and image_shape is None:
raise ValueError("AnchorGenerator() requires `images` or `image_shape`.")
if image is not None:
if image.shape.rank != 3:
raise ValueError(
"Expected `image` to be a Tensor of rank 3. Got "
f"image.shape.rank={image.shape.rank}"
)
image_shape = tf.shape(image)
anchor_generators = tf.nest.flatten(self.anchor_generators)
results = [anchor_gen(image_shape) for anchor_gen in anchor_generators]
results = tf.nest.pack_sequence_as(self.anchor_generators, results)
for key in results:
results[key] = bounding_box.convert_format(
results[key],
source="yxyx",
target=self.bounding_box_format,
image_shape=image_shape,
)
return results
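# Illustrative sketch, not part of the layer: a quick check of the per-level
# anchor count stated in the docstring,
# (H / strides[i]) * (W / strides[i]) * len(scales) * len(aspect_ratios).
# The parameter values below are made up for the example.
_example_generator = AnchorGenerator(
    bounding_box_format="yxyx",
    sizes=[32.0, 64.0, 128.0],
    scales=[1.0, 2.0],
    aspect_ratios=[0.5, 1.0, 2.0],
    strides=[8, 16, 32],
)
_example_anchors = _example_generator(image_shape=(512, 512, 3))
# Level 0 uses stride 8, so it contains (512 / 8) * (512 / 8) * 2 * 3 = 24576
# anchors, i.e. _example_anchors[0].shape == (24576, 4).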
# TODO(tanzheny): consider having customized anchor offset.
class _SingleAnchorGenerator:
"""Internal utility to generate anchors for a single feature map in `yxyx` format.
Example:
```python
    anchor_gen = _SingleAnchorGenerator(
        bounding_box_format="yxyx",
        sizes=32,
        scales=[1.0],
        aspect_ratios=[0.5, 1.0, 2.0],
        stride=16,
    )
anchors = anchor_gen([512, 512, 3])
```
Input shape: the size of the image, `[H, W, C]`
Output shape: the size of anchors, `[(H / stride) * (W / stride), 4]`
Args:
        sizes: A single int representing the base anchor size. The anchor
            height will be `anchor_size / sqrt(aspect_ratio)`, and the anchor
            width will be `anchor_size * sqrt(aspect_ratio)`.
        scales: A list/tuple of positive floats representing the multipliers
            applied to the base `anchor_size` to produce the actual anchor sizes.
        aspect_ratios: a list/tuple of positive floats representing the ratio of
            anchor width to anchor height.
        stride: A single int representing the stride between the centers of
            adjacent anchors.
clip_boxes: Boolean to represent whether the anchor coordinates should be
clipped to the image size. Defaults to `False`.
dtype: (Optional) The data type to use for the output anchors. Defaults to
'float32'.
"""
def __init__(
self,
bounding_box_format,
sizes,
scales,
aspect_ratios,
stride,
clip_boxes=False,
dtype="float32",
):
self.sizes = sizes
self.scales = scales
self.aspect_ratios = aspect_ratios
self.stride = stride
self.clip_boxes = clip_boxes
self.dtype = dtype
def __call__(self, image_size):
image_height = tf.cast(image_size[0], tf.float32)
image_width = tf.cast(image_size[1], tf.float32)
aspect_ratios = tf.cast(self.aspect_ratios, tf.float32)
aspect_ratios_sqrt = tf.cast(tf.sqrt(aspect_ratios), dtype=tf.float32)
anchor_size = tf.cast(self.sizes, tf.float32)
# [K]
anchor_heights = []
anchor_widths = []
for scale in self.scales:
anchor_size_t = anchor_size * scale
anchor_height = anchor_size_t / aspect_ratios_sqrt
anchor_width = anchor_size_t * aspect_ratios_sqrt
anchor_heights.append(anchor_height)
anchor_widths.append(anchor_width)
anchor_heights = tf.concat(anchor_heights, axis=0)
anchor_widths = tf.concat(anchor_widths, axis=0)
half_anchor_heights = tf.reshape(0.5 * anchor_heights, [1, 1, -1])
half_anchor_widths = tf.reshape(0.5 * anchor_widths, [1, 1, -1])
stride = tf.cast(self.stride, tf.float32)
# [W]
cx = tf.range(0.5 * stride, image_width + 1, stride)
# [H]
cy = tf.range(0.5 * stride, image_height + 1, stride)
# [H, W]
cx_grid, cy_grid = tf.meshgrid(cx, cy)
# [H, W, 1]
cx_grid = tf.expand_dims(cx_grid, axis=-1)
cy_grid = tf.expand_dims(cy_grid, axis=-1)
y_min = tf.reshape(cy_grid - half_anchor_heights, (-1,))
y_max = tf.reshape(cy_grid + half_anchor_heights, (-1,))
x_min = tf.reshape(cx_grid - half_anchor_widths, (-1,))
x_max = tf.reshape(cx_grid + half_anchor_widths, (-1,))
# [H * W * K, 1]
y_min = tf.expand_dims(y_min, axis=-1)
y_max = tf.expand_dims(y_max, axis=-1)
x_min = tf.expand_dims(x_min, axis=-1)
x_max = tf.expand_dims(x_max, axis=-1)
if self.clip_boxes:
y_min = tf.maximum(tf.minimum(y_min, image_height), 0.0)
y_max = tf.maximum(tf.minimum(y_max, image_height), 0.0)
x_min = tf.maximum(tf.minimum(x_min, image_width), 0.0)
x_max = tf.maximum(tf.minimum(x_max, image_width), 0.0)
# [H * W * K, 4]
return tf.cast(tf.concat([y_min, x_min, y_max, x_max], axis=-1), self.dtype)
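# Illustrative sketch, not part of the module: a hand-checkable case of the grid
# construction above. With stride=4, sizes=4, scales=[1.0] and
# aspect_ratios=[1.0] on an 8x8 image, anchor centers land at (2, 2), (2, 6),
# (6, 2) and (6, 6), and each center gets one 4x4 box in `yxyx` order.
_single_gen = _SingleAnchorGenerator(
    bounding_box_format="yxyx",
    sizes=4,
    scales=[1.0],
    aspect_ratios=[1.0],
    stride=4,
)
_single_anchors = _single_gen([8, 8, 3])
# _single_anchors is a [4, 4] tensor:
# [[0., 0., 4., 4.],
#  [0., 4., 4., 8.],
#  [4., 0., 8., 4.],
#  [4., 4., 8., 8.]]
# which matches test_hand_crafted_strides in the anchor generator tests below
# (there reported in "xyxy" after format conversion).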
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import layers as cv_layers
class AnchorGeneratorTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
("unequal_lists", [0, 1, 2], [1]),
("unequal_levels_dicts", {"level_1": [0, 1, 2]}, {"1": [0, 1, 2]}),
)
def test_raises_when_strides_not_equal_to_sizes(self, sizes, strides):
with self.assertRaises(ValueError):
cv_layers.AnchorGenerator(
bounding_box_format="xyxy",
sizes=sizes,
strides=strides,
aspect_ratios=[3 / 4, 1, 4 / 3],
scales=[0.5, 1.0, 1.5],
)
def test_raises_batched_images(self):
strides = [4]
scales = [1.0]
sizes = [4]
aspect_ratios = [1.0]
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="xyxy",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
image = tf.random.uniform((4, 8, 8, 3))
with self.assertRaisesRegex(ValueError, "rank"):
_ = anchor_generator(image=image)
def test_output_shapes_image(self):
strides = [2**i for i in range(3, 8)]
scales = [2**x for x in [0, 1 / 3, 2 / 3]]
sizes = [x**2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
aspect_ratios = [0.5, 1.0, 2.0]
image_shape = (512, 512, 3)
image = tf.random.uniform(image_shape)
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="yxyx",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
boxes = anchor_generator(image=image)
boxes = tf.concat(list(boxes.values()), axis=0)
# 49104 is a number found by using the previous internal anchor generator from
# PR https://github.com/keras-team/keras-cv/pull/609
# This unit test was written to ensure compatibility with the existing model.
self.assertEqual(boxes.shape, [49104, 4])
def test_output_shapes_image_shape(self):
strides = [2**i for i in range(3, 8)]
scales = [2**x for x in [0, 1 / 3, 2 / 3]]
sizes = [x**2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
aspect_ratios = [0.5, 1.0, 2.0]
image_shape = (512, 512, 3)
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="yxyx",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
boxes = anchor_generator(image_shape=image_shape)
boxes = tf.concat(list(boxes.values()), axis=0)
# 49104 is a number found by using the previous internal anchor generator from
# PR https://github.com/keras-team/keras-cv/pull/609
# This unit test was written to ensure compatibility with the existing model.
self.assertEqual(boxes.shape, [49104, 4])
def test_hand_crafted_aspect_ratios(self):
strides = [4]
scales = [1.0]
sizes = [4]
aspect_ratios = [3 / 4, 1.0, 4 / 3]
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="xyxy",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
image = tf.random.uniform((8, 8, 3))
boxes = anchor_generator(image=image)
level_0 = boxes[0]
        # width/4 * height/4 * len(aspect_ratios) = 2 * 2 * 3 = 12 anchors
self.assertAllEqual(level_0.shape, [12, 4])
image = tf.random.uniform((4, 4, 3))
boxes = anchor_generator(image=image)
level_0 = boxes[0]
expected_boxes = [
[0.267949224, -0.309401035, 3.7320509, 4.30940104],
[0, 0, 4, 4],
[-0.309401035, 0.267949104, 4.30940104, 3.7320509],
]
self.assertAllClose(level_0, expected_boxes)
def test_hand_crafted_strides(self):
strides = [4]
scales = [1.0]
sizes = [4]
aspect_ratios = [1.0]
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="xyxy",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
image = tf.random.uniform((8, 8, 3))
boxes = anchor_generator(image=image)
level_0 = boxes[0]
expected_boxes = [
[0, 0, 4, 4],
[4, 0, 8, 4],
[0, 4, 4, 8],
[4, 4, 8, 8],
]
self.assertAllClose(level_0, expected_boxes)
def test_relative_generation(self):
strides = [8, 16, 32]
# 0, 1 / 3, 2 / 3
scales = [2**x for x in [0, 1 / 3, 2 / 3]]
sizes = [32.0, 64.0, 128.0]
aspect_ratios = [0.5, 1.0, 2.0]
image = tf.random.uniform((512, 512, 3))
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="rel_yxyx",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
clip_boxes=False,
)
boxes = anchor_generator(image=image)
boxes = tf.concat(list(boxes.values()), axis=0)
self.assertAllLessEqual(boxes, 1.5)
self.assertAllGreaterEqual(boxes, -0.50)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv import layers as cv_layers
class NmsPredictionDecoder(tf.keras.layers.Layer):
"""A Keras layer that decodes predictions of an object detection model.
By default, NmsPredictionDecoder uses a
`keras_cv.layers.NonMaxSuppression` layer to perform box pruning. The layer may
optionally take a `suppression_layer`, which can perform an alternative suppression
operation, such as SoftNonMaxSuppression.
Arguments:
classes: Number of classes in the dataset.
bounding_box_format: The format of bounding boxes of input dataset. Refer
[to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
for more details on supported bounding box formats.
anchor_generator: a `keras_cv.layers.AnchorGenerator`.
suppression_layer: (Optional) a `keras.layers.Layer` that follows the same API
signature of the `keras_cv.layers.NonMaxSuppression` layer. This layer should
perform a suppression operation such as NonMaxSuppression, or
SoftNonMaxSuppression.
box_variance: (Optional) The scaling factors used to scale the bounding box
targets. Defaults to `(0.1, 0.1, 0.2, 0.2)`. **Important Note:**
`box_variance` is applied to the boxes in `xywh` format.
"""
def __init__(
self,
bounding_box_format,
anchor_generator,
classes=None,
suppression_layer=None,
box_variance=(0.1, 0.1, 0.2, 0.2),
**kwargs,
):
super().__init__(**kwargs)
if not suppression_layer and not classes:
raise ValueError(
"NmsPredictionDecoder() requires either `suppression_layer` "
f"or `classes`. Received `suppression_layer={suppression_layer} and "
f"classes={classes}`"
)
self.bounding_box_format = bounding_box_format
self.suppression_layer = suppression_layer or cv_layers.NonMaxSuppression(
classes=classes,
bounding_box_format=bounding_box_format,
confidence_threshold=0.5,
iou_threshold=0.5,
max_detections=100,
max_detections_per_class=100,
)
if self.suppression_layer.bounding_box_format != self.bounding_box_format:
raise ValueError(
"`suppression_layer` must have the same `bounding_box_format` "
"as the `NmsPredictionDecoder()` layer. "
"Received `NmsPredictionDecoder.bounding_box_format="
f"{self.bounding_box_format}`, `suppression_layer={suppression_layer}`."
)
self.anchor_generator = anchor_generator
self.box_variance = tf.convert_to_tensor(box_variance, dtype=tf.float32)
self.built = True
# TODO(lukewood): provide this as general utility on top of bounding_box_format.
def _decode_box_predictions(self, anchor_boxes, box_predictions):
boxes = box_predictions * self.box_variance
boxes = tf.concat(
[
boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2],
tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:],
],
axis=-1,
)
return boxes
def call(self, images, predictions):
"""Accepts images and raw predictions, and returns bounding box predictions.
Args:
images: Tensor of shape [batch, height, width, channels].
            predictions: Dense Tensor of shape [batch, num_anchor_boxes, 4 + classes],
                where the first 4 values of each row are box coordinates in the
                `bounding_box_format` specified in the constructor.
"""
if isinstance(images, tf.RaggedTensor):
            raise ValueError(
                "NmsPredictionDecoder() does not support tf.RaggedTensor inputs. "
                f"Received images={images}."
            )
anchor_boxes = self.anchor_generator(images[0])
anchor_boxes = tf.concat(list(anchor_boxes.values()), axis=0)
anchor_boxes = bounding_box.convert_format(
anchor_boxes,
source=self.anchor_generator.bounding_box_format,
target="xywh",
images=images[0],
)
predictions = bounding_box.convert_format(
predictions, source=self.bounding_box_format, target="xywh", images=images
)
box_predictions = predictions[:, :, :4]
cls_predictions = tf.nn.sigmoid(predictions[:, :, 4:])
classes = tf.math.argmax(cls_predictions, axis=-1)
classes = tf.cast(classes, box_predictions.dtype)
confidence = tf.math.reduce_max(cls_predictions, axis=-1)
classes = tf.expand_dims(classes, axis=-1)
confidence = tf.expand_dims(confidence, axis=-1)
boxes = self._decode_box_predictions(anchor_boxes[None, ...], box_predictions)
boxes = tf.concat([boxes, classes, confidence], axis=-1)
boxes = bounding_box.convert_format(
boxes,
source="xywh",
target=self.suppression_layer.bounding_box_format,
images=images,
)
return self.suppression_layer(boxes, images=images)
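# Illustrative sketch, not part of the layer: the delta decoding performed by
# `_decode_box_predictions` above. Raw network outputs are scaled by
# `box_variance`, center offsets are expressed in units of the anchor's
# width/height, and sizes are decoded through `exp`. The numbers are made up.
_anchor_boxes = tf.constant([[[50.0, 50.0, 20.0, 20.0]]])  # xywh, shape [1, 1, 4]
_raw_preds = tf.constant([[[1.0, 2.0, 0.0, 0.0]]])  # network deltas
_box_variance = tf.constant([0.1, 0.1, 0.2, 0.2])
_deltas = _raw_preds * _box_variance
_decoded = tf.concat(
    [
        _deltas[:, :, :2] * _anchor_boxes[:, :, 2:] + _anchor_boxes[:, :, :2],
        tf.math.exp(_deltas[:, :, 2:]) * _anchor_boxes[:, :, 2:],
    ],
    axis=-1,
)
# _decoded == [[[52., 54., 20., 20.]]]: the center moves by (0.1 * 20, 0.2 * 20)
# and the width/height stay at exp(0) * 20 = 20.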
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import layers as cv_layers
class NmsPredictionDecoderTest(tf.test.TestCase):
def test_decode_predictions_output_shapes(self):
classes = 10
images_shape = (8, 512, 1024, 3)
predictions_shape = (8, 98208, 4 + classes)
images = tf.random.uniform(shape=images_shape)
predictions = tf.random.uniform(
shape=predictions_shape, minval=0.0, maxval=1.0, dtype=tf.float32
)
strides = [2**i for i in range(3, 8)]
scales = [2**x for x in [0, 1 / 3, 2 / 3]]
sizes = [x**2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
aspect_ratios = [0.5, 1.0, 2.0]
anchor_generator = cv_layers.AnchorGenerator(
bounding_box_format="yxyx",
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
)
layer = cv_layers.NmsPredictionDecoder(
anchor_generator=anchor_generator,
classes=classes,
bounding_box_format="rel_xyxy",
)
result = layer(images=images, predictions=predictions)
self.assertEqual(result.shape, [8, None, 6])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class NonMaxSuppression(tf.keras.layers.Layer):
"""
Implements the non-max suppression layer.
Non-maximal suppression is used to suppress potentially repeated boxes by:
1) picking the highest ranked boxes
2) pruning away all boxes that have a high IoU with the chosen boxes.
References:
- [Yolo paper](https://arxiv.org/pdf/1506.02640)
Args:
classes: an integer representing the number of classes that a bounding
box can belong to.
bounding_box_format: a case-insensitive string which is one of `"xyxy"`,
`"rel_xyxy"`, `"xyWH"`, `"center_xyWH"`, `"yxyx"`, `"rel_yxyx"`. The
position and shape of the bounding box will be followed by the class and
confidence values (in that order). This is required for proper ranking of
the bounding boxes. Therefore, each bounding box is defined by 6 values.
For detailed information on the supported format, see the
[KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/).
confidence_threshold: a float value in the range [0, 1]. All boxes with
confidence below this value will be discarded. Defaults to 0.05.
iou_threshold: a float value in the range [0, 1] representing the minimum
IoU threshold for two boxes to be considered same for suppression. Defaults
to 0.5.
max_detections: the maximum detections to consider after nms is applied. A large
number may trigger significant memory overhead. Defaults to 100.
max_detections_per_class: the maximum detections to consider per class after
nms is applied. Defaults to 100.
Usage:
```python
images = np.zeros((2, 480, 480, 3), dtype = np.float32)
ex_boxes = np.array([
[
[0, 0, 1, 1, 4, 0.9],
[0, 0, 2, 3, 4, 0.76],
[4, 5, 3, 6, 3, 0.89],
[2, 2, 3, 3, 6, 0.04],
],
[
[0, 0, 5, 6, 4, 0.9],
[0, 0, 7, 3, 1, 0.76],
[4, 5, 5, 6, 4, 0.04],
[2, 1, 3, 3, 7, 0.48],
],
], dtype = np.float32)
nms = NonMaxSuppression(
classes=8,
bounding_box_format="center_xyWH",
iou_threshold=0.1
)
    boxes = nms(ex_boxes, images)
```
"""
def __init__(
self,
classes,
bounding_box_format,
confidence_threshold=0.05,
iou_threshold=0.5,
max_detections=100,
max_detections_per_class=100,
**kwargs,
):
super().__init__(**kwargs)
self.classes = classes
self.bounding_box_format = bounding_box_format
self.confidence_threshold = confidence_threshold
self.iou_threshold = iou_threshold
self.max_detections = max_detections
self.max_detections_per_class = max_detections_per_class
self.built = True
def call(self, predictions, images=None):
if predictions.shape[-1] != 6:
raise ValueError(
"keras_cv.layers.NonMaxSuppression() expects `call()` "
"argument `predictions` to be of shape (None, None, 6). Received "
f"predictions.shape={tuple(predictions.shape)}."
)
# convert to yxyx for the TF NMS operation
predictions = bounding_box.convert_format(
predictions,
source=self.bounding_box_format,
target="yxyx",
images=images,
)
# preparing the predictions for TF NMS op
boxes = tf.expand_dims(predictions[..., :4], axis=2)
class_predictions = tf.cast(predictions[..., 4], tf.int32)
scores = predictions[..., 5]
class_predictions = tf.one_hot(class_predictions, self.classes)
scores = tf.expand_dims(scores, axis=-1) * class_predictions
# applying the NMS operation
nmsed_boxes = tf.image.combined_non_max_suppression(
boxes,
scores,
self.max_detections_per_class,
self.max_detections,
self.iou_threshold,
self.confidence_threshold,
clip_boxes=False,
)
# output will be a ragged tensor because num_boxes will change across the batch
boxes = self._decode_nms_boxes_to_tensor(nmsed_boxes)
# converting all boxes to the original format
boxes = self._encode_to_ragged(boxes, nmsed_boxes.valid_detections)
return bounding_box.convert_format(
boxes,
source="yxyx",
target=self.bounding_box_format,
images=images,
)
def _decode_nms_boxes_to_tensor(self, nmsed_boxes):
boxes = tf.TensorArray(
tf.float32, size=0, infer_shape=False, element_shape=(6,), dynamic_size=True
)
for i in tf.range(tf.shape(nmsed_boxes.nmsed_boxes)[0]):
num_detections = nmsed_boxes.valid_detections[i]
# recombining with classes and scores
boxes_recombined = tf.concat(
[
nmsed_boxes.nmsed_boxes[i][:num_detections],
tf.expand_dims(
nmsed_boxes.nmsed_classes[i][:num_detections], axis=-1
),
tf.expand_dims(
nmsed_boxes.nmsed_scores[i][:num_detections], axis=-1
),
],
axis=-1,
)
# iterate through the boxes and append it to TensorArray
for j in range(nmsed_boxes.valid_detections[i]):
boxes = boxes.write(boxes.size(), boxes_recombined[j])
# stacking to create a tensor
return boxes.stack()
def _encode_to_ragged(self, boxes, valid_detections):
# using cumulative sum to calculate row_limits for ragged tensor
row_limits = tf.cumsum(valid_detections)
# creating the output RaggedTensor by splitting boxes at row_limits
result = tf.RaggedTensor.from_row_limits(values=boxes, row_limits=row_limits)
return result
def get_config(self):
config = {
"classes": self.classes,
"bounding_box_format": self.bounding_box_format,
"confidence_threshold": self.confidence_threshold,
"iou_threshold": self.iou_threshold,
"max_detections": self.max_detections,
"max_detections_per_class": self.max_detections_per_class,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
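# Illustrative sketch, not part of the layer: NonMaxSuppression consumes dense
# predictions of shape [batch, num_boxes, 6] (4 box values, class, confidence)
# and returns a tf.RaggedTensor because the number of surviving boxes varies per
# image. The boxes below are made up; "xyxy" is just one supported format.
import numpy as np

_images = np.zeros((1, 480, 480, 3), dtype=np.float32)
_predictions = np.array(
    [
        [
            [10, 10, 50, 50, 2, 0.95],  # kept: highest score for class 2
            [12, 12, 52, 52, 2, 0.60],  # suppressed: high IoU with the box above
            [200, 200, 260, 260, 5, 0.80],  # kept: different location and class
        ]
    ],
    dtype=np.float32,
)
_nms = NonMaxSuppression(classes=8, bounding_box_format="xyxy", iou_threshold=0.5)
_pruned = _nms(_predictions, images=_images)
# _pruned is a RaggedTensor; for this image it should keep two rows of 6 values.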