add enlarge bounding box manipulation

Summary: Add a bounding box manipulation tool for padding bounding box data. Reviewed By: newstzpz Differential Revision: D28082071 fbshipit-source-id: f168cae48672c4fa5c4ec98697c57ed7833787ab

add enlarge bounding box manipulation
Summary: Add a bounding box manipulation tool for padding bounding box data. Reviewed By: newstzpz Differential Revision: D28082071 fbshipit-source-id: f168cae48672c4fa5c4ec98697c57ed7833787ab
e1961ad4 · Sam Tsai · Facebook GitHub Bot · 477ab964 · e1961ad4 · e1961ad4
Commit e1961ad4 authored May 05, 2021 by Sam Tsai Committed by Facebook GitHub Bot May 05, 2021
Show whitespace changes
Inline Side-by-side

Showing with 105 additions and 1 deletion

d2go/data/transforms/box_utils.py d2go/data/transforms/box_utils.py +47 -1

tests/data/test_data_transforms_box_utils.py tests/data/test_data_transforms_box_utils.py +58 -0

No files found.
--- a/d2go/data/transforms/box_utils.py
+++ b/d2go/data/transforms/box_utils.py
 #!/usr/bin/env python3
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-from typing import Tuple, List
+from typing import Tuple, List, Any, Union
+import detectron2.data.transforms.augmentation as aug
 import numpy as np
 import torch
+from detectron2.config import CfgNode
+from detectron2.data.transforms.transform import Transform
 from detectron2.structures.boxes import Boxes
+from .build import TRANSFORM_OP_REGISTRY, _json_load
 def get_box_union(boxes: Boxes):
    """ Merge all boxes into a single box """
@@ -129,3 +134,44 @@ def clip_box_xywh(bbox_xywh: torch.Tensor, image_size_hw: List[int]):
    bbox_xyxy[2] = min(bbox_xyxy[2], w)
    bbox_xyxy[3] = min(bbox_xyxy[3], h)
    return get_bbox_xywh_from_xyxy(bbox_xyxy)
+class EnlargeBoundingBox(Transform):
+    """Enlarge a set of points (e.g. box corners) away from their center.
+
+    Exactly one of ``percentage`` or ``fixed_pad`` must be given.
+
+    Args:
+        percentage: each coordinate is moved away from the center by
+            ``percentage`` times its distance to the center, e.g. ``0.2``
+            turns the box ``[91, 46, 144, 111]`` into
+            ``[85.7, 39.5, 149.3, 117.5]``.
+        fixed_pad: each coordinate is moved away from the center by this
+            constant amount; a coordinate equal to the center is not moved.
+    """
+
+    def __init__(
+        self,
+        percentage: Union[float, None] = None,
+        fixed_pad: Union[int, None] = None,
+    ):
+        super().__init__()
+        assert (
+            percentage is not None or fixed_pad is not None
+        ), "One of percentage or fixed_pad must be provided"
+        assert (
+            percentage is None or fixed_pad is None
+        ), "percentage and fixed_pad are mutually exclusive"
+
+        if percentage is not None:
+
+            def xfn(x, c):
+                return [((a - b) * percentage + a) for a, b in zip(x, c)]
+
+        elif fixed_pad is not None:
+
+            def xfn(x, c):
+                return [(np.sign(a - b) * fixed_pad + a) for a, b in zip(x, c)]
+
+        # Maps (point, center) -> enlarged point, one coordinate at a time.
+        self.xfm_fn = xfn
+
+    def apply_image(self, img: Any) -> Any:
+        """Return the image unchanged; only coordinates are transformed."""
+        return img
+
+    def apply_coords(self, coords: Any) -> Any:
+        """Move every point away from the mean of all points in ``coords``.
+
+        Args:
+            coords: array of shape (N, 2).
+
+        Returns:
+            A new floating-point array of shape (N, 2) with the enlarged
+            points. The input array is left untouched.
+        """
+        assert coords.shape[1] == 2, "Supported 2d inputs only"
+        # Work on a floating-point copy: writing the results back into the
+        # caller's array would mutate their data and, for integer arrays,
+        # silently truncate the enlarged coordinates.
+        enlarged = coords.astype(np.result_type(coords.dtype, np.float32))
+        if enlarged.shape[0] == 0:
+            # Nothing to enlarge; also avoids the NaN mean of an empty array.
+            return enlarged
+        # NOTE(review): the center is shared by all N points, so if several
+        # boxes are passed at once they are enlarged around their common
+        # center rather than each around its own -- confirm this is intended.
+        center = np.mean(enlarged, axis=0)
+        for index in range(enlarged.shape[0]):
+            enlarged[index] = self.xfm_fn(enlarged[index], center)
+        return enlarged
+@TRANSFORM_OP_REGISTRY.register()
+def EnlargeBoundingBoxOp(
+    cfg: CfgNode, arg_str: str, is_train: bool
+) -> List[Union[aug.Augmentation, Transform]]:
+    """Build an ``EnlargeBoundingBox`` transform from a JSON argument string.
+
+    Args:
+        cfg: config node; not read by this op.
+        arg_str: JSON object whose keys are forwarded to
+            ``EnlargeBoundingBox`` as keyword arguments, or ``None`` for no
+            arguments.
+        is_train: must be true; the op is only available for training.
+
+    Returns:
+        A single-element list holding the constructed transform.
+    """
+    assert is_train
+    if arg_str is None:
+        op_kwargs = {}
+    else:
+        op_kwargs = _json_load(arg_str)
+    assert isinstance(op_kwargs, dict)
+    enlarge_tfm = EnlargeBoundingBox(**op_kwargs)
+    return [enlarge_tfm]
--- a/tests/data/test_data_transforms_box_utils.py
+++ b/tests/data/test_data_transforms_box_utils.py
@@ -7,6 +7,15 @@ import unittest
 import d2go.data.transforms.box_utils as bu
 import numpy as np
 import torch
+from d2go.config import CfgNode
+from d2go.data.transforms.build import build_transform_gen
+def get_default_config():
+    """Return a fresh config holding an empty ``D2GO_DATA.AUG_OPS`` node."""
+    data_node = CfgNode()
+    data_node.AUG_OPS = CfgNode()
+    root = CfgNode()
+    root.D2GO_DATA = data_node
+    return root
 class TestDataTransformsBoxUtils(unittest.TestCase):
@@ -46,3 +55,52 @@ class TestDataTransformsBoxUtils(unittest.TestCase):
    def assertArrayEqual(self, a1, a2):
        """Assert that ``a1`` and ``a2`` have the same shape and elements."""
        arrays_match = np.array_equal(a1, a2)
        self.assertTrue(arrays_match)
+    def test_enlarge_bounding_box(self):
+        """Check fixed-pad and percentage enlargement built from config ops."""
+        cfg = get_default_config()
+        cfg.D2GO_DATA.AUG_OPS.TRAIN = [
+            'EnlargeBoundingBoxOp::{"fixed_pad": 20}',
+            'EnlargeBoundingBoxOp::{"percentage": 0.2}',
+        ]
+        tfms = build_transform_gen(cfg, is_train=True)
+
+        source_box = [[91, 46, 144, 111]]
+        # (index of the transform in ``tfms``, expected box after enlarging)
+        box_cases = (
+            (0, [[71, 26, 164, 131]]),
+            (1, [[85.7, 39.5, 149.3, 117.5]]),
+        )
+        for tfm_index, expected in box_cases:
+            boxes = np.array(source_box, dtype=np.float64)
+            transformed_bboxs = tfms[tfm_index].apply_box(boxes)
+            expected_bboxs = np.array(expected, dtype=np.float64)
+            err_msg = "transformed_bbox = {}, expected {}".format(
+                transformed_bboxs, expected_bboxs
+            )
+            self.assertTrue(np.allclose(transformed_bboxs, expected_bboxs), err_msg)
+
+        # apply_image must hand the data back unchanged.
+        dummy_data = np.array(source_box, dtype=np.float64)
+        expected_out = np.array(source_box, dtype=np.float64)
+        dummy_data_out = tfms[1].apply_image(dummy_data)
+        err_msg = "Apply image failed"
+        self.assertTrue(np.allclose(dummy_data_out, expected_out), err_msg)