add check/filter for invalid bounding boxes

Summary: Checks for invalid bounding boxes and filters out the annotations that have them so they are not included. Reviewed By: wat3rBro Differential Revision: D28902711 fbshipit-source-id: 1f017d6ccf5c959059bcb94a09ddd81de868feed

add check/filter for invalid bounding boxes
Summary: Checks for invalid bounding boxes and filters out the annotations that have them so they are not included. Reviewed By: wat3rBro Differential Revision: D28902711 fbshipit-source-id: 1f017d6ccf5c959059bcb94a09ddd81de868feed
692a4fb3 · Sam Tsai · Facebook GitHub Bot · 8cbe10d5 · 692a4fb3 · 692a4fb3
Commit 692a4fb3 authored Jun 15, 2021 by Sam Tsai Committed by Facebook GitHub Bot Jun 15, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 96 additions and 0 deletions

d2go/data/extended_coco.py d2go/data/extended_coco.py +28 -0

tests/data/test_d2go_datasets.py tests/data/test_d2go_datasets.py +68 -0

No files found.
--- a/d2go/data/extended_coco.py
+++ b/d2go/data/extended_coco.py
@@ -125,8 +125,20 @@ def convert_coco_text_to_coco_detection_json(
    return coco_text_json
+def valid_bbox(bbox_xywh, img_w, img_h):
+    if (
+        bbox_xywh is None
+        or (bbox_xywh[3] == 0 or bbox_xywh[2] == 0)
+        or not (0 <= bbox_xywh[0] <= img_w - bbox_xywh[2])
+        or not (0 <= bbox_xywh[1] <= img_h - bbox_xywh[3])
+    ):
+        return False
+    return True
 def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
    num_instances_without_valid_segmentation = 0
+    num_instances_without_valid_bounding_box = 0
    dataset_dicts = []
    count_ignore_image_root_warning = 0
    for (img_dict, anno_dict_list) in zip(imgs, anns):
@@ -167,6 +179,11 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
                if field in anno
            }
+            bbox_object = obj.get("bbox", None)
+            if not valid_bbox(bbox_object, record["width"], record["height"]):
+                num_instances_without_valid_bounding_box += 1
+                continue
            if obj.get("category_id", None) not in id_map:
                continue
@@ -190,6 +207,8 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
+        if len(objs) == 0:
+            continue
        if dataset_name is not None:
            record["dataset_name"] = dataset_name
        dataset_dicts.append(record)
@@ -208,6 +227,15 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
                num_instances_without_valid_segmentation
            )
        )
+    if num_instances_without_valid_bounding_box > 0:
+        logger.warning(
+            "Filtered out {} instances without valid bounding boxes. "
+            "There might be issues in your dataset generation process.".format(
+                num_instances_without_valid_bounding_box
+            )
+        )
    return dataset_dicts

--- a/tests/data/test_d2go_datasets.py
+++ b/tests/data/test_d2go_datasets.py
@@ -74,6 +74,74 @@ class TestD2GoDatasets(unittest.TestCase):
                self.assertEqual(out_json["images"][0]["id"], exp_output[0])
                self.assertEqual(out_json["annotations"][0]["image_id"], exp_output[1])
+    def test_annotation_rejection(self):
+        img_list = [
+            {"id": 0, "width": 50, "height": 50, "file_name": "a.png"},
+            {"id": 1, "width": 50, "height": 50, "file_name": "b.png"},
+        ]
+        ann_list = [
+            [
+                {
+                    "id": 0,
+                    "image_id": 0,
+                    "category_id": 0,
+                    "segmentation": [[0, 0, 10, 0, 10, 10, 0, 10]],
+                    "area": 100,
+                    "bbox": [0, 0, 10, 10],
+                },
+                {
+                    "id": 1,
+                    "image_id": 0,
+                    "category_id": 0,
+                    "segmentation": [[0, 0, 10, 0, 10, 10, 0, 10]],
+                    "area": 100,
+                    "bbox": [45, 45, 10, 10],
+                },
+                {
+                    "id": 2,
+                    "image_id": 0,
+                    "category_id": 0,
+                    "segmentation": [[0, 0, 10, 0, 10, 10, 0, 10]],
+                    "area": 100,
+                    "bbox": [-5, -5, 10, 10],
+                },
+                {
+                    "id": 3,
+                    "image_id": 0,
+                    "category_id": 0,
+                    "segmentation": [[0, 0, 10, 0, 10, 10, 0, 10]],
+                    "area": 0,
+                    "bbox": [5, 5, 0, 0],
+                },
+                {
+                    "id": 4,
+                    "image_id": 0,
+                    "category_id": 0,
+                    "segmentation": [[]],
+                    "area": 25,
+                    "bbox": [5, 5, 5, 5],
+                },
+            ],
+            [
+                {
+                    "id": 5,
+                    "image_id": 1,
+                    "category_id": 0,
+                    "segmentation": [[]],
+                    "area": 100,
+                    "bbox": [0, 0, 0, 0],
+                },
+            ]
+        ]
+        out_dict_list = extended_coco.convert_to_dict_list(
+            "",
+            [0],
+            img_list,
+            ann_list,
+        )
+        self.assertEqual(len(out_dict_list), 1)
    @tempdir
    def test_coco_injection(self, tmp_dir):
        image_dir, json_file = create_test_images_and_dataset_json(tmp_dir)