Commit 692a4fb3, authored by Sam Tsai and committed by Facebook GitHub Bot
Browse files

add check/filter for invalid bounding boxes

Summary: Checks for invalid bounding boxes and filters out the affected annotations so that they are not included in the converted dataset.

Reviewed By: wat3rBro

Differential Revision: D28902711

fbshipit-source-id: 1f017d6ccf5c959059bcb94a09ddd81de868feed
parent 8cbe10d5
...@@ -125,8 +125,20 @@ def convert_coco_text_to_coco_detection_json( ...@@ -125,8 +125,20 @@ def convert_coco_text_to_coco_detection_json(
return coco_text_json return coco_text_json
def valid_bbox(bbox_xywh, img_w, img_h):
    """Return whether an XYWH box has positive size and fits inside the image.

    Args:
        bbox_xywh: Sequence whose first four entries are ``x, y, w, h``
            (top-left corner plus width and height), or ``None``.
        img_w: Image width, in the same units as the box.
        img_h: Image height, in the same units as the box.

    Returns:
        ``True`` if the box is present, has strictly positive width and
        height, and lies entirely within ``[0, img_w] x [0, img_h]``;
        ``False`` otherwise.
    """
    # A missing or truncated box cannot be valid; report it instead of
    # raising IndexError so callers can simply filter it out.
    if bbox_xywh is None or len(bbox_xywh) < 4:
        return False

    x, y, w, h = bbox_xywh[:4]

    # Reject degenerate AND negative sizes. Checking only ``== 0`` would let a
    # negative width/height through, because ``img_w - w`` then becomes larger
    # than the image and the containment test below would pass.
    if w <= 0 or h <= 0:
        return False

    # The top-left corner must be inside the image and leave enough room for
    # the full width/height.
    if not (0 <= x <= img_w - w):
        return False
    if not (0 <= y <= img_h - h):
        return False
    return True
def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
num_instances_without_valid_segmentation = 0 num_instances_without_valid_segmentation = 0
num_instances_without_valid_bounding_box = 0
dataset_dicts = [] dataset_dicts = []
count_ignore_image_root_warning = 0 count_ignore_image_root_warning = 0
for (img_dict, anno_dict_list) in zip(imgs, anns): for (img_dict, anno_dict_list) in zip(imgs, anns):
...@@ -167,6 +179,11 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): ...@@ -167,6 +179,11 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
if field in anno if field in anno
} }
bbox_object = obj.get("bbox", None)
if not valid_bbox(bbox_object, record["width"], record["height"]):
num_instances_without_valid_bounding_box += 1
continue
if obj.get("category_id", None) not in id_map: if obj.get("category_id", None) not in id_map:
continue continue
...@@ -190,6 +207,8 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): ...@@ -190,6 +207,8 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
obj["category_id"] = id_map[obj["category_id"]] obj["category_id"] = id_map[obj["category_id"]]
objs.append(obj) objs.append(obj)
record["annotations"] = objs record["annotations"] = objs
if len(objs) == 0:
continue
if dataset_name is not None: if dataset_name is not None:
record["dataset_name"] = dataset_name record["dataset_name"] = dataset_name
dataset_dicts.append(record) dataset_dicts.append(record)
...@@ -208,6 +227,15 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): ...@@ -208,6 +227,15 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
num_instances_without_valid_segmentation num_instances_without_valid_segmentation
) )
) )
if num_instances_without_valid_bounding_box > 0:
logger.warning(
"Filtered out {} instances without valid bounding boxes. "
"There might be issues in your dataset generation process.".format(
num_instances_without_valid_bounding_box
)
)
return dataset_dicts return dataset_dicts
......
...@@ -74,6 +74,74 @@ class TestD2GoDatasets(unittest.TestCase): ...@@ -74,6 +74,74 @@ class TestD2GoDatasets(unittest.TestCase):
self.assertEqual(out_json["images"][0]["id"], exp_output[0]) self.assertEqual(out_json["images"][0]["id"], exp_output[0])
self.assertEqual(out_json["annotations"][0]["image_id"], exp_output[1]) self.assertEqual(out_json["annotations"][0]["image_id"], exp_output[1])
def test_annotation_rejection(self):
    # Two 50x50 images: the first mixes usable and unusable annotations, the
    # second only carries an unusable one.

    def square_polygon():
        # Fresh list per annotation so no two annotations share a polygon.
        return [[0, 0, 10, 0, 10, 10, 0, 10]]

    def make_annotation(ann_id, image_id, segmentation, area, bbox):
        return {
            "id": ann_id,
            "image_id": image_id,
            "category_id": 0,
            "segmentation": segmentation,
            "area": area,
            "bbox": bbox,
        }

    img_list = [
        {"id": img_id, "width": 50, "height": 50, "file_name": file_name}
        for img_id, file_name in ((0, "a.png"), (1, "b.png"))
    ]

    first_image_annotations = [
        # Fully inside the image.
        make_annotation(0, 0, square_polygon(), 100, [0, 0, 10, 10]),
        # Extends past the right/bottom edges (45 + 10 > 50).
        make_annotation(1, 0, square_polygon(), 100, [45, 45, 10, 10]),
        # Starts at negative coordinates.
        make_annotation(2, 0, square_polygon(), 100, [-5, -5, 10, 10]),
        # Zero width and height.
        make_annotation(3, 0, square_polygon(), 0, [5, 5, 0, 0]),
        # Box is inside the image, but the polygon is empty.
        make_annotation(4, 0, [[]], 25, [5, 5, 5, 5]),
    ]
    second_image_annotations = [
        # Zero-sized box: the only annotation of the second image.
        make_annotation(5, 1, [[]], 100, [0, 0, 0, 0]),
    ]
    ann_list = [first_image_annotations, second_image_annotations]

    out_dict_list = extended_coco.convert_to_dict_list(
        "",
        [0],
        img_list,
        ann_list,
    )

    # Only the first image is expected to survive the filtering.
    self.assertEqual(len(out_dict_list), 1)
@tempdir @tempdir
def test_coco_injection(self, tmp_dir): def test_coco_injection(self, tmp_dir):
image_dir, json_file = create_test_images_and_dataset_json(tmp_dir) image_dir, json_file = create_test_images_and_dataset_json(tmp_dir)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment