Commit bfd78461 authored by Sam Tsai's avatar Sam Tsai Committed by Facebook GitHub Bot
Browse files

registry and copy keys for extended coco load

Summary:
1. Add a registry for coco injection to allow easier overriding of coco injections
2. Coco loading is currently limited to certain keys. Add an option that allows copying specified keys from the image entries into the output records.

Reviewed By: zhanghang1989

Differential Revision: D33132517

fbshipit-source-id: 57ac4994a66f9c75457cada7e85fb15da4818f3e
parent f3a4a534
...@@ -28,6 +28,7 @@ def add_d2go_data_default_configs(_C): ...@@ -28,6 +28,7 @@ def add_d2go_data_default_configs(_C):
_C.D2GO_DATA.DATASETS.COCO_INJECTION.IM_DIRS = [] _C.D2GO_DATA.DATASETS.COCO_INJECTION.IM_DIRS = []
_C.D2GO_DATA.DATASETS.COCO_INJECTION.JSON_FILES = [] _C.D2GO_DATA.DATASETS.COCO_INJECTION.JSON_FILES = []
_C.D2GO_DATA.DATASETS.COCO_INJECTION.KEYPOINT_METADATA = [] _C.D2GO_DATA.DATASETS.COCO_INJECTION.KEYPOINT_METADATA = []
_C.D2GO_DATA.DATASETS.COCO_INJECTION.REGISTER_FUNCTION = "_register_extended_coco"
# On-the-fly register a list of datasets located under detectron2go/datasets # On-the-fly register a list of datasets located under detectron2go/datasets
# by specifying the filename (without .py). # by specifying the filename (without .py).
......
...@@ -10,6 +10,7 @@ import os ...@@ -10,6 +10,7 @@ import os
from d2go.utils.helper import get_dir_path from d2go.utils.helper import get_dir_path
from d2go.utils.misc import fb_overwritable from d2go.utils.misc import fb_overwritable
from detectron2.data import DatasetCatalog, MetadataCatalog from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.registry import Registry
from .extended_coco import coco_text_load, extended_coco_load from .extended_coco import coco_text_load, extended_coco_load
from .extended_lvis import extended_lvis_load from .extended_lvis import extended_lvis_load
...@@ -22,6 +23,14 @@ D2GO_DATASETS_BASE_MODULE = "d2go.datasets" ...@@ -22,6 +23,14 @@ D2GO_DATASETS_BASE_MODULE = "d2go.datasets"
IM_DIR = "image_directory" IM_DIR = "image_directory"
ANN_FN = "annotation_file" ANN_FN = "annotation_file"
# Registry of dataset-registration functions for coco injection. The function
# to use is selected by name through
# cfg.D2GO_DATA.DATASETS.COCO_INJECTION.REGISTER_FUNCTION, which lets projects
# override how injected coco datasets are registered.
COCO_REGISTER_FUNCTION_REGISTRY = Registry("COCO_REGISTER_FUNCTION_REGISTRY")
COCO_REGISTER_FUNCTION_REGISTRY.__doc__ = "Registry - coco register function"
def get_coco_register_function(cfg):
    """Return the coco dataset-registration function selected by the config.

    Looks up COCO_REGISTER_FUNCTION_REGISTRY by the name stored in
    cfg.D2GO_DATA.DATASETS.COCO_INJECTION.REGISTER_FUNCTION.
    """
    return COCO_REGISTER_FUNCTION_REGISTRY.get(
        cfg.D2GO_DATA.DATASETS.COCO_INJECTION.REGISTER_FUNCTION
    )
def _import_dataset(module_name): def _import_dataset(module_name):
return importlib.import_module( return importlib.import_module(
...@@ -29,6 +38,7 @@ def _import_dataset(module_name): ...@@ -29,6 +38,7 @@ def _import_dataset(module_name):
) )
@COCO_REGISTER_FUNCTION_REGISTRY.register()
def _register_extended_coco(dataset_name, split_dict): def _register_extended_coco(dataset_name, split_dict):
json_file = split_dict[ANN_FN] json_file = split_dict[ANN_FN]
image_root = split_dict[IM_DIR] image_root = split_dict[IM_DIR]
...@@ -113,6 +123,7 @@ def inject_coco_datasets(cfg): ...@@ -113,6 +123,7 @@ def inject_coco_datasets(cfg):
im_dirs = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.IM_DIRS im_dirs = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.IM_DIRS
json_files = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.JSON_FILES json_files = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.JSON_FILES
metadata_type = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.KEYPOINT_METADATA metadata_type = cfg.D2GO_DATA.DATASETS.COCO_INJECTION.KEYPOINT_METADATA
_register_func = get_coco_register_function(cfg)
assert len(names) == len(im_dirs) == len(json_files) assert len(names) == len(im_dirs) == len(json_files)
for ds_index, (name, im_dir, json_file) in enumerate( for ds_index, (name, im_dir, json_file) in enumerate(
...@@ -122,7 +133,7 @@ def inject_coco_datasets(cfg): ...@@ -122,7 +133,7 @@ def inject_coco_datasets(cfg):
if len(metadata_type) != 0: if len(metadata_type) != 0:
split_dict["meta_data"] = get_keypoint_metadata(metadata_type[ds_index]) split_dict["meta_data"] = get_keypoint_metadata(metadata_type[ds_index])
logger.info("Inject coco dataset {}: {}".format(name, split_dict)) logger.info("Inject coco dataset {}: {}".format(name, split_dict))
_register_extended_coco(name, split_dict) _register_func(name, split_dict)
def register_dataset_split(dataset_name, split_dict): def register_dataset_split(dataset_name, split_dict):
......
...@@ -7,6 +7,7 @@ import logging ...@@ -7,6 +7,7 @@ import logging
import shlex import shlex
import subprocess import subprocess
from collections import defaultdict from collections import defaultdict
from typing import Optional, List, Dict
import detectron2.utils.comm as comm import detectron2.utils.comm as comm
from detectron2.data import MetadataCatalog from detectron2.data import MetadataCatalog
...@@ -43,7 +44,7 @@ class InMemoryCOCO(COCO): ...@@ -43,7 +44,7 @@ class InMemoryCOCO(COCO):
self.createIndex() self.createIndex()
def extract_archive_file(archive_fn, im_dir): def extract_archive_file(archive_fn: str, im_dir: str):
if not os.path.exists(im_dir) or not os.listdir(im_dir): if not os.path.exists(im_dir) or not os.listdir(im_dir):
# Dataset is not deployed. Deploy it. # Dataset is not deployed. Deploy it.
archive_fns = archive_fn archive_fns = archive_fn
...@@ -71,8 +72,12 @@ def extract_archive_file(archive_fn, im_dir): ...@@ -71,8 +72,12 @@ def extract_archive_file(archive_fn, im_dir):
def convert_coco_text_to_coco_detection_json( def convert_coco_text_to_coco_detection_json(
source_json, target_json, set_type=None, min_img_size=100, text_cat_id=1 source_json: str,
): target_json: str,
set_type: Optional[str] = None,
min_img_size: int = 100,
text_cat_id: int = 1,
) -> Dict:
""" """
This function converts a COCOText style JSON to a COCODetection style This function converts a COCOText style JSON to a COCODetection style
JSON. JSON.
...@@ -125,7 +130,7 @@ def convert_coco_text_to_coco_detection_json( ...@@ -125,7 +130,7 @@ def convert_coco_text_to_coco_detection_json(
return coco_text_json return coco_text_json
def valid_bbox(bbox_xywh, img_w, img_h): def valid_bbox(bbox_xywh: List[int], img_w: int, img_h: int) -> bool:
if ( if (
bbox_xywh is None bbox_xywh is None
or (bbox_xywh[3] == 0 or bbox_xywh[2] == 0) or (bbox_xywh[3] == 0 or bbox_xywh[2] == 0)
...@@ -136,7 +141,14 @@ def valid_bbox(bbox_xywh, img_w, img_h): ...@@ -136,7 +141,14 @@ def valid_bbox(bbox_xywh, img_w, img_h):
return True return True
def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): def convert_to_dict_list(
image_root: str,
id_map: Dict,
imgs: Dict,
anns: Dict,
dataset_name: Optional[str] = None,
image_direct_copy_keys: List[str] = None,
) -> List[Dict]:
num_instances_without_valid_segmentation = 0 num_instances_without_valid_segmentation = 0
num_instances_without_valid_bounding_box = 0 num_instances_without_valid_bounding_box = 0
dataset_dicts = [] dataset_dicts = []
...@@ -160,6 +172,13 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): ...@@ -160,6 +172,13 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
) )
record["file_name"] = img_dict["file_name"] record["file_name"] = img_dict["file_name"]
if image_direct_copy_keys:
for copy_key in image_direct_copy_keys:
assert (
copy_key in img_dict
), f"{copy_key} not in coco image dictionary entry"
record[copy_key] = img_dict[copy_key]
if "height" in img_dict or "width" in img_dict: if "height" in img_dict or "width" in img_dict:
record["height"] = img_dict["height"] record["height"] = img_dict["height"]
record["width"] = img_dict["width"] record["width"] = img_dict["width"]
...@@ -265,12 +284,12 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None): ...@@ -265,12 +284,12 @@ def convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name=None):
def coco_text_load( def coco_text_load(
coco_json_file, coco_json_file: str,
image_root, image_root: str,
source_json_file=None, source_json_file: Optional[str] = None,
dataset_name=None, dataset_name: Optional[str] = None,
archive_file=None, archive_file: Optional[str] = None,
): ) -> List[Dict]:
if archive_file is not None: if archive_file is not None:
if comm.get_rank() == 0: if comm.get_rank() == 0:
extract_archive_file(archive_file, image_root) extract_archive_file(archive_file, image_root)
...@@ -288,7 +307,13 @@ def coco_text_load( ...@@ -288,7 +307,13 @@ def coco_text_load(
) )
def extended_coco_load(json_file, image_root, dataset_name=None, loaded_json=None): def extended_coco_load(
json_file: str,
image_root: str,
dataset_name: Optional[str] = None,
loaded_json: Optional[str] = None,
image_direct_copy_keys: List[str] = None,
) -> List[Dict]:
""" """
Load a json file with COCO's annotation format. Load a json file with COCO's annotation format.
Currently only supports instance segmentation annotations. Currently only supports instance segmentation annotations.
...@@ -352,7 +377,14 @@ def extended_coco_load(json_file, image_root, dataset_name=None, loaded_json=Non ...@@ -352,7 +377,14 @@ def extended_coco_load(json_file, image_root, dataset_name=None, loaded_json=Non
logger.info("Loaded {} images from {}".format(len(imgs), json_file)) logger.info("Loaded {} images from {}".format(len(imgs), json_file))
# Return the coco converted to record list # Return the coco converted to record list
return convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name) return convert_to_dict_list(
image_root,
id_map,
imgs,
anns,
dataset_name,
image_direct_copy_keys=image_direct_copy_keys,
)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import copy import copy
import json import json
import os import os
import tempfile
import unittest import unittest
import d2go.data.extended_coco as extended_coco import d2go.data.extended_coco as extended_coco
...@@ -175,6 +176,29 @@ class TestD2GoDatasets(unittest.TestCase): ...@@ -175,6 +176,29 @@ class TestD2GoDatasets(unittest.TestCase):
self.assertEqual(dic["width"], 80) self.assertEqual(dic["width"], 80)
self.assertEqual(dic["height"], 60) self.assertEqual(dic["height"], 60)
@tempdir
def test_direct_copy_keys(self, tmp_dir):
    """extended_coco_load copies per-image keys only when explicitly requested."""
    image_dir, json_file = create_test_images_and_dataset_json(tmp_dir)

    with tempfile.NamedTemporaryFile(prefix=tmp_dir, suffix=".json") as h_temp:
        augmented_json = h_temp.name
        # Annotate every image entry with two extra keys.
        with open(json_file, "r") as h_in:
            dataset = json.load(h_in)
            for idx, img_entry in enumerate(dataset["images"]):
                img_entry["key1"] = idx
                img_entry["key2"] = idx
        with open(augmented_json, "w") as h_out:
            json.dump(dataset, h_out)

        # By default the extra keys are dropped from the loaded records.
        records = extended_coco.extended_coco_load(augmented_json, image_dir)
        self.assertTrue("key1" not in records[0])
        self.assertTrue("key2" not in records[0])

        # Requesting "key1" copies it through; "key2" is still dropped.
        records = extended_coco.extended_coco_load(
            augmented_json, image_dir, image_direct_copy_keys=["key1"]
        )
        self.assertTrue("key1" in records[0])
        self.assertTrue("key2" not in records[0])
@tempdir @tempdir
def test_sub_dataset(self, tmp_dir): def test_sub_dataset(self, tmp_dir):
image_dir, json_file = create_test_images_and_dataset_json(tmp_dir) image_dir, json_file = create_test_images_and_dataset_json(tmp_dir)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment