Commit 3144257c authored by mashun1's avatar mashun1
Browse files

catvton

parents
# Copyright (c) Facebook, Inc. and its affiliates.
from .compat import downgrade_config, upgrade_config
from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
from .instantiate import instantiate
from .lazy import LazyCall, LazyConfig
__all__ = [
"CfgNode",
"get_cfg",
"global_cfg",
"set_global_cfg",
"downgrade_config",
"upgrade_config",
"configurable",
"instantiate",
"LazyCall",
"LazyConfig",
]
from detectron2.utils.env import fixup_module_metadata
fixup_module_metadata(__name__, globals(), __all__)
del fixup_module_metadata
# Copyright (c) Facebook, Inc. and its affiliates.
"""
Backward compatibility of configs.
Instructions to bump version:
+ It's not needed to bump version if new keys are added.
It's only needed when backward-incompatible changes happen
(i.e., some existing keys disappear, or the meaning of a key changes)
+ To bump version, do the following:
1. Increment _C.VERSION in defaults.py
2. Add a converter in this file.
Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
and a function "downgrade" which in-place downgrades config from X to X-1
In each function, VERSION is left unchanged.
Each converter assumes that its input has the relevant keys
(i.e., the input is not a partial config).
3. Run the tests (test_config.py) to make sure the upgrade & downgrade
functions are consistent.
"""
import logging
from typing import List, Optional, Tuple
from .config import CfgNode as CN
from .defaults import _C
__all__ = ["upgrade_config", "downgrade_config"]
def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
"""
Upgrade a config from its current version to a newer version.
Args:
cfg (CfgNode):
to_version (int): defaults to the latest version.
"""
cfg = cfg.clone()
if to_version is None:
to_version = _C.VERSION
assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
cfg.VERSION, to_version
)
for k in range(cfg.VERSION, to_version):
converter = globals()["ConverterV" + str(k + 1)]
converter.upgrade(cfg)
cfg.VERSION = k + 1
return cfg
def downgrade_config(cfg: CN, to_version: int) -> CN:
"""
Downgrade a config from its current version to an older version.
Args:
cfg (CfgNode):
to_version (int):
Note:
A general downgrade of arbitrary configs is not always possible due to the
different functionalities in different versions.
The purpose of downgrade is only to recover the defaults in old versions,
allowing it to load an old partial yaml config.
Therefore, the implementation only needs to fill in the default values
in the old version when a general downgrade is not possible.
"""
cfg = cfg.clone()
assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
cfg.VERSION, to_version
)
for k in range(cfg.VERSION, to_version, -1):
converter = globals()["ConverterV" + str(k)]
converter.downgrade(cfg)
cfg.VERSION = k - 1
return cfg
def guess_version(cfg: CN, filename: str) -> int:
"""
Guess the version of a partial config where the VERSION field is not specified.
Returns the version, or the latest if cannot make a guess.
This makes it easier for users to migrate.
"""
logger = logging.getLogger(__name__)
def _has(name: str) -> bool:
cur = cfg
for n in name.split("."):
if n not in cur:
return False
cur = cur[n]
return True
# Most users' partial configs have "MODEL.WEIGHT", so guess on it
ret = None
if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"):
ret = 1
if ret is not None:
logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
else:
ret = _C.VERSION
logger.warning(
"Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
filename, ret
)
)
return ret
def _rename(cfg: CN, old: str, new: str) -> None:
old_keys = old.split(".")
new_keys = new.split(".")
def _set(key_seq: List[str], val: str) -> None:
cur = cfg
for k in key_seq[:-1]:
if k not in cur:
cur[k] = CN()
cur = cur[k]
cur[key_seq[-1]] = val
def _get(key_seq: List[str]) -> CN:
cur = cfg
for k in key_seq:
cur = cur[k]
return cur
def _del(key_seq: List[str]) -> None:
cur = cfg
for k in key_seq[:-1]:
cur = cur[k]
del cur[key_seq[-1]]
if len(cur) == 0 and len(key_seq) > 1:
_del(key_seq[:-1])
_set(new_keys, _get(old_keys))
_del(old_keys)
class _RenameConverter:
"""
A converter that handles simple rename.
"""
RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name)
@classmethod
def upgrade(cls, cfg: CN) -> None:
for old, new in cls.RENAME:
_rename(cfg, old, new)
@classmethod
def downgrade(cls, cfg: CN) -> None:
for old, new in cls.RENAME[::-1]:
_rename(cfg, new, old)
class ConverterV1(_RenameConverter):
RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]
class ConverterV2(_RenameConverter):
"""
A large bulk of rename, before public release.
"""
RENAME = [
("MODEL.WEIGHT", "MODEL.WEIGHTS"),
("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
(
"MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
"MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
),
(
"MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
"MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
),
(
"MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
"MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
),
("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
("TEST.AUG_ON", "TEST.AUG.ENABLED"),
("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
]
@classmethod
def upgrade(cls, cfg: CN) -> None:
super().upgrade(cfg)
if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
_rename(
cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
)
_rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
else:
_rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
_rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]
@classmethod
def downgrade(cls, cfg: CN) -> None:
super().downgrade(cfg)
_rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
_rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import functools
import inspect
import logging
from fvcore.common.config import CfgNode as _CfgNode
from detectron2.utils.file_io import PathManager
class CfgNode(_CfgNode):
"""
The same as `fvcore.common.config.CfgNode`, but different in:
1. Use unsafe yaml loading by default.
Note that this may lead to arbitrary code execution: you must not
load a config file from untrusted sources before manually inspecting
the content of the file.
2. Support config versioning.
When attempting to merge an old config, it will convert the old config automatically.
.. automethod:: clone
.. automethod:: freeze
.. automethod:: defrost
.. automethod:: is_frozen
.. automethod:: load_yaml_with_base
.. automethod:: merge_from_list
.. automethod:: merge_from_other_cfg
"""
@classmethod
def _open_cfg(cls, filename):
return PathManager.open(filename, "r")
# Note that the default value of allow_unsafe is changed to True
def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
"""
Load content from the given config file and merge it into self.
Args:
cfg_filename: config filename
allow_unsafe: allow unsafe yaml syntax
"""
assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
loaded_cfg = type(self)(loaded_cfg)
# defaults.py needs to import CfgNode
from .defaults import _C
latest_ver = _C.VERSION
assert (
latest_ver == self.VERSION
), "CfgNode.merge_from_file is only allowed on a config object of latest version!"
logger = logging.getLogger(__name__)
loaded_ver = loaded_cfg.get("VERSION", None)
if loaded_ver is None:
from .compat import guess_version
loaded_ver = guess_version(loaded_cfg, cfg_filename)
assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
loaded_ver, self.VERSION
)
if loaded_ver == self.VERSION:
self.merge_from_other_cfg(loaded_cfg)
else:
# compat.py needs to import CfgNode
from .compat import upgrade_config, downgrade_config
logger.warning(
"Loading an old v{} config file '{}' by automatically upgrading to v{}. "
"See docs/CHANGELOG.md for instructions to update your files.".format(
loaded_ver, cfg_filename, self.VERSION
)
)
# To convert, first obtain a full config at an old version
old_self = downgrade_config(self, to_version=loaded_ver)
old_self.merge_from_other_cfg(loaded_cfg)
new_config = upgrade_config(old_self)
self.clear()
self.update(new_config)
def dump(self, *args, **kwargs):
"""
Returns:
str: a yaml string representation of the config
"""
# to make it show up in docs
return super().dump(*args, **kwargs)
global_cfg = CfgNode()
def get_cfg() -> CfgNode:
"""
Get a copy of the default config.
Returns:
a detectron2 CfgNode instance.
"""
from .defaults import _C
return _C.clone()
def set_global_cfg(cfg: CfgNode) -> None:
"""
Let the global config point to the given cfg.
Assume that the given "cfg" has the key "KEY", after calling
`set_global_cfg(cfg)`, the key can be accessed by:
::
from detectron2.config import global_cfg
print(global_cfg.KEY)
By using a hacky global config, you can access these configs anywhere,
without having to pass the config object or the values deep into the code.
This is a hacky feature introduced for quick prototyping / research exploration.
"""
global global_cfg
global_cfg.clear()
global_cfg.update(cfg)
def configurable(init_func=None, *, from_config=None):
"""
Decorate a function or a class's __init__ method so that it can be called
with a :class:`CfgNode` object using a :func:`from_config` function that translates
:class:`CfgNode` to arguments.
Examples:
::
# Usage 1: Decorator on __init__:
class A:
@configurable
def __init__(self, a, b=2, c=3):
pass
@classmethod
def from_config(cls, cfg): # 'cfg' must be the first argument
# Returns kwargs to be passed to __init__
return {"a": cfg.A, "b": cfg.B}
a1 = A(a=1, b=2) # regular construction
a2 = A(cfg) # construct with a cfg
a3 = A(cfg, b=3, c=4) # construct with extra overwrite
# Usage 2: Decorator on any function. Needs an extra from_config argument:
@configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B})
def a_func(a, b=2, c=3):
pass
a1 = a_func(a=1, b=2) # regular call
a2 = a_func(cfg) # call with a cfg
a3 = a_func(cfg, b=3, c=4) # call with extra overwrite
Args:
init_func (callable): a class's ``__init__`` method in usage 1. The
class must have a ``from_config`` classmethod which takes `cfg` as
the first argument.
from_config (callable): the from_config function in usage 2. It must take `cfg`
as its first argument.
"""
if init_func is not None:
assert (
inspect.isfunction(init_func)
and from_config is None
and init_func.__name__ == "__init__"
), "Incorrect use of @configurable. Check API documentation for examples."
@functools.wraps(init_func)
def wrapped(self, *args, **kwargs):
try:
from_config_func = type(self).from_config
except AttributeError as e:
raise AttributeError(
"Class with @configurable must have a 'from_config' classmethod."
) from e
if not inspect.ismethod(from_config_func):
raise TypeError("Class with @configurable must have a 'from_config' classmethod.")
if _called_with_cfg(*args, **kwargs):
explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
init_func(self, **explicit_args)
else:
init_func(self, *args, **kwargs)
return wrapped
else:
if from_config is None:
return configurable # @configurable() is made equivalent to @configurable
assert inspect.isfunction(
from_config
), "from_config argument of configurable must be a function!"
def wrapper(orig_func):
@functools.wraps(orig_func)
def wrapped(*args, **kwargs):
if _called_with_cfg(*args, **kwargs):
explicit_args = _get_args_from_config(from_config, *args, **kwargs)
return orig_func(**explicit_args)
else:
return orig_func(*args, **kwargs)
wrapped.from_config = from_config
return wrapped
return wrapper
def _get_args_from_config(from_config_func, *args, **kwargs):
"""
Use `from_config` to obtain explicit arguments.
Returns:
dict: arguments to be used for cls.__init__
"""
signature = inspect.signature(from_config_func)
if list(signature.parameters.keys())[0] != "cfg":
if inspect.isfunction(from_config_func):
name = from_config_func.__name__
else:
name = f"{from_config_func.__self__}.from_config"
raise TypeError(f"{name} must take 'cfg' as the first argument!")
support_var_arg = any(
param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
for param in signature.parameters.values()
)
if support_var_arg: # forward all arguments to from_config, if from_config accepts them
ret = from_config_func(*args, **kwargs)
else:
# forward supported arguments to from_config
supported_arg_names = set(signature.parameters.keys())
extra_kwargs = {}
for name in list(kwargs.keys()):
if name not in supported_arg_names:
extra_kwargs[name] = kwargs.pop(name)
ret = from_config_func(*args, **kwargs)
# forward the other arguments to __init__
ret.update(extra_kwargs)
return ret
def _called_with_cfg(*args, **kwargs):
"""
Returns:
bool: whether the arguments contain CfgNode and should be considered
forwarded to from_config.
"""
from omegaconf import DictConfig
if len(args) and isinstance(args[0], (_CfgNode, DictConfig)):
return True
if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)):
return True
# `from_config`'s first argument is forced to be "cfg".
# So the above check covers all cases.
return False
# Copyright (c) Facebook, Inc. and its affiliates.
from .config import CfgNode as CN
# NOTE: given the new config system
# (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html),
# we will stop adding new functionalities to default CfgNode.
# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be either used for training or for testing, the
# corresponding name will be post-fixed by a _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = CN()
# The version number, to upgrade from old configs to new ones if any
# changes happen. It's recommended to keep a VERSION in your config file.
_C.VERSION = 2
_C.MODEL = CN()
_C.MODEL.LOAD_PROPOSALS = False
_C.MODEL.MASK_ON = False
_C.MODEL.KEYPOINT_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file
# to be loaded to the model. You can find available models in the model zoo.
_C.MODEL.WEIGHTS = ""
# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
# To train on images of different number of channels, just set different mean & std.
# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]
# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
# Please refer to ResizeShortestEdge for detailed definition.
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Sample size of smallest side by choice or random selection from range give by
# INPUT.MIN_SIZE_TRAIN
_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Mode for flipping images used in data augmentation during training
# choose one of ["horizontal, "vertical", "none"]
_C.INPUT.RANDOM_FLIP = "horizontal"
# `True` if cropping is used for data augmentation during training
_C.INPUT.CROP = CN({"ENABLED": False})
# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation.
_C.INPUT.CROP.TYPE = "relative_range"
# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
# pixels if CROP.TYPE is "absolute"
_C.INPUT.CROP.SIZE = [0.9, 0.9]
# Whether the model needs RGB, YUV, HSV etc.
# Should be one of the modes defined here, as we use PIL to read the image:
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
# with BGR being the one exception. One can set image format to BGR, we will
# internally use RGB for conversion and flip the channels over
_C.INPUT.FORMAT = "BGR"
# The ground truth mask format that the model will use.
# Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask"
# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training. Must be registered in DatasetCatalog
# Samples from these datasets will be merged and used as one dataset.
_C.DATASETS.TRAIN = ()
# List of the pre-computed proposal files for training, which must be consistent
# with datasets listed in DATASETS.TRAIN.
_C.DATASETS.PROPOSAL_FILES_TRAIN = ()
# Number of top scoring precomputed proposals to keep for training
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
# List of the dataset names for testing. Must be registered in DatasetCatalog
_C.DATASETS.TEST = ()
# List of the pre-computed proposal files for test, which must be consistent
# with datasets listed in DATASETS.TEST.
_C.DATASETS.PROPOSAL_FILES_TEST = ()
# Number of top scoring precomputed proposals to keep for test
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000
# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# Options: TrainingSampler, RepeatFactorTrainingSampler
_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
# Repeat threshold for RepeatFactorTrainingSampler
_C.DATALOADER.REPEAT_THRESHOLD = 0.0
# if True, take square root when computing repeating factor
_C.DATALOADER.REPEAT_SQRT = True
# Tf True, when working on datasets that have instance annotations, the
# training dataloader will filter out images without associated annotations
_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
# Freeze the first several stages so they are not trained.
# There are 5 stages in ResNet. The first is a convolution, and the following
# stages are each group of residual blocks.
_C.MODEL.BACKBONE.FREEZE_AT = 2
# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
# Names of the input feature maps to be used by FPN
# They must have contiguous power of 2 strides
# e.g., ["res2", "res3", "res4", "res5"]
_C.MODEL.FPN.IN_FEATURES = []
_C.MODEL.FPN.OUT_CHANNELS = 256
# Options: "" (no norm), "GN"
_C.MODEL.FPN.NORM = ""
# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
_C.MODEL.FPN.FUSE_TYPE = "sum"
# ---------------------------------------------------------------------------- #
# Proposal generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.PROPOSAL_GENERATOR = CN()
# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
# Proposal height and width both need to be greater than MIN_SIZE
# (a the scale used during training or inference)
_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0
# ---------------------------------------------------------------------------- #
# Anchor generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.ANCHOR_GENERATOR = CN()
# The generator can be any name in the ANCHOR_GENERATOR registry
_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for
# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1.
# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
# ratios are generated by an anchor generator.
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
# for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
# Anchor angles.
# list[list[float]], the angle in degrees, for each input feature map.
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
# Relative offset between the center of the first anchor and the top-left corner of the image
# Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
# The value is not expected to affect model accuracy.
_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0
# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY
# Names of the input feature maps to be used by RPN
# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
_C.MODEL.RPN.IN_FEATURES = ["res4"]
# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.BOUNDARY_THRESH = -1
# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example: 1)
# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example: 0)
# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
# are ignored (-1)
_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
_C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
# Number of regions per image used to train RPN
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
_C.MODEL.RPN.LOSS_WEIGHT = 1.0
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
# When FPN is used, this limit is applied per level and then again to the union
# of proposals from all levels
# NOTE: When FPN is used, the meaning of this config is different from Detectron1.
# It means per-batch topk in Detectron1, but per-image topk here.
# See the "find_top_rpn_proposals" function for details.
_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# Set this to -1 to use the same number of output channels as input channels.
_C.MODEL.RPN.CONV_DIMS = [-1]
# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
# Number of foreground classes
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
# Names of the input feature maps to be used by ROI heads
# Currently all heads (box, mask, ...) use the same input feature map list
# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
# IOU overlap ratios [IOU_THRESHOLD]
# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
# Total number of RoIs per training minibatch =
# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 16 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
# Only used on test mode
# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
# inference.
_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
# If True, augment proposals with ground-truth boxes before sampling proposals to
# train ROI heads.
_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True
# ---------------------------------------------------------------------------- #
# Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
# C4 don't use head name option
# Options for non-C4 models: FastRCNNConvFCHead,
_C.MODEL.ROI_BOX_HEAD.NAME = ""
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
# The final scaling coefficient on the box regression loss, used to balance the magnitude of its
# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
# Hidden layer dimension for FC layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
# Channel dimension for Conv layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_BOX_HEAD.NORM = ""
# Whether to use class agnostic for bbox regression
_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False
# Federated loss can be used to improve the training of LVIS
_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False
# Sigmoid cross entrophy is used with federated loss
_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False
# The power value applied to image_count when calcualting frequency weight
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5
# Number of classes to keep in total
_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50
# ---------------------------------------------------------------------------- #
# Cascaded Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
# The number of cascade stages is implicitly defined by the length of the following two configs.
_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
(10.0, 10.0, 5.0, 5.0),
(20.0, 20.0, 10.0, 10.0),
(30.0, 30.0, 15.0, 15.0),
)
_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)
# ---------------------------------------------------------------------------- #
# Mask Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head
_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_MASK_HEAD.NORM = ""
# Whether to use class agnostic for mask prediction
_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"
# ---------------------------------------------------------------------------- #
# Keypoint Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO.
# Images with too few (or no) keypoints are excluded from training.
_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
# Normalize by the total number of visible keypoints in the minibatch if True.
# Otherwise, normalize by the total number of keypoints that could ever exist
# in the minibatch.
# The keypoint softmax loss is only calculated on visible keypoints.
# Since the number of visible keypoints can vary significantly between
# minibatches, this has the effect of up-weighting the importance of
# minibatches with few visible keypoints. (Imagine the extreme case of
# only one visible keypoint versus N: in the case of N, each one
# contributes 1/N to the gradient compared to the single keypoint
# determining the gradient direction). Instead, we can normalize the
# loss by the total number of keypoints, if it were the case that all
# keypoints were visible in a full minibatch. (Returning to the example,
# this means that the one visible keypoint contributes as much as each
# of the N keypoints.)
_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
# Multi-task loss weight to use for keypoints
# Recommended values:
# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"
# ---------------------------------------------------------------------------- #
# Semantic Segmentation Head
# ---------------------------------------------------------------------------- #
_C.MODEL.SEM_SEG_HEAD = CN()
_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
# the correposnding pixel.
_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
# Number of classes in the semantic segmentation head
_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
# Number of channels in the 3x3 convs inside semantic-FPN heads.
_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
# Normalization method for the convolution layers. Options: "" (no norm), "GN".
_C.MODEL.SEM_SEG_HEAD.NORM = "GN"
_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0
_C.MODEL.PANOPTIC_FPN = CN()
# Scaling of all losses from instance detection / segmentation head.
_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0
# options when combining instance & semantic segmentation outputs
_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used
_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
# ---------------------------------------------------------------------------- #
# RetinaNet Head
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()
# This is the number of foreground classes.
_C.MODEL.RETINANET.NUM_CLASSES = 80
_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4
# IoU overlap ratio [bg, fg] for labeling anchors.
# Anchors with < bg are labeled negative (0)
# Anchors with >= bg and < fg are ignored (-1)
# Anchors with >= fg are labeled positive (1)
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
# Prior prob for rare case (i.e. foreground) at the beginning of training.
# This is used to set the bias for the logits layer of the classifier subnet.
# This improves training stability in the case of heavy class imbalance.
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
# Inference cls score threshold, only anchors with score > INFERENCE_TH are
# considered for inference (to improve speed)
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
# Select topk candidates before NMS
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Loss parameters
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
# Options are: "smooth_l1", "giou", "diou", "ciou"
_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
# One of BN, SyncBN, FrozenBN, GN
# Only supports GN until unshared norm is implemented
_C.MODEL.RETINANET.NORM = ""
# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt}
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.DEPTH = 50
_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1
# Options: FrozenBN, GN, "SyncBN", "BN"
_C.MODEL.RESNETS.NORM = "FrozenBN"
# Baseline width of each group.
# Scaling this parameters will scale the width of all bottleneck layers.
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1
# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet
# For R18 and R34, this needs to be set to 64
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
# Apply Deformable Convolution in stages
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
# Use False for DeformableV1.
_C.MODEL.RESNETS.DEFORM_MODULATED = False
# Number of groups in deformable conv.
_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
# Options: WarmupMultiStepLR, WarmupCosineLR.
# See detectron2/solver/build.py for definition.
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
_C.SOLVER.MAX_ITER = 40000
_C.SOLVER.BASE_LR = 0.001
# The end lr, only used by WarmupCosineLR
_C.SOLVER.BASE_LR_END = 0.0
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.NESTEROV = False
_C.SOLVER.WEIGHT_DECAY = 0.0001
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
_C.SOLVER.GAMMA = 0.1
# The iteration number to decrease learning rate by GAMMA.
_C.SOLVER.STEPS = (30000,)
# Number of decays in WarmupStepWithFixedGammaLR schedule
_C.SOLVER.NUM_DECAYS = 3
_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
_C.SOLVER.WARMUP_ITERS = 1000
_C.SOLVER.WARMUP_METHOD = "linear"
# Whether to rescale the interval for the learning schedule after warmup
_C.SOLVER.RESCALE_INTERVAL = False
# Save a checkpoint after every this number of iterations
_C.SOLVER.CHECKPOINT_PERIOD = 5000
# Number of images per batch across all machines. This is also the number
# of training images per step (i.e. per iteration). If we use 16 GPUs
# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch.
# May be adjusted automatically if REFERENCE_WORLD_SIZE is set.
_C.SOLVER.IMS_PER_BATCH = 16
# The reference number of workers (GPUs) this config is meant to train with.
# It takes no effect when set to 0.
# With a non-zero value, it will be used by DefaultTrainer to compute a desired
# per-worker batch size, and then scale the other related configs (total batch size,
# learning rate, etc) to match the per-worker batch size.
# See documentation of `DefaultTrainer.auto_scale_workers` for details:
_C.SOLVER.REFERENCE_WORLD_SIZE = 0
# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
# biases. This is not useful (at least for recent models). You should avoid
# changing these and they exist only to reproduce Detectron v1 training if
# desired.
_C.SOLVER.BIAS_LR_FACTOR = 1.0
_C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY
# Gradient clipping
_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
# Type of gradient clipping, currently 2 values are supported:
# - "value": the absolute values of elements of each gradients are clipped
# - "norm": the norm of the gradient for each parameter is clipped thus
# affecting all elements in the parameter
_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
# Maximum absolute value used for clipping gradients
_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
# Floating point number p for L-p norm to be used with the "norm"
# gradient clipping type; for L-inf, please specify .inf
_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
# Enable automatic mixed precision for training
# Note that this does not change model's inference behavior.
# To use AMP in inference, run inference under autocast()
_C.SOLVER.AMP = CN({"ENABLED": False})
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# For end-to-end tests to verify the expected accuracy.
# Each item is [task, metric, value, tolerance]
# e.g.: [['bbox', 'AP', 38.5, 0.2]]
_C.TEST.EXPECTED_RESULTS = []
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
_C.TEST.EVAL_PERIOD = 0
# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
# When empty, it will use the defaults in COCO.
# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
_C.TEST.KEYPOINT_OKS_SIGMAS = []
# Maximum number of detections to return per image during inference (100 is
# based on the limit established for the COCO dataset).
_C.TEST.DETECTIONS_PER_IMAGE = 100
_C.TEST.AUG = CN({"ENABLED": False})
_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
_C.TEST.AUG.MAX_SIZE = 4000
_C.TEST.AUG.FLIP = True
_C.TEST.PRECISE_BN = CN({"ENABLED": False})
_C.TEST.PRECISE_BN.NUM_ITER = 200
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Directory where output files are written
_C.OUTPUT_DIR = "./output"
# Set seed to negative to fully randomize everything.
# Set seed to positive to use a fixed seed. Note that a fixed seed increases
# reproducibility but does not guarantee fully deterministic behavior.
# Disabling all parallelism further increases reproducibility.
_C.SEED = -1
# Benchmark different cudnn algorithms.
# If input images have very different sizes, this option will have large overhead
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
# If input images have the same or similar sizes, benchmark is often helpful.
_C.CUDNN_BENCHMARK = False
# The period (in terms of steps) for minibatch visualization at train time.
# Set to 0 to disable.
_C.VIS_PERIOD = 0
# global config is for quick hack purposes.
# You can set them in command line or config files,
# and access it with:
#
# from detectron2.config import global_cfg
# print(global_cfg.HACK)
#
# Do not commit any configs into it.
_C.GLOBAL = CN()
_C.GLOBAL.HACK = 1.0
# Copyright (c) Facebook, Inc. and its affiliates.
import collections.abc as abc
import dataclasses
import logging
from typing import Any
from detectron2.utils.registry import _convert_target_to_string, locate
__all__ = ["dump_dataclass", "instantiate"]
def dump_dataclass(obj: Any):
"""
Dump a dataclass recursively into a dict that can be later instantiated.
Args:
obj: a dataclass object
Returns:
dict
"""
assert dataclasses.is_dataclass(obj) and not isinstance(
obj, type
), "dump_dataclass() requires an instance of a dataclass."
ret = {"_target_": _convert_target_to_string(type(obj))}
for f in dataclasses.fields(obj):
v = getattr(obj, f.name)
if dataclasses.is_dataclass(v):
v = dump_dataclass(v)
if isinstance(v, (list, tuple)):
v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v]
ret[f.name] = v
return ret
def instantiate(cfg):
"""
Recursively instantiate objects defined in dictionaries by
"_target_" and arguments.
Args:
cfg: a dict-like object with "_target_" that defines the caller, and
other keys that define the arguments
Returns:
object instantiated by cfg
"""
from omegaconf import ListConfig, DictConfig, OmegaConf
if isinstance(cfg, ListConfig):
lst = [instantiate(x) for x in cfg]
return ListConfig(lst, flags={"allow_objects": True})
if isinstance(cfg, list):
# Specialize for list, because many classes take
# list[objects] as arguments, such as ResNet, DatasetMapper
return [instantiate(x) for x in cfg]
# If input is a DictConfig backed by dataclasses (i.e. omegaconf's structured config),
# instantiate it to the actual dataclass.
if isinstance(cfg, DictConfig) and dataclasses.is_dataclass(cfg._metadata.object_type):
return OmegaConf.to_object(cfg)
if isinstance(cfg, abc.Mapping) and "_target_" in cfg:
# conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all,
# but faster: https://github.com/facebookresearch/hydra/issues/1200
cfg = {k: instantiate(v) for k, v in cfg.items()}
cls = cfg.pop("_target_")
cls = instantiate(cls)
if isinstance(cls, str):
cls_name = cls
cls = locate(cls_name)
assert cls is not None, cls_name
else:
try:
cls_name = cls.__module__ + "." + cls.__qualname__
except Exception:
# target could be anything, so the above could fail
cls_name = str(cls)
assert callable(cls), f"_target_ {cls} does not define a callable object"
try:
return cls(**cfg)
except TypeError:
logger = logging.getLogger(__name__)
logger.error(f"Error when instantiating {cls_name}!")
raise
return cfg # return as-is if don't know what to do
# Copyright (c) Facebook, Inc. and its affiliates.
import ast
import builtins
import collections.abc as abc
import importlib
import inspect
import logging
import os
import uuid
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import is_dataclass
from typing import List, Tuple, Union
import cloudpickle
import yaml
from omegaconf import DictConfig, ListConfig, OmegaConf, SCMode
from detectron2.utils.file_io import PathManager
from detectron2.utils.registry import _convert_target_to_string
__all__ = ["LazyCall", "LazyConfig"]
class LazyCall:
"""
Wrap a callable so that when it's called, the call will not be executed,
but returns a dict that describes the call.
LazyCall object has to be called with only keyword arguments. Positional
arguments are not yet supported.
Examples:
::
from detectron2.config import instantiate, LazyCall
layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32)
layer_cfg.out_channels = 64 # can edit it afterwards
layer = instantiate(layer_cfg)
"""
def __init__(self, target):
if not (callable(target) or isinstance(target, (str, abc.Mapping))):
raise TypeError(
f"target of LazyCall must be a callable or defines a callable! Got {target}"
)
self._target = target
def __call__(self, **kwargs):
if is_dataclass(self._target):
# omegaconf object cannot hold dataclass type
# https://github.com/omry/omegaconf/issues/784
target = _convert_target_to_string(self._target)
else:
target = self._target
kwargs["_target_"] = target
return DictConfig(content=kwargs, flags={"allow_objects": True})
def _visit_dict_config(cfg, func):
"""
Apply func recursively to all DictConfig in cfg.
"""
if isinstance(cfg, DictConfig):
func(cfg)
for v in cfg.values():
_visit_dict_config(v, func)
elif isinstance(cfg, ListConfig):
for v in cfg:
_visit_dict_config(v, func)
def _validate_py_syntax(filename):
# see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
with PathManager.open(filename, "r") as f:
content = f.read()
try:
ast.parse(content)
except SyntaxError as e:
raise SyntaxError(f"Config file {filename} has syntax error!") from e
def _cast_to_config(obj):
# if given a dict, return DictConfig instead
if isinstance(obj, dict):
return DictConfig(obj, flags={"allow_objects": True})
return obj
_CFG_PACKAGE_NAME = "detectron2._cfg_loader"
"""
A namespace to put all imported config into.
"""
def _random_package_name(filename):
# generate a random package name when loading config files
return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename)
@contextmanager
def _patch_import():
"""
Enhance relative import statements in config files, so that they:
1. locate files purely based on relative location, regardless of packages.
e.g. you can import file without having __init__
2. do not cache modules globally; modifications of module states has no side effect
3. support other storage system through PathManager, so config files can be in the cloud
4. imported dict are turned into omegaconf.DictConfig automatically
"""
old_import = builtins.__import__
def find_relative_file(original_file, relative_import_path, level):
# NOTE: "from . import x" is not handled. Because then it's unclear
# if such import should produce `x` as a python module or DictConfig.
# This can be discussed further if needed.
relative_import_err = """
Relative import of directories is not allowed within config files.
Within a config file, relative import can only import other config files.
""".replace(
"\n", " "
)
if not len(relative_import_path):
raise ImportError(relative_import_err)
cur_file = os.path.dirname(original_file)
for _ in range(level - 1):
cur_file = os.path.dirname(cur_file)
cur_name = relative_import_path.lstrip(".")
for part in cur_name.split("."):
cur_file = os.path.join(cur_file, part)
if not cur_file.endswith(".py"):
cur_file += ".py"
if not PathManager.isfile(cur_file):
cur_file_no_suffix = cur_file[: -len(".py")]
if PathManager.isdir(cur_file_no_suffix):
raise ImportError(f"Cannot import from {cur_file_no_suffix}." + relative_import_err)
else:
raise ImportError(
f"Cannot import name {relative_import_path} from "
f"{original_file}: {cur_file} does not exist."
)
return cur_file
def new_import(name, globals=None, locals=None, fromlist=(), level=0):
if (
# Only deal with relative imports inside config files
level != 0
and globals is not None
and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME)
):
cur_file = find_relative_file(globals["__file__"], name, level)
_validate_py_syntax(cur_file)
spec = importlib.machinery.ModuleSpec(
_random_package_name(cur_file), None, origin=cur_file
)
module = importlib.util.module_from_spec(spec)
module.__file__ = cur_file
with PathManager.open(cur_file) as f:
content = f.read()
exec(compile(content, cur_file, "exec"), module.__dict__)
for name in fromlist: # turn imported dict into DictConfig automatically
val = _cast_to_config(module.__dict__[name])
module.__dict__[name] = val
return module
return old_import(name, globals, locals, fromlist=fromlist, level=level)
builtins.__import__ = new_import
yield new_import
builtins.__import__ = old_import
class LazyConfig:
"""
Provide methods to save, load, and overrides an omegaconf config object
which may contain definition of lazily-constructed objects.
"""
@staticmethod
def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
"""
Similar to :meth:`load()`, but load path relative to the caller's
source file.
This has the same functionality as a relative import, except that this method
accepts filename as a string, so more characters are allowed in the filename.
"""
caller_frame = inspect.stack()[1]
caller_fname = caller_frame[0].f_code.co_filename
assert caller_fname != "<string>", "load_rel Unable to find caller"
caller_dir = os.path.dirname(caller_fname)
filename = os.path.join(caller_dir, filename)
return LazyConfig.load(filename, keys)
@staticmethod
def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None):
"""
Load a config file.
Args:
filename: absolute path or relative path w.r.t. the current working directory
keys: keys to load and return. If not given, return all keys
(whose values are config objects) in a dict.
"""
has_keys = keys is not None
filename = filename.replace("/./", "/") # redundant
if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]:
raise ValueError(f"Config file {filename} has to be a python or yaml file.")
if filename.endswith(".py"):
_validate_py_syntax(filename)
with _patch_import():
# Record the filename
module_namespace = {
"__file__": filename,
"__package__": _random_package_name(filename),
}
with PathManager.open(filename) as f:
content = f.read()
# Compile first with filename to:
# 1. make filename appears in stacktrace
# 2. make load_rel able to find its parent's (possibly remote) location
exec(compile(content, filename, "exec"), module_namespace)
ret = module_namespace
else:
with PathManager.open(filename) as f:
obj = yaml.unsafe_load(f)
ret = OmegaConf.create(obj, flags={"allow_objects": True})
if has_keys:
if isinstance(keys, str):
return _cast_to_config(ret[keys])
else:
return tuple(_cast_to_config(ret[a]) for a in keys)
else:
if filename.endswith(".py"):
# when not specified, only load those that are config objects
ret = DictConfig(
{
name: _cast_to_config(value)
for name, value in ret.items()
if isinstance(value, (DictConfig, ListConfig, dict))
and not name.startswith("_")
},
flags={"allow_objects": True},
)
return ret
@staticmethod
def save(cfg, filename: str):
"""
Save a config object to a yaml file.
Note that when the config dictionary contains complex objects (e.g. lambda),
it can't be saved to yaml. In that case we will print an error and
attempt to save to a pkl file instead.
Args:
cfg: an omegaconf config object
filename: yaml file name to save the config file
"""
logger = logging.getLogger(__name__)
try:
cfg = deepcopy(cfg)
except Exception:
pass
else:
# if it's deep-copyable, then...
def _replace_type_by_name(x):
if "_target_" in x and callable(x._target_):
try:
x._target_ = _convert_target_to_string(x._target_)
except AttributeError:
pass
# not necessary, but makes yaml looks nicer
_visit_dict_config(cfg, _replace_type_by_name)
save_pkl = False
try:
dict = OmegaConf.to_container(
cfg,
# Do not resolve interpolation when saving, i.e. do not turn ${a} into
# actual values when saving.
resolve=False,
# Save structures (dataclasses) in a format that can be instantiated later.
# Without this option, the type information of the dataclass will be erased.
structured_config_mode=SCMode.INSTANTIATE,
)
dumped = yaml.dump(dict, default_flow_style=None, allow_unicode=True, width=9999)
with PathManager.open(filename, "w") as f:
f.write(dumped)
try:
_ = yaml.unsafe_load(dumped) # test that it is loadable
except Exception:
logger.warning(
"The config contains objects that cannot serialize to a valid yaml. "
f"{filename} is human-readable but cannot be loaded."
)
save_pkl = True
except Exception:
logger.exception("Unable to serialize the config to yaml. Error:")
save_pkl = True
if save_pkl:
new_filename = filename + ".pkl"
try:
# retry by pickle
with PathManager.open(new_filename, "wb") as f:
cloudpickle.dump(cfg, f)
logger.warning(f"Config is saved using cloudpickle at {new_filename}.")
except Exception:
pass
@staticmethod
def apply_overrides(cfg, overrides: List[str]):
"""
In-place override contents of cfg.
Args:
cfg: an omegaconf config object
overrides: list of strings in the format of "a=b" to override configs.
See https://hydra.cc/docs/next/advanced/override_grammar/basic/
for syntax.
Returns:
the cfg object
"""
def safe_update(cfg, key, value):
parts = key.split(".")
for idx in range(1, len(parts)):
prefix = ".".join(parts[:idx])
v = OmegaConf.select(cfg, prefix, default=None)
if v is None:
break
if not OmegaConf.is_config(v):
raise KeyError(
f"Trying to update key {key}, but {prefix} "
f"is not a config, but has type {type(v)}."
)
OmegaConf.update(cfg, key, value, merge=True)
try:
from hydra.core.override_parser.overrides_parser import OverridesParser
has_hydra = True
except ImportError:
has_hydra = False
if has_hydra:
parser = OverridesParser.create()
overrides = parser.parse_overrides(overrides)
for o in overrides:
key = o.key_or_group
value = o.value()
if o.is_delete():
# TODO support this
raise NotImplementedError("deletion is not yet a supported override")
safe_update(cfg, key, value)
else:
# Fallback. Does not support all the features and error checking like hydra.
for o in overrides:
key, value = o.split("=")
try:
value = ast.literal_eval(value)
except NameError:
pass
safe_update(cfg, key, value)
return cfg
@staticmethod
def to_py(cfg, prefix: str = "cfg."):
"""
Try to convert a config object into Python-like psuedo code.
Note that perfect conversion is not always possible. So the returned
results are mainly meant to be human-readable, and not meant to be executed.
Args:
cfg: an omegaconf config object
prefix: root name for the resulting code (default: "cfg.")
Returns:
str of formatted Python code
"""
import black
cfg = OmegaConf.to_container(cfg, resolve=True)
def _to_str(obj, prefix=None, inside_call=False):
if prefix is None:
prefix = []
if isinstance(obj, abc.Mapping) and "_target_" in obj:
# Dict representing a function call
target = _convert_target_to_string(obj.pop("_target_"))
args = []
for k, v in sorted(obj.items()):
args.append(f"{k}={_to_str(v, inside_call=True)}")
args = ", ".join(args)
call = f"{target}({args})"
return "".join(prefix) + call
elif isinstance(obj, abc.Mapping) and not inside_call:
# Dict that is not inside a call is a list of top-level config objects that we
# render as one object per line with dot separated prefixes
key_list = []
for k, v in sorted(obj.items()):
if isinstance(v, abc.Mapping) and "_target_" not in v:
key_list.append(_to_str(v, prefix=prefix + [k + "."]))
else:
key = "".join(prefix) + k
key_list.append(f"{key}={_to_str(v)}")
return "\n".join(key_list)
elif isinstance(obj, abc.Mapping):
# Dict that is inside a call is rendered as a regular dict
return (
"{"
+ ",".join(
f"{repr(k)}: {_to_str(v, inside_call=inside_call)}"
for k, v in sorted(obj.items())
)
+ "}"
)
elif isinstance(obj, list):
return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]"
else:
return repr(obj)
py_str = _to_str(cfg, prefix=[prefix])
try:
return black.format_str(py_str, mode=black.Mode())
except black.InvalidInput:
return py_str
# Copyright (c) Facebook, Inc. and its affiliates.
from . import transforms # isort:skip
from .build import (
build_batch_data_loader,
build_detection_test_loader,
build_detection_train_loader,
get_detection_dataset_dicts,
load_proposals_into_dataset,
print_instances_class_histogram,
)
from .catalog import DatasetCatalog, MetadataCatalog, Metadata
from .common import DatasetFromList, MapDataset, ToIterableDataset
from .dataset_mapper import DatasetMapper
# ensure the builtin datasets are registered
from . import datasets, samplers # isort:skip
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import numpy as np
from itertools import count
from typing import List, Tuple
import torch
import tqdm
from fvcore.common.timer import Timer
from detectron2.utils import comm
from .build import build_batch_data_loader
from .common import DatasetFromList, MapDataset
from .samplers import TrainingSampler
logger = logging.getLogger(__name__)
class _EmptyMapDataset(torch.utils.data.Dataset):
"""
Map anything to emptiness.
"""
def __init__(self, dataset):
self.ds = dataset
def __len__(self):
return len(self.ds)
def __getitem__(self, idx):
_ = self.ds[idx]
return [0]
def iter_benchmark(
iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
) -> Tuple[float, List[float]]:
"""
Benchmark an iterator/iterable for `num_iter` iterations with an extra
`warmup` iterations of warmup.
End early if `max_time_seconds` time is spent on iterations.
Returns:
float: average time (seconds) per iteration
list[float]: time spent on each iteration. Sometimes useful for further analysis.
"""
num_iter, warmup = int(num_iter), int(warmup)
iterator = iter(iterator)
for _ in range(warmup):
next(iterator)
timer = Timer()
all_times = []
for curr_iter in tqdm.trange(num_iter):
start = timer.seconds()
if start > max_time_seconds:
num_iter = curr_iter
break
next(iterator)
all_times.append(timer.seconds() - start)
avg = timer.seconds() / num_iter
return avg, all_times
class DataLoaderBenchmark:
"""
Some common benchmarks that help understand perf bottleneck of a standard dataloader
made of dataset, mapper and sampler.
"""
def __init__(
self,
dataset,
*,
mapper,
sampler=None,
total_batch_size,
num_workers=0,
max_time_seconds: int = 90,
):
"""
Args:
max_time_seconds (int): maximum time to spent for each benchmark
other args: same as in `build.py:build_detection_train_loader`
"""
if isinstance(dataset, list):
dataset = DatasetFromList(dataset, copy=False, serialize=True)
if sampler is None:
sampler = TrainingSampler(len(dataset))
self.dataset = dataset
self.mapper = mapper
self.sampler = sampler
self.total_batch_size = total_batch_size
self.num_workers = num_workers
self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size()
self.max_time_seconds = max_time_seconds
def _benchmark(self, iterator, num_iter, warmup, msg=None):
avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds)
if msg is not None:
self._log_time(msg, avg, all_times)
return avg, all_times
def _log_time(self, msg, avg, all_times, distributed=False):
percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]]
if not distributed:
logger.info(
f"{msg}: avg={1.0/avg:.1f} it/s, "
f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
)
return
avg_per_gpu = comm.all_gather(avg)
percentiles_per_gpu = comm.all_gather(percentiles)
if comm.get_rank() > 0:
return
for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
logger.info(
f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, "
f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
)
def benchmark_dataset(self, num_iter, warmup=5):
"""
Benchmark the speed of taking raw samples from the dataset.
"""
def loader():
while True:
for k in self.sampler:
yield self.dataset[k]
self._benchmark(loader(), num_iter, warmup, "Dataset Alone")
def benchmark_mapper(self, num_iter, warmup=5):
"""
Benchmark the speed of taking raw samples from the dataset and map
them in a single process.
"""
def loader():
while True:
for k in self.sampler:
yield self.mapper(self.dataset[k])
self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)")
def benchmark_workers(self, num_iter, warmup=10):
"""
Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers].
"""
candidates = [0, 1]
if self.num_workers not in candidates:
candidates.append(self.num_workers)
dataset = MapDataset(self.dataset, self.mapper)
for n in candidates:
loader = build_batch_data_loader(
dataset,
self.sampler,
self.total_batch_size,
num_workers=n,
)
self._benchmark(
iter(loader),
num_iter * max(n, 1),
warmup * max(n, 1),
f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})",
)
del loader
def benchmark_IPC(self, num_iter, warmup=10):
"""
Benchmark the dataloader where each worker outputs nothing. This
eliminates the IPC overhead compared to the regular dataloader.
PyTorch multiprocessing's IPC only optimizes for torch tensors.
Large numpy arrays or other data structure may incur large IPC overhead.
"""
n = self.num_workers
dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper))
loader = build_batch_data_loader(
dataset, self.sampler, self.total_batch_size, num_workers=n
)
self._benchmark(
iter(loader),
num_iter * max(n, 1),
warmup * max(n, 1),
f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm",
)
def benchmark_distributed(self, num_iter, warmup=10):
"""
Benchmark the dataloader in each distributed worker, and log results of
all workers. This helps understand the final performance as well as
the variances among workers.
It also prints startup time (first iter) of the dataloader.
"""
gpu = comm.get_world_size()
dataset = MapDataset(self.dataset, self.mapper)
n = self.num_workers
loader = build_batch_data_loader(
dataset, self.sampler, self.total_batch_size, num_workers=n
)
timer = Timer()
loader = iter(loader)
next(loader)
startup_time = timer.seconds()
logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))
comm.synchronize()
avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
del loader
self._log_time(
f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
avg,
all_times,
True,
)
# Copyright (c) Facebook, Inc. and its affiliates.
import itertools
import logging
import numpy as np
import operator
import pickle
from collections import OrderedDict, defaultdict
from typing import Any, Callable, Dict, List, Optional, Union
import torch
import torch.utils.data as torchdata
from tabulate import tabulate
from termcolor import colored
from detectron2.config import configurable
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.env import seed_all_rng
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import _log_api_usage, log_first_n
from .catalog import DatasetCatalog, MetadataCatalog
from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset
from .dataset_mapper import DatasetMapper
from .detection_utils import check_metadata_consistency
from .samplers import (
InferenceSampler,
RandomSubsetTrainingSampler,
RepeatFactorTrainingSampler,
TrainingSampler,
)
"""
This file contains the default logic to build a dataloader for training or testing.
"""
__all__ = [
"build_batch_data_loader",
"build_detection_train_loader",
"build_detection_test_loader",
"get_detection_dataset_dicts",
"load_proposals_into_dataset",
"print_instances_class_histogram",
]
def filter_images_with_only_crowd_annotations(dataset_dicts):
"""
Filter out images with none annotations or only crowd annotations
(i.e., images without non-crowd annotations).
A common training-time preprocessing on COCO dataset.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
Returns:
list[dict]: the same format, but filtered.
"""
num_before = len(dataset_dicts)
def valid(anns):
for ann in anns:
if ann.get("iscrowd", 0) == 0:
return True
return False
dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
num_after = len(dataset_dicts)
logger = logging.getLogger(__name__)
logger.info(
"Removed {} images with no usable annotations. {} images left.".format(
num_before - num_after, num_after
)
)
return dataset_dicts
def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
"""
Filter out images with too few number of keypoints.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
Returns:
list[dict]: the same format as dataset_dicts, but filtered.
"""
num_before = len(dataset_dicts)
def visible_keypoints_in_image(dic):
# Each keypoints field has the format [x1, y1, v1, ...], where v is visibility
annotations = dic["annotations"]
return sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in annotations
if "keypoints" in ann
)
dataset_dicts = [
x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image
]
num_after = len(dataset_dicts)
logger = logging.getLogger(__name__)
logger.info(
"Removed {} images with fewer than {} keypoints.".format(
num_before - num_after, min_keypoints_per_image
)
)
return dataset_dicts
def load_proposals_into_dataset(dataset_dicts, proposal_file):
"""
Load precomputed object proposals into the dataset.
The proposal file should be a pickled dict with the following keys:
- "ids": list[int] or list[str], the image ids
- "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
- "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
corresponding to the boxes.
- "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
proposal_file (str): file path of pre-computed proposals, in pkl format.
Returns:
list[dict]: the same format as dataset_dicts, but added proposal field.
"""
logger = logging.getLogger(__name__)
logger.info("Loading proposals from: {}".format(proposal_file))
with PathManager.open(proposal_file, "rb") as f:
proposals = pickle.load(f, encoding="latin1")
# Rename the key names in D1 proposal files
rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
for key in rename_keys:
if key in proposals:
proposals[rename_keys[key]] = proposals.pop(key)
# Fetch the indexes of all proposals that are in the dataset
# Convert image_id to str since they could be int.
img_ids = set({str(record["image_id"]) for record in dataset_dicts})
id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids}
# Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS
for record in dataset_dicts:
# Get the index of the proposal
i = id_to_index[str(record["image_id"])]
boxes = proposals["boxes"][i]
objectness_logits = proposals["objectness_logits"][i]
# Sort the proposals in descending order of the scores
inds = objectness_logits.argsort()[::-1]
record["proposal_boxes"] = boxes[inds]
record["proposal_objectness_logits"] = objectness_logits[inds]
record["proposal_bbox_mode"] = bbox_mode
return dataset_dicts
def print_instances_class_histogram(dataset_dicts, class_names):
"""
Args:
dataset_dicts (list[dict]): list of dataset dicts.
class_names (list[str]): list of class names (zero-indexed).
"""
num_classes = len(class_names)
hist_bins = np.arange(num_classes + 1)
histogram = np.zeros((num_classes,), dtype=int)
for entry in dataset_dicts:
annos = entry["annotations"]
classes = np.asarray(
[x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=int
)
if len(classes):
assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}"
assert (
classes.max() < num_classes
), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
histogram += np.histogram(classes, bins=hist_bins)[0]
N_COLS = min(6, len(class_names) * 2)
def short_name(x):
# make long class names shorter. useful for lvis
if len(x) > 13:
return x[:11] + ".."
return x
data = list(
itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
)
total_num_instances = sum(data[1::2])
data.extend([None] * (N_COLS - (len(data) % N_COLS)))
if num_classes > 1:
data.extend(["total", total_num_instances])
data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
table = tabulate(
data,
headers=["category", "#instances"] * (N_COLS // 2),
tablefmt="pipe",
numalign="left",
stralign="center",
)
log_first_n(
logging.INFO,
"Distribution of instances among all {} categories:\n".format(num_classes)
+ colored(table, "cyan"),
key="message",
)
def get_detection_dataset_dicts(
names,
filter_empty=True,
min_keypoints=0,
proposal_files=None,
check_consistency=True,
):
"""
Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
Args:
names (str or list[str]): a dataset name or a list of dataset names
filter_empty (bool): whether to filter out images without instance annotations
min_keypoints (int): filter out images with fewer keypoints than
`min_keypoints`. Set to 0 to do nothing.
proposal_files (list[str]): if given, a list of object proposal files
that match each dataset in `names`.
check_consistency (bool): whether to check if datasets have consistent metadata.
Returns:
list[dict]: a list of dicts following the standard dataset dict format.
"""
if isinstance(names, str):
names = [names]
assert len(names), names
available_datasets = DatasetCatalog.keys()
names_set = set(names)
if not names_set.issubset(available_datasets):
logger = logging.getLogger(__name__)
logger.warning(
"The following dataset names are not registered in the DatasetCatalog: "
f"{names_set - available_datasets}. "
f"Available datasets are {available_datasets}"
)
dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names]
if isinstance(dataset_dicts[0], torchdata.Dataset):
if len(dataset_dicts) > 1:
# ConcatDataset does not work for iterable style dataset.
# We could support concat for iterable as well, but it's often
# not a good idea to concat iterables anyway.
return torchdata.ConcatDataset(dataset_dicts)
return dataset_dicts[0]
for dataset_name, dicts in zip(names, dataset_dicts):
assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
if proposal_files is not None:
assert len(names) == len(proposal_files)
# load precomputed proposals from proposal files
dataset_dicts = [
load_proposals_into_dataset(dataset_i_dicts, proposal_file)
for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
]
dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
has_instances = "annotations" in dataset_dicts[0]
if filter_empty and has_instances:
dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
if min_keypoints > 0 and has_instances:
dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)
if check_consistency and has_instances:
try:
class_names = MetadataCatalog.get(names[0]).thing_classes
check_metadata_consistency("thing_classes", names)
print_instances_class_histogram(dataset_dicts, class_names)
except AttributeError: # class names are not available for this dataset
pass
assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names))
return dataset_dicts
def build_batch_data_loader(
dataset,
sampler,
total_batch_size,
*,
aspect_ratio_grouping=False,
num_workers=0,
collate_fn=None,
drop_last: bool = True,
single_gpu_batch_size=None,
prefetch_factor=2,
persistent_workers=False,
pin_memory=False,
seed=None,
**kwargs,
):
"""
Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are:
1. support aspect ratio grouping options
2. use no "batch collation", because this is common for detection training
Args:
dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset.
sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices.
Must be provided iff. ``dataset`` is a map-style dataset.
total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see
:func:`build_detection_train_loader`.
single_gpu_batch_size: You can specify either `single_gpu_batch_size` or `total_batch_size`.
`single_gpu_batch_size` specifies the batch size that will be used for each gpu/process.
`total_batch_size` allows you to specify the total aggregate batch size across gpus.
It is an error to supply a value for both.
drop_last (bool): if ``True``, the dataloader will drop incomplete batches.
Returns:
iterable[list]. Length of each list is the batch size of the current
GPU. Each element in the list comes from the dataset.
"""
if single_gpu_batch_size:
if total_batch_size:
raise ValueError(
"""total_batch_size and single_gpu_batch_size are mutually incompatible.
Please specify only one. """
)
batch_size = single_gpu_batch_size
else:
world_size = get_world_size()
assert (
total_batch_size > 0 and total_batch_size % world_size == 0
), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
total_batch_size, world_size
)
batch_size = total_batch_size // world_size
logger = logging.getLogger(__name__)
logger.info("Making batched data loader with batch_size=%d", batch_size)
if isinstance(dataset, torchdata.IterableDataset):
assert sampler is None, "sampler must be None if dataset is IterableDataset"
else:
dataset = ToIterableDataset(dataset, sampler, shard_chunk_size=batch_size)
generator = None
if seed is not None:
generator = torch.Generator()
generator.manual_seed(seed)
if aspect_ratio_grouping:
assert drop_last, "Aspect ratio grouping will drop incomplete batches."
data_loader = torchdata.DataLoader(
dataset,
num_workers=num_workers,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
prefetch_factor=prefetch_factor if num_workers > 0 else None,
persistent_workers=persistent_workers,
pin_memory=pin_memory,
generator=generator,
**kwargs,
) # yield individual mapped dict
data_loader = AspectRatioGroupedDataset(data_loader, batch_size)
if collate_fn is None:
return data_loader
return MapDataset(data_loader, collate_fn)
else:
return torchdata.DataLoader(
dataset,
batch_size=batch_size,
drop_last=drop_last,
num_workers=num_workers,
collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
worker_init_fn=worker_init_reset_seed,
prefetch_factor=prefetch_factor if num_workers > 0 else None,
persistent_workers=persistent_workers,
pin_memory=pin_memory,
generator=generator,
**kwargs,
)
def _get_train_datasets_repeat_factors(cfg) -> Dict[str, float]:
repeat_factors = cfg.DATASETS.TRAIN_REPEAT_FACTOR
assert all(len(tup) == 2 for tup in repeat_factors)
name_to_weight = defaultdict(lambda: 1, dict(repeat_factors))
# The sampling weights map should only contain datasets in train config
unrecognized = set(name_to_weight.keys()) - set(cfg.DATASETS.TRAIN)
assert not unrecognized, f"unrecognized datasets: {unrecognized}"
logger = logging.getLogger(__name__)
logger.info(f"Found repeat factors: {list(name_to_weight.items())}")
# pyre-fixme[7]: Expected `Dict[str, float]` but got `DefaultDict[typing.Any, int]`.
return name_to_weight
def _build_weighted_sampler(cfg, enable_category_balance=False):
dataset_repeat_factors = _get_train_datasets_repeat_factors(cfg)
# OrderedDict to guarantee order of values() consistent with repeat factors
dataset_name_to_dicts = OrderedDict(
{
name: get_detection_dataset_dicts(
[name],
filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
min_keypoints=(
cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
if cfg.MODEL.KEYPOINT_ON
else 0
),
proposal_files=(
cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None
),
)
for name in cfg.DATASETS.TRAIN
}
)
# Repeat factor for every sample in the dataset
repeat_factors = [
[dataset_repeat_factors[dsname]] * len(dataset_name_to_dicts[dsname])
for dsname in cfg.DATASETS.TRAIN
]
repeat_factors = list(itertools.chain.from_iterable(repeat_factors))
repeat_factors = torch.tensor(repeat_factors)
logger = logging.getLogger(__name__)
if enable_category_balance:
"""
1. Calculate repeat factors using category frequency for each dataset and then merge them.
2. Element wise dot producting the dataset frequency repeat factors with
the category frequency repeat factors gives the final repeat factors.
"""
category_repeat_factors = [
RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
dataset_dict, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT
)
for dataset_dict in dataset_name_to_dicts.values()
]
# flatten the category repeat factors from all datasets
category_repeat_factors = list(itertools.chain.from_iterable(category_repeat_factors))
category_repeat_factors = torch.tensor(category_repeat_factors)
repeat_factors = torch.mul(category_repeat_factors, repeat_factors)
repeat_factors = repeat_factors / torch.min(repeat_factors)
logger.info(
"Using WeightedCategoryTrainingSampler with repeat_factors={}".format(
cfg.DATASETS.TRAIN_REPEAT_FACTOR
)
)
else:
logger.info(
"Using WeightedTrainingSampler with repeat_factors={}".format(
cfg.DATASETS.TRAIN_REPEAT_FACTOR
)
)
sampler = RepeatFactorTrainingSampler(repeat_factors)
return sampler
def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
if dataset is None:
dataset = get_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
min_keypoints=(
cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE if cfg.MODEL.KEYPOINT_ON else 0
),
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
_log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])
if mapper is None:
mapper = DatasetMapper(cfg, True)
if sampler is None:
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
if isinstance(dataset, torchdata.IterableDataset):
logger.info("Not using any sampler since the dataset is IterableDataset.")
sampler = None
else:
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
dataset, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT
)
sampler = RepeatFactorTrainingSampler(repeat_factors, seed=cfg.SEED)
elif sampler_name == "RandomSubsetTrainingSampler":
sampler = RandomSubsetTrainingSampler(
len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO
)
elif sampler_name == "WeightedTrainingSampler":
sampler = _build_weighted_sampler(cfg)
elif sampler_name == "WeightedCategoryTrainingSampler":
sampler = _build_weighted_sampler(cfg, enable_category_balance=True)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
return {
"dataset": dataset,
"sampler": sampler,
"mapper": mapper,
"total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
"aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING,
"num_workers": cfg.DATALOADER.NUM_WORKERS,
}
@configurable(from_config=_train_loader_from_config)
def build_detection_train_loader(
dataset,
*,
mapper,
sampler=None,
total_batch_size,
aspect_ratio_grouping=True,
num_workers=0,
collate_fn=None,
**kwargs,
):
"""
Build a dataloader for object detection with some default features.
Args:
dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
or a pytorch dataset (either map-style or iterable). It can be obtained
by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces
indices to be applied on ``dataset``.
If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`,
which coordinates an infinite random shuffle sequence across all workers.
Sampler must be None if ``dataset`` is iterable.
total_batch_size (int): total batch size across all workers.
aspect_ratio_grouping (bool): whether to group images with similar
aspect ratio for efficiency. When enabled, it requires each
element in dataset be a dict with keys "width" and "height".
num_workers (int): number of parallel data loading workers
collate_fn: a function that determines how to do batching, same as the argument of
`torch.utils.data.DataLoader`. Defaults to do no collation and return a list of
data. No collation is OK for small batch size and simple data structures.
If your batch size is large and each sample contains too many small tensors,
it's more efficient to collate them in data loader.
Returns:
torch.utils.data.DataLoader:
a dataloader. Each output from it is a ``list[mapped_element]`` of length
``total_batch_size / num_workers``, where ``mapped_element`` is produced
by the ``mapper``.
"""
if isinstance(dataset, list):
dataset = DatasetFromList(dataset, copy=False)
if mapper is not None:
dataset = MapDataset(dataset, mapper)
if isinstance(dataset, torchdata.IterableDataset):
assert sampler is None, "sampler must be None if dataset is IterableDataset"
else:
if sampler is None:
sampler = TrainingSampler(len(dataset))
assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}"
return build_batch_data_loader(
dataset,
sampler,
total_batch_size,
aspect_ratio_grouping=aspect_ratio_grouping,
num_workers=num_workers,
collate_fn=collate_fn,
**kwargs,
)
def _test_loader_from_config(cfg, dataset_name, mapper=None):
"""
Uses the given `dataset_name` argument (instead of the names in cfg), because the
standard practice is to evaluate each test set individually (not combining them).
"""
if isinstance(dataset_name, str):
dataset_name = [dataset_name]
dataset = get_detection_dataset_dicts(
dataset_name,
filter_empty=False,
proposal_files=(
[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)]
for x in dataset_name
]
if cfg.MODEL.LOAD_PROPOSALS
else None
),
)
if mapper is None:
mapper = DatasetMapper(cfg, False)
return {
"dataset": dataset,
"mapper": mapper,
"num_workers": cfg.DATALOADER.NUM_WORKERS,
"sampler": (
InferenceSampler(len(dataset))
if not isinstance(dataset, torchdata.IterableDataset)
else None
),
}
@configurable(from_config=_test_loader_from_config)
def build_detection_test_loader(
dataset: Union[List[Any], torchdata.Dataset],
*,
mapper: Callable[[Dict[str, Any]], Any],
sampler: Optional[torchdata.Sampler] = None,
batch_size: int = 1,
num_workers: int = 0,
collate_fn: Optional[Callable[[List[Any]], Any]] = None,
) -> torchdata.DataLoader:
"""
Similar to `build_detection_train_loader`, with default batch size = 1,
and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
to produce the exact set of all samples.
Args:
dataset: a list of dataset dicts,
or a pytorch dataset (either map-style or iterable). They can be obtained
by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
mapper: a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
sampler: a sampler that produces
indices to be applied on ``dataset``. Default to :class:`InferenceSampler`,
which splits the dataset across all workers. Sampler must be None
if `dataset` is iterable.
batch_size: the batch size of the data loader to be created.
Default to 1 image per worker since this is the standard when reporting
inference time in papers.
num_workers: number of parallel data loading workers
collate_fn: same as the argument of `torch.utils.data.DataLoader`.
Defaults to do no collation and return a list of data.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
Examples:
::
data_loader = build_detection_test_loader(
DatasetRegistry.get("my_test"),
mapper=DatasetMapper(...))
# or, instantiate with a CfgNode:
data_loader = build_detection_test_loader(cfg, "my_test")
"""
if isinstance(dataset, list):
dataset = DatasetFromList(dataset, copy=False)
if mapper is not None:
dataset = MapDataset(dataset, mapper)
if isinstance(dataset, torchdata.IterableDataset):
assert sampler is None, "sampler must be None if dataset is IterableDataset"
else:
if sampler is None:
sampler = InferenceSampler(len(dataset))
return torchdata.DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
drop_last=False,
num_workers=num_workers,
collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
)
def trivial_batch_collator(batch):
"""
A batch collator that does nothing.
"""
return batch
def worker_init_reset_seed(worker_id):
initial_seed = torch.initial_seed() % 2**31
seed_all_rng(initial_seed + worker_id)
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import types
from collections import UserDict
from typing import List
from detectron2.utils.logger import log_first_n
__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"]
class _DatasetCatalog(UserDict):
"""
A global dictionary that stores information about the datasets and how to obtain them.
It contains a mapping from strings
(which are names that identify a dataset, e.g. "coco_2014_train")
to a function which parses the dataset and returns the samples in the
format of `list[dict]`.
The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details)
if used with the data loader functionalities in `data/build.py,data/detection_transform.py`.
The purpose of having this catalog is to make it easy to choose
different datasets, by just using the strings in the config.
"""
def register(self, name, func):
"""
Args:
name (str): the name that identifies a dataset, e.g. "coco_2014_train".
func (callable): a callable which takes no arguments and returns a list of dicts.
It must return the same results if called multiple times.
"""
assert callable(func), "You must register a function with `DatasetCatalog.register`!"
assert name not in self, "Dataset '{}' is already registered!".format(name)
self[name] = func
def get(self, name):
"""
Call the registered function and return its results.
Args:
name (str): the name that identifies a dataset, e.g. "coco_2014_train".
Returns:
list[dict]: dataset annotations.
"""
try:
f = self[name]
except KeyError as e:
raise KeyError(
"Dataset '{}' is not registered! Available datasets are: {}".format(
name, ", ".join(list(self.keys()))
)
) from e
return f()
def list(self) -> List[str]:
"""
List all registered datasets.
Returns:
list[str]
"""
return list(self.keys())
def remove(self, name):
"""
Alias of ``pop``.
"""
self.pop(name)
def __str__(self):
return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys()))
__repr__ = __str__
DatasetCatalog = _DatasetCatalog()
DatasetCatalog.__doc__ = (
_DatasetCatalog.__doc__
+ """
.. automethod:: detectron2.data.catalog.DatasetCatalog.register
.. automethod:: detectron2.data.catalog.DatasetCatalog.get
"""
)
class Metadata(types.SimpleNamespace):
"""
A class that supports simple attribute setter/getter.
It is intended for storing metadata of a dataset and make it accessible globally.
Examples:
::
# somewhere when you load the data:
MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]
# somewhere when you print statistics or visualize:
classes = MetadataCatalog.get("mydataset").thing_classes
"""
# the name of the dataset
# set default to N/A so that `self.name` in the errors will not trigger getattr again
name: str = "N/A"
_RENAMED = {
"class_names": "thing_classes",
"dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
"stuff_class_names": "stuff_classes",
}
def __getattr__(self, key):
if key in self._RENAMED:
log_first_n(
logging.WARNING,
"Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
n=10,
)
return getattr(self, self._RENAMED[key])
# "name" exists in every metadata
if len(self.__dict__) > 1:
raise AttributeError(
"Attribute '{}' does not exist in the metadata of dataset '{}'. Available "
"keys are {}.".format(key, self.name, str(self.__dict__.keys()))
)
else:
raise AttributeError(
f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': "
"metadata is empty."
)
def __setattr__(self, key, val):
if key in self._RENAMED:
log_first_n(
logging.WARNING,
"Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
n=10,
)
setattr(self, self._RENAMED[key], val)
# Ensure that metadata of the same name stays consistent
try:
oldval = getattr(self, key)
assert oldval == val, (
"Attribute '{}' in the metadata of '{}' cannot be set "
"to a different value!\n{} != {}".format(key, self.name, oldval, val)
)
except AttributeError:
super().__setattr__(key, val)
def as_dict(self):
"""
Returns all the metadata as a dict.
Note that modifications to the returned dict will not reflect on the Metadata object.
"""
return copy.copy(self.__dict__)
def set(self, **kwargs):
"""
Set multiple metadata with kwargs.
"""
for k, v in kwargs.items():
setattr(self, k, v)
return self
def get(self, key, default=None):
"""
Access an attribute and return its value if exists.
Otherwise return default.
"""
try:
return getattr(self, key)
except AttributeError:
return default
class _MetadataCatalog(UserDict):
"""
MetadataCatalog is a global dictionary that provides access to
:class:`Metadata` of a given dataset.
The metadata associated with a certain name is a singleton: once created, the
metadata will stay alive and will be returned by future calls to ``get(name)``.
It's like global variables, so don't abuse it.
It's meant for storing knowledge that's constant and shared across the execution
of the program, e.g.: the class names in COCO.
"""
def get(self, name):
"""
Args:
name (str): name of a dataset (e.g. coco_2014_train).
Returns:
Metadata: The :class:`Metadata` instance associated with this name,
or create an empty one if none is available.
"""
assert len(name)
r = super().get(name, None)
if r is None:
r = self[name] = Metadata(name=name)
return r
def list(self):
"""
List all registered metadata.
Returns:
list[str]: keys (names of datasets) of all registered metadata
"""
return list(self.keys())
def remove(self, name):
"""
Alias of ``pop``.
"""
self.pop(name)
def __str__(self):
return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys()))
__repr__ = __str__
MetadataCatalog = _MetadataCatalog()
MetadataCatalog.__doc__ = (
_MetadataCatalog.__doc__
+ """
.. automethod:: detectron2.data.catalog.MetadataCatalog.get
"""
)
# Copyright (c) Facebook, Inc. and its affiliates.
import contextlib
import copy
import itertools
import logging
import numpy as np
import pickle
import random
from typing import Callable, Union
import torch
import torch.utils.data as data
from torch.utils.data.sampler import Sampler
from detectron2.utils.serialize import PicklableWrapper
__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"]
logger = logging.getLogger(__name__)
# copied from: https://docs.python.org/3/library/itertools.html#recipes
def _roundrobin(*iterables):
"roundrobin('ABC', 'D', 'EF') --> A D E B F C"
# Recipe credited to George Sakkis
num_active = len(iterables)
nexts = itertools.cycle(iter(it).__next__ for it in iterables)
while num_active:
try:
for next in nexts:
yield next()
except StopIteration:
# Remove the iterator we just exhausted from the cycle.
num_active -= 1
nexts = itertools.cycle(itertools.islice(nexts, num_active))
def _shard_iterator_dataloader_worker(iterable, chunk_size=1):
# Shard the iterable if we're currently inside pytorch dataloader worker.
worker_info = data.get_worker_info()
if worker_info is None or worker_info.num_workers == 1:
# do nothing
yield from iterable
else:
# worker0: 0, 1, ..., chunk_size-1, num_workers*chunk_size, num_workers*chunk_size+1, ...
# worker1: chunk_size, chunk_size+1, ...
# worker2: 2*chunk_size, 2*chunk_size+1, ...
# ...
yield from _roundrobin(
*[
itertools.islice(
iterable,
worker_info.id * chunk_size + chunk_i,
None,
worker_info.num_workers * chunk_size,
)
for chunk_i in range(chunk_size)
]
)
class _MapIterableDataset(data.IterableDataset):
"""
Map a function over elements in an IterableDataset.
Similar to pytorch's MapIterDataPipe, but support filtering when map_func
returns None.
This class is not public-facing. Will be called by `MapDataset`.
"""
def __init__(self, dataset, map_func):
self._dataset = dataset
self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work
def __len__(self):
return len(self._dataset)
def __iter__(self):
for x in map(self._map_func, self._dataset):
if x is not None:
yield x
class MapDataset(data.Dataset):
"""
Map a function over the elements in a dataset.
"""
def __init__(self, dataset, map_func):
"""
Args:
dataset: a dataset where map function is applied. Can be either
map-style or iterable dataset. When given an iterable dataset,
the returned object will also be an iterable dataset.
map_func: a callable which maps the element in dataset. map_func can
return None to skip the data (e.g. in case of errors).
How None is handled depends on the style of `dataset`.
If `dataset` is map-style, it randomly tries other elements.
If `dataset` is iterable, it skips the data and tries the next.
"""
self._dataset = dataset
self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work
self._rng = random.Random(42)
self._fallback_candidates = set(range(len(dataset)))
def __new__(cls, dataset, map_func):
is_iterable = isinstance(dataset, data.IterableDataset)
if is_iterable:
return _MapIterableDataset(dataset, map_func)
else:
return super().__new__(cls)
def __getnewargs__(self):
return self._dataset, self._map_func
def __len__(self):
return len(self._dataset)
def __getitem__(self, idx):
retry_count = 0
cur_idx = int(idx)
while True:
data = self._map_func(self._dataset[cur_idx])
if data is not None:
self._fallback_candidates.add(cur_idx)
return data
# _map_func fails for this idx, use a random new index from the pool
retry_count += 1
self._fallback_candidates.discard(cur_idx)
cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]
if retry_count >= 3:
logger = logging.getLogger(__name__)
logger.warning(
"Failed to apply `_map_func` for idx: {}, retry count: {}".format(
idx, retry_count
)
)
class _TorchSerializedList:
"""
A list-like object whose items are serialized and stored in a torch tensor. When
launching a process that uses TorchSerializedList with "fork" start method,
the subprocess can read the same buffer without triggering copy-on-access. When
launching a process that uses TorchSerializedList with "spawn/forkserver" start
method, the list will be pickled by a special ForkingPickler registered by PyTorch
that moves data to shared memory. In both cases, this allows parent and child
processes to share RAM for the list data, hence avoids the issue in
https://github.com/pytorch/pytorch/issues/13246.
See also https://ppwwyyxx.com/blog/2022/Demystify-RAM-Usage-in-Multiprocess-DataLoader/
on how it works.
"""
def __init__(self, lst: list):
self._lst = lst
def _serialize(data):
buffer = pickle.dumps(data, protocol=-1)
return np.frombuffer(buffer, dtype=np.uint8)
logger.info(
"Serializing {} elements to byte tensors and concatenating them all ...".format(
len(self._lst)
)
)
self._lst = [_serialize(x) for x in self._lst]
self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
self._addr = torch.from_numpy(np.cumsum(self._addr))
self._lst = torch.from_numpy(np.concatenate(self._lst))
logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2))
def __len__(self):
return len(self._addr)
def __getitem__(self, idx):
start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
end_addr = self._addr[idx].item()
bytes = memoryview(self._lst[start_addr:end_addr].numpy())
# @lint-ignore PYTHONPICKLEISBAD
return pickle.loads(bytes)
_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = _TorchSerializedList
@contextlib.contextmanager
def set_default_dataset_from_list_serialize_method(new):
"""
Context manager for using custom serialize function when creating DatasetFromList
"""
global _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
orig = _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = new
yield
_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig
class DatasetFromList(data.Dataset):
"""
Wrap a list to a torch Dataset. It produces elements of the list as data.
"""
def __init__(
self,
lst: list,
copy: bool = True,
serialize: Union[bool, Callable] = True,
):
"""
Args:
lst (list): a list which contains elements to produce.
copy (bool): whether to deepcopy the element when producing it,
so that the result can be modified in place without affecting the
source in the list.
serialize (bool or callable): whether to serialize the stroage to other
backend. If `True`, the default serialize method will be used, if given
a callable, the callable will be used as serialize method.
"""
self._lst = lst
self._copy = copy
if not isinstance(serialize, (bool, Callable)):
raise TypeError(f"Unsupported type for argument `serailzie`: {serialize}")
self._serialize = serialize is not False
if self._serialize:
serialize_method = (
serialize
if isinstance(serialize, Callable)
else _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD
)
logger.info(f"Serializing the dataset using: {serialize_method}")
self._lst = serialize_method(self._lst)
def __len__(self):
return len(self._lst)
def __getitem__(self, idx):
if self._copy and not self._serialize:
return copy.deepcopy(self._lst[idx])
else:
return self._lst[idx]
class ToIterableDataset(data.IterableDataset):
"""
Convert an old indices-based (also called map-style) dataset
to an iterable-style dataset.
"""
def __init__(
self,
dataset: data.Dataset,
sampler: Sampler,
shard_sampler: bool = True,
shard_chunk_size: int = 1,
):
"""
Args:
dataset: an old-style dataset with ``__getitem__``
sampler: a cheap iterable that produces indices to be applied on ``dataset``.
shard_sampler: whether to shard the sampler based on the current pytorch data loader
worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple
workers, it is responsible for sharding its data based on worker id so that workers
don't produce identical data.
Most samplers (like our TrainingSampler) do not shard based on dataloader worker id
and this argument should be set to True. But certain samplers may be already
sharded, in that case this argument should be set to False.
shard_chunk_size: when sharding the sampler, each worker will
"""
assert not isinstance(dataset, data.IterableDataset), dataset
assert isinstance(sampler, Sampler), sampler
self.dataset = dataset
self.sampler = sampler
self.shard_sampler = shard_sampler
self.shard_chunk_size = shard_chunk_size
def __iter__(self):
if not self.shard_sampler:
sampler = self.sampler
else:
# With map-style dataset, `DataLoader(dataset, sampler)` runs the
# sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))`
# will run sampler in every of the N worker. So we should only keep 1/N of the ids on
# each worker. The assumption is that sampler is cheap to iterate so it's fine to
# discard ids in workers.
sampler = _shard_iterator_dataloader_worker(self.sampler, self.shard_chunk_size)
for idx in sampler:
yield self.dataset[idx]
def __len__(self):
return len(self.sampler)
class AspectRatioGroupedDataset(data.IterableDataset):
"""
Batch data that have similar aspect ratio together.
In this implementation, images whose aspect ratio < (or >) 1 will
be batched together.
This improves training speed because the images then need less padding
to form a batch.
It assumes the underlying dataset produces dicts with "width" and "height" keys.
It will then produce a list of original dicts with length = batch_size,
all with similar aspect ratios.
"""
def __init__(self, dataset, batch_size):
"""
Args:
dataset: an iterable. Each element must be a dict with keys
"width" and "height", which will be used to batch data.
batch_size (int):
"""
self.dataset = dataset
self.batch_size = batch_size
self._buckets = [[] for _ in range(2)]
# Hard-coded two aspect ratio groups: w > h and w < h.
# Can add support for more aspect ratio groups, but doesn't seem useful
def __iter__(self):
for d in self.dataset:
w, h = d["width"], d["height"]
bucket_id = 0 if w > h else 1
bucket = self._buckets[bucket_id]
bucket.append(d)
if len(bucket) == self.batch_size:
data = bucket[:]
# Clear bucket first, because code after yield is not
# guaranteed to execute
del bucket[:]
yield data
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import numpy as np
from typing import List, Optional, Union
import torch
from detectron2.config import configurable
from . import detection_utils as utils
from . import transforms as T
"""
This file contains the default mapping that's applied to "dataset dicts".
"""
__all__ = ["DatasetMapper"]
class DatasetMapper:
"""
A callable which takes a dataset dict in Detectron2 Dataset format,
and map it into a format used by the model.
This is the default callable to be used to map your dataset dict into training data.
You may need to follow it to implement your own one for customized logic,
such as a different way to read or transform images.
See :doc:`/tutorials/data_loading` for details.
The callable currently does the following:
1. Read the image from "file_name"
2. Applies cropping/geometric transforms to the image and annotations
3. Prepare data and annotations to Tensor and :class:`Instances`
"""
@configurable
def __init__(
self,
is_train: bool,
*,
augmentations: List[Union[T.Augmentation, T.Transform]],
image_format: str,
use_instance_mask: bool = False,
use_keypoint: bool = False,
instance_mask_format: str = "polygon",
keypoint_hflip_indices: Optional[np.ndarray] = None,
precomputed_proposal_topk: Optional[int] = None,
recompute_boxes: bool = False,
):
"""
NOTE: this interface is experimental.
Args:
is_train: whether it's used in training or inference
augmentations: a list of augmentations or deterministic transforms to apply
image_format: an image format supported by :func:`detection_utils.read_image`.
use_instance_mask: whether to process instance segmentation annotations, if available
use_keypoint: whether to process keypoint annotations if available
instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
masks into this format.
keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
precomputed_proposal_topk: if given, will load pre-computed
proposals from dataset_dict and keep the top k proposals for each image.
recompute_boxes: whether to overwrite bounding box annotations
by computing tight bounding boxes from instance mask annotations.
"""
if recompute_boxes:
assert use_instance_mask, "recompute_boxes requires instance masks"
# fmt: off
self.is_train = is_train
self.augmentations = T.AugmentationList(augmentations)
self.image_format = image_format
self.use_instance_mask = use_instance_mask
self.instance_mask_format = instance_mask_format
self.use_keypoint = use_keypoint
self.keypoint_hflip_indices = keypoint_hflip_indices
self.proposal_topk = precomputed_proposal_topk
self.recompute_boxes = recompute_boxes
# fmt: on
logger = logging.getLogger(__name__)
mode = "training" if is_train else "inference"
logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
@classmethod
def from_config(cls, cfg, is_train: bool = True):
augs = utils.build_augmentation(cfg, is_train)
if cfg.INPUT.CROP.ENABLED and is_train:
augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
recompute_boxes = cfg.MODEL.MASK_ON
else:
recompute_boxes = False
ret = {
"is_train": is_train,
"augmentations": augs,
"image_format": cfg.INPUT.FORMAT,
"use_instance_mask": cfg.MODEL.MASK_ON,
"instance_mask_format": cfg.INPUT.MASK_FORMAT,
"use_keypoint": cfg.MODEL.KEYPOINT_ON,
"recompute_boxes": recompute_boxes,
}
if cfg.MODEL.KEYPOINT_ON:
ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
if cfg.MODEL.LOAD_PROPOSALS:
ret["precomputed_proposal_topk"] = (
cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
if is_train
else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
)
return ret
def _transform_annotations(self, dataset_dict, transforms, image_shape):
# USER: Modify this if you want to keep them for some reason.
for anno in dataset_dict["annotations"]:
if not self.use_instance_mask:
anno.pop("segmentation", None)
if not self.use_keypoint:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
annos = [
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(
annos, image_shape, mask_format=self.instance_mask_format
)
# After transforms such as cropping are applied, the bounding box may no longer
# tightly bound the object. As an example, imagine a triangle object
# [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
# bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
# the intersection of original bounding box and the cropping box.
if self.recompute_boxes:
instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
dataset_dict["instances"] = utils.filter_empty_instances(instances)
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
# USER: Write your own image loading if it's not from a file
image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
utils.check_image_size(dataset_dict, image)
# USER: Remove if you don't do semantic/panoptic segmentation.
if "sem_seg_file_name" in dataset_dict:
sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
else:
sem_seg_gt = None
aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
transforms = self.augmentations(aug_input)
image, sem_seg_gt = aug_input.image, aug_input.sem_seg
image_shape = image.shape[:2] # h, w
# Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
# but not efficient on large generic data structures due to the use of pickle & mp.Queue.
# Therefore it's important to use torch.Tensor.
dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
if sem_seg_gt is not None:
dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))
# USER: Remove if you don't use pre-computed proposals.
# Most users would not need this feature.
if self.proposal_topk is not None:
utils.transform_proposals(
dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
)
if not self.is_train:
# USER: Modify this if you want to keep them for some reason.
dataset_dict.pop("annotations", None)
dataset_dict.pop("sem_seg_file_name", None)
return dataset_dict
if "annotations" in dataset_dict:
self._transform_annotations(dataset_dict, transforms, image_shape)
return dataset_dict
### Common Datasets
The dataset implemented here do not need to load the data into the final format.
It should provide the minimal data structure needed to use the dataset, so it can be very efficient.
For example, for an image dataset, just provide the file names and labels, but don't read the images.
Let the downstream decide how to read.
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .pascal_voc import load_voc_instances, register_pascal_voc
from . import builtin as _builtin # ensure the builtin datasets are registered
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
"""
This file registers pre-defined datasets at hard-coded paths, and their metadata.
We hard-code metadata for common datasets. This will enable:
1. Consistency check when loading the datasets
2. Use models on these standard datasets directly and run demos,
without having to download the dataset annotations
We hard-code some paths to the dataset that's assumed to
exist in "./datasets/".
Users SHOULD NOT use this file to create new dataset / metadata for new dataset.
To add new dataset, refer to the tutorial "docs/DATASETS.md".
"""
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata
from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic
from .cityscapes_panoptic import register_all_cityscapes_panoptic
from .coco import load_sem_seg, register_coco_instances
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import get_lvis_instances_meta, register_lvis_instances
from .pascal_voc import register_pascal_voc
# ==== Predefined datasets and splits for COCO ==========
_PREDEFINED_SPLITS_COCO = {}
_PREDEFINED_SPLITS_COCO["coco"] = {
"coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
"coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
"coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"),
"coco_2014_valminusminival": (
"coco/val2014",
"coco/annotations/instances_valminusminival2014.json",
),
"coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"),
"coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"),
"coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"),
"coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"),
"coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"),
}
_PREDEFINED_SPLITS_COCO["coco_person"] = {
"keypoints_coco_2014_train": (
"coco/train2014",
"coco/annotations/person_keypoints_train2014.json",
),
"keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"),
"keypoints_coco_2014_minival": (
"coco/val2014",
"coco/annotations/person_keypoints_minival2014.json",
),
"keypoints_coco_2014_valminusminival": (
"coco/val2014",
"coco/annotations/person_keypoints_valminusminival2014.json",
),
"keypoints_coco_2017_train": (
"coco/train2017",
"coco/annotations/person_keypoints_train2017.json",
),
"keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"),
"keypoints_coco_2017_val_100": (
"coco/val2017",
"coco/annotations/person_keypoints_val2017_100.json",
),
}
_PREDEFINED_SPLITS_COCO_PANOPTIC = {
"coco_2017_train_panoptic": (
# This is the original panoptic annotation directory
"coco/panoptic_train2017",
"coco/annotations/panoptic_train2017.json",
# This directory contains semantic annotations that are
# converted from panoptic annotations.
# It is used by PanopticFPN.
# You can use the script at detectron2/datasets/prepare_panoptic_fpn.py
# to create these directories.
"coco/panoptic_stuff_train2017",
),
"coco_2017_val_panoptic": (
"coco/panoptic_val2017",
"coco/annotations/panoptic_val2017.json",
"coco/panoptic_stuff_val2017",
),
"coco_2017_val_100_panoptic": (
"coco/panoptic_val2017_100",
"coco/annotations/panoptic_val2017_100.json",
"coco/panoptic_stuff_val2017_100",
),
}
def register_all_coco(root):
for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items():
for key, (image_root, json_file) in splits_per_dataset.items():
# Assume pre-defined datasets live in `./datasets`.
register_coco_instances(
key,
_get_builtin_metadata(dataset_name),
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
)
for (
prefix,
(panoptic_root, panoptic_json, semantic_root),
) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
prefix_instances = prefix[: -len("_panoptic")]
instances_meta = MetadataCatalog.get(prefix_instances)
image_root, instances_json = instances_meta.image_root, instances_meta.json_file
# The "separated" version of COCO panoptic segmentation dataset,
# e.g. used by Panoptic FPN
register_coco_panoptic_separated(
prefix,
_get_builtin_metadata("coco_panoptic_separated"),
image_root,
os.path.join(root, panoptic_root),
os.path.join(root, panoptic_json),
os.path.join(root, semantic_root),
instances_json,
)
# The "standard" version of COCO panoptic segmentation dataset,
# e.g. used by Panoptic-DeepLab
register_coco_panoptic(
prefix,
_get_builtin_metadata("coco_panoptic_standard"),
image_root,
os.path.join(root, panoptic_root),
os.path.join(root, panoptic_json),
instances_json,
)
# ==== Predefined datasets and splits for LVIS ==========
_PREDEFINED_SPLITS_LVIS = {
"lvis_v1": {
"lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"),
"lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"),
"lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"),
"lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"),
},
"lvis_v0.5": {
"lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"),
"lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"),
"lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"),
"lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"),
},
"lvis_v0.5_cocofied": {
"lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"),
"lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"),
},
}
def register_all_lvis(root):
for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items():
for key, (image_root, json_file) in splits_per_dataset.items():
register_lvis_instances(
key,
get_lvis_instances_meta(dataset_name),
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
)
# ==== Predefined splits for raw cityscapes images ===========
_RAW_CITYSCAPES_SPLITS = {
"cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"),
"cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"),
"cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"),
}
def register_all_cityscapes(root):
for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
meta = _get_builtin_metadata("cityscapes")
image_dir = os.path.join(root, image_dir)
gt_dir = os.path.join(root, gt_dir)
inst_key = key.format(task="instance_seg")
DatasetCatalog.register(
inst_key,
lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
x, y, from_json=True, to_polygons=True
),
)
MetadataCatalog.get(inst_key).set(
image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta
)
sem_key = key.format(task="sem_seg")
DatasetCatalog.register(
sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)
)
MetadataCatalog.get(sem_key).set(
image_dir=image_dir,
gt_dir=gt_dir,
evaluator_type="cityscapes_sem_seg",
ignore_label=255,
**meta,
)
# ==== Predefined splits for PASCAL VOC ===========
def register_all_pascal_voc(root):
SPLITS = [
("voc_2007_trainval", "VOC2007", "trainval"),
("voc_2007_train", "VOC2007", "train"),
("voc_2007_val", "VOC2007", "val"),
("voc_2007_test", "VOC2007", "test"),
("voc_2012_trainval", "VOC2012", "trainval"),
("voc_2012_train", "VOC2012", "train"),
("voc_2012_val", "VOC2012", "val"),
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc(name, os.path.join(root, dirname), split, year)
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_ade20k(root):
root = os.path.join(root, "ADEChallengeData2016")
for name, dirname in [("train", "training"), ("val", "validation")]:
image_dir = os.path.join(root, "images", dirname)
gt_dir = os.path.join(root, "annotations_detectron2", dirname)
name = f"ade20k_sem_seg_{name}"
DatasetCatalog.register(
name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg")
)
MetadataCatalog.get(name).set(
stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:],
image_root=image_dir,
sem_seg_root=gt_dir,
evaluator_type="sem_seg",
ignore_label=255,
)
# True for open source;
# Internally at fb, we register them elsewhere
if __name__.endswith(".builtin"):
# Assume pre-defined datasets live in `./datasets`.
_root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets"))
register_all_coco(_root)
register_all_lvis(_root)
register_all_cityscapes(_root)
register_all_cityscapes_panoptic(_root)
register_all_pascal_voc(_root)
register_all_ade20k(_root)
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
"""
Note:
For your custom dataset, there is no need to hard-code metadata anywhere in the code.
For example, for COCO-format dataset, metadata will be obtained automatically
when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways
during loading.
However, we hard-coded metadata for a few common dataset here.
The only goal is to allow users who don't have these dataset to use pre-trained models.
Users don't have to download a COCO json (which contains metadata), in order to visualize a
COCO model (with correct class names and colors).
"""
# All coco categories, together with their nice-looking visualization colors
# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
COCO_CATEGORIES = [
{"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"},
{"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"},
{"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"},
{"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"},
{"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"},
{"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"},
{"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"},
{"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"},
{"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"},
{"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"},
{"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"},
{"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"},
{"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"},
{"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"},
{"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"},
{"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"},
{"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"},
{"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"},
{"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"},
{"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"},
{"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"},
{"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"},
{"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"},
{"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"},
{"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"},
{"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"},
{"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"},
{"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"},
{"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"},
{"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"},
{"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"},
{"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"},
{"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"},
{"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"},
{"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"},
{"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"},
{"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"},
{"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"},
{"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"},
{"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"},
{"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"},
{"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"},
{"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"},
{"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"},
{"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"},
{"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"},
{"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"},
{"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"},
{"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"},
{"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"},
{"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"},
{"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"},
{"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"},
{"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"},
{"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"},
{"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"},
{"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"},
{"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"},
{"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"},
{"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"},
{"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"},
{"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"},
{"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"},
{"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"},
{"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"},
{"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"},
{"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"},
{"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"},
{"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"},
{"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"},
{"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"},
{"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"},
{"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"},
{"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"},
{"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"},
{"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"},
{"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"},
{"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"},
{"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"},
{"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"},
{"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"},
{"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"},
{"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"},
{"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"},
{"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"},
{"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"},
{"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"},
{"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"},
{"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"},
{"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"},
{"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"},
{"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"},
{"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"},
{"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"},
{"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"},
{"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"},
{"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"},
{"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"},
{"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"},
{"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"},
{"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"},
{"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"},
{"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"},
{"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"},
{"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"},
{"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"},
{"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"},
{"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"},
{"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"},
{"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"},
{"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"},
{"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"},
{"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"},
{"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"},
{"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"},
{"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"},
{"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"},
{"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"},
{"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"},
{"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"},
{"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"},
{"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"},
{"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"},
{"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"},
{"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"},
{"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"},
{"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"},
{"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"},
{"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"},
{"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"},
{"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"},
{"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"},
{"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
]
# fmt: off
COCO_PERSON_KEYPOINT_NAMES = (
"nose",
"left_eye", "right_eye",
"left_ear", "right_ear",
"left_shoulder", "right_shoulder",
"left_elbow", "right_elbow",
"left_wrist", "right_wrist",
"left_hip", "right_hip",
"left_knee", "right_knee",
"left_ankle", "right_ankle",
)
# fmt: on
# Pairs of keypoints that should be exchanged under horizontal flipping
COCO_PERSON_KEYPOINT_FLIP_MAP = (
("left_eye", "right_eye"),
("left_ear", "right_ear"),
("left_shoulder", "right_shoulder"),
("left_elbow", "right_elbow"),
("left_wrist", "right_wrist"),
("left_hip", "right_hip"),
("left_knee", "right_knee"),
("left_ankle", "right_ankle"),
)
# rules for pairs of keypoints to draw a line between, and the line color to use.
KEYPOINT_CONNECTION_RULES = [
# face
("left_ear", "left_eye", (102, 204, 255)),
("right_ear", "right_eye", (51, 153, 255)),
("left_eye", "nose", (102, 0, 204)),
("nose", "right_eye", (51, 102, 255)),
# upper-body
("left_shoulder", "right_shoulder", (255, 128, 0)),
("left_shoulder", "left_elbow", (153, 255, 204)),
("right_shoulder", "right_elbow", (128, 229, 255)),
("left_elbow", "left_wrist", (153, 255, 153)),
("right_elbow", "right_wrist", (102, 255, 224)),
# lower-body
("left_hip", "right_hip", (255, 102, 0)),
("left_hip", "left_knee", (255, 255, 77)),
("right_hip", "right_knee", (153, 255, 204)),
("left_knee", "left_ankle", (191, 255, 128)),
("right_knee", "right_ankle", (255, 195, 77)),
]
# All Cityscapes categories, together with their nice-looking visualization colors
# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa
CITYSCAPES_CATEGORIES = [
{"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"},
{"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"},
{"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"},
{"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"},
{"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"},
{"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"},
{"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"},
{"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"},
{"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"},
{"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"},
{"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"},
{"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"},
{"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"},
{"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"},
{"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"},
{"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"},
{"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"},
{"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"},
{"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"},
]
# fmt: off
ADE20K_SEM_SEG_CATEGORIES = [
"wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", "case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa
]
# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore
# fmt: on
def _get_coco_instances_meta():
thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1]
assert len(thing_ids) == 80, len(thing_ids)
# Mapping from the incontiguous COCO category id to an id in [0, 79]
thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1]
ret = {
"thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
"thing_classes": thing_classes,
"thing_colors": thing_colors,
}
return ret
def _get_coco_panoptic_separated_meta():
"""
Returns metadata for "separated" version of the panoptic segmentation dataset.
"""
stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0]
assert len(stuff_ids) == 53, len(stuff_ids)
# For semantic segmentation, this mapping maps from contiguous stuff id
# (in [0, 53], used in models) to ids in the dataset (used for processing results)
# The id 0 is mapped to an extra category "thing".
stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)}
# When converting COCO panoptic annotations to semantic annotations
# We label the "thing" category to 0
stuff_dataset_id_to_contiguous_id[0] = 0
# 54 names for COCO stuff categories (including "things")
stuff_classes = ["things"] + [
k["name"].replace("-other", "").replace("-merged", "")
for k in COCO_CATEGORIES
if k["isthing"] == 0
]
# NOTE: I randomly picked a color for things
stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0]
ret = {
"stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id,
"stuff_classes": stuff_classes,
"stuff_colors": stuff_colors,
}
ret.update(_get_coco_instances_meta())
return ret
def _get_builtin_metadata(dataset_name):
if dataset_name == "coco":
return _get_coco_instances_meta()
if dataset_name == "coco_panoptic_separated":
return _get_coco_panoptic_separated_meta()
elif dataset_name == "coco_panoptic_standard":
meta = {}
# The following metadata maps contiguous id from [0, #thing categories +
# #stuff categories) to their names and colors. We have to replica of the
# same name and color under "thing_*" and "stuff_*" because the current
# visualization function in D2 handles thing and class classes differently
# due to some heuristic used in Panoptic FPN. We keep the same naming to
# enable reusing existing visualization functions.
thing_classes = [k["name"] for k in COCO_CATEGORIES]
thing_colors = [k["color"] for k in COCO_CATEGORIES]
stuff_classes = [k["name"] for k in COCO_CATEGORIES]
stuff_colors = [k["color"] for k in COCO_CATEGORIES]
meta["thing_classes"] = thing_classes
meta["thing_colors"] = thing_colors
meta["stuff_classes"] = stuff_classes
meta["stuff_colors"] = stuff_colors
# Convert category id for training:
# category id: like semantic segmentation, it is the class id for each
# pixel. Since there are some classes not used in evaluation, the category
# id is not always contiguous and thus we have two set of category ids:
# - original category id: category id in the original dataset, mainly
# used for evaluation.
# - contiguous category id: [0, #classes), in order to train the linear
# softmax classifier.
thing_dataset_id_to_contiguous_id = {}
stuff_dataset_id_to_contiguous_id = {}
for i, cat in enumerate(COCO_CATEGORIES):
if cat["isthing"]:
thing_dataset_id_to_contiguous_id[cat["id"]] = i
else:
stuff_dataset_id_to_contiguous_id[cat["id"]] = i
meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
return meta
elif dataset_name == "coco_person":
return {
"thing_classes": ["person"],
"keypoint_names": COCO_PERSON_KEYPOINT_NAMES,
"keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP,
"keypoint_connection_rules": KEYPOINT_CONNECTION_RULES,
}
elif dataset_name == "cityscapes":
# fmt: off
CITYSCAPES_THING_CLASSES = [
"person", "rider", "car", "truck",
"bus", "train", "motorcycle", "bicycle",
]
CITYSCAPES_STUFF_CLASSES = [
"road", "sidewalk", "building", "wall", "fence", "pole", "traffic light",
"traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car",
"truck", "bus", "train", "motorcycle", "bicycle",
]
# fmt: on
return {
"thing_classes": CITYSCAPES_THING_CLASSES,
"stuff_classes": CITYSCAPES_STUFF_CLASSES,
}
raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
# Copyright (c) Facebook, Inc. and its affiliates.
import functools
import json
import logging
import multiprocessing as mp
import numpy as np
import os
from itertools import chain
import pycocotools.mask as mask_util
from PIL import Image
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger
try:
import cv2 # noqa
except ImportError:
# OpenCV is an optional dependency at the moment
pass
logger = logging.getLogger(__name__)
def _get_cityscapes_files(image_dir, gt_dir):
files = []
# scan through the directory
cities = PathManager.ls(image_dir)
logger.info(f"{len(cities)} cities found in '{image_dir}'.")
for city in cities:
city_img_dir = os.path.join(image_dir, city)
city_gt_dir = os.path.join(gt_dir, city)
for basename in PathManager.ls(city_img_dir):
image_file = os.path.join(city_img_dir, basename)
suffix = "leftImg8bit.png"
assert basename.endswith(suffix), basename
basename = basename[: -len(suffix)]
instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png")
label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")
files.append((image_file, instance_file, label_file, json_file))
assert len(files), "No images found in {}".format(image_dir)
for f in files[0]:
assert PathManager.isfile(f), f
return files
def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
from_json (bool): whether to read annotations from the raw json file or the png files.
to_polygons (bool): whether to represent the segmentation as polygons
(COCO's format) instead of masks (cityscapes's format).
Returns:
list[dict]: a list of dicts in Detectron2 standard format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
"""
if from_json:
assert to_polygons, (
"Cityscapes's json annotations are in polygon format. "
"Converting to mask format is not supported now."
)
files = _get_cityscapes_files(image_dir, gt_dir)
logger.info("Preprocessing cityscapes annotations ...")
# This is still not fast: all workers will execute duplicate works and will
# take up to 10m on a 8GPU server.
pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))
ret = pool.map(
functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons),
files,
)
logger.info("Loaded {} images from {}".format(len(ret), image_dir))
# Map cityscape ids to contiguous ids
from cityscapesscripts.helpers.labels import labels
labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)}
for dict_per_image in ret:
for anno in dict_per_image["annotations"]:
anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
return ret
def load_cityscapes_semantic(image_dir, gt_dir):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
Returns:
list[dict]: a list of dict, each has "file_name" and
"sem_seg_file_name".
"""
ret = []
# gt_dir is small and contain many small files. make sense to fetch to local first
gt_dir = PathManager.get_local_path(gt_dir)
for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir):
label_file = label_file.replace("labelIds", "labelTrainIds")
with PathManager.open(json_file, "r") as f:
jsonobj = json.load(f)
ret.append(
{
"file_name": image_file,
"sem_seg_file_name": label_file,
"height": jsonobj["imgHeight"],
"width": jsonobj["imgWidth"],
}
)
assert len(ret), f"No images found in {image_dir}!"
assert PathManager.isfile(
ret[0]["sem_seg_file_name"]
), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa
return ret
def _cityscapes_files_to_dict(files, from_json, to_polygons):
"""
Parse cityscapes annotation files to a instance segmentation dataset dict.
Args:
files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
from_json (bool): whether to read annotations from the raw json file or the png files.
to_polygons (bool): whether to represent the segmentation as polygons
(COCO's format) instead of masks (cityscapes's format).
Returns:
A dict in Detectron2 Dataset format.
"""
from cityscapesscripts.helpers.labels import id2label, name2label
image_file, instance_id_file, _, json_file = files
annos = []
if from_json:
from shapely.geometry import MultiPolygon, Polygon
with PathManager.open(json_file, "r") as f:
jsonobj = json.load(f)
ret = {
"file_name": image_file,
"image_id": os.path.basename(image_file),
"height": jsonobj["imgHeight"],
"width": jsonobj["imgWidth"],
}
# `polygons_union` contains the union of all valid polygons.
polygons_union = Polygon()
# CityscapesScripts draw the polygons in sequential order
# and each polygon *overwrites* existing ones. See
# (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
# We use reverse order, and each polygon *avoids* early ones.
# This will resolve the ploygon overlaps in the same way as CityscapesScripts.
for obj in jsonobj["objects"][::-1]:
if "deleted" in obj: # cityscapes data format specific
continue
label_name = obj["label"]
try:
label = name2label[label_name]
except KeyError:
if label_name.endswith("group"): # crowd area
label = name2label[label_name[: -len("group")]]
else:
raise
if label.id < 0: # cityscapes data format
continue
# Cityscapes's raw annotations uses integer coordinates
# Therefore +0.5 here
poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
# CityscapesScript uses PIL.ImageDraw.polygon to rasterize
# polygons for evaluation. This function operates in integer space
# and draws each pixel whose center falls into the polygon.
# Therefore it draws a polygon which is 0.5 "fatter" in expectation.
# We therefore dilate the input polygon by 0.5 as our input.
poly = Polygon(poly_coord).buffer(0.5, resolution=4)
if not label.hasInstances or label.ignoreInEval:
# even if we won't store the polygon it still contributes to overlaps resolution
polygons_union = polygons_union.union(poly)
continue
# Take non-overlapping part of the polygon
poly_wo_overlaps = poly.difference(polygons_union)
if poly_wo_overlaps.is_empty:
continue
polygons_union = polygons_union.union(poly)
anno = {}
anno["iscrowd"] = label_name.endswith("group")
anno["category_id"] = label.id
if isinstance(poly_wo_overlaps, Polygon):
poly_list = [poly_wo_overlaps]
elif isinstance(poly_wo_overlaps, MultiPolygon):
poly_list = poly_wo_overlaps.geoms
else:
raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps))
poly_coord = []
for poly_el in poly_list:
# COCO API can work only with exterior boundaries now, hence we store only them.
# TODO: store both exterior and interior boundaries once other parts of the
# codebase support holes in polygons.
poly_coord.append(list(chain(*poly_el.exterior.coords)))
anno["segmentation"] = poly_coord
(xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds
anno["bbox"] = (xmin, ymin, xmax, ymax)
anno["bbox_mode"] = BoxMode.XYXY_ABS
annos.append(anno)
else:
# See also the official annotation parsing scripts at
# https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa
with PathManager.open(instance_id_file, "rb") as f:
inst_image = np.asarray(Image.open(f), order="F")
# ids < 24 are stuff labels (filtering them first is about 5% faster)
flattened_ids = np.unique(inst_image[inst_image >= 24])
ret = {
"file_name": image_file,
"image_id": os.path.basename(image_file),
"height": inst_image.shape[0],
"width": inst_image.shape[1],
}
for instance_id in flattened_ids:
# For non-crowd annotations, instance_id // 1000 is the label_id
# Crowd annotations have <1000 instance ids
label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
label = id2label[label_id]
if not label.hasInstances or label.ignoreInEval:
continue
anno = {}
anno["iscrowd"] = instance_id < 1000
anno["category_id"] = label.id
mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")
inds = np.nonzero(mask)
ymin, ymax = inds[0].min(), inds[0].max()
xmin, xmax = inds[1].min(), inds[1].max()
anno["bbox"] = (xmin, ymin, xmax, ymax)
if xmax <= xmin or ymax <= ymin:
continue
anno["bbox_mode"] = BoxMode.XYXY_ABS
if to_polygons:
# This conversion comes from D4809743 and D5171122,
# when Mask-RCNN was first developed.
contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[
-2
]
polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
# opencv's can produce invalid polygons
if len(polygons) == 0:
continue
anno["segmentation"] = polygons
else:
anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
annos.append(anno)
ret["annotations"] = annos
return ret
def main() -> None:
global logger, labels
"""
Test the cityscapes dataset loader.
Usage:
python -m detectron2.data.datasets.cityscapes \
cityscapes/leftImg8bit/train cityscapes/gtFine/train
"""
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("image_dir")
parser.add_argument("gt_dir")
parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
args = parser.parse_args()
from cityscapesscripts.helpers.labels import labels
from detectron2.data.catalog import Metadata
from detectron2.utils.visualizer import Visualizer
logger = setup_logger(name=__name__)
dirname = "cityscapes-data-vis"
os.makedirs(dirname, exist_ok=True)
if args.type == "instance":
dicts = load_cityscapes_instances(
args.image_dir, args.gt_dir, from_json=True, to_polygons=True
)
logger.info("Done loading {} samples.".format(len(dicts)))
thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
meta = Metadata().set(thing_classes=thing_classes)
else:
dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
logger.info("Done loading {} samples.".format(len(dicts)))
stuff_classes = [k.name for k in labels if k.trainId != 255]
stuff_colors = [k.color for k in labels if k.trainId != 255]
meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors)
for d in dicts:
img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
visualizer = Visualizer(img, metadata=meta)
vis = visualizer.draw_dataset_dict(d)
# cv2.imshow("a", vis.get_image()[:, :, ::-1])
# cv2.waitKey()
fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
vis.save(fpath)
if __name__ == "__main__":
main() # pragma: no cover
# Copyright (c) Facebook, Inc. and its affiliates.
import json
import logging
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES
from detectron2.utils.file_io import PathManager
"""
This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog.
"""
logger = logging.getLogger(__name__)
def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info):
files = []
# scan through the directory
cities = PathManager.ls(image_dir)
logger.info(f"{len(cities)} cities found in '{image_dir}'.")
image_dict = {}
for city in cities:
city_img_dir = os.path.join(image_dir, city)
for basename in PathManager.ls(city_img_dir):
image_file = os.path.join(city_img_dir, basename)
suffix = "_leftImg8bit.png"
assert basename.endswith(suffix), basename
basename = os.path.basename(basename)[: -len(suffix)]
image_dict[basename] = image_file
for ann in json_info["annotations"]:
image_file = image_dict.get(ann["image_id"], None)
assert image_file is not None, "No image {} found for annotation {}".format(
ann["image_id"], ann["file_name"]
)
label_file = os.path.join(gt_dir, ann["file_name"])
segments_info = ann["segments_info"]
files.append((image_file, label_file, segments_info))
assert len(files), "No images found in {}".format(image_dir)
assert PathManager.isfile(files[0][0]), files[0][0]
assert PathManager.isfile(files[0][1]), files[0][1]
return files
def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
gt_dir (str): path to the raw annotations. e.g.,
"~/cityscapes/gtFine/cityscapes_panoptic_train".
gt_json (str): path to the json file. e.g.,
"~/cityscapes/gtFine/cityscapes_panoptic_train.json".
meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id"
and "stuff_dataset_id_to_contiguous_id" to map category ids to
contiguous ids for training.
Returns:
list[dict]: a list of dicts in Detectron2 standard format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
"""
def _convert_category_id(segment_info, meta):
if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
else:
segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
return segment_info
assert os.path.exists(
gt_json
), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa
with open(gt_json) as f:
json_info = json.load(f)
files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info)
ret = []
for image_file, label_file, segments_info in files:
sem_label_file = (
image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png"
)
segments_info = [_convert_category_id(x, meta) for x in segments_info]
ret.append(
{
"file_name": image_file,
"image_id": "_".join(
os.path.splitext(os.path.basename(image_file))[0].split("_")[:3]
),
"sem_seg_file_name": sem_label_file,
"pan_seg_file_name": label_file,
"segments_info": segments_info,
}
)
assert len(ret), f"No images found in {image_dir}!"
assert PathManager.isfile(
ret[0]["sem_seg_file_name"]
), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa
assert PathManager.isfile(
ret[0]["pan_seg_file_name"]
), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa
return ret
_RAW_CITYSCAPES_PANOPTIC_SPLITS = {
"cityscapes_fine_panoptic_train": (
"cityscapes/leftImg8bit/train",
"cityscapes/gtFine/cityscapes_panoptic_train",
"cityscapes/gtFine/cityscapes_panoptic_train.json",
),
"cityscapes_fine_panoptic_val": (
"cityscapes/leftImg8bit/val",
"cityscapes/gtFine/cityscapes_panoptic_val",
"cityscapes/gtFine/cityscapes_panoptic_val.json",
),
# "cityscapes_fine_panoptic_test": not supported yet
}
def register_all_cityscapes_panoptic(root):
meta = {}
# The following metadata maps contiguous id from [0, #thing categories +
# #stuff categories) to their names and colors. We have to replica of the
# same name and color under "thing_*" and "stuff_*" because the current
# visualization function in D2 handles thing and class classes differently
# due to some heuristic used in Panoptic FPN. We keep the same naming to
# enable reusing existing visualization functions.
thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES]
thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES]
stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES]
stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES]
meta["thing_classes"] = thing_classes
meta["thing_colors"] = thing_colors
meta["stuff_classes"] = stuff_classes
meta["stuff_colors"] = stuff_colors
# There are three types of ids in cityscapes panoptic segmentation:
# (1) category id: like semantic segmentation, it is the class id for each
# pixel. Since there are some classes not used in evaluation, the category
# id is not always contiguous and thus we have two set of category ids:
# - original category id: category id in the original dataset, mainly
# used for evaluation.
# - contiguous category id: [0, #classes), in order to train the classifier
# (2) instance id: this id is used to differentiate different instances from
# the same category. For "stuff" classes, the instance id is always 0; for
# "thing" classes, the instance id starts from 1 and 0 is reserved for
# ignored instances (e.g. crowd annotation).
# (3) panoptic id: this is the compact id that encode both category and
# instance id by: category_id * 1000 + instance_id.
thing_dataset_id_to_contiguous_id = {}
stuff_dataset_id_to_contiguous_id = {}
for k in CITYSCAPES_CATEGORIES:
if k["isthing"] == 1:
thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"]
else:
stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"]
meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items():
image_dir = os.path.join(root, image_dir)
gt_dir = os.path.join(root, gt_dir)
gt_json = os.path.join(root, gt_json)
DatasetCatalog.register(
key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta)
)
MetadataCatalog.get(key).set(
panoptic_root=gt_dir,
image_root=image_dir,
panoptic_json=gt_json,
gt_dir=gt_dir.replace("cityscapes_panoptic_", ""),
evaluator_type="cityscapes_panoptic_seg",
ignore_label=255,
label_divisor=1000,
**meta,
)
# Copyright (c) Facebook, Inc. and its affiliates.
import contextlib
import datetime
import io
import json
import logging
import numpy as np
import os
import shutil
import pycocotools.mask as mask_util
from fvcore.common.timer import Timer
from iopath.common.file_io import file_lock
from PIL import Image
from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes
from detectron2.utils.file_io import PathManager
from .. import DatasetCatalog, MetadataCatalog
"""
This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format".
"""
logger = logging.getLogger(__name__)
__all__ = [
"load_coco_json",
"load_sem_seg",
"convert_to_coco_json",
"register_coco_instances",
]
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
"""
Load a json file with COCO's instances annotation format.
Currently supports instance detection, instance segmentation,
and person keypoints annotations.
Args:
json_file (str): full path to the json file in COCO instances annotation format.
image_root (str or path-like): the directory where the images in this json file exists.
dataset_name (str or None): the name of the dataset (e.g., coco_2017_train).
When provided, this function will also do the following:
* Put "thing_classes" into the metadata associated with this dataset.
* Map the category ids into a contiguous range (needed by standard dataset format),
and add "thing_dataset_id_to_contiguous_id" to the metadata associated
with this dataset.
This option should usually be provided, unless users need to load
the original json content and apply more processing manually.
extra_annotation_keys (list[str]): list of per-annotation keys that should also be
loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
"category_id", "segmentation"). The values for these keys will be returned as-is.
For example, the densepose annotations are loaded in this way.
Returns:
list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See
`Using Custom Datasets </tutorials/datasets.html>`_ ) when `dataset_name` is not None.
If `dataset_name` is None, the returned `category_ids` may be
incontiguous and may not conform to the Detectron2 standard format.
Notes:
1. This function does not read the image files.
The results do not have the "image" field.
"""
from pycocotools.coco import COCO
timer = Timer()
json_file = PathManager.get_local_path(json_file)
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
id_map = None
if dataset_name is not None:
meta = MetadataCatalog.get(dataset_name)
cat_ids = sorted(coco_api.getCatIds())
cats = coco_api.loadCats(cat_ids)
# The categories in a custom json file may not be sorted.
thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
meta.thing_classes = thing_classes
# In COCO, certain category ids are artificially removed,
# and by convention they are always ignored.
# We deal with COCO's id issue and translate
# the category ids to contiguous ids in [0, 80).
# It works by looking at the "categories" field in the json, therefore
# if users' own json also have incontiguous ids, we'll
# apply this mapping as well but print a warning.
if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
if "coco" not in dataset_name:
logger.warning(
"""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
)
id_map = {v: i for i, v in enumerate(cat_ids)}
meta.thing_dataset_id_to_contiguous_id = id_map
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images. Example of anns[0]:
# [{'segmentation': [[192.81,
# 247.09,
# ...
# 219.03,
# 249.06]],
# 'area': 1035.749,
# 'iscrowd': 0,
# 'image_id': 1268,
# 'bbox': [192.81, 224.8, 74.73, 33.43],
# 'category_id': 16,
# 'id': 42986},
# ...]
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
total_num_valid_anns = sum([len(x) for x in anns])
total_num_anns = len(coco_api.anns)
if total_num_valid_anns < total_num_anns:
logger.warning(
f"{json_file} contains {total_num_anns} annotations, but only "
f"{total_num_valid_anns} of them match to images in the file."
)
if "minival" not in json_file:
# The popular valminusminival & minival annotations for COCO2014 contain this bug.
# However the ratio of buggy annotations there is tiny and does not affect accuracy.
# Therefore we explicitly white-list them.
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
imgs_anns = list(zip(imgs, anns))
logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))
dataset_dicts = []
ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])
num_instances_without_valid_segmentation = 0
for img_dict, anno_dict_list in imgs_anns:
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
image_id = record["image_id"] = img_dict["id"]
objs = []
for anno in anno_dict_list:
# Check that the image_id in this annotation is the same as
# the image_id we're looking at.
# This fails only when the data parsing logic or the annotation file is buggy.
# The original COCO valminusminival2014 & minival2014 annotation files
# actually contains bugs that, together with certain ways of using COCO API,
# can trigger this assertion.
assert anno["image_id"] == image_id
assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'
obj = {key: anno[key] for key in ann_keys if key in anno}
if "bbox" in obj and len(obj["bbox"]) == 0:
raise ValueError(
f"One annotation of image {image_id} contains empty 'bbox' value! "
"This json does not have valid COCO format."
)
segm = anno.get("segmentation", None)
if segm: # either list[list[float]] or dict(RLE)
if isinstance(segm, dict):
if isinstance(segm["counts"], list):
# convert to compressed RLE
segm = mask_util.frPyObjects(segm, *segm["size"])
else:
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
num_instances_without_valid_segmentation += 1
continue # ignore this instance
obj["segmentation"] = segm
keypts = anno.get("keypoints", None)
if keypts: # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
obj["bbox_mode"] = BoxMode.XYWH_ABS
if id_map:
annotation_category_id = obj["category_id"]
try:
obj["category_id"] = id_map[annotation_category_id]
except KeyError as e:
raise KeyError(
f"Encountered category_id={annotation_category_id} "
"but this id does not exist in 'categories' of the json file."
) from e
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
if num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. ".format(
num_instances_without_valid_segmentation
)
+ "There might be issues in your dataset generation process. Please "
"check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully"
)
return dataset_dicts
def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"):
"""
Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
treated as ground truth annotations and all files under "image_root" with "image_ext" extension
as input images. Ground truth and input images are matched using file paths relative to
"gt_root" and "image_root" respectively without taking into account file extensions.
This works for COCO as well as some other datasets.
Args:
gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
annotations are stored as images with integer values in pixels that represent
corresponding semantic labels.
image_root (str): the directory where the input images are.
gt_ext (str): file extension for ground truth annotations.
image_ext (str): file extension for input images.
Returns:
list[dict]:
a list of dicts in detectron2 standard format without instance-level
annotation.
Notes:
1. This function does not read the image and ground truth files.
The results do not have the "image" and "sem_seg" fields.
"""
# We match input images with ground truth based on their relative filepaths (without file
# extensions) starting from 'image_root' and 'gt_root' respectively.
def file2id(folder_path, file_path):
# extract relative path starting from `folder_path`
image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
# remove file extension
image_id = os.path.splitext(image_id)[0]
return image_id
input_files = sorted(
(os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
key=lambda file_path: file2id(image_root, file_path),
)
gt_files = sorted(
(os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
key=lambda file_path: file2id(gt_root, file_path),
)
assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)
# Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
if len(input_files) != len(gt_files):
logger.warn(
"Directory {} and {} has {} and {} files, respectively.".format(
image_root, gt_root, len(input_files), len(gt_files)
)
)
input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
intersect = list(set(input_basenames) & set(gt_basenames))
# sort, otherwise each worker may obtain a list[dict] in different order
intersect = sorted(intersect)
logger.warn("Will use their intersection of {} files.".format(len(intersect)))
input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]
logger.info(
"Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
)
dataset_dicts = []
for img_path, gt_path in zip(input_files, gt_files):
record = {}
record["file_name"] = img_path
record["sem_seg_file_name"] = gt_path
dataset_dicts.append(record)
return dataset_dicts
def convert_to_coco_dict(dataset_name):
"""
Convert an instance detection/segmentation or keypoint detection dataset
in detectron2's standard format into COCO json format.
Generic dataset description can be found here:
https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset
COCO data format description can be found here:
http://cocodataset.org/#format-data
Args:
dataset_name (str):
name of the source dataset
Must be registered in DatastCatalog and in detectron2's standard format.
Must have corresponding metadata "thing_classes"
Returns:
coco_dict: serializable dict in COCO json format
"""
dataset_dicts = DatasetCatalog.get(dataset_name)
metadata = MetadataCatalog.get(dataset_name)
# unmap the category mapping ids for COCO
if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa
else:
reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa
categories = [
{"id": reverse_id_mapper(id), "name": name}
for id, name in enumerate(metadata.thing_classes)
]
logger.info("Converting dataset dicts into COCO format")
coco_images = []
coco_annotations = []
for image_id, image_dict in enumerate(dataset_dicts):
coco_image = {
"id": image_dict.get("image_id", image_id),
"width": int(image_dict["width"]),
"height": int(image_dict["height"]),
"file_name": str(image_dict["file_name"]),
}
coco_images.append(coco_image)
anns_per_image = image_dict.get("annotations", [])
for annotation in anns_per_image:
# create a new dict with only COCO fields
coco_annotation = {}
# COCO requirement: XYWH box format for axis-align and XYWHA for rotated
bbox = annotation["bbox"]
if isinstance(bbox, np.ndarray):
if bbox.ndim != 1:
raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.")
bbox = bbox.tolist()
if len(bbox) not in [4, 5]:
raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.")
from_bbox_mode = annotation["bbox_mode"]
to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS
bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode)
# COCO requirement: instance area
if "segmentation" in annotation:
# Computing areas for instances by counting the pixels
segmentation = annotation["segmentation"]
# TODO: check segmentation type: RLE, BinaryMask or Polygon
if isinstance(segmentation, list):
polygons = PolygonMasks([segmentation])
area = polygons.area()[0].item()
elif isinstance(segmentation, dict): # RLE
area = mask_util.area(segmentation).item()
else:
raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
else:
# Computing areas using bounding boxes
if to_bbox_mode == BoxMode.XYWH_ABS:
bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS)
area = Boxes([bbox_xy]).area()[0].item()
else:
area = RotatedBoxes([bbox]).area()[0].item()
if "keypoints" in annotation:
keypoints = annotation["keypoints"] # list[int]
for idx, v in enumerate(keypoints):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# For COCO format consistency we substract 0.5
# https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
keypoints[idx] = v - 0.5
if "num_keypoints" in annotation:
num_keypoints = annotation["num_keypoints"]
else:
num_keypoints = sum(kp > 0 for kp in keypoints[2::3])
# COCO requirement:
# linking annotations to images
# "id" field must start with 1
coco_annotation["id"] = len(coco_annotations) + 1
coco_annotation["image_id"] = coco_image["id"]
coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
coco_annotation["area"] = float(area)
coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0))
coco_annotation["category_id"] = int(reverse_id_mapper(annotation["category_id"]))
# Add optional fields
if "keypoints" in annotation:
coco_annotation["keypoints"] = keypoints
coco_annotation["num_keypoints"] = num_keypoints
if "segmentation" in annotation:
seg = coco_annotation["segmentation"] = annotation["segmentation"]
if isinstance(seg, dict): # RLE
counts = seg["counts"]
if not isinstance(counts, str):
# make it json-serializable
seg["counts"] = counts.decode("ascii")
coco_annotations.append(coco_annotation)
logger.info(
"Conversion finished, "
f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
)
info = {
"date_created": str(datetime.datetime.now()),
"description": "Automatically generated COCO json file for Detectron2.",
}
coco_dict = {
"info": info,
"images": coco_images,
"categories": categories,
"licenses": None,
}
if len(coco_annotations) > 0:
coco_dict["annotations"] = coco_annotations
return coco_dict
def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
"""
Converts dataset into COCO format and saves it to a json file.
dataset_name must be registered in DatasetCatalog and in detectron2's standard format.
Args:
dataset_name:
reference from the config file to the catalogs
must be registered in DatasetCatalog and in detectron2's standard format
output_file: path of json file that will be saved to
allow_cached: if json file is already present then skip conversion
"""
# TODO: The dataset or the conversion script *may* change,
# a checksum would be useful for validating the cached data
PathManager.mkdirs(os.path.dirname(output_file))
with file_lock(output_file):
if PathManager.exists(output_file) and allow_cached:
logger.warning(
f"Using previously cached COCO format annotations at '{output_file}'. "
"You need to clear the cache file if your dataset has been modified."
)
else:
logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)")
coco_dict = convert_to_coco_dict(dataset_name)
logger.info(f"Caching COCO format annotations at '{output_file}' ...")
tmp_file = output_file + ".tmp"
with PathManager.open(tmp_file, "w") as f:
json.dump(coco_dict, f)
shutil.move(tmp_file, output_file)
def register_coco_instances(name, metadata, json_file, image_root):
"""
Register a dataset in COCO's json annotation format for
instance detection, instance segmentation and keypoint detection.
(i.e., Type 1 and 2 in http://cocodataset.org/#format-data.
`instances*.json` and `person_keypoints*.json` in the dataset).
This is an example of how to register a new dataset.
You can do something similar to this function, to register new datasets.
Args:
name (str): the name that identifies a dataset, e.g. "coco_2014_train".
metadata (dict): extra metadata associated with this dataset. You can
leave it as an empty dict.
json_file (str): path to the json instance annotation file.
image_root (str or path-like): directory which contains all the images.
"""
assert isinstance(name, str), name
assert isinstance(json_file, (str, os.PathLike)), json_file
assert isinstance(image_root, (str, os.PathLike)), image_root
# 1. register a function which returns dicts
DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name))
# 2. Optionally, add metadata about this dataset,
# since they might be useful in evaluation, visualization or logging
MetadataCatalog.get(name).set(
json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata
)
def main() -> None:
global logger
"""
Test the COCO json dataset loader.
Usage:
python -m detectron2.data.datasets.coco \
path/to/json path/to/image_root dataset_name
"dataset_name" can be "coco_2014_minival_100", or other
pre-registered ones
"""
import sys
import detectron2.data.datasets # noqa # add pre-defined metadata
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer
logger = setup_logger(name=__name__)
assert sys.argv[3] in DatasetCatalog.list()
meta = MetadataCatalog.get(sys.argv[3])
dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3])
logger.info("Done loading {} samples.".format(len(dicts)))
dirname = "coco-data-vis"
os.makedirs(dirname, exist_ok=True)
for d in dicts:
img = np.array(Image.open(d["file_name"]))
visualizer = Visualizer(img, metadata=meta)
vis = visualizer.draw_dataset_dict(d)
fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
vis.save(fpath)
if __name__ == "__main__":
main() # pragma: no cover
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import json
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.file_io import PathManager
from .coco import load_coco_json, load_sem_seg
__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"]
def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/coco/train2017".
gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017".
json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json".
Returns:
list[dict]: a list of dicts in Detectron2 standard format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
"""
def _convert_category_id(segment_info, meta):
if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
segment_info["isthing"] = True
else:
segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
segment_info["isthing"] = False
return segment_info
with PathManager.open(json_file) as f:
json_info = json.load(f)
ret = []
for ann in json_info["annotations"]:
image_id = int(ann["image_id"])
# TODO: currently we assume image and label has the same filename but
# different extension, and images have extension ".jpg" for COCO. Need
# to make image extension a user-provided argument if we extend this
# function to support other COCO-like datasets.
image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg")
label_file = os.path.join(gt_dir, ann["file_name"])
segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]]
ret.append(
{
"file_name": image_file,
"image_id": image_id,
"pan_seg_file_name": label_file,
"segments_info": segments_info,
}
)
assert len(ret), f"No images found in {image_dir}!"
assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"]
assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"]
return ret
def register_coco_panoptic(
name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None
):
"""
Register a "standard" version of COCO panoptic segmentation dataset named `name`.
The dictionaries in this registered dataset follows detectron2's standard format.
Hence it's called "standard".
Args:
name (str): the name that identifies a dataset,
e.g. "coco_2017_train_panoptic"
metadata (dict): extra metadata associated with this dataset.
image_root (str): directory which contains all the images
panoptic_root (str): directory which contains panoptic annotation images in COCO format
panoptic_json (str): path to the json panoptic annotation file in COCO format
sem_seg_root (none): not used, to be consistent with
`register_coco_panoptic_separated`.
instances_json (str): path to the json instance annotation file
"""
panoptic_name = name
DatasetCatalog.register(
panoptic_name,
lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata),
)
MetadataCatalog.get(panoptic_name).set(
panoptic_root=panoptic_root,
image_root=image_root,
panoptic_json=panoptic_json,
json_file=instances_json,
evaluator_type="coco_panoptic_seg",
ignore_label=255,
label_divisor=1000,
**metadata,
)
def register_coco_panoptic_separated(
name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json
):
"""
Register a "separated" version of COCO panoptic segmentation dataset named `name`.
The annotations in this registered dataset will contain both instance annotations and
semantic annotations, each with its own contiguous ids. Hence it's called "separated".
It follows the setting used by the PanopticFPN paper:
1. The instance annotations directly come from polygons in the COCO
instances annotation task, rather than from the masks in the COCO panoptic annotations.
The two format have small differences:
Polygons in the instance annotations may have overlaps.
The mask annotations are produced by labeling the overlapped polygons
with depth ordering.
2. The semantic annotations are converted from panoptic annotations, where
all "things" are assigned a semantic id of 0.
All semantic categories will therefore have ids in contiguous
range [1, #stuff_categories].
This function will also register a pure semantic segmentation dataset
named ``name + '_stuffonly'``.
Args:
name (str): the name that identifies a dataset,
e.g. "coco_2017_train_panoptic"
metadata (dict): extra metadata associated with this dataset.
image_root (str): directory which contains all the images
panoptic_root (str): directory which contains panoptic annotation images
panoptic_json (str): path to the json panoptic annotation file
sem_seg_root (str): directory which contains all the ground truth segmentation annotations.
instances_json (str): path to the json instance annotation file
"""
panoptic_name = name + "_separated"
DatasetCatalog.register(
panoptic_name,
lambda: merge_to_panoptic(
load_coco_json(instances_json, image_root, panoptic_name),
load_sem_seg(sem_seg_root, image_root),
),
)
MetadataCatalog.get(panoptic_name).set(
panoptic_root=panoptic_root,
image_root=image_root,
panoptic_json=panoptic_json,
sem_seg_root=sem_seg_root,
json_file=instances_json, # TODO rename
evaluator_type="coco_panoptic_seg",
ignore_label=255,
**metadata,
)
semantic_name = name + "_stuffonly"
DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root))
MetadataCatalog.get(semantic_name).set(
sem_seg_root=sem_seg_root,
image_root=image_root,
evaluator_type="sem_seg",
ignore_label=255,
**metadata,
)
def merge_to_panoptic(detection_dicts, sem_seg_dicts):
"""
Create dataset dicts for panoptic segmentation, by
merging two dicts using "file_name" field to match their entries.
Args:
detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation.
sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation.
Returns:
list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in
both detection_dicts and sem_seg_dicts that correspond to the same image.
The function assumes that the same key in different dicts has the same value.
"""
results = []
sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts}
assert len(sem_seg_file_to_entry) > 0
for det_dict in detection_dicts:
dic = copy.copy(det_dict)
dic.update(sem_seg_file_to_entry[dic["file_name"]])
results.append(dic)
return results
if __name__ == "__main__":
"""
Test the COCO panoptic dataset loader.
Usage:
python -m detectron2.data.datasets.coco_panoptic \
path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10
"dataset_name" can be "coco_2017_train_panoptic", or other
pre-registered ones
"""
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer
import detectron2.data.datasets # noqa # add pre-defined metadata
import sys
from PIL import Image
import numpy as np
logger = setup_logger(name=__name__)
assert sys.argv[4] in DatasetCatalog.list()
meta = MetadataCatalog.get(sys.argv[4])
dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict())
logger.info("Done loading {} samples.".format(len(dicts)))
dirname = "coco-data-vis"
os.makedirs(dirname, exist_ok=True)
num_imgs_to_vis = int(sys.argv[5])
for i, d in enumerate(dicts):
img = np.array(Image.open(d["file_name"]))
visualizer = Visualizer(img, metadata=meta)
vis = visualizer.draw_dataset_dict(d)
fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
vis.save(fpath)
if i + 1 >= num_imgs_to_vis:
break
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment