Commit c732df65 authored by limm's avatar limm
Browse files

push v0.1.3 version commit bd2ea47

parent 5b3792fc
Pipeline #706 failed with stages
in 0 seconds
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import atexit
import bisect
import multiprocessing as mp
from collections import deque
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.engine.defaults import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
class VisualizationDemo(object):
def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
"""
Args:
cfg (CfgNode):
instance_mode (ColorMode):
parallel (bool): whether to run the model in different processes from visualization.
Useful since the visualization logic can be slow.
"""
self.metadata = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
self.cpu_device = torch.device("cpu")
self.instance_mode = instance_mode
self.parallel = parallel
if parallel:
num_gpu = torch.cuda.device_count()
self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
else:
self.predictor = DefaultPredictor(cfg)
def run_on_image(self, image):
"""
Args:
image (np.ndarray): an image of shape (H, W, C) (in BGR order).
This is the format used by OpenCV.
Returns:
predictions (dict): the output of the model.
vis_output (VisImage): the visualized image output.
"""
vis_output = None
predictions = self.predictor(image)
# Convert image from OpenCV BGR format to Matplotlib RGB format.
image = image[:, :, ::-1]
visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_output = visualizer.draw_panoptic_seg_predictions(
panoptic_seg.to(self.cpu_device), segments_info
)
else:
if "sem_seg" in predictions:
vis_output = visualizer.draw_sem_seg(
predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
if "instances" in predictions:
instances = predictions["instances"].to(self.cpu_device)
vis_output = visualizer.draw_instance_predictions(predictions=instances)
return predictions, vis_output
def _frame_from_video(self, video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
break
def run_on_video(self, video):
"""
Visualizes predictions on frames of the input video.
Args:
video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
either a webcam or a video file.
Yields:
ndarray: BGR visualizations of each video frame.
"""
video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
def process_predictions(frame, predictions):
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_frame = video_visualizer.draw_panoptic_seg_predictions(
frame, panoptic_seg.to(self.cpu_device), segments_info
)
elif "instances" in predictions:
predictions = predictions["instances"].to(self.cpu_device)
vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
elif "sem_seg" in predictions:
vis_frame = video_visualizer.draw_sem_seg(
frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
# Converts Matplotlib RGB format to OpenCV BGR format
vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
return vis_frame
frame_gen = self._frame_from_video(video)
if self.parallel:
buffer_size = self.predictor.default_buffer_size
frame_data = deque()
for cnt, frame in enumerate(frame_gen):
frame_data.append(frame)
self.predictor.put(frame)
if cnt >= buffer_size:
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
while len(frame_data):
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
else:
for frame in frame_gen:
yield process_predictions(frame, self.predictor(frame))
class AsyncPredictor:
"""
A predictor that runs the model asynchronously, possibly on >1 GPUs.
Because rendering the visualization takes considerably amount of time,
this helps improve throughput when rendering videos.
"""
class _StopToken:
pass
class _PredictWorker(mp.Process):
def __init__(self, cfg, task_queue, result_queue):
self.cfg = cfg
self.task_queue = task_queue
self.result_queue = result_queue
super().__init__()
def run(self):
predictor = DefaultPredictor(self.cfg)
while True:
task = self.task_queue.get()
if isinstance(task, AsyncPredictor._StopToken):
break
idx, data = task
result = predictor(data)
self.result_queue.put((idx, result))
def __init__(self, cfg, num_gpus: int = 1):
"""
Args:
cfg (CfgNode):
num_gpus (int): if 0, will run on CPU
"""
num_workers = max(num_gpus, 1)
self.task_queue = mp.Queue(maxsize=num_workers * 3)
self.result_queue = mp.Queue(maxsize=num_workers * 3)
self.procs = []
for gpuid in range(max(num_gpus, 1)):
cfg = cfg.clone()
cfg.defrost()
cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
self.procs.append(
AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
)
self.put_idx = 0
self.get_idx = 0
self.result_rank = []
self.result_data = []
for p in self.procs:
p.start()
atexit.register(self.shutdown)
def put(self, image):
self.put_idx += 1
self.task_queue.put((self.put_idx, image))
def get(self):
self.get_idx += 1 # the index needed for this request
if len(self.result_rank) and self.result_rank[0] == self.get_idx:
res = self.result_data[0]
del self.result_data[0], self.result_rank[0]
return res
while True:
# make sure the results are returned in the correct order
idx, res = self.result_queue.get()
if idx == self.get_idx:
return res
insert = bisect.bisect(self.result_rank, idx)
self.result_rank.insert(insert, idx)
self.result_data.insert(insert, res)
def __len__(self):
return self.put_idx - self.get_idx
def __call__(self, image):
self.put(image)
return self.get()
def shutdown(self):
for _ in self.procs:
self.task_queue.put(AsyncPredictor._StopToken())
@property
def default_buffer_size(self):
return len(self.procs) * 5
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .utils.env import setup_environment
setup_environment()
# This line will be programatically read/write by setup.py.
# Leave them at the bottom of this file and don't touch them.
__version__ = "0.1.3"
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# File:
from . import catalog as _UNUSED # register the handler
from .detection_checkpoint import DetectionCheckpointer
from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import re
import torch
from fvcore.common.checkpoint import (
get_missing_parameters_message,
get_unexpected_parameters_message,
)
def convert_basic_c2_names(original_keys):
"""
Apply some basic name conversion to names in C2 weights.
It only deals with typical backbone models.
Args:
original_keys (list[str]):
Returns:
list[str]: The same number of strings matching those in original_keys.
"""
layer_keys = copy.deepcopy(original_keys)
layer_keys = [
{"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
] # some hard-coded mappings
layer_keys = [k.replace("_", ".") for k in layer_keys]
layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
# Uniform both bn and gn names to "norm"
layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]
# stem
layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
# to avoid mis-matching with "conv1" in other components (e.g. detection head)
layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]
# layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5)
# layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
# layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
# layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
# layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]
# blocks
layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]
# DensePose substitutions
layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
return layer_keys
def convert_c2_detectron_names(weights):
"""
Map Caffe2 Detectron weight names to Detectron2 names.
Args:
weights (dict): name -> tensor
Returns:
dict: detectron2 names -> tensor
dict: detectron2 names -> C2 names
"""
logger = logging.getLogger(__name__)
logger.info("Remapping C2 weights ......")
original_keys = sorted(weights.keys())
layer_keys = copy.deepcopy(original_keys)
layer_keys = convert_basic_c2_names(layer_keys)
# --------------------------------------------------------------------------
# RPN hidden representation conv
# --------------------------------------------------------------------------
# FPN case
# In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
# shared for all other levels, hence the appearance of "fpn2"
layer_keys = [
k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
]
# Non-FPN case
layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]
# --------------------------------------------------------------------------
# RPN box transformation conv
# --------------------------------------------------------------------------
# FPN case (see note above about "fpn2")
layer_keys = [
k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
for k in layer_keys
]
layer_keys = [
k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
for k in layer_keys
]
# Non-FPN case
layer_keys = [
k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
]
layer_keys = [
k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
for k in layer_keys
]
# --------------------------------------------------------------------------
# Fast R-CNN box head
# --------------------------------------------------------------------------
layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
# 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]
# --------------------------------------------------------------------------
# FPN lateral and output convolutions
# --------------------------------------------------------------------------
def fpn_map(name):
"""
Look for keys with the following patterns:
1) Starts with "fpn.inner."
Example: "fpn.inner.res2.2.sum.lateral.weight"
Meaning: These are lateral pathway convolutions
2) Starts with "fpn.res"
Example: "fpn.res2.2.sum.weight"
Meaning: These are FPN output convolutions
"""
splits = name.split(".")
norm = ".norm" if "norm" in splits else ""
if name.startswith("fpn.inner."):
# splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
stage = int(splits[2][len("res") :])
return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
elif name.startswith("fpn.res"):
# splits example: ['fpn', 'res2', '2', 'sum', 'weight']
stage = int(splits[1][len("res") :])
return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
return name
layer_keys = [fpn_map(k) for k in layer_keys]
# --------------------------------------------------------------------------
# Mask R-CNN mask head
# --------------------------------------------------------------------------
# roi_heads.StandardROIHeads case
layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
# roi_heads.Res5ROIHeads case
layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]
# --------------------------------------------------------------------------
# Keypoint R-CNN head
# --------------------------------------------------------------------------
# interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
layer_keys = [
k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
]
layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]
# --------------------------------------------------------------------------
# Done with replacements
# --------------------------------------------------------------------------
assert len(set(layer_keys)) == len(layer_keys)
assert len(original_keys) == len(layer_keys)
new_weights = {}
new_keys_to_original_keys = {}
for orig, renamed in zip(original_keys, layer_keys):
new_keys_to_original_keys[renamed] = orig
if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
# remove the meaningless prediction weight for background class
new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
new_weights[renamed] = weights[orig][new_start_idx:]
logger.info(
"Remove prediction weight for background class in {}. The shape changes from "
"{} to {}.".format(
renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
)
)
elif renamed.startswith("cls_score."):
# move weights of bg class from original index 0 to last index
logger.info(
"Move classification weights for background class in {} from index 0 to "
"index {}.".format(renamed, weights[orig].shape[0] - 1)
)
new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
else:
new_weights[renamed] = weights[orig]
return new_weights, new_keys_to_original_keys
# Note the current matching is not symmetric.
# it assumes model_state_dict will have longer names.
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
"""
Match names between the two state-dict, and update the values of model_state_dict in-place with
copies of the matched tensor in ckpt_state_dict.
If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
model and will be renamed at first.
Strategy: suppose that the models that we will create will have prefixes appended
to each of its keys, for example due to an extra level of nesting that the original
pre-trained weights from ImageNet won't contain. For example, model.state_dict()
might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
res2.conv1.weight. We thus want to match both parameters together.
For that, we look for each model weight, look among all loaded keys if there is one
that is a suffix of the current weight name, and use it if that's the case.
If multiple matches exist, take the one with longest size
of the corresponding name. For example, for the same model as before, the pretrained
weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
we want to match backbone[0].body.conv1.weight to conv1.weight, and
backbone[0].body.res2.conv1.weight to res2.conv1.weight.
"""
model_keys = sorted(model_state_dict.keys())
if c2_conversion:
ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
# original_keys: the name in the original dict (before renaming)
else:
original_keys = {x: x for x in ckpt_state_dict.keys()}
ckpt_keys = sorted(ckpt_state_dict.keys())
def match(a, b):
# Matched ckpt_key should be a complete (starts with '.') suffix.
# For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
# but matches whatever_conv1 or mesh_head.whatever_conv1.
return a == b or a.endswith("." + b)
# get a matrix of string matches, where each (i, j) entry correspond to the size of the
# ckpt_key string, if it matches
match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
# use the matched one with longest size in case of multiple matches
max_match_size, idxs = match_matrix.max(1)
# remove indices that correspond to no-match
idxs[max_match_size == 0] = -1
# used for logging
max_len_model = max(len(key) for key in model_keys) if model_keys else 1
max_len_ckpt = max(len(key) for key in ckpt_keys) if ckpt_keys else 1
log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
logger = logging.getLogger(__name__)
# matched_pairs (matched checkpoint key --> matched model key)
matched_keys = {}
for idx_model, idx_ckpt in enumerate(idxs.tolist()):
if idx_ckpt == -1:
continue
key_model = model_keys[idx_model]
key_ckpt = ckpt_keys[idx_ckpt]
value_ckpt = ckpt_state_dict[key_ckpt]
shape_in_model = model_state_dict[key_model].shape
if shape_in_model != value_ckpt.shape:
logger.warning(
"Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
key_ckpt, value_ckpt.shape, key_model, shape_in_model
)
)
logger.warning(
"{} will not be loaded. Please double check and see if this is desired.".format(
key_ckpt
)
)
continue
model_state_dict[key_model] = value_ckpt.clone()
if key_ckpt in matched_keys: # already added to matched_keys
logger.error(
"Ambiguity found for {} in checkpoint!"
"It matches at least two keys in the model ({} and {}).".format(
key_ckpt, key_model, matched_keys[key_ckpt]
)
)
raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
matched_keys[key_ckpt] = key_model
logger.info(
log_str_template.format(
key_model,
max_len_model,
original_keys[key_ckpt],
max_len_ckpt,
tuple(shape_in_model),
)
)
matched_model_keys = matched_keys.values()
matched_ckpt_keys = matched_keys.keys()
# print warnings about unmatched keys on both side
unmatched_model_keys = [k for k in model_keys if k not in matched_model_keys]
if len(unmatched_model_keys):
logger.info(get_missing_parameters_message(unmatched_model_keys))
unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_ckpt_keys]
if len(unmatched_ckpt_keys):
logger.info(
get_unexpected_parameters_message(original_keys[x] for x in unmatched_ckpt_keys)
)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from fvcore.common.file_io import PathHandler, PathManager
class ModelCatalog(object):
"""
Store mappings from names to third-party models.
"""
S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
# MSRA models have STRIDE_IN_1X1=True. False otherwise.
# NOTE: all BN models here have fused BN into an affine layer.
# As a result, you should only load them to a model with "FrozenBN".
# Loading them to a model with regular BN or SyncBN is wrong.
# Even when loaded to FrozenBN, it is still different from affine by an epsilon,
# which should be negligible for training.
# NOTE: all models here uses PIXEL_STD=[1,1,1]
# NOTE: Most of the BN models here are no longer used. We use the
# re-converted pre-trained models under detectron2 model zoo instead.
C2_IMAGENET_MODELS = {
"MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
"MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
"FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
"FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
"FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
"FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
"FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
}
C2_DETECTRON_PATH_FORMAT = (
"{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950
)
C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
# format: {model_name} -> part of the url
C2_DETECTRON_MODELS = {
"35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950
"35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950
"35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950
"36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950
"35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950
"35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950
"35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950
"36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950
"48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950
"37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950
"35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950
"35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950
"36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950
}
@staticmethod
def get(name):
if name.startswith("Caffe2Detectron/COCO"):
return ModelCatalog._get_c2_detectron_baseline(name)
if name.startswith("ImageNetPretrained/"):
return ModelCatalog._get_c2_imagenet_pretrained(name)
raise RuntimeError("model not present in the catalog: {}".format(name))
@staticmethod
def _get_c2_imagenet_pretrained(name):
prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
name = name[len("ImageNetPretrained/") :]
name = ModelCatalog.C2_IMAGENET_MODELS[name]
url = "/".join([prefix, name])
return url
@staticmethod
def _get_c2_detectron_baseline(name):
name = name[len("Caffe2Detectron/COCO/") :]
url = ModelCatalog.C2_DETECTRON_MODELS[name]
if "keypoint_rcnn" in name:
dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
else:
dataset = ModelCatalog.C2_DATASET_COCO
if "35998355/rpn_R-50-C4_1x" in name:
# this one model is somehow different from others ..
type = "rpn"
else:
type = "generalized_rcnn"
# Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
)
return url
class ModelCatalogHandler(PathHandler):
"""
Resolve URL like catalog://.
"""
PREFIX = "catalog://"
def _get_supported_prefixes(self):
return [self.PREFIX]
def _get_local_path(self, path):
logger = logging.getLogger(__name__)
catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
logger.info("Catalog entry {} points to {}".format(path, catalog_path))
return PathManager.get_local_path(catalog_path)
def _open(self, path, mode="r", **kwargs):
return PathManager.open(self._get_local_path(path), mode, **kwargs)
class Detectron2Handler(PathHandler):
"""
Resolve anything that's in Detectron2 model zoo.
"""
PREFIX = "detectron2://"
S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
def _get_supported_prefixes(self):
return [self.PREFIX]
def _get_local_path(self, path):
name = path[len(self.PREFIX) :]
return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name)
def _open(self, path, mode="r", **kwargs):
return PathManager.open(self._get_local_path(path), mode, **kwargs)
PathManager.register_handler(ModelCatalogHandler())
PathManager.register_handler(Detectron2Handler())
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import pickle
from fvcore.common.checkpoint import Checkpointer
from fvcore.common.file_io import PathManager
import detectron2.utils.comm as comm
from .c2_model_loading import align_and_update_state_dicts
class DetectionCheckpointer(Checkpointer):
"""
Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2
model zoo, and apply conversions for legacy models.
"""
def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
is_main_process = comm.is_main_process()
super().__init__(
model,
save_dir,
save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
**checkpointables,
)
def _load_file(self, filename):
if filename.endswith(".pkl"):
with PathManager.open(filename, "rb") as f:
data = pickle.load(f, encoding="latin1")
if "model" in data and "__author__" in data:
# file is in Detectron2 model zoo format
self.logger.info("Reading a file from '{}'".format(data["__author__"]))
return data
else:
# assume file is from Caffe2 / Detectron1 model zoo
if "blobs" in data:
# Detection models have "blobs", but ImageNet models don't
data = data["blobs"]
data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
loaded = super()._load_file(filename) # load native pth checkpoint
if "model" not in loaded:
loaded = {"model": loaded}
return loaded
def _load_model(self, checkpoint):
if checkpoint.get("matching_heuristics", False):
self._convert_ndarray_to_tensor(checkpoint["model"])
# convert weights by name-matching heuristics
model_state_dict = self.model.state_dict()
align_and_update_state_dicts(
model_state_dict,
checkpoint["model"],
c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
)
checkpoint["model"] = model_state_dict
# for non-caffe2 models, use standard ways to load it
incompatible = super()._load_model(checkpoint)
if incompatible is None: # support older versions of fvcore
return None
model_buffers = dict(self.model.named_buffers(recurse=False))
for k in ["pixel_mean", "pixel_std"]:
# Ignore missing key message about pixel_mean/std.
# Though they may be missing in old checkpoints, they will be correctly
# initialized from config anyway.
if k in model_buffers:
try:
incompatible.missing_keys.remove(k)
except ValueError:
pass
return incompatible
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .compat import downgrade_config, upgrade_config
from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable
__all__ = [
"CfgNode",
"get_cfg",
"global_cfg",
"set_global_cfg",
"downgrade_config",
"upgrade_config",
"configurable",
]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Backward compatibility of configs.
Instructions to bump version:
+ It's not needed to bump version if new keys are added.
It's only needed when backward-incompatible changes happen
(i.e., some existing keys disappear, or the meaning of a key changes)
+ To bump version, do the following:
1. Increment _C.VERSION in defaults.py
2. Add a converter in this file.
Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X,
and a function "downgrade" which in-place downgrades config from X to X-1
In each function, VERSION is left unchanged.
Each converter assumes that its input has the relevant keys
(i.e., the input is not a partial config).
3. Run the tests (test_config.py) to make sure the upgrade & downgrade
functions are consistent.
"""
import logging
from typing import List, Optional, Tuple
from .config import CfgNode as CN
from .defaults import _C
__all__ = ["upgrade_config", "downgrade_config"]
def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN:
"""
Upgrade a config from its current version to a newer version.
Args:
cfg (CfgNode):
to_version (int): defaults to the latest version.
"""
cfg = cfg.clone()
if to_version is None:
to_version = _C.VERSION
assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format(
cfg.VERSION, to_version
)
for k in range(cfg.VERSION, to_version):
converter = globals()["ConverterV" + str(k + 1)]
converter.upgrade(cfg)
cfg.VERSION = k + 1
return cfg
def downgrade_config(cfg: CN, to_version: int) -> CN:
"""
Downgrade a config from its current version to an older version.
Args:
cfg (CfgNode):
to_version (int):
Note:
A general downgrade of arbitrary configs is not always possible due to the
different functionalities in different versions.
The purpose of downgrade is only to recover the defaults in old versions,
allowing it to load an old partial yaml config.
Therefore, the implementation only needs to fill in the default values
in the old version when a general downgrade is not possible.
"""
cfg = cfg.clone()
assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format(
cfg.VERSION, to_version
)
for k in range(cfg.VERSION, to_version, -1):
converter = globals()["ConverterV" + str(k)]
converter.downgrade(cfg)
cfg.VERSION = k - 1
return cfg
def guess_version(cfg: CN, filename: str) -> int:
"""
Guess the version of a partial config where the VERSION field is not specified.
Returns the version, or the latest if cannot make a guess.
This makes it easier for users to migrate.
"""
logger = logging.getLogger(__name__)
def _has(name: str) -> bool:
cur = cfg
for n in name.split("."):
if n not in cur:
return False
cur = cur[n]
return True
# Most users' partial configs have "MODEL.WEIGHT", so guess on it
ret = None
if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"):
ret = 1
if ret is not None:
logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret))
else:
ret = _C.VERSION
logger.warning(
"Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format(
filename, ret
)
)
return ret
def _rename(cfg: CN, old: str, new: str) -> None:
old_keys = old.split(".")
new_keys = new.split(".")
def _set(key_seq: List[str], val: str) -> None:
cur = cfg
for k in key_seq[:-1]:
if k not in cur:
cur[k] = CN()
cur = cur[k]
cur[key_seq[-1]] = val
def _get(key_seq: List[str]) -> CN:
cur = cfg
for k in key_seq:
cur = cur[k]
return cur
def _del(key_seq: List[str]) -> None:
cur = cfg
for k in key_seq[:-1]:
cur = cur[k]
del cur[key_seq[-1]]
if len(cur) == 0 and len(key_seq) > 1:
_del(key_seq[:-1])
_set(new_keys, _get(old_keys))
_del(old_keys)
class _RenameConverter:
"""
A converter that handles simple rename.
"""
RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name)
@classmethod
def upgrade(cls, cfg: CN) -> None:
for old, new in cls.RENAME:
_rename(cfg, old, new)
@classmethod
def downgrade(cls, cfg: CN) -> None:
for old, new in cls.RENAME[::-1]:
_rename(cfg, new, old)
class ConverterV1(_RenameConverter):
RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")]
class ConverterV2(_RenameConverter):
"""
A large bulk of rename, before public release.
"""
RENAME = [
("MODEL.WEIGHT", "MODEL.WEIGHTS"),
("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"),
("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"),
(
"MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD",
"MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH",
),
(
"MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT",
"MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT",
),
(
"MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD",
"MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH",
),
("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"),
("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"),
("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"),
("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"),
("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"),
("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"),
("TEST.AUG_ON", "TEST.AUG.ENABLED"),
("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"),
("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"),
("TEST.AUG_FLIP", "TEST.AUG.FLIP"),
]
@classmethod
def upgrade(cls, cfg: CN) -> None:
super().upgrade(cfg)
if cfg.MODEL.META_ARCHITECTURE == "RetinaNet":
_rename(
cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS"
)
_rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"]
del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"]
else:
_rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS")
_rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES")
del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"]
del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"]
del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"]
@classmethod
def downgrade(cls, cfg: CN) -> None:
super().downgrade(cfg)
_rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS")
_rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES")
cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS
cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES
cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import functools
import inspect
import logging
from fvcore.common.config import CfgNode as _CfgNode
from fvcore.common.file_io import PathManager
class CfgNode(_CfgNode):
"""
The same as `fvcore.common.config.CfgNode`, but different in:
1. Use unsafe yaml loading by default.
Note that this may lead to arbitrary code execution: you must not
load a config file from untrusted sources before manually inspecting
the content of the file.
2. Support config versioning.
When attempting to merge an old config, it will convert the old config automatically.
"""
# Note that the default value of allow_unsafe is changed to True
def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None:
assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!"
loaded_cfg = _CfgNode.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe)
loaded_cfg = type(self)(loaded_cfg)
# defaults.py needs to import CfgNode
from .defaults import _C
latest_ver = _C.VERSION
assert (
latest_ver == self.VERSION
), "CfgNode.merge_from_file is only allowed on a config object of latest version!"
logger = logging.getLogger(__name__)
loaded_ver = loaded_cfg.get("VERSION", None)
if loaded_ver is None:
from .compat import guess_version
loaded_ver = guess_version(loaded_cfg, cfg_filename)
assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format(
loaded_ver, self.VERSION
)
if loaded_ver == self.VERSION:
self.merge_from_other_cfg(loaded_cfg)
else:
# compat.py needs to import CfgNode
from .compat import upgrade_config, downgrade_config
logger.warning(
"Loading an old v{} config file '{}' by automatically upgrading to v{}. "
"See docs/CHANGELOG.md for instructions to update your files.".format(
loaded_ver, cfg_filename, self.VERSION
)
)
# To convert, first obtain a full config at an old version
old_self = downgrade_config(self, to_version=loaded_ver)
old_self.merge_from_other_cfg(loaded_cfg)
new_config = upgrade_config(old_self)
self.clear()
self.update(new_config)
def dump(self, *args, **kwargs):
"""
Returns:
str: a yaml string representation of the config
"""
# to make it show up in docs
return super().dump(*args, **kwargs)
global_cfg = CfgNode()
def get_cfg() -> CfgNode:
"""
Get a copy of the default config.
Returns:
a detectron2 CfgNode instance.
"""
from .defaults import _C
return _C.clone()
def set_global_cfg(cfg: CfgNode) -> None:
"""
Let the global config point to the given cfg.
Assume that the given "cfg" has the key "KEY", after calling
`set_global_cfg(cfg)`, the key can be accessed by:
.. code-block:: python
from detectron2.config import global_cfg
print(global_cfg.KEY)
By using a hacky global config, you can access these configs anywhere,
without having to pass the config object or the values deep into the code.
This is a hacky feature introduced for quick prototyping / research exploration.
"""
global global_cfg
global_cfg.clear()
global_cfg.update(cfg)
def configurable(init_func):
"""
Decorate a class's __init__ method so that it can be called with a CfgNode
object using the class's from_config classmethod.
Examples:
.. code-block:: python
class A:
@configurable
def __init__(self, a, b=2, c=3):
pass
@classmethod
def from_config(cls, cfg):
# Returns kwargs to be passed to __init__
return {"a": cfg.A, "b": cfg.B}
a1 = A(a=1, b=2) # regular construction
a2 = A(cfg) # construct with a cfg
a3 = A(cfg, b=3, c=4) # construct with extra overwrite
"""
assert init_func.__name__ == "__init__", "@configurable should only be used for __init__!"
if init_func.__module__.startswith("detectron2."):
assert (
init_func.__doc__ is not None and "experimental" in init_func.__doc__
), f"configurable {init_func} should be marked experimental"
@functools.wraps(init_func)
def wrapped(self, *args, **kwargs):
try:
from_config_func = type(self).from_config
except AttributeError:
raise AttributeError("Class with @configurable must have a 'from_config' classmethod.")
if not inspect.ismethod(from_config_func):
raise TypeError("Class with @configurable must have a 'from_config' classmethod.")
if _called_with_cfg(*args, **kwargs):
explicit_args = _get_args_from_config(from_config_func, *args, **kwargs)
init_func(self, **explicit_args)
else:
init_func(self, *args, **kwargs)
return wrapped
def _get_args_from_config(from_config_func, *args, **kwargs):
"""
Use `from_config` to obtain explicit arguments.
Returns:
dict: arguments to be used for cls.__init__
"""
signature = inspect.signature(from_config_func)
if list(signature.parameters.keys())[0] != "cfg":
raise TypeError(
f"{from_config_func.__self__}.from_config must take 'cfg' as the first argument!"
)
support_var_arg = any(
param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD]
for param in signature.parameters.values()
)
if support_var_arg: # forward all arguments to from_config, if from_config accepts them
ret = from_config_func(*args, **kwargs)
else:
# forward supported arguments to from_config
supported_arg_names = set(signature.parameters.keys())
extra_kwargs = {}
for name in list(kwargs.keys()):
if name not in supported_arg_names:
extra_kwargs[name] = kwargs.pop(name)
ret = from_config_func(*args, **kwargs)
# forward the other arguments to __init__
ret.update(extra_kwargs)
return ret
def _called_with_cfg(*args, **kwargs):
"""
Returns:
bool: whether the arguments contain CfgNode and should be considered
forwarded to from_config.
"""
if len(args) and isinstance(args[0], _CfgNode):
return True
if isinstance(kwargs.pop("cfg", None), _CfgNode):
return True
# `from_config`'s first argument is forced to be "cfg".
# So the above check covers all cases.
return False
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .config import CfgNode as CN
# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be either used for training or for testing, the
# corresponding name will be post-fixed by a _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = CN()
# The version number, to upgrade from old configs to new ones if any
# changes happen. It's recommended to keep a VERSION in your config file.
_C.VERSION = 2
_C.MODEL = CN()
_C.MODEL.LOAD_PROPOSALS = False
_C.MODEL.MASK_ON = False
_C.MODEL.KEYPOINT_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
# Path (possibly with schema like catalog:// or detectron2://) to a checkpoint file
# to be loaded to the model. You can find available models in the model zoo.
_C.MODEL.WEIGHTS = ""
# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR).
# To train on images of different number of channels, just set different mean & std.
# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675]
_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std)
_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0]
# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Sample size of smallest side by choice or random selection from range give by
# INPUT.MIN_SIZE_TRAIN
_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# `True` if cropping is used for data augmentation during training
_C.INPUT.CROP = CN({"ENABLED": False})
# Cropping type:
# - "relative" crop (H * CROP.SIZE[0], W * CROP.SIZE[1]) part of an input of size (H, W)
# - "relative_range" uniformly sample relative crop size from between [CROP.SIZE[0], [CROP.SIZE[1]].
# and [1, 1] and use it as in "relative" scenario.
# - "absolute" crop part of an input with absolute size: (CROP.SIZE[0], CROP.SIZE[1]).
_C.INPUT.CROP.TYPE = "relative_range"
# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of
# pixels if CROP.TYPE is "absolute"
_C.INPUT.CROP.SIZE = [0.9, 0.9]
# Whether the model needs RGB, YUV, HSV etc.
# Should be one of the modes defined here, as we use PIL to read the image:
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
# with BGR being the one exception. One can set image format to BGR, we will
# internally use RGB for conversion and flip the channels over
_C.INPUT.FORMAT = "BGR"
# The ground truth mask format that the model will use.
# Mask R-CNN supports either "polygon" or "bitmask" as ground truth.
_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask"
# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training. Must be registered in DatasetCatalog
_C.DATASETS.TRAIN = ()
# List of the pre-computed proposal files for training, which must be consistent
# with datasets listed in DATASETS.TRAIN.
_C.DATASETS.PROPOSAL_FILES_TRAIN = ()
# Number of top scoring precomputed proposals to keep for training
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000
# List of the dataset names for testing. Must be registered in DatasetCatalog
_C.DATASETS.TEST = ()
# List of the pre-computed proposal files for test, which must be consistent
# with datasets listed in DATASETS.TEST.
_C.DATASETS.PROPOSAL_FILES_TEST = ()
# Number of top scoring precomputed proposals to keep for test
_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000
# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# Options: TrainingSampler, RepeatFactorTrainingSampler
_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler"
# Repeat threshold for RepeatFactorTrainingSampler
_C.DATALOADER.REPEAT_THRESHOLD = 0.0
# if True, the dataloader will filter out images that have no associated
# annotations at train time.
_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.NAME = "build_resnet_backbone"
# Freeze the first several stages so they are not trained.
# There are 5 stages in ResNet. The first is a convolution, and the following
# stages are each group of residual blocks.
_C.MODEL.BACKBONE.FREEZE_AT = 2
# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
# Names of the input feature maps to be used by FPN
# They must have contiguous power of 2 strides
# e.g., ["res2", "res3", "res4", "res5"]
_C.MODEL.FPN.IN_FEATURES = []
_C.MODEL.FPN.OUT_CHANNELS = 256
# Options: "" (no norm), "GN"
_C.MODEL.FPN.NORM = ""
# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg"
_C.MODEL.FPN.FUSE_TYPE = "sum"
# ---------------------------------------------------------------------------- #
# Proposal generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.PROPOSAL_GENERATOR = CN()
# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals"
_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
# Proposal height and width both need to be greater than MIN_SIZE
# (a the scale used during training or inference)
_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0
# ---------------------------------------------------------------------------- #
# Anchor generator options
# ---------------------------------------------------------------------------- #
_C.MODEL.ANCHOR_GENERATOR = CN()
# The generator can be any name in the ANCHOR_GENERATOR registry
_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input.
# Format: list[list[float]]. SIZES[i] specifies the list of sizes
# to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true,
# or len(SIZES) == 1 is true and size list SIZES[0] is used for all
# IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect
# ratios are generated by an anchor generator.
# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W)
# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true,
# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used
# for all IN_FEATURES.
_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
# Anchor angles.
# list[list[float]], the angle in degrees, for each input feature map.
# ANGLES[i] specifies the list of angles for IN_FEATURES[i].
_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]]
# Relative offset between the center of the first anchor and the top-left corner of the image
# Value has to be in [0, 1). Recommend to use 0.5, which means half stride.
# The value is not expected to affect model accuracy.
_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0
# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY
# Names of the input feature maps to be used by RPN
# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN
_C.MODEL.RPN.IN_FEATURES = ["res4"]
# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.BOUNDARY_THRESH = -1
# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD]
# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example: 1)
# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example: 0)
# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD)
# are ignored (-1)
_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7]
_C.MODEL.RPN.IOU_LABELS = [0, -1, 1]
# Total number of RPN examples per image
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0
_C.MODEL.RPN.LOSS_WEIGHT = 1.0
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
# When FPN is used, this limit is applied per level and then again to the union
# of proposals from all levels
# NOTE: When FPN is used, the meaning of this config is different from Detectron1.
# It means per-batch topk in Detectron1, but per-image topk here.
# See "modeling/rpn/rpn_outputs.py" for details.
_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads"
# Number of foreground classes
_C.MODEL.ROI_HEADS.NUM_CLASSES = 80
# Names of the input feature maps to be used by ROI heads
# Currently all heads (box, mask, ...) use the same input feature map list
# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN
_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
# IOU overlap ratios [IOU_THRESHOLD]
# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD)
# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 16 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
# Only used on test mode
# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
# inference.
_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
# If True, augment proposals with ground-truth boxes before sampling proposals to
# train ROI heads.
_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True
# ---------------------------------------------------------------------------- #
# Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
# C4 don't use head name option
# Options for non-C4 models: FastRCNNConvFCHead,
_C.MODEL.ROI_BOX_HEAD.NAME = ""
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1.
_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0
# Hidden layer dimension for FC layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024
_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0
# Channel dimension for Conv layers in the RoI box head
_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_BOX_HEAD.NORM = ""
# Whether to use class agnostic for bbox regression
_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False
# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes.
_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False
# ---------------------------------------------------------------------------- #
# Cascaded Box Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_CASCADE_HEAD = CN()
# The number of cascade stages is implicitly defined by the length of the following two configs.
_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = (
(10.0, 10.0, 5.0, 5.0),
(20.0, 20.0, 10.0, 10.0),
(30.0, 30.0, 15.0, 15.0),
)
_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7)
# ---------------------------------------------------------------------------- #
# Mask Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head
_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256
# Normalization method for the convolution layers.
# Options: "" (no norm), "GN", "SyncBN".
_C.MODEL.ROI_MASK_HEAD.NORM = ""
# Whether to use class agnostic for mask prediction
_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2"
# ---------------------------------------------------------------------------- #
# Keypoint Head
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead"
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8))
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO.
# Images with too few (or no) keypoints are excluded from training.
_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1
# Normalize by the total number of visible keypoints in the minibatch if True.
# Otherwise, normalize by the total number of keypoints that could ever exist
# in the minibatch.
# The keypoint softmax loss is only calculated on visible keypoints.
# Since the number of visible keypoints can vary significantly between
# minibatches, this has the effect of up-weighting the importance of
# minibatches with few visible keypoints. (Imagine the extreme case of
# only one visible keypoint versus N: in the case of N, each one
# contributes 1/N to the gradient compared to the single keypoint
# determining the gradient direction). Instead, we can normalize the
# loss by the total number of keypoints, if it were the case that all
# keypoints were visible in a full minibatch. (Returning to the example,
# this means that the one visible keypoint contributes as much as each
# of the N keypoints.)
_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True
# Multi-task loss weight to use for keypoints
# Recommended values:
# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0
# Type of pooling operation applied to the incoming feature map for each RoI
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2"
# ---------------------------------------------------------------------------- #
# Semantic Segmentation Head
# ---------------------------------------------------------------------------- #
_C.MODEL.SEM_SEG_HEAD = CN()
_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead"
_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"]
# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for
# the correposnding pixel.
_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255
# Number of classes in the semantic segmentation head
_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54
# Number of channels in the 3x3 convs inside semantic-FPN heads.
_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128
# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4
# Normalization method for the convolution layers. Options: "" (no norm), "GN".
_C.MODEL.SEM_SEG_HEAD.NORM = "GN"
_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0
_C.MODEL.PANOPTIC_FPN = CN()
# Scaling of all losses from instance detection / segmentation head.
_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0
# options when combining instance & semantic segmentation outputs
_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True})
_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5
_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096
_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
# ---------------------------------------------------------------------------- #
# RetinaNet Head
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()
# This is the number of foreground classes.
_C.MODEL.RETINANET.NUM_CLASSES = 80
_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"]
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4
# IoU overlap ratio [bg, fg] for labeling anchors.
# Anchors with < bg are labeled negative (0)
# Anchors with >= bg and < fg are ignored (-1)
# Anchors with >= fg are labeled positive (1)
_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5]
_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1]
# Prior prob for rare case (i.e. foreground) at the beginning of training.
# This is used to set the bias for the logits layer of the classifier subnet.
# This improves training stability in the case of heavy class imbalance.
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
# Inference cls score threshold, only anchors with score > INFERENCE_TH are
# considered for inference (to improve speed)
_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Loss parameters
_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt}
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.DEPTH = 50
_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1
# Options: FrozenBN, GN, "SyncBN", "BN"
_C.MODEL.RESNETS.NORM = "FrozenBN"
# Baseline width of each group.
# Scaling this parameters will scale the width of all bottleneck layers.
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1
# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet
# For R18 and R34, this needs to be set to 64
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
# Apply Deformable Convolution in stages
# Specify if apply deform_conv on Res2, Res3, Res4, Res5
_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False]
# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168);
# Use False for DeformableV1.
_C.MODEL.RESNETS.DEFORM_MODULATED = False
# Number of groups in deformable conv.
_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
# See detectron2/solver/build.py for LR scheduler options
_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
_C.SOLVER.MAX_ITER = 40000
_C.SOLVER.BASE_LR = 0.001
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.NESTEROV = False
_C.SOLVER.WEIGHT_DECAY = 0.0001
# The weight decay that's applied to parameters of normalization layers
# (typically the affine transformation)
_C.SOLVER.WEIGHT_DECAY_NORM = 0.0
_C.SOLVER.GAMMA = 0.1
# The iteration number to decrease learning rate by GAMMA.
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000
_C.SOLVER.WARMUP_ITERS = 1000
_C.SOLVER.WARMUP_METHOD = "linear"
# Save a checkpoint after every this number of iterations
_C.SOLVER.CHECKPOINT_PERIOD = 5000
# Number of images per batch across all machines.
# If we have 16 GPUs and IMS_PER_BATCH = 32,
# each GPU will see 2 images per batch.
_C.SOLVER.IMS_PER_BATCH = 16
# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for
# biases. This is not useful (at least for recent models). You should avoid
# changing these and they exist only to reproduce Detectron v1 training if
# desired.
_C.SOLVER.BIAS_LR_FACTOR = 1.0
_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY
# Gradient clipping
_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False})
# Type of gradient clipping, currently 2 values are supported:
# - "value": the absolute values of elements of each gradients are clipped
# - "norm": the norm of the gradient for each parameter is clipped thus
# affecting all elements in the parameter
_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
# Maximum absolute value used for clipping gradients
_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
# Floating point number p for L-p norm to be used with the "norm"
# gradient clipping type; for L-inf, please specify .inf
_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
# For end-to-end tests to verify the expected accuracy.
# Each item is [task, metric, value, tolerance]
# e.g.: [['bbox', 'AP', 38.5, 0.2]]
_C.TEST.EXPECTED_RESULTS = []
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
_C.TEST.EVAL_PERIOD = 0
# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval
# When empty it will use the defaults in COCO.
# Otherwise it should have the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
_C.TEST.KEYPOINT_OKS_SIGMAS = []
# Maximum number of detections to return per image during inference (100 is
# based on the limit established for the COCO dataset).
_C.TEST.DETECTIONS_PER_IMAGE = 100
_C.TEST.AUG = CN({"ENABLED": False})
_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
_C.TEST.AUG.MAX_SIZE = 4000
_C.TEST.AUG.FLIP = True
_C.TEST.PRECISE_BN = CN({"ENABLED": False})
_C.TEST.PRECISE_BN.NUM_ITER = 200
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Directory where output files are written
_C.OUTPUT_DIR = "./output"
# Set seed to negative to fully randomize everything.
# Set seed to positive to use a fixed seed. Note that a fixed seed increases
# reproducibility but does not guarantee fully deterministic behavior.
# Disabling all parallelism further increases reproducibility.
_C.SEED = -1
# Benchmark different cudnn algorithms.
# If input images have very different sizes, this option will have large overhead
# for about 10k iterations. It usually hurts total time, but can benefit for certain models.
# If input images have the same or similar sizes, benchmark is often helpful.
_C.CUDNN_BENCHMARK = False
# The period (in terms of steps) for minibatch visualization at train time.
# Set to 0 to disable.
_C.VIS_PERIOD = 0
# global config is for quick hack purposes.
# You can set them in command line or config files,
# and access it with:
#
# from detectron2.config import global_cfg
# print(global_cfg.HACK)
#
# Do not commit any configs into it.
_C.GLOBAL = CN()
_C.GLOBAL.HACK = 1.0
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import transforms # isort:skip
from .build import (
build_detection_test_loader,
build_detection_train_loader,
get_detection_dataset_dicts,
load_proposals_into_dataset,
print_instances_class_histogram,
)
from .catalog import DatasetCatalog, MetadataCatalog
from .common import DatasetFromList, MapDataset
from .dataset_mapper import DatasetMapper
# ensure the builtin datasets are registered
from . import datasets, samplers # isort:skip
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import bisect
import copy
import itertools
import logging
import numpy as np
import operator
import pickle
import torch.utils.data
from fvcore.common.file_io import PathManager
from tabulate import tabulate
from termcolor import colored
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.env import seed_all_rng
from detectron2.utils.logger import log_first_n
from . import samplers
from .catalog import DatasetCatalog, MetadataCatalog
from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from .dataset_mapper import DatasetMapper
from .detection_utils import check_metadata_consistency
"""
This file contains the default logic to build a dataloader for training or testing.
"""
__all__ = [
"build_detection_train_loader",
"build_detection_test_loader",
"get_detection_dataset_dicts",
"load_proposals_into_dataset",
"print_instances_class_histogram",
]
def filter_images_with_only_crowd_annotations(dataset_dicts):
"""
Filter out images with none annotations or only crowd annotations
(i.e., images without non-crowd annotations).
A common training-time preprocessing on COCO dataset.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
Returns:
list[dict]: the same format, but filtered.
"""
num_before = len(dataset_dicts)
def valid(anns):
for ann in anns:
if ann.get("iscrowd", 0) == 0:
return True
return False
dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
num_after = len(dataset_dicts)
logger = logging.getLogger(__name__)
logger.info(
"Removed {} images with no usable annotations. {} images left.".format(
num_before - num_after, num_after
)
)
return dataset_dicts
def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image):
"""
Filter out images with too few number of keypoints.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
Returns:
list[dict]: the same format as dataset_dicts, but filtered.
"""
num_before = len(dataset_dicts)
def visible_keypoints_in_image(dic):
# Each keypoints field has the format [x1, y1, v1, ...], where v is visibility
annotations = dic["annotations"]
return sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in annotations
if "keypoints" in ann
)
dataset_dicts = [
x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image
]
num_after = len(dataset_dicts)
logger = logging.getLogger(__name__)
logger.info(
"Removed {} images with fewer than {} keypoints.".format(
num_before - num_after, min_keypoints_per_image
)
)
return dataset_dicts
def load_proposals_into_dataset(dataset_dicts, proposal_file):
"""
Load precomputed object proposals into the dataset.
The proposal file should be a pickled dict with the following keys:
- "ids": list[int] or list[str], the image ids
- "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id
- "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores
corresponding to the boxes.
- "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``.
Args:
dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.
proposal_file (str): file path of pre-computed proposals, in pkl format.
Returns:
list[dict]: the same format as dataset_dicts, but added proposal field.
"""
logger = logging.getLogger(__name__)
logger.info("Loading proposals from: {}".format(proposal_file))
with PathManager.open(proposal_file, "rb") as f:
proposals = pickle.load(f, encoding="latin1")
# Rename the key names in D1 proposal files
rename_keys = {"indexes": "ids", "scores": "objectness_logits"}
for key in rename_keys:
if key in proposals:
proposals[rename_keys[key]] = proposals.pop(key)
# Fetch the indexes of all proposals that are in the dataset
# Convert image_id to str since they could be int.
img_ids = set({str(record["image_id"]) for record in dataset_dicts})
id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids}
# Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS'
bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS
for record in dataset_dicts:
# Get the index of the proposal
i = id_to_index[str(record["image_id"])]
boxes = proposals["boxes"][i]
objectness_logits = proposals["objectness_logits"][i]
# Sort the proposals in descending order of the scores
inds = objectness_logits.argsort()[::-1]
record["proposal_boxes"] = boxes[inds]
record["proposal_objectness_logits"] = objectness_logits[inds]
record["proposal_bbox_mode"] = bbox_mode
return dataset_dicts
def _quantize(x, bin_edges):
bin_edges = copy.copy(bin_edges)
bin_edges = sorted(bin_edges)
quantized = list(map(lambda y: bisect.bisect_right(bin_edges, y), x))
return quantized
def print_instances_class_histogram(dataset_dicts, class_names):
"""
Args:
dataset_dicts (list[dict]): list of dataset dicts.
class_names (list[str]): list of class names (zero-indexed).
"""
num_classes = len(class_names)
hist_bins = np.arange(num_classes + 1)
histogram = np.zeros((num_classes,), dtype=np.int)
for entry in dataset_dicts:
annos = entry["annotations"]
classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)]
histogram += np.histogram(classes, bins=hist_bins)[0]
N_COLS = min(6, len(class_names) * 2)
def short_name(x):
# make long class names shorter. useful for lvis
if len(x) > 13:
return x[:11] + ".."
return x
data = list(
itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
)
total_num_instances = sum(data[1::2])
data.extend([None] * (N_COLS - (len(data) % N_COLS)))
if num_classes > 1:
data.extend(["total", total_num_instances])
data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
table = tabulate(
data,
headers=["category", "#instances"] * (N_COLS // 2),
tablefmt="pipe",
numalign="left",
stralign="center",
)
log_first_n(
logging.INFO,
"Distribution of instances among all {} categories:\n".format(num_classes)
+ colored(table, "cyan"),
key="message",
)
def get_detection_dataset_dicts(
dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None
):
"""
Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.
Args:
dataset_names (list[str]): a list of dataset names
filter_empty (bool): whether to filter out images without instance annotations
min_keypoints (int): filter out images with fewer keypoints than
`min_keypoints`. Set to 0 to do nothing.
proposal_files (list[str]): if given, a list of object proposal files
that match each dataset in `dataset_names`.
"""
assert len(dataset_names)
dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
for dataset_name, dicts in zip(dataset_names, dataset_dicts):
assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
if proposal_files is not None:
assert len(dataset_names) == len(proposal_files)
# load precomputed proposals from proposal files
dataset_dicts = [
load_proposals_into_dataset(dataset_i_dicts, proposal_file)
for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
]
dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
has_instances = "annotations" in dataset_dicts[0]
# Keep images without instance-level GT if the dataset has semantic labels.
if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]:
dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
if min_keypoints > 0 and has_instances:
dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)
if has_instances:
try:
class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
check_metadata_consistency("thing_classes", dataset_names)
print_instances_class_histogram(dataset_dicts, class_names)
except AttributeError: # class names are not available for this dataset
pass
return dataset_dicts
def build_detection_train_loader(cfg, mapper=None):
"""
A data loader is created by the following steps:
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
2. Coordinate a random shuffle order shared among all processes (all GPUs)
3. Each process spawn another few workers to process the dicts. Each worker will:
* Map each metadata dict into another format to be consumed by the model.
* Batch them by simply putting dicts into a list.
The batched ``list[mapped_dict]`` is what this dataloader will yield.
Args:
cfg (CfgNode): the config
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, True)`.
Returns:
an infinite iterator of training data
"""
num_workers = get_world_size()
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
assert (
images_per_batch % num_workers == 0
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
images_per_batch, num_workers
)
assert (
images_per_batch >= num_workers
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
images_per_batch, num_workers
)
images_per_worker = images_per_batch // num_workers
dataset_dicts = get_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
if cfg.MODEL.KEYPOINT_ON
else 0,
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
if mapper is None:
mapper = DatasetMapper(cfg, True)
dataset = MapDataset(dataset, mapper)
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = samplers.TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
sampler = samplers.RepeatFactorTrainingSampler(
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
) # yield individual mapped dict
data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
else:
batch_sampler = torch.utils.data.sampler.BatchSampler(
sampler, images_per_worker, drop_last=True
)
# drop_last so the batch always have the same size
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
worker_init_fn=worker_init_reset_seed,
)
return data_loader
def build_detection_test_loader(cfg, dataset_name, mapper=None):
"""
Similar to `build_detection_train_loader`.
But this function uses the given `dataset_name` argument (instead of the names in cfg),
and uses batch size 1.
Args:
cfg: a detectron2 CfgNode
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
mapper (callable): a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, False)`.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
"""
dataset_dicts = get_detection_dataset_dicts(
[dataset_name],
filter_empty=False,
proposal_files=[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
]
if cfg.MODEL.LOAD_PROPOSALS
else None,
)
dataset = DatasetFromList(dataset_dicts)
if mapper is None:
mapper = DatasetMapper(cfg, False)
dataset = MapDataset(dataset, mapper)
sampler = samplers.InferenceSampler(len(dataset))
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
)
return data_loader
def trivial_batch_collator(batch):
"""
A batch collator that does nothing.
"""
return batch
def worker_init_reset_seed(worker_id):
seed_all_rng(np.random.randint(2 ** 31) + worker_id)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import types
from typing import List
from detectron2.utils.logger import log_first_n
__all__ = ["DatasetCatalog", "MetadataCatalog"]
class DatasetCatalog(object):
"""
A catalog that stores information about the datasets and how to obtain them.
It contains a mapping from strings
(which are names that identify a dataset, e.g. "coco_2014_train")
to a function which parses the dataset and returns the samples in the
format of `list[dict]`.
The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details)
if used with the data loader functionalities in `data/build.py,data/detection_transform.py`.
The purpose of having this catalog is to make it easy to choose
different datasets, by just using the strings in the config.
"""
_REGISTERED = {}
@staticmethod
def register(name, func):
"""
Args:
name (str): the name that identifies a dataset, e.g. "coco_2014_train".
func (callable): a callable which takes no arguments and returns a list of dicts.
"""
assert callable(func), "You must register a function with `DatasetCatalog.register`!"
assert name not in DatasetCatalog._REGISTERED, "Dataset '{}' is already registered!".format(
name
)
DatasetCatalog._REGISTERED[name] = func
@staticmethod
def get(name):
"""
Call the registered function and return its results.
Args:
name (str): the name that identifies a dataset, e.g. "coco_2014_train".
Returns:
list[dict]: dataset annotations.0
"""
try:
f = DatasetCatalog._REGISTERED[name]
except KeyError:
raise KeyError(
"Dataset '{}' is not registered! Available datasets are: {}".format(
name, ", ".join(DatasetCatalog._REGISTERED.keys())
)
)
return f()
@staticmethod
def list() -> List[str]:
"""
List all registered datasets.
Returns:
list[str]
"""
return list(DatasetCatalog._REGISTERED.keys())
@staticmethod
def clear():
"""
Remove all registered dataset.
"""
DatasetCatalog._REGISTERED.clear()
class Metadata(types.SimpleNamespace):
"""
A class that supports simple attribute setter/getter.
It is intended for storing metadata of a dataset and make it accessible globally.
Examples:
.. code-block:: python
# somewhere when you load the data:
MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"]
# somewhere when you print statistics or visualize:
classes = MetadataCatalog.get("mydataset").thing_classes
"""
# the name of the dataset
# set default to N/A so that `self.name` in the errors will not trigger getattr again
name: str = "N/A"
_RENAMED = {
"class_names": "thing_classes",
"dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id",
"stuff_class_names": "stuff_classes",
}
def __getattr__(self, key):
if key in self._RENAMED:
log_first_n(
logging.WARNING,
"Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
n=10,
)
return getattr(self, self._RENAMED[key])
raise AttributeError(
"Attribute '{}' does not exist in the metadata of '{}'. Available keys are {}.".format(
key, self.name, str(self.__dict__.keys())
)
)
def __setattr__(self, key, val):
if key in self._RENAMED:
log_first_n(
logging.WARNING,
"Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]),
n=10,
)
setattr(self, self._RENAMED[key], val)
# Ensure that metadata of the same name stays consistent
try:
oldval = getattr(self, key)
assert oldval == val, (
"Attribute '{}' in the metadata of '{}' cannot be set "
"to a different value!\n{} != {}".format(key, self.name, oldval, val)
)
except AttributeError:
super().__setattr__(key, val)
def as_dict(self):
"""
Returns all the metadata as a dict.
Note that modifications to the returned dict will not reflect on the Metadata object.
"""
return copy.copy(self.__dict__)
def set(self, **kwargs):
"""
Set multiple metadata with kwargs.
"""
for k, v in kwargs.items():
setattr(self, k, v)
return self
def get(self, key, default=None):
"""
Access an attribute and return its value if exists.
Otherwise return default.
"""
try:
return getattr(self, key)
except AttributeError:
return default
class MetadataCatalog:
"""
MetadataCatalog provides access to "Metadata" of a given dataset.
The metadata associated with a certain name is a singleton: once created,
the metadata will stay alive and will be returned by future calls to `get(name)`.
It's like global variables, so don't abuse it.
It's meant for storing knowledge that's constant and shared across the execution
of the program, e.g.: the class names in COCO.
"""
_NAME_TO_META = {}
@staticmethod
def get(name):
"""
Args:
name (str): name of a dataset (e.g. coco_2014_train).
Returns:
Metadata: The :class:`Metadata` instance associated with this name,
or create an empty one if none is available.
"""
assert len(name)
if name in MetadataCatalog._NAME_TO_META:
ret = MetadataCatalog._NAME_TO_META[name]
# TODO this is for the BC breaking change in D15247032.
# Remove this in the future.
if hasattr(ret, "dataset_name"):
logger = logging.getLogger()
logger.warning(
"""
The 'dataset_name' key in metadata is no longer used for
sharing metadata among splits after D15247032! Add
metadata to each split (now called dataset) separately!
"""
)
parent_meta = MetadataCatalog.get(ret.dataset_name).as_dict()
ret.set(**parent_meta)
return ret
else:
m = MetadataCatalog._NAME_TO_META[name] = Metadata(name=name)
return m
@staticmethod
def list():
"""
List all registered metadata.
Returns:
list[str]: keys (names of datasets) of all registered metadata
"""
return list(MetadataCatalog._NAME_TO_META.keys())
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import numpy as np
import pickle
import random
import torch.utils.data as data
from detectron2.utils.serialize import PicklableWrapper
__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset"]
class MapDataset(data.Dataset):
"""
Map a function over the elements in a dataset.
Args:
dataset: a dataset where map function is applied.
map_func: a callable which maps the element in dataset. map_func is
responsible for error handling, when error happens, it needs to
return None so the MapDataset will randomly use other
elements from the dataset.
"""
def __init__(self, dataset, map_func):
self._dataset = dataset
self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work
self._rng = random.Random(42)
self._fallback_candidates = set(range(len(dataset)))
def __len__(self):
return len(self._dataset)
def __getitem__(self, idx):
retry_count = 0
cur_idx = int(idx)
while True:
data = self._map_func(self._dataset[cur_idx])
if data is not None:
self._fallback_candidates.add(cur_idx)
return data
# _map_func fails for this idx, use a random new index from the pool
retry_count += 1
self._fallback_candidates.discard(cur_idx)
cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]
if retry_count >= 3:
logger = logging.getLogger(__name__)
logger.warning(
"Failed to apply `_map_func` for idx: {}, retry count: {}".format(
idx, retry_count
)
)
class DatasetFromList(data.Dataset):
"""
Wrap a list to a torch Dataset. It produces elements of the list as data.
"""
def __init__(self, lst: list, copy: bool = True, serialize: bool = True):
"""
Args:
lst (list): a list which contains elements to produce.
copy (bool): whether to deepcopy the element when producing it,
so that the result can be modified in place without affecting the
source in the list.
serialize (bool): whether to hold memory using serialized objects, when
enabled, data loader workers can use shared RAM from master
process instead of making a copy.
"""
self._lst = lst
self._copy = copy
self._serialize = serialize
def _serialize(data):
buffer = pickle.dumps(data, protocol=-1)
return np.frombuffer(buffer, dtype=np.uint8)
if self._serialize:
logger = logging.getLogger(__name__)
logger.info(
"Serializing {} elements to byte tensors and concatenating them all ...".format(
len(self._lst)
)
)
self._lst = [_serialize(x) for x in self._lst]
self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64)
self._addr = np.cumsum(self._addr)
self._lst = np.concatenate(self._lst)
logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2))
def __len__(self):
if self._serialize:
return len(self._addr)
else:
return len(self._lst)
def __getitem__(self, idx):
if self._serialize:
start_addr = 0 if idx == 0 else self._addr[idx - 1].item()
end_addr = self._addr[idx].item()
bytes = memoryview(self._lst[start_addr:end_addr])
return pickle.loads(bytes)
elif self._copy:
return copy.deepcopy(self._lst[idx])
else:
return self._lst[idx]
class AspectRatioGroupedDataset(data.IterableDataset):
"""
Batch data that have similar aspect ratio together.
In this implementation, images whose aspect ratio < (or >) 1 will
be batched together.
This improves training speed because the images then need less padding
to form a batch.
It assumes the underlying dataset produces dicts with "width" and "height" keys.
It will then produce a list of original dicts with length = batch_size,
all with similar aspect ratios.
"""
def __init__(self, dataset, batch_size):
"""
Args:
dataset: an iterable. Each element must be a dict with keys
"width" and "height", which will be used to batch data.
batch_size (int):
"""
self.dataset = dataset
self.batch_size = batch_size
self._buckets = [[] for _ in range(2)]
# Hard-coded two aspect ratio groups: w > h and w < h.
# Can add support for more aspect ratio groups, but doesn't seem useful
def __iter__(self):
for d in self.dataset:
w, h = d["width"], d["height"]
bucket_id = 0 if w > h else 1
bucket = self._buckets[bucket_id]
bucket.append(d)
if len(bucket) == self.batch_size:
yield bucket[:]
del bucket[:]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import numpy as np
import torch
from fvcore.common.file_io import PathManager
from PIL import Image
from . import detection_utils as utils
from . import transforms as T
"""
This file contains the default mapping that's applied to "dataset dicts".
"""
__all__ = ["DatasetMapper"]
class DatasetMapper:
"""
A callable which takes a dataset dict in Detectron2 Dataset format,
and map it into a format used by the model.
This is the default callable to be used to map your dataset dict into training data.
You may need to follow it to implement your own one for customized logic,
such as a different way to read or transform images.
See :doc:`/tutorials/data_loading` for details.
The callable currently does the following:
1. Read the image from "file_name"
2. Applies cropping/geometric transforms to the image and annotations
3. Prepare data and annotations to Tensor and :class:`Instances`
"""
def __init__(self, cfg, is_train=True):
if cfg.INPUT.CROP.ENABLED and is_train:
self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen))
else:
self.crop_gen = None
self.tfm_gens = utils.build_transform_gen(cfg, is_train)
# fmt: off
self.img_format = cfg.INPUT.FORMAT
self.mask_on = cfg.MODEL.MASK_ON
self.mask_format = cfg.INPUT.MASK_FORMAT
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
# fmt: on
if self.keypoint_on and is_train:
# Flip only makes sense in training
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
else:
self.keypoint_hflip_indices = None
if self.load_proposals:
self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
self.proposal_topk = (
cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
if is_train
else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
)
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
# USER: Write your own image loading if it's not from a file
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
if "annotations" not in dataset_dict:
image, transforms = T.apply_transform_gens(
([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
)
else:
# Crop around an instance if there are instances in the image.
# USER: Remove if you don't use cropping
if self.crop_gen:
crop_tfm = utils.gen_crop_transform_with_instance(
self.crop_gen.get_crop_size(image.shape[:2]),
image.shape[:2],
np.random.choice(dataset_dict["annotations"]),
)
image = crop_tfm.apply_image(image)
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
if self.crop_gen:
transforms = crop_tfm + transforms
image_shape = image.shape[:2] # h, w
# Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
# but not efficient on large generic data structures due to the use of pickle & mp.Queue.
# Therefore it's important to use torch.Tensor.
dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
# USER: Remove if you don't use pre-computed proposals.
if self.load_proposals:
utils.transform_proposals(
dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
)
if not self.is_train:
# USER: Modify this if you want to keep them for some reason.
dataset_dict.pop("annotations", None)
dataset_dict.pop("sem_seg_file_name", None)
return dataset_dict
if "annotations" in dataset_dict:
# USER: Modify this if you want to keep them for some reason.
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
if not self.keypoint_on:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
annos = [
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(
annos, image_shape, mask_format=self.mask_format
)
# Create a tight bounding box from masks, useful when image is cropped
if self.crop_gen and instances.has("gt_masks"):
instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
dataset_dict["instances"] = utils.filter_empty_instances(instances)
# USER: Remove if you don't do semantic/panoptic segmentation.
if "sem_seg_file_name" in dataset_dict:
with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
sem_seg_gt = Image.open(f)
sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
dataset_dict["sem_seg"] = sem_seg_gt
return dataset_dict
### Common Datasets
The dataset implemented here do not need to load the data into the final format.
It should provide the minimal data structure needed to use the dataset, so it can be very efficient.
For example, for an image dataset, just provide the file names and labels, but don't read the images.
Let the downstream decide how to read.
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .cityscapes import load_cityscapes_instances
from .coco import load_coco_json, load_sem_seg
from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta
from .register_coco import register_coco_instances, register_coco_panoptic_separated
from . import builtin # ensure the builtin datasets are registered
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
This file registers pre-defined datasets at hard-coded paths, and their metadata.
We hard-code metadata for common datasets. This will enable:
1. Consistency check when loading the datasets
2. Use models on these standard datasets directly and run demos,
without having to download the dataset annotations
We hard-code some paths to the dataset that's assumed to
exist in "./datasets/".
Users SHOULD NOT use this file to create new dataset / metadata for new dataset.
To add new dataset, refer to the tutorial "docs/DATASETS.md".
"""
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from .builtin_meta import _get_builtin_metadata
from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic
from .lvis import get_lvis_instances_meta, register_lvis_instances
from .pascal_voc import register_pascal_voc
from .register_coco import register_coco_instances, register_coco_panoptic_separated
# ==== Predefined datasets and splits for COCO ==========
_PREDEFINED_SPLITS_COCO = {}
_PREDEFINED_SPLITS_COCO["coco"] = {
"coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
"coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
"coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"),
"coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"),
"coco_2014_valminusminival": (
"coco/val2014",
"coco/annotations/instances_valminusminival2014.json",
),
"coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"),
"coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"),
"coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"),
"coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"),
"coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"),
}
_PREDEFINED_SPLITS_COCO["coco_person"] = {
"keypoints_coco_2014_train": (
"coco/train2014",
"coco/annotations/person_keypoints_train2014.json",
),
"keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"),
"keypoints_coco_2014_minival": (
"coco/val2014",
"coco/annotations/person_keypoints_minival2014.json",
),
"keypoints_coco_2014_valminusminival": (
"coco/val2014",
"coco/annotations/person_keypoints_valminusminival2014.json",
),
"keypoints_coco_2014_minival_100": (
"coco/val2014",
"coco/annotations/person_keypoints_minival2014_100.json",
),
"keypoints_coco_2017_train": (
"coco/train2017",
"coco/annotations/person_keypoints_train2017.json",
),
"keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"),
"keypoints_coco_2017_val_100": (
"coco/val2017",
"coco/annotations/person_keypoints_val2017_100.json",
),
}
_PREDEFINED_SPLITS_COCO_PANOPTIC = {
"coco_2017_train_panoptic": (
# This is the original panoptic annotation directory
"coco/panoptic_train2017",
"coco/annotations/panoptic_train2017.json",
# This directory contains semantic annotations that are
# converted from panoptic annotations.
# It is used by PanopticFPN.
# You can use the script at detectron2/datasets/prepare_panoptic_fpn.py
# to create these directories.
"coco/panoptic_stuff_train2017",
),
"coco_2017_val_panoptic": (
"coco/panoptic_val2017",
"coco/annotations/panoptic_val2017.json",
"coco/panoptic_stuff_val2017",
),
"coco_2017_val_100_panoptic": (
"coco/panoptic_val2017_100",
"coco/annotations/panoptic_val2017_100.json",
"coco/panoptic_stuff_val2017_100",
),
}
def register_all_coco(root):
for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items():
for key, (image_root, json_file) in splits_per_dataset.items():
# Assume pre-defined datasets live in `./datasets`.
register_coco_instances(
key,
_get_builtin_metadata(dataset_name),
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
)
for (
prefix,
(panoptic_root, panoptic_json, semantic_root),
) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
prefix_instances = prefix[: -len("_panoptic")]
instances_meta = MetadataCatalog.get(prefix_instances)
image_root, instances_json = instances_meta.image_root, instances_meta.json_file
register_coco_panoptic_separated(
prefix,
_get_builtin_metadata("coco_panoptic_separated"),
image_root,
os.path.join(root, panoptic_root),
os.path.join(root, panoptic_json),
os.path.join(root, semantic_root),
instances_json,
)
# ==== Predefined datasets and splits for LVIS ==========
_PREDEFINED_SPLITS_LVIS = {
"lvis_v0.5": {
"lvis_v0.5_train": ("coco/train2017", "lvis/lvis_v0.5_train.json"),
"lvis_v0.5_val": ("coco/val2017", "lvis/lvis_v0.5_val.json"),
"lvis_v0.5_val_rand_100": ("coco/val2017", "lvis/lvis_v0.5_val_rand_100.json"),
"lvis_v0.5_test": ("coco/test2017", "lvis/lvis_v0.5_image_info_test.json"),
},
"lvis_v0.5_cocofied": {
"lvis_v0.5_train_cocofied": ("coco/train2017", "lvis/lvis_v0.5_train_cocofied.json"),
"lvis_v0.5_val_cocofied": ("coco/val2017", "lvis/lvis_v0.5_val_cocofied.json"),
},
}
def register_all_lvis(root):
for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items():
for key, (image_root, json_file) in splits_per_dataset.items():
# Assume pre-defined datasets live in `./datasets`.
register_lvis_instances(
key,
get_lvis_instances_meta(dataset_name),
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
)
# ==== Predefined splits for raw cityscapes images ===========
_RAW_CITYSCAPES_SPLITS = {
"cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"),
"cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"),
"cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"),
}
def register_all_cityscapes(root):
for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
meta = _get_builtin_metadata("cityscapes")
image_dir = os.path.join(root, image_dir)
gt_dir = os.path.join(root, gt_dir)
inst_key = key.format(task="instance_seg")
DatasetCatalog.register(
inst_key,
lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
x, y, from_json=True, to_polygons=True
),
)
MetadataCatalog.get(inst_key).set(
image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta
)
sem_key = key.format(task="sem_seg")
DatasetCatalog.register(
sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)
)
MetadataCatalog.get(sem_key).set(
image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_sem_seg", **meta
)
# ==== Predefined splits for PASCAL VOC ===========
def register_all_pascal_voc(root):
SPLITS = [
("voc_2007_trainval", "VOC2007", "trainval"),
("voc_2007_train", "VOC2007", "train"),
("voc_2007_val", "VOC2007", "val"),
("voc_2007_test", "VOC2007", "test"),
("voc_2012_trainval", "VOC2012", "trainval"),
("voc_2012_train", "VOC2012", "train"),
("voc_2012_val", "VOC2012", "val"),
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc(name, os.path.join(root, dirname), split, year)
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
# Register them all under "./datasets"
_root = os.getenv("DETECTRON2_DATASETS", "datasets")
register_all_coco(_root)
register_all_lvis(_root)
register_all_cityscapes(_root)
register_all_pascal_voc(_root)
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# All coco categories, together with their nice-looking visualization colors
# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
COCO_CATEGORIES = [
{"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"},
{"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"},
{"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"},
{"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"},
{"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"},
{"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"},
{"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"},
{"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"},
{"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"},
{"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"},
{"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"},
{"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"},
{"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"},
{"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"},
{"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"},
{"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"},
{"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"},
{"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"},
{"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"},
{"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"},
{"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"},
{"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"},
{"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"},
{"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"},
{"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"},
{"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"},
{"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"},
{"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"},
{"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"},
{"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"},
{"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"},
{"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"},
{"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"},
{"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"},
{"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"},
{"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"},
{"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"},
{"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"},
{"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"},
{"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"},
{"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"},
{"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"},
{"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"},
{"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"},
{"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"},
{"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"},
{"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"},
{"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"},
{"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"},
{"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"},
{"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"},
{"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"},
{"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"},
{"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"},
{"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"},
{"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"},
{"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"},
{"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"},
{"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"},
{"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"},
{"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"},
{"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"},
{"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"},
{"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"},
{"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"},
{"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"},
{"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"},
{"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"},
{"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"},
{"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"},
{"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"},
{"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"},
{"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"},
{"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"},
{"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"},
{"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"},
{"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"},
{"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"},
{"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"},
{"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"},
{"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"},
{"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"},
{"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"},
{"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"},
{"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"},
{"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"},
{"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"},
{"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"},
{"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"},
{"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"},
{"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"},
{"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"},
{"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"},
{"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"},
{"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"},
{"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"},
{"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"},
{"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"},
{"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"},
{"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"},
{"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"},
{"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"},
{"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"},
{"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"},
{"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"},
{"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"},
{"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"},
{"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"},
{"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"},
{"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"},
{"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"},
{"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"},
{"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"},
{"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"},
{"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"},
{"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"},
{"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"},
{"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"},
{"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"},
{"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"},
{"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"},
{"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"},
{"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"},
{"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"},
{"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"},
{"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"},
{"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"},
{"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"},
{"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"},
{"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"},
{"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"},
{"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"},
{"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
]
# fmt: off
COCO_PERSON_KEYPOINT_NAMES = (
"nose",
"left_eye", "right_eye",
"left_ear", "right_ear",
"left_shoulder", "right_shoulder",
"left_elbow", "right_elbow",
"left_wrist", "right_wrist",
"left_hip", "right_hip",
"left_knee", "right_knee",
"left_ankle", "right_ankle",
)
# fmt: on
# Pairs of keypoints that should be exchanged under horizontal flipping
COCO_PERSON_KEYPOINT_FLIP_MAP = (
("left_eye", "right_eye"),
("left_ear", "right_ear"),
("left_shoulder", "right_shoulder"),
("left_elbow", "right_elbow"),
("left_wrist", "right_wrist"),
("left_hip", "right_hip"),
("left_knee", "right_knee"),
("left_ankle", "right_ankle"),
)
# rules for pairs of keypoints to draw a line between, and the line color to use.
KEYPOINT_CONNECTION_RULES = [
# face
("left_ear", "left_eye", (102, 204, 255)),
("right_ear", "right_eye", (51, 153, 255)),
("left_eye", "nose", (102, 0, 204)),
("nose", "right_eye", (51, 102, 255)),
# upper-body
("left_shoulder", "right_shoulder", (255, 128, 0)),
("left_shoulder", "left_elbow", (153, 255, 204)),
("right_shoulder", "right_elbow", (128, 229, 255)),
("left_elbow", "left_wrist", (153, 255, 153)),
("right_elbow", "right_wrist", (102, 255, 224)),
# lower-body
("left_hip", "right_hip", (255, 102, 0)),
("left_hip", "left_knee", (255, 255, 77)),
("right_hip", "right_knee", (153, 255, 204)),
("left_knee", "left_ankle", (191, 255, 128)),
("right_knee", "right_ankle", (255, 195, 77)),
]
def _get_coco_instances_meta():
thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1]
assert len(thing_ids) == 80, len(thing_ids)
# Mapping from the incontiguous COCO category id to an id in [0, 79]
thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1]
ret = {
"thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
"thing_classes": thing_classes,
"thing_colors": thing_colors,
}
return ret
def _get_coco_panoptic_separated_meta():
"""
Returns metadata for "separated" version of the panoptic segmentation dataset.
"""
stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0]
assert len(stuff_ids) == 53, len(stuff_ids)
# For semantic segmentation, this mapping maps from contiguous stuff id
# (in [0, 53], used in models) to ids in the dataset (used for processing results)
# The id 0 is mapped to an extra category "thing".
stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)}
# When converting COCO panoptic annotations to semantic annotations
# We label the "thing" category to 0
stuff_dataset_id_to_contiguous_id[0] = 0
# 54 names for COCO stuff categories (including "things")
stuff_classes = ["things"] + [
k["name"].replace("-other", "").replace("-merged", "")
for k in COCO_CATEGORIES
if k["isthing"] == 0
]
# NOTE: I randomly picked a color for things
stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0]
ret = {
"stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id,
"stuff_classes": stuff_classes,
"stuff_colors": stuff_colors,
}
ret.update(_get_coco_instances_meta())
return ret
def _get_builtin_metadata(dataset_name):
if dataset_name == "coco":
return _get_coco_instances_meta()
if dataset_name == "coco_panoptic_separated":
return _get_coco_panoptic_separated_meta()
elif dataset_name == "coco_person":
return {
"thing_classes": ["person"],
"keypoint_names": COCO_PERSON_KEYPOINT_NAMES,
"keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP,
"keypoint_connection_rules": KEYPOINT_CONNECTION_RULES,
}
elif dataset_name == "cityscapes":
# fmt: off
CITYSCAPES_THING_CLASSES = [
"person", "rider", "car", "truck",
"bus", "train", "motorcycle", "bicycle",
]
CITYSCAPES_STUFF_CLASSES = [
"road", "sidewalk", "building", "wall", "fence", "pole", "traffic light",
"traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car",
"truck", "bus", "train", "motorcycle", "bicycle", "license plate",
]
# fmt: on
return {
"thing_classes": CITYSCAPES_THING_CLASSES,
"stuff_classes": CITYSCAPES_STUFF_CLASSES,
}
raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import functools
import json
import logging
import multiprocessing as mp
import numpy as np
import os
from itertools import chain
import pycocotools.mask as mask_util
from fvcore.common.file_io import PathManager
from PIL import Image
from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.logger import setup_logger
try:
import cv2 # noqa
except ImportError:
# OpenCV is an optional dependency at the moment
pass
logger = logging.getLogger(__name__)
def get_cityscapes_files(image_dir, gt_dir):
files = []
# scan through the directory
cities = PathManager.ls(image_dir)
logger.info(f"{len(cities)} cities found in '{image_dir}'.")
for city in cities:
city_img_dir = os.path.join(image_dir, city)
city_gt_dir = os.path.join(gt_dir, city)
for basename in PathManager.ls(city_img_dir):
image_file = os.path.join(city_img_dir, basename)
suffix = "leftImg8bit.png"
assert basename.endswith(suffix)
basename = basename[: -len(suffix)]
instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png")
label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")
files.append((image_file, instance_file, label_file, json_file))
assert len(files), "No images found in {}".format(image_dir)
for f in files[0]:
assert PathManager.isfile(f), f
return files
def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
from_json (bool): whether to read annotations from the raw json file or the png files.
to_polygons (bool): whether to represent the segmentation as polygons
(COCO's format) instead of masks (cityscapes's format).
Returns:
list[dict]: a list of dicts in Detectron2 standard format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
"""
if from_json:
assert to_polygons, (
"Cityscapes's json annotations are in polygon format. "
"Converting to mask format is not supported now."
)
files = get_cityscapes_files(image_dir, gt_dir)
logger.info("Preprocessing cityscapes annotations ...")
# This is still not fast: all workers will execute duplicate works and will
# take up to 10m on a 8GPU server.
pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))
ret = pool.map(
functools.partial(cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons),
files,
)
logger.info("Loaded {} images from {}".format(len(ret), image_dir))
# Map cityscape ids to contiguous ids
from cityscapesscripts.helpers.labels import labels
labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)}
for dict_per_image in ret:
for anno in dict_per_image["annotations"]:
anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
return ret
def load_cityscapes_semantic(image_dir, gt_dir):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
Returns:
list[dict]: a list of dict, each has "file_name" and
"sem_seg_file_name".
"""
ret = []
# gt_dir is small and contain many small files. make sense to fetch to local first
gt_dir = PathManager.get_local_path(gt_dir)
for image_file, _, label_file, json_file in get_cityscapes_files(image_dir, gt_dir):
label_file = label_file.replace("labelIds", "labelTrainIds")
with PathManager.open(json_file, "r") as f:
jsonobj = json.load(f)
ret.append(
{
"file_name": image_file,
"sem_seg_file_name": label_file,
"height": jsonobj["imgHeight"],
"width": jsonobj["imgWidth"],
}
)
assert len(ret), f"No images found in {image_dir}!"
assert PathManager.isfile(
ret[0]["sem_seg_file_name"]
), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa
return ret
def cityscapes_files_to_dict(files, from_json, to_polygons):
"""
Parse cityscapes annotation files to a instance segmentation dataset dict.
Args:
files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
from_json (bool): whether to read annotations from the raw json file or the png files.
to_polygons (bool): whether to represent the segmentation as polygons
(COCO's format) instead of masks (cityscapes's format).
Returns:
A dict in Detectron2 Dataset format.
"""
from cityscapesscripts.helpers.labels import id2label, name2label
image_file, instance_id_file, _, json_file = files
annos = []
if from_json:
from shapely.geometry import MultiPolygon, Polygon
with PathManager.open(json_file, "r") as f:
jsonobj = json.load(f)
ret = {
"file_name": image_file,
"image_id": os.path.basename(image_file),
"height": jsonobj["imgHeight"],
"width": jsonobj["imgWidth"],
}
# `polygons_union` contains the union of all valid polygons.
polygons_union = Polygon()
# CityscapesScripts draw the polygons in sequential order
# and each polygon *overwrites* existing ones. See
# (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa
# We use reverse order, and each polygon *avoids* early ones.
# This will resolve the ploygon overlaps in the same way as CityscapesScripts.
for obj in jsonobj["objects"][::-1]:
if "deleted" in obj: # cityscapes data format specific
continue
label_name = obj["label"]
try:
label = name2label[label_name]
except KeyError:
if label_name.endswith("group"): # crowd area
label = name2label[label_name[: -len("group")]]
else:
raise
if label.id < 0: # cityscapes data format
continue
# Cityscapes's raw annotations uses integer coordinates
# Therefore +0.5 here
poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
# CityscapesScript uses PIL.ImageDraw.polygon to rasterize
# polygons for evaluation. This function operates in integer space
# and draws each pixel whose center falls into the polygon.
# Therefore it draws a polygon which is 0.5 "fatter" in expectation.
# We therefore dilate the input polygon by 0.5 as our input.
poly = Polygon(poly_coord).buffer(0.5, resolution=4)
if not label.hasInstances or label.ignoreInEval:
# even if we won't store the polygon it still contributes to overlaps resolution
polygons_union = polygons_union.union(poly)
continue
# Take non-overlapping part of the polygon
poly_wo_overlaps = poly.difference(polygons_union)
if poly_wo_overlaps.is_empty:
continue
polygons_union = polygons_union.union(poly)
anno = {}
anno["iscrowd"] = label_name.endswith("group")
anno["category_id"] = label.id
if isinstance(poly_wo_overlaps, Polygon):
poly_list = [poly_wo_overlaps]
elif isinstance(poly_wo_overlaps, MultiPolygon):
poly_list = poly_wo_overlaps.geoms
else:
raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps))
poly_coord = []
for poly_el in poly_list:
# COCO API can work only with exterior boundaries now, hence we store only them.
# TODO: store both exterior and interior boundaries once other parts of the
# codebase support holes in polygons.
poly_coord.append(list(chain(*poly_el.exterior.coords)))
anno["segmentation"] = poly_coord
(xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds
anno["bbox"] = (xmin, ymin, xmax, ymax)
anno["bbox_mode"] = BoxMode.XYXY_ABS
annos.append(anno)
else:
# See also the official annotation parsing scripts at
# https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa
with PathManager.open(instance_id_file, "rb") as f:
inst_image = np.asarray(Image.open(f), order="F")
# ids < 24 are stuff labels (filtering them first is about 5% faster)
flattened_ids = np.unique(inst_image[inst_image >= 24])
ret = {
"file_name": image_file,
"image_id": os.path.basename(image_file),
"height": inst_image.shape[0],
"width": inst_image.shape[1],
}
for instance_id in flattened_ids:
# For non-crowd annotations, instance_id // 1000 is the label_id
# Crowd annotations have <1000 instance ids
label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
label = id2label[label_id]
if not label.hasInstances or label.ignoreInEval:
continue
anno = {}
anno["iscrowd"] = instance_id < 1000
anno["category_id"] = label.id
mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")
inds = np.nonzero(mask)
ymin, ymax = inds[0].min(), inds[0].max()
xmin, xmax = inds[1].min(), inds[1].max()
anno["bbox"] = (xmin, ymin, xmax, ymax)
if xmax <= xmin or ymax <= ymin:
continue
anno["bbox_mode"] = BoxMode.XYXY_ABS
if to_polygons:
# This conversion comes from D4809743 and D5171122,
# when Mask-RCNN was first developed.
contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[
-2
]
polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
# opencv's can produce invalid polygons
if len(polygons) == 0:
continue
anno["segmentation"] = polygons
else:
anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
annos.append(anno)
ret["annotations"] = annos
return ret
if __name__ == "__main__":
"""
Test the cityscapes dataset loader.
Usage:
python -m detectron2.data.datasets.cityscapes \
cityscapes/leftImg8bit/train cityscapes/gtFine/train
"""
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("image_dir")
parser.add_argument("gt_dir")
parser.add_argument("--type", choices=["instance", "semantic"], default="instance")
args = parser.parse_args()
from detectron2.data.catalog import Metadata
from detectron2.utils.visualizer import Visualizer
from cityscapesscripts.helpers.labels import labels
logger = setup_logger(name=__name__)
dirname = "cityscapes-data-vis"
os.makedirs(dirname, exist_ok=True)
if args.type == "instance":
dicts = load_cityscapes_instances(
args.image_dir, args.gt_dir, from_json=True, to_polygons=True
)
logger.info("Done loading {} samples.".format(len(dicts)))
thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
meta = Metadata().set(thing_classes=thing_classes)
else:
dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
logger.info("Done loading {} samples.".format(len(dicts)))
stuff_names = [k.name for k in labels if k.trainId != 255]
stuff_colors = [k.color for k in labels if k.trainId != 255]
meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors)
for d in dicts:
img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
visualizer = Visualizer(img, metadata=meta)
vis = visualizer.draw_dataset_dict(d)
# cv2.imshow("a", vis.get_image()[:, :, ::-1])
# cv2.waitKey()
fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
vis.save(fpath)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment