Commit 3144257c authored by mashun1's avatar mashun1
Browse files

catvton

parents
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .data.datasets import builtin # just to register data
from .converters import builtin as builtin_converters # register converters
from .config import (
add_densepose_config,
add_densepose_head_config,
add_hrnet_config,
add_dataset_category_config,
add_bootstrap_config,
load_bootstrap_config,
)
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .evaluation import DensePoseCOCOEvaluator
from .modeling.roi_heads import DensePoseROIHeads
from .modeling.test_time_augmentation import (
DensePoseGeneralizedRCNNWithTTA,
DensePoseDatasetMapperTTA,
)
from .utils.transform import load_from_cfg
from .modeling.hrfpn import build_hrfpn_backbone
# -*- coding = utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-ignore-all-errors
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN) -> None:
    """
    Register category-related dataset options on `cfg.DATASETS`.

    Three empty nodes are created, each one accepting arbitrary new keys:
     - CATEGORY_MAPS: category mapping
     - WHITELISTED_CATEGORIES: category whitelisting
     - CLASS_TO_MESH_NAME_MAPPING: class to mesh mapping

    Args:
        cfg (CN): config to extend in place; must already have a DATASETS node
    """
    datasets = cfg.DATASETS
    for option_name in (
        "CATEGORY_MAPS",
        "WHITELISTED_CATEGORIES",
        "CLASS_TO_MESH_NAME_MAPPING",
    ):
        # every option gets its own fresh node (no sharing between options)
        setattr(datasets, option_name, CN(new_allowed=True))
def add_evaluation_config(cfg: CN) -> None:
    """
    Create the `DENSEPOSE_EVALUATION` node on `cfg` and fill in its defaults.

    Args:
        cfg (CN): config to extend in place
    """
    cfg.DENSEPOSE_EVALUATION = CN()
    evaluation = cfg.DENSEPOSE_EVALUATION

    # evaluator type, possible values:
    #  - "iou": evaluator for models that produce iou data
    #  - "cse": evaluator for models that produce cse data
    evaluation.TYPE = "iou"

    # storage for DensePose results, possible values:
    #  - "none": no explicit storage, all the results are stored in the
    #            dictionary with predictions, memory intensive;
    #            historically the default storage type
    #  - "ram":  RAM storage, uses per-process RAM storage, which is
    #            reduced to a single process storage on later stages,
    #            less memory intensive
    #  - "file": file storage, uses per-process file-based storage,
    #            the least memory intensive, but may create bottlenecks
    #            on file system accesses
    evaluation.STORAGE = "none"

    # minimum threshold for IOU values: the lower its value is,
    # the more matches are produced (and the higher the AP score)
    evaluation.MIN_IOU_THRESHOLD = 0.5

    # non-distributed inference is slower (at inference time) but can avoid RAM OOM
    evaluation.DISTRIBUTED_INFERENCE = True

    # evaluate mesh alignment based on vertex embeddings,
    # only makes sense in CSE context
    evaluation.EVALUATE_MESH_ALIGNMENT = False

    # meshes to compute mesh alignment for
    evaluation.MESH_ALIGNMENT_MESH_NAMES = []
def add_bootstrap_config(cfg: CN) -> None:
    """
    Add bootstrap-related options to `cfg`:
     - BOOTSTRAP_DATASETS: list of bootstrap dataset entries, empty by default
     - BOOTSTRAP_MODEL: node with WEIGHTS ("" by default) and DEVICE ("cuda")

    Args:
        cfg (CN): config to extend in place
    """
    cfg.BOOTSTRAP_DATASETS = []

    cfg.BOOTSTRAP_MODEL = CN()
    bootstrap_model = cfg.BOOTSTRAP_MODEL
    bootstrap_model.WEIGHTS = ""
    bootstrap_model.DEVICE = "cuda"
def get_bootstrap_dataset_config() -> CN:
    """
    Build a fresh config node holding the defaults for a single bootstrap
    dataset entry.

    Return:
        CN with the keys DATASET, RATIO, IMAGE_LOADER, INFERENCE,
        DATA_SAMPLER and FILTER
    """
    dataset_cfg = CN()
    dataset_cfg.DATASET = ""
    # ratio used to mix data loaders
    dataset_cfg.RATIO = 0.1

    # image loader
    dataset_cfg.IMAGE_LOADER = CN(new_allowed=True)
    image_loader = dataset_cfg.IMAGE_LOADER
    image_loader.TYPE = ""
    image_loader.BATCH_SIZE = 4
    image_loader.NUM_WORKERS = 4
    image_loader.CATEGORIES = []
    image_loader.MAX_COUNT_PER_CATEGORY = 1_000_000
    image_loader.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)

    # inference
    dataset_cfg.INFERENCE = CN()
    inference = dataset_cfg.INFERENCE
    # batch size for model inputs
    inference.INPUT_BATCH_SIZE = 4
    # batch size to group model outputs
    inference.OUTPUT_BATCH_SIZE = 2

    # sampled data
    dataset_cfg.DATA_SAMPLER = CN(new_allowed=True)
    data_sampler = dataset_cfg.DATA_SAMPLER
    data_sampler.TYPE = ""
    data_sampler.USE_GROUND_TRUTH_CATEGORIES = False

    # filter
    dataset_cfg.FILTER = CN(new_allowed=True)
    dataset_cfg.FILTER.TYPE = ""

    return dataset_cfg
def load_bootstrap_config(cfg: CN) -> None:
    """
    Normalize `cfg.BOOTSTRAP_DATASETS` in place.

    Bootstrap datasets are given as a list of `dict` that are not automatically
    converted into CfgNode. Each entry is merged on top of the defaults from
    `get_bootstrap_dataset_config`, so that afterwards every entry is a CfgNode
    that complies with the specification. Nothing is done if the list is empty.

    Args:
        cfg (CN): config whose BOOTSTRAP_DATASETS entry gets replaced
    """
    raw_entries = cfg.BOOTSTRAP_DATASETS
    if not raw_entries:
        return

    def _to_cfgnode(entry):
        # start from the defaults and overlay the user-provided values
        node = get_bootstrap_dataset_config().clone()
        node.merge_from_other_cfg(CN(entry))
        return node

    cfg.BOOTSTRAP_DATASETS = [_to_cfgnode(entry) for entry in raw_entries]
def add_densepose_head_cse_config(cfg: CN) -> None:
    """
    Add configuration options for Continuous Surface Embeddings (CSE)
    under `cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE`.

    Args:
        cfg (CN): config to extend in place; must already contain the
            MODEL.ROI_DENSEPOSE_HEAD node
    """
    cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
    cse = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE

    # dimensionality D of the embedding space
    cse.EMBED_SIZE = 16
    # embedder specifications for various mesh IDs
    cse.EMBEDDERS = CN(new_allowed=True)
    # normalization coefficient for embedding distances
    cse.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
    # normalization coefficient for geodesic distances
    cse.GEODESIC_DIST_GAUSS_SIGMA = 0.01
    # embedding loss weight
    cse.EMBED_LOSS_WEIGHT = 0.6
    # embedding loss name, currently the following options are supported:
    #  - EmbeddingLoss: cross-entropy on vertex labels
    #  - SoftEmbeddingLoss: cross-entropy on vertex label combined with
    #    Gaussian penalty on distance between vertices
    cse.EMBED_LOSS_NAME = "EmbeddingLoss"
    # optimizer hyperparameters
    cse.FEATURES_LR_FACTOR = 1.0
    cse.EMBEDDING_LR_FACTOR = 1.0

    # shape to shape cycle consistency loss parameters (disabled by default)
    cse.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
    shape_cycle = cse.SHAPE_TO_SHAPE_CYCLE_LOSS
    # shape to shape cycle consistency loss weight
    shape_cycle.WEIGHT = 0.025
    # norm type used for loss computation
    shape_cycle.NORM_P = 2
    # normalization term for embedding similarity matrices
    shape_cycle.TEMPERATURE = 0.05
    # maximum number of vertices to include into shape to shape cycle loss:
    #  - if negative or zero, all vertices are considered
    #  - if positive, random subset of vertices of given size is considered
    shape_cycle.MAX_NUM_VERTICES = 4936

    # pixel to shape cycle consistency loss parameters (disabled by default)
    cse.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
    pixel_cycle = cse.PIX_TO_SHAPE_CYCLE_LOSS
    # pixel to shape cycle consistency loss weight
    pixel_cycle.WEIGHT = 0.0001
    # norm type used for loss computation
    pixel_cycle.NORM_P = 2
    # map images to all meshes and back (if false, use only gt meshes from the batch)
    pixel_cycle.USE_ALL_MESHES_NOT_GT_ONLY = False
    # randomly select at most this number of pixels from every instance;
    # if negative or zero, all are considered
    pixel_cycle.NUM_PIXELS_TO_SAMPLE = 100
    # normalization factor for pixel to pixel distances
    # (higher value = smoother distribution)
    pixel_cycle.PIXEL_SIGMA = 5.0
    pixel_cycle.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
    pixel_cycle.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
def add_densepose_head_config(cfg: CN) -> None:
    """
    Add config for densepose head.

    Sets `MODEL.DENSEPOSE_ON`, creates `MODEL.ROI_DENSEPOSE_HEAD` with its
    defaults, sets the rotation angle options under INPUT and TEST.AUG and
    finally adds the CSE options via `add_densepose_head_cse_config`.

    Args:
        cfg (CN): config to extend in place; must already contain the
            MODEL, INPUT and TEST.AUG nodes
    """
    cfg.MODEL.DENSEPOSE_ON = True
    cfg.MODEL.ROI_DENSEPOSE_HEAD = CN()
    head = cfg.MODEL.ROI_DENSEPOSE_HEAD

    head.NAME = ""
    head.NUM_STACKED_CONVS = 8
    # number of parts used for point labels
    head.NUM_PATCHES = 24
    head.DECONV_KERNEL = 4
    head.CONV_HEAD_DIM = 512
    head.CONV_HEAD_KERNEL = 3
    head.UP_SCALE = 2
    head.HEATMAP_SIZE = 112
    head.POOLER_TYPE = "ROIAlignV2"
    head.POOLER_RESOLUTION = 28
    head.POOLER_SAMPLING_RATIO = 2
    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
    # overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
    head.FG_IOU_THRESHOLD = 0.7
    # loss weights for annotation masks (14 parts)
    head.INDEX_WEIGHTS = 5.0
    # loss weights for surface parts (24 parts)
    head.PART_WEIGHTS = 1.0
    # loss weights for UV regression
    head.POINT_REGRESSION_WEIGHTS = 0.01
    # coarse segmentation is trained using instance segmentation task data
    head.COARSE_SEGM_TRAINED_BY_MASKS = False

    # decoder
    head.DECODER_ON = True
    head.DECODER_NUM_CLASSES = 256
    head.DECODER_CONV_DIMS = 256
    head.DECODER_NORM = ""
    head.DECODER_COMMON_STRIDE = 4

    # DeepLab head
    head.DEEPLAB = CN()
    head.DEEPLAB.NORM = "GN"
    head.DEEPLAB.NONLOCAL_ON = 0

    # predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY;
    # some registered predictors:
    #   "DensePoseChartPredictor": predicts segmentation and UV coordinates
    #       for predefined charts
    #   "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates
    #       and associated confidences for predefined charts (default)
    #   "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings
    #       and associated confidences for CSE
    head.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"

    # loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY;
    # some registered losses:
    #   "DensePoseChartLoss": loss for chart-based models that estimate
    #       segmentation and UV coordinates
    #   "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate
    #       segmentation, UV coordinates and the corresponding confidences (default)
    head.LOSS_NAME = "DensePoseChartWithConfidenceLoss"

    # confidences:
    # enable learning UV confidences (variances) along with the actual values
    head.UV_CONFIDENCE = CN({"ENABLED": False})
    uv_confidence = head.UV_CONFIDENCE
    # UV confidence lower bound
    uv_confidence.EPSILON = 0.01
    # statistical model type for confidence learning, possible values:
    #  - "iid_iso": statistically independent identically distributed residuals
    #    with isotropic covariance
    #  - "indep_aniso": statistically independent residuals with anisotropic
    #    covariances
    uv_confidence.TYPE = "iid_iso"

    # enable learning segmentation confidences (variances) along with the actual values
    head.SEGM_CONFIDENCE = CN({"ENABLED": False})
    # segmentation confidence lower bound
    head.SEGM_CONFIDENCE.EPSILON = 0.01

    # list of angles for rotation in data augmentation during training
    cfg.INPUT.ROTATION_ANGLES = [0]
    cfg.TEST.AUG.ROTATION_ANGLES = ()  # Rotation TTA

    add_densepose_head_cse_config(cfg)
def add_hrnet_config(cfg: CN) -> None:
    """
    Add config for HRNet backbone.

    Creates `MODEL.HRNET` with the stem width, the STAGE2..STAGE4 nodes and
    the HRFPN node. The defaults are the ones for HigherHRNet w32.

    Args:
        cfg (CN): config to extend in place; must already contain the MODEL node
    """
    cfg.MODEL.HRNET = CN()
    hrnet = cfg.MODEL.HRNET
    hrnet.STEM_INPLANES = 64

    # (stage name, number of modules, blocks per branch, channels per branch);
    # the number of branches equals the number of per-branch entries
    stage_specs = (
        ("STAGE2", 1, [4, 4], [32, 64]),
        ("STAGE3", 4, [4, 4, 4], [32, 64, 128]),
        ("STAGE4", 3, [4, 4, 4, 4], [32, 64, 128, 256]),
    )
    for stage_name, num_modules, num_blocks, num_channels in stage_specs:
        setattr(hrnet, stage_name, CN())
        stage = getattr(hrnet, stage_name)
        stage.NUM_MODULES = num_modules
        stage.NUM_BRANCHES = len(num_blocks)
        stage.BLOCK = "BASIC"
        stage.NUM_BLOCKS = num_blocks
        stage.NUM_CHANNELS = num_channels
        stage.FUSE_METHOD = "SUM"

    hrnet.HRFPN = CN()
    hrnet.HRFPN.OUT_CHANNELS = 256
def add_densepose_config(cfg: CN) -> None:
    """
    Add all DensePose options to `cfg` by applying, in this order, the head,
    HRNet, bootstrap, dataset category and evaluation config functions.

    Args:
        cfg (CN): config to extend in place
    """
    config_steps = (
        add_densepose_head_config,
        add_hrnet_config,
        add_bootstrap_config,
        add_dataset_category_config,
        add_evaluation_config,
    )
    for apply_step in config_steps:
        apply_step(cfg)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .hflip import HFlipConverter
from .to_mask import ToMaskConverter
from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
from .segm_to_mask import (
predictor_output_with_fine_and_coarse_segm_to_mask,
predictor_output_with_coarse_segm_to_mask,
resample_fine_and_coarse_segm_to_bbox,
)
from .chart_output_to_chart_result import (
densepose_chart_predictor_output_to_result,
densepose_chart_predictor_output_to_result_with_confidences,
)
from .chart_output_hflip import densepose_chart_predictor_output_hflip
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any, Tuple, Type
import torch
class BaseConverter:
    """
    Base class shared by the converters.

    A converter turns instances of various source types into one destination
    type. Every source type registers a callable that performs the conversion;
    a registration for a type also applies to all descendants of that type.

    Subclasses are expected to provide two class attributes that are used
    here but not defined here:
     - registry: mapping from source type to the registered converter
     - dst_type: destination type (only used in error messages; None is
       reported as "itself")
    """

    @classmethod
    def register(cls, from_type: Type, converter: Any = None):
        """
        Register a converter for the given source type.

        Works both as a plain call (`converter` is given) and as a decorator
        (`converter` is None).

        Args:
            from_type (type): type to register the converter for;
                all instances of this type will use the same converter
            converter (callable): converter to be registered for the given
                type; if None, the returned callable is meant to be used as
                a decorator for the converter
        Return:
            A decorator that registers the callable it is applied to and
            returns it unchanged. It is returned in both usage modes.
        """

        def decorator(converter: Any) -> Any:
            cls._do_register(from_type, converter)
            return converter

        if converter is not None:
            # plain call: register right away
            decorator(converter)
        return decorator

    @classmethod
    def _do_register(cls, from_type: Type, converter: Any):
        """
        Store `converter` in the registry under the key `from_type`,
        overwriting any previous entry.
        """
        cls.registry[from_type] = converter  # pyre-ignore[16]

    @classmethod
    def _lookup_converter(cls, from_type: Type) -> Any:
        """
        Find the converter registered for the given type or, recursively,
        for one of its base classes (checked in `__bases__` order). A converter
        found through a base class gets registered for `from_type` as well,
        to save on further lookups.

        Args:
            from_type: type for which to find a converter
        Return:
            callable or None - registered converter or None
                if no suitable entry was found in the registry
        """
        registry = cls.registry  # pyre-ignore[16]
        if from_type in registry:
            return registry[from_type]
        for parent_type in from_type.__bases__:
            inherited = cls._lookup_converter(parent_type)
            if inherited is None:
                continue
            # cache the inherited converter for the derived type
            cls._do_register(from_type, inherited)
            return inherited
        return None

    @classmethod
    def convert(cls, instance: Any, *args, **kwargs):
        """
        Convert an instance to the destination type using the converter
        registered for its type. Base classes are searched as well, so there's
        no need for explicit registration for derived classes.

        Args:
            instance: source instance to convert to the destination type
            *args, **kwargs: passed on to the converter after the instance
        Return:
            Whatever the registered converter returns for the instance
        Raises KeyError, if no suitable converter found
        """
        source_type = type(instance)
        converter = cls._lookup_converter(source_type)
        if converter is not None:
            return converter(instance, *args, **kwargs)
        destination = cls.dst_type  # pyre-ignore[16]
        output_type_str = "itself" if destination is None else destination
        raise KeyError(f"Could not find converter from {source_type} to {output_type_str}")
# box given as a tuple of 4 integers
IntTupleBox = Tuple[int, int, int, int]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    """
    Convert a tensor with 4 box values into a tuple of 4 Python ints.

    Args:
        box (tensor): tensor that holds exactly 4 entries; the values are
            cast with `.long()` before being converted
    Return:
        The 4 values as a tuple of int, in the same order as in the tensor
    """
    first, second, third, fourth = box.long().tolist()
    return first, second, third, fourth
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
from . import (
HFlipConverter,
ToChartResultConverter,
ToChartResultConverterWithConfidences,
ToMaskConverter,
densepose_chart_predictor_output_hflip,
densepose_chart_predictor_output_to_result,
densepose_chart_predictor_output_to_result_with_confidences,
predictor_output_with_coarse_segm_to_mask,
predictor_output_with_fine_and_coarse_segm_to_mask,
)
# Register the built-in conversion strategies. Registration is a side effect
# of importing this module.

# predictor outputs -> masks
ToMaskConverter.register(
    from_type=DensePoseChartPredictorOutput,
    converter=predictor_output_with_fine_and_coarse_segm_to_mask,
)
ToMaskConverter.register(
    from_type=DensePoseEmbeddingPredictorOutput,
    converter=predictor_output_with_coarse_segm_to_mask,
)

# chart predictor outputs -> chart results (without / with confidences)
ToChartResultConverter.register(
    from_type=DensePoseChartPredictorOutput,
    converter=densepose_chart_predictor_output_to_result,
)
ToChartResultConverterWithConfidences.register(
    from_type=DensePoseChartPredictorOutput,
    converter=densepose_chart_predictor_output_to_result_with_confidences,
)

# chart predictor outputs -> horizontally flipped chart predictor outputs
HFlipConverter.register(
    from_type=DensePoseChartPredictorOutput,
    converter=densepose_chart_predictor_output_hflip,
)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from dataclasses import fields
import torch
from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
def densepose_chart_predictor_output_hflip(
    densepose_predictor_output: DensePoseChartPredictorOutput,
    transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
    """
    Change predictor output to take into account a horizontal flip.

    Every tensor field is reversed along dimension 3, then the chart (I, U, V)
    and coarse segmentation semantics are remapped with the symmetries from
    `transform_data`.

    NOTE: the fields of the passed object are overwritten along the way; the
    returned object is a new instance of the same type built from them.

    Args:
        densepose_predictor_output: predictor output to flip
        transform_data: symmetry data used to remap labels and UV values
    Return:
        The input itself if it is empty (length 0), otherwise a new instance
        of the input's type that holds the flipped data
    """
    if len(densepose_predictor_output) == 0:
        # nothing to flip
        return densepose_predictor_output

    output_cls = type(densepose_predictor_output)

    # flip tensors
    for field in fields(densepose_predictor_output):
        value = getattr(densepose_predictor_output, field.name)
        if isinstance(value, torch.Tensor):
            setattr(densepose_predictor_output, field.name, torch.flip(value, [3]))

    # fix up label / UV semantics
    flipped = _flip_iuv_semantics_tensor(densepose_predictor_output, transform_data)
    flipped = _flip_segm_semantics_tensor(flipped, transform_data)

    return output_cls(**{field.name: getattr(flipped, field.name) for field in fields(flipped)})
def _flip_iuv_semantics_tensor(
densepose_predictor_output: DensePoseChartPredictorOutput,
dp_transform_data: DensePoseTransformData,
) -> DensePoseChartPredictorOutput:
point_label_symmetries = dp_transform_data.point_label_symmetries
uv_symmetries = dp_transform_data.uv_symmetries
N, C, H, W = densepose_predictor_output.u.shape
u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
None, :, None, None
].expand(N, C - 1, H, W)
densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
for el in ["fine_segm", "u", "v"]:
densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
:, point_label_symmetries, :, :
]
return densepose_predictor_output
def _flip_segm_semantics_tensor(
densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
):
if densepose_predictor_output.coarse_segm.shape[1] > 2:
densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[
:, dp_transform_data.mask_label_symmetries, :, :
]
return densepose_predictor_output
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Dict
import torch
from torch.nn import functional as F
from detectron2.structures.boxes import Boxes, BoxMode
from ..structures import (
DensePoseChartPredictorOutput,
DensePoseChartResult,
DensePoseChartResultWithConfidences,
)
from . import resample_fine_and_coarse_segm_to_bbox
from .base import IntTupleBox, make_int_box
def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resamples U and V coordinate estimates for the given bounding box.

    U and V are bilinearly resized to the box size (width and height are
    both taken to be at least 1). Every pixel then receives the value from
    the channel selected by its label; pixels labeled 0 stay 0.

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    _, _, box_w, box_h = box_xywh_abs
    out_h = max(int(box_h), 1)
    out_w = max(int(box_w), 1)

    u_resized, v_resized = (
        F.interpolate(values, (out_h, out_w), mode="bilinear", align_corners=False)
        for values in (u, v)
    )

    uv = torch.zeros([2, out_h, out_w], dtype=torch.float32, device=u.device)
    # views into the result; writes through them end up in `uv`
    u_plane, v_plane = uv[0], uv[1]
    for part_id in range(1, u_resized.size(1)):
        part_mask = labels == part_id
        u_plane[part_mask] = u_resized[0, part_id][part_mask]
        v_plane[part_mask] = v_resized[0, part_id][part_mask]
    return uv
def resample_uv_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resamples the U and V estimates of a predictor output for the given
    bounding box; a thin wrapper around `resample_uv_tensors_to_bbox`.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled; its `u` and `v` attributes are used
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    u_estimates = predictor_output.u
    v_estimates = predictor_output.v
    return resample_uv_tensors_to_bbox(
        u=u_estimates, v=v_estimates, labels=labels, box_xywh_abs=box_xywh_abs
    )
def densepose_chart_predictor_output_to_result(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResult:
    """
    Convert densepose chart predictor outputs to results.

    The box is converted from XYXY to XYWH and cast to integers; labels and
    UV values are then resampled to the box size.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
       DensePose chart-based result (DensePoseChartResult)
    """
    num_outputs = len(predictor_output)
    num_boxes = len(boxes)
    assert num_outputs == 1 and num_boxes == 1, (
        "Predictor output to result conversion can operate only single outputs"
        f", got {num_outputs} predictor outputs and {num_boxes} boxes"
    )

    # convert a copy, so that the tensor owned by `boxes` stays as it is
    xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    int_box = make_int_box(xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, int_box).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, int_box)
    return DensePoseChartResult(labels=labels, uv=uv)
def resample_confidences_to_bbox(
    predictor_output: DensePoseChartPredictorOutput,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> Dict[str, torch.Tensor]:
    """
    Resamples confidences for the given bounding box.

    Every confidence present on the predictor output is bilinearly resized to
    the box size (width and height are both taken to be at least 1). If it has
    as many channels as `predictor_output.u`, every pixel receives the value
    from the channel selected by its label (pixels labeled 0 stay 0);
    otherwise the first channel is used as is.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output to be resampled
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled confidences - a dict of [H, W] tensors of float; confidences
        that are None on the predictor output stay None in the dict
    """
    _, _, box_w, box_h = box_xywh_abs
    out_size = (max(int(box_h), 1), max(int(box_w), 1))

    all_names = (
        "sigma_1",
        "sigma_2",
        "kappa_u",
        "kappa_v",
        "fine_segm_confidence",
        "coarse_segm_confidence",
    )
    confidence_results = dict.fromkeys(all_names)

    num_channels = predictor_output.u.size(1)
    device = predictor_output.u.device

    for name in all_names:
        confidence = getattr(predictor_output, name)
        if confidence is None:
            continue
        resampled = F.interpolate(
            confidence, out_size, mode="bilinear", align_corners=False
        )
        if resampled.size(1) != num_channels:
            # confidence is not part-based, fill the data with the first channel
            # (targeted for segmentation confidences that have only 1 channel)
            confidence_results[name] = resampled[0, 0]
            continue
        # assign data from channels that correspond to the labels
        per_pixel = torch.zeros(out_size, dtype=torch.float32, device=device)
        for part_id in range(1, num_channels):
            part_mask = labels == part_id
            per_pixel[part_mask] = resampled[0, part_id][part_mask]
        confidence_results[name] = per_pixel

    return confidence_results  # pyre-ignore[7]
def densepose_chart_predictor_output_to_result_with_confidences(
    predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
) -> DensePoseChartResultWithConfidences:
    """
    Convert densepose chart predictor outputs to results with confidences.

    The box is converted from XYXY to XYWH and cast to integers; labels,
    UV values and confidences are then resampled to the box size.

    Args:
        predictor_output (DensePoseChartPredictorOutput): DensePose predictor
            output with confidences to be converted to results, must contain only 1 output
        boxes (Boxes): bounding box that corresponds to the predictor output,
            must contain only 1 bounding box
    Return:
       DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
    """
    num_outputs = len(predictor_output)
    num_boxes = len(boxes)
    assert num_outputs == 1 and num_boxes == 1, (
        "Predictor output to result conversion can operate only single outputs"
        f", got {num_outputs} predictor outputs and {num_boxes} boxes"
    )

    # convert a copy, so that the tensor owned by `boxes` stays as it is
    xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    int_box = make_int_box(xywh_abs[0])

    labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, int_box).squeeze(0)
    uv = resample_uv_to_bbox(predictor_output, labels, int_box)
    confidences = resample_confidences_to_bbox(predictor_output, labels, int_box)
    return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any
from .base import BaseConverter
class HFlipConverter(BaseConverter):
    """
    Applies a horizontal flip to various DensePose predictor outputs.
    Each DensePose predictor output type has to register its flipping strategy.
    """

    # converters registered for this class, keyed by source type
    registry = {}
    # no dedicated destination type
    dst_type = None

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
        """
        Performs a horizontal flip on DensePose predictor outputs using the
        registered converter. Does recursive lookup for base classes, so
        there's no need for explicit registration for derived classes.

        Args:
            predictor_outputs: DensePose predictor output to be flipped
            transform_data: Anything useful for the flip
        Return:
            The value returned by the registered converter; per the original
            docs, an instance of the same type as predictor_outputs
        """
        flip = super().convert
        return flip(predictor_outputs, transform_data, *args, **kwargs)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any
import torch
from torch.nn import functional as F
from detectron2.structures import BitMasks, Boxes, BoxMode
from .base import IntTupleBox, make_int_box
from .to_mask import ImageSizeType
def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample coarse segmentation tensor to the given
    bounding box and derive labels for each pixel of the bounding box.

    The scores are bilinearly resized to the box size (width and height are
    both taken to be at least 1); the label of a pixel is the index of its
    highest scoring channel.

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    _, _, box_w, box_h = box_xywh_abs
    out_size = (max(int(box_h), 1), max(int(box_w), 1))
    scores = F.interpolate(coarse_segm, out_size, mode="bilinear", align_corners=False)
    return scores.argmax(dim=1)
def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given
    bounding box and derive labels for each pixel of the bounding box.

    Both tensors are bilinearly resized to the box size (width and height are
    both taken to be at least 1). The label of a pixel is the index of its
    highest scoring fine channel, set to 0 wherever the highest scoring
    coarse channel is channel 0.

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    _, _, box_w, box_h = box_xywh_abs
    out_size = (max(int(box_h), 1), max(int(box_w), 1))

    def _resized_argmax(scores: torch.Tensor) -> torch.Tensor:
        resized = F.interpolate(scores, out_size, mode="bilinear", align_corners=False)
        return resized.argmax(dim=1)

    # coarse segmentation
    foreground = _resized_argmax(coarse_segm) > 0
    # combined coarse and fine segmentation
    fine_labels = _resized_argmax(fine_segm)
    return fine_labels * foreground.long()
def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample fine and coarse segmentation outputs from a predictor to the given
    bounding box and derive labels for each pixel of the bounding box; a thin
    wrapper around `resample_fine_and_coarse_segm_tensors_to_bbox`.

    Args:
        predictor_output: DensePose predictor output that contains segmentation
            results to be resampled (attributes `fine_segm` and `coarse_segm`)
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    fine_scores = predictor_output.fine_segm
    coarse_scores = predictor_output.coarse_segm
    return resample_fine_and_coarse_segm_tensors_to_bbox(
        fine_segm=fine_scores, coarse_segm=coarse_scores, box_xywh_abs=box_xywh_abs
    )
def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate

    For every instance, the labels resampled to its box are written into
    the box region of an image-sized boolean mask that is False elsewhere.

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    image_h, image_w = image_size_hw
    # convert a copy, so that the tensor owned by `boxes` stays as it is
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    masks = torch.zeros(
        (len(boxes_xywh_abs), image_h, image_w), dtype=torch.bool, device=boxes.tensor.device
    )
    for index, box in enumerate(boxes_xywh_abs):
        int_box = make_int_box(box)
        left, top, box_w, box_h = int_box
        masks[index, top : top + box_h, left : left + box_w] = (
            resample_coarse_segm_tensor_to_bbox(predictor_output[index].coarse_segm, int_box)
        )
    return BitMasks(masks)
def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
         unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W is the resolution of the estimate

    For every instance, the box region of an image-sized boolean mask is set
    to True wherever the label resampled to the box is positive; the mask is
    False elsewhere.

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    image_h, image_w = image_size_hw
    # convert a copy, so that the tensor owned by `boxes` stays as it is
    boxes_xywh_abs = BoxMode.convert(boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    masks = torch.zeros(
        (len(boxes_xywh_abs), image_h, image_w), dtype=torch.bool, device=boxes.tensor.device
    )
    for index, box in enumerate(boxes_xywh_abs):
        int_box = make_int_box(box)
        left, top, box_w, box_h = int_box
        box_labels = resample_fine_and_coarse_segm_to_bbox(predictor_output[index], int_box)
        masks[index, top : top + box_h, left : left + box_w] = box_labels > 0
    return BitMasks(masks)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any
from detectron2.structures import Boxes
from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
from .base import BaseConverter
class ToChartResultConverter(BaseConverter):
    """
    Entry point for turning DensePose predictor outputs into chart results
    (`DensePoseChartResult`). A conversion strategy has to be registered for
    every predictor output type that should be supported.
    """

    registry = {}
    dst_type = DensePoseChartResult

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
        """
        Convert DensePose predictor outputs to a chart result by delegating to
        the converter registered for the output type. The lookup also considers
        base classes, so derived output types need no explicit registration.
        Args:
            predictor_outputs: DensePose predictor output to be converted
                to a chart result
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of `DensePoseChartResult`. If no suitable converter
            was found, raises KeyError
        """
        base_convert = super().convert
        return base_convert(predictor_outputs, boxes, *args, **kwargs)
class ToChartResultConverterWithConfidences(BaseConverter):
    """
    Entry point for turning DensePose predictor outputs into chart results that
    carry confidences (`DensePoseChartResultWithConfidences`). A conversion
    strategy has to be registered for every supported predictor output type.
    """

    registry = {}
    dst_type = DensePoseChartResultWithConfidences

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
    ) -> DensePoseChartResultWithConfidences:
        """
        Convert DensePose predictor outputs to a chart result with confidences
        by delegating to the converter registered for the output type. The
        lookup also considers base classes, so derived output types need no
        explicit registration.
        Args:
            predictor_outputs: DensePose predictor output with confidences
                to be converted to a chart result
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of `DensePoseChartResultWithConfidences`. If no suitable
            converter was found, raises KeyError
        """
        base_convert = super().convert
        return base_convert(predictor_outputs, boxes, *args, **kwargs)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any, Tuple
from detectron2.structures import BitMasks, Boxes
from .base import BaseConverter
# Image size given as (height, width)
ImageSizeType = Tuple[int, int]
class ToMaskConverter(BaseConverter):
    """
    Entry point for turning DensePose predictor outputs into masks stored in
    bit mask format (see `BitMasks`). A conversion strategy has to be
    registered for every supported predictor output type.
    """

    registry = {}
    dst_type = BitMasks

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls,
        densepose_predictor_outputs: Any,
        boxes: Boxes,
        image_size_hw: ImageSizeType,
        *args,
        **kwargs
    ) -> BitMasks:
        """
        Convert DensePose predictor outputs to BitMasks by delegating to the
        converter registered for the output type. The lookup also considers
        base classes, so derived output types need no explicit registration.
        Args:
            densepose_predictor_outputs: DensePose predictor output to be
                converted to BitMasks
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
            image_size_hw (tuple [int, int]): image height and width
        Return:
            An instance of `BitMasks`. If no suitable converter was found,
            raises KeyError
        """
        base_convert = super().convert
        return base_convert(densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .meshes import builtin
from .build import (
build_detection_test_loader,
build_detection_train_loader,
build_combined_loader,
build_frame_selector,
build_inference_based_loaders,
has_inference_based_loaders,
BootstrapDatasetFactoryCatalog,
)
from .combined_loader import CombinedDataLoader
from .dataset_mapper import DatasetMapper
from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
from .image_list_dataset import ImageListDataset
from .utils import is_relative_local_path, maybe_prepend_base_path
# ensure the builtin datasets are registered
from . import datasets
# ensure the bootstrap datasets builders are registered
from . import build
# export every name bound above that does not start with an underscore
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import itertools
import logging
import numpy as np
from collections import UserDict, defaultdict
from dataclasses import dataclass
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence, Tuple
import torch
from torch.utils.data.dataset import Dataset
from detectron2.config import CfgNode
from detectron2.data.build import build_detection_test_loader as d2_build_detection_test_loader
from detectron2.data.build import build_detection_train_loader as d2_build_detection_train_loader
from detectron2.data.build import (
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog
from detectron2.data.samplers import TrainingSampler
from detectron2.utils.comm import get_world_size
from densepose.config import get_bootstrap_dataset_config
from densepose.modeling import build_densepose_embedder
from .combined_loader import CombinedDataLoader, Loader
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_CSE_KEYS_WITHOUT_MASK, DENSEPOSE_IUV_KEYS_WITHOUT_MASK
from .datasets.dataset_type import DatasetType
from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
from .samplers import (
DensePoseConfidenceBasedSampler,
DensePoseCSEConfidenceBasedSampler,
DensePoseCSEUniformSampler,
DensePoseUniformSampler,
MaskFromDensePoseSampler,
PredictionToGroundTruthSampler,
)
from .transform import ImageResizeTransform
from .utils import get_category_to_class_mapping, get_class_to_mesh_name_mapping
from .video import (
FirstKFramesSelector,
FrameSelectionStrategy,
LastKFramesSelector,
RandomKFramesSelector,
VideoKeyframeDataset,
video_list_from_file,
)
# only the two loader builders are exported by a star import
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
# a dataset entry; the helpers below read its "annotations" key
Instance = Dict[str, Any]
# decides whether a dataset entry should be kept
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode) -> int:
    """
    Split the global batch size (SOLVER.IMS_PER_BATCH) evenly between workers.
    Args:
        cfg (CfgNode): config that provides SOLVER.IMS_PER_BATCH
    Return:
        Number of images each worker has to process per iteration; fails with
        an assertion error if the batch can not be split evenly
    """
    world_size = get_world_size()
    batch_size = cfg.SOLVER.IMS_PER_BATCH
    assert batch_size % world_size == 0, (
        "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
            batch_size, world_size
        )
    )
    assert batch_size >= world_size, (
        "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
            batch_size, world_size
        )
    )
    return batch_size // world_size
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]) -> None:
    """
    Rewrite, in place, the "category_id" of every annotation using the
    `thing_dataset_id_to_contiguous_id` map stored in the dataset metadata.
    Args:
        dataset_name (str): dataset whose metadata holds the ID map
        dataset_dicts (Iterable[Instance]): dataset entries to be updated
    """
    metadata = MetadataCatalog.get(dataset_name)
    for record in dataset_dicts:
        for annotation in record["annotations"]:
            id_map = metadata.thing_dataset_id_to_contiguous_id
            annotation["category_id"] = id_map[annotation["category_id"]]
@dataclass
class _DatasetCategory:
    """
    Class representing category data in a dataset:
     - id: category ID, as specified in the dataset annotations file
     - name: category name, as specified in the dataset annotations file
     - mapped_id: category ID after applying category maps (DATASETS.CATEGORY_MAPS config option)
     - mapped_name: category name after applying category maps
     - dataset_name: dataset in which the category is defined

    For example, when training models in a class-agnostic manner, one could take LVIS 1.0
    dataset and map the animal categories to the same category as human data from COCO:
     id = 225
     name = "cat"
     mapped_id = 1
     mapped_name = "person"
     dataset_name = "lvis_v1_animals_dp_train"
    """

    # original category ID and name
    id: int
    name: str
    # category ID and name after category maps were applied
    mapped_id: int
    mapped_name: str
    # dataset that defines the category
    dataset_name: str
# mapped category ID -> all dataset categories that were merged into it
_MergedCategoriesT = Dict[int, List[_DatasetCategory]]
def _add_category_id_to_contiguous_id_maps_to_metadata(
    merged_categories: _MergedCategoriesT,
) -> None:
    """
    Assign contiguous IDs to merged categories and store the resulting maps in
    the metadata of every dataset that contributes at least one category.
    Contiguous IDs follow the order of the sorted merged category IDs. For each
    dataset the following metadata entries are (re)filled:
     - thing_classes: mapped category names, one per merged category
     - thing_dataset_id_to_contiguous_id: original category ID -> contiguous ID
     - thing_dataset_id_to_merged_id: original category ID -> mapped category ID
    Args:
        merged_categories: mapped category ID -> categories merged into it
    """
    # dataset name -> merged category ID -> [(contiguous ID, category)]
    per_dataset = {}
    for contiguous_id, merged_id in enumerate(sorted(merged_categories.keys())):
        for category in merged_categories[merged_id]:
            by_merged_id = per_dataset.setdefault(category.dataset_name, defaultdict(list))
            by_merged_id[merged_id].append((contiguous_id, category))

    logger = logging.getLogger(__name__)
    for dataset_name, dataset_categories in per_dataset.items():
        meta = MetadataCatalog.get(dataset_name)
        if hasattr(meta, "thing_classes"):
            # metadata entries exist already: empty them in place and refill
            meta.thing_classes.clear()
            meta.thing_dataset_id_to_contiguous_id.clear()
            meta.thing_dataset_id_to_merged_id.clear()
        else:
            meta.thing_classes = []
            meta.thing_dataset_id_to_contiguous_id = {}
            meta.thing_dataset_id_to_merged_id = {}
        logger.info(f"Dataset {dataset_name}: category ID to contiguous ID mapping:")
        for _merged_id, entries in sorted(dataset_categories.items()):
            for position, (contiguous_id, category) in enumerate(entries):
                if position == 0:
                    # one class name per merged category, taken from its first entry
                    meta.thing_classes.append(category.mapped_name)
                meta.thing_dataset_id_to_contiguous_id[category.id] = contiguous_id
                meta.thing_dataset_id_to_merged_id[category.id] = category.mapped_id
                logger.info(f"{category.id} ({category.name}) -> {contiguous_id}")
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that drops entries without usable annotations.
    Args:
        cfg (CfgNode): config that provides DATALOADER.FILTER_EMPTY_ANNOTATIONS
    Return:
        None if filtering of empty annotations is switched off; otherwise a
        predicate that keeps an entry only if it has an "annotations" key and
        at least one of its annotations is not marked as crowd
    """
    if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
        return None

    def general_keep_instance_predicate(instance: Instance) -> bool:
        if "annotations" not in instance:
            return False
        # NOTE(review): the flag is read from "is_crowd"; COCO-style annotations
        # commonly use "iscrowd" - confirm the key against the dataset loaders
        return any(ann.get("is_crowd", 0) == 0 for ann in instance["annotations"])

    return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that keeps entries with enough annotated keypoints.
    Args:
        cfg (CfgNode): config that provides MODEL.KEYPOINT_ON and
            MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    Return:
        None if keypoints are disabled or no minimum is requested; otherwise a
        predicate that counts, over all annotations of an entry, the keypoints
        whose third value (every third element of "keypoints") is positive and
        compares the total against the configured minimum
    """
    threshold = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    if not (cfg.MODEL.KEYPOINT_ON and (threshold > 0)):
        return None

    def has_sufficient_num_keypoints(instance: Instance) -> bool:
        visible_total = 0
        for ann in instance["annotations"]:
            if "keypoints" in ann:
                flags = np.array(ann["keypoints"][2::3])
                visible_total = visible_total + (flags > 0).sum()
        return visible_total >= threshold

    return has_sufficient_num_keypoints
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that keeps entries with mask annotations.
    Args:
        cfg (CfgNode): config that provides MODEL.MASK_ON
    Return:
        None if masks are disabled; otherwise a predicate that is true when at
        least one annotation of an entry has a "segmentation" key
    """
    if cfg.MODEL.MASK_ON:

        def has_mask_annotations(instance: Instance) -> bool:
            for ann in instance["annotations"]:
                if "segmentation" in ann:
                    return True
            return False

        return has_mask_annotations
    return None
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Create the predicate that keeps entries with DensePose annotations.
    Args:
        cfg (CfgNode): config that provides MODEL.DENSEPOSE_ON and
            MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
    Return:
        None if DensePose is disabled; otherwise a predicate that is true when
        some annotation of an entry contains all IUV keys or all CSE keys
        (masks excluded), or - if coarse segmentation is trained by masks -
        a "segmentation" key
    """
    if not cfg.MODEL.DENSEPOSE_ON:
        return None
    masks_count_as_densepose = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    def has_densepose_annotations(instance: Instance) -> bool:
        for ann in instance["annotations"]:
            has_iuv = all(key in ann for key in DENSEPOSE_IUV_KEYS_WITHOUT_MASK)
            if has_iuv or all(key in ann for key in DENSEPOSE_CSE_KEYS_WITHOUT_MASK):
                return True
            if masks_count_as_densepose and "segmentation" in ann:
                return True
        return False

    return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
    """
    Combine the task specific predicates (keypoints, masks, DensePose).
    Args:
        cfg (CfgNode): config passed to every task specific predicate creator
    Return:
        None if no task specific predicate applies; otherwise a predicate that
        is true when at least one of the task specific predicates is true
    """
    creators = (
        _maybe_create_keypoints_keep_instance_predicate,
        _maybe_create_mask_keep_instance_predicate,
        _maybe_create_densepose_keep_instance_predicate,
    )
    active_predicates = []
    for create in creators:
        predicate = create(cfg)
        if predicate is not None:
            active_predicates.append(predicate)
    if len(active_predicates) == 0:
        return None

    def combined_predicate(instance: Instance) -> bool:
        for predicate in active_predicates:
            if predicate(instance):
                return True
        return False

    return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
    """
    Create the predicate used to filter training entries.
    Args:
        cfg (CfgNode): config passed to the general and task specific
            predicate creators
    Return:
        None if neither the general nor the task specific predicate applies,
        the only applicable one if just one of them applies, and their
        conjunction otherwise
    """
    general = _maybe_create_general_keep_instance_predicate(cfg)
    specific = _maybe_create_specific_keep_instance_predicate(cfg)

    if general is None:
        # covers both "nothing applies" (specific is None) and "specific only"
        return specific
    if specific is None:
        return general

    def combined_general_specific_keep_predicate(instance: Instance) -> bool:
        return general(instance) and specific(instance)

    return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
    """
    Create the predicate used to filter test entries: only the general
    predicate is applied, task specific annotations are not required.
    """
    return _maybe_create_general_keep_instance_predicate(cfg)
def _maybe_filter_and_map_categories(
    dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
    """
    Drop annotations of unknown categories and map the remaining ones to
    contiguous category IDs.
    Args:
        dataset_name (str): dataset whose metadata provides
            `thing_dataset_id_to_contiguous_id`
        dataset_dicts (List[Instance]): dataset entries; they are modified
            in place
    Return:
        A new list that holds all the given entries (none is removed), each
        with its "annotations" restricted to categories present in the map
    """
    id_map = MetadataCatalog.get(dataset_name).thing_dataset_id_to_contiguous_id
    processed = []
    for record in dataset_dicts:
        kept_annotations = []
        for annotation in record["annotations"]:
            original_id = annotation["category_id"]
            if original_id in id_map:
                annotation["category_id"] = id_map[original_id]
                kept_annotations.append(annotation)
        record["annotations"] = kept_annotations
        processed.append(record)
    return processed
def _add_category_whitelists_to_metadata(cfg: CfgNode) -> None:
    """
    Copy the per-dataset category whitelists from DATASETS.WHITELISTED_CATEGORIES
    to the `whitelisted_categories` entry of the dataset metadata.
    """
    logger = logging.getLogger(__name__)
    whitelists = cfg.DATASETS.WHITELISTED_CATEGORIES
    for dataset_name, category_ids in whitelists.items():
        metadata = MetadataCatalog.get(dataset_name)
        metadata.whitelisted_categories = category_ids
        message = "Whitelisted categories for dataset {}: {}".format(
            dataset_name, metadata.whitelisted_categories
        )
        logger.info(message)
def _add_category_maps_to_metadata(cfg: CfgNode) -> None:
    """
    Copy the per-dataset category maps from DATASETS.CATEGORY_MAPS to the
    `category_map` entry of the dataset metadata; source and destination
    category IDs are converted to int.
    """
    logger = logging.getLogger(__name__)
    for dataset_name, raw_map in cfg.DATASETS.CATEGORY_MAPS.items():
        int_map = {}
        for src_id, dst_id in raw_map.items():
            int_map[int(src_id)] = int(dst_id)
        metadata = MetadataCatalog.get(dataset_name)
        metadata.category_map = int_map
        message = "Category maps for dataset {}: {}".format(dataset_name, metadata.category_map)
        logger.info(message)
def _add_category_info_to_bootstrapping_metadata(dataset_name: str, dataset_cfg: CfgNode) -> None:
    """
    Store category related settings of a bootstrap dataset in its metadata.
    Args:
        dataset_name (str): dataset whose metadata is updated
        dataset_cfg (CfgNode): config that provides CATEGORIES and
            MAX_COUNT_PER_CATEGORY and is used to derive the category to class
            mapping
    """
    metadata = MetadataCatalog.get(dataset_name)
    metadata.category_to_class_mapping = get_category_to_class_mapping(dataset_cfg)
    metadata.categories = dataset_cfg.CATEGORIES
    metadata.max_count_per_category = dataset_cfg.MAX_COUNT_PER_CATEGORY
    message = "Category to class mapping for dataset {}: {}".format(
        dataset_name, metadata.category_to_class_mapping
    )
    logging.getLogger(__name__).info(message)
def _maybe_add_class_to_mesh_name_map_to_metadata(dataset_names: List[str], cfg: CfgNode) -> None:
    """
    Add the class to mesh name mapping derived from the config to the metadata
    of every given dataset that does not define `class_to_mesh_name` yet.
    """
    for name in dataset_names:
        metadata = MetadataCatalog.get(name)
        if hasattr(metadata, "class_to_mesh_name"):
            # mapping is already set for this dataset, keep it
            continue
        metadata.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
def _merge_categories(dataset_names: Collection[str]) -> _MergedCategoriesT:
    """
    Group the categories of the given datasets by their mapped category ID.
    For each dataset only the whitelisted category IDs are considered (all the
    IDs from `meta.categories`, if no whitelist is set). Each category ID is
    translated through the `category_map` metadata entry of its dataset; IDs
    that have no entry in the map are kept as is.
    Args:
        dataset_names (Collection[str]): datasets whose metadata provides
            `categories` and, optionally, `whitelisted_categories` and
            `category_map`
    Return:
        Mapping from mapped category ID to the list of categories
        (`_DatasetCategory`) that were mapped to that ID
    """
    merged_categories = defaultdict(list)
    # category ID -> name that categories mapped to this ID should get;
    # shared by all datasets, later datasets overwrite earlier entries
    category_names = {}
    for dataset_name in dataset_names:
        meta = MetadataCatalog.get(dataset_name)
        whitelisted_categories = meta.get("whitelisted_categories")
        category_map = meta.get("category_map", {})
        cat_ids = (
            whitelisted_categories if whitelisted_categories is not None else meta.categories.keys()
        )
        for cat_id in cat_ids:
            cat_name = meta.categories[cat_id]
            cat_id_mapped = category_map.get(cat_id, cat_id)
            if cat_id_mapped == cat_id or cat_id_mapped in cat_ids:
                # the category is not remapped, or is remapped to a category
                # that is considered for this dataset: record its own name
                category_names[cat_id] = cat_name
            else:
                # the target category is not considered for this dataset:
                # record the mapped ID (as a string) in place of a name
                category_names[cat_id] = str(cat_id_mapped)
            # assign temporary mapped category name, this name can be changed
            # during the second pass, since mapped ID can correspond to a category
            # from a different dataset
            # NOTE(review): the lookup below uses the categories of the current
            # dataset; presumably it fails if the mapped ID is not defined
            # there - confirm against the datasets in use
            cat_name_mapped = meta.categories[cat_id_mapped]
            merged_categories[cat_id_mapped].append(
                _DatasetCategory(
                    id=cat_id,
                    name=cat_name,
                    mapped_id=cat_id_mapped,
                    mapped_name=cat_name_mapped,
                    dataset_name=dataset_name,
                )
            )
    # second pass to assign proper mapped category names
    for cat_id, categories in merged_categories.items():
        for cat in categories:
            # a name recorded for the mapped ID takes precedence over the
            # temporary name assigned during the first pass
            if cat_id in category_names and cat.mapped_name != category_names[cat_id]:
                cat.mapped_name = category_names[cat_id]

    return merged_categories
def _warn_if_merged_different_categories(merged_categories: _MergedCategoriesT) -> None:
    """
    Log a warning for every merged category that combines categories with
    different names.
    Args:
        merged_categories: mapped category ID -> categories merged into it
    """
    logger = logging.getLogger(__name__)
    for cat_id, group in merged_categories.items():
        reference_name = group[0].name
        names_differ = any(cat.name != reference_name for cat in group[1:])
        if not names_differ:
            continue
        described = []
        for cat in group:
            described.append(f"{cat.id} ({cat.name}) from {cat.dataset_name}")
        cat_summary_str = ", ".join(described)
        logger.warning(
            f"Merged category {cat_id} corresponds to the following categories: "
            f"{cat_summary_str}"
        )
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    The categories of all the datasets are merged and mapped to contiguous
    IDs; the corresponding maps are stored in the metadata of each dataset.
    Annotations of categories that are not part of the merged set are dropped.

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    Return:
        A single list with the (filtered) dataset dicts of all the datasets,
        in the order of `dataset_names`. Fails with an assertion error if no
        dataset is given, if a dataset is empty or if the number of proposal
        files does not match the number of datasets
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load datasets and metadata
    dataset_name_to_dicts = {}
    for dataset_name in dataset_names:
        loaded_dicts = DatasetCatalog.get(dataset_name)
        # check the dataset that was just loaded; the accumulator dict itself
        # always has at least one entry at this point
        assert len(loaded_dicts), f"Dataset '{dataset_name}' is empty!"
        dataset_name_to_dicts[dataset_name] = loaded_dicts
    # merge categories, requires category metadata to be loaded
    # mapped cat_id -> [_DatasetCategory]
    merged_categories = _merge_categories(dataset_names)
    _warn_if_merged_different_categories(merged_categories)
    merged_category_names = [
        merged_categories[cat_id][0].mapped_name for cat_id in sorted(merged_categories)
    ]
    # map to contiguous category IDs
    _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories)
    # load annotations and dataset metadata
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_name_to_dicts[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
        print_instances_class_histogram(dataset_dicts, merged_category_names)
        dataset_name_to_dicts[dataset_name] = dataset_dicts

    # flatten the per-dataset lists, optionally dropping unwanted instances
    all_dicts = itertools.chain.from_iterable(dataset_name_to_dicts.values())
    if keep_instance_predicate is not None:
        return [d for d in all_dicts if keep_instance_predicate(d)]
    return list(all_dicts)
def build_detection_train_loader(cfg: CfgNode, mapper=None):
    """
    Create a training data loader the same way Detectron2 does, with one
    addition: datasets with different but compatible object category sets
    can be combined.

    Steps performed:

    1. Category whitelists, category maps and class to mesh name maps from the
       config are added to the metadata of the training datasets.
    2. The datasets named in DATASETS.TRAIN are loaded, their categories are
       merged and the entries are filtered with the training keep predicate.
    3. The resulting list of dicts is passed to the Detectron2 loader builder;
       workers map each dict with `mapper` and batch the results into a list.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.

    Returns:
        an infinite iterator of training data
    """
    train_names = cfg.DATASETS.TRAIN
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    _maybe_add_class_to_mesh_name_map_to_metadata(train_names, cfg)

    keep_predicate = _get_train_keep_instance_predicate(cfg)
    # proposals are attached only if the model is configured to load them
    if cfg.MODEL.LOAD_PROPOSALS:
        proposal_files = cfg.DATASETS.PROPOSAL_FILES_TRAIN
    else:
        proposal_files = None
    dataset_dicts = combine_detection_dataset_dicts(
        train_names,
        keep_instance_predicate=keep_predicate,
        proposal_files=proposal_files,
    )
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    return d2_build_detection_train_loader(cfg, dataset=dataset_dicts, mapper=mapper)
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """
    Test-time counterpart of `build_detection_train_loader`: the dataset is
    selected through the `dataset_name` argument (not through the names in
    cfg) and only the general keep predicate is applied to its entries.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
            and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
            dataset, with test-time transformation and batching.
    """
    _add_category_whitelists_to_metadata(cfg)
    _add_category_maps_to_metadata(cfg)
    _maybe_add_class_to_mesh_name_map_to_metadata([dataset_name], cfg)

    keep_predicate = _get_test_keep_instance_predicate(cfg)
    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        # the proposal file is looked up by the position of the dataset in DATASETS.TEST
        test_proposal_files = cfg.DATASETS.PROPOSAL_FILES_TEST
        position = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [test_proposal_files[position]]
    dataset_dicts = combine_detection_dataset_dicts(
        [dataset_name],
        keep_instance_predicate=keep_predicate,
        proposal_files=proposal_files,
    )

    # without distributed inference the entries are visited sequentially;
    # otherwise the choice of the sampler is left to the Detectron2 builder
    if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE:
        sampler = None
    else:
        sampler = torch.utils.data.SequentialSampler(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    return d2_build_detection_test_loader(
        dataset_dicts, mapper=mapper, num_workers=cfg.DATALOADER.NUM_WORKERS, sampler=sampler
    )
def build_frame_selector(cfg: CfgNode):
    """
    Create a frame selector for the strategy given in the config.
    Args:
        cfg (CfgNode): frame selection config that provides STRATEGY and,
            for the K-frames strategies, NUM_IMAGES
    Return:
        A random / first / last K frames selector, or None for the strategy
        that selects all frames
    Raises:
        ValueError: if the strategy is not handled by this function
            (presumably `FrameSelectionStrategy(...)` itself already rejects
            values that are not part of the enumeration - TODO confirm)
    """
    strategy = FrameSelectionStrategy(cfg.STRATEGY)
    if strategy == FrameSelectionStrategy.RANDOM_K:
        return RandomKFramesSelector(cfg.NUM_IMAGES)
    if strategy == FrameSelectionStrategy.FIRST_K:
        return FirstKFramesSelector(cfg.NUM_IMAGES)
    if strategy == FrameSelectionStrategy.LAST_K:
        return LastKFramesSelector(cfg.NUM_IMAGES)
    if strategy == FrameSelectionStrategy.ALL:
        # all frames are used, no selection is needed
        return None
    # fail with a clear message instead of returning an unbound local
    raise ValueError(f"Unsupported frame selection strategy {strategy}")
def build_transform(cfg: CfgNode, data_type: str):
    """
    Create a data transform from its config.
    Args:
        cfg (CfgNode): transform config that provides TYPE and, for the
            "resize" transform, MIN_SIZE and MAX_SIZE
        data_type (str): type of data to be transformed; only "image" is
            supported
    Return:
        An image resize transform for TYPE "resize" and data type "image"
    Raises:
        ValueError: for any other combination of transform and data type
    """
    is_image_resize = cfg.TYPE == "resize" and data_type == "image"
    if not is_image_resize:
        raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}")
    return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE)
def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]):
    """
    Combine several loaders into one that samples from them with the given
    ratios; its batch size is the per-worker share of SOLVER.IMS_PER_BATCH.
    """
    return CombinedDataLoader(loaders, _compute_num_images_per_worker(cfg), ratios)
def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]:
    """
    Build dataset that provides data to bootstrap on

    Category related settings from the config are stored in the dataset
    metadata; the dataset itself is created by the factory registered in
    `BootstrapDatasetFactoryCatalog` for the dataset type from the metadata.

    Args:
        dataset_name (str): Name of the dataset, needs to have associated metadata
            to load the data
        cfg (CfgNode): bootstrapping config
    Returns:
        Sequence[Tensor] - dataset that provides image batches, Tensors of size
            [N, C, H, W] of type float32; None (with a logged warning) if no
            factory is registered for the dataset type or the factory did not
            create a dataset
    """
    logger = logging.getLogger(__name__)
    _add_category_info_to_bootstrapping_metadata(dataset_name, cfg)
    meta = MetadataCatalog.get(dataset_name)
    factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type)
    dataset = factory(meta, cfg) if factory is not None else None
    if dataset is None:
        logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}")
    return dataset
def build_data_sampler(cfg: CfgNode, sampler_cfg: CfgNode, embedder: Optional[torch.nn.Module]):
    """
    Create a sampler that turns DensePose predictions into ground truth data.

    The returned `PredictionToGroundTruthSampler` always converts
    "pred_densepose" to "gt_masks" with `MaskFromDensePoseSampler`; the sampler
    that converts "pred_densepose" to "gt_densepose" depends on the type:
     - "densepose_uniform": uniform sampling
     - "densepose_UV_confidence", "densepose_fine_segm_confidence",
       "densepose_coarse_segm_confidence": confidence based sampling
     - "densepose_cse_uniform": uniform sampling for CSE
     - "densepose_cse_coarse_segm_confidence": confidence based sampling for CSE

    Args:
        cfg (CfgNode): full config, passed to the CSE samplers
        sampler_cfg (CfgNode): sampler config that provides TYPE,
            COUNT_PER_CLASS and, for CSE, USE_GROUND_TRUTH_CATEGORIES
        embedder (nn.Module or None): embedder, required by the CSE samplers
    Return:
        The configured `PredictionToGroundTruthSampler`
    Raises:
        ValueError: if the sampler type is unknown
    """
    sampler_type = sampler_cfg.TYPE
    # confidence channel used by each of the chart based confidence samplers
    chart_confidence_channels = {
        "densepose_UV_confidence": "sigma_2",
        "densepose_fine_segm_confidence": "fine_segm_confidence",
        "densepose_coarse_segm_confidence": "coarse_segm_confidence",
    }
    cse_types = ("densepose_cse_uniform", "densepose_cse_coarse_segm_confidence")
    is_known = (
        sampler_type == "densepose_uniform"
        or sampler_type in chart_confidence_channels
        or sampler_type in cse_types
    )
    if not is_known:
        raise ValueError(f"Unknown data sampler type {sampler_cfg.TYPE}")

    if sampler_type in cse_types:
        assert embedder is not None
    data_sampler = PredictionToGroundTruthSampler()
    # transform densepose pred -> gt
    if sampler_type == "densepose_uniform":
        densepose_sampler = DensePoseUniformSampler(count_per_class=sampler_cfg.COUNT_PER_CLASS)
    elif sampler_type in chart_confidence_channels:
        densepose_sampler = DensePoseConfidenceBasedSampler(
            confidence_channel=chart_confidence_channels[sampler_type],
            count_per_class=sampler_cfg.COUNT_PER_CLASS,
            search_proportion=0.5,
        )
    elif sampler_type == "densepose_cse_uniform":
        densepose_sampler = DensePoseCSEUniformSampler(
            cfg=cfg,
            use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES,
            embedder=embedder,
            count_per_class=sampler_cfg.COUNT_PER_CLASS,
        )
    else:
        densepose_sampler = DensePoseCSEConfidenceBasedSampler(
            cfg=cfg,
            use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES,
            embedder=embedder,
            confidence_channel="coarse_segm_confidence",
            count_per_class=sampler_cfg.COUNT_PER_CLASS,
            search_proportion=0.5,
        )
    data_sampler.register_sampler("pred_densepose", "gt_densepose", densepose_sampler)
    data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
    return data_sampler
def build_data_filter(cfg: CfgNode):
    """
    Create a data filter from its config.
    Args:
        cfg (CfgNode): filter config that provides TYPE and, for the
            "detection_score" filter, MIN_VALUE
    Return:
        A score based filter with the minimum score set to MIN_VALUE
    Raises:
        ValueError: if the filter type is unknown
    """
    if cfg.TYPE != "detection_score":
        raise ValueError(f"Unknown data filter type {cfg.TYPE}")
    return ScoreBasedFilter(min_score=cfg.MIN_VALUE)
def build_inference_based_loader(
    cfg: CfgNode,
    dataset_cfg: CfgNode,
    model: torch.nn.Module,
    embedder: Optional[torch.nn.Module] = None,
) -> InferenceBasedLoader:
    """
    Constructs data loader based on inference results of a model.

    Args:
        cfg (CfgNode): full config, forwarded to the data sampler builder
        dataset_cfg (CfgNode): bootstrap dataset config that provides DATASET,
            IMAGE_LOADER, DATA_SAMPLER, FILTER and INFERENCE
        model (nn.Module): model whose predictions are sampled
        embedder (nn.Module or None): embedder for the CSE data samplers
    Return:
        Loader that feeds images of the bootstrap dataset to the model and
        provides data sampled from the filtered predictions
    """
    dataset_name = dataset_cfg.DATASET
    image_loader_cfg = dataset_cfg.IMAGE_LOADER
    inference_cfg = dataset_cfg.INFERENCE

    dataset = build_bootstrap_dataset(dataset_name, image_loader_cfg)
    meta = MetadataCatalog.get(dataset_name)
    image_loader = torch.utils.data.DataLoader(
        dataset,  # pyre-ignore[6]
        batch_size=image_loader_cfg.BATCH_SIZE,
        sampler=TrainingSampler(len(dataset)),
        num_workers=image_loader_cfg.NUM_WORKERS,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
    prediction_sampler = build_data_sampler(cfg, dataset_cfg.DATA_SAMPLER, embedder)
    prediction_filter = build_data_filter(dataset_cfg.FILTER)
    return InferenceBasedLoader(
        model,
        data_loader=image_loader,
        data_sampler=prediction_sampler,
        data_filter=prediction_filter,
        shuffle=True,
        batch_size=inference_cfg.OUTPUT_BATCH_SIZE,
        inference_batch_size=inference_cfg.INPUT_BATCH_SIZE,
        category_to_class_mapping=meta.category_to_class_mapping,
    )
def has_inference_based_loaders(cfg: CfgNode) -> bool:
    """
    Returns True, if at least one inference-based loader must
    be instantiated for training, i.e. if BOOTSTRAP_DATASETS is not empty
    """
    num_bootstrap_datasets = len(cfg.BOOTSTRAP_DATASETS)
    return num_bootstrap_datasets > 0
def build_inference_based_loaders(
    cfg: CfgNode, model: torch.nn.Module
) -> Tuple[List[InferenceBasedLoader], List[float]]:
    """
    Create one inference based loader per entry of BOOTSTRAP_DATASETS.
    Args:
        cfg (CfgNode): full config that provides BOOTSTRAP_DATASETS
        model (nn.Module): model whose predictions are sampled; its `device`
            attribute determines where the embedder is placed
    Return:
        Tuple (loaders, ratios): the loaders and the RATIO value of the
        corresponding bootstrap dataset configs, in the order of
        BOOTSTRAP_DATASETS
    """
    loaders = []
    ratios = []
    embedder = build_densepose_embedder(cfg)
    # the embedder is optional downstream (`build_inference_based_loader` and
    # `build_data_sampler` accept None), so a missing embedder must not fail here
    if embedder is not None:
        embedder = embedder.to(device=model.device)  # pyre-ignore[16]
    for dataset_spec in cfg.BOOTSTRAP_DATASETS:
        # start from the default bootstrap dataset config and override it
        # with the values given for this dataset
        dataset_cfg = get_bootstrap_dataset_config().clone()
        dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec))
        loader = build_inference_based_loader(cfg, dataset_cfg, model, embedder)
        loaders.append(loader)
        ratios.append(dataset_cfg.RATIO)
    return loaders, ratios
def build_video_list_dataset(meta: Metadata, cfg: CfgNode):
    """
    Create a dataset of video keyframes from metadata and config.
    Args:
        meta (Metadata): dataset metadata that provides `video_list_fpath`,
            `video_base_path` and `category`
        cfg (CfgNode): config that provides TYPE and, for the "video_keyframe"
            type, SELECT, TRANSFORM and, optionally, KEYFRAME_HELPER
    Return:
        A `VideoKeyframeDataset` for the "video_keyframe" type, None for any
        other type
    """
    list_fpath = meta.video_list_fpath
    base_path = meta.video_base_path
    category = meta.category
    if cfg.TYPE != "video_keyframe":
        return None
    frame_selector = build_frame_selector(cfg.SELECT)
    transform = build_transform(cfg.TRANSFORM, data_type="image")
    videos = video_list_from_file(list_fpath, base_path)
    # the keyframe helper is optional
    keyframe_helper_fpath = getattr(cfg, "KEYFRAME_HELPER", None)
    return VideoKeyframeDataset(videos, category, frame_selector, transform, keyframe_helper_fpath)
class _BootstrapDatasetFactoryCatalog(UserDict):
    """
    A global dictionary that maps a dataset type (`DatasetType`) to the function
    that creates a bootstrap dataset of that type from metadata and config
    """

    def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]):
        """
        Register a dataset factory for a dataset type; a type can be
        registered only once.

        Args:
            dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST
            factory (Callable[Metadata, CfgNode]): a callable which takes Metadata and cfg
            arguments and returns a dataset object.
        """
        is_new_type = dataset_type not in self
        assert is_new_type, "Dataset '{}' is already registered!".format(dataset_type)
        self[dataset_type] = factory
# module-level catalog instance, queried by `build_bootstrap_dataset`
BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog()
# datasets of the video list type are created by `build_video_list_dataset`
BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import random
from collections import deque
from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence
# a loader is any iterable whose items are batches (iterables of samples)
Loader = Iterable[Any]


def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]):
    """
    Return the next sample of a loader, one sample at a time.
    Args:
        iterator: iterator over the batches of a loader
        pool: samples of the last fetched batch that were not returned yet
    Return:
        The oldest sample from the pool; if the pool is empty, it is refilled
        with the next batch first (StopIteration is propagated, if the
        iterator is exhausted)
    """
    if not pool:
        pool.extend(next(iterator))
    return pool.popleft()


class CombinedDataLoader:
    """
    Combines data loaders using the provided sampling ratios
    """

    # number of batches for which loader indices are drawn at once
    BATCH_COUNT = 100

    def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]):
        """
        Args:
            loaders: loaders to sample from
            batch_size: number of samples in each produced batch
            ratios: relative sampling weight of each loader
        """
        self.loaders = loaders
        self.batch_size = batch_size
        self.ratios = ratios

    def __iter__(self) -> Iterator[List[Any]]:
        """
        Yield batches of `batch_size` samples; the loader that provides each
        sample is drawn at random with the probabilities given by `ratios`.
        Iteration ends as soon as a selected loader is exhausted.
        """
        iters = [iter(loader) for loader in self.loaders]
        indices = []
        # every loader needs its own pool: `[deque()] * n` would create a single
        # deque shared by all the loaders, so samples fetched from one loader
        # would be returned for another one and the ratios would not hold
        pools = [deque() for _ in iters]
        # infinite iterator, as in D2
        while True:
            if not indices:
                # just a buffer of indices, its size doesn't matter
                # as long as it's a multiple of batch_size
                k = self.batch_size * self.BATCH_COUNT
                indices = random.choices(range(len(self.loaders)), self.ratios, k=k)
            try:
                batch = [_pooled_next(iters[i], pools[i]) for i in indices[: self.batch_size]]
            except StopIteration:
                break
            indices = indices[self.batch_size :]
            yield batch
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import copy
import logging
from typing import Any, Dict, List, Tuple
import torch
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.layers import ROIAlign
from detectron2.structures import BoxMode
from detectron2.utils.file_io import PathManager
from densepose.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
def build_augmentation(cfg, is_train):
    """
    Create the list of augmentations for the dataset mapper: the default
    Detectron2 augmentations, extended for training by a random rotation with
    an angle chosen from INPUT.ROTATION_ANGLES.
    Args:
        cfg: config that provides INPUT.ROTATION_ANGLES
        is_train (bool): whether the augmentations are used for training
    Return:
        List of augmentations
    """
    logger = logging.getLogger(__name__)
    augmentations = utils.build_augmentation(cfg, is_train)
    if not is_train:
        return augmentations
    rotation = T.RandomRotation(cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice")
    augmentations.append(rotation)
    logger.info("DensePose-specific augmentation used in training: " + str(rotation))
    return augmentations
class DatasetMapper:
    """
    A customized version of `detectron2.data.DatasetMapper`

    Reads an image, applies the augmentations built by `build_augmentation`
    and, in training mode, transforms the annotations (including DensePose
    data) and stores the result of `utils.annotations_to_instances` under
    the "instances" key.
    """

    def __init__(self, cfg, is_train=True):
        """
        Args:
            cfg: config node
            is_train: whether the mapper is used for training
        """
        self.augmentation = build_augmentation(cfg, is_train)

        # fmt: off
        self.img_format     = cfg.INPUT.FORMAT
        self.mask_on        = (
            cfg.MODEL.MASK_ON or (
                cfg.MODEL.DENSEPOSE_ON
                and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS)
        )
        self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
        self.densepose_on   = cfg.MODEL.DENSEPOSE_ON
        assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
        # fmt: on
        if self.keypoint_on and is_train:
            # Flip only makes sense in training
            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
        else:
            self.keypoint_hflip_indices = None

        if self.densepose_on:
            densepose_transform_srcs = [
                MetadataCatalog.get(ds).densepose_transform_src
                for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
            ]
            assert len(densepose_transform_srcs) > 0
            # TODO: check that DensePose transformation data is the same for
            # all the datasets. Otherwise one would have to pass DB ID with
            # each entry to select proper transformation data. For now, since
            # all DensePose annotated data uses the same data semantics, we
            # omit this check.
            densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
            self.densepose_transform_data = DensePoseTransformData.load(
                densepose_transform_data_fpath
            )

        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.augmentation, image)
        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            # annotations are not needed outside of training
            dataset_dict.pop("annotations", None)
            return dataset_dict

        # drop the annotation fields that are disabled by the config
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # USER: Don't call transpose_densepose if you don't need
        annos = [
            self._transform_densepose(
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                ),
                transforms,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]

        if self.mask_on:
            self._add_densepose_masks_as_segmentation(annos, image_shape)

        instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
        densepose_annotations = [obj.get("densepose") for obj in annos]
        # attach DensePose ground truth only if at least one instance has it
        if densepose_annotations and not all(v is None for v in densepose_annotations):
            instances.gt_densepose = DensePoseList(
                densepose_annotations, instances.gt_boxes, image_shape
            )

        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
        return dataset_dict

    def _transform_densepose(self, annotation, transforms):
        """
        Convert raw DensePose fields of an annotation into the "densepose"
        entry and apply the image transforms to it.

        Args:
            annotation (dict): instance annotation, modified in place
            transforms: transforms that were applied to the image
        Returns:
            dict: the same annotation; if DensePose is enabled, "densepose"
            holds `DensePoseDataRelative` or `None` for invalid annotations
        """
        if not self.densepose_on:
            return annotation

        # Handle densepose annotations
        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
        if is_valid:
            densepose_data = DensePoseDataRelative(annotation, cleanup=True)
            densepose_data.apply_transform(transforms, self.densepose_transform_data)
            annotation["densepose"] = densepose_data
        else:
            # the reason (`reason_not_valid`) is intentionally not logged
            DensePoseDataRelative.cleanup_annotation(annotation)
            # NOTE: annotations for certain instances may be unavailable.
            # 'None' is accepted by the DensePoseList data structure.
            annotation["densepose"] = None
        return annotation

    def _add_densepose_masks_as_segmentation(
        self, annotations: List[Dict[str, Any]], image_shape_hw: Tuple[int, int]
    ):
        """
        For annotations that have DensePose data but no "segmentation",
        derive a boolean image mask from the DensePose segmentation and
        store it under "segmentation".

        Args:
            annotations: instance annotations, modified in place
            image_shape_hw: image size as (height, width)
        """
        for obj in annotations:
            # `_transform_densepose` stores `None` for instances without valid
            # DensePose data; such entries have no `.segm` to convert
            if obj.get("densepose") is None or "segmentation" in obj:
                continue
            # DP segmentation: torch.Tensor [S, S] of float32, S=256
            segm_dp = torch.zeros_like(obj["densepose"].segm)
            segm_dp[obj["densepose"].segm > 0] = 1
            segm_h, segm_w = segm_dp.shape
            bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32)
            # image bbox
            # NOTE(review): `.item()` yields plain Python numbers; if the box
            # coordinates are floats they are used below as an output size and
            # as slice bounds, which normally require integers. Also confirm
            # that a 4-element ROI is what `ROIAlign.forward` expects.
            x0, y0, x1, y1 = (
                v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
            )
            segm_aligned = (
                ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True)
                .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp)
                .squeeze()
            )
            image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
            image_mask[y0:y1, x0:x1] = segm_aligned
            # segmentation for BitMask: np.array [H, W] of bool
            obj["segmentation"] = image_mask >= 0.5
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from . import builtin # ensure the builtin datasets are registered
# Export every public module-level name, except those that mention "builtin"
# (the `builtin` module is imported above only for its registration side effect)
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .chimpnsee import register_dataset as register_chimpnsee_dataset
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
from .lvis import DATASETS as LVIS_DATASETS
from .lvis import register_datasets as register_lvis_datasets
# Root folder passed to the registration functions below
DEFAULT_DATASETS_ROOT = "datasets"

# Register the builtin datasets when this module is imported
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_lvis_datasets(LVIS_DATASETS, DEFAULT_DATASETS_ROOT)

register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT)  # pyre-ignore[19]
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Optional
from detectron2.data import DatasetCatalog, MetadataCatalog
from ..utils import maybe_prepend_base_path
from .dataset_type import DatasetType
CHIMPNSEE_DATASET_NAME = "chimpnsee"


def register_dataset(datasets_root: Optional[str] = None) -> None:
    """
    Registers the `chimpnsee` video list dataset

    The dataset is registered with a load callback that returns nothing;
    the video list location is exposed through the dataset metadata.

    Args:
        datasets_root: Optional[str]
            Datasets root folder (default: None)
    """

    def empty_load_callback():
        pass

    video_list_fpath = maybe_prepend_base_path(
        datasets_root, "chimpnsee/cdna.eva.mpg.de/video_list.txt"
    )
    video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de")

    DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback)
    metadata = MetadataCatalog.get(CHIMPNSEE_DATASET_NAME)
    metadata.set(
        **{
            "dataset_type": DatasetType.VIDEO_LIST,
            "video_list_fpath": video_list_fpath,
            "video_base_path": video_base_path,
            "category": "chimpanzee",
        }
    )
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import contextlib
import io
import logging
import os
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from detectron2.utils.file_io import PathManager
from ..utils import maybe_prepend_base_path
# Annotation key that holds DensePose masks
DENSEPOSE_MASK_KEY = "dp_masks"
# DensePose annotation keys of IUV and CSE annotations (mask key excluded)
DENSEPOSE_IUV_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_CSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_vertex", "ref_model"]
# Union of all the DensePose annotation keys listed above
DENSEPOSE_ALL_POSSIBLE_KEYS = {
    *DENSEPOSE_IUV_KEYS_WITHOUT_MASK,
    *DENSEPOSE_CSE_KEYS_WITHOUT_MASK,
    DENSEPOSE_MASK_KEY,
}
# Base URL the DensePose metadata files are resolved against
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
    """
    Description of a dataset in COCO format that can be registered
    """

    # name the dataset is registered under
    name: str
    # images folder; the datasets root is prepended on registration
    images_root: str
    # annotations JSON file; the datasets root is prepended on registration
    annotations_fpath: str
# Datasets with DensePose annotations; image and annotation paths are
# relative to the datasets root supplied on registration
DATASETS = [
    CocoDatasetInfo(
        name="densepose_coco_2014_train",
        images_root="coco/train2014",
        annotations_fpath="coco/annotations/densepose_train2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_100",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_minival2014_100.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_valminusminival",
        images_root="coco/val2014",
        annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_train_cse",
        images_root="coco/train2014",
        annotations_fpath="coco_cse/densepose_train2014_cse.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_cse",
        images_root="coco/val2014",
        annotations_fpath="coco_cse/densepose_minival2014_cse.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_minival_100_cse",
        images_root="coco/val2014",
        annotations_fpath="coco_cse/densepose_minival2014_100_cse.json",
    ),
    CocoDatasetInfo(
        name="densepose_coco_2014_valminusminival_cse",
        images_root="coco/val2014",
        annotations_fpath="coco_cse/densepose_valminusminival2014_cse.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps",
        images_root="densepose_chimps/images",
        annotations_fpath="densepose_chimps/densepose_chimps_densepose.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps_cse_train",
        images_root="densepose_chimps/images",
        annotations_fpath="densepose_chimps/densepose_chimps_cse_train.json",
    ),
    CocoDatasetInfo(
        name="densepose_chimps_cse_val",
        images_root="densepose_chimps/images",
        annotations_fpath="densepose_chimps/densepose_chimps_cse_val.json",
    ),
    CocoDatasetInfo(
        name="posetrack2017_train",
        images_root="posetrack2017/posetrack_data_2017",
        annotations_fpath="posetrack2017/densepose_posetrack_train2017.json",
    ),
    CocoDatasetInfo(
        name="posetrack2017_val",
        images_root="posetrack2017/posetrack_data_2017",
        annotations_fpath="posetrack2017/densepose_posetrack_val2017.json",
    ),
    CocoDatasetInfo(
        name="lvis_v05_train",
        images_root="coco/train2017",
        annotations_fpath="lvis/lvis_v0.5_plus_dp_train.json",
    ),
    CocoDatasetInfo(
        name="lvis_v05_val",
        images_root="coco/val2017",
        annotations_fpath="lvis/lvis_v0.5_plus_dp_val.json",
    ),
]
# COCO 2017 datasets that use the plain `instances_*` annotation files
BASE_DATASETS = [
    CocoDatasetInfo(
        name="base_coco_2017_train",
        images_root="coco/train2017",
        annotations_fpath="coco/annotations/instances_train2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017.json",
    ),
    CocoDatasetInfo(
        name="base_coco_2017_val_100",
        images_root="coco/val2017",
        annotations_fpath="coco/annotations/instances_val2017_100.json",
    ),
]
def get_metadata(base_path: Optional[str]) -> Dict[str, Any]:
    """
    Returns metadata associated with COCO DensePose datasets

    Args:
        base_path: Optional[str]
            Base path the metadata file names are prepended with
    Returns:
        Dict[str, Any]
            Mapping from metadata key to the location of the matching file
    """
    file_names = {
        "densepose_transform_src": "UV_symmetry_transforms.mat",
        "densepose_smpl_subdiv": "SMPL_subdiv.mat",
        "densepose_smpl_subdiv_transform": "SMPL_SUBDIV_TRANSFORM.mat",
    }
    return {
        key: maybe_prepend_base_path(base_path, file_name)
        for key, file_name in file_names.items()
    }
def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Output printed to stdout while the file is being loaded is captured
    and discarded. Loading times above one second are logged.

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations
        data
    """
    from pycocotools.coco import COCO

    log = logging.getLogger(__name__)
    stopwatch = Timer()
    discarded_output = io.StringIO()
    with contextlib.redirect_stdout(discarded_output):
        api = COCO(json_file)
    if stopwatch.seconds() > 1:
        log.info("Loading {} takes {:.2f} seconds.".format(json_file, stopwatch.seconds()))
    return api
def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]):
    """
    Store the category ID to category name mapping in the dataset metadata

    Args:
        dataset_name: str
            Name of the dataset whose metadata is updated
        categories: List[Dict[str, Any]]
            Category records, each with "id" and "name" entries
    """
    metadata = MetadataCatalog.get(dataset_name)
    names_by_id = {}
    for category in categories:
        names_by_id[category["id"]] = category["name"]
    metadata.categories = names_by_id
    logging.getLogger(__name__).info(
        "Dataset {} categories: {}".format(dataset_name, metadata.categories)
    )
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
    """
    Copy every DensePose entry (see `DENSEPOSE_ALL_POSSIBLE_KEYS`) that is
    present in `ann_dict` to `obj`
    """
    obj.update(
        {name: ann_dict[name] for name in DENSEPOSE_ALL_POSSIBLE_KEYS if name in ann_dict}
    )
def _combine_images_with_annotations(
    dataset_name: str,
    image_root: str,
    img_datas: Iterable[Dict[str, Any]],
    ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
    """
    Merge image records with their annotations into dataset records

    Args:
        dataset_name: str
            Dataset name, stored in every record under "dataset"
        image_root: str
            Folder the image file names are joined with
        img_datas: Iterable[Dict[str, Any]]
            Image records
        ann_datas: Iterable[Iterable[Dict[str, Any]]]
            Annotations per image, in the same order as `img_datas`
    Returns:
        List of records, one per image, with "annotations" holding the
        converted annotations. If any image has a "frame_id", a video frame
        mapping is also created for the dataset.
    """
    copied_keys = ["iscrowd", "category_id"]
    records = []
    has_video_frames = False

    for image_info, image_anns in zip(img_datas, ann_datas):
        record = {
            "file_name": os.path.join(image_root, image_info["file_name"]),
            "height": image_info["height"],
            "width": image_info["width"],
            "image_id": image_info["id"],
            "dataset": dataset_name,
        }
        if "frame_id" in image_info:
            record["frame_id"] = image_info["frame_id"]
            record["video_id"] = image_info.get("vid_id", None)
            has_video_frames = True

        converted = []
        for ann in image_anns:
            assert ann["image_id"] == record["image_id"]
            assert ann.get("ignore", 0) == 0
            entry = {}
            for key in copied_keys:
                if key in ann:
                    entry[key] = ann[key]
            _maybe_add_bbox(entry, ann)
            _maybe_add_segm(entry, ann)
            _maybe_add_keypoints(entry, ann)
            _maybe_add_densepose(entry, ann)
            converted.append(entry)
        record["annotations"] = converted
        records.append(record)

    if has_video_frames:
        create_video_frame_mapping(dataset_name, records)
    return records
def get_contiguous_id_to_category_id_map(metadata):
    """
    Invert `metadata.thing_dataset_id_to_contiguous_id`

    If several category IDs are mapped to the same contiguous ID, the
    first one encountered is kept.

    Returns:
        Mapping from contiguous ID to category ID
    """
    inverse = {}
    for category_id, contiguous_id in metadata.thing_dataset_id_to_contiguous_id.items():
        # `setdefault` keeps the category ID that was registered first
        inverse.setdefault(contiguous_id, category_id)
    return inverse
def maybe_filter_categories_cocoapi(dataset_name, coco_api):
    """
    Filter categories and annotations of `coco_api` in place, based on the
    `thing_dataset_id_to_contiguous_id` metadata of the dataset

    Categories and annotations whose category ID is not in the mapping are
    dropped. If multiple categories are mapped to a single contiguous ID,
    only one category is kept and all the annotations are remapped to that
    category ID. The index of `coco_api` is recreated afterwards.
    """
    metadata = MetadataCatalog.get(dataset_name)
    cont_to_cat = get_contiguous_id_to_category_id_map(metadata)
    cat_to_cont = metadata.thing_dataset_id_to_contiguous_id

    # filter categories
    kept_categories = []
    for category in coco_api.dataset["categories"]:
        category_id = category["id"]
        if category_id in cat_to_cont:
            contiguous_id = cat_to_cont[category_id]
            if (contiguous_id in cont_to_cat) and (cont_to_cat[contiguous_id] == category_id):
                kept_categories.append(category)
    coco_api.dataset["categories"] = kept_categories

    # filter annotations and remap them to the retained category IDs
    kept_annotations = []
    for annotation in coco_api.dataset["annotations"]:
        category_id = annotation["category_id"]
        if category_id in cat_to_cont:
            annotation["category_id"] = cont_to_cat[cat_to_cont[category_id]]
            kept_annotations.append(annotation)
    coco_api.dataset["annotations"] = kept_annotations

    # recreate index
    coco_api.createIndex()
def maybe_filter_and_map_categories_cocoapi(dataset_name, coco_api):
    """
    Filter categories and annotations of `coco_api` in place and replace
    their category IDs with contiguous IDs

    The mapping is taken from the `thing_dataset_id_to_contiguous_id`
    metadata of the dataset; entries with unmapped category IDs are dropped.
    The index of `coco_api` is recreated afterwards.
    """
    id_map = MetadataCatalog.get(dataset_name).thing_dataset_id_to_contiguous_id

    # map categories
    mapped_categories = []
    for category in coco_api.dataset["categories"]:
        original_id = category["id"]
        if original_id in id_map:
            category["id"] = id_map[original_id]
            mapped_categories.append(category)
    coco_api.dataset["categories"] = mapped_categories

    # map annotation categories
    mapped_annotations = []
    for annotation in coco_api.dataset["annotations"]:
        original_id = annotation["category_id"]
        if original_id in id_map:
            annotation["category_id"] = id_map[original_id]
            mapped_annotations.append(annotation)
    coco_api.dataset["annotations"] = mapped_annotations

    # recreate index
    coco_api.createIndex()
def create_video_frame_mapping(dataset_name, dataset_dicts):
    """
    Store a `video_id -> {frame_id -> file_name}` mapping in the dataset
    metadata as `video_frame_mapping`

    Records without a "video_id" (or with `None` as its value) are skipped.
    """
    frames_by_video = defaultdict(dict)
    for record in dataset_dicts:
        video_id = record.get("video_id")
        if video_id is not None:
            frames_by_video[video_id][record["frame_id"]] = record["file_name"]
    MetadataCatalog.get(dataset_name).set(video_frame_mapping=frames_by_video)
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
    """
    Loads a JSON file with annotations in COCO instances format.
    Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
    in a more flexible way. Postpones category mapping to a later stage to be
    able to combine several datasets with different (but coherent) sets of
    categories.

    Args:
        annotations_json_file: str
            Path to the JSON file with annotations in COCO instances format.
        image_root: str
            directory that contains all the images
        dataset_name: str
            the name that identifies a dataset, e.g. "densepose_coco_2014_train"
    Returns:
        Dataset records produced by `_combine_images_with_annotations`,
        one per image, ordered by image ID
    """
    local_path = PathManager.get_local_path(annotations_json_file)
    coco_api = _load_coco_annotations(local_path)
    categories = coco_api.loadCats(coco_api.getCatIds())
    _add_categories_metadata(dataset_name, categories)

    # image IDs are sorted to get reproducible results
    image_ids = sorted(coco_api.imgs.keys())
    # a list of image records (dicts), each one contains fields like
    # 'file_name', 'height', 'width' and 'id'
    images = coco_api.loadImgs(image_ids)
    logging.getLogger(__name__).info(
        "Loaded {} images in COCO format from {}".format(len(images), annotations_json_file)
    )

    # list[list[dict]]: the outer list enumerates images, the inner one
    # enumerates annotation records of the objects in an image
    annotations = []
    for image_id in image_ids:
        annotations.append(coco_api.imgToAnns[image_id])
    _verify_annotations_have_unique_ids(annotations_json_file, annotations)
    return _combine_images_with_annotations(dataset_name, image_root, images, annotations)
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[str] = None):
    """
    Registers provided COCO DensePose dataset

    The annotations are loaded lazily, by the callback registered in the
    dataset catalog. Annotation and image locations as well as DensePose
    metadata are stored in the metadata catalog.

    Args:
        dataset_data: CocoDatasetInfo
            Dataset data
        datasets_root: Optional[str]
            Datasets root folder (default: None)
    """
    name = dataset_data.name
    annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
    images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root)

    def load_annotations():
        return load_coco_json(
            annotations_json_file=annotations_fpath,
            image_root=images_root,
            dataset_name=dataset_data.name,
        )

    DatasetCatalog.register(name, load_annotations)
    metadata = MetadataCatalog.get(dataset_data.name)
    densepose_metadata = get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
    metadata.set(json_file=annotations_fpath, image_root=images_root, **densepose_metadata)
def register_datasets(
    datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[str] = None
):
    """
    Registers provided COCO DensePose datasets, one by one, in the given order

    Args:
        datasets_data: Iterable[CocoDatasetInfo]
            An iterable of dataset datas
        datasets_root: Optional[str]
            Datasets root folder (default: None)
    """
    for info in datasets_data:
        register_dataset(dataset_data=info, datasets_root=datasets_root)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from enum import Enum
class DatasetType(Enum):
    """
    Dataset type, mostly used for datasets that contain data to bootstrap models on
    """

    # dataset given as a list of videos
    VIDEO_LIST = "video_list"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment