"magic_pdf/vscode:/vscode.git/clone" did not exist on "3bd0ecf16655ee5774c7c089ec6e181d11dd8004"
Commit c732df65 authored by limm

push v0.1.3 version commit bd2ea47

parent 5b3792fc
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
MODEL:
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
DATASETS:
TRAIN: ()
TEST: ("densepose_coco_2014_minival_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_HEADS:
NUM_CLASSES: 1
DATASETS:
TRAIN: ("densepose_coco_2014_minival",)
TEST: ("densepose_coco_2014_minival",)
SOLVER:
MAX_ITER: 6000
STEPS: (5500, 5800)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .data.datasets import builtin # just to register data
from .config import add_densepose_config, add_dataset_category_config
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
from .evaluator import DensePoseCOCOEvaluator
from .roi_head import DensePoseROIHeads
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
from .utils.transform import load_from_cfg
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN):
"""
Add config for additional category-related dataset options
- category whitelisting
- category mapping
"""
_C = cfg
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
def add_densepose_config(cfg: CN):
"""
Add config for densepose head.
"""
_C = cfg
_C.MODEL.DENSEPOSE_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
# Number of parts used for point labels
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
# Loss weights for annotation masks. (14 Parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
# Loss weights for surface parts. (24 Parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
# Loss weights for UV regression.
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
# For Decoder
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
# For DeepLab head
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
# Confidences
# Enable learning confidences (variances) along with the actual values
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
# UV confidence lower bound
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
# Statistical model type for confidence learning, possible values:
# - "iid_iso": statistically independent identically distributed residuals
# with isotropic covariance
# - "indep_aniso": statistically independent residuals with anisotropic
# covariances
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .build import build_detection_test_loader, build_detection_train_loader
from .dataset_mapper import DatasetMapper
# ensure the builtin datasets are registered
from . import datasets
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import itertools
import logging
import numpy as np
import operator
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
import torch
from detectron2.config import CfgNode
from detectron2.data import samplers
from detectron2.data.build import (
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from detectron2.utils.comm import get_world_size
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode):
num_workers = get_world_size()
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
assert (
images_per_batch % num_workers == 0
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
images_per_batch, num_workers
)
assert (
images_per_batch >= num_workers
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
images_per_batch, num_workers
)
images_per_worker = images_per_batch // num_workers
return images_per_worker
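# Worked example of the checks above (illustrative numbers): with
# SOLVER.IMS_PER_BATCH = 16 and 8 workers, each worker gets 16 // 8 = 2 images
# per iteration; 16 % 8 == 0, so both assertions pass. IMS_PER_BATCH = 12 with
# 8 workers would fail the divisibility assertion.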
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
meta = MetadataCatalog.get(dataset_name)
for dataset_dict in dataset_dicts:
for ann in dataset_dict["annotations"]:
ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
# merge categories for all datasets
merged_categories = {}
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
for cat_id, cat_name in meta.categories.items():
if cat_id not in merged_categories:
merged_categories[cat_id] = (cat_name, dataset_name)
continue
cat_name_other, dataset_name_other = merged_categories[cat_id]
if cat_name_other != cat_name:
raise ValueError(
f"Incompatible categories for category ID {cat_id}: "
f'dataset {dataset_name} value "{cat_name}", '
f'dataset {dataset_name_other} value "{cat_name_other}"'
)
merged_cat_id_to_cont_id = {}
for i, cat_id in enumerate(sorted(merged_categories.keys())):
merged_cat_id_to_cont_id[cat_id] = i
# add category maps to metadata
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
categories = meta.get("categories")
meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
meta.thing_dataset_id_to_contiguous_id = {
cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
}
meta.thing_contiguous_id_to_dataset_id = {
merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
}
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
def has_annotations(instance: Instance) -> bool:
return "annotations" in instance
def has_only_crowd_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if ann.get("is_crowd", 0) == 0:
return False
return True
def general_keep_instance_predicate(instance: Instance) -> bool:
return has_annotations(instance) and not has_only_crowd_annotations(instance)
if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
return None
return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
def has_sufficient_num_keypoints(instance: Instance) -> bool:
num_kpts = sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in instance["annotations"]
if "keypoints" in ann
)
return num_kpts >= min_num_keypoints
if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
return has_sufficient_num_keypoints
return None
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.MASK_ON:
return None
def has_mask_annotations(instance: Instance) -> bool:
return any("segmentation" in ann for ann in instance["annotations"])
return has_mask_annotations
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.DENSEPOSE_ON:
return None
def has_densepose_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
(DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
):
return True
return False
return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
specific_predicate_creators = [
_maybe_create_keypoints_keep_instance_predicate,
_maybe_create_mask_keep_instance_predicate,
_maybe_create_densepose_keep_instance_predicate,
]
predicates = [creator(cfg) for creator in specific_predicate_creators]
predicates = [p for p in predicates if p is not None]
if not predicates:
return None
def combined_predicate(instance: Instance) -> bool:
return any(p(instance) for p in predicates)
return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
def combined_general_specific_keep_predicate(instance: Instance) -> bool:
return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
return None
if general_keep_predicate is None:
return combined_specific_keep_predicate
if combined_specific_keep_predicate is None:
return general_keep_predicate
return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
return general_keep_predicate
def _maybe_filter_and_map_categories(
dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
meta = MetadataCatalog.get(dataset_name)
whitelisted_categories = meta.get("whitelisted_categories")
category_map = meta.get("category_map", {})
if whitelisted_categories is None and not category_map:
return dataset_dicts
filtered_dataset_dicts = []
for dataset_dict in dataset_dicts:
anns = []
for ann in dataset_dict["annotations"]:
cat_id = ann["category_id"]
if whitelisted_categories is not None and cat_id not in whitelisted_categories:
continue
ann["category_id"] = category_map.get(cat_id, cat_id)
anns.append(ann)
dataset_dict["annotations"] = anns
filtered_dataset_dicts.append(dataset_dict)
return filtered_dataset_dicts
def _add_category_whitelists_to_metadata(cfg: CfgNode):
for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
meta = MetadataCatalog.get(dataset_name)
meta.whitelisted_categories = whitelisted_cat_ids
logger = logging.getLogger(__name__)
logger.info(
"Whitelisted categories for dataset {}: {}".format(
dataset_name, meta.whitelisted_categories
)
)
def _add_category_maps_to_metadata(cfg: CfgNode):
for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
category_map = {
int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
}
meta = MetadataCatalog.get(dataset_name)
meta.category_map = category_map
logger = logging.getLogger(__name__)
logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
def combine_detection_dataset_dicts(
dataset_names: Collection[str],
keep_instance_predicate: Optional[InstancePredicate] = None,
proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
"""
Load and prepare dataset dicts for training / testing
Args:
dataset_names (Collection[str]): a list of dataset names
keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
applied to instance dicts which defines whether to keep the instance
proposal_files (Collection[str]): if given, a list of object proposal files
that match each dataset in `dataset_names`.
"""
assert len(dataset_names)
if proposal_files is None:
proposal_files = [None] * len(dataset_names)
assert len(dataset_names) == len(proposal_files)
# load annotations and dataset metadata
dataset_map = {}
for dataset_name in dataset_names:
dataset_dicts = DatasetCatalog.get(dataset_name)
dataset_map[dataset_name] = dataset_dicts
# initialize category maps
_add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
# apply category maps
all_datasets_dicts = []
for dataset_name, proposal_file in zip(dataset_names, proposal_files):
dataset_dicts = dataset_map[dataset_name]
assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
if proposal_file is not None:
dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
_map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
print_instances_class_histogram(
dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
)
all_datasets_dicts.append(dataset_dicts)
if keep_instance_predicate is not None:
all_datasets_dicts_plain = [
d
for d in itertools.chain.from_iterable(all_datasets_dicts)
if keep_instance_predicate(d)
]
else:
all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
return all_datasets_dicts_plain
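# Minimal sketch (illustrative) of combining two registered DensePose datasets
# into one flat list of instance dicts; assumes the datasets listed in
# datasets/builtin.py are present on disk under the default datasets root.
def _example_combine_datasets():
    return combine_detection_dataset_dicts(
        ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival"),
        keep_instance_predicate=None,
    )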
def build_detection_train_loader(cfg: CfgNode, mapper=None):
"""
A data loader is created in a way similar to that of Detectron2.
The main differences are:
- it allows combining datasets with different but compatible object category sets
The data loader is created by the following steps:
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
2. Start workers to work on the dicts. Each worker will:
* Map each metadata dict into another format to be consumed by the model.
* Batch them by simply putting dicts into a list.
The batched ``list[mapped_dict]`` is what this dataloader will return.
Args:
cfg (CfgNode): the config
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, True)`.
Returns:
an infinite iterator of training data
"""
images_per_worker = _compute_num_images_per_worker(cfg)
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
if mapper is None:
mapper = DatasetMapper(cfg, True)
dataset = MapDataset(dataset, mapper)
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = samplers.TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
sampler = samplers.RepeatFactorTrainingSampler(
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
) # yield individual mapped dict
data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
else:
batch_sampler = torch.utils.data.sampler.BatchSampler(
sampler, images_per_worker, drop_last=True
)
# drop_last so the batch always have the same size
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
worker_init_fn=worker_init_reset_seed,
)
return data_loader
def build_detection_test_loader(cfg, dataset_name, mapper=None):
"""
Similar to `build_detection_train_loader`.
But this function uses the given `dataset_name` argument (instead of the names in cfg),
and uses batch size 1.
Args:
cfg: a detectron2 CfgNode
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
mapper (callable): a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, False)`.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
"""
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
[dataset_name],
keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
proposal_files=[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
]
if cfg.MODEL.LOAD_PROPOSALS
else None,
)
dataset = DatasetFromList(dataset_dicts)
if mapper is None:
mapper = DatasetMapper(cfg, False)
dataset = MapDataset(dataset, mapper)
sampler = samplers.InferenceSampler(len(dataset))
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
)
return data_loader
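# Minimal usage sketch (illustrative): assumes a cfg prepared with
# add_densepose_config / add_dataset_category_config and the corresponding
# datasets available on disk.
def _example_build_loaders(cfg: CfgNode):
    train_loader = build_detection_train_loader(cfg)  # infinite training iterator
    test_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])  # batch size 1
    return train_loader, test_loader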
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import torch
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
class DatasetMapper:
"""
A customized version of `detectron2.data.DatasetMapper`
"""
def __init__(self, cfg, is_train=True):
self.tfm_gens = utils.build_transform_gen(cfg, is_train)
# fmt: off
self.img_format = cfg.INPUT.FORMAT
self.mask_on = cfg.MODEL.MASK_ON
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
# fmt: on
if self.keypoint_on and is_train:
# Flip only makes sense in training
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
else:
self.keypoint_hflip_indices = None
if self.densepose_on:
densepose_transform_srcs = [
MetadataCatalog.get(ds).densepose_transform_src
for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
]
assert len(densepose_transform_srcs) > 0
# TODO: check that DensePose transformation data is the same for
# all the datasets. Otherwise one would have to pass DB ID with
# each entry to select proper transformation data. For now, since
# all DensePose annotated data uses the same data semantics, we
# omit this check.
densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
self.densepose_transform_data = DensePoseTransformData.load(
densepose_transform_data_fpath
)
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
image_shape = image.shape[:2] # h, w
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
if not self.is_train:
dataset_dict.pop("annotations", None)
return dataset_dict
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
if not self.keypoint_on:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
# USER: Don't call transpose_densepose if you don't need
annos = [
self._transform_densepose(
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
),
transforms,
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(annos, image_shape)
if len(annos) and "densepose" in annos[0]:
gt_densepose = [obj["densepose"] for obj in annos]
instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)
dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
return dataset_dict
def _transform_densepose(self, annotation, transforms):
if not self.densepose_on:
return annotation
# Handle densepose annotations
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
if is_valid:
densepose_data = DensePoseDataRelative(annotation, cleanup=True)
densepose_data.apply_transform(transforms, self.densepose_transform_data)
annotation["densepose"] = densepose_data
else:
# logger = logging.getLogger(__name__)
# logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
DensePoseDataRelative.cleanup_annotation(annotation)
# NOTE: annotations for certain instances may be unavailable.
# 'None' is accepted by the DensePoseList data structure.
annotation["densepose"] = None
return annotation
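# Minimal usage sketch (illustrative): the mapper is normally passed to the
# loader builders in build.py, but it can also be applied to a single dataset
# dict in Detectron2 format (a dict with "file_name", "annotations", ...).
def _example_map_one_record(cfg, dataset_dict):
    mapper = DatasetMapper(cfg, is_train=True)
    # Returns a dict with an "image" tensor and, during training, an
    # "instances" field that may carry gt_densepose annotations.
    return mapper(dataset_dict)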
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import builtin # ensure the builtin datasets are registered
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
DEFAULT_DATASETS_ROOT = "datasets"
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
name: str
images_root: str
annotations_fpath: str
DATASETS = [
CocoDatasetInfo(
name="densepose_coco_2014_train",
images_root="coco/train2014",
annotations_fpath="coco/annotations/densepose_train2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival_100",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014_100.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_valminusminival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
),
CocoDatasetInfo(
name="densepose_chimps",
images_root="densepose_evolution/densepose_chimps",
annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
),
]
BASE_DATASETS = [
CocoDatasetInfo(
name="base_coco_2017_train",
images_root="coco/train2017",
annotations_fpath="coco/annotations/instances_train2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val_100",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017_100.json",
),
]
def _is_relative_local_path(path: os.PathLike):
path_str = os.fsdecode(path)
return ("://" not in path_str) and not os.path.isabs(path)
def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
"""
Prepends the provided path with a base path prefix if:
1) base path is not None;
2) path is a local path
"""
if base_path is None:
return path
if _is_relative_local_path(path):
return os.path.join(base_path, path)
return path
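# Illustrative behavior of the helper above:
#   _maybe_prepend_base_path("datasets", "coco/train2014")  -> "datasets/coco/train2014"
#   _maybe_prepend_base_path("datasets", "https://host/UV_symmetry_transforms.mat")
#       -> unchanged (URLs contain "://" and are not treated as local paths)
#   _maybe_prepend_base_path(None, "coco/train2014")         -> "coco/train2014"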
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
"""
Returns metadata associated with COCO DensePose datasets
Args:
base_path: Optional[os.PathLike]
Base path used to load metadata from
Returns:
Dict[str, Any]
Metadata in the form of a dictionary
"""
meta = {
"densepose_transform_src": _maybe_prepend_base_path(
base_path, "UV_symmetry_transforms.mat"
),
"densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
"densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
base_path, "SMPL_SUBDIV_TRANSFORM.mat"
),
}
return meta
def _load_coco_annotations(json_file: str):
"""
Load COCO annotations from a JSON file
Args:
json_file: str
Path to the file to load annotations from
Returns:
Instance of `pycocotools.coco.COCO` that provides access to annotations
data
"""
from pycocotools.coco import COCO
logger = logging.getLogger(__name__)
timer = Timer()
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
return coco_api
def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
meta = MetadataCatalog.get(dataset_name)
meta.categories = {c["id"]: c["name"] for c in categories}
logger = logging.getLogger(__name__)
logger.info("Dataset {} categories: {}".format(dataset_name, categories))
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
for key in DENSEPOSE_KEYS:
if key in ann_dict:
obj[key] = ann_dict[key]
def _combine_images_with_annotations(
dataset_name: str,
image_root: str,
img_datas: Iterable[Dict[str, Any]],
ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
ann_keys = ["iscrowd", "category_id"]
dataset_dicts = []
for img_dict, ann_dicts in zip(img_datas, ann_datas):
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
record["image_id"] = img_dict["id"]
record["dataset"] = dataset_name
objs = []
for ann_dict in ann_dicts:
assert ann_dict["image_id"] == record["image_id"]
assert ann_dict.get("ignore", 0) == 0
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
_maybe_add_bbox(obj, ann_dict)
_maybe_add_segm(obj, ann_dict)
_maybe_add_keypoints(obj, ann_dict)
_maybe_add_densepose(obj, ann_dict)
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
"""
Loads a JSON file with annotations in COCO instances format.
Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
in a more flexible way. Postpones category mapping to a later stage to be
able to combine several datasets with different (but coherent) sets of
categories.
Args:
annotations_json_file: str
Path to the JSON file with annotations in COCO instances format.
image_root: str
directory that contains all the images
dataset_name: str
the name that identifies a dataset, e.g. "densepose_coco_2014_train"
extra_annotation_keys: Optional[List[str]]
If provided, these keys are used to extract additional data from
the annotations.
"""
coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
_add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
logger = logging.getLogger(__name__)
logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images.
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
_verify_annotations_have_unique_ids(annotations_json_file, anns)
dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
return dataset_records
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
"""
Registers provided COCO DensePose dataset
Args:
dataset_data: CocoDatasetInfo
Dataset data
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)
def load_annotations():
return load_coco_json(
annotations_json_file=annotations_fpath,
image_root=images_root,
dataset_name=dataset_data.name,
)
DatasetCatalog.register(dataset_data.name, load_annotations)
MetadataCatalog.get(dataset_data.name).set(
json_file=annotations_fpath,
image_root=images_root,
**get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
)
def register_datasets(
datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
"""
Registers provided COCO DensePose datasets
Args:
datasets_data: Iterable[CocoDatasetInfo]
An iterable of dataset datas
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
for dataset_data in datasets_data:
register_dataset(dataset_data, datasets_root)
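# Sketch (illustrative) of registering an additional COCO-format DensePose
# dataset; the name and paths below are hypothetical placeholders.
def _example_register_custom_dataset(datasets_root: str = "datasets"):
    custom = CocoDatasetInfo(
        name="densepose_custom_train",  # hypothetical dataset name
        images_root="custom/images",  # hypothetical image folder
        annotations_fpath="custom/annotations/densepose_train.json",  # hypothetical file
    )
    register_dataset(custom, datasets_root)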
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import base64
import numpy as np
from io import BytesIO
import torch
from PIL import Image
from torch.nn import functional as F
class DensePoseTransformData(object):
# Horizontal symmetry label transforms used for horizontal flip
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
# fmt: off
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
# fmt: on
def __init__(self, uv_symmetries):
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
self.uv_symmetries = uv_symmetries
@staticmethod
def load(fpath):
import scipy.io
uv_symmetry_map = scipy.io.loadmat(fpath)
uv_symmetry_map_torch = {}
for key in ["U_transforms", "V_transforms"]:
uv_symmetry_map_torch[key] = []
map_src = uv_symmetry_map[key]
map_dst = uv_symmetry_map_torch[key]
for i in range(map_src.shape[1]):
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(
device=torch.cuda.current_device()
)
transform_data = DensePoseTransformData(uv_symmetry_map_torch)
return transform_data
class DensePoseDataRelative(object):
"""
Dense pose relative annotations that can be applied to any bounding box:
x - normalized X coordinates [0, 255] of annotated points
y - normalized Y coordinates [0, 255] of annotated points
i - body part labels 0,...,24 for annotated points
u - body part U coordinates [0, 1] for annotated points
v - body part V coordinates [0, 1] for annotated points
segm - 256x256 segmentation mask with values 0,...,14
To obtain absolute x and y data wrt some bounding box one needs to first
divide the data by 256, multiply by the respective bounding box size
and add bounding box offset:
x_img = x0 + x_norm * w / 256.0
y_img = y0 + y_norm * h / 256.0
Segmentation masks are typically sampled to get image-based masks.
"""
# Key for normalized X coordinates in annotation dict
X_KEY = "dp_x"
# Key for normalized Y coordinates in annotation dict
Y_KEY = "dp_y"
# Key for U part coordinates in annotation dict
U_KEY = "dp_U"
# Key for V part coordinates in annotation dict
V_KEY = "dp_V"
# Key for I point labels in annotation dict
I_KEY = "dp_I"
# Key for segmentation mask in annotation dict
S_KEY = "dp_masks"
# Number of body parts in segmentation masks
N_BODY_PARTS = 14
# Number of parts in point labels
N_PART_LABELS = 24
MASK_SIZE = 256
def __init__(self, annotation, cleanup=False):
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
self.device = torch.device("cpu")
if cleanup:
DensePoseDataRelative.cleanup_annotation(annotation)
def to(self, device):
if self.device == device:
return self
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
new_data.x = self.x.to(device)
new_data.y = self.y.to(device)
new_data.i = self.i.to(device)
new_data.u = self.u.to(device)
new_data.v = self.v.to(device)
new_data.segm = self.segm.to(device)
new_data.device = device
return new_data
@staticmethod
def extract_segmentation_mask(annotation):
import pycocotools.mask as mask_utils
poly_specs = annotation[DensePoseDataRelative.S_KEY]
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
for i in range(DensePoseDataRelative.N_BODY_PARTS):
poly_i = poly_specs[i]
if poly_i:
mask_i = mask_utils.decode(poly_i)
segm[mask_i > 0] = i + 1
return segm
@staticmethod
def validate_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key not in annotation:
return False, "no {key} data in the annotation".format(key=key)
return True, None
@staticmethod
def cleanup_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key in annotation:
del annotation[key]
def apply_transform(self, transforms, densepose_transform_data):
self._transform_pts(transforms, densepose_transform_data)
self._transform_segm(transforms, densepose_transform_data)
def _transform_pts(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.x = self.segm.size(1) - self.x
self._flip_iuv_semantics(dp_transform_data)
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
i_old = self.i.clone()
uv_symmetries = dp_transform_data.uv_symmetries
pt_label_symmetries = dp_transform_data.point_label_symmetries
for i in range(self.N_PART_LABELS):
if i + 1 in i_old:
annot_indices_i = i_old == i + 1
if pt_label_symmetries[i + 1] != i + 1:
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
u_loc = (self.u[annot_indices_i] * 255).long()
v_loc = (self.v[annot_indices_i] * 255).long()
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
device=self.u.device
)
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
device=self.v.device
)
def _transform_segm(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.segm = torch.flip(self.segm, [1])
self._flip_segm_semantics(dp_transform_data)
def _flip_segm_semantics(self, dp_transform_data):
old_segm = self.segm.clone()
mask_label_symmetries = dp_transform_data.mask_label_symmetries
for i in range(self.N_BODY_PARTS):
if mask_label_symmetries[i + 1] != i + 1:
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
def normalized_coords_transform(x0, y0, w, h):
"""
Coordinates transform that maps top left corner to (-1, -1) and bottom
right corner to (1, 1). Used for torch.grid_sample to initialize the
grid
"""
def f(p):
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
return f
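# Worked example (illustrative): for a box with x0 = 10, y0 = 20, w = 100, h = 50,
# f((10, 20)) == (-1.0, -1.0), f((110, 70)) == (1.0, 1.0) and the box center
# f((60, 45)) == (0.0, 0.0), matching the grid convention of F.grid_sample.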
class DensePoseOutput(object):
def __init__(self, S, I, U, V, confidences):
"""
Args:
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
"""
self.S = S
self.I = I # noqa: E741
self.U = U
self.V = V
self.confidences = confidences
self._check_output_dims(S, I, U, V)
def _check_output_dims(self, S, I, U, V):
assert (
len(S.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(I.size()) == 4
), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
I.size()
)
assert (
len(U.size()) == 4
), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
U.size()
)
assert (
len(V.size()) == 4
), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
V.size()
)
assert len(S) == len(I), (
"Number of output segmentation planes {} "
"should be equal to the number of output part index "
"planes {}".format(len(S), len(I))
)
assert S.size()[2:] == I.size()[2:], (
"Output segmentation plane size {} "
"should be equal to the output part index "
"plane size {}".format(S.size()[2:], I.size()[2:])
)
assert I.size() == U.size(), (
"Part index output shape {} "
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
)
assert I.size() == V.size(), (
"Part index output shape {} "
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
)
def resize(self, image_size_hw):
# do nothing - outputs are invariant to resize
pass
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
"""
Resample S, I, U, V from bbox_old to the cropped bbox_new
"""
x0old, y0old, wold, hold = bbox_old_xywh
x0new, y0new, wnew, hnew = bbox_new_xywh
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
topleft = (x0new, y0new)
bottomright = (x0new + wnew, y0new + hnew)
topleft_norm = tr_coords(topleft)
bottomright_norm = tr_coords(bottomright)
hsize = S.size(1)
wsize = S.size(2)
grid = torch.meshgrid(
torch.arange(
topleft_norm[1],
bottomright_norm[1],
(bottomright_norm[1] - topleft_norm[1]) / hsize,
)[:hsize],
torch.arange(
topleft_norm[0],
bottomright_norm[0],
(bottomright_norm[0] - topleft_norm[0]) / wsize,
)[:wsize],
)
grid = torch.stack(grid, dim=2).to(S.device)
assert (
grid.size(0) == hsize
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
wsize, grid.size(1)
)
S_new = F.grid_sample(
S.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
I_new = F.grid_sample(
I.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
U_new = F.grid_sample(
U.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
V_new = F.grid_sample(
V.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
return S_new, I_new, U_new, V_new
def crop(self, indices_cropped, bboxes_old, bboxes_new):
"""
Crop outputs for selected bounding boxes to the new bounding boxes.
"""
# VK: cropping is ignored for now
# for i, ic in enumerate(indices_cropped):
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
# bboxes_old[i], bboxes_new[i])
pass
def hflip(self, transform_data: DensePoseTransformData) -> None:
"""
Change S, I, U and V to take into account a Horizontal flip.
"""
if self.I.shape[0] > 0:
for el in "SIUV":
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
self._flip_iuv_semantics_tensor(transform_data)
self._flip_segm_semantics_tensor(transform_data)
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
point_label_symmetries = dp_transform_data.point_label_symmetries
uv_symmetries = dp_transform_data.uv_symmetries
N, C, H, W = self.U.shape
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
N, C - 1, H, W
)
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
device=self.U.device
)
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
device=self.V.device
)
for el in "IUV":
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
def _flip_segm_semantics_tensor(self, dp_transform_data):
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
def to_result(self, boxes_xywh):
"""
Convert DensePose outputs to results format. Results are more compact,
but cannot be resampled any more
"""
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
return result
def __getitem__(self, item):
if isinstance(item, int):
S_selected = self.S[item].unsqueeze(0)
I_selected = self.I[item].unsqueeze(0)
U_selected = self.U[item].unsqueeze(0)
V_selected = self.V[item].unsqueeze(0)
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
else:
S_selected = self.S[item]
I_selected = self.I[item]
U_selected = self.U[item]
V_selected = self.V[item]
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item]
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
def __str__(self):
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
)
s_conf = "confidences: [{}]".format(
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
)
return ", ".join([s, s_conf])
def __len__(self):
return self.S.size(0)
class DensePoseResult(object):
def __init__(self, boxes_xywh, S, I, U, V):
self.results = []
self.boxes_xywh = boxes_xywh.cpu().tolist()
assert len(boxes_xywh.size()) == 2
assert boxes_xywh.size(1) == 4
for i, box_xywh in enumerate(boxes_xywh):
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
result_numpy_i = result_i.cpu().numpy()
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
self.results.append(result_encoded_with_shape_i)
def __str__(self):
s = "DensePoseResult: N={} [{}]".format(
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
)
return s
def _output_to_result(self, box_xywh, S, I, U, V):
x, y, w, h = box_xywh
w = max(int(w), 1)
h = max(int(h), 1)
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
assert (
len(S.size()) == 4
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
assert (
len(I.size()) == 4
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
i_bbox = (
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (s_bbox > 0).long()
).squeeze(0)
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
4, len(U.size())
)
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
4, len(V.size())
)
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
result[0] = i_bbox
for part_id in range(1, u_bbox.size(1)):
result[1][i_bbox == part_id] = (
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
result[2][i_bbox == part_id] = (
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
assert (
result.size(1) == h
), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h)
assert (
result.size(2) == w
), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w)
return result
@staticmethod
def encode_png_data(arr):
"""
Encode array data as a PNG image using the highest compression rate
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
@return Base64-encoded string containing PNG-compressed data
"""
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
len(arr.shape)
)
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
arr.shape[0]
)
assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " "got {0}".format(
arr.dtype
)
data = np.moveaxis(arr, 0, -1)
im = Image.fromarray(data)
fstream = BytesIO()
im.save(fstream, format="png", optimize=True)
s = base64.encodebytes(fstream.getvalue()).decode()
return s
@staticmethod
def decode_png_data(shape, s):
"""
Decode array data from a string that contains PNG-compressed data
@param Base64-encoded string containing PNG-compressed data
@return Data stored in an array of size (3, M, N) of type uint8
"""
fstream = BytesIO(base64.decodebytes(s.encode()))
im = Image.open(fstream)
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
return data.reshape(shape)
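# Round-trip sketch (illustrative): encode_png_data and decode_png_data are
# inverses for uint8 arrays of shape (3, M, N), e.g.
#   arr = np.random.randint(0, 256, size=(3, 32, 24), dtype=np.uint8)
#   s = DensePoseResult.encode_png_data(arr)
#   assert np.array_equal(DensePoseResult.decode_png_data(arr.shape, s), arr)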
def __len__(self):
return len(self.results)
def __getitem__(self, item):
result_encoded = self.results[item]
bbox_xywh = self.boxes_xywh[item]
return result_encoded, bbox_xywh
class DensePoseList(object):
_TORCH_DEVICE_CPU = torch.device("cpu")
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
assert len(densepose_datas) == len(
boxes_xyxy_abs
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
len(densepose_datas), len(boxes_xyxy_abs)
)
self.densepose_datas = []
for densepose_data in densepose_datas:
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
"Attempt to initialize DensePoseList with DensePose datas "
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
)
densepose_data_ondevice = (
densepose_data.to(device) if densepose_data is not None else None
)
self.densepose_datas.append(densepose_data_ondevice)
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
self.image_size_hw = image_size_hw
self.device = device
def to(self, device):
if self.device == device:
return self
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
def __iter__(self):
return iter(self.densepose_datas)
def __len__(self):
return len(self.densepose_datas)
def __repr__(self):
s = self.__class__.__name__ + "("
s += "num_instances={}, ".format(len(self.densepose_datas))
s += "image_width={}, ".format(self.image_size_hw[1])
s += "image_height={})".format(self.image_size_hw[0])
return s
def __getitem__(self, item):
if isinstance(item, int):
densepose_data_rel = self.densepose_datas[item]
return densepose_data_rel
elif isinstance(item, slice):
densepose_datas_rel = self.densepose_datas[item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
else:
densepose_datas_rel = [self.densepose_datas[i] for i in item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# This is a modified version of cocoeval.py where we also have the densepose evaluation.
__author__ = "tsungyi"
import copy
import datetime
import itertools
import logging
import numpy as np
import pickle
import time
from collections import defaultdict
from enum import Enum
from typing import Any, Dict, Tuple
import scipy.spatial.distance as ssd
from fvcore.common.file_io import PathManager
from pycocotools import mask as maskUtils
from scipy.io import loadmat
from scipy.ndimage import zoom as spzoom
from .data.structures import DensePoseDataRelative, DensePoseResult
logger = logging.getLogger(__name__)
class DensePoseEvalMode(str, Enum):
# use both masks and geodesic distances (GPS * IOU) to compute scores
GPSM = "gpsm"
# use only geodesic distances (GPS) to compute scores
GPS = "gps"
# use only masks (IOU) to compute scores
IOU = "iou"
class DensePoseDataMode(str, Enum):
# use estimated IUV data (default mode)
IUV_DT = "iuvdt"
# use ground truth IUV data
IUV_GT = "iuvgt"
# use ground truth labels I and set UV to 0
I_GT_UV_0 = "igtuv0"
# use ground truth labels I and estimated UV coordinates
I_GT_UV_DT = "igtuvdt"
# use estimated labels I and set UV to 0
I_DT_UV_0 = "idtuv0"
class DensePoseCocoEval(object):
# Interface for evaluating detection on the Microsoft COCO dataset.
#
# The usage for CocoEval is as follows:
# cocoGt=..., cocoDt=... # load dataset and results
# E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
# E.params.recThrs = ...; # set parameters as desired
# E.evaluate(); # run per image evaluation
# E.accumulate(); # accumulate per image results
# E.summarize(); # display summary metrics of results
# For example usage see evalDemo.m and http://mscoco.org/.
#
# The evaluation parameters are as follows (defaults in brackets):
# imgIds - [all] N img ids to use for evaluation
# catIds - [all] K cat ids to use for evaluation
# iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
# recThrs - [0:.01:1] R=101 recall thresholds for evaluation
# areaRng - [...] A=4 object area ranges for evaluation
# maxDets - [1 10 100] M=3 thresholds on max detections per image
# iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose'
# iouType replaced the now DEPRECATED useSegm parameter.
# useCats - [1] if true use category labels for evaluation
# Note: if useCats=0 category labels are ignored as in proposal scoring.
# Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
#
# evaluate(): evaluates detections on every image and every category and
# concats the results into the "evalImgs" with fields:
# dtIds - [1xD] id for each of the D detections (dt)
# gtIds - [1xG] id for each of the G ground truths (gt)
# dtMatches - [TxD] matching gt id at each IoU or 0
# gtMatches - [TxG] matching dt id at each IoU or 0
# dtScores - [1xD] confidence of each dt
# gtIgnore - [1xG] ignore flag for each gt
# dtIgnore - [TxD] ignore flag for each dt at each IoU
#
# accumulate(): accumulates the per-image, per-category evaluation
# results in "evalImgs" into the dictionary "eval" with fields:
# params - parameters used for evaluation
# date - date evaluation was performed
# counts - [T,R,K,A,M] parameter dimensions (see above)
# precision - [TxRxKxAxM] precision for every evaluation setting
# recall - [TxKxAxM] max recall for every evaluation setting
# Note: precision and recall==-1 for settings with no gt objects.
#
# See also coco, mask, pycocoDemo, pycocoEvalDemo
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
def __init__(
self,
cocoGt=None,
cocoDt=None,
iouType: str = "densepose",
dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS,
dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT,
):
"""
Initialize CocoEval using coco APIs for gt and dt
:param cocoGt: coco object with ground truth annotations
:param cocoDt: coco object with detection results
:return: None
"""
self.cocoGt = cocoGt # ground truth COCO API
self.cocoDt = cocoDt # detections COCO API
self._dpEvalMode = dpEvalMode
self._dpDataMode = dpDataMode
self.params = {} # evaluation parameters
self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI]
self.eval = {} # accumulated evaluation results
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
self.params = Params(iouType=iouType) # parameters
self._paramsEval = {} # parameters for evaluation
self.stats = [] # result summarization
self.ious = {} # ious between all gts and dts
if cocoGt is not None:
self.params.imgIds = sorted(cocoGt.getImgIds())
self.params.catIds = sorted(cocoGt.getCatIds())
self.ignoreThrBB = 0.7
self.ignoreThrUV = 0.9
def _loadGEval(self):
smpl_subdiv_fpath = PathManager.get_local_path(
"https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat"
)
pdist_transform_fpath = PathManager.get_local_path(
"https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat"
)
pdist_matrix_fpath = PathManager.get_local_path(
"https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120
)
SMPL_subdiv = loadmat(smpl_subdiv_fpath)
self.PDIST_transform = loadmat(pdist_transform_fpath)
self.PDIST_transform = self.PDIST_transform["index"].squeeze()
UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze()
ClosestVertInds = np.arange(UV.shape[1]) + 1
self.Part_UVs = []
self.Part_ClosestVertInds = []
for i in np.arange(24):
self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)])
self.Part_ClosestVertInds.append(
ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]
)
with open(pdist_matrix_fpath, "rb") as hFile:
arrays = pickle.load(hFile, encoding="latin1")
self.Pdist_matrix = arrays["Pdist_matrix"]
self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze())
# Mean geodesic distances for parts.
self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150])
# Coarse Part labels.
self.CoarseParts = np.array(
[0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8]
)
def _prepare(self):
"""
Prepare ._gts and ._dts for evaluation based on params
:return: None
"""
def _toMask(anns, coco):
# modify ann['segmentation'] by reference
for ann in anns:
rle = coco.annToRLE(ann)
ann["segmentation"] = rle
def _getIgnoreRegion(iid, coco):
img = coco.imgs[iid]
if "ignore_regions_x" not in img.keys():
return None
if len(img["ignore_regions_x"]) == 0:
return None
rgns_merged = []
            for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"]):
                # interleave x and y coordinates into a flat [x0, y0, x1, y1, ...] polygon;
                # zip stops at the shorter of the two coordinate lists
                rgns_merged.append([coord for xy in zip(region_x, region_y) for coord in xy])
rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"])
rle = maskUtils.merge(rles)
return maskUtils.decode(rle)
def _checkIgnore(dt, iregion):
if iregion is None:
return True
            bb = np.array(dt["bbox"]).astype(int)
x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]
x2 = min([x2, iregion.shape[1]])
y2 = min([y2, iregion.shape[0]])
if bb[2] * bb[3] == 0:
return False
crop_iregion = iregion[y1:y2, x1:x2]
if crop_iregion.sum() == 0:
return True
if "densepose" not in dt.keys(): # filtering boxes
return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB
# filtering UVs
ignoremask = np.require(crop_iregion, requirements=["F"])
mask = self._extract_mask(dt)
uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
uvmask_ = maskUtils.encode(uvmask)
ignoremask_ = maskUtils.encode(ignoremask)
uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0]
return uviou < self.ignoreThrUV
p = self.params
if p.useCats:
gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
else:
gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
imns = self.cocoGt.loadImgs(p.imgIds)
self.size_mapping = {}
for im in imns:
self.size_mapping[im["id"]] = [im["height"], im["width"]]
        # if iouType == 'densepose', add point gt annotations
if p.iouType == "densepose":
self._loadGEval()
# convert ground truth to mask if iouType == 'segm'
if p.iouType == "segm":
_toMask(gts, self.cocoGt)
_toMask(dts, self.cocoDt)
# set ignore flag
for gt in gts:
gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
if p.iouType == "keypoints":
gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
if p.iouType == "densepose":
gt["ignore"] = ("dp_x" in gt) == 0
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
self._igrgns = defaultdict(list)
for gt in gts:
iid = gt["image_id"]
if iid not in self._igrgns.keys():
self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt)
if _checkIgnore(gt, self._igrgns[iid]):
self._gts[iid, gt["category_id"]].append(gt)
for dt in dts:
iid = dt["image_id"]
if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]):
self._dts[iid, dt["category_id"]].append(dt)
self.evalImgs = defaultdict(list) # per-image per-category evaluation results
self.eval = {} # accumulated evaluation results
def evaluate(self):
"""
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
"""
tic = time.time()
logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType))
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = "segm" if p.useSegm == 1 else "bbox"
logger.info("useSegm (deprecated) is not None. Running DensePose evaluation")
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType in ["segm", "bbox"]:
computeIoU = self.computeIoU
elif p.iouType == "keypoints":
computeIoU = self.computeOks
elif p.iouType == "densepose":
computeIoU = self.computeOgps
            # mask IoUs are needed for both the GPSM and the IOU evaluation modes
            if self._dpEvalMode in (DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU):
self.real_ious = {
(imgId, catId): self.computeDPIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds
}
self.ious = {
(imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
self.evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
self._paramsEval = copy.deepcopy(self.params)
toc = time.time()
logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic))
def getDensePoseMask(self, polys):
maskGen = np.zeros([256, 256])
for i in range(1, 15):
if polys[i - 1]:
currentMask = maskUtils.decode(polys[i - 1])
maskGen[currentMask > 0] = i
return maskGen
def _generate_rlemask_on_image(self, mask, imgId, data):
bbox_xywh = np.array(data["bbox"])
x, y, w, h = bbox_xywh
im_h, im_w = self.size_mapping[imgId]
im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
if mask is not None:
x0 = max(int(x), 0)
x1 = min(int(x + w), im_w, int(x) + mask.shape[1])
y0 = max(int(y), 0)
y1 = min(int(y + h), im_h, int(y) + mask.shape[0])
y = int(y)
x = int(x)
im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x]
im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"])
rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0]
return rle_mask
def computeDPIoU(self, imgId, catId):
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
if len(gt) == 0 and len(dt) == 0:
return []
inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in inds]
if len(dt) > p.maxDets[-1]:
dt = dt[0 : p.maxDets[-1]]
gtmasks = []
for g in gt:
if DensePoseDataRelative.S_KEY in g:
mask = self.getDensePoseMask(g[DensePoseDataRelative.S_KEY])
_, _, w, h = g["bbox"]
scale_x = float(max(w, 1)) / mask.shape[1]
scale_y = float(max(h, 1)) / mask.shape[0]
mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False)
mask = np.array(mask > 0.5, dtype=np.uint8)
rle_mask = self._generate_rlemask_on_image(mask, imgId, g)
elif "segmentation" in g:
segmentation = g["segmentation"]
if isinstance(segmentation, list) and segmentation:
# polygons
im_h, im_w = self.size_mapping[imgId]
rles = maskUtils.frPyObjects(segmentation, im_h, im_w)
rle_mask = maskUtils.merge(rles)
elif isinstance(segmentation, dict):
if isinstance(segmentation["counts"], list):
# uncompressed RLE
im_h, im_w = self.size_mapping[imgId]
rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w)
else:
# compressed RLE
rle_mask = segmentation
else:
rle_mask = self._generate_rlemask_on_image(None, imgId, g)
else:
rle_mask = self._generate_rlemask_on_image(None, imgId, g)
gtmasks.append(rle_mask)
dtmasks = []
for d in dt:
mask = self._extract_mask(d)
mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
rle_mask = self._generate_rlemask_on_image(mask, imgId, d)
dtmasks.append(rle_mask)
# compute iou between each dt and gt region
iscrowd = [int(o["iscrowd"]) for o in gt]
iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd)
return iousDP
def computeIoU(self, imgId, catId):
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
if len(gt) == 0 and len(dt) == 0:
return []
inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in inds]
if len(dt) > p.maxDets[-1]:
dt = dt[0 : p.maxDets[-1]]
if p.iouType == "segm":
g = [g["segmentation"] for g in gt]
d = [d["segmentation"] for d in dt]
elif p.iouType == "bbox":
g = [g["bbox"] for g in gt]
d = [d["bbox"] for d in dt]
else:
raise Exception("unknown iouType for iou computation")
# compute iou between each dt and gt region
iscrowd = [int(o["iscrowd"]) for o in gt]
ious = maskUtils.iou(d, g, iscrowd)
return ious
def computeOks(self, imgId, catId):
p = self.params
# dimension here should be Nxm
gts = self._gts[imgId, catId]
dts = self._dts[imgId, catId]
inds = np.argsort([-d["score"] for d in dts], kind="mergesort")
dts = [dts[i] for i in inds]
if len(dts) > p.maxDets[-1]:
dts = dts[0 : p.maxDets[-1]]
# if len(gts) == 0 and len(dts) == 0:
if len(gts) == 0 or len(dts) == 0:
return []
ious = np.zeros((len(dts), len(gts)))
sigmas = (
np.array(
[
0.26,
0.25,
0.25,
0.35,
0.35,
0.79,
0.79,
0.72,
0.72,
0.62,
0.62,
1.07,
1.07,
0.87,
0.87,
0.89,
0.89,
]
)
/ 10.0
)
vars = (sigmas * 2) ** 2
k = len(sigmas)
# compute oks between each detection and ground truth object
for j, gt in enumerate(gts):
            # create bounds for ignore regions (double the gt bbox)
g = np.array(gt["keypoints"])
xg = g[0::3]
yg = g[1::3]
vg = g[2::3]
k1 = np.count_nonzero(vg > 0)
bb = gt["bbox"]
x0 = bb[0] - bb[2]
x1 = bb[0] + bb[2] * 2
y0 = bb[1] - bb[3]
y1 = bb[1] + bb[3] * 2
for i, dt in enumerate(dts):
d = np.array(dt["keypoints"])
xd = d[0::3]
yd = d[1::3]
if k1 > 0:
# measure the per-keypoint distance if keypoints visible
dx = xd - xg
dy = yd - yg
else:
# measure minimum distance to keypoints in (x0,y0) & (x1,y1)
z = np.zeros(k)
dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0)
dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0)
e = (dx ** 2 + dy ** 2) / vars / (gt["area"] + np.spacing(1)) / 2
if k1 > 0:
e = e[vg > 0]
ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
return ious
def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray:
(densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt["densepose"]
densepose_data = DensePoseResult.decode_png_data(densepose_shape, densepose_data_encoded)
return densepose_data[0]
def _extract_iuv(
self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Extract arrays of I, U and V values at given points as numpy arrays
given the data mode stored in self._dpDataMode
"""
if self._dpDataMode == DensePoseDataMode.IUV_DT:
# estimated labels and UV (default)
ipoints = densepose_data[0, py, px]
upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
vpoints = densepose_data[2, py, px] / 255.0
elif self._dpDataMode == DensePoseDataMode.IUV_GT:
# ground truth
ipoints = np.array(gt["dp_I"])
upoints = np.array(gt["dp_U"])
vpoints = np.array(gt["dp_V"])
elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0:
# ground truth labels, UV = 0
ipoints = np.array(gt["dp_I"])
            upoints = np.zeros_like(ipoints, dtype=float)
            vpoints = np.zeros_like(ipoints, dtype=float)
elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT:
# ground truth labels, estimated UV
ipoints = np.array(gt["dp_I"])
upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
vpoints = densepose_data[2, py, px] / 255.0
elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0:
# estimated labels, UV = 0
ipoints = densepose_data[0, py, px]
            upoints = np.zeros_like(ipoints, dtype=float)
            vpoints = np.zeros_like(ipoints, dtype=float)
else:
raise ValueError(f"Unknown data mode: {self._dpDataMode}")
return ipoints, upoints, vpoints
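    # Note: of the modes handled above, IUV_DT is the standard evaluation setting; the
    # remaining modes substitute ground-truth part labels and/or zeroed UV values for the
    # estimates, which can be used to isolate the contribution of each predicted component.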
def computeOgps(self, imgId, catId):
p = self.params
# dimension here should be Nxm
g = self._gts[imgId, catId]
d = self._dts[imgId, catId]
inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort")
d = [d[i] for i in inds]
if len(d) > p.maxDets[-1]:
d = d[0 : p.maxDets[-1]]
# if len(gts) == 0 and len(dts) == 0:
if len(g) == 0 or len(d) == 0:
return []
ious = np.zeros((len(d), len(g)))
        # compute ogps between each detection and ground truth object
        # (for reference: with sigma = 0.255, a geodesic distance of 0.3m corresponds to
        #  ogps = 0.5; with sigma = 1, 0.3m corresponds to ogps ~ 0.96; with sigma = 1.45,
        #  1.7m (roughly a person's height) corresponds to ogps = 0.5)
for j, gt in enumerate(g):
if not gt["ignore"]:
g_ = gt["bbox"]
for i, dt in enumerate(d):
#
dy = int(dt["bbox"][3])
dx = int(dt["bbox"][2])
dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0
dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0
                    py = (dp_y + g_[1] - dt["bbox"][1]).astype(int)
                    px = (dp_x + g_[0] - dt["bbox"][0]).astype(int)
#
pts = np.zeros(len(px))
pts[px >= dx] = -1
pts[py >= dy] = -1
pts[px < 0] = -1
pts[py < 0] = -1
if len(pts) < 1:
ogps = 0.0
elif np.max(pts) == -1:
ogps = 0.0
else:
px[pts == -1] = 0
py[pts == -1] = 0
(densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt[
"densepose"
]
densepose_data = DensePoseResult.decode_png_data(
densepose_shape, densepose_data_encoded
)
assert densepose_data.shape[2] == dx, (
"DensePoseData width {} should be equal to "
"detection bounding box width {}".format(densepose_data.shape[2], dx)
)
assert densepose_data.shape[1] == dy, (
"DensePoseData height {} should be equal to "
"detection bounding box height {}".format(densepose_data.shape[1], dy)
)
ipoints, upoints, vpoints = self._extract_iuv(densepose_data, py, px, gt)
ipoints[pts == -1] = 0
# Find closest vertices in subsampled mesh.
cVerts, cVertsGT = self.findAllClosestVerts(gt, upoints, vpoints, ipoints)
# Get pairwise geodesic distances between gt and estimated mesh points.
dist = self.getDistances(cVertsGT, cVerts)
# Compute the Ogps measure.
# Find the mean geodesic normalization distance for
# each GT point, based on which part it is on.
Current_Mean_Distances = self.Mean_Distances[
self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]]
]
# Compute gps
ogps_values = np.exp(-(dist ** 2) / (2 * (Current_Mean_Distances ** 2)))
#
                        ogps = 0.0
                        if len(dist) > 0:
                            ogps = np.sum(ogps_values) / len(dist)
ious[i, j] = ogps
gbb = [gt["bbox"] for gt in g]
dbb = [dt["bbox"] for dt in d]
# compute iou between each dt and gt region
iscrowd = [int(o["iscrowd"]) for o in g]
ious_bb = maskUtils.iou(dbb, gbb, iscrowd)
return ious, ious_bb
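    # Sketch of the GPS measure computed in computeOgps above (following the DensePose paper):
    # for a detection matched to a ground truth annotation with point set P,
    #   GPS = (1 / |P|) * sum_{p in P} exp(-d(p)^2 / (2 * k(p)^2)),
    # where d(p) is the geodesic distance between the estimated and the annotated mesh vertex
    # for point p and k(p) is the per-part normalizing distance (Mean_Distances above).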
def evaluateImg(self, imgId, catId, aRng, maxDet):
"""
perform evaluation for single category and image
:return: dict (single image results)
"""
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
if len(gt) == 0 and len(dt) == 0:
return None
for g in gt:
# g['_ignore'] = g['ignore']
if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]):
g["_ignore"] = True
else:
g["_ignore"] = False
# sort dt highest score first, sort gt ignore last
gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
gt = [gt[i] for i in gtind]
dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in dtind[0:maxDet]]
iscrowd = [int(o["iscrowd"]) for o in gt]
# load computed ious
if p.iouType == "densepose":
# print('Checking the length', len(self.ious[imgId, catId]))
# if len(self.ious[imgId, catId]) == 0:
# print(self.ious[imgId, catId])
ious = (
self.ious[imgId, catId][0][:, gtind]
if len(self.ious[imgId, catId]) > 0
else self.ious[imgId, catId]
)
ioubs = (
self.ious[imgId, catId][1][:, gtind]
if len(self.ious[imgId, catId]) > 0
else self.ious[imgId, catId]
)
            if self._dpEvalMode in (DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU):
iousM = (
self.real_ious[imgId, catId][:, gtind]
if len(self.real_ious[imgId, catId]) > 0
else self.real_ious[imgId, catId]
)
else:
ious = (
self.ious[imgId, catId][:, gtind]
if len(self.ious[imgId, catId]) > 0
else self.ious[imgId, catId]
)
T = len(p.iouThrs)
G = len(gt)
D = len(dt)
gtm = np.zeros((T, G))
dtm = np.zeros((T, D))
gtIg = np.array([g["_ignore"] for g in gt])
dtIg = np.zeros((T, D))
if np.all(gtIg) and p.iouType == "densepose":
dtIg = np.logical_or(dtIg, True)
if len(ious) > 0: # and not p.iouType == 'densepose':
for tind, t in enumerate(p.iouThrs):
for dind, d in enumerate(dt):
# information about best match so far (m=-1 -> unmatched)
iou = min([t, 1 - 1e-10])
m = -1
for gind, _g in enumerate(gt):
# if this gt already matched, and not a crowd, continue
if gtm[tind, gind] > 0 and not iscrowd[gind]:
continue
# if dt matched to reg gt, and on ignore gt, stop
if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
break
if p.iouType == "densepose":
if self._dpEvalMode == DensePoseEvalMode.GPSM:
new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind])
elif self._dpEvalMode == DensePoseEvalMode.IOU:
new_iou = iousM[dind, gind]
elif self._dpEvalMode == DensePoseEvalMode.GPS:
new_iou = ious[dind, gind]
else:
new_iou = ious[dind, gind]
if new_iou < iou:
continue
if new_iou == 0.0:
continue
# if match successful and best so far, store appropriately
iou = new_iou
m = gind
# if match made store id of match for both dt and gt
if m == -1:
continue
dtIg[tind, dind] = gtIg[m]
dtm[tind, dind] = gt[m]["id"]
gtm[tind, m] = d["id"]
if p.iouType == "densepose":
if not len(ioubs) == 0:
for dind, d in enumerate(dt):
# information about best match so far (m=-1 -> unmatched)
if dtm[tind, dind] == 0:
ioub = 0.8
m = -1
for gind, _g in enumerate(gt):
# if this gt already matched, and not a crowd, continue
if gtm[tind, gind] > 0 and not iscrowd[gind]:
continue
# continue to next gt unless better match made
if ioubs[dind, gind] < ioub:
continue
# if match successful and best so far, store appropriately
ioub = ioubs[dind, gind]
m = gind
# if match made store id of match for both dt and gt
if m > -1:
dtIg[:, dind] = gtIg[m]
if gtIg[m]:
dtm[tind, dind] = gt[m]["id"]
gtm[tind, m] = d["id"]
# set unmatched detections outside of area range to ignore
a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt)))
dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
# store results for given image and category
# print('Done with the function', len(self.ious[imgId, catId]))
return {
"image_id": imgId,
"category_id": catId,
"aRng": aRng,
"maxDet": maxDet,
"dtIds": [d["id"] for d in dt],
"gtIds": [g["id"] for g in gt],
"dtMatches": dtm,
"gtMatches": gtm,
"dtScores": [d["score"] for d in dt],
"gtIgnore": gtIg,
"dtIgnore": dtIg,
}
def accumulate(self, p=None):
"""
Accumulate per image evaluation results and store the result in self.eval
:param p: input params for evaluation
:return: None
"""
logger.info("Accumulating evaluation results...")
tic = time.time()
if not self.evalImgs:
logger.info("Please run evaluate() first")
# allows input customized parameters
if p is None:
p = self.params
p.catIds = p.catIds if p.useCats == 1 else [-1]
T = len(p.iouThrs)
R = len(p.recThrs)
K = len(p.catIds) if p.useCats else 1
A = len(p.areaRng)
M = len(p.maxDets)
precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories
recall = -(np.ones((T, K, A, M)))
# create dictionary for future indexing
logger.info("Categories: {}".format(p.catIds))
_pe = self._paramsEval
catIds = _pe.catIds if _pe.useCats else [-1]
setK = set(catIds)
setA = set(map(tuple, _pe.areaRng))
setM = set(_pe.maxDets)
setI = set(_pe.imgIds)
# get inds to evaluate
k_list = [n for n, k in enumerate(p.catIds) if k in setK]
m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
I0 = len(_pe.imgIds)
A0 = len(_pe.areaRng)
# retrieve E at each category, area range, and max number of detections
for k, k0 in enumerate(k_list):
Nk = k0 * A0 * I0
for a, a0 in enumerate(a_list):
Na = a0 * I0
for m, maxDet in enumerate(m_list):
E = [self.evalImgs[Nk + Na + i] for i in i_list]
E = [e for e in E if e is not None]
if len(E) == 0:
continue
dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
                    # different sorting methods generate slightly different results;
                    # mergesort is used to be consistent with the Matlab implementation
inds = np.argsort(-dtScores, kind="mergesort")
dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
gtIg = np.concatenate([e["gtIgnore"] for e in E])
npig = np.count_nonzero(gtIg == 0)
if npig == 0:
continue
tps = np.logical_and(dtm, np.logical_not(dtIg))
fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float64)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float64)
for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
tp = np.array(tp)
fp = np.array(fp)
nd = len(tp)
rc = tp / npig
pr = tp / (fp + tp + np.spacing(1))
q = np.zeros((R,))
if nd:
recall[t, k, a, m] = rc[-1]
else:
recall[t, k, a, m] = 0
                        # accessing individual elements of a numpy array is slow without the
                        # Cython optimization; using python lists gives a significant speedup
pr = pr.tolist()
q = q.tolist()
for i in range(nd - 1, 0, -1):
if pr[i] > pr[i - 1]:
pr[i - 1] = pr[i]
inds = np.searchsorted(rc, p.recThrs, side="left")
try:
for ri, pi in enumerate(inds):
q[ri] = pr[pi]
except Exception:
pass
precision[t, :, k, a, m] = np.array(q)
logger.info(
"Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision))
)
self.eval = {
"params": p,
"counts": [T, R, K, A, M],
"date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"precision": precision,
"recall": recall,
}
toc = time.time()
logger.info("DONE (t={:0.2f}s).".format(toc - tic))
def summarize(self):
"""
Compute and display summary metrics for evaluation results.
Note this function can *only* be applied on the default parameter setting
"""
def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
p = self.params
iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
titleStr = "Average Precision" if ap == 1 else "Average Recall"
typeStr = "(AP)" if ap == 1 else "(AR)"
measure = "IoU"
if self.params.iouType == "keypoints":
measure = "OKS"
elif self.params.iouType == "densepose":
measure = "OGPS"
iouStr = (
"{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
if iouThr is None
else "{:0.2f}".format(iouThr)
)
aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
if ap == 1:
# dimension of precision: [TxRxKxAxM]
s = self.eval["precision"]
# IoU
if iouThr is not None:
t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
s = s[t]
s = s[:, :, :, aind, mind]
else:
# dimension of recall: [TxKxAxM]
s = self.eval["recall"]
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
s = s[t]
s = s[:, :, aind, mind]
if len(s[s > -1]) == 0:
mean_s = -1
else:
mean_s = np.mean(s[s > -1])
logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s))
return mean_s
def _summarizeDets():
stats = np.zeros((12,))
stats[0] = _summarize(1)
stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
return stats
def _summarizeKps():
stats = np.zeros((10,))
stats[0] = _summarize(1, maxDets=20)
stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
stats[3] = _summarize(1, maxDets=20, areaRng="medium")
stats[4] = _summarize(1, maxDets=20, areaRng="large")
stats[5] = _summarize(0, maxDets=20)
stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
stats[8] = _summarize(0, maxDets=20, areaRng="medium")
stats[9] = _summarize(0, maxDets=20, areaRng="large")
return stats
def _summarizeUvs():
stats = np.zeros((10,))
stats[0] = _summarize(1, maxDets=self.params.maxDets[0])
stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5)
stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75)
stats[3] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium")
stats[4] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large")
stats[5] = _summarize(0, maxDets=self.params.maxDets[0])
stats[6] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5)
stats[7] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75)
stats[8] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium")
stats[9] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large")
return stats
def _summarizeUvsOld():
stats = np.zeros((18,))
stats[0] = _summarize(1, maxDets=self.params.maxDets[0])
stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5)
stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55)
stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60)
stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65)
stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70)
stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75)
stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80)
stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85)
stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90)
stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95)
stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium")
stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large")
stats[13] = _summarize(0, maxDets=self.params.maxDets[0])
stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5)
stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75)
stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium")
stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large")
return stats
if not self.eval:
raise Exception("Please run accumulate() first")
iouType = self.params.iouType
if iouType in ["segm", "bbox"]:
summarize = _summarizeDets
elif iouType in ["keypoints"]:
summarize = _summarizeKps
elif iouType in ["densepose"]:
summarize = _summarizeUvs
self.stats = summarize()
def __str__(self):
self.summarize()
# ================ functions for dense pose ==============================
def findAllClosestVerts(self, gt, U_points, V_points, Index_points):
#
I_gt = np.array(gt["dp_I"])
U_gt = np.array(gt["dp_U"])
V_gt = np.array(gt["dp_V"])
#
# print(I_gt)
#
ClosestVerts = np.ones(Index_points.shape) * -1
for i in np.arange(24):
#
if sum(Index_points == (i + 1)) > 0:
UVs = np.array(
[U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]]
)
Current_Part_UVs = self.Part_UVs[i]
Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[
np.argmin(D, axis=0)
]
#
ClosestVertsGT = np.ones(Index_points.shape) * -1
for i in np.arange(24):
if sum(I_gt == (i + 1)) > 0:
UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]])
Current_Part_UVs = self.Part_UVs[i]
Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)]
#
return ClosestVerts, ClosestVertsGT
def getDistances(self, cVertsGT, cVerts):
ClosestVertsTransformed = self.PDIST_transform[cVerts.astype(int) - 1]
ClosestVertsGTTransformed = self.PDIST_transform[cVertsGT.astype(int) - 1]
#
ClosestVertsTransformed[cVerts < 0] = 0
ClosestVertsGTTransformed[cVertsGT < 0] = 0
#
cVertsGT = ClosestVertsGTTransformed
cVerts = ClosestVertsTransformed
#
n = 27554
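        # n is the number of vertices of the subdivided SMPL mesh; the index arithmetic
        # below maps a vertex pair (i, j) to the position of its entry in the flattened
        # upper-triangular (condensed) pairwise geodesic distance matrix Pdist_matrix
        # (a sketch of the intent; the exact layout is fixed by how Pdist_matrix.pkl
        # was generated).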
dists = []
for d in range(len(cVertsGT)):
if cVertsGT[d] > 0:
if cVerts[d] > 0:
i = cVertsGT[d] - 1
j = cVerts[d] - 1
if j == i:
dists.append(0)
elif j > i:
ccc = i
i = j
j = ccc
i = n - i - 1
j = n - j - 1
k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
k = (n * n - n) / 2 - k - 1
dists.append(self.Pdist_matrix[int(k)][0])
else:
i = n - i - 1
j = n - j - 1
k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
k = (n * n - n) / 2 - k - 1
dists.append(self.Pdist_matrix[int(k)][0])
else:
dists.append(np.inf)
return np.atleast_1d(np.array(dists).squeeze())
class Params:
"""
Params for coco evaluation api
"""
def setDetParams(self):
self.imgIds = []
self.catIds = []
        # np.arange causes trouble: the values it generates can be slightly larger than the
        # true thresholds, hence np.linspace with an explicit (integer) number of samples
        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
self.maxDets = [1, 10, 100]
self.areaRng = [
[0 ** 2, 1e5 ** 2],
[0 ** 2, 32 ** 2],
[32 ** 2, 96 ** 2],
[96 ** 2, 1e5 ** 2],
]
self.areaRngLbl = ["all", "small", "medium", "large"]
self.useCats = 1
def setKpParams(self):
self.imgIds = []
self.catIds = []
        # np.arange causes trouble: the values it generates can be slightly larger than the
        # true thresholds, hence np.linspace with an explicit (integer) number of samples
        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
self.maxDets = [20]
self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
self.areaRngLbl = ["all", "medium", "large"]
self.useCats = 1
def setUvParams(self):
self.imgIds = []
self.catIds = []
self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
self.maxDets = [20]
self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
self.areaRngLbl = ["all", "medium", "large"]
self.useCats = 1
def __init__(self, iouType="segm"):
if iouType == "segm" or iouType == "bbox":
self.setDetParams()
elif iouType == "keypoints":
self.setKpParams()
elif iouType == "densepose":
self.setUvParams()
else:
raise Exception("iouType not supported")
self.iouType = iouType
# useSegm is deprecated
self.useSegm = None
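    # Example of the resulting defaults (sketch): Params(iouType="densepose") sets
    # iouThrs = [0.50, 0.55, ..., 0.95] (10 thresholds), recThrs = [0.00, 0.01, ..., 1.00]
    # (101 thresholds), maxDets = [20], and the area ranges "all" / "medium" / "large".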
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import math
from dataclasses import dataclass
from enum import Enum
import fvcore.nn.weight_init as weight_init
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.config import CfgNode
from detectron2.layers import Conv2d, ConvTranspose2d, interpolate
from detectron2.structures.boxes import matched_boxlist_iou
from detectron2.utils.registry import Registry
from .data.structures import DensePoseOutput
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
class DensePoseUVConfidenceType(Enum):
"""
Statistical model type for confidence learning, possible values:
- "iid_iso": statistically independent identically distributed residuals
with anisotropic covariance
- "indep_aniso": statistically independent residuals with anisotropic
covariances
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
# fmt: off
IID_ISO = "iid_iso"
INDEP_ANISO = "indep_aniso"
# fmt: on
@dataclass
class DensePoseUVConfidenceConfig:
"""
Configuration options for confidence on UV data
"""
enabled: bool = False
# lower bound on UV confidences
epsilon: float = 0.01
type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
@dataclass
class DensePoseConfidenceModelConfig:
"""
Configuration options for confidence models
"""
# confidence for U and V values
uv_confidence: DensePoseUVConfidenceConfig
@staticmethod
def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
return DensePoseConfidenceModelConfig(
uv_confidence=DensePoseUVConfidenceConfig(
enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
)
)
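# Note: `from_cfg` above reads MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.{ENABLED, EPSILON, TYPE};
# these keys are assumed to be registered among the config defaults elsewhere before a config
# is passed here.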
def initialize_module_params(module):
for name, param in module.named_parameters():
if "bias" in name:
nn.init.constant_(param, 0)
elif "weight" in name:
nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseDeepLabHead(nn.Module):
def __init__(self, cfg, input_channels):
super(DensePoseDeepLabHead, self).__init__()
# fmt: off
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
# fmt: on
pad_size = kernel_size // 2
n_channels = input_channels
self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
self.add_module("ASPP", self.ASPP)
if self.use_nonlocal:
self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
self.add_module("NLBlock", self.NLBlock)
# weight_init.c2_msra_fill(self.ASPP)
for i in range(self.n_stacked_convs):
norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
layer = Conv2d(
n_channels,
hidden_dim,
kernel_size,
stride=1,
padding=pad_size,
bias=not norm,
norm=norm_module,
)
weight_init.c2_msra_fill(layer)
n_channels = hidden_dim
layer_name = self._get_layer_name(i)
self.add_module(layer_name, layer)
self.n_out_channels = hidden_dim
# initialize_module_params(self)
def forward(self, features):
x0 = features
x = self.ASPP(x0)
if self.use_nonlocal:
x = self.NLBlock(x)
output = x
for i in range(self.n_stacked_convs):
layer_name = self._get_layer_name(i)
x = getattr(self, layer_name)(x)
x = F.relu(x)
output = x
return output
def _get_layer_name(self, i):
layer_name = "body_conv_fcn{}".format(i + 1)
return layer_name
# Copied from
# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
# See https://arxiv.org/pdf/1706.05587.pdf for details
class ASPPConv(nn.Sequential):
def __init__(self, in_channels, out_channels, dilation):
modules = [
nn.Conv2d(
in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
]
super(ASPPConv, self).__init__(*modules)
class ASPPPooling(nn.Sequential):
def __init__(self, in_channels, out_channels):
super(ASPPPooling, self).__init__(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
)
def forward(self, x):
size = x.shape[-2:]
x = super(ASPPPooling, self).forward(x)
return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
class ASPP(nn.Module):
def __init__(self, in_channels, atrous_rates, out_channels):
super(ASPP, self).__init__()
modules = []
modules.append(
nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
)
)
rate1, rate2, rate3 = tuple(atrous_rates)
modules.append(ASPPConv(in_channels, out_channels, rate1))
modules.append(ASPPConv(in_channels, out_channels, rate2))
modules.append(ASPPConv(in_channels, out_channels, rate3))
modules.append(ASPPPooling(in_channels, out_channels))
self.convs = nn.ModuleList(modules)
self.project = nn.Sequential(
nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
# nn.BatchNorm2d(out_channels),
nn.ReLU()
# nn.Dropout(0.5)
)
def forward(self, x):
res = []
for conv in self.convs:
res.append(conv(x))
res = torch.cat(res, dim=1)
return self.project(res)
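# Shape sketch for ASPP.forward: each of the five parallel branches (a 1x1 conv, three atrous
# 3x3 convs, and global pooling) maps (N, in_channels, H, W) -> (N, out_channels, H, W);
# concatenation yields (N, 5 * out_channels, H, W) and `project` reduces it back to
# (N, out_channels, H, W), preserving the spatial resolution.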
# copied from
# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
# See https://arxiv.org/abs/1711.07971 for details
class _NonLocalBlockND(nn.Module):
def __init__(
self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
):
super(_NonLocalBlockND, self).__init__()
assert dimension in [1, 2, 3]
self.dimension = dimension
self.sub_sample = sub_sample
self.in_channels = in_channels
self.inter_channels = inter_channels
if self.inter_channels is None:
self.inter_channels = in_channels // 2
if self.inter_channels == 0:
self.inter_channels = 1
if dimension == 3:
conv_nd = nn.Conv3d
max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
elif dimension == 2:
conv_nd = nn.Conv2d
max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
else:
conv_nd = nn.Conv1d
max_pool_layer = nn.MaxPool1d(kernel_size=2)
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
self.g = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
if bn_layer:
self.W = nn.Sequential(
conv_nd(
in_channels=self.inter_channels,
out_channels=self.in_channels,
kernel_size=1,
stride=1,
padding=0,
),
bn(32, self.in_channels),
)
nn.init.constant_(self.W[1].weight, 0)
nn.init.constant_(self.W[1].bias, 0)
else:
self.W = conv_nd(
in_channels=self.inter_channels,
out_channels=self.in_channels,
kernel_size=1,
stride=1,
padding=0,
)
nn.init.constant_(self.W.weight, 0)
nn.init.constant_(self.W.bias, 0)
self.theta = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
self.phi = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
if sub_sample:
self.g = nn.Sequential(self.g, max_pool_layer)
self.phi = nn.Sequential(self.phi, max_pool_layer)
def forward(self, x):
"""
:param x: (b, c, t, h, w)
:return:
"""
batch_size = x.size(0)
g_x = self.g(x).view(batch_size, self.inter_channels, -1)
g_x = g_x.permute(0, 2, 1)
theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
theta_x = theta_x.permute(0, 2, 1)
phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
f = torch.matmul(theta_x, phi_x)
f_div_C = F.softmax(f, dim=-1)
y = torch.matmul(f_div_C, g_x)
y = y.permute(0, 2, 1).contiguous()
y = y.view(batch_size, self.inter_channels, *x.size()[2:])
W_y = self.W(y)
z = W_y + x
return z
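# Computation sketch for _NonLocalBlockND.forward (embedded Gaussian attention):
# f = theta(x)^T @ phi(x) over (possibly sub-sampled) spatial positions, f_div_C = softmax(f),
# y = f_div_C @ g(x), and the output is the residual z = W(y) + x, so the block preserves the
# input shape.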
class NONLocalBlock2D(_NonLocalBlockND):
def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
super(NONLocalBlock2D, self).__init__(
in_channels,
inter_channels=inter_channels,
dimension=2,
sub_sample=sub_sample,
bn_layer=bn_layer,
)
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
def __init__(self, cfg, input_channels):
super(DensePoseV1ConvXHead, self).__init__()
# fmt: off
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
# fmt: on
pad_size = kernel_size // 2
n_channels = input_channels
for i in range(self.n_stacked_convs):
layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
layer_name = self._get_layer_name(i)
self.add_module(layer_name, layer)
n_channels = hidden_dim
self.n_out_channels = n_channels
initialize_module_params(self)
def forward(self, features):
x = features
output = x
for i in range(self.n_stacked_convs):
layer_name = self._get_layer_name(i)
x = getattr(self, layer_name)(x)
x = F.relu(x)
output = x
return output
def _get_layer_name(self, i):
layer_name = "body_conv_fcn{}".format(i + 1)
return layer_name
class DensePosePredictor(nn.Module):
def __init__(self, cfg, input_channels):
super(DensePosePredictor, self).__init__()
dim_in = input_channels
n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
self.ann_index_lowres = ConvTranspose2d(
dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.index_uv_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.u_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.v_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
self._initialize_confidence_estimation_layers(cfg, self.confidence_model_cfg, dim_in)
initialize_module_params(self)
def forward(self, head_outputs):
ann_index_lowres = self.ann_index_lowres(head_outputs)
index_uv_lowres = self.index_uv_lowres(head_outputs)
u_lowres = self.u_lowres(head_outputs)
v_lowres = self.v_lowres(head_outputs)
def interp2d(input):
return interpolate(
input, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
)
ann_index = interp2d(ann_index_lowres)
index_uv = interp2d(index_uv_lowres)
u = interp2d(u_lowres)
v = interp2d(v_lowres)
(
(sigma_1, sigma_2, kappa_u, kappa_v),
(sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres),
(ann_index, index_uv),
) = self._forward_confidence_estimation_layers(
self.confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv
)
return (
(ann_index, index_uv, u, v),
(ann_index_lowres, index_uv_lowres, u_lowres, v_lowres),
(sigma_1, sigma_2, kappa_u, kappa_v),
(sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres),
)
def _initialize_confidence_estimation_layers(
self, cfg: CfgNode, confidence_model_cfg: DensePoseConfidenceModelConfig, dim_in: int
):
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
if confidence_model_cfg.uv_confidence.enabled:
if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
self.sigma_2_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
self.sigma_2_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.kappa_u_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.kappa_v_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
else:
raise ValueError(
f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}"
)
def _forward_confidence_estimation_layers(
self, confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv
):
sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None
sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None
if confidence_model_cfg.uv_confidence.enabled:
if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
sigma_2_lowres = self.sigma_2_lowres(head_outputs)
sigma_2 = interp2d(sigma_2_lowres)
elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
sigma_2_lowres = self.sigma_2_lowres(head_outputs)
kappa_u_lowres = self.kappa_u_lowres(head_outputs)
kappa_v_lowres = self.kappa_v_lowres(head_outputs)
sigma_2 = interp2d(sigma_2_lowres)
kappa_u = interp2d(kappa_u_lowres)
kappa_v = interp2d(kappa_v_lowres)
else:
raise ValueError(
f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}"
)
return (
(sigma_1, sigma_2, kappa_u, kappa_v),
(sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres),
(ann_index, index_uv),
)
class DensePoseDataFilter(object):
def __init__(self, cfg):
self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
@torch.no_grad()
def __call__(self, proposals_with_targets):
"""
Filters proposals with targets to keep only the ones relevant for
DensePose training
proposals: list(Instances), each element of the list corresponds to
various instances (proposals, GT for boxes and densepose) for one
image
"""
proposals_filtered = []
for proposals_per_image in proposals_with_targets:
if not hasattr(proposals_per_image, "gt_densepose"):
continue
assert hasattr(proposals_per_image, "gt_boxes")
assert hasattr(proposals_per_image, "proposal_boxes")
gt_boxes = proposals_per_image.gt_boxes
est_boxes = proposals_per_image.proposal_boxes
# apply match threshold for densepose head
iou = matched_boxlist_iou(gt_boxes, est_boxes)
iou_select = iou > self.iou_threshold
proposals_per_image = proposals_per_image[iou_select]
assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
# filter out any target without densepose annotation
gt_densepose = proposals_per_image.gt_densepose
assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
selected_indices = [
i for i, dp_target in enumerate(gt_densepose) if dp_target is not None
]
if len(selected_indices) != len(gt_densepose):
proposals_per_image = proposals_per_image[selected_indices]
assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
proposals_filtered.append(proposals_per_image)
return proposals_filtered
def build_densepose_head(cfg, input_channels):
head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)
def build_densepose_predictor(cfg, input_channels):
predictor = DensePosePredictor(cfg, input_channels)
return predictor
def build_densepose_data_filter(cfg):
dp_filter = DensePoseDataFilter(cfg)
return dp_filter
def densepose_inference(densepose_outputs, densepose_confidences, detections):
"""
Infer dense pose estimate based on outputs from the DensePose head
and detections. The estimate for each detection instance is stored in its
"pred_densepose" attribute.
Args:
densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements:
- s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W),
- i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W),
- u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W),
- v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W),
where N is the total number of detections in a batch,
            A is the number of coarse segmentation labels
(e.g. 15 for coarse body parts + background),
C is the number of fine segmentation labels
(e.g. 25 for fine body parts + background),
W is the resolution along the X axis
H is the resolution along the Y axis
densepose_confidences (tuple(`torch.Tensor`)): iterable containing 4 elements:
- sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates
of size (N, C, H, W)
- sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates
of size (N, C, H, W)
- kappa_u (:obj: `torch.Tensor`): first component of confidence direction
vector of size (N, C, H, W)
- kappa_v (:obj: `torch.Tensor`): second component of confidence direction
vector of size (N, C, H, W)
detections (list[Instances]): A list of N Instances, where N is the number of images
in the batch. Instances are modified by this method: "pred_densepose" attribute
is added to each instance, the attribute contains the corresponding
DensePoseOutput object.
"""
# DensePose outputs: segmentation, body part indices, U, V
s, index_uv, u, v = densepose_outputs
sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences
k = 0
for detection in detections:
n_i = len(detection)
s_i = s[k : k + n_i]
index_uv_i = index_uv[k : k + n_i]
u_i = u[k : k + n_i]
v_i = v[k : k + n_i]
_local_vars = locals()
confidences = {
name: _local_vars[name]
for name in ("sigma_1", "sigma_2", "kappa_u", "kappa_v")
if _local_vars.get(name) is not None
}
densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences)
detection.pred_densepose = densepose_output_i
k += n_i
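# Note: the slicing above assumes that the batch dimension of the DensePose head outputs is
# ordered consistently with the concatenation of detections over images, so that the slice
# [k : k + n_i] selects exactly the outputs belonging to the i-th image.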
def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
"""
Computes utility values for linear interpolation at points v.
The points are given as normalized offsets in the source interval
(v0_src, v0_src + size_src), more precisely:
v = v0_src + v_norm * size_src / 256.0
The computed utilities include lower points v_lo, upper points v_hi,
interpolation weights v_w and flags j_valid indicating whether the
    points fall into the destination interval (v0_dst, v0_dst + size_dst).
Args:
v_norm (:obj: `torch.Tensor`): tensor of size N containing
normalized point offsets
v0_src (:obj: `torch.Tensor`): tensor of size N containing
left bounds of source intervals for normalized points
size_src (:obj: `torch.Tensor`): tensor of size N containing
source interval sizes for normalized points
v0_dst (:obj: `torch.Tensor`): tensor of size N containing
left bounds of destination intervals
size_dst (:obj: `torch.Tensor`): tensor of size N containing
destination interval sizes
size_z (int): interval size for data to be interpolated
Returns:
v_lo (:obj: `torch.Tensor`): int tensor of size N containing
indices of lower values used for interpolation, all values are
integers from [0, size_z - 1]
v_hi (:obj: `torch.Tensor`): int tensor of size N containing
indices of upper values used for interpolation, all values are
integers from [0, size_z - 1]
v_w (:obj: `torch.Tensor`): float tensor of size N containing
interpolation weights
        j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
            0 for points outside the destination interval
            (v0_dst, v0_dst + size_dst) and 1 otherwise
"""
v = v0_src + v_norm * size_src / 256.0
j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
v_grid = (v - v0_dst) * size_z / size_dst
v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
v_hi = (v_lo + 1).clamp(max=size_z - 1)
v_grid = torch.min(v_hi.float(), v_grid)
v_w = v_grid - v_lo.float()
return v_lo, v_hi, v_w, j_valid
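# A small worked example for _linear_interpolation_utilities (illustrative numbers): with
# v_norm = 128, v0_src = 10, size_src = 100, the point is v = 10 + 128 * 100 / 256 = 60;
# for a destination interval with v0_dst = 20, size_dst = 80 and size_z = 112, the grid
# coordinate is (60 - 20) * 112 / 80 = 56, so v_lo = 56, v_hi = 57, v_w = 0.0 and j_valid = 1.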
def _grid_sampling_utilities(
zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt, x_norm, y_norm, index_bbox
):
"""
Prepare tensors used in grid sampling.
Args:
        zh (int): height of the estimated tensor z_est from which values of Z
            are to be extracted for the points X, Y and channel indices I
        zw (int): width of the estimated tensor z_est
bbox_xywh_est (:obj: `torch.Tensor`): tensor of size (N, 4) containing
estimated bounding boxes in format XYWH
bbox_xywh_gt (:obj: `torch.Tensor`): tensor of size (N, 4) containing
matched ground truth bounding boxes in format XYWH
index_gt (:obj: `torch.Tensor`): tensor of size K with point labels for
ground truth points
x_norm (:obj: `torch.Tensor`): tensor of size K with X normalized
coordinates of ground truth points. Image X coordinates can be
obtained as X = Xbbox + x_norm * Wbbox / 255
y_norm (:obj: `torch.Tensor`): tensor of size K with Y normalized
coordinates of ground truth points. Image Y coordinates can be
obtained as Y = Ybbox + y_norm * Hbbox / 255
index_bbox (:obj: `torch.Tensor`): tensor of size K with bounding box
indices for each ground truth point. The values are thus in
[0, N-1]
Returns:
j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing
0 for points to be discarded and 1 for points to be selected
y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values
in z_est for each point
y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values
in z_est for each point
x_lo (:obj: `torch.Tensor`): int tensor of indices of left values
in z_est for each point
x_hi (:obj: `torch.Tensor`): int tensor of indices of right values
in z_est for each point
w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M;
contains upper-left value weight for each point
w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M;
contains upper-right value weight for each point
w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M;
contains lower-left value weight for each point
w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M;
contains lower-right value weight for each point
"""
x0_gt, y0_gt, w_gt, h_gt = bbox_xywh_gt[index_bbox].unbind(dim=1)
x0_est, y0_est, w_est, h_est = bbox_xywh_est[index_bbox].unbind(dim=1)
x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
x_norm, x0_gt, w_gt, x0_est, w_est, zw
)
y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
y_norm, y0_gt, h_gt, y0_est, h_est, zh
)
j_valid = jx_valid * jy_valid
w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)
w_ylo_xhi = x_w * (1.0 - y_w)
w_yhi_xlo = (1.0 - x_w) * y_w
w_yhi_xhi = x_w * y_w
return j_valid, y_lo, y_hi, x_lo, x_hi, w_ylo_xlo, w_ylo_xhi, w_yhi_xlo, w_yhi_xhi
def _extract_at_points_packed(
z_est,
index_bbox_valid,
slice_index_uv,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
):
"""
Extract ground truth values z_gt for valid point indices and estimated
values z_est using bilinear interpolation over top-left (y_lo, x_lo),
top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right
(y_hi, x_hi) values in z_est with corresponding weights:
w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
Use slice_index_uv to slice dim=1 in z_est
"""
z_est_sampled = (
z_est[index_bbox_valid, slice_index_uv, y_lo, x_lo] * w_ylo_xlo
+ z_est[index_bbox_valid, slice_index_uv, y_lo, x_hi] * w_ylo_xhi
+ z_est[index_bbox_valid, slice_index_uv, y_hi, x_lo] * w_yhi_xlo
+ z_est[index_bbox_valid, slice_index_uv, y_hi, x_hi] * w_yhi_xhi
)
return z_est_sampled
def _resample_data(
z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros"
):
"""
Args:
z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
resampled
bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
source bounding boxes in format XYWH
bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
destination bounding boxes in format XYWH
Return:
        zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
            with resampled values of z, where Hout and Wout are given by the
            hout and wout arguments
"""
n = bbox_xywh_src.size(0)
assert n == bbox_xywh_dst.size(0), (
"The number of "
"source ROIs for resampling ({}) should be equal to the number "
"of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
)
x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1)
x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1)
x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1
y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1
x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1
y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1
grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout
grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout
grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout)
grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout)
dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout)
dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout)
x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout)
y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout)
grid_x = grid_w_expanded * dx_expanded + x0_expanded
grid_y = grid_h_expanded * dy_expanded + y0_expanded
grid = torch.stack((grid_x, grid_y), dim=3)
# resample Z from (N, C, H, W) into (N, C, Hout, Wout)
zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
return zresampled
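# Note: F.grid_sample expects sampling coordinates normalized to [-1, 1] with respect to the
# source tensor; the *_norm values above map the destination box corners into that normalized
# source frame before building the (N, hout, wout, 2) sampling grid.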
def _extract_single_tensors_from_matches_one_image(
proposals_targets, bbox_with_dp_offset, bbox_global_offset
):
i_gt_all = []
x_norm_all = []
y_norm_all = []
u_gt_all = []
v_gt_all = []
s_gt_all = []
bbox_xywh_gt_all = []
bbox_xywh_est_all = []
    # i_bbox_all == k should be true for all data that corresponds
    # to bbox_xywh_gt[k] and bbox_xywh_est[k]
    # index k here is global wrt images
i_bbox_all = []
# at offset k (k is global) contains index of bounding box data
# within densepose output tensor
i_with_dp = []
boxes_xywh_est = proposals_targets.proposal_boxes.clone()
boxes_xywh_gt = proposals_targets.gt_boxes.clone()
n_i = len(boxes_xywh_est)
assert n_i == len(boxes_xywh_gt)
if n_i:
boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0]
boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1]
boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0]
boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1]
if hasattr(proposals_targets, "gt_densepose"):
densepose_gt = proposals_targets.gt_densepose
for k, box_xywh_est, box_xywh_gt, dp_gt in zip(
range(n_i), boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt
):
if (dp_gt is not None) and (len(dp_gt.x) > 0):
i_gt_all.append(dp_gt.i)
x_norm_all.append(dp_gt.x)
y_norm_all.append(dp_gt.y)
u_gt_all.append(dp_gt.u)
v_gt_all.append(dp_gt.v)
s_gt_all.append(dp_gt.segm.unsqueeze(0))
bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4))
bbox_xywh_est_all.append(box_xywh_est.view(-1, 4))
i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp))
i_bbox_all.append(i_bbox_k)
i_with_dp.append(bbox_global_offset + k)
return (
i_gt_all,
x_norm_all,
y_norm_all,
u_gt_all,
v_gt_all,
s_gt_all,
bbox_xywh_gt_all,
bbox_xywh_est_all,
i_bbox_all,
i_with_dp,
)
def _extract_single_tensors_from_matches(proposals_with_targets):
i_img = []
i_gt_all = []
x_norm_all = []
y_norm_all = []
u_gt_all = []
v_gt_all = []
s_gt_all = []
bbox_xywh_gt_all = []
bbox_xywh_est_all = []
i_bbox_all = []
i_with_dp_all = []
n = 0
for i, proposals_targets_per_image in enumerate(proposals_with_targets):
n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
if not n_i:
continue
(
i_gt_img,
x_norm_img,
y_norm_img,
u_gt_img,
v_gt_img,
s_gt_img,
bbox_xywh_gt_img,
bbox_xywh_est_img,
i_bbox_img,
i_with_dp_img,
) = _extract_single_tensors_from_matches_one_image( # noqa
proposals_targets_per_image, len(i_with_dp_all), n
)
i_gt_all.extend(i_gt_img)
x_norm_all.extend(x_norm_img)
y_norm_all.extend(y_norm_img)
u_gt_all.extend(u_gt_img)
v_gt_all.extend(v_gt_img)
s_gt_all.extend(s_gt_img)
bbox_xywh_gt_all.extend(bbox_xywh_gt_img)
bbox_xywh_est_all.extend(bbox_xywh_est_img)
i_bbox_all.extend(i_bbox_img)
i_with_dp_all.extend(i_with_dp_img)
i_img.extend([i] * len(i_with_dp_img))
n += n_i
# concatenate all data into a single tensor
if (n > 0) and (len(i_with_dp_all) > 0):
i_gt = torch.cat(i_gt_all, 0).long()
x_norm = torch.cat(x_norm_all, 0)
y_norm = torch.cat(y_norm_all, 0)
u_gt = torch.cat(u_gt_all, 0)
v_gt = torch.cat(v_gt_all, 0)
s_gt = torch.cat(s_gt_all, 0)
bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0)
bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0)
i_bbox = torch.cat(i_bbox_all, 0).long()
else:
i_gt = None
x_norm = None
y_norm = None
u_gt = None
v_gt = None
s_gt = None
bbox_xywh_gt = None
bbox_xywh_est = None
i_bbox = None
return (
i_img,
i_with_dp_all,
bbox_xywh_est,
bbox_xywh_gt,
i_gt,
x_norm,
y_norm,
u_gt,
v_gt,
s_gt,
i_bbox,
)
class IIDIsotropicGaussianUVLoss(nn.Module):
"""
Loss for the case of iid residuals with isotropic covariance:
$Sigma_i = sigma_i^2 I$
The loss (negative log likelihood) is then:
$1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
difference between estimated and ground truth UV values
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
def __init__(self, sigma_lower_bound: float):
super(IIDIsotropicGaussianUVLoss, self).__init__()
self.sigma_lower_bound = sigma_lower_bound
self.log2pi = math.log(2 * math.pi)
def forward(
self,
u: torch.Tensor,
v: torch.Tensor,
sigma_u: torch.Tensor,
target_u: torch.Tensor,
target_v: torch.Tensor,
):
# compute $\sigma_i^2$
# use sigma_lower_bound to avoid degenerate solution for variance
# (sigma -> 0)
sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
# compute \|delta_i\|^2
delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2
# the total loss from the formula above:
loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
return loss.sum()
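# A minimal sketch (not part of the library) showing how the loss above can be
# exercised on toy tensors; sigma_lower_bound=0.01 and the random inputs are
# arbitrary example values, not settings used by the model.
def _demo_iid_isotropic_uv_loss():
    loss_fn = IIDIsotropicGaussianUVLoss(sigma_lower_bound=0.01)
    n = 8
    u_est, v_est = torch.rand(n), torch.rand(n)
    sigma_u_raw = torch.zeros(n)  # softplus(0) + 0.01 gives sigma^2 ~= 0.70
    u_gt, v_gt = torch.rand(n), torch.rand(n)
    # returns the summed negative log likelihood over the n points
    return loss_fn(u_est, v_est, sigma_u_raw, u_gt, v_gt)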
class IndepAnisotropicGaussianUVLoss(nn.Module):
"""
Loss for the case of independent residuals with anisotropic covariances:
$Sigma_i = sigma_i^2 I + r_i r_i^T$
The loss (negative log likelihood) is then:
$1/2 sum_{i=1}^n (log(2 pi)
+ log sigma_i^2 (sigma_i^2 + ||r_i||^2)
+ ||delta_i||^2 / sigma_i^2
- <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
difference between estimated and ground truth UV values
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
def __init__(self, sigma_lower_bound: float):
super(IndepAnisotropicGaussianUVLoss, self).__init__()
self.sigma_lower_bound = sigma_lower_bound
self.log2pi = math.log(2 * math.pi)
def forward(
self,
u: torch.Tensor,
v: torch.Tensor,
sigma_u: torch.Tensor,
kappa_u_est: torch.Tensor,
kappa_v_est: torch.Tensor,
target_u: torch.Tensor,
target_v: torch.Tensor,
):
# compute $\sigma_i^2$
sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
# compute \|r_i\|^2
r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2
delta_u = u - target_u
delta_v = v - target_v
# compute \|delta_i\|^2
delta_sqnorm = delta_u ** 2 + delta_v ** 2
delta_u_r_u = delta_u * kappa_u_est
delta_v_r_v = delta_v * kappa_v_est
# compute the scalar product <delta_i, r_i>
delta_r = delta_u_r_u + delta_v_r_v
# compute squared scalar product <delta_i, r_i>^2
delta_r_sqnorm = delta_r ** 2
denom2 = sigma2 * (sigma2 + r_sqnorm2)
loss = 0.5 * (
self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
)
return loss.sum()
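# The anisotropic variant differs only in the extra kappa_u / kappa_v inputs,
# which parameterize the r_i vector from the docstring. A minimal sketch with
# toy values (not part of the library):
def _demo_indep_anisotropic_uv_loss():
    loss_fn = IndepAnisotropicGaussianUVLoss(sigma_lower_bound=0.01)
    n = 8
    u_est, v_est = torch.rand(n), torch.rand(n)
    sigma_u_raw = torch.zeros(n)
    kappa_u_est, kappa_v_est = torch.randn(n), torch.randn(n)
    u_gt, v_gt = torch.rand(n), torch.rand(n)
    return loss_fn(u_est, v_est, sigma_u_raw, kappa_u_est, kappa_v_est, u_gt, v_gt)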
class DensePoseLosses(object):
def __init__(self, cfg):
# fmt: off
self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
# fmt: on
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
self.confidence_model_cfg.uv_confidence.epsilon
)
elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
self.confidence_model_cfg.uv_confidence.epsilon
)
def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences):
losses = {}
# densepose outputs are computed for all images and all bounding boxes;
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
# the outputs will have size(0) == 3+1+2+1 == 7
s, index_uv, u, v = densepose_outputs
sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences
conf_type = self.confidence_model_cfg.uv_confidence.type
assert u.size(2) == v.size(2)
assert u.size(3) == v.size(3)
assert u.size(2) == index_uv.size(2)
assert u.size(3) == index_uv.size(3)
with torch.no_grad():
(
index_uv_img,
i_with_dp,
bbox_xywh_est,
bbox_xywh_gt,
index_gt_all,
x_norm,
y_norm,
u_gt_all,
v_gt_all,
s_gt,
index_bbox,
) = _extract_single_tensors_from_matches( # noqa
proposals_with_gt
)
n_batch = len(i_with_dp)
# NOTE: we need to keep the same computation graph on all the GPUs to
# perform reduction properly. Hence even if we have no data on one
# of the GPUs, we still need to generate the computation graph.
# Add fake (zero) loss in the form Tensor.sum() * 0
if not n_batch:
losses["loss_densepose_I"] = index_uv.sum() * 0
losses["loss_densepose_S"] = s.sum() * 0
if self.confidence_model_cfg.uv_confidence.enabled:
losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0
if conf_type == DensePoseUVConfidenceType.IID_ISO:
losses["loss_densepose_UV"] += sigma_2.sum() * 0
elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
losses["loss_densepose_UV"] += (
sigma_2.sum() + kappa_u.sum() + kappa_v.sum()
) * 0
else:
losses["loss_densepose_U"] = u.sum() * 0
losses["loss_densepose_V"] = v.sum() * 0
return losses
zh = u.size(2)
zw = u.size(3)
(
j_valid,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
) = _grid_sampling_utilities( # noqa
zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt_all, x_norm, y_norm, index_bbox
)
j_valid_fg = j_valid * (index_gt_all > 0)
u_gt = u_gt_all[j_valid_fg]
u_est_all = _extract_at_points_packed(
u[i_with_dp],
index_bbox,
index_gt_all,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
u_est = u_est_all[j_valid_fg]
v_gt = v_gt_all[j_valid_fg]
v_est_all = _extract_at_points_packed(
v[i_with_dp],
index_bbox,
index_gt_all,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
v_est = v_est_all[j_valid_fg]
index_uv_gt = index_gt_all[j_valid]
index_uv_est_all = _extract_at_points_packed(
index_uv[i_with_dp],
index_bbox,
slice(None),
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo[:, None],
w_ylo_xhi[:, None],
w_yhi_xlo[:, None],
w_yhi_xhi[:, None],
)
index_uv_est = index_uv_est_all[j_valid, :]
if self.confidence_model_cfg.uv_confidence.enabled:
sigma_2_est_all = _extract_at_points_packed(
sigma_2[i_with_dp],
index_bbox,
index_gt_all,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
sigma_2_est = sigma_2_est_all[j_valid_fg]
if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
kappa_u_est_all = _extract_at_points_packed(
kappa_u[i_with_dp],
index_bbox,
index_gt_all,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
kappa_u_est = kappa_u_est_all[j_valid_fg]
kappa_v_est_all = _extract_at_points_packed(
kappa_v[i_with_dp],
index_bbox,
index_gt_all,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
kappa_v_est = kappa_v_est_all[j_valid_fg]
# Resample everything to the estimated data size, no need to resample
# S_est then:
s_est = s[i_with_dp]
with torch.no_grad():
s_gt = _resample_data(
s_gt.unsqueeze(1),
bbox_xywh_gt,
bbox_xywh_est,
self.heatmap_size,
self.heatmap_size,
mode="nearest",
padding_mode="zeros",
).squeeze(1)
# add point-based losses:
if self.confidence_model_cfg.uv_confidence.enabled:
if conf_type == DensePoseUVConfidenceType.IID_ISO:
uv_loss = (
self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
* self.w_points
)
losses["loss_densepose_UV"] = uv_loss
elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
uv_loss = (
self.uv_loss_with_confidences(
u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
)
* self.w_points
)
losses["loss_densepose_UV"] = uv_loss
else:
raise ValueError(f"Unknown confidence model type: {conf_type}")
else:
u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points
losses["loss_densepose_U"] = u_loss
v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points
losses["loss_densepose_V"] = v_loss
index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part
losses["loss_densepose_I"] = index_uv_loss
if self.n_segm_chan == 2:
s_gt = s_gt > 0
s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm
losses["loss_densepose_S"] = s_loss
return losses
def build_densepose_losses(cfg):
losses = DensePoseLosses(cfg)
return losses
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import copy
import io
import itertools
import json
import logging
import os
from collections import OrderedDict
import torch
from fvcore.common.file_io import PathManager
from pycocotools.coco import COCO
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.logger import create_small_table
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
class DensePoseCOCOEvaluator(DatasetEvaluator):
def __init__(self, dataset_name, distributed, output_dir=None):
self._distributed = distributed
self._output_dir = output_dir
self._cpu_device = torch.device("cpu")
self._logger = logging.getLogger(__name__)
self._metadata = MetadataCatalog.get(dataset_name)
json_file = PathManager.get_local_path(self._metadata.json_file)
with contextlib.redirect_stdout(io.StringIO()):
self._coco_api = COCO(json_file)
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
It is a list of dict. Each dict corresponds to an image and
contains keys like "height", "width", "file_name", "image_id".
outputs: the outputs of a COCO model. It is a list of dicts with key
"instances" that contains :class:`Instances`.
The :class:`Instances` object needs to have `densepose` field.
"""
for input, output in zip(inputs, outputs):
instances = output["instances"].to(self._cpu_device)
boxes = instances.pred_boxes.tensor.clone()
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
json_results = prediction_to_json(instances, input["image_id"])
self._predictions.extend(json_results)
def evaluate(self):
if self._distributed:
synchronize()
predictions = all_gather(self._predictions)
predictions = list(itertools.chain(*predictions))
if not is_main_process():
return
else:
predictions = self._predictions
return copy.deepcopy(self._eval_predictions(predictions))
def _eval_predictions(self, predictions):
"""
Evaluate predictions on densepose.
Return results with the metrics of the tasks.
"""
self._logger.info("Preparing results for COCO format ...")
if self._output_dir:
file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
with open(file_path, "w") as f:
json.dump(predictions, f)
f.flush()
os.fsync(f.fileno())
self._logger.info("Evaluating predictions ...")
res = OrderedDict()
results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
res["densepose_gps"] = results_gps
res["densepose_gpsm"] = results_gpsm
return res
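# Hedged usage sketch (not part of the library): the evaluator plugs into
# detectron2's standard evaluation loop. The dataset name must be registered
# (e.g. one of this package's builtin DensePose datasets); `model`,
# `data_loader` and the output directory are assumptions of this example.
def _demo_run_densepose_evaluation(model, data_loader):
    from detectron2.evaluation import inference_on_dataset
    evaluator = DensePoseCOCOEvaluator(
        "densepose_coco_2014_minival_100", distributed=False, output_dir="./densepose_eval"
    )
    return inference_on_dataset(model, data_loader, evaluator)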
def prediction_to_json(instances, img_id):
"""
Args:
instances (Instances): the output of the model
img_id (str): the image id in COCO
Returns:
list[dict]: the results in densepose evaluation format
"""
scores = instances.scores.tolist()
results = []
for k in range(len(instances)):
densepose = instances.pred_densepose[k]
result = {
"image_id": img_id,
"category_id": 1, # densepose only has one class
"bbox": densepose[1],
"score": scores[k],
"densepose": densepose,
}
results.append(result)
return results
def _evaluate_predictions_on_coco(coco_gt, coco_results):
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
logger = logging.getLogger(__name__)
if len(coco_results) == 0: # cocoapi does not handle empty results very well
        logger.warning("No predictions from the model! Set scores to -1")
results_gps = {metric: -1 for metric in metrics}
results_gpsm = {metric: -1 for metric in metrics}
return results_gps, results_gpsm
coco_dt = coco_gt.loadRes(coco_results)
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
)
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
)
return results_gps, results_gpsm
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
"""
Args:
cfg (CfgNode):
model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
transform_data (DensePoseTransformData): contains symmetry label
transforms used for horizontal flip
tta_mapper (callable): takes a dataset dict and returns a list of
augmented versions of the dataset dict. Defaults to
`DatasetMapperTTA(cfg)`.
batch_size (int): batch the augmented images into this batch size for inference.
"""
self._transform_data = transform_data
super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
# the implementation follows closely the one from detectron2/modeling
def _inference_one_image(self, input):
"""
Args:
input (dict): one dataset dict
Returns:
dict: one output dict
"""
augmented_inputs, aug_vars = self._get_augmented_inputs(input)
# Detect boxes from all augmented versions
with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
# temporarily disable roi heads
all_boxes, all_scores, all_classes = self._get_augmented_boxes(
augmented_inputs, aug_vars
)
merged_instances = self._merge_detections(
all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
)
if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
# Use the detected boxes to obtain new fields
augmented_instances = self._rescale_detected_boxes(
augmented_inputs, merged_instances, aug_vars
)
# run forward on the detected boxes
outputs = self._batch_inference(
augmented_inputs, augmented_instances, do_postprocess=False
)
# Delete now useless variables to avoid being out of memory
del augmented_inputs, augmented_instances, merged_instances
# average the predictions
if self.cfg.MODEL.MASK_ON:
outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
if self.cfg.MODEL.DENSEPOSE_ON:
outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
# postprocess
output = self._detector_postprocess(outputs[0], aug_vars)
return {"instances": output}
else:
return {"instances": merged_instances}
def _reduce_pred_densepose(self, outputs, aug_vars):
for idx, output in enumerate(outputs):
if aug_vars["do_hflip"][idx]:
output.pred_densepose.hflip(self._transform_data)
# Less memory-intensive averaging
for attr in "SIUV":
setattr(
outputs[0].pred_densepose,
attr,
sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
)
return outputs[0].pred_densepose
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
from typing import Dict
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn as nn
from torch.nn import functional as F
from detectron2.layers import Conv2d, ShapeSpec, get_norm
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import select_foreground_proposals
from .densepose_head import (
build_densepose_data_filter,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
densepose_inference,
)
class Decoder(nn.Module):
"""
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into a single output.
"""
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
super(Decoder, self).__init__()
# fmt: off
self.in_features = in_features
feature_strides = {k: v.stride for k, v in input_shape.items()}
feature_channels = {k: v.channels for k, v in input_shape.items()}
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
# fmt: on
self.scale_heads = []
for in_feature in self.in_features:
head_ops = []
head_length = max(
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
)
for k in range(head_length):
conv = Conv2d(
feature_channels[in_feature] if k == 0 else conv_dims,
conv_dims,
kernel_size=3,
stride=1,
padding=1,
bias=not norm,
norm=get_norm(norm, conv_dims),
activation=F.relu,
)
weight_init.c2_msra_fill(conv)
head_ops.append(conv)
if feature_strides[in_feature] != self.common_stride:
head_ops.append(
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
)
self.scale_heads.append(nn.Sequential(*head_ops))
self.add_module(in_feature, self.scale_heads[-1])
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
weight_init.c2_msra_fill(self.predictor)
def forward(self, features):
for i, _ in enumerate(self.in_features):
if i == 0:
x = self.scale_heads[i](features[i])
else:
x = x + self.scale_heads[i](features[i])
x = self.predictor(x)
return x
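# A minimal construction sketch (not part of the library): the ShapeSpec strides
# and channels below are typical FPN values and the 256x256 input size is an
# arbitrary example. `cfg` is assumed to be a DensePose-enabled config whose
# ROI_DENSEPOSE_HEAD.DECODER_* options are present, with a common stride no
# larger than the finest input stride (the default is 4).
def _demo_decoder_forward(cfg):
    in_features = ["p2", "p3", "p4", "p5"]
    input_shape = {
        "p2": ShapeSpec(channels=256, stride=4),
        "p3": ShapeSpec(channels=256, stride=8),
        "p4": ShapeSpec(channels=256, stride=16),
        "p5": ShapeSpec(channels=256, stride=32),
    }
    decoder = Decoder(cfg, input_shape, in_features)
    features = [torch.rand(1, 256, 256 // s, 256 // s) for s in (4, 8, 16, 32)]
    # all levels are merged into a single map at the decoder's common stride
    return decoder(features)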
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
"""
A Standard ROIHeads which contains an addition of DensePose head.
"""
def __init__(self, cfg, input_shape):
super().__init__(cfg, input_shape)
self._init_densepose_head(cfg, input_shape)
def _init_densepose_head(self, cfg, input_shape):
# fmt: off
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
if not self.densepose_on:
return
self.densepose_data_filter = build_densepose_data_filter(cfg)
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
# fmt: on
if self.use_decoder:
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
else:
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
in_channels = [input_shape[f].channels for f in self.in_features][0]
if self.use_decoder:
self.decoder = Decoder(cfg, input_shape, self.in_features)
self.densepose_pooler = ROIPooler(
output_size=dp_pooler_resolution,
scales=dp_pooler_scales,
sampling_ratio=dp_pooler_sampling_ratio,
pooler_type=dp_pooler_type,
)
self.densepose_head = build_densepose_head(cfg, in_channels)
self.densepose_predictor = build_densepose_predictor(
cfg, self.densepose_head.n_out_channels
)
self.densepose_losses = build_densepose_losses(cfg)
def _forward_densepose(self, features, instances):
"""
Forward logic of the densepose prediction branch.
Args:
features (list[Tensor]): #level input features for densepose prediction
instances (list[Instances]): the per-image instances to train/predict densepose.
In training, they can be the proposals.
In inference, they can be the predicted boxes.
Returns:
In training, a dict of losses.
In inference, update `instances` with new fields "densepose" and return it.
"""
if not self.densepose_on:
return {} if self.training else instances
features = [features[f] for f in self.in_features]
if self.training:
proposals, _ = select_foreground_proposals(instances, self.num_classes)
proposals_dp = self.densepose_data_filter(proposals)
if len(proposals_dp) > 0:
# NOTE may deadlock in DDP if certain workers have empty proposals_dp
proposal_boxes = [x.proposal_boxes for x in proposals_dp]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, proposal_boxes)
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
densepose_loss_dict = self.densepose_losses(
proposals_dp, densepose_outputs, confidences
)
return densepose_loss_dict
else:
pred_boxes = [x.pred_boxes for x in instances]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, pred_boxes)
if len(features_dp) > 0:
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
else:
                # if no detections were made for these instances,
                # set densepose_outputs to empty tensors
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
densepose_outputs = tuple([empty_tensor] * 4)
confidences = tuple([empty_tensor] * 4)
densepose_inference(densepose_outputs, confidences, instances)
return instances
def forward(self, images, features, proposals, targets=None):
instances, losses = super().forward(images, features, proposals, targets)
del targets, images
if self.training:
losses.update(self._forward_densepose(features, instances))
return instances, losses
def forward_with_given_boxes(self, features, instances):
"""
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
        This is useful for downstream tasks where a box is known, but one needs to obtain
other attributes (outputs of other heads).
Test-time augmentation also uses this.
Args:
features: same as in `forward()`
instances (list[Instances]): instances to predict other outputs. Expect the keys
"pred_boxes" and "pred_classes" to exist.
Returns:
instances (list[Instances]):
the same `Instances` objects, with extra
fields such as `pred_masks` or `pred_keypoints`.
"""
instances = super().forward_with_given_boxes(features, instances)
instances = self._forward_densepose(features, instances)
return instances
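# Hedged configuration sketch (not part of the library): the registration above
# makes this head selectable by name in a standard detectron2 config. `cfg` is
# assumed to be fully set up, with the DensePose options already added.
def _demo_build_model_with_densepose_heads(cfg):
    from detectron2.modeling import build_model
    cfg.MODEL.ROI_HEADS.NAME = "DensePoseROIHeads"
    return build_model(cfg)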
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Any, Dict, Optional, Tuple
class EntrySelector(object):
"""
Base class for entry selectors
"""
@staticmethod
def from_string(spec: str) -> "EntrySelector":
if spec == "*":
return AllEntrySelector()
return FieldEntrySelector(spec)
class AllEntrySelector(EntrySelector):
"""
Selector that accepts all entries
"""
SPECIFIER = "*"
def __call__(self, entry):
return True
class FieldEntrySelector(EntrySelector):
"""
Selector that accepts only entries that match provided field
specifier(s). Only a limited set of specifiers is supported for now:
<specifiers>::=<specifier>[<comma><specifiers>]
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
<field_name> is a valid identifier
<type> ::= "int" | "str"
<equal> ::= "="
<comma> ::= ","
<type_delim> ::= ":"
<value_or_range> ::= <value> | <range>
<range> ::= <value><range_delim><value>
<range_delim> ::= "-"
<value> is a string without spaces and special symbols
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
"""
_SPEC_DELIM = ","
_TYPE_DELIM = ":"
_RANGE_DELIM = "-"
_EQUAL = "="
_ERROR_PREFIX = "Invalid field selector specifier"
class _FieldEntryValuePredicate(object):
"""
Predicate that checks strict equality for the specified entry field
"""
def __init__(self, name: str, typespec: str, value: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.value = value
def __call__(self, entry):
return entry[self.name] == self.type(self.value)
class _FieldEntryRangePredicate(object):
"""
Predicate that checks whether an entry field falls into the specified range
"""
def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.vmin = vmin
self.vmax = vmax
def __call__(self, entry):
return (entry[self.name] >= self.type(self.vmin)) and (
entry[self.name] <= self.type(self.vmax)
)
def __init__(self, spec: str):
self._predicates = self._parse_specifier_into_predicates(spec)
def __call__(self, entry: Dict[str, Any]):
for predicate in self._predicates:
if not predicate(entry):
return False
return True
def _parse_specifier_into_predicates(self, spec: str):
predicates = []
specs = spec.split(self._SPEC_DELIM)
for subspec in specs:
eq_idx = subspec.find(self._EQUAL)
if eq_idx > 0:
field_name_with_type = subspec[:eq_idx]
field_name, field_type = self._parse_field_name_type(field_name_with_type)
field_value_or_range = subspec[eq_idx + 1 :]
if self._is_range_spec(field_value_or_range):
vmin, vmax = self._get_range_spec(field_value_or_range)
predicate = FieldEntrySelector._FieldEntryRangePredicate(
field_name, field_type, vmin, vmax
)
else:
predicate = FieldEntrySelector._FieldEntryValuePredicate(
field_name, field_type, field_value_or_range
)
predicates.append(predicate)
elif eq_idx == 0:
self._parse_error(f'"{subspec}", field name is empty!')
else:
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
return predicates
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
if type_delim_idx > 0:
field_name = field_name_with_type[:type_delim_idx]
field_type = field_name_with_type[type_delim_idx + 1 :]
elif type_delim_idx == 0:
self._parse_error(f'"{field_name_with_type}", field name is empty!')
else:
field_name = field_name_with_type
field_type = None
return field_name, field_type
def _is_range_spec(self, field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
return delim_idx > 0
def _get_range_spec(self, field_value_or_range):
if self._is_range_spec(field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
vmin = field_value_or_range[:delim_idx]
vmax = field_value_or_range[delim_idx + 1 :]
return vmin, vmax
else:
            self._parse_error(f'"{field_value_or_range}", range of values expected!')
def _parse_error(self, msg):
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
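# A minimal usage sketch for the specifier grammar documented above; the field
# names ("image_id", "dataset") and the entries are invented for illustration.
def _demo_field_entry_selector():
    selector = EntrySelector.from_string("image_id:int=10-20,dataset=coco")
    entry_in = {"image_id": 15, "dataset": "coco"}
    entry_out = {"image_id": 42, "dataset": "coco"}
    return selector(entry_in), selector(entry_out)  # (True, False)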
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
def verbosity_to_level(verbosity):
if verbosity is not None:
if verbosity == 0:
return logging.WARNING
elif verbosity == 1:
return logging.INFO
elif verbosity >= 2:
return logging.DEBUG
return logging.WARNING
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from densepose import DensePoseTransformData
def load_for_dataset(dataset_name):
path = MetadataCatalog.get(dataset_name).densepose_transform_src
densepose_transform_data_fpath = PathManager.get_local_path(path)
return DensePoseTransformData.load(densepose_transform_data_fpath)
def load_from_cfg(cfg):
return load_for_dataset(cfg.DATASETS.TEST[0])