Commit 3144257c authored by mashun1's avatar mashun1
Browse files

catvton

parents
Pipeline #1744 failed with stages
in 0 seconds
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import csv
import logging
import numpy as np
from typing import Any, Callable, Dict, List, Optional, Union
import av
import torch
from torch.utils.data.dataset import Dataset
from detectron2.utils.file_io import PathManager
from ..utils import maybe_prepend_base_path
from .frame_selector import FrameSelector, FrameTsList
FrameList = List[av.frame.Frame] # pyre-ignore[16]
FrameTransform = Callable[[torch.Tensor], torch.Tensor]
def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
    """
    Traverses all keyframes of a video file. Returns a list of keyframe
    timestamps. Timestamps are counts in timebase units.

    Args:
        video_fpath (str): Video file path
        video_stream_idx (int): Video stream index (default: 0)
    Returns:
        List[int]: list of keyframe timestamps (timestamp is a count in
            timebase units)
    """
    try:
        with PathManager.open(video_fpath, "rb") as io:
            # pyre-fixme[16]: Module `av` has no attribute `open`.
            container = av.open(io, mode="r")
            stream = container.streams.video[video_stream_idx]
            keyframes = []
            pts = -1
            # Note: even though we request forward seeks for keyframes, sometimes
            # a keyframe in backwards direction is returned. We introduce tolerance
            # as a max count of ignored backward seeks
            tolerance_backward_seeks = 2
            while True:
                try:
                    # seek forward to the next keyframe at or after pts + 1
                    container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
                except av.AVError as e:
                    # the exception occurs when the video length is exceeded,
                    # we then return whatever data we've already collected
                    logger = logging.getLogger(__name__)
                    logger.debug(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
                    )
                    return keyframes
                except OSError as e:
                    # OS-level failure is treated as fatal: discard partial results
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"List keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
                    )
                    return []
                packet = next(container.demux(video=video_stream_idx))
                if packet.pts is not None and packet.pts <= pts:
                    # backward seek detected: the demuxed packet does not advance
                    # past the previous position; give up after repeated failures
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Video file {video_fpath}, stream {video_stream_idx}: "
                        f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
                        f"tolerance {tolerance_backward_seeks}."
                    )
                    tolerance_backward_seeks -= 1
                    if tolerance_backward_seeks == 0:
                        return []
                    pts += 1
                    continue
                # successful forward seek: reset the backward-seek tolerance
                tolerance_backward_seeks = 2
                pts = packet.pts
                if pts is None:
                    # no more timestamped packets - end of stream
                    return keyframes
                if packet.is_keyframe:
                    keyframes.append(pts)
            return keyframes
    except OSError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}"
        )
    except RuntimeError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"List keyframes: Error opening video file container {video_fpath}, "
            f"Runtime error: {e}"
        )
    return []
def read_keyframes(
    video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
) -> FrameList:  # pyre-ignore[11]
    """
    Reads keyframe data from a video file.

    Args:
        video_fpath (str): Video file path
        keyframes (List[int]): List of keyframe timestamps (as counts in
            timebase units to be used in container seek operations)
        video_stream_idx (int): Video stream index (default: 0)
    Returns:
        List[Frame]: list of frames that correspond to the specified timestamps
    """
    try:
        with PathManager.open(video_fpath, "rb") as io:
            # pyre-fixme[16]: Module `av` has no attribute `open`.
            container = av.open(io)
            stream = container.streams.video[video_stream_idx]
            frames = []
            for pts in keyframes:
                try:
                    # seek to the keyframe and decode exactly one frame
                    container.seek(pts, any_frame=False, stream=stream)
                    frame = next(container.decode(video=0))
                    frames.append(frame)
                except av.AVError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
                    )
                    container.close()
                    # best-effort: return the frames collected so far
                    return frames
                except OSError as e:
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error seeking video file {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
                    )
                    container.close()
                    return frames
                except StopIteration:
                    # the decoder produced no frame for this timestamp
                    logger = logging.getLogger(__name__)
                    logger.warning(
                        f"Read keyframes: Error decoding frame from {video_fpath}, "
                        f"video stream {video_stream_idx}, pts {pts}"
                    )
                    container.close()
                    return frames
            container.close()
            return frames
    except OSError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
        )
    except RuntimeError as e:
        logger = logging.getLogger(__name__)
        logger.warning(
            f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
        )
    return []
def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None):
    """
    Create a list of paths to video files from a text file.

    Args:
        video_list_fpath (str): path to a plain text file with the list of videos
        base_path (str): base path for entries from the video list (default: None)
    """
    with PathManager.open(video_list_fpath, "r") as io:
        # one video path per line; optionally prefixed with the base path
        return [maybe_prepend_base_path(base_path, str(line.strip())) for line in io]
def read_keyframe_helper_data(fpath: str):
    """
    Read keyframe data from a file in CSV format: the header should contain
    "video_id" and "keyframes" fields. Value specifications are:
      video_id: int
      keyframes: list(int)
    Example of contents:
      video_id,keyframes
      2,"[1,11,21,31,41,51,61,71,81]"

    Args:
        fpath (str): File containing keyframe data
    Return:
        video_id_to_keyframes (dict: int -> list(int)): for a given video ID it
            contains a list of keyframes for that video
    """
    video_id_to_keyframes = {}
    try:
        with PathManager.open(fpath, "r") as io:
            csv_reader = csv.reader(io)
            header = next(csv_reader)
            video_id_idx = header.index("video_id")
            keyframes_idx = header.index("keyframes")
            for row in csv_reader:
                video_id = int(row[video_id_idx])
                # Fix: validate with an explicit exception instead of `assert` -
                # asserts are stripped under `python -O`, which would silently let
                # duplicate entries overwrite each other. The raise is still caught
                # by the best-effort handler below, preserving the original
                # log-and-return-partial behavior.
                if video_id in video_id_to_keyframes:
                    raise ValueError(f"Duplicate keyframes entry for video {fpath}")
                # keyframes are serialized as "[v1,v2,...]"; strip the brackets and
                # split; "[]" (length 2) denotes an empty keyframe list
                video_id_to_keyframes[video_id] = (
                    [int(v) for v in row[keyframes_idx][1:-1].split(",")]
                    if len(row[keyframes_idx]) > 2
                    else []
                )
    except Exception as e:
        # deliberate best-effort: log and return whatever was parsed so far
        logger = logging.getLogger(__name__)
        logger.warning(f"Error reading keyframe helper data from {fpath}: {e}")
    return video_id_to_keyframes
class VideoKeyframeDataset(Dataset):
    """
    Dataset that provides keyframes for a set of videos.
    """

    # placeholder returned when a video yields no usable frames
    _EMPTY_FRAMES = torch.empty((0, 3, 1, 1))

    def __init__(
        self,
        video_list: List[str],
        category_list: Union[str, List[str], None] = None,
        frame_selector: Optional[FrameSelector] = None,
        transform: Optional[FrameTransform] = None,
        keyframe_helper_fpath: Optional[str] = None,
    ):
        """
        Dataset constructor

        Args:
            video_list (List[str]): list of paths to video files
            category_list (Union[str, List[str], None]): list of animal categories for each
                video file. If it is a string, or None, this applies to all videos
            frame_selector (Callable: KeyFrameList -> KeyFrameList):
                selects keyframes to process, keyframes are given by
                packet timestamps in timebase counts. If None, all keyframes
                are selected (default: None)
            transform (Callable: torch.Tensor -> torch.Tensor):
                transforms a batch of RGB images (tensors of size [B, 3, H, W]),
                returns a tensor of the same size. If None, no transform is
                applied (default: None)
            keyframe_helper_fpath (Optional[str]): CSV file with precomputed
                keyframes per video index; if None, keyframes are discovered by
                scanning each video file (default: None)
        """
        if type(category_list) is list:
            self.category_list = category_list
        else:
            # broadcast a single category (or None) to all videos
            self.category_list = [category_list] * len(video_list)
        assert len(video_list) == len(
            self.category_list
        ), "length of video and category lists must be equal"
        self.video_list = video_list
        self.frame_selector = frame_selector
        self.transform = transform
        self.keyframe_helper_data = (
            read_keyframe_helper_data(keyframe_helper_fpath)
            if keyframe_helper_fpath is not None
            else None
        )

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """
        Gets selected keyframes from a given video

        Args:
            idx (int): video index in the video list file
        Returns:
            A dictionary containing two keys:
                images (torch.Tensor): tensor of size [N, H, W, 3] or of size
                    defined by the transform that contains keyframes data
                categories (List[str]): categories of the frames
        """
        categories = [self.category_list[idx]]
        fpath = self.video_list[idx]
        # prefer precomputed keyframes when the helper has an entry for this index
        keyframes = (
            list_keyframes(fpath)
            if self.keyframe_helper_data is None or idx not in self.keyframe_helper_data
            else self.keyframe_helper_data[idx]
        )
        transform = self.transform
        frame_selector = self.frame_selector
        if not keyframes:
            return {"images": self._EMPTY_FRAMES, "categories": []}
        if frame_selector is not None:
            keyframes = frame_selector(keyframes)
        frames = read_keyframes(fpath, keyframes)
        if not frames:
            return {"images": self._EMPTY_FRAMES, "categories": []}
        frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames])
        frames = torch.as_tensor(frames, device=torch.device("cpu"))
        frames = frames[..., [2, 1, 0]]  # RGB -> BGR
        frames = frames.permute(0, 3, 1, 2).float()  # NHWC -> NCHW
        if transform is not None:
            frames = transform(frames)
        return {"images": frames, "categories": categories}

    def __len__(self):
        # one dataset entry per video
        return len(self.video_list)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .trainer import Trainer
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import logging
import os
from collections import OrderedDict
from typing import List, Optional, Union
import torch
from torch import nn
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import (
DatasetEvaluator,
DatasetEvaluators,
inference_on_dataset,
print_csv_format,
)
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
from detectron2.utils import comm
from detectron2.utils.events import EventWriter, get_event_storage
from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
from densepose.data import (
DatasetMapper,
build_combined_loader,
build_detection_test_loader,
build_detection_train_loader,
build_inference_based_loaders,
has_inference_based_loaders,
)
from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
from densepose.modeling.cse import Embedder
class SampleCountingLoader:
    """
    Wraps a data loader; while iterating, records the number of instances
    per source dataset for each batch into the event storage.
    """

    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        inner = iter(self.loader)
        storage = get_event_storage()
        while True:
            try:
                batch = next(inner)
            except StopIteration:
                return
            # count instances per dataset within this batch
            counts = {}
            for sample in batch:
                name = sample["dataset"]
                counts[name] = counts.get(name, 0) + len(sample["instances"])
            for name in counts:
                storage.put_scalar(f"batch/{name}", counts[name])
            yield batch
class SampleCountMetricPrinter(EventWriter):
    """
    Event writer that logs the smoothed per-dataset batch sample counts
    (the "batch/*" scalars) as a single info line.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        # average over the last 20 recorded values of each "batch/*" history
        stats = [
            f"{key} {buf.avg(20)}"
            for key, buf in storage.histories().items()
            if key.startswith("batch/")
        ]
        self.logger.info(", ".join(stats))
class Trainer(DefaultTrainer):
    """
    DensePose trainer: extends detectron2's DefaultTrainer with DensePose
    evaluators, combined / inference-based train loaders, sample-count
    logging and test-time-augmentation evaluation.
    """

    @classmethod
    def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
        """
        Return the CSE embedder from the model's ROI heads, if present.

        Args:
            model (nn.Module): model, possibly wrapped in DistributedDataParallel
        Returns:
            Optional[Embedder]: the embedder, or None when the model has none
        """
        # unwrap DDP to reach the underlying model before probing attributes
        if isinstance(model, nn.parallel.DistributedDataParallel):
            model = model.module
        if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
            return model.roi_heads.embedder
        return None

    # TODO: the only reason to copy the base class code here is to pass the embedder from
    # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
    @classmethod
    def test(
        cls,
        cfg: CfgNode,
        model: nn.Module,
        evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
    ):
        """
        Args:
            cfg (CfgNode):
            model (nn.Module):
            evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
                :meth:`build_evaluator`. Otherwise, must have the same length as
                ``cfg.DATASETS.TEST``.

        Returns:
            dict: a dict of result metrics
        """
        logger = logging.getLogger(__name__)
        if isinstance(evaluators, DatasetEvaluator):
            evaluators = [evaluators]
        if evaluators is not None:
            assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
                len(cfg.DATASETS.TEST), len(evaluators)
            )

        results = OrderedDict()
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            data_loader = cls.build_test_loader(cfg, dataset_name)
            # When evaluators are passed in as arguments,
            # implicitly assume that evaluators can be created before data_loader.
            if evaluators is not None:
                evaluator = evaluators[idx]
            else:
                try:
                    embedder = cls.extract_embedder_from_model(model)
                    evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
                except NotImplementedError:
                    # fix: logger.warn is a deprecated alias of logger.warning
                    logger.warning(
                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                        "or implement its `build_evaluator` method."
                    )
                    results[dataset_name] = {}
                    continue
            # in distributed inference mode every process runs evaluation;
            # otherwise only the main process does
            if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
                results_i = inference_on_dataset(model, data_loader, evaluator)
            else:
                results_i = {}
            results[dataset_name] = results_i
            if comm.is_main_process():
                assert isinstance(
                    results_i, dict
                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
                    results_i
                )
                logger.info("Evaluation results for {} in csv format:".format(dataset_name))
                print_csv_format(results_i)

        if len(results) == 1:
            # single test dataset: return its metrics directly
            results = list(results.values())[0]
        return results

    @classmethod
    def build_evaluator(
        cls,
        cfg: CfgNode,
        dataset_name: str,
        output_folder: Optional[str] = None,
        embedder: Optional[Embedder] = None,
    ) -> DatasetEvaluators:
        """
        Build the COCO evaluator (and, when DensePose is enabled, the
        DensePose COCO evaluator) for the given dataset.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluators = []
        distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
        # Note: we currently use COCO evaluator for both COCO and LVIS datasets
        # to have compatible metrics. LVIS bbox evaluator could also be used
        # with an adapter to properly handle filtered / mapped categories
        # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        # if evaluator_type == "coco":
        #     evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        # elif evaluator_type == "lvis":
        #     evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
        evaluators.append(
            Detectron2COCOEvaluatorAdapter(
                dataset_name, output_dir=output_folder, distributed=distributed
            )
        )
        if cfg.MODEL.DENSEPOSE_ON:
            storage = build_densepose_evaluator_storage(cfg, output_folder)
            evaluators.append(
                DensePoseCOCOEvaluator(
                    dataset_name,
                    distributed,
                    output_folder,
                    evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
                    min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
                    storage=storage,
                    embedder=embedder,
                    should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
                    mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
                )
            )
        return DatasetEvaluators(evaluators)

    @classmethod
    def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
        """
        Build an SGD optimizer with separate learning-rate factors for the
        CSE "features" and "embeddings" parameter groups.
        """
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
            overrides={
                "features": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
                },
                "embeddings": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
                },
            },
        )
        optimizer = torch.optim.SGD(
            params,
            cfg.SOLVER.BASE_LR,
            momentum=cfg.SOLVER.MOMENTUM,
            nesterov=cfg.SOLVER.NESTEROV,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )
        # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
        return maybe_add_gradient_clipping(cfg, optimizer)

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        """Build the test loader with a DensePose-aware dataset mapper."""
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Build the train loader; when inference-based (bootstrap) loaders are
        configured, combine them with the base loader and wrap the result
        with per-dataset sample counting.
        """
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        # build the bootstrap model that generates pseudo-labels
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader

    def build_writers(self):
        """Extend the default writers with the batch sample-count printer."""
        writers = super().build_writers()
        writers.append(SampleCountMetricPrinter())
        return writers

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        """
        Evaluate the model with test-time augmentation; metric keys are
        suffixed with "_TTA".
        """
        logger = logging.getLogger("detectron2.trainer")
        # In the end of training, run an evaluation with TTA
        # Only support some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        model = DensePoseGeneralizedRCNNWithTTA(
            cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
        )
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)  # pyre-ignore[6]
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .evaluator import DensePoseCOCOEvaluator
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from detectron2.data.catalog import Metadata
from detectron2.evaluation import COCOEvaluator
from densepose.data.datasets.coco import (
get_contiguous_id_to_category_id_map,
maybe_filter_categories_cocoapi,
)
def _maybe_add_iscrowd_annotations(cocoapi) -> None:
for ann in cocoapi.dataset["annotations"]:
if "iscrowd" not in ann:
ann["iscrowd"] = 0
class Detectron2COCOEvaluatorAdapter(COCOEvaluator):
    """
    COCO evaluator adapted for DensePose datasets: filters categories in the
    COCO API, adds missing "iscrowd" flags and de-duplicates category metadata
    for categories that are mapped to the same contiguous id.
    """

    def __init__(
        self,
        dataset_name,
        output_dir=None,
        distributed=True,
    ):
        super().__init__(dataset_name, output_dir=output_dir, distributed=distributed)
        maybe_filter_categories_cocoapi(dataset_name, self._coco_api)
        # COCO evaluation expects "iscrowd" on every annotation
        _maybe_add_iscrowd_annotations(self._coco_api)
        # substitute category metadata to account for categories
        # that are mapped to the same contiguous id
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            self._maybe_substitute_metadata()

    def _maybe_substitute_metadata(self):
        # Replace the dataset-id -> contiguous-id mapping with an injective one
        # (keep only pairs that round-trip through the inverse map), so inverse
        # lookups during evaluation are well-defined.
        cont_id_2_cat_id = get_contiguous_id_to_category_id_map(self._metadata)
        cat_id_2_cont_id = self._metadata.thing_dataset_id_to_contiguous_id
        if len(cont_id_2_cat_id) == len(cat_id_2_cont_id):
            # the mapping is already injective - nothing to substitute
            return
        cat_id_2_cont_id_injective = {}
        for cat_id, cont_id in cat_id_2_cont_id.items():
            if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id):
                cat_id_2_cont_id_injective[cat_id] = cont_id
        # clone the metadata object, overriding only the id mapping
        metadata_new = Metadata(name=self._metadata.name)
        for key, value in self._metadata.__dict__.items():
            if key == "thing_dataset_id_to_contiguous_id":
                setattr(metadata_new, key, cat_id_2_cont_id_injective)
            else:
                setattr(metadata_new, key, value)
        self._metadata = metadata_new
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# This is a modified version of cocoeval.py where we also have the densepose evaluation.
# pyre-unsafe
__author__ = "tsungyi"
import copy
import datetime
import logging
import numpy as np
import pickle
import time
from collections import defaultdict
from enum import Enum
from typing import Any, Dict, Tuple
import scipy.spatial.distance as ssd
import torch
import torch.nn.functional as F
from pycocotools import mask as maskUtils
from scipy.io import loadmat
from scipy.ndimage import zoom as spzoom
from detectron2.utils.file_io import PathManager
from densepose.converters.chart_output_to_chart_result import resample_uv_tensors_to_bbox
from densepose.converters.segm_to_mask import (
resample_coarse_segm_tensor_to_bbox,
resample_fine_and_coarse_segm_tensors_to_bbox,
)
from densepose.modeling.cse.utils import squared_euclidean_distance_matrix
from densepose.structures import DensePoseDataRelative
from densepose.structures.mesh import create_mesh
logger = logging.getLogger(__name__)
class DensePoseEvalMode(str, Enum):
    """Scoring mode used when matching DensePose detections to ground truth."""

    # use both masks and geodesic distances (GPS * IOU) to compute scores
    GPSM = "gpsm"
    # use only geodesic distances (GPS) to compute scores
    GPS = "gps"
    # use only masks (IOU) to compute scores
    IOU = "iou"
class DensePoseDataMode(str, Enum):
    """Which IUV data (estimated vs ground truth) is used during evaluation."""

    # use estimated IUV data (default mode)
    IUV_DT = "iuvdt"
    # use ground truth IUV data
    IUV_GT = "iuvgt"
    # use ground truth labels I and set UV to 0
    I_GT_UV_0 = "igtuv0"
    # use ground truth labels I and estimated UV coordinates
    I_GT_UV_DT = "igtuvdt"
    # use estimated labels I and set UV to 0
    I_DT_UV_0 = "idtuv0"
class DensePoseCocoEval:
# Interface for evaluating detection on the Microsoft COCO dataset.
#
# The usage for CocoEval is as follows:
# cocoGt=..., cocoDt=... # load dataset and results
# E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
# E.params.recThrs = ...; # set parameters as desired
# E.evaluate(); # run per image evaluation
# E.accumulate(); # accumulate per image results
# E.summarize(); # display summary metrics of results
# For example usage see evalDemo.m and http://mscoco.org/.
#
# The evaluation parameters are as follows (defaults in brackets):
# imgIds - [all] N img ids to use for evaluation
# catIds - [all] K cat ids to use for evaluation
# iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
# recThrs - [0:.01:1] R=101 recall thresholds for evaluation
# areaRng - [...] A=4 object area ranges for evaluation
# maxDets - [1 10 100] M=3 thresholds on max detections per image
# iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose'
# iouType replaced the now DEPRECATED useSegm parameter.
# useCats - [1] if true use category labels for evaluation
# Note: if useCats=0 category labels are ignored as in proposal scoring.
# Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
#
# evaluate(): evaluates detections on every image and every category and
# concats the results into the "evalImgs" with fields:
# dtIds - [1xD] id for each of the D detections (dt)
# gtIds - [1xG] id for each of the G ground truths (gt)
# dtMatches - [TxD] matching gt id at each IoU or 0
# gtMatches - [TxG] matching dt id at each IoU or 0
# dtScores - [1xD] confidence of each dt
# gtIgnore - [1xG] ignore flag for each gt
# dtIgnore - [TxD] ignore flag for each dt at each IoU
#
# accumulate(): accumulates the per-image, per-category evaluation
# results in "evalImgs" into the dictionary "eval" with fields:
# params - parameters used for evaluation
# date - date evaluation was performed
# counts - [T,R,K,A,M] parameter dimensions (see above)
# precision - [TxRxKxAxM] precision for every evaluation setting
# recall - [TxKxAxM] max recall for every evaluation setting
# Note: precision and recall==-1 for settings with no gt objects.
#
# See also coco, mask, pycocoDemo, pycocoEvalDemo
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
    def __init__(
        self,
        cocoGt=None,
        cocoDt=None,
        iouType: str = "densepose",
        multi_storage=None,
        embedder=None,
        dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS,
        dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT,
    ):
        """
        Initialize CocoEval using coco APIs for gt and dt
        :param cocoGt: coco object with ground truth annotations
        :param cocoDt: coco object with detection results
        :param iouType: one of 'segm', 'bbox', 'keypoints' or 'densepose'
        :param multi_storage: storage with DensePose evaluation results
            (presumably produced by distributed inference; verify against callers)
        :param embedder: CSE embedder used to compute vertex embeddings
        :param dpEvalMode: scoring mode (GPS / GPSM / IOU)
        :param dpDataMode: which IUV data to use for evaluation
        :return: None
        """
        self.cocoGt = cocoGt  # ground truth COCO API
        self.cocoDt = cocoDt  # detections COCO API
        self.multi_storage = multi_storage
        self.embedder = embedder
        self._dpEvalMode = dpEvalMode
        self._dpDataMode = dpDataMode
        self.evalImgs = defaultdict(list)  # per-image per-category eval results [KxAxI]
        self.eval = {}  # accumulated evaluation results
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        self.params = Params(iouType=iouType)  # parameters
        self._paramsEval = {}  # parameters for evaluation
        self.stats = []  # result summarization
        self.ious = {}  # ious between all gts and dts
        if cocoGt is not None:
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())
        # overlap thresholds above which boxes / UV masks falling into an
        # image's ignore region are discarded
        self.ignoreThrBB = 0.7
        self.ignoreThrUV = 0.9
    def _loadGEval(self):
        """
        Load geodesic-evaluation data for the SMPL model: subdivided-mesh UV
        coordinates, vertex-index transform and the pairwise geodesic distance
        matrix (fetched and cached via PathManager).
        """
        smpl_subdiv_fpath = PathManager.get_local_path(
            "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat"
        )
        pdist_transform_fpath = PathManager.get_local_path(
            "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat"
        )
        pdist_matrix_fpath = PathManager.get_local_path(
            "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120
        )
        SMPL_subdiv = loadmat(smpl_subdiv_fpath)
        self.PDIST_transform = loadmat(pdist_transform_fpath)
        self.PDIST_transform = self.PDIST_transform["index"].squeeze()
        UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze()
        # closest-vertex indices are 1-based
        ClosestVertInds = np.arange(UV.shape[1]) + 1
        self.Part_UVs = []
        self.Part_ClosestVertInds = []
        # group UVs and vertex indices by the 24 body parts
        for i in np.arange(24):
            self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)])
            self.Part_ClosestVertInds.append(
                ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]
            )
        with open(pdist_matrix_fpath, "rb") as hFile:
            # latin1 encoding is needed to unpickle numpy arrays saved with Python 2
            arrays = pickle.load(hFile, encoding="latin1")
        self.Pdist_matrix = arrays["Pdist_matrix"]
        self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze())
        # Mean geodesic distances for parts.
        self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150])
        # Coarse Part labels.
        self.CoarseParts = np.array(
            [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8]
        )
    def _prepare(self):
        """
        Prepare ._gts and ._dts for evaluation based on params
        :return: None
        """

        def _toMask(anns, coco):
            # modify ann['segmentation'] by reference
            for ann in anns:
                # safeguard for invalid segmentation annotation;
                # annotations containing empty lists exist in the posetrack
                # dataset. This is not a correct segmentation annotation
                # in terms of COCO format; we need to deal with it somehow
                segm = ann["segmentation"]
                if type(segm) is list and len(segm) == 0:
                    ann["segmentation"] = None
                    continue
                rle = coco.annToRLE(ann)
                ann["segmentation"] = rle

        def _getIgnoreRegion(iid, coco):
            # decode the union of the image's ignore-region polygons into a
            # binary mask; None when the image defines no ignore regions
            img = coco.imgs[iid]
            if "ignore_regions_x" not in img.keys():
                return None
            if len(img["ignore_regions_x"]) == 0:
                return None
            # interleave x/y coordinates into flat [x1, y1, x2, y2, ...] polygons
            rgns_merged = [
                [v for xy in zip(region_x, region_y) for v in xy]
                for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"])
            ]
            rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"])
            rle = maskUtils.merge(rles)
            return maskUtils.decode(rle)

        def _checkIgnore(dt, iregion):
            # True if the annotation should be kept, i.e. it does not fall
            # (mostly) inside the image's ignore region
            if iregion is None:
                return True
            bb = np.array(dt["bbox"]).astype(int)
            x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]
            x2 = min([x2, iregion.shape[1]])
            y2 = min([y2, iregion.shape[0]])
            if bb[2] * bb[3] == 0:
                # degenerate (zero-area) box
                return False
            crop_iregion = iregion[y1:y2, x1:x2]
            if crop_iregion.sum() == 0:
                return True
            if "densepose" not in dt.keys():  # filtering boxes
                return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB
            # filtering UVs
            ignoremask = np.require(crop_iregion, requirements=["F"])
            mask = self._extract_mask(dt)
            uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
            uvmask_ = maskUtils.encode(uvmask)
            ignoremask_ = maskUtils.encode(ignoremask)
            uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0]
            return uviou < self.ignoreThrUV

        p = self.params
        if p.useCats:
            gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
            dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        else:
            gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
            dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
        # cache image sizes, used for pasting per-box masks into image canvases
        imns = self.cocoGt.loadImgs(p.imgIds)
        self.size_mapping = {}
        for im in imns:
            self.size_mapping[im["id"]] = [im["height"], im["width"]]
        # if iouType == 'uv', add point gt annotations
        if p.iouType == "densepose":
            self._loadGEval()
        # convert ground truth to mask if iouType == 'segm'
        if p.iouType == "segm":
            _toMask(gts, self.cocoGt)
            _toMask(dts, self.cocoDt)
        # set ignore flag
        for gt in gts:
            gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
            gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
            if p.iouType == "keypoints":
                gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
            if p.iouType == "densepose":
                # gt without DensePose point annotations ("dp_x") are ignored
                gt["ignore"] = ("dp_x" in gt) == 0
            if p.iouType == "segm":
                gt["ignore"] = gt["segmentation"] is None
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        self._igrgns = defaultdict(list)  # per-image ignore-region masks
        for gt in gts:
            iid = gt["image_id"]
            if iid not in self._igrgns.keys():
                self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt)
            if _checkIgnore(gt, self._igrgns[iid]):
                self._gts[iid, gt["category_id"]].append(gt)
        for dt in dts:
            iid = dt["image_id"]
            if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]):
                self._dts[iid, dt["category_id"]].append(dt)
        self.evalImgs = defaultdict(list)  # per-image per-category evaluation results
        self.eval = {}  # accumulated evaluation results
    def evaluate(self):
        """
        Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
        :return: None
        """
        tic = time.time()
        logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType))
        p = self.params
        # add backward compatibility if useSegm is specified in params
        if p.useSegm is not None:
            p.iouType = "segm" if p.useSegm == 1 else "bbox"
            logger.info("useSegm (deprecated) is not None. Running DensePose evaluation")
        p.imgIds = list(np.unique(p.imgIds))
        if p.useCats:
            p.catIds = list(np.unique(p.catIds))
        p.maxDets = sorted(p.maxDets)
        self.params = p
        self._prepare()
        # loop through images, area range, max detection number
        catIds = p.catIds if p.useCats else [-1]
        # select the IoU computation routine for the configured evaluation type
        if p.iouType in ["segm", "bbox"]:
            computeIoU = self.computeIoU
        elif p.iouType == "keypoints":
            computeIoU = self.computeOks
        elif p.iouType == "densepose":
            computeIoU = self.computeOgps
            if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}:
                # mask IoUs are additionally required for GPSM / IOU scoring
                self.real_ious = {
                    (imgId, catId): self.computeDPIoU(imgId, catId)
                    for imgId in p.imgIds
                    for catId in catIds
                }
        self.ious = {
            (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
        }
        evaluateImg = self.evaluateImg
        maxDet = p.maxDets[-1]
        self.evalImgs = [
            evaluateImg(imgId, catId, areaRng, maxDet)
            for catId in catIds
            for areaRng in p.areaRng
            for imgId in p.imgIds
        ]
        # snapshot the parameters actually used, for accumulate()
        self._paramsEval = copy.deepcopy(self.params)
        toc = time.time()
        logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic))
def getDensePoseMask(self, polys):
maskGen = np.zeros([256, 256])
stop = min(len(polys) + 1, 15)
for i in range(1, stop):
if polys[i - 1]:
currentMask = maskUtils.decode(polys[i - 1])
maskGen[currentMask > 0] = i
return maskGen
    def _generate_rlemask_on_image(self, mask, imgId, data):
        """
        Paste a per-box binary mask into a full-image canvas and RLE-encode it.

        Args:
            mask: binary mask defined within the annotation's box (may be None,
                in which case an all-zero image mask is encoded)
            imgId: COCO image id, used to look up the image size
            data: annotation dict with a "bbox" entry in XYWH format
        Returns:
            COCO RLE encoding of the resulting full-image binary mask
        """
        bbox_xywh = np.array(data["bbox"])
        x, y, w, h = bbox_xywh
        im_h, im_w = self.size_mapping[imgId]
        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
        if mask is not None:
            # clip the paste region to both the image bounds and the mask extent
            x0 = max(int(x), 0)
            x1 = min(int(x + w), im_w, int(x) + mask.shape[1])
            y0 = max(int(y), 0)
            y1 = min(int(y + h), im_h, int(y) + mask.shape[0])
            y = int(y)
            x = int(x)
            im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x]
        # pycocotools requires a Fortran-ordered uint8 array for encoding
        im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"])
        rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0]
        return rle_mask
    def computeDPIoU(self, imgId, catId):
        """
        Compute the mask IoU matrix between detections and ground truth objects
        of one image/category, converting every annotation to a full-image RLE
        mask first. Returns [] when there are no gts and no dts, otherwise a
        [D, G] matrix from maskUtils.iou.
        """
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return []
        # sort detections by decreasing score and keep at most maxDets
        inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
        dt = [dt[i] for i in inds]
        if len(dt) > p.maxDets[-1]:
            dt = dt[0 : p.maxDets[-1]]
        gtmasks = []
        for g in gt:
            if DensePoseDataRelative.S_KEY in g:
                # convert DensePose mask to a binary mask
                mask = np.minimum(self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]), 1.0)
                # resize the 256x256 annotation mask to the GT box size
                _, _, w, h = g["bbox"]
                scale_x = float(max(w, 1)) / mask.shape[1]
                scale_y = float(max(h, 1)) / mask.shape[0]
                mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False)
                mask = np.array(mask > 0.5, dtype=np.uint8)
                rle_mask = self._generate_rlemask_on_image(mask, imgId, g)
            elif "segmentation" in g:
                segmentation = g["segmentation"]
                if isinstance(segmentation, list) and segmentation:
                    # polygons
                    im_h, im_w = self.size_mapping[imgId]
                    rles = maskUtils.frPyObjects(segmentation, im_h, im_w)
                    rle_mask = maskUtils.merge(rles)
                elif isinstance(segmentation, dict):
                    if isinstance(segmentation["counts"], list):
                        # uncompressed RLE
                        im_h, im_w = self.size_mapping[imgId]
                        rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w)
                    else:
                        # compressed RLE
                        rle_mask = segmentation
                else:
                    # unrecognized segmentation format: fall back to an empty mask
                    rle_mask = self._generate_rlemask_on_image(None, imgId, g)
            else:
                # no segmentation data at all: empty mask
                rle_mask = self._generate_rlemask_on_image(None, imgId, g)
            gtmasks.append(rle_mask)
        dtmasks = []
        for d in dt:
            mask = self._extract_mask(d)
            mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
            rle_mask = self._generate_rlemask_on_image(mask, imgId, d)
            dtmasks.append(rle_mask)
        # compute iou between each dt and gt region
        iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
        iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd)
        return iousDP
def computeIoU(self, imgId, catId):
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
if len(gt) == 0 and len(dt) == 0:
return []
inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in inds]
if len(dt) > p.maxDets[-1]:
dt = dt[0 : p.maxDets[-1]]
if p.iouType == "segm":
g = [g["segmentation"] for g in gt if g["segmentation"] is not None]
d = [d["segmentation"] for d in dt if d["segmentation"] is not None]
elif p.iouType == "bbox":
g = [g["bbox"] for g in gt]
d = [d["bbox"] for d in dt]
else:
raise Exception("unknown iouType for iou computation")
# compute iou between each dt and gt region
iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
ious = maskUtils.iou(d, g, iscrowd)
return ious
def computeOks(self, imgId, catId):
p = self.params
# dimension here should be Nxm
gts = self._gts[imgId, catId]
dts = self._dts[imgId, catId]
inds = np.argsort([-d["score"] for d in dts], kind="mergesort")
dts = [dts[i] for i in inds]
if len(dts) > p.maxDets[-1]:
dts = dts[0 : p.maxDets[-1]]
# if len(gts) == 0 and len(dts) == 0:
if len(gts) == 0 or len(dts) == 0:
return []
ious = np.zeros((len(dts), len(gts)))
sigmas = (
np.array(
[
0.26,
0.25,
0.25,
0.35,
0.35,
0.79,
0.79,
0.72,
0.72,
0.62,
0.62,
1.07,
1.07,
0.87,
0.87,
0.89,
0.89,
]
)
/ 10.0
)
vars = (sigmas * 2) ** 2
k = len(sigmas)
# compute oks between each detection and ground truth object
for j, gt in enumerate(gts):
# create bounds for ignore regions(double the gt bbox)
g = np.array(gt["keypoints"])
xg = g[0::3]
yg = g[1::3]
vg = g[2::3]
k1 = np.count_nonzero(vg > 0)
bb = gt["bbox"]
x0 = bb[0] - bb[2]
x1 = bb[0] + bb[2] * 2
y0 = bb[1] - bb[3]
y1 = bb[1] + bb[3] * 2
for i, dt in enumerate(dts):
d = np.array(dt["keypoints"])
xd = d[0::3]
yd = d[1::3]
if k1 > 0:
# measure the per-keypoint distance if keypoints visible
dx = xd - xg
dy = yd - yg
else:
# measure minimum distance to keypoints in (x0,y0) & (x1,y1)
z = np.zeros(k)
dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0)
dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0)
e = (dx**2 + dy**2) / vars / (gt["area"] + np.spacing(1)) / 2
if k1 > 0:
e = e[vg > 0]
ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
return ious
def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray:
if "densepose" in dt:
densepose_results_quantized = dt["densepose"]
return densepose_results_quantized.labels_uv_uint8[0].numpy()
elif "cse_mask" in dt:
return dt["cse_mask"]
elif "coarse_segm" in dt:
dy = max(int(dt["bbox"][3]), 1)
dx = max(int(dt["bbox"][2]), 1)
return (
F.interpolate(
dt["coarse_segm"].unsqueeze(0),
(dy, dx),
mode="bilinear",
align_corners=False,
)
.squeeze(0)
.argmax(0)
.numpy()
.astype(np.uint8)
)
elif "record_id" in dt:
assert (
self.multi_storage is not None
), f"Storage record id encountered in a detection {dt}, but no storage provided!"
record = self.multi_storage.get(dt["rank"], dt["record_id"])
coarse_segm = record["coarse_segm"]
dy = max(int(dt["bbox"][3]), 1)
dx = max(int(dt["bbox"][2]), 1)
return (
F.interpolate(
coarse_segm.unsqueeze(0),
(dy, dx),
mode="bilinear",
align_corners=False,
)
.squeeze(0)
.argmax(0)
.numpy()
.astype(np.uint8)
)
else:
raise Exception(f"No mask data in the detection: {dt}")
raise ValueError('The prediction dict needs to contain either "densepose" or "cse_mask"')
def _extract_iuv(
self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Extract arrays of I, U and V values at given points as numpy arrays
given the data mode stored in self._dpDataMode
"""
if self._dpDataMode == DensePoseDataMode.IUV_DT:
# estimated labels and UV (default)
ipoints = densepose_data[0, py, px]
upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
vpoints = densepose_data[2, py, px] / 255.0
elif self._dpDataMode == DensePoseDataMode.IUV_GT:
# ground truth
ipoints = np.array(gt["dp_I"])
upoints = np.array(gt["dp_U"])
vpoints = np.array(gt["dp_V"])
elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0:
# ground truth labels, UV = 0
ipoints = np.array(gt["dp_I"])
upoints = upoints * 0.0
vpoints = vpoints * 0.0
elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT:
# ground truth labels, estimated UV
ipoints = np.array(gt["dp_I"])
upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
vpoints = densepose_data[2, py, px] / 255.0
elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0:
# estimated labels, UV = 0
ipoints = densepose_data[0, py, px]
upoints = upoints * 0.0
vpoints = vpoints * 0.0
else:
raise ValueError(f"Unknown data mode: {self._dpDataMode}")
return ipoints, upoints, vpoints
def computeOgps_single_pair(self, dt, gt, py, px, pt_mask):
if "densepose" in dt:
ipoints, upoints, vpoints = self.extract_iuv_from_quantized(dt, gt, py, px, pt_mask)
return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
elif "u" in dt:
ipoints, upoints, vpoints = self.extract_iuv_from_raw(dt, gt, py, px, pt_mask)
return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
elif "record_id" in dt:
assert (
self.multi_storage is not None
), f"Storage record id encountered in detection {dt}, but no storage provided!"
record = self.multi_storage.get(dt["rank"], dt["record_id"])
record["bbox"] = dt["bbox"]
if "u" in record:
ipoints, upoints, vpoints = self.extract_iuv_from_raw(record, gt, py, px, pt_mask)
return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
elif "embedding" in record:
return self.computeOgps_single_pair_cse(
dt,
gt,
py,
px,
pt_mask,
record["coarse_segm"],
record["embedding"],
record["bbox"],
)
else:
raise Exception(f"Unknown record format: {record}")
elif "embedding" in dt:
return self.computeOgps_single_pair_cse(
dt, gt, py, px, pt_mask, dt["coarse_segm"], dt["embedding"], dt["bbox"]
)
raise Exception(f"Unknown detection format: {dt}")
def extract_iuv_from_quantized(self, dt, gt, py, px, pt_mask):
densepose_results_quantized = dt["densepose"]
ipoints, upoints, vpoints = self._extract_iuv(
densepose_results_quantized.labels_uv_uint8.numpy(), py, px, gt
)
ipoints[pt_mask == -1] = 0
return ipoints, upoints, vpoints
def extract_iuv_from_raw(self, dt, gt, py, px, pt_mask):
labels_dt = resample_fine_and_coarse_segm_tensors_to_bbox(
dt["fine_segm"].unsqueeze(0),
dt["coarse_segm"].unsqueeze(0),
dt["bbox"],
)
uv = resample_uv_tensors_to_bbox(
dt["u"].unsqueeze(0), dt["v"].unsqueeze(0), labels_dt.squeeze(0), dt["bbox"]
)
labels_uv_uint8 = torch.cat((labels_dt.byte(), (uv * 255).clamp(0, 255).byte()))
ipoints, upoints, vpoints = self._extract_iuv(labels_uv_uint8.numpy(), py, px, gt)
ipoints[pt_mask == -1] = 0
return ipoints, upoints, vpoints
def computeOgps_single_pair_iuv(self, dt, gt, ipoints, upoints, vpoints):
cVertsGT, ClosestVertsGTTransformed = self.findAllClosestVertsGT(gt)
cVerts = self.findAllClosestVertsUV(upoints, vpoints, ipoints)
# Get pairwise geodesic distances between gt and estimated mesh points.
dist = self.getDistancesUV(ClosestVertsGTTransformed, cVerts)
# Compute the Ogps measure.
# Find the mean geodesic normalization distance for
# each GT point, based on which part it is on.
Current_Mean_Distances = self.Mean_Distances[
self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]]
]
return dist, Current_Mean_Distances
    def computeOgps_single_pair_cse(
        self, dt, gt, py, px, pt_mask, coarse_segm, embedding, bbox_xywh_abs
    ):
        """
        Geodesic distances and normalization coefficients for one
        (detection, ground truth) pair in the continuous surface embedding
        (CSE) format. Points masked out by pt_mask keep vertex index -1 and
        receive infinite distance downstream.
        """
        # 0-based mesh vertex indices
        cVertsGT = torch.as_tensor(gt["dp_vertex"], dtype=torch.int64)
        # label for each pixel of the bbox, [H, W] tensor of long
        labels_dt = resample_coarse_segm_tensor_to_bbox(
            coarse_segm.unsqueeze(0), bbox_xywh_abs
        ).squeeze(0)
        x, y, w, h = bbox_xywh_abs
        # embedding for each pixel of the bbox, [D, H, W] tensor of float32
        embedding = F.interpolate(
            embedding.unsqueeze(0), (int(h), int(w)), mode="bilinear", align_corners=False
        ).squeeze(0)
        # valid locations py, px
        py_pt = torch.from_numpy(py[pt_mask > -1])
        px_pt = torch.from_numpy(px[pt_mask > -1])
        # estimated vertex per GT point; -1 marks invalid/unmatched points
        cVerts = torch.ones_like(cVertsGT) * -1
        cVerts[pt_mask > -1] = self.findClosestVertsCse(
            embedding, py_pt, px_pt, labels_dt, gt["ref_model"]
        )
        # Get pairwise geodesic distances between gt and estimated mesh points.
        dist = self.getDistancesCse(cVertsGT, cVerts, gt["ref_model"])
        # normalize distances
        if (gt["ref_model"] == "smpl_27554") and ("dp_I" in gt):
            # per-point normalization chosen by the coarse body part of each GT point
            Current_Mean_Distances = self.Mean_Distances[
                self.CoarseParts[np.array(gt["dp_I"], dtype=int)]
            ]
        else:
            # single global normalization constant for other reference meshes
            Current_Mean_Distances = 0.255
        return dist, Current_Mean_Distances
    def computeOgps(self, imgId, catId):
        """
        Compute the GPS (geodesic point similarity) matrix between detections
        and ground truth objects of one image/category, plus a bbox IoU matrix
        used later as a matching fallback.

        :return: [] if there are no gts or no dts; otherwise (ious, ious_bb)
            where ious is a [D, G] GPS matrix and ious_bb a [D, G] bbox IoU matrix.
        """
        p = self.params
        # dimension here should be Nxm
        g = self._gts[imgId, catId]
        d = self._dts[imgId, catId]
        # sort detections by decreasing score and keep at most maxDets
        inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort")
        d = [d[i] for i in inds]
        if len(d) > p.maxDets[-1]:
            d = d[0 : p.maxDets[-1]]
        # if len(gts) == 0 and len(dts) == 0:
        if len(g) == 0 or len(d) == 0:
            return []
        ious = np.zeros((len(d), len(g)))
        # compute opgs between each detection and ground truth object
        # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5
        # 1 # dist = 0.3m corresponds to ogps = 0.96
        # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5)
        for j, gt in enumerate(g):
            if not gt["ignore"]:
                g_ = gt["bbox"]
                for i, dt in enumerate(d):
                    # detection box size in pixels
                    dy = int(dt["bbox"][3])
                    dx = int(dt["bbox"][2])
                    # GT point coords are stored on a 0..255 scale relative to the
                    # GT box; rescale to pixels, then shift into dt box coordinates
                    dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0
                    dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0
                    py = (dp_y + g_[1] - dt["bbox"][1]).astype(int)
                    px = (dp_x + g_[0] - dt["bbox"][0]).astype(int)
                    # mark points that fall outside the detection box as invalid (-1)
                    pts = np.zeros(len(px))
                    pts[px >= dx] = -1
                    pts[py >= dy] = -1
                    pts[px < 0] = -1
                    pts[py < 0] = -1
                    if len(pts) < 1:
                        ogps = 0.0
                    elif np.max(pts) == -1:
                        # no GT point lies inside the detection box
                        ogps = 0.0
                    else:
                        # clamp invalid point coordinates so indexing stays in-bounds;
                        # the pt_mask (pts) tells downstream code to ignore them
                        px[pts == -1] = 0
                        py[pts == -1] = 0
                        dists_between_matches, dist_norm_coeffs = self.computeOgps_single_pair(
                            dt, gt, py, px, pts
                        )
                        # Compute gps
                        ogps_values = np.exp(
                            -(dists_between_matches**2) / (2 * (dist_norm_coeffs**2))
                        )
                        #
                        ogps = np.mean(ogps_values) if len(ogps_values) > 0 else 0.0
                    ious[i, j] = ogps
        gbb = [gt["bbox"] for gt in g]
        dbb = [dt["bbox"] for dt in d]
        # compute iou between each dt and gt region
        iscrowd = [int(o.get("iscrowd", 0)) for o in g]
        ious_bb = maskUtils.iou(dbb, gbb, iscrowd)
        return ious, ious_bb
    def evaluateImg(self, imgId, catId, aRng, maxDet):
        """
        perform evaluation for single category and image
        :return: dict (single image results)

        Greedily matches detections (highest score first) to ground truth
        objects using the precomputed similarity matrices in self.ious (and
        self.real_ious for GPSM/IOU densepose modes), once per IoU threshold.
        Returns None if the image has neither gts nor dts for this category.
        """
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return None
        for g in gt:
            # g['_ignore'] = g['ignore']
            # ignore GT flagged as such or outside the evaluated area range
            if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]):
                g["_ignore"] = True
            else:
                g["_ignore"] = False
        # sort dt highest score first, sort gt ignore last
        gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
        # load computed ious
        if p.iouType == "densepose":
            # print('Checking the length', len(self.ious[imgId, catId]))
            # if len(self.ious[imgId, catId]) == 0:
            # print(self.ious[imgId, catId])
            # computeOgps stored a (gps_matrix, bbox_iou_matrix) pair;
            # columns are reordered to the sorted gt order
            ious = (
                self.ious[imgId, catId][0][:, gtind]
                if len(self.ious[imgId, catId]) > 0
                else self.ious[imgId, catId]
            )
            ioubs = (
                self.ious[imgId, catId][1][:, gtind]
                if len(self.ious[imgId, catId]) > 0
                else self.ious[imgId, catId]
            )
            if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}:
                # mask IoUs from computeDPIoU
                iousM = (
                    self.real_ious[imgId, catId][:, gtind]
                    if len(self.real_ious[imgId, catId]) > 0
                    else self.real_ious[imgId, catId]
                )
        else:
            ious = (
                self.ious[imgId, catId][:, gtind]
                if len(self.ious[imgId, catId]) > 0
                else self.ious[imgId, catId]
            )
        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm = np.zeros((T, G))  # matched dt id per gt per threshold (0 = unmatched)
        dtm = np.zeros((T, D))  # matched gt id per dt per threshold (0 = unmatched)
        gtIg = np.array([g["_ignore"] for g in gt])
        dtIg = np.zeros((T, D))
        if np.all(gtIg) and p.iouType == "densepose":
            # every GT is ignored: ignore all detections as well
            dtIg = np.logical_or(dtIg, True)
        if len(ious) > 0:  # and not p.iouType == 'densepose':
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t, 1 - 1e-10])
                    m = -1
                    for gind, _g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind, gind] > 0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
                            break
                        if p.iouType == "densepose":
                            if self._dpEvalMode == DensePoseEvalMode.GPSM:
                                # geometric mean of mask IoU and GPS
                                new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind])
                            elif self._dpEvalMode == DensePoseEvalMode.IOU:
                                new_iou = iousM[dind, gind]
                            elif self._dpEvalMode == DensePoseEvalMode.GPS:
                                new_iou = ious[dind, gind]
                        else:
                            new_iou = ious[dind, gind]
                        if new_iou < iou:
                            continue
                        if new_iou == 0.0:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = new_iou
                        m = gind
                    # if match made store id of match for both dt and gt
                    if m == -1:
                        continue
                    dtIg[tind, dind] = gtIg[m]
                    dtm[tind, dind] = gt[m]["id"]
                    gtm[tind, m] = d["id"]
                if p.iouType == "densepose":
                    if not len(ioubs) == 0:
                        # second pass: detections left unmatched by GPS may still be
                        # associated to a GT via bbox IoU >= 0.8, and inherit its
                        # ignore flag (prevents penalizing dts on ignored GT)
                        for dind, d in enumerate(dt):
                            # information about best match so far (m=-1 -> unmatched)
                            if dtm[tind, dind] == 0:
                                ioub = 0.8
                                m = -1
                                for gind, _g in enumerate(gt):
                                    # if this gt already matched, and not a crowd, continue
                                    if gtm[tind, gind] > 0 and not iscrowd[gind]:
                                        continue
                                    # continue to next gt unless better match made
                                    if ioubs[dind, gind] < ioub:
                                        continue
                                    # if match successful and best so far, store appropriately
                                    ioub = ioubs[dind, gind]
                                    m = gind
                                # if match made store id of match for both dt and gt
                                if m > -1:
                                    dtIg[:, dind] = gtIg[m]
                                    if gtIg[m]:
                                        dtm[tind, dind] = gt[m]["id"]
                                        gtm[tind, m] = d["id"]
        # set unmatched detections outside of area range to ignore
        a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
        # store results for given image and category
        # print('Done with the function', len(self.ious[imgId, catId]))
        return {
            "image_id": imgId,
            "category_id": catId,
            "aRng": aRng,
            "maxDet": maxDet,
            "dtIds": [d["id"] for d in dt],
            "gtIds": [g["id"] for g in gt],
            "dtMatches": dtm,
            "gtMatches": gtm,
            "dtScores": [d["score"] for d in dt],
            "gtIgnore": gtIg,
            "dtIgnore": dtIg,
        }
    def accumulate(self, p=None):
        """
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        """
        logger.info("Accumulating evaluation results...")
        tic = time.time()
        # NOTE(review): when evalImgs is empty this only logs and falls through;
        # presumably a `return` was intended here — confirm before changing
        if not self.evalImgs:
            logger.info("Please run evaluate() first")
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.recThrs)
        K = len(p.catIds) if p.useCats else 1
        A = len(p.areaRng)
        M = len(p.maxDets)
        precision = -(np.ones((T, R, K, A, M)))  # -1 for the precision of absent categories
        recall = -(np.ones((T, K, A, M)))
        # create dictionary for future indexing
        logger.info("Categories: {}".format(p.catIds))
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections;
        # evalImgs is laid out as [catId][areaRng][imgId], hence the Nk/Na offsets
        for k, k0 in enumerate(k_list):
            Nk = k0 * A0 * I0
            for a, a0 in enumerate(a_list):
                Na = a0 * I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if e is not None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind="mergesort")
                    dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
                    dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
                    gtIg = np.concatenate([e["gtIgnore"] for e in E])
                    npig = np.count_nonzero(gtIg == 0)
                    if npig == 0:
                        continue
                    tps = np.logical_and(dtm, np.logical_not(dtIg))
                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                    # one precision/recall curve per IoU threshold
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp + tp + np.spacing(1))
                        q = np.zeros((R,))
                        if nd:
                            recall[t, k, a, m] = rc[-1]
                        else:
                            recall[t, k, a, m] = 0
                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist()
                        q = q.tolist()
                        # make the precision envelope monotonically non-increasing
                        for i in range(nd - 1, 0, -1):
                            if pr[i] > pr[i - 1]:
                                pr[i - 1] = pr[i]
                        # sample precision at the configured recall thresholds
                        inds = np.searchsorted(rc, p.recThrs, side="left")
                        try:
                            for ri, pi in enumerate(inds):
                                q[ri] = pr[pi]
                        except Exception:
                            pass
                        precision[t, :, k, a, m] = np.array(q)
        logger.info(
            "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision))
        )
        self.eval = {
            "params": p,
            "counts": [T, R, K, A, M],
            "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "precision": precision,
            "recall": recall,
        }
        toc = time.time()
        logger.info("DONE (t={:0.2f}s).".format(toc - tic))
    def summarize(self):
        """
        Compute and display summary metrics for evaluation results.
        Note this function can *only* be applied on the default parameter setting
        """

        def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
            # Average one AP/AR figure over the requested slice of self.eval
            # and log it in the standard COCO format.
            p = self.params
            iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
            titleStr = "Average Precision" if ap == 1 else "Average Recall"
            typeStr = "(AP)" if ap == 1 else "(AR)"
            measure = "IoU"
            if self.params.iouType == "keypoints":
                measure = "OKS"
            elif self.params.iouType == "densepose":
                measure = "OGPS"
            iouStr = (
                "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
                if iouThr is None
                else "{:0.2f}".format(iouThr)
            )
            aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
            if ap == 1:
                # dimension of precision: [TxRxKxAxM]
                s = self.eval["precision"]
                # IoU
                if iouThr is not None:
                    t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
                    s = s[t]
                s = s[:, :, :, aind, mind]
            else:
                # dimension of recall: [TxKxAxM]
                s = self.eval["recall"]
                if iouThr is not None:
                    t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
                    s = s[t]
                s = s[:, :, aind, mind]
            # -1 entries mark absent categories and are excluded from the mean
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])
            logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s))
            return mean_s

        def _summarizeDets():
            # 12 standard COCO box/segm metrics
            stats = np.zeros((12,))
            stats[0] = _summarize(1)
            stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
            stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
            stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
            stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
            stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
            stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
            stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
            stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
            stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
            stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
            stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
            return stats

        def _summarizeKps():
            # 10 standard COCO keypoint (OKS) metrics
            stats = np.zeros((10,))
            stats[0] = _summarize(1, maxDets=20)
            stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
            stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
            stats[3] = _summarize(1, maxDets=20, areaRng="medium")
            stats[4] = _summarize(1, maxDets=20, areaRng="large")
            stats[5] = _summarize(0, maxDets=20)
            stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
            stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
            stats[8] = _summarize(0, maxDets=20, areaRng="medium")
            stats[9] = _summarize(0, maxDets=20, areaRng="large")
            return stats

        def _summarizeUvs():
            # DensePose (OGPS) metrics; the low-threshold entries are only
            # included when the configured threshold range covers them
            stats = [_summarize(1, maxDets=self.params.maxDets[0])]
            min_threshold = self.params.iouThrs.min()
            if min_threshold <= 0.201:
                stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.2)]
            if min_threshold <= 0.301:
                stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.3)]
            if min_threshold <= 0.401:
                stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.4)]
            stats += [
                _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5),
                _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75),
                _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium"),
                _summarize(1, maxDets=self.params.maxDets[0], areaRng="large"),
                _summarize(0, maxDets=self.params.maxDets[0]),
                _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5),
                _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75),
                _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium"),
                _summarize(0, maxDets=self.params.maxDets[0], areaRng="large"),
            ]
            return np.array(stats)

        def _summarizeUvsOld():
            # legacy 18-metric DensePose summary (fine-grained threshold sweep)
            stats = np.zeros((18,))
            stats[0] = _summarize(1, maxDets=self.params.maxDets[0])
            stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5)
            stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55)
            stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60)
            stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65)
            stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70)
            stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75)
            stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80)
            stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85)
            stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90)
            stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95)
            stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium")
            stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large")
            stats[13] = _summarize(0, maxDets=self.params.maxDets[0])
            stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5)
            stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75)
            stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium")
            stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large")
            return stats

        if not self.eval:
            raise Exception("Please run accumulate() first")
        iouType = self.params.iouType
        # NOTE(review): an unknown iouType leaves `summarize` unbound and raises
        # UnboundLocalError below; Params.__init__ rejects unknown types upstream
        if iouType in ["segm", "bbox"]:
            summarize = _summarizeDets
        elif iouType in ["keypoints"]:
            summarize = _summarizeKps
        elif iouType in ["densepose"]:
            summarize = _summarizeUvs
        self.stats = summarize()
def __str__(self):
self.summarize()
# ================ functions for dense pose ==============================
def findAllClosestVertsUV(self, U_points, V_points, Index_points):
ClosestVerts = np.ones(Index_points.shape) * -1
for i in np.arange(24):
#
if (i + 1) in Index_points:
UVs = np.array(
[U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]]
)
Current_Part_UVs = self.Part_UVs[i]
Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[
np.argmin(D, axis=0)
]
ClosestVertsTransformed = self.PDIST_transform[ClosestVerts.astype(int) - 1]
ClosestVertsTransformed[ClosestVerts < 0] = 0
return ClosestVertsTransformed
def findClosestVertsCse(self, embedding, py, px, mask, mesh_name):
mesh_vertex_embeddings = self.embedder(mesh_name)
pixel_embeddings = embedding[:, py, px].t().to(device="cuda")
mask_vals = mask[py, px]
edm = squared_euclidean_distance_matrix(pixel_embeddings, mesh_vertex_embeddings)
vertex_indices = edm.argmin(dim=1).cpu()
vertex_indices[mask_vals <= 0] = -1
return vertex_indices
def findAllClosestVertsGT(self, gt):
#
I_gt = np.array(gt["dp_I"])
U_gt = np.array(gt["dp_U"])
V_gt = np.array(gt["dp_V"])
#
# print(I_gt)
#
ClosestVertsGT = np.ones(I_gt.shape) * -1
for i in np.arange(24):
if (i + 1) in I_gt:
UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]])
Current_Part_UVs = self.Part_UVs[i]
Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)]
#
ClosestVertsGTTransformed = self.PDIST_transform[ClosestVertsGT.astype(int) - 1]
ClosestVertsGTTransformed[ClosestVertsGT < 0] = 0
return ClosestVertsGT, ClosestVertsGTTransformed
def getDistancesCse(self, cVertsGT, cVerts, mesh_name):
geodists_vertices = torch.ones_like(cVertsGT) * float("inf")
selected = (cVertsGT >= 0) * (cVerts >= 0)
mesh = create_mesh(mesh_name, "cpu")
geodists_vertices[selected] = mesh.geodists[cVertsGT[selected], cVerts[selected]]
return geodists_vertices.numpy()
def getDistancesUV(self, cVertsGT, cVerts):
#
n = 27554
dists = []
for d in range(len(cVertsGT)):
if cVertsGT[d] > 0:
if cVerts[d] > 0:
i = cVertsGT[d] - 1
j = cVerts[d] - 1
if j == i:
dists.append(0)
elif j > i:
ccc = i
i = j
j = ccc
i = n - i - 1
j = n - j - 1
k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
k = (n * n - n) / 2 - k - 1
dists.append(self.Pdist_matrix[int(k)][0])
else:
i = n - i - 1
j = n - j - 1
k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
k = (n * n - n) / 2 - k - 1
dists.append(self.Pdist_matrix[int(k)][0])
else:
dists.append(np.inf)
return np.atleast_1d(np.array(dists).squeeze())
class Params:
    """
    Params for coco evaluation api

    Holds image/category ids, IoU and recall thresholds, detection caps and
    area ranges for one evaluation mode ("segm"/"bbox", "keypoints" or
    "densepose").
    """

    def setDetParams(self):
        """Default parameters for box / segmentation evaluation."""
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble. the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
        self.maxDets = [1, 10, 100]
        self.areaRng = [
            [0**2, 1e5**2],
            [0**2, 32**2],
            [32**2, 96**2],
            [96**2, 1e5**2],
        ]
        self.areaRngLbl = ["all", "small", "medium", "large"]
        self.useCats = 1

    def setKpParams(self):
        """Default parameters for keypoint (OKS) evaluation."""
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble. the data point on arange is slightly larger than the true value
        # Bug fix: np.linspace requires an integer `num`; the original passed
        # np.round(...) + 1 (a float), which raises TypeError on modern numpy.
        # Cast to int, consistent with setDetParams / setUvParams.
        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
        self.maxDets = [20]
        self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
        self.areaRngLbl = ["all", "medium", "large"]
        self.useCats = 1

    def setUvParams(self):
        """Default parameters for DensePose (OGPS) evaluation."""
        self.imgIds = []
        self.catIds = []
        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
        self.maxDets = [20]
        self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
        self.areaRngLbl = ["all", "medium", "large"]
        self.useCats = 1

    def __init__(self, iouType="segm"):
        """Initialize for the given evaluation mode; raises on unknown types."""
        if iouType == "segm" or iouType == "bbox":
            self.setDetParams()
        elif iouType == "keypoints":
            self.setKpParams()
        elif iouType == "densepose":
            self.setUvParams()
        else:
            raise Exception("iouType not supported")
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import contextlib
import copy
import io
import itertools
import logging
import numpy as np
import os
from collections import OrderedDict
from typing import Dict, Iterable, List, Optional
import pycocotools.mask as mask_utils
import torch
from pycocotools.coco import COCO
from tabulate import tabulate
from detectron2.config import CfgNode
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import gather, get_rank, is_main_process, synchronize
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import create_small_table
from densepose.converters import ToChartResultConverter, ToMaskConverter
from densepose.data.datasets.coco import maybe_filter_and_map_categories_cocoapi
from densepose.structures import (
DensePoseChartPredictorOutput,
DensePoseEmbeddingPredictorOutput,
quantize_densepose_chart_result,
)
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
from .mesh_alignment_evaluator import MeshAlignmentEvaluator
from .tensor_storage import (
SingleProcessFileTensorStorage,
SingleProcessRamTensorStorage,
SingleProcessTensorStorage,
SizeData,
storage_gather,
)
class DensePoseCOCOEvaluator(DatasetEvaluator):
    """
    Evaluates DensePose predictions on a COCO-style dataset.

    Accumulates per-image predictions during inference (optionally off-loading
    heavy tensor fields to a ``SingleProcessTensorStorage``), then computes GPS,
    GPSM and segmentation AP metrics via ``DensePoseCocoEval``. Can additionally
    report 3D mesh alignment metrics when a vertex embedder is provided.
    """

    def __init__(
        self,
        dataset_name,
        distributed,
        output_dir=None,
        evaluator_type: str = "iuv",
        min_iou_threshold: float = 0.5,
        storage: Optional[SingleProcessTensorStorage] = None,
        embedder=None,
        should_evaluate_mesh_alignment: bool = False,
        mesh_alignment_mesh_names: Optional[List[str]] = None,
    ):
        """
        Args:
            dataset_name: name registered in MetadataCatalog; its metadata must
                provide ``json_file`` (COCO annotations) and, for CSE outputs,
                ``class_to_mesh_name``
            distributed (bool): if True, predictions are gathered from all ranks
                and evaluation happens only on the main process
            output_dir: optional directory to dump raw predictions to
            evaluator_type (str): "iuv" (chart-based) or "cse" (continuous
                surface embeddings); stored for use by callers
            min_iou_threshold (float): lowest IoU threshold used for AP
            storage: optional tensor storage that keeps heavy per-detection
                tensors out of the gathered prediction dicts
            embedder: vertex embedder; required for mesh alignment evaluation
            should_evaluate_mesh_alignment (bool): whether to also evaluate
                3D mesh alignment (requires ``embedder``)
            mesh_alignment_mesh_names: optional subset of mesh names used for
                mesh alignment evaluation
        """
        self._embedder = embedder
        self._distributed = distributed
        self._output_dir = output_dir
        self._evaluator_type = evaluator_type
        self._storage = storage
        self._should_evaluate_mesh_alignment = should_evaluate_mesh_alignment
        # mesh alignment is computed from vertex embeddings, so an embedder is mandatory
        assert not (
            should_evaluate_mesh_alignment and embedder is None
        ), "Mesh alignment evaluation is activated, but no vertex embedder provided!"
        if should_evaluate_mesh_alignment:
            self._mesh_alignment_evaluator = MeshAlignmentEvaluator(
                embedder,
                mesh_alignment_mesh_names,
            )
        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)
        self._metadata = MetadataCatalog.get(dataset_name)
        self._min_threshold = min_iou_threshold
        json_file = PathManager.get_local_path(self._metadata.json_file)
        # silence the verbose printout the COCO API emits while loading
        with contextlib.redirect_stdout(io.StringIO()):
            self._coco_api = COCO(json_file)
        maybe_filter_and_map_categories_cocoapi(dataset_name, self._coco_api)

    def reset(self):
        # drop predictions accumulated during the previous evaluation round
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a COCO model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
                The :class:`Instances` object needs to have `densepose` field.
        """
        for input, output in zip(inputs, outputs):
            instances = output["instances"].to(self._cpu_device)
            if not instances.has("pred_densepose"):
                continue
            prediction_list = prediction_to_dict(
                instances,
                input["image_id"],
                self._embedder,
                self._metadata.class_to_mesh_name,
                self._storage is not None,
            )
            if self._storage is not None:
                # move heavy tensor fields into the storage and keep only
                # lightweight (rank, record_id) references in the dicts
                for prediction_dict in prediction_list:
                    dict_to_store = {}
                    for field_name in self._storage.data_schema:
                        dict_to_store[field_name] = prediction_dict[field_name]
                    record_id = self._storage.put(dict_to_store)
                    prediction_dict["record_id"] = record_id
                    prediction_dict["rank"] = get_rank()
                    for field_name in self._storage.data_schema:
                        del prediction_dict[field_name]
            self._predictions.extend(prediction_list)

    def evaluate(self, img_ids=None):
        """
        Gather predictions (across ranks when distributed) and compute metrics.
        Returns None on non-main processes.
        """
        if self._distributed:
            synchronize()
            predictions = gather(self._predictions)
            predictions = list(itertools.chain(*predictions))
        else:
            predictions = self._predictions
        # per-rank storages are merged into a multi-process view on the main process
        multi_storage = storage_gather(self._storage) if self._storage is not None else None
        if not is_main_process():
            return
        return copy.deepcopy(self._eval_predictions(predictions, multi_storage, img_ids))

    def _eval_predictions(self, predictions, multi_storage=None, img_ids=None):
        """
        Evaluate predictions on densepose.
        Return results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for COCO format ...")
        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)
        self._logger.info("Evaluating predictions ...")
        res = OrderedDict()
        results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
            self._coco_api,
            predictions,
            multi_storage,
            self._embedder,
            class_names=self._metadata.get("thing_classes"),
            min_threshold=self._min_threshold,
            img_ids=img_ids,
        )
        res["densepose_gps"] = results_gps
        res["densepose_gpsm"] = results_gpsm
        res["densepose_segm"] = results_segm
        if self._should_evaluate_mesh_alignment:
            res["densepose_mesh_alignment"] = self._evaluate_mesh_alignment()
        return res

    def _evaluate_mesh_alignment(self):
        """Run mesh alignment evaluation; returns a flat metric -> value dict (in %)."""
        self._logger.info("Mesh alignment evaluation ...")
        mean_ge, mean_gps, per_mesh_metrics = self._mesh_alignment_evaluator.evaluate()
        results = {
            "GE": mean_ge * 100,
            "GPS": mean_gps * 100,
        }
        mesh_names = set()
        for metric_name in per_mesh_metrics:
            for mesh_name, value in per_mesh_metrics[metric_name].items():
                results[f"{metric_name}-{mesh_name}"] = value * 100
                mesh_names.add(mesh_name)
        self._print_mesh_alignment_results(results, mesh_names)
        return results

    def _print_mesh_alignment_results(self, results: Dict[str, float], mesh_names: Iterable[str]):
        # log a markdown-style table: one row per mesh plus a MEAN row
        self._logger.info("Evaluation results for densepose, mesh alignment:")
        self._logger.info(f'| {"Mesh":13s} | {"GErr":7s} | {"GPS":7s} |')
        self._logger.info("| :-----------: | :-----: | :-----: |")
        for mesh_name in mesh_names:
            ge_key = f"GE-{mesh_name}"
            ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " "
            gps_key = f"GPS-{mesh_name}"
            gps_str = f"{results[gps_key]:.4f}" if gps_key in results else " "
            self._logger.info(f"| {mesh_name:13s} | {ge_str:7s} | {gps_str:7s} |")
        self._logger.info("| :-------------------------------: |")
        ge_key = "GE"
        ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " "
        gps_key = "GPS"
        gps_str = f"{results[gps_key]:.4f}" if gps_key in results else " "
        self._logger.info(f'| {"MEAN":13s} | {ge_str:7s} | {gps_str:7s} |')
def prediction_to_dict(instances, img_id, embedder, class_to_mesh_name, use_storage):
    """
    Convert per-image model outputs into COCO-style result dicts.

    Args:
        instances (Instances): the output of the model for one image; must have
            `scores`, `pred_classes`, `pred_boxes` and `pred_densepose` fields
        img_id (str): the image id in COCO
        embedder (nn.Module): vertex embedder, used for CSE outputs
        class_to_mesh_name (dict): class index -> mesh name mapping (CSE only)
        use_storage (bool): if True, produce raw tensors intended for a tensor
            storage rather than encoded results

    Returns:
        list[dict]: the results in densepose evaluation format

    Raises:
        ValueError: if `pred_densepose` has an unsupported predictor output type
    """
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()
    # COCO results use XYWH boxes; clone so conversion does not mutate model output
    raw_boxes_xywh = BoxMode.convert(
        instances.pred_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    )
    if isinstance(instances.pred_densepose, DensePoseEmbeddingPredictorOutput):
        results_densepose = densepose_cse_predictions_to_dict(
            instances, embedder, class_to_mesh_name, use_storage
        )
    elif isinstance(instances.pred_densepose, DensePoseChartPredictorOutput):
        if not use_storage:
            results_densepose = densepose_chart_predictions_to_dict(instances)
        else:
            results_densepose = densepose_chart_predictions_to_storage_dict(instances)
    else:
        # previously fell through with `results_densepose` unbound, producing an
        # obscure NameError below; fail fast with a descriptive error instead
        raise ValueError(
            f"Unsupported DensePose predictor output type: {type(instances.pred_densepose)}"
        )
    results = []
    for k in range(len(instances)):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": raw_boxes_xywh[k].tolist(),
            "score": scores[k],
        }
        results.append({**result, **results_densepose[k]})
    return results
def densepose_chart_predictions_to_dict(instances):
    """
    Encode chart-based DensePose predictions into evaluation-ready dicts.

    For every detection, the chart result is quantized and the foreground
    segmentation mask is RLE-encoded in COCO mask format (utf-8 counts).
    """
    segmentations = ToMaskConverter.convert(
        instances.pred_densepose, instances.pred_boxes, instances.image_size
    )
    results = []
    for det_idx in range(len(instances)):
        quantized = quantize_densepose_chart_result(
            ToChartResultConverter.convert(
                instances.pred_densepose[det_idx], instances.pred_boxes[det_idx]
            )
        )
        quantized.labels_uv_uint8 = quantized.labels_uv_uint8.cpu()
        # pycocotools requires a Fortran-ordered uint8 array for RLE encoding
        mask_np = np.require(
            segmentations.tensor[det_idx].numpy(), dtype=np.uint8, requirements=["F"]
        )
        encoded = mask_utils.encode(mask_np)
        # RLE counts come back as bytes; decode for JSON serializability
        encoded["counts"] = encoded["counts"].decode("utf-8")
        results.append({"densepose": quantized, "segmentation": encoded})
    return results
def densepose_chart_predictions_to_storage_dict(instances):
    """
    Extract raw chart predictor tensors for storage, one dict per detection.

    Each tensor has its leading batch dimension squeezed out and is moved to CPU.
    """
    results = []
    for det_idx in range(len(instances)):
        output = instances.pred_densepose[det_idx]
        results.append(
            {
                "coarse_segm": output.coarse_segm.squeeze(0).cpu(),
                "fine_segm": output.fine_segm.squeeze(0).cpu(),
                "u": output.u.squeeze(0).cpu(),
                "v": output.v.squeeze(0).cpu(),
            }
        )
    return results
def densepose_cse_predictions_to_dict(instances, embedder, class_to_mesh_name, use_storage):
    """
    Extract CSE predictor tensors (coarse segmentation and pixel embeddings),
    one dict per detection; tensors are moved to CPU.

    Note: `embedder`, `class_to_mesh_name` and `use_storage` are accepted for
    signature compatibility with the chart-based converters and are unused here.
    """
    results = []
    for det_idx in range(len(instances)):
        cse_output = instances.pred_densepose[det_idx]
        results.append(
            {
                "coarse_segm": cse_output.coarse_segm[0].cpu(),
                "embedding": cse_output.embedding[0].cpu(),
            }
        )
    return results
def _evaluate_predictions_on_coco(
    coco_gt,
    coco_results,
    multi_storage=None,
    embedder=None,
    class_names=None,
    min_threshold: float = 0.5,
    img_ids=None,
):
    """
    Run DensePose COCO evaluation in GPS, GPSM and IOU (segmentation) modes.

    Args:
        coco_gt: COCO ground-truth API object
        coco_results (list[dict]): predictions in COCO results format
        multi_storage: optional multi-process tensor storage with heavy fields
        embedder: optional vertex embedder (used for CSE evaluation)
        class_names (list[str]): category names, used for per-category logging
        min_threshold (float): minimum IoU threshold for AP computation
        img_ids: optional subset of image ids to evaluate on

    Returns:
        Three dicts (metric -> value) for GPS, GPSM and segmentation modes;
        all values are -1 when there are no predictions.
    """
    logger = logging.getLogger(__name__)
    densepose_metrics = _get_densepose_metrics(min_threshold)
    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        # Logger.warn is a deprecated alias; use warning()
        logger.warning("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in densepose_metrics}
        results_gpsm = {metric: -1 for metric in densepose_metrics}
        results_segm = {metric: -1 for metric in densepose_metrics}
        return results_gps, results_gpsm, results_segm
    coco_dt = coco_gt.loadRes(coco_results)
    results = []
    for eval_mode_name in ["GPS", "GPSM", "IOU"]:
        eval_mode = getattr(DensePoseEvalMode, eval_mode_name)
        coco_eval = DensePoseCocoEval(
            coco_gt, coco_dt, "densepose", multi_storage, embedder, dpEvalMode=eval_mode
        )
        result = _derive_results_from_coco_eval(
            coco_eval, eval_mode_name, densepose_metrics, class_names, min_threshold, img_ids
        )
        results.append(result)
    return results
def _get_densepose_metrics(min_threshold: float = 0.5):
    """
    Return metric names for DensePose evaluation, ordered to match
    `coco_eval.stats`; low-IoU AP entries (AP20/AP30/AP40) are included
    only when `min_threshold` covers the corresponding IoU level.
    """
    # small slack (0.001) tolerates floating point representation of thresholds
    optional_low_iou_aps = [(0.201, "AP20"), (0.301, "AP30"), (0.401, "AP40")]
    metrics = ["AP"]
    metrics.extend(name for bound, name in optional_low_iou_aps if min_threshold <= bound)
    metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"])
    return metrics
def _derive_results_from_coco_eval(
    coco_eval, eval_mode_name, metrics, class_names, min_threshold: float, img_ids
):
    """
    Run a configured DensePoseCocoEval instance and extract named metrics.

    Args:
        coco_eval (DensePoseCocoEval): evaluator to run (mutated in place)
        eval_mode_name (str): evaluation mode label, used in log messages
        metrics (list[str]): metric names; order must match `coco_eval.stats`
        class_names (list[str]): category names; per-category AP is reported
            only when more than one class is present
        min_threshold (float): lowest IoU threshold for AP computation
        img_ids: optional subset of image ids to evaluate on

    Returns:
        dict: metric name -> value (percent)
    """
    if img_ids is not None:
        coco_eval.params.imgIds = img_ids
    # IoU thresholds from min_threshold to 0.95 in 0.05 steps
    coco_eval.params.iouThrs = np.linspace(
        min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
    )
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    # `stats` entries are assumed to line up positionally with `metrics`
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    logger = logging.getLogger(__name__)
    logger.info(
        f"Evaluation results for densepose, {eval_mode_name} metric: \n"
        + create_small_table(results)
    )
    if class_names is None or len(class_names) <= 1:
        return results
    # Compute per-category AP, the same way as it is done in D2
    # (see detectron2/evaluation/coco_evaluation.py):
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]
    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        # -1 entries mark absent ground truth; exclude them from the mean
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append((f"{name}", float(ap * 100)))
    # tabulate it
    n_cols = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(*[results_flatten[i::n_cols] for i in range(n_cols)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (n_cols // 2),
        numalign="left",
    )
    logger.info(f"Per-category {eval_mode_name} AP: \n" + table)
    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results
def build_densepose_evaluator_storage(cfg: CfgNode, output_folder: str):
    """
    Create a tensor storage for DensePose evaluation based on configuration.

    Args:
        cfg (CfgNode): configuration options
        output_folder (str): destination folder for file-based storage

    Return:
        A single-process tensor storage instance, or None when storage is
        disabled (DENSEPOSE_EVALUATION.STORAGE == "none")

    Raises:
        ValueError: on unknown evaluator type or storage specification
    """
    storage_spec = cfg.DENSEPOSE_EVALUATION.STORAGE
    if storage_spec == "none":
        return None
    evaluator_type = cfg.DENSEPOSE_EVALUATION.TYPE
    # all stored tensors share the head output resolution (square heatmaps)
    # and the number of coarse segmentation channels
    side = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
    n_channels_coarse = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
    if evaluator_type == "iuv":
        # chart-based outputs: fine segmentation plus U/V per patch (+1 for background)
        n_channels_fine = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
        schema = {
            "coarse_segm": SizeData(dtype="float32", shape=(n_channels_coarse, side, side)),
            "fine_segm": SizeData(dtype="float32", shape=(n_channels_fine, side, side)),
            "u": SizeData(dtype="float32", shape=(n_channels_fine, side, side)),
            "v": SizeData(dtype="float32", shape=(n_channels_fine, side, side)),
        }
    elif evaluator_type == "cse":
        embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        schema = {
            "coarse_segm": SizeData(dtype="float32", shape=(n_channels_coarse, side, side)),
            "embedding": SizeData(dtype="float32", shape=(embed_size, side, side)),
        }
    else:
        raise ValueError(f"Unknown evaluator type: {evaluator_type}")
    if storage_spec == "ram":
        return SingleProcessRamTensorStorage(schema, io.BytesIO())
    if storage_spec == "file":
        fpath = os.path.join(output_folder, f"DensePoseEvaluatorStorage.{get_rank()}.bin")
        PathManager.mkdirs(output_folder)
        return SingleProcessFileTensorStorage(schema, fpath, "wb")
    raise ValueError(f"Unknown storage specification: {storage_spec}")
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import json
import logging
from typing import List, Optional
import torch
from torch import nn
from detectron2.utils.file_io import PathManager
from densepose.structures.mesh import create_mesh
class MeshAlignmentEvaluator:
    """
    Class for evaluation of 3D mesh alignment based on the learned vertex embeddings
    """

    def __init__(self, embedder: nn.Module, mesh_names: Optional[List[str]]):
        """
        Args:
            embedder (nn.Module): vertex embedder; called with a mesh name to
                obtain per-vertex embeddings
            mesh_names (Optional[List[str]]): meshes to evaluate; falls back to
                `embedder.mesh_names` when None or empty
        """
        self.embedder = embedder
        # use the provided mesh names if not None and not an empty list
        self.mesh_names = mesh_names if mesh_names else embedder.mesh_names
        self.logger = logging.getLogger(__name__)
        # NOTE(review): fetches key-vertex annotations over the network at
        # construction time -- requires connectivity to fbaipublicfiles.com
        with PathManager.open(
            "https://dl.fbaipublicfiles.com/densepose/data/cse/mesh_keyvertices_v0.json", "r"
        ) as f:
            self.mesh_keyvertices = json.load(f)

    def evaluate(self):
        """
        Evaluate pairwise mesh alignment over all ordered mesh pairs.

        For each pair (mesh_1, mesh_2), key vertices of mesh_1 are matched to
        their most similar vertices on mesh_2 (maximum embedding dot product);
        the geodesic distance on mesh_2 between the matched vertex and the
        annotated corresponding key vertex yields the geodesic error (GE) and
        a Gaussian-kernel GPS score.

        Returns:
            tuple: (mean GE over meshes, mean GPS over meshes,
            dict mapping metric name ("GE"/"GPS") -> dict of mesh name -> value)
        """
        ge_per_mesh = {}
        gps_per_mesh = {}
        for mesh_name_1 in self.mesh_names:
            avg_errors = []
            avg_gps = []
            embeddings_1 = self.embedder(mesh_name_1)
            keyvertices_1 = self.mesh_keyvertices[mesh_name_1]
            keyvertex_names_1 = list(keyvertices_1.keys())
            keyvertex_indices_1 = [keyvertices_1[name] for name in keyvertex_names_1]
            for mesh_name_2 in self.mesh_names:
                if mesh_name_1 == mesh_name_2:
                    continue
                embeddings_2 = self.embedder(mesh_name_2)
                keyvertices_2 = self.mesh_keyvertices[mesh_name_2]
                # similarity between key vertices of mesh 1 and all vertices of mesh 2
                sim_matrix_12 = embeddings_1[keyvertex_indices_1].mm(embeddings_2.T)
                vertices_2_matching_keyvertices_1 = sim_matrix_12.argmax(axis=1)
                mesh_2 = create_mesh(mesh_name_2, embeddings_2.device)
                # geodesic distance from matched vertices to the annotated key vertices
                geodists = mesh_2.geodists[
                    vertices_2_matching_keyvertices_1,
                    [keyvertices_2[name] for name in keyvertex_names_1],
                ]
                # GPS kernel width; presumably the dataset mean geodesic distance -- confirm
                Current_Mean_Distances = 0.255
                gps = (-(geodists**2) / (2 * (Current_Mean_Distances**2))).exp()
                avg_errors.append(geodists.mean().item())
                avg_gps.append(gps.mean().item())
            ge_mean = torch.as_tensor(avg_errors).mean().item()
            gps_mean = torch.as_tensor(avg_gps).mean().item()
            ge_per_mesh[mesh_name_1] = ge_mean
            gps_per_mesh[mesh_name_1] = gps_mean
        ge_mean_global = torch.as_tensor(list(ge_per_mesh.values())).mean().item()
        gps_mean_global = torch.as_tensor(list(gps_per_mesh.values())).mean().item()
        per_mesh_metrics = {
            "GE": ge_per_mesh,
            "GPS": gps_per_mesh,
        }
        return ge_mean_global, gps_mean_global, per_mesh_metrics
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import io
import numpy as np
import os
from dataclasses import dataclass
from functools import reduce
from operator import mul
from typing import BinaryIO, Dict, Optional, Tuple
import torch
from detectron2.utils.comm import gather, get_rank
from detectron2.utils.file_io import PathManager
@dataclass
class SizeData:
    """Shape and dtype specification of a single stored tensor field."""

    # numpy dtype name, e.g. "float32"
    dtype: str
    # tensor dimensions, variable length, e.g. (C, H, W)
    shape: Tuple[int, ...]
def _calculate_record_field_size_b(data_schema: Dict[str, SizeData], field_name: str) -> int:
    """Return the size in bytes of a single record field, as given by the schema."""
    size_data = data_schema[field_name]
    n_elements = reduce(mul, size_data.shape)
    return n_elements * np.dtype(size_data.dtype).itemsize
def _calculate_record_size_b(data_schema: Dict[str, SizeData]) -> int:
    """Return the total size in bytes of one record (all fields combined)."""
    return sum(
        _calculate_record_field_size_b(data_schema, field_name) for field_name in data_schema
    )
def _calculate_record_field_sizes_b(data_schema: Dict[str, SizeData]) -> Dict[str, int]:
    """Return a mapping from field name to that field's size in bytes."""
    return {
        field_name: _calculate_record_field_size_b(data_schema, field_name)
        for field_name in data_schema
    }
class SingleProcessTensorStorage:
    """
    Compact tensor storage to keep tensor data of predefined size and type.
    """

    def __init__(self, data_schema: Dict[str, SizeData], storage_impl: BinaryIO):
        """
        Construct tensor storage based on information on data shape and size.
        Internally uses numpy to interpret the type specification.
        The storage must support operations `seek(offset, whence=os.SEEK_SET)` and
        `read(size)` to be able to perform the `get` operation.
        The storage must support operation `write(bytes)` to be able to perform
        the `put` operation.

        Args:
            data_schema (dict: str -> SizeData): dictionary which maps tensor name
                to its size data (shape and data type), e.g.
                ```
                {
                  "coarse_segm": SizeData(dtype="float32", shape=(112, 112)),
                  "embedding": SizeData(dtype="float32", shape=(16, 112, 112)),
                }
                ```
            storage_impl (BinaryIO): io instance that handles file-like seek, read
                and write operations, e.g. a file handle or a memory buffer like io.BytesIO
        """
        self.data_schema = data_schema
        # all records have the same fixed size, so the byte offset of record i
        # is simply i * record_size_b
        self.record_size_b = _calculate_record_size_b(data_schema)
        self.record_field_sizes_b = _calculate_record_field_sizes_b(data_schema)
        self.storage_impl = storage_impl
        # records are appended sequentially; ids start at 0
        self.next_record_id = 0

    def get(self, record_id: int) -> Dict[str, torch.Tensor]:
        """
        Load tensors from the storage by record ID

        Args:
            record_id (int): Record ID, for which to load the data

        Return:
            dict: str -> tensor: tensor name mapped to tensor data, recorded under the provided ID
        """
        self.storage_impl.seek(record_id * self.record_size_b, os.SEEK_SET)
        data_bytes = self.storage_impl.read(self.record_size_b)
        assert len(data_bytes) == self.record_size_b, (
            f"Expected data size {self.record_size_b} B could not be read: "
            f"got {len(data_bytes)} B"
        )
        record = {}
        cur_idx = 0
        # it's important to read and write in the same order
        for field_name in sorted(self.data_schema):
            schema = self.data_schema[field_name]
            field_size_b = self.record_field_sizes_b[field_name]
            chunk = data_bytes[cur_idx : cur_idx + field_size_b]
            # reinterpret raw bytes according to the schema's dtype and shape;
            # NOTE(review): frombuffer yields a read-only view over `chunk`,
            # so the resulting tensor is non-writable -- confirm callers only read
            data_np = np.frombuffer(
                chunk, dtype=schema.dtype, count=reduce(mul, schema.shape)
            ).reshape(schema.shape)
            record[field_name] = torch.from_numpy(data_np)
            cur_idx += field_size_b
        return record

    def put(self, data: Dict[str, torch.Tensor]) -> int:
        """
        Store tensors in the storage

        Args:
            data (dict: str -> tensor): data to store, a dictionary which maps
                tensor names into tensors; tensor shapes must match those specified
                in data schema.
        Return:
            int: record ID, under which the data is stored
        """
        # it's important to read and write in the same order
        for field_name in sorted(self.data_schema):
            assert (
                field_name in data
            ), f"Field '{field_name}' not present in data: data keys are {data.keys()}"
            value = data[field_name]
            assert value.shape == self.data_schema[field_name].shape, (
                f"Mismatched tensor shapes for field '{field_name}': "
                f"expected {self.data_schema[field_name].shape}, got {value.shape}"
            )
            data_bytes = value.cpu().numpy().tobytes()
            assert len(data_bytes) == self.record_field_sizes_b[field_name], (
                f"Expected field {field_name} to be of size "
                f"{self.record_field_sizes_b[field_name]} B, got {len(data_bytes)} B"
            )
            # writes happen at the current position: records must be written
            # sequentially for `get`'s offset arithmetic to hold
            self.storage_impl.write(data_bytes)
        record_id = self.next_record_id
        self.next_record_id += 1
        return record_id
class SingleProcessFileTensorStorage(SingleProcessTensorStorage):
    """
    Single-process tensor storage backed by a binary file.
    """

    def __init__(self, data_schema: Dict[str, SizeData], fpath: str, mode: str):
        """
        Args:
            data_schema (dict: str -> SizeData): data schema, see the base class
            fpath (str): path of the file backing the storage
            mode (str): file mode; must be binary ("rb" or "wb")
        """
        self.fpath = fpath
        assert "b" in mode, f"Tensor storage should be opened in binary mode, got '{mode}'"
        if "w" in mode:
            file_h = PathManager.open(fpath, mode)
        elif "r" in mode:
            # reads go through a local copy of the (possibly remote) file
            file_h = open(PathManager.get_local_path(fpath), mode)
        else:
            raise ValueError(f"Unsupported file mode {mode}, supported modes: rb, wb")
        super().__init__(data_schema, file_h)
class SingleProcessRamTensorStorage(SingleProcessTensorStorage):
    """
    Implementation of a single process tensor storage which stores data in RAM
    """

    def __init__(self, data_schema: Dict[str, SizeData], buf: io.BytesIO):
        """
        Args:
            data_schema (dict: str -> SizeData): data schema, see the base class
            buf (io.BytesIO): in-memory buffer that backs the storage
        """
        super().__init__(data_schema, buf)
class MultiProcessTensorStorage:
    """
    Representation of a set of tensor storages created by individual processes,
    allows to access those storages from a single owner process. The storages
    should either be shared or broadcasted to the owner process.
    The processes are identified by their rank, data is uniquely defined by
    the rank of the process and the record ID.
    """

    def __init__(self, rank_to_storage: Dict[int, SingleProcessTensorStorage]):
        # mapping: process rank -> that process' single-process storage
        self.rank_to_storage = rank_to_storage

    def get(self, rank: int, record_id: int) -> Dict[str, torch.Tensor]:
        """Load the record with `record_id` written by the process with `rank`."""
        storage = self.rank_to_storage[rank]
        return storage.get(record_id)

    def put(self, rank: int, data: Dict[str, torch.Tensor]) -> int:
        """Store `data` in the storage of process `rank`; return the record ID."""
        storage = self.rank_to_storage[rank]
        return storage.put(data)
class MultiProcessFileTensorStorage(MultiProcessTensorStorage):
    """
    Multi-process storage view in which each rank's data resides in its own file.
    """

    def __init__(self, data_schema: Dict[str, SizeData], rank_to_fpath: Dict[int, str], mode: str):
        rank_to_storage = {}
        for rank, fpath in rank_to_fpath.items():
            rank_to_storage[rank] = SingleProcessFileTensorStorage(data_schema, fpath, mode)
        super().__init__(rank_to_storage)
class MultiProcessRamTensorStorage(MultiProcessTensorStorage):
    """
    Multi-process storage view in which each rank's data resides in an in-memory buffer.
    """

    def __init__(self, data_schema: Dict[str, SizeData], rank_to_buffer: Dict[int, io.BytesIO]):
        rank_to_storage = {}
        for rank, buf in rank_to_buffer.items():
            rank_to_storage[rank] = SingleProcessRamTensorStorage(data_schema, buf)
        super().__init__(rank_to_storage)
def _ram_storage_gather(
    storage: SingleProcessRamTensorStorage, dst_rank: int = 0
) -> Optional[MultiProcessRamTensorStorage]:
    """
    Gather RAM-backed storages from all ranks onto `dst_rank`.

    Returns a multi-process storage on the destination rank, None elsewhere.
    """
    # rewind so that the full buffer content is read and gathered
    storage.storage_impl.seek(0, os.SEEK_SET)
    # TODO: overhead, pickling a bytes object, can just pass bytes in a tensor directly
    # see detectron2/utils.comm.py
    gathered_blobs = gather(storage.storage_impl.read(), dst=dst_rank)
    if get_rank() != dst_rank:
        return None
    rank_to_buffer = {rank: io.BytesIO(blob) for rank, blob in enumerate(gathered_blobs)}
    return MultiProcessRamTensorStorage(storage.data_schema, rank_to_buffer)
def _file_storage_gather(
    storage: SingleProcessFileTensorStorage,
    dst_rank: int = 0,
    mode: str = "rb",
) -> Optional[MultiProcessFileTensorStorage]:
    """
    Gather file-backed storages from all ranks onto `dst_rank`.

    Each rank closes its file and shares its path; the destination rank
    reopens all the files (by default for reading).
    """
    storage.storage_impl.close()
    gathered_paths = gather(storage.fpath, dst=dst_rank)
    if get_rank() != dst_rank:
        return None
    rank_to_fpath = dict(enumerate(gathered_paths))
    return MultiProcessFileTensorStorage(storage.data_schema, rank_to_fpath, mode)
def storage_gather(
    storage: SingleProcessTensorStorage, dst_rank: int = 0
) -> Optional[MultiProcessTensorStorage]:
    """
    Gather single-process storages of any supported kind onto `dst_rank`.

    Returns a multi-process storage on the destination rank, None elsewhere;
    raises for unsupported storage implementations.
    """
    if isinstance(storage, SingleProcessRamTensorStorage):
        return _ram_storage_gather(storage, dst_rank)
    if isinstance(storage, SingleProcessFileTensorStorage):
        return _file_storage_gather(storage, dst_rank)
    raise Exception(f"Unsupported storage for gather operation: {storage}")
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
from .filter import DensePoseDataFilter
from .inference import densepose_inference
from .utils import initialize_module_params
from .build import (
build_densepose_data_filter,
build_densepose_embedder,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Optional
from torch import nn
from detectron2.config import CfgNode
from .cse.embedder import Embedder
from .filter import DensePoseDataFilter
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Instantiate a DensePose predictor according to configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    # local import, presumably to avoid a circular dependency at module load time
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_cls = DENSEPOSE_PREDICTOR_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME)
    return predictor_cls(cfg, input_channels)
def build_densepose_data_filter(cfg: CfgNode):
    """
    Build DensePose data filter which selects data for training

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    return DensePoseDataFilter(cfg)
def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Instantiate a DensePose head according to configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    # local import, presumably to avoid a circular dependency at module load time
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_cls = ROI_DENSEPOSE_HEAD_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME)
    return head_cls(cfg, input_channels)
def build_densepose_losses(cfg: CfgNode):
    """
    Instantiate a DensePose loss according to configuration options.

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    # local import, presumably to avoid a circular dependency at module load time
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_cls = DENSEPOSE_LOSS_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME)
    return loss_cls(cfg)
def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build embedder used to embed mesh vertices into an embedding space.
    Embedder contains sub-embedders, one for each mesh ID.

    Args:
        cfg (cfgNode): configuration options
    Return:
        Embedding module, or None when no CSE embedders are configured
    """
    return Embedder(cfg) if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS else None
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from dataclasses import dataclass
from enum import Enum
from detectron2.config import CfgNode
class DensePoseUVConfidenceType(Enum):
    """
    Statistical model type for confidence learning, possible values:
     - "iid_iso": statistically independent identically distributed residuals
         with isotropic covariance
     - "indep_aniso": statistically independent residuals with anisotropic
         covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    # fmt: off
    IID_ISO = "iid_iso"
    INDEP_ANISO = "indep_aniso"
    # fmt: on
@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence on UV data
    """

    # whether UV confidence estimation is enabled
    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    # statistical model used for the UV confidence estimates
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence on segmentation
    """

    # whether segmentation confidence estimation is enabled
    enabled: bool = False
    # lower bound on confidence values
    epsilon: float = 0.01
@dataclass
class DensePoseConfidenceModelConfig:
    """
    Configuration options for confidence models
    """

    # confidence for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # segmentation confidence
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        """Build the confidence model configuration from global config options."""
        return DensePoseConfidenceModelConfig(
            uv_confidence=DensePoseUVConfidenceConfig(
                enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
                epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
                type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
            ),
            segm_confidence=DensePoseSegmConfidenceConfig(
                enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
                epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
            ),
        )
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
from .vertex_direct_embedder import VertexDirectEmbedder
from .vertex_feature_embedder import VertexFeatureEmbedder
from .embedder import Embedder
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import logging
import numpy as np
import pickle
from enum import Enum
from typing import Optional
import torch
from torch import nn
from detectron2.config import CfgNode
from detectron2.utils.file_io import PathManager
from .vertex_direct_embedder import VertexDirectEmbedder
from .vertex_feature_embedder import VertexFeatureEmbedder
class EmbedderType(Enum):
    """
    Embedder type which defines how vertices are mapped into the embedding space:
     - "vertex_direct": direct vertex embedding
     - "vertex_feature": embedding vertex features
    """

    # each vertex owns its own embedding vector (VertexDirectEmbedder)
    VERTEX_DIRECT = "vertex_direct"
    # embeddings are computed from per-vertex features (VertexFeatureEmbedder)
    VERTEX_FEATURE = "vertex_feature"
def create_embedder(embedder_spec: CfgNode, embedder_dim: int) -> nn.Module:
    """
    Create an embedder based on the provided configuration

    Args:
        embedder_spec (CfgNode): embedder configuration
        embedder_dim (int): embedding space dimensionality
    Return:
        An embedder instance for the specified configuration
        Raises ValueError, in case of unexpected embedder type
    """
    embedder_type = EmbedderType(embedder_spec.TYPE)
    if embedder_type == EmbedderType.VERTEX_DIRECT:
        embedder = VertexDirectEmbedder(
            num_vertices=embedder_spec.NUM_VERTICES,
            embed_dim=embedder_dim,
        )
    elif embedder_type == EmbedderType.VERTEX_FEATURE:
        embedder = VertexFeatureEmbedder(
            num_vertices=embedder_spec.NUM_VERTICES,
            feature_dim=embedder_spec.FEATURE_DIM,
            embed_dim=embedder_dim,
            train_features=embedder_spec.FEATURES_TRAINABLE,
        )
    else:
        raise ValueError(f"Unexpected embedder type {embedder_type}")
    # optional warm start, common to both embedder kinds
    if embedder_spec.INIT_FILE != "":
        embedder.load(embedder_spec.INIT_FILE)
    # freeze parameters when the embedder is not meant to be trained
    if not embedder_spec.IS_TRAINABLE:
        embedder.requires_grad_(False)
    return embedder
class Embedder(nn.Module):
    """
    Embedder module that serves as a container for embedders to use with different
    meshes. Extends Module to automatically save / load state dict.
    """

    # prefix under which embedder weights live in full-model checkpoints
    DEFAULT_MODEL_CHECKPOINT_PREFIX = "roi_heads.embedder."

    def __init__(self, cfg: CfgNode):
        """
        Initialize mesh embedders. An embedder for mesh `i` is stored in a submodule
        "embedder_{i}".

        Args:
            cfg (CfgNode): configuration options
        """
        super(Embedder, self).__init__()
        self.mesh_names = set()
        embedder_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
        logger = logging.getLogger(__name__)
        for mesh_name, embedder_spec in cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.items():
            logger.info(f"Adding embedder embedder_{mesh_name} with spec {embedder_spec}")
            self.add_module(f"embedder_{mesh_name}", create_embedder(embedder_spec, embedder_dim))
            self.mesh_names.add(mesh_name)
        if cfg.MODEL.WEIGHTS != "":
            self.load_from_model_checkpoint(cfg.MODEL.WEIGHTS)

    def load_from_model_checkpoint(self, fpath: str, prefix: Optional[str] = None):
        """
        Load embedder weights from a full model checkpoint (.pkl or torch
        format), keeping only state entries under `prefix` (prefix stripped).
        """
        if prefix is None:
            prefix = Embedder.DEFAULT_MODEL_CHECKPOINT_PREFIX
        state_dict = None
        if fpath.endswith(".pkl"):
            # NOTE(review): pickle.load can execute arbitrary code -- only load
            # checkpoints from trusted sources. latin1 encoding is presumably
            # for checkpoints pickled under Python 2 -- confirm
            with PathManager.open(fpath, "rb") as hFile:
                state_dict = pickle.load(hFile, encoding="latin1")
        else:
            with PathManager.open(fpath, "rb") as hFile:
                state_dict = torch.load(hFile, map_location=torch.device("cpu"))
        if state_dict is not None and "model" in state_dict:
            state_dict_local = {}
            for key in state_dict["model"]:
                if key.startswith(prefix):
                    v_key = state_dict["model"][key]
                    # .pkl checkpoints may store numpy arrays; convert to tensors
                    if isinstance(v_key, np.ndarray):
                        v_key = torch.from_numpy(v_key)
                    state_dict_local[key[len(prefix) :]] = v_key
            # non-strict loading to finetune on different meshes
            self.load_state_dict(state_dict_local, strict=False)

    def forward(self, mesh_name: str) -> torch.Tensor:
        """
        Produce vertex embeddings for the specific mesh; vertex embeddings are
        a tensor of shape [N, D] where:
            N = number of vertices
            D = number of dimensions in the embedding space

        Args:
            mesh_name (str): name of a mesh for which to obtain vertex embeddings
        Return:
            Vertex embeddings, a tensor of shape [N, D]
        """
        return getattr(self, f"embedder_{mesh_name}")()

    def has_embeddings(self, mesh_name: str) -> bool:
        # True if a submodule was registered for this mesh in __init__
        return hasattr(self, f"embedder_{mesh_name}")
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import torch
from torch.nn import functional as F
def squared_euclidean_distance_matrix(pts1: torch.Tensor, pts2: torch.Tensor) -> torch.Tensor:
    """
    Compute the matrix of pairwise squared Euclidean distances.

    Uses the expansion ||p - q||^2 = ||p||^2 + ||q||^2 - 2 <p, q>.

    Args:
        pts1: Tensor [M x D], M is the number of points, D is feature dimensionality
        pts2: Tensor [N x D], N is the number of points, D is feature dimensionality
    Return:
        Tensor [M, N]: matrix of squared Euclidean distances; at index (m, n)
        it contains || pts1[m] - pts2[n] ||^2
    """
    sq_norms_1 = (pts1 * pts1).sum(1, keepdim=True)
    sq_norms_2 = (pts2 * pts2).sum(1, keepdim=True).t()
    cross_terms = torch.mm(-2 * pts1, pts2.t())
    edm = cross_terms + sq_norms_1 + sq_norms_2
    return edm.contiguous()
def normalize_embeddings(embeddings: torch.Tensor, epsilon: float = 1e-6) -> torch.Tensor:
    """
    L2-normalize each row of an [N, D] embedding tensor.

    Row norms are clamped from below by `epsilon`, so all-zero rows stay zero
    instead of producing NaNs.

    Args:
        embeddings (tensor [N, D]): N D-dimensional embedding vectors
        epsilon (float): minimum value for a vector norm
    Return:
        Normalized embeddings (tensor [N, D]), such that L2 vector norms are all equal to 1.
    """
    row_norms = embeddings.norm(p=None, dim=1, keepdim=True).clamp(min=epsilon)
    return embeddings / row_norms
def get_closest_vertices_mask_from_ES(
    E: torch.Tensor,
    S: torch.Tensor,
    h: int,
    w: int,
    mesh_vertex_embeddings: torch.Tensor,
    device: torch.device,
):
    """
    Resize embeddings and segmentation to a target bounding box and, for every
    foreground pixel, find the index of the nearest mesh vertex in embedding
    space.

    Args:
        E (tensor [1, D, H, W]): D-dimensional embedding vectors for every point of the
            default-sized box
        S (tensor [1, 2, H, W]): 2-dimensional segmentation mask for every point of the
            default-sized box
        h (int): height of the target bounding box
        w (int): width of the target bounding box
        mesh_vertex_embeddings (tensor [N, D]): vertex embeddings for a chosen mesh
            N is the number of vertices in the mesh, D is feature dimensionality
        device (torch.device): device to move the tensors to
    Return:
        Closest Vertices (tensor [h, w]), int, for every point of the resulting box
        Segmentation mask (tensor [h, w]), boolean, for every point of the resulting box
    """
    resized_embeddings = F.interpolate(E, size=(h, w), mode="bilinear")[0].to(device)
    resized_segm = F.interpolate(S, size=(h, w), mode="bilinear")[0].to(device)
    # Foreground = pixels where the non-background channel wins the argmax
    mask = resized_segm.argmax(0) > 0
    closest_vertices = torch.zeros(mask.shape, dtype=torch.long, device=device)
    pixel_embeddings = resized_embeddings[:, mask].t()
    if len(pixel_embeddings) == 0:
        # No foreground pixels: nothing to match
        return closest_vertices, mask
    # Process foreground pixels in chunks to bound the [chunk x N] distance
    # matrix size and avoid possible OOM
    chunk_size = 10_000
    nearest_per_chunk = [
        torch.argmin(
            squared_euclidean_distance_matrix(chunk, mesh_vertex_embeddings), dim=1
        )
        for chunk in torch.split(pixel_embeddings, chunk_size)
    ]
    closest_vertices[mask] = torch.cat(nearest_per_chunk)
    return closest_vertices, mask
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import pickle
import torch
from torch import nn
from detectron2.utils.file_io import PathManager
from .utils import normalize_embeddings
class VertexDirectEmbedder(nn.Module):
    """
    Class responsible for embedding vertices. Vertex embeddings take
    the form of a tensor of size [N, D], where
        N = number of vertices
        D = number of dimensions in the embedding space
    """

    def __init__(self, num_vertices: int, embed_dim: int):
        """
        Initialize embedder. Embeddings are zero-initialized (see
        `reset_parameters`) and are expected to be trained or loaded
        from a file via `load`.

        Args:
            num_vertices (int): number of vertices to embed
            embed_dim (int): number of dimensions in the embedding space
        """
        super().__init__()
        # torch.empty instead of the legacy torch.Tensor(n, d) constructor;
        # values are overwritten by reset_parameters() below
        self.embeddings = nn.Parameter(torch.empty(num_vertices, embed_dim))
        self.reset_parameters()

    @torch.no_grad()
    def reset_parameters(self):
        """
        Reset embeddings to all zeros.
        """
        self.embeddings.zero_()

    def forward(self) -> torch.Tensor:
        """
        Produce vertex embeddings, a tensor of shape [N, D] where:
            N = number of vertices
            D = number of dimensions in the embedding space

        Return:
            L2-normalized vertex embeddings, a tensor of shape [N, D]
        """
        return normalize_embeddings(self.embeddings)

    @torch.no_grad()
    def load(self, fpath: str):
        """
        Load embedding data from a pickle file; only the "embeddings" key
        is read, and values are cast to float on the parameter's device.

        Args:
            fpath (str): file path to load data from
        """
        with PathManager.open(fpath, "rb") as hFile:
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load checkpoints from trusted sources.
            data = pickle.load(hFile)
            for name in ["embeddings"]:
                if name in data:
                    getattr(self, name).copy_(
                        torch.tensor(data[name]).float().to(device=getattr(self, name).device)
                    )
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import pickle
import torch
from torch import nn
from detectron2.utils.file_io import PathManager
from .utils import normalize_embeddings
class VertexFeatureEmbedder(nn.Module):
    """
    Class responsible for embedding vertex features. Mapping from
    feature space to the embedding space is a tensor of size [K, D], where
        K = number of dimensions in the feature space
        D = number of dimensions in the embedding space
    Vertex features is a tensor of size [N, K], where
        N = number of vertices
        K = number of dimensions in the feature space
    Vertex embeddings are computed as F * E = tensor of size [N, D]
    """

    def __init__(
        self, num_vertices: int, feature_dim: int, embed_dim: int, train_features: bool = False
    ):
        """
        Initialize embedder. Features and the feature-to-embedding mapping
        are zero-initialized (see `reset_parameters`) and are expected to be
        trained or loaded from a file via `load`.

        Args:
            num_vertices (int): number of vertices to embed
            feature_dim (int): number of dimensions in the feature space
            embed_dim (int): number of dimensions in the embedding space
            train_features (bool): determines whether vertex features should
                be trained (default: False)
        """
        super().__init__()
        # torch.empty instead of the legacy torch.Tensor(n, k) constructor;
        # values are overwritten by reset_parameters() below
        if train_features:
            self.features = nn.Parameter(torch.empty(num_vertices, feature_dim))
        else:
            # Non-trainable features are stored as a buffer so they move with
            # the module and are serialized, but receive no gradients
            self.register_buffer("features", torch.empty(num_vertices, feature_dim))
        self.embeddings = nn.Parameter(torch.empty(feature_dim, embed_dim))
        self.reset_parameters()

    @torch.no_grad()
    def reset_parameters(self):
        """
        Reset features and the feature-to-embedding mapping to all zeros.
        """
        self.features.zero_()
        self.embeddings.zero_()

    def forward(self) -> torch.Tensor:
        """
        Produce vertex embeddings, a tensor of shape [N, D] where:
            N = number of vertices
            D = number of dimensions in the embedding space

        Return:
            L2-normalized vertex embeddings, a tensor of shape [N, D]
        """
        return normalize_embeddings(torch.mm(self.features, self.embeddings))

    @torch.no_grad()
    def load(self, fpath: str):
        """
        Load data from a pickle file; "features" and "embeddings" keys are
        read when present, and values are cast to float on the tensor's device.

        Args:
            fpath (str): file path to load data from
        """
        with PathManager.open(fpath, "rb") as hFile:
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load checkpoints from trusted sources.
            data = pickle.load(hFile)
            for name in ["features", "embeddings"]:
                if name in data:
                    getattr(self, name).copy_(
                        torch.tensor(data[name]).float().to(device=getattr(self, name).device)
                    )
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from collections import OrderedDict
from detectron2.checkpoint import DetectionCheckpointer
def _rename_HRNet_weights(weights):
# We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
# common to all HRNet pretrained weights, and should be enough to accurately identify them
if (
len(weights["model"].keys()) == 1956
and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
):
hrnet_weights = OrderedDict()
for k in weights["model"].keys():
hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
return {"model": hrnet_weights}
else:
return weights
class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights:
    checkpoints recognized as HRNet pretrained weights are renamed on load
    (see `_rename_HRNet_weights`).
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """
        Load a checkpoint file, renaming HRNet weights when detected.
        """
        return _rename_HRNet_weights(super()._load_file(filename))
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import List
import torch
from detectron2.config import CfgNode
from detectron2.structures import Instances
from detectron2.structures.boxes import matched_pairwise_iou
class DensePoseDataFilter:
    """
    Keeps only the proposals usable for DensePose training: those whose image
    carries DensePose GT (or mask GT, when enabled) and whose IoU with the
    matched GT box exceeds a configured threshold.
    """

    def __init__(self, cfg: CfgNode):
        # Minimum IoU between a proposal and its matched GT box for the
        # proposal to be kept for DensePose training
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        # If True, proposals that only carry `gt_masks` (no `gt_densepose`)
        # are also kept (coarse segmentation trained by masks)
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image,
        Returns:
            list[Tensor]: filtered features (currently returned unchanged,
                see the TODO below)
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            # Skip images that carry neither DensePose GT nor (when enabled) mask GT
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            # keep a proposal iff it has either a DensePose or a mask annotation
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
"""
MIT License
Copyright (c) 2019 Microsoft
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import BACKBONE_REGISTRY
from detectron2.modeling.backbone.backbone import Backbone
from .hrnet import build_pose_hrnet_backbone
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids)

    Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py

    Args:
        bottom_up: (list) output of HRNet
        in_features (list): names of the input features (output of HRNet)
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        n_out_features (int): number of output stages
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): Have one conv per output, or share one with all the outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        if self.share_conv:
            # A single 3x3 conv shared by all pyramid levels
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            # One 3x3 conv per pyramid level
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )
        # Custom change: Replaces a simple bilinear interpolation
        # (learned upsampling: branch i is upsampled by stride 2**i via a
        # transposed convolution followed by BN + ReLU)
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )
        # Custom change: Replaces a couple (reduction conv + pooling) by one conv
        # (a conv with kernel_size == stride == 2**i both reduces channels and
        # downsamples for pyramid level i)
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )
        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d
        # Backbone metadata: output names "p1".."pK" with their channel counts
        # and strides (stride doubles per level, starting at 4)
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}
        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        """
        Kaiming-initialize all nn.Conv2d weights and zero their biases.
        NOTE(review): assumes every nn.Conv2d has a bias — true for the convs
        built above (default bias=True); the bias-free ConvTranspose2d layers
        are not nn.Conv2d instances. Confirm if new conv layers are added.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        """
        Run the HRNet bottom-up, fuse its branches, and produce a feature
        pyramid as a dict mapping output names ("p1", ...) to tensors.
        """
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        # Upsample every branch with its learned interpolation module
        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        # Crop all branches to the smallest spatial size, then concat channels
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        # Produce each pyramid level from the fused map via its reduction conv
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        # Final per-level 3x3 conv (shared or per-level, see __init__)
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))
        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """
    Build an HRNet backbone topped with an HRFPN neck, as configured by `cfg`.
    """
    stage4 = cfg.MODEL.HRNET.STAGE4
    # One input feature per HRNet stage-4 branch, named "p1".."pK"
    branch_names = ["p%d" % (i + 1) for i in range(stage4.NUM_BRANCHES)]
    bottom_up = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        bottom_up,
        branch_names,
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        stage4.NUM_CHANNELS,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment