# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import BinaryIO, Dict, Union
import torch
def normalized_coords_transform(x0, y0, w, h):
"""
    Coordinates transform that maps the top left corner to (-1, -1) and the
    bottom right corner to (1, 1). Used to initialize the grid for
    torch.nn.functional.grid_sample.
"""
def f(p):
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
return f
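# Illustrative usage (not part of the original file): a transform for a box of
# width 100 and height 200 anchored at (10, 20) maps its corners onto the
# [-1, 1] range expected by torch.nn.functional.grid_sample:
#
#   f = normalized_coords_transform(10, 20, 100, 200)
#   f((10, 20))    # -> (-1.0, -1.0), top left corner
#   f((110, 220))  # -> (1.0, 1.0), bottom right corner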
class DensePoseTransformData:
# Horizontal symmetry label transforms used for horizontal flip
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
# fmt: off
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
# fmt: on
def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device):
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
self.uv_symmetries = uv_symmetries
        self.device = device
def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData":
"""
Convert transform data to the specified device
Args:
device (torch.device): device to convert the data to
copy (bool): flag that specifies whether to copy or to reference the data
in case the device is the same
        Returns:
An instance of `DensePoseTransformData` with data stored on the specified device
"""
if self.device == device and not copy:
return self
uv_symmetry_map = {}
for key in self.uv_symmetries:
uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy)
return DensePoseTransformData(uv_symmetry_map, device)
@staticmethod
def load(io: Union[str, BinaryIO]):
"""
Args:
            io (str or binary file-like object): input file to load data from
Returns:
An instance of `DensePoseTransformData` with transforms loaded from the file
"""
import scipy.io
uv_symmetry_map = scipy.io.loadmat(io)
uv_symmetry_map_torch = {}
for key in ["U_transforms", "V_transforms"]:
uv_symmetry_map_torch[key] = []
map_src = uv_symmetry_map[key]
map_dst = uv_symmetry_map_torch[key]
for i in range(map_src.shape[1]):
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0)
transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu"))
return transform_data
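# Illustrative usage (hypothetical file name): load the UV symmetry transforms
# from a .mat file and move them to a GPU when one is available:
#
#   transform_data = DensePoseTransformData.load("UV_symmetry_transforms.mat")
#   transform_data = transform_data.to(torch.device("cuda"), copy=False)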
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from typing import Any, Dict, Optional, Tuple
class EntrySelector:
"""
Base class for entry selectors
"""
@staticmethod
def from_string(spec: str) -> "EntrySelector":
if spec == "*":
return AllEntrySelector()
return FieldEntrySelector(spec)
class AllEntrySelector(EntrySelector):
"""
Selector that accepts all entries
"""
SPECIFIER = "*"
def __call__(self, entry):
return True
class FieldEntrySelector(EntrySelector):
"""
Selector that accepts only entries that match provided field
specifier(s). Only a limited set of specifiers is supported for now:
<specifiers>::=<specifier>[<comma><specifiers>]
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
<field_name> is a valid identifier
<type> ::= "int" | "str"
<equal> ::= "="
<comma> ::= ","
<type_delim> ::= ":"
<value_or_range> ::= <value> | <range>
<range> ::= <value><range_delim><value>
<range_delim> ::= "-"
<value> is a string without spaces and special symbols
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
"""
_SPEC_DELIM = ","
_TYPE_DELIM = ":"
_RANGE_DELIM = "-"
_EQUAL = "="
_ERROR_PREFIX = "Invalid field selector specifier"
class _FieldEntryValuePredicate:
"""
Predicate that checks strict equality for the specified entry field
"""
def __init__(self, name: str, typespec: Optional[str], value: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.value = value
def __call__(self, entry):
return entry[self.name] == self.type(self.value)
class _FieldEntryRangePredicate:
"""
Predicate that checks whether an entry field falls into the specified range
"""
def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.vmin = vmin
self.vmax = vmax
def __call__(self, entry):
return (entry[self.name] >= self.type(self.vmin)) and (
entry[self.name] <= self.type(self.vmax)
)
def __init__(self, spec: str):
self._predicates = self._parse_specifier_into_predicates(spec)
def __call__(self, entry: Dict[str, Any]):
for predicate in self._predicates:
if not predicate(entry):
return False
return True
def _parse_specifier_into_predicates(self, spec: str):
predicates = []
specs = spec.split(self._SPEC_DELIM)
for subspec in specs:
eq_idx = subspec.find(self._EQUAL)
if eq_idx > 0:
field_name_with_type = subspec[:eq_idx]
field_name, field_type = self._parse_field_name_type(field_name_with_type)
field_value_or_range = subspec[eq_idx + 1 :]
if self._is_range_spec(field_value_or_range):
vmin, vmax = self._get_range_spec(field_value_or_range)
predicate = FieldEntrySelector._FieldEntryRangePredicate(
field_name, field_type, vmin, vmax
)
else:
predicate = FieldEntrySelector._FieldEntryValuePredicate(
field_name, field_type, field_value_or_range
)
predicates.append(predicate)
elif eq_idx == 0:
self._parse_error(f'"{subspec}", field name is empty!')
else:
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
return predicates
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
if type_delim_idx > 0:
field_name = field_name_with_type[:type_delim_idx]
field_type = field_name_with_type[type_delim_idx + 1 :]
elif type_delim_idx == 0:
self._parse_error(f'"{field_name_with_type}", field name is empty!')
else:
field_name = field_name_with_type
field_type = None
# pyre-fixme[61]: `field_name` may not be initialized here.
# pyre-fixme[61]: `field_type` may not be initialized here.
return field_name, field_type
def _is_range_spec(self, field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
return delim_idx > 0
def _get_range_spec(self, field_value_or_range):
if self._is_range_spec(field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
vmin = field_value_or_range[:delim_idx]
vmax = field_value_or_range[delim_idx + 1 :]
return vmin, vmax
else:
            self._parse_error(f'"{field_value_or_range}", range of values expected!')
def _parse_error(self, msg):
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
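# Illustrative usage: a sketch of the specifier syntax documented above.
#
#   selector = EntrySelector.from_string("dataset=coco_2014_train,frame_id:int=0-100")
#   selector({"dataset": "coco_2014_train", "frame_id": 42})   # -> True
#   selector({"dataset": "coco_2014_train", "frame_id": 200})  # -> False
#   EntrySelector.from_string("*")({"anything": "goes"})       # -> True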
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import logging
def verbosity_to_level(verbosity) -> int:
if verbosity is not None:
if verbosity == 0:
return logging.WARNING
elif verbosity == 1:
return logging.INFO
elif verbosity >= 2:
return logging.DEBUG
return logging.WARNING
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from detectron2.data import MetadataCatalog
from detectron2.utils.file_io import PathManager
from densepose import DensePoseTransformData
def load_for_dataset(dataset_name):
path = MetadataCatalog.get(dataset_name).densepose_transform_src
densepose_transform_data_fpath = PathManager.get_local_path(path)
return DensePoseTransformData.load(densepose_transform_data_fpath)
def load_from_cfg(cfg):
return load_for_dataset(cfg.DATASETS.TEST[0])
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import logging
import numpy as np
import cv2
import torch
Image = np.ndarray
Boxes = torch.Tensor
class MatrixVisualizer:
"""
Base visualizer for matrix data
"""
def __init__(
self,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
val_scale=1.0,
alpha=0.7,
interp_method_matrix=cv2.INTER_LINEAR,
interp_method_mask=cv2.INTER_NEAREST,
):
self.inplace = inplace
self.cmap = cmap
self.val_scale = val_scale
self.alpha = alpha
self.interp_method_matrix = interp_method_matrix
self.interp_method_mask = interp_method_mask
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
self._check_image(image_bgr)
self._check_mask_matrix(mask, matrix)
if self.inplace:
image_target_bgr = image_bgr
else:
image_target_bgr = image_bgr * 0
x, y, w, h = [int(v) for v in bbox_xywh]
if w <= 0 or h <= 0:
return image_bgr
mask, matrix = self._resize(mask, matrix, w, h)
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
matrix_scaled = matrix.astype(np.float32) * self.val_scale
_EPSILON = 1e-6
if np.any(matrix_scaled > 255 + _EPSILON):
logger = logging.getLogger(__name__)
logger.warning(
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
)
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
image_target_bgr[y : y + h, x : x + w, :] = (
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
)
return image_target_bgr.astype(np.uint8)
def _resize(self, mask, matrix, w, h):
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
return mask, matrix
def _check_image(self, image_rgb):
assert len(image_rgb.shape) == 3
assert image_rgb.shape[2] == 3
assert image_rgb.dtype == np.uint8
def _check_mask_matrix(self, mask, matrix):
assert len(matrix.shape) == 2
assert len(mask.shape) == 2
assert mask.dtype == np.uint8
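# Illustrative usage (hypothetical shapes and values): overlay a colormapped
# matrix on a BGR image inside a bounding box.
#
#   visualizer = MatrixVisualizer(val_scale=255.0 / 24)
#   image = np.zeros((480, 640, 3), dtype=np.uint8)
#   mask = np.ones((64, 64), dtype=np.uint8)        # foreground pixels of the box
#   matrix = np.random.randint(0, 25, (64, 64))     # e.g. part labels in [0, 24]
#   image = visualizer.visualize(image, mask, matrix, bbox_xywh=[100, 100, 64, 64])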
class RectangleVisualizer:
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color=_COLOR_GREEN, thickness=1):
self.color = color
self.thickness = thickness
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
x, y, w, h = bbox_xywh
color = color or self.color
thickness = thickness or self.thickness
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
return image_bgr
class PointsVisualizer:
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
self.color_bgr = color_bgr
self.r = r
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
for j, pt_xy in enumerate(pts_xy):
x, y = pt_xy
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
r = rs[j] if rs is not None else self.r
            cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
return image_bgr
class TextVisualizer:
_COLOR_GRAY = (218, 227, 218)
_COLOR_WHITE = (255, 255, 255)
def __init__(
self,
font_face=cv2.FONT_HERSHEY_SIMPLEX,
font_color_bgr=_COLOR_GRAY,
font_scale=0.35,
font_line_type=cv2.LINE_AA,
font_line_thickness=1,
fill_color_bgr=_COLOR_WHITE,
fill_color_transparency=1.0,
frame_color_bgr=_COLOR_WHITE,
frame_color_transparency=1.0,
frame_thickness=1,
):
self.font_face = font_face
self.font_color_bgr = font_color_bgr
self.font_scale = font_scale
self.font_line_type = font_line_type
self.font_line_thickness = font_line_thickness
self.fill_color_bgr = fill_color_bgr
self.fill_color_transparency = fill_color_transparency
self.frame_color_bgr = frame_color_bgr
self.frame_color_transparency = frame_color_transparency
self.frame_thickness = frame_thickness
def visualize(self, image_bgr, txt, topleft_xy):
txt_w, txt_h = self.get_text_size_wh(txt)
topleft_xy = tuple(map(int, topleft_xy))
x, y = topleft_xy
if self.frame_color_transparency < 1.0:
t = self.frame_thickness
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
* self.frame_color_transparency
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
).astype(float)
if self.fill_color_transparency < 1.0:
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
).astype(float)
cv2.putText(
image_bgr,
txt,
topleft_xy,
self.font_face,
self.font_scale,
self.font_color_bgr,
self.font_line_thickness,
self.font_line_type,
)
return image_bgr
def get_text_size_wh(self, txt):
((txt_w, txt_h), _) = cv2.getTextSize(
txt, self.font_face, self.font_scale, self.font_line_thickness
)
return txt_w, txt_h
class CompoundVisualizer:
def __init__(self, visualizers):
self.visualizers = visualizers
def visualize(self, image_bgr, data):
        assert len(data) == len(
            self.visualizers
        ), "The number of data entries {} should match the number of visualizers {}".format(
            len(data), len(self.visualizers)
        )
image = image_bgr
for i, visualizer in enumerate(self.visualizers):
image = visualizer.visualize(image, data[i])
return image
def __str__(self):
visualizer_str = ", ".join([str(v) for v in self.visualizers])
return "Compound Visualizer [{}]".format(visualizer_str)
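# Illustrative usage: each visualizer consumes its own entry from `data`
# (`visualizer_a`, `visualizer_b` and their inputs are hypothetical).
#
#   compound = CompoundVisualizer([visualizer_a, visualizer_b])
#   image = compound.visualize(image_bgr, [data_for_a, data_for_b])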
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
from .base import RectangleVisualizer, TextVisualizer
class BoundingBoxVisualizer:
def __init__(self):
self.rectangle_visualizer = RectangleVisualizer()
def visualize(self, image_bgr, boxes_xywh):
for bbox_xywh in boxes_xywh:
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
return image_bgr
class ScoredBoundingBoxVisualizer:
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None, **kwargs):
if bbox_visualizer_params is None:
bbox_visualizer_params = {}
if score_visualizer_params is None:
score_visualizer_params = {}
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
self.visualizer_score = TextVisualizer(**score_visualizer_params)
def visualize(self, image_bgr, scored_bboxes):
boxes_xywh, box_scores = scored_bboxes
assert len(boxes_xywh) == len(
box_scores
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
len(boxes_xywh), len(box_scores)
)
for i, box_xywh in enumerate(boxes_xywh):
score_i = box_scores[i]
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
score_txt = "{0:6.4f}".format(score_i)
topleft_xy = box_xywh[0], box_xywh[1]
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
return image_bgr
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import numpy as np
from typing import Iterable, Optional, Tuple
import cv2
from densepose.structures import DensePoseDataRelative
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
class DensePoseDataCoarseSegmentationVisualizer:
"""
Visualizer for ground truth segmentation
"""
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
alpha=alpha,
)
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
matrix = densepose_data.segm.numpy()
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
return image_bgr
class DensePoseDataPointsVisualizer:
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA, **kwargs):
self.points_visualizer = PointsVisualizer()
self.densepose_data_to_value_fn = densepose_data_to_value_fn
self.cmap = cmap
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
x0, y0, w, h = bbox_xywh.numpy()
x = densepose_data.x.numpy() * w / 255.0 + x0
y = densepose_data.y.numpy() * h / 255.0 + y0
pts_xy = zip(x, y)
if self.densepose_data_to_value_fn is None:
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
else:
v = self.densepose_data_to_value_fn(densepose_data)
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
return image_bgr
def _densepose_data_u_for_cmap(densepose_data):
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
return u.astype(np.uint8)
def _densepose_data_v_for_cmap(densepose_data):
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
return v.astype(np.uint8)
def _densepose_data_i_for_cmap(densepose_data):
i = (
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
* 255.0
/ DensePoseDataRelative.N_PART_LABELS
)
return i.astype(np.uint8)
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
def __init__(self, **kwargs):
super(DensePoseDataPointsUVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_u_for_cmap, **kwargs
)
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
def __init__(self, **kwargs):
super(DensePoseDataPointsVVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_v_for_cmap, **kwargs
)
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
def __init__(self, **kwargs):
super(DensePoseDataPointsIVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_i_for_cmap, **kwargs
)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import numpy as np
from typing import Optional, Tuple
import cv2
from densepose.structures import DensePoseDataRelative
from ..structures import DensePoseChartPredictorOutput
from .base import Boxes, Image, MatrixVisualizer
class DensePoseOutputsVisualizer:
def __init__(
self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, to_visualize=None, **kwargs
):
        assert to_visualize in ("I", "U", "V"), "can only visualize I, U or V"
self.to_visualize = to_visualize
if self.to_visualize == "I":
val_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS
else:
val_scale = 1.0
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
def visualize(
self,
image_bgr: Image,
dp_output_with_bboxes: Tuple[Optional[DensePoseChartPredictorOutput], Optional[Boxes]],
) -> Image:
densepose_output, bboxes_xywh = dp_output_with_bboxes
if densepose_output is None or bboxes_xywh is None:
return image_bgr
assert isinstance(
densepose_output, DensePoseChartPredictorOutput
), "DensePoseChartPredictorOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.coarse_segm
I = densepose_output.fine_segm # noqa
U = densepose_output.u
V = densepose_output.v
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
bbox_xywh = bboxes_xywh[n]
if self.to_visualize == "I":
vis = segmentation
elif self.to_visualize in "UV":
U_or_Vn = {"U": U, "V": V}[self.to_visualize][n].cpu().numpy().astype(np.float32)
vis = np.zeros(segmentation.shape, dtype=np.float32)
for partId in range(U_or_Vn.shape[0]):
vis[segmentation == partId] = (
U_or_Vn[partId][segmentation == partId].clip(0, 1) * 255
)
# pyre-fixme[61]: `vis` may not be initialized here.
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, vis, bbox_xywh)
return image_bgr
class DensePoseOutputsUVisualizer(DensePoseOutputsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="U", **kwargs)
class DensePoseOutputsVVisualizer(DensePoseOutputsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="V", **kwargs)
class DensePoseOutputsFineSegmentationVisualizer(DensePoseOutputsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="I", **kwargs)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# pyre-unsafe
import json
import numpy as np
from functools import lru_cache
from typing import Dict, List, Optional, Tuple
import cv2
import torch
from detectron2.utils.file_io import PathManager
from densepose.modeling import build_densepose_embedder
from densepose.modeling.cse.utils import get_closest_vertices_mask_from_ES
from ..data.utils import get_class_to_mesh_name_mapping
from ..structures import DensePoseEmbeddingPredictorOutput
from ..structures.mesh import create_mesh
from .base import Boxes, Image, MatrixVisualizer
from .densepose_results_textures import get_texture_atlas
@lru_cache()
def get_xyz_vertex_embedding(mesh_name: str, device: torch.device):
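    # Computes one scalar per mesh vertex, normalized to [0, 1], that the
    # visualizers below colormap: a precomputed MDS embedding for the SMPL
    # 27554-vertex mesh, and a normalized vertex-coordinate sum for other meshes.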
if mesh_name == "smpl_27554":
embed_path = PathManager.get_local_path(
"https://dl.fbaipublicfiles.com/densepose/data/cse/mds_d=256.npy"
)
embed_map, _ = np.load(embed_path, allow_pickle=True)
embed_map = torch.tensor(embed_map).float()[:, 0]
embed_map -= embed_map.min()
embed_map /= embed_map.max()
else:
mesh = create_mesh(mesh_name, device)
embed_map = mesh.vertices.sum(dim=1)
embed_map -= embed_map.min()
embed_map /= embed_map.max()
embed_map = embed_map**2
return embed_map
class DensePoseOutputsVertexVisualizer:
def __init__(
self,
cfg,
inplace=True,
cmap=cv2.COLORMAP_JET,
alpha=0.7,
device="cuda",
default_class=0,
**kwargs,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
self.embedder = build_densepose_embedder(cfg)
self.device = torch.device(device)
self.default_class = default_class
self.mesh_vertex_embeddings = {
mesh_name: self.embedder(mesh_name).to(self.device)
for mesh_name in self.class_to_mesh_name.values()
if self.embedder.has_embeddings(mesh_name)
}
def visualize(
self,
image_bgr: Image,
outputs_boxes_xywh_classes: Tuple[
Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]]
],
) -> Image:
if outputs_boxes_xywh_classes[0] is None:
return image_bgr
S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes(
outputs_boxes_xywh_classes
)
for n in range(N):
x, y, w, h = bboxes_xywh[n].int().tolist()
mesh_name = self.class_to_mesh_name[pred_classes[n]]
closest_vertices, mask = get_closest_vertices_mask_from_ES(
E[[n]],
S[[n]],
h,
w,
self.mesh_vertex_embeddings[mesh_name],
self.device,
)
embed_map = get_xyz_vertex_embedding(mesh_name, self.device)
vis = (embed_map[closest_vertices].clip(0, 1) * 255.0).cpu().numpy()
mask_numpy = mask.cpu().numpy().astype(dtype=np.uint8)
image_bgr = self.mask_visualizer.visualize(image_bgr, mask_numpy, vis, [x, y, w, h])
return image_bgr
def extract_and_check_outputs_and_boxes(self, outputs_boxes_xywh_classes):
densepose_output, bboxes_xywh, pred_classes = outputs_boxes_xywh_classes
if pred_classes is None:
pred_classes = [self.default_class] * len(bboxes_xywh)
assert isinstance(
densepose_output, DensePoseEmbeddingPredictorOutput
), "DensePoseEmbeddingPredictorOutput expected, {} encountered".format(
type(densepose_output)
)
S = densepose_output.coarse_segm
E = densepose_output.embedding
N = S.size(0)
assert N == E.size(
0
), "CSE coarse_segm {} and embeddings {}" " should have equal first dim size".format(
S.size(), E.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
        assert N == len(pred_classes), (
            "number of predicted classes {}"
            " should be equal to first dim size of outputs {}".format(len(pred_classes), N)
        )
)
return S, E, N, bboxes_xywh, pred_classes
def get_texture_atlases(json_str: Optional[str]) -> Optional[Dict[str, Optional[np.ndarray]]]:
"""
json_str is a JSON string representing a mesh_name -> texture_atlas_path dictionary
"""
if json_str is None:
return None
paths = json.loads(json_str)
return {mesh_name: get_texture_atlas(path) for mesh_name, path in paths.items()}
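# Illustrative usage (hypothetical path):
#
#   atlases = get_texture_atlases('{"smpl_27554": "path/to/texture_atlas.png"}')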
class DensePoseOutputsTextureVisualizer(DensePoseOutputsVertexVisualizer):
def __init__(
self,
cfg,
texture_atlases_dict,
device="cuda",
default_class=0,
**kwargs,
):
self.embedder = build_densepose_embedder(cfg)
self.texture_image_dict = {}
self.alpha_dict = {}
for mesh_name in texture_atlases_dict.keys():
if texture_atlases_dict[mesh_name].shape[-1] == 4: # Image with alpha channel
self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, -1] / 255.0
self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, :3]
else:
self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name].sum(axis=-1) > 0
self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name]
self.device = torch.device(device)
self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
self.default_class = default_class
self.mesh_vertex_embeddings = {
mesh_name: self.embedder(mesh_name).to(self.device)
for mesh_name in self.class_to_mesh_name.values()
}
def visualize(
self,
image_bgr: Image,
outputs_boxes_xywh_classes: Tuple[
Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]]
],
) -> Image:
image_target_bgr = image_bgr.copy()
if outputs_boxes_xywh_classes[0] is None:
return image_target_bgr
S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes(
outputs_boxes_xywh_classes
)
meshes = {
p: create_mesh(self.class_to_mesh_name[p], self.device) for p in np.unique(pred_classes)
}
for n in range(N):
x, y, w, h = bboxes_xywh[n].int().cpu().numpy()
mesh_name = self.class_to_mesh_name[pred_classes[n]]
closest_vertices, mask = get_closest_vertices_mask_from_ES(
E[[n]],
S[[n]],
h,
w,
self.mesh_vertex_embeddings[mesh_name],
self.device,
)
uv_array = meshes[pred_classes[n]].texcoords[closest_vertices].permute((2, 0, 1))
uv_array = uv_array.cpu().numpy().clip(0, 1)
textured_image = self.generate_image_with_texture(
image_target_bgr[y : y + h, x : x + w],
uv_array,
mask.cpu().numpy(),
self.class_to_mesh_name[pred_classes[n]],
)
if textured_image is None:
continue
image_target_bgr[y : y + h, x : x + w] = textured_image
return image_target_bgr
def generate_image_with_texture(self, bbox_image_bgr, uv_array, mask, mesh_name):
alpha = self.alpha_dict.get(mesh_name)
texture_image = self.texture_image_dict.get(mesh_name)
if alpha is None or texture_image is None:
return None
U, V = uv_array
        x_index = (U * (texture_image.shape[1] - 1)).astype(int)
        y_index = (V * (texture_image.shape[0] - 1)).astype(int)
local_texture = texture_image[y_index, x_index][mask]
local_alpha = np.expand_dims(alpha[y_index, x_index][mask], -1)
output_image = bbox_image_bgr.copy()
output_image[mask] = output_image[mask] * (1 - local_alpha) + local_texture * local_alpha
return output_image.astype(np.uint8)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import logging
import numpy as np
from typing import List, Optional, Tuple
import cv2
import torch
from densepose.structures import DensePoseDataRelative
from ..structures import DensePoseChartResult
from .base import Boxes, Image, MatrixVisualizer
class DensePoseResultsVisualizer:
def visualize(
self,
image_bgr: Image,
results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]],
) -> Image:
densepose_result, boxes_xywh = results_and_boxes_xywh
if densepose_result is None or boxes_xywh is None:
return image_bgr
boxes_xywh = boxes_xywh.cpu().numpy()
context = self.create_visualization_context(image_bgr)
for i, result in enumerate(densepose_result):
iuv_array = torch.cat(
(result.labels[None].type(torch.float32), result.uv * 255.0)
).type(torch.uint8)
self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh[i])
image_bgr = self.context_to_image_bgr(context)
return image_bgr
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
pass
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
def __init__(
self,
data_extractor,
segm_extractor,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
alpha=0.7,
val_scale=1.0,
**kwargs,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
self.data_extractor = data_extractor
self.segm_extractor = segm_extractor
def context_to_image_bgr(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
image_bgr = self.get_image_bgr_from_context(context)
matrix = self.data_extractor(iuv_arr)
segm = self.segm_extractor(iuv_arr)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[segm > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
def _extract_i_from_iuvarr(iuv_arr):
return iuv_arr[0, :, :]
def _extract_u_from_iuvarr(iuv_arr):
return iuv_arr[1, :, :]
def _extract_v_from_iuvarr(iuv_arr):
return iuv_arr[2, :, :]
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
def __init__(self, levels=10, **kwargs):
self.levels = levels
self.plot_args = kwargs
def create_visualization_context(self, image_bgr: Image):
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
context = {}
context["image_bgr"] = image_bgr
dpi = 100
height_inches = float(image_bgr.shape[0]) / dpi
width_inches = float(image_bgr.shape[1]) / dpi
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
plt.axes([0, 0, 1, 1])
plt.axis("off")
context["fig"] = fig
canvas = FigureCanvas(fig)
context["canvas"] = canvas
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
return context
def context_to_image_bgr(self, context):
fig = context["fig"]
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
canvas = context["canvas"]
canvas.draw()
        image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
image_rgb = image_1d.reshape(h, w, 3)
image_bgr = image_rgb[:, :, ::-1].copy()
return image_bgr
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
import matplotlib.pyplot as plt
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
extent = (
bbox_xywh[0],
bbox_xywh[0] + bbox_xywh[2],
bbox_xywh[1],
bbox_xywh[1] + bbox_xywh[3],
)
plt.contour(u, self.levels, extent=extent, **self.plot_args)
plt.contour(v, self.levels, extent=extent, **self.plot_args)
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
"""
Contour visualization using marching squares
"""
def __init__(self, levels=10, **kwargs):
# TODO: colormap is hardcoded
cmap = cv2.COLORMAP_PARULA
if isinstance(levels, int):
self.levels = np.linspace(0, 1, levels)
else:
self.levels = levels
if "linewidths" in kwargs:
self.linewidths = kwargs["linewidths"]
else:
self.linewidths = [1] * len(self.levels)
self.plot_args = kwargs
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
self.level_colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
image_bgr = self.get_image_bgr_from_context(context)
segm = _extract_i_from_iuvarr(iuv_arr)
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
self._contours(image_bgr, u, segm, bbox_xywh)
self._contours(image_bgr, v, segm, bbox_xywh)
def _contours(self, image_bgr, arr, segm, bbox_xywh):
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
mask = segm == part_idx
if not np.any(mask):
continue
arr_min = np.amin(arr[mask])
arr_max = np.amax(arr[mask])
I, J = np.nonzero(mask)
i0 = np.amin(I)
i1 = np.amax(I) + 1
j0 = np.amin(J)
j1 = np.amax(J) + 1
if (j1 == j0 + 1) or (i1 == i0 + 1):
continue
Nw = arr.shape[1] - 1
Nh = arr.shape[0] - 1
for level_idx, level in enumerate(self.levels):
if (level < arr_min) or (level > arr_max):
continue
vp = arr[i0:i1, j0:j1] >= level
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
mp = mask[i0:i1, j0:j1]
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
it = np.nditer(bin_codes, flags=["multi_index"])
color_bgr = self.level_colors_bgr[level_idx]
linewidth = self.linewidths[level_idx]
while not it.finished:
if (it[0] != 0) and (it[0] != 15):
i, j = it.multi_index
if bin_mask_codes[i, j] != 0:
self._draw_line(
image_bgr,
arr,
mask,
level,
color_bgr,
linewidth,
it[0],
it.multi_index,
bbox_xywh,
Nw,
Nh,
(i0, j0),
)
it.iternext()
def _draw_line(
self,
image_bgr,
arr,
mask,
v,
color_bgr,
linewidth,
bin_code,
multi_idx,
bbox_xywh,
Nw,
Nh,
offset,
):
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
x0, y0, w, h = bbox_xywh
x1 = x0 + w
y1 = y0 + h
for line in lines:
x0r, y0r = line[0]
x1r, y1r = line[1]
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
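        # Marching squares cell lookup: `bin_code` encodes which of the four cell
        # corners lie above the iso-level `v` (bit 1: top-left, bit 2: bottom-left,
        # bit 4: bottom-right, bit 8: top-right, matching the encoding built in
        # `_contours`). Each case yields 0, 1 or 2 line segments whose endpoints
        # are linearly interpolated along the cell edges.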
i0, j0 = offset
i, j = multi_idx
i += i0
j += j0
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
x0i = float(j) / Nw
y0j = float(i) / Nh
He = 1.0 / Nh
We = 1.0 / Nw
if (bin_code == 1) or (bin_code == 14):
a = (v - v0) / (v1 - v0)
b = (v - v0) / (v3 - v0)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j)
return [(pt1, pt2)]
elif (bin_code == 2) or (bin_code == 13):
a = (v - v0) / (v1 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 3) or (bin_code == 12):
a = (v - v0) / (v3 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 4) or (bin_code == 11):
a = (v - v1) / (v2 - v1)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j + He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 6) or (bin_code == 9):
a = (v - v0) / (v1 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 7) or (bin_code == 8):
a = (v - v0) / (v3 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif bin_code == 5:
a1 = (v - v0) / (v1 - v0)
b1 = (v - v1) / (v2 - v1)
pt11 = (x0i, y0j + a1 * He)
pt12 = (x0i + b1 * We, y0j + He)
a2 = (v - v0) / (v3 - v0)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
elif bin_code == 10:
a1 = (v - v0) / (v3 - v0)
b1 = (v - v0) / (v1 - v0)
pt11 = (x0i + a1 * We, y0j)
pt12 = (x0i, y0j + b1 * He)
a2 = (v - v1) / (v2 - v1)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j + He)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
return []
try:
import matplotlib
matplotlib.use("Agg")
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Could not import matplotlib, using custom contour visualizer")
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
_extract_i_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
**kwargs,
)
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super(DensePoseResultsUVisualizer, self).__init__(
_extract_u_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=1.0,
**kwargs,
)
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
super(DensePoseResultsVVisualizer, self).__init__(
_extract_v_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=1.0,
**kwargs,
)
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import numpy as np
from typing import List, Optional, Tuple
import torch
from detectron2.data.detection_utils import read_image
from ..structures import DensePoseChartResult
from .base import Boxes, Image
from .densepose_results import DensePoseResultsVisualizer
def get_texture_atlas(path: Optional[str]) -> Optional[np.ndarray]:
if path is None:
return None
    # Reading images this way downsamples 16-bit images to 8-bit.
    # If 16-bit images are needed, we can replace this with cv2.imread using the
    # cv2.IMREAD_UNCHANGED flag (with cv2 we would also need that flag to keep
    # alpha channels). The rest of the pipeline would need to be adapted to
    # 16-bit images too.
bgr_image = read_image(path)
rgb_image = np.copy(bgr_image) # Convert BGR -> RGB
rgb_image[:, :, :3] = rgb_image[:, :, 2::-1] # Works with alpha channel
return rgb_image
class DensePoseResultsVisualizerWithTexture(DensePoseResultsVisualizer):
"""
texture_atlas: An image, size 6N * 4N, with N * N squares for each of the 24 body parts.
It must follow the grid found at https://github.com/facebookresearch/DensePose/blob/master/DensePoseData/demo_data/texture_atlas_200.png # noqa
For each body part, U is proportional to the x coordinate, and (1 - V) to y
"""
def __init__(self, texture_atlas, **kwargs):
self.texture_atlas = texture_atlas
self.body_part_size = texture_atlas.shape[0] // 6
assert self.body_part_size == texture_atlas.shape[1] // 4
def visualize(
self,
image_bgr: Image,
results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]],
) -> Image:
densepose_result, boxes_xywh = results_and_boxes_xywh
if densepose_result is None or boxes_xywh is None:
return image_bgr
boxes_xywh = boxes_xywh.int().cpu().numpy()
texture_image, alpha = self.get_texture()
for i, result in enumerate(densepose_result):
iuv_array = torch.cat((result.labels[None], result.uv.clamp(0, 1)))
x, y, w, h = boxes_xywh[i]
bbox_image = image_bgr[y : y + h, x : x + w]
image_bgr[y : y + h, x : x + w] = self.generate_image_with_texture(
texture_image, alpha, bbox_image, iuv_array.cpu().numpy()
)
return image_bgr
def get_texture(self):
N = self.body_part_size
texture_image = np.zeros([24, N, N, self.texture_atlas.shape[-1]])
for i in range(4):
for j in range(6):
texture_image[(6 * i + j), :, :, :] = self.texture_atlas[
N * j : N * (j + 1), N * i : N * (i + 1), :
]
if texture_image.shape[-1] == 4: # Image with alpha channel
alpha = texture_image[:, :, :, -1] / 255.0
texture_image = texture_image[:, :, :, :3]
else:
alpha = texture_image.sum(axis=-1) > 0
return texture_image, alpha
def generate_image_with_texture(self, texture_image, alpha, bbox_image_bgr, iuv_array):
I, U, V = iuv_array
generated_image_bgr = bbox_image_bgr.copy()
for PartInd in range(1, 25):
x, y = np.where(I == PartInd)
x_index = (U[x, y] * (self.body_part_size - 1)).astype(int)
y_index = ((1 - V[x, y]) * (self.body_part_size - 1)).astype(int)
part_alpha = np.expand_dims(alpha[PartInd - 1, y_index, x_index], -1)
generated_image_bgr[I == PartInd] = (
generated_image_bgr[I == PartInd] * (1 - part_alpha)
+ texture_image[PartInd - 1, y_index, x_index] * part_alpha
)
return generated_image_bgr.astype(np.uint8)
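# Illustrative usage (hypothetical atlas path): texture chart-based DensePose
# results on an image.
#
#   atlas = get_texture_atlas("texture_atlas_200.png")
#   visualizer = DensePoseResultsVisualizerWithTexture(atlas)
#   image = visualizer.visualize(image_bgr, (densepose_results, boxes_xywh))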
# Copyright (c) Facebook, Inc. and its affiliates.
# pyre-unsafe
import logging
from typing import List, Optional, Sequence, Tuple
import torch
from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances
from densepose.converters import ToChartResultConverterWithConfidences
from densepose.structures import (
DensePoseChartResultWithConfidences,
DensePoseEmbeddingPredictorOutput,
)
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose_outputs_vertex import DensePoseOutputsVertexVisualizer
from densepose.vis.densepose_results import DensePoseResultsVisualizer
from .base import CompoundVisualizer
Scores = Sequence[float]
DensePoseChartResultsWithConfidences = List[DensePoseChartResultWithConfidences]
def extract_scores_from_instances(instances: Instances, select=None):
if instances.has("scores"):
return instances.scores if select is None else instances.scores[select]
return None
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
if instances.has("pred_boxes"):
boxes_xywh = instances.pred_boxes.tensor.clone()
boxes_xywh[:, 2] -= boxes_xywh[:, 0]
boxes_xywh[:, 3] -= boxes_xywh[:, 1]
return boxes_xywh if select is None else boxes_xywh[select]
return None
def create_extractor(visualizer: object):
"""
Create an extractor for the provided visualizer
"""
if isinstance(visualizer, CompoundVisualizer):
extractors = [create_extractor(v) for v in visualizer.visualizers]
return CompoundExtractor(extractors)
elif isinstance(visualizer, DensePoseResultsVisualizer):
return DensePoseResultExtractor()
elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
elif isinstance(visualizer, BoundingBoxVisualizer):
return extract_boxes_xywh_from_instances
elif isinstance(visualizer, DensePoseOutputsVertexVisualizer):
return DensePoseOutputsExtractor()
else:
logger = logging.getLogger(__name__)
logger.error(f"Could not create extractor for {visualizer}")
return None
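# Illustrative usage: pair a visualizer with its matching extractor and feed
# predicted instances through both (`instances` is a hypothetical model output).
#
#   visualizer = DensePoseResultsVisualizer()
#   extractor = create_extractor(visualizer)
#   data = extractor(instances)
#   image = visualizer.visualize(image_bgr, data)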
class BoundingBoxExtractor:
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
return boxes_xywh
class ScoredBoundingBoxExtractor:
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if (scores is None) or (boxes_xywh is None):
return (boxes_xywh, scores)
if select is not None:
scores = scores[select]
boxes_xywh = boxes_xywh[select]
return (boxes_xywh, scores)
class DensePoseResultExtractor:
"""
Extracts DensePose chart result with confidences from instances
"""
def __call__(
self, instances: Instances, select=None
) -> Tuple[Optional[DensePoseChartResultsWithConfidences], Optional[torch.Tensor]]:
if instances.has("pred_densepose") and instances.has("pred_boxes"):
dpout = instances.pred_densepose
boxes_xyxy = instances.pred_boxes
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if select is not None:
dpout = dpout[select]
boxes_xyxy = boxes_xyxy[select]
converter = ToChartResultConverterWithConfidences()
results = [converter.convert(dpout[i], boxes_xyxy[[i]]) for i in range(len(dpout))]
return results, boxes_xywh
else:
return None, None
class DensePoseOutputsExtractor:
"""
Extracts DensePose result from instances
"""
def __call__(
self,
instances: Instances,
select=None,
) -> Tuple[
Optional[DensePoseEmbeddingPredictorOutput], Optional[torch.Tensor], Optional[List[int]]
]:
if not (instances.has("pred_densepose") and instances.has("pred_boxes")):
return None, None, None
dpout = instances.pred_densepose
boxes_xyxy = instances.pred_boxes
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if instances.has("pred_classes"):
classes = instances.pred_classes.tolist()
else:
classes = None
if select is not None:
dpout = dpout[select]
boxes_xyxy = boxes_xyxy[select]
if classes is not None:
classes = classes[select]
return dpout, boxes_xywh, classes
class CompoundExtractor:
"""
Extracts data for CompoundVisualizer
"""
def __init__(self, extractors):
self.extractors = extractors
def __call__(self, instances: Instances, select=None):
datas = []
for extractor in self.extractors:
data = extractor(instances, select)
datas.append(data)
return datas
class NmsFilteredExtractor:
"""
Extracts data in the format accepted by NmsFilteredVisualizer
"""
def __init__(self, extractor, iou_threshold):
self.extractor = extractor
self.iou_threshold = iou_threshold
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if boxes_xywh is None:
return None
select_local_idx = batched_nms(
boxes_xywh,
scores,
torch.zeros(len(scores), dtype=torch.int32),
iou_threshold=self.iou_threshold,
).squeeze()
select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
select_local[select_local_idx] = True
select = select_local if select is None else (select & select_local)
return self.extractor(instances, select=select)
class ScoreThresholdedExtractor:
"""
Extracts data in the format accepted by ScoreThresholdedVisualizer
"""
def __init__(self, extractor, min_score):
self.extractor = extractor
self.min_score = min_score
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
if scores is None:
return None
select_local = scores > self.min_score
select = select_local if select is None else (select & select_local)
data = self.extractor(instances, select=select)
return data
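# Illustrative usage: drop low-scoring detections before extraction
# (`instances` is a hypothetical model output).
#
#   extractor = ScoreThresholdedExtractor(DensePoseResultExtractor(), min_score=0.8)
#   data = extractor(instances)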
# Copyright (c) Facebook, Inc. and its affiliates.
from .utils.env import setup_environment
setup_environment()
# This line will be programmatically read/written by setup.py.
# Leave it at the bottom of this file and don't touch it.
__version__ = "0.6"
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# File:
from . import catalog as _UNUSED # register the handler
from .detection_checkpoint import DetectionCheckpointer
from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import re
from typing import Dict, List
import torch
def convert_basic_c2_names(original_keys):
"""
Apply some basic name conversion to names in C2 weights.
It only deals with typical backbone models.
Args:
original_keys (list[str]):
Returns:
list[str]: The same number of strings matching those in original_keys.
"""
layer_keys = copy.deepcopy(original_keys)
layer_keys = [
{"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
] # some hard-coded mappings
layer_keys = [k.replace("_", ".") for k in layer_keys]
layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
# Uniform both bn and gn names to "norm"
layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]
# stem
layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
# to avoid mis-matching with "conv1" in other components (e.g. detection head)
layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]
# layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5)
# layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
# layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
# layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
# layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]
# blocks
layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]
# DensePose substitutions
layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
return layer_keys
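# Illustrative example: a typical C2 backbone blob name after conversion.
#
#   convert_basic_c2_names(["res2_0_branch2a_bn_s"])
#   # -> ["res2.0.conv1.norm.weight"]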
def convert_c2_detectron_names(weights):
"""
Map Caffe2 Detectron weight names to Detectron2 names.
Args:
weights (dict): name -> tensor
Returns:
dict: detectron2 names -> tensor
dict: detectron2 names -> C2 names
"""
logger = logging.getLogger(__name__)
logger.info("Renaming Caffe2 weights ......")
original_keys = sorted(weights.keys())
layer_keys = copy.deepcopy(original_keys)
layer_keys = convert_basic_c2_names(layer_keys)
# --------------------------------------------------------------------------
# RPN hidden representation conv
# --------------------------------------------------------------------------
# FPN case
# In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
# shared for all other levels, hence the appearance of "fpn2"
layer_keys = [
k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
]
# Non-FPN case
layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]
# --------------------------------------------------------------------------
# RPN box transformation conv
# --------------------------------------------------------------------------
# FPN case (see note above about "fpn2")
layer_keys = [
k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
for k in layer_keys
]
layer_keys = [
k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
for k in layer_keys
]
# Non-FPN case
layer_keys = [
k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys
]
layer_keys = [
k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
for k in layer_keys
]
# --------------------------------------------------------------------------
# Fast R-CNN box head
# --------------------------------------------------------------------------
layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
# 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]
# --------------------------------------------------------------------------
# FPN lateral and output convolutions
# --------------------------------------------------------------------------
def fpn_map(name):
"""
Look for keys with the following patterns:
1) Starts with "fpn.inner."
Example: "fpn.inner.res2.2.sum.lateral.weight"
Meaning: These are lateral pathway convolutions
2) Starts with "fpn.res"
Example: "fpn.res2.2.sum.weight"
Meaning: These are FPN output convolutions
"""
splits = name.split(".")
norm = ".norm" if "norm" in splits else ""
if name.startswith("fpn.inner."):
# splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
stage = int(splits[2][len("res") :])
return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
elif name.startswith("fpn.res"):
# splits example: ['fpn', 'res2', '2', 'sum', 'weight']
stage = int(splits[1][len("res") :])
return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
return name
layer_keys = [fpn_map(k) for k in layer_keys]
# --------------------------------------------------------------------------
# Mask R-CNN mask head
# --------------------------------------------------------------------------
# roi_heads.StandardROIHeads case
layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
# roi_heads.Res5ROIHeads case
layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]
# --------------------------------------------------------------------------
# Keypoint R-CNN head
# --------------------------------------------------------------------------
# interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
layer_keys = [
k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys
]
layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]
# --------------------------------------------------------------------------
# Done with replacements
# --------------------------------------------------------------------------
assert len(set(layer_keys)) == len(layer_keys)
assert len(original_keys) == len(layer_keys)
new_weights = {}
new_keys_to_original_keys = {}
for orig, renamed in zip(original_keys, layer_keys):
new_keys_to_original_keys[renamed] = orig
if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
# remove the meaningless prediction weight for background class
new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
new_weights[renamed] = weights[orig][new_start_idx:]
logger.info(
"Remove prediction weight for background class in {}. The shape changes from "
"{} to {}.".format(
renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
)
)
elif renamed.startswith("cls_score."):
# move weights of bg class from original index 0 to last index
logger.info(
"Move classification weights for background class in {} from index 0 to "
"index {}.".format(renamed, weights[orig].shape[0] - 1)
)
new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
else:
new_weights[renamed] = weights[orig]
return new_weights, new_keys_to_original_keys
# Note that the current matching is not symmetric:
# it assumes model_state_dict will have longer names.
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True):
"""
    Match names between the two state dicts, and return a new ckpt_state_dict with names
    converted to match model_state_dict using heuristics. The returned dict can later be
    loaded with the fvcore checkpointer.
If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2
model and will be renamed at first.
Strategy: suppose that the models that we will create will have prefixes appended
to each of its keys, for example due to an extra level of nesting that the original
pre-trained weights from ImageNet won't contain. For example, model.state_dict()
might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
res2.conv1.weight. We thus want to match both parameters together.
For that, we look for each model weight, look among all loaded keys if there is one
that is a suffix of the current weight name, and use it if that's the case.
    If multiple matches exist, take the one with the longest matching name. For example,
    for the same model as before, the pretrained
weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
we want to match backbone[0].body.conv1.weight to conv1.weight, and
backbone[0].body.res2.conv1.weight to res2.conv1.weight.
"""
model_keys = sorted(model_state_dict.keys())
if c2_conversion:
ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
# original_keys: the name in the original dict (before renaming)
else:
original_keys = {x: x for x in ckpt_state_dict.keys()}
ckpt_keys = sorted(ckpt_state_dict.keys())
def match(a, b):
# Matched ckpt_key should be a complete (starts with '.') suffix.
# For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
# but matches whatever_conv1 or mesh_head.whatever_conv1.
return a == b or a.endswith("." + b)
    # get a matrix of string matches, where each (i, j) entry corresponds to the length
    # of the ckpt_key string, if it matches
match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
    # in case of multiple matches, prefer the one with the longest matched name
max_match_size, idxs = match_matrix.max(1)
# remove indices that correspond to no-match
idxs[max_match_size == 0] = -1
logger = logging.getLogger(__name__)
    # matched_keys: matched checkpoint key --> matched model key
matched_keys = {}
result_state_dict = {}
for idx_model, idx_ckpt in enumerate(idxs.tolist()):
if idx_ckpt == -1:
continue
key_model = model_keys[idx_model]
key_ckpt = ckpt_keys[idx_ckpt]
value_ckpt = ckpt_state_dict[key_ckpt]
shape_in_model = model_state_dict[key_model].shape
if shape_in_model != value_ckpt.shape:
logger.warning(
"Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
key_ckpt, value_ckpt.shape, key_model, shape_in_model
)
)
logger.warning(
"{} will not be loaded. Please double check and see if this is desired.".format(
key_ckpt
)
)
continue
assert key_model not in result_state_dict
result_state_dict[key_model] = value_ckpt
if key_ckpt in matched_keys: # already added to matched_keys
logger.error(
"Ambiguity found for {} in checkpoint!"
"It matches at least two keys in the model ({} and {}).".format(
key_ckpt, key_model, matched_keys[key_ckpt]
)
)
raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")
matched_keys[key_ckpt] = key_model
# logging:
matched_model_keys = sorted(matched_keys.values())
if len(matched_model_keys) == 0:
logger.warning("No weights in checkpoint matched with model.")
return ckpt_state_dict
common_prefix = _longest_common_prefix(matched_model_keys)
rev_matched_keys = {v: k for k, v in matched_keys.items()}
original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys}
model_key_groups = _group_keys_by_module(matched_model_keys, original_keys)
table = []
memo = set()
for key_model in matched_model_keys:
if key_model in memo:
continue
if key_model in model_key_groups:
group = model_key_groups[key_model]
memo |= set(group)
shapes = [tuple(model_state_dict[k].shape) for k in group]
table.append(
(
_longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*",
_group_str([original_keys[k] for k in group]),
" ".join([str(x).replace(" ", "") for x in shapes]),
)
)
else:
key_checkpoint = original_keys[key_model]
shape = str(tuple(model_state_dict[key_model].shape))
table.append((key_model[len(common_prefix) :], key_checkpoint, shape))
submodule_str = common_prefix[:-1] if common_prefix else "model"
logger.info(
f"Following weights matched with submodule {submodule_str} - Total num: {len(table)}"
)
unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())]
for k in unmatched_ckpt_keys:
result_state_dict[k] = ckpt_state_dict[k]
return result_state_dict
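# A minimal usage sketch of the suffix-matching heuristic (hypothetical key
# names and shapes, for illustration only; not called by this module):
def _demo_align_and_update():
    model_sd = {
        "backbone.res2.conv1.weight": torch.zeros(8, 8, 3, 3),
        "backbone.conv1.weight": torch.zeros(8, 3, 7, 7),
    }
    ckpt_sd = {
        # suffix of backbone.res2.conv1.weight; the longer match wins there
        "res2.conv1.weight": torch.ones(8, 8, 3, 3),
        # suffix of backbone.conv1.weight only
        "conv1.weight": torch.ones(8, 3, 7, 7),
    }
    aligned = align_and_update_state_dicts(model_sd, ckpt_sd, c2_conversion=False)
    assert aligned["backbone.res2.conv1.weight"].eq(1).all()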
def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]):
"""
Params in the same submodule are grouped together.
Args:
keys: names of all parameters
original_names: mapping from parameter name to their name in the checkpoint
Returns:
        dict[name -> all names in the same group (including the key itself)]
"""
def _submodule_name(key):
pos = key.rfind(".")
if pos < 0:
return None
prefix = key[: pos + 1]
return prefix
all_submodules = [_submodule_name(k) for k in keys]
all_submodules = [x for x in all_submodules if x]
all_submodules = sorted(all_submodules, key=len)
ret = {}
for prefix in all_submodules:
group = [k for k in keys if k.startswith(prefix)]
if len(group) <= 1:
continue
original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group])
if len(original_name_lcp) == 0:
            # don't group weights if the original names don't share a prefix
continue
for k in group:
if k in ret:
continue
ret[k] = group
return ret
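# Example (hypothetical keys): for matched model keys
#     ["head.fc1.bias", "head.fc1.weight"]
# whose checkpoint names share a common prefix such as "fc1_", both keys map
# to the same group ["head.fc1.bias", "head.fc1.weight"] in the returned dict.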
def _longest_common_prefix(names: List[str]) -> str:
"""
["abc.zfg", "abc.zef"] -> "abc."
"""
    names = [n.split(".") for n in names]
    # the common prefix of all names equals the common prefix of the
    # lexicographically smallest and largest names
    m1, m2 = min(names), max(names)
    ret = []
    for a, b in zip(m1, m2):
        if a != b:
            # stop at the first mismatching segment; segments after a mismatch
            # are not part of a common prefix even if they happen to be equal
            break
        ret.append(a)
    return ".".join(ret) + "." if ret else ""
def _longest_common_prefix_str(names: List[str]) -> str:
    """
    ["abcd", "abce"] -> "abc"
    """
    m1, m2 = min(names), max(names)
lcp = []
for a, b in zip(m1, m2):
if a == b:
lcp.append(a)
else:
break
lcp = "".join(lcp)
return lcp
def _group_str(names: List[str]) -> str:
"""
Turn "common1", "common2", "common3" into "common{1,2,3}"
"""
lcp = _longest_common_prefix_str(names)
rest = [x[len(lcp) :] for x in names]
rest = "{" + ",".join(rest) + "}"
ret = lcp + rest
# add some simplification for BN specifically
ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*")
ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*")
return ret
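# Examples of the abbreviation above (hypothetical parameter names):
#     _group_str(["res2_bn_beta", "res2_bn_gamma"])
#         -> "res2_bn_{beta,gamma}"
#     _group_str(["res2_bn_beta", "res2_bn_running_mean",
#                 "res2_bn_running_var", "res2_bn_gamma"])
#         -> "res2_bn_*"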
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
from detectron2.utils.file_io import PathHandler, PathManager
class ModelCatalog:
"""
Store mappings from names to third-party models.
"""
S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron"
    # MSRA models have STRIDE_IN_1X1=True; all others use False.
# NOTE: all BN models here have fused BN into an affine layer.
# As a result, you should only load them to a model with "FrozenBN".
# Loading them to a model with regular BN or SyncBN is wrong.
# Even when loaded to FrozenBN, it is still different from affine by an epsilon,
# which should be negligible for training.
    # NOTE: all models here use PIXEL_STD=[1,1,1]
# NOTE: Most of the BN models here are no longer used. We use the
# re-converted pre-trained models under detectron2 model zoo instead.
C2_IMAGENET_MODELS = {
"MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
"MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
"FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
"FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
"FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
"FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl",
"FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl",
}
C2_DETECTRON_PATH_FORMAT = (
"{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950
)
C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival"
C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival"
# format: {model_name} -> part of the url
C2_DETECTRON_MODELS = {
"35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950
"35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950
"35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950
"36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950
"35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950
"35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950
"35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950
"36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950
"48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950
"37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950
"35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950
"35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950
"36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950
}
@staticmethod
def get(name):
if name.startswith("Caffe2Detectron/COCO"):
return ModelCatalog._get_c2_detectron_baseline(name)
if name.startswith("ImageNetPretrained/"):
return ModelCatalog._get_c2_imagenet_pretrained(name)
raise RuntimeError("model not present in the catalog: {}".format(name))
@staticmethod
def _get_c2_imagenet_pretrained(name):
prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX
name = name[len("ImageNetPretrained/") :]
name = ModelCatalog.C2_IMAGENET_MODELS[name]
url = "/".join([prefix, name])
return url
@staticmethod
def _get_c2_detectron_baseline(name):
name = name[len("Caffe2Detectron/COCO/") :]
url = ModelCatalog.C2_DETECTRON_MODELS[name]
if "keypoint_rcnn" in name:
dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS
else:
dataset = ModelCatalog.C2_DATASET_COCO
if "35998355/rpn_R-50-C4_1x" in name:
            # this one model is somehow different from the others ...
type = "rpn"
else:
type = "generalized_rcnn"
# Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`.
url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format(
prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset
)
return url
class ModelCatalogHandler(PathHandler):
"""
    Resolve URLs that look like catalog://.
"""
PREFIX = "catalog://"
def _get_supported_prefixes(self):
return [self.PREFIX]
def _get_local_path(self, path, **kwargs):
logger = logging.getLogger(__name__)
catalog_path = ModelCatalog.get(path[len(self.PREFIX) :])
logger.info("Catalog entry {} points to {}".format(path, catalog_path))
return PathManager.get_local_path(catalog_path, **kwargs)
def _open(self, path, mode="r", **kwargs):
return PathManager.open(self._get_local_path(path), mode, **kwargs)
PathManager.register_handler(ModelCatalogHandler())
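# Usage sketch: resolve a catalog name to the underlying URL. `get` only
# formats the URL from the tables above; no download happens here.
if __name__ == "__main__":
    print(ModelCatalog.get("ImageNetPretrained/MSRA/R-50"))
    print(ModelCatalog.get("Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"))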
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import os
import pickle
from urllib.parse import parse_qs, urlparse
import torch
from fvcore.common.checkpoint import Checkpointer
from torch.nn.parallel import DistributedDataParallel
import detectron2.utils.comm as comm
from detectron2.utils.file_io import PathManager
from .c2_model_loading import align_and_update_state_dicts
class DetectionCheckpointer(Checkpointer):
"""
Same as :class:`Checkpointer`, but is able to:
1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models.
2. correctly load checkpoints that are only available on the master worker
"""
def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
is_main_process = comm.is_main_process()
super().__init__(
model,
save_dir,
save_to_disk=is_main_process if save_to_disk is None else save_to_disk,
**checkpointables,
)
self.path_manager = PathManager
self._parsed_url_during_load = None
def load(self, path, *args, **kwargs):
assert self._parsed_url_during_load is None
need_sync = False
logger = logging.getLogger(__name__)
logger.info("[DetectionCheckpointer] Loading from {} ...".format(path))
if path and isinstance(self.model, DistributedDataParallel):
path = self.path_manager.get_local_path(path)
has_file = os.path.isfile(path)
all_has_file = comm.all_gather(has_file)
if not all_has_file[0]:
raise OSError(f"File {path} not found on main worker.")
if not all(all_has_file):
logger.warning(
f"Not all workers can read checkpoint {path}. "
"Training may fail to fully resume."
)
# TODO: broadcast the checkpoint file contents from main
# worker, and load from it instead.
need_sync = True
if not has_file:
path = None # don't load if not readable
if path:
parsed_url = urlparse(path)
self._parsed_url_during_load = parsed_url
path = parsed_url._replace(query="").geturl() # remove query from filename
path = self.path_manager.get_local_path(path)
ret = super().load(path, *args, **kwargs)
if need_sync:
logger.info("Broadcasting model states from main worker ...")
self.model._sync_params_and_buffers()
self._parsed_url_during_load = None # reset to None
return ret
def _load_file(self, filename):
if filename.endswith(".pkl"):
with PathManager.open(filename, "rb") as f:
data = pickle.load(f, encoding="latin1")
if "model" in data and "__author__" in data:
# file is in Detectron2 model zoo format
self.logger.info("Reading a file from '{}'".format(data["__author__"]))
return data
else:
# assume file is from Caffe2 / Detectron1 model zoo
if "blobs" in data:
# Detection models have "blobs", but ImageNet models don't
data = data["blobs"]
data = {k: v for k, v in data.items() if not k.endswith("_momentum")}
return {"model": data, "__author__": "Caffe2", "matching_heuristics": True}
elif filename.endswith(".pyth"):
# assume file is from pycls; no one else seems to use the ".pyth" extension
with PathManager.open(filename, "rb") as f:
data = torch.load(f)
assert (
"model_state" in data
), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'."
model_state = {
k: v
for k, v in data["model_state"].items()
if not k.endswith("num_batches_tracked")
}
return {"model": model_state, "__author__": "pycls", "matching_heuristics": True}
loaded = self._torch_load(filename)
if "model" not in loaded:
loaded = {"model": loaded}
assert self._parsed_url_during_load is not None, "`_load_file` must be called inside `load`"
parsed_url = self._parsed_url_during_load
queries = parse_qs(parsed_url.query)
if queries.pop("matching_heuristics", "False") == ["True"]:
loaded["matching_heuristics"] = True
if len(queries) > 0:
raise ValueError(
f"Unsupported query remaining: f{queries}, orginal filename: {parsed_url.geturl()}"
)
return loaded
def _torch_load(self, f):
return super()._load_file(f)
def _load_model(self, checkpoint):
if checkpoint.get("matching_heuristics", False):
self._convert_ndarray_to_tensor(checkpoint["model"])
# convert weights by name-matching heuristics
checkpoint["model"] = align_and_update_state_dicts(
self.model.state_dict(),
checkpoint["model"],
c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
)
# for non-caffe2 models, use standard ways to load it
incompatible = super()._load_model(checkpoint)
model_buffers = dict(self.model.named_buffers(recurse=False))
for k in ["pixel_mean", "pixel_std"]:
# Ignore missing key message about pixel_mean/std.
# Though they may be missing in old checkpoints, they will be correctly
# initialized from config anyway.
if k in model_buffers:
try:
incompatible.missing_keys.remove(k)
except ValueError:
pass
for k in incompatible.unexpected_keys[:]:
# Ignore unexpected keys about cell anchors. They exist in old checkpoints
# but now they are non-persistent buffers and will not be in new checkpoints.
if "anchor_generator.cell_anchors" in k:
incompatible.unexpected_keys.remove(k)
return incompatible
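# Usage sketch (hypothetical paths; `model` is any detectron2-style nn.Module):
#     checkpointer = DetectionCheckpointer(model, save_dir="output")
#     # appending "?matching_heuristics=True" to the path triggers the
#     # name-matching heuristics above even for non-Caffe2 checkpoints
#     checkpointer.load("output/model_final.pth?matching_heuristics=True")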