# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
import cv2
import torch
Image = np.ndarray
Boxes = torch.Tensor
class MatrixVisualizer(object):
"""
Base visualizer for matrix data
"""
def __init__(
self,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
val_scale=1.0,
alpha=0.7,
interp_method_matrix=cv2.INTER_LINEAR,
interp_method_mask=cv2.INTER_NEAREST,
):
self.inplace = inplace
self.cmap = cmap
self.val_scale = val_scale
self.alpha = alpha
self.interp_method_matrix = interp_method_matrix
self.interp_method_mask = interp_method_mask
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
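        """
        Overlay `matrix` onto the `bbox_xywh` region of `image_bgr`:
        values are scaled by `val_scale`, clipped to [0, 255], colormapped
        with `cmap` and alpha-blended into the image; pixels where `mask` is 0
        keep the underlying image content.
        """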
self._check_image(image_bgr)
self._check_mask_matrix(mask, matrix)
if self.inplace:
image_target_bgr = image_bgr
else:
image_target_bgr = image_bgr * 0
x, y, w, h = [int(v) for v in bbox_xywh]
if w <= 0 or h <= 0:
return image_bgr
mask, matrix = self._resize(mask, matrix, w, h)
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
matrix_scaled = matrix.astype(np.float32) * self.val_scale
_EPSILON = 1e-6
if np.any(matrix_scaled > 255 + _EPSILON):
logger = logging.getLogger(__name__)
            logger.warning(
                f"Matrix has values > {255 + _EPSILON} after scaling, clipping to [0..255]"
            )
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
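        # keep original image pixels where the mask is zero, then alpha-blend
        # the colormapped matrix into the bbox region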
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
image_target_bgr[y : y + h, x : x + w, :] = (
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
)
return image_target_bgr.astype(np.uint8)
def _resize(self, mask, matrix, w, h):
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            # pass interpolation by keyword: the third positional argument of
            # cv2.resize is the output array, not the interpolation method
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
return mask, matrix
def _check_image(self, image_rgb):
assert len(image_rgb.shape) == 3
assert image_rgb.shape[2] == 3
assert image_rgb.dtype == np.uint8
def _check_mask_matrix(self, mask, matrix):
assert len(matrix.shape) == 2
assert len(mask.shape) == 2
assert mask.dtype == np.uint8
class RectangleVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color=_COLOR_GREEN, thickness=1):
self.color = color
self.thickness = thickness
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
x, y, w, h = bbox_xywh
color = color or self.color
thickness = thickness or self.thickness
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
return image_bgr
class PointsVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
self.color_bgr = color_bgr
self.r = r
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
for j, pt_xy in enumerate(pts_xy):
x, y = pt_xy
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
r = rs[j] if rs is not None else self.r
            cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
return image_bgr
class TextVisualizer(object):
_COLOR_GRAY = (218, 227, 218)
_COLOR_WHITE = (255, 255, 255)
def __init__(
self,
font_face=cv2.FONT_HERSHEY_SIMPLEX,
font_color_bgr=_COLOR_GRAY,
font_scale=0.35,
font_line_type=cv2.LINE_AA,
font_line_thickness=1,
fill_color_bgr=_COLOR_WHITE,
fill_color_transparency=1.0,
frame_color_bgr=_COLOR_WHITE,
frame_color_transparency=1.0,
frame_thickness=1,
):
self.font_face = font_face
self.font_color_bgr = font_color_bgr
self.font_scale = font_scale
self.font_line_type = font_line_type
self.font_line_thickness = font_line_thickness
self.fill_color_bgr = fill_color_bgr
self.fill_color_transparency = fill_color_transparency
self.frame_color_bgr = frame_color_bgr
self.frame_color_transparency = frame_color_transparency
self.frame_thickness = frame_thickness
def visualize(self, image_bgr, txt, topleft_xy):
txt_w, txt_h = self.get_text_size_wh(txt)
topleft_xy = tuple(map(int, topleft_xy))
x, y = topleft_xy
if self.frame_color_transparency < 1.0:
t = self.frame_thickness
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
* self.frame_color_transparency
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
            ).astype(float)
if self.fill_color_transparency < 1.0:
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
            ).astype(float)
cv2.putText(
image_bgr,
txt,
topleft_xy,
self.font_face,
self.font_scale,
self.font_color_bgr,
self.font_line_thickness,
self.font_line_type,
)
return image_bgr
def get_text_size_wh(self, txt):
((txt_w, txt_h), _) = cv2.getTextSize(
txt, self.font_face, self.font_scale, self.font_line_thickness
)
return txt_w, txt_h
class CompoundVisualizer(object):
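    """
    Applies a sequence of visualizers to the same image; `data` should contain
    one entry per visualizer, in matching order.
    """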
def __init__(self, visualizers):
self.visualizers = visualizers
def visualize(self, image_bgr, data):
        assert len(data) == len(
            self.visualizers
        ), "The number of data entries {} should match the number of visualizers {}".format(
            len(data), len(self.visualizers)
        )
image = image_bgr
for i, visualizer in enumerate(self.visualizers):
image = visualizer.visualize(image, data[i])
return image
def __str__(self):
visualizer_str = ", ".join([str(v) for v in self.visualizers])
return "Compound Visualizer [{}]".format(visualizer_str)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .base import RectangleVisualizer, TextVisualizer
class BoundingBoxVisualizer(object):
def __init__(self):
self.rectangle_visualizer = RectangleVisualizer()
def visualize(self, image_bgr, boxes_xywh):
for bbox_xywh in boxes_xywh:
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
return image_bgr
class ScoredBoundingBoxVisualizer(object):
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
if bbox_visualizer_params is None:
bbox_visualizer_params = {}
if score_visualizer_params is None:
score_visualizer_params = {}
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
self.visualizer_score = TextVisualizer(**score_visualizer_params)
def visualize(self, image_bgr, scored_bboxes):
boxes_xywh, box_scores = scored_bboxes
assert len(boxes_xywh) == len(
box_scores
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
len(boxes_xywh), len(box_scores)
)
for i, box_xywh in enumerate(boxes_xywh):
score_i = box_scores[i]
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
score_txt = "{0:6.4f}".format(score_i)
topleft_xy = box_xywh[0], box_xywh[1]
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
return image_bgr
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Iterable, Optional, Tuple
import cv2
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
class DensePoseResultsVisualizer(object):
def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
if densepose_result is None:
return image_bgr
context = self.create_visualization_context(image_bgr)
for i, result_encoded_w_shape in enumerate(densepose_result.results):
iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape)
bbox_xywh = densepose_result.boxes_xywh[i]
self.visualize_iuv_arr(context, iuv_arr, bbox_xywh)
image_bgr = self.context_to_image_bgr(context)
return image_bgr
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
def __init__(
self,
data_extractor,
segm_extractor,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
alpha=0.7,
val_scale=1.0,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
self.data_extractor = data_extractor
self.segm_extractor = segm_extractor
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
image_bgr = self.get_image_bgr_from_context(context)
matrix = self.data_extractor(iuv_arr)
segm = self.segm_extractor(iuv_arr)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[segm > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
def _extract_i_from_iuvarr(iuv_arr):
return iuv_arr[0, :, :]
def _extract_u_from_iuvarr(iuv_arr):
return iuv_arr[1, :, :]
def _extract_v_from_iuvarr(iuv_arr):
return iuv_arr[2, :, :]
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
def __init__(self, levels=10, **kwargs):
self.levels = levels
self.plot_args = kwargs
def create_visualization_context(self, image_bgr: Image):
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
context = {}
context["image_bgr"] = image_bgr
dpi = 100
height_inches = float(image_bgr.shape[0]) / dpi
width_inches = float(image_bgr.shape[1]) / dpi
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
plt.axes([0, 0, 1, 1])
plt.axis("off")
context["fig"] = fig
canvas = FigureCanvas(fig)
context["canvas"] = canvas
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
return context
def context_to_image_bgr(self, context):
fig = context["fig"]
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
canvas = context["canvas"]
canvas.draw()
        image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
image_rgb = image_1d.reshape(h, w, 3)
image_bgr = image_rgb[:, :, ::-1].copy()
return image_bgr
    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
import matplotlib.pyplot as plt
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
extent = (
bbox_xywh[0],
bbox_xywh[0] + bbox_xywh[2],
bbox_xywh[1],
bbox_xywh[1] + bbox_xywh[3],
)
plt.contour(u, self.levels, extent=extent, **self.plot_args)
plt.contour(v, self.levels, extent=extent, **self.plot_args)
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
"""
Contour visualization using marching squares
"""
def __init__(self, levels=10, **kwargs):
# TODO: colormap is hardcoded
cmap = cv2.COLORMAP_PARULA
if isinstance(levels, int):
self.levels = np.linspace(0, 1, levels)
else:
self.levels = levels
if "linewidths" in kwargs:
self.linewidths = kwargs["linewidths"]
else:
self.linewidths = [1] * len(self.levels)
self.plot_args = kwargs
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
self.level_colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
image_bgr = self.get_image_bgr_from_context(context)
segm = _extract_i_from_iuvarr(iuv_arr)
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
self._contours(image_bgr, u, segm, bbox_xywh)
self._contours(image_bgr, v, segm, bbox_xywh)
def _contours(self, image_bgr, arr, segm, bbox_xywh):
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
mask = segm == part_idx
if not np.any(mask):
continue
arr_min = np.amin(arr[mask])
arr_max = np.amax(arr[mask])
I, J = np.nonzero(mask)
i0 = np.amin(I)
i1 = np.amax(I) + 1
j0 = np.amin(J)
j1 = np.amax(J) + 1
if (j1 == j0 + 1) or (i1 == i0 + 1):
continue
Nw = arr.shape[1] - 1
Nh = arr.shape[0] - 1
for level_idx, level in enumerate(self.levels):
if (level < arr_min) or (level > arr_max):
continue
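                # marching squares: threshold the four corners of each 2x2 cell
                # against the level and pack them into a 4-bit code
                # (TL=1, BL=2, BR=4, TR=8); codes 0 and 15 mean no crossing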
vp = arr[i0:i1, j0:j1] >= level
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
mp = mask[i0:i1, j0:j1]
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
it = np.nditer(bin_codes, flags=["multi_index"])
color_bgr = self.level_colors_bgr[level_idx]
linewidth = self.linewidths[level_idx]
while not it.finished:
if (it[0] != 0) and (it[0] != 15):
i, j = it.multi_index
if bin_mask_codes[i, j] != 0:
self._draw_line(
image_bgr,
arr,
mask,
level,
color_bgr,
linewidth,
it[0],
it.multi_index,
bbox_xywh,
Nw,
Nh,
(i0, j0),
)
it.iternext()
def _draw_line(
self,
image_bgr,
arr,
mask,
v,
color_bgr,
linewidth,
bin_code,
multi_idx,
bbox_xywh,
Nw,
Nh,
offset,
):
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
x0, y0, w, h = bbox_xywh
x1 = x0 + w
y1 = y0 + h
for line in lines:
x0r, y0r = line[0]
x1r, y1r = line[1]
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
i0, j0 = offset
i, j = multi_idx
i += i0
j += j0
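        # cell corner values in array coordinates:
        # v0 = top-left, v1 = bottom-left, v2 = bottom-right, v3 = top-right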
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
x0i = float(j) / Nw
y0j = float(i) / Nh
He = 1.0 / Nh
We = 1.0 / Nw
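        # each case linearly interpolates where iso-level `v` crosses the cell
        # edges; a code and its complement (15 - code) yield the same lines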
if (bin_code == 1) or (bin_code == 14):
a = (v - v0) / (v1 - v0)
b = (v - v0) / (v3 - v0)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j)
return [(pt1, pt2)]
elif (bin_code == 2) or (bin_code == 13):
a = (v - v0) / (v1 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 3) or (bin_code == 12):
a = (v - v0) / (v3 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 4) or (bin_code == 11):
a = (v - v1) / (v2 - v1)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j + He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 6) or (bin_code == 9):
a = (v - v0) / (v1 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 7) or (bin_code == 8):
a = (v - v0) / (v3 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif bin_code == 5:
a1 = (v - v0) / (v1 - v0)
b1 = (v - v1) / (v2 - v1)
pt11 = (x0i, y0j + a1 * He)
pt12 = (x0i + b1 * We, y0j + He)
a2 = (v - v0) / (v3 - v0)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
elif bin_code == 10:
a1 = (v - v0) / (v3 - v0)
b1 = (v - v0) / (v1 - v0)
pt11 = (x0i + a1 * We, y0j)
pt12 = (x0i, y0j + b1 * He)
a2 = (v - v1) / (v2 - v1)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j + He)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
return []
try:
import matplotlib
matplotlib.use("Agg")
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Could not import matplotlib, using custom contour visualizer")
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
_extract_i_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
)
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsUVisualizer, self).__init__(
_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsVVisualizer, self).__init__(
_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseOutputsFineSegmentationVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
alpha=alpha,
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
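            # coarse segmentation: per-pixel argmax over channels of S; fine part
            # labels from I are kept only where the coarse mask is foreground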
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
matrix = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
class DensePoseOutputsUVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Un = U[n].cpu().numpy().astype(np.float32)
Uvis = np.zeros(segmentation.shape, dtype=np.float32)
for partId in range(Un.shape[0]):
Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
return image_bgr
class DensePoseOutputsVVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Vn = V[n].cpu().numpy().astype(np.float32)
Vvis = np.zeros(segmentation.shape, dtype=np.float32)
            for partId in range(Vn.shape[0]):
Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
return image_bgr
class DensePoseDataCoarseSegmentationVisualizer(object):
"""
Visualizer for ground truth segmentation
"""
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
alpha=alpha,
)
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
matrix = densepose_data.segm.numpy()
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
return image_bgr
class DensePoseDataPointsVisualizer(object):
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
self.points_visualizer = PointsVisualizer()
self.densepose_data_to_value_fn = densepose_data_to_value_fn
self.cmap = cmap
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
x0, y0, w, h = bbox_xywh.numpy()
x = densepose_data.x.numpy() * w / 255.0 + x0
y = densepose_data.y.numpy() * h / 255.0 + y0
pts_xy = zip(x, y)
if self.densepose_data_to_value_fn is None:
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
else:
v = self.densepose_data_to_value_fn(densepose_data)
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
return image_bgr
def _densepose_data_u_for_cmap(densepose_data):
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
return u.astype(np.uint8)
def _densepose_data_v_for_cmap(densepose_data):
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
return v.astype(np.uint8)
def _densepose_data_i_for_cmap(densepose_data):
i = (
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
* 255.0
/ DensePoseDataRelative.N_PART_LABELS
)
return i.astype(np.uint8)
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsUVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_u_for_cmap
)
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsVVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_v_for_cmap
)
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsIVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_i_for_cmap
)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from typing import Sequence
import torch
from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose import DensePoseResultsVisualizer
from .base import CompoundVisualizer
Scores = Sequence[float]
def extract_scores_from_instances(instances: Instances, select=None):
if instances.has("scores"):
return instances.scores if select is None else instances.scores[select]
return None
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
if instances.has("pred_boxes"):
boxes_xywh = instances.pred_boxes.tensor.clone()
boxes_xywh[:, 2] -= boxes_xywh[:, 0]
boxes_xywh[:, 3] -= boxes_xywh[:, 1]
return boxes_xywh if select is None else boxes_xywh[select]
return None
def create_extractor(visualizer: object):
"""
Create an extractor for the provided visualizer
"""
if isinstance(visualizer, CompoundVisualizer):
extractors = [create_extractor(v) for v in visualizer.visualizers]
return CompoundExtractor(extractors)
elif isinstance(visualizer, DensePoseResultsVisualizer):
return DensePoseResultExtractor()
elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
elif isinstance(visualizer, BoundingBoxVisualizer):
return extract_boxes_xywh_from_instances
else:
logger = logging.getLogger(__name__)
logger.error(f"Could not create extractor for {visualizer}")
return None
class BoundingBoxExtractor(object):
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
return boxes_xywh
class ScoredBoundingBoxExtractor(object):
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if (scores is None) or (boxes_xywh is None):
return (boxes_xywh, scores)
if select is not None:
scores = scores[select]
boxes_xywh = boxes_xywh[select]
return (boxes_xywh, scores)
class DensePoseResultExtractor(object):
"""
Extracts DensePose result from instances
"""
def __call__(self, instances: Instances, select=None):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if instances.has("pred_densepose") and (boxes_xywh is not None):
dpout = instances.pred_densepose
if select is not None:
dpout = dpout[select]
boxes_xywh = boxes_xywh[select]
return dpout.to_result(boxes_xywh)
else:
return None
class CompoundExtractor(object):
"""
Extracts data for CompoundVisualizer
"""
def __init__(self, extractors):
self.extractors = extractors
def __call__(self, instances: Instances, select=None):
datas = []
for extractor in self.extractors:
data = extractor(instances, select)
datas.append(data)
return datas
class NmsFilteredExtractor(object):
"""
Extracts data in the format accepted by NmsFilteredVisualizer
"""
def __init__(self, extractor, iou_threshold):
self.extractor = extractor
self.iou_threshold = iou_threshold
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if boxes_xywh is None:
return None
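        # use a single dummy class id so NMS is applied jointly to all boxes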
select_local_idx = batched_nms(
boxes_xywh,
scores,
torch.zeros(len(scores), dtype=torch.int32),
iou_threshold=self.iou_threshold,
).squeeze()
select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
select_local[select_local_idx] = True
select = select_local if select is None else (select & select_local)
return self.extractor(instances, select=select)
class ScoreThresholdedExtractor(object):
"""
Extracts data in the format accepted by ScoreThresholdedVisualizer
"""
def __init__(self, extractor, min_score):
self.extractor = extractor
self.min_score = min_score
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
if scores is None:
return None
select_local = scores > self.min_score
select = select_local if select is None else (select & select_local)
data = self.extractor(instances, select=select)
return data
## Some scripts for developers to use:
- `run_instant_tests.sh`: run training for a few iterations.
- `run_inference_tests.sh`: run inference on a small dataset.
- `../../dev/linter.sh`: lint the codebase before commit
- `../../dev/parse_results.sh`: parse results from log file.
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
BIN="python train_net.py"
OUTPUT="inference_test_output"
NUM_GPUS=2
IMS_PER_GPU=2
IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU ))
CFG_LIST=( "${@:1}" )
if [ ${#CFG_LIST[@]} -eq 0 ]; then
CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml )
fi
echo "========================================================================"
echo "Configs to run:"
echo "${CFG_LIST[@]}"
echo "========================================================================"
for cfg in "${CFG_LIST[@]}"; do
echo "========================================================================"
echo "Running $cfg ..."
echo "========================================================================"
$BIN \
--eval-only \
--num-gpus $NUM_GPUS \
--config-file "$cfg" \
OUTPUT_DIR "$OUTPUT" \
SOLVER.IMS_PER_BATCH $IMS_PER_BATCH
rm -rf $OUTPUT
done
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
BIN="python train_net.py"
OUTPUT="instant_test_output"
NUM_GPUS=2
SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2))
CFG_LIST=( "${@:1}" )
if [ ${#CFG_LIST[@]} -eq 0 ]; then
CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml )
fi
echo "========================================================================"
echo "Configs to run:"
echo "${CFG_LIST[@]}"
echo "========================================================================"
for cfg in "${CFG_LIST[@]}"; do
echo "========================================================================"
echo "Running $cfg ..."
echo "========================================================================"
$BIN --num-gpus $NUM_GPUS --config-file "$cfg" \
SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \
OUTPUT_DIR "$OUTPUT"
rm -rf "$OUTPUT"
done
# Getting Started with DensePose
## Inference with Pre-trained Models
1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml)
2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save them to disk. For example, to use contour visualization for DensePose, one can run:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png
```
Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool.
## Training
First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts:
<pre>
datasets/coco/
annotations/
densepose_{train,minival,valminusminival}2014.json
<a href="https://dl.fbaipublicfiles.com/detectron2/densepose/densepose_minival2014_100.json">densepose_minival2014_100.json </a> (optional, for testing only)
{train,val}2014/
# image files that are mentioned in the corresponding json
</pre>
To train a model one can use the [train_net.py](../train_net.py) script.
This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md).
For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone
on 8 GPUs following the s1x schedule, one can run
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8
```
The configs are made for 8-GPU training. To train on 1 GPU, one can apply the
[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677):
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
```
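(For reference: assuming the config's defaults of `SOLVER.IMS_PER_BATCH 16` and `SOLVER.BASE_LR 0.02`, the rule scales the learning rate in proportion to the batch size, i.e. 0.02 × 2/16 = 0.0025.)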
## Evaluation
Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
model location specification through `MODEL.WEIGHTS model.pth` in the command line
```bash
python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
--eval-only MODEL.WEIGHTS model.pth
```
## Tools
We provide tools which allow one to:
- easily view DensePose annotated data in a dataset;
- perform DensePose inference on a set of images;
- visualize DensePose model results.
`query_db` is a tool to print or visualize DensePose data in a dataset.
Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool.
`apply_net` is a tool to print or visualize DensePose results.
Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool.
# Model Zoo and Baselines
## Introduction
We provide baselines trained with Detectron2 DensePose. The corresponding
configuration files can be found in the [configs](../configs) directory.
All models were trained on COCO `train2014` + `valminusminival2014` and
evaluated on COCO `minival2014`. For the details on common settings in which
baselines were trained, please check [Detectron2 Model Zoo](../../../MODEL_ZOO.md).
## License
All models available for download through this document are licensed under the
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/)
## COCO DensePose Baselines with DensePose-RCNN
### Legacy Models
Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf)
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml">R_50_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.307</td>
<td align="center">0.051</td>
<td align="center">3.2</td>
<td align="center">58.1</td>
<td align="center">52.1</td>
<td align="center">54.9</td>
<td align="center">164832157</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml">R_101_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.390</td>
<td align="center">0.063</td>
<td align="center">4.3</td>
<td align="center">59.5</td>
<td align="center">53.2</td>
<td align="center">56.1</td>
<td align="center">164832182</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Improved Baselines, Original Fully Convolutional Head
These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x.yaml">R_50_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.359</td>
<td align="center">0.066</td>
<td align="center">4.5</td>
<td align="center">61.2</td>
<td align="center">63.7</td>
<td align="center">65.3</td>
<td align="center">165712039</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x.yaml">R_101_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.428</td>
<td align="center">0.079</td>
<td align="center">5.8</td>
<td align="center">62.3</td>
<td align="center">64.5</td>
<td align="center">66.4</td>
<td align="center">165712084</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Improved Baselines, DeepLabV3 Head
These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml">R_50_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.392</td>
<td align="center">0.070</td>
<td align="center">6.7</td>
<td align="center">61.1</td>
<td align="center">65.6</td>
<td align="center">66.8</td>
<td align="center">165712097</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml">R_101_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.478</td>
<td align="center">0.083</td>
<td align="center">7.0</td>
<td align="center">62.3</td>
<td align="center">66.3</td>
<td align="center">67.7</td>
<td align="center">165712116</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/metrics.json">metrics</a></td>
</tr>
</tbody></table>
### Baselines with Confidence Estimation
These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml">R_50_FPN_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.353</td>
<td align="center">0.064</td>
<td align="center">4.6</td>
<td align="center">60.5</td>
<td align="center">64.2</td>
<td align="center">65.6</td>
<td align="center">173862049</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/model_final_289019.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml">R_50_FPN_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.364</td>
<td align="center">0.066</td>
<td align="center">4.8</td>
<td align="center">60.7</td>
<td align="center">64.2</td>
<td align="center">65.7</td>
<td align="center">173861455</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/model_final_3abe14.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml">R_50_FPN_DL_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.397</td>
<td align="center">0.068</td>
<td align="center">6.7</td>
<td align="center">61.1</td>
<td align="center">65.8</td>
<td align="center">67.1</td>
<td align="center">173067973</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/model_final_b1e525.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml">R_50_FPN_DL_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.410</td>
<td align="center">0.070</td>
<td align="center">6.8</td>
<td align="center">60.8</td>
<td align="center">65.6</td>
<td align="center">66.7</td>
<td align="center">173859335</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/model_final_60fed4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml">R_101_FPN_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.435</td>
<td align="center">0.076</td>
<td align="center">5.7</td>
<td align="center">62.5</td>
<td align="center">64.9</td>
<td align="center">66.5</td>
<td align="center">171402969</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/model_final_9e47f0.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml">R_101_FPN_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.450</td>
<td align="center">0.078</td>
<td align="center">5.7</td>
<td align="center">62.3</td>
<td align="center">64.8</td>
<td align="center">66.6</td>
<td align="center">173860702</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/model_final_5ea023.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC1_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml">R_101_FPN_DL_WC1_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.479</td>
<td align="center">0.081</td>
<td align="center">7.9</td>
<td align="center">62.0</td>
<td align="center">66.2</td>
<td align="center">67.4</td>
<td align="center">173858525</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/model_final_f359f3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC2_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml">R_101_FPN_DL_WC2_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.491</td>
<td align="center">0.082</td>
<td align="center">7.6</td>
<td align="center">61.7</td>
<td align="center">65.9</td>
<td align="center">67.3</td>
<td align="center">173294801</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/model_final_6e1ed1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/metrics.json">metrics</a></td>
</tr>
</tbody></table>
## Old Baselines
It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose).
Below are evaluation metrics for the baselines recomputed in the current framework:
| Model | bbox AP | AP | AP50 | AP75 | APm |APl |
|-----|-----|-----|--- |--- |--- |--- |
| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 |
| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 |
Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md),
which is due to small incompatibilities between the frameworks.
# Apply Net
`apply_net` is a tool to print or visualize DensePose results on a set of images.
It has two modes: `dump` to save DensePose model results to a pickle file
and `show` to visualize them on images.
## Dump Mode
The general command form is:
```bash
python apply_net.py dump [-h] [-v] [--output <dump_file>] <config> <model> <input>
```
There are three mandatory arguments:
- `<config>`, configuration file for a given model;
- `<model>`, model file with trained parameters
- `<input>`, input image file name, pattern or folder
One can additionally provide the `--output` argument to define the output file name,
which defaults to `output.pkl`.
Examples:
1. Dump results of a DensePose model with ResNet-50 FPN backbone for images
in a folder `images` to file `dump.pkl`:
```bash
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v
```
2. Dump results of a DensePose model with ResNet-50 FPN backbone for images
with file name matching a pattern `image*.jpg` to file `results.pkl`:
```bash
python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v
```
If you want to load the pickle file generated by the above command:
```
import pickle
import sys

# make sure DensePose is in your PYTHONPATH, or use the following line to add it:
sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/")
with open('/your_result_path/results.pkl', 'rb') as f:
    data = pickle.load(f)
```
The file `results.pkl` contains the list of results per image, for each image the result is a dictionary:
```
data: [{'file_name': '/your_path/image1.jpg',
'scores': tensor([0.9884]),
'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]),
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f791b312470>},
{'file_name': '/your_path/image2.jpg',
'scores': tensor([0.9999, 0.5373, 0.3991]),
'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619],
[612.9418, 686.1254, 612.9999, 704.6053],
[164.5081, 407.4034, 598.3944, 920.4266]]),
'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f7071229be0>}]
```
We can use the following code to parse the outputs of the first
detected instance on the first image.
```
from densepose.data.structures import DensePoseResult

img_id, instance_id = 0, 0  # Look at the first image and the first detected instance
bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id]
result_encoded = data[img_id]['pred_densepose'].results[instance_id]
iuv_arr = DensePoseResult.decode_png_data(*result_encoded)
```
The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box.
The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box.
- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on.
- `iuv_arr[1,:,:]`: The U-coordinate value of image points.
- `iuv_arr[2,:,:]`: The V-coordinate value of image points.
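As a minimal sketch (assuming `iuv_arr` was decoded as above and OpenCV is installed; the output file name is arbitrary), the patch index channel can be colorized for a quick visual check:
```
import cv2
import numpy as np

# channel 0 holds patch indices in [0, 24]; rescale to [0, 255] for display
i_arr = iuv_arr[0, :, :].astype(np.uint8)
i_vis = cv2.applyColorMap(i_arr * (255 // 24), cv2.COLORMAP_PARULA)
cv2.imwrite("iuv_parts.png", i_vis)
```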
## Visualization Mode
The general command form is:
```bash
python apply_net.py show [-h] [-v] [--min_score <score>] [--nms_thresh <threshold>] [--output <image_file>] <config> <model> <input> <visualizations>
```
There are four mandatory arguments:
- `<config>`, configuration file for a given model;
- `<model>`, model file with trained parameters
- `<input>`, input image file name, pattern or folder
- `<visualizations>`, visualizations specifier; currently available visualizations are:
* `bbox` - bounding boxes of detected persons;
* `dp_segm` - segmentation masks for detected persons;
* `dp_u` - each body part is colored according to the estimated values of the
U coordinate in part parameterization;
* `dp_v` - each body part is colored according to the estimated values of the
V coordinate in part parameterization;
* `dp_contour` - plots contours with color-coded U and V coordinates
One can additionally provide the following optional arguments:
- `--min_score` to only show detections with scores not lower than the provided value
- `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold
- `--output` to define visualization file name template, which defaults to `output.png`.
To distinguish output file names for different images, the tool appends a 1-based entry index,
e.g. output.0001.png, output.0002.png, etc.
The following examples show how to output results of a DensePose model
with ResNet-50 FPN backbone using different visualizations for image `image.jpg`:
1. Show bounding box and segmentation:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v
```
![Bounding Box + Segmentation Visualization](images/res_bbox_dp_segm.jpg)
2. Show bounding box and estimated U coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v
```
![Bounding Box + U Coordinate Visualization](images/res_bbox_dp_u.jpg)
3. Show bounding box and estimated V coordinates for body parts:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v
```
![Bounding Box + V Coordinate Visualization](images/res_bbox_dp_v.jpg)
4. Show bounding box and estimated U and V coordinates via contour plots:
```bash
python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v
```
![Bounding Box + Contour Visualization](images/res_bbox_dp_contour.jpg)
# Query Dataset
`query_db` is a tool to print or visualize DensePose data from a dataset.
It has two modes: `print` and `show` to output dataset entries to standard
output or to visualize them on images.
## Print Mode
The general command form is:
```bash
python query_db.py print [-h] [-v] [--max-entries N] <dataset> <selector>
```
There are two mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
or a comma-separated list of specifications of the form
`field[:type]=value` for exact match with the value
or `field[:type]=min-max` for a range of values
One can additionally limit the maximum number of entries to output
by providing `--max-entries` argument.
Examples:
1. Output at most 10 first entries from the `densepose_coco_2014_train` dataset:
```bash
python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v
```
2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`:
```bash
python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v
```
3. Output all entries with `image_id` between 36 and 156:
```bash
python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v
```
## Visualization Mode
The general command form is:
```bash
python query_db.py show [-h] [-v] [--max-entries N] [--output <image_file>] <dataset> <selector> <visualizations>
```
There are three mandatory arguments:
- `<dataset>`, DensePose dataset specification, from which to select
the entries (e.g. `densepose_coco_2014_train`).
- `<selector>`, dataset entry selector which can be a single specification,
or a comma-separated list of specifications of the form
`field[:type]=value` for exact match with the value
or `field[:type]=min-max` for a range of values
- `<visualizations>`, visualizations specifier; currently available visualizations are:
* `bbox` - bounding boxes of annotated persons;
* `dp_i` - annotated points colored according to the containing part;
* `dp_pts` - annotated points in green color;
* `dp_segm` - segmentation masks for annotated persons;
* `dp_u` - annotated points colored according to their U coordinate in part parameterization;
* `dp_v` - annotated points colored according to their V coordinate in part parameterization;
One can additionally provide the following optional arguments:
- `--max-entries` to limit the maximum number of entries to visualize
- `--output` to provide visualization file name template, which defaults
to `output.png`. To distinguish file names for different dataset
entries, the tool appends 1-based entry index to the output file name,
e.g. output.0001.png, output.0002.png, etc.
The following examples show how to output different visualizations for image with `id = 322`
from `densepose_coco_2014_train` dataset:
1. Show bounding box and segmentation:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v
```
![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg)
2. Show bounding box and points colored according to the containing part:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v
```
![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg)
3. Show bounding box and annotated points in green color:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_pts -v
```
![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg)
4. Show bounding box and annotated points colored according to their U coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v
```
![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg)
5. Show bounding box and annotated points colored according to their V coordinate in part parameterization:
```bash
python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v
```
![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg)