Unverified commit ff62af6b authored by Danila Rukhovich, committed by GitHub

[Fix] Unify camera poses (#653)

* refactor K and Rt to depth2img for SUN RGB-D

* fix lint

* update 3 tests

* fix extra calib key and comments

* remove calib from browse_dataset

* fix cam to depth; rename return_z
parent 23071a56
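
In short, this commit stops threading the raw SUN RGB-D `calib` dict (`K` and `Rt`) through pipelines, models and visualization, and instead pre-composes a single `depth2img` matrix that is stored in `img_metas`, mirroring the existing `lidar2img` convention. Below is a minimal sketch of that composition as the diff performs it, assuming NumPy inputs shaped like the SUN RGB-D calibration entries; the matrix values are placeholders, not real calibration data.

```python
import numpy as np

# Placeholder SUN RGB-D style calibration (illustrative values only).
k_mat = np.array([[529.5, 0.0, 365.0],
                  [0.0, 529.5, 265.0],
                  [0.0, 0.0, 1.0]])
rt_mat = np.array([[0.9796, 0.0126, -0.2006],
                   [0.0126, 0.9922, 0.1238],
                   [0.2006, -0.1238, 0.9718]])

# Follow Coord3DMode.convert_point: swap the depth axes into the camera
# frame, apply the transposed extrinsics, then the intrinsics.
axis_swap = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
depth2img = k_mat @ (axis_swap @ rt_mat.transpose(1, 0))

# Downstream code then projects depth-frame points in one step, e.g.
#   uvz = points_cam2img(xyz_depth, depth2img, with_depth=True)
# instead of converting depth -> camera coords and applying K separately.
print(depth2img)
```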
@@ -193,7 +193,7 @@ train_pipeline = [
         type='Collect3D',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
-            'gt_labels_3d', 'calib'
+            'gt_labels_3d'
         ])
 ]
@@ -230,7 +230,7 @@ test_pipeline = [
                 type='DefaultFormatBundle3D',
                 class_names=class_names,
                 with_label=False),
-            dict(type='Collect3D', keys=['img', 'points', 'calib'])
+            dict(type='Collect3D', keys=['img', 'points'])
         ]),
 ]
 # construct a pipeline for data and gt loading in show function
@@ -247,7 +247,7 @@ eval_pipeline = [
         type='DefaultFormatBundle3D',
         class_names=class_names,
         with_label=False),
-    dict(type='Collect3D', keys=['img', 'points', 'calib'])
+    dict(type='Collect3D', keys=['img', 'points'])
 ]
 data = dict(
...
@@ -155,13 +155,10 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
         bbox_fields=[],
         mask_fields=[],
         seg_fields=[])
-    # depth map points to image conversion
-    if box_mode_3d == Box3DMode.DEPTH:
-        data.update(dict(calib=info['calib']))
     data = test_pipeline(data)
+    # TODO: this code is dataset-specific. Move lidar2img and
+    # depth2img to .pkl annotations in the future.
     # LiDAR to image conversion
     if box_mode_3d == Box3DMode.LIDAR:
         rect = info['calib']['R0_rect'].astype(np.float32)
@@ -169,9 +166,14 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
         P2 = info['calib']['P2'].astype(np.float32)
         lidar2img = P2 @ rect @ Trv2c
         data['img_metas'][0].data['lidar2img'] = lidar2img
+    # Depth to image conversion
     elif box_mode_3d == Box3DMode.DEPTH:
-        data['calib'][0]['Rt'] = data['calib'][0]['Rt'].astype(np.float32)
-        data['calib'][0]['K'] = data['calib'][0]['K'].astype(np.float32)
+        rt_mat = info['calib']['Rt']
+        # follow Coord3DMode.convert_point
+        rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]
+                           ]) @ rt_mat.transpose(1, 0)
+        depth2img = info['calib']['K'] @ rt_mat
+        data['img_metas'][0].data['depth2img'] = depth2img
     data = collate([data], samples_per_gpu=1)
     if next(model.parameters()).is_cuda:
@@ -182,9 +184,6 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
         data['img_metas'] = data['img_metas'][0].data
         data['points'] = data['points'][0].data
         data['img'] = data['img'][0].data
-        if box_mode_3d == Box3DMode.DEPTH:
-            data['calib'][0]['Rt'] = data['calib'][0]['Rt'][0].data
-            data['calib'][0]['K'] = data['calib'][0]['K'][0].data
     # forward the model
     with torch.no_grad():
@@ -411,17 +410,13 @@ def show_proj_det_result_meshlab(data,
             box_mode='lidar',
             show=show)
     elif box_mode == Box3DMode.DEPTH:
-        if 'calib' not in data.keys():
-            raise NotImplementedError(
-                'camera calibration information is not provided')
         show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
         show_multi_modality_result(
             img,
             None,
             show_bboxes,
-            data['calib'][0],
+            None,
             out_dir,
             file_name,
             box_mode='depth',
...
@@ -50,7 +50,8 @@ def corners_nd(dims, origin=0.5):
     Args:
         dims (np.ndarray, shape=[N, ndim]): Array of length per dim
-        origin (list or array or float): origin point relate to smallest point.
+        origin (list or array or float, optional): origin point relate to
+            smallest point. Defaults to 0.5
     Returns:
         np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.
@@ -102,7 +103,10 @@ def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
     Args:
         centers (np.ndarray): Locations in kitti label file with shape (N, 2).
         dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).
-        angles (np.ndarray): Rotation_y in kitti label file with shape (N).
+        angles (np.ndarray, optional): Rotation_y in kitti label file with
+            shape (N). Defaults to None.
+        origin (list or array or float, optional): origin point relate to
+            smallest point. Defaults to 0.5.
     Returns:
         np.ndarray: Corners with the shape of (N, 4, 2).
@@ -173,7 +177,7 @@ def rotation_3d_in_axis(points, angles, axis=0):
     Args:
         points (np.ndarray, shape=[N, point_size, 3]]):
         angles (np.ndarray, shape=[N]]):
-        axis (int): Axis to rotate at.
+        axis (int, optional): Axis to rotate at. Defaults to 0.
     Returns:
         np.ndarray: Rotated points.
@@ -208,10 +212,13 @@ def center_to_corner_box3d(centers,
     Args:
         centers (np.ndarray): Locations in kitti label file with shape (N, 3).
         dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).
-        angles (np.ndarray): Rotation_y in kitti label file with shape (N).
-        origin (list or array or float): Origin point relate to smallest point.
-            use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
-        axis (int): Rotation axis. 1 for camera and 2 for lidar.
+        angles (np.ndarray, optional): Rotation_y in kitti label file with
+            shape (N). Defaults to None.
+        origin (list or array or float, optional): Origin point relate to
+            smallest point. Use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0)
+            in lidar. Defaults to (0.5, 1.0, 0.5).
+        axis (int, optional): Rotation axis. 1 for camera and 2 for lidar.
+            Defaults to 1.
     Returns:
         np.ndarray: Corners with the shape of (N, 8, 3).
@@ -308,8 +315,8 @@ def rotation_points_single_angle(points, angle, axis=0):
     Args:
         points (np.ndarray, shape=[N, 3]]):
-        angles (np.ndarray, shape=[1]]):
-        axis (int): Axis to rotate at.
+        angle (np.ndarray, shape=[1]]):
+        axis (int, optional): Axis to rotate at. Defaults to 0.
     Returns:
         np.ndarray: Rotated points.
@@ -341,7 +348,8 @@ def points_cam2img(points_3d, proj_mat, with_depth=False):
     Args:
         points_3d (np.ndarray): Points in shape (N, 3)
         proj_mat (np.ndarray): Transformation matrix between coordinates.
-        with_depth (bool): Whether to keep depth in the output.
+        with_depth (bool, optional): Whether to keep depth in the output.
+            Defaults to False.
     Returns:
         np.ndarray: Points in image coordinates with shape [N, 2].
@@ -420,8 +428,10 @@ def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
     Args:
         points (np.ndarray, shape=[N, 3+dim]): Points to query.
         rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.
-        z_axis (int): Indicate which axis is height.
-        origin (tuple[int]): Indicate the position of box center.
+        z_axis (int, optional): Indicate which axis is height.
+            Defaults to 2.
+        origin (tuple[int], optional): Indicate the position of
+            box center. Defaults to (0.5, 0.5, 0).
     Returns:
         np.ndarray, shape=[N, M]: Indices of points in each box.
@@ -479,11 +489,13 @@ def create_anchors_3d_range(feature_size,
         anchor_range (torch.Tensor | list[float]): Range of anchors with
             shape [6]. The order is consistent with that of anchors, i.e.,
             (x_min, y_min, z_min, x_max, y_max, z_max).
-        sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with
-            shape [N, 3], in order of x, y, z.
-        rotations (list[float] | np.ndarray | torch.Tensor): Rotations of
-            anchors in a single feature grid.
-        dtype (type): Data type. Default to np.float32.
+        sizes (list[list] | np.ndarray | torch.Tensor, optional):
+            Anchor size with shape [N, 3], in order of x, y, z.
+            Defaults to ((1.6, 3.9, 1.56), ).
+        rotations (list[float] | np.ndarray | torch.Tensor, optional):
+            Rotations of anchors in a single feature grid.
+            Defaults to (0, np.pi / 2).
+        dtype (type, optional): Data type. Default to np.float32.
     Returns:
         np.ndarray: Range based anchors with shape of \
@@ -520,7 +532,8 @@ def center_to_minmax_2d(centers, dims, origin=0.5):
     Args:
         centers (np.ndarray): Center points.
         dims (np.ndarray): Dimensions.
-        origin (list or array or float): origin point relate to smallest point.
+        origin (list or array or float, optional): Origin point relate
+            to smallest point. Defaults to 0.5.
     Returns:
         np.ndarray: Minmax points.
@@ -559,6 +572,8 @@ def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
     Args:
         boxes (np.ndarray): Input bounding boxes with shape of (N, 4).
         query_boxes (np.ndarray): Query boxes with shape of (K, 4).
+        mode (str, optional): IoU mode. Defaults to 'iou'.
+        eps (float, optional): Value added to denominator. Defaults to 0.
     Returns:
         np.ndarray: Overlap between boxes and query_boxes
@@ -648,8 +663,10 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
     Args:
         bbox_image (list[int]): box in image coordinates.
         C (np.ndarray): Intrinsics.
-        near_clip (float): Nearest distance of frustum.
-        far_clip (float): Farthest distance of frustum.
+        near_clip (float, optional): Nearest distance of frustum.
+            Defaults to 0.001.
+        far_clip (float, optional): Farthest distance of frustum.
+            Defaults to 100.
     Returns:
         np.ndarray, shape=[8, 3]: coordinates of frustum corners.
@@ -742,12 +759,12 @@ def points_in_convex_polygon_3d_jit(points,
     Args:
         points (np.ndarray): Input points with shape of (num_points, 3).
-        polygon_surfaces (np.ndarray): Polygon surfaces with shape of \
-            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3). \
-            All surfaces' normal vector must direct to internal. \
-            Max_num_points_of_surface must at least 3.
-        num_surfaces (np.ndarray): Number of surfaces a polygon contains \
-            shape of (num_polygon).
+        polygon_surfaces (np.ndarray): Polygon surfaces with shape of
+            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
+            All surfaces' normal vector must direct to internal.
+            Max_num_points_of_surface must at least 3.
+        num_surfaces (np.ndarray, optional): Number of surfaces a polygon
+            contains shape of (num_polygon). Defaults to None.
     Returns:
         np.ndarray: Result matrix with the shape of [num_points, num_polygon].
@@ -772,7 +789,8 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
         points (np.ndarray): Input points with the shape of [num_points, 2].
         polygon (np.ndarray): Input polygon with the shape of
             [num_polygon, num_points_of_polygon, 2].
-        clockwise (bool): Indicate polygon is clockwise.
+        clockwise (bool, optional): Indicate polygon is clockwise. Defaults
+            to True.
     Returns:
         np.ndarray: Result matrix with the shape of [num_points, num_polygon].
@@ -821,10 +839,11 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
           2 -------- 1
     Args:
-        boxes3d (np.ndarray): Boxes with shape of (N, 7) \
-            [x, y, z, w, l, h, ry] in LiDAR coords, see the definition of ry \
-            in KITTI dataset.
-        bottom_center (bool): Whether z is on the bottom center of object.
+        boxes3d (np.ndarray): Boxes with shape of (N, 7)
+            [x, y, z, w, l, h, ry] in LiDAR coords, see the definition of ry
+            in KITTI dataset.
+        bottom_center (bool, optional): Whether z is on the bottom center
+            of object. Defaults to True.
     Returns:
         np.ndarray: Box corners with the shape of [N, 8, 3].
...
@@ -227,21 +227,11 @@ class Coord3DMode(IntEnum):
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
         elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
-            # LIDAR-CAM conversion is different from DEPTH-CAM conversion
-            # because SUNRGB-D camera calibration files are different from
-            # that of KITTI, and currently we keep this hack
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
-            else:
-                rt_mat = rt_mat.new_tensor(
-                    [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ \
-                    rt_mat.transpose(1, 0)
         elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
-            else:
-                rt_mat = rt_mat @ rt_mat.new_tensor([[1, 0, 0], [0, 0, 1],
-                                                     [0, -1, 0]])
         elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
...
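
With the `else:` branches above removed, `Coord3DMode.convert_point` no longer mixes a provided `rt_mat` with the built-in DEPTH-to-CAM axis swap; callers are expected to pre-compose the full matrix themselves, as the updated `test_points_conversion` hunk later in this commit does. A hedged sketch of that caller-side pattern, with an identity matrix standing in for real SUN RGB-D extrinsics:

```python
import torch
from mmdet3d.core import Coord3DMode

# Stand-in extrinsics; in practice this is the SUN RGB-D 'Rt' matrix.
rt_mat = torch.eye(3)

# Pre-compose the DEPTH -> CAM axis swap with the transposed extrinsics
# before handing the matrix to convert_point.
axis_swap = rt_mat.new_tensor([[1., 0., 0.], [0., 0., -1.], [0., 1., 0.]])
full_rt = axis_swap @ rt_mat.transpose(1, 0)

# Points in depth coordinates with shape (N, 3).
xyz_depth = torch.rand(4, 3)
xyz_cam = Coord3DMode.convert_point(
    xyz_depth, Coord3DMode.DEPTH, Coord3DMode.CAM, rt_mat=full_rt)
```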
@@ -111,12 +111,14 @@ def get_box_type(box_type):
     return box_type_3d, box_mode_3d
-def points_cam2img(points_3d, proj_mat):
+def points_cam2img(points_3d, proj_mat, with_depth=False):
     """Project points from camera coordicates to image coordinates.
     Args:
-        points_3d (torch.Tensor): Points in shape (N, 3)
+        points_3d (torch.Tensor): Points in shape (N, 3).
         proj_mat (torch.Tensor): Transformation matrix between coordinates.
+        with_depth (bool, optional): Whether to keep depth in the output.
+            Defaults to False.
     Returns:
         torch.Tensor: Points in image coordinates with shape [N, 2].
@@ -141,6 +143,9 @@ def points_cam2img(points_3d, proj_mat):
         [points_3d, points_3d.new_ones(*points_shape)], dim=-1)
     point_2d = torch.matmul(points_4, proj_mat.t())
     point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
+    if with_depth:
+        return torch.cat([point_2d_res, point_2d[..., 2:3]], dim=-1)
     return point_2d_res
...
@@ -120,6 +120,7 @@ def draw_lidar_bbox3d_on_img(bboxes3d,
     return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
+# TODO: remove third parameter in all functions here in favour of img_metas
 def draw_depth_bbox3d_on_img(bboxes3d,
                              raw_img,
                              calibs,
@@ -137,35 +138,22 @@ def draw_depth_bbox3d_on_img(bboxes3d,
         color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
         thickness (int, optional): The thickness of bboxes. Default: 1.
     """
-    from mmdet3d.core import Coord3DMode
     from mmdet3d.core.bbox import points_cam2img
     from mmdet3d.models import apply_3d_transformation
     img = raw_img.copy()
-    calibs = copy.deepcopy(calibs)
     img_metas = copy.deepcopy(img_metas)
     corners_3d = bboxes3d.corners
     num_bbox = corners_3d.shape[0]
     points_3d = corners_3d.reshape(-1, 3)
-    assert ('Rt' in calibs.keys() and 'K' in calibs.keys()), \
-        'Rt and K matrix should be provided as camera caliberation information'
-    if not isinstance(calibs['Rt'], torch.Tensor):
-        calibs['Rt'] = torch.from_numpy(np.array(calibs['Rt']))
-    if not isinstance(calibs['K'], torch.Tensor):
-        calibs['K'] = torch.from_numpy(np.array(calibs['K']))
-    calibs['Rt'] = calibs['Rt'].reshape(3, 3).float().cpu()
-    calibs['K'] = calibs['K'].reshape(3, 3).float().cpu()
     # first reverse the data transformations
     xyz_depth = apply_3d_transformation(
         points_3d, 'DEPTH', img_metas, reverse=True)
-    # then convert from depth coords to camera coords
-    xyz_cam = Coord3DMode.convert_point(
-        xyz_depth, Coord3DMode.DEPTH, Coord3DMode.CAM, rt_mat=calibs['Rt'])
     # project to 2d to get image coords (uv)
-    uv_origin = points_cam2img(xyz_cam, calibs['K'])
+    uv_origin = points_cam2img(xyz_depth,
+                               xyz_depth.new_tensor(img_metas['depth2img']))
     uv_origin = (uv_origin - 1).round()
     imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
...
@@ -100,6 +100,7 @@ class Collect3D(object):
         - 'ori_shape': original shape of the image as a tuple (h, w, c)
         - 'pad_shape': image shape after padding
         - 'lidar2img': transform from lidar to image
+        - 'depth2img': transform from depth to image
         - 'pcd_horizontal_flip': a boolean indicating if point cloud is \
             flipped horizontally
         - 'pcd_vertical_flip': a boolean indicating if point cloud is \
@@ -134,7 +135,7 @@ class Collect3D(object):
     def __init__(self,
                  keys,
                  meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
-                            'pad_shape', 'scale_factor', 'flip',
+                            'depth2img', 'pad_shape', 'scale_factor', 'flip',
                             'cam_intrinsic', 'pcd_horizontal_flip',
                             'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
                             'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
...
@@ -101,7 +101,12 @@ class SUNRGBDDataset(Custom3DDataset):
             input_dict['img_prefix'] = None
             input_dict['img_info'] = dict(filename=img_filename)
             calib = info['calib']
-            input_dict['calib'] = calib
+            rt_mat = calib['Rt']
+            # follow Coord3DMode.convert_point
+            rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]
+                               ]) @ rt_mat.transpose(1, 0)
+            depth2img = calib['K'] @ rt_mat
+            input_dict['depth2img'] = depth2img
         if not self.test_mode:
             annos = self.get_ann_info(index)
@@ -187,8 +192,8 @@ class SUNRGBDDataset(Custom3DDataset):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
-            points, img_metas, img, calib = self._extract_data(
-                i, pipeline, ['points', 'img_metas', 'img', 'calib'])
+            points, img_metas, img = self._extract_data(
+                i, pipeline, ['points', 'img_metas', 'img'])
             # scale colors to [0, 255]
             points = points.numpy()
             points[:, 3:] *= 255
@@ -199,7 +204,7 @@ class SUNRGBDDataset(Custom3DDataset):
                      file_name, show)
             # multi-modality visualization
-            if self.modality['use_camera'] and 'calib' in data_info.keys():
+            if self.modality['use_camera']:
                 img = img.numpy()
                 # need to transpose channel to first dim
                 img = img.transpose(1, 2, 0)
@@ -211,7 +216,7 @@ class SUNRGBDDataset(Custom3DDataset):
                     img,
                     gt_bboxes,
                     pred_bboxes,
-                    calib,
+                    None,
                     out_dir,
                     file_name,
                     box_mode='depth',
...
@@ -378,7 +378,6 @@ class ImVoteNet(Base3DDetector):
                       gt_bboxes_ignore=None,
                       gt_masks=None,
                       proposals=None,
-                      calib=None,
                       bboxes_2d=None,
                       gt_bboxes_3d=None,
                       gt_labels_3d=None,
@@ -405,8 +404,6 @@ class ImVoteNet(Base3DDetector):
                 2d bbox, used if the architecture supports a segmentation task.
             proposals: override rpn proposals (2d) with custom proposals.
                 Use when `with_rpn` is False.
-            calib (dict[str, torch.Tensor]): camera calibration matrices,
-                Rt and K.
             bboxes_2d (list[torch.Tensor]): provided 2d bboxes,
                 not supported yet.
             gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes.
@@ -452,7 +449,7 @@ class ImVoteNet(Base3DDetector):
             self.extract_pts_feat(points)
         img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,
-                                                img_metas, calib)
+                                                img_metas)
         inds = sample_valid_seeds(masks, self.num_sampled_seed)
         batch_size, img_feat_size = img_features.shape[:2]
@@ -528,7 +525,6 @@ class ImVoteNet(Base3DDetector):
                      points=None,
                      img_metas=None,
                      img=None,
-                     calib=None,
                      bboxes_2d=None,
                      **kwargs):
         """Forwarding of test for image branch pretrain or stage 2 train.
@@ -546,9 +542,6 @@ class ImVoteNet(Base3DDetector):
                 list indicates test-time augmentations and inner Tensor
                 should have a shape NxCxHxW, which contains all images
                 in the batch. Defaults to None. Defaults to None.
-            calibs (list[dict[str, torch.Tensor]], optional): camera
-                calibration matrices, Rt and K.
-                List indicates test-time augs. Defaults to None.
             bboxes_2d (list[list[torch.Tensor]], optional):
                 Provided 2d bboxes, not supported yet. Defaults to None.
@@ -602,11 +595,10 @@ class ImVoteNet(Base3DDetector):
                     points[0],
                     img_metas[0],
                     img[0],
-                    calibs=calib[0],
                     bboxes_2d=bboxes_2d[0] if bboxes_2d is not None else None,
                     **kwargs)
             else:
-                return self.aug_test(points, img_metas, img, calib, bboxes_2d,
+                return self.aug_test(points, img_metas, img, bboxes_2d,
                                      **kwargs)
     def simple_test_img_only(self,
@@ -652,7 +644,6 @@ class ImVoteNet(Base3DDetector):
                     points=None,
                     img_metas=None,
                     img=None,
-                    calibs=None,
                     bboxes_2d=None,
                     rescale=False,
                     **kwargs):
@@ -666,8 +657,6 @@ class ImVoteNet(Base3DDetector):
                 images in a batch. Defaults to None.
             img (torch.Tensor, optional): Should have a shape NxCxHxW,
                 which contains all images in the batch. Defaults to None.
-            calibs (dict[str, torch.Tensor], optional): camera
-                calibration matrices, Rt and K. Defaults to None.
             bboxes_2d (list[torch.Tensor], optional):
                 Provided 2d bboxes, not supported yet. Defaults to None.
             rescale (bool, optional): Whether or not rescale bboxes.
@@ -684,7 +673,7 @@ class ImVoteNet(Base3DDetector):
             self.extract_pts_feat(points)
         img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,
-                                                img_metas, calibs)
+                                                img_metas)
         inds = sample_valid_seeds(masks, self.num_sampled_seed)
         batch_size, img_feat_size = img_features.shape[:2]
@@ -755,7 +744,6 @@ class ImVoteNet(Base3DDetector):
                  points=None,
                  img_metas=None,
                  imgs=None,
-                 calibs=None,
                  bboxes_2d=None,
                  rescale=False,
                  **kwargs):
@@ -774,9 +762,6 @@ class ImVoteNet(Base3DDetector):
                 list indicates test-time augmentations and inner Tensor
                 should have a shape NxCxHxW, which contains all images
                 in the batch. Defaults to None. Defaults to None.
-            calibs (list[dict[str, torch.Tensor]], optional): camera
-                calibration matrices, Rt and K.
-                List indicates test-time augs. Defaults to None.
             bboxes_2d (list[list[torch.Tensor]], optional):
                 Provided 2d bboxes, not supported yet. Defaults to None.
             rescale (bool, optional): Whether or not rescale bboxes.
@@ -790,8 +775,9 @@ class ImVoteNet(Base3DDetector):
         # only support aug_test for one sample
         aug_bboxes = []
-        for x, pts_cat, img_meta, bbox_2d, img, calib in zip(
-                feats, points_cat, img_metas, bboxes_2d, imgs, calibs):
+        for x, pts_cat, img_meta, bbox_2d, img in zip(feats, points_cat,
+                                                      img_metas, bboxes_2d,
+                                                      imgs):
             bbox_2d = self.extract_bboxes_2d(
                 img, img_metas, train=False, bboxes_2d=bbox_2d, **kwargs)
@@ -799,7 +785,7 @@ class ImVoteNet(Base3DDetector):
             seeds_3d, seed_3d_features, seed_indices = x
             img_features, masks = self.fusion_layer(img, bbox_2d, seeds_3d,
-                                                    img_metas, calib)
+                                                    img_metas)
             inds = sample_valid_seeds(masks, self.num_sampled_seed)
             batch_size, img_feat_size = img_features.shape[:2]
...
 import torch
 from torch import nn as nn
-from mmdet3d.core.bbox import Coord3DMode, points_cam2img
+from mmdet3d.core.bbox import points_cam2img
 from ..builder import FUSION_LAYERS
 from . import apply_3d_transformation, bbox_2d_transform, coord_2d_transform
@@ -22,8 +22,7 @@ class VoteFusion(nn.Module):
         self.num_classes = num_classes
         self.max_imvote_per_pixel = max_imvote_per_pixel
-    def forward(self, imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas,
-                calibs):
+    def forward(self, imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas):
         """Forward function.
         Args:
@@ -31,7 +30,6 @@ class VoteFusion(nn.Module):
             bboxes_2d_rescaled (list[torch.Tensor]): 2D bboxes.
             seeds_3d_depth (torch.Tensor): 3D seeds.
             img_metas (list[dict]): Meta information of images.
-            calibs: Camera calibration information of the images.
         Returns:
             torch.Tensor: Concatenated cues of each point.
@@ -52,16 +50,11 @@ class VoteFusion(nn.Module):
             xyz_depth = apply_3d_transformation(
                 seed_3d_depth, 'DEPTH', img_meta, reverse=True)
-            # then convert from depth coords to camera coords
-            xyz_cam = Coord3DMode.convert_point(
-                xyz_depth,
-                Coord3DMode.DEPTH,
-                Coord3DMode.CAM,
-                rt_mat=calibs['Rt'][i])
-            # project to 2d to get image coords (uv)
-            uv_origin = points_cam2img(xyz_cam, calibs['K'][i])
-            uv_origin = (uv_origin - 1).round()
+            # project points from depth to image
+            depth2img = xyz_depth.new_tensor(img_meta['depth2img'])
+            uvz_origin = points_cam2img(xyz_depth, depth2img, True)
+            z_cam = uvz_origin[..., 2]
+            uv_origin = (uvz_origin[..., :2] - 1).round()
             # rescale 2d coordinates and bboxes
             uv_rescaled = coord_2d_transform(img_meta, uv_origin, True)
@@ -113,22 +106,12 @@ class VoteFusion(nn.Module):
                 seed_3d_expanded = seed_3d_depth.view(seed_num, 1, -1).expand(
                     -1, bbox_num, -1)
-                z_cam = xyz_cam[..., 2:3].view(seed_num, 1,
-                                               1).expand(-1, bbox_num, -1)
-                delta_u = delta_u * z_cam / calibs['K'][i, 0, 0]
-                delta_v = delta_v * z_cam / calibs['K'][i, 0, 0]
+                z_cam = z_cam.view(seed_num, 1, 1).expand(-1, bbox_num, -1)
                 imvote = torch.cat(
                     [delta_u, delta_v,
                      torch.zeros_like(delta_v)], dim=-1).view(-1, 3)
-                # convert from camera coords to depth coords
-                imvote = Coord3DMode.convert_point(
-                    imvote.view((-1, 3)),
-                    Coord3DMode.CAM,
-                    Coord3DMode.DEPTH,
-                    rt_mat=calibs['Rt'][i])
+                imvote = imvote * z_cam.reshape(-1, 1)
+                imvote = imvote @ torch.inverse(depth2img.t())
                 # apply transformation to lifted imvotes
                 imvote = apply_3d_transformation(
...
@@ -79,7 +79,7 @@ def _generate_sunrgbd_multi_modality_dataset_config():
             type='Collect3D',
             keys=[
                 'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
-                'gt_labels_3d', 'calib'
+                'gt_labels_3d'
             ])
     ]
     modality = dict(use_lidar=True, use_camera=True)
@@ -158,20 +158,23 @@ def test_getitem():
     points = data['points']._data
     gt_bboxes_3d = data['gt_bboxes_3d']._data
     gt_labels_3d = data['gt_labels_3d']._data
-    calib = data['calib']
     img = data['img']._data
+    depth2img = data['img_metas']._data['depth2img']
-    expected_Rt = np.array([[0.97959, 0.012593, -0.20061],
-                            [0.012593, 0.99223, 0.12377],
-                            [0.20061, -0.12377, 0.97182]])
-    expected_K = np.array([[529.5, 0., 0.], [0., 529.5, 0.], [365., 265., 1.]])
+    expected_rt_mat = np.array([[0.97959, 0.012593, -0.20061],
+                                [0.012593, 0.99223, 0.12377],
+                                [0.20061, -0.12377, 0.97182]])
+    expected_k_mat = np.array([[529.5, 0., 0.], [0., 529.5, 0.],
+                               [365., 265., 1.]])
+    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]
+                       ]) @ expected_rt_mat.transpose(1, 0)
+    expected_depth2img = expected_k_mat @ rt_mat
     assert torch.allclose(points, expected_points, 1e-2)
     assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
     assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
     assert img.shape[:] == (3, 608, 832)
-    assert np.allclose(calib['Rt'], expected_Rt)
-    assert np.allclose(calib['K'], expected_K)
+    assert np.allclose(depth2img, expected_depth2img)
 def test_evaluate():
@@ -295,7 +298,7 @@ def test_show():
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(type='Collect3D', keys=['points', 'img', 'calib'])
+        dict(type='Collect3D', keys=['points', 'img'])
     ]
     tmp_dir = tempfile.TemporaryDirectory()
     temp_dir = tmp_dir.name
...
@@ -32,16 +32,16 @@ def test_vote_fusion():
         'transformation_3d_flow': ['HF', 'R', 'S', 'T']
     }
-    calibs = {
-        'Rt':
-        torch.tensor([[[0.979570, 0.047954, -0.195330],
-                       [0.047954, 0.887470, 0.458370],
-                       [0.195330, -0.458370, 0.867030]]]),
-        'K':
-        torch.tensor([[[529.5000, 0.0000, 365.0000],
-                       [0.0000, 529.5000, 265.0000], [0.0000, 0.0000,
-                                                      1.0000]]])
-    }
+    rt_mat = torch.tensor([[0.979570, 0.047954, -0.195330],
+                           [0.047954, 0.887470, 0.458370],
+                           [0.195330, -0.458370, 0.867030]])
+    k_mat = torch.tensor([[529.5000, 0.0000, 365.0000],
+                          [0.0000, 529.5000, 265.0000],
+                          [0.0000, 0.0000, 1.0000]])
+    rt_mat = rt_mat.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]
+                                ]) @ rt_mat.transpose(1, 0)
+    depth2img = k_mat @ rt_mat
+    img_meta['depth2img'] = depth2img
     bboxes = torch.tensor([[[
         5.4286e+02, 9.8283e+01, 6.1700e+02, 1.6742e+02, 9.7922e-01, 3.0000e+00
@@ -309,12 +309,12 @@ def test_vote_fusion():
     ]]])
     fusion = VoteFusion()
-    out1, out2 = fusion(imgs, bboxes, seeds_3d, [img_meta], calibs)
+    out1, out2 = fusion(imgs, bboxes, seeds_3d, [img_meta])
     assert torch.allclose(expected_tensor1, out1[:, :, :15], 1e-3)
     assert torch.allclose(expected_tensor2.float(), out2.float(), 1e-3)
     assert torch.allclose(expected_tensor3, out1[:, :, 30:45], 1e-3)
-    out1, out2 = fusion(imgs, bboxes[:, :2], seeds_3d, [img_meta], calibs)
+    out1, out2 = fusion(imgs, bboxes[:, :2], seeds_3d, [img_meta])
     out1 = out1[:, :15, 30:45]
     out2 = out2[:, 30:45].float()
     assert torch.allclose(torch.zeros_like(out1), out1, 1e-3)
...
@@ -90,12 +90,16 @@ def test_show_result_meshlab():
         torch.tensor(
             [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
     img = np.random.randn(1, 3, 608, 832)
-    K = np.array([[[529.5000, 0.0000, 365.0000], [0.0000, 529.5000, 265.0000],
-                   [0.0000, 0.0000, 1.0000]]])
-    Rt = torch.tensor([[[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
-                        [0.0634, -0.1808, 0.9815]]])
+    k_mat = np.array([[529.5000, 0.0000, 365.0000],
+                      [0.0000, 529.5000, 265.0000], [0.0000, 0.0000, 1.0000]])
+    rt_mat = np.array([[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
+                       [0.0634, -0.1808, 0.9815]])
+    rt_mat = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ rt_mat.transpose(
+        1, 0)
+    depth2img = k_mat @ rt_mat
     img_meta = dict(
         filename=filename,
+        depth2img=depth2img,
         pcd_horizontal_flip=False,
         pcd_vertical_flip=False,
         box_mode_3d=Box3DMode.DEPTH,
@@ -104,12 +108,8 @@ def test_show_result_meshlab():
         pcd_scale_factor=1.0,
         pts_filename=pcd,
         transformation_3d_flow=['R', 'S', 'T'])
-    calib = dict(K=K, Rt=Rt)
     data = dict(
-        points=[[torch.tensor(points)]],
-        img_metas=[[img_meta]],
-        img=[img],
-        calib=[calib])
+        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
     result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
     tmp_dir = tempfile.TemporaryDirectory()
     temp_out_dir = tmp_dir.name
...
@@ -191,6 +191,8 @@ def test_points_conversion():
             depth_points.tensor[:, 3:]
         ],
         dim=1)
+    mat = rt_mat_provided.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
+    rt_mat_provided = mat @ rt_mat_provided.transpose(1, 0)
     cam_point_tensor_new = Coord3DMode.convert_point(
         depth_points_new,
         Coord3DMode.DEPTH,
...
@@ -141,7 +141,7 @@ def show_proj_bbox_img(idx,
             img,
             gt_bboxes,
             None,
-            example['calib'],
+            None,
             out_dir,
             filename,
             box_mode='depth',
...