"vscode:/vscode.git/clone" did not exist on "ccae9dbdeb71ccb2809bc41b10dd3440ee1d037a"
Unverified Commit c7659a12 authored by Danila Rukhovich's avatar Danila Rukhovich Committed by GitHub
Browse files

[Enhance] Update PointFusion (#791)

* update point fusion

* remove LIDAR hardcode

* move get_proj_mat_by_coord_type to utils

* fix lint

* remove todo

* fix lint
parent fc301b98
......@@ -5,12 +5,13 @@ from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, limit_period, mono_cam_box2vis,
points_cam2img, rotation_3d_in_axis, xywhr2xyxyr)
from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
mono_cam_box2vis, points_cam2img, rotation_3d_in_axis,
xywhr2xyxyr)
__all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
'Coord3DMode', 'mono_cam_box2vis'
'Coord3DMode', 'mono_cam_box2vis', 'get_proj_mat_by_coord_type'
]
......@@ -195,3 +195,20 @@ def mono_cam_box2vis(cam_box):
cam_box, box_dim=cam_box.shape[-1], origin=(0.5, 0.5, 0.5))
return cam_box
def get_proj_mat_by_coord_type(img_meta, coord_type):
    """Obtain the projection matrix for a given coordinate system.

    Looks up the appropriate transformation matrix in ``img_meta``
    ('lidar2img', 'depth2img' or 'cam2img') based on the coordinate type.

    Args:
        img_meta (dict): Meta info containing the projection matrices.
        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
            Can be case-insensitive.

    Returns:
        torch.Tensor: Transformation matrix.
    """
    coord_type = coord_type.upper()
    mapping = {'LIDAR': 'lidar2img', 'DEPTH': 'depth2img', 'CAMERA': 'cam2img'}
    assert coord_type in mapping, f'Invalid coord_type: {coord_type}'
    return img_meta[mapping[coord_type]]
......@@ -61,7 +61,8 @@ class ImVoxelNet(BaseDetector):
img_meta,
img_features=feature[None, ...],
points=points,
lidar2img_rt=points.new_tensor(img_meta['lidar2img']),
proj_mat=points.new_tensor(img_meta['lidar2img']),
coord_type='LIDAR',
img_scale_factor=img_scale_factor,
img_crop_offset=img_crop_offset,
img_flip=img_flip,
......
......@@ -5,12 +5,12 @@ from functools import partial
from mmdet3d.core.points import get_points_type
def apply_3d_transformation(pcd, coords_type, img_meta, reverse=False):
def apply_3d_transformation(pcd, coord_type, img_meta, reverse=False):
"""Apply transformation to input point cloud.
Args:
pcd (torch.Tensor): The point cloud to be transformed.
coords_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'
coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
img_meta(dict): Meta info regarding data transformation.
reverse (bool): Reversed transformation or not.
......@@ -54,7 +54,7 @@ def apply_3d_transformation(pcd, coords_type, img_meta, reverse=False):
if 'transformation_3d_flow' in img_meta else []
pcd = pcd.clone() # prevent inplace modification
pcd = get_points_type(coords_type)(pcd)
pcd = get_points_type(coord_type)(pcd)
horizontal_flip_func = partial(pcd.flip, bev_direction='horizontal') \
if pcd_horizontal_flip else lambda: None
......
......@@ -5,15 +5,17 @@ from mmcv.runner import BaseModule
from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.core.bbox.structures import (get_proj_mat_by_coord_type,
points_cam2img)
from ..builder import FUSION_LAYERS
from . import apply_3d_transformation
def point_sample(
img_meta,
def point_sample(img_meta,
img_features,
points,
lidar2img_rt,
proj_mat,
coord_type,
img_scale_factor,
img_crop_offset,
img_flip,
......@@ -21,15 +23,15 @@ def point_sample(
img_shape,
aligned=True,
padding_mode='zeros',
align_corners=True,
):
align_corners=True):
"""Obtain image features using points.
Args:
img_meta (dict): Meta info.
img_features (torch.Tensor): 1 x C x H x W image features.
points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.
lidar2img_rt (torch.Tensor): 4x4 transformation matrix.
proj_mat (torch.Tensor): 4x4 transformation matrix.
coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
img_scale_factor (torch.Tensor): Scale factor with shape of \
(w_scale, h_scale).
img_crop_offset (torch.Tensor): Crop offset used to crop \
......@@ -51,19 +53,11 @@ def point_sample(
"""
# apply transformation based on info in img_meta
points = apply_3d_transformation(points, 'LIDAR', img_meta, reverse=True)
# project points from velo coordinate to camera coordinate
num_points = points.shape[0]
pts_4d = torch.cat([points, points.new_ones(size=(num_points, 1))], dim=-1)
pts_2d = pts_4d @ lidar2img_rt.t()
# cam_points is Tensor of Nx4 whose last column is 1
# transform camera coordinate to image coordinate
points = apply_3d_transformation(
points, coord_type, img_meta, reverse=True)
pts_2d[:, 2] = torch.clamp(pts_2d[:, 2], min=1e-5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
# project points to camera coordinate
pts_2d = points_cam2img(points, proj_mat)
# img transformation: scale -> crop -> flip
# the image is resized by img_scale_factor
......@@ -108,6 +102,8 @@ class PointFusion(BaseModule):
mid_channels (int): Channels of middle layers
out_channels (int): Channels of output fused features
img_levels (int, optional): Number of image levels. Defaults to 3.
coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
Defaults to 'LIDAR'.
conv_cfg (dict, optional): Dict config of conv layers of middle
layers. Defaults to None.
norm_cfg (dict, optional): Dict config of norm layers of middle
......@@ -137,6 +133,7 @@ class PointFusion(BaseModule):
mid_channels,
out_channels,
img_levels=3,
coord_type='LIDAR',
conv_cfg=None,
norm_cfg=None,
act_cfg=None,
......@@ -158,6 +155,7 @@ class PointFusion(BaseModule):
assert len(img_channels) == len(img_levels)
self.img_levels = img_levels
self.coord_type = coord_type
self.act_cfg = act_cfg
self.activate_out = activate_out
self.fuse_out = fuse_out
......@@ -289,13 +287,15 @@ class PointFusion(BaseModule):
img_crop_offset = (
pts.new_tensor(img_meta['img_crop_offset'])
if 'img_crop_offset' in img_meta.keys() else 0)
proj_mat = get_proj_mat_by_coord_type(img_meta, self.coord_type)
img_pts = point_sample(
img_meta,
img_feats,
pts,
pts.new_tensor(img_meta['lidar2img']),
img_scale_factor,
img_crop_offset,
img_meta=img_meta,
img_features=img_feats,
points=pts,
proj_mat=pts.new_tensor(proj_mat),
coord_type=self.coord_type,
img_scale_factor=img_scale_factor,
img_crop_offset=img_crop_offset,
img_flip=img_flip,
img_pad_shape=img_meta['input_shape'][:2],
img_shape=img_meta['img_shape'][:2],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment