"llama/llama.cpp/src/llama-grammar.cpp" did not exist on "7a81daf02696ba1be3450878c48da78dcfcc3826"
Commit d1aac35d authored by zhangwenwei's avatar zhangwenwei
Browse files

Initial commit

parents
from .point_fusion import PointFusion
__all__ = ['PointFusion']
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init
from mmdet3d.models.utils import ConvModule
from ..plugins import NonLocal2D
from ..registry import FUSION_LAYERS
def point_sample(
img_features,
points,
lidar2img_rt,
pcd_rotate_mat,
img_scale_factor,
img_crop_offset,
pcd_trans_factor,
pcd_scale_factor,
pcd_flip,
img_flip,
img_pad_shape,
img_shape,
aligned=True,
padding_mode='zeros',
align_corners=True,
):
"""sample image features using point coordinates
Arguments:
img_features (Tensor): 1xCxHxW image features
points (Tensor): Nx3 point cloud coordinates
P (Tensor): 4x4 transformation matrix
scale_factor (Tensor): scale_factor of images
img_pad_shape (int, int): int tuple indicates the h & w after padding,
this is necessary to obtain features in feature map
img_shape (int, int): int tuple indicates the h & w before padding
after scaling, this is necessary for flipping coordinates
return:
(Tensor): NxC image features sampled by point coordinates
"""
# aug order: flip -> trans -> scale -> rot
# The transformation follows the augmentation order in data pipeline
if pcd_flip:
# if the points are flipped, flip them back first
points[:, 1] = -points[:, 1]
points -= pcd_trans_factor
# the points should be scaled to the original scale in velo coordinate
points /= pcd_scale_factor
# the points should be rotated back
    # pcd_rotate_mat @ pcd_rotate_mat.inverse() is not exactly an identity
    # matrix due to numerical precision, and we cannot construct the inverse
    # rotation from the angle either, so use the matrix inverse directly.
points = points @ pcd_rotate_mat.inverse()
# project points from velo coordinate to camera coordinate
num_points = points.shape[0]
pts_4d = torch.cat([points, points.new_ones(size=(num_points, 1))], dim=-1)
pts_2d = pts_4d @ lidar2img_rt.t()
# cam_points is Tensor of Nx4 whose last column is 1
# transform camera coordinate to image coordinate
pts_2d[:, 2] = torch.clamp(pts_2d[:, 2], min=1e-5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
# img transformation: scale -> crop -> flip
# the image is resized by img_scale_factor
img_coors = pts_2d[:, 0:2] * img_scale_factor # Nx2
img_coors -= img_crop_offset
# grid sample, the valid grid range should be in [-1,1]
coor_x, coor_y = torch.split(img_coors, 1, dim=1) # each is Nx1
if img_flip:
# by default we take it as horizontal flip
# use img_shape before padding for flip
orig_h, orig_w = img_shape
coor_x = orig_w - coor_x
h, w = img_pad_shape
coor_y = coor_y / h * 2 - 1
coor_x = coor_x / w * 2 - 1
grid = torch.cat([coor_x, coor_y],
dim=1).unsqueeze(0).unsqueeze(0) # Nx2 -> 1x1xNx2
    # align_corners=True provides higher performance
mode = 'bilinear' if aligned else 'nearest'
point_features = F.grid_sample(
img_features,
grid,
mode=mode,
padding_mode=padding_mode,
align_corners=align_corners) # 1xCx1xN feats
return point_features.squeeze().t()
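

# Illustrative usage sketch (not from the original file): how point_sample
# could be called with dummy tensors and identity transforms. All shapes and
# values below are assumptions made purely for demonstration.
def _example_point_sample():
    img_features = torch.rand(1, 16, 32, 32)  # 1xCxHxW feature map
    points = torch.rand(5, 3) * 10  # Nx3 points in LiDAR coordinates
    feats = point_sample(
        img_features,
        points,
        lidar2img_rt=torch.eye(4),  # identity lidar-to-image projection
        pcd_rotate_mat=torch.eye(3),  # no point cloud rotation
        img_scale_factor=torch.tensor([1.0, 1.0]),  # no image rescaling
        img_crop_offset=torch.tensor([0.0, 0.0]),  # no image cropping
        pcd_trans_factor=torch.tensor([0.0, 0.0, 0.0]),
        pcd_scale_factor=1.0,
        pcd_flip=False,
        img_flip=False,
        img_pad_shape=(32, 32),
        img_shape=(32, 32))
    return feats  # NxC features sampled at the projected point locations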
@FUSION_LAYERS.register_module
class PointFusion(nn.Module):
"""Fuse image features from fused single scale features
"""
def __init__(self,
img_channels,
pts_channels,
mid_channels,
out_channels,
img_levels=3,
conv_cfg=None,
norm_cfg=None,
activation=None,
activate_out=True,
fuse_out=False,
refine_type=None,
dropout_ratio=0,
aligned=True,
align_corners=True,
padding_mode='zeros',
lateral_conv=True):
super(PointFusion, self).__init__()
if isinstance(img_levels, int):
img_levels = [img_levels]
if isinstance(img_channels, int):
img_channels = [img_channels] * len(img_levels)
assert isinstance(img_levels, list)
assert isinstance(img_channels, list)
assert len(img_channels) == len(img_levels)
self.img_levels = img_levels
self.activation = activation
self.activate_out = activate_out
self.fuse_out = fuse_out
self.refine_type = refine_type
self.dropout_ratio = dropout_ratio
self.img_channels = img_channels
self.aligned = aligned
self.align_corners = align_corners
self.padding_mode = padding_mode
self.lateral_convs = None
if lateral_conv:
self.lateral_convs = nn.ModuleList()
for i in range(len(img_channels)):
l_conv = ConvModule(
img_channels[i],
mid_channels,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
activation=self.activation,
inplace=False)
self.lateral_convs.append(l_conv)
self.img_transform = nn.Sequential(
nn.Linear(mid_channels * len(img_channels), out_channels),
nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
)
else:
self.img_transform = nn.Sequential(
nn.Linear(sum(img_channels), out_channels),
nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
)
self.pts_transform = nn.Sequential(
nn.Linear(pts_channels, out_channels),
nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
)
if self.fuse_out:
self.fuse_conv = nn.Sequential(
nn.Linear(mid_channels, out_channels),
# For pts the BN is initialized differently by default
# TODO: check whether this is necessary
nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
nn.ReLU(inplace=False))
if self.refine_type == 'non_local':
self.refine = NonLocal2D(
out_channels,
reduction=1,
use_scale=False,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
self.init_weights()
# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Linear)):
xavier_init(m, distribution='uniform')
def forward(self, img_feats, pts, pts_feats, img_meta):
"""
img_feats (List[Tensor]): img features
pts: [List[Tensor]]: a batch of points with shape Nx3
pts_feats (Tensor): a tensor consist of point features of the
total batch
"""
img_pts = self.obtain_mlvl_feats(img_feats, pts, img_meta)
img_pre_fuse = self.img_transform(img_pts)
if self.training and self.dropout_ratio > 0:
img_pre_fuse = F.dropout(img_pre_fuse, self.dropout_ratio)
pts_pre_fuse = self.pts_transform(pts_feats)
fuse_out = img_pre_fuse + pts_pre_fuse
if self.activate_out:
fuse_out = F.relu(fuse_out)
if self.fuse_out:
fuse_out = self.fuse_conv(fuse_out)
if self.refine_type is not None:
fuse_out_T = fuse_out.t()[None, ..., None] # NxC -> 1xCxNx1
batch_idx = 0
attentive = []
for i in range(len(pts)):
end_idx = batch_idx + len(pts[i])
attentive.append(
self.refine(fuse_out_T[:, :, batch_idx:end_idx]))
batch_idx = end_idx
fuse_out = torch.cat(attentive, dim=-2).squeeze().t()
return fuse_out
def obtain_mlvl_feats(self, img_feats, pts, img_meta):
if self.lateral_convs is not None:
img_ins = [
lateral_conv(img_feats[i])
for i, lateral_conv in zip(self.img_levels, self.lateral_convs)
]
else:
img_ins = img_feats
img_feats_per_point = []
# Sample multi-level features
for i in range(len(img_meta)):
mlvl_img_feats = []
for level in range(len(self.img_levels)):
if torch.isnan(img_ins[level][i:i + 1]).any():
import pdb
pdb.set_trace()
mlvl_img_feats.append(
self.sample_single(img_ins[level][i:i + 1], pts[i][:, :3],
img_meta[i]))
mlvl_img_feats = torch.cat(mlvl_img_feats, dim=-1)
img_feats_per_point.append(mlvl_img_feats)
img_pts = torch.cat(img_feats_per_point, dim=0)
return img_pts
def sample_single(self, img_feats, pts, img_meta):
pcd_scale_factor = (
img_meta['pcd_scale_factor']
if 'pcd_scale_factor' in img_meta.keys() else 1)
pcd_trans_factor = (
pts.new_tensor(img_meta['pcd_trans'])
if 'pcd_trans' in img_meta.keys() else 0)
pcd_rotate_mat = (
pts.new_tensor(img_meta['pcd_rotation'])
if 'pcd_rotation' in img_meta.keys() else
torch.eye(3).type_as(pts).to(pts.device))
img_scale_factor = (
img_meta['scale_factor']
if 'scale_factor' in img_meta.keys() else 1)
pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
) else False
img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False
img_crop_offset = (
pts.new_tensor(img_meta['img_crop_offset'])
if 'img_crop_offset' in img_meta.keys() else 0)
img_pts = point_sample(
img_feats,
pts,
pts.new_tensor(img_meta['lidar2img']),
pcd_rotate_mat,
img_scale_factor,
img_crop_offset,
pcd_trans_factor,
pcd_scale_factor,
pcd_flip=pcd_flip,
img_flip=img_flip,
img_pad_shape=img_meta['pad_shape'][:2],
img_shape=img_meta['img_shape'][:2],
aligned=self.aligned,
padding_mode=self.padding_mode,
align_corners=self.align_corners,
)
return img_pts
from mmdet.models.losses import FocalLoss, SmoothL1Loss
__all__ = ['FocalLoss', 'SmoothL1Loss']
from .pillar_scatter import PointPillarsScatter
from .sparse_encoder import SparseEncoder
__all__ = ['PointPillarsScatter', 'SparseEncoder']
import torch
from torch import nn
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module
class PointPillarsScatter(nn.Module):
def __init__(self, in_channels, output_shape):
"""
Point Pillar's Scatter.
Converts learned features from dense tensor to sparse pseudo image.
Args:
output_shape (list[int]): Required output shape of features.
in_channels (int): Number of input features.
"""
super().__init__()
self.name = 'PointPillarsScatter'
self.output_shape = output_shape
self.ny = output_shape[0]
self.nx = output_shape[1]
self.nchannels = in_channels
def forward(self, voxel_features, coors, batch_size=None):
# TODO: rewrite the function in a batch manner
# no need to deal with different batch cases
if batch_size is not None:
return self.forward_batch(voxel_features, coors, batch_size)
else:
return self.forward_single(voxel_features, coors)
def forward_single(self, voxel_features, coors):
# Create the canvas for this sample
canvas = torch.zeros(
self.nchannels,
self.nx * self.ny,
dtype=voxel_features.dtype,
device=voxel_features.device)
indices = coors[:, 1] * self.nx + coors[:, 2]
indices = indices.long()
voxels = voxel_features.t()
# Now scatter the blob back to the canvas.
canvas[:, indices] = voxels
# Undo the column stacking to final 4-dim tensor
canvas = canvas.view(1, self.nchannels, self.ny, self.nx)
return [canvas]
def forward_batch(self, voxel_features, coors, batch_size):
# batch_canvas will be the final output.
batch_canvas = []
for batch_itt in range(batch_size):
# Create the canvas for this sample
canvas = torch.zeros(
self.nchannels,
self.nx * self.ny,
dtype=voxel_features.dtype,
device=voxel_features.device)
# Only include non-empty pillars
batch_mask = coors[:, 0] == batch_itt
this_coors = coors[batch_mask, :]
indices = this_coors[:, 2] * self.nx + this_coors[:, 3]
indices = indices.type(torch.long)
voxels = voxel_features[batch_mask, :]
voxels = voxels.t()
# Now scatter the blob back to the canvas.
canvas[:, indices] = voxels
# Append to a list for later stacking.
batch_canvas.append(canvas)
# Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols)
batch_canvas = torch.stack(batch_canvas, 0)
# Undo the column stacking to final 4-dim tensor
batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny,
self.nx)
return batch_canvas
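

# Illustrative sketch (assumed toy values, not part of the original module):
# scatter three pillar feature vectors onto a tiny 4x4 BEV canvas.
def _example_pillar_scatter():
    scatter = PointPillarsScatter(in_channels=8, output_shape=[4, 4])
    voxel_features = torch.rand(3, 8)  # features of 3 non-empty pillars
    coors = torch.tensor([[0, 0, 1, 2],  # (batch_idx, z, y, x) per pillar
                          [0, 0, 3, 0],
                          [0, 0, 0, 3]])
    canvas = scatter(voxel_features, coors, batch_size=1)
    return canvas  # (1, 8, 4, 4) pseudo image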
import torch.nn as nn
import mmdet3d.ops.spconv as spconv
from ..registry import MIDDLE_ENCODERS
from ..utils import build_norm_layer
@MIDDLE_ENCODERS.register_module
class SparseEncoder(nn.Module):
def __init__(self,
in_channels,
output_shape,
pre_act,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)):
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.in_channels = in_channels
self.pre_act = pre_act
        # spconv initializes its weights on its own
        # TODO: make the network structure configurable
if pre_act:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
16,
3,
padding=1,
bias=False,
indice_key='subm1'), )
block = self.pre_act_block
else:
norm_name, norm_layer = build_norm_layer(norm_cfg, 16)
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
16,
3,
padding=1,
bias=False,
indice_key='subm1'),
norm_layer,
nn.ReLU(),
)
block = self.post_act_block
self.conv1 = spconv.SparseSequential(
block(16, 16, 3, norm_cfg=norm_cfg, padding=1,
indice_key='subm1'), )
self.conv2 = spconv.SparseSequential(
# [1600, 1408, 41] -> [800, 704, 21]
block(
16,
32,
3,
norm_cfg=norm_cfg,
stride=2,
padding=1,
indice_key='spconv2',
conv_type='spconv'),
block(32, 32, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm2'),
block(32, 32, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm2'),
)
self.conv3 = spconv.SparseSequential(
# [800, 704, 21] -> [400, 352, 11]
block(
32,
64,
3,
norm_cfg=norm_cfg,
stride=2,
padding=1,
indice_key='spconv3',
conv_type='spconv'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm3'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm3'),
)
self.conv4 = spconv.SparseSequential(
# [400, 352, 11] -> [200, 176, 5]
block(
64,
64,
3,
norm_cfg=norm_cfg,
stride=2,
padding=(0, 1, 1),
indice_key='spconv4',
conv_type='spconv'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm4'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm4'),
)
norm_name, norm_layer = build_norm_layer(norm_cfg, 128)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
128,
128, (3, 1, 1),
stride=(2, 1, 1),
padding=0,
bias=False,
indice_key='spconv_down2'),
norm_layer,
nn.ReLU(),
)
def forward(self, voxel_features, coors, batch_size):
"""
:param voxel_features: (N, C)
:param coors: (N, 4) [batch_idx, z_idx, y_idx, x_idx]
:param batch_size:
:return:
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
self.sparse_shape,
batch_size)
x = self.conv_input(input_sp_tensor)
x_conv1 = self.conv1(x)
x_conv2 = self.conv2(x_conv1)
x_conv3 = self.conv3(x_conv2)
x_conv4 = self.conv4(x_conv3)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(x_conv4)
spatial_features = out.dense()
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
return spatial_features
def pre_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
norm_name, norm_layer = build_norm_layer(norm_cfg, in_channels)
if conv_type == 'subm':
m = spconv.SparseSequential(
norm_layer,
nn.ReLU(inplace=True),
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
padding=padding,
bias=False,
indice_key=indice_key),
)
elif conv_type == 'spconv':
m = spconv.SparseSequential(
norm_layer,
nn.ReLU(inplace=True),
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
)
else:
raise NotImplementedError
return m
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
norm_layer,
nn.ReLU(inplace=True),
)
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
norm_layer,
nn.ReLU(inplace=True),
)
else:
raise NotImplementedError
return m
from mmdet.models.necks.fpn import FPN
from .second_fpn import SECONDFPN
__all__ = ['FPN', 'SECONDFPN']
import logging
from functools import partial
import torch
import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
from mmcv.runner import load_checkpoint
from torch.nn import Sequential
from torch.nn.modules.batchnorm import _BatchNorm
from .. import builder
from ..registry import NECKS
from ..utils import build_norm_layer
class Empty(nn.Module):
def __init__(self, *args, **kwargs):
super(Empty, self).__init__()
def forward(self, *args, **kwargs):
if len(args) == 1:
return args[0]
elif len(args) == 0:
return None
return args
@NECKS.register_module
class SECONDFPN(nn.Module):
"""Compare with RPN, RPNV2 support arbitrary number of stage.
"""
def __init__(self,
use_norm=True,
in_channels=[128, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[256, 256, 256],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01)):
        # for GroupNorm, use
        # norm_cfg=dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True)
super(SECONDFPN, self).__init__()
assert len(num_upsample_filters) == len(upsample_strides)
self.in_channels = in_channels
if norm_cfg is not None:
ConvTranspose2d = partial(nn.ConvTranspose2d, bias=False)
else:
ConvTranspose2d = partial(nn.ConvTranspose2d, bias=True)
deblocks = []
for i, num_upsample_filter in enumerate(num_upsample_filters):
norm_layer = (
build_norm_layer(norm_cfg, num_upsample_filter)[1]
if norm_cfg is not None else Empty)
deblock = Sequential(
ConvTranspose2d(
in_channels[i],
num_upsample_filter,
upsample_strides[i],
stride=upsample_strides[i]),
norm_layer,
nn.ReLU(inplace=True),
)
deblocks.append(deblock)
self.deblocks = nn.ModuleList(deblocks)
def init_weights(self, pretrained=None):
if isinstance(pretrained, str):
logger = logging.getLogger()
load_checkpoint(self, pretrained, strict=False, logger=logger)
elif pretrained is None:
            # keeping this initialization yields better results
for m in self.modules():
if isinstance(m, nn.Conv2d):
kaiming_init(m)
elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
constant_init(m, 1)
else:
raise TypeError('pretrained must be a str or None')
return
def forward(self, inputs):
assert len(inputs) == len(self.in_channels)
ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]
if len(ups) > 1:
x = torch.cat(ups, dim=1)
else:
x = ups[0]
return [x]
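

# Illustrative sketch (assumed shapes): SECONDFPN upsamples each BEV feature
# map to a common resolution and concatenates them along the channel axis.
# The channel/stride values below are illustrative, not a recommended config.
def _example_second_fpn():
    neck = SECONDFPN(
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        num_upsample_filters=[128, 128, 128])
    inputs = [
        torch.rand(2, 64, 100, 100),
        torch.rand(2, 128, 50, 50),
        torch.rand(2, 256, 25, 25),
    ]
    outs = neck(inputs)
    return outs[0]  # (2, 384, 100, 100): three 128-channel maps concatenated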
@NECKS.register_module
class SECONDFusionFPN(SECONDFPN):
"""Compare with RPN, RPNV2 support arbitrary number of stage.
"""
def __init__(self,
use_norm=True,
in_channels=[128, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[256, 256, 256],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
down_sample_rate=[40, 8, 8],
fusion_layer=None,
cat_points=False):
super(SECONDFusionFPN, self).__init__(
use_norm,
in_channels,
upsample_strides,
num_upsample_filters,
norm_cfg,
)
self.fusion_layer = None
if fusion_layer is not None:
self.fusion_layer = builder.build_fusion_layer(fusion_layer)
self.cat_points = cat_points
self.down_sample_rate = down_sample_rate
def forward(self,
inputs,
coors=None,
points=None,
img_feats=None,
img_meta=None):
assert len(inputs) == len(self.in_channels)
ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]
if len(ups) > 1:
x = torch.cat(ups, dim=1)
else:
x = ups[0]
if (self.fusion_layer is not None and img_feats is not None):
downsample_pts_coors = torch.zeros_like(coors)
downsample_pts_coors[:, 0] = coors[:, 0]
downsample_pts_coors[:, 1] = (
coors[:, 1] / self.down_sample_rate[0])
downsample_pts_coors[:, 2] = (
coors[:, 2] / self.down_sample_rate[1])
downsample_pts_coors[:, 3] = (
coors[:, 3] / self.down_sample_rate[2])
# fusion for each point
x = self.fusion_layer(img_feats, points, x, downsample_pts_coors,
img_meta)
return [x]
from mmdet.utils import Registry
VOXEL_ENCODERS = Registry('voxel_encoder')
MIDDLE_ENCODERS = Registry('middle_encoder')
FUSION_LAYERS = Registry('fusion_layer')
from mmdet.models.roi_extractors.single_level import SingleRoIExtractor
__all__ = ['SingleRoIExtractor']
from mmdet.models.utils import ResLayer, bias_init_with_prob
__all__ = ['bias_init_with_prob', 'ResLayer']
import numpy as np
import torch.nn as nn
def xavier_init(module, gain=1, bias=0, distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
nn.init.xavier_uniform_(module.weight, gain=gain)
else:
nn.init.xavier_normal_(module.weight, gain=gain)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)
def normal_init(module, mean=0, std=1, bias=0):
nn.init.normal_(module.weight, mean, std)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)
def uniform_init(module, a=0, b=1, bias=0):
nn.init.uniform_(module.weight, a, b)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)
def kaiming_init(module,
mode='fan_out',
nonlinearity='relu',
bias=0,
distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
nn.init.kaiming_uniform_(
module.weight, mode=mode, nonlinearity=nonlinearity)
else:
nn.init.kaiming_normal_(
module.weight, mode=mode, nonlinearity=nonlinearity)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)
def bias_init_with_prob(prior_prob):
""" initialize conv/fc bias value according to giving probablity"""
bias_init = float(-np.log((1 - prior_prob) / prior_prob))
return bias_init
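

# Illustrative sketch: with the focal-loss style prior of 0.01 the bias is
# about -4.595, so a sigmoid over the initial logits outputs ~0.01.
def _example_bias_init():
    bias = bias_init_with_prob(0.01)  # ~ -4.595
    prob = 1 / (1 + np.exp(-bias))  # ~ 0.01, recovers the prior probability
    return bias, prob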
from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
__all__ = [
'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
]
import torch
from torch import nn
from mmdet3d.ops import DynamicScatter, build_norm_layer
from ..registry import VOXEL_ENCODERS
from .utils import PFNLayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module
class PillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int). Number of input features,
either x, y, z or x, y, z, r.
use_norm (bool). Whether to include BatchNorm.
num_filters (list[int]). Number of features in each of the
N PFNLayers.
with_distance (bool). Whether to include Euclidean distance
to points.
voxel_size (list[float]). Size of voxels, only utilize x and y
size.
point_cloud_range (list[float>]). Point cloud range, only
utilize x and y min.
"""
super(PillarFeatureNet, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
num_input_features += 3
if with_voxel_center:
num_input_features += 2
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
self.num_input_features = num_input_features
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(
PFNLayer(
in_filters,
out_filters,
use_norm,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to calculate offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.point_cloud_range = point_cloud_range
def forward(self, features, num_points, coors):
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
            # take a copy so the raw x/y columns of `features` are not
            # overwritten in place by the center offsets below
            f_center = features[:, :, :2].clone()
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to whether
# pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
for pfn in self.pfn_layers:
features = pfn(features, num_points)
return features.squeeze()
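

# Illustrative sketch (assumed shapes): decorate raw pillar points with
# cluster- and voxel-center offsets, then max-pool to one 64-d vector per
# pillar. Values are toy data, not a real voxelization result.
def _example_pillar_feature_net():
    pfn = PillarFeatureNet(num_input_features=4)
    features = torch.rand(6, 30, 4)  # 6 pillars, up to 30 points, (x, y, z, r)
    num_points = torch.randint(1, 31, (6, ))  # valid points per pillar
    coors = torch.zeros(6, 4, dtype=torch.long)  # (batch_idx, z, y, x)
    out = pfn(features, num_points, coors)
    return out  # (6, 64) pillar features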
@VOXEL_ENCODERS.register_module
class DynamicPillarFeatureNet(PillarFeatureNet):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max'):
"""
Dynamic Pillar Feature Net for Dynamic Voxelization.
The difference is in the forward part
"""
super(DynamicPillarFeatureNet, self).__init__(
num_input_features,
use_norm,
num_filters,
with_distance,
with_cluster_center=with_cluster_center,
with_voxel_center=with_voxel_center,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
mode=mode)
num_filters = [self.num_input_features] + list(num_filters)
pfn_layers = []
# TODO: currently only support one PFNLayer
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i > 0:
in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
pfn_layers.append(
nn.Sequential(
nn.Linear(in_filters, out_filters, bias=False), norm_layer,
nn.ReLU(inplace=True)))
self.num_pfn = len(pfn_layers)
self.pfn_layers = nn.ModuleList(pfn_layers)
self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range,
(mode != 'max'))
self.cluster_scatter = DynamicScatter(
voxel_size, point_cloud_range, average_points=True)
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
canvas_y = int(
(self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)
canvas_x = int(
(self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)
canvas_channel = voxel_mean.size(1)
batch_size = pts_coors[-1, 0] + 1
canvas_len = canvas_y * canvas_x * batch_size
# Create the canvas for this sample
canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)
# Only include non-empty pillars
indices = (
voxel_coors[:, 0] * canvas_y * canvas_x +
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
# Scatter the blob back to the canvas
canvas[:, indices.long()] = voxel_mean.t()
# Step 2: get voxel mean for each point
voxel_index = (
pts_coors[:, 0] * canvas_y * canvas_x +
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
center_per_point = canvas[:, voxel_index.long()].t()
return center_per_point
def forward(self, features, coors):
"""
features (torch.Tensor): NxC
coors (torch.Tensor): Nx(1+NDim)
"""
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
voxel_mean, mean_coors = self.cluster_scatter(features, coors)
points_mean = self.map_voxel_center_to_point(
coors, voxel_mean, mean_coors)
# TODO: maybe also do cluster for reflectivity
f_cluster = features[:, :3] - points_mean[:, :3]
features_ls.append(f_cluster)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features.new_zeros(size=(features.size(0), 2))
f_center[:, 0] = features[:, 0] - (
coors[:, 3].type_as(features) * self.vx + self.x_offset)
f_center[:, 1] = features[:, 1] - (
coors[:, 2].type_as(features) * self.vy + self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
for i, pfn in enumerate(self.pfn_layers):
point_feats = pfn(features)
voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors)
if i != len(self.pfn_layers) - 1:
# need to concat voxel feats if it is not the last pfn
feat_per_point = self.map_voxel_center_to_point(
coors, voxel_feats, voxel_coors)
features = torch.cat([point_feats, feat_per_point], dim=1)
return voxel_feats, voxel_coors
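

# Illustrative sketch of the flattened-canvas trick used in
# map_voxel_center_to_point above, written with plain tensors so it does not
# need DynamicScatter. The grid size and coordinates are toy assumptions.
def _example_voxel_center_to_point():
    canvas_x, canvas_y = 4, 3  # tiny 3x4 BEV grid, single sample
    voxel_mean = torch.tensor([[1.0], [2.0]])  # 2 voxels, 1 feature channel
    voxel_coors = torch.tensor([[0, 0, 1, 2],  # (batch_idx, z, y, x)
                                [0, 0, 2, 3]])
    pts_coors = torch.tensor([[0, 0, 1, 2],  # two points fall in voxel 0
                              [0, 0, 1, 2],
                              [0, 0, 2, 3]])  # one point falls in voxel 1
    canvas = voxel_mean.new_zeros(1, canvas_y * canvas_x)
    indices = voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3]
    canvas[:, indices.long()] = voxel_mean.t()  # scatter voxel stats to canvas
    voxel_index = pts_coors[:, 2] * canvas_x + pts_coors[:, 3]
    return canvas[:, voxel_index.long()].t()  # (3, 1): [[1.], [1.], [2.]]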
@VOXEL_ENCODERS.register_module
class AlignedPillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int): Number of input features, either x, y, z
or x, y, z, r.
use_norm (bool): Whether to include BatchNorm.
num_filters (list[int]): Number of features in each of the N
PFNLayers.
with_distance (bool): Whether to include Euclidean distance to
points.
voxel_size (list[float]): Size of voxels, only utilize x and y
size.
            point_cloud_range (list[float]): Point cloud range, only
utilize x and y min.
"""
super(AlignedPillarFeatureNet, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
print('Use cluster center')
num_input_features += 3
if with_voxel_center:
print('Use voxel center')
num_input_features += 2
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(
PFNLayer(
in_filters,
out_filters,
use_norm,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to
# calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
def forward(self, features, num_points, coors):
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
x_distance = features[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
y_distance = features[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
z_distance = features[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
normed_x_distance = 1 - torch.abs(x_distance / self.vx)
normed_y_distance = 1 - torch.abs(y_distance / self.vy)
normed_z_distance = 1 - torch.abs(z_distance / self.vz)
x_mask = torch.gt(normed_x_distance, 0).type_as(features)
y_mask = torch.gt(normed_y_distance, 0).type_as(features)
z_mask = torch.gt(normed_z_distance, 0).type_as(features)
nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
normed_z_distance).mul(nonzero_points_mask)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
            # take a copy so the raw x/y columns of `features` are not
            # overwritten in place by the center offsets below
            f_center = features[:, :, :2].clone()
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to
# whether pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
for pfn in self.pfn_layers:
if pfn.last_vfe:
features = pfn(features, aligned_distance)
else:
features = pfn(features)
return features.squeeze()
import torch
from torch import nn
from torch.nn import functional as F
from ..utils import build_norm_layer
class Empty(nn.Module):
def __init__(self, *args, **kwargs):
super(Empty, self).__init__()
def forward(self, *args, **kwargs):
if len(args) == 1:
return args[0]
elif len(args) == 0:
return None
return args
def get_paddings_indicator(actual_num, max_num, axis=0):
"""Create boolean mask by actually number of a padded tensor.
Args:
actual_num ([type]): [description]
max_num ([type]): [description]
Returns:
[type]: [description]
"""
actual_num = torch.unsqueeze(actual_num, axis + 1)
# tiled_actual_num: [N, M, 1]
max_num_shape = [1] * len(actual_num.shape)
max_num_shape[axis + 1] = -1
max_num = torch.arange(
max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
# tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
# tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
paddings_indicator = actual_num.int() > max_num
# paddings_indicator shape: [batch_size, max_num]
return paddings_indicator
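

# Illustrative sketch: for three pillars holding 3, 4 and 2 valid points and
# padded to a maximum of 5, the indicator marks the non-padded slots.
def _example_paddings_indicator():
    actual_num = torch.tensor([3, 4, 2])
    mask = get_paddings_indicator(actual_num, max_num=5, axis=0)
    # mask:
    # [[True, True, True, False, False],
    #  [True, True, True, True, False],
    #  [True, True, False, False, False]]
    return mask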
class VFELayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
max_out=True,
cat_max=True):
super(VFELayer, self).__init__()
self.cat_max = cat_max
self.max_out = max_out
# self.units = int(out_channels / 2)
if norm_cfg:
norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
self.norm = norm_layer
self.linear = nn.Linear(in_channels, out_channels, bias=False)
else:
self.norm = Empty(out_channels)
self.linear = nn.Linear(in_channels, out_channels, bias=True)
def forward(self, inputs):
# [K, T, 7] tensordot [7, units] = [K, T, units]
voxel_count = inputs.shape[1]
x = self.linear(inputs)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
pointwise = F.relu(x)
# [K, T, units]
if self.max_out:
aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]
else:
# this is for fusion layer
return pointwise
if not self.cat_max:
return aggregated.squeeze(1)
else:
# [K, 1, units]
repeated = aggregated.repeat(1, voxel_count, 1)
concatenated = torch.cat([pointwise, repeated], dim=2)
# [K, T, 2 * units]
return concatenated
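

# Illustrative sketch (assumed shapes): one VFELayer lifts per-point features
# and concatenates the per-voxel max back onto every point, as in VoxelNet.
def _example_vfe_layer():
    vfe = VFELayer(in_channels=7, out_channels=32)
    inputs = torch.rand(4, 35, 7)  # 4 voxels, up to 35 points each
    out = vfe(inputs)
    return out  # (4, 35, 64): point-wise features concatenated with the max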
class PFNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
use_norm=True,
last_layer=False,
mode='max'):
""" Pillar Feature Net Layer.
The Pillar Feature Net is composed of a series of these layers, but the
PointPillars paper results only used a single PFNLayer.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
use_norm (bool): Whether to include BatchNorm.
last_layer (bool): If last_layer, there is no concatenation of
features.
"""
super().__init__()
self.name = 'PFNLayer'
self.last_vfe = last_layer
if not self.last_vfe:
out_channels = out_channels // 2
self.units = out_channels
if use_norm:
self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
self.linear = nn.Linear(in_channels, self.units, bias=False)
else:
            self.norm = Empty(self.units)
self.linear = nn.Linear(in_channels, self.units, bias=True)
self.mode = mode
def forward(self, inputs, num_voxels=None, aligned_distance=None):
x = self.linear(inputs)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
if self.mode == 'max':
if aligned_distance is not None:
x = x.mul(aligned_distance.unsqueeze(-1))
x_max = torch.max(x, dim=1, keepdim=True)[0]
elif self.mode == 'avg':
if aligned_distance is not None:
x = x.mul(aligned_distance.unsqueeze(-1))
x_max = x.sum(
dim=1, keepdim=True) / num_voxels.type_as(inputs).view(
-1, 1, 1)
if self.last_vfe:
return x_max
else:
x_repeat = x_max.repeat(1, inputs.shape[1], 1)
x_concatenated = torch.cat([x, x_repeat], dim=2)
return x_concatenated
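

# Illustrative sketch (assumed shapes): a final PFNLayer max-pools the
# decorated per-point features into a single feature vector per pillar.
def _example_pfn_layer():
    pfn = PFNLayer(in_channels=9, out_channels=64, last_layer=True)
    features = torch.rand(7, 20, 9)  # 7 pillars, up to 20 points each
    out = pfn(features)
    return out  # (7, 1, 64)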
import torch
from torch import nn
from torch.nn import functional as F
from mmdet3d.ops import DynamicScatter
from .. import builder
from ..registry import VOXEL_ENCODERS
from ..utils import build_norm_layer
from .utils import Empty, VFELayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractor(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractor, self).__init__()
self.name = name
assert len(num_filters) == 2
num_input_features += 3 # add mean features
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
if use_norm:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
self.norm = Empty(num_filters[1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
# t = time.time()
# torch.cuda.synchronize()
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
# mask = features.max(dim=2, keepdim=True)[0] != 0
# torch.cuda.synchronize()
# print("vfe prep forward time", time.time() - t)
x = self.vfe1(features)
x *= mask
x = self.vfe2(x)
x *= mask
x = self.linear(x)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
x *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(x, dim=1)[0]
return voxelwise
@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV2, self).__init__()
self.name = name
assert len(num_filters) > 0
num_input_features += 3
if with_distance:
num_input_features += 1
self._with_distance = with_distance
num_filters = [num_input_features] + num_filters
filters_pairs = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
self.vfe_layers = nn.ModuleList(
[VFELayer(i, o, use_norm) for i, o in filters_pairs])
if use_norm:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=False)
self.norm = nn.BatchNorm1d(
num_filters[-1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=True)
self.norm = Empty(num_filters[-1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
for vfe in self.vfe_layers:
features = vfe(features)
features *= mask
features = self.linear(features)
features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
0, 2, 1).contiguous()
features = F.relu(features)
features *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(features, dim=1)[0]
return voxelwise
@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractorV3(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV3, self).__init__()
self.name = name
def forward(self, features, num_points, coors):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_points: [concated_num_points]
points_mean = features[:, :, :4].sum(
dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
return points_mean.contiguous()
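

# Illustrative sketch (toy data): VoxelFeatureExtractorV3 simply averages the
# (x, y, z, r) values of the points in each voxel; in real data the padded
# slots are zeros, which is assumed but not enforced here.
def _example_vfe_v3():
    vfe = VoxelFeatureExtractorV3()
    features = torch.rand(10, 5, 4)  # 10 voxels, up to 5 points, (x, y, z, r)
    num_points = torch.randint(1, 6, (10, ))  # valid points per voxel
    return vfe(features, num_points, coors=None)  # (10, 4) mean features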
@VOXEL_ENCODERS.register_module
class DynamicVFEV3(nn.Module):
def __init__(self,
num_input_features=4,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
super(DynamicVFEV3, self).__init__()
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
@torch.no_grad()
def forward(self, features, coors):
# This function is used from the start of the voxelnet
# num_points: [concated_num_points]
features, features_coors = self.scatter(features, coors)
return features, features_coors
@VOXEL_ENCODERS.register_module
class DynamicVFE(nn.Module):
def __init__(self,
num_input_features=4,
num_filters=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
fusion_layer=None,
return_point_feats=False):
super(DynamicVFE, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
num_input_features += 3
if with_voxel_center:
num_input_features += 3
if with_distance:
            num_input_features += 1
self.num_input_features = num_input_features
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
self.return_point_feats = return_point_feats
# Need pillar (voxel) size and x/y offset in order to calculate offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters)
vfe_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i > 0:
in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
vfe_layers.append(
nn.Sequential(
nn.Linear(in_filters, out_filters, bias=False), norm_layer,
nn.ReLU(inplace=True)))
self.vfe_layers = nn.ModuleList(vfe_layers)
self.num_vfe = len(vfe_layers)
self.vfe_scatter = DynamicScatter(voxel_size, point_cloud_range,
(mode != 'max'))
self.cluster_scatter = DynamicScatter(
voxel_size, point_cloud_range, average_points=True)
self.fusion_layer = None
if fusion_layer is not None:
self.fusion_layer = builder.build_fusion_layer(fusion_layer)
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
canvas_z = int(
(self.point_cloud_range[5] - self.point_cloud_range[2]) / self.vz)
canvas_y = int(
(self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)
canvas_x = int(
(self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)
# canvas_channel = voxel_mean.size(1)
batch_size = pts_coors[-1, 0] + 1
canvas_len = canvas_z * canvas_y * canvas_x * batch_size
# Create the canvas for this sample
canvas = voxel_mean.new_zeros(canvas_len, dtype=torch.long)
# Only include non-empty pillars
indices = (
voxel_coors[:, 0] * canvas_z * canvas_y * canvas_x +
voxel_coors[:, 1] * canvas_y * canvas_x +
voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
# Scatter the blob back to the canvas
canvas[indices.long()] = torch.arange(
start=0, end=voxel_mean.size(0), device=voxel_mean.device)
# Step 2: get voxel mean for each point
voxel_index = (
pts_coors[:, 0] * canvas_z * canvas_y * canvas_x +
pts_coors[:, 1] * canvas_y * canvas_x +
pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
voxel_inds = canvas[voxel_index.long()]
center_per_point = voxel_mean[voxel_inds, ...]
return center_per_point
def forward(self,
features,
coors,
points=None,
img_feats=None,
img_meta=None):
"""
features (torch.Tensor): NxC
coors (torch.Tensor): Nx(1+NDim)
"""
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
voxel_mean, mean_coors = self.cluster_scatter(features, coors)
points_mean = self.map_voxel_center_to_point(
coors, voxel_mean, mean_coors)
# TODO: maybe also do cluster for reflectivity
f_cluster = features[:, :3] - points_mean[:, :3]
features_ls.append(f_cluster)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features.new_zeros(size=(features.size(0), 3))
f_center[:, 0] = features[:, 0] - (
coors[:, 3].type_as(features) * self.vx + self.x_offset)
f_center[:, 1] = features[:, 1] - (
coors[:, 2].type_as(features) * self.vy + self.y_offset)
f_center[:, 2] = features[:, 2] - (
coors[:, 1].type_as(features) * self.vz + self.z_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
for i, vfe in enumerate(self.vfe_layers):
point_feats = vfe(features)
if (i == len(self.vfe_layers) - 1 and self.fusion_layer is not None
and img_feats is not None):
point_feats = self.fusion_layer(img_feats, points, point_feats,
img_meta)
voxel_feats, voxel_coors = self.vfe_scatter(point_feats, coors)
if i != len(self.vfe_layers) - 1:
# need to concat voxel feats if it is not the last vfe
feat_per_point = self.map_voxel_center_to_point(
coors, voxel_feats, voxel_coors)
features = torch.cat([point_feats, feat_per_point], dim=1)
if self.return_point_feats:
return point_feats
return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module
class HardVFE(nn.Module):
def __init__(self,
num_input_features=4,
num_filters=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
fusion_layer=None,
return_point_feats=False):
super(HardVFE, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
num_input_features += 3
if with_voxel_center:
num_input_features += 3
if with_distance:
            num_input_features += 1
self.num_input_features = num_input_features
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
self.return_point_feats = return_point_feats
# Need pillar (voxel) size and x/y offset to calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters)
vfe_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i > 0:
in_filters *= 2
# TODO: pass norm_cfg to VFE
# norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
if i == (len(num_filters) - 2):
cat_max = False
max_out = True
if fusion_layer:
max_out = False
else:
max_out = True
cat_max = True
vfe_layers.append(
VFELayer(
in_filters,
out_filters,
norm_cfg=norm_cfg,
max_out=max_out,
cat_max=cat_max))
self.vfe_layers = nn.ModuleList(vfe_layers)
self.num_vfe = len(vfe_layers)
self.fusion_layer = None
if fusion_layer is not None:
self.fusion_layer = builder.build_fusion_layer(fusion_layer)
def forward(self,
features,
num_points,
coors,
img_feats=None,
img_meta=None):
"""
features (torch.Tensor): NxMxC
coors (torch.Tensor): Nx(1+NDim)
"""
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = (
features[:, :, :3].sum(dim=1, keepdim=True) /
num_points.type_as(features).view(-1, 1, 1))
# TODO: maybe also do cluster for reflectivity
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features.new_zeros(
size=(features.size(0), features.size(1), 3))
f_center[:, :, 0] = features[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = features[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
f_center[:, :, 2] = features[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
voxel_feats = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to whether
# pillar was empty.
# Need to ensure that empty voxels remain set to zeros.
voxel_count = voxel_feats.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
voxel_feats *= mask.unsqueeze(-1).type_as(voxel_feats)
for i, vfe in enumerate(self.vfe_layers):
voxel_feats = vfe(voxel_feats)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
if (self.fusion_layer is not None and img_feats is not None):
voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
coors, img_feats, img_meta)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
return voxel_feats
def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
img_meta):
        # the features tensor consists of points from the whole batch
batch_size = coors[-1, 0] + 1
points = []
for i in range(batch_size):
single_mask = (coors[:, 0] == i)
points.append(features[single_mask][mask[single_mask]])
point_feats = voxel_feats[mask]
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
point_feats = self.fusion_layer(img_feats, points, point_feats,
img_meta)
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
voxel_canvas = voxel_feats.new_zeros(
size=(voxel_feats.size(0), voxel_feats.size(1),
point_feats.size(-1)))
voxel_canvas[mask] = point_feats
out = torch.max(voxel_canvas, dim=1)[0]
if torch.isnan(out).any():
import pdb
pdb.set_trace()
return out
from mmdet.ops import (RoIAlign, SigmoidFocalLoss, build_norm_layer,
get_compiler_version, get_compiling_cuda_version, nms,
roi_align, sigmoid_focal_loss)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
__all__ = [
    'nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
    'get_compiling_cuda_version', 'build_norm_layer', 'Voxelization',
    'voxelization', 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
    'SigmoidFocalLoss'
]
from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu,
nms_normal_gpu)
__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu']
import torch
from . import iou3d_cuda
def boxes_iou_bev(boxes_a, boxes_b):
"""
:param boxes_a: (M, 5)
:param boxes_b: (N, 5)
:return:
ans_iou: (M, N)
"""
ans_iou = torch.cuda.FloatTensor(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_()
iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(),
ans_iou)
return ans_iou
def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
"""
:param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
:param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
:param mode "iou" (intersection over union) or iof (intersection over
foreground).
:return:
ans_iou: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
# bev overlap
overlaps_bev = torch.cuda.FloatTensor(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
# height overlap
boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1)
boxes_a_height_max = boxes_a[:, 1].view(-1, 1)
boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1)
boxes_b_height_max = boxes_b[:, 1].view(1, -1)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
        # the clamp func is used to avoid division by 0
iou3d = overlaps_3d / torch.clamp(
vol_a + vol_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8)
return iou3d
def nms_gpu(boxes, scores, thresh):
"""
:param boxes: (N, 5) [x1, y1, x2, y2, ry]
:param scores: (N)
:param thresh:
:return:
"""
# areas = (x2 - x1) * (y2 - y1)
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.LongTensor(boxes.size(0))
num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh)
return order[keep[:num_out].cuda()].contiguous()
def nms_normal_gpu(boxes, scores, thresh):
"""
:param boxes: (N, 5) [x1, y1, x2, y2, ry]
:param scores: (N)
:param thresh:
:return:
"""
# areas = (x2 - x1) * (y2 - y1)
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.LongTensor(boxes.size(0))
num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
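

# Illustrative sketch: a camera-frame box centered at x=1, z=2 with w=2, l=4
# and yaw 0.5 maps to the BEV rectangle [x1, y1, x2, y2, ry].
def _example_boxes3d_to_bev():
    boxes3d = torch.tensor([[1.0, 0.0, 2.0, 1.5, 2.0, 4.0, 0.5]])
    return boxes3d_to_bev_torch(boxes3d)  # tensor([[-1., 1., 3., 3., 0.5]])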
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='iou3d',
ext_modules=[
CUDAExtension(
'iou3d_cuda', [
'src/iou3d.cpp',
'src/iou3d_kernel.cu',
],
extra_compile_args={
'cxx': ['-g', '-I /usr/local/cuda/include'],
'nvcc': ['-O2']
})
],
cmdclass={'build_ext': BuildExtension})