Commit f717eb62 authored by wuyuefeng, committed by zhangwenwei

VoteNet

parent ac3590a1
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=1,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
36, 39)),
dict(type='IndoorPointSample', num_points=40000),
dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 36, 1 / 36],
scale_range=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=40000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_scannet'
load_from = None
resume_from = None
workflow = [('train', 1)]
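
A quick sanity check of the input-channel bookkeeping in this config (a sketch, not part of the commit): LoadPointsFromFile keeps only x, y, z via use_dim=[0, 1, 2] and, with shift_height=True, appends one height feature per point, which is what makes in_channels=4 in the backbone consistent with the pipeline.

# Sketch: why backbone.in_channels == 4 for this pipeline.
use_dim = [0, 1, 2]      # x, y, z kept from the 6 loaded dims
shift_height = True      # assumed to append one height feature per point
in_channels = len(use_dim) + int(shift_height)
assert in_channels == 4  # matches backbone.in_channels above
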
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=10,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=10,
num_dir_bins=12,
with_rot=True,
mean_sizes=[[2.114256, 1.620300, 0.927272],
[0.791118, 1.279516, 0.718182],
[0.923508, 1.867419, 0.845495],
[0.591958, 0.552978, 0.827272],
[0.699104, 0.454178, 0.75625],
[0.69519, 1.346299, 0.736364],
[0.528526, 1.002642, 1.172878],
[0.500618, 0.632163, 0.683424],
[0.404671, 1.071108, 1.688889],
[0.76584, 1.398258, 0.472728]]),
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(type='IndoorFlipData', flip_ratio_yz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 6, 1 / 6],
scale_range=[0.85, 1.15]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
filter_empty_gt=False)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=30,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_sunrgbd'
load_from = None
resume_from = None
workflow = [('train', 1)]
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
-from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
+from .transforms import (bbox3d2result, bbox3d2roi,
+box3d_to_corner3d_upright_depth,
+boxes3d_to_bev_torch_lidar)
from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporary setting
@@ -22,5 +24,6 @@ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
+'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
+'box3d_to_corner3d_upright_depth'
]
from mmdet.core.bbox import build_bbox_coder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
+from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
-__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
+__all__ = [
+'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder'
+]
import numpy as np
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
"""Partial bin based bbox coder
Args:
num_dir_bins (int): Number of bins to encode direction angle.
num_sizes (int): Number of size clusters.
mean_sizes (list[list[float]]): Mean size of bboxes in each class.
with_rot (bool): Whether the bbox is with rotation.
"""
def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
super(PartialBinBasedBBoxCoder, self).__init__()
assert len(mean_sizes) == num_sizes
self.num_dir_bins = num_dir_bins
self.num_sizes = num_sizes
self.mean_sizes = mean_sizes
self.with_rot = with_rot
def encode(self, gt_bboxes_3d, gt_labels_3d):
"""Encode ground truth to prediction targets.
Args:
gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): Gt classes.
Returns:
tuple: Targets of center, size and direction.
"""
# generate center target
center_target = gt_bboxes_3d[..., 0:3]
# generate bbox size target
size_class_target = gt_labels_3d
size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
self.mean_sizes)[size_class_target]
# generate dir target
box_num = gt_bboxes_3d.shape[0]
if self.with_rot:
(dir_class_target,
dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
else:
dir_class_target = gt_labels_3d.new_zeros(box_num)
dir_res_target = gt_bboxes_3d.new_zeros(box_num)
return (center_target, size_class_target, size_res_target,
dir_class_target, dir_res_target)
def decode(self, bbox_out):
"""Decode predicted parts to bbox3d.
Args:
bbox_out (dict): Predictions from the model; should contain the keys below.
- center: predicted bottom center of bboxes.
- dir_class: predicted bbox direction class.
- dir_res: predicted bbox direction residual.
- size_class: predicted bbox size class.
- size_res: predicted bbox size residual.
Returns:
Tensor: decoded bbox3d with shape (batch, n, 7)
"""
center = bbox_out['center']
batch_size, num_proposal = center.shape[:2]
# decode heading angle
if self.with_rot:
dir_class = torch.argmax(bbox_out['dir_class'], -1)
dir_res = torch.gather(bbox_out['dir_res'], 2,
dir_class.unsqueeze(-1))
dir_res.squeeze_(2)
dir_angle = self.class2angle(dir_class, dir_res).reshape(
batch_size, num_proposal, 1)
else:
dir_angle = center.new_zeros(batch_size, num_proposal, 1)
# decode bbox size
size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
size_res = torch.gather(bbox_out['size_res'], 2,
size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
mean_sizes = center.new_tensor(self.mean_sizes)
size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
bbox_size = size_base.reshape(batch_size, num_proposal,
-1) + size_res.squeeze(2)
bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
return bbox3d
def split_pred(self, preds, base_xyz):
"""Split predicted features to specific parts.
Args:
preds (Tensor): predicted features to split.
base_xyz (Tensor): coordinates of points.
Returns:
dict: split results.
"""
results = {}
start, end = 0, 0
preds_trans = preds.transpose(2, 1)
# decode objectness score
end += 2
results['obj_scores'] = preds_trans[..., start:end]
start = end
# decode center
end += 3
# (batch_size, num_proposal, 3)
results['center'] = base_xyz + preds_trans[..., start:end]
start = end
# decode direction
end += self.num_dir_bins
results['dir_class'] = preds_trans[..., start:end]
start = end
end += self.num_dir_bins
dir_res_norm = preds_trans[..., start:end]
start = end
results['dir_res_norm'] = dir_res_norm
results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)
# decode size
end += self.num_sizes
results['size_class'] = preds_trans[..., start:end]
start = end
end += self.num_sizes * 3
size_res_norm = preds_trans[..., start:end]
batch_size, num_proposal = preds_trans.shape[:2]
size_res_norm = size_res_norm.view(
[batch_size, num_proposal, self.num_sizes, 3])
start = end
results['size_res_norm'] = size_res_norm
mean_sizes = preds.new_tensor(self.mean_sizes)
results['size_res'] = (
size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))
# decode semantic score
results['sem_scores'] = preds_trans[..., start:]
return results
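# Note (added for clarity, a sketch of the layout assumed by split_pred
# above): the channel axis of `preds` is consumed in this order:
# 2 (objectness) + 3 (center offset) + num_dir_bins (direction class)
# + num_dir_bins (direction residual) + num_sizes (size class)
# + num_sizes * 3 (size residual), with the remaining channels taken as
# semantic scores.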
def angle2class(self, angle):
"""Convert continuous angle to a discrete class and a residual.
Convert continuous angle to a discrete class and a small
regression number from class center angle to current angle.
Args:
angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)
Returns:
tuple: Encoded discrete class and residual.
"""
angle = angle % (2 * np.pi)
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
angle_cls = shifted_angle // angle_per_class
angle_res = shifted_angle - (
angle_cls * angle_per_class + angle_per_class / 2)
return angle_cls.long(), angle_res
def class2angle(self, angle_cls, angle_res, limit_period=True):
"""Inverse function to angle2class
Args:
angle_cls (Tensor): Angle class to decode.
angle_res (Tensor): Angle residual to decode.
limit_period (bool): Whether to limit angle to [-pi, pi].
Returns:
Tensor: angle decoded from angle_cls and angle_res.
"""
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
angle_center = angle_cls.float() * angle_per_class
angle = angle_center + angle_res
if limit_period:
angle[angle > np.pi] -= 2 * np.pi
return angle
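
The angle encoding above is easy to sanity-check in isolation. A minimal sketch (assumes the coder is importable from mmdet3d.core.bbox.coders as the __init__ above suggests, and uses the 12 direction bins of the SUN RGB-D config):

import torch
from mmdet3d.core.bbox.coders import PartialBinBasedBBoxCoder

coder = PartialBinBasedBBoxCoder(
    num_dir_bins=12, num_sizes=10, mean_sizes=[[1., 1., 1.]] * 10)
angle = torch.tensor([0.3, 3.0])
angle_cls, angle_res = coder.angle2class(angle)    # bins of width 2*pi/12
decoded = coder.class2angle(angle_cls, angle_res)  # back to radians
assert torch.allclose(decoded, angle)  # round trip holds within [-pi, pi]
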
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
"""
return dict(
boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None,
bboxes=None,
to_bottom_center=False):
"""Convert points and boxes in upright depth coordinate to lidar.
Args:
points (None | Tensor): points in upright depth coordinate.
bboxes (None | Tensor): bboxes in upright depth coordinate.
to_bottom_center (bool): Whether to convert bboxes to bottom center.
Returns:
tuple: points and bboxes in lidar coordinate.
"""
if points is not None:
points_lidar = points.clone()
points_lidar = points_lidar[..., [1, 0, 2]]
points_lidar[..., 1] *= -1
else:
points_lidar = None
if bboxes is not None:
bboxes_lidar = bboxes.clone()
bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
bboxes_lidar[..., 1] *= -1
if to_bottom_center:
bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
else:
bboxes_lidar = None
return points_lidar, bboxes_lidar
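# Worked example (a sketch, not part of the commit): the mapping swaps the
# x/y axes, negates the new y, and swaps the l/w extents to match:
#   pts = torch.tensor([[1., 2., 3.]])
#   box = torch.tensor([[1., 2., 3., 4., 5., 6., 0.1]])
#   upright_depth_to_lidar_torch(pts, box, to_bottom_center=True)
#   # -> (tensor([[2., -1., 3.]]),
#   #     tensor([[2., -1., 0., 5., 4., 6., 0.1]]))  # z = 3 - 6 / 2 = 0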
def box3d_to_corner3d_upright_depth(boxes3d):
"""Convert box3d to corner3d in upright depth coordinate
Args:
boxes3d (Tensor): boxes with shape [n,7] in upright depth coordinate
Returns:
Tensor: boxes with [n, 8, 3] in upright depth coordinate
"""
boxes_num = boxes3d.shape[0]
ry = boxes3d[:, 6:7]
l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
zeros = boxes3d.new_zeros((boxes_num, 1))
ones = boxes3d.new_ones((boxes_num, 1))
x_corners = torch.cat(
[-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
dim=1) # (N, 8)
y_corners = torch.cat(
[w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
dim=1) # (N, 8)
z_corners = torch.cat(
[h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
dim=1) # (N, 8)
temp_corners = torch.cat(
(x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
z_corners.unsqueeze(dim=2)),
dim=2) # (N, 8, 3)
cosa, sina = torch.cos(-ry), torch.sin(-ry)
row_1 = torch.cat([cosa, -sina, zeros], dim=1) # (N, 3)
row_2 = torch.cat([sina, cosa, zeros], dim=1) # (N, 3)
row_3 = torch.cat([zeros, zeros, ones], dim=1) # (N, 3)
R = torch.cat((row_1.unsqueeze(dim=1), row_2.unsqueeze(dim=1),
row_3.unsqueeze(dim=1)),
dim=1) # (N, 3, 3)
rotated_corners = torch.matmul(temp_corners, R) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
corners3d = torch.cat(
(x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
return corners3d
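
A cheap check of the corner layout above: an axis-aligned box centred at the origin with l = w = h = 2 and ry = 0 must produce the eight corners (±1, ±1, ±1). A minimal sketch (the import path is assumed from the transforms module this commit extends):

import torch
from mmdet3d.core.bbox.transforms import box3d_to_corner3d_upright_depth

box = torch.tensor([[0., 0., 0., 2., 2., 2., 0.]])  # x, y, z, l, w, h, ry
corners = box3d_to_corner3d_upright_depth(box)      # shape (1, 8, 3)
assert corners.abs().unique().tolist() == [1.0]     # all corners at +-1
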
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores,
multiclass_nms)
-from .box3d_nms import box3d_multiclass_nms
+from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms
__all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
-'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
+'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
+'aligned_3d_nms'
]
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
dir_scores = mlvl_scores.new_zeros((0, ))
return bboxes, scores, labels, dir_scores
def aligned_3d_nms(boxes, scores, classes, thresh):
"""3d nms for aligned boxes.
Args:
boxes (Tensor): Aligned box with shape [n, 6].
scores (Tensor): Scores of each box.
classes (Tensor): Class of each box.
thresh (float): IoU threshold for NMS.
Returns:
Tensor: Indices of selected boxes.
"""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
z1 = boxes[:, 2]
x2 = boxes[:, 3]
y2 = boxes[:, 4]
z2 = boxes[:, 5]
area = (x2 - x1) * (y2 - y1) * (z2 - z1)
zero = boxes.new_zeros(1, )
score_sorted = torch.argsort(scores)
pick = []
while score_sorted.shape[0] != 0:
last = score_sorted.shape[0]
i = score_sorted[-1]
pick.append(i)
xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])
yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])
zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])
xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])
yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])
zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])
classes1 = classes[i]
classes2 = classes[score_sorted[:last - 1]]
inter_l = torch.max(zero, xx2 - xx1)
inter_w = torch.max(zero, yy2 - yy1)
inter_h = torch.max(zero, zz2 - zz1)
inter = inter_l * inter_w * inter_h
iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)
iou = iou * (classes1 == classes2).float()
score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()]
indices = boxes.new_tensor(pick, dtype=torch.long)
return indices
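
The NMS above is class-aware: IoU is zeroed across classes, so a box only suppresses boxes of the same class. A toy sketch (assumes the mmdet3d.core.post_processing export added above):

import torch
from mmdet3d.core.post_processing import aligned_3d_nms

boxes = torch.tensor([[0., 0., 0., 1., 1., 1.],   # x1, y1, z1, x2, y2, z2
                      [0., 0., 0., 1., 1., 1.],   # duplicate of box 0
                      [5., 5., 5., 6., 6., 6.]])
scores = torch.tensor([0.9, 0.4, 0.8])
classes = torch.tensor([0, 0, 1])
keep = aligned_3d_nms(boxes, scores, classes, thresh=0.25)
# keep == tensor([0, 2]): the lower-scored duplicate of box 0 is suppressed
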
@@ -19,12 +19,14 @@ class Custom3DDataset(Dataset):
pipeline=None,
classes=None,
modality=None,
+filter_empty_gt=True,
test_mode=False):
super().__init__()
self.data_root = data_root
self.ann_file = ann_file
self.test_mode = test_mode
self.modality = modality
+self.filter_empty_gt = filter_empty_gt
self.CLASSES = self.get_classes(classes)
self.data_infos = self.load_annotations(self.ann_file)
@@ -52,7 +54,7 @@ class Custom3DDataset(Dataset):
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
-if len(annos['gt_bboxes_3d']) == 0:
+if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:
return None
return input_dict
@@ -67,7 +69,8 @@ class Custom3DDataset(Dataset):
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
-if example is None or len(example['gt_bboxes_3d']._data) == 0:
+if self.filter_empty_gt and (example is None or len(
+example['gt_bboxes_3d']._data) == 0):
return None
return example
@@ -124,10 +127,13 @@ class Custom3DDataset(Dataset):
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
"""
from mmdet3d.core.evaluation import indoor_eval
assert isinstance(
results, list), f'Expect results to be list, got {type(results)}.'
assert len(results) > 0, 'Expect length of results > 0.'
assert len(results) == len(self.data_infos)
assert isinstance(
results[0], dict
), f'Expect elements in results to be dict, got {type(results[0])}.'
...
@@ -7,6 +7,7 @@ from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
NormalizePointsColor)
from .indoor_sample import IndoorPointSample
from .loading import LoadMultiViewImageFromFiles
+from .point_seg_class_mapping import PointSegClassMapping
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
@@ -18,5 +19,5 @@ __all__ = [
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
-'IndoorPointSample'
+'IndoorPointSample', 'PointSegClassMapping'
]
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
results['scale_ratio'] = scale_ratio
results['points'] = points
-results['gt_bboxes_3d'] = gt_bboxes_3d
+results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
return results
def __repr__(self):
...
@@ -189,7 +189,8 @@ class LoadAnnotations3D(LoadAnnotations):
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
-pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int)
+# add .copy() to fix read-only bug
+pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
...
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
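
For example, with a small subset of the ScanNet valid_cat_ids used in the config above, the mapping compresses valid raw ids to 0..N-1 and sends everything else to N:

import numpy as np

mapping = PointSegClassMapping(valid_cat_ids=(3, 4, 5))
results = dict(pts_semantic_mask=np.array([3, 7, 5, 4]))
out = mapping(results)['pts_semantic_mask']
# out == array([0, 3, 2, 1]): raw id 7 is invalid, so it maps to
# len(valid_cat_ids) == 3
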
@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
pipeline=None,
classes=None,
modality=None,
+filter_empty_gt=True,
test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality,
-test_mode)
+filter_empty_gt, test_mode)
def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api
...
@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
pipeline=None,
classes=None,
modality=None,
+filter_empty_gt=True,
test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality,
-test_mode)
+filter_empty_gt, test_mode)
def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api
...
@@ -8,6 +8,7 @@ from .detectors import * # noqa: F401,F403
from .fusion_layers import * # noqa: F401,F403
from .losses import * # noqa: F401,F403
from .middle_encoders import * # noqa: F401,F403
+from .model_utils import * # noqa: F401,F403
from .necks import * # noqa: F401,F403
from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS
from .roi_heads import * # noqa: F401,F403
...
from .anchor3d_head import Anchor3DHead
from .parta2_rpn_head import PartA2RPNHead
+from .vote_head import VoteHead
-__all__ = ['Anchor3DHead', 'PartA2RPNHead']
+__all__ = ['Anchor3DHead', 'PartA2RPNHead', 'VoteHead']
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmdet3d.core import build_bbox_coder, multi_apply
from mmdet3d.core.bbox.box_torch_ops import boxes3d_to_corners3d_lidar_torch
from mmdet3d.core.bbox.transforms import upright_depth_to_lidar_torch
from mmdet3d.core.post_processing import aligned_3d_nms
from mmdet3d.models.builder import build_loss
from mmdet3d.models.losses import chamfer_distance
from mmdet3d.models.model_utils import VoteModule
from mmdet3d.ops import (PointSAModule, furthest_point_sample,
points_in_boxes_batch)
from mmdet.models import HEADS
@HEADS.register_module()
class VoteHead(nn.Module):
"""Bbox head of Votenet.
https://arxiv.org/pdf/1904.09664.pdf
Args:
num_classes (int): The number of classes.
bbox_coder (BaseBBoxCoder): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
test_cfg (dict): Config for testing.
vote_module_cfg (dict): Config of VoteModule for point-wise votes.
vote_aggregation_cfg (dict): Config of vote aggregation layer.
feat_channels (tuple[int]): Convolution channels of
prediction layer.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
center_loss (dict): Config of center loss.
dir_class_loss (dict): Config of direction classification loss.
dir_res_loss (dict): Config of direction residual regression loss.
size_class_loss (dict): Config of size classification loss.
size_res_loss (dict): Config of size residual regression loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
"""
def __init__(self,
num_classes,
bbox_coder,
train_cfg=None,
test_cfg=None,
vote_module_cfg=None,
vote_aggregation_cfg=None,
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=None,
center_loss=None,
dir_class_loss=None,
dir_res_loss=None,
size_class_loss=None,
size_res_loss=None,
semantic_loss=None):
super(VoteHead, self).__init__()
self.num_classes = num_classes
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.gt_per_seed = vote_module_cfg['gt_per_seed']
self.num_proposal = vote_aggregation_cfg['num_point']
self.objectness_loss = build_loss(objectness_loss)
self.center_loss = build_loss(center_loss)
self.dir_class_loss = build_loss(dir_class_loss)
self.dir_res_loss = build_loss(dir_res_loss)
self.size_class_loss = build_loss(size_class_loss)
self.size_res_loss = build_loss(size_res_loss)
self.semantic_loss = build_loss(semantic_loss)
assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[
'in_channels']
self.bbox_coder = build_bbox_coder(bbox_coder)
self.num_sizes = self.bbox_coder.num_sizes
self.num_dir_bins = self.bbox_coder.num_dir_bins
self.vote_module = VoteModule(**vote_module_cfg)
self.vote_aggregation = PointSAModule(**vote_aggregation_cfg)
prev_channel = vote_aggregation_cfg['mlp_channels'][-1]
conv_pred_list = list()
for k in range(len(feat_channels)):
conv_pred_list.append(
ConvModule(
prev_channel,
feat_channels[k],
1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
bias=True,
inplace=True))
prev_channel = feat_channels[k]
self.conv_pred = nn.Sequential(*conv_pred_list)
# Objectness scores (2), center residual (3),
# heading class+residual (num_dir_bins*2),
# size class+residual (num_sizes*4), semantic scores (num_classes)
conv_out_channel = (2 + 3 + self.num_dir_bins * 2 +
self.num_sizes * 4 + num_classes)
self.conv_pred.add_module('conv_out',
nn.Conv1d(prev_channel, conv_out_channel, 1))
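# Note (added for clarity): with the ScanNet config above
# (num_dir_bins=1, num_sizes=18, num_classes=18) this gives
# conv_out_channel = 2 + 3 + 2 + 72 + 18 = 97; with the SUN RGB-D config
# (num_dir_bins=12, num_sizes=10, num_classes=10) it is
# 2 + 3 + 24 + 40 + 10 = 79.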
def init_weights(self):
pass
def forward(self, feat_dict, sample_mod):
"""Forward pass.
The forward of VoteHead is divided into 4 steps:
1. Generate vote_points from seed_points.
2. Aggregate vote_points.
3. Predict bbox and score.
4. Decode predictions.
Args:
feat_dict (dict): Feature dict from the backbone.
sample_mod (str): Sample mode for the vote aggregation layer.
Valid modes are "vote", "seed" and "random".
"""
assert sample_mod in ['vote', 'seed', 'random']
seed_points = feat_dict['fp_xyz'][-1]
seed_features = feat_dict['fp_features'][-1]
seed_indices = feat_dict['fp_indices'][-1]
# 1. generate vote_points from seed_points
vote_points, vote_features = self.vote_module(seed_points,
seed_features)
results = dict(
seed_points=seed_points,
seed_indices=seed_indices,
vote_points=vote_points,
vote_features=vote_features)
# 2. aggregate vote_points
if sample_mod == 'vote':
# use fps in vote_aggregation
sample_indices = None
elif sample_mod == 'seed':
# FPS on seed and choose the votes corresponding to the seeds
sample_indices = furthest_point_sample(seed_points,
self.num_proposal)
elif sample_mod == 'random':
# Random sampling from the votes
batch_size, num_seed = seed_points.shape[:2]
sample_indices = seed_points.new_tensor(
torch.randint(0, num_seed, (batch_size, self.num_proposal)),
dtype=torch.int32)
else:
raise NotImplementedError
vote_aggregation_ret = self.vote_aggregation(vote_points,
vote_features,
sample_indices)
aggregated_points, features, aggregated_indices = vote_aggregation_ret
results['aggregated_points'] = aggregated_points
results['aggregated_indices'] = aggregated_indices
# 3. predict bbox and score
predictions = self.conv_pred(features)
# 4. decode predictions
decode_res = self.bbox_coder.split_pred(predictions, aggregated_points)
results.update(decode_res)
return results
def loss(self,
bbox_preds,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
img_meta=None,
gt_bboxes_ignore=None):
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask,
bbox_preds)
(vote_targets, vote_target_masks, size_class_targets, size_res_targets,
dir_class_targets, dir_res_targets, center_targets, mask_targets,
valid_gt_masks, objectness_targets, objectness_weights,
box_loss_weights, valid_gt_weights) = targets
# calculate vote loss
vote_loss = self.vote_module.get_loss(bbox_preds['seed_points'],
bbox_preds['vote_points'],
bbox_preds['seed_indices'],
vote_target_masks, vote_targets)
# calculate objectness loss
objectness_loss = self.objectness_loss(
bbox_preds['obj_scores'].transpose(2, 1),
objectness_targets,
weight=objectness_weights)
# calculate center loss
source2target_loss, target2source_loss = self.center_loss(
bbox_preds['center'],
center_targets,
src_weight=box_loss_weights,
dst_weight=valid_gt_weights)
center_loss = source2target_loss + target2source_loss
# calculate direction class loss
dir_class_loss = self.dir_class_loss(
bbox_preds['dir_class'].transpose(2, 1),
dir_class_targets,
weight=box_loss_weights)
# calculate direction residual loss
batch_size, proposal_num = size_class_targets.shape[:2]
heading_label_one_hot = vote_targets.new_zeros(
(batch_size, proposal_num, self.num_dir_bins))
heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)
dir_res_norm = torch.sum(
bbox_preds['dir_res_norm'] * heading_label_one_hot, -1)
dir_res_loss = self.dir_res_loss(
dir_res_norm, dir_res_targets, weight=box_loss_weights)
# calculate size class loss
size_class_loss = self.size_class_loss(
bbox_preds['size_class'].transpose(2, 1),
size_class_targets,
weight=box_loss_weights)
# calculate size residual loss
one_hot_size_targets = vote_targets.new_zeros(
(batch_size, proposal_num, self.num_sizes))
one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)
one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(
-1).repeat(1, 1, 1, 3)
size_residual_norm = torch.sum(
bbox_preds['size_res_norm'] * one_hot_size_targets_expand, 2)
box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(
1, 1, 3)
size_res_loss = self.size_res_loss(
size_residual_norm,
size_res_targets,
weight=box_loss_weights_expand)
# calculate semantic loss
semantic_loss = self.semantic_loss(
bbox_preds['sem_scores'].transpose(2, 1),
mask_targets,
weight=box_loss_weights)
losses = dict(
vote_loss=vote_loss,
objectness_loss=objectness_loss,
semantic_loss=semantic_loss,
center_loss=center_loss,
dir_class_loss=dir_class_loss,
dir_res_loss=dir_res_loss,
size_class_loss=size_class_loss,
size_res_loss=size_res_loss)
return losses
def get_targets(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
bbox_preds=None):
"""Get targets of vote head.
Args:
points (list[Tensor]): Points of each batch.
gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
gt_labels_3d (list[Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each batch.
pts_instance_mask (None | list[Tensor]): point-wise instance
label of each batch.
bbox_preds (Tensor): Bbox predictions of vote head.
Returns:
tuple: Targets of vote head.
"""
# find empty example
valid_gt_masks = list()
gt_num = list()
for index in range(len(gt_labels_3d)):
if len(gt_labels_3d[index]) == 0:
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_zeros(
1, gt_bboxes_3d[index].shape[-1])
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
gt_num.append(1)
else:
valid_gt_masks.append(gt_labels_3d[index].new_ones(
gt_labels_3d[index].shape))
gt_num.append(gt_labels_3d[index].shape[0])
max_gt_num = max(gt_num)
if pts_semantic_mask is None:
pts_semantic_mask = [None for i in range(len(gt_labels_3d))]
pts_instance_mask = [None for i in range(len(gt_labels_3d))]
aggregated_points = [
bbox_preds['aggregated_points'][i]
for i in range(len(gt_labels_3d))
]
(vote_targets, vote_target_masks, size_class_targets, size_res_targets,
dir_class_targets, dir_res_targets, center_targets, mask_targets,
objectness_targets, objectness_masks) = multi_apply(
self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask, aggregated_points)
# pad targets as original code of votenet.
for index in range(len(gt_labels_3d)):
pad_num = max_gt_num - gt_labels_3d[index].shape[0]
center_targets[index] = F.pad(center_targets[index],
(0, 0, 0, pad_num))
valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))
vote_targets = torch.stack(vote_targets)
vote_target_masks = torch.stack(vote_target_masks)
center_targets = torch.stack(center_targets)
valid_gt_masks = torch.stack(valid_gt_masks)
objectness_targets = torch.stack(objectness_targets)
objectness_weights = torch.stack(objectness_masks)
objectness_weights /= (torch.sum(objectness_weights) + 1e-6)
box_loss_weights = objectness_targets.float() / (
torch.sum(objectness_targets).float() + 1e-6)
valid_gt_weights = valid_gt_masks.float() / (
torch.sum(valid_gt_masks.float()) + 1e-6)
dir_class_targets = torch.stack(dir_class_targets)
dir_res_targets = torch.stack(dir_res_targets)
size_class_targets = torch.stack(size_class_targets)
size_res_targets = torch.stack(size_res_targets)
mask_targets = torch.stack(mask_targets)
return (vote_targets, vote_target_masks, size_class_targets,
size_res_targets, dir_class_targets, dir_res_targets,
center_targets, mask_targets, valid_gt_masks,
objectness_targets, objectness_weights, box_loss_weights,
valid_gt_weights)
def get_targets_single(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
aggregated_points=None):
assert self.bbox_coder.with_rot or pts_semantic_mask is not None
# generate votes target
num_points = points.shape[0]
if self.bbox_coder.with_rot:
points_lidar, gt_bboxes_3d_lidar = upright_depth_to_lidar_torch(
points, gt_bboxes_3d, to_bottom_center=True)
vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
vote_target_masks = points.new_zeros([num_points],
dtype=torch.long)
vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
box_indices_all = points_in_boxes_batch(
points_lidar.unsqueeze(0), gt_bboxes_3d_lidar.unsqueeze(0))[0]
for i in range(gt_bboxes_3d.shape[0]):
box_indices = box_indices_all[:, i]
indices = torch.nonzero(box_indices).squeeze(-1)
selected_points = points[indices]
vote_target_masks[indices] = 1
vote_targets_tmp = vote_targets[indices]
votes = gt_bboxes_3d[i][:3].unsqueeze(
0) - selected_points[:, :3]
for j in range(self.gt_per_seed):
column_indices = torch.nonzero(
vote_target_idx[indices] == j).squeeze(-1)
vote_targets_tmp[column_indices,
int(j * 3):int(j * 3 +
3)] = votes[column_indices]
if j == 0:
vote_targets_tmp[column_indices] = votes[
column_indices].repeat(1, self.gt_per_seed)
vote_targets[indices] = vote_targets_tmp
vote_target_idx[indices] = torch.clamp(
vote_target_idx[indices] + 1, max=2)
elif pts_semantic_mask is not None:
vote_targets = points.new_zeros([num_points, 3])
vote_target_masks = points.new_zeros([num_points],
dtype=torch.long)
for i in torch.unique(pts_instance_mask):
indices = torch.nonzero(pts_instance_mask == i).squeeze(-1)
if pts_semantic_mask[indices[0]] < self.num_classes:
selected_points = points[indices, :3]
center = 0.5 * (
selected_points.min(0)[0] + selected_points.max(0)[0])
vote_targets[indices, :] = center - selected_points
vote_target_masks[indices] = 1
vote_targets = vote_targets.repeat((1, self.gt_per_seed))
else:
raise NotImplementedError
(center_targets, size_class_targets, size_res_targets,
dir_class_targets,
dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)
proposal_num = aggregated_points.shape[0]
distance1, _, assignment, _ = chamfer_distance(
aggregated_points.unsqueeze(0),
center_targets.unsqueeze(0),
reduction='none')
assignment = assignment.squeeze(0)
euclidean_distance1 = torch.sqrt(distance1.squeeze(0) + 1e-6)
objectness_targets = points.new_zeros((proposal_num), dtype=torch.long)
objectness_targets[
euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1
objectness_masks = points.new_zeros((proposal_num))
objectness_masks[
euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1.0
objectness_masks[
euclidean_distance1 > self.train_cfg['neg_distance_thr']] = 1.0
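# With the train_cfg above (pos_distance_thr=0.3, neg_distance_thr=0.6),
# proposals whose nearest gt center lies within 0.3 m become positives,
# those farther than 0.6 m become weighted negatives, and the 0.3-0.6 m
# band keeps objectness_masks == 0, i.e. it is ignored by the
# objectness loss.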
dir_class_targets = dir_class_targets[assignment]
dir_res_targets = dir_res_targets[assignment]
dir_res_targets /= (np.pi / self.num_dir_bins)
size_class_targets = size_class_targets[assignment]
size_res_targets = size_res_targets[assignment]
one_hot_size_targets = gt_bboxes_3d.new_zeros(
(proposal_num, self.num_sizes))
one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)
one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(
1, 1, 3)
mean_sizes = size_res_targets.new_tensor(
self.bbox_coder.mean_sizes).unsqueeze(0)
pos_mean_sizes = torch.sum(one_hot_size_targets * mean_sizes, 1)
size_res_targets /= pos_mean_sizes
mask_targets = gt_labels_3d[assignment]
return (vote_targets, vote_target_masks, size_class_targets,
size_res_targets,
dir_class_targets, dir_res_targets, center_targets,
mask_targets.long(), objectness_targets, objectness_masks)
def get_bboxes(self, points, bbox_preds, img_meta, rescale=False):
# decode boxes
obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
bbox_depth = self.bbox_coder.decode(bbox_preds)
points_lidar, bbox_lidar = upright_depth_to_lidar_torch(
points[..., :3], bbox_depth, to_bottom_center=True)
batch_size = bbox_depth.shape[0]
results = list()
for b in range(batch_size):
bbox_selected, score_selected, labels = self.multiclass_nms_single(
obj_scores[b], sem_scores[b], bbox_lidar[b], points_lidar[b])
results.append((bbox_selected, score_selected, labels))
return results
def multiclass_nms_single(self, obj_scores, sem_scores, bbox,
points_lidar):
box_indices = points_in_boxes_batch(
points_lidar.unsqueeze(0), bbox.unsqueeze(0))[0]
nonempty_box_mask = box_indices.T.sum(1) > 5
bbox_classes = torch.argmax(sem_scores, -1)
# boxes3d to aligned boxes
corner3d = boxes3d_to_corners3d_lidar_torch(bbox)
minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]
minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],
obj_scores[nonempty_box_mask],
bbox_classes[nonempty_box_mask],
self.test_cfg.nms_thr)
# filter empty boxes and boxes with low score
scores_mask = (obj_scores > self.test_cfg.score_thr)
nonempty_box_inds = torch.nonzero(nonempty_box_mask).flatten()
nonempty_mask = torch.zeros_like(bbox_classes).scatter(
0, nonempty_box_inds[nms_selected], 1)
selected = (nonempty_mask.bool() & scores_mask.bool())
if self.test_cfg.per_class_proposal:
bbox_selected, score_selected, labels = [], [], []
for k in range(sem_scores.shape[-1]):
bbox_selected.append(bbox[selected])
score_selected.append(obj_scores[selected] *
sem_scores[selected][:, k])
labels.append(
torch.zeros_like(bbox_classes[selected]).fill_(k))
bbox_selected = torch.cat(bbox_selected, 0)
score_selected = torch.cat(score_selected, 0)
labels = torch.cat(labels, 0)
else:
bbox_selected = bbox[selected]
score_selected = obj_scores[selected]
labels = bbox_classes[selected]
return bbox_selected, score_selected, labels
@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
from .mvx_single_stage import MVXSingleStageDetector
from .mvx_two_stage import MVXTwoStageDetector
from .parta2 import PartA2
+from .votenet import VoteNet
from .voxelnet import DynamicVoxelNet, VoxelNet
__all__ = [
'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-'DynamicMVXFasterRCNNV3', 'PartA2'
+'DynamicMVXFasterRCNNV3', 'PartA2', 'VoteNet'
]
import torch
from mmdet3d.core import bbox3d2result
from mmdet.models import DETECTORS, SingleStageDetector
@DETECTORS.register_module()
class VoteNet(SingleStageDetector):
"""VoteNet model.
https://arxiv.org/pdf/1904.09664.pdf
"""
def __init__(self,
backbone,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(VoteNet, self).__init__(
backbone=backbone,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained)
def extract_feat(self, points):
x = self.backbone(points)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self,
points,
img_meta,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
gt_bboxes_ignore=None):
"""Forward of training.
Args:
points (list[Tensor]): Points of each batch.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
gt_labels_3d (list[Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each batch.
pts_instance_mask (None | list[Tensor]): point-wise instance
label of each batch.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
boxes to ignore.
Returns:
dict: Losses.
"""
points_cat = torch.stack(points) # tmp
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
pts_instance_mask, img_meta)
losses = self.bbox_head.loss(
bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
return losses
def forward_test(self, **kwargs):
return self.simple_test(**kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
gt_labels_3d=None,
pts_semantic_mask=None,
pts_instance_mask=None,
rescale=False):
"""Forward of testing.
Args:
points (list[Tensor]): Points of each sample.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
gt_labels_3d (list[Tensor]): gt class labels of each sample.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each sample.
pts_instance_mask (None | list[Tensor]): point-wise instance
label of each sample.
rescale (bool): Whether to rescale results.
Returns:
list: Predicted 3d boxes.
"""
points_cat = torch.stack(points) # tmp
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
bbox_list = self.bbox_head.get_bboxes(
points_cat, bbox_preds, img_meta, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results[0]
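
As an end-to-end smoke test, the detector can be built directly from one of the configs at the top of this commit. A minimal sketch (assumes mmcv's Config and mmdet's build_detector; the config path is hypothetical):

import torch
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/votenet_scannet.py')  # hypothetical path
model = build_detector(cfg.model, train_cfg=cfg.train_cfg,
                       test_cfg=cfg.test_cfg)
# one sample of 40000 points with x, y, z + height, matching in_channels=4
points = [torch.rand(40000, 4)]
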