Commit 19a56f6b authored by zhangwenwei's avatar zhangwenwei
Browse files

Merge branch 'votenet' into 'master'

Votenet

See merge request open-mmlab/mmdet.3d!46
parents ac3590a1 f717eb62
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=1,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
vote_moudule_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
36, 39)),
dict(type='IndoorPointSample', num_points=40000),
dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 36, 1 / 36],
scale_range=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=40000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=10,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_scannet'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
pool_mod='max'),
bbox_head=dict(
type='VoteHead',
num_classes=10,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=10,
num_dir_bins=12,
with_rot=True,
mean_sizes=[[2.114256, 1.620300, 0.927272],
[0.791118, 1.279516, 0.718182],
[0.923508, 1.867419, 0.845495],
[0.591958, 0.552978, 0.827272],
[0.699104, 0.454178, 0.75625],
[0.69519, 1.346299, 0.736364],
[0.528526, 1.002642, 1.172878],
[0.500618, 0.632163, 0.683424],
[0.404671, 1.071108, 1.688889],
[0.76584, 1.398258, 0.472728]]),
vote_moudule_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(type='IndoorFlipData', flip_ratio_yz=0.5),
dict(
type='IndoorGlobalRotScale',
shift_height=True,
rot_range=[-1 / 6, 1 / 6],
scale_range=[0.85, 1.15]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
filter_empty_gt=False)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.008 # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=30,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_sunrgbd'
load_from = None
resume_from = None
workflow = [('train', 1)]
......@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
from .transforms import (bbox3d2result, bbox3d2roi,
box3d_to_corner3d_upright_depth,
boxes3d_to_bev_torch_lidar)
from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporally settings
......@@ -22,5 +24,6 @@ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
'box3d_to_corner3d_upright_depth'
]
from mmdet.core.bbox import build_bbox_coder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder
__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
__all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder'
]
import numpy as np
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
"""Partial bin based bbox coder
Args:
num_dir_bins (int): Number of bins to encode direction angle.
num_sizes (int): Number of size clusters.
mean_sizes (list[list[int]]): Mean size of bboxes in each class.
with_rot (bool): Whether the bbox is with rotation.
"""
def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
super(PartialBinBasedBBoxCoder, self).__init__()
assert len(mean_sizes) == num_sizes
self.num_dir_bins = num_dir_bins
self.num_sizes = num_sizes
self.mean_sizes = mean_sizes
self.with_rot = with_rot
def encode(self, gt_bboxes_3d, gt_labels_3d):
"""Encode ground truth to prediction targets.
Args:
gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): Gt classes.
Returns:
tuple: Targets of center, size and direction.
"""
# generate center target
center_target = gt_bboxes_3d[..., 0:3]
# generate bbox size target
size_class_target = gt_labels_3d
size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
self.mean_sizes)[size_class_target]
# generate dir target
box_num = gt_bboxes_3d.shape[0]
if self.with_rot:
(dir_class_target,
dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
else:
dir_class_target = gt_labels_3d.new_zeros(box_num)
dir_res_target = gt_bboxes_3d.new_zeros(box_num)
return (center_target, size_class_target, size_res_target,
dir_class_target, dir_res_target)
def decode(self, bbox_out):
"""Decode predicted parts to bbox3d.
Args:
bbox_out (dict): predictions from model, should contain keys below
- center: predicted bottom center of bboxes.
- dir_class: predicted bbox direction class.
- dir_res: predicted bbox direction residual.
- size_class: predicted bbox size class.
- size_res: predicted bbox size residual.
Returns:
Tensor: decoded bbox3d with shape (batch, n, 7)
"""
center = bbox_out['center']
batch_size, num_proposal = center.shape[:2]
# decode heading angle
if self.with_rot:
dir_class = torch.argmax(bbox_out['dir_class'], -1)
dir_res = torch.gather(bbox_out['dir_res'], 2,
dir_class.unsqueeze(-1))
dir_res.squeeze_(2)
dir_angle = self.class2angle(dir_class, dir_res).reshape(
batch_size, num_proposal, 1)
else:
dir_angle = center.new_zeros(batch_size, num_proposal, 1)
# decode bbox size
size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
size_res = torch.gather(bbox_out['size_res'], 2,
size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
mean_sizes = center.new_tensor(self.mean_sizes)
size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
bbox_size = size_base.reshape(batch_size, num_proposal,
-1) + size_res.squeeze(2)
bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
return bbox3d
def split_pred(self, preds, base_xyz):
"""Split predicted features to specific parts.
Args:
preds (Tensor): predicted features to split.
base_xyz (Tensor): coordinates of points.
Returns:
dict: split results.
"""
results = {}
start, end = 0, 0
preds_trans = preds.transpose(2, 1)
# decode objectness score
end += 2
results['obj_scores'] = preds_trans[..., start:end]
start = end
# decode center
end += 3
# (batch_size, num_proposal, 3)
results['center'] = base_xyz + preds_trans[..., start:end]
start = end
# decode direction
end += self.num_dir_bins
results['dir_class'] = preds_trans[..., start:end]
start = end
end += self.num_dir_bins
dir_res_norm = preds_trans[..., start:end]
start = end
results['dir_res_norm'] = dir_res_norm
results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)
# decode size
end += self.num_sizes
results['size_class'] = preds_trans[..., start:end]
start = end
end += self.num_sizes * 3
size_res_norm = preds_trans[..., start:end]
batch_size, num_proposal = preds_trans.shape[:2]
size_res_norm = size_res_norm.view(
[batch_size, num_proposal, self.num_sizes, 3])
start = end
results['size_res_norm'] = size_res_norm
mean_sizes = preds.new_tensor(self.mean_sizes)
results['size_res'] = (
size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))
# decode semantic score
results['sem_scores'] = preds_trans[..., start:]
return results
def angle2class(self, angle):
"""Convert continuous angle to a discrete class and a residual.
Convert continuous angle to a discrete class and a small
regression number from class center angle to current angle.
Args:
angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)
Returns:
tuple: Encoded discrete class and residual.
"""
angle = angle % (2 * np.pi)
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
angle_cls = shifted_angle // angle_per_class
angle_res = shifted_angle - (
angle_cls * angle_per_class + angle_per_class / 2)
return angle_cls.long(), angle_res
def class2angle(self, angle_cls, angle_res, limit_period=True):
"""Inverse function to angle2class
Args:
angle_cls (Tensor): Angle class to decode.
angle_res (Tensor): Angle residual to decode.
limit_period (bool): Whether to limit angle to [-pi, pi].
Returns:
Tensor: angle decoded from angle_cls and angle_res.
"""
angle_per_class = 2 * np.pi / float(self.num_dir_bins)
angle_center = angle_cls.float() * angle_per_class
angle = angle_center + angle_res
if limit_period:
angle[angle > np.pi] -= 2 * np.pi
return angle
......@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
"""
return dict(
boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None,
bboxes=None,
to_bottom_center=False):
"""Convert points and boxes in upright depth coordinate to lidar.
Args:
points (None | Tensor): points in upright depth coordinate.
bboxes (None | Tensor): bboxes in upright depth coordinate.
to_bottom_center (bool): covert bboxes to bottom center.
Returns:
tuple: points and bboxes in lidar coordinate.
"""
if points is not None:
points_lidar = points.clone()
points_lidar = points_lidar[..., [1, 0, 2]]
points_lidar[..., 1] *= -1
else:
points_lidar = None
if bboxes is not None:
bboxes_lidar = bboxes.clone()
bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
bboxes_lidar[..., 1] *= -1
if to_bottom_center:
bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
else:
bboxes_lidar = None
return points_lidar, bboxes_lidar
def box3d_to_corner3d_upright_depth(boxes3d):
"""Convert box3d to corner3d in upright depth coordinate
Args:
boxes3d (Tensor): boxes with shape [n,7] in upright depth coordinate
Returns:
Tensor: boxes with [n, 8, 3] in upright depth coordinate
"""
boxes_num = boxes3d.shape[0]
ry = boxes3d[:, 6:7]
l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
zeros = boxes3d.new_zeros((boxes_num, 1))
ones = boxes3d.new_ones((boxes_num, 1))
# zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
# ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
x_corners = torch.cat(
[-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
dim=1) # (N, 8)
y_corners = torch.cat(
[w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
dim=1) # (N, 8)
z_corners = torch.cat(
[h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
dim=1) # (N, 8)
temp_corners = torch.cat(
(x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
z_corners.unsqueeze(dim=2)),
dim=2) # (N, 8, 3)
cosa, sina = torch.cos(-ry), torch.sin(-ry)
raw_1 = torch.cat([cosa, -sina, zeros], dim=1) # (N, 3)
raw_2 = torch.cat([sina, cosa, zeros], dim=1) # (N, 3)
raw_3 = torch.cat([zeros, zeros, ones], dim=1) # (N, 3)
R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
raw_3.unsqueeze(dim=1)),
dim=1) # (N, 3, 3)
rotated_corners = torch.matmul(temp_corners, R) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
corners3d = torch.cat(
(x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
return corners3d
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores,
multiclass_nms)
from .box3d_nms import box3d_multiclass_nms
from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms
__all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
'aligned_3d_nms'
]
......@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
dir_scores = mlvl_scores.new_zeros((0, ))
return bboxes, scores, labels, dir_scores
def aligned_3d_nms(boxes, scores, classes, thresh):
"""3d nms for aligned boxes.
Args:
boxes (Tensor): Aligned box with shape [n, 6].
scores (Tensor): Scores of each box.
classes (Tensor): Class of each box.
thresh (float): Iou threshold for nms.
Returns:
Tensor: Indices of selected boxes.
"""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
z1 = boxes[:, 2]
x2 = boxes[:, 3]
y2 = boxes[:, 4]
z2 = boxes[:, 5]
area = (x2 - x1) * (y2 - y1) * (z2 - z1)
zero = boxes.new_zeros(1, )
score_sorted = torch.argsort(scores)
pick = []
while (score_sorted.shape[0] != 0):
last = score_sorted.shape[0]
i = score_sorted[-1]
pick.append(i)
xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])
yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])
zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])
xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])
yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])
zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])
classes1 = classes[i]
classes2 = classes[score_sorted[:last - 1]]
inter_l = torch.max(zero, xx2 - xx1)
inter_w = torch.max(zero, yy2 - yy1)
inter_h = torch.max(zero, zz2 - zz1)
inter = inter_l * inter_w * inter_h
iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)
iou = iou * (classes1 == classes2).float()
score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()]
indices = boxes.new_tensor(pick, dtype=torch.long)
return indices
......@@ -19,12 +19,14 @@ class Custom3DDataset(Dataset):
pipeline=None,
classes=None,
modality=None,
filter_empty_gt=True,
test_mode=False):
super().__init__()
self.data_root = data_root
self.ann_file = ann_file
self.test_mode = test_mode
self.modality = modality
self.filter_empty_gt = filter_empty_gt
self.CLASSES = self.get_classes(classes)
self.data_infos = self.load_annotations(self.ann_file)
......@@ -52,7 +54,7 @@ class Custom3DDataset(Dataset):
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
if len(annos['gt_bboxes_3d']) == 0:
if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:
return None
return input_dict
......@@ -67,7 +69,8 @@ class Custom3DDataset(Dataset):
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
if example is None or len(example['gt_bboxes_3d']._data) == 0:
if self.filter_empty_gt and (example is None or len(
example['gt_bboxes_3d']._data) == 0):
return None
return example
......@@ -124,10 +127,13 @@ class Custom3DDataset(Dataset):
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
"""
from mmdet3d.core.evaluation import indoor_eval
assert isinstance(
results, list), f'Expect results to be list, got {type(results)}.'
assert len(results) > 0, f'Expect length of results > 0.'
assert len(results) == len(self.data_infos)
assert isinstance(
results[0], dict
), f'Expect elements in results to be dict, got {type(results[0])}.'
......
......@@ -7,6 +7,7 @@ from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
NormalizePointsColor)
from .indoor_sample import IndoorPointSample
from .loading import LoadMultiViewImageFromFiles
from .point_seg_class_mapping import PointSegClassMapping
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
......@@ -18,5 +19,5 @@ __all__ = [
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
'IndoorPointSample'
'IndoorPointSample', 'PointSegClassMapping'
]
......@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
results['scale_ratio'] = scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
return results
def __repr__(self):
......
......@@ -189,7 +189,8 @@ class LoadAnnotations3D(LoadAnnotations):
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
......
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int): A tuple of valid category.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
......@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
pipeline=None,
classes=None,
modality=None,
filter_empty_gt=True,
test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality,
test_mode)
filter_empty_gt, test_mode)
def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api
......
......@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
pipeline=None,
classes=None,
modality=None,
filter_empty_gt=True,
test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality,
test_mode)
filter_empty_gt, test_mode)
def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api
......
......@@ -8,6 +8,7 @@ from .detectors import * # noqa: F401,F403
from .fusion_layers import * # noqa: F401,F403
from .losses import * # noqa: F401,F403
from .middle_encoders import * # noqa: F401,F403
from .model_utils import * # noqa: F401,F403
from .necks import * # noqa: F401,F403
from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS
from .roi_heads import * # noqa: F401,F403
......
from .anchor3d_head import Anchor3DHead
from .parta2_rpn_head import PartA2RPNHead
from .vote_head import VoteHead
__all__ = ['Anchor3DHead', 'PartA2RPNHead']
__all__ = ['Anchor3DHead', 'PartA2RPNHead', 'VoteHead']
This diff is collapsed.
......@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
from .mvx_single_stage import MVXSingleStageDetector
from .mvx_two_stage import MVXTwoStageDetector
from .parta2 import PartA2
from .votenet import VoteNet
from .voxelnet import DynamicVoxelNet, VoxelNet
__all__ = [
'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
'DynamicMVXFasterRCNNV3', 'PartA2'
'DynamicMVXFasterRCNNV3', 'PartA2', 'VoteNet'
]
import torch
from mmdet3d.core import bbox3d2result
from mmdet.models import DETECTORS, SingleStageDetector
@DETECTORS.register_module()
class VoteNet(SingleStageDetector):
"""VoteNet model.
https://arxiv.org/pdf/1904.09664.pdf
"""
def __init__(self,
backbone,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(VoteNet, self).__init__(
backbone=backbone,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained)
def extract_feat(self, points):
x = self.backbone(points)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self,
points,
img_meta,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
gt_bboxes_ignore=None):
"""Forward of training.
Args:
points (list[Tensor]): Points of each batch.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
gt_labels_3d (list[Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each batch.
pts_instance_mask (None | list[Tensor]): point-wise instance
label of each batch.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.
Returns:
dict: Losses.
"""
points_cat = torch.stack(points) # tmp
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
pts_instance_mask, img_meta)
losses = self.bbox_head.loss(
bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
return losses
def forward_test(self, **kwargs):
return self.simple_test(**kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
gt_labels_3d=None,
pts_semantic_mask=None,
pts_instance_mask=None,
rescale=False):
"""Forward of testing.
Args:
points (list[Tensor]): Points of each sample.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
gt_labels_3d (list[Tensor]): gt class labels of each sample.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each sample.
pts_instance_mask (None | list[Tensor]): point-wise instance
label of each sample.
rescale (bool): Whether to rescale results.
Returns:
list: Predicted 3d boxes.
"""
points_cat = torch.stack(points) # tmp
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
bbox_list = self.bbox_head.get_bboxes(
points_cat, bbox_preds, img_meta, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results[0]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment