"docs/source/vscode:/vscode.git/clone" did not exist on "8f36444c4f9a55669bcb64e20b5588c0dd72bd93"
Commit 19a56f6b authored by zhangwenwei

Merge branch 'votenet' into 'master'

Votenet

See merge request open-mmlab/mmdet.3d!46
parents ac3590a1 f717eb62
# model settings
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        pool_mod='max'),
    bbox_head=dict(
        type='VoteHead',
        num_classes=18,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=18,
            num_dir_bins=1,
            with_rot=False,
            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                        [1.876858, 1.8425595, 1.1931566],
                        [0.61328, 0.6148609, 0.7182701],
                        [1.3955007, 1.5121545, 0.83443564],
                        [0.97949594, 1.0675149, 0.6329687],
                        [0.531663, 0.5955577, 1.7500148],
                        [0.9624706, 0.72462326, 1.1481868],
                        [0.83221924, 1.0490936, 1.6875663],
                        [0.21132214, 0.4206159, 0.5372846],
                        [1.4440073, 1.8970833, 0.26985747],
                        [1.0294262, 1.4040797, 0.87554324],
                        [1.3766412, 0.65521795, 1.6813129],
                        [0.6650819, 0.71111923, 1.298853],
                        [0.41999173, 0.37906948, 1.7513971],
                        [0.59359556, 0.5912492, 0.73919016],
                        [0.50867593, 0.50656086, 0.30136237],
                        [1.1511526, 1.0546296, 0.49706793],
                        [0.47535285, 0.49249494, 0.5802117]]),
        vote_moudule_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
    sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                       34, 36, 39)),
    dict(type='IndoorPointSample', num_points=40000),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
    dict(
        type='IndoorGlobalRotScale',
        shift_height=True,
        rot_range=[-1 / 36, 1 / 36],
        scale_range=None),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask'
        ])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='IndoorPointSample', num_points=40000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            classes=class_names)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.008  # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_scannet'
load_from = None
resume_from = None
workflow = [('train', 1)]
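
For reference, a minimal sketch (not part of the commit) of building the detector from the ScanNet config above. The file name configs/votenet_scannet.py is an assumption, and the mmdet v2-style builder is assumed to be available:

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/votenet_scannet.py')
detector = build_detector(
    cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(type(detector).__name__)  # VoteNet

The SUN RGB-D variant of the same config follows.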
# model settings
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        pool_mod='max'),
    bbox_head=dict(
        type='VoteHead',
        num_classes=10,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=10,
            num_dir_bins=12,
            with_rot=True,
            mean_sizes=[[2.114256, 1.620300, 0.927272],
                        [0.791118, 1.279516, 0.718182],
                        [0.923508, 1.867419, 0.845495],
                        [0.591958, 0.552978, 0.827272],
                        [0.699104, 0.454178, 0.75625],
                        [0.69519, 1.346299, 0.736364],
                        [0.528526, 1.002642, 1.172878],
                        [0.500618, 0.632163, 0.683424],
                        [0.404671, 1.071108, 1.688889],
                        [0.76584, 1.398258, 0.472728]]),
        vote_moudule_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
    sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='LoadAnnotations3D'),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5),
    dict(
        type='IndoorGlobalRotScale',
        shift_height=True,
        rot_range=[-1 / 6, 1 / 6],
        scale_range=[0.85, 1.15]),
    dict(type='IndoorPointSample', num_points=20000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='IndoorPointSample', num_points=20000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'sunrgbd_infos_train.pkl',
            pipeline=train_pipeline,
            classes=class_names,
            filter_empty_gt=False)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.008  # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=30,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_sunrgbd'
load_from = None
resume_from = None
workflow = [('train', 1)]
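
Similarly, a minimal sketch (again an assumption, not part of the commit) of building the SUN RGB-D training set from this config through mmdet's dataset builder, assuming the file is saved as configs/votenet_sunrgbd.py:

from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/votenet_sunrgbd.py')
dataset = build_dataset(cfg.data.train)
# a RepeatDataset that wraps SUNRGBDDataset and repeats it 5 times per epoch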
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
 from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
-from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
+from .transforms import (bbox3d2result, bbox3d2roi,
+                         box3d_to_corner3d_upright_depth,
+                         boxes3d_to_bev_torch_lidar)

 from .assign_sampling import (  # isort:skip, avoid recursive imports
     build_bbox_coder,  # temporally settings
@@ -22,5 +24,6 @@ __all__ = [
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
     'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
+    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
+    'box3d_to_corner3d_upright_depth'
 ]
 from mmdet.core.bbox import build_bbox_coder
 from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
+from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder

-__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
+__all__ = [
+    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder'
+]
import numpy as np
import torch

from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS


@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
    """Partial bin based bbox coder.

    Args:
        num_dir_bins (int): Number of bins to encode direction angle.
        num_sizes (int): Number of size clusters.
        mean_sizes (list[list[int]]): Mean size of bboxes in each class.
        with_rot (bool): Whether the bbox is with rotation.
    """

    def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
        super(PartialBinBasedBBoxCoder, self).__init__()
        assert len(mean_sizes) == num_sizes
        self.num_dir_bins = num_dir_bins
        self.num_sizes = num_sizes
        self.mean_sizes = mean_sizes
        self.with_rot = with_rot

    def encode(self, gt_bboxes_3d, gt_labels_3d):
        """Encode ground truth to prediction targets.

        Args:
            gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
            gt_labels_3d (Tensor): Gt classes.

        Returns:
            tuple: Targets of center, size and direction.
        """
        # generate center target
        center_target = gt_bboxes_3d[..., 0:3]

        # generate bbox size target
        size_class_target = gt_labels_3d
        size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
            self.mean_sizes)[size_class_target]

        # generate dir target
        box_num = gt_bboxes_3d.shape[0]
        if self.with_rot:
            (dir_class_target,
             dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
        else:
            dir_class_target = gt_labels_3d.new_zeros(box_num)
            dir_res_target = gt_bboxes_3d.new_zeros(box_num)

        return (center_target, size_class_target, size_res_target,
                dir_class_target, dir_res_target)

    def decode(self, bbox_out):
        """Decode predicted parts to bbox3d.

        Args:
            bbox_out (dict): Predictions from model, should contain keys below.

                - center: predicted bottom center of bboxes.
                - dir_class: predicted bbox direction class.
                - dir_res: predicted bbox direction residual.
                - size_class: predicted bbox size class.
                - size_res: predicted bbox size residual.

        Returns:
            Tensor: Decoded bbox3d with shape (batch, n, 7).
        """
        center = bbox_out['center']
        batch_size, num_proposal = center.shape[:2]

        # decode heading angle
        if self.with_rot:
            dir_class = torch.argmax(bbox_out['dir_class'], -1)
            dir_res = torch.gather(bbox_out['dir_res'], 2,
                                   dir_class.unsqueeze(-1))
            dir_res.squeeze_(2)
            dir_angle = self.class2angle(dir_class, dir_res).reshape(
                batch_size, num_proposal, 1)
        else:
            dir_angle = center.new_zeros(batch_size, num_proposal, 1)

        # decode bbox size
        size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
        size_res = torch.gather(bbox_out['size_res'], 2,
                                size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
        mean_sizes = center.new_tensor(self.mean_sizes)
        size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
        bbox_size = size_base.reshape(batch_size, num_proposal,
                                      -1) + size_res.squeeze(2)

        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
        return bbox3d

    def split_pred(self, preds, base_xyz):
        """Split predicted features to specific parts.

        Args:
            preds (Tensor): Predicted features to split.
            base_xyz (Tensor): Coordinates of points.

        Returns:
            dict: Split results.
        """
        results = {}
        start, end = 0, 0
        preds_trans = preds.transpose(2, 1)

        # decode objectness score
        end += 2
        results['obj_scores'] = preds_trans[..., start:end]
        start = end

        # decode center
        end += 3
        # (batch_size, num_proposal, 3)
        results['center'] = base_xyz + preds_trans[..., start:end]
        start = end

        # decode direction
        end += self.num_dir_bins
        results['dir_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_dir_bins
        dir_res_norm = preds_trans[..., start:end]
        start = end

        results['dir_res_norm'] = dir_res_norm
        results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)

        # decode size
        end += self.num_sizes
        results['size_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_sizes * 3
        size_res_norm = preds_trans[..., start:end]
        batch_size, num_proposal = preds_trans.shape[:2]
        size_res_norm = size_res_norm.view(
            [batch_size, num_proposal, self.num_sizes, 3])
        start = end

        results['size_res_norm'] = size_res_norm
        mean_sizes = preds.new_tensor(self.mean_sizes)
        results['size_res'] = (
            size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))

        # decode semantic score
        results['sem_scores'] = preds_trans[..., start:]

        return results

    def angle2class(self, angle):
        """Convert continuous angle to a discrete class and a residual.

        Convert continuous angle to a discrete class and a small
        regression number from class center angle to current angle.

        Args:
            angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
                0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N).

        Returns:
            tuple: Encoded discrete class and residual.
        """
        angle = angle % (2 * np.pi)
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
        angle_cls = shifted_angle // angle_per_class
        angle_res = shifted_angle - (
            angle_cls * angle_per_class + angle_per_class / 2)
        return angle_cls.long(), angle_res

    def class2angle(self, angle_cls, angle_res, limit_period=True):
        """Inverse function to angle2class.

        Args:
            angle_cls (Tensor): Angle class to decode.
            angle_res (Tensor): Angle residual to decode.
            limit_period (bool): Whether to limit angle to [-pi, pi].

        Returns:
            Tensor: Angle decoded from angle_cls and angle_res.
        """
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        angle_center = angle_cls.float() * angle_per_class
        angle = angle_center + angle_res
        if limit_period:
            angle[angle > np.pi] -= 2 * np.pi
        return angle
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
     """
     return dict(
         boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None,
                                 bboxes=None,
                                 to_bottom_center=False):
    """Convert points and boxes in upright depth coordinate to lidar.

    Args:
        points (None | Tensor): Points in upright depth coordinate.
        bboxes (None | Tensor): Bboxes in upright depth coordinate.
        to_bottom_center (bool): Whether to convert bboxes to bottom center.

    Returns:
        tuple: Points and bboxes in lidar coordinate.
    """
    if points is not None:
        points_lidar = points.clone()
        # swap the x and y axes and flip the new y axis
        points_lidar = points_lidar[..., [1, 0, 2]]
        points_lidar[..., 1] *= -1
    else:
        points_lidar = None

    if bboxes is not None:
        bboxes_lidar = bboxes.clone()
        # swap x/y and l/w to follow the axis swap above
        bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
        bboxes_lidar[..., 1] *= -1
        if to_bottom_center:
            bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
    else:
        bboxes_lidar = None

    return points_lidar, bboxes_lidar


def box3d_to_corner3d_upright_depth(boxes3d):
    """Convert box3d to corner3d in upright depth coordinate.

    Args:
        boxes3d (Tensor): Boxes with shape [n, 7] in upright depth coordinate.

    Returns:
        Tensor: Corners with shape [n, 8, 3] in upright depth coordinate.
    """
    boxes_num = boxes3d.shape[0]
    ry = boxes3d[:, 6:7]
    l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
    zeros = boxes3d.new_zeros((boxes_num, 1))
    ones = boxes3d.new_ones((boxes_num, 1))

    x_corners = torch.cat(
        [-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
        dim=1)  # (N, 8)
    y_corners = torch.cat(
        [w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
        dim=1)  # (N, 8)
    z_corners = torch.cat(
        [h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
        dim=1)  # (N, 8)
    temp_corners = torch.cat(
        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
         z_corners.unsqueeze(dim=2)),
        dim=2)  # (N, 8, 3)

    # per-box rotation matrix around the z axis
    cosa, sina = torch.cos(-ry), torch.sin(-ry)
    row_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
    row_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
    row_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
    R = torch.cat((row_1.unsqueeze(dim=1), row_2.unsqueeze(dim=1),
                   row_3.unsqueeze(dim=1)),
                  dim=1)  # (N, 3, 3)

    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
    x_corners = rotated_corners[:, :, 0]
    y_corners = rotated_corners[:, :, 1]
    z_corners = rotated_corners[:, :, 2]

    # translate the rotated corners to the box location
    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
    corners3d = torch.cat(
        (x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)

    return corners3d
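
A minimal usage sketch (an assumption, not part of the commit): corners of an axis-aligned 2 x 1 x 1 box centered at the origin with zero heading.

import torch

boxes3d = torch.tensor([[0., 0., 0., 2., 1., 1., 0.]])  # (x, y, z, l, w, h, ry)
corners = box3d_to_corner3d_upright_depth(boxes3d)
print(corners.shape)  # torch.Size([1, 8, 3])
# Each corner is (+-l/2, +-w/2, +-h/2); e.g. corners[0, 0] is (-1.0, 0.5, 0.5).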
 from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                         merge_aug_proposals, merge_aug_scores,
                                         multiclass_nms)
-from .box3d_nms import box3d_multiclass_nms
+from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms

 __all__ = [
     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
-    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
+    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
+    'aligned_3d_nms'
 ]
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
         labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
         dir_scores = mlvl_scores.new_zeros((0, ))
     return bboxes, scores, labels, dir_scores
def aligned_3d_nms(boxes, scores, classes, thresh):
    """3d NMS for aligned boxes.

    Args:
        boxes (Tensor): Aligned boxes with shape [n, 6].
        scores (Tensor): Scores of each box.
        classes (Tensor): Class of each box.
        thresh (float): IoU threshold for NMS.

    Returns:
        Tensor: Indices of selected boxes.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    z1 = boxes[:, 2]
    x2 = boxes[:, 3]
    y2 = boxes[:, 4]
    z2 = boxes[:, 5]
    area = (x2 - x1) * (y2 - y1) * (z2 - z1)
    zero = boxes.new_zeros(1, )

    score_sorted = torch.argsort(scores)
    pick = []
    while (score_sorted.shape[0] != 0):
        last = score_sorted.shape[0]
        i = score_sorted[-1]
        pick.append(i)

        # intersection of the highest-scoring box with the remaining boxes
        xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])
        yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])
        zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])
        xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])
        yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])
        zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])
        classes1 = classes[i]
        classes2 = classes[score_sorted[:last - 1]]
        inter_l = torch.max(zero, xx2 - xx1)
        inter_w = torch.max(zero, yy2 - yy1)
        inter_h = torch.max(zero, zz2 - zz1)

        inter = inter_l * inter_w * inter_h
        iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)
        # only boxes of the same class suppress each other
        iou = iou * (classes1 == classes2).float()
        score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()]

    indices = boxes.new_tensor(pick, dtype=torch.long)
    return indices
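
A minimal usage sketch (an assumption, not part of the commit): two heavily overlapping axis-aligned boxes of the same class, where the lower-scoring one is suppressed at an IoU threshold of 0.25.

import torch

boxes = torch.tensor([[0., 0., 0., 2., 2., 2.],
                      [0.1, 0.1, 0.1, 2., 2., 2.]])  # (x1, y1, z1, x2, y2, z2)
scores = torch.tensor([0.9, 0.6])
classes = torch.tensor([1, 1])
keep = aligned_3d_nms(boxes, scores, classes, thresh=0.25)
print(keep)  # tensor([0]) -- only the higher-scoring box survives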
@@ -19,12 +19,14 @@ class Custom3DDataset(Dataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__()
         self.data_root = data_root
         self.ann_file = ann_file
         self.test_mode = test_mode
         self.modality = modality
+        self.filter_empty_gt = filter_empty_gt
         self.CLASSES = self.get_classes(classes)
         self.data_infos = self.load_annotations(self.ann_file)
@@ -52,7 +54,7 @@ class Custom3DDataset(Dataset):
         if not self.test_mode:
             annos = self.get_ann_info(index)
             input_dict['ann_info'] = annos
-            if len(annos['gt_bboxes_3d']) == 0:
+            if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:
                 return None
         return input_dict
@@ -67,7 +69,8 @@ class Custom3DDataset(Dataset):
             return None
         self.pre_pipeline(input_dict)
         example = self.pipeline(input_dict)
-        if example is None or len(example['gt_bboxes_3d']._data) == 0:
+        if self.filter_empty_gt and (example is None or len(
+                example['gt_bboxes_3d']._data) == 0):
             return None
         return example
@@ -124,10 +127,13 @@ class Custom3DDataset(Dataset):
             results (list[dict]): List of results.
             metric (str | list[str]): Metrics to be evaluated.
             iou_thr (list[float]): AP IoU thresholds.
         """
         from mmdet3d.core.evaluation import indoor_eval
         assert isinstance(
             results, list), f'Expect results to be list, got {type(results)}.'
+        assert len(results) > 0, 'Expect length of results > 0.'
+        assert len(results) == len(self.data_infos)
         assert isinstance(
             results[0], dict
         ), f'Expect elements in results to be dict, got {type(results[0])}.'
...
@@ -7,6 +7,7 @@ from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
                              NormalizePointsColor)
 from .indoor_sample import IndoorPointSample
 from .loading import LoadMultiViewImageFromFiles
+from .point_seg_class_mapping import PointSegClassMapping
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
                         RandomFlip3D)
@@ -18,5 +19,5 @@ __all__ = [
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
     'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
-    'IndoorPointSample'
+    'IndoorPointSample', 'PointSegClassMapping'
 ]
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
         results['scale_ratio'] = scale_ratio
         results['points'] = points
-        results['gt_bboxes_3d'] = gt_bboxes_3d
+        results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
         return results

     def __repr__(self):
...
@@ -189,7 +189,8 @@ class LoadAnnotations3D(LoadAnnotations):
         self.file_client = mmcv.FileClient(**self.file_client_args)
         try:
             mask_bytes = self.file_client.get(pts_semantic_mask_path)
-            pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int)
+            # add .copy() to fix read-only bug
+            pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
         except ConnectionError:
             mmcv.check_file_exist(pts_semantic_mask_path)
             pts_semantic_mask = np.fromfile(
...
from mmdet.datasets.builder import PIPELINES


@PIPELINES.register_module()
class PointSegClassMapping(object):
    """Map original semantic class ids to valid category ids.

    Valid classes are mapped to 0 ... len(valid_cat_ids) - 1 and
    all other classes to len(valid_cat_ids).

    Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
    """

    def __init__(self, valid_cat_ids):
        self.valid_cat_ids = valid_cat_ids

    def __call__(self, results):
        assert 'pts_semantic_mask' in results
        pts_semantic_mask = results['pts_semantic_mask']
        neg_cls = len(self.valid_cat_ids)

        for i in range(pts_semantic_mask.shape[0]):
            if pts_semantic_mask[i] in self.valid_cat_ids:
                converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
                pts_semantic_mask[i] = converted_id
            else:
                pts_semantic_mask[i] = neg_cls

        results['pts_semantic_mask'] = pts_semantic_mask
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
        return repr_str
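
A minimal usage sketch (an assumption, not part of the commit): with two valid ids, labels 3 and 5 map to 0 and 1, and anything else maps to 2.

import numpy as np

mapping = PointSegClassMapping(valid_cat_ids=(3, 5))
results = {'pts_semantic_mask': np.array([3, 5, 7, 3])}
out = mapping(results)
print(out['pts_semantic_mask'])  # [0 1 2 0]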
@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         test_mode)
+                         filter_empty_gt, test_mode)

     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
...
@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         test_mode)
+                         filter_empty_gt, test_mode)

     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
...
@@ -8,6 +8,7 @@ from .detectors import *  # noqa: F401,F403
 from .fusion_layers import *  # noqa: F401,F403
 from .losses import *  # noqa: F401,F403
 from .middle_encoders import *  # noqa: F401,F403
+from .model_utils import *  # noqa: F401,F403
 from .necks import *  # noqa: F401,F403
 from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS
 from .roi_heads import *  # noqa: F401,F403
...
 from .anchor3d_head import Anchor3DHead
 from .parta2_rpn_head import PartA2RPNHead
+from .vote_head import VoteHead

-__all__ = ['Anchor3DHead', 'PartA2RPNHead']
+__all__ = ['Anchor3DHead', 'PartA2RPNHead', 'VoteHead']
(One collapsed diff is omitted here.)
@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
 from .mvx_single_stage import MVXSingleStageDetector
 from .mvx_two_stage import MVXTwoStageDetector
 from .parta2 import PartA2
+from .votenet import VoteNet
 from .voxelnet import DynamicVoxelNet, VoxelNet

 __all__ = [
     'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
     'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-    'DynamicMVXFasterRCNNV3', 'PartA2'
+    'DynamicMVXFasterRCNNV3', 'PartA2', 'VoteNet'
 ]
import torch

from mmdet3d.core import bbox3d2result
from mmdet.models import DETECTORS, SingleStageDetector


@DETECTORS.register_module()
class VoteNet(SingleStageDetector):
    """VoteNet model.

    https://arxiv.org/pdf/1904.09664.pdf
    """

    def __init__(self,
                 backbone,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(VoteNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)

    def extract_feat(self, points):
        x = self.backbone(points)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self,
                      points,
                      img_meta,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      pts_semantic_mask=None,
                      pts_instance_mask=None,
                      gt_bboxes_ignore=None):
        """Forward of training.

        Args:
            points (list[Tensor]): Points of each batch.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each batch.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes to ignore.

        Returns:
            dict: Losses.
        """
        points_cat = torch.stack(points)  # tmp

        x = self.extract_feat(points_cat)
        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
                       pts_instance_mask, img_meta)
        losses = self.bbox_head.loss(
            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        return losses

    def forward_test(self, **kwargs):
        return self.simple_test(**kwargs)

    def forward(self, return_loss=True, **kwargs):
        if return_loss:
            return self.forward_train(**kwargs)
        else:
            return self.forward_test(**kwargs)

    def simple_test(self,
                    points,
                    img_meta,
                    gt_bboxes_3d=None,
                    gt_labels_3d=None,
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    rescale=False):
        """Forward of testing.

        Args:
            points (list[Tensor]): Points of each sample.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
            gt_labels_3d (list[Tensor]): gt class labels of each sample.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each sample.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each sample.
            rescale (bool): Whether to rescale results.

        Returns:
            list: Predicted 3d boxes.
        """
        points_cat = torch.stack(points)  # tmp

        x = self.extract_feat(points_cat)
        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
        bbox_list = self.bbox_head.get_bboxes(
            points_cat, bbox_preds, img_meta, rescale=rescale)
        bbox_results = [
            bbox3d2result(bboxes, scores, labels)
            for bboxes, scores, labels in bbox_list
        ]
        return bbox_results[0]
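
A minimal inference sketch (an assumption, not part of the commit), reusing the `detector` built from the ScanNet config sketch earlier; the PointNet++ ops are CUDA-only, hence the .cuda() calls:

import torch

detector = detector.cuda().eval()
points = [torch.rand(40000, 4).cuda()]  # x, y, z, height for one sample
with torch.no_grad():
    result = detector(return_loss=False, points=points, img_meta=[dict()])
# result is a dict with 'boxes_3d', 'scores_3d' and 'labels_3d'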