Commit 0be27ffb authored by Jingwei Zhang, committed by ZwwWayne

[Refactor] Refactor FCAF3D (#1945)

* add fcaf3d config

* support inference of fcaf3d on scannet-3d

* minor changes

* fix config of scannet

* align test precision of fcaf3d && support training

* add ut

* fix bugs of dataset conversion and indoor_metric

* support fcaf3d on the sunrgbd dataset and add rotated_iou_loss ut

* small fix

* add docstring and typehint

* add typehint for SparseTensor

* remove pdb

* fix fcaf3d ut
parent 1e0e50d5
@@ -8,7 +8,7 @@ metainfo = dict(
         'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
         'garbagebin'))
-file_client_args = dict(backend='disk')
+# file_client_args = dict(backend='disk')
 # Uncomment the following if use ceph or other file clients.
 # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
 # for more details.
model = dict(
type='MinkSingleStage3DDetector',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
backbone=dict(type='MinkResNet', in_channels=3, depth=34),
bbox_head=dict(
type='FCAF3DHead',
in_channels=(64, 128, 256, 512),
out_channels=128,
voxel_size=.01,
pts_prune_threshold=100000,
pts_assign_threshold=27,
pts_center_threshold=18,
num_classes=18,
num_reg_outs=6,
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='AxisAlignedIoULoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
),
train_cfg=dict(),
test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))
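A minimal sketch (not part of this commit) of building the base model above through the registry, mirroring what the new unit test does; it assumes mmdet3d and MinkowskiEngine are installed:

from mmengine import DefaultScope
from mmengine.config import Config

from mmdet3d.registry import MODELS

# Register mmdet3d modules under the default scope, as the unit test does.
DefaultScope.get_instance('fcaf3d_demo', scope_name='mmdet3d')
cfg = Config.fromfile('configs/_base_/models/fcaf3d.py')
model = MODELS.build(cfg.model)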
_base_ = [
'../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
'../_base_/datasets/scannet-3d.py'
]
n_points = 100000
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D'),
dict(type='GlobalAlignment', rotation_axis=2),
dict(type='PointSample', num_points=n_points),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[.9, 1.1],
translation_std=[.1, .1, .1],
shift_height=False),
dict(type='NormalizePointsColor', color_mean=None),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=n_points),
dict(type='NormalizePointsColor', color_mean=None),
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
dataset=dict(
type='RepeatDataset',
times=10,
dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
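For orientation, a hedged sketch (not in the diff) of launching training with the ScanNet config above through MMEngine's Runner; the work_dir value is hypothetical:

from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/fcaf3d/fcaf3d_8xb2_scannet-3d-18class.py')
cfg.work_dir = 'work_dirs/fcaf3d_scannet'  # hypothetical output directory
runner = Runner.from_cfg(cfg)
runner.train()  # 12 epochs, MultiStepLR decay at epochs 8 and 11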
_base_ = [
'../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
'../_base_/datasets/sunrgbd-3d.py'
]
n_points = 100000
model = dict(
bbox_head=dict(
num_classes=10,
num_reg_outs=8,
bbox_loss=dict(type='RotatedIoU3DLoss')))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D'),
dict(type='PointSample', num_points=n_points),
dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
translation_std=[.1, .1, .1],
shift_height=False),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=n_points)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=8,
dataset=dict(
type='RepeatDataset',
times=3,
dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
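The SUN RGB-D config above relies on MMEngine's recursive config merging: the model dict only lists the bbox_head fields that differ from the base. A hedged check of that behavior (the SUN RGB-D config filename is assumed by analogy with the ScanNet one):

from mmengine.config import Config

# Hypothetical path, inferred from the ScanNet config's naming scheme.
cfg = Config.fromfile('configs/fcaf3d/fcaf3d_8xb2_sunrgbd-3d-10class.py')
assert cfg.model.bbox_head.num_classes == 10    # overridden above
assert cfg.model.bbox_head.out_channels == 128  # inherited from the base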
@@ -78,8 +78,9 @@ class IndoorMetric(BaseMetric):
             ann_infos.append(eval_ann)
             pred_results.append(sinlge_pred_results)

+        # some checkpoints may not record the key "box_type_3d"
         box_type_3d, box_mode_3d = get_box_type(
-            self.dataset_meta['box_type_3d'])
+            self.dataset_meta.get('box_type_3d', 'depth'))

         ret_dict = indoor_eval(
             ann_infos,
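The fix above swaps a hard key lookup for dict.get with a 'depth' default; a tiny standalone illustration of the fallback:

# Illustration only: a legacy checkpoint's meta that lacks 'box_type_3d'.
legacy_meta = {'classes': ('chair', 'table')}
assert legacy_meta.get('box_type_3d', 'depth') == 'depth'  # safe fallback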
@@ -58,7 +58,7 @@ class MinkResNet(nn.Module):
         for i, num_blocks in enumerate(stage_blocks):
             setattr(
-                self, f'layer{i}',
+                self, f'layer{i + 1}',
                 self._make_layer(block, 64 * 2**i, stage_blocks[i], stride=2))

     def init_weights(self):
@@ -111,6 +111,6 @@ class MinkResNet(nn.Module):
         x = self.maxpool(x)
         outs = []
         for i in range(self.num_stages):
-            x = getattr(self, f'layer{i}')(x)
+            x = getattr(self, f'layer{i + 1}')(x)
             outs.append(x)
         return outs
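The two hunks above fix an off-by-one in the stage names: stages are now registered as layer1..layer4 (the torchvision ResNet convention), so setattr and getattr must both use f'layer{i + 1}'. A quick standalone sketch:

import torch.nn as nn

m = nn.Module()
for i in range(4):
    setattr(m, f'layer{i + 1}', nn.Identity())  # layer1 .. layer4
assert hasattr(m, 'layer1') and not hasattr(m, 'layer0')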
@@ -5,6 +5,7 @@ from .base_3d_dense_head import Base3DDenseHead
 from .base_conv_bbox_head import BaseConvBboxHead
 from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
+from .fcaf3d_head import FCAF3DHead
 from .fcos_mono3d_head import FCOSMono3DHead
 from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead
@@ -22,5 +23,5 @@ __all__ = [
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
     'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-    'MonoFlexHead', 'Base3DDenseHead'
+    'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
 ]
This diff is collapsed. (By file order it is the new mmdet3d/models/dense_heads/fcaf3d_head.py, imported in the hunk above.)
@@ -8,6 +8,7 @@ from .groupfree3dnet import GroupFree3DNet
 from .h3dnet import H3DNet
 from .imvotenet import ImVoteNet
 from .imvoxelnet import ImVoxelNet
+from .mink_single_stage import MinkSingleStage3DDetector
 from .multiview_dfm import MultiViewDfM
 from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
 from .mvx_two_stage import MVXTwoStageDetector
@@ -21,25 +22,9 @@ from .votenet import VoteNet
 from .voxelnet import VoxelNet

 __all__ = [
-    'Base3DDetector',
-    'DfM',
-    'VoxelNet',
-    'DynamicVoxelNet',
-    'MVXTwoStageDetector',
-    'DynamicMVXFasterRCNN',
-    'MVXFasterRCNN',
-    'MultiViewDfM',
-    'PartA2',
-    'VoteNet',
-    'H3DNet',
-    'CenterPoint',
-    'SSD3DNet',
-    'ImVoteNet',
-    'SingleStageMono3DDetector',
-    'FCOSMono3D',
-    'ImVoxelNet',
-    'GroupFree3DNet',
-    'PointRCNN',
-    'SMOKEMono3D',
-    'SASSD',
+    'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
+    'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
+    'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
+    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',
+    'SASSD', 'MinkSingleStage3DDetector', 'MultiViewDfM', 'DfM'
 ]
# Copyright (c) OpenMMLab. All rights reserved.
# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/detectors/single_stage_sparse.py # noqa
from typing import Dict, Tuple, Union
import torch
from torch import Tensor
try:
import MinkowskiEngine as ME
except ImportError:
# Please follow getting_started.md to install MinkowskiEngine.
pass
from mmdet3d.registry import MODELS
from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
from .single_stage import SingleStage3DDetector
@MODELS.register_module()
class MinkSingleStage3DDetector(SingleStage3DDetector):
r"""MinkSingleStage3DDetector.
This class serves as a base class for single-stage 3D detectors based on
MinkowskiEngine `GSDN <https://arxiv.org/abs/2006.12356>`_.
Args:
backbone (dict): Config dict of detector's backbone.
neck (dict, optional): Config dict of neck. Defaults to None.
bbox_head (dict, optional): Config dict of box head. Defaults to None.
train_cfg (dict, optional): Config dict of training hyper-parameters.
Defaults to None.
test_cfg (dict, optional): Config dict of test hyper-parameters.
Defaults to None.
data_preprocessor (dict or ConfigDict, optional): The pre-process
        config of :class:`BaseDataPreprocessor`. It usually includes
            ``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
        init_cfg (dict or ConfigDict, optional): The config to control the
            initialization. Defaults to None.
"""
def __init__(self,
backbone: ConfigType,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
self.voxel_size = bbox_head['voxel_size']
# # TODO: unify the keys
# self.head = self.bbox_head
def extract_feat(
self, batch_inputs_dict: Dict[str, Tensor]
) -> Union[Tuple[torch.Tensor], Dict[str, Tensor]]:
"""Directly extract features from the backbone+neck.
Args:
batch_inputs_dict (dict): The model input dict which includes
the 'points' key.
- points (list[torch.Tensor]): Point cloud of each sample.
Returns:
tuple[Tensor] | dict: For outside 3D object detection, we
typically obtain a tuple of features from the backbone + neck,
and for inside 3D object detection, usually a dict containing
features will be obtained.
"""
points = batch_inputs_dict['points']
coordinates, features = ME.utils.batch_sparse_collate(
[(p[:, :3] / self.voxel_size, p[:, 3:]) for p in points],
device=points[0].device)
x = ME.SparseTensor(coordinates=coordinates, features=features)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
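A hedged usage sketch of the voxelization done in extract_feat above (requires MinkowskiEngine): xyz coordinates are divided by voxel_size before quantization, and the remaining channels (e.g. RGB) become per-voxel features:

import torch
import MinkowskiEngine as ME

voxel_size = 0.01  # matches the base model config above
points = [torch.rand(1000, 6)]  # one sample: xyz + rgb
coordinates, features = ME.utils.batch_sparse_collate(
    [(p[:, :3] / voxel_size, p[:, 3:]) for p in points])
x = ME.SparseTensor(coordinates=coordinates, features=features)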
@@ -143,7 +143,11 @@ class SingleStage3DDetector(Base3DDetector):
         """Directly extract features from the backbone+neck.

         Args:
-            points (torch.Tensor): Input points.
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'img' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.

         Returns:
             tuple[Tensor] | dict: For outside 3D object detection, we
@@ -4,11 +4,12 @@ from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
 from .chamfer_distance import ChamferDistance, chamfer_distance
 from .multibin_loss import MultiBinLoss
 from .paconv_regularization_loss import PAConvRegularizationLoss
+from .rotated_iou_loss import RotatedIoU3DLoss, rotated_iou_3d_loss
 from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss

 __all__ = [
     'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
     'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
     'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
-    'MultiBinLoss'
+    'MultiBinLoss', 'RotatedIoU3DLoss', 'rotated_iou_3d_loss'
 ]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmcv.ops import diff_iou_rotated_3d
from torch import Tensor
from torch import nn as nn
from mmdet3d.registry import MODELS
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def rotated_iou_3d_loss(pred: Tensor, target: Tensor) -> Tensor:
"""Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.
Note that predictions and targets correspond one-to-one.
Args:
pred (torch.Tensor): Bbox predictions with shape [N, 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
(x, y, z, w, l, h, alpha).
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0),
target.unsqueeze(0))[0]
return iou_loss
@MODELS.register_module()
class RotatedIoU3DLoss(nn.Module):
"""Calculate the IoU loss (1-IoU) of rotated bounding boxes.
Args:
        reduction (str): Method to reduce losses. The valid reduction
            methods are 'none', 'sum' and 'mean'. Defaults to 'mean'.
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
def __init__(self,
reduction: str = 'mean',
loss_weight: float = 1.0) -> None:
super().__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred: Tensor,
target: Tensor,
weight: Optional[Tensor] = None,
avg_factor: Optional[int] = None,
reduction_override: Optional[str] = None,
**kwargs) -> Tensor:
"""Forward function of loss calculation.
Args:
pred (torch.Tensor): Bbox predictions with shape [..., 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
(x, y, z, w, l, h, alpha).
weight (torch.Tensor | float, optional): Weight of loss.
Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): Method to reduce losses.
The valid reduction methods are 'none', 'sum' or 'mean'.
Defaults to None.
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
if weight is not None and not torch.any(weight > 0):
return pred.sum() * weight.sum()  # evaluates to 0 but keeps the graph
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
weight = weight.mean(-1)
loss = self.loss_weight * rotated_iou_3d_loss(
pred,
target,
weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
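A hedged usage sketch of the loss above; mmcv.ops.diff_iou_rotated_3d is a CUDA-only op, so inputs must live on the GPU (the new unit tests guard on this too):

import torch

from mmdet3d.models.losses import RotatedIoU3DLoss

if torch.cuda.is_available():
    loss_fn = RotatedIoU3DLoss(reduction='mean')
    pred = torch.rand(4, 7).cuda()    # (x, y, z, w, l, h, alpha)
    target = torch.rand(4, 7).cuda()
    loss = loss_fn(pred, target)      # differentiable scalar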
@@ -9,10 +9,19 @@ from matplotlib.collections import PatchCollection
 from matplotlib.patches import PathPatch
 from matplotlib.path import Path
 from mmengine.dist import master_only
+from mmengine.structures import InstanceData
+from mmengine.visualization.utils import check_type, tensor2ndarray
 from torch import Tensor

+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.structures import (BaseInstance3DBoxes, CameraInstance3DBoxes,
+                                Coord3DMode, DepthInstance3DBoxes,
+                                Det3DDataSample, LiDARInstance3DBoxes,
+                                PointData, points_cam2img)
 from mmdet3d.structures.bbox_3d.box_3d_mode import Box3DMode
 from mmdet.visualization import DetLocalVisualizer
+from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
+                        proj_lidar_bbox3d_to_img, to_depth_mode)

 try:
     import open3d as o3d
@@ -21,17 +30,6 @@ except ImportError:
     raise ImportError(
         'Please run "pip install open3d" to install open3d first.')

-from mmengine.structures import InstanceData
-from mmengine.visualization.utils import check_type, tensor2ndarray
-
-from mmdet3d.registry import VISUALIZERS
-from mmdet3d.structures import (BaseInstance3DBoxes, CameraInstance3DBoxes,
-                                Coord3DMode, DepthInstance3DBoxes,
-                                Det3DDataSample, LiDARInstance3DBoxes,
-                                PointData, points_cam2img)
-from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
-                        proj_lidar_bbox3d_to_img, to_depth_mode)

 @VISUALIZERS.register_module()
 class Det3DLocalVisualizer(DetLocalVisualizer):
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import pytest
import torch
from mmdet3d import * # noqa
from mmdet3d.models.dense_heads import FCAF3DHead
from tests.utils.model_utils import _create_detector_inputs
class TestFCAF3DHead(TestCase):

    def test_fcaf3d_head_loss(self):
        """Test FCAF3D head loss with a non-empty ground truth."""
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
try:
import MinkowskiEngine as ME
except ImportError:
pytest.skip('test requires MinkowskiEngine installation')
# build head
fcaf3d_head = FCAF3DHead(
in_channels=(64, 128, 256, 512),
out_channels=128,
voxel_size=1.,
pts_prune_threshold=1000,
pts_assign_threshold=27,
pts_center_threshold=18,
num_classes=18,
num_reg_outs=6,
test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01),
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='AxisAlignedIoULoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
)
fcaf3d_head = fcaf3d_head.cuda()
# fake input of head
        coordinates = [torch.randn(500, 3).cuda() * 100]
        features = [torch.randn(500, 3).cuda()]
tensor_coordinates, tensor_features = ME.utils.sparse_collate(
coordinates, features)
x = ME.SparseTensor(
features=tensor_features, coordinates=tensor_coordinates)
# backbone
conv1 = ME.MinkowskiConvolution(
3, 64, kernel_size=3, stride=2, dimension=3).cuda()
conv2 = ME.MinkowskiConvolution(
64, 128, kernel_size=3, stride=2, dimension=3).cuda()
conv3 = ME.MinkowskiConvolution(
128, 256, kernel_size=3, stride=2, dimension=3).cuda()
conv4 = ME.MinkowskiConvolution(
256, 512, kernel_size=3, stride=2, dimension=3).cuda()
# backbone outputs of 4 levels
x1 = conv1(x)
x2 = conv2(x1)
x3 = conv3(x2)
x4 = conv4(x3)
x = (x1, x2, x3, x4)
# fake annotation
packed_inputs = _create_detector_inputs(
with_points=False,
with_img=False,
num_gt_instance=3,
num_classes=1,
points_feat_dim=6,
gt_bboxes_dim=6)
data_samples = [
sample.cuda() for sample in packed_inputs['data_samples']
]
gt_losses = fcaf3d_head.loss(x, data_samples)
        self.assertGreaterEqual(gt_losses['cls_loss'], 0,
                                'cls loss should be non-negative')
        self.assertGreaterEqual(gt_losses['bbox_loss'], 0,
                                'bbox loss should be non-negative')
        self.assertGreaterEqual(gt_losses['center_loss'], 0,
                                'center loss should be non-negative')
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestFCAF3d(unittest.TestCase):
def test_fcaf3d(self):
        try:
            import MinkowskiEngine  # noqa: F401
        except ImportError:
            self.skipTest('test requires MinkowskiEngine installation')
import mmdet3d.models
assert hasattr(mmdet3d.models, 'MinkSingleStage3DDetector')
DefaultScope.get_instance('test_fcaf3d', scope_name='mmdet3d')
_setup_seed(0)
fcaf3d_net_cfg = _get_detector_cfg(
'fcaf3d/fcaf3d_8xb2_scannet-3d-18class.py')
model = MODELS.build(fcaf3d_net_cfg)
num_gt_instance = 3
packed_inputs = _create_detector_inputs(
num_gt_instance=num_gt_instance,
num_classes=1,
points_feat_dim=6,
gt_bboxes_dim=6)
        if torch.cuda.is_available():
            model = model.cuda()
            with torch.no_grad():
                data = model.data_preprocessor(packed_inputs, False)
                torch.cuda.empty_cache()
                results = model.forward(**data, mode='predict')
            self.assertEqual(len(results), 1)
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)
            losses = model.forward(**data, mode='loss')
            self.assertGreater(losses['center_loss'], 0)
            self.assertGreater(losses['bbox_loss'], 0)
            self.assertGreater(losses['cls_loss'], 0)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch

from mmdet3d.models.losses import RotatedIoU3DLoss


def test_rotated_iou_3d_loss():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
boxes1 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0]]).cuda()
boxes2 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 2., np.pi / 2],
[.5, .5, .5, 1., 1., 1., np.pi / 4],
[1., 1., 1., 1., 1., 1., .0],
[-1.5, -1.5, -1.5, 2.5, 2.5, 2.5, .0]]).cuda()
    expected_loss = 1 - torch.tensor([[1., .5, .7071, 1 / 15, .0]]).cuda()
    loss = RotatedIoU3DLoss(reduction='none')(boxes1, boxes2)
    assert torch.allclose(loss, expected_loss, atol=1e-4)