"vscode:/vscode.git/clone" did not exist on "9bdaefc8a685389c26b9775e219a785ba5d7e1cc"
Commit 7f9874aa authored by Xiangxu-0103, committed by ZwwWayne

[Feature] Support FCAF3D on S3DIS dataset in `dev-1.x` branch (#1984)



* support fcaf3d for s3dis dataset

* Update convert_utils.py

* Update seg3d_dataset.py

* Delete compose.py

* fix import error

* use `mmengine.Compose`

* Update s3dis-3d.py

* Update fcaf3d_2xb8_s3dis-3d-5class.py

* Update s3dis_dataset.py

* update unittest for s3dis

* update docs

* use `mmcv.Compose` instead of `mmengine.Compose`

* update docstring

* fix s3dis preprocessing bug

* Add typehint

* Update config and fix s3dis dataset

* update typehint

* Update convert_utils.py

* Update README and metafile
Co-authored-by: Tai-Wang <tab_wang@outlook.com>

fix `__all__` keyword
parent e9149550
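
As a hedged usage sketch (not part of this commit), the new S3DIS config named in the message above could be loaded and built through the dev-1.x registry as follows; the configs/fcaf3d/ path is an assumption based on the usual FCAF3D config location, not stated in this commit:

    # Hypothetical usage sketch; the config path under configs/fcaf3d/ is an
    # assumption, not stated in this commit.
    from mmengine.config import Config

    from mmdet3d.registry import MODELS
    from mmdet3d.utils import register_all_modules

    register_all_modules()
    cfg = Config.fromfile('configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py')
    model = MODELS.build(cfg.model)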
@@ -10,20 +10,20 @@ except ImportError:
 import torch.nn as nn
 
-from mmdet3d.models.builder import BACKBONES
+from mmdet3d.registry import MODELS
 
 
-@BACKBONES.register_module()
+@MODELS.register_module()
 class MinkResNet(nn.Module):
     r"""Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets
     <https://arxiv.org/abs/1904.08755>`_ for more details.
 
     Args:
         depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
-        in_channels (ont): Number of input channels, 3 for RGB.
-        num_stages (int, optional): Resnet stages. Default: 4.
-        pool (bool, optional): Add max pooling after first conv if True.
-            Default: True.
+        in_channels (int): Number of input channels, 3 for RGB.
+        num_stages (int): Resnet stages. Defaults to 4.
+        pool (bool): Whether to add max pooling after first conv.
+            Defaults to True.
     """
 
     arch_settings = {
         18: (BasicBlock, (2, 2, 2, 2)),
...
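
The hunk above moves MinkResNet from the legacy BACKBONES registry to the unified MODELS registry. A minimal sketch of building it through the new path, assuming MinkowskiEngine is installed (the backbone depends on it) and with illustrative argument values:

    # Minimal sketch of the new registry path; depth/in_channels values are
    # illustrative, and MinkowskiEngine must be installed for MinkResNet.
    from mmdet3d.registry import MODELS
    from mmdet3d.utils import register_all_modules

    register_all_modules()
    backbone = MODELS.build(dict(type='MinkResNet', depth=34, in_channels=3))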
@@ -18,21 +18,23 @@ from mmengine.model import bias_init_with_prob
 from mmengine.structures import InstanceData
 from torch import Tensor, nn
 
-from mmdet3d.models import HEADS, build_loss
+from mmdet3d.registry import MODELS
 from mmdet3d.structures import BaseInstance3DBoxes, rotation_3d_in_axis
 from mmdet3d.utils import InstanceList, OptInstanceList
 from .base_3d_dense_head import Base3DDenseHead
 
 
-@HEADS.register_module()
+@MODELS.register_module()
 class FCAF3DHead(Base3DDenseHead):
     r"""Bbox head of `FCAF3D <https://arxiv.org/abs/2112.00322>`_.
+
     Actually here we store both the sparse 3D FPN and a head. The neck and
     the head can not be simply separated as pruning score on the i-th level
     of FPN requires classification scores from i+1-th level of the head.
+
     Args:
         num_classes (int): Number of classes.
-        in_channels (tuple[int]): Number of channels in input tensors.
+        in_channels (int): Number of channels in input tensors.
         out_channels (int): Number of channels in the neck output tensors.
         num_reg_outs (int): Number of regression layer channels.
         voxel_size (float): Voxel size in meters.
@@ -43,9 +45,12 @@ class FCAF3DHead(Base3DDenseHead):
         pts_center_threshold (int): Box to location assigner parameter.
             After feature level for the box is determined, assigner selects
             pts_center_threshold locations closest to the box center.
-        center_loss (dict, optional): Config of centerness loss.
-        bbox_loss (dict, optional): Config of bbox loss.
-        cls_loss (dict, optional): Config of classification loss.
+        center_loss (dict): Config of centerness loss. Defaults to
+            dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True).
+        bbox_loss (dict): Config of bbox loss. Defaults to
+            dict(type='AxisAlignedIoULoss').
+        cls_loss (dict): Config of classification loss. Defaults to
+            dict(type='mmdet.FocalLoss').
         train_cfg (dict, optional): Config for train stage. Defaults to None.
         test_cfg (dict, optional): Config for test stage. Defaults to None.
         init_cfg (dict, optional): Config for weight initialization.
@@ -77,9 +82,9 @@ class FCAF3DHead(Base3DDenseHead):
         self.pts_prune_threshold = pts_prune_threshold
         self.pts_assign_threshold = pts_assign_threshold
         self.pts_center_threshold = pts_center_threshold
-        self.center_loss = build_loss(center_loss)
-        self.bbox_loss = build_loss(bbox_loss)
-        self.cls_loss = build_loss(cls_loss)
+        self.center_loss = MODELS.build(center_loss)
+        self.bbox_loss = MODELS.build(bbox_loss)
+        self.cls_loss = MODELS.build(cls_loss)
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg
         self._init_layers(in_channels, out_channels, num_reg_outs, num_classes)
@@ -91,6 +96,7 @@ class FCAF3DHead(Base3DDenseHead):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
+
         Returns:
             torch.nn.Module: With corresponding layers.
         """
@@ -106,6 +112,7 @@ class FCAF3DHead(Base3DDenseHead):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
+
         Returns:
             torch.nn.Module: With corresponding layers.
         """
@@ -335,7 +342,7 @@ class FCAF3DHead(Base3DDenseHead):
                 ``labels_3d``, ``depths``, ``centers_2d`` and attributes.
             batch_img_metas (list[dict]): Meta information of each image,
                 e.g., image size, scaling factor, etc.
-            batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
+            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                 Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                 data that is ignored during training and testing.
                 Defaults to None.
@@ -430,7 +437,7 @@ class FCAF3DHead(Base3DDenseHead):
         Returns:
             list[InstanceData]: Predicted bboxes, scores, and labels for
                 all scenes.
         """
         results = []
         for i in range(len(batch_input_metas)):
@@ -519,7 +526,7 @@ class FCAF3DHead(Base3DDenseHead):
         Returns:
             Tensor: Face distances of shape (N_points, N_boxes, 6),
                 (dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).
         """
         shift = torch.stack(
             (points[..., 0] - boxes[..., 0], points[..., 1] - boxes[..., 1],
@@ -568,7 +575,7 @@ class FCAF3DHead(Base3DDenseHead):
         Returns:
             tuple[Tensor, ...]: Centerness, bbox and classification
                 targets for all locations.
         """
         float_max = points[0].new_tensor(1e8)
         n_levels = len(points)
...
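
Likewise, the head's losses are now built via MODELS.build instead of the removed build_loss helper. A minimal sketch using the default loss configs quoted in the updated docstring, assuming register_all_modules() has populated the registry (including the mmdet.-scoped losses):

    # Sketch of the build_loss -> MODELS.build migration, using the defaults
    # from the docstring above; assumes register_all_modules() was called.
    from mmdet3d.registry import MODELS
    from mmdet3d.utils import register_all_modules

    register_all_modules()
    center_loss = MODELS.build(
        dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True))
    bbox_loss = MODELS.build(dict(type='AxisAlignedIoULoss'))
    cls_loss = MODELS.build(dict(type='mmdet.FocalLoss'))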
@@ -3,8 +3,10 @@ import unittest
 import numpy as np
 import torch
+from mmengine.testing import assert_allclose
 
-from mmdet3d.datasets import S3DISSegDataset
+from mmdet3d.datasets import S3DISDataset, S3DISSegDataset
+from mmdet3d.structures import DepthInstance3DBoxes
 from mmdet3d.utils import register_all_modules
@@ -55,8 +57,101 @@ def _generate_s3dis_seg_dataset_config():
         pipeline, modality)
 
 
+def _generate_s3dis_dataset_config():
+    data_root = 'tests/data/s3dis'
+    ann_file = 's3dis_infos.pkl'
+    classes = ('table', 'chair', 'sofa', 'bookcase', 'board')
+    modality = dict(use_lidar=True, use_camera=False)
+    pipeline = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='DEPTH',
+            shift_height=False,
+            use_color=True,
+            load_dim=6,
+            use_dim=[0, 1, 2, 3, 4, 5]),
+        dict(
+            type='LoadAnnotations3D',
+            with_bbox_3d=True,
+            with_label_3d=True,
+            with_mask_3d=True,
+            with_seg_3d=True),
+        dict(type='PointSegClassMapping'),
+        dict(type='PointSample', num_points=5),
+        dict(
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=1.0,
+            flip_ratio_bev_vertical=1.0),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.087266, 0.087266],
+            scale_ratio_range=[1.0, 1.0]),
+        dict(type='NormalizePointsColor', color_mean=None),
+        dict(
+            type='Pack3DDetInputs',
+            keys=[
+                'points', 'pts_semantic_mask', 'gt_bboxes_3d', 'gt_labels_3d',
+                'pts_instance_mask'
+            ])
+    ]
+    data_prefix = dict(
+        pts='points',
+        pts_instance_mask='instance_mask',
+        pts_semantic_mask='semantic_mask')
+    return data_root, ann_file, classes, data_prefix, pipeline, modality
+
+
 class TestS3DISDataset(unittest.TestCase):
 
+    def test_s3dis(self):
+        np.random.seed(0)
+        data_root, ann_file, classes, data_prefix, \
+            pipeline, modality = _generate_s3dis_dataset_config()
+        register_all_modules()
+        s3dis_dataset = S3DISDataset(
+            data_root,
+            ann_file,
+            data_prefix=data_prefix,
+            pipeline=pipeline,
+            metainfo=dict(classes=classes),
+            modality=modality)
+
+        s3dis_dataset.prepare_data(0)
+        input_dict = s3dis_dataset.get_data_info(0)
+        s3dis_dataset[0]
+        # assert the path contains data_prefix and data_root
+        self.assertIn(data_prefix['pts'],
+                      input_dict['lidar_points']['lidar_path'])
+        self.assertIn(data_root, input_dict['lidar_points']['lidar_path'])
+
+        ann_info = s3dis_dataset.parse_ann_info(input_dict)
+        # assert the keys in ann_info and their types
+        expect_label = np.array([1, 1, 3, 1, 2, 0, 0, 0, 3])
+        self.assertEqual(ann_info['gt_labels_3d'].dtype, np.int64)
+        assert_allclose(ann_info['gt_labels_3d'], expect_label)
+        self.assertIsInstance(ann_info['gt_bboxes_3d'], DepthInstance3DBoxes)
+        assert len(ann_info['gt_bboxes_3d']) == 9
+        assert torch.allclose(ann_info['gt_bboxes_3d'].tensor.sum(),
+                              torch.tensor([63.0455]))
+
+        no_class_s3dis_dataset = S3DISDataset(
+            data_root, ann_file, metainfo=dict(classes=['table']))
+        input_dict = no_class_s3dis_dataset.get_data_info(0)
+        ann_info = no_class_s3dis_dataset.parse_ann_info(input_dict)
+        # assert the keys in ann_info and their types
+        self.assertIn('gt_labels_3d', ann_info)
+        # assert labels are mapped to -1 or 0
+        assert (ann_info['gt_labels_3d'] <= 0).all()
+        self.assertEqual(ann_info['gt_labels_3d'].dtype, np.int64)
+        # all instances are kept but relabeled against the given classes
+        self.assertEqual(len(ann_info['gt_labels_3d']), 9)
+        self.assertEqual(len(no_class_s3dis_dataset.metainfo['classes']), 1)
+
     def test_s3dis_seg(self):
         data_root, ann_file, classes, palette, scene_idxs, data_prefix, \
             pipeline, modality, = _generate_s3dis_seg_dataset_config()
...
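
Outside the unittest harness, the new detection dataset can be smoke-tested directly; a sketch assuming a source checkout where the tests/data/s3dis fixture and its s3dis_infos.pkl exist:

    # Quick manual check mirroring the new test; assumes the tests/data/s3dis
    # fixture from a source checkout.
    from mmdet3d.datasets import S3DISDataset
    from mmdet3d.utils import register_all_modules

    register_all_modules()
    dataset = S3DISDataset(
        data_root='tests/data/s3dis',
        ann_file='s3dis_infos.pkl',
        metainfo=dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board')))
    print(len(dataset), dataset.metainfo['classes'])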
@@ -129,7 +129,7 @@ class S3DISData(object):
             - gt_num (int): Number of boxes.
         """
         bboxes, labels = [], []
-        for i in range(1, pts_instance_mask.max()):
+        for i in range(1, pts_instance_mask.max() + 1):
             ids = pts_instance_mask == i
             # check if all instance points have same semantic label
             assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max()
...
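
The one-line change above fixes an off-by-one in the S3DIS preprocessing: instance ids are 1-based and run up to pts_instance_mask.max() inclusive, so the old range(1, max) silently dropped the last instance's box in every room. A self-contained illustration:

    import numpy as np

    # Toy instance mask with ids 1..3 (id 0 would be unannotated points).
    pts_instance_mask = np.array([1, 1, 2, 2, 3, 3])

    old_ids = list(range(1, pts_instance_mask.max()))      # [1, 2] -- drops 3
    new_ids = list(range(1, pts_instance_mask.max() + 1))  # [1, 2, 3]
    assert len(new_ids) == pts_instance_mask.max() == 3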