Unverified commit bb204696 authored by Wenwei Zhang, committed by GitHub

Release v1.0.0rc3

parents 14c5ded4 dea954e5
@@ -11,6 +11,7 @@ from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
from .mvx_two_stage import MVXTwoStageDetector
from .parta2 import PartA2
from .point_rcnn import PointRCNN
from .sassd import SASSD
from .single_stage_mono3d import SingleStageMono3DDetector
from .smoke_mono3d import SMOKEMono3D
from .ssd3dnet import SSD3DNet
@@ -21,5 +22,6 @@ __all__ = [
'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
- 'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D'
+ 'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',
'SASSD'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.ops import Voxelization
from mmcv.runner import force_fp32
from torch.nn import functional as F
from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
from mmdet.models.builder import DETECTORS
from .. import builder
from .single_stage import SingleStage3DDetector
@DETECTORS.register_module()
class SASSD(SingleStage3DDetector):
r"""`SASSD <https://github.com/skyhehe123/SA-SSD>` _ for 3D detection."""
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
init_cfg=None,
pretrained=None):
super(SASSD, self).__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg,
pretrained=pretrained)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)
self.middle_encoder = builder.build_middle_encoder(middle_encoder)
def extract_feat(self, points, img_metas=None, test_mode=False):
"""Extract features from points."""
voxels, num_points, coors = self.voxelize(points)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
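# voxelize() concatenates coors per sample in order, so the last row holds the largest batch index (hence batch size = idx + 1)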
batch_size = coors[-1, 0].item() + 1
x, point_misc = self.middle_encoder(voxel_features, coors, batch_size,
test_mode)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x, point_misc
@torch.no_grad()
@force_fp32()
def voxelize(self, points):
"""Apply hard voxelization to points."""
voxels, coors, num_points = [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
coors_batch = []
for i, coor in enumerate(coors):
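# prepend the sample index as a new first column: (N, 3) (z, y, x) -> (N, 4) (batch_idx, z, y, x)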
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
def forward_train(self,
points,
img_metas,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None):
"""Training forward function.
Args:
points (list[torch.Tensor]): Point cloud of each sample.
img_metas (list[dict]): Meta information of each sample.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth labels for
boxes of each sample.
gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
boxes to be ignored. Defaults to None.
Returns:
dict: Losses of each branch.
"""
x, point_misc = self.extract_feat(points, img_metas, test_mode=False)
aux_loss = self.middle_encoder.aux_loss(*point_misc, gt_bboxes_3d)
outs = self.bbox_head(x)
loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)
losses = self.bbox_head.loss(
*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(aux_loss)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
"""Test function without augmentaiton."""
x, _ = self.extract_feat(points, img_metas, test_mode=True)
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_metas, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def aug_test(self, points, img_metas, imgs=None, rescale=False):
"""Test function with augmentaiton."""
feats = self.extract_feats(points, img_metas, test_mode=True)
# only support aug_test for one sample
aug_bboxes = []
for x, img_meta in zip(feats, img_metas):
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_meta, rescale=rescale)
bbox_list = [
dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
for bboxes, scores, labels in bbox_list
]
aug_bboxes.append(bbox_list[0])
# after merging, bboxes will be rescaled to the original image size
merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
self.bbox_head.test_cfg)
return [merged_bboxes]
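For orientation, a minimal sketch of how such a detector could be assembled through the registry. Every value below (voxel size, ranges, sub-module choices) is an illustrative assumption rather than the shipped sassd_6x8_80e_kitti-3d-3class.py config, and the bbox_head is omitted for brevity:

sassd_sketch = dict(
    type='SASSD',
    voxel_layer=dict(
        max_num_points=5,  # assumed cap on points per voxel
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        voxel_size=[0.05, 0.05, 0.1],
        max_voxels=(16000, 40000)),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseEncoderSASSD',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]))
# detector = build_detector(sassd_sketch)  # via mmdet3d.models.build_detector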
@@ -30,7 +30,8 @@ class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
'please use "init_cfg" instead')
backbone.pretrained = pretrained
self.backbone = build_backbone(backbone)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
if neck is not None:
self.neck = build_neck(neck)
...
# Copyright (c) OpenMMLab. All rights reserved.
from .pillar_scatter import PointPillarsScatter
- from .sparse_encoder import SparseEncoder
+ from .sparse_encoder import SparseEncoder, SparseEncoderSASSD
from .sparse_unet import SparseUNet
- __all__ = ['PointPillarsScatter', 'SparseEncoder', 'SparseUNet']
+ __all__ = [
'PointPillarsScatter', 'SparseEncoder', 'SparseEncoderSASSD', 'SparseUNet'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.ops import points_in_boxes_all, three_interpolate, three_nn
from mmcv.runner import auto_fp16
from torch import nn as nn
from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
from mmdet.models.losses import sigmoid_focal_loss, smooth_l1_loss
from ..builder import MIDDLE_ENCODERS
if IS_SPCONV2_AVAILABLE:
@@ -30,9 +32,10 @@ class SparseEncoder(nn.Module):
Defaults to 128.
encoder_channels (tuple[tuple[int]], optional):
Convolutional channels of each encode block.
Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
encoder_paddings (tuple[tuple[int]], optional):
Paddings of each encode block.
- Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
+ Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
"""
@@ -106,8 +109,8 @@ class SparseEncoder(nn.Module):
"""Forward of SparseEncoder. """Forward of SparseEncoder.
Args: Args:
- voxel_features (torch.float32): Voxel features in shape (N, C).
+ voxel_features (torch.Tensor): Voxel features in shape (N, C).
- coors (torch.int32): Coordinates in shape (N, 4),
+ coors (torch.Tensor): Coordinates in shape (N, 4),
the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
batch_size (int): Batch size.
@@ -209,3 +212,280 @@ class SparseEncoder(nn.Module):
stage_layers = SparseSequential(*blocks_list)
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
@MIDDLE_ENCODERS.register_module()
class SparseEncoderSASSD(SparseEncoder):
r"""Sparse encoder for `SASSD <https://github.com/skyhehe123/SA-SSD>`_
Args:
in_channels (int): The number of input channels.
sparse_shape (list[int]): The sparse shape of input tensor.
order (list[str], optional): Order of conv module.
Defaults to ('conv', 'norm', 'act').
norm_cfg (dict, optional): Config of normalization layer. Defaults to
dict(type='BN1d', eps=1e-3, momentum=0.01).
base_channels (int, optional): Out channels for conv_input layer.
Defaults to 16.
output_channels (int, optional): Out channels for conv_out layer.
Defaults to 128.
encoder_channels (tuple[tuple[int]], optional):
Convolutional channels of each encode block.
Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
encoder_paddings (tuple[tuple[int]], optional):
Paddings of each encode block.
Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).
block_type (str, optional): Type of the block to use.
Defaults to 'conv_module'.
"""
def __init__(self,
in_channels,
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
block_type='conv_module'):
super(SparseEncoderSASSD, self).__init__(
in_channels=in_channels,
sparse_shape=sparse_shape,
order=order,
norm_cfg=norm_cfg,
base_channels=base_channels,
output_channels=output_channels,
encoder_channels=encoder_channels,
encoder_paddings=encoder_paddings,
block_type=block_type)
self.point_fc = nn.Linear(112, 64, bias=False)
self.point_cls = nn.Linear(64, 1, bias=False)
self.point_reg = nn.Linear(64, 3, bias=False)
@auto_fp16(apply_to=('voxel_features', ))
def forward(self, voxel_features, coors, batch_size, test_mode=False):
"""Forward of SparseEncoder.
Args:
voxel_features (torch.Tensor): Voxel features in shape (N, C).
coors (torch.Tensor): Coordinates in shape (N, 4),
the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
batch_size (int): Batch size.
test_mode (bool, optional): Whether in test mode.
Defaults to False.
Returns:
tuple: BEV backbone features, and a tuple of the mean
coordinates of the points, the classification result of
the points and the regression offsets of the points
(None in test mode).
"""
coors = coors.int()
input_sp_tensor = SparseConvTensor(voxel_features, coors,
self.sparse_shape, batch_size)
x = self.conv_input(input_sp_tensor)
encode_features = []
for encoder_layer in self.encoder_layers:
x = encoder_layer(x)
encode_features.append(x)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(encode_features[-1])
spatial_features = out.dense()
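# reshape the dense (N, C, D, H, W) output by folding depth into channels -> (N, C*D, H, W) BEV maps for the 2D head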
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
if test_mode:
return spatial_features, None
points_mean = torch.zeros_like(voxel_features)
points_mean[:, 0] = coors[:, 0]
points_mean[:, 1:] = voxel_features[:, :3]
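# points_mean stores (batch_idx, x, y, z) per voxel; this assumes the voxel encoder keeps mean point coordinates in its first three feature channels (e.g. HardSimpleVFE)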
# auxiliary network
p0 = self.make_auxiliary_points(
encode_features[0],
points_mean,
offset=(0, -40., -3.),
voxel_size=(.1, .1, .2))
p1 = self.make_auxiliary_points(
encode_features[1],
points_mean,
offset=(0, -40., -3.),
voxel_size=(.2, .2, .4))
p2 = self.make_auxiliary_points(
encode_features[2],
points_mean,
offset=(0, -40., -3.),
voxel_size=(.4, .4, .8))
pointwise = torch.cat([p0, p1, p2], dim=-1)
pointwise = self.point_fc(pointwise)
point_cls = self.point_cls(pointwise)
point_reg = self.point_reg(pointwise)
point_misc = (points_mean, point_cls, point_reg)
return spatial_features, point_misc
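# A hedged usage sketch (requires a CUDA build of the sparse-conv ops; shapes
# are illustrative and mirror the unit test near the end of this diff):
#     encoder = SparseEncoderSASSD(in_channels=5, sparse_shape=[40, 1024, 1024]).cuda()
#     voxel_features = torch.rand(1000, 5).cuda()          # (N, C) per-voxel features
#     coors = torch.randint(0, 4, (1000, 4)).int().cuda()  # (N, 4) = (batch_idx, z, y, x)
#     bev_feats, misc = encoder(voxel_features, coors, batch_size=4, test_mode=True)
#     assert misc is None  # the auxiliary branch only runs during training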
def get_auxiliary_targets(self, nxyz, gt_boxes3d, enlarge=1.0):
"""Get auxiliary target.
Args:
nxyz (torch.Tensor): (N, 4) coordinates of the points,
(batch_idx, x, y, z).
gt_boxes3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
enlarge (float, optional): Enlarged scale. Defaults to 1.0.
Returns:
tuple[torch.Tensor]: Label of the points and
center offsets of the points.
"""
center_offsets = list()
pts_labels = list()
for i in range(len(gt_boxes3d)):
boxes3d = gt_boxes3d[i].tensor.cpu()
idx = torch.nonzero(nxyz[:, 0] == i).view(-1)
new_xyz = nxyz[idx, 1:].cpu()
boxes3d[:, 3:6] *= enlarge
pts_in_flag, center_offset = self.calculate_pts_offsets(
new_xyz, boxes3d)
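# a point counts as foreground if it lies inside any (enlarged) gt box: reduce the (T, M) indicator over the box dimension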
pts_label = pts_in_flag.max(0)[0].byte()
pts_labels.append(pts_label)
center_offsets.append(center_offset)
center_offsets = torch.cat(center_offsets).cuda()
pts_labels = torch.cat(pts_labels).to(center_offsets.device)
return pts_labels, center_offsets
def calculate_pts_offsets(self, points, boxes):
"""Find all boxes in which each point is, as well as the offsets from
the box centers.
Args:
points (torch.Tensor): [M, 3], [x, y, z] in LiDAR/DEPTH coordinate
boxes (torch.Tensor): [T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
Returns:
tuple[torch.Tensor]: Point indices of boxes with the shape of
(T, M). Default background = 0.
And offsets from the box centers of points,
if it belongs to the box, with the shape of (M, 3).
Default background = 0.
"""
boxes_num = len(boxes)
pts_num = len(points)
points = points.cuda()
boxes = boxes.to(points.device)
box_idxs_of_pts = points_in_boxes_all(points[None, ...], boxes[None,
...])
pts_indices = box_idxs_of_pts.squeeze(0).transpose(0, 1)
center_offsets = torch.zeros_like(points).to(points.device)
for i in range(boxes_num):
for j in range(pts_num):
if pts_indices[i][j] == 1:
center_offsets[j][0] = points[j][0] - boxes[i][0]
center_offsets[j][1] = points[j][1] - boxes[i][1]
center_offsets[j][2] = (
points[j][2] - (boxes[i][2] + boxes[i][5] / 2.0))  # box center z = bottom z + half height
return pts_indices.cpu(), center_offsets.cpu()
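# A hedged vectorized alternative to the double loop above, assuming each
# point lies in at most one gt box (true for non-overlapping boxes); offsets
# come from the first containing box rather than the last:
#     mask = pts_indices.t().bool()           # (M, T) point-in-box indicator
#     centers = boxes[:, :3].clone()
#     centers[:, 2] += boxes[:, 5] / 2.0      # bottom center -> geometric center
#     hit = mask.any(dim=1)
#     box_id = mask.float().argmax(dim=1)     # first containing box per point
#     offsets = torch.zeros_like(points)
#     offsets[hit] = points[hit] - centers[box_id[hit]]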
def aux_loss(self, points, point_cls, point_reg, gt_bboxes):
"""Calculate auxiliary loss.
Args:
points (torch.Tensor): Mean feature value of the points.
point_cls (torch.Tensor): Classification result of the points.
point_reg (torch.Tensor): Regression offsets of the points.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
Returns:
dict: Auxiliary losses of the points.
"""
num_boxes = len(gt_bboxes)
pts_labels, center_targets = self.get_auxiliary_targets(
points, gt_bboxes)
rpn_cls_target = pts_labels.long()
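# foreground/background masks; both loss terms are normalized by the positive count, clamped to avoid division by zero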
pos = (pts_labels > 0).float()
neg = (pts_labels == 0).float()
pos_normalizer = pos.sum().clamp(min=1.0)
cls_weights = pos + neg
reg_weights = pos
reg_weights = reg_weights / pos_normalizer
aux_loss_cls = sigmoid_focal_loss(
point_cls,
rpn_cls_target,
weight=cls_weights,
avg_factor=pos_normalizer)
aux_loss_cls /= num_boxes
weight = reg_weights[..., None]
aux_loss_reg = smooth_l1_loss(point_reg, center_targets, beta=1 / 9.)
aux_loss_reg = torch.sum(aux_loss_reg * weight)[None]
aux_loss_reg /= num_boxes
aux_loss_cls, aux_loss_reg = [aux_loss_cls], [aux_loss_reg]
return dict(aux_loss_cls=aux_loss_cls, aux_loss_reg=aux_loss_reg)
def make_auxiliary_points(self,
source_tensor,
target,
offset=(0., -40., -3.),
voxel_size=(.05, .05, .1)):
"""Make auxiliary points for loss computation.
Args:
source_tensor (torch.Tensor): (M, C) features to be propagated.
target (torch.Tensor): (N, 4) bxyz positions of the
target features.
offset (tuple[float], optional): Voxelization offset.
Defaults to (0., -40., -3.)
voxel_size (tuple[float], optional): Voxelization size.
Defaults to (.05, .05, .1)
Returns:
torch.Tensor: (N, C) interpolated features at the target positions.
"""
# Transfer sparse tensor indices to point coordinates
source = source_tensor.indices.float()
offset = torch.Tensor(offset).to(source.device)
voxel_size = torch.Tensor(voxel_size).to(source.device)
source[:, 1:] = (
source[:, [3, 2, 1]] * voxel_size + offset + .5 * voxel_size)
source_feats = source_tensor.features[None, ...].transpose(1, 2)
# Interpolate features onto the auxiliary points
dist, idx = three_nn(target[None, ...], source[None, ...])
dist_recip = 1.0 / (dist + 1e-8)
norm = torch.sum(dist_recip, dim=2, keepdim=True)
weight = dist_recip / norm
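# inverse-distance interpolation weights: w_k = (1/d_k) / sum_j (1/d_j) over the 3 nearest source points, so closer neighbours dominate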
new_features = three_interpolate(source_feats.contiguous(), idx,
weight)
return new_features.squeeze(0).transpose(0, 1)
@@ -11,6 +11,7 @@ else:
from mmcv.runner import BaseModule, auto_fp16
from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.ops.sparse_block import replace_feature
from ..builder import MIDDLE_ENCODERS
@@ -168,10 +169,11 @@ class SparseUNet(BaseModule):
:obj:`SparseConvTensor`: Upsampled feature.
"""
x = lateral_layer(x_lateral)
- x.features = torch.cat((x_bottom.features, x.features), dim=1)
+ x = replace_feature(x, torch.cat((x_bottom.features, x.features), dim=1))
x_merge = merge_layer(x)
x = self.reduce_channel(x, x_merge.features.shape[1])
- x.features = x_merge.features + x.features
+ x = replace_feature(x, x_merge.features + x.features)
x = upsample_layer(x)
return x
@@ -191,8 +193,7 @@ class SparseUNet(BaseModule):
n, in_channels = features.shape
assert (in_channels % out_channels
== 0) and (in_channels >= out_channels)
- x.features = features.view(n, out_channels, -1).sum(dim=2)
+ x = replace_feature(x, features.view(n, out_channels, -1).sum(dim=2))
return x
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
...
@@ -2,7 +2,15 @@
import numpy as np
import torch
from mmcv.cnn import ConvModule, normal_init
- from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential
from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
if IS_SPCONV2_AVAILABLE:
from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d,
SparseSequential)
else:
from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential
from mmcv.runner import BaseModule
from torch import nn as nn
@@ -252,7 +260,7 @@ class PartA2BboxHead(BaseModule):
sparse_idx[:, 2], sparse_idx[:, 3]]
seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],
sparse_idx[:, 2], sparse_idx[:, 3]]
- coords = sparse_idx.int()
+ coords = sparse_idx.int().contiguous()
part_features = SparseConvTensor(part_features, coords, sparse_shape,
rcnn_batch_size)
seg_features = SparseConvTensor(seg_features, coords, sparse_shape,
...
# Copyright (c) OpenMMLab. All rights reserved.
from .write_spconv2 import register_spconv2
__all__ = ['register_spconv2']
# Copyright (c) Open-MMLab. All rights reserved.
- __version__ = '1.0.0rc2'
+ __version__ = '1.0.0rc3'
short_version = __version__
...
- mmcv-full>=1.4.8,<=1.5.0
+ mmcv-full>=1.4.8,<=1.6.0
- mmdet>=2.19.0,<=3.0.0
+ mmdet>=2.24.0,<=3.0.0
mmsegmentation>=0.20.0,<=1.0.0
mmcv>=1.4.8
- mmdet>=2.19.0
+ mmdet>=2.24.0
mmsegmentation>=0.20.1
torch
torchvision
@@ -310,7 +310,8 @@ def test_load_image_from_file_mono_3d():
repr_str = repr(load_image_from_file_mono_3d)
expected_repr_str = 'LoadImageFromFileMono3D(to_float32=False, ' \
- "color_type='color', file_client_args={'backend': 'disk'})"
+ "color_type='color', channel_order='bgr', " \
+ "file_client_args={'backend': 'disk'})"
assert repr_str == expected_repr_str
...
@@ -25,3 +25,25 @@ def test_sparse_encoder():
ret = sparse_encoder(voxel_features, coors, 4)
assert ret.shape == torch.Size([4, 256, 128, 128])
def test_sparse_encoder_for_ssd():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
sparse_encoder_for_ssd_cfg = dict(
type='SparseEncoderSASSD',
in_channels=5,
sparse_shape=[40, 1024, 1024],
order=('conv', 'norm', 'act'),
encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
128)),
encoder_paddings=((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1,
1)),
block_type='basicblock')
sparse_encoder = build_middle_encoder(sparse_encoder_for_ssd_cfg).cuda()
voxel_features = torch.rand([207842, 5]).cuda()
coors = torch.randint(0, 4, [207842, 4]).cuda()
ret, _ = sparse_encoder(voxel_features, coors, 4, True)
assert ret.shape == torch.Size([4, 256, 128, 128])
@@ -567,3 +567,42 @@ def test_smoke():
assert boxes_3d.tensor.shape[1] == 7
assert scores_3d.shape[0] >= 0
assert labels_3d.shape[0] >= 0
def test_sassd():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
_setup_seed(0)
sassd_cfg = _get_detector_cfg('sassd/sassd_6x8_80e_kitti-3d-3class.py')
self = build_detector(sassd_cfg).cuda()
points_0 = torch.rand([2010, 4], device='cuda')
points_1 = torch.rand([2020, 4], device='cuda')
points = [points_0, points_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
# test forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['loss_cls'][0] >= 0
assert losses['loss_bbox'][0] >= 0
assert losses['loss_dir'][0] >= 0
assert losses['aux_loss_cls'][0] >= 0
assert losses['aux_loss_reg'][0] >= 0
# test simple_test
with torch.no_grad():
results = self.simple_test(points, img_metas)
boxes_3d = results[0]['boxes_3d']
scores_3d = results[0]['scores_3d']
labels_3d = results[0]['labels_3d']
assert boxes_3d.tensor.shape == (50, 7)
assert scores_3d.shape == torch.Size([50])
assert labels_3d.shape == torch.Size([50])
@@ -29,11 +29,17 @@ def test_merge_aug_bboxes_3d():
[2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],
[-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]],
device='cuda'))
- labels_3d = torch.tensor([0, 7, 6])
+ labels_3d = torch.tensor([0, 7, 6], device='cuda')
- scores_3d = torch.tensor([0.5, 1.0, 1.0])
- aug_result = dict(
- boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
- aug_results = [aug_result, aug_result, aug_result]
+ scores_3d_1 = torch.tensor([0.3, 0.6, 0.9], device='cuda')
+ scores_3d_2 = torch.tensor([0.2, 0.5, 0.8], device='cuda')
+ scores_3d_3 = torch.tensor([0.1, 0.4, 0.7], device='cuda')
+ aug_result_1 = dict(
+ boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d_1)
+ aug_result_2 = dict(
+ boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d_2)
+ aug_result_3 = dict(
+ boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d_3)
+ aug_results = [aug_result_1, aug_result_2, aug_result_3]
test_cfg = mmcv.ConfigDict(
use_rotate_nms=True,
nms_across_levels=False,
@@ -53,9 +59,8 @@ def test_merge_aug_bboxes_3d():
[1.0473, -4.1687, -1.2317, 2.3021, 1.8876, 1.9696, -1.6956],
[-1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.4460],
[2.0946, 8.3374, -2.4634, 4.6042, 3.7752, 3.9392, 1.6956]])
- expected_scores_3d = torch.tensor([
- 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.5000, 0.5000
- ])
+ expected_scores_3d = torch.tensor(
+ [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
expected_labels_3d = torch.tensor([6, 6, 6, 7, 7, 7, 0, 0, 0])
assert torch.allclose(results['boxes_3d'].tensor, expected_boxes_3d)
assert torch.allclose(results['scores_3d'], expected_scores_3d)
...
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
@@ -73,3 +74,41 @@ def test_circle_nms():
keep = circle_nms(boxes.numpy(), 0.175)
expected_keep = [1, 2, 3, 4, 5, 6, 7, 8, 9]
assert np.all(keep == expected_keep)
# copied from tests/test_ops/test_iou3d.py from mmcv<=1.5
@pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_bev():
from mmdet3d.core.post_processing import nms_bev
np_boxes = np.array(
[[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0],
[3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]],
dtype=np.float32)
np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)
np_inds = np.array([1, 0, 3])
boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores)
inds = nms_bev(boxes.cuda(), scores.cuda(), thresh=0.3)
assert np.allclose(inds.cpu().numpy(), np_inds)
# copied from tests/test_ops/test_iou3d.py from mmcv<=1.5
@pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_normal_bev():
from mmdet3d.core.post_processing import nms_normal_bev
np_boxes = np.array(
[[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0],
[3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]],
dtype=np.float32)
np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)
np_inds = np.array([1, 0, 3])
boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores)
inds = nms_normal_bev(boxes.cuda(), scores.cuda(), thresh=0.3)
assert np.allclose(inds.cpu().numpy(), np_inds)
@@ -7,7 +7,7 @@ from tools.data_converter import kitti_converter as kitti
from tools.data_converter import lyft_converter as lyft_converter
from tools.data_converter import nuscenes_converter as nuscenes_converter
from tools.data_converter.create_gt_database import (
- create_groundtruth_database, GTDatabaseCreater)
+ GTDatabaseCreater, create_groundtruth_database)
def kitti_data_prep(root_path,
...