Unverified commit fa724b10 authored by Quantum Cat, committed by GitHub

Add type hint for middle_encoder and voxel_encoder (#2556)

* 2023/05/26 add type hint

* 2023/05/26 modify ugly typehint
parent 8e634dd1
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import torch
from torch import nn
from torch import Tensor, nn
from mmdet3d.registry import MODELS
......@@ -16,14 +18,17 @@ class PointPillarsScatter(nn.Module):
output_shape (list[int]): Required output shape of features.
"""
def __init__(self, in_channels, output_shape):
def __init__(self, in_channels: int, output_shape: List[int]):
super().__init__()
self.output_shape = output_shape
self.ny = output_shape[0]
self.nx = output_shape[1]
self.in_channels = in_channels
def forward(self, voxel_features, coors, batch_size=None):
def forward(self,
voxel_features: Tensor,
coors: Tensor,
batch_size: int = None) -> Tensor:
"""Foraward function to scatter features."""
# TODO: rewrite the function in a batch manner
# no need to deal with different batch cases
......@@ -32,7 +37,7 @@ class PointPillarsScatter(nn.Module):
else:
return self.forward_single(voxel_features, coors)
def forward_single(self, voxel_features, coors):
def forward_single(self, voxel_features: Tensor, coors: Tensor) -> Tensor:
"""Scatter features of single sample.
Args:
......@@ -56,7 +61,8 @@ class PointPillarsScatter(nn.Module):
canvas = canvas.view(1, self.in_channels, self.ny, self.nx)
return canvas
def forward_batch(self, voxel_features, coors, batch_size):
def forward_batch(self, voxel_features: Tensor, coors: Tensor,
batch_size: int) -> Tensor:
"""Scatter features of single sample.
Args:
......
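For orientation, the scatter that these newly annotated signatures describe boils down to index-assignment of per-pillar feature vectors into a dense BEV canvas. Below is a minimal standalone sketch in plain PyTorch, not the mmdet3d class itself; the (batch_idx, z, y, x) column order of coors and the toy shapes are assumptions used only for illustration.
import torch
from torch import Tensor

def scatter_single(voxel_features: Tensor, coors: Tensor,
                   in_channels: int, ny: int, nx: int) -> Tensor:
    """Scatter (num_pillars, C) pillar features onto a (1, C, ny, nx) canvas."""
    canvas = torch.zeros(in_channels, nx * ny,
                         dtype=voxel_features.dtype,
                         device=voxel_features.device)
    # coors is assumed to hold (batch_idx, z, y, x) per pillar.
    indices = (coors[:, 2] * nx + coors[:, 3]).long()
    canvas[:, indices] = voxel_features.t()
    return canvas.view(1, in_channels, ny, nx)

# Toy call: 5 pillars, 4 channels, an 8 x 8 BEV grid.
feats = torch.rand(5, 4)
coors = torch.tensor([[0, 0, 0, 0], [0, 0, 1, 2], [0, 0, 3, 3],
                      [0, 0, 7, 7], [0, 0, 5, 1]])
out = scatter_single(feats, coors, in_channels=4, ny=8, nx=8)
assert out.shape == (1, 4, 8, 8)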
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
from typing import Dict, List, Optional, Tuple, Union
import torch
from mmcv.ops import points_in_boxes_all, three_interpolate, three_nn
......@@ -18,6 +18,8 @@ if IS_SPCONV2_AVAILABLE:
else:
from mmcv.ops import SparseConvTensor, SparseSequential
TwoTupleIntType = Tuple[Tuple[int]]
@MODELS.register_module()
class SparseEncoder(nn.Module):
......@@ -26,7 +28,7 @@ class SparseEncoder(nn.Module):
Args:
in_channels (int): The number of input channels.
sparse_shape (list[int]): The sparse shape of input tensor.
order (list[str], optional): Order of conv module.
order (tuple[str], optional): Order of conv module.
Defaults to ('conv', 'norm', 'act').
norm_cfg (dict, optional): Config of normalization layer. Defaults to
dict(type='BN1d', eps=1e-3, momentum=0.01).
......@@ -46,19 +48,24 @@ class SparseEncoder(nn.Module):
Defaults to False.
"""
def __init__(self,
in_channels,
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
block_type='conv_module',
return_middle_feats=False):
def __init__(
self,
in_channels: int,
sparse_shape: List[int],
order: Optional[Tuple[str]] = ('conv', 'norm', 'act'),
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
base_channels: Optional[int] = 16,
output_channels: Optional[int] = 128,
encoder_channels: Optional[TwoTupleIntType] = ((16, ), (32, 32,
32),
(64, 64,
64), (64, 64, 64)),
encoder_paddings: Optional[TwoTupleIntType] = ((1, ), (1, 1, 1),
(1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: Optional[str] = 'conv_module',
return_middle_feats: Optional[bool] = False):
super().__init__()
assert block_type in ['conv_module', 'basicblock']
self.sparse_shape = sparse_shape
......@@ -112,7 +119,8 @@ class SparseEncoder(nn.Module):
conv_type='SparseConv3d')
@amp.autocast(enabled=False)
def forward(self, voxel_features, coors, batch_size):
def forward(self, voxel_features: Tensor, coors: Tensor,
batch_size: int) -> Union[Tensor, Tuple[Tensor, list]]:
"""Forward of SparseEncoder.
Args:
......@@ -154,12 +162,14 @@ class SparseEncoder(nn.Module):
else:
return spatial_features
def make_encoder_layers(self,
make_block,
norm_cfg,
in_channels,
block_type='conv_module',
conv_cfg=dict(type='SubMConv3d')):
def make_encoder_layers(
self,
make_block: nn.Module,
norm_cfg: Dict,
in_channels: int,
block_type: Optional[str] = 'conv_module',
conv_cfg: Optional[dict] = dict(type='SubMConv3d')
) -> int:
"""make encoder layers using sparse convs.
Args:
......@@ -256,18 +266,22 @@ class SparseEncoderSASSD(SparseEncoder):
Defaults to 'conv_module'.
"""
def __init__(self,
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Tuple[tuple] = ((16, ), (32, 32, 32),
(64, 64, 64), (64, 64, 64)),
encoder_paddings: Tuple[tuple] = ((1, ), (1, 1, 1), (1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: str = 'conv_module'):
def __init__(
self,
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Optional[TwoTupleIntType] = ((16, ), (32, 32,
32),
(64, 64,
64), (64, 64, 64)),
encoder_paddings: Optional[TwoTupleIntType] = ((1, ), (1, 1, 1),
(1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: str = 'conv_module'):
super(SparseEncoderSASSD, self).__init__(
in_channels=in_channels,
sparse_shape=sparse_shape,
......
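A note on the TwoTupleIntType = Tuple[Tuple[int]] alias introduced above: read strictly, Tuple[Tuple[int]] denotes a 1-tuple containing a 1-tuple of int, while the actual defaults are variable-length nested tuples whose inner entries may be ints or tuples of ints (e.g. ((0, 1, 1), 1, 1) in encoder_paddings). The sketch below shows a stricter alias that would also accept those defaults; NestedIntTuple is a hypothetical name used only for illustration, not part of this commit.
from typing import Tuple, Union

# Inner entries may be plain ints or per-dimension tuples of ints,
# e.g. the ((0, 1, 1), 1, 1) padding spec above.
InnerEntry = Union[int, Tuple[int, ...]]
NestedIntTuple = Tuple[Tuple[InnerEntry, ...], ...]

encoder_channels: NestedIntTuple = ((16, ), (32, 32, 32), (64, 64, 64),
                                    (64, 64, 64))
encoder_paddings: NestedIntTuple = ((1, ), (1, 1, 1), (1, 1, 1),
                                    ((0, 1, 1), 1, 1))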
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple
import torch
from torch import Tensor, nn
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
......@@ -14,6 +17,8 @@ from mmdet3d.models.layers import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.models.layers.sparse_block import replace_feature
from mmdet3d.registry import MODELS
TwoTupleIntType = Tuple[Tuple[int]]
@MODELS.register_module()
class SparseUNet(BaseModule):
......@@ -35,21 +40,28 @@ class SparseUNet(BaseModule):
decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.
"""
def __init__(self,
in_channels,
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1)),
init_cfg=None):
def __init__(
self,
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Optional[TwoTupleIntType] = ((16, ), (32, 32,
32),
(64, 64,
64), (64, 64, 64)),
encoder_paddings: Optional[TwoTupleIntType] = ((1, ), (1, 1, 1),
(1, 1, 1),
((0, 1, 1), 1, 1)),
decoder_channels: Optional[TwoTupleIntType] = ((64, 64,
64), (64, 64, 32),
(32, 32,
16), (16, 16, 16)),
decoder_paddings: Optional[TwoTupleIntType] = ((1, 0), (1, 0),
(0, 0), (0, 1)),
init_cfg: bool = None):
super().__init__(init_cfg=init_cfg)
self.sparse_shape = sparse_shape
self.in_channels = in_channels
......@@ -101,7 +113,8 @@ class SparseUNet(BaseModule):
indice_key='spconv_down2',
conv_type='SparseConv3d')
def forward(self, voxel_features, coors, batch_size):
def forward(self, voxel_features: Tensor, coors: Tensor,
batch_size: int) -> Dict[str, Tensor]:
"""Forward of SparseUNet.
Args:
......@@ -152,8 +165,10 @@ class SparseUNet(BaseModule):
return ret
def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,
merge_layer, upsample_layer):
def decoder_layer_forward(
self, x_lateral: SparseConvTensor, x_bottom: SparseConvTensor,
lateral_layer: SparseBasicBlock, merge_layer: SparseSequential,
upsample_layer: SparseSequential) -> SparseConvTensor:
"""Forward of upsample and residual block.
Args:
......@@ -176,7 +191,8 @@ class SparseUNet(BaseModule):
return x
@staticmethod
def reduce_channel(x, out_channels):
def reduce_channel(x: SparseConvTensor,
out_channels: int) -> SparseConvTensor:
"""reduce channel for element-wise addition.
Args:
......@@ -194,7 +210,8 @@ class SparseUNet(BaseModule):
x = replace_feature(x, features.view(n, out_channels, -1).sum(dim=2))
return x
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
def make_encoder_layers(self, make_block: nn.Module, norm_cfg: dict,
in_channels: int) -> int:
"""make encoder layers using sparse convs.
Args:
......@@ -240,7 +257,8 @@ class SparseUNet(BaseModule):
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
def make_decoder_layers(self, make_block, norm_cfg, in_channels):
def make_decoder_layers(self, make_block: nn.Module, norm_cfg: dict,
in_channels: int) -> int:
"""make decoder layers using sparse convs.
Args:
......
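The reduce_channel signature annotated above covers a small folding trick: an (N, C_in) feature matrix is viewed as (N, C_out, C_in / C_out) and summed over the last axis so that lateral and bottom features can be added element-wise. A standalone sketch on a dense tensor follows; reduce_channel_dense is a hypothetical helper, and it assumes C_in is divisible by C_out, as the view in the original method also requires.
import torch
from torch import Tensor

def reduce_channel_dense(features: Tensor, out_channels: int) -> Tensor:
    """Fold (N, C_in) features down to (N, C_out) by summing channel groups."""
    n, in_channels = features.shape
    assert in_channels % out_channels == 0 and in_channels >= out_channels
    return features.view(n, out_channels, -1).sum(dim=2)

feats = torch.arange(12.).view(2, 6)                    # N=2, C_in=6
reduced = reduce_channel_dense(feats, out_channels=3)   # -> shape (2, 3)
assert reduced.shape == (2, 3)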
......@@ -7,12 +7,13 @@ import torch.nn as nn
from mmcv.cnn import ConvModule
from mmcv.ops.furthest_point_sample import furthest_point_sample
from mmengine.model import BaseModule
from torch import Tensor
from mmdet3d.registry import MODELS
from mmdet3d.utils import InstanceList
def bilinear_interpolate_torch(inputs, x, y):
def bilinear_interpolate_torch(inputs: Tensor, x: Tensor, y: Tensor) -> Tensor:
"""Bilinear interpolate for inputs."""
x0 = torch.floor(x).long()
x1 = x0 + 1
......
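For context, bilinear_interpolate_torch samples a feature grid at fractional (x, y) positions by blending the four neighbouring cells. The sketch below is a self-contained variant written from the standard bilinear formula, not copied from the file; it assumes an (H, W, C) grid and simply clamps indices at the border.
import torch
from torch import Tensor

def bilinear_sample(inputs: Tensor, x: Tensor, y: Tensor) -> Tensor:
    """Bilinearly sample an (H, W, C) grid at fractional (x, y) locations."""
    x0 = torch.floor(x).long().clamp(0, inputs.shape[1] - 1)
    x1 = (x0 + 1).clamp(0, inputs.shape[1] - 1)
    y0 = torch.floor(y).long().clamp(0, inputs.shape[0] - 1)
    y1 = (y0 + 1).clamp(0, inputs.shape[0] - 1)
    # Interpolation weights of the four neighbouring cells.
    wa = (x1.float() - x) * (y1.float() - y)
    wb = (x1.float() - x) * (y - y0.float())
    wc = (x - x0.float()) * (y1.float() - y)
    wd = (x - x0.float()) * (y - y0.float())
    return (inputs[y0, x0] * wa[:, None] + inputs[y1, x0] * wb[:, None] +
            inputs[y0, x1] * wc[:, None] + inputs[y1, x1] * wd[:, None])

grid = torch.rand(4, 4, 8)                     # H=4, W=4, C=8
sampled = bilinear_sample(grid,
                          torch.tensor([1.5, 2.25]),
                          torch.tensor([0.5, 2.75]))
assert sampled.shape == (2, 8)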
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import torch
from mmcv.cnn import build_norm_layer
from mmcv.ops import DynamicScatter
from torch import nn
from torch import Tensor, nn
from mmdet3d.registry import MODELS
from .utils import PFNLayer, get_paddings_indicator
......@@ -37,16 +39,18 @@ class PillarFeatureNet(nn.Module):
"""
def __init__(self,
in_channels=4,
feat_channels=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
legacy=True):
in_channels: Optional[int] = 4,
feat_channels: Optional[tuple] = (64, ),
with_distance: Optional[bool] = False,
with_cluster_center: Optional[bool] = True,
with_voxel_center: Optional[bool] = True,
voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
40, 1),
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
mode: Optional[str] = 'max',
legacy: Optional[bool] = True):
super(PillarFeatureNet, self).__init__()
assert len(feat_channels) > 0
self.legacy = legacy
......@@ -88,7 +92,8 @@ class PillarFeatureNet(nn.Module):
self.z_offset = self.vz / 2 + point_cloud_range[2]
self.point_cloud_range = point_cloud_range
def forward(self, features, num_points, coors, *args, **kwargs):
def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
*args, **kwargs) -> Tensor:
"""Forward function.
Args:
......@@ -187,16 +192,18 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
"""
def __init__(self,
in_channels=4,
feat_channels=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
legacy=True):
in_channels: Optional[int] = 4,
feat_channels: Optional[tuple] = (64, ),
with_distance: Optional[bool] = False,
with_cluster_center: Optional[bool] = True,
with_voxel_center: Optional[bool] = True,
voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
40, 1),
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
mode: Optional[str] = 'max',
legacy: Optional[bool] = True):
super(DynamicPillarFeatureNet, self).__init__(
in_channels,
feat_channels,
......@@ -229,7 +236,8 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
self.cluster_scatter = DynamicScatter(
voxel_size, point_cloud_range, average_points=True)
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
def map_voxel_center_to_point(self, pts_coors: Tensor, voxel_mean: Tensor,
voxel_coors: Tensor) -> Tensor:
"""Map the centers of voxels to its corresponding points.
Args:
......@@ -268,7 +276,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
center_per_point = canvas[:, voxel_index.long()].t()
return center_per_point
def forward(self, features, coors):
def forward(self, features: Tensor, coors: Tensor) -> Tensor:
"""Forward function.
Args:
......
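With the annotations above, a PillarFeatureNet call site can be checked against the signature directly. A hedged usage sketch follows; it assumes mmdet3d (and its mmcv dependency) is installed, and the shapes are toy values chosen only to exercise the forward pass.
import torch
from mmdet3d.models.voxel_encoders import PillarFeatureNet

pfn = PillarFeatureNet(
    in_channels=4,
    feat_channels=(64, ),
    voxel_size=(0.2, 0.2, 4),
    point_cloud_range=(0, -40, -3, 70.4, 40, 1))

# 10 pillars, up to 32 padded points per pillar, 4 features per point.
features = torch.rand(10, 32, 4)
num_points = torch.randint(1, 32, (10, ))
coors = torch.zeros(10, 4, dtype=torch.long)     # (batch_idx, z, y, x)
pillar_feats = pfn(features, num_points, coors)  # expected: one 64-dim vector per pillar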
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmcv.cnn import build_norm_layer
from torch import nn
from torch import Tensor, nn
from torch.nn import functional as F
def get_paddings_indicator(actual_num, max_num, axis=0):
def get_paddings_indicator(actual_num: Tensor,
max_num: Tensor,
axis: int = 0) -> Tensor:
"""Create boolean mask by actually number of a padded tensor.
Args:
......@@ -46,11 +50,12 @@ class VFELayer(nn.Module):
"""
def __init__(self,
in_channels,
out_channels,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
max_out=True,
cat_max=True):
in_channels: int,
out_channels: int,
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
max_out: Optional[bool] = True,
cat_max: Optional[bool] = True):
super(VFELayer, self).__init__()
self.cat_max = cat_max
self.max_out = max_out
......@@ -59,7 +64,7 @@ class VFELayer(nn.Module):
self.norm = build_norm_layer(norm_cfg, out_channels)[1]
self.linear = nn.Linear(in_channels, out_channels, bias=False)
def forward(self, inputs):
def forward(self, inputs: Tensor) -> Tensor:
"""Forward function.
Args:
......@@ -119,11 +124,12 @@ class PFNLayer(nn.Module):
"""
def __init__(self,
in_channels,
out_channels,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
last_layer=False,
mode='max'):
in_channels: int,
out_channels: int,
norm_cfg: Optional[dict] = dict(
type='BN1d', eps=1e-3, momentum=0.01),
last_layer: Optional[bool] = False,
mode: Optional[str] = 'max'):
super().__init__()
self.name = 'PFNLayer'
......@@ -138,7 +144,10 @@ class PFNLayer(nn.Module):
assert mode in ['max', 'avg']
self.mode = mode
def forward(self, inputs, num_voxels=None, aligned_distance=None):
def forward(self,
inputs: Tensor,
num_voxels: Optional[Tensor] = None,
aligned_distance: Optional[Tensor] = None) -> Tensor:
"""Forward function.
Args:
......
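get_paddings_indicator, whose new signature appears above, builds the boolean mask used to zero out padded slots: each slot index is compared against the per-pillar point count. A minimal re-implementation of the idea follows; paddings_indicator is a hypothetical name, and unlike the real helper it omits the axis argument.
import torch
from torch import Tensor

def paddings_indicator(actual_num: Tensor, max_num: int) -> Tensor:
    """Return an (N, max_num) bool mask, True where a slot holds a real point."""
    # Compare each slot index 0..max_num-1 against the per-pillar count.
    slot_idx = torch.arange(max_num, device=actual_num.device)
    return slot_idx.unsqueeze(0) < actual_num.unsqueeze(1)

counts = torch.tensor([3, 1, 5])
mask = paddings_indicator(counts, max_num=5)
# mask[0] -> [True, True, True, False, False]
assert mask.shape == (3, 5) and mask.sum().item() == 9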
......@@ -57,13 +57,14 @@ class DynamicSimpleVFE(nn.Module):
"""
def __init__(self,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
voxel_size: Tuple[float] = (0.2, 0.2, 4),
point_cloud_range: Tuple[float] = (0, -40, -3, 70.4, 40, 1)):
super(DynamicSimpleVFE, self).__init__()
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
@torch.no_grad()
def forward(self, features, coors, *args, **kwargs):
def forward(self, features: Tensor, coors: Tensor, *args,
**kwargs) -> Tensor:
"""Forward function.
Args:
......@@ -114,17 +115,17 @@ class DynamicVFE(nn.Module):
"""
def __init__(self,
in_channels=4,
feat_channels=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
fusion_layer=None,
return_point_feats=False):
in_channels: int = 4,
feat_channels: list = [],
with_distance: bool = False,
with_cluster_center: bool = False,
with_voxel_center: bool = False,
voxel_size: Tuple[float] = (0.2, 0.2, 4),
point_cloud_range: Tuple[float] = (0, -40, -3, 70.4, 40, 1),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
mode: str = 'max',
fusion_layer: dict = None,
return_point_feats: bool = False):
super(DynamicVFE, self).__init__()
assert mode in ['avg', 'max']
assert len(feat_channels) > 0
......@@ -171,7 +172,8 @@ class DynamicVFE(nn.Module):
if fusion_layer is not None:
self.fusion_layer = MODELS.build(fusion_layer)
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
def map_voxel_center_to_point(self, pts_coors: Tensor, voxel_mean: Tensor,
voxel_coors: Tensor) -> Tensor:
"""Map voxel features to its corresponding points.
Args:
......@@ -214,13 +216,13 @@ class DynamicVFE(nn.Module):
return center_per_point
def forward(self,
features,
coors,
points=None,
img_feats=None,
img_metas=None,
features: Tensor,
coors: Tensor,
points: Optional[Sequence[Tensor]] = None,
img_feats: Optional[Sequence[Tensor]] = None,
img_metas: Optional[dict] = None,
*args,
**kwargs):
**kwargs) -> tuple:
"""Forward functions.
Args:
......@@ -313,17 +315,17 @@ class HardVFE(nn.Module):
"""
def __init__(self,
in_channels=4,
feat_channels=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max',
fusion_layer=None,
return_point_feats=False):
in_channels: int = 4,
feat_channels: list = [],
with_distance: bool = False,
with_cluster_center: bool = False,
with_voxel_center: bool = False,
voxel_size: Tuple[float] = (0.2, 0.2, 4),
point_cloud_range: Tuple[float] = (0, -40, -3, 70.4, 40, 1),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
mode: str = 'max',
fusion_layer: dict = None,
return_point_feats: bool = False):
super(HardVFE, self).__init__()
assert len(feat_channels) > 0
if with_cluster_center:
......@@ -379,13 +381,13 @@ class HardVFE(nn.Module):
self.fusion_layer = MODELS.build(fusion_layer)
def forward(self,
features,
num_points,
coors,
img_feats=None,
img_metas=None,
features: Tensor,
num_points: Tensor,
coors: Tensor,
img_feats: Optional[Sequence[Tensor]] = None,
img_metas: Optional[dict] = None,
*args,
**kwargs):
**kwargs) -> tuple:
"""Forward functions.
Args:
......@@ -448,8 +450,10 @@ class HardVFE(nn.Module):
return voxel_feats
def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
img_metas):
def fusion_with_mask(self, features: Tensor, mask: Tensor,
voxel_feats: Tensor, coors: Tensor,
img_feats: Sequence[Tensor],
img_metas: Sequence[dict]) -> Tensor:
"""Fuse image and point features with mask.
Args:
......
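The map_voxel_center_to_point signatures annotated above describe a scatter/gather round trip: voxel-level values are written into a dense canvas indexed by flattened voxel coordinates, then read back per point with each point's own voxel index (see the canvas[:, voxel_index.long()].t() line earlier in the diff). A simplified sketch with toy pre-flattened indices follows; it is a hypothetical helper, not the mmdet3d canvas layout.
import torch
from torch import Tensor

def map_voxel_to_point(pts_voxel_idx: Tensor, voxel_values: Tensor,
                       voxel_idx: Tensor, num_voxels: int) -> Tensor:
    """Broadcast one value per voxel back to every point in that voxel."""
    canvas = torch.zeros(voxel_values.shape[1], num_voxels)
    canvas[:, voxel_idx] = voxel_values.t()   # scatter voxel values
    return canvas[:, pts_voxel_idx].t()       # gather them per point

voxel_values = torch.tensor([[1.0, 2.0], [3.0, 4.0]])  # 2 voxels, 2 channels
voxel_idx = torch.tensor([0, 3])                        # flattened voxel ids
pts_voxel_idx = torch.tensor([0, 0, 3, 3, 3])           # each point's voxel id
per_point = map_voxel_to_point(pts_voxel_idx, voxel_values, voxel_idx,
                               num_voxels=4)
assert per_point.shape == (5, 2)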