Commit adb17824 authored by xiangxu-0103, committed by ZwwWayne
Browse files

[Fix]: fix semantic segmentation related bugs (#1909)

delete whitespace

update docs

remove unnecessary optional docs

update docs

add mmengine assertion

add docstring

fix mminstall

update mmengine version

fix

[Fix]: fix semantic segmentation related bugs (#1909)

fix semantic seg

fix lint

remove unused imports

fix

update pointnet2-s3dis config

update data_list according to scene_idxs

remove useless function

fix bug lack `eval_ann_info` during evaluation

fix bug

update doc

fix lint

update docs

Update det3d_dataset.py

update docstrings

update docs

fix lint

update docs

fix

fix

fix lint
parent b37dc416
...@@ -166,24 +166,25 @@ class _S3DISSegDataset(Seg3DDataset): ...@@ -166,24 +166,25 @@ class _S3DISSegDataset(Seg3DDataset):
wrapper to concat all the provided data in different areas. wrapper to concat all the provided data in different areas.
Args: Args:
data_root (str): Path of dataset root. data_root (str, optional): Path of dataset root. Defaults to None.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file. Defaults to ''.
pipeline (list[dict], optional): Pipeline used for data processing. metainfo (dict, optional): Meta information for dataset, such as class
Defaults to None. information. Defaults to None.
classes (tuple[str], optional): Classes used in the dataset. data_prefix (dict): Prefix for training data. Defaults to
Defaults to None. dict(pts='points', instance_mask='', semantic_mask='').
palette (list[list[int]], optional): The palette of segmentation map. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
modality (dict, optional): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used as input.
as input. Defaults to None. Defaults to dict(use_lidar=True, use_camera=False).
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES). unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None. Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load scene_idxs (np.ndarray | str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times. data. For scenes with many points, we may sample it several times.
Defaults to None. Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
""" """
METAINFO = { METAINFO = {
'CLASSES': 'CLASSES':
...@@ -207,9 +208,9 @@ class _S3DISSegDataset(Seg3DDataset): ...@@ -207,9 +208,9 @@ class _S3DISSegDataset(Seg3DDataset):
pts='points', img='', instance_mask='', semantic_mask=''), pts='points', img='', instance_mask='', semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index=None, ignore_index: Optional[int] = None,
scene_idxs=None, scene_idxs: Optional[Union[np.ndarray, str]] = None,
test_mode=False, test_mode: bool = False,
**kwargs) -> None: **kwargs) -> None:
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
...@@ -250,37 +251,40 @@ class S3DISSegDataset(_S3DISSegDataset): ...@@ -250,37 +251,40 @@ class S3DISSegDataset(_S3DISSegDataset):
data downloading. data downloading.
Args: Args:
data_root (str): Path of dataset root. data_root (str, optional): Path of dataset root. Defaults to None.
ann_files (list[str]): Path of several annotation files. ann_files (list[str]): Path of several annotation files.
pipeline (list[dict], optional): Pipeline used for data processing. Defaults to ''.
Defaults to None. metainfo (dict, optional): Meta information for dataset, such as class
classes (tuple[str], optional): Classes used in the dataset. information. Defaults to None.
Defaults to None. data_prefix (dict): Prefix for training data. Defaults to
palette (list[list[int]], optional): The palette of segmentation map. dict(pts='points', instance_mask='', semantic_mask='').
Defaults to None. pipeline (list[dict]): Pipeline used for data processing.
modality (dict, optional): Modality to specify the sensor data used Defaults to [].
as input. Defaults to None. modality (dict): Modality to specify the sensor data used as input.
test_mode (bool, optional): Whether the dataset is in test mode. Defaults to dict(use_lidar=True, use_camera=False).
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES). unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None. Defaults to None.
scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index
to load data. For scenes with many points, we may sample it several to load data. For scenes with many points, we may sample it
times. Defaults to None. several times. Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
""" """
def __init__(self, def __init__(self,
data_root: Optional[str] = None, data_root: Optional[str] = None,
ann_files: str = '', ann_files: List[str] = '',
metainfo: Optional[dict] = None, metainfo: Optional[dict] = None,
data_prefix: dict = dict( data_prefix: dict = dict(
pts='points', img='', instance_mask='', semantic_mask=''), pts='points', img='', instance_mask='', semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index=None, ignore_index: Optional[int] = None,
scene_idxs=None, scene_idxs: Optional[Union[List[np.ndarray],
test_mode=False, List[str]]] = None,
test_mode: bool = False,
**kwargs) -> None: **kwargs) -> None:
# make sure that ann_files and scene_idxs have same length # make sure that ann_files and scene_idxs have same length
...@@ -318,13 +322,12 @@ class S3DISSegDataset(_S3DISSegDataset): ...@@ -318,13 +322,12 @@ class S3DISSegDataset(_S3DISSegDataset):
# data_list and scene_idxs need to be concat # data_list and scene_idxs need to be concat
self.concat_data_list([dst.data_list for dst in datasets]) self.concat_data_list([dst.data_list for dst in datasets])
self.concat_scene_idxs([dst.scene_idxs for dst in datasets])
# set group flag for the sampler # set group flag for the sampler
if not self.test_mode: if not self.test_mode:
self._set_group_flag() self._set_group_flag()
def concat_data_list(self, data_lists): def concat_data_list(self, data_lists: List[List[dict]]) -> List[dict]:
"""Concat data_list from several datasets to form self.data_list. """Concat data_list from several datasets to form self.data_list.
Args: Args:
...@@ -334,21 +337,6 @@ class S3DISSegDataset(_S3DISSegDataset): ...@@ -334,21 +337,6 @@ class S3DISSegDataset(_S3DISSegDataset):
data for data_list in data_lists for data in data_list data for data_list in data_lists for data in data_list
] ]
def concat_scene_idxs(self, scene_idxs):
"""Concat scene_idxs from several datasets to form self.scene_idxs.
Needs to manually add offset to scene_idxs[1, 2, ...].
Args:
scene_idxs (list[np.ndarray])
"""
self.scene_idxs = np.array([], dtype=np.int32)
offset = 0
for one_scene_idxs in scene_idxs:
self.scene_idxs = np.concatenate(
[self.scene_idxs, one_scene_idxs + offset]).astype(np.int32)
offset = np.unique(self.scene_idxs).max() + 1
@staticmethod @staticmethod
def _duplicate_to_list(x, num): def _duplicate_to_list(x, num):
"""Repeat x `num` times to form a list.""" """Repeat x `num` times to form a list."""
......
...@@ -26,13 +26,13 @@ class ScanNetDataset(Det3DDataset): ...@@ -26,13 +26,13 @@ class ScanNetDataset(Det3DDataset):
metainfo (dict, optional): Meta information for dataset, such as class metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None. information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points', dict(pts='points',
pts_instance_mask='instance_mask', pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask')`. pts_semantic_mask='semantic_mask').
pipeline (list[dict]): Pipeline used for data processing. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
modality (dict): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used as input.
as input. Defaults to None. Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
...@@ -41,8 +41,10 @@ class ScanNetDataset(Det3DDataset): ...@@ -41,8 +41,10 @@ class ScanNetDataset(Det3DDataset):
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT. filter_empty_gt (bool): Whether to filter the data with empty GT.
Defaults to True. If it's set to be True, the example with empty annotations after
data pipeline will be dropped and a random example will be chosen
in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode. test_mode (bool): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
""" """
...@@ -71,7 +73,7 @@ class ScanNetDataset(Det3DDataset): ...@@ -71,7 +73,7 @@ class ScanNetDataset(Det3DDataset):
box_type_3d: str = 'Depth', box_type_3d: str = 'Depth',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
**kwargs): **kwargs) -> None:
# construct seg_label_mapping for semantic mask # construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids']) seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
...@@ -128,8 +130,8 @@ class ScanNetDataset(Det3DDataset): ...@@ -128,8 +130,8 @@ class ScanNetDataset(Det3DDataset):
info (dict): Raw info dict. info (dict): Raw info dict.
Returns: Returns:
dict: Data information that will be passed to the data dict: Has `ann_info` in training stage. All
preprocessing transforms. It includes the following keys: paths have been converted to absolute paths.
""" """
info['axis_align_matrix'] = self._get_axis_align_matrix(info) info['axis_align_matrix'] = self._get_axis_align_matrix(info)
info['pts_instance_mask_path'] = osp.join( info['pts_instance_mask_path'] = osp.join(
...@@ -146,13 +148,13 @@ class ScanNetDataset(Det3DDataset): ...@@ -146,13 +148,13 @@ class ScanNetDataset(Det3DDataset):
return info return info
def parse_ann_info(self, info: dict) -> dict: def parse_ann_info(self, info: dict) -> dict:
"""Process the `instances` in data info to `ann_info` """Process the `instances` in data info to `ann_info`.
Args: Args:
info (dict): Info dict. info (dict): Info dict.
Returns: Returns:
dict: Processed `ann_info` dict: Processed `ann_info`.
""" """
ann_info = super().parse_ann_info(info) ann_info = super().parse_ann_info(info)
# empty gt # empty gt
...@@ -181,24 +183,25 @@ class ScanNetSegDataset(Seg3DDataset): ...@@ -181,24 +183,25 @@ class ScanNetSegDataset(Seg3DDataset):
for data downloading. for data downloading.
Args: Args:
data_root (str): Path of dataset root. data_root (str, optional): Path of dataset root. Defaults to None.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file. Defaults to ''.
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
classes (tuple[str], optional): Classes used in the dataset. metainfo (dict, optional): Meta information for dataset, such as class
Defaults to None. information. Defaults to None.
palette (list[list[int]], optional): The palette of segmentation map. data_prefix (dict): Prefix for training data. Defaults to
Defaults to None. dict(pts='points', img='', instance_mask='', semantic_mask='').
modality (dict, optional): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used as input.
as input. Defaults to None. Defaults to dict(use_lidar=True, use_camera=False).
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES). unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None. Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load scene_idxs (np.ndarray | str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times. data. For scenes with many points, we may sample it several times.
Defaults to None. Defaults to None.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
""" """
METAINFO = { METAINFO = {
'CLASSES': 'CLASSES':
...@@ -242,9 +245,9 @@ class ScanNetSegDataset(Seg3DDataset): ...@@ -242,9 +245,9 @@ class ScanNetSegDataset(Seg3DDataset):
pts='points', img='', instance_mask='', semantic_mask=''), pts='points', img='', instance_mask='', semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index=None, ignore_index: Optional[int] = None,
scene_idxs=None, scene_idxs: Optional[Union[np.ndarray, str]] = None,
test_mode=False, test_mode: bool = False,
**kwargs) -> None: **kwargs) -> None:
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
...@@ -315,10 +318,10 @@ class ScanNetInstanceSegDataset(Seg3DDataset): ...@@ -315,10 +318,10 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
pts='points', img='', instance_mask='', semantic_mask=''), pts='points', img='', instance_mask='', semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
test_mode=False, test_mode: bool = False,
ignore_index=None, ignore_index: Optional[int] = None,
scene_idxs=None, scene_idxs: Optional[Union[np.ndarray, str]] = None,
file_client_args=dict(backend='disk'), file_client_args: dict = dict(backend='disk'),
**kwargs) -> None: **kwargs) -> None:
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
......
...@@ -16,24 +16,20 @@ class Seg3DDataset(BaseDataset): ...@@ -16,24 +16,20 @@ class Seg3DDataset(BaseDataset):
This is the base dataset of ScanNet, S3DIS and SemanticKITTI dataset. This is the base dataset of ScanNet, S3DIS and SemanticKITTI dataset.
Args: Args:
data_root (str): Path of dataset root. data_root (str, optional): Path of dataset root. Defaults to None.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file. Defaults to ''.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
metainfo (dict, optional): Meta information for dataset, such as class metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None. information. Defaults to None.
data_prefix (dict, optional): Prefix for training data. Defaults to data_prefix (dict): Prefix for training data. Defaults to
dict(pts='velodyne', img='', instance_mask='', semantic_mask=''). dict(pts='velodyne', img='', instance_mask='', semantic_mask='').
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
modality (dict, optional): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used
as input, it usually has following keys. as input, it usually has following keys:
- use_camera: bool - use_camera: bool
- use_lidar: bool - use_lidar: bool
Defaults to `dict(use_lidar=True, use_camera=False)` Defaults to dict(use_lidar=True, use_camera=False).
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g. ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline. be consistent with PointSegClassMapping function in pipeline.
...@@ -41,11 +37,13 @@ class Seg3DDataset(BaseDataset): ...@@ -41,11 +37,13 @@ class Seg3DDataset(BaseDataset):
scene_idxs (np.ndarray | str, optional): Precomputed index to load scene_idxs (np.ndarray | str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times. data. For scenes with many points, we may sample it several times.
Defaults to None. Defaults to None.
load_eval_anns (bool): Whether to load annotations test_mode (bool): Whether the dataset is in test mode.
in test_mode, the annotation will be save in Defaults to False.
`eval_ann_infos`, which can be use in Evaluator. load_eval_anns (bool): Whether to load annotations in test_mode,
the annotation will be saved in `eval_ann_infos`, which can be used
in Evaluator. Defaults to True.
file_client_args (dict): Configuration of file client. file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`. Defaults to dict(backend='disk').
""" """
METAINFO = { METAINFO = {
'CLASSES': None, # names of all classes data used for the task 'CLASSES': None, # names of all classes data used for the task
...@@ -66,7 +64,7 @@ class Seg3DDataset(BaseDataset): ...@@ -66,7 +64,7 @@ class Seg3DDataset(BaseDataset):
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index: Optional[int] = None, ignore_index: Optional[int] = None,
scene_idxs: Optional[str] = None, scene_idxs: Optional[Union[str, np.ndarray]] = None,
test_mode: bool = False, test_mode: bool = False,
load_eval_anns: bool = True, load_eval_anns: bool = True,
file_client_args: dict = dict(backend='disk'), file_client_args: dict = dict(backend='disk'),
...@@ -121,6 +119,7 @@ class Seg3DDataset(BaseDataset): ...@@ -121,6 +119,7 @@ class Seg3DDataset(BaseDataset):
self.metainfo['seg_label_mapping'] = self.seg_label_mapping self.metainfo['seg_label_mapping'] = self.seg_label_mapping
self.scene_idxs = self.get_scene_idxs(scene_idxs) self.scene_idxs = self.get_scene_idxs(scene_idxs)
self.data_list = [self.data_list[i] for i in self.scene_idxs]
# set group flag for the sampler # set group flag for the sampler
if not self.test_mode: if not self.test_mode:
...@@ -141,7 +140,6 @@ class Seg3DDataset(BaseDataset): ...@@ -141,7 +140,6 @@ class Seg3DDataset(BaseDataset):
new_classes (list, tuple, optional): The new classes name from new_classes (list, tuple, optional): The new classes name from
metainfo. Default to None. metainfo. Default to None.
Returns: Returns:
tuple: The mapping from old classes in cls.METAINFO to tuple: The mapping from old classes in cls.METAINFO to
new classes in metainfo new classes in metainfo
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Optional, Union from typing import Callable, List, Optional, Union
import numpy as np
from mmdet3d.registry import DATASETS from mmdet3d.registry import DATASETS
from .seg3d_dataset import Seg3DDataset from .seg3d_dataset import Seg3DDataset
...@@ -14,26 +16,28 @@ class SemanticKITTIDataset(Seg3DDataset): ...@@ -14,26 +16,28 @@ class SemanticKITTIDataset(Seg3DDataset):
for data downloading for data downloading
Args: Args:
data_root (str): Path of dataset root. data_root (str, optional): Path of dataset root. Defaults to None.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file. Defaults to ''.
pipeline (list[dict], optional): Pipeline used for data processing. metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='points', img='', instance_mask='', semantic_mask='').
pipeline (list[dict]): Pipeline used for data processing.
Defaults to [].
modality (dict): Modality to specify the sensor data used as input,
it usually has following keys:
- use_camera: bool
- use_lidar: bool
Defaults to dict(use_lidar=True, use_camera=False).
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None. Defaults to None.
classes (tuple[str], optional): Classes used in the dataset. scene_idxs (np.ndarray | str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None. Defaults to None.
modality (dict, optional): Modality to specify the sensor data used test_mode (bool): Whether the dataset is in test mode.
as input. Defaults to None.
box_type_3d (str, optional): NO 3D box for this dataset.
You can choose any type
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
""" """
METAINFO = { METAINFO = {
...@@ -55,9 +59,9 @@ class SemanticKITTIDataset(Seg3DDataset): ...@@ -55,9 +59,9 @@ class SemanticKITTIDataset(Seg3DDataset):
pts='points', img='', instance_mask='', semantic_mask=''), pts='points', img='', instance_mask='', semantic_mask=''),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False), modality: dict = dict(use_lidar=True, use_camera=False),
ignore_index=None, ignore_index: Optional[int] = None,
scene_idxs=None, scene_idxs: Optional[Union[str, np.ndarray]] = None,
test_mode=False, test_mode: bool = False,
**kwargs) -> None: **kwargs) -> None:
super().__init__( super().__init__(
......
...@@ -24,13 +24,13 @@ class SUNRGBDDataset(Det3DDataset): ...@@ -24,13 +24,13 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None. information. Defaults to None.
data_prefix (dict, optiona;): Prefix for data. Defaults to data_prefix (dict): Prefix for data. Defaults to
dict(pts='points',img='sunrgbd_trainval'). dict(pts='points',img='sunrgbd_trainval').
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
modality (dict, optional): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used as input.
as input. Defaults to dict(use_camera=True, use_lidar=True). Defaults to dict(use_camera=True, use_lidar=True).
default_cam_key (str, optional): The default camera name adopted. default_cam_key (str): The default camera name adopted.
Defaults to 'CAM0'. Defaults to 'CAM0'.
box_type_3d (str): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
...@@ -40,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset): ...@@ -40,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT. filter_empty_gt (bool): Whether to filter empty GT.
Defaults to True. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode. test_mode (bool): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
""" """
METAINFO = { METAINFO = {
...@@ -58,11 +58,11 @@ class SUNRGBDDataset(Det3DDataset): ...@@ -58,11 +58,11 @@ class SUNRGBDDataset(Det3DDataset):
pts='points', img='sunrgbd_trainval/image'), pts='points', img='sunrgbd_trainval/image'),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
default_cam_key: str = 'CAM0', default_cam_key: str = 'CAM0',
modality=dict(use_camera=True, use_lidar=True), modality: dict = dict(use_camera=True, use_lidar=True),
box_type_3d: str = 'Depth', box_type_3d: str = 'Depth',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
**kwargs): **kwargs) -> None:
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
ann_file=ann_file, ann_file=ann_file,
...@@ -121,7 +121,7 @@ class SUNRGBDDataset(Det3DDataset): ...@@ -121,7 +121,7 @@ class SUNRGBDDataset(Det3DDataset):
return info return info
def parse_ann_info(self, info: dict) -> dict: def parse_ann_info(self, info: dict) -> dict:
"""Process the `instances` in data info to `ann_info` """Process the `instances` in data info to `ann_info`.
Args: Args:
info (dict): Info dict. info (dict): Info dict.
......
...@@ -18,9 +18,8 @@ class BatchSampler: ...@@ -18,9 +18,8 @@ class BatchSampler:
sample_list (list[dict]): List of samples. sample_list (list[dict]): List of samples.
name (str, optional): The category of samples. Defaults to None. name (str, optional): The category of samples. Defaults to None.
epoch (int, optional): Sampling epoch. Defaults to None. epoch (int, optional): Sampling epoch. Defaults to None.
shuffle (bool, optional): Whether to shuffle indices. shuffle (bool): Whether to shuffle indices. Defaults to False.
Defaults to False. drop_reminder (bool): Drop reminder. Defaults to False.
drop_reminder (bool, optional): Drop reminder. Defaults to False.
""" """
def __init__(self, def __init__(self,
...@@ -90,12 +89,11 @@ class DataBaseSampler(object): ...@@ -90,12 +89,11 @@ class DataBaseSampler(object):
prepare (dict): Name of preparation functions and the input value. prepare (dict): Name of preparation functions and the input value.
sample_groups (dict): Sampled classes and numbers. sample_groups (dict): Sampled classes and numbers.
classes (list[str], optional): List of classes. Defaults to None. classes (list[str], optional): List of classes. Defaults to None.
points_loader(dict, optional): Config of points loader. Defaults to points_loader (dict): Config of points loader. Defaults to
dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3]). dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3]).
file_client_args (dict, optional): Config dict of file clients, file_client_args (dict): Arguments to instantiate a FileClient.
refer to See :class:`mmengine.fileio.FileClient` for details.
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py Defaults to dict(backend='disk').
for more details. Defaults to dict(backend='disk').
""" """
def __init__( def __init__(
......
...@@ -102,7 +102,7 @@ class Pack3DDetInputs(BaseTransform): ...@@ -102,7 +102,7 @@ class Pack3DDetInputs(BaseTransform):
- points - points
- img - img
- 'data_samples' (obj:`Det3DDataSample`): The annotation info of - 'data_samples' (:obj:`Det3DDataSample`): The annotation info of
the sample. the sample.
""" """
# augtest # augtest
......
...@@ -20,19 +20,17 @@ class LoadMultiViewImageFromFiles(BaseTransform): ...@@ -20,19 +20,17 @@ class LoadMultiViewImageFromFiles(BaseTransform):
Expects results['img_filename'] to be a list of filenames. Expects results['img_filename'] to be a list of filenames.
Args: Args:
to_float32 (bool, optional): Whether to convert the img to float32. to_float32 (bool): Whether to convert the img to float32.
Defaults to False. Defaults to False.
color_type (str, optional): Color type of the file. color_type (str): Color type of the file. Defaults to 'unchanged'.
Defaults to 'unchanged'. file_client_args (dict): Arguments to instantiate a FileClient.
file_client_args (dict): Config dict of file clients, See :class:`mmengine.fileio.FileClient` for details.
refer to Defaults to dict(backend='disk').
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py num_views (int): Number of views in a frame. Defaults to 5.
for more details. Defaults to dict(backend='disk'). num_ref_frames (int): Number of frames in loading. Defaults to -1.
num_views (int): num of view in a frame. Default to 5. test_mode (bool): Whether is test mode in loading. Defaults to False.
num_ref_frames (int): num of frame in loading. Default to -1. set_default_scale (bool): Whether to set default scale.
test_mode (bool): Whether is test mode in loading. Default to False. Defaults to True.
set_default_scale (bool): Whether to set default scale. Default to
True.
""" """
def __init__(self, def __init__(self,
...@@ -210,7 +208,7 @@ class LoadMultiViewImageFromFiles(BaseTransform): ...@@ -210,7 +208,7 @@ class LoadMultiViewImageFromFiles(BaseTransform):
results['num_ref_frames'] = self.num_ref_frames results['num_ref_frames'] = self.num_ref_frames
return results return results
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += f'(to_float32={self.to_float32}, ' repr_str += f'(to_float32={self.to_float32}, '
...@@ -276,22 +274,17 @@ class LoadPointsFromMultiSweeps(BaseTransform): ...@@ -276,22 +274,17 @@ class LoadPointsFromMultiSweeps(BaseTransform):
This is usually used for nuScenes dataset to utilize previous sweeps. This is usually used for nuScenes dataset to utilize previous sweeps.
Args: Args:
sweeps_num (int, optional): Number of sweeps. Defaults to 10. sweeps_num (int): Number of sweeps. Defaults to 10.
load_dim (int, optional): Dimension number of the loaded points. load_dim (int): Dimension number of the loaded points. Defaults to 5.
Defaults to 5. use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].
use_dim (list[int], optional): Which dimension to use. file_client_args (dict): Arguments to instantiate a FileClient.
Defaults to [0, 1, 2, 4]. See :class:`mmengine.fileio.FileClient` for details.
file_client_args (dict, optional): Config dict of file clients, Defaults to dict(backend='disk').
refer to pad_empty_sweeps (bool): Whether to repeat keyframe when
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
pad_empty_sweeps (bool, optional): Whether to repeat keyframe when
sweeps is empty. Defaults to False. sweeps is empty. Defaults to False.
remove_close (bool, optional): Whether to remove close points. remove_close (bool): Whether to remove close points. Defaults to False.
Defaults to False. test_mode (bool): If `test_mode=True`, it will not randomly sample
test_mode (bool, optional): If `test_mode=True`, it will not sweeps but select the nearest N frames. Defaults to False.
randomly sample sweeps but select the nearest N frames.
Defaults to False.
""" """
def __init__(self, def __init__(self,
...@@ -336,11 +329,11 @@ class LoadPointsFromMultiSweeps(BaseTransform): ...@@ -336,11 +329,11 @@ class LoadPointsFromMultiSweeps(BaseTransform):
def _remove_close(self, def _remove_close(self,
points: Union[np.ndarray, BasePoints], points: Union[np.ndarray, BasePoints],
radius: float = 1.0) -> Union[np.ndarray, BasePoints]: radius: float = 1.0) -> Union[np.ndarray, BasePoints]:
"""Removes point too close within a certain radius from origin. """Remove point too close within a certain radius from origin.
Args: Args:
points (np.ndarray | :obj:`BasePoints`): Sweep points. points (np.ndarray | :obj:`BasePoints`): Sweep points.
radius (float, optional): Radius below which points are removed. radius (float): Radius below which points are removed.
Defaults to 1.0. Defaults to 1.0.
Returns: Returns:
...@@ -414,7 +407,7 @@ class LoadPointsFromMultiSweeps(BaseTransform): ...@@ -414,7 +407,7 @@ class LoadPointsFromMultiSweeps(BaseTransform):
results['points'] = points results['points'] = points
return results return results
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})' return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
...@@ -465,7 +458,7 @@ class PointSegClassMapping(BaseTransform): ...@@ -465,7 +458,7 @@ class PointSegClassMapping(BaseTransform):
return results return results
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
return repr_str return repr_str
...@@ -505,7 +498,7 @@ class NormalizePointsColor(BaseTransform): ...@@ -505,7 +498,7 @@ class NormalizePointsColor(BaseTransform):
input_dict['points'] = points input_dict['points'] = points
return input_dict return input_dict
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += f'(color_mean={self.color_mean})' repr_str += f'(color_mean={self.color_mean})'
...@@ -533,19 +526,15 @@ class LoadPointsFromFile(BaseTransform): ...@@ -533,19 +526,15 @@ class LoadPointsFromFile(BaseTransform):
- 'LIDAR': Points in LiDAR coordinates. - 'LIDAR': Points in LiDAR coordinates.
- 'DEPTH': Points in depth coordinates, usually for indoor dataset. - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
- 'CAMERA': Points in camera coordinates. - 'CAMERA': Points in camera coordinates.
load_dim (int, optional): The dimension of the loaded points. load_dim (int): The dimension of the loaded points. Defaults to 6.
Defaults to 6. use_dim (list[int] | int): Which dimensions of the points to use.
use_dim (list[int] | int, optional): Which dimensions of the points Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
to use. Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension. or use_dim=[0, 1, 2, 3] to use the intensity dimension.
shift_height (bool, optional): Whether to use shifted height. shift_height (bool): Whether to use shifted height. Defaults to False.
Defaults to False. use_color (bool): Whether to use color features. Defaults to False.
use_color (bool, optional): Whether to use color features. file_client_args (dict): Arguments to instantiate a FileClient.
Defaults to False. See :class:`mmengine.fileio.FileClient` for details.
file_client_args (dict, optional): Config dict of file clients, Defaults to dict(backend='disk').
refer to
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py
for more details. Defaults to dict(backend='disk').
""" """
def __init__( def __init__(
...@@ -638,7 +627,7 @@ class LoadPointsFromFile(BaseTransform): ...@@ -638,7 +627,7 @@ class LoadPointsFromFile(BaseTransform):
return results return results
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
repr_str = self.__class__.__name__ + '(' repr_str = self.__class__.__name__ + '('
repr_str += f'shift_height={self.shift_height}, ' repr_str += f'shift_height={self.shift_height}, '
...@@ -688,7 +677,7 @@ class LoadAnnotations3D(LoadAnnotations): ...@@ -688,7 +677,7 @@ class LoadAnnotations3D(LoadAnnotations):
- pts_instance_mask_path (str): Path of instance mask file. - pts_instance_mask_path (str): Path of instance mask file.
Only when `with_mask_3d` is True. Only when `with_mask_3d` is True.
- pts_semantic_mask_path (str): Path of semantic mask file. - pts_semantic_mask_path (str): Path of semantic mask file.
Only when Only when `with_seg_3d` is True.
Added Keys: Added Keys:
...@@ -713,33 +702,25 @@ class LoadAnnotations3D(LoadAnnotations): ...@@ -713,33 +702,25 @@ class LoadAnnotations3D(LoadAnnotations):
Only when `with_seg_3d` is True. Only when `with_seg_3d` is True.
Args: Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes. with_bbox_3d (bool): Whether to load 3D boxes. Defaults to True.
Defaults to True. with_label_3d (bool): Whether to load 3D labels. Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels. with_attr_label (bool): Whether to load attribute label.
Defaults to True.
with_attr_label (bool, optional): Whether to load attribute label.
Defaults to False.
with_mask_3d (bool, optional): Whether to load 3D instance masks.
for points. Defaults to False.
with_seg_3d (bool, optional): Whether to load 3D semantic masks.
for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False. Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks. with_mask_3d (bool): Whether to load 3D instance masks for points.
Defaults to False. Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks. with_seg_3d (bool): Whether to load 3D semantic masks for points.
Defaults to False. Defaults to False.
with_bbox_depth (bool, optional): Whether to load 2.5D boxes. with_bbox (bool): Whether to load 2D boxes. Defaults to False.
Defaults to False. with_label (bool): Whether to load 2D labels. Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations with_mask (bool): Whether to load 2D instance masks. Defaults to False.
to bitmasks. Defaults to True. with_seg (bool): Whether to load 2D semantic masks. Defaults to False.
seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks. with_bbox_depth (bool): Whether to load 2.5D boxes. Defaults to False.
Defaults to int64. poly2mask (bool): Whether to convert polygon annotations to bitmasks.
file_client_args (dict): Config dict of file clients, refer to Defaults to True.
https://github.com/open-mmlab/mmengine/blob/main/mmengine/fileio/file_client.py seg_3d_dtype (dtype): Dtype of 3D semantic masks. Defaults to int64.
for more details. file_client_args (dict): Arguments to instantiate a FileClient.
See :class:`mmengine.fileio.FileClient` for details.
Defaults to dict(backend='disk').
""" """
def __init__( def __init__(
...@@ -889,7 +870,8 @@ class LoadAnnotations3D(LoadAnnotations): ...@@ -889,7 +870,8 @@ class LoadAnnotations3D(LoadAnnotations):
`ignore_flag` `ignore_flag`
Args: Args:
results (dict): Result dict from :obj:``mmcv.BaseDataset``. results (dict): Result dict from :obj:`mmcv.BaseDataset`.
Returns: Returns:
dict: The dict contains loaded bounding box annotations. dict: The dict contains loaded bounding box annotations.
""" """
...@@ -900,7 +882,7 @@ class LoadAnnotations3D(LoadAnnotations): ...@@ -900,7 +882,7 @@ class LoadAnnotations3D(LoadAnnotations):
"""Private function to load label annotations. """Private function to load label annotations.
Args: Args:
results (dict): Result dict from :obj:``mmcv.BaseDataset``. results (dict): Result dict from :obj:`mmcv.BaseDataset`.
Returns: Returns:
dict: The dict contains loaded label annotations. dict: The dict contains loaded label annotations.
...@@ -933,7 +915,7 @@ class LoadAnnotations3D(LoadAnnotations): ...@@ -933,7 +915,7 @@ class LoadAnnotations3D(LoadAnnotations):
return results return results
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
indent_str = ' ' indent_str = ' '
repr_str = self.__class__.__name__ + '(\n' repr_str = self.__class__.__name__ + '(\n'
......
...@@ -19,18 +19,17 @@ class MultiScaleFlipAug3D(BaseTransform): ...@@ -19,18 +19,17 @@ class MultiScaleFlipAug3D(BaseTransform):
img_scale (tuple | list[tuple]): Images scales for resizing. img_scale (tuple | list[tuple]): Images scales for resizing.
pts_scale_ratio (float | list[float]): Points scale ratios for pts_scale_ratio (float | list[float]): Points scale ratios for
resizing. resizing.
flip (bool, optional): Whether apply flip augmentation. flip (bool): Whether apply flip augmentation. Defaults to False.
Defaults to False. flip_direction (str | list[str]): Flip augmentation directions
flip_direction (str | list[str], optional): Flip augmentation for images, options are "horizontal" and "vertical".
directions for images, options are "horizontal" and "vertical".
If flip_direction is list, multiple flip augmentations will If flip_direction is list, multiple flip augmentations will
be applied. It has no effect when ``flip == False``. be applied. It has no effect when ``flip == False``.
Defaults to 'horizontal'. Defaults to 'horizontal'.
pcd_horizontal_flip (bool, optional): Whether to apply horizontal pcd_horizontal_flip (bool): Whether to apply horizontal flip
flip augmentation to point cloud. Defaults to True. augmentation to point cloud. Defaults to False.
Note that it works only when 'flip' is turned on. Note that it works only when 'flip' is turned on.
pcd_vertical_flip (bool, optional): Whether to apply vertical flip pcd_vertical_flip (bool): Whether to apply vertical flip
augmentation to point cloud. Defaults to True. augmentation to point cloud. Defaults to False.
Note that it works only when 'flip' is turned on. Note that it works only when 'flip' is turned on.
""" """
...@@ -112,7 +111,7 @@ class MultiScaleFlipAug3D(BaseTransform): ...@@ -112,7 +111,7 @@ class MultiScaleFlipAug3D(BaseTransform):
return aug_data_list return aug_data_list
def __repr__(self): def __repr__(self) -> str:
"""str: Return a string that describes the module.""" """str: Return a string that describes the module."""
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += f'(transforms={self.transforms}, ' repr_str += f'(transforms={self.transforms}, '
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp import os.path as osp
from typing import Callable, List, Optional, Union from typing import Callable, List, Union
import numpy as np import numpy as np
...@@ -24,20 +24,20 @@ class WaymoDataset(KittiDataset): ...@@ -24,20 +24,20 @@ class WaymoDataset(KittiDataset):
data_root (str): Path of dataset root. data_root (str): Path of dataset root.
ann_file (str): Path of annotation file. ann_file (str): Path of annotation file.
data_prefix (dict): data prefix for point cloud and data_prefix (dict): data prefix for point cloud and
camera data dict. Default to dict( camera data dict. Defaults to dict(
pts='velodyne', pts='velodyne',
CAM_FRONT='image_0', CAM_FRONT='image_0',
CAM_FRONT_RIGHT='image_1', CAM_FRONT_RIGHT='image_1',
CAM_FRONT_LEFT='image_2', CAM_FRONT_LEFT='image_2',
CAM_SIDE_RIGHT='image_3', CAM_SIDE_RIGHT='image_3',
CAM_SIDE_LEFT='image_4') CAM_SIDE_LEFT='image_4')
pipeline (list[dict], optional): Pipeline used for data processing. pipeline (list[dict]): Pipeline used for data processing.
Defaults to None. Defaults to [].
modality (dict, optional): Modality to specify the sensor data used modality (dict): Modality to specify the sensor data used
as input. Defaults to dict(use_lidar=True). as input. Defaults to dict(use_lidar=True).
default_cam_key (str, optional): Default camera key for lidar2img default_cam_key (str): Default camera key for lidar2img
association. Defaults to 'CAM_FRONT'. association. Defaults to 'CAM_FRONT'.
box_type_3d (str, optional): Type of 3D box of this dataset. box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`. to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes: Defaults to 'LiDAR' in this dataset. Available options includes:
...@@ -45,22 +45,23 @@ class WaymoDataset(KittiDataset): ...@@ -45,22 +45,23 @@ class WaymoDataset(KittiDataset):
- 'LiDAR': Box in LiDAR coordinates. - 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset. - 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates. - 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT. filter_empty_gt (bool): Whether to filter the data with empty GT.
Defaults to True. If it's set to be True, the example with empty annotations after
test_mode (bool, optional): Whether the dataset is in test mode. data pipeline will be dropped and a random example will be chosen
in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False. Defaults to False.
pcd_limit_range (list[float], optional): The range of point cloud pcd_limit_range (list[float]): The range of point cloud
used to filter invalid predicted boxes. used to filter invalid predicted boxes.
Defaults to [-85, -85, -5, 85, 85, 5]. Defaults to [-85, -85, -5, 85, 85, 5].
cam_sync_instances (bool, optional): If use the camera sync label cam_sync_instances (bool): If use the camera sync label
supported from waymo version 1.3.1. Defaults to False. supported from waymo version 1.3.1. Defaults to False.
load_interval (int, optional): load frame interval. load_interval (int): load frame interval. Defaults to 1.
Defaults to 1. task (str): task for 3D detection (lidar, mono3d).
task (str, optional): task for 3D detection (lidar, mono3d).
lidar: take all the ground truth in the frame. lidar: take all the ground truth in the frame.
mono3d: take the groundtruth that can be seen in the cam. mono3d: take the groundtruth that can be seen in the cam.
Defaults to 'lidar'. Defaults to 'lidar_det'.
max_sweeps (int, optional): max sweep for each frame. Defaults to 0. max_sweeps (int): max sweep for each frame. Defaults to 0.
""" """
METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')} METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')}
...@@ -75,17 +76,17 @@ class WaymoDataset(KittiDataset): ...@@ -75,17 +76,17 @@ class WaymoDataset(KittiDataset):
CAM_SIDE_RIGHT='image_3', CAM_SIDE_RIGHT='image_3',
CAM_SIDE_LEFT='image_4'), CAM_SIDE_LEFT='image_4'),
pipeline: List[Union[dict, Callable]] = [], pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True), modality: dict = dict(use_lidar=True),
default_cam_key: str = 'CAM_FRONT', default_cam_key: str = 'CAM_FRONT',
box_type_3d: str = 'LiDAR', box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True, filter_empty_gt: bool = True,
test_mode: bool = False, test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0], pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
cam_sync_instances=False, cam_sync_instances: bool = False,
load_interval=1, load_interval: int = 1,
task='lidar_det', task: str = 'lidar_det',
max_sweeps=0, max_sweeps: int = 0,
**kwargs): **kwargs) -> None:
self.load_interval = load_interval self.load_interval = load_interval
# set loading mode for different task settings # set loading mode for different task settings
self.cam_sync_instances = cam_sync_instances self.cam_sync_instances = cam_sync_instances
...@@ -111,7 +112,7 @@ class WaymoDataset(KittiDataset): ...@@ -111,7 +112,7 @@ class WaymoDataset(KittiDataset):
**kwargs) **kwargs)
def parse_ann_info(self, info: dict) -> dict: def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index. """Process the `instances` in data info to `ann_info`.
Args: Args:
info (dict): Data information of single data sample. info (dict): Data information of single data sample.
......
...@@ -41,19 +41,20 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta): ...@@ -41,19 +41,20 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
Args: Args:
channels (int): Channels after modules, before conv_seg. channels (int): Channels after modules, before conv_seg.
num_classes (int): Number of classes. num_classes (int): Number of classes.
dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5. dropout_ratio (float): Ratio of dropout layer. Defaults to 0.5.
conv_cfg (dict, optional): Config of conv layers. conv_cfg (dict): Config of conv layers.
Default: dict(type='Conv1d'). Defaults to dict(type='Conv1d').
norm_cfg (dict, optional): Config of norm layers. norm_cfg (dict): Config of norm layers.
Default: dict(type='BN1d'). Defaults to dict(type='BN1d').
act_cfg (dict, optional): Config of activation layers. act_cfg (dict): Config of activation layers.
Default: dict(type='ReLU'). Defaults to dict(type='ReLU').
loss_decode (dict, optional): Config of decode loss. loss_decode (dict): Config of decode loss.
Default: dict(type='CrossEntropyLoss'). Defaults to dict(type='CrossEntropyLoss').
ignore_index (int, optional): The label index to be ignored. ignore_index (int): The label index to be ignored.
When using masked BCE loss, ignore_index should be set to None. When using masked BCE loss, ignore_index should be set to None.
Default: 255. Defaults to 255.
init_cfg (dict or list[dict], optional): Initialization config dict. init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
""" """
def __init__(self, def __init__(self,
...@@ -105,8 +106,8 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta): ...@@ -105,8 +106,8 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
output = self.conv_seg(feat) output = self.conv_seg(feat)
return output return output
def loss(self, inputs: List[Tensor], def loss(self, inputs: List[Tensor], batch_data_samples: SampleList,
batch_data_samples: SampleList) -> dict: train_cfg: ConfigType) -> dict:
"""Forward function for training. """Forward function for training.
Args: Args:
......
...@@ -140,7 +140,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta): ...@@ -140,7 +140,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta):
def postprocess_result(self, seg_pred_list: List[dict], def postprocess_result(self, seg_pred_list: List[dict],
batch_img_metas: List[dict]) -> list: batch_img_metas: List[dict]) -> list:
""" Convert results list to `Det3DDataSample`. """Convert results list to `Det3DDataSample`.
Args: Args:
seg_pred_list (List[dict]): List of segmentation results, seg_pred_list (List[dict]): List of segmentation results,
seg_logits from model of each input point clouds sample. seg_logits from model of each input point clouds sample.
...@@ -157,7 +158,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta): ...@@ -157,7 +158,8 @@ class Base3DSegmentor(BaseModel, metaclass=ABCMeta):
for i in range(len(seg_pred_list)): for i in range(len(seg_pred_list)):
img_meta = batch_img_metas[i] img_meta = batch_img_metas[i]
seg_pred = seg_pred_list[i] seg_pred = seg_pred_list[i]
prediction = Det3DDataSample(**{'metainfo': img_meta}) prediction = Det3DDataSample(**{'metainfo': img_meta.metainfo})
prediction.set_data({'eval_ann_info': img_meta.eval_ann_info})
prediction.set_data( prediction.set_data(
{'pred_pts_seg': PointData(**{'pts_semantic_mask': seg_pred})}) {'pred_pts_seg': PointData(**{'pts_semantic_mask': seg_pred})})
predictions.append(prediction) predictions.append(prediction)
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
from typing import List from typing import List, Tuple
import numpy as np import numpy as np
import torch import torch
...@@ -65,10 +65,10 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -65,10 +65,10 @@ class EncoderDecoder3D(Base3DSegmentor):
loss. Defaults to None. loss. Defaults to None.
train_cfg (OptConfigType): The config for training. Defaults to None. train_cfg (OptConfigType): The config for training. Defaults to None.
test_cfg (OptConfigType): The config for testing. Defaults to None. test_cfg (OptConfigType): The config for testing. Defaults to None.
data_preprocessor (dict, optional): The pre-process config of data_preprocessor (OptConfigType): The pre-process config of
:class:`BaseDataPreprocessor`. :class:`BaseDataPreprocessor`. Defaults to None.
init_cfg (dict, optional): The weight initialized config for init_cfg (OptMultiConfig): The weight initialized config for
:class:`BaseModule`. :class:`BaseModule`. Defaults to None.
""" # noqa: E501 """ # noqa: E501
def __init__(self, def __init__(self,
...@@ -80,7 +80,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -80,7 +80,7 @@ class EncoderDecoder3D(Base3DSegmentor):
train_cfg: OptConfigType = None, train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None, test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None, data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None): init_cfg: OptMultiConfig = None) -> None:
super(EncoderDecoder3D, self).__init__( super(EncoderDecoder3D, self).__init__(
data_preprocessor=data_preprocessor, init_cfg=init_cfg) data_preprocessor=data_preprocessor, init_cfg=init_cfg)
self.backbone = MODELS.build(backbone) self.backbone = MODELS.build(backbone)
...@@ -122,15 +122,15 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -122,15 +122,15 @@ class EncoderDecoder3D(Base3DSegmentor):
else: else:
self.loss_regularization = MODELS.build(loss_regularization) self.loss_regularization = MODELS.build(loss_regularization)
def extract_feat(self, batch_inputs) -> List[Tensor]: def extract_feat(self, batch_inputs: Tensor) -> Tensor:
"""Extract features from points.""" """Extract features from points."""
x = self.backbone(batch_inputs) x = self.backbone(batch_inputs)
if self.with_neck: if self.with_neck:
x = self.neck(x) x = self.neck(x)
return x return x
def encode_decode(self, batch_inputs: torch.Tensor, def encode_decode(self, batch_inputs: Tensor,
batch_input_metas: List[dict]) -> List[Tensor]: batch_input_metas: List[dict]) -> Tensor:
"""Encode points with backbone and decode into a semantic segmentation """Encode points with backbone and decode into a semantic segmentation
map of the same size as input. map of the same size as input.
...@@ -178,7 +178,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -178,7 +178,7 @@ class EncoderDecoder3D(Base3DSegmentor):
return losses return losses
def _loss_regularization_forward_train(self): def _loss_regularization_forward_train(self) -> dict:
"""Calculate regularization loss for model weight in training.""" """Calculate regularization loss for model weight in training."""
losses = dict() losses = dict()
if isinstance(self.loss_regularization, nn.ModuleList): if isinstance(self.loss_regularization, nn.ModuleList):
...@@ -213,7 +213,8 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -213,7 +213,8 @@ class EncoderDecoder3D(Base3DSegmentor):
""" """
# extract features using backbone # extract features using backbone
x = self.extract_feat(batch_inputs_dict) points = torch.stack(batch_inputs_dict['points'])
x = self.extract_feat(points)
losses = dict() losses = dict()
...@@ -236,7 +237,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -236,7 +237,7 @@ class EncoderDecoder3D(Base3DSegmentor):
patch_center: Tensor, patch_center: Tensor,
coord_max: Tensor, coord_max: Tensor,
feats: Tensor, feats: Tensor,
use_normalized_coord: bool = False): use_normalized_coord: bool = False) -> Tensor:
"""Generating model input. """Generating model input.
Generate input by subtracting patch center and adding additional Generate input by subtracting patch center and adding additional
...@@ -273,7 +274,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -273,7 +274,7 @@ class EncoderDecoder3D(Base3DSegmentor):
block_size: float, block_size: float,
sample_rate: float = 0.5, sample_rate: float = 0.5,
use_normalized_coord: bool = False, use_normalized_coord: bool = False,
eps: float = 1e-3): eps: float = 1e-3) -> Tuple[Tensor, Tensor]:
"""Sampling points in a sliding window fashion. """Sampling points in a sliding window fashion.
First sample patches to cover all the input points. First sample patches to cover all the input points.
...@@ -291,7 +292,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -291,7 +292,7 @@ class EncoderDecoder3D(Base3DSegmentor):
points coverage. Defaults to 1e-3. points coverage. Defaults to 1e-3.
Returns: Returns:
np.ndarray | np.ndarray: tuple:
- patch_points (torch.Tensor): Points of different patches of - patch_points (torch.Tensor): Points of different patches of
shape [K, N, 3+C]. shape [K, N, 3+C].
...@@ -372,7 +373,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -372,7 +373,7 @@ class EncoderDecoder3D(Base3DSegmentor):
return patch_points, patch_idxs return patch_points, patch_idxs
def slide_inference(self, point: Tensor, img_meta: List[dict], def slide_inference(self, point: Tensor, img_meta: List[dict],
rescale: bool): rescale: bool) -> Tensor:
"""Inference by sliding-window with overlap. """Inference by sliding-window with overlap.
Args: Args:
...@@ -417,14 +418,14 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -417,14 +418,14 @@ class EncoderDecoder3D(Base3DSegmentor):
return preds.transpose(0, 1) # to [num_classes, K*N] return preds.transpose(0, 1) # to [num_classes, K*N]
def whole_inference(self, points: Tensor, input_metas: List[dict], def whole_inference(self, points: Tensor, input_metas: List[dict],
rescale: bool): rescale: bool) -> Tensor:
"""Inference with full scene (one forward pass without sliding).""" """Inference with full scene (one forward pass without sliding)."""
seg_logit = self.encode_decode(points, input_metas) seg_logit = self.encode_decode(points, input_metas)
# TODO: if rescale and voxelization segmentor # TODO: if rescale and voxelization segmentor
return seg_logit return seg_logit
def inference(self, points: Tensor, input_metas: List[dict], def inference(self, points: Tensor, input_metas: List[dict],
rescale: bool): rescale: bool) -> Tensor:
"""Inference with slide/whole style. """Inference with slide/whole style.
Args: Args:
...@@ -489,7 +490,7 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -489,7 +490,7 @@ class EncoderDecoder3D(Base3DSegmentor):
seg_map = seg_map.cpu() seg_map = seg_map.cpu()
seg_pred_list.append(seg_map) seg_pred_list.append(seg_map)
return self.postprocess_result(seg_pred_list, batch_input_metas) return self.postprocess_result(seg_pred_list, batch_data_samples)
def _forward(self, def _forward(self,
batch_inputs_dict: dict, batch_inputs_dict: dict,
...@@ -510,7 +511,8 @@ class EncoderDecoder3D(Base3DSegmentor): ...@@ -510,7 +511,8 @@ class EncoderDecoder3D(Base3DSegmentor):
Returns: Returns:
Tensor: Forward output of model without any post-processes. Tensor: Forward output of model without any post-processes.
""" """
x = self.extract_feat(batch_inputs_dict) points = torch.stack(batch_inputs_dict['points'])
x = self.extract_feat(points)
return self.decode_head.forward(x) return self.decode_head.forward(x)
def aug_test(self, batch_inputs, batch_img_metas): def aug_test(self, batch_inputs, batch_img_metas):
......
...@@ -5,6 +5,15 @@ short_version = __version__ ...@@ -5,6 +5,15 @@ short_version = __version__
def parse_version_info(version_str): def parse_version_info(version_str):
"""Parse a version string into a tuple.
Args:
version_str (str): The version string.
Returns:
tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
(1, 3, 0), and "2.0.0rc1" is parsed into (2, 0, 0, 'rc1').
"""
version_info = [] version_info = []
for x in version_str.split('.'): for x in version_str.split('.'):
if x.isdigit(): if x.isdigit():
......
mmcv-full>=2.0.0rc0,<2.1.0 mmcv>=2.0.0rc0,<2.1.0
mmdet>=3.0.0rc0,<3.1.0 mmdet>=3.0.0rc0,<3.1.0
mmengine>=0.1.0,<1.0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment