"torchvision/vscode:/vscode.git/clone" did not exist on "f5afae50bc8e99b873e2345bcda2dedfc863a737"
Unverified Commit 74117ce4 authored by Xiangxu-0103's avatar Xiangxu-0103 Committed by GitHub
Browse files

[Fix]: Fix nuscenes dataset training (#1882)

* fix nuscenes training bug

* update kitti_dataset docstring

* update nuscenes_dataset docstring

* fix lint and modify docstring
parent f3898480
......@@ -26,11 +26,11 @@ class Det3DDataset(BaseDataset):
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict, optional): Prefix for training data. Defaults to
dict(pts='velodyne', img="").
dict(pts='velodyne', img='').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input, it usually has following keys.
as input, it usually has following keys:
- use_camera: bool
- use_lidar: bool
......@@ -40,7 +40,7 @@ class Det3DDataset(BaseDataset):
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
Defaults to 'LiDAR'. Available options includes:
- 'LiDAR': Box in LiDAR coordinates, usually for
outdoor point cloud 3d detection.
......@@ -49,15 +49,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
filter_empty_gt (bool): Whether to filter the data with
filter_empty_gt (bool, optional): Whether to filter the data with
empty GT. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
load_eval_anns (bool): Whether to load annotations
in test_mode, the annotation will be save in
`eval_ann_infos`, which can be use in Evaluator.
file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`.
load_eval_anns (bool, optional): Whether to load annotations
in test_mode, the annotation will be saved in `eval_ann_infos`,
which can be used in Evaluator. Defaults to True.
file_client_args (dict, optional): Configuration of file client.
Defaults to dict(backend='disk').
"""
def __init__(self,
......@@ -73,7 +73,7 @@ class Det3DDataset(BaseDataset):
test_mode: bool = False,
load_eval_anns=True,
file_client_args: dict = dict(backend='disk'),
**kwargs):
**kwargs) -> None:
# init file client
self.file_client = mmengine.FileClient(**file_client_args)
self.filter_empty_gt = filter_empty_gt
......@@ -125,7 +125,7 @@ class Det3DDataset(BaseDataset):
self.metainfo['box_type_3d'] = box_type_3d
self.metainfo['label_mapping'] = self.label_mapping
def _remove_dontcare(self, ann_info):
def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared about.
-1 indicates dontcare in MMDet3d.
......@@ -291,7 +291,7 @@ class Det3DDataset(BaseDataset):
return info
def prepare_data(self, index):
def prepare_data(self, index: int) -> Optional[dict]:
"""Data preparation for both training and testing stage.
Called by `__getitem__` of dataset.
......@@ -300,7 +300,7 @@ class Det3DDataset(BaseDataset):
index (int): Index for accessing the target data.
Returns:
dict: Data dict of the corresponding index.
dict | None: Data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, List, Optional, Union
from typing import Callable, List, Union
import numpy as np
......@@ -22,11 +22,12 @@ class KittiDataset(Det3DDataset):
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM2'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -35,9 +36,9 @@ class KittiDataset(Det3DDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0].
pcd_limit_range (list[float], optional): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [0, -40, -3, 70.4, 40, 0.0].
"""
# TODO: use full classes of kitti
METAINFO = {
......@@ -49,13 +50,13 @@ class KittiDataset(Det3DDataset):
data_root: str,
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
modality: dict = dict(use_lidar=True),
default_cam_key: str = 'CAM2',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
**kwargs):
**kwargs) -> None:
self.pcd_limit_range = pcd_limit_range
super().__init__(
......@@ -111,7 +112,7 @@ class KittiDataset(Det3DDataset):
return info
def parse_ann_info(self, info):
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
Args:
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -24,18 +24,18 @@ class LyftDataset(Det3DDataset):
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
as input. Defaults to dict(use_camera=False, use_lidar=True).
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
......@@ -48,8 +48,8 @@ class LyftDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
pipeline: List[dict] = None,
modality: Dict = dict(use_camera=False, use_lidar=True),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
from typing import Dict, List
from typing import Callable, List, Union
import numpy as np
......@@ -22,25 +22,26 @@ class NuScenesDataset(Det3DDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
task (str, optional): Detection task. Defaults to '3d'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes.
Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to dict(use_camera=False,use_lidar=True).
filter_empty_gt (bool): Whether to filter empty GT.
as input. Defaults to dict(use_camera=False, use_lidar=True).
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
with_velocity (bool): Whether include velocity prediction
with_velocity (bool, optional): Whether to include velocity prediction
into the experiments. Defaults to True.
use_valid_flag (bool): Whether to use `use_valid_flag` key
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
......@@ -56,9 +57,9 @@ class NuScenesDataset(Det3DDataset):
data_root: str,
ann_file: str,
task: str = '3d',
pipeline: List[dict] = None,
pipeline: List[Union[dict, Callable]] = [],
box_type_3d: str = 'LiDAR',
modality: Dict = dict(
modality: dict = dict(
use_camera=False,
use_lidar=True,
),
......@@ -66,7 +67,7 @@ class NuScenesDataset(Det3DDataset):
test_mode: bool = False,
with_velocity: bool = True,
use_valid_flag: bool = False,
**kwargs):
**kwargs) -> None:
self.use_valid_flag = use_valid_flag
self.with_velocity = with_velocity
......@@ -85,7 +86,7 @@ class NuScenesDataset(Det3DDataset):
test_mode=test_mode,
**kwargs)
def _filter_with_mask(self, ann_info):
def _filter_with_mask(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared about.
Args:
......
......@@ -36,7 +36,7 @@ class ScanNetDataset(Det3DDataset):
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
......@@ -61,13 +61,13 @@ class ScanNetDataset(Det3DDataset):
def __init__(self,
data_root: str,
ann_file: str,
metainfo: dict = None,
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points',
pts_instance_mask='instance_mask',
pts_semantic_mask='semantic_mask'),
pipeline: List[Union[dict, Callable]] = [],
modality=dict(use_camera=False, use_lidar=True),
modality: dict = dict(use_camera=False, use_lidar=True),
box_type_3d: str = 'Depth',
filter_empty_gt: bool = True,
test_mode: bool = False,
......@@ -101,7 +101,7 @@ class ScanNetDataset(Det3DDataset):
assert self.modality['use_camera'] or self.modality['use_lidar']
@staticmethod
def _get_axis_align_matrix(info: dict) -> dict:
def _get_axis_align_matrix(info: dict) -> np.ndarray:
"""Get axis_align_matrix from info. If not exist, return identity mat.
Args:
......
......@@ -24,25 +24,25 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points',img='sunrgbd_trainval')`.
data_prefix (dict, optional): Prefix for data. Defaults to
dict(pts='points',img='sunrgbd_trainval').
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
default_cam_key (str): The default camera name adopted.
Defaults to "CAM0".
as input. Defaults to dict(use_camera=True, use_lidar=True).
default_cam_key (str, optional): The default camera name adopted.
Defaults to 'CAM0'.
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool): Whether to filter empty GT.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
METAINFO = {
......
......@@ -23,8 +23,8 @@ class WaymoDataset(KittiDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
data_prefix (list[dict]): data prefix for point cloud and
camera data dict, default to dict(
data_prefix (dict): data prefix for point cloud and
camera data dict. Defaults to dict(
pts='velodyne',
CAM_FRONT='image_0',
CAM_FRONT_RIGHT='image_1',
......@@ -34,13 +34,14 @@ class WaymoDataset(KittiDataset):
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
as input. Defaults to dict(use_lidar=True).
default_cam_key (str, optional): Default camera key for lidar2img
association.
association. Defaults to 'CAM_FRONT'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
Defaults to 'LiDAR' in this dataset. Available options includes:
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
......@@ -48,16 +49,18 @@ class WaymoDataset(KittiDataset):
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [-85, -85, -5, 85, 85, 5].
pcd_limit_range (list[float], optional): The range of point cloud
used to filter invalid predicted boxes.
Defaults to [-85, -85, -5, 85, 85, 5].
cam_sync_instances (bool, optional): Whether to use the camera sync label
supported from waymo version 1.3.1.
supported from waymo version 1.3.1. Defaults to False.
load_interval (int, optional): load frame interval.
Defaults to 1.
task (str, optional): task for 3D detection (lidar, mono3d).
lidar: take all the ground truth in the frame.
mono3d: take the ground truth that can be seen in the cam.
max_sweeps (int, optional): max sweep for each frame.
Defaults to 'lidar'.
max_sweeps (int, optional): max sweep for each frame. Defaults to 0.
"""
METAINFO = {'CLASSES': ('Car', 'Pedestrian', 'Cyclist')}
......
......@@ -284,6 +284,7 @@ def update_nuscenes_infos(pkl_path, out_dir):
temp_data_info['ego2global'] = convert_quaternion_to_matrix(
ori_info_dict['ego2global_rotation'],
ori_info_dict['ego2global_translation'])
temp_data_info['lidar_points']['num_pts_feats'] = 5
temp_data_info['lidar_points']['lidar_path'] = ori_info_dict[
'lidar_path'].split('/')[-1]
temp_data_info['lidar_points'][
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment