Unverified commit d7067e44, authored by Wenwei Zhang, committed by GitHub

Bump version to v1.1.0rc2

Bump to v1.1.0rc2
parents 28fe73d2 fb0e57e5
@@ -7,7 +7,7 @@ from .nuscenes_metric import NuScenesMetric  # noqa: F401,F403
 from .seg_metric import SegMetric  # noqa: F401,F403
 from .waymo_metric import WaymoMetric  # noqa: F401,F403

-__all_ = [
+__all__ = [
     'KittiMetric', 'NuScenesMetric', 'IndoorMetric', 'LyftMetric', 'SegMetric',
     'InstanceSegMetric', 'WaymoMetric'
 ]
@@ -3,13 +3,13 @@ from collections import OrderedDict
 from typing import Dict, List, Optional, Sequence

 import numpy as np
+from mmdet.evaluation import eval_map
 from mmengine.evaluator import BaseMetric
 from mmengine.logging import MMLogger

 from mmdet3d.evaluation import indoor_eval
 from mmdet3d.registry import METRICS
 from mmdet3d.structures import get_box_type
-from mmdet.evaluation import eval_map

 @METRICS.register_module()
@@ -78,14 +78,15 @@ class IndoorMetric(BaseMetric):
             ann_infos.append(eval_ann)
             pred_results.append(sinlge_pred_results)

+        # some checkpoints may not record the key "box_type_3d"
         box_type_3d, box_mode_3d = get_box_type(
-            self.dataset_meta['box_type_3d'])
+            self.dataset_meta.get('box_type_3d', 'depth'))

         ret_dict = indoor_eval(
             ann_infos,
             pred_results,
             self.iou_thr,
-            self.dataset_meta['CLASSES'],
+            self.dataset_meta['classes'],
             logger=logger,
             box_mode_3d=box_mode_3d)
@@ -141,7 +142,7 @@ class Indoor2DMetric(BaseMetric):
             pred_labels = pred['labels'].cpu().numpy()
             dets = []
-            for label in range(len(self.dataset_meta['CLASSES'])):
+            for label in range(len(self.dataset_meta['classes'])):
                 index = np.where(pred_labels == label)[0]
                 pred_bbox_scores = np.hstack(
                     [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
@@ -170,7 +171,7 @@ class Indoor2DMetric(BaseMetric):
                 annotations,
                 scale_ranges=None,
                 iou_thr=iou_thr_2d_single,
-                dataset=self.dataset_meta['CLASSES'],
+                dataset=self.dataset_meta['classes'],
                 logger=logger)
             eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
         return eval_results
@@ -64,7 +64,7 @@ class InstanceSegMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()

-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']
         self.valid_class_ids = self.dataset_meta['seg_valid_class_ids']

         gt_semantic_masks = []
...
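A minimal sketch of the consumer-side effect of the metainfo key rename applied across these metrics (key names from the diff; the class tuple is illustrative):

# v1.1.0rc2 dataset metainfo uses lowercase keys, and older checkpoints may
# lack 'box_type_3d', hence the .get() fallback in IndoorMetric above.
dataset_meta = {'classes': ('Pedestrian', 'Cyclist', 'Car')}  # illustrative
classes = dataset_meta['classes']  # formerly dataset_meta['CLASSES']
box_type_3d = dataset_meta.get('box_type_3d', 'depth')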
@@ -36,6 +36,10 @@ class KittiMetric(BaseMetric):
             If not specified, a temp file will be created. Default: None.
         default_cam_key (str, optional): The default camera for lidar to
             camera conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT.
+        format_only (bool): Format the output results without performing
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
             Default: None.
@@ -52,6 +56,7 @@ class KittiMetric(BaseMetric):
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
                  default_cam_key: str = 'CAM2',
+                 format_only: bool = False,
                  submission_prefix: str = None,
                  collect_device: str = 'cpu',
                  file_client_args: dict = dict(backend='disk')):
@@ -61,6 +66,13 @@ class KittiMetric(BaseMetric):
         self.pcd_limit_range = pcd_limit_range
         self.ann_file = ann_file
         self.pklfile_prefix = pklfile_prefix
+        self.format_only = format_only
+        if self.format_only:
+            assert submission_prefix is not None, (
+                'submission_prefix must not be None when format_only is '
+                'True, otherwise the result files will be saved to a temp '
+                'directory which will be cleaned up at the end.')
         self.submission_prefix = submission_prefix
         self.pred_box_type_3d = pred_box_type_3d
         self.default_cam_key = default_cam_key
@@ -74,68 +86,62 @@ class KittiMetric(BaseMetric):
             raise KeyError("metric should be one of 'bbox', 'img_bbox', "
                            f'but got {metric}.')

-    def convert_annos_to_kitti_annos(
-            self,
-            data_annos: list,
-            classes: list = [
-                'Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck', 'Person_sitting',
-                'Tram', 'Misc'
-            ]) -> list:
+    def convert_annos_to_kitti_annos(self, data_infos: dict) -> list:
         """Convert loading annotations to Kitti annotations.

         Args:
-            data_annos (list[dict]): Annotations loaded from ann_file.
-            classes (list[str]): Classes used in the dataset. Default used
-                ['Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
-                'Person_sitting', 'Tram', 'Misc'].
+            data_infos (dict): Data infos including metainfo and annotations
+                loaded from ann_file.

         Returns:
             List[dict]: List of Kitti annotations.
         """
-        assert 'instances' in data_annos[0]
-        for i, annos in enumerate(data_annos):
-            if len(annos['instances']) == 0:
-                kitti_annos = {
-                    'name': np.array([]),
-                    'truncated': np.array([]),
-                    'occluded': np.array([]),
-                    'alpha': np.array([]),
-                    'bbox': np.zeros([0, 4]),
-                    'dimensions': np.zeros([0, 3]),
-                    'location': np.zeros([0, 3]),
-                    'rotation_y': np.array([]),
-                    'score': np.array([]),
-                }
-            else:
-                kitti_annos = {
-                    'name': [],
-                    'truncated': [],
-                    'occluded': [],
-                    'alpha': [],
-                    'bbox': [],
-                    'location': [],
-                    'dimensions': [],
-                    'rotation_y': [],
-                    'score': []
-                }
-                for instance in annos['instances']:
-                    labels = instance['bbox_label']
-                    if labels == -1:
-                        kitti_annos['name'].append('DontCare')
-                    else:
-                        kitti_annos['name'].append(classes[labels])
-                    kitti_annos['truncated'].append(instance['truncated'])
-                    kitti_annos['occluded'].append(instance['occluded'])
-                    kitti_annos['alpha'].append(instance['alpha'])
-                    kitti_annos['bbox'].append(instance['bbox'])
-                    kitti_annos['location'].append(instance['bbox_3d'][:3])
-                    kitti_annos['dimensions'].append(instance['bbox_3d'][3:6])
-                    kitti_annos['rotation_y'].append(instance['bbox_3d'][6])
-                    kitti_annos['score'].append(instance['score'])
-            for name in kitti_annos:
-                kitti_annos[name] = np.array(kitti_annos[name])
-            data_annos[i]['kitti_annos'] = kitti_annos
+        data_annos = data_infos['data_list']
+        if not self.format_only:
+            cat2label = data_infos['metainfo']['categories']
+            label2cat = dict((v, k) for (k, v) in cat2label.items())
+            assert 'instances' in data_annos[0]
+            for i, annos in enumerate(data_annos):
+                if len(annos['instances']) == 0:
+                    kitti_annos = {
+                        'name': np.array([]),
+                        'truncated': np.array([]),
+                        'occluded': np.array([]),
+                        'alpha': np.array([]),
+                        'bbox': np.zeros([0, 4]),
+                        'dimensions': np.zeros([0, 3]),
+                        'location': np.zeros([0, 3]),
+                        'rotation_y': np.array([]),
+                        'score': np.array([]),
+                    }
+                else:
+                    kitti_annos = {
+                        'name': [],
+                        'truncated': [],
+                        'occluded': [],
+                        'alpha': [],
+                        'bbox': [],
+                        'location': [],
+                        'dimensions': [],
+                        'rotation_y': [],
+                        'score': []
+                    }
+                    for instance in annos['instances']:
+                        label = instance['bbox_label']
+                        kitti_annos['name'].append(label2cat[label])
+                        kitti_annos['truncated'].append(instance['truncated'])
+                        kitti_annos['occluded'].append(instance['occluded'])
+                        kitti_annos['alpha'].append(instance['alpha'])
+                        kitti_annos['bbox'].append(instance['bbox'])
+                        kitti_annos['location'].append(instance['bbox_3d'][:3])
+                        kitti_annos['dimensions'].append(
+                            instance['bbox_3d'][3:6])
+                        kitti_annos['rotation_y'].append(
+                            instance['bbox_3d'][6])
+                        kitti_annos['score'].append(instance['score'])
+                    for name in kitti_annos:
+                        kitti_annos[name] = np.array(kitti_annos[name])
+                data_annos[i]['kitti_annos'] = kitti_annos
         return data_annos

     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
@@ -176,24 +182,29 @@ class KittiMetric(BaseMetric):
             the metrics, and the values are corresponding results.
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']

         # load annotations
-        pkl_annos = load(
-            self.ann_file, file_client_args=self.file_client_args)['data_list']
-        self.data_infos = self.convert_annos_to_kitti_annos(pkl_annos)
+        pkl_infos = load(self.ann_file, file_client_args=self.file_client_args)
+        self.data_infos = self.convert_annos_to_kitti_annos(pkl_infos)
         result_dict, tmp_dir = self.format_results(
             results,
             pklfile_prefix=self.pklfile_prefix,
             submission_prefix=self.submission_prefix,
             classes=self.classes)
+        metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.dirname(self.submission_prefix)}')
+            return metric_dict

         gt_annos = [
             self.data_infos[result['sample_idx']]['kitti_annos']
             for result in results
         ]
-        metric_dict = {}

         for metric in self.metrics:
             ap_dict = self.kitti_evaluate(
                 result_dict,
@@ -331,7 +342,7 @@ class KittiMetric(BaseMetric):
             mmengine.mkdir_or_exist(submission_prefix)

         det_annos = []
-        print('\nConverting prediction to KITTI format')
+        print('\nConverting 3D prediction to KITTI format')
         for idx, pred_dicts in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
             annos = []
@@ -457,7 +468,7 @@ class KittiMetric(BaseMetric):
         assert len(net_outputs) == len(self.data_infos), \
             'invalid list length of network outputs'
         det_annos = []
-        print('\nConverting prediction to KITTI format')
+        print('\nConverting 2D prediction to KITTI format')
         for i, bboxes_per_sample in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
             annos = []
@@ -526,7 +537,7 @@ class KittiMetric(BaseMetric):
             mmengine.mkdir_or_exist(submission_prefix)
             print(f'Saving KITTI submission to {submission_prefix}')
             for i, anno in enumerate(det_annos):
-                sample_idx = self.data_infos[i]['image']['image_idx']
+                sample_idx = sample_id_list[i]
                 cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                 with open(cur_det_file, 'w') as f:
                     bbox = anno['bbox']
...
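A hypothetical evaluator config showing how the new `format_only` switch might be used (type and argument names come from the diff; paths are placeholders):

# Dump KITTI-format result files without computing metrics.
val_evaluator = dict(
    type='KittiMetric',
    ann_file='data/kitti/kitti_infos_val.pkl',  # placeholder path
    metric='bbox',
    format_only=True,
    # must be set when format_only=True; otherwise results land in a
    # temporary directory that is cleaned up afterwards
    submission_prefix='./kitti_results')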
@@ -110,7 +110,7 @@ class LyftMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        classes = self.dataset_meta['CLASSES']
+        classes = self.dataset_meta['classes']
         self.version = self.dataset_meta['version']

         # load annotations
...
@@ -151,7 +151,7 @@ class NuScenesMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        classes = self.dataset_meta['CLASSES']
+        classes = self.dataset_meta['classes']
         self.version = self.dataset_meta['version']

         # load annotations
         self.data_infos = load(
...
@@ -36,14 +36,24 @@ class WaymoMetric(KittiMetric):
             names to disambiguate homonymous metrics of different evaluators.
             If prefix is not provided in the argument, self.default_prefix
             will be used instead. Defaults to None.
+        convert_kitti_format (bool, optional): Whether to convert the results
+            to kitti format. Now, in order to be compatible with camera-based
+            methods, defaults to True.
         pklfile_prefix (str, optional): The prefix of pkl files, including
             the file path and the prefix of filename, e.g., "a/b/prefix".
             If not specified, a temp file will be created. Default: None.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
             Default: None.
-        task: (str, optional): task for 3D detection, if cam, would filter
-            the points that outside the image.
+        load_type (str, optional): Type of loading mode during training.
+
+            - 'frame_based': Load all of the instances in the frame.
+            - 'mv_image_based': Load all of the instances in the frame and
+              need to convert to the FOV-based data type to support
+              image-based detector.
+            - 'fov_image_based': Only load the instances inside the default
+              cam, and need to convert to the FOV-based data type to support
+              image-based detector.
         default_cam_key (str, optional): The default camera for lidar to
             camera conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT.
         use_pred_sample_idx (bool, optional): In formatting results, use the
@@ -54,6 +64,11 @@ class WaymoMetric(KittiMetric):
             from different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
         file_client_args (dict): file client for reading gt in waymo format.
+            Defaults to ``dict(backend='disk')``.
+        idx2metainfo (Optional[str], optional): The file path of the metainfo
+            in waymo. It stores the mapping from sample_idx to metainfo.
+            The metainfo must contain the keys: 'idx2contextname' and
+            'idx2timestamp'. Defaults to None.
     """
     num_cams = 5
@@ -64,19 +79,28 @@ class WaymoMetric(KittiMetric):
                  split: str = 'training',
                  metric: Union[str, List[str]] = 'mAP',
                  pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
+                 convert_kitti_format: bool = True,
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
                  submission_prefix: str = None,
-                 task='lidar',
+                 load_type: str = 'frame_based',
                  default_cam_key: str = 'CAM_FRONT',
                  use_pred_sample_idx: bool = False,
                  collect_device: str = 'cpu',
-                 file_client_args: dict = dict(backend='disk')):
+                 file_client_args: dict = dict(backend='disk'),
+                 idx2metainfo: Optional[str] = None):
         self.waymo_bin_file = waymo_bin_file
         self.data_root = data_root
         self.split = split
-        self.task = task
+        self.load_type = load_type
         self.use_pred_sample_idx = use_pred_sample_idx
+        self.convert_kitti_format = convert_kitti_format
+
+        if idx2metainfo is not None:
+            self.idx2metainfo = mmengine.load(idx2metainfo)
+        else:
+            self.idx2metainfo = None
+
         super().__init__(
             ann_file=ann_file,
             metric=metric,
@@ -100,13 +124,15 @@ class WaymoMetric(KittiMetric):
             the metrics, and the values are corresponding results.
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']

         # load annotations
         self.data_infos = load(self.ann_file)['data_list']
+        assert len(results) == len(self.data_infos), \
+            'invalid list length of network outputs'
         # different from kitti, waymo do not need to convert the ann file
-        # handle the mono3d task
-        if self.task == 'mono3d':
+        # handle the mv_image_based load_type
+        if self.load_type == 'mv_image_based':
             new_data_infos = []
             for info in self.data_infos:
                 height = info['images'][self.default_cam_key]['height']
@@ -131,7 +157,7 @@ class WaymoMetric(KittiMetric):
                 # TODO check if need to modify the sample id
                 # TODO check when will use it except for evaluation.
-                camera_info['sample_id'] = info['sample_id']
+                camera_info['sample_idx'] = info['sample_idx']
                 new_data_infos.append(camera_info)
             self.data_infos = new_data_infos
@@ -142,8 +168,6 @@ class WaymoMetric(KittiMetric):
             eval_tmp_dir = None
             pklfile_prefix = self.pklfile_prefix

-        # load annotations
-
         result_dict, tmp_dir = self.format_results(
             results,
             pklfile_prefix=pklfile_prefix,
@@ -186,11 +210,7 @@ class WaymoMetric(KittiMetric):
                    f'compute_detection_metrics_main {pklfile_prefix}.bin ' + \
                    f'{self.waymo_bin_file}'
         print(eval_str)
-        ret_bytes = subprocess.check_output(
-            'mmdet3d/evaluation/functional/waymo_utils/' +
-            f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
-            f'{self.waymo_bin_file}',
-            shell=True)
+        ret_bytes = subprocess.check_output(eval_str, shell=True)
         ret_texts = ret_bytes.decode('utf-8')
         print_log(ret_texts, logger=logger)
@@ -292,7 +312,7 @@ class WaymoMetric(KittiMetric):
                        pklfile_prefix: str = None,
                        submission_prefix: str = None,
                        classes: List[str] = None):
-        """Format the results to pkl file.
+        """Format the results to bin file.

         Args:
             results (list[dict]): Testing results of the
@@ -313,9 +333,22 @@ class WaymoMetric(KittiMetric):
             the formatted result, tmp_dir is the temporary directory created
             for saving json files when jsonfile_prefix is not specified.
         """
-        result_files, tmp_dir = super().format_results(results, pklfile_prefix,
-                                                       submission_prefix,
-                                                       classes)
+        waymo_save_tmp_dir = tempfile.TemporaryDirectory()
+        waymo_results_save_dir = waymo_save_tmp_dir.name
+        waymo_results_final_path = f'{pklfile_prefix}.bin'
+
+        if self.convert_kitti_format:
+            results_kitti_format, tmp_dir = super().format_results(
+                results, pklfile_prefix, submission_prefix, classes)
+            final_results = results_kitti_format['pred_instances_3d']
+        else:
+            final_results = results
+            for i, res in enumerate(final_results):
+                # Actually, `sample_idx` here is the filename without suffix.
+                # It's for identifying the sample in formatting.
+                res['sample_idx'] = self.data_infos[i]['sample_idx']
+                res['pred_instances_3d']['bboxes_3d'].limit_yaw(
+                    offset=0.5, period=np.pi * 2)

         waymo_root = self.data_root
         if self.split == 'training':
@@ -326,21 +359,23 @@ class WaymoMetric(KittiMetric):
             prefix = '2'
         else:
             raise ValueError('Not supported split value.')
-        waymo_save_tmp_dir = tempfile.TemporaryDirectory()
-        waymo_results_save_dir = waymo_save_tmp_dir.name
-        waymo_results_final_path = f'{pklfile_prefix}.bin'
-        from ..functional.waymo_utils.prediction_kitti_to_waymo import \
-            KITTI2Waymo
-        converter = KITTI2Waymo(
-            result_files['pred_instances_3d'],
+
+        from ..functional.waymo_utils.prediction_to_waymo import \
+            Prediction2Waymo
+        converter = Prediction2Waymo(
+            final_results,
             waymo_tfrecords_dir,
             waymo_results_save_dir,
             waymo_results_final_path,
             prefix,
-            file_client_args=self.file_client_args)
+            classes,
+            file_client_args=self.file_client_args,
+            from_kitti_format=self.convert_kitti_format,
+            idx2metainfo=self.idx2metainfo)
         converter.convert()
         waymo_save_tmp_dir.cleanup()

-        return result_files, waymo_save_tmp_dir
+        return final_results, waymo_save_tmp_dir

     def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
                                cam0_info: dict):
@@ -379,7 +414,7 @@ class WaymoMetric(KittiMetric):
             torch.from_numpy(box_dict['box3d_lidar']).cuda())
         scores = torch.from_numpy(box_dict['scores']).cuda()
         labels = torch.from_numpy(box_dict['label_preds']).long().cuda()
-        nms_scores = scores.new_zeros(scores.shape[0], len(self.CLASSES) + 1)
+        nms_scores = scores.new_zeros(scores.shape[0], len(self.classes) + 1)
         indices = labels.new_tensor(list(range(scores.shape[0])))
         nms_scores[indices, labels] = scores
         lidar_boxes3d_for_nms = xywhr2xyxyr(lidar_boxes3d.bev)
@@ -397,7 +432,7 @@ class WaymoMetric(KittiMetric):
         lidar2cam = cam0_info['images'][self.default_cam_key]['lidar2img']
         lidar2cam = np.array(lidar2cam).astype(np.float32)
         box_preds_camera = box_preds_lidar.convert_to(
-            Box3DMode.CAM, np.linalg.inv(lidar2cam), correct_yaw=True)
+            Box3DMode.CAM, lidar2cam, correct_yaw=True)
         # Note: bbox is meaningless in final evaluation, set to 0
         merged_box_dict = dict(
             bbox=np.zeros([box_preds_lidar.tensor.shape[0], 4]),
@@ -405,7 +440,7 @@ class WaymoMetric(KittiMetric):
             box3d_lidar=box_preds_lidar.tensor.numpy(),
             scores=scores.numpy(),
             label_preds=labels.numpy(),
-            sample_idx=box_dict['sample_id'],
+            sample_idx=box_dict['sample_idx'],
         )
         return merged_box_dict
@@ -431,8 +466,6 @@ class WaymoMetric(KittiMetric):
         Returns:
             list[dict]: A list of dictionaries with the kitti format.
         """
-        assert len(net_outputs) == len(self.data_infos), \
-            'invalid list length of network outputs'
         if submission_prefix is not None:
             mmengine.mkdir_or_exist(submission_prefix)
@@ -444,7 +477,7 @@ class WaymoMetric(KittiMetric):
             sample_idx = sample_id_list[idx]
             info = self.data_infos[sample_idx]
-            if self.task == 'mono_det':
+            if self.load_type == 'mv_image_based':
                 if idx % self.num_cams == 0:
                     box_dict_per_frame = []
                     cam0_key = list(info['images'].keys())[0]
@@ -461,7 +494,7 @@ class WaymoMetric(KittiMetric):
             # If you want to use another camera, please modify it.
             image_shape = (info['images'][self.default_cam_key]['height'],
                            info['images'][self.default_cam_key]['width'])
-            if self.task == 'mono3d':
+            if self.load_type == 'mv_image_based':
                 box_dict_per_frame.append(box_dict)
                 if (idx + 1) % self.num_cams != 0:
                     continue
@@ -544,7 +577,7 @@ class WaymoMetric(KittiMetric):
             # In waymo validation sample_idx in prediction is 000xxx
             # but in info file it is 1000xxx
             save_sample_idx = box_dict['sample_idx']
-            annos[-1]['sample_id'] = np.array(
+            annos[-1]['sample_idx'] = np.array(
                 [save_sample_idx] * len(annos[-1]['score']), dtype=np.int64)
             det_annos += annos
@@ -561,12 +594,12 @@ class WaymoMetric(KittiMetric):
     def convert_valid_bboxes(self, box_dict: dict, info: dict):
         """Convert the predicted boxes into valid ones. Should handle the
-        different task mode (mono3d, mv3d, lidar), separately.
+        different load_type (frame_based, mv_image_based, fov_image_based),
+        separately.

         Args:
             box_dict (dict): Box dictionaries to be converted.

-                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
+                - bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
                 - scores_3d (torch.Tensor): Scores of boxes.
                 - labels_3d (torch.Tensor): Class labels of boxes.

             info (dict): Data info.
@@ -587,7 +620,7 @@ class WaymoMetric(KittiMetric):
         box_preds = box_dict['bboxes_3d']
         scores = box_dict['scores_3d']
         labels = box_dict['labels_3d']
-        sample_idx = info['sample_id']
+        sample_idx = info['sample_idx']
         box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
         if len(box_preds) == 0:
@@ -598,11 +631,11 @@ class WaymoMetric(KittiMetric):
                 scores=np.zeros([0]),
                 label_preds=np.zeros([0, 4]),
                 sample_idx=sample_idx)
-        # Here default used 'CAM2' to compute metric. If you want to
+        # Here default used 'CAM_FRONT' to compute metric. If you want to
         # use another camera, please modify it.
-        if self.task in ['mv3d', 'lidar']:
+        if self.load_type in ['frame_based', 'fov_image_based']:
             cam_key = self.default_cam_key
-        elif self.task == 'mono3d':
+        elif self.load_type == 'mv_image_based':
             cam_key = list(info['images'].keys())[0]
         else:
             raise NotImplementedError
@@ -635,12 +668,12 @@ class WaymoMetric(KittiMetric):
             (box_2d_preds[:, 1] < image_shape[0]) &
             (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
         # check box_preds_lidar
-        if self.task in ['lidar', 'mono3d']:
+        if self.load_type in ['frame_based']:
             limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
             valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
                               (box_preds_lidar.center < limit_range[3:]))
             valid_inds = valid_pcd_inds.all(-1)
-        elif self.task == 'mono3d':
+        if self.load_type in ['mv_image_based', 'fov_image_based']:
             valid_inds = valid_cam_inds
         if valid_inds.sum() > 0:
...
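For orientation, a hedged sketch combining the reworked WaymoMetric arguments (argument names come from the diff; paths and values are placeholders, not from the commit):

# Illustrative evaluator config; every path below is a placeholder.
val_evaluator = dict(
    type='WaymoMetric',
    ann_file='data/waymo/waymo_infos_val.pkl',
    waymo_bin_file='data/waymo/waymo_gt.bin',
    data_root='data/waymo/waymo_format',
    metric='mAP',
    load_type='frame_based',     # or 'mv_image_based' / 'fov_image_based'
    convert_kitti_format=False,  # True keeps the KITTI intermediate step
    idx2metainfo=None)           # optional path to a sample_idx -> metainfo pkl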
 # Copyright (c) OpenMMLab. All rights reserved.
 from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt

 from .dgcnn import DGCNNBackbone
 from .dla import DLANet
 from .mink_resnet import MinkResNet
...
@@ -5,28 +5,25 @@ try:
     import MinkowskiEngine as ME
     from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
 except ImportError:
-    import warnings
-    warnings.warn(
-        'Please follow `getting_started.md` to install MinkowskiEngine.`')
     # blocks are used in the static part of MinkResNet
-    BasicBlock, Bottleneck = None, None
+    ME = BasicBlock = Bottleneck = None

 import torch.nn as nn

-from mmdet3d.models.builder import BACKBONES
+from mmdet3d.registry import MODELS

-@BACKBONES.register_module()
+@MODELS.register_module()
 class MinkResNet(nn.Module):
     r"""Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets
     <https://arxiv.org/abs/1904.08755>`_ for more details.

     Args:
         depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
-        in_channels (ont): Number of input channels, 3 for RGB.
-        num_stages (int, optional): Resnet stages. Default: 4.
-        pool (bool, optional): Add max pooling after first conv if True.
-            Default: True.
+        in_channels (int): Number of input channels, 3 for RGB.
+        num_stages (int): Resnet stages. Defaults to 4.
+        pool (bool): Whether to add max pooling after first conv.
+            Defaults to True.
     """
     arch_settings = {
         18: (BasicBlock, (2, 2, 2, 2)),
@@ -38,6 +35,10 @@ class MinkResNet(nn.Module):

     def __init__(self, depth, in_channels, num_stages=4, pool=True):
         super(MinkResNet, self).__init__()
+        if ME is None:
+            raise ImportError(
+                'Please follow `getting_started.md` to install MinkowskiEngine.'  # noqa: E501
+            )
         if depth not in self.arch_settings:
             raise KeyError(f'invalid depth {depth} for resnet')
         assert 4 >= num_stages >= 1
@@ -58,7 +59,7 @@ class MinkResNet(nn.Module):
         for i, num_blocks in enumerate(stage_blocks):
             setattr(
-                self, f'layer{i}',
+                self, f'layer{i + 1}',
                 self._make_layer(block, 64 * 2**i, stage_blocks[i], stride=2))

     def init_weights(self):
@@ -111,6 +112,6 @@ class MinkResNet(nn.Module):
             x = self.maxpool(x)
         outs = []
         for i in range(self.num_stages):
-            x = getattr(self, f'layer{i}')(x)
+            x = getattr(self, f'layer{i + 1}')(x)
             outs.append(x)
         return outs
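The MinkowskiEngine change above swaps an import-time warning for a deferred ImportError; a self-contained sketch of the same pattern with generic names (not from the repo):

# The module stays importable without the optional dependency; only
# instantiating the class that needs it raises.
try:
    import MinkowskiEngine as ME
except ImportError:
    ME = None

class SparseBackbone:  # hypothetical class
    def __init__(self):
        if ME is None:
            raise ImportError('Please install MinkowskiEngine first.')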
 # Copyright (c) OpenMMLab. All rights reserved.
-from mmdet3d.registry import MODELS
 from mmdet.models.backbones import RegNet
+from mmdet3d.registry import MODELS

 @MODELS.register_module()
 class NoStemRegNet(RegNet):
...
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
 from numbers import Number
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Union

 import numpy as np
 import torch
 from mmcv.ops import Voxelization
+from mmdet.models import DetDataPreprocessor
 from mmengine.model import stack_batch
 from mmengine.utils import is_list_of
 from torch.nn import functional as F

 from mmdet3d.registry import MODELS
 from mmdet3d.utils import OptConfigType
-from mmdet.models import DetDataPreprocessor
 from .utils import multiview_img_stack_batch
@@ -28,24 +28,25 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     - 1) For image data:

       - Pad images in inputs to the maximum size of current batch with defined
         ``pad_value``. The padding size can be divisible by a defined
-        ``pad_size_divisor``
+        ``pad_size_divisor``.
       - Stack images in inputs to batch_imgs.
       - Convert images in inputs from bgr to rgb if the shape of input is
         (3, H, W).
       - Normalize images in inputs with defined std and mean.
       - Do batch augmentations during training.

     - 2) For point cloud data:

-      - if no voxelization, directly return list of point cloud data.
-      - if voxelization is applied, voxelize point cloud according to
+      - If no voxelization, directly return list of point cloud data.
+      - If voxelization is applied, voxelize point cloud according to
         ``voxel_type`` and obtain ``voxels``.

     Args:
-        voxel (bool): Whether to apply voxelziation to point cloud.
+        voxel (bool): Whether to apply voxelization to point cloud.
+            Defaults to False.
         voxel_type (str): Voxelization type. Two voxelization types are
             provided: 'hard' and 'dynamic', respectively for hard
             voxelization and dynamic voxelization. Defaults to 'hard'.
-        voxel_layer (:obj:`ConfigDict`, optional): Voxelization layer
+        voxel_layer (dict or :obj:`ConfigDict`, optional): Voxelization layer
             config. Defaults to None.
         mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
             Defaults to None.
@@ -54,11 +55,21 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         pad_size_divisor (int): The size of padded image should be
             divisible by ``pad_size_divisor``. Defaults to 1.
         pad_value (Number): The padded pixel value. Defaults to 0.
-        bgr_to_rgb (bool): whether to convert image from BGR to RGB.
+        pad_mask (bool): Whether to pad instance masks. Defaults to False.
+        mask_pad_value (int): The padded pixel value for instance masks.
+            Defaults to 0.
+        pad_seg (bool): Whether to pad semantic segmentation maps.
+            Defaults to False.
+        seg_pad_value (int): The padded pixel value for semantic
+            segmentation maps. Defaults to 255.
+        bgr_to_rgb (bool): Whether to convert image from BGR to RGB.
             Defaults to False.
-        rgb_to_bgr (bool): whether to convert image from RGB to RGB.
+        rgb_to_bgr (bool): Whether to convert image from RGB to BGR.
             Defaults to False.
-        batch_augments (list[dict], optional): Batch-level augmentations
+        boxtype2tensor (bool): Whether to keep the ``BaseBoxes`` type of
+            bboxes data or not. Defaults to True.
+        batch_augments (List[dict], optional): Batch-level augmentations.
+            Defaults to None.
     """

     def __init__(self,
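A hedged configuration sketch exercising the documented arguments (names from the docstring above; the voxel-layer numbers are illustrative KITTI-style values, not part of the diff):

data_preprocessor = dict(
    type='Det3DDataPreprocessor',
    voxel=True,
    voxel_type='hard',
    voxel_layer=dict(  # illustrative values
        max_num_points=32,
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        voxel_size=[0.05, 0.05, 0.1],
        max_voxels=(16000, 40000)),
    mean=[123.675, 116.28, 103.53],  # illustrative normalization stats
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_size_divisor=32)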
@@ -76,8 +87,8 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                  bgr_to_rgb: bool = False,
                  rgb_to_bgr: bool = False,
                  boxtype2tensor: bool = True,
-                 batch_augments: Optional[List[dict]] = None):
-        super().__init__(
+                 batch_augments: Optional[List[dict]] = None) -> None:
+        super(Det3DDataPreprocessor, self).__init__(
             mean=mean,
             std=std,
             pad_size_divisor=pad_size_divisor,
@@ -94,24 +105,21 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if voxel:
             self.voxel_layer = Voxelization(**voxel_layer)

-    def forward(
-        self,
-        data: Union[dict, List[dict]],
-        training: bool = False
-    ) -> Tuple[Union[dict, List[dict]], Optional[list]]:
-        """Perform normalization、padding and bgr2rgb conversion based on
+    def forward(self,
+                data: Union[dict, List[dict]],
+                training: bool = False) -> Union[dict, List[dict]]:
+        """Perform normalization, padding and bgr2rgb conversion based on
         ``BaseDataPreprocessor``.

         Args:
-            data (dict | List[dict]): data from dataloader.
+            data (dict or List[dict]): Data from dataloader.
                 The dict contains the whole batch data, when it is
                 a list[dict], the list indicates test time augmentation.
             training (bool): Whether to enable training time augmentation.
                 Defaults to False.

         Returns:
-            Dict | List[Dict]: Data in the same format as the model input.
+            dict or List[dict]: Data in the same format as the model input.
         """
         if isinstance(data, list):
             num_augs = len(data)
@@ -126,7 +134,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return self.simple_process(data, training)

     def simple_process(self, data: dict, training: bool = False) -> dict:
-        """Perform normalization、padding and bgr2rgb conversion for img data
+        """Perform normalization, padding and bgr2rgb conversion for img data
         based on ``BaseDataPreprocessor``, and voxelize point cloud if `voxel`
         is set to be True.
@@ -188,7 +196,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return {'inputs': batch_inputs, 'data_samples': data_samples}

-    def preprocess_img(self, _batch_img):
+    def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
         # channel transform
         if self._channel_conversion:
             _batch_img = _batch_img[[2, 1, 0], ...]
@@ -206,7 +214,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return _batch_img

     def collate_data(self, data: dict) -> dict:
-        """Copying data to the target device and Performs normalization
+        """Copying data to the target device and Performs normalization,
         padding and bgr2rgb conversion and stack based on
         ``BaseDataPreprocessor``.
@@ -273,7 +281,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                 raise TypeError(
                     'Output of `cast_data` should be a list of dict '
                     'or a tuple with inputs and data_samples, but got'
-                    f'{type(data)} {data}')
+                    f'{type(data)}: {data}')

         data['inputs']['imgs'] = batch_imgs
@@ -284,14 +292,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     def _get_pad_shape(self, data: dict) -> List[tuple]:
         """Get the pad_shape of each image based on data and
         pad_size_divisor."""
-        # rewrite `_get_pad_shape` for obaining image inputs.
+        # rewrite `_get_pad_shape` for obtaining image inputs.
         _batch_inputs = data['inputs']['img']
         # Process data with `pseudo_collate`.
         if is_list_of(_batch_inputs, torch.Tensor):
             batch_pad_shape = []
             for ori_input in _batch_inputs:
                 if ori_input.dim() == 4:
-                    # mean multiivew input, select ont of the
+                    # mean multiview input, select one of the
                     # image to calculate the pad shape
                     ori_input = ori_input[0]
                 pad_h = int(
@@ -316,24 +324,24 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             batch_pad_shape = [(pad_h, pad_w)] * _batch_inputs.shape[0]
         else:
             raise TypeError('Output of `cast_data` should be a list of dict '
-                            'or a tuple with inputs and data_samples, but got'
+                            'or a tuple with inputs and data_samples, but got '
                             f'{type(data)}: {data}')
         return batch_pad_shape

     @torch.no_grad()
-    def voxelize(self, points: List[torch.Tensor]) -> Dict:
+    def voxelize(self, points: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
         """Apply voxelization to point cloud.

         Args:
             points (List[Tensor]): Point cloud in one data batch.

         Returns:
-            dict[str, Tensor]: Voxelization information.
+            Dict[str, Tensor]: Voxelization information.

-            - voxels (Tensor): Features of voxels, shape is MXNxC for hard
-              voxelization, NXC for dynamic voxelization.
+            - voxels (Tensor): Features of voxels, shape is MxNxC for hard
+              voxelization, NxC for dynamic voxelization.
             - coors (Tensor): Coordinates of voxels, shape is Nx(1+NDim),
               where 1 represents the batch index.
             - num_points (Tensor, optional): Number of points in each voxel.
             - voxel_centers (Tensor, optional): Centers of voxels.
         """
@@ -342,43 +350,38 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if self.voxel_type == 'hard':
             voxels, coors, num_points, voxel_centers = [], [], [], []
-            for res in points:
+            for i, res in enumerate(points):
                 res_voxels, res_coors, res_num_points = self.voxel_layer(res)
                 res_voxel_centers = (
                     res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
                         self.voxel_layer.voxel_size) + res_voxels.new_tensor(
                             self.voxel_layer.point_cloud_range[0:3])
+                res_coors = F.pad(res_coors, (1, 0), mode='constant', value=i)
                 voxels.append(res_voxels)
                 coors.append(res_coors)
                 num_points.append(res_num_points)
                 voxel_centers.append(res_voxel_centers)

             voxels = torch.cat(voxels, dim=0)
+            coors = torch.cat(coors, dim=0)
             num_points = torch.cat(num_points, dim=0)
             voxel_centers = torch.cat(voxel_centers, dim=0)
-            coors_batch = []
-            for i, coor in enumerate(coors):
-                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-                coors_batch.append(coor_pad)
-            coors_batch = torch.cat(coors_batch, dim=0)

             voxel_dict['num_points'] = num_points
             voxel_dict['voxel_centers'] = voxel_centers
         elif self.voxel_type == 'dynamic':
             coors = []
             # dynamic voxelization only provide a coors mapping
-            for res in points:
+            for i, res in enumerate(points):
                 res_coors = self.voxel_layer(res)
+                res_coors = F.pad(res_coors, (1, 0), mode='constant', value=i)
                 coors.append(res_coors)
             voxels = torch.cat(points, dim=0)
-            coors_batch = []
-            for i, coor in enumerate(coors):
-                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-                coors_batch.append(coor_pad)
-            coors_batch = torch.cat(coors_batch, dim=0)
+            coors = torch.cat(coors, dim=0)
         else:
             raise ValueError(f'Invalid voxelization type {self.voxel_type}')

         voxel_dict['voxels'] = voxels
-        voxel_dict['coors'] = coors_batch
+        voxel_dict['coors'] = coors
         return voxel_dict
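The refactor above folds the batch-index padding into the main loop instead of a second pass; a standalone sketch of the pattern (shapes illustrative):

import torch
import torch.nn.functional as F

# Two samples with 5 and 7 voxel coordinates (z, y, x) each.
per_sample_coors = [torch.zeros(5, 3, dtype=torch.int32),
                    torch.ones(7, 3, dtype=torch.int32)]
coors = []
for i, res_coors in enumerate(per_sample_coors):
    # (1, 0) pads one column on the left of the last dim with the batch index
    coors.append(F.pad(res_coors, (1, 0), mode='constant', value=i))
coors = torch.cat(coors, dim=0)  # shape (12, 4); column 0 is the batch index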
@@ -12,7 +12,7 @@ def multiview_img_stack_batch(
     """
     Compared to the stack_batch in mmengine.model.utils,
     multiview_img_stack_batch further handles the multiview images.
-    see diff of padded_sizes[:, :-2] = 0 vs padded_sizees[:, 0] = 0 in line 47
+    see diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47

     Stack multiple tensors to form a batch and pad the tensor to the max
     shape using the right bottom padding mode in these images. If
     ``pad_size_divisor > 0``, add padding to ensure the shape of each dim is
@@ -23,20 +23,20 @@ def multiview_img_stack_batch(
         pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding
             to ensure the shape of each dim is divisible by
             ``pad_size_divisor``. This depends on the model, and many
-            models need to be divisible by 32. Defaults to 1
-        pad_value (int, float): The padding value. Defaults to 0.
+            models need to be divisible by 32. Defaults to 1.
+        pad_value (int or float): The padding value. Defaults to 0.

     Returns:
         Tensor: The n dim tensor.
     """
     assert isinstance(
         tensor_list,
-        list), (f'Expected input type to be list, but got {type(tensor_list)}')
+        list), f'Expected input type to be list, but got {type(tensor_list)}'
     assert tensor_list, '`tensor_list` could not be an empty list'
     assert len({
         tensor.ndim
         for tensor in tensor_list
-    }) == 1, (f'Expected the dimensions of all tensors must be the same, '
+    }) == 1, ('Expected the dimensions of all tensors must be the same, '
               f'but got {[tensor.ndim for tensor in tensor_list]}')

     dim = tensor_list[0].dim()
@@ -46,7 +46,7 @@ def multiview_img_stack_batch(
     max_sizes = torch.ceil(
         torch.max(all_sizes, dim=0)[0] / pad_size_divisor) * pad_size_divisor
     padded_sizes = max_sizes - all_sizes
+    # The first dim normally means channel, which should not be padded.
     padded_sizes[:, :-2] = 0
     if padded_sizes.sum() == 0:
         return torch.stack(tensor_list)
...
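An illustrative call of the utility documented above (import path and shapes assumed, not taken from the diff):

import torch
# assumed module path of the helper
from mmdet3d.models.data_preprocessors.utils import multiview_img_stack_batch

# two samples, each with 6 camera views of slightly different sizes
imgs = [torch.rand(6, 3, 370, 1220), torch.rand(6, 3, 376, 1240)]
batch = multiview_img_stack_batch(imgs, pad_size_divisor=32)
# only H and W get right-bottom padding (to 384 x 1248)
assert batch.shape == (2, 6, 3, 384, 1248)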
@@ -41,19 +41,20 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
     Args:
         channels (int): Channels after modules, before conv_seg.
         num_classes (int): Number of classes.
-        dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5.
-        conv_cfg (dict, optional): Config of conv layers.
-            Default: dict(type='Conv1d').
-        norm_cfg (dict, optional): Config of norm layers.
-            Default: dict(type='BN1d').
-        act_cfg (dict, optional): Config of activation layers.
-            Default: dict(type='ReLU').
-        loss_decode (dict, optional): Config of decode loss.
-            Default: dict(type='CrossEntropyLoss').
-        ignore_index (int, optional): The label index to be ignored.
+        dropout_ratio (float): Ratio of dropout layer. Defaults to 0.5.
+        conv_cfg (dict): Config of conv layers.
+            Defaults to dict(type='Conv1d').
+        norm_cfg (dict): Config of norm layers.
+            Defaults to dict(type='BN1d').
+        act_cfg (dict): Config of activation layers.
+            Defaults to dict(type='ReLU').
+        loss_decode (dict): Config of decode loss.
+            Defaults to dict(type='CrossEntropyLoss').
+        ignore_index (int): The label index to be ignored.
             When using masked BCE loss, ignore_index should be set to None.
-            Default: 255.
+            Defaults to 255.
         init_cfg (dict or list[dict], optional): Initialization config dict.
+            Defaults to None.
     """

     def __init__(self,
@@ -86,8 +87,6 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         else:
             self.dropout = None

-        self.fp16_enabled = False
-
     def init_weights(self):
         """Initialize weights of classification layer."""
         super().init_weights()
@@ -105,15 +104,15 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         output = self.conv_seg(feat)
         return output

-    def loss(self, inputs: List[Tensor],
-             batch_data_samples: SampleList) -> dict:
+    def loss(self, inputs: List[Tensor], batch_data_samples: SampleList,
+             train_cfg: ConfigType) -> dict:
         """Forward function for training.

         Args:
             inputs (list[torch.Tensor]): List of multi-level point features.
-            img_metas (list[dict]): Meta information of each sample.
-            pts_semantic_mask (torch.Tensor): Semantic segmentation masks
-                used if the architecture supports semantic segmentation task.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The seg
+                data samples. It usually includes information such
+                as `metainfo` and `gt_pts_seg`.
             train_cfg (dict): The training config.

         Returns:
@@ -129,7 +128,9 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         Args:
             inputs (list[Tensor]): List of multi-level point features.
-            batch_img_metas (list[dict]): Meta information of each sample.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The seg
+                data samples. It usually includes information such
+                as `metainfo` and `gt_pts_seg`.
             test_cfg (dict): The testing config.

         Returns:
...
...@@ -5,6 +5,7 @@ from .base_3d_dense_head import Base3DDenseHead ...@@ -5,6 +5,7 @@ from .base_3d_dense_head import Base3DDenseHead
from .base_conv_bbox_head import BaseConvBboxHead from .base_conv_bbox_head import BaseConvBboxHead
from .base_mono3d_dense_head import BaseMono3DDenseHead from .base_mono3d_dense_head import BaseMono3DDenseHead
from .centerpoint_head import CenterHead from .centerpoint_head import CenterHead
from .fcaf3d_head import FCAF3DHead
from .fcos_mono3d_head import FCOSMono3DHead from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead from .groupfree3d_head import GroupFree3DHead
...@@ -22,5 +23,5 @@ __all__ = [ ...@@ -22,5 +23,5 @@ __all__ = [
'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead', 'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead', 'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead', 'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
'MonoFlexHead', 'Base3DDenseHead' 'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
] ]
...@@ -4,6 +4,7 @@ from typing import List, Tuple ...@@ -4,6 +4,7 @@ from typing import List, Tuple
import numpy as np import numpy as np
import torch import torch
from mmdet.models.utils import multi_apply
from torch import Tensor from torch import Tensor
from torch import nn as nn from torch import nn as nn
...@@ -12,7 +13,6 @@ from mmdet3d.models.test_time_augs import merge_aug_bboxes_3d ...@@ -12,7 +13,6 @@ from mmdet3d.models.test_time_augs import merge_aug_bboxes_3d
from mmdet3d.registry import MODELS, TASK_UTILS from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.utils.typing import (ConfigType, InstanceList, OptConfigType, from mmdet3d.utils.typing import (ConfigType, InstanceList, OptConfigType,
OptInstanceList) OptInstanceList)
from mmdet.models.utils import multi_apply
from .base_3d_dense_head import Base3DDenseHead from .base_3d_dense_head import Base3DDenseHead
from .train_mixins import AnchorTrainMixin from .train_mixins import AnchorTrainMixin
......
...@@ -4,13 +4,13 @@ from typing import Any, List, Sequence, Tuple, Union ...@@ -4,13 +4,13 @@ from typing import Any, List, Sequence, Tuple, Union
import torch import torch
from mmcv.cnn import ConvModule from mmcv.cnn import ConvModule
from mmdet.models.utils import multi_apply
from mmengine.model import bias_init_with_prob, normal_init from mmengine.model import bias_init_with_prob, normal_init
from torch import Tensor from torch import Tensor
from torch import nn as nn from torch import nn as nn
from mmdet3d.registry import MODELS from mmdet3d.registry import MODELS
from mmdet3d.utils import ConfigType, InstanceList, OptConfigType from mmdet3d.utils import ConfigType, InstanceList, OptConfigType
from mmdet.models.utils import multi_apply
from .base_mono3d_dense_head import BaseMono3DDenseHead from .base_mono3d_dense_head import BaseMono3DDenseHead
......
...@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple ...@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple
import numpy as np import numpy as np
import torch import torch
from mmdet.models.utils import select_single_mlvl
from mmengine.config import ConfigDict from mmengine.config import ConfigDict
from mmengine.model import BaseModule, constant_init from mmengine.model import BaseModule, constant_init
from mmengine.structures import InstanceData from mmengine.structures import InstanceData
...@@ -13,7 +14,6 @@ from mmdet3d.models.layers import box3d_multiclass_nms ...@@ -13,7 +14,6 @@ from mmdet3d.models.layers import box3d_multiclass_nms
from mmdet3d.structures import limit_period, xywhr2xyxyr from mmdet3d.structures import limit_period, xywhr2xyxyr
from mmdet3d.structures.det3d_data_sample import SampleList from mmdet3d.structures.det3d_data_sample import SampleList
from mmdet3d.utils.typing import InstanceList, OptMultiConfig from mmdet3d.utils.typing import InstanceList, OptMultiConfig
from mmdet.models.utils import select_single_mlvl
class Base3DDenseHead(BaseModule, metaclass=ABCMeta): class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
......
...@@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union ...@@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union
import torch import torch
from mmcv.cnn import ConvModule, build_conv_layer from mmcv.cnn import ConvModule, build_conv_layer
from mmdet.models.utils import multi_apply
from mmengine.model import BaseModule from mmengine.model import BaseModule
from mmengine.structures import InstanceData from mmengine.structures import InstanceData
from torch import Tensor, nn from torch import Tensor, nn
...@@ -12,7 +13,6 @@ from mmdet3d.models.utils import (clip_sigmoid, draw_heatmap_gaussian, ...@@ -12,7 +13,6 @@ from mmdet3d.models.utils import (clip_sigmoid, draw_heatmap_gaussian,
gaussian_radius) gaussian_radius)
from mmdet3d.registry import MODELS, TASK_UTILS from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr
from mmdet.models.utils import multi_apply
from .. import builder from .. import builder
from ..layers import circle_nms, nms_bev from ..layers import circle_nms, nms_bev
......
# Copyright (c) OpenMMLab. All rights reserved.
# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/dense_heads/fcaf3d_neck_with_head.py # noqa
from typing import List, Optional, Tuple
try:
import MinkowskiEngine as ME
from MinkowskiEngine import SparseTensor
except ImportError:
# Please follow getting_started.md to install MinkowskiEngine.
ME = SparseTensor = None
import torch
from mmcv.cnn import Scale
from mmcv.ops import nms3d, nms3d_normal
from mmdet.utils import reduce_mean
from mmengine.model import bias_init_with_prob
from mmengine.structures import InstanceData
from torch import Tensor, nn
from mmdet3d.registry import MODELS
from mmdet3d.structures import BaseInstance3DBoxes, rotation_3d_in_axis
from mmdet3d.utils import InstanceList, OptInstanceList
from .base_3d_dense_head import Base3DDenseHead
@MODELS.register_module()
class FCAF3DHead(Base3DDenseHead):
r"""Bbox head of `FCAF3D <https://arxiv.org/abs/2112.00322>`_.
Here we store both the sparse 3D FPN (neck) and the head, as they can
not be simply separated: the pruning score on the i-th level of the
FPN requires classification scores from the (i+1)-th level of the head.
Args:
num_classes (int): Number of classes.
in_channels (int): Number of channels in input tensors.
out_channels (int): Number of channels in the neck output tensors.
num_reg_outs (int): Number of regression layer channels.
voxel_size (float): Voxel size in meters.
pts_prune_threshold (int): Pruning threshold on each feature level.
pts_assign_threshold (int): Box-to-location assigner parameter. The
assigner selects the deepest feature level that still has at least
pts_assign_threshold locations inside the box.
pts_center_threshold (int): Box-to-location assigner parameter. After
the feature level for a box is determined, the assigner keeps the
pts_center_threshold locations closest to the box center.
center_loss (dict): Config of centerness loss. Defaults to
dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True).
bbox_loss (dict): Config of bbox loss. Defaults to
dict(type='AxisAlignedIoULoss').
cls_loss (dict): Config of classification loss. Defaults to
dict(type='mmdet.FocalLoss').
train_cfg (dict, optional): Config for train stage. Defaults to None.
test_cfg (dict, optional): Config for test stage. Defaults to None.
init_cfg (dict, optional): Config for weight initialization.
Defaults to None.
"""
def __init__(self,
num_classes: int,
in_channels: int,
out_channels: int,
num_reg_outs: int,
voxel_size: float,
pts_prune_threshold: int,
pts_assign_threshold: int,
pts_center_threshold: int,
center_loss: dict = dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss: dict = dict(type='AxisAlignedIoULoss'),
cls_loss: dict = dict(type='mmdet.FocalLoss'),
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None):
super(FCAF3DHead, self).__init__(init_cfg)
if ME is None:
raise ImportError(
'Please follow `getting_started.md` to install MinkowskiEngine.' # noqa: E501
)
self.voxel_size = voxel_size
self.pts_prune_threshold = pts_prune_threshold
self.pts_assign_threshold = pts_assign_threshold
self.pts_center_threshold = pts_center_threshold
self.center_loss = MODELS.build(center_loss)
self.bbox_loss = MODELS.build(bbox_loss)
self.cls_loss = MODELS.build(cls_loss)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self._init_layers(in_channels, out_channels, num_reg_outs, num_classes)
@staticmethod
def _make_block(in_channels: int, out_channels: int) -> nn.Module:
"""Construct Conv-Norm-Act block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: The block with the corresponding layers.
"""
return nn.Sequential(
ME.MinkowskiConvolution(
in_channels, out_channels, kernel_size=3, dimension=3),
ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())
@staticmethod
def _make_up_block(in_channels: int, out_channels: int) -> nn.Module:
"""Construct DeConv-Norm-Act-Conv-Norm-Act block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: The block with the corresponding layers.
"""
return nn.Sequential(
ME.MinkowskiGenerativeConvolutionTranspose(
in_channels,
out_channels,
kernel_size=2,
stride=2,
dimension=3), ME.MinkowskiBatchNorm(out_channels),
ME.MinkowskiELU(),
ME.MinkowskiConvolution(
out_channels, out_channels, kernel_size=3, dimension=3),
ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())
def _init_layers(self, in_channels: Tuple[int], out_channels: int,
num_reg_outs: int, num_classes: int):
"""Initialize layers.
Args:
in_channels (tuple[int]): Number of channels in input tensors.
out_channels (int): Number of channels in the neck output tensors.
num_reg_outs (int): Number of regression layer channels.
num_classes (int): Number of classes.
"""
# neck layers
self.pruning = ME.MinkowskiPruning()
for i in range(len(in_channels)):
if i > 0:
self.__setattr__(
f'up_block_{i}',
self._make_up_block(in_channels[i], in_channels[i - 1]))
self.__setattr__(f'out_block_{i}',
self._make_block(in_channels[i], out_channels))
# head layers
self.conv_center = ME.MinkowskiConvolution(
out_channels, 1, kernel_size=1, dimension=3)
self.conv_reg = ME.MinkowskiConvolution(
out_channels, num_reg_outs, kernel_size=1, dimension=3)
self.conv_cls = ME.MinkowskiConvolution(
out_channels, num_classes, kernel_size=1, bias=True, dimension=3)
self.scales = nn.ModuleList(
[Scale(1.) for _ in range(len(in_channels))])
def init_weights(self):
"""Initialize weights."""
nn.init.normal_(self.conv_center.kernel, std=.01)
nn.init.normal_(self.conv_reg.kernel, std=.01)
nn.init.normal_(self.conv_cls.kernel, std=.01)
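# RetinaNet-style prior: initialize the classification bias so the
# initial foreground probability is about 0.01, which stabilizes the
# focal loss at the start of training.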
nn.init.constant_(self.conv_cls.bias, bias_init_with_prob(.01))
def forward(self, x: List[Tensor]) -> Tuple[List[Tensor], ...]:
"""Forward pass.
Args:
x (list[Tensor]): Features from the backbone.
Returns:
Tuple[List[Tensor], ...]: Predictions of the head.
"""
center_preds, bbox_preds, cls_preds, points = [], [], [], []
inputs = x
x = inputs[-1]
prune_score = None
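# Traverse levels from the deepest one upwards: upsample the previous
# output, fuse it with the lateral (skip) input, then prune locations
# using the pruning scores produced at the deeper level.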
for i in range(len(inputs) - 1, -1, -1):
if i < len(inputs) - 1:
x = self.__getattr__(f'up_block_{i + 1}')(x)
x = inputs[i] + x
x = self._prune(x, prune_score)
out = self.__getattr__(f'out_block_{i}')(x)
center_pred, bbox_pred, cls_pred, point, prune_score = \
self._forward_single(out, self.scales[i])
center_preds.append(center_pred)
bbox_preds.append(bbox_pred)
cls_preds.append(cls_pred)
points.append(point)
return center_preds[::-1], bbox_preds[::-1], cls_preds[::-1], \
points[::-1]
def _prune(self, x: SparseTensor, scores: SparseTensor) -> SparseTensor:
"""Prunes the tensor by score thresholding.
Args:
x (SparseTensor): Tensor to be pruned.
scores (SparseTensor): Scores for thresholding.
Returns:
SparseTensor: Pruned tensor.
"""
with torch.no_grad():
coordinates = x.C.float()
interpolated_scores = scores.features_at_coordinates(coordinates)
prune_mask = interpolated_scores.new_zeros(
(len(interpolated_scores)), dtype=torch.bool)
for permutation in x.decomposition_permutations:
score = interpolated_scores[permutation]
mask = score.new_zeros((len(score)), dtype=torch.bool)
topk = min(len(score), self.pts_prune_threshold)
ids = torch.topk(score.squeeze(1), topk, sorted=False).indices
mask[ids] = True
prune_mask[permutation[mask]] = True
x = self.pruning(x, prune_mask)
return x
def _forward_single(self, x: SparseTensor,
scale: Scale) -> Tuple[Tensor, ...]:
"""Forward pass per level.
Args:
x (SparseTensor): Per level neck output tensor.
scale (mmcv.cnn.Scale): Per level multiplication weight.
Returns:
tuple[Tensor]: Per level head predictions.
"""
center_pred = self.conv_center(x).features
scores = self.conv_cls(x)
cls_pred = scores.features
prune_scores = ME.SparseTensor(
scores.features.max(dim=1, keepdim=True).values,
coordinate_map_key=scores.coordinate_map_key,
coordinate_manager=scores.coordinate_manager)
reg_final = self.conv_reg(x).features
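# The first 6 regression channels are face distances; exponentiating
# the scaled values keeps them strictly positive. Remaining channels
# (if any) encode the heading angle.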
reg_distance = torch.exp(scale(reg_final[:, :6]))
reg_angle = reg_final[:, 6:]
bbox_pred = torch.cat((reg_distance, reg_angle), dim=1)
center_preds, bbox_preds, cls_preds, points = [], [], [], []
for permutation in x.decomposition_permutations:
center_preds.append(center_pred[permutation])
bbox_preds.append(bbox_pred[permutation])
cls_preds.append(cls_pred[permutation])
points = x.decomposed_coordinates
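# Sparse coordinates are in voxel units; scale them to meters.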
for i in range(len(points)):
points[i] = points[i] * self.voxel_size
return center_preds, bbox_preds, cls_preds, points, prune_scores
def _loss_by_feat_single(self, center_preds: List[Tensor],
bbox_preds: List[Tensor], cls_preds: List[Tensor],
points: List[Tensor],
gt_bboxes: BaseInstance3DBoxes, gt_labels: Tensor,
input_meta: dict) -> Tuple[Tensor, ...]:
"""Loss function of single sample.
Args:
center_preds (list[Tensor]): Centerness predictions for all levels.
bbox_preds (list[Tensor]): Bbox predictions for all levels.
cls_preds (list[Tensor]): Classification predictions for all
levels.
points (list[Tensor]): Final location coordinates for all levels.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
gt_labels (Tensor): Ground truth labels.
input_meta (dict): Scene meta info.
Returns:
tuple[Tensor, ...]: Centerness, bbox, and classification loss
values.
"""
center_targets, bbox_targets, cls_targets = self.get_targets(
points, gt_bboxes, gt_labels)
center_preds = torch.cat(center_preds)
bbox_preds = torch.cat(bbox_preds)
cls_preds = torch.cat(cls_preds)
points = torch.cat(points)
# cls loss
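# Unmatched locations carry label -1, which matches no class channel
# and is therefore treated as background by the sigmoid focal loss.
# reduce_mean averages the positive count across GPUs so avg_factor is
# consistent in distributed training.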
pos_inds = torch.nonzero(cls_targets >= 0).squeeze(1)
n_pos = points.new_tensor(len(pos_inds))
n_pos = max(reduce_mean(n_pos), 1.)
cls_loss = self.cls_loss(cls_preds, cls_targets, avg_factor=n_pos)
# bbox and centerness losses
pos_center_preds = center_preds[pos_inds]
pos_bbox_preds = bbox_preds[pos_inds]
pos_center_targets = center_targets[pos_inds].unsqueeze(1)
pos_bbox_targets = bbox_targets[pos_inds]
# reduce_mean is outside if / else block to prevent deadlock
center_denorm = max(
reduce_mean(pos_center_targets.sum().detach()), 1e-6)
if len(pos_inds) > 0:
pos_points = points[pos_inds]
center_loss = self.center_loss(
pos_center_preds, pos_center_targets, avg_factor=n_pos)
bbox_loss = self.bbox_loss(
self._bbox_to_loss(
self._bbox_pred_to_bbox(pos_points, pos_bbox_preds)),
self._bbox_to_loss(pos_bbox_targets),
weight=pos_center_targets.squeeze(1),
avg_factor=center_denorm)
else:
center_loss = pos_center_preds.sum()
bbox_loss = pos_bbox_preds.sum()
return center_loss, bbox_loss, cls_loss
def loss_by_feat(self,
center_preds: List[List[Tensor]],
bbox_preds: List[List[Tensor]],
cls_preds: List[List[Tensor]],
points: List[List[Tensor]],
batch_gt_instances_3d: InstanceList,
batch_input_metas: List[dict],
batch_gt_instances_ignore: OptInstanceList = None,
**kwargs) -> dict:
"""Loss function about feature.
Args:
center_preds (list[list[Tensor]]): Centerness predictions for
all scenes. The first list contains predictions from different
levels. The second list contains predictions in a mini-batch.
bbox_preds (list[list[Tensor]]): Bbox predictions for all scenes.
The first list contains predictions from different
levels. The second list contains predictions in a mini-batch.
cls_preds (list[list[Tensor]]): Classification predictions for all
scenes. The first list contains predictions from different
levels. The second list contains predictions in a mini-batch.
points (list[list[Tensor]]): Final location coordinates for all
scenes. The first list contains predictions from different
levels. The second list contains predictions in a mini-batch.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instance_3d. It usually includes ``bboxes_3d``, ``labels_3d``,
``depths``, ``centers_2d`` and attributes.
batch_input_metas (list[dict]): Meta information of each scene.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
data that is ignored during training and testing.
Defaults to None.
Returns:
dict: Centerness, bbox, and classification losses.
"""
center_losses, bbox_losses, cls_losses = [], [], []
for i in range(len(batch_input_metas)):
center_loss, bbox_loss, cls_loss = self._loss_by_feat_single(
center_preds=[x[i] for x in center_preds],
bbox_preds=[x[i] for x in bbox_preds],
cls_preds=[x[i] for x in cls_preds],
points=[x[i] for x in points],
input_meta=batch_input_metas[i],
gt_bboxes=batch_gt_instances_3d[i].bboxes_3d,
gt_labels=batch_gt_instances_3d[i].labels_3d)
center_losses.append(center_loss)
bbox_losses.append(bbox_loss)
cls_losses.append(cls_loss)
return dict(
center_loss=torch.mean(torch.stack(center_losses)),
bbox_loss=torch.mean(torch.stack(bbox_losses)),
cls_loss=torch.mean(torch.stack(cls_losses)))
def _predict_by_feat_single(self, center_preds: List[Tensor],
bbox_preds: List[Tensor],
cls_preds: List[Tensor], points: List[Tensor],
input_meta: dict) -> InstanceData:
"""Generate boxes for single sample.
Args:
center_preds (list[Tensor]): Centerness predictions for all levels.
bbox_preds (list[Tensor]): Bbox predictions for all levels.
cls_preds (list[Tensor]): Classification predictions for all
levels.
points (list[Tensor]): Final location coordinates for all levels.
input_meta (dict): Scene meta info.
Returns:
InstanceData: Predicted bounding boxes, scores and labels.
"""
mlvl_bboxes, mlvl_scores = [], []
for center_pred, bbox_pred, cls_pred, point in zip(
center_preds, bbox_preds, cls_preds, points):
scores = cls_pred.sigmoid() * center_pred.sigmoid()
max_scores, _ = scores.max(dim=1)
if len(scores) > self.test_cfg.nms_pre > 0:
_, ids = max_scores.topk(self.test_cfg.nms_pre)
bbox_pred = bbox_pred[ids]
scores = scores[ids]
point = point[ids]
bboxes = self._bbox_pred_to_bbox(point, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
bboxes = torch.cat(mlvl_bboxes)
scores = torch.cat(mlvl_scores)
bboxes, scores, labels = self._single_scene_multiclass_nms(
bboxes, scores, input_meta)
bboxes = input_meta['box_type_3d'](
bboxes,
box_dim=bboxes.shape[1],
with_yaw=bboxes.shape[1] == 7,
origin=(.5, .5, .5))
results = InstanceData()
results.bboxes_3d = bboxes
results.scores_3d = scores
results.labels_3d = labels
return results
def predict_by_feat(self, center_preds: List[List[Tensor]],
bbox_preds: List[List[Tensor]], cls_preds: List[List[Tensor]],
points: List[List[Tensor]],
batch_input_metas: List[dict],
**kwargs) -> List[InstanceData]:
"""Generate boxes for all scenes.
Args:
center_preds (list[list[Tensor]]): Centerness predictions for
all scenes.
bbox_preds (list[list[Tensor]]): Bbox predictions for all scenes.
cls_preds (list[list[Tensor]]): Classification predictions for all
scenes.
points (list[list[Tensor]]): Final location coordinates for all
scenes.
batch_input_metas (list[dict]): Meta infos for all scenes.
Returns:
list[InstanceData]: Predicted bboxes, scores, and labels for
all scenes.
"""
results = []
for i in range(len(batch_input_metas)):
result = self._predict_by_feat_single(
center_preds=[x[i] for x in center_preds],
bbox_preds=[x[i] for x in bbox_preds],
cls_preds=[x[i] for x in cls_preds],
points=[x[i] for x in points],
input_meta=batch_input_metas[i])
results.append(result)
return results
@staticmethod
def _bbox_to_loss(bbox: Tensor) -> Tensor:
"""Transform box to the axis-aligned or rotated iou loss format.
Args:
bbox (Tensor): 3D box of shape (N, 6) or (N, 7).
Returns:
Tensor: Transformed 3D box of shape (N, 6) or (N, 7).
"""
# rotated iou loss accepts (x, y, z, w, h, l, heading)
if bbox.shape[-1] != 6:
return bbox
# axis-aligned case: x, y, z, w, h, l -> x1, y1, z1, x2, y2, z2
return torch.stack(
(bbox[..., 0] - bbox[..., 3] / 2, bbox[..., 1] - bbox[..., 4] / 2,
bbox[..., 2] - bbox[..., 5] / 2, bbox[..., 0] + bbox[..., 3] / 2,
bbox[..., 1] + bbox[..., 4] / 2, bbox[..., 2] + bbox[..., 5] / 2),
dim=-1)
@staticmethod
def _bbox_pred_to_bbox(points: Tensor, bbox_pred: Tensor) -> Tensor:
"""Transform predicted bbox parameters to bbox.
Args:
points (Tensor): Final locations of shape (N, 3)
bbox_pred (Tensor): Predicted bbox parameters of shape (N, 6)
or (N, 8).
Returns:
Tensor: Transformed 3D box of shape (N, 6) or (N, 7).
"""
if bbox_pred.shape[0] == 0:
return bbox_pred
x_center = points[:, 0] + (bbox_pred[:, 1] - bbox_pred[:, 0]) / 2
y_center = points[:, 1] + (bbox_pred[:, 3] - bbox_pred[:, 2]) / 2
z_center = points[:, 2] + (bbox_pred[:, 5] - bbox_pred[:, 4]) / 2
# dx_min, dx_max, dy_min, dy_max, dz_min, dz_max -> x, y, z, w, l, h
base_bbox = torch.stack([
x_center,
y_center,
z_center,
bbox_pred[:, 0] + bbox_pred[:, 1],
bbox_pred[:, 2] + bbox_pred[:, 3],
bbox_pred[:, 4] + bbox_pred[:, 5],
], -1)
# axis-aligned case
if bbox_pred.shape[1] == 6:
return base_bbox
# rotated case: ..., sin(2a)ln(q), cos(2a)ln(q)
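# q >= 1 is the ratio of the two BEV extents, so sqrt(p6^2 + p7^2)
# equals |ln(q)| and recovers q; atan2 cancels the common ln(q) factor
# and recovers the heading alpha. The summed BEV extent (w + l) is then
# split back into the two sides via q.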
scale = bbox_pred[:, 0] + bbox_pred[:, 1] + \
bbox_pred[:, 2] + bbox_pred[:, 3]
q = torch.exp(
torch.sqrt(
torch.pow(bbox_pred[:, 6], 2) + torch.pow(bbox_pred[:, 7], 2)))
alpha = 0.5 * torch.atan2(bbox_pred[:, 6], bbox_pred[:, 7])
return torch.stack(
(x_center, y_center, z_center, scale / (1 + q), scale /
(1 + q) * q, bbox_pred[:, 5] + bbox_pred[:, 4], alpha),
dim=-1)
@staticmethod
def _get_face_distances(points: Tensor, boxes: Tensor) -> Tensor:
"""Calculate distances from point to box faces.
Args:
points (Tensor): Final locations of shape (N_points, N_boxes, 3).
boxes (Tensor): 3D boxes of shape (N_points, N_boxes, 7)
Returns:
Tensor: Face distances of shape (N_points, N_boxes, 6),
(dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).
"""
shift = torch.stack(
(points[..., 0] - boxes[..., 0], points[..., 1] - boxes[..., 1],
points[..., 2] - boxes[..., 2]),
dim=-1).permute(1, 0, 2)
shift = rotation_3d_in_axis(
shift, -boxes[0, :, 6], axis=2).permute(1, 0, 2)
centers = boxes[..., :3] + shift
dx_min = centers[..., 0] - boxes[..., 0] + boxes[..., 3] / 2
dx_max = boxes[..., 0] + boxes[..., 3] / 2 - centers[..., 0]
dy_min = centers[..., 1] - boxes[..., 1] + boxes[..., 4] / 2
dy_max = boxes[..., 1] + boxes[..., 4] / 2 - centers[..., 1]
dz_min = centers[..., 2] - boxes[..., 2] + boxes[..., 5] / 2
dz_max = boxes[..., 2] + boxes[..., 5] / 2 - centers[..., 2]
return torch.stack((dx_min, dx_max, dy_min, dy_max, dz_min, dz_max),
dim=-1)
@staticmethod
def _get_centerness(face_distances: Tensor) -> Tensor:
"""Compute point centerness w.r.t containing box.
Args:
face_distances (Tensor): Face distances of shape (B, N, 6),
(dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).
Returns:
Tensor: Centerness of shape (B, N).
"""
x_dims = face_distances[..., [0, 1]]
y_dims = face_distances[..., [2, 3]]
z_dims = face_distances[..., [4, 5]]
centerness_targets = x_dims.min(dim=-1)[0] / x_dims.max(dim=-1)[0] * \
y_dims.min(dim=-1)[0] / y_dims.max(dim=-1)[0] * \
z_dims.min(dim=-1)[0] / z_dims.max(dim=-1)[0]
return torch.sqrt(centerness_targets)
@torch.no_grad()
def get_targets(self, points: List[Tensor], gt_bboxes: BaseInstance3DBoxes,
gt_labels: Tensor) -> Tuple[Tensor, ...]:
"""Compute targets for final locations for a single scene.
Args:
points (list[Tensor]): Final locations for all levels.
gt_bboxes (BaseInstance3DBoxes): Ground truth boxes.
gt_labels (Tensor): Ground truth labels.
Returns:
tuple[Tensor, ...]: Centerness, bbox and classification
targets for all locations.
"""
float_max = points[0].new_tensor(1e8)
n_levels = len(points)
levels = torch.cat([
points[i].new_tensor(i).expand(len(points[i]))
for i in range(len(points))
])
points = torch.cat(points)
gt_bboxes = gt_bboxes.to(points.device)
n_points = len(points)
n_boxes = len(gt_bboxes)
volumes = gt_bboxes.volume.unsqueeze(0).expand(n_points, n_boxes)
# condition 1: point inside box
boxes = torch.cat((gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),
dim=1)
boxes = boxes.expand(n_points, n_boxes, 7)
points = points.unsqueeze(1).expand(n_points, n_boxes, 3)
face_distances = self._get_face_distances(points, boxes)
inside_box_condition = face_distances.min(dim=-1).values > 0
# condition 2: positive points per level >= limit
# calculate positive points per scale
n_pos_points_per_level = []
for i in range(n_levels):
n_pos_points_per_level.append(
torch.sum(inside_box_condition[levels == i], dim=0))
# find best level
n_pos_points_per_level = torch.stack(n_pos_points_per_level, dim=0)
lower_limit_mask = n_pos_points_per_level < self.pts_assign_threshold
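# argmax finds the first (finest-to-coarsest) level whose positive
# count drops below the threshold; the level just before it is the
# best level, clamped to 0. If no level drops below the threshold,
# fall back to the last (coarsest) level.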
lower_index = torch.argmax(lower_limit_mask.int(), dim=0) - 1
lower_index = torch.where(lower_index < 0, 0, lower_index)
all_upper_limit_mask = torch.all(
torch.logical_not(lower_limit_mask), dim=0)
best_level = torch.where(all_upper_limit_mask, n_levels - 1,
lower_index)
# keep only points with best level
best_level = best_level.expand(n_points, n_boxes)
levels = torch.unsqueeze(levels, 1).expand(n_points, n_boxes)
level_condition = best_level == levels
# condition 3: limit topk points per box by centerness
centerness = self._get_centerness(face_distances)
centerness = torch.where(inside_box_condition, centerness,
torch.ones_like(centerness) * -1)
centerness = torch.where(level_condition, centerness,
torch.ones_like(centerness) * -1)
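# Points outside the box or on the wrong level were masked to -1 above,
# so they can never be selected among the top-centerness candidates.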
top_centerness = torch.topk(
centerness,
min(self.pts_center_threshold + 1, len(centerness)),
dim=0).values[-1]
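# top_centerness is the (pts_center_threshold + 1)-th largest value per
# box; the strict '>' keeps at most pts_center_threshold points.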
topk_condition = centerness > top_centerness.unsqueeze(0)
# condition 4: min volume box per point
volumes = torch.where(inside_box_condition, volumes, float_max)
volumes = torch.where(level_condition, volumes, float_max)
volumes = torch.where(topk_condition, volumes, float_max)
min_volumes, min_inds = volumes.min(dim=1)
center_targets = centerness[torch.arange(n_points), min_inds]
bbox_targets = boxes[torch.arange(n_points), min_inds]
if not gt_bboxes.with_yaw:
bbox_targets = bbox_targets[:, :-1]
cls_targets = gt_labels[min_inds]
cls_targets = torch.where(min_volumes == float_max, -1, cls_targets)
return center_targets, bbox_targets, cls_targets
def _single_scene_multiclass_nms(self, bboxes: Tensor, scores: Tensor,
input_meta: dict) -> Tuple[Tensor, ...]:
"""Multi-class nms for a single scene.
Args:
bboxes (Tensor): Predicted boxes of shape (N_boxes, 6) or
(N_boxes, 7).
scores (Tensor): Predicted scores of shape (N_boxes, N_classes).
input_meta (dict): Scene meta data.
Returns:
tuple[Tensor, ...]: Predicted bboxes, scores and labels.
"""
num_classes = scores.shape[1]
with_yaw = bboxes.shape[1] == 7
nms_bboxes, nms_scores, nms_labels = [], [], []
for i in range(num_classes):
ids = scores[:, i] > self.test_cfg.score_thr
if not ids.any():
continue
class_scores = scores[ids, i]
class_bboxes = bboxes[ids]
if with_yaw:
nms_function = nms3d
else:
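# Pad a zero yaw so axis-aligned boxes match the (N, 7) layout that
# nms3d_normal expects; the extra column is stripped again after NMS.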
class_bboxes = torch.cat(
(class_bboxes, torch.zeros_like(class_bboxes[:, :1])),
dim=1)
nms_function = nms3d_normal
nms_ids = nms_function(class_bboxes, class_scores,
self.test_cfg.iou_thr)
nms_bboxes.append(class_bboxes[nms_ids])
nms_scores.append(class_scores[nms_ids])
nms_labels.append(
bboxes.new_full(
class_scores[nms_ids].shape, i, dtype=torch.long))
if len(nms_bboxes):
nms_bboxes = torch.cat(nms_bboxes, dim=0)
nms_scores = torch.cat(nms_scores, dim=0)
nms_labels = torch.cat(nms_labels, dim=0)
else:
nms_bboxes = bboxes.new_zeros((0, bboxes.shape[1]))
nms_scores = bboxes.new_zeros((0, ))
nms_labels = bboxes.new_zeros((0, ))
if not with_yaw:
nms_bboxes = nms_bboxes[:, :6]
return nms_bboxes, nms_scores, nms_labels
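For context, here is a minimal sketch of constructing this head directly, outside a config file. The channel sizes and thresholds are illustrative assumptions rather than values prescribed by this diff, and MinkowskiEngine must be installed for construction to succeed:

from mmengine.config import ConfigDict

from mmdet3d.models.dense_heads import FCAF3DHead

# Hypothetical numbers for a four-level sparse backbone; num_reg_outs=6
# yields axis-aligned boxes, while 8 would add the sin/cos angle pair.
head = FCAF3DHead(
    num_classes=18,
    in_channels=(64, 128, 256, 512),
    out_channels=128,
    num_reg_outs=6,
    voxel_size=0.01,
    pts_prune_threshold=100000,
    pts_assign_threshold=27,
    pts_center_threshold=18,
    test_cfg=ConfigDict(nms_pre=1000, iou_thr=0.5, score_thr=0.01))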
...@@ -4,6 +4,7 @@ from typing import List, Optional, Sequence, Tuple ...@@ -4,6 +4,7 @@ from typing import List, Optional, Sequence, Tuple
import numpy as np import numpy as np
import torch import torch
from mmcv.cnn import Scale from mmcv.cnn import Scale
from mmdet.models.utils import multi_apply, select_single_mlvl
from mmengine.model import normal_init from mmengine.model import normal_init
from mmengine.structures import InstanceData from mmengine.structures import InstanceData
from torch import Tensor from torch import Tensor
...@@ -14,7 +15,6 @@ from mmdet3d.registry import MODELS, TASK_UTILS ...@@ -14,7 +15,6 @@ from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import limit_period, points_img2cam, xywhr2xyxyr from mmdet3d.structures import limit_period, points_img2cam, xywhr2xyxyr
from mmdet3d.utils import (ConfigType, InstanceList, OptConfigType, from mmdet3d.utils import (ConfigType, InstanceList, OptConfigType,
OptInstanceList) OptInstanceList)
from mmdet.models.utils import multi_apply, select_single_mlvl
from .anchor_free_mono3d_head import AnchorFreeMono3DHead from .anchor_free_mono3d_head import AnchorFreeMono3DHead
RangeType = Sequence[Tuple[int, int]] RangeType = Sequence[Tuple[int, int]]
......