"magic_pdf/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "bf45c8fb2b67c00aec8873ba887e937caf82ae9b"
Unverified Commit ffcea26c authored by yinchimaoliang's avatar yinchimaoliang Committed by GitHub
Browse files

[Feature]: Add double flip (#143)

* Able to double_flip test.

* Remove nms_post_max_size

* Able to merge then nms for tta.

* Able to merge then nms for tta.

* reproducible performance.

* Add more configs.

* Able to use scale tta.

* Move scale aug to merge_aug_bboxes_3d

* Remove brackets, add comments.

* Delete unnecessary configs, Add instruction in README.

* Add docstring.

* Change readme, add empty line.

* Change name.

* Add tta in unittest.

* Add 'to cpu'

* Add simple test

* Move 'to cpu'

* Add full tta config

* Delete circle_nms tta config, change max_num.

* Update benchmark

* Remove unnecessary code, move position of flip.

* Complete docstring.

* Add comment for double flip change.
parent f98bc952
......@@ -74,11 +74,10 @@ test_cfg = dict(
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
post_max_size=83,
score_threshold=0.1,
out_size_factor=8,
voxel_size=voxel_size[:2],
nms_type='rotate',
nms_pre_max_size=1000,
nms_post_max_size=83,
nms_iou_threshold=0.2))
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2))
......@@ -73,12 +73,11 @@ test_cfg = dict(
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
post_max_size=83,
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=4,
voxel_size=voxel_size[:2],
nms_type='rotate',
nms_pre_max_size=1000,
nms_post_max_size=83,
nms_iou_threshold=0.2))
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2))
......@@ -33,6 +33,72 @@ We follow the below style to name config files. Contributors are advised to foll
}
```
## Usage
### Test time augmentation
Double-flip and scale augmentation are supported at test time. To enable test-time augmentation, modify the
`test_pipeline` and `test_cfg` in the config.
For example, `centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py` can be changed as follows.
```python
_base_ = './centerpoint_0075voxel_second_secfpn_circlenms' \
'_4x8_cyclic_20e_nus.py'
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
max_num=83))
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
file_client_args = dict(backend='disk')
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=[0.95, 1.0, 1.05],
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
```
## Results
### CenterPoint
......@@ -47,6 +113,9 @@ We follow the below style to name config files. Contributors are advised to foll
|above w/o circle nms|voxel (0.075)|✗|✗| | |57.63|65.39| |
|[SECFPN](./centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.075)|✓|✓|8.5| |57.27|65.58|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20200930_201619-67c8496f.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20200930_201619.log.json)|
|above w/o circle nms|voxel (0.075)|✓|✗| | |57.43|65.63||
|above w/ double flip|voxel (0.075)|✓|✗| | |59.73|67.39||
|above w/ scale tta|voxel (0.075)|✓|✗| | |60.43|67.65||
|above w/ circle nms w/o scale tta|voxel (0.075)|✓|✗| | |59.52|67.24||
|[SECFPN](./centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus.py)|pillar (0.2)|✗|✓|4.4| |49.07|59.66|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201004_170716-a134a233.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201004_170716.log.json)|
|above w/o circle nms|pillar (0.2)|✗|✗| | |49.12|59.66||
|[SECFPN](./centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus.py)|pillar (0.2)|✓|✗| 4.6| |48.8 |59.67 |[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus_20200930_103722-3bb135f2.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus_20200930_103722.log.json)|
......
# Test-time-augmentation variant of the base config named in `_base_`:
# the test pipeline below enables horizontal and vertical point-cloud flips
# (double flip) at a single point scale (pts_scale_ratio=1).
# NOTE(review): presumably every setting not redefined here is inherited from
# the base file by the config loader -- confirm against the loader's docs.
_base_ = './centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py'
# Range handed to PointsRangeFilter in the pipeline below.
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Passed to both point-loading steps below.
file_client_args = dict(backend='disk')
# Class names handed to DefaultFormatBundle3D in the pipeline below.
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
# Single scale only; no scale augmentation in this config.
pts_scale_ratio=1,
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
# All parameters are neutral values: zero rotation range, unit scale
# range, zero translation std.
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
# Test-time bundle: labels are not attached (with_label=False).
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# Use the same augmented pipeline for both the val and test splits.
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
# Test-time-augmentation variant of the base config named in `_base_`:
# the test pipeline below enables horizontal and vertical point-cloud flips
# (double flip) together with three point scales (0.95, 1.0, 1.05).
# NOTE(review): presumably every setting not redefined here is inherited from
# the base file by the config loader -- confirm against the loader's docs.
_base_ = './centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py'
# Settings for the `pts` branch at test time: rotate NMS on, max_num=500.
# NOTE(review): presumably consumed when merging augmented boxes -- confirm.
test_cfg = dict(pts=dict(use_rotate_nms=True, max_num=500))
# Range handed to PointsRangeFilter in the pipeline below.
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Passed to both point-loading steps below.
file_client_args = dict(backend='disk')
# Class names handed to DefaultFormatBundle3D in the pipeline below.
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
# Scale augmentation: three point-cloud scale ratios.
pts_scale_ratio=[0.95, 1.0, 1.05],
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
# All parameters are neutral values: zero rotation range, unit scale
# range, zero translation std.
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
# Test-time bundle: labels are not attached (with_label=False).
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# Use the same augmented pipeline for both the val and test splits.
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
# Test-time-augmentation variant of the circle-NMS base config named in
# `_base_`: the test pipeline below enables horizontal and vertical
# point-cloud flips (double flip) at a single point scale (pts_scale_ratio=1).
# NOTE(review): presumably every setting not redefined here is inherited from
# the base file by the config loader -- confirm against the loader's docs.
_base_ = './centerpoint_0075voxel_second_secfpn_dcn_' \
'circlenms_4x8_cyclic_20e_nus.py'
# Range handed to PointsRangeFilter in the pipeline below.
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Passed to both point-loading steps below.
file_client_args = dict(backend='disk')
# Class names handed to DefaultFormatBundle3D in the pipeline below.
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
# Single scale only; no scale augmentation in this config.
pts_scale_ratio=1,
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
# All parameters are neutral values: zero rotation range, unit scale
# range, zero translation std.
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
# Test-time bundle: labels are not attached (with_label=False).
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# Use the same augmented pipeline for both the val and test splits.
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
......@@ -73,7 +73,11 @@ class MultiScaleFlipAug3D(object):
different scales and flips.
"""
aug_data = []
flip_aug = [False, True] if self.flip else [False]
# modified from `flip_aug = [False, True] if self.flip else [False]`
# to reduce unnecessary scenes when using double flip augmentation
# during test time
flip_aug = [True] if self.flip else [False]
pcd_horizontal_flip_aug = [False, True] \
if self.flip and self.pcd_horizontal_flip else [False]
pcd_vertical_flip_aug = [False, True] \
......
......@@ -690,7 +690,7 @@ class CenterHead(nn.Module):
for j, num_class in enumerate(self.num_classes):
rets[j][i][k] += flag
flag += num_class
labels = torch.cat([ret[i][k] for ret in rets])
labels = torch.cat([ret[i][k].int() for ret in rets])
ret_list.append([bboxes, scores, labels])
return ret_list
......@@ -763,9 +763,9 @@ class CenterHead(nn.Module):
selected = nms_gpu(
boxes_for_nms,
top_scores,
thresh=self.test_cfg['nms_iou_threshold'],
pre_maxsize=self.test_cfg['nms_pre_max_size'],
post_max_size=self.test_cfg['nms_post_max_size'])
thresh=self.test_cfg['nms_thr'],
pre_maxsize=self.test_cfg['pre_max_size'],
post_max_size=self.test_cfg['post_max_size'])
else:
selected = []
......
from mmdet3d.core import bbox3d2result
import torch
from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
from mmdet.models import DETECTORS
from .mvx_two_stage import MVXTwoStageDetector
......@@ -79,3 +81,115 @@ class CenterPoint(MVXTwoStageDetector):
for bboxes, scores, labels in bbox_list
]
return bbox_results
def aug_test_pts(self, feats, img_metas, rescale=False):
    """Test function of point cloud branch with augmentation.

    The function implementation process is as follows:

        - step 1: map features back for double-flip augmentation.
        - step 2: merge all features and generate boxes.
        - step 3: map boxes back for scale augmentation.
        - step 4: merge results.

    Args:
        feats (list[torch.Tensor]): Feature of point cloud, one entry per
            augmented view.
        img_metas (list[list[dict]]): Meta information of samples, one
            entry per augmented view. Only the first dict of every entry
            is read; it must provide ``pcd_horizontal_flip``,
            ``pcd_vertical_flip`` and ``pcd_scale_factor``.
        rescale (bool): Whether to rescale bboxes. Default: False.

    Returns:
        dict: Returned bboxes consists of the following keys:

            - boxes_3d (:obj:`LiDARInstance3DBoxes`): Predicted bboxes.
            - scores_3d (torch.Tensor): Scores of predicted boxes.
            - labels_3d (torch.Tensor): Labels of predicted boxes.
    """
    # (meta flag, spatial dim to flip back, channel that has to be mirrored)
    flip_specs = (
        ('pcd_horizontal_flip', 2, 1),
        ('pcd_vertical_flip', 3, 0),
    )

    # only support aug_test for one sample
    outs_list = []
    for x, img_meta in zip(feats, img_metas):
        outs = self.pts_bbox_head(x)
        # map the head outputs of flipped views back to the original frame
        # so that they can be merged before decoding bboxes
        for out in outs:
            task_out = out[0]
            for key in task_out.keys():
                for flag, dim, channel in flip_specs:
                    if not img_meta[0][flag]:
                        continue
                    # torch.flip returns a copy, so the in-place channel
                    # fix below never touches the tensor it was made from
                    restored = torch.flip(task_out[key], dims=[dim])
                    if key == 'reg':
                        # the regressed offset is mirrored as 1 - offset
                        restored[:, channel, ...] = \
                            1 - restored[:, channel, ...]
                    elif key in ('rot', 'vel'):
                        # rotation / velocity components change sign
                        restored[:, channel, ...] = \
                            -restored[:, channel, ...]
                    task_out[key] = restored
        outs_list.append(outs)

    preds_dicts = dict()
    scale_img_metas = []

    # accumulate outputs sharing the same pcd_scale_factor
    for img_meta, outs in zip(img_metas, outs_list):
        pcd_scale_factor = img_meta[0]['pcd_scale_factor']
        if pcd_scale_factor not in preds_dicts:
            preds_dicts[pcd_scale_factor] = outs
            scale_img_metas.append(img_meta)
        else:
            merged = preds_dicts[pcd_scale_factor]
            for task_id, out in enumerate(outs):
                for key in out[0].keys():
                    merged[task_id][0][key] += out[0][key]

    aug_bboxes = []
    for preds_dict in preds_dicts.values():
        # average the accumulated outputs; this assumes every scale was
        # evaluated with the same number of flipped views
        views_per_scale = len(outs_list) / len(preds_dicts)
        for pred_dict in preds_dict:
            # merge outputs with different flips before decoding bboxes
            for key in pred_dict[0].keys():
                pred_dict[0][key] /= views_per_scale
        bbox_list = self.pts_bbox_head.get_bboxes(
            preds_dict, img_metas[0], rescale=rescale)
        bbox_list = [
            dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
            for bboxes, scores, labels in bbox_list
        ]
        aug_bboxes.append(bbox_list[0])

    if len(preds_dicts) > 1:
        # merge outputs with different scales after decoding bboxes
        return merge_aug_bboxes_3d(aug_bboxes, scale_img_metas,
                                   self.pts_bbox_head.test_cfg)

    # single scale: nothing to merge, only move the results to cpu.
    # (A leftover `pdb.set_trace()` used to block inference at this point.)
    result = bbox_list[0]
    for key in result.keys():
        result[key] = result[key].to('cpu')
    return result
def aug_test(self, points, img_metas, imgs=None, rescale=False):
    """Run test-time-augmented inference.

    Features are extracted with ``self.extract_feats``. When point
    features are available and the detector has a point bbox head, the
    result of ``self.aug_test_pts`` is stored under the ``pts_bbox`` key.

    Returns:
        list[dict]: A single-element list holding the result dict, which
            is empty when the point branch did not run.
    """
    _, pts_feats = self.extract_feats(points, img_metas, imgs)
    results = {}
    if pts_feats and self.with_pts_bbox:
        results['pts_bbox'] = self.aug_test_pts(pts_feats, img_metas,
                                                rescale)
    return [results]
......@@ -210,13 +210,22 @@ def test_centerpoint():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
centerpoint = _get_detector_cfg(
'centerpoint/centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py')
'centerpoint/centerpoint_0075voxel_second_secfpn_'
'dcn_4x8_cyclic_flip-tta_20e_nus.py')
self = build_detector(centerpoint).cuda()
points_0 = torch.rand([1000, 5], device='cuda')
points_1 = torch.rand([1000, 5], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_0 = dict(
box_type_3d=LiDARInstance3DBoxes,
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=False)
img_meta_1 = dict(
box_type_3d=LiDARInstance3DBoxes,
flip=True,
pcd_horizontal_flip=False,
pcd_vertical_flip=True)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = LiDARInstance3DBoxes(
torch.rand([10, 9], device='cuda'), box_dim=9)
......@@ -248,3 +257,22 @@ def test_centerpoint():
assert boxes_3d_1.tensor.shape[1] == 9
assert scores_3d_1.shape[0] >= 0
assert labels_3d_1.shape[0] >= 0
# test_aug_test
points = [[torch.rand([1000, 5], device='cuda')]]
img_metas = [[
dict(
box_type_3d=LiDARInstance3DBoxes,
pcd_scale_factor=1.0,
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=False)
]]
results = self.aug_test(points, img_metas)
boxes_3d_0 = results[0]['pts_bbox']['boxes_3d']
scores_3d_0 = results[0]['pts_bbox']['scores_3d']
labels_3d_0 = results[0]['pts_bbox']['labels_3d']
assert boxes_3d_0.tensor.shape[0] >= 0
assert boxes_3d_0.tensor.shape[1] == 9
assert scores_3d_0.shape[0] >= 0
assert labels_3d_0.shape[0] >= 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment