Commit 5a1575a0 authored by zhangwenwei

Merge branch 'add-ori-mvx' into 'master'

Add ori mvx

See merge request open-mmlab/mmdet.3d!79
parents 0ed9c576 3298db8a
......@@ -27,7 +27,7 @@ linting:
stage: test
script:
- echo "Start building..."
- pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
- pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools"
- pip install git+https://github.com/open-mmlab/mmcv.git
- pip install git+https://github.com/open-mmlab/mmdetection.git
- python -c "import mmdet; print(mmdet.__version__)"
......
......@@ -4,8 +4,6 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
type='DynamicMVXFasterRCNN',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
img_backbone=dict(
type='ResNet',
depth=50,
......@@ -136,22 +134,10 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(use_lidar=True, use_camera=True)
db_sampler = dict(
type='MMDataBaseSampler',
data_root=data_root,
info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
blending_type=['box', 'gaussian', 'poisson'],
depth_consistent=True,
check_2D_collision=True,
collision_thr=[0, 0.3, 0.5, 0.7],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
classes=class_names)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
......@@ -171,10 +157,11 @@ train_pipeline = [
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1280, 384),
......@@ -196,7 +183,7 @@ test_pipeline = [
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Collect3D', keys=['points', 'img'])
])
]
......@@ -204,6 +191,9 @@ data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
......@@ -212,7 +202,7 @@ data = dict(
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
test_mode=False)),
val=dict(
type=dataset_type,
data_root=data_root,
......@@ -255,10 +245,10 @@ log_config = dict(
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 80
total_epochs = 40
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth' # noqa
work_dir = None
load_from = './pretrain_mmdet/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth' # noqa
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://regnetx_3.2gf',
backbone=dict(
type='RegNet',
arch='regnetx_3.2gf',
out_indices=(0, 1, 2, 3),
frozen_stages=1,
base_channels=32,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[96, 192, 432, 1008],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of Detectron,
# which improves bbox mAP by ~0.2.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
# The mean and std are the ones used in pycls when training RegNets
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
to_rgb=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
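The path_mapping above lets the petrel file client transparently redirect local dataset paths to the ceph bucket. A rough sketch of the rewrite it implies (assumed behavior for illustration, not mmcv's actual implementation):
# Sketch of the path rewriting implied by path_mapping (illustrative only):
path_mapping = {
    './data/nuscenes/': 's3://nuscenes/nuscenes/',
    'data/nuscenes/': 's3://nuscenes/nuscenes/',
}
def remap(filepath):
    # Replace the first matching local prefix with its remote counterpart.
    for local, remote in path_mapping.items():
        if filepath.startswith(local):
            return remote + filepath[len(local):]
    return filepath
# remap('./data/nuscenes/samples/CAM_FRONT/x.jpg')
# -> 's3://nuscenes/nuscenes/samples/CAM_FRONT/x.jpg'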
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth' # noqa
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='RetinaNet',
pretrained=('open-mmlab://resnet50_caffe_bgr'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=10,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models from Detectron1 or any MSRA model,
# std has been absorbed into the conv1 weights, so std must be set to 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=(1600, 900),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
API Documentation
=================
mmdet3d.apis
--------------
.. automodule:: mmdet3d.apis
:members:
mmdet3d.core
--------------
......
from .train import batch_processor, train_detector
__all__ = ['batch_processor', 'train_detector']
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer
from mmdet3d.utils import get_root_logger
from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
Fp16OptimizerHook)
from mmdet.datasets import build_dataloader, build_dataset
def batch_processor(model, data, train_mode):
"""Process a data batch.
This method is required as an argument of Runner, which defines how to
process a data batch and obtain proper outputs. The first 3 arguments of
batch_processor are fixed.
Args:
model (nn.Module): A PyTorch model.
data (dict): The data batch in a dict.
train_mode (bool): Whether the model runs in training mode. Some
models may not use it.
Returns:
dict: A dict containing losses and log vars.
"""
losses = model(**data)
loss, log_vars = parse_losses(losses)
if 'img_metas' in data:
num_samples = len(data['img_metas'].data)
else:
num_samples = len(data['img'].data)
outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
return outputs
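As the docstring says, Runner calls this function once per iteration. A minimal sketch of that contract, assuming a standard PyTorch data loader (simplified from mmcv's Runner loop, not the actual implementation):
# Sketch of how Runner consumes batch_processor (illustrative):
for data_batch in data_loader:
    outputs = batch_processor(model, data_batch, train_mode=True)
    # 'loss' is backpropagated by the optimizer hook, 'log_vars' feeds the
    # logger hooks, and 'num_samples' weights the running loss averages.
    assert {'loss', 'log_vars', 'num_samples'} <= set(outputs)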
def train_detector(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None):
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds,
cfg.data.samples_per_gpu,
cfg.data.workers_per_gpu,
# cfg.gpus will be ignored if distributed
len(cfg.gpu_ids),
dist=distributed,
seed=cfg.seed) for ds in dataset
]
# put model on gpus
if distributed:
find_unused_parameters = cfg.get('find_unused_parameters', False)
# Sets the `find_unused_parameters` parameter in
# torch.nn.parallel.DistributedDataParallel
model = MMDistributedDataParallel(
model.cuda(),
device_ids=[torch.cuda.current_device()],
broadcast_buffers=False,
find_unused_parameters=find_unused_parameters)
else:
model = MMDataParallel(
model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model,
batch_processor,
optimizer,
cfg.work_dir,
logger=logger,
meta=meta)
# an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=distributed)
elif distributed and 'type' not in cfg.optimizer_config:
optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
else:
optimizer_config = cfg.optimizer_config
# register hooks
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config,
cfg.get('momentum_config', None))
if distributed:
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False)
eval_cfg = cfg.get('evaluation', {})
eval_hook = DistEvalHook if distributed else EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
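A hypothetical driver for train_detector, as tools/train.py would use it; the config path is made up, and build_detector/build_dataset come from mmdet3d/mmdet as in the imports above:
# Hypothetical driver (config path is illustrative):
from mmcv import Config
from mmdet3d.models import build_detector
from mmdet.datasets import build_dataset
cfg = Config.fromfile('configs/mvxnet/some_config.py')  # assumed path
cfg.gpu_ids = range(1)
cfg.seed = None
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
datasets = [build_dataset(cfg.data.train)]
train_detector(model, datasets, cfg, distributed=False, validate=True)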
......@@ -52,7 +52,7 @@ class KittiDataset(Custom3DDataset):
def get_data_info(self, index):
info = self.data_infos[index]
sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.root_split,
img_filename = os.path.join(self.data_root,
info['image']['image_path'])
# TODO: consider using torch.Tensor only
......@@ -65,7 +65,8 @@ class KittiDataset(Custom3DDataset):
input_dict = dict(
sample_idx=sample_idx,
pts_filename=pts_filename,
img_filename=img_filename,
img_prefix=None,
img_info=dict(filename=img_filename),
lidar2img=lidar2img)
if not self.test_mode:
......@@ -113,8 +114,8 @@ class KittiDataset(Custom3DDataset):
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels)
bboxes=gt_bboxes,
labels=gt_labels)
return anns_results
def drop_arrays_by_name(self, gt_names, used_classes):
......@@ -151,6 +152,24 @@ class KittiDataset(Custom3DDataset):
result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = submission_prefix + name
else:
submission_prefix_ = None
if 'img' in name:
result_files_ = self.bbox2result_kitti2d(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
else:
result_files_ = self.bbox2result_kitti(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
......@@ -162,8 +181,7 @@ class KittiDataset(Custom3DDataset):
metric=None,
logger=None,
pklfile_prefix=None,
submission_prefix=None,
result_names=['pts_bbox']):
submission_prefix=None):
"""Evaluation in KITTI protocol.
Args:
......@@ -178,11 +196,30 @@ class KittiDataset(Custom3DDataset):
If not specified, the submission data will not be generated.
Returns:
dict[str: float]
dict[str: float]: results of each evaluation metric
"""
result_files, tmp_dir = self.format_results(results, pklfile_prefix)
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.data_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bbox', 'bev', '3d']
if 'img' in name:
eval_types = ['bbox']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
if metric == 'img_bbox':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
......@@ -190,6 +227,7 @@ class KittiDataset(Custom3DDataset):
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.CLASSES)
print_log('\n' + ap_result_str, logger=logger)
if tmp_dir is not None:
tmp_dir.cleanup()
return ap_dict
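When results carry both branches, the logic above namespaces every metric by branch name. Schematically (the exact ap_type keys come from kitti_eval and are not reproduced here):
# Schematic structure of the returned ap_dict for dict-valued results:
# {
#     'pts_bbox/<ap_type>': float,  # LiDAR branch, eval_types ['bbox', 'bev', '3d']
#     'img_bbox/<ap_type>': float,  # image branch, eval_types ['bbox'] only
# }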
......
from mmdet.datasets.pipelines import Compose
from .dbsampler import DataBaseSampler, MMDataBaseSampler
from .dbsampler import DataBaseSampler
from .formating import DefaultFormatBundle, DefaultFormatBundle3D
from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScaleTrans,
IndoorPointsColorJitter)
......@@ -19,6 +19,6 @@ __all__ = [
'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'IndoorGlobalRotScaleTrans', 'IndoorPointsColorJitter', 'IndoorFlipData',
'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
'IndoorPointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D'
'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
'PointSegClassMapping', 'MultiScaleFlipAug3D'
]
......@@ -2,8 +2,6 @@ import copy
import os
import pickle
import cv2
import mmcv
import numpy as np
from mmdet3d.core.bbox import box_np_ops
......@@ -263,255 +261,3 @@ class DataBaseSampler(object):
boxes[i, -1] - sp_boxes[i - num_gt, -1])
valid_samples.append(sampled[i - num_gt])
return valid_samples
@OBJECTSAMPLERS.register_module()
class MMDataBaseSampler(DataBaseSampler):
def __init__(self,
info_path,
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None,
check_2D_collision=False,
collision_thr=0,
collision_in_classes=False,
depth_consistent=False,
blending_type=None):
super(MMDataBaseSampler, self).__init__(
info_path=info_path,
data_root=data_root,
rate=rate,
prepare=prepare,
object_rot_range=object_rot_range,
sample_groups=sample_groups,
classes=classes)
self.blending_type = blending_type
self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision
self.collision_thr = collision_thr
self.collision_in_classes = collision_in_classes
def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes_3d = []
sampled_gt_bboxes_2d = []
avoid_coll_boxes_3d = gt_bboxes_3d
avoid_coll_boxes_2d = gt_bboxes_2d
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes_3d,
avoid_coll_boxes_2d)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
np.newaxis, ...]
else:
sampled_gt_box_3d = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_box_2d = np.stack(
[s['box2d_camera'] for s in sampled_cls], axis=0)
sampled_gt_bboxes_3d += [sampled_gt_box_3d]
sampled_gt_bboxes_2d += [sampled_gt_box_2d]
if self.collision_in_classes:
# TODO: check whether collision checking among
# classes is necessary
avoid_coll_boxes_3d = np.concatenate(
[avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
avoid_coll_boxes_2d = np.concatenate(
[avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
s_points_list = []
count = 0
if self.depth_consistent:
# change the paste order based on distance (farther objects first)
center = sampled_gt_bboxes_3d[:, 0:3]
paste_order = np.argsort(
-np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
axis=-1)
for idx in range(len(sampled)):
if self.depth_consistent:
inds = np.where(paste_order == idx)[0][0]
info = sampled[inds]
else:
info = sampled[idx]
pcd_file_path = os.path.join(
self.data_root,
info['path']) if self.data_root else info['path']
img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile(
pcd_file_path, dtype=np.float32).reshape([-1, 4])
s_patch = mmcv.imread(img_file_path)
s_mask = mmcv.imread(mask_file_path, 'grayscale')
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
# TODO: might need to rotate the 2D bbox in the future
# the points of each sample have already had the object center
# subtracted, so add the offset back here
s_points[:, :3] += info['box3d_lidar'][:3]
img = self.paste_obj(
img,
s_patch,
s_mask,
bbox_2d=info['box2d_camera'].astype(np.int32))
count += 1
s_points_list.append(s_points)
ret = dict(
img=img,
gt_names=np.array([s['name'] for s in sampled]),
difficulty=np.array([s['difficulty'] for s in sampled]),
gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0),
group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled)))
return ret
def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
# paste the image patch back
x1, y1, x2, y2 = bbox_2d
# the bbox may exceed the image boundary since the patch comes from a different image
img_h, img_w = img.shape[:2]
w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
obj_mask = obj_mask[:h, :w]
obj_img = obj_img[:h, :w]
# choose a blend option
if not self.blending_type:
blending_op = 'none'
else:
blending_choice = np.random.randint(len(self.blending_type))
blending_op = self.blending_type[blending_choice]
if blending_op.find('poisson') != -1:
# options: cv2.NORMAL_CLONE=1 or cv2.MONOCHROME_TRANSFER=3
# cv2.MIXED_CLONE mixes the textures, so it is not used.
if blending_op == 'poisson':
mode = np.random.choice([1, 3], 1)[0]
elif blending_op == 'poisson_normal':
mode = cv2.NORMAL_CLONE
elif blending_op == 'poisson_transfer':
mode = cv2.MONOCHROME_TRANSFER
else:
raise NotImplementedError
center = (int(x1 + w / 2), int(y1 + h / 2))
img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
else:
if blending_op == 'gaussian':
obj_mask = cv2.GaussianBlur(
obj_mask.astype(np.float32), (5, 5), 2)
elif blending_op == 'box':
obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
paste_mask = 1 - obj_mask
img[y1:y1 + h,
x1:x1 + w] = (img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
paste_mask[..., None]).astype(np.uint8)
img[y1:y1 + h, x1:x1 + w] += (obj_img.astype(np.float32) *
obj_mask[..., None]).astype(np.uint8)
return img
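The supported blending_type values follow directly from the branches above; a config sketch listing all of them (behavior grounded in paste_obj):
# All blending options handled by paste_obj:
blending_type = [
    'box',               # cv2.blur on the mask, 3x3 kernel
    'gaussian',          # cv2.GaussianBlur on the mask, 5x5 kernel, sigma 2
    'poisson',           # seamlessClone, mode drawn from {NORMAL_CLONE, MONOCHROME_TRANSFER}
    'poisson_normal',    # seamlessClone with cv2.NORMAL_CLONE
    'poisson_transfer',  # seamlessClone with cv2.MONOCHROME_TRANSFER
]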
def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes_3d.shape[0]
num_sampled = len(sampled)
# avoid collision in BEV first
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
# Then avoid collision in 2D space
if self.check_2D_collision:
sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
axis=0)
total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
axis=0) # Nx4
# randomly select a collision threshold
if isinstance(self.collision_thr, float):
collision_thr = self.collision_thr
elif isinstance(self.collision_thr, list):
collision_thr = np.random.choice(self.collision_thr)
elif isinstance(self.collision_thr, dict):
mode = self.collision_thr.get('mode', 'value')
if mode == 'value':
collision_thr = np.random.choice(
self.collision_thr['thr_range'])
elif mode == 'range':
collision_thr = np.random.uniform(
self.collision_thr['thr_range'][0],
self.collision_thr['thr_range'][1])
if collision_thr == 0:
# use a collision test similar to the BEV one
# Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
# ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
x1y1 = total_bbox_2d[:, :2]
x2y2 = total_bbox_2d[:, 2:]
x1y2 = np.stack([total_bbox_2d[:, 0], total_bbox_2d[:, 3]],
axis=-1)
x2y1 = np.stack([total_bbox_2d[:, 2], total_bbox_2d[:, 1]],
axis=-1)
total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
coll_mat_2d = data_augment_utils.box_collision_test(
total_2d, total_2d)
else:
# use iof rather than iou to protect the foreground
overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
'iof')
coll_mat_2d = overlaps > collision_thr
coll_mat = coll_mat + coll_mat_2d
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
valid_samples.append(sampled[i - num_gt])
return valid_samples
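collision_thr accepts three forms, all handled by the branches above; a sketch of equivalent settings:
# The three accepted forms of collision_thr:
collision_thr = 0.3                                       # fixed IoF threshold
collision_thr = [0, 0.3, 0.5, 0.7]                        # one value drawn per call
collision_thr = dict(mode='range', thr_range=[0.2, 0.6])  # uniform in [0.2, 0.6]
# mode='value' instead draws from thr_range as a discrete set.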
from abc import ABCMeta, abstractmethod
from mmdet.models.detectors import BaseDetector
import torch.nn as nn
class Base3DDetector(nn.Module, metaclass=ABCMeta):
class Base3DDetector(BaseDetector):
"""Base class for detectors"""
def __init__(self):
super(Base3DDetector, self).__init__()
self.fp16_enabled = False
@property
def with_neck(self):
return hasattr(self, 'neck') and self.neck is not None
@property
def with_shared_head(self):
return hasattr(self, 'shared_head') and self.shared_head is not None
@property
def with_bbox(self):
return hasattr(self, 'bbox_head') and self.bbox_head is not None
@property
def with_mask(self):
return hasattr(self, 'mask_head') and self.mask_head is not None
@abstractmethod
def extract_feat(self, imgs):
pass
def extract_feats(self, imgs):
assert isinstance(imgs, list)
for img in imgs:
yield self.extract_feat(img)
@abstractmethod
def forward_train(self, **kwargs):
pass
@abstractmethod
def simple_test(self, **kwargs):
pass
@abstractmethod
def aug_test(self, **kwargs):
pass
def init_weights(self, pretrained=None):
if pretrained is not None:
from mmdet3d.utils import get_root_logger
logger = get_root_logger()
logger.info('load model from: {}'.format(pretrained))
def forward_test(self, points, img_metas, imgs=None, **kwargs):
def forward_test(self, points, img_metas, img=None, **kwargs):
"""
Args:
points (List[Tensor]): the outer list indicates test-time
......@@ -62,7 +13,7 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
img_metas (List[List[dict]]): the outer list indicates test-time
augs (multiscale, flip, etc.) and the inner list indicates
images in a batch
imgs (List[Tensor], optional): the outer list indicates test-time
img (List[Tensor], optional): the outer list indicates test-time
augmentations and inner Tensor should have a shape NxCxHxW,
which contains all images in the batch. Defaults to None.
"""
......@@ -81,10 +32,10 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
assert samples_per_gpu == 1
if num_augs == 1:
imgs = [imgs] if imgs is None else imgs
return self.simple_test(points[0], img_metas[0], imgs[0], **kwargs)
img = [img] if img is None else img
return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
else:
return self.aug_test(points, img_metas, imgs, **kwargs)
return self.aug_test(points, img_metas, img, **kwargs)
def forward(self, return_loss=True, **kwargs):
"""
......
......@@ -265,13 +265,13 @@ class MVXTwoStageDetector(Base3DDetector):
proposal_list = proposals
# bbox head forward and loss
img_roi_losses = self.roi_head.forward_train(x, img_metas,
proposal_list, gt_bboxes,
gt_labels,
gt_bboxes_ignore,
**kwargs)
if self.with_img_bbox:
# bbox head forward and loss
img_roi_losses = self.img_roi_head.forward_train(
x, img_metas, proposal_list, gt_bboxes, gt_labels,
gt_bboxes_ignore, **kwargs)
losses.update(img_roi_losses)
return losses
def simple_test_img(self, x, img_metas, proposals=None, rescale=False):
......
......@@ -2,12 +2,17 @@ import torch
import torch.nn.functional as F
from mmdet3d.ops import Voxelization
from mmdet.models import DETECTORS, TwoStageDetector
from mmdet.models import DETECTORS
from .. import builder
from .two_stage import TwoStage3DDetector
@DETECTORS.register_module()
class PartA2(TwoStageDetector):
class PartA2(TwoStage3DDetector):
"""Part-A2 detector
Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_
"""
def __init__(self,
voxel_layer,
......@@ -111,41 +116,6 @@ class PartA2(TwoStageDetector):
return losses
def forward_test(self, points, img_metas, imgs=None, **kwargs):
"""
Args:
points (List[Tensor]): the outer list indicates test-time
augmentations and inner Tensor should have a shape NxC,
which contains all points in the batch.
img_metas (List[List[dict]]): the outer list indicates test-time
augs (multiscale, flip, etc.) and the inner list indicates
images in a batch
"""
for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
if not isinstance(var, list):
raise TypeError('{} must be a list, but got {}'.format(
name, type(var)))
num_augs = len(points)
if num_augs != len(img_metas):
raise ValueError(
'num of augmentations ({}) != num of image meta ({})'.format(
len(points), len(img_metas)))
# TODO: remove the restriction of imgs_per_gpu == 1 when prepared
samples_per_gpu = len(points[0])
assert samples_per_gpu == 1
if num_augs == 1:
return self.simple_test(points[0], img_metas[0], **kwargs)
else:
return self.aug_test(points, img_metas, **kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self, points, img_metas, proposals=None, rescale=False):
feats_dict, voxels_dict = self.extract_feat(points, img_metas)
......@@ -159,6 +129,3 @@ class PartA2(TwoStageDetector):
return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
proposal_list)
def aug_test(self, **kwargs):
raise NotImplementedError
......@@ -6,6 +6,21 @@ from .base import Base3DDetector
@DETECTORS.register_module()
class SingleStage3DDetector(Base3DDetector):
"""SingleStage3DDetector
This class serves as a base class for single-stage 3D detectors.
Args:
backbone (dict): Config dict of detector's backbone.
neck (dict, optional): Config dict of neck. Defaults to None.
bbox_head (dict, optional): Config dict of box head. Defaults to None.
train_cfg (dict, optional): Config dict of training hyper-parameters.
Defaults to None.
test_cfg (dict, optional): Config dict of test hyper-parameters.
Defaults to None.
pretrained (str, optional): Path of pretrained models.
Defaults to None.
"""
def __init__(self,
backbone,
......
from mmdet.models import DETECTORS, TwoStageDetector
from .base import Base3DDetector
@DETECTORS.register_module()
class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
"""Base class of two-stage 3D detector
It inherits the original :class:`TwoStageDetector` and
:class:`Base3DDetector`. This class can serve as a base class for
all two-stage 3D detectors.
"""
def __init__(self, **kwargs):
super(TwoStage3DDetector, self).__init__(**kwargs)
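Since both bases ultimately derive from mmdet's BaseDetector, Python's C3 linearization puts Base3DDetector before TwoStageDetector, so its 3D-aware overrides (e.g. forward_test) take precedence; a quick sanity check, assuming the class definitions above:
# Illustrative MRO check: Base3DDetector comes first, so its overrides win.
print([cls.__name__ for cls in TwoStage3DDetector.__mro__[:4]])
# ['TwoStage3DDetector', 'Base3DDetector', 'TwoStageDetector', 'BaseDetector']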
......@@ -287,7 +287,7 @@ class PointFusion(nn.Module):
pts.new_tensor(img_meta['pcd_rotation']) if 'pcd_rotation'
in img_meta.keys() else torch.eye(3).type_as(pts).to(pts.device))
img_scale_factor = (
img_meta['scale_factor']
pts.new_tensor(img_meta['scale_factor'][:2])
if 'scale_factor' in img_meta.keys() else 1)
pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
) else False
......
matplotlib
mmcv>=0.5.1
mmcv>=0.6.0
numba==0.48.0
numpy
# need older pillow until torchvision is fixed
......
......@@ -115,10 +115,8 @@ def test_config_data_pipeline():
print('Found config_dpath = {!r}'.format(config_dpath))
# Only tests a representative subset of configurations
# TODO: test pipelines using Albu; the current Albu transform returns None given empty GT
config_names = [
'mvxnet/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py',
'mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py',
'mvxnet/'
'faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py',
]
......
......@@ -82,10 +82,10 @@ def _test_two_stage_forward(cfg_file):
gt_masks=gt_masks,
return_loss=True)
assert isinstance(losses, dict)
from mmdet.apis.train import parse_losses
total_loss = parse_losses(losses)[0].requires_grad_(True)
assert float(total_loss.item()) > 0
total_loss.backward()
loss, _ = detector._parse_losses(losses)
loss.requires_grad_(True)
assert float(loss.item()) > 0
loss.backward()
# Test forward train with an empty truth batch
mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
......@@ -102,10 +102,9 @@ def _test_two_stage_forward(cfg_file):
gt_masks=gt_masks,
return_loss=True)
assert isinstance(losses, dict)
from mmdet.apis.train import parse_losses
total_loss = parse_losses(losses)[0].requires_grad_(True)
assert float(total_loss.item()) > 0
total_loss.backward()
loss, _ = detector._parse_losses(losses)
assert float(loss.item()) > 0
loss.backward()
# Test forward test
with torch.no_grad():
......@@ -140,6 +139,8 @@ def _test_single_stage_forward(cfg_file):
gt_labels=gt_labels,
return_loss=True)
assert isinstance(losses, dict)
loss, _ = detector._parse_losses(losses)
assert float(loss.item()) > 0
# Test forward test
with torch.no_grad():
......
......@@ -12,11 +12,10 @@ from mmcv import Config, DictAction
from mmcv.runner import init_dist
from mmdet3d import __version__
from mmdet3d.apis import train_detector
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_detector
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmdet.apis import set_random_seed, train_detector
def parse_args():
......