Commit 3298db8a authored by zhangwenwei

Add ori mvx

parent 0ed9c576
@@ -27,7 +27,7 @@ linting:
   stage: test
   script:
     - echo "Start building..."
-    - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
+    - pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools"
     - pip install git+https://github.com/open-mmlab/mmcv.git
     - pip install git+https://github.com/open-mmlab/mmdetection.git
     - python -c "import mmdet; print(mmdet.__version__)"
......
@@ -4,8 +4,6 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
     type='DynamicMVXFasterRCNN',
-    pretrained=('./pretrain_detectron/'
-                'ImageNetPretrained/MSRA/resnet50_msra.pth'),
     img_backbone=dict(
         type='ResNet',
         depth=50,
@@ -136,22 +134,10 @@ class_names = ['Pedestrian', 'Cyclist', 'Car']
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 input_modality = dict(use_lidar=True, use_camera=True)
-db_sampler = dict(
-    type='MMDataBaseSampler',
-    data_root=data_root,
-    info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    blending_type=['box', 'gaussian', 'poisson'],
-    depth_consistent=True,
-    check_2D_collision=True,
-    collision_thr=[0, 0.3, 0.5, 0.7],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
-    classes=class_names)
 train_pipeline = [
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='Resize',
         img_scale=[(640, 192), (2560, 768)],
@@ -171,10 +157,11 @@ train_pipeline = [
     dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
         type='Collect3D',
-        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
+        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(type='LoadImageFromFile'),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1280, 384),
@@ -196,7 +183,7 @@ test_pipeline = [
             type='DefaultFormatBundle3D',
             class_names=class_names,
             with_label=False),
-        dict(type='Collect3D', keys=['points'])
+        dict(type='Collect3D', keys=['points', 'img'])
     ])
 ]
@@ -204,15 +191,18 @@ data = dict(
     samples_per_gpu=2,
     workers_per_gpu=2,
     train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=train_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=False),
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file=data_root + 'kitti_infos_train.pkl',
+            split='training',
+            pts_prefix='velodyne_reduced',
+            pipeline=train_pipeline,
+            modality=input_modality,
+            classes=class_names,
+            test_mode=False)),
     val=dict(
         type=dataset_type,
         data_root=data_root,
@@ -255,10 +245,10 @@ log_config = dict(
 # yapf:enable
 evaluation = dict(interval=1)
 # runtime settings
-total_epochs = 80
+total_epochs = 40
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth'  # noqa
+work_dir = None
+load_from = './pretrain_mmdet/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
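Note on the RepeatDataset change above: RepeatDataset virtually repeats the wrapped dataset, so each epoch covers it several times, which is why total_epochs drops from 80 to 40 while times=2. A minimal sketch of the idea (assuming it mirrors mmdet's wrapper; not the verbatim library code):

class RepeatDataset:
    # conceptual sketch: present `dataset` as if it were `times` copies
    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times
        self._ori_len = len(dataset)

    def __getitem__(self, idx):
        # indices simply wrap around the original dataset
        return self.dataset[idx % self._ori_len]

    def __len__(self):
        return self.times * self._ori_len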
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://regnetx_3.2gf',
backbone=dict(
type='RegNet',
arch='regnetx_3.2gf',
out_indices=(0, 1, 2, 3),
frozen_stages=1,
base_channels=32,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[96, 192, 432, 1008],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
# The mean and std are the ones used in PyCls when training RegNets
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
to_rgb=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth' # noqa
resume_from = None
workflow = [('train', 1)]
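A note on the petrel file_client_args used above: path_mapping rewrites local data prefixes to remote (S3-style) URIs before every read. A conceptual sketch of that rewriting, assuming mmcv's FileClient semantics (simplified, not the library code; the sample filename is hypothetical):

def map_path(filepath, path_mapping):
    # rewrite each configured local prefix to its remote counterpart
    for local, remote in path_mapping.items():
        filepath = filepath.replace(local, remote)
    return filepath

print(map_path('data/nuscenes/samples/cam_front.jpg',
               {'data/nuscenes/': 's3://nuscenes/nuscenes/'}))
# s3://nuscenes/nuscenes/samples/cam_front.jpg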
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='RetinaNet',
pretrained=('open-mmlab://resnet50_caffe_bgr'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=10,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models from Detectron1 or any MSRA models,
# the std has been absorbed into the conv1 weights, so std needs to be set to 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=(1600, 900),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
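For reference, the RetinaHead anchor settings above (octave_base_scale=4, scales_per_octave=3, strides=[8, 16, 32, 64, 128]) imply per-level base anchor sizes of stride * 4 * 2 ** (i / 3), assuming mmdet's AnchorGenerator semantics. A quick check:

for stride in [8, 16, 32, 64, 128]:
    sizes = [round(stride * 4 * 2 ** (i / 3), 1) for i in range(3)]
    print(stride, sizes)
# 8 [32.0, 40.3, 50.8]
# 16 [64.0, 80.6, 101.6]
# ... up to stride 128

Each size is further combined with the three aspect ratios, i.e. nine anchors per location.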
 API Documentation
 =================
-mmdet3d.apis
---------------
-.. automodule:: mmdet3d.apis
-    :members:
-
 mmdet3d.core
 --------------
......
from .train import batch_processor, train_detector
__all__ = ['batch_processor', 'train_detector']
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer
from mmdet3d.utils import get_root_logger
from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
Fp16OptimizerHook)
from mmdet.datasets import build_dataloader, build_dataset
def batch_processor(model, data, train_mode):
"""Process a data batch.
This method is required as an argument of Runner, which defines how to
process a data batch and obtain proper outputs. The first 3 arguments of
batch_processor are fixed.
Args:
model (nn.Module): A PyTorch model.
data (dict): The data batch in a dict.
train_mode (bool): Training mode or not. It may be useless for some
models.
Returns:
dict: A dict containing losses and log vars.
"""
losses = model(**data)
loss, log_vars = parse_losses(losses)
if 'img_metas' in data:
num_samples = len(data['img_metas'].data)
else:
num_samples = len(data['img'].data)
outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
return outputs
def train_detector(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None):
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds,
cfg.data.samples_per_gpu,
cfg.data.workers_per_gpu,
# cfg.gpus will be ignored if distributed
len(cfg.gpu_ids),
dist=distributed,
seed=cfg.seed) for ds in dataset
]
# put model on gpus
if distributed:
find_unused_parameters = cfg.get('find_unused_parameters', False)
# Sets the `find_unused_parameters` parameter in
# torch.nn.parallel.DistributedDataParallel
model = MMDistributedDataParallel(
model.cuda(),
device_ids=[torch.cuda.current_device()],
broadcast_buffers=False,
find_unused_parameters=find_unused_parameters)
else:
model = MMDataParallel(
model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model,
batch_processor,
optimizer,
cfg.work_dir,
logger=logger,
meta=meta)
# an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=distributed)
elif distributed and 'type' not in cfg.optimizer_config:
optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
else:
optimizer_config = cfg.optimizer_config
# register hooks
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config,
cfg.get('momentum_config', None))
if distributed:
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False)
eval_cfg = cfg.get('evaluation', {})
eval_hook = DistEvalHook if distributed else EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
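For context, a hedged sketch of how this train_detector entry point is typically invoked from a training script (the config path is hypothetical; the config is assumed to define the fields the function reads, e.g. data, optimizer, work_dir, lr_config):

from mmcv import Config

from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_detector

cfg = Config.fromfile('configs/some_config.py')  # hypothetical path
cfg.gpu_ids = [0]
cfg.seed = None
model = build_detector(cfg.model, train_cfg=cfg.train_cfg,
                       test_cfg=cfg.test_cfg)
datasets = [build_dataset(cfg.data.train)]
train_detector(model, datasets, cfg, distributed=False, validate=True)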
@@ -52,7 +52,7 @@ class KittiDataset(Custom3DDataset):
     def get_data_info(self, index):
         info = self.data_infos[index]
         sample_idx = info['image']['image_idx']
-        img_filename = os.path.join(self.root_split,
+        img_filename = os.path.join(self.data_root,
                                     info['image']['image_path'])
         # TODO: consider use torch.Tensor only
@@ -65,7 +65,8 @@ class KittiDataset(Custom3DDataset):
         input_dict = dict(
             sample_idx=sample_idx,
             pts_filename=pts_filename,
-            img_filename=img_filename,
+            img_prefix=None,
+            img_info=dict(filename=img_filename),
             lidar2img=lidar2img)

         if not self.test_mode:
@@ -113,8 +114,8 @@ class KittiDataset(Custom3DDataset):
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
             gt_labels_3d=gt_labels_3d,
-            gt_bboxes=gt_bboxes,
-            gt_labels=gt_labels)
+            bboxes=gt_bboxes,
+            labels=gt_labels)
         return anns_results

     def drop_arrays_by_name(self, gt_names, used_classes):
@@ -151,6 +152,24 @@ class KittiDataset(Custom3DDataset):
             result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
                                                     pklfile_prefix,
                                                     submission_prefix)
+        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
+            result_files = dict()
+            for name in outputs[0]:
+                results_ = [out[name] for out in outputs]
+                pklfile_prefix_ = pklfile_prefix + name
+                if submission_prefix is not None:
+                    submission_prefix_ = submission_prefix + name
+                else:
+                    submission_prefix_ = None
+                if 'img' in name:
+                    result_files_ = self.bbox2result_kitti2d(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                else:
+                    result_files_ = self.bbox2result_kitti(
+                        results_, self.CLASSES, pklfile_prefix_,
+                        submission_prefix_)
+                result_files[name] = result_files_
         else:
             result_files = self.bbox2result_kitti(outputs, self.CLASSES,
                                                   pklfile_prefix,
@@ -162,8 +181,7 @@ class KittiDataset(Custom3DDataset):
                  metric=None,
                  logger=None,
                  pklfile_prefix=None,
-                 submission_prefix=None,
-                 result_names=['pts_bbox']):
+                 submission_prefix=None):
         """Evaluation in KITTI protocol.

         Args:
@@ -178,18 +196,38 @@ class KittiDataset(Custom3DDataset):
                 If not specified, the submission data will not be generated.

         Returns:
-            dict[str: float]
+            dict[str: float]: results of each evaluation metric
         """
         result_files, tmp_dir = self.format_results(results, pklfile_prefix)
         from mmdet3d.core.evaluation import kitti_eval
         gt_annos = [info['annos'] for info in self.data_infos]
-        if metric == 'img_bbox':
-            ap_result_str, ap_dict = kitti_eval(
-                gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
-        else:
-            ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
-                                                self.CLASSES)
-        print_log('\n' + ap_result_str, logger=logger)
+        if isinstance(result_files, dict):
+            ap_dict = dict()
+            for name, result_files_ in result_files.items():
+                eval_types = ['bbox', 'bev', '3d']
+                if 'img' in name:
+                    eval_types = ['bbox']
+                ap_result_str, ap_dict_ = kitti_eval(
+                    gt_annos,
+                    result_files_,
+                    self.CLASSES,
+                    eval_types=eval_types)
+                for ap_type, ap in ap_dict_.items():
+                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+                print_log(
+                    f'Results of {name}:\n' + ap_result_str, logger=logger)
+        else:
+            if metric == 'img_bbox':
+                ap_result_str, ap_dict = kitti_eval(
+                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
+            else:
+                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
                                                    self.CLASSES)
+            print_log('\n' + ap_result_str, logger=logger)
         if tmp_dir is not None:
             tmp_dir.cleanup()
         return ap_dict
......
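With both branches present, the dict-handling paths above key results and metrics by branch name, so the returned ap_dict is namespaced per branch. Purely illustrative (key names and values are hypothetical):

ap_dict = {
    'pts_bbox/KITTI/Car_3D_moderate': 79.30,    # LiDAR branch, 3D AP
    'img_bbox/KITTI/Car_bbox_moderate': 88.10,  # image branch, 2D AP
}
for key, ap in ap_dict.items():
    print(f'{key}: {ap:.4f}')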
 from mmdet.datasets.pipelines import Compose
-from .dbsampler import DataBaseSampler, MMDataBaseSampler
+from .dbsampler import DataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScaleTrans,
                              IndoorPointsColorJitter)
@@ -19,6 +19,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScaleTrans', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
-    'IndoorPointSample', 'PointSegClassMapping', 'MultiScaleFlipAug3D'
+    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
+    'PointSegClassMapping', 'MultiScaleFlipAug3D'
 ]
@@ -2,8 +2,6 @@ import copy
 import os
 import pickle

-import cv2
-import mmcv
 import numpy as np

 from mmdet3d.core.bbox import box_np_ops
@@ -263,255 +261,3 @@ class DataBaseSampler(object):
                             boxes[i, -1] - sp_boxes[i - num_gt, -1])
                 valid_samples.append(sampled[i - num_gt])
         return valid_samples
@OBJECTSAMPLERS.register_module()
class MMDataBaseSampler(DataBaseSampler):
def __init__(self,
info_path,
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None,
check_2D_collision=False,
collision_thr=0,
collision_in_classes=False,
depth_consistent=False,
blending_type=None):
super(MMDataBaseSampler, self).__init__(
info_path=info_path,
data_root=data_root,
rate=rate,
prepare=prepare,
object_rot_range=object_rot_range,
sample_groups=sample_groups,
classes=classes)
self.blending_type = blending_type
self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision
self.collision_thr = collision_thr
self.collision_in_classes = collision_in_classes
def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes_3d = []
sampled_gt_bboxes_2d = []
avoid_coll_boxes_3d = gt_bboxes_3d
avoid_coll_boxes_2d = gt_bboxes_2d
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes_3d,
avoid_coll_boxes_2d)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
np.newaxis, ...]
else:
sampled_gt_box_3d = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_box_2d = np.stack(
[s['box2d_camera'] for s in sampled_cls], axis=0)
sampled_gt_bboxes_3d += [sampled_gt_box_3d]
sampled_gt_bboxes_2d += [sampled_gt_box_2d]
if self.collision_in_classes:
# TODO: check whether collision check among
# classes is necessary
avoid_coll_boxes_3d = np.concatenate(
[avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
avoid_coll_boxes_2d = np.concatenate(
[avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
s_points_list = []
count = 0
if self.depth_consistent:
# change the paster order based on distance
center = sampled_gt_bboxes_3d[:, 0:3]
paste_order = np.argsort(
-np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
axis=-1)
for idx in range(len(sampled)):
if self.depth_consistent:
inds = np.where(paste_order == idx)[0][0]
info = sampled[inds]
else:
info = sampled[idx]
pcd_file_path = os.path.join(
self.data_root,
info['path']) if self.data_root else info['path']
img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile(
pcd_file_path, dtype=np.float32).reshape([-1, 4])
s_patch = mmcv.imread(img_file_path)
s_mask = mmcv.imread(mask_file_path, 'grayscale')
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
# TODO: might need to rot 2d bbox in the future
# the points of each sample have already had the object center
# subtracted, so add the offset back here
s_points[:, :3] += info['box3d_lidar'][:3]
img = self.paste_obj(
img,
s_patch,
s_mask,
bbox_2d=info['box2d_camera'].astype(np.int32))
count += 1
s_points_list.append(s_points)
ret = dict(
img=img,
gt_names=np.array([s['name'] for s in sampled]),
difficulty=np.array([s['difficulty'] for s in sampled]),
gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0),
group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled)))
return ret
def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
# paste the image patch back
x1, y1, x2, y2 = bbox_2d
# the bbox may exceed the image bounds since the patch comes from a different image
img_h, img_w = img.shape[:2]
w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
obj_mask = obj_mask[:h, :w]
obj_img = obj_img[:h, :w]
# choose a blend option
if not self.blending_type:
blending_op = 'none'
else:
blending_choice = np.random.randint(len(self.blending_type))
blending_op = self.blending_type[blending_choice]
if blending_op.find('poisson') != -1:
# options: cv2.NORMAL_CLONE=1, or cv2.MONOCHROME_TRANSFER=3
# cv2.MIXED_CLONE mixes the texture, so it is not used.
if blending_op == 'poisson':
mode = np.random.choice([1, 3], 1)[0]
elif blending_op == 'poisson_normal':
mode = cv2.NORMAL_CLONE
elif blending_op == 'poisson_transfer':
mode = cv2.MONOCHROME_TRANSFER
else:
raise NotImplementedError
center = (int(x1 + w / 2), int(y1 + h / 2))
img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
else:
if blending_op == 'gaussian':
obj_mask = cv2.GaussianBlur(
obj_mask.astype(np.float32), (5, 5), 2)
elif blending_op == 'box':
obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
paste_mask = 1 - obj_mask
img[y1:y1 + h,
x1:x1 + w] = (img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
paste_mask[..., None]).astype(np.uint8)
img[y1:y1 + h, x1:x1 + w] += (obj_img.astype(np.float32) *
obj_mask[..., None]).astype(np.uint8)
return img
def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes_3d.shape[0]
num_sampled = len(sampled)
# avoid collision in BEV first
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
# Then avoid collision in 2D space
if self.check_2D_collision:
sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
axis=0)
total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
axis=0) # Nx4
# randomly select a collision threshold
if isinstance(self.collision_thr, float):
collision_thr = self.collision_thr
elif isinstance(self.collision_thr, list):
collision_thr = np.random.choice(self.collision_thr)
elif isinstance(self.collision_thr, dict):
mode = self.collision_thr.get('mode', 'value')
if mode == 'value':
collision_thr = np.random.choice(
self.collision_thr['thr_range'])
elif mode == 'range':
collision_thr = np.random.uniform(
self.collision_thr['thr_range'][0],
self.collision_thr['thr_range'][1])
if collision_thr == 0:
# use a collision test similar to the BEV one
# Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
# ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
x1y1 = total_bbox_2d[:, :2]
x2y2 = total_bbox_2d[:, 2:]
x1y2 = np.stack([total_bbox_2d[:, 0], total_bbox_2d[:, 3]],
axis=-1)
x2y1 = np.stack([total_bbox_2d[:, 2], total_bbox_2d[:, 1]],
axis=-1)
total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
coll_mat_2d = data_augment_utils.box_collision_test(
total_2d, total_2d)
else:
# use iof rather than iou to protect the foreground
overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
'iof')
coll_mat_2d = overlaps > collision_thr
coll_mat = coll_mat + coll_mat_2d
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
valid_samples.append(sampled[i - num_gt])
return valid_samples
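The 'poisson' branch of paste_obj above leans on OpenCV's seamless cloning. A standalone sketch with synthetic images (NORMAL_CLONE mode; sizes and colors are arbitrary):

import cv2
import numpy as np

dst = np.full((128, 128, 3), 120, dtype=np.uint8)  # background image
src = np.zeros((40, 40, 3), dtype=np.uint8)
src[:] = (0, 0, 255)                               # red patch (BGR)
mask = np.full((40, 40), 255, dtype=np.uint8)      # clone the whole patch
center = (64, 64)                                  # patch center in dst
out = cv2.seamlessClone(src, dst, mask, center, cv2.NORMAL_CLONE)
print(out.shape)  # (128, 128, 3)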
-from abc import ABCMeta, abstractmethod
-
-import torch.nn as nn
+from mmdet.models.detectors import BaseDetector


-class Base3DDetector(nn.Module, metaclass=ABCMeta):
+class Base3DDetector(BaseDetector):
     """Base class for detectors"""

-    def __init__(self):
-        super(Base3DDetector, self).__init__()
-        self.fp16_enabled = False
-
-    @property
-    def with_neck(self):
-        return hasattr(self, 'neck') and self.neck is not None
-
-    @property
-    def with_shared_head(self):
-        return hasattr(self, 'shared_head') and self.shared_head is not None
-
-    @property
-    def with_bbox(self):
-        return hasattr(self, 'bbox_head') and self.bbox_head is not None
-
-    @property
-    def with_mask(self):
-        return hasattr(self, 'mask_head') and self.mask_head is not None
-
-    @abstractmethod
-    def extract_feat(self, imgs):
-        pass
-
-    def extract_feats(self, imgs):
-        assert isinstance(imgs, list)
-        for img in imgs:
-            yield self.extract_feat(img)
-
-    @abstractmethod
-    def forward_train(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def simple_test(self, **kwargs):
-        pass
-
-    @abstractmethod
-    def aug_test(self, **kwargs):
-        pass
-
-    def init_weights(self, pretrained=None):
-        if pretrained is not None:
-            from mmdet3d.utils import get_root_logger
-            logger = get_root_logger()
-            logger.info('load model from: {}'.format(pretrained))
-
-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
+    def forward_test(self, points, img_metas, img=None, **kwargs):
         """
         Args:
             points (List[Tensor]): the outer list indicates test-time
@@ -62,7 +13,7 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
             img_metas (List[List[dict]]): the outer list indicates test-time
                 augs (multiscale, flip, etc.) and the inner list indicates
                 images in a batch
-            imgs (List[Tensor], optional): the outer list indicates test-time
+            img (List[Tensor], optional): the outer list indicates test-time
                 augmentations and inner Tensor should have a shape NxCxHxW,
                 which contains all images in the batch. Defaults to None.
         """
@@ -81,10 +32,10 @@ class Base3DDetector(nn.Module, metaclass=ABCMeta):
         assert samples_per_gpu == 1

         if num_augs == 1:
-            imgs = [imgs] if imgs is None else imgs
-            return self.simple_test(points[0], img_metas[0], imgs[0], **kwargs)
+            img = [img] if img is None else img
+            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)
         else:
-            return self.aug_test(points, img_metas, imgs, **kwargs)
+            return self.aug_test(points, img_metas, img, **kwargs)

     def forward(self, return_loss=True, **kwargs):
         """
......
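To make the forward_test contract above concrete, a hedged sketch of the nesting it expects (shapes are illustrative; img_metas is reduced to a minimal dict):

import torch

points = [[torch.rand(1000, 4)]]        # 1 aug x 1 sample, (N, C) points
img_metas = [[dict(scale_factor=1.0)]]  # matching nested meta dicts
img = [torch.rand(1, 3, 384, 1280)]     # optional, one NxCxHxW tensor per aug
# detector.forward(return_loss=False, points=points,
#                  img_metas=img_metas, img=img)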
@@ -265,13 +265,13 @@ class MVXTwoStageDetector(Base3DDetector):
             proposal_list = proposals

         # bbox head forward and loss
-        img_roi_losses = self.roi_head.forward_train(x, img_metas,
-                                                     proposal_list, gt_bboxes,
-                                                     gt_labels,
-                                                     gt_bboxes_ignore,
-                                                     **kwargs)
-        losses.update(img_roi_losses)
+        if self.with_img_bbox:
+            # bbox head forward and loss
+            img_roi_losses = self.img_roi_head.forward_train(
+                x, img_metas, proposal_list, gt_bboxes, gt_labels,
+                gt_bboxes_ignore, **kwargs)
+            losses.update(img_roi_losses)

         return losses

     def simple_test_img(self, x, img_metas, proposals=None, rescale=False):
......
@@ -2,12 +2,17 @@ import torch
 import torch.nn.functional as F

 from mmdet3d.ops import Voxelization
-from mmdet.models import DETECTORS, TwoStageDetector
+from mmdet.models import DETECTORS
 from .. import builder
+from .two_stage import TwoStage3DDetector


 @DETECTORS.register_module()
-class PartA2(TwoStageDetector):
+class PartA2(TwoStage3DDetector):
+    """Part-A2 detector
+
+    Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_
+    """

     def __init__(self,
                  voxel_layer,
@@ -111,41 +116,6 @@ class PartA2(TwoStageDetector):
         return losses

-    def forward_test(self, points, img_metas, imgs=None, **kwargs):
-        """
-        Args:
-            points (List[Tensor]): the outer list indicates test-time
-                augmentations and inner Tensor should have a shape NxC,
-                which contains all points in the batch.
-            img_metas (List[List[dict]]): the outer list indicates test-time
-                augs (multiscale, flip, etc.) and the inner list indicates
-                images in a batch
-        """
-        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:
-            if not isinstance(var, list):
-                raise TypeError('{} must be a list, but got {}'.format(
-                    name, type(var)))
-
-        num_augs = len(points)
-        if num_augs != len(img_metas):
-            raise ValueError(
-                'num of augmentations ({}) != num of image meta ({})'.format(
-                    len(points), len(img_metas)))
-        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
-        samples_per_gpu = len(points[0])
-        assert samples_per_gpu == 1
-
-        if num_augs == 1:
-            return self.simple_test(points[0], img_metas[0], **kwargs)
-        else:
-            return self.aug_test(points, img_metas, **kwargs)
-
-    def forward(self, return_loss=True, **kwargs):
-        if return_loss:
-            return self.forward_train(**kwargs)
-        else:
-            return self.forward_test(**kwargs)
-
     def simple_test(self, points, img_metas, proposals=None, rescale=False):
         feats_dict, voxels_dict = self.extract_feat(points, img_metas)
@@ -159,6 +129,3 @@ class PartA2(TwoStageDetector):
         return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,
                                          proposal_list)
-
-    def aug_test(self, **kwargs):
-        raise NotImplementedError
@@ -6,6 +6,21 @@ from .base import Base3DDetector

 @DETECTORS.register_module()
 class SingleStage3DDetector(Base3DDetector):
+    """SingleStage3DDetector
+
+    This class serves as a base class for single-stage 3D detectors.
+
+    Args:
+        backbone (dict): Config dict of detector's backbone.
+        neck (dict, optional): Config dict of neck. Defaults to None.
+        bbox_head (dict, optional): Config dict of box head. Defaults to None.
+        train_cfg (dict, optional): Config dict of training hyper-parameters.
+            Defaults to None.
+        test_cfg (dict, optional): Config dict of test hyper-parameters.
+            Defaults to None.
+        pretrained (str, optional): Path of pretrained models.
+            Defaults to None.
+    """

     def __init__(self,
                  backbone,
......
from mmdet.models import DETECTORS, TwoStageDetector
from .base import Base3DDetector
@DETECTORS.register_module()
class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
"""Base class of two-stage 3D detector
It inherits the original :class:`TwoStageDetector` and
:class:`Base3DDetector`. This class can serve as a base class for
all two-stage 3D detectors.
"""
def __init__(self, **kwargs):
super(TwoStage3DDetector, self).__init__(**kwargs)
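The base order matters here: under Python's C3 linearization, methods defined on Base3DDetector (e.g. forward_test) take precedence over TwoStageDetector's 2D versions. A self-contained illustration with stand-in classes (not the real detectors):

class BaseDetector: ...
class Base3DDetector(BaseDetector): ...
class TwoStageDetector(BaseDetector): ...
class TwoStage3DDetector(Base3DDetector, TwoStageDetector): ...

print([c.__name__ for c in TwoStage3DDetector.__mro__])
# ['TwoStage3DDetector', 'Base3DDetector', 'TwoStageDetector',
#  'BaseDetector', 'object']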
@@ -287,7 +287,7 @@ class PointFusion(nn.Module):
             pts.new_tensor(img_meta['pcd_rotation']) if 'pcd_rotation'
             in img_meta.keys() else torch.eye(3).type_as(pts).to(pts.device))
         img_scale_factor = (
-            img_meta['scale_factor']
+            pts.new_tensor(img_meta['scale_factor'][:2])
             if 'scale_factor' in img_meta.keys() else 1)
         pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
         ) else False
......
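Background for the change above: in mmdet pipelines, img_meta['scale_factor'] is typically a length-4 array (w_scale, h_scale, w_scale, h_scale), so [:2] keeps a single (w, h) pair and new_tensor moves it onto the right device and dtype for scaling projected point coordinates. A hedged sketch:

import numpy as np
import torch

pts = torch.rand(5, 3)
img_meta = dict(
    scale_factor=np.array([0.5, 0.5, 0.5, 0.5], dtype=np.float32))
img_scale_factor = pts.new_tensor(img_meta['scale_factor'][:2])
print(img_scale_factor)  # tensor([0.5000, 0.5000])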
 matplotlib
-mmcv>=0.5.1
+mmcv>=0.6.0
 numba==0.48.0
 numpy
 # need older pillow until torchvision is fixed
......
@@ -115,10 +115,8 @@ def test_config_data_pipeline():
     print('Found config_dpath = {!r}'.format(config_dpath))

     # Only tests a representative subset of configurations
-    # TODO: test pipelines using Albu, current Albu throw None given empty GT
     config_names = [
         'mvxnet/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py',
-        'mvxnet/retinanet_r50_fpn_caffe_2x8_1x_nus.py',
         'mvxnet/'
         'faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py',
     ]
......
@@ -82,10 +82,10 @@ def _test_two_stage_forward(cfg_file):
         gt_masks=gt_masks,
         return_loss=True)
     assert isinstance(losses, dict)
-    from mmdet.apis.train import parse_losses
-    total_loss = parse_losses(losses)[0].requires_grad_(True)
-    assert float(total_loss.item()) > 0
-    total_loss.backward()
+    loss, _ = detector._parse_losses(losses)
+    loss.requires_grad_(True)
+    assert float(loss.item()) > 0
+    loss.backward()

     # Test forward train with an empty truth batch
     mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
@@ -102,10 +102,9 @@ def _test_two_stage_forward(cfg_file):
         gt_masks=gt_masks,
         return_loss=True)
     assert isinstance(losses, dict)
-    from mmdet.apis.train import parse_losses
-    total_loss = parse_losses(losses)[0].requires_grad_(True)
-    assert float(total_loss.item()) > 0
-    total_loss.backward()
+    loss, _ = detector._parse_losses(losses)
+    assert float(loss.item()) > 0
+    loss.backward()

     # Test forward test
     with torch.no_grad():
@@ -140,6 +139,8 @@ def _test_single_stage_forward(cfg_file):
         gt_labels=gt_labels,
         return_loss=True)
     assert isinstance(losses, dict)
+    loss, _ = detector._parse_losses(losses)
+    assert float(loss.item()) > 0

     # Test forward test
     with torch.no_grad():
......
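The tests now rely on the detector's inherited _parse_losses rather than a standalone helper. A simplified sketch of what such a method does (sum every entry whose key contains 'loss'; not the verbatim mmdet implementation):

import torch

def parse_losses(losses):
    log_vars = {}
    for name, value in losses.items():
        # each value is a tensor or a list of tensors
        log_vars[name] = (value.mean() if torch.is_tensor(value)
                          else sum(v.mean() for v in value))
    total = sum(v for k, v in log_vars.items() if 'loss' in k)
    return total, log_vars

loss, _ = parse_losses(dict(loss_cls=torch.tensor(0.7),
                            loss_bbox=torch.tensor(0.3)))
print(float(loss))  # 1.0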
@@ -12,11 +12,10 @@ from mmcv import Config, DictAction
 from mmcv.runner import init_dist

 from mmdet3d import __version__
-from mmdet3d.apis import train_detector
 from mmdet3d.datasets import build_dataset
 from mmdet3d.models import build_detector
 from mmdet3d.utils import collect_env, get_root_logger
-from mmdet.apis import set_random_seed
+from mmdet.apis import set_random_seed, train_detector


 def parse_args():
......