Commit ba3cd005 authored by 雍大凯's avatar 雍大凯
Browse files

将子模块转换为普通目录

parent d2b71343
_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'
# learning policy
lr_config = dict(step=[16, 19])
runner = dict(max_epochs=20)
load_from = 'http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth' # noqa
_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './htc_without_semantic_r50_fpn_1x_nuim.py'
model = dict(
roi_head=dict(
semantic_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[8]),
semantic_head=dict(
type='FusedSemanticHead',
num_ins=5,
fusion_level=1,
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=32,
ignore_label=0,
loss_weight=0.2)))
data_root = 'data/nuimages/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='SegRescale', scale_factor=1 / 8),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg'])
]
data = dict(
train=dict(
seg_prefix=data_root + 'annotations/semantic_masks/',
pipeline=train_pipeline))
_base_ = './htc_r50_fpn_1x_nuim.py'
load_from = 'http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth' # noqa
_base_ = './htc_r50_fpn_coco-20e_1x_nuim.py'
# learning policy
lr_config = dict(step=[16, 19])
runner = dict(max_epochs=20)
_base_ = [
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
type='HybridTaskCascade',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='HybridTaskCascadeRoIHead',
interleaved=True,
mask_info_flow=True,
num_stages=3,
stage_loss_weights=[1, 0.5, 0.25],
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=[
dict(
type='HTCMaskHead',
with_conv_res=False,
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=10,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
dict(
type='HTCMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=10,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
dict(
type='HTCMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=10,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))
]),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
]),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.001,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
_base_ = './htc_r50_fpn_1x_nuim.py'
model = dict(
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
data = dict(samples_per_gpu=1, workers_per_gpu=1)
# learning policy
lr_config = dict(step=[16, 19])
runner = dict(max_epochs=20)
load_from = 'http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth' # noqa
_base_ = './mask_rcnn_r50_fpn_1x_nuim.py'
model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth' # noqa
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
# learning policy
lr_config = dict(step=[16, 19])
runner = dict(max_epochs=20)
load_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth' # noqa
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth' # noqa
_base_ = [
'../_base_/models/mask_rcnn_r50_fpn.py',
'../_base_/datasets/nuim_instance.py',
'../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
roi_head=dict(
bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data_root = 'data/nuimages/'
# data = dict(
# val=dict(
# ann_file=data_root + 'annotations/nuimages_v1.0-mini.json'),
# test=dict(
# ann_file=data_root + 'annotations/nuimages_v1.0-mini.json'))
_base_ = './mask_rcnn_r50_fpn_1x_nuim.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
Collections:
- Name: Mask R-CNN
Metadata:
Training Data: nuImages
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x TITAN Xp
Architecture:
- Softmax
- RPN
- Convolution
- Dense Connections
- FPN
- ResNet
- RoIAlign
Paper:
URL: https://arxiv.org/abs/1703.06870v3
Title: "Mask R-CNN"
README: https://github.com/open-mmlab/mmdetection/blob/master/configs/mask_rcnn/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/mask_rcnn.py#L6
Version: v2.0.0
- Name: Cascade R-CNN
Metadata:
Training Data: nuImages
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x V100 GPUs
Architecture:
- Cascade R-CNN
- FPN
- RPN
- ResNet
- RoIAlign
Paper:
URL: http://dx.doi.org/10.1109/tpami.2019.2956516
Title: 'Cascade R-CNN: Delving into High Quality Object Detection'
README: https://github.com/open-mmlab/mmdetection/blob/master/configs/cascade_rcnn/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/cascade_rcnn.py#L6
Version: v2.0.0
- Name: HTC
Metadata:
Training Data: nuImages
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x V100 GPUs
Architecture:
- FPN
- HTC
- RPN
- ResNet
- ResNeXt
- RoIAlign
Paper:
URL: https://arxiv.org/abs/1901.07518
Title: 'Hybrid Task Cascade for Instance Segmentation'
README: https://github.com/open-mmlab/mmdetection/blob/master/configs/htc/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/htc.py#L6
Version: v2.0.0
Models:
- Name: mask_rcnn_r50_fpn_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r50_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 7.4
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 47.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 38.4
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_1x_nuim/mask_rcnn_r50_fpn_1x_nuim_20201008_195238-e99f5182.pth
- Name: mask_rcnn_r50_fpn_coco-2x_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r50_fpn_coco-2x_1x_nuim.py
Metadata:
Training Memory (GB): 7.4
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 49.7
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 40.5
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_coco-2x_1x_nuim/mask_rcnn_r50_fpn_coco-2x_1x_nuim_20201008_195238-b1742a60.pth
- Name: mask_rcnn_r50_caffe_fpn_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r50_caffe_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 7.0
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 47.7
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 38.2
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_1x_nuim/mask_rcnn_r50_caffe_fpn_1x_nuim_20220718_195238-b1742a60.pth
- Name: mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim.py
Metadata:
Training Memory (GB): 7.0
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 49.9
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 40.8
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim_20201008_195305-661a992e.pth
- Name: mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim.py
Metadata:
Training Memory (GB): 7.0
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 50.6
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 41.3
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim_20201009_125002-5529442c.pth
- Name: mask_rcnn_r101_fpn_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_r101_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 10.9
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 48.9
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 39.1
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r101_fpn_1x_nuim/mask_rcnn_r101_fpn_1x_nuim_20201024_134803-65c7623a.pth
- Name: mask_rcnn_x101_32x4d_fpn_1x_nuim
In Collection: Mask R-CNN
Config: configs/nuimages/mask_rcnn_x101_32x4d_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 13.3
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 50.4
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 40.5
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_x101_32x4d_fpn_1x_nuim/mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135741-b699ab37.pth
- Name: cascade_mask_rcnn_r50_fpn_1x_nuim
In Collection: Cascade R-CNN
Config: configs/nuimages/cascade_mask_rcnn_r50_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 8.9
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 50.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 40.4
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_1x_nuim/cascade_mask_rcnn_r50_fpn_1x_nuim_20201008_195342-1147c036.pth
- Name: cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim
In Collection: Cascade R-CNN
Config: configs/nuimages/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim.py
Metadata:
Training Memory (GB): 8.9
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 52.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 42.2
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim_20201009_124158-ad0540e3.pth
- Name: cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim
In Collection: Cascade R-CNN
Config: configs/nuimages/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim.py
Metadata:
Training Memory (GB): 8.9
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 52.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 42.2
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth
- Name: cascade_mask_rcnn_r101_fpn_1x_nuim
In Collection: Cascade R-CNN
Config: configs/nuimages/cascade_mask_rcnn_r101_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 12.5
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 51.5
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 40.7
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r101_fpn_1x_nuim/cascade_mask_rcnn_r101_fpn_1x_nuim_20201024_134804-45215b1e.pth
- Name: cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim
In Collection: Cascade R-CNN
Config: configs/nuimages/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim.py
Metadata:
Training Memory (GB): 14.9
Training Resources: 8x TITAN Xp
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 52.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 41.6
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135753-e0e49778.pth
- Name: htc_r50_fpn_coco-20e_1x_nuim
In Collection: HTC
Config: configs/nuimages/htc_r50_fpn_coco-20e_1x_nuim.py
Metadata:
Training Memory (GB): 11.6
Training Resources: 8x V100 GPUs
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 53.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 43.8
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_1x_nuim/htc_r50_fpn_coco-20e_1x_nuim_20201010_070203-0b53a65e.pth
- Name: htc_r50_fpn_coco-20e_20e_nuim
In Collection: HTC
Config: configs/nuimages/htc_r50_fpn_coco-20e_20e_nuim.py
Metadata:
Training Memory (GB): 11.6
Training Resources: 8x V100 GPUs
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 54.8
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 44.4
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_20e_nuim/htc_r50_fpn_coco-20e_20e_nuim_20201008_211415-d6c60a2c.pth
- Name: htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim
In Collection: HTC
Config: configs/nuimages/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim.py
Metadata:
Training Memory (GB): 13.3
Training Resources: 8x V100 GPUs
Results:
- Task: Object Detection
Dataset: nuImages
Metrics:
Box AP: 57.3
- Task: Instance Segmentation
Dataset: nuImages
Metrics:
Mask AP: 46.4
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim_20201008_211222-0b16ac4b.pth
# PAConv: Position Adaptive Convolution with Dynamic Kernel Assembling on Point Clouds
> [PAConv: Position Adaptive Convolution with Dynamic Kernel Assembling on Point Clouds](https://arxiv.org/abs/2103.14635)
<!-- [ALGORITHM] -->
## Abstract
We introduce Position Adaptive Convolution (PAConv), a generic convolution operation for 3D point cloud processing. The key of PAConv is to construct the convolution kernel by dynamically assembling basic weight matrices stored in Weight Bank, where the coefficients of these weight matrices are self-adaptively learned from point positions through ScoreNet. In this way, the kernel is built in a data-driven manner, endowing PAConv with more flexibility than 2D convolutions to better handle the irregular and unordered point cloud data. Besides, the complexity of the learning process is reduced by combining weight matrices instead of brutally predicting kernels from point positions.
Furthermore, different from the existing point convolution operators whose network architectures are often heavily engineered, we integrate our PAConv into classical MLP-based point cloud pipelines without changing network configurations. Even built on simple networks, our method still approaches or even surpasses the state-of-the-art models, and significantly improves baseline performance on both classification and segmentation tasks, yet with decent efficiency. Thorough ablation studies and visualizations are provided to understand PAConv.
<div align=center>
<img src="https://user-images.githubusercontent.com/79644370/143881915-003d5f10-3999-474e-969a-c354cb738a11.png" width="800"/>
</div>
## Introduction
We implement PAConv and provide the result and checkpoints on S3DIS dataset.
**Notice**: The original PAConv paper used step learning rate schedule. We discovered that cosine schedule achieves slightly better results and adopt it in our implementations.
## Results and models
### S3DIS
| Method | Split | Lr schd | Mem (GB) | Inf time (fps) | mIoU (Val set) | Download |
| :-------------------------------------------------------------------------: | :----: | :---------: | :------: | :------------: | :------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PAConv (SSG)](./paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class.py) | Area_5 | cosine 150e | 5.8 | | 66.65 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class_20210729_200615-2147b2d1.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class_20210729_200615.log.json) |
| [PAConv\* (SSG)](./paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py) | Area_5 | cosine 200e | 3.8 | | 65.33 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class_20210802_171802-e5ea9bb9.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class_20210802_171802.log.json) |
**Notes:**
- We use XYZ+Color+Normalized_XYZ as input in all the experiments on S3DIS datasets.
- `Area_5` Split means training the model on Area_1, 2, 3, 4, 6 and testing on Area_5.
- PAConv\* stands for the CUDA implementation of PAConv operations. See the [paper](https://arxiv.org/pdf/2103.14635.pdf) appendix section D for more details. In our experiments, the training of PAConv\* is found to be very unstable. We achieved slightly lower mIoU than the result in the paper, but is consistent with the result obtained by running their [official code](https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg). Besides, although the GPU memory consumption of PAConv\* is significantly lower than PAConv, its training and inference speed are actually slower (by ~10%).
## Indeterminism
Since PAConv testing adopts sliding patch inference which involves random point sampling, and the test script uses fixed random seeds while the random seeds of validation in training are not fixed, the test results may be slightly different from the results reported above.
## Citation
```latex
@inproceedings{xu2021paconv,
title={PAConv: Position Adaptive Convolution with Dynamic Kernel Assembling on Point Clouds},
author={Xu, Mutian and Ding, Runyu and Zhao, Hengshuang and Qi, Xiaojuan},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={3173--3182},
year={2021}
}
```
Collections:
- Name: PAConv
Metadata:
Training Techniques:
- SGD
Training Resources: 8x Titan XP GPUs
Architecture:
- PAConv
Paper:
URL: https://arxiv.org/abs/2103.14635
Title: 'PAConv: Position Adaptive Convolution with Dynamic Kernel Assembling on Point Clouds'
README: configs/paconv/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/ops/paconv/paconv.py#L106
Version: v0.16.0
Models:
- Name: paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class.py
In Collection: PAConv
Config: configs/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class.py
Metadata:
Training Data: S3DIS
Training Memory (GB): 5.8
Results:
- Task: 3D Semantic Segmentation
Dataset: S3DIS
Metrics:
mIoU: 66.65
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class_20210729_200615-2147b2d1.pth
_base_ = [
'../_base_/datasets/s3dis_seg-3d-13class.py',
'../_base_/models/paconv_cuda_ssg.py',
'../_base_/schedules/seg_cosine_150e.py', '../_base_/default_runtime.py'
]
# data settings
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
num_points = 4096
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
block_size=1.0,
use_normalized_coord=True,
num_try=10000,
enlarge_size=None,
min_unique_num=num_points // 4,
eps=0.0),
dict(type='NormalizePointsColor', color_mean=None),
dict(
type='GlobalRotScaleTrans',
rot_range=[0.0, 6.283185307179586], # [0, 2 * pi]
scale_ratio_range=[0.8, 1.2],
translation_std=[0, 0, 0]),
dict(
type='RandomJitterPoints',
jitter_std=[0.01, 0.01, 0.01],
clip_range=[-0.05, 0.05]),
dict(type='RandomDropPointsColor', drop_ratio=0.2),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(samples_per_gpu=8, train=dict(pipeline=train_pipeline))
evaluation = dict(interval=1)
# model settings
model = dict(
decode_head=dict(
num_classes=13, ignore_index=13,
loss_decode=dict(class_weight=None)), # S3DIS doesn't use class_weight
test_cfg=dict(
num_points=4096,
block_size=1.0,
sample_rate=0.5,
use_normalized_coord=True,
batch_size=12))
# runtime settings
runner = dict(max_epochs=200)
_base_ = [
'../_base_/datasets/s3dis_seg-3d-13class.py',
'../_base_/models/paconv_ssg.py', '../_base_/schedules/seg_cosine_150e.py',
'../_base_/default_runtime.py'
]
# data settings
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
num_points = 4096
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
block_size=1.0,
use_normalized_coord=True,
num_try=10000,
enlarge_size=None,
min_unique_num=num_points // 4,
eps=0.0),
dict(type='NormalizePointsColor', color_mean=None),
dict(
type='GlobalRotScaleTrans',
rot_range=[0.0, 6.283185307179586], # [0, 2 * pi]
scale_ratio_range=[0.8, 1.2],
translation_std=[0, 0, 0]),
dict(
type='RandomJitterPoints',
jitter_std=[0.01, 0.01, 0.01],
clip_range=[-0.05, 0.05]),
dict(type='RandomDropPointsColor', drop_ratio=0.2),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(samples_per_gpu=8, train=dict(pipeline=train_pipeline))
evaluation = dict(interval=1)
# model settings
model = dict(
decode_head=dict(
num_classes=13, ignore_index=13,
loss_decode=dict(class_weight=None)), # S3DIS doesn't use class_weight
test_cfg=dict(
num_points=4096,
block_size=1.0,
sample_rate=0.5,
use_normalized_coord=True,
batch_size=12))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment