Commit 85529f35 authored by unknown

Add OpenMMLab test cases

parent b21b0c01
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './cascade_rcnn_r50_caffe_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(depth=101))
_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'))
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
_base_ = [
'../_base_/models/cascade_rcnn_r50_fpn.py',
'../_base_/datasets/coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
# learning policy
lr_config = dict(step=[16, 19])
runner = dict(type='EpochBasedRunner', max_epochs=20)
_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'
model = dict(
type='CascadeRCNN',
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'
model = dict(
type='CascadeRCNN',
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
Collections:
- Name: Cascade R-CNN
Metadata:
Training Data: COCO
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x NVIDIA V100 GPUs
Architecture:
- Cascade R-CNN
- FPN
- RPN
- ResNet
- RoIAlign
Paper: http://dx.doi.org/10.1109/tpami.2019.2956516
README: configs/cascade_rcnn/README.md
Models:
- Name: cascade_rcnn_r50_caffe_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py
Metadata:
Training Memory (GB): 4.2
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.4
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth
- Name: cascade_rcnn_r50_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py
Metadata:
Training Memory (GB): 4.4
inference time (s/im): 0.06211
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.3
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth
- Name: cascade_rcnn_r50_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py
Metadata:
Training Memory (GB): 4.4
inference time (s/im): 0.06211
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth
- Name: cascade_rcnn_r101_caffe_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py
Metadata:
Training Memory (GB): 6.2
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.3
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth
- Name: cascade_rcnn_r101_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py
Metadata:
Training Memory (GB): 6.4
inference time (s/im): 0.07407
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth
- Name: cascade_rcnn_r101_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py
Metadata:
Training Memory (GB): 6.4
inference time (s/im): 0.07407
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.5
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth
- Name: cascade_rcnn_x101_32x4d_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py
Metadata:
Training Memory (GB): 7.6
inference time (s/im): 0.09174
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 43.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth
- Name: cascade_rcnn_x101_32x4d_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py
Metadata:
Training Memory (GB): 7.6
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 43.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth
- Name: cascade_rcnn_x101_64x4d_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py
Metadata:
Training Memory (GB): 10.7
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 44.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth
- Name: cascade_rcnn_x101_64x4d_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py
Metadata:
Training Memory (GB): 10.7
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 44.5
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth
- Name: cascade_mask_rcnn_r50_caffe_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py
Metadata:
Training Memory (GB): 5.9
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 36.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth
- Name: cascade_mask_rcnn_r50_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py
Metadata:
Training Memory (GB): 6.0
inference time (s/im): 0.08929
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 35.9
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth
- Name: cascade_mask_rcnn_r50_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py
Metadata:
Training Memory (GB): 6.0
inference time (s/im): 0.08929
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.9
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 36.5
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth
- Name: cascade_mask_rcnn_r101_caffe_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py
Metadata:
Training Memory (GB): 7.8
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 43.2
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 37.6
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth
- Name: cascade_mask_rcnn_r101_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py
Metadata:
Training Memory (GB): 7.9
inference time (s/im): 0.10204
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.9
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 37.3
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth
- Name: cascade_mask_rcnn_r101_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py
Metadata:
Training Memory (GB): 7.9
inference time (s/im): 0.10204
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 43.4
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 37.8
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth
- Name: cascade_mask_rcnn_x101_32x4d_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py
Metadata:
Training Memory (GB): 9.2
inference time (s/im): 0.11628
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 44.3
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 38.3
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth
- Name: cascade_mask_rcnn_x101_32x4d_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py
Metadata:
Training Memory (GB): 9.2
inference time (s/im): 0.11628
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 45.0
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 39.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth
- Name: cascade_mask_rcnn_x101_64x4d_fpn_1x_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py
Metadata:
Training Memory (GB): 12.2
inference time (s/im): 0.14925
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 45.3
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 39.2
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth
- Name: cascade_mask_rcnn_x101_64x4d_fpn_20e_coco
In Collection: Cascade R-CNN
Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py
Metadata:
Training Memory (GB): 12.2
Epochs: 20
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 45.6
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 39.5
Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth
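The model index above is plain YAML, so it can be inspected programmatically. A minimal sketch, assuming PyYAML is installed and the metafile has been saved locally as `metafile.yml` (a hypothetical path):

```python
import yaml  # PyYAML

# Hypothetical local copy of the YAML model index above.
with open('metafile.yml') as f:
    meta = yaml.safe_load(f)

# List every model together with its COCO box AP.
for model in meta['Models']:
    for result in model['Results']:
        if result['Task'] == 'Object Detection':
            print(model['Name'], result['Metrics']['box AP'])
```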
# Cascade RPN
<!-- [ALGORITHM] -->
We provide code to reproduce the experimental results of [Cascade RPN](https://arxiv.org/abs/1909.06720).
```latex
@inproceedings{vu2019cascade,
title={Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution},
author={Vu, Thang and Jang, Hyunjun and Pham, Trung X and Yoo, Chang D},
booktitle={Conference on Neural Information Processing Systems (NeurIPS)},
year={2019}
}
```
## Benchmark
### Region proposal performance
| Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Download |
|:------:|:--------:|:-----:|:--------:|:-------------------:|:--------------:|:-------:|:--------------------------------------:|
| CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [model](https://drive.google.com/file/d/1qxVdOnCgK-ee7_z0x6mvAir_glMu2Ihi/view?usp=sharing) |
### Detection performance
| Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:-------------:|:-----------:|:--------:|:-------:|:--------:|:--------:|:-------------------:|:--------------:|:------:|:--------------------------------------------:|
| Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [model](https://drive.google.com/file/d/1NmbnuY5VHi8I9FE8xnp5uNvh2i-t-6_L/view?usp=sharing) |
| Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [model](https://drive.google.com/file/d/1dS3Q66qXMJpcuuQgDNkLp669E5w1UMuZ/view?usp=sharing) |
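The Cascade RPN configs below extend the Fast R-CNN, Faster R-CNN, and RPN baselines. As a minimal sketch of working with one of them from Python, assuming mmcv/mmdet 2.x are installed (the config path and proposal file name are assumptions, not part of this commit):

```python
from mmcv import Config

# Hypothetical paths; adjust to your mmdetection checkout and data layout.
cfg = Config.fromfile('configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py')

# Override fields at runtime instead of editing the file, e.g. point the
# training pipeline at a different precomputed proposal .pkl file.
cfg.data.train.proposal_file = 'data/coco/proposals/my_proposals.pkl'
print(cfg.model.roi_head.bbox_head.loss_cls)
```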
_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
roi_head=dict(
bbox_head=dict(
bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rcnn=dict(
assigner=dict(
pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),
sampler=dict(num=256))),
test_cfg=dict(rcnn=dict(score_thr=1e-3)))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=300),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=300),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='ToTensor', keys=['proposals']),
dict(
type='ToDataContainer',
fields=[dict(key='proposals', stack=False)]),
dict(type='Collect', keys=['img', 'proposals']),
])
]
data = dict(
train=dict(
proposal_file=data_root +
'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl',
pipeline=train_pipeline),
val=dict(
proposal_file=data_root +
'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
pipeline=test_pipeline),
test=dict(
proposal_file=data_root +
'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
pipeline=test_pipeline))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py'
rpn_weight = 0.7
model = dict(
rpn_head=dict(
_delete_=True,
type='CascadeRPNHead',
num_stages=2,
stages=[
dict(
type='StageCascadeRPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[1.0],
strides=[4, 8, 16, 32, 64]),
adapt_cfg=dict(type='dilation', dilation=3),
bridged_feature=True,
sampling=False,
with_cls=False,
reg_decoded_bbox=True,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.5, 0.5)),
loss_bbox=dict(
type='IoULoss', linear=True,
loss_weight=10.0 * rpn_weight)),
dict(
type='StageCascadeRPNHead',
in_channels=256,
feat_channels=256,
adapt_cfg=dict(type='offset'),
bridged_feature=False,
sampling=True,
with_cls=True,
reg_decoded_bbox=True,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=(.0, .0, .0, .0),
target_stds=(0.05, 0.05, 0.1, 0.1)),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0 * rpn_weight),
loss_bbox=dict(
type='IoULoss', linear=True,
loss_weight=10.0 * rpn_weight))
]),
roi_head=dict(
bbox_head=dict(
bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=[
dict(
assigner=dict(
type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),
allowed_border=-1,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False)
],
rpn_proposal=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),
rcnn=dict(
assigner=dict(
pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),
sampler=dict(type='RandomSampler', num=256))),
test_cfg=dict(
rpn=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),
rcnn=dict(score_thr=1e-3)))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py'
model = dict(
rpn_head=dict(
_delete_=True,
type='CascadeRPNHead',
num_stages=2,
stages=[
dict(
type='StageCascadeRPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[1.0],
strides=[4, 8, 16, 32, 64]),
adapt_cfg=dict(type='dilation', dilation=3),
bridged_feature=True,
sampling=False,
with_cls=False,
reg_decoded_bbox=True,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=(.0, .0, .0, .0),
target_stds=(0.1, 0.1, 0.5, 0.5)),
loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)),
dict(
type='StageCascadeRPNHead',
in_channels=256,
feat_channels=256,
adapt_cfg=dict(type='offset'),
bridged_feature=False,
sampling=True,
with_cls=True,
reg_decoded_bbox=True,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=(.0, .0, .0, .0),
target_stds=(0.05, 0.05, 0.1, 0.1)),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0))
]),
train_cfg=dict(rpn=[
dict(
assigner=dict(
type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),
allowed_border=-1,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.3,
ignore_iof_thr=-1,
iou_calculator=dict(type='BboxOverlaps2D')),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False)
]),
test_cfg=dict(
rpn=dict(
nms_pre=2000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.8),
min_bbox_size=0)))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
# CenterNet
## Introduction
<!-- [ALGORITHM] -->
```latex
@article{zhou2019objects,
title={Objects as Points},
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
journal={arXiv preprint arXiv:1904.07850},
year={2019}
}
```
## Results and models
| Backbone | DCN | Mem (GB) | Box AP | Flip box AP | Config | Download |
| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :----: |
| ResNet-18 | N | 3.45 | 26.0 | 27.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210519_092334-eafe8ccd.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210519_092334.log.json) |
| ResNet-18 | Y | 3.47 | 29.5 | 31.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210520_101209-da388ba2.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210520_101209.log.json) |
Note:
- The Flip box AP setting is single-scale with `flip=True`.
- Because of the heavy data augmentation, we find the performance unstable; it may fluctuate by about 0.4 mAP, and a box mAP between 29.4 and 29.8 is acceptable for ResNet-18-DCNv2.
- Compared with the source code, we follow [CenterNet-Better](https://github.com/FateScript/CenterNet-better) and make the following changes (a sketch of the linear warmup schedule follows these notes):
    - Fix the incorrect image mean and variance in image normalization so they are compatible with the pretrained backbone.
    - Use SGD instead of the Adam optimizer, and add warmup and gradient clipping.
    - Use DistributedDataParallel, as for other models in MMDetection, rather than DataParallel.
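As a rough illustration of the warmup change, the sketch below shows how mmcv-style linear warmup ramps the learning rate with the settings used in the CenterNet config (`warmup_iters=1000`, `warmup_ratio=1/1000`). The exact formula is an assumption based on mmcv's `LrUpdaterHook`, and the base LR of 0.02 is only an example value:

```python
# Minimal sketch of linear LR warmup; formula assumed from mmcv's LrUpdaterHook.
def linear_warmup_lr(base_lr, cur_iter, warmup_iters=1000, warmup_ratio=1.0 / 1000):
    k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
    return base_lr * (1 - k)

# The LR ramps from base_lr * warmup_ratio up to base_lr over the first 1000 iters.
print(linear_warmup_lr(0.02, 0))     # ~2e-05
print(linear_warmup_lr(0.02, 1000))  # 0.02
```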
_base_ = './centernet_resnet18_dcnv2_140e_coco.py'
model = dict(neck=dict(use_dcn=False))
_base_ = [
'../_base_/datasets/coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
type='CenterNet',
pretrained='torchvision://resnet18',
backbone=dict(
type='ResNet', depth=18, norm_eval=False, norm_cfg=dict(type='BN')),
neck=dict(
type='CTResNetNeck',
in_channel=512,
num_deconv_filters=(256, 128, 64),
num_deconv_kernels=(4, 4, 4),
use_dcn=True),
bbox_head=dict(
type='CenterNetHead',
num_classes=80,
in_channel=64,
feat_channel=64,
loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0),
loss_wh=dict(type='L1Loss', loss_weight=0.1),
loss_offset=dict(type='L1Loss', loss_weight=1.0)),
train_cfg=None,
test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100))
# We fixed the incorrect img_norm_cfg problem in the source code.
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', to_float32=True, color_type='color'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PhotoMetricDistortion',
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18),
dict(
type='RandomCenterCropPad',
crop_size=(512, 512),
ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
mean=[0, 0, 0],
std=[1, 1, 1],
to_rgb=True,
test_pad_mode=None),
dict(type='Resize', img_scale=(512, 512), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(
type='RandomCenterCropPad',
ratios=None,
border=None,
mean=[0, 0, 0],
std=[1, 1, 1],
to_rgb=True,
test_mode=True,
test_pad_mode=['logical_or', 31],
test_pad_add_pix=1),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
'scale_factor', 'flip', 'flip_direction',
'img_norm_cfg', 'border'),
keys=['img'])
])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
# optimizer
# Based on the default settings of modern detectors, SGD performs better than
# the Adam optimizer used in the source code, so we use the default SGD settings;
# with Adam and lr=5e-4 the mAP is 29.1.
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Following the default settings of modern detectors, we add warmup settings.
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[90, 120])
runner = dict(max_epochs=140)
# Avoid evaluating and saving weights too frequently
evaluation = dict(interval=5, metric='bbox')
checkpoint_config = dict(interval=5)
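For completeness, a minimal inference sketch using the high-level mmdet 2.x API with the CenterNet config and the ResNet-18 checkpoint listed in the table above; the local paths and demo image are assumptions:

```python
from mmdet.apis import init_detector, inference_detector

# Hypothetical local paths; the checkpoint corresponds to the ResNet-18 entry above.
config_file = 'configs/centernet/centernet_resnet18_140e_coco.py'
checkpoint_file = 'centernet_resnet18_140e_coco_20210519_092334-eafe8ccd.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # per-class lists of detected boxes
```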