Commit 85529f35 authored by unknown's avatar unknown
Browse files

添加openmmlab测试用例

parent b21b0c01
_base_ = [
'../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
type='GridRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='GridRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
with_reg=False,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False),
grid_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
grid_head=dict(
type='GridHead',
grid_points=9,
num_convs=8,
in_channels=256,
point_feat_channels=64,
norm_cfg=dict(type='GN', num_groups=36),
loss_grid=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_radius=1,
pos_weight=-1,
max_num_grid=192,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.03,
nms=dict(type='nms', iou_threshold=0.3),
max_per_img=100)))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=3665,
warmup_ratio=1.0 / 80,
step=[17, 23])
runner = dict(type='EpochBasedRunner', max_epochs=25)
_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=3665,
warmup_ratio=1.0 / 80,
step=[17, 23])
runner = dict(type='EpochBasedRunner', max_epochs=25)
_base_ = './grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'))
Collections:
- Name: Grid R-CNN
Metadata:
Training Data: COCO
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x NVIDIA V100 GPUs
Architecture:
- RPN
- Dilated Convolution
- ResNet
- RoIAlign
Paper: https://arxiv.org/abs/1906.05688
README: configs/grid_rcnn/README.md
Models:
- Name: grid_rcnn_r50_fpn_gn-head_2x_coco
In Collection: Grid R-CNN
Config: configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py
Metadata:
Training Memory (GB): 5.1
inference time (s/im): 0.06667
Epochs: 24
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.4
Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth
- Name: grid_rcnn_r101_fpn_gn-head_2x_coco
In Collection: Grid R-CNN
Config: configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py
Metadata:
Training Memory (GB): 7.0
inference time (s/im): 0.07937
Epochs: 24
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.5
Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth
- Name: grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco
In Collection: Grid R-CNN
Config: configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py
Metadata:
Training Memory (GB): 8.3
inference time (s/im): 0.09259
Epochs: 24
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.9
Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth
- Name: grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco
In Collection: Grid R-CNN
Config: configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py
Metadata:
Training Memory (GB): 11.3
inference time (s/im): 0.12987
Epochs: 24
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 43.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth
# GRoIE
## A novel Region of Interest Extraction Layer for Instance Segmentation
By Leonardo Rossi, Akbar Karimi and Andrea Prati from
[IMPLab](http://implab.ce.unipr.it/).
We provide configs to reproduce the results in the paper for
"*A novel Region of Interest Extraction Layer for Instance Segmentation*"
on COCO object detection.
## Introduction
<!-- [ALGORITHM] -->
This paper is motivated by the need to overcome to the limitations of existing
RoI extractors which select only one (the best) layer from FPN.
Our intuition is that all the layers of FPN retain useful information.
Therefore, the proposed layer (called Generic RoI Extractor - **GRoIE**)
introduces non-local building blocks and attention mechanisms to boost the
performance.
## Results and models
The results on COCO 2017 minival (5k images) are shown in the below table.
You can find
[here](https://drive.google.com/drive/folders/19ssstbq_h0Z1cgxHmJYFO8s1arf3QJbT)
the trained models.
### Application of GRoIE to different architectures
| Backbone | Method | Lr schd | box AP | mask AP | Config | Download|
| :-------: | :--------------: | :-----: | :----: | :-----: | :-------:| :--------:|
| R-50-FPN | Faster Original | 1x | 37.4 | | [config](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) |
| R-50-FPN | + GRoIE | 1x | 38.3 | | [config](./faster_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) |
| R-50-FPN | Grid R-CNN | 1x | 39.1 | | [config](./grid_rcnn_r50_fpn_gn-head_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059-64f00ee8.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059.log.json) |
| R-50-FPN | + GRoIE | 1x | | | [config](./grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py)||
| R-50-FPN | Mask R-CNN | 1x | 38.2 | 34.7 | [config](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) |
| R-50-FPN | + GRoIE | 1x | 39.0 | 36.0 | [config](./mask_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) |
| R-50-FPN | GC-Net | 1x | 40.7 | 36.5 | [config](../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) |
| R-50-FPN | + GRoIE | 1x | 41.0 | 37.8 | [config](./mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) |
| R-101-FPN | GC-Net | 1x | 42.2 | 37.8 | [config](../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) |
| R-101-FPN | + GRoIE | 1x | | | [config](./mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507.log.json) |
## Citation
If you use this work or benchmark in your research, please cite this project.
```latex
@misc{rossi2020novel,
title={A novel Region of Interest Extraction Layer for Instance Segmentation},
author={Leonardo Rossi and Akbar Karimi and Andrea Prati},
year={2020},
eprint={2004.13665},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
## Contact
The implementation of GROI is currently maintained by
[Leonardo Rossi](https://github.com/hachreak/).
_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
# model settings
model = dict(
roi_head=dict(
bbox_roi_extractor=dict(
type='GenericRoIExtractor',
aggregation='sum',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2))))
_base_ = '../grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py'
# model settings
model = dict(
roi_head=dict(
bbox_roi_extractor=dict(
type='GenericRoIExtractor',
aggregation='sum',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2)),
grid_roi_extractor=dict(
type='GenericRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2))))
_base_ = '../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py'
# model settings
model = dict(
roi_head=dict(
bbox_roi_extractor=dict(
type='GenericRoIExtractor',
aggregation='sum',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2)),
mask_roi_extractor=dict(
type='GenericRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2))))
_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'
# model settings
model = dict(
roi_head=dict(
bbox_roi_extractor=dict(
type='GenericRoIExtractor',
aggregation='sum',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2)),
mask_roi_extractor=dict(
type='GenericRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2))))
_base_ = '../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py'
# model settings
model = dict(
roi_head=dict(
bbox_roi_extractor=dict(
type='GenericRoIExtractor',
aggregation='sum',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2)),
mask_roi_extractor=dict(
type='GenericRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32],
pre_cfg=dict(
type='ConvModule',
in_channels=256,
out_channels=256,
kernel_size=5,
padding=2,
inplace=False,
),
post_cfg=dict(
type='GeneralizedAttention',
in_channels=256,
spatial_range=-1,
num_heads=6,
attention_type='0100',
kv_stride=2))))
Collections:
- Name: GRoIE
Metadata:
Training Data: COCO
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x NVIDIA V100 GPUs
Architecture:
- Generic RoI Extractor
- FPN
- RPN
- ResNet
- RoIAlign
Paper: https://arxiv.org/abs/2004.13665
README: configs/groie/README.md
Models:
- Name: faster_rcnn_r50_fpn_groie_1x_coco
In Collection: GRoIE
Config: configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py
Metadata:
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 38.3
Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth
- Name: grid_rcnn_r50_fpn_gn-head_groie_1x_coco
In Collection: GRoIE
Config: configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py
Metadata:
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 39.1
- Name: mask_rcnn_r50_fpn_groie_1x_coco
In Collection: GRoIE
Config: configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py
Metadata:
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 39.0
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 36.0
Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth
- Name: mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco
In Collection: GRoIE
Config: configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py
Metadata:
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.0
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 37.8
Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth
- Name: mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco
In Collection: GRoIE
Config: configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py
Metadata:
Epochs: 12
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.6
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 38.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth
# Region Proposal by Guided Anchoring
## Introduction
<!-- [ALGORITHM] -->
We provide config files to reproduce the results in the CVPR 2019 paper for [Region Proposal by Guided Anchoring](https://arxiv.org/abs/1901.03278).
```latex
@inproceedings{wang2019region,
title={Region Proposal by Guided Anchoring},
author={Jiaqi Wang and Kai Chen and Shuo Yang and Chen Change Loy and Dahua Lin},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
year={2019}
}
```
## Results and Models
The results on COCO 2017 val is shown in the below table. (results on test-dev are usually slightly higher than val).
| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR 1000 | Config | Download |
| :----: | :-------------: | :-----: | :-----: | :------: | :------------: | :-----: | :------: | :--------: |
| GA-RPN | R-50-FPN | caffe | 1x | 5.3 | 15.8 | 68.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531_011819.log.json) |
| GA-RPN | R-101-FPN | caffe | 1x | 7.3 | 13.0 | 69.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531_011812.log.json) |
| GA-RPN | X-101-32x4d-FPN | pytorch | 1x | 8.5 | 10.0 | 70.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220_221326.log.json) |
| GA-RPN | X-101-64x4d-FPN | pytorch | 1x | 7.1 | 7.5 | 71.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225_152704.log.json) |
| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download |
| :------------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: |
| GA-Faster RCNN | R-50-FPN | caffe | 1x | 5.5 | | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718.log.json) |
| GA-Faster RCNN | R-101-FPN | caffe | 1x | 7.5 | | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_20200505_115528.log.json) |
| GA-Faster RCNN | X-101-32x4d-FPN | pytorch | 1x | 8.7 | 9.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215_184547.log.json) |
| GA-Faster RCNN | X-101-64x4d-FPN | pytorch | 1x | 11.8 | 7.3 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215_104455.log.json) |
| GA-RetinaNet | R-50-FPN | caffe | 1x | 3.5 | 16.8 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020_225450.log.json) |
| GA-RetinaNet | R-101-FPN | caffe | 1x | 5.5 | 12.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531_012847.log.json) |
| GA-RetinaNet | X-101-32x4d-FPN | pytorch | 1x | 6.9 | 10.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219_223025.log.json) |
| GA-RetinaNet | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 7.7 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226_221123.log.json) |
- In the Guided Anchoring paper, `score_thr` is set to 0.001 in Fast/Faster RCNN and 0.05 in RetinaNet for both baselines and Guided Anchoring.
- Performance on COCO test-dev benchmark are shown as follows.
| Method | Backbone | Style | Lr schd | Aug Train | Score thr | AP | AP_50 | AP_75 | AP_small | AP_medium | AP_large | Download |
| :------------: | :-------: | :---: | :-----: | :-------: | :-------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------: |
| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | |
| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.001 | | | | | | | |
| GA-RetinaNet | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | |
| GA-RetinaNet | R-101-FPN | caffe | 2x | T | 0.05 | | | | | | | |
_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
roi_head=dict(
bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),
# model training and testing settings
train_cfg=dict(
rcnn=dict(
assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),
sampler=dict(num=256))),
test_cfg=dict(rcnn=dict(score_thr=1e-3)))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=300),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadProposals', num_max_proposals=None),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img', 'proposals']),
])
]
data = dict(
train=dict(
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl',
pipeline=train_pipeline),
val=dict(
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',
pipeline=test_pipeline),
test=dict(
proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',
pipeline=test_pipeline))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
_base_ = './ga_faster_r50_caffe_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(depth=101))
_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py'
model = dict(
rpn_head=dict(
_delete_=True,
type='GARPNHead',
in_channels=256,
feat_channels=256,
approx_anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=8,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
square_anchor_generator=dict(
type='AnchorGenerator',
ratios=[1.0],
scales=[8],
strides=[4, 8, 16, 32, 64]),
anchor_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[0.07, 0.07, 0.14, 0.14]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[0.07, 0.07, 0.11, 0.11]),
loc_filter_thr=0.01,
loss_loc=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
roi_head=dict(
bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
ga_assigner=dict(
type='ApproxMaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
ga_sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
center_ratio=0.2,
ignore_ratio=0.5),
rpn_proposal=dict(nms_post=1000, max_per_img=300),
rcnn=dict(
assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),
sampler=dict(type='RandomSampler', num=256))),
test_cfg=dict(
rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3)))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
model = dict(
rpn_head=dict(
_delete_=True,
type='GARPNHead',
in_channels=256,
feat_channels=256,
approx_anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=8,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
square_anchor_generator=dict(
type='AnchorGenerator',
ratios=[1.0],
scales=[8],
strides=[4, 8, 16, 32, 64]),
anchor_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[0.07, 0.07, 0.14, 0.14]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[0.07, 0.07, 0.11, 0.11]),
loc_filter_thr=0.01,
loss_loc=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
roi_head=dict(
bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
ga_assigner=dict(
type='ApproxMaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
ga_sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
center_ratio=0.2,
ignore_ratio=0.5),
rpn_proposal=dict(nms_post=1000, max_per_img=300),
rcnn=dict(
assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),
sampler=dict(type='RandomSampler', num=256))),
test_cfg=dict(
rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3)))
optimizer_config = dict(
_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
_base_ = './ga_faster_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './ga_faster_r50_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
style='pytorch'))
_base_ = './ga_retinanet_r50_caffe_fpn_1x_coco.py'
model = dict(
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(depth=101))
_base_ = '../_base_/default_runtime.py'
# model settings
model = dict(
type='RetinaNet',
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
num_outs=5),
bbox_head=dict(
type='GARetinaHead',
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
approx_anchor_generator=dict(
type='AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
square_anchor_generator=dict(
type='AnchorGenerator',
ratios=[1.0],
scales=[4],
strides=[8, 16, 32, 64, 128]),
anchor_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loc_filter_thr=0.01,
loss_loc=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
ga_assigner=dict(
type='ApproxMaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
ga_sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
center_ratio=0.2,
ignore_ratio=0.5,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=[(1333, 480), (1333, 960)],
keep_ratio=True,
multiscale_mode='range'),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[16, 22])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment