_base_ = '../common/ms-poly-90k_coco-instance.py'
# model settings
model = dict(
type='CondInst',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_mask=True,
pad_size_divisor=32),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output', # use P5
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='CondInstBboxHead',
num_params=169,
num_classes=80,
in_channels=256,
stacked_convs=4,
feat_channels=256,
strides=[8, 16, 32, 64, 128],
norm_on_bbox=True,
centerness_on_reg=True,
dcn_on_last_conv=False,
center_sampling=True,
conv_bias=True,
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
mask_head=dict(
type='CondInstMaskHead',
num_layers=3,
feat_channels=8,
size_of_interest=8,
mask_out_stride=4,
max_masks_to_train=300,
mask_feature_head=dict(
in_channels=256,
feat_channels=128,
start_level=0,
end_level=2,
out_channels=8,
mask_stride=8,
num_stacked_convs=4,
norm_cfg=dict(type='BN', requires_grad=True)),
loss_mask=dict(
type='DiceLoss',
use_sigmoid=True,
activate=True,
eps=5e-6,
loss_weight=1.0)),
# model training and testing settings
test_cfg=dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100,
mask_thr=0.5))
# optimizer
optim_wrapper = dict(optimizer=dict(lr=0.01))
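# Sketch (not part of this config): the merged result can be inspected with
# MMEngine, assuming the path listed in the metafile below, e.g.
#   from mmengine.config import Config
#   cfg = Config.fromfile(
#       'configs/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance.py')
#   print(cfg.model.bbox_head.type)        # 'CondInstBboxHead'
#   print(cfg.optim_wrapper.optimizer.lr)  # 0.01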
Collections:
- Name: CondInst
Metadata:
Training Data: COCO
Training Techniques:
- SGD with Momentum
- Weight Decay
Training Resources: 8x A100 GPUs
Architecture:
- FPN
- FCOS
- ResNet
Paper:
URL: https://arxiv.org/abs/2003.05664
Title: 'Conditional Convolutions for Instance Segmentation'
README: configs/condinst/README.md
Models:
- Name: condinst_r50_fpn_ms-poly-90k_coco_instance
In Collection: CondInst
Config: configs/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance.py
Metadata:
Training Memory (GB): 4.4
Iterations: 90000
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 39.8
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 36.0
Weights: https://download.openmmlab.com/mmdetection/v3.0/condinst/condinst_r50_fpn_ms-poly-90k_coco_instance/condinst_r50_fpn_ms-poly-90k_coco_instance_20221129_125223-4c186406.pth
_base_ = ['../detr/detr_r50_8xb2-150e_coco.py']
model = dict(
type='ConditionalDETR',
num_queries=300,
decoder=dict(
num_layers=6,
layer_cfg=dict(
self_attn_cfg=dict(
_delete_=True,
embed_dims=256,
num_heads=8,
attn_drop=0.1,
cross_attn=False),
cross_attn_cfg=dict(
_delete_=True,
embed_dims=256,
num_heads=8,
attn_drop=0.1,
cross_attn=True))),
bbox_head=dict(
type='ConditionalDETRHead',
loss_cls=dict(
_delete_=True,
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(
type='HungarianAssigner',
match_costs=[
dict(type='FocalLossCost', weight=2.0),
dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
dict(type='IoUCost', iou_mode='giou', weight=2.0)
])))
# learning policy
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=50, val_interval=1)
param_scheduler = [dict(type='MultiStepLR', end=50, milestones=[40])]
Collections:
- Name: Conditional DETR
Metadata:
Training Data: COCO
Training Techniques:
- AdamW
- Multi Scale Train
- Gradient Clip
Training Resources: 8x A100 GPUs
Architecture:
- ResNet
- Transformer
Paper:
URL: https://arxiv.org/abs/2108.06152
Title: 'Conditional DETR for Fast Training Convergence'
README: configs/conditional_detr/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/f4112c9e5611468ffbd57cfba548fd1289264b52/mmdet/models/detectors/conditional_detr.py#L14
Version: v3.0.0rc6
Models:
- Name: conditional-detr_r50_8xb2-50e_coco
In Collection: Conditional DETR
Config: configs/conditional_detr/conditional-detr_r50_8xb2-50e_coco.py
Metadata:
Epochs: 50
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.9
Weights: https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202-c83a1dc0.pth
_base_ = './cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py' # noqa
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
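# With this import registered, backbone types prefixed with 'mmpretrain.'
# (e.g. 'mmpretrain.ConvNeXt' below) resolve against mmpretrain's model
# registry instead of mmdet's.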
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='small',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.6,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')))
optim_wrapper = dict(paramwise_cfg={
'decay_rate': 0.7,
'decay_type': 'layer_wise',
'num_layers': 12
})
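# Note: 'layer_wise' decay gives earlier backbone layers smaller learning
# rates. Roughly (the exact grouping is done by the
# LearningRateDecayOptimizerConstructor), layer i out of num_layers ends up
# with lr ~= base_lr * decay_rate ** (num_layers - i); with the values above,
# the deepest layer keeps 2e-4 while layer 0 gets about
# 2e-4 * 0.7 ** 12 ~= 2.8e-6.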
_base_ = [
'../_base_/models/cascade-mask-rcnn_r50_fpn.py',
'../_base_/datasets/coco_instance.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='tiny',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
neck=dict(in_channels=[96, 192, 384, 768]),
roi_head=dict(bbox_head=[
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),
dict(
type='ConvFCBBoxHead',
num_shared_convs=4,
num_shared_fcs=1,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=False,
reg_decoded_bbox=True,
norm_cfg=dict(type='SyncBN', requires_grad=True),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
]))
# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
max_epochs = 36
train_cfg = dict(max_epochs=max_epochs)
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[27, 33],
gamma=0.1)
]
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
type='AmpOptimWrapper',
constructor='LearningRateDecayOptimizerConstructor',
paramwise_cfg={
'decay_rate': 0.7,
'decay_type': 'layer_wise',
'num_layers': 6
},
optimizer=dict(
_delete_=True,
type='AdamW',
lr=0.0002,
betas=(0.9, 0.999),
weight_decay=0.05))
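# Note: AmpOptimWrapper wraps the optimizer with PyTorch automatic mixed
# precision, which is what the metafile below lists as
# 'Mixed Precision Training'.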
_base_ = [
'../_base_/models/mask-rcnn_r50_fpn.py',
'../_base_/datasets/coco_instance.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# please install mmpretrain
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa
model = dict(
backbone=dict(
_delete_=True,
type='mmpretrain.ConvNeXt',
arch='tiny',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
neck=dict(in_channels=[96, 192, 384, 768]))
# augmentation strategy originates from DETR / Sparse RCNN
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
max_epochs = 36
train_cfg = dict(max_epochs=max_epochs)
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[27, 33],
gamma=0.1)
]
# Enable automatic-mixed-precision training with AmpOptimWrapper.
optim_wrapper = dict(
type='AmpOptimWrapper',
constructor='LearningRateDecayOptimizerConstructor',
paramwise_cfg={
'decay_rate': 0.95,
'decay_type': 'layer_wise',
'num_layers': 6
},
optimizer=dict(
_delete_=True,
type='AdamW',
lr=0.0001,
betas=(0.9, 0.999),
weight_decay=0.05,
))
Models:
- Name: mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco
In Collection: Mask R-CNN
Config: configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 7.3
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 46.2
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 41.7
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953-050731f4.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.16.0
- Name: cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco
In Collection: Cascade Mask R-CNN
Config: configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 9.0
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 50.3
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 43.6
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200-8f07c40b.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.25.0
- Name: cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco
In Collection: Cascade Mask R-CNN
Config: configs/convnext/cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py
Metadata:
Training Memory (GB): 12.3
Epochs: 36
Training Data: COCO
Training Techniques:
- AdamW
- Mixed Precision Training
Training Resources: 8x A100 GPUs
Architecture:
- ConvNeXt
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 51.8
- Task: Instance Segmentation
Dataset: COCO
Metrics:
mask AP: 44.8
Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004-3d24f5a4.pth
Paper:
URL: https://arxiv.org/abs/2201.03545
Title: 'A ConvNet for the 2020s'
README: configs/convnext/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465
Version: v2.25.0
_base_ = './cornernet_hourglass104_8xb6-210e-mstest_coco.py'
train_dataloader = dict(batch_size=5)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (10 GPUs) x (5 samples per GPU)
auto_scale_lr = dict(base_batch_size=50)
_base_ = './cornernet_hourglass104_8xb6-210e-mstest_coco.py'
train_dataloader = dict(batch_size=3)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (32 GPUs) x (3 samples per GPU)
auto_scale_lr = dict(base_batch_size=96)
_base_ = [
'../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'
]
data_preprocessor = dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True)
# model settings
model = dict(
type='CornerNet',
data_preprocessor=data_preprocessor,
backbone=dict(
type='HourglassNet',
downsample_times=5,
num_stacks=2,
stage_channels=[256, 256, 384, 384, 384, 512],
stage_blocks=[2, 2, 2, 2, 2, 4],
norm_cfg=dict(type='BN', requires_grad=True)),
neck=None,
bbox_head=dict(
type='CornerHead',
num_classes=80,
in_channels=256,
num_feat_levels=2,
corner_emb_channels=1,
loss_heatmap=dict(
type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),
loss_embedding=dict(
type='AssociativeEmbeddingLoss',
pull_weight=0.10,
push_weight=0.10),
loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)),
# training and testing settings
train_cfg=None,
test_cfg=dict(
corner_topk=100,
local_maximum_kernel=3,
distance_threshold=0.5,
score_thr=0.05,
max_per_img=100,
nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))
# data settings
train_pipeline = [
dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PhotoMetricDistortion',
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18),
dict(
# The cropped images are padded into squares during training,
# but may be smaller than crop_size.
type='RandomCenterCropPad',
crop_size=(511, 511),
ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
test_mode=False,
test_pad_mode=None,
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb']),
# Make sure the output is always crop_size.
dict(type='Resize', scale=(511, 511), keep_ratio=False),
dict(type='RandomFlip', prob=0.5),
dict(type='PackDetInputs'),
]
test_pipeline = [
dict(
type='LoadImageFromFile',
to_float32=True,
backend_args=_base_.backend_args,
),
# don't need Resize
dict(
type='RandomCenterCropPad',
crop_size=None,
ratios=None,
border=None,
test_mode=True,
test_pad_mode=['logical_or', 127],
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb']),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'border'))
]
train_dataloader = dict(
batch_size=6,
num_workers=3,
batch_sampler=None,
dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='Adam', lr=0.0005),
clip_grad=dict(max_norm=35, norm_type=2))
max_epochs = 210
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[180],
gamma=0.1)
]
train_cfg = dict(
type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (6 samples per GPU)
auto_scale_lr = dict(base_batch_size=48)
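# For reference: when LR auto-scaling is enabled, the linear scaling rule
# multiplies the optimizer lr by (actual total batch size / base_batch_size),
# e.g. an assumed 4 GPUs x 6 samples per GPU would give
# 0.0005 * 24 / 48 = 0.00025.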
tta_model = dict(
type='DetTTAModel',
tta_cfg=dict(
nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'),
max_per_img=100))
tta_pipeline = [
dict(
type='LoadImageFromFile',
to_float32=True,
backend_args=_base_.backend_args),
dict(
type='TestTimeAug',
transforms=[
[
# ``RandomFlip`` must be placed before ``RandomCenterCropPad``,
# otherwise bounding box coordinates after flipping cannot be
# recovered correctly.
dict(type='RandomFlip', prob=1.),
dict(type='RandomFlip', prob=0.)
],
[
dict(
type='RandomCenterCropPad',
crop_size=None,
ratios=None,
border=None,
test_mode=True,
test_pad_mode=['logical_or', 127],
mean=data_preprocessor['mean'],
std=data_preprocessor['std'],
# Image data is not converted to rgb.
to_rgb=data_preprocessor['bgr_to_rgb'])
],
[dict(type='LoadAnnotations', with_bbox=True)],
[
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'flip', 'flip_direction', 'border'))
]
])
]
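# Sketch (assumed wiring, mirroring what the TTA switch of the test script is
# expected to do): test-time augmentation replaces the plain model and test
# pipeline with the tta_model and tta_pipeline defined above, e.g.
#   cfg.model = dict(cfg.tta_model, module=cfg.model)
#   cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline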
Collections:
- Name: CornerNet
Metadata:
Training Data: COCO
Training Techniques:
- Adam
Training Resources: 8x V100 GPUs
Architecture:
- Corner Pooling
- Stacked Hourglass Network
Paper:
URL: https://arxiv.org/abs/1808.01244
Title: 'CornerNet: Detecting Objects as Paired Keypoints'
README: configs/cornernet/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v2.3.0/mmdet/models/detectors/cornernet.py#L9
Version: v2.3.0
Models:
- Name: cornernet_hourglass104_10xb5-crop511-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_10xb5-crop511-210e-mstest_coco.py
Metadata:
Training Resources: 10x V100 GPUs
Batch Size: 50
Training Memory (GB): 13.9
inference time (ms/im):
- value: 238.1
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth
- Name: cornernet_hourglass104_8xb6-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py
Metadata:
Batch Size: 48
Training Memory (GB): 15.9
inference time (ms/im):
- value: 238.1
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 41.2
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth
- Name: cornernet_hourglass104_32xb3-210e-mstest_coco
In Collection: CornerNet
Config: configs/cornernet/cornernet_hourglass104_32xb3-210e-mstest_coco.py
Metadata:
Training Resources: 32x V100 GPUs
Batch Size: 96
Training Memory (GB): 9.5
inference time (ms/im):
- value: 256.41
hardware: V100
backend: PyTorch
batch size: 1
mode: FP32
resolution: (800, 1333)
Epochs: 210
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 40.4
Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth
_base_ = ['../_base_/default_runtime.py']
model = dict(
type='CrowdDet',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
bgr_to_rgb=False,
pad_size_divisor=64,
# This option is set according to https://github.com/Purkialo/CrowdDet/
# blob/master/lib/data/CrowdHuman.py. The images in the entire batch are
# resized together.
batch_augments=[
dict(type='BatchResize', scale=(1400, 800), pad_size_divisor=64)
]),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5,
upsample_cfg=dict(mode='bilinear', align_corners=False)),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[1.0, 2.0, 3.0],
strides=[4, 8, 16, 32, 64],
centers=[(8, 8), (8, 8), (8, 8), (8, 8), (8, 8)]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0],
clip_border=False),
loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='MultiInstanceRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
output_size=7,
sampling_ratio=-1,
aligned=True,
use_torchvision=True),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='MultiInstanceBBoxHead',
with_refine=False,
num_shared_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
loss_weight=1.0,
use_sigmoid=False,
reduction='none'),
loss_bbox=dict(
type='SmoothL1Loss', loss_weight=1.0, reduction='none'))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=(0.3, 0.7),
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2400,
max_per_img=2000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=2),
rcnn=dict(
assigner=dict(
type='MultiInstanceAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.3,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='MultiInsRandomSampler',
num=512,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1200,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=2),
rcnn=dict(
nms=dict(type='nms', iou_threshold=0.5),
score_thr=0.01,
max_per_img=500)))
dataset_type = 'CrowdHumanDataset'
data_root = 'data/CrowdHuman/'
# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix
# (LMDB and Memcached are not supported yet)
# data_root = 's3://openmmlab/datasets/tracking/CrowdHuman/'
# Method 2: use `backend_args` (named `file_client_args` in versions
# before 3.0.0rc6), e.g.
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/tracking/',
# 'data/': 's3://openmmlab/datasets/tracking/'
# }))
backend_args = None
train_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
test_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='Resize', scale=(1400, 800), keep_ratio=True),
# avoid bboxes being resized
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(
batch_size=2,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
batch_sampler=None, # The 'batch_sampler' may decrease the precision
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotation_train.odgt',
data_prefix=dict(img='Images/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=2,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotation_val.odgt',
data_prefix=dict(img='Images/'),
test_mode=True,
pipeline=test_pipeline,
backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
type='CrowdHumanMetric',
ann_file=data_root + 'annotation_val.odgt',
metric=['AP', 'MR', 'JI'],
backend_args=backend_args)
test_evaluator = val_evaluator
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=800),
dict(
type='MultiStepLR',
begin=0,
end=30,
by_epoch=True,
milestones=[24, 27],
gamma=0.1)
]
# optimizer
auto_scale_lr = dict(base_batch_size=16)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001))
_base_ = './crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py'
model = dict(roi_head=dict(bbox_head=dict(with_refine=True)))
Collections:
- Name: CrowdDet
Metadata:
Training Data: CrowdHuman
Training Techniques:
- SGD
- EMD Loss
Training Resources: 8x A100 GPUs
Architecture:
- FPN
- RPN
- ResNet
- RoIPool
Paper:
URL: https://arxiv.org/abs/2003.09163
Title: 'Detection in Crowded Scenes: One Proposal, Multiple Predictions'
README: configs/crowddet/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/v3.0.0rc3/mmdet/models/detectors/crowddet.py
Version: v3.0.0rc3
Models:
- Name: crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman
In Collection: CrowdDet
Config: configs/crowddet/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman.py
Metadata:
Training Memory (GB): 4.8
Epochs: 30
Results:
- Task: Object Detection
Dataset: CrowdHuman
Metrics:
box AP: 90.32
Weights: https://download.openmmlab.com/mmdetection/v3.0/crowddet/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman/crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman_20221024_215917-45602806.pth
- Name: crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman
In Collection: CrowdDet
Config: configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py
Metadata:
Training Memory (GB): 4.4
Epochs: 30
Results:
- Task: Object Detection
Dataset: CrowdHuman
Metrics:
box AP: 90.0
Weights: https://download.openmmlab.com/mmdetection/v3.0/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman_20221023_174954-dc319c2d.pth
_base_ = [
'../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
model = dict(
type='DABDETR',
num_queries=300,
with_random_refpoints=False,
num_patterns=0,
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=1),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(3, ),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='ChannelMapper',
in_channels=[2048],
kernel_size=1,
out_channels=256,
act_cfg=None,
norm_cfg=None,
num_outs=1),
encoder=dict(
num_layers=6,
layer_cfg=dict(
self_attn_cfg=dict(
embed_dims=256, num_heads=8, dropout=0., batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.,
act_cfg=dict(type='PReLU')))),
decoder=dict(
num_layers=6,
query_dim=4,
query_scale_type='cond_elewise',
with_modulated_hw_attn=True,
layer_cfg=dict(
self_attn_cfg=dict(
embed_dims=256,
num_heads=8,
attn_drop=0.,
proj_drop=0.,
cross_attn=False),
cross_attn_cfg=dict(
embed_dims=256,
num_heads=8,
attn_drop=0.,
proj_drop=0.,
cross_attn=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
ffn_drop=0.,
act_cfg=dict(type='PReLU'))),
return_intermediate=True),
positional_encoding=dict(num_feats=128, temperature=20, normalize=True),
bbox_head=dict(
type='DABDETRHead',
num_classes=80,
embed_dims=256,
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=5.0),
loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
# training and testing settings
train_cfg=dict(
assigner=dict(
type='HungarianAssigner',
match_costs=[
dict(type='FocalLossCost', weight=2., eps=1e-8),
dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
dict(type='IoUCost', iou_mode='giou', weight=2.0)
])),
test_cfg=dict(max_per_img=300))
# train_pipeline, NOTE the img_scale and the Pad's size_divisor are different
# from the default settings in mmdet.
train_pipeline = [
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
scales=[(400, 1333), (500, 1333), (600, 1333)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333),
(576, 1333), (608, 1333), (640, 1333),
(672, 1333), (704, 1333), (736, 1333),
(768, 1333), (800, 1333)],
keep_ratio=True)
]]),
dict(type='PackDetInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
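# Note: with custom_keys={'backbone': dict(lr_mult=0.1, ...)}, every parameter
# whose name contains 'backbone' is optimized at 0.1 * 0.0001 = 1e-05, while
# the remaining parameters keep the full AdamW lr of 1e-04.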
# learning policy
max_epochs = 50
train_cfg = dict(
type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[40],
gamma=0.1)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (2 samples per GPU)
auto_scale_lr = dict(base_batch_size=16, enable=False)
Collections:
- Name: DAB-DETR
Metadata:
Training Data: COCO
Training Techniques:
- AdamW
- Multi Scale Train
- Gradient Clip
Training Resources: 8x A100 GPUs
Architecture:
- ResNet
- Transformer
Paper:
URL: https://arxiv.org/abs/2201.12329
Title: 'DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR'
README: configs/dab_detr/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection/blob/f4112c9e5611468ffbd57cfba548fd1289264b52/mmdet/models/detectors/dab_detr.py#L15
Version: v3.0.0rc6
Models:
- Name: dab-detr_r50_8xb2-50e_coco
In Collection: DAB-DETR
Config: configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py
Metadata:
Epochs: 50
Results:
- Task: Object Detection
Dataset: COCO
Metrics:
box AP: 42.3
Weights: https://download.openmmlab.com/mmdetection/v3.0/dab_detr/dab-detr_r50_8xb2-50e_coco/dab-detr_r50_8xb2-50e_coco_20221122_120837-c1035c8c.pth
_base_ = '../cascade_rcnn/cascade-mask-rcnn_r101_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
_base_ = '../cascade_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
_base_ = '../cascade_rcnn/cascade-mask-rcnn_x101-32x4d_fpn_1x_coco.py'
model = dict(
backbone=dict(
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)))
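# Note on the three DCN variants above: each only patches the backbone of its
# _base_ config. stage_with_dcn=(False, True, True, True) enables deformable
# convolution in the last three ResNet stages and keeps regular convolution in
# the first one; the merged result can be checked with, e.g.
#   from mmengine.config import Config
#   cfg = Config.fromfile(<dcn config path>)  # path assumed
#   print(cfg.model.backbone.stage_with_dcn)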