Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
8885a81b
Commit
8885a81b
authored
Aug 25, 2019
by
v-zeya
Browse files
add reppoints head, reppoints detector and the config files.
parent
c0a4c007
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
2574 additions
and
2 deletions
+2574
-2
configs/reppoints/bbox_r50_grid_center_fpn_1x.py
configs/reppoints/bbox_r50_grid_center_fpn_1x.py
+143
-0
configs/reppoints/bbox_r50_grid_fpn_1x.py
configs/reppoints/bbox_r50_grid_fpn_1x.py
+148
-0
configs/reppoints/reppoints_minmax_r50_fpn_1x.py
configs/reppoints/reppoints_minmax_r50_fpn_1x.py
+142
-0
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
+145
-0
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
+149
-0
configs/reppoints/reppoints_moment_r101_fpn_2x.py
configs/reppoints/reppoints_moment_r101_fpn_2x.py
+142
-0
configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
+146
-0
configs/reppoints/reppoints_moment_r50_fpn_1x.py
configs/reppoints/reppoints_moment_r50_fpn_1x.py
+142
-0
configs/reppoints/reppoints_moment_r50_fpn_2x.py
configs/reppoints/reppoints_moment_r50_fpn_2x.py
+142
-0
configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
+146
-0
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
+150
-0
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
+154
-0
configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
+142
-0
mmdet/models/anchor_heads/__init__.py
mmdet/models/anchor_heads/__init__.py
+3
-1
mmdet/models/anchor_heads/reppoints_head.py
mmdet/models/anchor_heads/reppoints_head.py
+596
-0
mmdet/models/detectors/__init__.py
mmdet/models/detectors/__init__.py
+3
-1
mmdet/models/detectors/reppoints_detector.py
mmdet/models/detectors/reppoints_detector.py
+81
-0
No files found.
configs/reppoints/bbox_r50_grid_center_fpn_1x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'minmax'
,
use_grid_points
=
True
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
8
,
11
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
12
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/bbox_r50_grid_center_fpn_1x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/bbox_r50_grid_fpn_1x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'minmax'
,
use_grid_points
=
True
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
8
,
11
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
12
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/bbox_r50_grid_fpn_1x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_minmax_r50_fpn_1x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'minmax'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
8
,
11
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
12
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_minmax_r50_fpn_1x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet101'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
,
dcn
=
dict
(
modulated
=
False
,
deformable_groups
=
1
,
fallback_on_stride
=
False
),
stage_with_dcn
=
(
False
,
True
,
True
,
True
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r101_dcn_fpn_2x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet101'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
,
dcn
=
dict
(
modulated
=
False
,
deformable_groups
=
1
,
fallback_on_stride
=
False
),
stage_with_dcn
=
(
False
,
True
,
True
,
True
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
[(
1333
,
480
),
(
1333
,
960
)],
keep_ratio
=
True
,
multiscale_mode
=
'range'
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r101_dcn_fpn_2x_mt'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r101_fpn_2x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet101'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r101_fpn_2x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet101'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
[(
1333
,
480
),
(
1333
,
960
)],
keep_ratio
=
True
,
multiscale_mode
=
'range'
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r101_fpn_2x_mt'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r50_fpn_1x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
8
,
11
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
12
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r50_fpn_1x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r50_fpn_2x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r50_fpn_2x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
[(
1333
,
480
),
(
1333
,
960
)],
keep_ratio
=
True
,
multiscale_mode
=
'range'
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_r50_fpn_2x_mt'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'open-mmlab://resnext101_32x4d'
,
backbone
=
dict
(
type
=
'ResNeXt'
,
depth
=
101
,
groups
=
32
,
base_width
=
4
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
,
dcn
=
dict
(
modulated
=
False
,
groups
=
32
,
deformable_groups
=
1
,
fallback_on_stride
=
False
),
stage_with_dcn
=
(
False
,
True
,
True
,
True
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_x101_dcn_fpn_2x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'open-mmlab://resnext101_32x4d'
,
backbone
=
dict
(
type
=
'ResNeXt'
,
depth
=
101
,
groups
=
32
,
base_width
=
4
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
,
dcn
=
dict
(
modulated
=
False
,
groups
=
32
,
deformable_groups
=
1
,
fallback_on_stride
=
False
),
stage_with_dcn
=
(
False
,
True
,
True
,
True
)),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'moment'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
[(
1333
,
480
),
(
1333
,
960
)],
keep_ratio
=
True
,
multiscale_mode
=
'range'
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
16
,
22
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
24
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_moment_x101_dcn_fpn_2x_mt'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
0 → 100644
View file @
8885a81b
# model settings
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
)
model
=
dict
(
type
=
'RepPointsDetector'
,
pretrained
=
'torchvision://resnet50'
,
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
frozen_stages
=
1
,
style
=
'pytorch'
),
neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
start_level
=
1
,
add_extra_convs
=
True
,
num_outs
=
5
,
norm_cfg
=
norm_cfg
),
bbox_head
=
dict
(
type
=
'RepPointsHead'
,
num_classes
=
81
,
in_channels
=
256
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
norm_cfg
=
norm_cfg
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
0.11
,
loss_weight
=
1.0
),
transform_method
=
'partial_minmax'
))
# training and testing settings
train_cfg
=
dict
(
init
=
dict
(
assigner
=
dict
(
type
=
'PointAssigner'
,
scale
=
4
,
pos_num
=
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
),
refine
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.4
,
min_pos_iou
=
0
,
ignore_iof_thr
=-
1
),
allowed_border
=-
1
,
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
nms_pre
=
1000
,
min_bbox_size
=
0
,
score_thr
=
0.05
,
nms
=
dict
(
type
=
'nms'
,
iou_thr
=
0.5
),
max_per_img
=
100
)
# dataset settings
dataset_type
=
'CocoDataset'
data_root
=
'data/coco/'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
800
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'MultiScaleFlipAug'
,
img_scale
=
(
1333
,
800
),
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
data
=
dict
(
imgs_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_train2017.json'
,
img_prefix
=
data_root
+
'train2017/'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
data_root
+
'annotations/instances_val2017.json'
,
img_prefix
=
data_root
+
'val2017/'
,
pipeline
=
test_pipeline
))
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
step
=
[
8
,
11
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs
=
12
device_ids
=
range
(
8
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/reppoints_partial_minmax_r50_fpn_1x'
load_from
=
None
resume_from
=
None
auto_resume
=
True
workflow
=
[(
'train'
,
1
)]
mmdet/models/anchor_heads/__init__.py
View file @
8885a81b
...
@@ -3,11 +3,13 @@ from .fcos_head import FCOSHead
...
@@ -3,11 +3,13 @@ from .fcos_head import FCOSHead
from
.ga_retina_head
import
GARetinaHead
from
.ga_retina_head
import
GARetinaHead
from
.ga_rpn_head
import
GARPNHead
from
.ga_rpn_head
import
GARPNHead
from
.guided_anchor_head
import
FeatureAdaption
,
GuidedAnchorHead
from
.guided_anchor_head
import
FeatureAdaption
,
GuidedAnchorHead
from
.reppoints_head
import
RepPointsHead
from
.retina_head
import
RetinaHead
from
.retina_head
import
RetinaHead
from
.rpn_head
import
RPNHead
from
.rpn_head
import
RPNHead
from
.ssd_head
import
SSDHead
from
.ssd_head
import
SSDHead
__all__
=
[
__all__
=
[
'AnchorHead'
,
'GuidedAnchorHead'
,
'FeatureAdaption'
,
'RPNHead'
,
'AnchorHead'
,
'GuidedAnchorHead'
,
'FeatureAdaption'
,
'RPNHead'
,
'GARPNHead'
,
'RetinaHead'
,
'GARetinaHead'
,
'SSDHead'
,
'FCOSHead'
'GARPNHead'
,
'RetinaHead'
,
'GARetinaHead'
,
'SSDHead'
,
'FCOSHead'
,
'RepPointsHead'
]
]
mmdet/models/anchor_heads/reppoints_head.py
0 → 100644
View file @
8885a81b
from
__future__
import
division
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
normal_init
from
mmdet.core
import
(
PointGenerator
,
multi_apply
,
multiclass_nms
,
point_target
)
from
mmdet.ops
import
DeformConv
from
..builder
import
build_loss
from
..registry
import
HEADS
from
..utils
import
ConvModule
,
bias_init_with_prob
@
HEADS
.
register_module
class
RepPointsHead
(
nn
.
Module
):
"""RepPoint head.
Args:
in_channels (int): Number of channels in the input feature map.
feat_channels (int): Number of channels of the feature map.
point_feat_channels (int): Number of channels of points features.
stacked_convs (int): How many conv layers are used.
gradient_mul (float): The multiplier to gradients from
points refinement and recognition.
point_strides (Iterable): points strides.
point_base_scale (int): bbox scale for assigning labels.
loss_cls (dict): Config of classification loss.
loss_bbox_init (dict): Config of initial points loss.
loss_bbox_refine (dict): Config of points loss in refinement.
use_grid_points (bool): If we use bounding box representation, the
reppoints is represented as grid points on the bounding box.
center_init (bool): Whether to use center point assignment.
transform_method (str): The methods to transform RepPoints to bbox.
"""
# noqa: W605
def
__init__
(
self
,
num_classes
,
in_channels
,
feat_channels
=
256
,
point_feat_channels
=
256
,
stacked_convs
=
3
,
num_points
=
9
,
gradient_mul
=
0.1
,
point_strides
=
[
8
,
16
,
32
,
64
,
128
],
point_base_scale
=
4
,
conv_cfg
=
None
,
norm_cfg
=
None
,
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox_init
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
0.5
),
loss_bbox_refine
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
use_grid_points
=
False
,
center_init
=
True
,
transform_method
=
'moment'
,
moment_mul
=
0.01
):
super
(
RepPointsHead
,
self
).
__init__
()
self
.
in_channels
=
in_channels
self
.
num_classes
=
num_classes
self
.
feat_channels
=
feat_channels
self
.
point_feat_channels
=
point_feat_channels
self
.
stacked_convs
=
stacked_convs
self
.
num_points
=
num_points
self
.
gradient_mul
=
gradient_mul
self
.
point_base_scale
=
point_base_scale
self
.
point_strides
=
point_strides
self
.
conv_cfg
=
conv_cfg
self
.
norm_cfg
=
norm_cfg
self
.
use_sigmoid_cls
=
loss_cls
.
get
(
'use_sigmoid'
,
False
)
self
.
sampling
=
loss_cls
[
'type'
]
not
in
[
'FocalLoss'
]
self
.
loss_cls
=
build_loss
(
loss_cls
)
self
.
loss_bbox_init
=
build_loss
(
loss_bbox_init
)
self
.
loss_bbox_refine
=
build_loss
(
loss_bbox_refine
)
self
.
use_grid_points
=
use_grid_points
self
.
center_init
=
center_init
self
.
transform_method
=
transform_method
if
self
.
transform_method
==
'moment'
:
self
.
moment_transfer
=
nn
.
Parameter
(
data
=
torch
.
zeros
(
2
),
requires_grad
=
True
)
self
.
moment_mul
=
moment_mul
if
self
.
use_sigmoid_cls
:
self
.
cls_out_channels
=
self
.
num_classes
-
1
else
:
self
.
cls_out_channels
=
self
.
num_classes
self
.
point_generators
=
[
PointGenerator
()
for
_
in
self
.
point_strides
]
# we use deformable conv to extract points features
self
.
dcn_kernel
=
int
(
np
.
sqrt
(
num_points
))
self
.
dcn_pad
=
int
((
self
.
dcn_kernel
-
1
)
/
2
)
assert
self
.
dcn_kernel
*
self
.
dcn_kernel
==
num_points
,
\
"The points number should be a square number."
assert
self
.
dcn_kernel
%
2
==
1
,
\
"The points number should be an odd square number."
dcn_base
=
np
.
arange
(
-
self
.
dcn_pad
,
self
.
dcn_pad
+
1
).
astype
(
np
.
float64
)
dcn_base_y
=
np
.
repeat
(
dcn_base
,
self
.
dcn_kernel
)
dcn_base_x
=
np
.
tile
(
dcn_base
,
self
.
dcn_kernel
)
dcn_base_offset
=
np
.
stack
([
dcn_base_y
,
dcn_base_x
],
axis
=
1
).
reshape
(
(
-
1
))
self
.
dcn_base_offset
=
torch
.
tensor
(
dcn_base_offset
).
view
(
1
,
-
1
,
1
,
1
)
self
.
_init_layers
()
def
_init_layers
(
self
):
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
cls_convs
=
nn
.
ModuleList
()
self
.
reg_convs
=
nn
.
ModuleList
()
for
i
in
range
(
self
.
stacked_convs
):
chn
=
self
.
in_channels
if
i
==
0
else
self
.
feat_channels
self
.
cls_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
self
.
reg_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
pts_out_dim
=
4
if
self
.
use_grid_points
else
2
*
self
.
num_points
self
.
reppoints_cls_conv
=
DeformConv
(
self
.
feat_channels
,
self
.
point_feat_channels
,
self
.
dcn_kernel
,
1
,
self
.
dcn_pad
)
self
.
reppoints_cls_out
=
nn
.
Conv2d
(
self
.
point_feat_channels
,
self
.
cls_out_channels
,
1
,
1
,
0
)
self
.
reppoints_pts_init_conv
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
point_feat_channels
,
3
,
1
,
1
)
self
.
reppoints_pts_init_out
=
nn
.
Conv2d
(
self
.
point_feat_channels
,
pts_out_dim
,
1
,
1
,
0
)
self
.
reppoints_pts_refine_conv
=
DeformConv
(
self
.
feat_channels
,
self
.
point_feat_channels
,
self
.
dcn_kernel
,
1
,
self
.
dcn_pad
)
self
.
reppoints_pts_refine_out
=
nn
.
Conv2d
(
self
.
point_feat_channels
,
pts_out_dim
,
1
,
1
,
0
)
def
init_weights
(
self
):
for
m
in
self
.
cls_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
for
m
in
self
.
reg_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
reppoints_cls_conv
,
std
=
0.01
)
normal_init
(
self
.
reppoints_cls_out
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
reppoints_pts_init_conv
,
std
=
0.01
)
normal_init
(
self
.
reppoints_pts_init_out
,
std
=
0.01
)
normal_init
(
self
.
reppoints_pts_refine_conv
,
std
=
0.01
)
normal_init
(
self
.
reppoints_pts_refine_out
,
std
=
0.01
)
def
points2bbox
(
self
,
pts
,
y_first
=
True
):
"""
Converting the points set into bounding box.
:param pts: the input points sets (fields), each points
set (fields) is represented as 2n scalar.
:param y_first: if y_fisrt=True, the point set is represented as
[y1, x1, y2, x2 ... yn, xn], otherwise the point set is
represented as [x1, y1, x2, y2 ... xn, yn].
:return: each points set is converting to a bbox [x1, y1, x2, y2].
"""
pts_reshape
=
pts
.
view
(
pts
.
shape
[
0
],
-
1
,
2
,
*
pts
.
shape
[
2
:])
pts_y
=
pts_reshape
[:,
:,
0
,
...]
if
y_first
else
pts_reshape
[:,
:,
1
,
...]
pts_x
=
pts_reshape
[:,
:,
1
,
...]
if
y_first
else
pts_reshape
[:,
:,
0
,
...]
if
self
.
transform_method
==
'minmax'
:
bbox_left
=
pts_x
.
min
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_right
=
pts_x
.
max
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_up
=
pts_y
.
min
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_bottom
=
pts_y
.
max
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox
=
torch
.
cat
([
bbox_left
,
bbox_up
,
bbox_right
,
bbox_bottom
],
dim
=
1
)
elif
self
.
transform_method
==
'partial_minmax'
:
pts_y
=
pts_y
[:,
:
4
,
...]
pts_x
=
pts_x
[:,
:
4
,
...]
bbox_left
=
pts_x
.
min
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_right
=
pts_x
.
max
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_up
=
pts_y
.
min
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox_bottom
=
pts_y
.
max
(
dim
=
1
,
keepdim
=
True
)[
0
]
bbox
=
torch
.
cat
([
bbox_left
,
bbox_up
,
bbox_right
,
bbox_bottom
],
dim
=
1
)
elif
self
.
transform_method
==
'moment'
:
pts_y_mean
=
pts_y
.
mean
(
dim
=
1
,
keepdim
=
True
)
pts_x_mean
=
pts_x
.
mean
(
dim
=
1
,
keepdim
=
True
)
pts_y_std
=
torch
.
std
(
pts_y
-
pts_y_mean
,
dim
=
1
,
keepdim
=
True
)
pts_x_std
=
torch
.
std
(
pts_x
-
pts_x_mean
,
dim
=
1
,
keepdim
=
True
)
moment_transfer
=
(
self
.
moment_transfer
*
self
.
moment_mul
)
+
(
self
.
moment_transfer
.
detach
()
*
(
1
-
self
.
moment_mul
))
moment_width_transfer
=
moment_transfer
[
0
]
moment_height_transfer
=
moment_transfer
[
1
]
half_width
=
pts_x_std
*
torch
.
exp
(
moment_width_transfer
)
half_height
=
pts_y_std
*
torch
.
exp
(
moment_height_transfer
)
bbox
=
torch
.
cat
([
pts_x_mean
-
half_width
,
pts_y_mean
-
half_height
,
pts_x_mean
+
half_width
,
pts_y_mean
+
half_height
],
dim
=
1
)
else
:
raise
NotImplementedError
return
bbox
def
gen_grid_from_reg
(
self
,
reg
,
previous_boxes
):
"""
Base on the previous bboxes and regression values, we compute the
regressed bboxes and generate the grids on the bboxes.
:param reg: the regression value to previous bboxes.
:param previous_boxes: previous bboxes.
:return: generate grids on the regressed bboxes.
"""
b
,
_
,
h
,
w
=
reg
.
shape
bxy
=
(
previous_boxes
[:,
:
2
,
...]
+
previous_boxes
[:,
2
:,
...])
/
2.
bwh
=
(
previous_boxes
[:,
2
:,
...]
-
previous_boxes
[:,
:
2
,
...]).
clamp
(
min
=
1e-6
)
grid_topleft
=
bxy
+
bwh
*
reg
[:,
:
2
,
...]
-
0.5
*
bwh
*
torch
.
exp
(
reg
[:,
2
:,
...])
grid_wh
=
bwh
*
torch
.
exp
(
reg
[:,
2
:,
...])
grid_left
=
grid_topleft
[:,
[
0
],
...]
grid_top
=
grid_topleft
[:,
[
1
],
...]
grid_width
=
grid_wh
[:,
[
0
],
...]
grid_height
=
grid_wh
[:,
[
1
],
...]
intervel
=
torch
.
linspace
(
0.
,
1.
,
self
.
dcn_kernel
).
view
(
1
,
self
.
dcn_kernel
,
1
,
1
).
type_as
(
reg
)
grid_x
=
grid_left
+
grid_width
*
intervel
grid_x
=
grid_x
.
unsqueeze
(
1
).
repeat
(
1
,
self
.
dcn_kernel
,
1
,
1
,
1
)
grid_x
=
grid_x
.
view
(
b
,
-
1
,
h
,
w
)
grid_y
=
grid_top
+
grid_height
*
intervel
grid_y
=
grid_y
.
unsqueeze
(
2
).
repeat
(
1
,
1
,
self
.
dcn_kernel
,
1
,
1
)
grid_y
=
grid_y
.
view
(
b
,
-
1
,
h
,
w
)
grid_yx
=
torch
.
stack
([
grid_y
,
grid_x
],
dim
=
2
)
grid_yx
=
grid_yx
.
view
(
b
,
-
1
,
h
,
w
)
regressed_bbox
=
torch
.
cat
([
grid_left
,
grid_top
,
grid_left
+
grid_width
,
grid_top
+
grid_height
],
1
)
return
grid_yx
,
regressed_bbox
def
forward_single
(
self
,
x
):
dcn_base_offset
=
self
.
dcn_base_offset
.
type_as
(
x
)
# If we use center_init, the initial reppoints is from center points.
# If we use bounding bbox representation, the initial reppoints is
# from regular grid placed on a pre-defined bbox.
if
self
.
use_grid_points
or
not
self
.
center_init
:
scale
=
self
.
point_base_scale
/
2
points_init
=
dcn_base_offset
/
dcn_base_offset
.
max
()
*
scale
bbox_init
=
x
.
new_tensor
([
-
scale
,
-
scale
,
scale
,
scale
]).
view
(
1
,
4
,
1
,
1
)
else
:
points_init
=
0
cls_feat
=
x
pts_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
cls_conv
(
cls_feat
)
for
reg_conv
in
self
.
reg_convs
:
pts_feat
=
reg_conv
(
pts_feat
)
# initialize reppoints
pts_out_init
=
self
.
reppoints_pts_init_out
(
self
.
relu
(
self
.
reppoints_pts_init_conv
(
pts_feat
)))
if
self
.
use_grid_points
:
pts_out_init
,
bbox_out_init
=
self
.
gen_grid_from_reg
(
pts_out_init
,
bbox_init
.
detach
())
else
:
pts_out_init
=
pts_out_init
+
points_init
# refine and classify reppoints
pts_out_init_grad_mul
=
(
1
-
self
.
gradient_mul
)
*
pts_out_init
.
detach
(
)
+
self
.
gradient_mul
*
pts_out_init
dcn_offset
=
pts_out_init_grad_mul
-
dcn_base_offset
cls_out
=
self
.
reppoints_cls_out
(
self
.
relu
(
self
.
reppoints_cls_conv
(
cls_feat
,
dcn_offset
)))
pts_out_refine
=
self
.
reppoints_pts_refine_out
(
self
.
relu
(
self
.
reppoints_pts_refine_conv
(
pts_feat
,
dcn_offset
)))
if
self
.
use_grid_points
:
pts_out_refine
,
bbox_out_refine
=
self
.
gen_grid_from_reg
(
pts_out_refine
,
bbox_out_init
.
detach
())
else
:
pts_out_refine
=
pts_out_refine
+
pts_out_init
.
detach
()
return
cls_out
,
pts_out_init
,
pts_out_refine
def
forward
(
self
,
feats
):
return
multi_apply
(
self
.
forward_single
,
feats
)
def
get_points
(
self
,
featmap_sizes
,
img_metas
):
"""Get points according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.
Returns:
tuple: points of each image, valid flags of each image
"""
num_imgs
=
len
(
img_metas
)
num_levels
=
len
(
featmap_sizes
)
# since feature map sizes of all images are the same, we only compute
# points center for one time
multi_level_points
=
[]
for
i
in
range
(
num_levels
):
points
=
self
.
point_generators
[
i
].
grid_points
(
featmap_sizes
[
i
],
self
.
point_strides
[
i
])
multi_level_points
.
append
(
points
)
points_list
=
[[
point
.
clone
()
for
point
in
multi_level_points
]
for
_
in
range
(
num_imgs
)]
# for each image, we compute valid flags of multi level grids
valid_flag_list
=
[]
for
img_id
,
img_meta
in
enumerate
(
img_metas
):
multi_level_flags
=
[]
for
i
in
range
(
num_levels
):
point_stride
=
self
.
point_strides
[
i
]
feat_h
,
feat_w
=
featmap_sizes
[
i
]
h
,
w
,
_
=
img_meta
[
'pad_shape'
]
valid_feat_h
=
min
(
int
(
np
.
ceil
(
h
/
point_stride
)),
feat_h
)
valid_feat_w
=
min
(
int
(
np
.
ceil
(
w
/
point_stride
)),
feat_w
)
flags
=
self
.
point_generators
[
i
].
valid_flags
(
(
feat_h
,
feat_w
),
(
valid_feat_h
,
valid_feat_w
))
multi_level_flags
.
append
(
flags
)
valid_flag_list
.
append
(
multi_level_flags
)
return
points_list
,
valid_flag_list
def
centers_to_bboxes
(
self
,
point_list
):
"""Get bboxes according to center points. Only used in MaxIOUAssigner.
"""
bbox_list
=
[]
for
i_img
,
point
in
enumerate
(
point_list
):
bbox
=
[]
for
i_lvl
in
range
(
len
(
self
.
point_strides
)):
scale
=
self
.
point_base_scale
*
self
.
point_strides
[
i_lvl
]
*
0.5
bbox_shift
=
torch
.
Tensor
([
-
scale
,
-
scale
,
scale
,
scale
]).
view
(
1
,
4
).
type_as
(
point
[
0
])
bbox_center
=
torch
.
cat
(
[
point
[
i_lvl
][:,
:
2
],
point
[
i_lvl
][:,
:
2
]],
dim
=
1
)
bbox
.
append
(
bbox_center
+
bbox_shift
)
bbox_list
.
append
(
bbox
)
return
bbox_list
def
offset_to_pts
(
self
,
center_list
,
pred_list
):
"""Change from point offset to point coordinate.
"""
pts_list
=
[]
for
i_lvl
in
range
(
len
(
self
.
point_strides
)):
pts_lvl
=
[]
for
i_img
in
range
(
len
(
center_list
)):
pts_center
=
center_list
[
i_img
][
i_lvl
][:,
:
2
].
repeat
(
1
,
self
.
num_points
)
pts_shift
=
pred_list
[
i_lvl
][
i_img
]
yx_pts_shift
=
pts_shift
.
permute
(
1
,
2
,
0
).
view
(
-
1
,
2
*
self
.
num_points
)
y_pts_shift
=
yx_pts_shift
[...,
0
::
2
]
x_pts_shift
=
yx_pts_shift
[...,
1
::
2
]
xy_pts_shift
=
torch
.
stack
([
x_pts_shift
,
y_pts_shift
],
-
1
)
xy_pts_shift
=
xy_pts_shift
.
view
(
*
yx_pts_shift
.
shape
[:
-
1
],
-
1
)
pts
=
xy_pts_shift
*
self
.
point_strides
[
i_lvl
]
+
pts_center
pts_lvl
.
append
(
pts
)
pts_lvl
=
torch
.
stack
(
pts_lvl
,
0
)
pts_list
.
append
(
pts_lvl
)
return
pts_list
def
loss_single
(
self
,
cls_score
,
pts_pred_init
,
pts_pred_refine
,
labels
,
label_weights
,
bbox_gt_init
,
bbox_weights_init
,
bbox_gt_refine
,
bbox_weights_refine
,
stride
,
num_total_samples_init
,
num_total_samples_refine
):
# classification loss
labels
=
labels
.
reshape
(
-
1
)
label_weights
=
label_weights
.
reshape
(
-
1
)
cls_score
=
cls_score
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
self
.
cls_out_channels
)
loss_cls
=
self
.
loss_cls
(
cls_score
,
labels
,
label_weights
,
avg_factor
=
num_total_samples_refine
)
# points loss
bbox_gt_init
=
bbox_gt_init
.
reshape
(
-
1
,
4
)
bbox_weights_init
=
bbox_weights_init
.
reshape
(
-
1
,
4
)
bbox_pred_init
=
self
.
points2bbox
(
pts_pred_init
.
reshape
(
-
1
,
2
*
self
.
num_points
),
y_first
=
False
)
bbox_gt_refine
=
bbox_gt_refine
.
reshape
(
-
1
,
4
)
bbox_weights_refine
=
bbox_weights_refine
.
reshape
(
-
1
,
4
)
bbox_pred_refine
=
self
.
points2bbox
(
pts_pred_refine
.
reshape
(
-
1
,
2
*
self
.
num_points
),
y_first
=
False
)
normalize_term
=
self
.
point_base_scale
*
stride
loss_pts_init
=
self
.
loss_bbox_init
(
bbox_pred_init
/
normalize_term
,
bbox_gt_init
/
normalize_term
,
bbox_weights_init
,
avg_factor
=
num_total_samples_init
)
loss_pts_refine
=
self
.
loss_bbox_refine
(
bbox_pred_refine
/
normalize_term
,
bbox_gt_refine
/
normalize_term
,
bbox_weights_refine
,
avg_factor
=
num_total_samples_refine
)
return
loss_cls
,
loss_pts_init
,
loss_pts_refine
def
loss
(
self
,
cls_scores
,
pts_preds_init
,
pts_preds_refine
,
gt_bboxes
,
gt_labels
,
img_metas
,
cfg
,
gt_bboxes_ignore
=
None
):
featmap_sizes
=
[
featmap
.
size
()[
-
2
:]
for
featmap
in
cls_scores
]
assert
len
(
featmap_sizes
)
==
len
(
self
.
point_generators
)
label_channels
=
self
.
cls_out_channels
if
self
.
use_sigmoid_cls
else
1
# target for initial stage
center_list
,
valid_flag_list
=
self
.
get_points
(
featmap_sizes
,
img_metas
)
pts_coordinate_preds_init
=
self
.
offset_to_pts
(
center_list
,
pts_preds_init
)
if
cfg
.
init
.
assigner
[
'type'
]
==
'PointAssigner'
:
# Assign target for center list
candidate_list
=
center_list
else
:
# transform center list to bbox list and
# assign target for bbox list
bbox_list
=
self
.
centers_to_bboxes
(
center_list
)
candidate_list
=
bbox_list
cls_reg_targets_init
=
point_target
(
candidate_list
,
valid_flag_list
,
gt_bboxes
,
img_metas
,
cfg
.
init
,
gt_bboxes_ignore_list
=
gt_bboxes_ignore
,
gt_labels_list
=
gt_labels
,
label_channels
=
label_channels
,
sampling
=
self
.
sampling
)
(
*
_
,
bbox_gt_list_init
,
candidate_list_init
,
bbox_weights_list_init
,
num_total_pos_init
,
num_total_neg_init
)
=
cls_reg_targets_init
num_total_samples_init
=
(
num_total_pos_init
+
num_total_neg_init
if
self
.
sampling
else
num_total_pos_init
)
# target for refinement stage
center_list
,
valid_flag_list
=
self
.
get_points
(
featmap_sizes
,
img_metas
)
pts_coordinate_preds_refine
=
self
.
offset_to_pts
(
center_list
,
pts_preds_refine
)
bbox_list
=
[]
for
i_img
,
center
in
enumerate
(
center_list
):
bbox
=
[]
for
i_lvl
in
range
(
len
(
pts_preds_refine
)):
bbox_preds_init
=
self
.
points2bbox
(
pts_preds_init
[
i_lvl
].
detach
())
bbox_shift
=
bbox_preds_init
*
self
.
point_strides
[
i_lvl
]
bbox_center
=
torch
.
cat
(
[
center
[
i_lvl
][:,
:
2
],
center
[
i_lvl
][:,
:
2
]],
dim
=
1
)
bbox
.
append
(
bbox_center
+
bbox_shift
[
i_img
].
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
))
bbox_list
.
append
(
bbox
)
cls_reg_targets_refine
=
point_target
(
bbox_list
,
valid_flag_list
,
gt_bboxes
,
img_metas
,
cfg
.
refine
,
gt_bboxes_ignore_list
=
gt_bboxes_ignore
,
gt_labels_list
=
gt_labels
,
label_channels
=
label_channels
,
sampling
=
self
.
sampling
)
(
labels_list
,
label_weights_list
,
bbox_gt_list_refine
,
candidate_list_refine
,
bbox_weights_list_refine
,
num_total_pos_refine
,
num_total_neg_refine
)
=
cls_reg_targets_refine
num_total_samples_refine
=
(
num_total_pos_refine
+
num_total_neg_refine
if
self
.
sampling
else
num_total_pos_refine
)
# compute loss
losses_cls
,
losses_pts_init
,
losses_pts_refine
=
multi_apply
(
self
.
loss_single
,
cls_scores
,
pts_coordinate_preds_init
,
pts_coordinate_preds_refine
,
labels_list
,
label_weights_list
,
bbox_gt_list_init
,
bbox_weights_list_init
,
bbox_gt_list_refine
,
bbox_weights_list_refine
,
self
.
point_strides
,
num_total_samples_init
=
num_total_samples_init
,
num_total_samples_refine
=
num_total_samples_refine
)
loss_dict_all
=
{
'loss_cls'
:
losses_cls
,
'loss_pts_init'
:
losses_pts_init
,
'loss_pts_refine'
:
losses_pts_refine
}
return
loss_dict_all
def
get_bboxes
(
self
,
cls_scores
,
pts_preds_init
,
pts_preds_refine
,
img_metas
,
cfg
,
rescale
=
False
,
nms
=
True
):
assert
len
(
cls_scores
)
==
len
(
pts_preds_refine
)
bbox_preds_refine
=
[
self
.
points2bbox
(
pts_pred_refine
)
for
pts_pred_refine
in
pts_preds_refine
]
num_levels
=
len
(
cls_scores
)
mlvl_points
=
[
self
.
point_generators
[
i
].
grid_points
(
cls_scores
[
i
].
size
()[
-
2
:],
self
.
point_strides
[
i
])
for
i
in
range
(
num_levels
)
]
result_list
=
[]
for
img_id
in
range
(
len
(
img_metas
)):
cls_score_list
=
[
cls_scores
[
i
][
img_id
].
detach
()
for
i
in
range
(
num_levels
)
]
bbox_pred_list
=
[
bbox_preds_refine
[
i
][
img_id
].
detach
()
for
i
in
range
(
num_levels
)
]
img_shape
=
img_metas
[
img_id
][
'img_shape'
]
scale_factor
=
img_metas
[
img_id
][
'scale_factor'
]
proposals
=
self
.
get_bboxes_single
(
cls_score_list
,
bbox_pred_list
,
mlvl_points
,
img_shape
,
scale_factor
,
cfg
,
rescale
,
nms
)
result_list
.
append
(
proposals
)
return
result_list
def
get_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_points
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
,
nms
=
True
):
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
len
(
mlvl_points
)
mlvl_bboxes
=
[]
mlvl_scores
=
[]
for
i_lvl
,
(
cls_score
,
bbox_pred
,
points
)
in
enumerate
(
zip
(
cls_scores
,
bbox_preds
,
mlvl_points
)):
assert
cls_score
.
size
()[
-
2
:]
==
bbox_pred
.
size
()[
-
2
:]
cls_score
=
cls_score
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
self
.
cls_out_channels
)
if
self
.
use_sigmoid_cls
:
scores
=
cls_score
.
sigmoid
()
else
:
scores
=
cls_score
.
softmax
(
-
1
)
bbox_pred
=
bbox_pred
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
)
nms_pre
=
cfg
.
get
(
'nms_pre'
,
-
1
)
if
nms_pre
>
0
and
scores
.
shape
[
0
]
>
nms_pre
:
if
self
.
use_sigmoid_cls
:
max_scores
,
_
=
scores
.
max
(
dim
=
1
)
else
:
max_scores
,
_
=
scores
[:,
1
:].
max
(
dim
=
1
)
_
,
topk_inds
=
max_scores
.
topk
(
nms_pre
)
points
=
points
[
topk_inds
,
:]
bbox_pred
=
bbox_pred
[
topk_inds
,
:]
scores
=
scores
[
topk_inds
,
:]
bbox_pos_center
=
torch
.
cat
([
points
[:,
:
2
],
points
[:,
:
2
]],
dim
=
1
)
bboxes
=
bbox_pred
*
self
.
point_strides
[
i_lvl
]
+
bbox_pos_center
x1
=
bboxes
[:,
0
].
clamp
(
min
=
0
,
max
=
img_shape
[
1
])
y1
=
bboxes
[:,
1
].
clamp
(
min
=
0
,
max
=
img_shape
[
0
])
x2
=
bboxes
[:,
2
].
clamp
(
min
=
0
,
max
=
img_shape
[
1
])
y2
=
bboxes
[:,
3
].
clamp
(
min
=
0
,
max
=
img_shape
[
0
])
bboxes
=
torch
.
stack
([
x1
,
y1
,
x2
,
y2
],
dim
=-
1
)
mlvl_bboxes
.
append
(
bboxes
)
mlvl_scores
.
append
(
scores
)
mlvl_bboxes
=
torch
.
cat
(
mlvl_bboxes
)
if
rescale
:
mlvl_bboxes
/=
mlvl_bboxes
.
new_tensor
(
scale_factor
)
mlvl_scores
=
torch
.
cat
(
mlvl_scores
)
if
self
.
use_sigmoid_cls
:
padding
=
mlvl_scores
.
new_zeros
(
mlvl_scores
.
shape
[
0
],
1
)
mlvl_scores
=
torch
.
cat
([
padding
,
mlvl_scores
],
dim
=
1
)
if
nms
:
det_bboxes
,
det_labels
=
multiclass_nms
(
mlvl_bboxes
,
mlvl_scores
,
cfg
.
score_thr
,
cfg
.
nms
,
cfg
.
max_per_img
)
return
det_bboxes
,
det_labels
else
:
return
mlvl_bboxes
,
mlvl_scores
mmdet/models/detectors/__init__.py
View file @
8885a81b
...
@@ -8,6 +8,7 @@ from .grid_rcnn import GridRCNN
...
@@ -8,6 +8,7 @@ from .grid_rcnn import GridRCNN
from
.htc
import
HybridTaskCascade
from
.htc
import
HybridTaskCascade
from
.mask_rcnn
import
MaskRCNN
from
.mask_rcnn
import
MaskRCNN
from
.mask_scoring_rcnn
import
MaskScoringRCNN
from
.mask_scoring_rcnn
import
MaskScoringRCNN
from
.reppoints_detector
import
RepPointsDetector
from
.retinanet
import
RetinaNet
from
.retinanet
import
RetinaNet
from
.rpn
import
RPN
from
.rpn
import
RPN
from
.single_stage
import
SingleStageDetector
from
.single_stage
import
SingleStageDetector
...
@@ -16,5 +17,6 @@ from .two_stage import TwoStageDetector
...
@@ -16,5 +17,6 @@ from .two_stage import TwoStageDetector
__all__
=
[
__all__
=
[
'BaseDetector'
,
'SingleStageDetector'
,
'TwoStageDetector'
,
'RPN'
,
'BaseDetector'
,
'SingleStageDetector'
,
'TwoStageDetector'
,
'RPN'
,
'FastRCNN'
,
'FasterRCNN'
,
'MaskRCNN'
,
'CascadeRCNN'
,
'HybridTaskCascade'
,
'FastRCNN'
,
'FasterRCNN'
,
'MaskRCNN'
,
'CascadeRCNN'
,
'HybridTaskCascade'
,
'DoubleHeadRCNN'
,
'RetinaNet'
,
'FCOS'
,
'GridRCNN'
,
'MaskScoringRCNN'
'DoubleHeadRCNN'
,
'RetinaNet'
,
'FCOS'
,
'GridRCNN'
,
'MaskScoringRCNN'
,
'RepPointsDetector'
]
]
mmdet/models/detectors/reppoints_detector.py
0 → 100644
View file @
8885a81b
import
torch
from
mmdet.core
import
bbox2result
,
bbox_mapping_back
,
multiclass_nms
from
..registry
import
DETECTORS
from
.single_stage
import
SingleStageDetector
@
DETECTORS
.
register_module
class
RepPointsDetector
(
SingleStageDetector
):
"""RepPoints: Point Set Representation for Object Detection.
This detector is the implementation of:
- RepPoints detector (https://arxiv.org/pdf/1904.11490)
"""
def
__init__
(
self
,
backbone
,
neck
,
bbox_head
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
):
super
(
RepPointsDetector
,
self
).
__init__
(
backbone
,
neck
,
bbox_head
,
train_cfg
,
test_cfg
,
pretrained
)
def
merge_aug_results
(
self
,
aug_bboxes
,
aug_scores
,
img_metas
):
"""Merge augmented detection bboxes and scores.
Args:
aug_bboxes (list[Tensor]): shape (n, 4*#class)
aug_scores (list[Tensor] or None): shape (n, #class)
img_shapes (list[Tensor]): shape (3, ).
Returns:
tuple: (bboxes, scores)
"""
recovered_bboxes
=
[]
for
bboxes
,
img_info
in
zip
(
aug_bboxes
,
img_metas
):
img_shape
=
img_info
[
0
][
'img_shape'
]
scale_factor
=
img_info
[
0
][
'scale_factor'
]
flip
=
img_info
[
0
][
'flip'
]
bboxes
=
bbox_mapping_back
(
bboxes
,
img_shape
,
scale_factor
,
flip
)
recovered_bboxes
.
append
(
bboxes
)
bboxes
=
torch
.
cat
(
recovered_bboxes
,
dim
=
0
)
if
aug_scores
is
None
:
return
bboxes
else
:
scores
=
torch
.
cat
(
aug_scores
,
dim
=
0
)
return
bboxes
,
scores
def
aug_test
(
self
,
imgs
,
img_metas
,
rescale
=
False
):
# recompute feats to save memory
feats
=
self
.
extract_feats
(
imgs
)
aug_bboxes
=
[]
aug_scores
=
[]
for
x
,
img_meta
in
zip
(
feats
,
img_metas
):
# only one image in the batch
outs
=
self
.
bbox_head
(
x
)
bbox_inputs
=
outs
+
(
img_meta
,
self
.
test_cfg
,
False
,
False
)
det_bboxes
,
det_scores
=
self
.
bbox_head
.
get_bboxes
(
*
bbox_inputs
)[
0
]
aug_bboxes
.
append
(
det_bboxes
)
aug_scores
.
append
(
det_scores
)
# after merging, bboxes will be rescaled to the original image size
merged_bboxes
,
merged_scores
=
self
.
merge_aug_results
(
aug_bboxes
,
aug_scores
,
img_metas
)
det_bboxes
,
det_labels
=
multiclass_nms
(
merged_bboxes
,
merged_scores
,
self
.
test_cfg
.
score_thr
,
self
.
test_cfg
.
nms
,
self
.
test_cfg
.
max_per_img
)
if
rescale
:
_det_bboxes
=
det_bboxes
else
:
_det_bboxes
=
det_bboxes
.
clone
()
_det_bboxes
[:,
:
4
]
*=
img_metas
[
0
][
0
][
'scale_factor'
]
bbox_results
=
bbox2result
(
_det_bboxes
,
det_labels
,
self
.
bbox_head
.
num_classes
)
return
bbox_results
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment