Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
441015ea
Commit
441015ea
authored
Feb 06, 2019
by
Kai Chen
Browse files
Merge branch 'master' into pytorch-1.0
parents
2017c81e
3b6ae96d
Changes
76
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1110 additions
and
240 deletions
+1110
-240
mmdet/core/bbox/assigners/max_iou_assigner.py
mmdet/core/bbox/assigners/max_iou_assigner.py
+7
-8
mmdet/core/bbox/geometry.py
mmdet/core/bbox/geometry.py
+1
-1
mmdet/core/evaluation/mean_ap.py
mmdet/core/evaluation/mean_ap.py
+8
-4
mmdet/core/loss/losses.py
mmdet/core/loss/losses.py
+2
-0
mmdet/datasets/__init__.py
mmdet/datasets/__init__.py
+3
-1
mmdet/datasets/coco.py
mmdet/datasets/coco.py
+1
-1
mmdet/datasets/custom.py
mmdet/datasets/custom.py
+21
-2
mmdet/datasets/extra_aug.py
mmdet/datasets/extra_aug.py
+165
-0
mmdet/datasets/transforms.py
mmdet/datasets/transforms.py
+8
-2
mmdet/models/__init__.py
mmdet/models/__init__.py
+12
-7
mmdet/models/anchor_heads/__init__.py
mmdet/models/anchor_heads/__init__.py
+6
-0
mmdet/models/anchor_heads/anchor_head.py
mmdet/models/anchor_heads/anchor_head.py
+121
-128
mmdet/models/anchor_heads/retina_head.py
mmdet/models/anchor_heads/retina_head.py
+70
-0
mmdet/models/anchor_heads/rpn_head.py
mmdet/models/anchor_heads/rpn_head.py
+92
-0
mmdet/models/anchor_heads/ssd_head.py
mmdet/models/anchor_heads/ssd_head.py
+184
-0
mmdet/models/backbones/__init__.py
mmdet/models/backbones/__init__.py
+2
-1
mmdet/models/backbones/resnet.py
mmdet/models/backbones/resnet.py
+189
-62
mmdet/models/backbones/resnext.py
mmdet/models/backbones/resnext.py
+78
-21
mmdet/models/backbones/ssd_vgg.py
mmdet/models/backbones/ssd_vgg.py
+130
-0
mmdet/models/bbox_heads/bbox_head.py
mmdet/models/bbox_heads/bbox_head.py
+10
-2
No files found.
mmdet/core/bbox/assigners/max_iou_assigner.py
View file @
441015ea
...
...
@@ -69,7 +69,7 @@ class MaxIoUAssigner(BaseAssigner):
if
bboxes
.
shape
[
0
]
==
0
or
gt_bboxes
.
shape
[
0
]
==
0
:
raise
ValueError
(
'No gt or bboxes'
)
bboxes
=
bboxes
[:,
:
4
]
overlaps
=
bbox_overlaps
(
bboxes
,
gt_
bboxes
)
overlaps
=
bbox_overlaps
(
gt_
bboxes
,
bboxes
)
if
(
self
.
ignore_iof_thr
>
0
)
and
(
gt_bboxes_ignore
is
not
None
)
and
(
gt_bboxes_ignore
.
numel
()
>
0
):
...
...
@@ -88,8 +88,8 @@ class MaxIoUAssigner(BaseAssigner):
"""Assign w.r.t. the overlaps of bboxes with gts.
Args:
overlaps (Tensor): Overlaps between
n
bboxes and
k gt_
bboxes,
shape(
n
,
k
).
overlaps (Tensor): Overlaps between
k gt_
bboxes and
n
bboxes,
shape(
k
,
n
).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns:
...
...
@@ -98,19 +98,18 @@ class MaxIoUAssigner(BaseAssigner):
if
overlaps
.
numel
()
==
0
:
raise
ValueError
(
'No gt or proposals'
)
num_
bboxe
s
,
num_
gt
s
=
overlaps
.
size
(
0
),
overlaps
.
size
(
1
)
num_
gt
s
,
num_
bboxe
s
=
overlaps
.
size
(
0
),
overlaps
.
size
(
1
)
# 1. assign -1 by default
assigned_gt_inds
=
overlaps
.
new_full
(
(
num_bboxes
,
),
-
1
,
dtype
=
torch
.
long
)
assert
overlaps
.
size
()
==
(
num_bboxes
,
num_gts
)
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps
,
argmax_overlaps
=
overlaps
.
max
(
dim
=
1
)
max_overlaps
,
argmax_overlaps
=
overlaps
.
max
(
dim
=
0
)
# for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals
gt_max_overlaps
,
gt_argmax_overlaps
=
overlaps
.
max
(
dim
=
0
)
gt_max_overlaps
,
gt_argmax_overlaps
=
overlaps
.
max
(
dim
=
1
)
# 2. assign negative: below
if
isinstance
(
self
.
neg_iou_thr
,
float
):
...
...
@@ -129,7 +128,7 @@ class MaxIoUAssigner(BaseAssigner):
for
i
in
range
(
num_gts
):
if
gt_max_overlaps
[
i
]
>=
self
.
min_pos_iou
:
if
self
.
gt_max_assign_all
:
max_iou_inds
=
overlaps
[
:
,
i
]
==
gt_max_overlaps
[
i
]
max_iou_inds
=
overlaps
[
i
,
:
]
==
gt_max_overlaps
[
i
]
assigned_gt_inds
[
max_iou_inds
]
=
i
+
1
else
:
assigned_gt_inds
[
gt_argmax_overlaps
[
i
]]
=
i
+
1
...
...
mmdet/core/bbox/geometry.py
View file @
441015ea
...
...
@@ -16,7 +16,7 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
foreground).
Returns:
ious(Tensor): shape (
n
,
k
) if is_aligned == False else shape (
n
, 1)
ious(Tensor): shape (
m
,
n
) if is_aligned == False else shape (
m
, 1)
"""
assert
mode
in
[
'iou'
,
'iof'
]
...
...
mmdet/core/evaluation/mean_ap.py
View file @
441015ea
import
mmcv
import
numpy
as
np
from
terminaltables
import
AsciiTable
...
...
@@ -234,8 +235,9 @@ def eval_map(det_results,
gt_ignore (list): gt ignore indicators of each image, a list of K array
scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
iou_thr (float): IoU threshold
dataset (None or str): dataset name, there are minor differences in
metrics for different datsets, e.g. "voc07", "imagenet_det", etc.
dataset (None or str or list): dataset name or dataset classes, there
are minor differences in metrics for different datsets, e.g.
"voc07", "imagenet_det", etc.
print_summary (bool): whether to print the mAP summary
Returns:
...
...
@@ -333,7 +335,7 @@ def print_map_summary(mean_ap, results, dataset=None):
Args:
mean_ap(float): calculated from `eval_map`
results(list): calculated from `eval_map`
dataset(None or str or list): dataset name.
dataset(None or str or list): dataset name
or dataset classes
.
"""
num_scales
=
len
(
results
[
0
][
'ap'
])
if
isinstance
(
results
[
0
][
'ap'
],
np
.
ndarray
)
else
1
...
...
@@ -353,8 +355,10 @@ def print_map_summary(mean_ap, results, dataset=None):
if
dataset
is
None
:
label_names
=
[
str
(
i
)
for
i
in
range
(
1
,
num_classes
+
1
)]
el
se
:
el
if
mmcv
.
is_str
(
dataset
)
:
label_names
=
get_classes
(
dataset
)
else
:
label_names
=
dataset
if
not
isinstance
(
mean_ap
,
list
):
mean_ap
=
[
mean_ap
]
...
...
mmdet/core/loss/losses.py
View file @
441015ea
...
...
@@ -100,6 +100,8 @@ def accuracy(pred, target, topk=1):
if
isinstance
(
topk
,
int
):
topk
=
(
topk
,
)
return_single
=
True
else
:
return_single
=
False
maxk
=
max
(
topk
)
_
,
pred_label
=
pred
.
topk
(
maxk
,
1
,
True
,
True
)
...
...
mmdet/datasets/__init__.py
View file @
441015ea
...
...
@@ -6,9 +6,11 @@ from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from
.utils
import
to_tensor
,
random_scale
,
show_ann
,
get_dataset
from
.concat_dataset
import
ConcatDataset
from
.repeat_dataset
import
RepeatDataset
from
.extra_aug
import
ExtraAugmentation
__all__
=
[
'CustomDataset'
,
'XMLDataset'
,
'CocoDataset'
,
'VOCDataset'
,
'GroupSampler'
,
'DistributedGroupSampler'
,
'build_dataloader'
,
'to_tensor'
,
'random_scale'
,
'show_ann'
,
'get_dataset'
,
'ConcatDataset'
,
'RepeatDataset'
'show_ann'
,
'get_dataset'
,
'ConcatDataset'
,
'RepeatDataset'
,
'ExtraAugmentation'
]
mmdet/datasets/coco.py
View file @
441015ea
...
...
@@ -40,7 +40,7 @@ class CocoDataset(CustomDataset):
img_id
=
self
.
img_infos
[
idx
][
'id'
]
ann_ids
=
self
.
coco
.
getAnnIds
(
imgIds
=
[
img_id
])
ann_info
=
self
.
coco
.
loadAnns
(
ann_ids
)
return
self
.
_parse_ann_info
(
ann_info
)
return
self
.
_parse_ann_info
(
ann_info
,
self
.
with_mask
)
def
_filter_imgs
(
self
,
min_size
=
32
):
"""Filter images too small or without ground truths."""
...
...
mmdet/datasets/custom.py
View file @
441015ea
...
...
@@ -8,6 +8,7 @@ from torch.utils.data import Dataset
from
.transforms
import
(
ImageTransform
,
BboxTransform
,
MaskTransform
,
Numpy2Tensor
)
from
.utils
import
to_tensor
,
random_scale
from
.extra_aug
import
ExtraAugmentation
class
CustomDataset
(
Dataset
):
...
...
@@ -46,9 +47,12 @@ class CustomDataset(Dataset):
with_mask
=
True
,
with_crowd
=
True
,
with_label
=
True
,
extra_aug
=
None
,
resize_keep_ratio
=
True
,
test_mode
=
False
):
# prefix of images path
self
.
img_prefix
=
img_prefix
# load annotations (and proposals)
self
.
img_infos
=
self
.
load_annotations
(
ann_file
)
if
proposal_file
is
not
None
:
...
...
@@ -98,6 +102,15 @@ class CustomDataset(Dataset):
self
.
mask_transform
=
MaskTransform
()
self
.
numpy2tensor
=
Numpy2Tensor
()
# if use extra augmentation
if
extra_aug
is
not
None
:
self
.
extra_aug
=
ExtraAugmentation
(
**
extra_aug
)
else
:
self
.
extra_aug
=
None
# image rescale if keep ratio
self
.
resize_keep_ratio
=
resize_keep_ratio
def
__len__
(
self
):
return
len
(
self
.
img_infos
)
...
...
@@ -176,11 +189,17 @@ class CustomDataset(Dataset):
if
len
(
gt_bboxes
)
==
0
:
return
None
# extra augmentation
if
self
.
extra_aug
is
not
None
:
img
,
gt_bboxes
,
gt_labels
=
self
.
extra_aug
(
img
,
gt_bboxes
,
gt_labels
)
# apply transforms
flip
=
True
if
np
.
random
.
rand
()
<
self
.
flip_ratio
else
False
img_scale
=
random_scale
(
self
.
img_scales
)
# sample a scale
img
,
img_shape
,
pad_shape
,
scale_factor
=
self
.
img_transform
(
img
,
img_scale
,
flip
)
img
,
img_scale
,
flip
,
keep_ratio
=
self
.
resize_keep_ratio
)
img
=
img
.
copy
()
if
self
.
proposals
is
not
None
:
proposals
=
self
.
bbox_transform
(
proposals
,
img_shape
,
scale_factor
,
flip
)
...
...
@@ -232,7 +251,7 @@ class CustomDataset(Dataset):
def
prepare_single
(
img
,
scale
,
flip
,
proposal
=
None
):
_img
,
img_shape
,
pad_shape
,
scale_factor
=
self
.
img_transform
(
img
,
scale
,
flip
)
img
,
scale
,
flip
,
keep_ratio
=
self
.
resize_keep_ratio
)
_img
=
to_tensor
(
_img
)
_img_meta
=
dict
(
ori_shape
=
(
img_info
[
'height'
],
img_info
[
'width'
],
3
),
...
...
mmdet/datasets/extra_aug.py
0 → 100644
View file @
441015ea
import
mmcv
import
numpy
as
np
from
numpy
import
random
from
mmdet.core.evaluation.bbox_overlaps
import
bbox_overlaps
class
PhotoMetricDistortion
(
object
):
def
__init__
(
self
,
brightness_delta
=
32
,
contrast_range
=
(
0.5
,
1.5
),
saturation_range
=
(
0.5
,
1.5
),
hue_delta
=
18
):
self
.
brightness_delta
=
brightness_delta
self
.
contrast_lower
,
self
.
contrast_upper
=
contrast_range
self
.
saturation_lower
,
self
.
saturation_upper
=
saturation_range
self
.
hue_delta
=
hue_delta
def
__call__
(
self
,
img
,
boxes
,
labels
):
# random brightness
if
random
.
randint
(
2
):
delta
=
random
.
uniform
(
-
self
.
brightness_delta
,
self
.
brightness_delta
)
img
+=
delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode
=
random
.
randint
(
2
)
if
mode
==
1
:
if
random
.
randint
(
2
):
alpha
=
random
.
uniform
(
self
.
contrast_lower
,
self
.
contrast_upper
)
img
*=
alpha
# convert color from BGR to HSV
img
=
mmcv
.
bgr2hsv
(
img
)
# random saturation
if
random
.
randint
(
2
):
img
[...,
1
]
*=
random
.
uniform
(
self
.
saturation_lower
,
self
.
saturation_upper
)
# random hue
if
random
.
randint
(
2
):
img
[...,
0
]
+=
random
.
uniform
(
-
self
.
hue_delta
,
self
.
hue_delta
)
img
[...,
0
][
img
[...,
0
]
>
360
]
-=
360
img
[...,
0
][
img
[...,
0
]
<
0
]
+=
360
# convert color from HSV to BGR
img
=
mmcv
.
hsv2bgr
(
img
)
# random contrast
if
mode
==
0
:
if
random
.
randint
(
2
):
alpha
=
random
.
uniform
(
self
.
contrast_lower
,
self
.
contrast_upper
)
img
*=
alpha
# randomly swap channels
if
random
.
randint
(
2
):
img
=
img
[...,
random
.
permutation
(
3
)]
return
img
,
boxes
,
labels
class
Expand
(
object
):
def
__init__
(
self
,
mean
=
(
0
,
0
,
0
),
to_rgb
=
True
,
ratio_range
=
(
1
,
4
)):
if
to_rgb
:
self
.
mean
=
mean
[::
-
1
]
else
:
self
.
mean
=
mean
self
.
min_ratio
,
self
.
max_ratio
=
ratio_range
def
__call__
(
self
,
img
,
boxes
,
labels
):
if
random
.
randint
(
2
):
return
img
,
boxes
,
labels
h
,
w
,
c
=
img
.
shape
ratio
=
random
.
uniform
(
self
.
min_ratio
,
self
.
max_ratio
)
expand_img
=
np
.
full
((
int
(
h
*
ratio
),
int
(
w
*
ratio
),
c
),
self
.
mean
).
astype
(
img
.
dtype
)
left
=
int
(
random
.
uniform
(
0
,
w
*
ratio
-
w
))
top
=
int
(
random
.
uniform
(
0
,
h
*
ratio
-
h
))
expand_img
[
top
:
top
+
h
,
left
:
left
+
w
]
=
img
img
=
expand_img
boxes
+=
np
.
tile
((
left
,
top
),
2
)
return
img
,
boxes
,
labels
class
RandomCrop
(
object
):
def
__init__
(
self
,
min_ious
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
),
min_crop_size
=
0.3
):
# 1: return ori img
self
.
sample_mode
=
(
1
,
*
min_ious
,
0
)
self
.
min_crop_size
=
min_crop_size
def
__call__
(
self
,
img
,
boxes
,
labels
):
h
,
w
,
c
=
img
.
shape
while
True
:
mode
=
random
.
choice
(
self
.
sample_mode
)
if
mode
==
1
:
return
img
,
boxes
,
labels
min_iou
=
mode
for
i
in
range
(
50
):
new_w
=
random
.
uniform
(
self
.
min_crop_size
*
w
,
w
)
new_h
=
random
.
uniform
(
self
.
min_crop_size
*
h
,
h
)
# h / w in [0.5, 2]
if
new_h
/
new_w
<
0.5
or
new_h
/
new_w
>
2
:
continue
left
=
random
.
uniform
(
w
-
new_w
)
top
=
random
.
uniform
(
h
-
new_h
)
patch
=
np
.
array
((
int
(
left
),
int
(
top
),
int
(
left
+
new_w
),
int
(
top
+
new_h
)))
overlaps
=
bbox_overlaps
(
patch
.
reshape
(
-
1
,
4
),
boxes
.
reshape
(
-
1
,
4
)).
reshape
(
-
1
)
if
overlaps
.
min
()
<
min_iou
:
continue
# center of boxes should inside the crop img
center
=
(
boxes
[:,
:
2
]
+
boxes
[:,
2
:])
/
2
mask
=
(
center
[:,
0
]
>
patch
[
0
])
*
(
center
[:,
1
]
>
patch
[
1
])
*
(
center
[:,
0
]
<
patch
[
2
])
*
(
center
[:,
1
]
<
patch
[
3
])
if
not
mask
.
any
():
continue
boxes
=
boxes
[
mask
]
labels
=
labels
[
mask
]
# adjust boxes
img
=
img
[
patch
[
1
]:
patch
[
3
],
patch
[
0
]:
patch
[
2
]]
boxes
[:,
2
:]
=
boxes
[:,
2
:].
clip
(
max
=
patch
[
2
:])
boxes
[:,
:
2
]
=
boxes
[:,
:
2
].
clip
(
min
=
patch
[:
2
])
boxes
-=
np
.
tile
(
patch
[:
2
],
2
)
return
img
,
boxes
,
labels
class
ExtraAugmentation
(
object
):
def
__init__
(
self
,
photo_metric_distortion
=
None
,
expand
=
None
,
random_crop
=
None
):
self
.
transforms
=
[]
if
photo_metric_distortion
is
not
None
:
self
.
transforms
.
append
(
PhotoMetricDistortion
(
**
photo_metric_distortion
))
if
expand
is
not
None
:
self
.
transforms
.
append
(
Expand
(
**
expand
))
if
random_crop
is
not
None
:
self
.
transforms
.
append
(
RandomCrop
(
**
random_crop
))
def
__call__
(
self
,
img
,
boxes
,
labels
):
img
=
img
.
astype
(
np
.
float32
)
for
transform
in
self
.
transforms
:
img
,
boxes
,
labels
=
transform
(
img
,
boxes
,
labels
)
return
img
,
boxes
,
labels
mmdet/datasets/transforms.py
View file @
441015ea
...
...
@@ -25,8 +25,14 @@ class ImageTransform(object):
self
.
to_rgb
=
to_rgb
self
.
size_divisor
=
size_divisor
def
__call__
(
self
,
img
,
scale
,
flip
=
False
):
img
,
scale_factor
=
mmcv
.
imrescale
(
img
,
scale
,
return_scale
=
True
)
def
__call__
(
self
,
img
,
scale
,
flip
=
False
,
keep_ratio
=
True
):
if
keep_ratio
:
img
,
scale_factor
=
mmcv
.
imrescale
(
img
,
scale
,
return_scale
=
True
)
else
:
img
,
w_scale
,
h_scale
=
mmcv
.
imresize
(
img
,
scale
,
return_scale
=
True
)
scale_factor
=
np
.
array
([
w_scale
,
h_scale
,
w_scale
,
h_scale
],
dtype
=
np
.
float32
)
img_shape
=
img
.
shape
img
=
mmcv
.
imnormalize
(
img
,
self
.
mean
,
self
.
std
,
self
.
to_rgb
)
if
flip
:
...
...
mmdet/models/__init__.py
View file @
441015ea
from
.detectors
import
(
BaseDetector
,
TwoStageDetector
,
RPN
,
FastRCNN
,
FasterRCNN
,
MaskRCNN
)
from
.builder
import
(
build_neck
,
build_rpn_head
,
build_roi_extractor
,
build_bbox_head
,
build_mask_head
,
build_detector
)
from
.backbones
import
*
# noqa: F401,F403
from
.necks
import
*
# noqa: F401,F403
from
.roi_extractors
import
*
# noqa: F401,F403
from
.anchor_heads
import
*
# noqa: F401,F403
from
.bbox_heads
import
*
# noqa: F401,F403
from
.mask_heads
import
*
# noqa: F401,F403
from
.detectors
import
*
# noqa: F401,F403
from
.registry
import
BACKBONES
,
NECKS
,
ROI_EXTRACTORS
,
HEADS
,
DETECTORS
from
.builder
import
(
build_backbone
,
build_neck
,
build_roi_extractor
,
build_head
,
build_detector
)
__all__
=
[
'BaseDetector'
,
'TwoStageDetector'
,
'RPN'
,
'FastRCNN'
,
'FasterRCNN'
,
'MaskRCNN'
,
'build_backbone'
,
'build_neck'
,
'build_rpn_head'
,
'build_roi_extractor'
,
'build_bbox_head'
,
'build_mask_head'
,
'BACKBONES'
,
'NECKS'
,
'ROI_EXTRACTORS'
,
'HEADS'
,
'DETECTORS'
,
'build_backbone'
,
'build_neck'
,
'build_roi_extractor'
,
'build_head'
,
'build_detector'
]
mmdet/models/anchor_heads/__init__.py
0 → 100644
View file @
441015ea
from
.anchor_head
import
AnchorHead
from
.rpn_head
import
RPNHead
from
.retina_head
import
RetinaHead
from
.ssd_head
import
SSDHead
__all__
=
[
'AnchorHead'
,
'RPNHead'
,
'RetinaHead'
,
'SSDHead'
]
mmdet/models/
single_stage
_heads/
retina
_head.py
→
mmdet/models/
anchor
_heads/
anchor
_head.py
View file @
441015ea
...
...
@@ -3,114 +3,84 @@ from __future__ import division
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
normal_init
from
mmdet.core
import
(
AnchorGenerator
,
anchor_target
,
multi_apply
,
delta2bbox
,
weighted_smoothl1
,
from
mmdet.core
import
(
AnchorGenerator
,
anchor_target
,
delta2bbox
,
multi_apply
,
weighted_cross_entropy
,
weighted_smoothl1
,
weighted_binary_cross_entropy
,
weighted_sigmoid_focal_loss
,
multiclass_nms
)
from
..
utils
import
normal_init
,
bias_init_with_prob
from
..
registry
import
HEADS
class
RetinaHead
(
nn
.
Module
):
"""Head of RetinaNet.
/ cls_convs - retina_cls (3x3 conv)
input -
\ reg_convs - retina_reg (3x3 conv)
@
HEADS
.
register_module
class
AnchorHead
(
nn
.
Module
):
"""Anchor-based head (RPN, RetinaNet, SSD, etc.).
Args:
in_channels (int): Number of channels in the input feature map.
num_classes (int): Class number (including background).
stacked_convs (int): Number of convolutional layers added for cls and
reg branch.
feat_channels (int): Number of channels for the RPN feature map.
scales_per_octave (int): Number of anchor scales per octave.
octave_base_scale (int): Base octave scale. Anchor scales are computed
as `s*2^(i/n)`, for i in [0, n-1], where s is `octave_base_scale`
and n is `scales_per_octave`.
feat_channels (int): Number of channels of the feature map.
anchor_scales (Iterable): Anchor scales.
anchor_ratios (Iterable): Anchor aspect ratios.
anchor_strides (Iterable): Anchor strides.
anchor_base_sizes (Iterable): Anchor base sizes.
target_means (Iterable): Mean values of regression targets.
target_stds (Iterable): Std values of regression targets.
use_sigmoid_cls (bool): Whether to use sigmoid loss for classification.
(softmax by default)
use_focal_loss (bool): Whether to use focal loss for classification.
"""
# noqa: W605
def
__init__
(
self
,
in_channels
,
num_classes
,
stacked_convs
=
4
,
in_channels
,
feat_channels
=
256
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
anchor_scales
=
[
8
,
16
,
32
],
anchor_ratios
=
[
0.5
,
1.0
,
2.0
],
anchor_strides
=
[
8
,
16
,
32
,
64
,
128
],
anchor_strides
=
[
4
,
8
,
16
,
32
,
64
],
anchor_base_sizes
=
None
,
target_means
=
(.
0
,
.
0
,
.
0
,
.
0
),
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
)):
super
(
RetinaHead
,
self
).
__init__
()
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
),
use_sigmoid_cls
=
False
,
use_focal_loss
=
False
):
super
(
AnchorHead
,
self
).
__init__
()
self
.
in_channels
=
in_channels
self
.
num_classes
=
num_classes
self
.
octave_base_scale
=
octave_base_scale
self
.
scales_per_octave
=
scales_per_octave
self
.
feat_channels
=
feat_channels
self
.
anchor_scales
=
anchor_scales
self
.
anchor_ratios
=
anchor_ratios
self
.
anchor_strides
=
anchor_strides
self
.
anchor_base_sizes
=
list
(
anchor_strides
)
if
anchor_base_sizes
is
None
else
anchor_base_sizes
self
.
target_means
=
target_means
self
.
target_stds
=
target_stds
self
.
use_sigmoid_cls
=
use_sigmoid_cls
self
.
use_focal_loss
=
use_focal_loss
self
.
anchor_generators
=
[]
for
anchor_base
in
self
.
anchor_base_sizes
:
octave_scales
=
np
.
array
(
[
2
**
(
i
/
scales_per_octave
)
for
i
in
range
(
scales_per_octave
)])
anchor_scales
=
octave_scales
*
octave_base_scale
self
.
anchor_generators
.
append
(
AnchorGenerator
(
anchor_base
,
anchor_scales
,
anchor_ratios
))
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
num_anchors
=
int
(
len
(
self
.
anchor_ratios
)
*
self
.
scales_per_octave
)
self
.
cls_out_channels
=
self
.
num_classes
-
1
self
.
bbox_pred_dim
=
4
self
.
stacked_convs
=
stacked_convs
self
.
cls_convs
=
nn
.
ModuleList
()
self
.
reg_convs
=
nn
.
ModuleList
()
for
i
in
range
(
self
.
stacked_convs
):
chn
=
in_channels
if
i
==
0
else
feat_channels
self
.
cls_convs
.
append
(
nn
.
Conv2d
(
chn
,
feat_channels
,
3
,
stride
=
1
,
padding
=
1
))
self
.
reg_convs
.
append
(
nn
.
Conv2d
(
chn
,
feat_channels
,
3
,
stride
=
1
,
padding
=
1
))
self
.
retina_cls
=
nn
.
Conv2d
(
feat_channels
,
self
.
num_anchors
*
self
.
cls_out_channels
,
3
,
stride
=
1
,
padding
=
1
)
self
.
retina_reg
=
nn
.
Conv2d
(
feat_channels
,
self
.
num_anchors
*
self
.
bbox_pred_dim
,
3
,
stride
=
1
,
padding
=
1
)
self
.
debug_imgs
=
None
self
.
num_anchors
=
len
(
self
.
anchor_ratios
)
*
len
(
self
.
anchor_scales
)
if
self
.
use_sigmoid_cls
:
self
.
cls_out_channels
=
self
.
num_classes
-
1
else
:
self
.
cls_out_channels
=
self
.
num_classes
self
.
_init_layers
()
def
_init_layers
(
self
):
self
.
conv_cls
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
self
.
cls_out_channels
,
1
)
self
.
conv_reg
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
4
,
1
)
def
init_weights
(
self
):
for
m
in
self
.
cls_convs
:
normal_init
(
m
,
std
=
0.01
)
for
m
in
self
.
reg_convs
:
normal_init
(
m
,
std
=
0.01
)
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
retina_cls
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
retina_reg
,
std
=
0.01
)
normal_init
(
self
.
conv_cls
,
std
=
0.01
)
normal_init
(
self
.
conv_reg
,
std
=
0.01
)
def
forward_single
(
self
,
x
):
cls_feat
=
x
reg_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
self
.
relu
(
cls_conv
(
cls_feat
))
for
reg_conv
in
self
.
reg_convs
:
reg_feat
=
self
.
relu
(
reg_conv
(
reg_feat
))
cls_score
=
self
.
retina_cls
(
cls_feat
)
bbox_pred
=
self
.
retina_reg
(
reg_feat
)
cls_score
=
self
.
conv_cls
(
x
)
bbox_pred
=
self
.
conv_reg
(
x
)
return
cls_score
,
bbox_pred
def
forward
(
self
,
feats
):
...
...
@@ -156,30 +126,47 @@ class RetinaHead(nn.Module):
return
anchor_list
,
valid_flag_list
def
loss_single
(
self
,
cls_score
,
bbox_pred
,
labels
,
label_weights
,
bbox_targets
,
bbox_weights
,
num_
pos
_samples
,
cfg
):
bbox_targets
,
bbox_weights
,
num_
total
_samples
,
cfg
):
# classification loss
labels
=
labels
.
contiguous
().
view
(
-
1
,
self
.
cls_out_channels
)
label_weights
=
label_weights
.
contiguous
().
view
(
-
1
,
self
.
cls_out_channels
)
cls_score
=
cls_score
.
permute
(
0
,
2
,
3
,
1
).
contiguous
().
view
(
if
self
.
use_sigmoid_cls
:
labels
=
labels
.
reshape
(
-
1
,
self
.
cls_out_channels
)
label_weights
=
label_weights
.
reshape
(
-
1
,
self
.
cls_out_channels
)
else
:
labels
=
labels
.
reshape
(
-
1
)
label_weights
=
label_weights
.
reshape
(
-
1
)
cls_score
=
cls_score
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
self
.
cls_out_channels
)
loss_cls
=
weighted_sigmoid_focal_loss
(
cls_score
,
labels
,
label_weights
,
cfg
.
gamma
,
cfg
.
alpha
,
avg_factor
=
num_pos_samples
)
if
self
.
use_sigmoid_cls
:
if
self
.
use_focal_loss
:
cls_criterion
=
weighted_sigmoid_focal_loss
else
:
cls_criterion
=
weighted_binary_cross_entropy
else
:
if
self
.
use_focal_loss
:
raise
NotImplementedError
else
:
cls_criterion
=
weighted_cross_entropy
if
self
.
use_focal_loss
:
loss_cls
=
cls_criterion
(
cls_score
,
labels
,
label_weights
,
gamma
=
cfg
.
gamma
,
alpha
=
cfg
.
alpha
,
avg_factor
=
num_total_samples
)
else
:
loss_cls
=
cls_criterion
(
cls_score
,
labels
,
label_weights
,
avg_factor
=
num_total_samples
)
# regression loss
bbox_targets
=
bbox_targets
.
contiguous
().
view
(
-
1
,
4
)
bbox_weights
=
bbox_weights
.
contiguous
().
view
(
-
1
,
4
)
bbox_pred
=
bbox_pred
.
permute
(
0
,
2
,
3
,
1
).
contiguous
().
view
(
-
1
,
4
)
bbox_targets
=
bbox_targets
.
reshape
(
-
1
,
4
)
bbox_weights
=
bbox_weights
.
reshape
(
-
1
,
4
)
bbox_pred
=
bbox_pred
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
4
)
loss_reg
=
weighted_smoothl1
(
bbox_pred
,
bbox_targets
,
bbox_weights
,
beta
=
cfg
.
smoothl1_beta
,
avg_factor
=
num_
pos
_samples
)
avg_factor
=
num_
total
_samples
)
return
loss_cls
,
loss_reg
def
loss
(
self
,
cls_scores
,
bbox_preds
,
gt_bboxes
,
gt_labels
,
img_metas
,
...
...
@@ -189,6 +176,8 @@ class RetinaHead(nn.Module):
anchor_list
,
valid_flag_list
=
self
.
get_anchors
(
featmap_sizes
,
img_metas
)
sampling
=
False
if
self
.
use_focal_loss
else
True
label_channels
=
self
.
cls_out_channels
if
self
.
use_sigmoid_cls
else
1
cls_reg_targets
=
anchor_target
(
anchor_list
,
valid_flag_list
,
...
...
@@ -198,13 +187,14 @@ class RetinaHead(nn.Module):
self
.
target_stds
,
cfg
,
gt_labels_list
=
gt_labels
,
cls_out
_channels
=
self
.
cls_out
_channels
,
sampling
=
False
)
label
_channels
=
label
_channels
,
sampling
=
sampling
)
if
cls_reg_targets
is
None
:
return
None
(
labels_list
,
label_weights_list
,
bbox_targets_list
,
bbox_weights_list
,
num_total_pos
,
num_total_neg
)
=
cls_reg_targets
num_total_samples
=
(
num_total_pos
if
self
.
use_focal_loss
else
num_total_pos
+
num_total_neg
)
losses_cls
,
losses_reg
=
multi_apply
(
self
.
loss_single
,
cls_scores
,
...
...
@@ -213,16 +203,12 @@ class RetinaHead(nn.Module):
label_weights_list
,
bbox_targets_list
,
bbox_weights_list
,
num_
pos
_samples
=
num_total_
po
s
,
num_
total
_samples
=
num_total_
sample
s
,
cfg
=
cfg
)
return
dict
(
loss_cls
=
losses_cls
,
loss_reg
=
losses_reg
)
def
get_det_bboxes
(
self
,
cls_scores
,
bbox_preds
,
img_metas
,
cfg
,
rescale
=
False
):
def
get_bboxes
(
self
,
cls_scores
,
bbox_preds
,
img_metas
,
cfg
,
rescale
=
False
):
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
num_levels
=
len
(
cls_scores
)
...
...
@@ -231,7 +217,6 @@ class RetinaHead(nn.Module):
self
.
anchor_strides
[
i
])
for
i
in
range
(
num_levels
)
]
result_list
=
[]
for
img_id
in
range
(
len
(
img_metas
)):
cls_score_list
=
[
...
...
@@ -242,46 +227,54 @@ class RetinaHead(nn.Module):
]
img_shape
=
img_metas
[
img_id
][
'img_shape'
]
scale_factor
=
img_metas
[
img_id
][
'scale_factor'
]
result
s
=
self
.
_
get_
det_
bboxes_single
(
cls_score_list
,
bbox_pred_list
,
mlvl_anchors
,
img_shape
,
scale_factor
,
cfg
,
rescale
)
result_list
.
append
(
result
s
)
proposal
s
=
self
.
get_bboxes_single
(
cls_score_list
,
bbox_pred_list
,
mlvl_anchors
,
img_shape
,
scale_factor
,
cfg
,
rescale
)
result_list
.
append
(
proposal
s
)
return
result_list
def
_get_d
et_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_anchors
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
):
def
g
et_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_anchors
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
):
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
len
(
mlvl_anchors
)
mlvl_
proposal
s
=
[]
mlvl_
bboxe
s
=
[]
mlvl_scores
=
[]
for
cls_score
,
bbox_pred
,
anchors
in
zip
(
cls_scores
,
bbox_preds
,
mlvl_anchors
):
assert
cls_score
.
size
()[
-
2
:]
==
bbox_pred
.
size
()[
-
2
:]
cls_score
=
cls_score
.
permute
(
1
,
2
,
0
).
contiguous
().
view
(
cls_score
=
cls_score
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
self
.
cls_out_channels
)
scores
=
cls_score
.
sigmoid
()
bbox_pred
=
bbox_pred
.
permute
(
1
,
2
,
0
).
contiguous
().
view
(
-
1
,
4
)
proposals
=
delta2bbox
(
anchors
,
bbox_pred
,
self
.
target_means
,
self
.
target_stds
,
img_shape
)
if
cfg
.
nms_pre
>
0
and
scores
.
shape
[
0
]
>
cfg
.
nms_pre
:
maxscores
,
_
=
scores
.
max
(
dim
=
1
)
_
,
topk_inds
=
maxscores
.
topk
(
cfg
.
nms_pre
)
proposals
=
proposals
[
topk_inds
,
:]
if
self
.
use_sigmoid_cls
:
scores
=
cls_score
.
sigmoid
()
else
:
scores
=
cls_score
.
softmax
(
-
1
)
bbox_pred
=
bbox_pred
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
)
nms_pre
=
cfg
.
get
(
'nms_pre'
,
-
1
)
if
nms_pre
>
0
and
scores
.
shape
[
0
]
>
nms_pre
:
if
self
.
use_sigmoid_cls
:
max_scores
,
_
=
scores
.
max
(
dim
=
1
)
else
:
max_scores
,
_
=
scores
[:,
1
:].
max
(
dim
=
1
)
_
,
topk_inds
=
max_scores
.
topk
(
nms_pre
)
anchors
=
anchors
[
topk_inds
,
:]
bbox_pred
=
bbox_pred
[
topk_inds
,
:]
scores
=
scores
[
topk_inds
,
:]
mlvl_proposals
.
append
(
proposals
)
bboxes
=
delta2bbox
(
anchors
,
bbox_pred
,
self
.
target_means
,
self
.
target_stds
,
img_shape
)
mlvl_bboxes
.
append
(
bboxes
)
mlvl_scores
.
append
(
scores
)
mlvl_
proposal
s
=
torch
.
cat
(
mlvl_
proposal
s
)
mlvl_
bboxe
s
=
torch
.
cat
(
mlvl_
bboxe
s
)
if
rescale
:
mlvl_
proposals
/=
scale_factor
mlvl_
bboxes
/=
mlvl_bboxes
.
new_tensor
(
scale_factor
)
mlvl_scores
=
torch
.
cat
(
mlvl_scores
)
padding
=
mlvl_scores
.
new_zeros
(
mlvl_scores
.
shape
[
0
],
1
)
mlvl_scores
=
torch
.
cat
([
padding
,
mlvl_scores
],
dim
=
1
)
det_bboxes
,
det_labels
=
multiclass_nms
(
mlvl_proposals
,
mlvl_scores
,
cfg
.
score_thr
,
cfg
.
nms
,
cfg
.
max_per_img
)
if
self
.
use_sigmoid_cls
:
padding
=
mlvl_scores
.
new_zeros
(
mlvl_scores
.
shape
[
0
],
1
)
mlvl_scores
=
torch
.
cat
([
padding
,
mlvl_scores
],
dim
=
1
)
det_bboxes
,
det_labels
=
multiclass_
nms
(
mlvl_bboxes
,
mlvl_scores
,
cfg
.
score_thr
,
cfg
.
nms
,
cfg
.
max_per_img
)
return
det_bboxes
,
det_labels
mmdet/models/anchor_heads/retina_head.py
0 → 100644
View file @
441015ea
import
numpy
as
np
import
torch.nn
as
nn
from
mmcv.cnn
import
normal_init
from
.anchor_head
import
AnchorHead
from
..registry
import
HEADS
from
..utils
import
bias_init_with_prob
@
HEADS
.
register_module
class
RetinaHead
(
AnchorHead
):
def
__init__
(
self
,
num_classes
,
in_channels
,
stacked_convs
=
4
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
**
kwargs
):
self
.
stacked_convs
=
stacked_convs
self
.
octave_base_scale
=
octave_base_scale
self
.
scales_per_octave
=
scales_per_octave
octave_scales
=
np
.
array
(
[
2
**
(
i
/
scales_per_octave
)
for
i
in
range
(
scales_per_octave
)])
anchor_scales
=
octave_scales
*
octave_base_scale
super
(
RetinaHead
,
self
).
__init__
(
num_classes
,
in_channels
,
anchor_scales
=
anchor_scales
,
use_sigmoid_cls
=
True
,
use_focal_loss
=
True
,
**
kwargs
)
def
_init_layers
(
self
):
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
cls_convs
=
nn
.
ModuleList
()
self
.
reg_convs
=
nn
.
ModuleList
()
for
i
in
range
(
self
.
stacked_convs
):
chn
=
self
.
in_channels
if
i
==
0
else
self
.
feat_channels
self
.
cls_convs
.
append
(
nn
.
Conv2d
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
))
self
.
reg_convs
.
append
(
nn
.
Conv2d
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
))
self
.
retina_cls
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
self
.
cls_out_channels
,
3
,
padding
=
1
)
self
.
retina_reg
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
4
,
3
,
padding
=
1
)
def
init_weights
(
self
):
for
m
in
self
.
cls_convs
:
normal_init
(
m
,
std
=
0.01
)
for
m
in
self
.
reg_convs
:
normal_init
(
m
,
std
=
0.01
)
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
retina_cls
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
retina_reg
,
std
=
0.01
)
def
forward_single
(
self
,
x
):
cls_feat
=
x
reg_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
self
.
relu
(
cls_conv
(
cls_feat
))
for
reg_conv
in
self
.
reg_convs
:
reg_feat
=
self
.
relu
(
reg_conv
(
reg_feat
))
cls_score
=
self
.
retina_cls
(
cls_feat
)
bbox_pred
=
self
.
retina_reg
(
reg_feat
)
return
cls_score
,
bbox_pred
mmdet/models/anchor_heads/rpn_head.py
0 → 100644
View file @
441015ea
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
normal_init
from
mmdet.core
import
delta2bbox
from
mmdet.ops
import
nms
from
.anchor_head
import
AnchorHead
from
..registry
import
HEADS
@HEADS.register_module
class RPNHead(AnchorHead):
    """Region Proposal Network head.

    A two-class (object vs. background) specialization of ``AnchorHead``:
    one shared 3x3 conv followed by 1x1 classification and regression
    convs, plus proposal generation with per-level and (optionally)
    cross-level NMS.
    """

    def __init__(self, in_channels, **kwargs):
        # num_classes is fixed to 2: background + foreground ("objectness").
        super(RPNHead, self).__init__(2, in_channels, **kwargs)

    def _init_layers(self):
        """Build the shared conv and the two 1x1 prediction convs."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        self.rpn_cls = nn.Conv2d(self.feat_channels,
                                 self.num_anchors * self.cls_out_channels, 1)
        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)

    def init_weights(self):
        """Initialize all three convs with N(0, 0.01)."""
        normal_init(self.rpn_conv, std=0.01)
        normal_init(self.rpn_cls, std=0.01)
        normal_init(self.rpn_reg, std=0.01)

    def forward_single(self, x):
        """Predict (cls_score, bbox_pred) for one feature level."""
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        rpn_cls_score = self.rpn_cls(x)
        rpn_bbox_pred = self.rpn_reg(x)
        return rpn_cls_score, rpn_bbox_pred

    def loss(self, cls_scores, bbox_preds, gt_bboxes, img_metas, cfg):
        """Compute RPN losses; gt_labels are None since RPN is class-agnostic.

        Returns the parent losses renamed to ``loss_rpn_cls`` /
        ``loss_rpn_reg`` so they don't clash with the RCNN head's losses.
        """
        losses = super(RPNHead, self).loss(cls_scores, bbox_preds, gt_bboxes,
                                           None, img_metas, cfg)
        return dict(
            loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg'])

    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        """Generate proposals for a single image.

        Args:
            cls_scores (list): per-level classification maps (C, H, W).
            bbox_preds (list): per-level bbox delta maps (4*A, H, W).
            mlvl_anchors (list): per-level anchors.
            img_shape: image shape used to clip decoded boxes.
            scale_factor: unused here (proposals stay in input scale).
            cfg: test config with nms_pre/nms_post/nms_thr/min_bbox_size/
                max_num/nms_across_levels.

        Returns:
            Tensor: proposals of shape (n, 5) — (x1, y1, x2, y2, score).
        """
        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            # Score and delta maps must share the same spatial size.
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            anchors = mlvl_anchors[idx]
            # (C, H, W) -> (H, W, C) so a flat reshape aligns with anchors.
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                # Single logit per anchor.
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                # Two logits per anchor; take the foreground probability.
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                scores = rpn_cls_score.softmax(dim=1)[:, 1]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            # Keep only the top-k scoring anchors before the expensive NMS.
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            # Decode deltas into absolute boxes clipped to the image.
            proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                   self.target_stds, img_shape)
            if cfg.min_bbox_size > 0:
                # Drop degenerate boxes (width/height below the threshold).
                w = proposals[:, 2] - proposals[:, 0] + 1
                h = proposals[:, 3] - proposals[:, 1] + 1
                valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                           (h >= cfg.min_bbox_size)).squeeze()
                proposals = proposals[valid_inds, :]
                scores = scores[valid_inds]
            # Append the score column so NMS sees (x1, y1, x2, y2, score).
            proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
            proposals, _ = nms(proposals, cfg.nms_thr)
            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)
        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.nms_across_levels:
            # Second NMS pass over the concatenation of all levels.
            proposals, _ = nms(proposals, cfg.nms_thr)
            proposals = proposals[:cfg.max_num, :]
        else:
            # Otherwise just keep the globally highest-scoring max_num boxes.
            scores = proposals[:, 4]
            num = min(cfg.max_num, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]
        return proposals
mmdet/models/anchor_heads/ssd_head.py
0 → 100644
View file @
441015ea
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
xavier_init
from
mmdet.core
import
(
AnchorGenerator
,
anchor_target
,
weighted_smoothl1
,
multi_apply
)
from
.anchor_head
import
AnchorHead
from
..registry
import
HEADS
@HEADS.register_module
class SSDHead(AnchorHead):
    """SSD detection head (Liu et al. style multi-level anchors).

    Builds one 3x3 regression conv and one 3x3 classification conv per
    input feature level, and precomputes per-level anchor generators from
    ``basesize_ratio_range`` following the original SSD size heuristics.
    """

    def __init__(self,
                 input_size=300,
                 num_classes=81,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_strides=(8, 16, 32, 64, 100, 300),
                 basesize_ratio_range=(0.1, 0.9),
                 anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0)):
        # NOTE: deliberately skips AnchorHead.__init__ (calls nn.Module's
        # __init__ via super(AnchorHead, ...)), since this head sets up its
        # own layers and anchor generators below.
        super(AnchorHead, self).__init__()
        self.input_size = input_size
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.cls_out_channels = num_classes
        # Per level: 2 anchors per extra ratio (r and 1/r) plus 2 for the
        # square anchors at scales 1 and sqrt(max/min).
        num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
        reg_convs = []
        cls_convs = []
        for i in range(len(in_channels)):
            reg_convs.append(
                nn.Conv2d(
                    in_channels[i],
                    num_anchors[i] * 4,
                    kernel_size=3,
                    padding=1))
            cls_convs.append(
                nn.Conv2d(
                    in_channels[i],
                    num_anchors[i] * num_classes,
                    kernel_size=3,
                    padding=1))
        self.reg_convs = nn.ModuleList(reg_convs)
        self.cls_convs = nn.ModuleList(cls_convs)

        # Derive per-level min/max anchor sizes (in pixels) from the ratio
        # range, expressed in integer percent steps as in the SSD paper.
        min_ratio, max_ratio = basesize_ratio_range
        min_ratio = int(min_ratio * 100)
        max_ratio = int(max_ratio * 100)
        step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
        min_sizes = []
        max_sizes = []
        for r in range(int(min_ratio), int(max_ratio) + 1, step):
            min_sizes.append(int(input_size * r / 100))
            max_sizes.append(int(input_size * (r + step) / 100))
        # The first (finest) level uses a special smaller size that depends
        # on the dataset convention encoded in basesize_ratio_range[0].
        if input_size == 300:
            if basesize_ratio_range[0] == 0.15:  # SSD300 COCO
                min_sizes.insert(0, int(input_size * 7 / 100))
                max_sizes.insert(0, int(input_size * 15 / 100))
            elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC
                min_sizes.insert(0, int(input_size * 10 / 100))
                max_sizes.insert(0, int(input_size * 20 / 100))
        elif input_size == 512:
            if basesize_ratio_range[0] == 0.1:  # SSD512 COCO
                min_sizes.insert(0, int(input_size * 4 / 100))
                max_sizes.insert(0, int(input_size * 10 / 100))
            elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC
                min_sizes.insert(0, int(input_size * 7 / 100))
                max_sizes.insert(0, int(input_size * 15 / 100))
        self.anchor_generators = []
        self.anchor_strides = anchor_strides
        for k in range(len(anchor_strides)):
            base_size = min_sizes[k]
            stride = anchor_strides[k]
            # Center anchors on pixel centers of the stride grid.
            ctr = ((stride - 1) / 2., (stride - 1) / 2.)
            scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
            ratios = [1.]
            for r in anchor_ratios[k]:
                ratios += [1 / r, r]  # 4 or 6 ratio
            anchor_generator = AnchorGenerator(
                base_size, scales, ratios, scale_major=False, ctr=ctr)
            # Reorder base anchors so the large square anchor (the last
            # generated one) comes second, matching the canonical SSD order.
            indices = list(range(len(ratios)))
            indices.insert(1, len(indices))
            anchor_generator.base_anchors = torch.index_select(
                anchor_generator.base_anchors, 0, torch.LongTensor(indices))
            self.anchor_generators.append(anchor_generator)

        self.target_means = target_means
        self.target_stds = target_stds
        # SSD uses plain softmax cross-entropy with hard negative mining.
        self.use_sigmoid_cls = False
        self.use_focal_loss = False

    def init_weights(self):
        """Xavier-uniform init for every conv, zero bias."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m, distribution='uniform', bias=0)

    def forward(self, feats):
        """Apply the per-level cls/reg convs; returns two lists of maps."""
        cls_scores = []
        bbox_preds = []
        for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,
                                            self.cls_convs):
            cls_scores.append(cls_conv(feat))
            bbox_preds.append(reg_conv(feat))
        return cls_scores, bbox_preds

    def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                    bbox_targets, bbox_weights, num_total_samples, cfg):
        """Loss for one image: CE with hard negative mining + smooth L1.

        Negatives are mined by keeping the ``neg_pos_ratio`` * num_pos
        highest-loss background anchors.
        """
        loss_cls_all = F.cross_entropy(
            cls_score, labels, reduction='none') * label_weights
        pos_inds = (labels > 0).nonzero().view(-1)
        neg_inds = (labels == 0).nonzero().view(-1)

        num_pos_samples = pos_inds.size(0)
        num_neg_samples = cfg.neg_pos_ratio * num_pos_samples
        if num_neg_samples > neg_inds.size(0):
            num_neg_samples = neg_inds.size(0)
        # Hardest negatives = highest per-anchor classification loss.
        topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)
        loss_cls_pos = loss_cls_all[pos_inds].sum()
        loss_cls_neg = topk_loss_cls_neg.sum()
        loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples

        loss_reg = weighted_smoothl1(
            bbox_pred,
            bbox_targets,
            bbox_weights,
            beta=cfg.smoothl1_beta,
            avg_factor=num_total_samples)
        # [None] keeps a batch-like leading dim for multi_apply aggregation.
        return loss_cls[None], loss_reg

    def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
             cfg):
        """Compute SSD losses over the whole batch.

        Targets are computed without sampling (sampling=False) and then all
        levels are flattened/concatenated so loss_single can run per image.
        Returns None if target assignment fails.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == len(self.anchor_generators)

        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas)
        cls_reg_targets = anchor_target(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            self.target_means,
            self.target_stds,
            cfg,
            gt_labels_list=gt_labels,
            label_channels=1,
            sampling=False,
            unmap_outputs=False)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets

        num_images = len(img_metas)
        # Flatten every level to (num_images, num_anchors, C) and cat levels.
        all_cls_scores = torch.cat([
            s.permute(0, 2, 3, 1).reshape(num_images, -1,
                                          self.cls_out_channels)
            for s in cls_scores
        ], 1)
        all_labels = torch.cat(labels_list, -1).view(num_images, -1)
        all_label_weights = torch.cat(label_weights_list,
                                      -1).view(num_images, -1)
        all_bbox_preds = torch.cat([
            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)
            for b in bbox_preds
        ], -2)
        all_bbox_targets = torch.cat(bbox_targets_list,
                                     -2).view(num_images, -1, 4)
        all_bbox_weights = torch.cat(bbox_weights_list,
                                     -2).view(num_images, -1, 4)

        losses_cls, losses_reg = multi_apply(
            self.loss_single,
            all_cls_scores,
            all_bbox_preds,
            all_labels,
            all_label_weights,
            all_bbox_targets,
            all_bbox_weights,
            num_total_samples=num_total_pos,
            cfg=cfg)
        return dict(loss_cls=losses_cls, loss_reg=losses_reg)
mmdet/models/backbones/__init__.py
View file @
441015ea
from .resnet import ResNet
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG

# Public API of the backbones package. The pre-merge duplicate __all__
# (which omitted SSDVGG and was immediately overwritten) has been removed.
__all__ = ['ResNet', 'ResNeXt', 'SSDVGG']
mmdet/models/backbones/resnet.py
View file @
441015ea
...
...
@@ -6,6 +6,10 @@ import torch.utils.checkpoint as cp
from
mmcv.cnn
import
constant_init
,
kaiming_init
from
mmcv.runner
import
load_checkpoint
from
mmdet.ops
import
DeformConv
,
ModulatedDeformConv
from
..registry
import
BACKBONES
from
..utils
import
build_norm_layer
def
conv3x3
(
in_planes
,
out_planes
,
stride
=
1
,
dilation
=
1
):
"3x3 convolution with padding"
...
...
@@ -29,27 +33,41 @@ class BasicBlock(nn.Module):
dilation
=
1
,
downsample
=
None
,
style
=
'pytorch'
,
with_cp
=
False
):
with_cp
=
False
,
normalize
=
dict
(
type
=
'BN'
)):
super
(
BasicBlock
,
self
).
__init__
()
self
.
norm1_name
,
norm1
=
build_norm_layer
(
normalize
,
planes
,
postfix
=
1
)
self
.
norm2_name
,
norm2
=
build_norm_layer
(
normalize
,
planes
,
postfix
=
2
)
self
.
conv1
=
conv3x3
(
inplanes
,
planes
,
stride
,
dilation
)
self
.
bn1
=
nn
.
BatchNorm2d
(
planes
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
add_module
(
self
.
norm1_name
,
norm1
)
self
.
conv2
=
conv3x3
(
planes
,
planes
)
self
.
bn2
=
nn
.
BatchNorm2d
(
planes
)
self
.
add_module
(
self
.
norm2_name
,
norm2
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
downsample
=
downsample
self
.
stride
=
stride
self
.
dilation
=
dilation
assert
not
with_cp
@
property
def
norm1
(
self
):
return
getattr
(
self
,
self
.
norm1_name
)
@
property
def
norm2
(
self
):
return
getattr
(
self
,
self
.
norm2_name
)
def
forward
(
self
,
x
):
identity
=
x
out
=
self
.
conv1
(
x
)
out
=
self
.
b
n1
(
out
)
out
=
self
.
n
orm
1
(
out
)
out
=
self
.
relu
(
out
)
out
=
self
.
conv2
(
out
)
out
=
self
.
b
n2
(
out
)
out
=
self
.
n
orm
2
(
out
)
if
self
.
downsample
is
not
None
:
identity
=
self
.
downsample
(
x
)
...
...
@@ -70,46 +88,101 @@ class Bottleneck(nn.Module):
dilation
=
1
,
downsample
=
None
,
style
=
'pytorch'
,
with_cp
=
False
):
with_cp
=
False
,
normalize
=
dict
(
type
=
'BN'
),
dcn
=
None
):
"""Bottleneck block for ResNet.
If style is "pytorch", the stride-two layer is the 3x3 conv layer,
if it is "caffe", the stride-two layer is the first 1x1 conv layer.
"""
super
(
Bottleneck
,
self
).
__init__
()
assert
style
in
[
'pytorch'
,
'caffe'
]
assert
dcn
is
None
or
isinstance
(
dcn
,
dict
)
self
.
inplanes
=
inplanes
self
.
planes
=
planes
self
.
normalize
=
normalize
self
.
dcn
=
dcn
self
.
with_dcn
=
dcn
is
not
None
if
style
==
'pytorch'
:
self
.
conv1_stride
=
1
self
.
conv2_stride
=
stride
else
:
self
.
conv1_stride
=
stride
self
.
conv2_stride
=
1
self
.
norm1_name
,
norm1
=
build_norm_layer
(
normalize
,
planes
,
postfix
=
1
)
self
.
norm2_name
,
norm2
=
build_norm_layer
(
normalize
,
planes
,
postfix
=
2
)
self
.
norm3_name
,
norm3
=
build_norm_layer
(
normalize
,
planes
*
self
.
expansion
,
postfix
=
3
)
self
.
conv1
=
nn
.
Conv2d
(
inplanes
,
planes
,
kernel_size
=
1
,
stride
=
self
.
conv1_stride
,
bias
=
False
)
self
.
conv2
=
nn
.
Conv2d
(
planes
,
planes
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
dilation
,
dilation
=
dilation
,
bias
=
False
)
self
.
bn1
=
nn
.
BatchNorm2d
(
planes
)
self
.
bn2
=
nn
.
BatchNorm2d
(
planes
)
self
.
add_module
(
self
.
norm1_name
,
norm1
)
fallback_on_stride
=
False
self
.
with_modulated_dcn
=
False
if
self
.
with_dcn
:
fallback_on_stride
=
dcn
.
get
(
'fallback_on_stride'
,
False
)
self
.
with_modulated_dcn
=
dcn
.
get
(
'modulated'
,
False
)
if
not
self
.
with_dcn
or
fallback_on_stride
:
self
.
conv2
=
nn
.
Conv2d
(
planes
,
planes
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
dilation
,
dilation
=
dilation
,
bias
=
False
)
else
:
deformable_groups
=
dcn
.
get
(
'deformable_groups'
,
1
)
if
not
self
.
with_modulated_dcn
:
conv_op
=
DeformConv
offset_channels
=
18
else
:
conv_op
=
ModulatedDeformConv
offset_channels
=
27
self
.
conv2_offset
=
nn
.
Conv2d
(
planes
,
deformable_groups
*
offset_channels
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
dilation
,
dilation
=
dilation
)
self
.
conv2
=
conv_op
(
planes
,
planes
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
dilation
,
dilation
=
dilation
,
deformable_groups
=
deformable_groups
,
bias
=
False
)
self
.
add_module
(
self
.
norm2_name
,
norm2
)
self
.
conv3
=
nn
.
Conv2d
(
planes
,
planes
*
self
.
expansion
,
kernel_size
=
1
,
bias
=
False
)
self
.
bn3
=
nn
.
BatchNorm2d
(
planes
*
self
.
expansion
)
self
.
add_module
(
self
.
norm3_name
,
norm3
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
downsample
=
downsample
self
.
stride
=
stride
self
.
dilation
=
dilation
self
.
with_cp
=
with_cp
self
.
normalize
=
normalize
@
property
def
norm1
(
self
):
return
getattr
(
self
,
self
.
norm1_name
)
@
property
def
norm2
(
self
):
return
getattr
(
self
,
self
.
norm2_name
)
@
property
def
norm3
(
self
):
return
getattr
(
self
,
self
.
norm3_name
)
def
forward
(
self
,
x
):
...
...
@@ -117,15 +190,24 @@ class Bottleneck(nn.Module):
identity
=
x
out
=
self
.
conv1
(
x
)
out
=
self
.
b
n1
(
out
)
out
=
self
.
n
orm
1
(
out
)
out
=
self
.
relu
(
out
)
out
=
self
.
conv2
(
out
)
out
=
self
.
bn2
(
out
)
if
not
self
.
with_dcn
:
out
=
self
.
conv2
(
out
)
elif
self
.
with_modulated_dcn
:
offset_mask
=
self
.
conv2_offset
(
out
)
offset
=
offset_mask
[:,
:
18
,
:,
:]
mask
=
offset_mask
[:,
-
9
:,
:,
:].
sigmoid
()
out
=
self
.
conv2
(
out
,
offset
,
mask
)
else
:
offset
=
self
.
conv2_offset
(
out
)
out
=
self
.
conv2
(
out
,
offset
)
out
=
self
.
norm2
(
out
)
out
=
self
.
relu
(
out
)
out
=
self
.
conv3
(
out
)
out
=
self
.
b
n3
(
out
)
out
=
self
.
n
orm
3
(
out
)
if
self
.
downsample
is
not
None
:
identity
=
self
.
downsample
(
x
)
...
...
@@ -151,7 +233,9 @@ def make_res_layer(block,
stride
=
1
,
dilation
=
1
,
style
=
'pytorch'
,
with_cp
=
False
):
with_cp
=
False
,
normalize
=
dict
(
type
=
'BN'
),
dcn
=
None
):
downsample
=
None
if
stride
!=
1
or
inplanes
!=
planes
*
block
.
expansion
:
downsample
=
nn
.
Sequential
(
...
...
@@ -161,7 +245,7 @@ def make_res_layer(block,
kernel_size
=
1
,
stride
=
stride
,
bias
=
False
),
nn
.
BatchNorm2d
(
planes
*
block
.
expansion
),
build_norm_layer
(
normalize
,
planes
*
block
.
expansion
)
[
1
]
,
)
layers
=
[]
...
...
@@ -173,15 +257,26 @@ def make_res_layer(block,
dilation
,
downsample
,
style
=
style
,
with_cp
=
with_cp
))
with_cp
=
with_cp
,
normalize
=
normalize
,
dcn
=
dcn
))
inplanes
=
planes
*
block
.
expansion
for
i
in
range
(
1
,
blocks
):
layers
.
append
(
block
(
inplanes
,
planes
,
1
,
dilation
,
style
=
style
,
with_cp
=
with_cp
))
block
(
inplanes
,
planes
,
1
,
dilation
,
style
=
style
,
with_cp
=
with_cp
,
normalize
=
normalize
,
dcn
=
dcn
))
return
nn
.
Sequential
(
*
layers
)
@
BACKBONES
.
register_module
class
ResNet
(
nn
.
Module
):
"""ResNet backbone.
...
...
@@ -196,11 +291,14 @@ class ResNet(nn.Module):
the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters.
bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
running stats (mean and var).
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
normalize (dict): dictionary to construct and config norm layer.
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed.
zero_init_residual (bool): whether to use zero init for last norm layer
in resblocks to let them behave as identity.
"""
arch_settings
=
{
...
...
@@ -219,9 +317,12 @@ class ResNet(nn.Module):
out_indices
=
(
0
,
1
,
2
,
3
),
style
=
'pytorch'
,
frozen_stages
=-
1
,
bn_eval
=
True
,
bn_frozen
=
False
,
with_cp
=
False
):
normalize
=
dict
(
type
=
'BN'
,
frozen
=
False
),
norm_eval
=
True
,
dcn
=
None
,
stage_with_dcn
=
(
False
,
False
,
False
,
False
),
with_cp
=
False
,
zero_init_residual
=
True
):
super
(
ResNet
,
self
).
__init__
()
if
depth
not
in
self
.
arch_settings
:
raise
KeyError
(
'invalid depth {} for resnet'
.
format
(
depth
))
...
...
@@ -230,29 +331,29 @@ class ResNet(nn.Module):
assert
num_stages
>=
1
and
num_stages
<=
4
self
.
strides
=
strides
self
.
dilations
=
dilations
assert
len
(
strides
)
==
len
(
dilations
)
==
num_stages
assert
len
(
strides
)
==
len
(
dilations
)
==
len
(
stage_with_dcn
)
==
num_stages
self
.
out_indices
=
out_indices
assert
max
(
out_indices
)
<
num_stages
self
.
style
=
style
self
.
frozen_stages
=
frozen_stages
self
.
bn_eval
=
bn_eval
self
.
bn_frozen
=
bn_frozen
self
.
normalize
=
normalize
self
.
with_cp
=
with_cp
self
.
norm_eval
=
norm_eval
self
.
dcn
=
dcn
self
.
stage_with_dcn
=
stage_with_dcn
self
.
zero_init_residual
=
zero_init_residual
self
.
block
,
stage_blocks
=
self
.
arch_settings
[
depth
]
self
.
stage_blocks
=
stage_blocks
[:
num_stages
]
self
.
inplanes
=
64
self
.
conv1
=
nn
.
Conv2d
(
3
,
64
,
kernel_size
=
7
,
stride
=
2
,
padding
=
3
,
bias
=
False
)
self
.
bn1
=
nn
.
BatchNorm2d
(
64
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
maxpool
=
nn
.
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
)
self
.
_make_stem_layer
()
self
.
res_layers
=
[]
for
i
,
num_blocks
in
enumerate
(
self
.
stage_blocks
):
stride
=
strides
[
i
]
dilation
=
dilations
[
i
]
dcn
=
self
.
dcn
if
self
.
stage_with_dcn
[
i
]
else
None
planes
=
64
*
2
**
i
res_layer
=
make_res_layer
(
self
.
block
,
...
...
@@ -262,15 +363,43 @@ class ResNet(nn.Module):
stride
=
stride
,
dilation
=
dilation
,
style
=
self
.
style
,
with_cp
=
with_cp
)
with_cp
=
with_cp
,
normalize
=
normalize
,
dcn
=
dcn
)
self
.
inplanes
=
planes
*
self
.
block
.
expansion
layer_name
=
'layer{}'
.
format
(
i
+
1
)
self
.
add_module
(
layer_name
,
res_layer
)
self
.
res_layers
.
append
(
layer_name
)
self
.
_freeze_stages
()
self
.
feat_dim
=
self
.
block
.
expansion
*
64
*
2
**
(
len
(
self
.
stage_blocks
)
-
1
)
@
property
def
norm1
(
self
):
return
getattr
(
self
,
self
.
norm1_name
)
def
_make_stem_layer
(
self
):
self
.
conv1
=
nn
.
Conv2d
(
3
,
64
,
kernel_size
=
7
,
stride
=
2
,
padding
=
3
,
bias
=
False
)
self
.
norm1_name
,
norm1
=
build_norm_layer
(
self
.
normalize
,
64
,
postfix
=
1
)
self
.
add_module
(
self
.
norm1_name
,
norm1
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
maxpool
=
nn
.
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
)
def
_freeze_stages
(
self
):
if
self
.
frozen_stages
>=
0
:
for
m
in
[
self
.
conv1
,
self
.
norm1
]:
for
param
in
m
.
parameters
():
param
.
requires_grad
=
False
for
i
in
range
(
1
,
self
.
frozen_stages
+
1
):
m
=
getattr
(
self
,
'layer{}'
.
format
(
i
))
for
param
in
m
.
parameters
():
param
.
requires_grad
=
False
def
init_weights
(
self
,
pretrained
=
None
):
if
isinstance
(
pretrained
,
str
):
logger
=
logging
.
getLogger
()
...
...
@@ -279,14 +408,27 @@ class ResNet(nn.Module):
for
m
in
self
.
modules
():
if
isinstance
(
m
,
nn
.
Conv2d
):
kaiming_init
(
m
)
elif
isinstance
(
m
,
nn
.
BatchNorm2d
):
elif
isinstance
(
m
,
(
nn
.
BatchNorm2d
,
nn
.
GroupNorm
)
):
constant_init
(
m
,
1
)
if
self
.
dcn
is
not
None
:
for
m
in
self
.
modules
():
if
isinstance
(
m
,
Bottleneck
)
and
hasattr
(
m
,
'conv2_offset'
):
constant_init
(
m
.
conv2_offset
,
0
)
if
self
.
zero_init_residual
:
for
m
in
self
.
modules
():
if
isinstance
(
m
,
Bottleneck
):
constant_init
(
m
.
norm3
,
0
)
elif
isinstance
(
m
,
BasicBlock
):
constant_init
(
m
.
norm2
,
0
)
else
:
raise
TypeError
(
'pretrained must be a str or None'
)
def
forward
(
self
,
x
):
x
=
self
.
conv1
(
x
)
x
=
self
.
b
n1
(
x
)
x
=
self
.
n
orm
1
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
maxpool
(
x
)
outs
=
[]
...
...
@@ -302,23 +444,8 @@ class ResNet(nn.Module):
def
train
(
self
,
mode
=
True
):
super
(
ResNet
,
self
).
train
(
mode
)
if
self
.
b
n_eval
:
if
mode
and
self
.
n
orm
_eval
:
for
m
in
self
.
modules
():
# trick: eval have effect on BatchNorm only
if
isinstance
(
m
,
nn
.
BatchNorm2d
):
m
.
eval
()
if
self
.
bn_frozen
:
for
params
in
m
.
parameters
():
params
.
requires_grad
=
False
if
mode
and
self
.
frozen_stages
>=
0
:
for
param
in
self
.
conv1
.
parameters
():
param
.
requires_grad
=
False
for
param
in
self
.
bn1
.
parameters
():
param
.
requires_grad
=
False
self
.
bn1
.
eval
()
self
.
bn1
.
weight
.
requires_grad
=
False
self
.
bn1
.
bias
.
requires_grad
=
False
for
i
in
range
(
1
,
self
.
frozen_stages
+
1
):
mod
=
getattr
(
self
,
'layer{}'
.
format
(
i
))
mod
.
eval
()
for
param
in
mod
.
parameters
():
param
.
requires_grad
=
False
mmdet/models/backbones/resnext.py
View file @
441015ea
...
...
@@ -2,8 +2,11 @@ import math
import
torch.nn
as
nn
from
.resnet
import
ResNet
from
mmdet.ops
import
DeformConv
,
ModulatedDeformConv
from
.resnet
import
Bottleneck
as
_Bottleneck
from
.resnet
import
ResNet
from
..registry
import
BACKBONES
from
..utils
import
build_norm_layer
class
Bottleneck
(
_Bottleneck
):
...
...
@@ -20,26 +23,65 @@ class Bottleneck(_Bottleneck):
else
:
width
=
math
.
floor
(
self
.
planes
*
(
base_width
/
64
))
*
groups
self
.
norm1_name
,
norm1
=
build_norm_layer
(
self
.
normalize
,
width
,
postfix
=
1
)
self
.
norm2_name
,
norm2
=
build_norm_layer
(
self
.
normalize
,
width
,
postfix
=
2
)
self
.
norm3_name
,
norm3
=
build_norm_layer
(
self
.
normalize
,
self
.
planes
*
self
.
expansion
,
postfix
=
3
)
self
.
conv1
=
nn
.
Conv2d
(
self
.
inplanes
,
width
,
kernel_size
=
1
,
stride
=
self
.
conv1_stride
,
bias
=
False
)
self
.
bn1
=
nn
.
BatchNorm2d
(
width
)
self
.
conv2
=
nn
.
Conv2d
(
width
,
width
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
self
.
dilation
,
dilation
=
self
.
dilation
,
groups
=
groups
,
bias
=
False
)
self
.
bn2
=
nn
.
BatchNorm2d
(
width
)
self
.
add_module
(
self
.
norm1_name
,
norm1
)
fallback_on_stride
=
False
self
.
with_modulated_dcn
=
False
if
self
.
with_dcn
:
fallback_on_stride
=
self
.
dcn
.
get
(
'fallback_on_stride'
,
False
)
self
.
with_modulated_dcn
=
self
.
dcn
.
get
(
'modulated'
,
False
)
if
not
self
.
with_dcn
or
fallback_on_stride
:
self
.
conv2
=
nn
.
Conv2d
(
width
,
width
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
self
.
dilation
,
dilation
=
self
.
dilation
,
groups
=
groups
,
bias
=
False
)
else
:
groups
=
self
.
dcn
.
get
(
'groups'
,
1
)
deformable_groups
=
self
.
dcn
.
get
(
'deformable_groups'
,
1
)
if
not
self
.
with_modulated_dcn
:
conv_op
=
DeformConv
offset_channels
=
18
else
:
conv_op
=
ModulatedDeformConv
offset_channels
=
27
self
.
conv2_offset
=
nn
.
Conv2d
(
width
,
deformable_groups
*
offset_channels
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
self
.
dilation
,
dilation
=
self
.
dilation
)
self
.
conv2
=
conv_op
(
width
,
width
,
kernel_size
=
3
,
stride
=
self
.
conv2_stride
,
padding
=
self
.
dilation
,
dilation
=
self
.
dilation
,
groups
=
groups
,
deformable_groups
=
deformable_groups
,
bias
=
False
)
self
.
add_module
(
self
.
norm2_name
,
norm2
)
self
.
conv3
=
nn
.
Conv2d
(
width
,
self
.
planes
*
self
.
expansion
,
kernel_size
=
1
,
bias
=
False
)
self
.
bn3
=
nn
.
BatchNorm2d
(
self
.
planes
*
self
.
expansion
)
self
.
add_module
(
self
.
norm3_name
,
norm3
)
def
make_res_layer
(
block
,
...
...
@@ -51,7 +93,9 @@ def make_res_layer(block,
groups
=
1
,
base_width
=
4
,
style
=
'pytorch'
,
with_cp
=
False
):
with_cp
=
False
,
normalize
=
dict
(
type
=
'BN'
),
dcn
=
None
):
downsample
=
None
if
stride
!=
1
or
inplanes
!=
planes
*
block
.
expansion
:
downsample
=
nn
.
Sequential
(
...
...
@@ -61,7 +105,7 @@ def make_res_layer(block,
kernel_size
=
1
,
stride
=
stride
,
bias
=
False
),
nn
.
BatchNorm2d
(
planes
*
block
.
expansion
),
build_norm_layer
(
normalize
,
planes
*
block
.
expansion
)
[
1
]
,
)
layers
=
[]
...
...
@@ -75,7 +119,9 @@ def make_res_layer(block,
groups
=
groups
,
base_width
=
base_width
,
style
=
style
,
with_cp
=
with_cp
))
with_cp
=
with_cp
,
normalize
=
normalize
,
dcn
=
dcn
))
inplanes
=
planes
*
block
.
expansion
for
i
in
range
(
1
,
blocks
):
layers
.
append
(
...
...
@@ -87,11 +133,14 @@ def make_res_layer(block,
groups
=
groups
,
base_width
=
base_width
,
style
=
style
,
with_cp
=
with_cp
))
with_cp
=
with_cp
,
normalize
=
normalize
,
dcn
=
dcn
))
return
nn
.
Sequential
(
*
layers
)
@
BACKBONES
.
register_module
class
ResNeXt
(
ResNet
):
"""ResNeXt backbone.
...
...
@@ -108,11 +157,14 @@ class ResNeXt(ResNet):
the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters.
bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
running stats (mean and var).
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
normalize (dict): dictionary to construct and config norm layer.
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed.
zero_init_residual (bool): whether to use zero init for last norm layer
in resblocks to let them behave as identity.
"""
arch_settings
=
{
...
...
@@ -131,6 +183,7 @@ class ResNeXt(ResNet):
for
i
,
num_blocks
in
enumerate
(
self
.
stage_blocks
):
stride
=
self
.
strides
[
i
]
dilation
=
self
.
dilations
[
i
]
dcn
=
self
.
dcn
if
self
.
stage_with_dcn
[
i
]
else
None
planes
=
64
*
2
**
i
res_layer
=
make_res_layer
(
self
.
block
,
...
...
@@ -142,8 +195,12 @@ class ResNeXt(ResNet):
groups
=
self
.
groups
,
base_width
=
self
.
base_width
,
style
=
self
.
style
,
with_cp
=
self
.
with_cp
)
with_cp
=
self
.
with_cp
,
normalize
=
self
.
normalize
,
dcn
=
dcn
)
self
.
inplanes
=
planes
*
self
.
block
.
expansion
layer_name
=
'layer{}'
.
format
(
i
+
1
)
self
.
add_module
(
layer_name
,
res_layer
)
self
.
res_layers
.
append
(
layer_name
)
self
.
_freeze_stages
()
mmdet/models/backbones/ssd_vgg.py
0 → 100644
View file @
441015ea
import
logging
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
(
VGG
,
xavier_init
,
constant_init
,
kaiming_init
,
normal_init
)
from
mmcv.runner
import
load_checkpoint
from
..registry
import
BACKBONES
@BACKBONES.register_module
class SSDVGG(VGG):
    """VGG backbone adapted for SSD.

    Extends mmcv's VGG with: a final stride-1 max pool, dilated fc6/fc7
    replacement convs (conv6/conv7), a chain of extra downsampling convs,
    and an L2Norm layer applied to the first output feature map.
    """

    # Channel plan of the extra layers for each input size; 'S' marks that
    # the *next* conv uses stride 2 (see _make_extra_layers).
    extra_setting = {
        300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
        512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256,
              128),
    }

    def __init__(self,
                 input_size,
                 depth,
                 with_last_pool=False,
                 ceil_mode=True,
                 out_indices=(3, 4),
                 out_feature_indices=(22, 34),
                 l2_norm_scale=20.):
        # TODO: update this to the relative import when mmcv VGG is updated
        super(SSDVGG, self).__init__(
            depth,
            with_last_pool=with_last_pool,
            ceil_mode=ceil_mode,
            out_indices=out_indices)
        assert input_size in (300, 512)
        self.input_size = input_size

        # pool5 (stride 1 keeps resolution), then fc6/fc7 as dilated convs.
        self.features.add_module(
            str(len(self.features)),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
        self.features.add_module(
            str(len(self.features)),
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
        self.features.add_module(
            str(len(self.features)), nn.ReLU(inplace=True))
        self.features.add_module(
            str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))
        self.features.add_module(
            str(len(self.features)), nn.ReLU(inplace=True))
        # Indices (into self.features) after which a feature map is emitted.
        self.out_feature_indices = out_feature_indices

        self.inplanes = 1024
        self.extra = self._make_extra_layers(self.extra_setting[input_size])
        # L2-normalize the first (highest-resolution) output feature map;
        # channel count taken from the conv just before that output point.
        self.l2_norm = L2Norm(
            self.features[out_feature_indices[0] - 1].out_channels,
            l2_norm_scale)

    def init_weights(self, pretrained=None):
        """Load a checkpoint (str path) or initialize from scratch.

        From scratch: Kaiming for convs, constant 1 for BN, N(0, 0.01) for
        linears in the VGG part; Xavier-uniform for the extra convs; the
        L2Norm weight is set to its configured scale.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.features.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

        for m in self.extra.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m, distribution='uniform')

        constant_init(self.l2_norm, self.l2_norm.scale)

    def forward(self, x):
        """Return the multi-level feature maps used by the SSD head."""
        outs = []
        for i, layer in enumerate(self.features):
            x = layer(x)
            if i in self.out_feature_indices:
                outs.append(x)
        # Every second extra conv (after its ReLU) contributes an output.
        for i, layer in enumerate(self.extra):
            x = F.relu(layer(x), inplace=True)
            if i % 2 == 1:
                outs.append(x)
        outs[0] = self.l2_norm(outs[0])
        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def _make_extra_layers(self, outplanes):
        """Build the extra conv chain from the ``extra_setting`` spec.

        Kernel sizes alternate 1 and 3; an 'S' entry means the following
        conv uses stride 2 / padding 1. Mutates self.inplanes as it walks
        the spec. For input_size 512 a final 4x4 conv is appended.
        """
        layers = []
        kernel_sizes = (1, 3)
        num_layers = 0
        outplane = None
        for i in range(len(outplanes)):
            if self.inplanes == 'S':
                # Previous entry was the 'S' marker itself; the real input
                # channel count is the outplane chosen in that iteration.
                self.inplanes = outplane
                continue
            k = kernel_sizes[num_layers % 2]
            if outplanes[i] == 'S':
                outplane = outplanes[i + 1]
                conv = nn.Conv2d(
                    self.inplanes, outplane, k, stride=2, padding=1)
            else:
                outplane = outplanes[i]
                conv = nn.Conv2d(
                    self.inplanes, outplane, k, stride=1, padding=0)
            layers.append(conv)
            self.inplanes = outplanes[i]
            num_layers += 1
        if self.input_size == 512:
            layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))

        return nn.Sequential(*layers)
)
class L2Norm(nn.Module):
    """Channel-wise L2 normalization with a learnable per-channel gain.

    Normalizes each spatial position of an NCHW tensor to unit L2 norm
    across the channel dimension, then rescales every channel by a
    learnable weight. The weight tensor is created uninitialized; the
    owning module is expected to fill it (SSDVGG sets it to ``scale``
    in its ``init_weights``).
    """

    def __init__(self, n_dims, scale=20., eps=1e-10):
        super(L2Norm, self).__init__()
        self.n_dims = n_dims
        # One gain value per channel.
        self.weight = nn.Parameter(torch.Tensor(self.n_dims))
        # Small constant added to the norm to avoid division by zero.
        self.eps = eps
        self.scale = scale

    def forward(self, x):
        # L2 norm over channels, kept broadcastable: shape (N, 1, H, W).
        l2 = x.pow(2).sum(1, keepdim=True).sqrt() + self.eps
        # Broadcast the per-channel gain over batch and spatial dims.
        gain = self.weight[None, :, None, None].expand_as(x)
        return gain * x / l2
mmdet/models/bbox_heads/bbox_head.py
View file @
441015ea
...
...
@@ -4,8 +4,10 @@ import torch.nn.functional as F
from
mmdet.core
import
(
delta2bbox
,
multiclass_nms
,
bbox_target
,
weighted_cross_entropy
,
weighted_smoothl1
,
accuracy
)
from
..registry
import
HEADS
@
HEADS
.
register_module
class
BBoxHead
(
nn
.
Module
):
"""Simplest RoI head, with only two fc layers for classification and
regression respectively"""
...
...
@@ -78,8 +80,14 @@ class BBoxHead(nn.Module):
target_stds
=
self
.
target_stds
)
return
cls_reg_targets
def
loss
(
self
,
cls_score
,
bbox_pred
,
labels
,
label_weights
,
bbox_targets
,
bbox_weights
,
reduce
=
True
):
def
loss
(
self
,
cls_score
,
bbox_pred
,
labels
,
label_weights
,
bbox_targets
,
bbox_weights
,
reduce
=
True
):
losses
=
dict
()
if
cls_score
is
not
None
:
losses
[
'loss_cls'
]
=
weighted_cross_entropy
(
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment