Commit 441015ea authored by Kai Chen

Merge branch 'master' into pytorch-1.0

parents 2017c81e 3b6ae96d
@@ -69,7 +69,7 @@ class MaxIoUAssigner(BaseAssigner):
        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
            raise ValueError('No gt or bboxes')
        bboxes = bboxes[:, :4]
-       overlaps = bbox_overlaps(bboxes, gt_bboxes)
+       overlaps = bbox_overlaps(gt_bboxes, bboxes)
        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
                gt_bboxes_ignore.numel() > 0):
@@ -88,8 +88,8 @@ class MaxIoUAssigner(BaseAssigner):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Args:
-           overlaps (Tensor): Overlaps between n bboxes and k gt_bboxes,
-               shape(n, k).
+           overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
+               shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
@@ -98,19 +98,18 @@ class MaxIoUAssigner(BaseAssigner):
        if overlaps.numel() == 0:
            raise ValueError('No gt or proposals')

-       num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
+       num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
-       assert overlaps.size() == (num_bboxes, num_gts)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
-       max_overlaps, argmax_overlaps = overlaps.max(dim=1)
+       max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
-       gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)
+       gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below
        if isinstance(self.neg_iou_thr, float):
@@ -129,7 +128,7 @@ class MaxIoUAssigner(BaseAssigner):
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
-                   max_iou_inds = overlaps[:, i] == gt_max_overlaps[i]
+                   max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
......
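For orientation, a minimal sketch of the (k, n) overlap convention adopted above; the tensor values are made up:

```python
import torch

# Hypothetical k = 2 gts, n = 3 anchors; overlaps now has shape (k, n).
overlaps = torch.tensor([[0.1, 0.7, 0.3],
                         [0.6, 0.2, 0.9]])
# Per anchor (column): best gt -> reduce over dim=0, the gt axis.
max_overlaps, argmax_overlaps = overlaps.max(dim=0)        # shapes (n,)
# Per gt (row): best anchor -> reduce over dim=1, the anchor axis.
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)  # shapes (k,)
```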
@@ -16,7 +16,7 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
            foreground).

    Returns:
-        ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1)
+        ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
    """

    assert mode in ['iou', 'iof']
......
+import mmcv
import numpy as np
from terminaltables import AsciiTable
@@ -234,8 +235,9 @@ def eval_map(det_results,
        gt_ignore (list): gt ignore indicators of each image, a list of K array
        scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
        iou_thr (float): IoU threshold
-        dataset (None or str): dataset name, there are minor differences in
-            metrics for different datsets, e.g. "voc07", "imagenet_det", etc.
+        dataset (None or str or list): dataset name or dataset classes, there
+            are minor differences in metrics for different datasets, e.g.
+            "voc07", "imagenet_det", etc.
        print_summary (bool): whether to print the mAP summary

    Returns:
@@ -333,7 +335,7 @@ def print_map_summary(mean_ap, results, dataset=None):
    Args:
        mean_ap(float): calculated from `eval_map`
        results(list): calculated from `eval_map`
-        dataset(None or str or list): dataset name.
+        dataset(None or str or list): dataset name or dataset classes.
    """
    num_scales = len(results[0]['ap']) if isinstance(results[0]['ap'],
                                                     np.ndarray) else 1
@@ -353,8 +355,10 @@ def print_map_summary(mean_ap, results, dataset=None):
    if dataset is None:
        label_names = [str(i) for i in range(1, num_classes + 1)]
-    else:
+    elif mmcv.is_str(dataset):
        label_names = get_classes(dataset)
+    else:
+        label_names = dataset

    if not isinstance(mean_ap, list):
        mean_ap = [mean_ap]
......
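A quick usage sketch of the widened `dataset` argument; the class names below are invented for illustration:

```python
# Either a known dataset name ...
print_map_summary(mean_ap, results, dataset='voc07')
# ... or, new in this commit, the class names themselves.
print_map_summary(mean_ap, results, dataset=['person', 'car', 'dog'])
```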
@@ -100,6 +100,8 @@ def accuracy(pred, target, topk=1):
    if isinstance(topk, int):
        topk = (topk, )
        return_single = True
+    else:
+        return_single = False

    maxk = max(topk)
    _, pred_label = pred.topk(maxk, 1, True, True)
......
@@ -6,9 +6,11 @@ from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann, get_dataset
from .concat_dataset import ConcatDataset
from .repeat_dataset import RepeatDataset
+from .extra_aug import ExtraAugmentation

__all__ = [
    'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
    'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
-    'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset'
+    'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',
+    'ExtraAugmentation'
]
@@ -40,7 +40,7 @@ class CocoDataset(CustomDataset):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
-        return self._parse_ann_info(ann_info)
+        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
......
@@ -8,6 +8,7 @@ from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, MaskTransform,
                         Numpy2Tensor)
from .utils import to_tensor, random_scale
+from .extra_aug import ExtraAugmentation


class CustomDataset(Dataset):
@@ -46,9 +47,12 @@ class CustomDataset(Dataset):
                 with_mask=True,
                 with_crowd=True,
                 with_label=True,
+                extra_aug=None,
+                resize_keep_ratio=True,
                 test_mode=False):
        # prefix of images path
        self.img_prefix = img_prefix
        # load annotations (and proposals)
        self.img_infos = self.load_annotations(ann_file)
        if proposal_file is not None:
@@ -98,6 +102,15 @@ class CustomDataset(Dataset):
        self.mask_transform = MaskTransform()
        self.numpy2tensor = Numpy2Tensor()

+       # if use extra augmentation
+       if extra_aug is not None:
+           self.extra_aug = ExtraAugmentation(**extra_aug)
+       else:
+           self.extra_aug = None
+
+       # image rescale if keep ratio
+       self.resize_keep_ratio = resize_keep_ratio
+
    def __len__(self):
        return len(self.img_infos)
@@ -176,11 +189,17 @@ class CustomDataset(Dataset):
        if len(gt_bboxes) == 0:
            return None

+       # extra augmentation
+       if self.extra_aug is not None:
+           img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
+                                                      gt_labels)
+
        # apply transforms
        flip = True if np.random.rand() < self.flip_ratio else False
        img_scale = random_scale(self.img_scales)  # sample a scale
        img, img_shape, pad_shape, scale_factor = self.img_transform(
-           img, img_scale, flip)
+           img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
+       img = img.copy()
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
@@ -232,7 +251,7 @@ class CustomDataset(Dataset):
        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
-               img, scale, flip)
+               img, scale, flip, keep_ratio=self.resize_keep_ratio)
            _img = to_tensor(_img)
            _img_meta = dict(
                ori_shape=(img_info['height'], img_info['width'], 3),
......
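A hedged sketch of how the two new `CustomDataset` arguments might be wired up; the paths and values are illustrative only, and the other constructor arguments are assumed from the surrounding code:

```python
dataset = CustomDataset(
    ann_file='annotations/train.json',   # hypothetical paths
    img_prefix='images/',
    img_scale=(300, 300),
    img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True),
    extra_aug=dict(                      # each key enables one transform
        photo_metric_distortion=dict(brightness_delta=32),
        expand=dict(mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)),
        random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
                         min_crop_size=0.3)),
    resize_keep_ratio=False)             # SSD-style fixed-size resize
```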
import mmcv
import numpy as np
from numpy import random
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
class PhotoMetricDistortion(object):
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
def __call__(self, img, boxes, labels):
# random brightness
if random.randint(2):
delta = random.uniform(-self.brightness_delta,
self.brightness_delta)
img += delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha
# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)
# random saturation
if random.randint(2):
img[..., 1] *= random.uniform(self.saturation_lower,
self.saturation_upper)
# random hue
if random.randint(2):
img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)
# random contrast
if mode == 0:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha
# randomly swap channels
if random.randint(2):
img = img[..., random.permutation(3)]
return img, boxes, labels
class Expand(object):
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
else:
self.mean = mean
self.min_ratio, self.max_ratio = ratio_range
def __call__(self, img, boxes, labels):
if random.randint(2):
return img, boxes, labels
h, w, c = img.shape
ratio = random.uniform(self.min_ratio, self.max_ratio)
expand_img = np.full((int(h * ratio), int(w * ratio), c),
self.mean).astype(img.dtype)
left = int(random.uniform(0, w * ratio - w))
top = int(random.uniform(0, h * ratio - h))
expand_img[top:top + h, left:left + w] = img
img = expand_img
boxes += np.tile((left, top), 2)
return img, boxes, labels
class RandomCrop(object):
def __init__(self,
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
min_crop_size=0.3):
# 1: return ori img
self.sample_mode = (1, *min_ious, 0)
self.min_crop_size = min_crop_size
def __call__(self, img, boxes, labels):
h, w, c = img.shape
while True:
mode = random.choice(self.sample_mode)
if mode == 1:
return img, boxes, labels
min_iou = mode
for i in range(50):
new_w = random.uniform(self.min_crop_size * w, w)
new_h = random.uniform(self.min_crop_size * h, h)
# h / w in [0.5, 2]
if new_h / new_w < 0.5 or new_h / new_w > 2:
continue
left = random.uniform(w - new_w)
top = random.uniform(h - new_h)
patch = np.array((int(left), int(top), int(left + new_w),
int(top + new_h)))
overlaps = bbox_overlaps(
patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
if overlaps.min() < min_iou:
continue
# center of boxes should inside the crop img
center = (boxes[:, :2] + boxes[:, 2:]) / 2
mask = (center[:, 0] > patch[0]) * (
center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * (
center[:, 1] < patch[3])
if not mask.any():
continue
boxes = boxes[mask]
labels = labels[mask]
# adjust boxes
img = img[patch[1]:patch[3], patch[0]:patch[2]]
boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
boxes -= np.tile(patch[:2], 2)
return img, boxes, labels
class ExtraAugmentation(object):
def __init__(self,
photo_metric_distortion=None,
expand=None,
random_crop=None):
self.transforms = []
if photo_metric_distortion is not None:
self.transforms.append(
PhotoMetricDistortion(**photo_metric_distortion))
if expand is not None:
self.transforms.append(Expand(**expand))
if random_crop is not None:
self.transforms.append(RandomCrop(**random_crop))
def __call__(self, img, boxes, labels):
img = img.astype(np.float32)
for transform in self.transforms:
img, boxes, labels = transform(img, boxes, labels)
return img, boxes, labels
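To show the composition order of `ExtraAugmentation` directly, a standalone sketch; the inputs are arbitrary:

```python
import numpy as np

aug = ExtraAugmentation(
    photo_metric_distortion=dict(brightness_delta=32),
    expand=dict(mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)),
    random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3))

img = np.random.randint(0, 256, (300, 300, 3)).astype(np.uint8)
boxes = np.array([[40., 40., 160., 160.]], dtype=np.float32)
labels = np.array([1])
# Transforms run in registration order: distort -> expand -> crop.
img, boxes, labels = aug(img, boxes, labels)  # img comes back as float32
```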
@@ -25,8 +25,14 @@ class ImageTransform(object):
        self.to_rgb = to_rgb
        self.size_divisor = size_divisor

-   def __call__(self, img, scale, flip=False):
-       img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
+   def __call__(self, img, scale, flip=False, keep_ratio=True):
+       if keep_ratio:
+           img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
+       else:
+           img, w_scale, h_scale = mmcv.imresize(
+               img, scale, return_scale=True)
+           scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
+                                   dtype=np.float32)
        img_shape = img.shape
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        if flip:
......
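For reference, a sketch contrasting the two resize modes used above; the input size is made up:

```python
import mmcv
import numpy as np

img = np.zeros((600, 800, 3), dtype=np.uint8)
# keep_ratio=True: aspect-preserving, a single scalar scale factor.
out, scale_factor = mmcv.imrescale(img, (1333, 800), return_scale=True)
# keep_ratio=False: exact target size, per-axis factors packed per bbox coord.
out, w_scale, h_scale = mmcv.imresize(img, (512, 512), return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
```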
-from .detectors import (BaseDetector, TwoStageDetector, RPN, FastRCNN,
-                        FasterRCNN, MaskRCNN)
-from .builder import (build_neck, build_rpn_head, build_roi_extractor,
-                      build_bbox_head, build_mask_head, build_detector)
+from .backbones import *  # noqa: F401,F403
+from .necks import *  # noqa: F401,F403
+from .roi_extractors import *  # noqa: F401,F403
+from .anchor_heads import *  # noqa: F401,F403
+from .bbox_heads import *  # noqa: F401,F403
+from .mask_heads import *  # noqa: F401,F403
+from .detectors import *  # noqa: F401,F403
+from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
+from .builder import (build_backbone, build_neck, build_roi_extractor,
+                      build_head, build_detector)

__all__ = [
-    'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN',
-    'MaskRCNN', 'build_backbone', 'build_neck', 'build_rpn_head',
-    'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
-    'build_detector'
+    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS',
+    'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head',
+    'build_detector'
]
from .anchor_head import AnchorHead
from .rpn_head import RPNHead
from .retina_head import RetinaHead
from .ssd_head import SSDHead
__all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead']
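With the new registry in place, a head can be built from a config dict; a hedged sketch, where `build_head` comes from the new builder exports above and the config values are illustrative:

```python
from mmdet.models import build_head

rpn_head = build_head(
    dict(
        type='RPNHead',           # looked up in the HEADS registry
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64]))
```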
@@ -3,114 +3,84 @@ from __future__ import division

import numpy as np
import torch
import torch.nn as nn
+from mmcv.cnn import normal_init

-from mmdet.core import (AnchorGenerator, anchor_target, multi_apply,
-                        delta2bbox, weighted_smoothl1,
+from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox,
+                        multi_apply, weighted_cross_entropy, weighted_smoothl1,
+                        weighted_binary_cross_entropy,
                         weighted_sigmoid_focal_loss, multiclass_nms)
-from ..utils import normal_init, bias_init_with_prob
+from ..registry import HEADS


-class RetinaHead(nn.Module):
-    """Head of RetinaNet.
+@HEADS.register_module
+class AnchorHead(nn.Module):
+    """Anchor-based head (RPN, RetinaNet, SSD, etc.).

-              / cls_convs - retina_cls (3x3 conv)
-    input -
-              \ reg_convs - retina_reg (3x3 conv)

    Args:
        in_channels (int): Number of channels in the input feature map.
-        num_classes (int): Class number (including background).
-        stacked_convs (int): Number of convolutional layers added for cls and
-            reg branch.
-        feat_channels (int): Number of channels for the RPN feature map.
-        scales_per_octave (int): Number of anchor scales per octave.
-        octave_base_scale (int): Base octave scale. Anchor scales are computed
-            as `s*2^(i/n)`, for i in [0, n-1], where s is `octave_base_scale`
-            and n is `scales_per_octave`.
+        feat_channels (int): Number of channels of the feature map.
+        anchor_scales (Iterable): Anchor scales.
        anchor_ratios (Iterable): Anchor aspect ratios.
        anchor_strides (Iterable): Anchor strides.
+        anchor_base_sizes (Iterable): Anchor base sizes.
        target_means (Iterable): Mean values of regression targets.
        target_stds (Iterable): Std values of regression targets.
+        use_sigmoid_cls (bool): Whether to use sigmoid loss for classification.
+            (softmax by default)
+        use_focal_loss (bool): Whether to use focal loss for classification.
    """  # noqa: W605

    def __init__(self,
-                in_channels,
                 num_classes,
-                stacked_convs=4,
+                in_channels,
                 feat_channels=256,
-                octave_base_scale=4,
-                scales_per_octave=3,
+                anchor_scales=[8, 16, 32],
                 anchor_ratios=[0.5, 1.0, 2.0],
-                anchor_strides=[8, 16, 32, 64, 128],
+                anchor_strides=[4, 8, 16, 32, 64],
                 anchor_base_sizes=None,
                 target_means=(.0, .0, .0, .0),
-                target_stds=(1.0, 1.0, 1.0, 1.0)):
-       super(RetinaHead, self).__init__()
+                target_stds=(1.0, 1.0, 1.0, 1.0),
+                use_sigmoid_cls=False,
+                use_focal_loss=False):
+       super(AnchorHead, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
-       self.octave_base_scale = octave_base_scale
-       self.scales_per_octave = scales_per_octave
+       self.feat_channels = feat_channels
+       self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        self.anchor_strides = anchor_strides
        self.anchor_base_sizes = list(
            anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
        self.target_means = target_means
        self.target_stds = target_stds
+       self.use_sigmoid_cls = use_sigmoid_cls
+       self.use_focal_loss = use_focal_loss

        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
-           octave_scales = np.array(
-               [2**(i / scales_per_octave) for i in range(scales_per_octave)])
-           anchor_scales = octave_scales * octave_base_scale
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
-       self.relu = nn.ReLU(inplace=True)
-       self.num_anchors = int(
-           len(self.anchor_ratios) * self.scales_per_octave)
-       self.cls_out_channels = self.num_classes - 1
-       self.bbox_pred_dim = 4
-       self.stacked_convs = stacked_convs
-       self.cls_convs = nn.ModuleList()
-       self.reg_convs = nn.ModuleList()
-       for i in range(self.stacked_convs):
-           chn = in_channels if i == 0 else feat_channels
-           self.cls_convs.append(
-               nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
-           self.reg_convs.append(
-               nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
-       self.retina_cls = nn.Conv2d(
-           feat_channels,
-           self.num_anchors * self.cls_out_channels,
-           3,
-           stride=1,
-           padding=1)
-       self.retina_reg = nn.Conv2d(
-           feat_channels,
-           self.num_anchors * self.bbox_pred_dim,
-           3,
-           stride=1,
-           padding=1)
-       self.debug_imgs = None
+       self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)
+       if self.use_sigmoid_cls:
+           self.cls_out_channels = self.num_classes - 1
+       else:
+           self.cls_out_channels = self.num_classes
+
+       self._init_layers()
+
+   def _init_layers(self):
+       self.conv_cls = nn.Conv2d(self.feat_channels,
+                                 self.num_anchors * self.cls_out_channels, 1)
+       self.conv_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)

    def init_weights(self):
-       for m in self.cls_convs:
-           normal_init(m, std=0.01)
-       for m in self.reg_convs:
-           normal_init(m, std=0.01)
-       bias_cls = bias_init_with_prob(0.01)
-       normal_init(self.retina_cls, std=0.01, bias=bias_cls)
-       normal_init(self.retina_reg, std=0.01)
+       normal_init(self.conv_cls, std=0.01)
+       normal_init(self.conv_reg, std=0.01)

    def forward_single(self, x):
-       cls_feat = x
-       reg_feat = x
-       for cls_conv in self.cls_convs:
-           cls_feat = self.relu(cls_conv(cls_feat))
-       for reg_conv in self.reg_convs:
-           reg_feat = self.relu(reg_conv(reg_feat))
-       cls_score = self.retina_cls(cls_feat)
-       bbox_pred = self.retina_reg(reg_feat)
+       cls_score = self.conv_cls(x)
+       bbox_pred = self.conv_reg(x)
        return cls_score, bbox_pred

    def forward(self, feats):
@@ -156,30 +126,47 @@ class RetinaHead(nn.Module):
        return anchor_list, valid_flag_list

    def loss_single(self, cls_score, bbox_pred, labels, label_weights,
-                   bbox_targets, bbox_weights, num_pos_samples, cfg):
+                   bbox_targets, bbox_weights, num_total_samples, cfg):
        # classification loss
-       labels = labels.contiguous().view(-1, self.cls_out_channels)
-       label_weights = label_weights.contiguous().view(
-           -1, self.cls_out_channels)
-       cls_score = cls_score.permute(0, 2, 3, 1).contiguous().view(
+       if self.use_sigmoid_cls:
+           labels = labels.reshape(-1, self.cls_out_channels)
+           label_weights = label_weights.reshape(-1, self.cls_out_channels)
+       else:
+           labels = labels.reshape(-1)
+           label_weights = label_weights.reshape(-1)
+       cls_score = cls_score.permute(0, 2, 3, 1).reshape(
            -1, self.cls_out_channels)
-       loss_cls = weighted_sigmoid_focal_loss(
-           cls_score,
-           labels,
-           label_weights,
-           cfg.gamma,
-           cfg.alpha,
-           avg_factor=num_pos_samples)
+       if self.use_sigmoid_cls:
+           if self.use_focal_loss:
+               cls_criterion = weighted_sigmoid_focal_loss
+           else:
+               cls_criterion = weighted_binary_cross_entropy
+       else:
+           if self.use_focal_loss:
+               raise NotImplementedError
+           else:
+               cls_criterion = weighted_cross_entropy
+       if self.use_focal_loss:
+           loss_cls = cls_criterion(
+               cls_score,
+               labels,
+               label_weights,
+               gamma=cfg.gamma,
+               alpha=cfg.alpha,
+               avg_factor=num_total_samples)
+       else:
+           loss_cls = cls_criterion(
+               cls_score, labels, label_weights, avg_factor=num_total_samples)
        # regression loss
-       bbox_targets = bbox_targets.contiguous().view(-1, 4)
-       bbox_weights = bbox_weights.contiguous().view(-1, 4)
-       bbox_pred = bbox_pred.permute(0, 2, 3, 1).contiguous().view(-1, 4)
+       bbox_targets = bbox_targets.reshape(-1, 4)
+       bbox_weights = bbox_weights.reshape(-1, 4)
+       bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
        loss_reg = weighted_smoothl1(
            bbox_pred,
            bbox_targets,
            bbox_weights,
            beta=cfg.smoothl1_beta,
-           avg_factor=num_pos_samples)
+           avg_factor=num_total_samples)
        return loss_cls, loss_reg

    def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
@@ -189,6 +176,8 @@ class RetinaHead(nn.Module):
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas)
+       sampling = False if self.use_focal_loss else True
+       label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = anchor_target(
            anchor_list,
            valid_flag_list,
@@ -198,13 +187,14 @@ class RetinaHead(nn.Module):
            self.target_stds,
            cfg,
            gt_labels_list=gt_labels,
-           cls_out_channels=self.cls_out_channels,
-           sampling=False)
+           label_channels=label_channels,
+           sampling=sampling)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         num_total_pos, num_total_neg) = cls_reg_targets
+       num_total_samples = (num_total_pos if self.use_focal_loss else
+                            num_total_pos + num_total_neg)
        losses_cls, losses_reg = multi_apply(
            self.loss_single,
            cls_scores,
@@ -213,16 +203,12 @@ class RetinaHead(nn.Module):
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
-           num_pos_samples=num_total_pos,
+           num_total_samples=num_total_samples,
            cfg=cfg)
        return dict(loss_cls=losses_cls, loss_reg=losses_reg)

-   def get_det_bboxes(self,
-                      cls_scores,
-                      bbox_preds,
-                      img_metas,
-                      cfg,
-                      rescale=False):
+   def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,
+                  rescale=False):
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)
@@ -231,7 +217,6 @@ class RetinaHead(nn.Module):
                self.anchor_strides[i])
            for i in range(num_levels)
        ]
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
@@ -242,46 +227,54 @@ class RetinaHead(nn.Module):
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
-           results = self._get_det_bboxes_single(
-               cls_score_list, bbox_pred_list, mlvl_anchors, img_shape,
-               scale_factor, cfg, rescale)
-           result_list.append(results)
+           proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
+                                              mlvl_anchors, img_shape,
+                                              scale_factor, cfg, rescale)
+           result_list.append(proposals)
        return result_list

-   def _get_det_bboxes_single(self,
-                              cls_scores,
-                              bbox_preds,
-                              mlvl_anchors,
-                              img_shape,
-                              scale_factor,
-                              cfg,
-                              rescale=False):
+   def get_bboxes_single(self,
+                         cls_scores,
+                         bbox_preds,
+                         mlvl_anchors,
+                         img_shape,
+                         scale_factor,
+                         cfg,
+                         rescale=False):
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
-       mlvl_proposals = []
+       mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
                                                 mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
-           cls_score = cls_score.permute(1, 2, 0).contiguous().view(
+           cls_score = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels)
-           scores = cls_score.sigmoid()
-           bbox_pred = bbox_pred.permute(1, 2, 0).contiguous().view(-1, 4)
-           proposals = delta2bbox(anchors, bbox_pred, self.target_means,
-                                  self.target_stds, img_shape)
-           if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
-               maxscores, _ = scores.max(dim=1)
-               _, topk_inds = maxscores.topk(cfg.nms_pre)
-               proposals = proposals[topk_inds, :]
+           if self.use_sigmoid_cls:
+               scores = cls_score.sigmoid()
+           else:
+               scores = cls_score.softmax(-1)
+           bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
+           nms_pre = cfg.get('nms_pre', -1)
+           if nms_pre > 0 and scores.shape[0] > nms_pre:
+               if self.use_sigmoid_cls:
+                   max_scores, _ = scores.max(dim=1)
+               else:
+                   max_scores, _ = scores[:, 1:].max(dim=1)
+               _, topk_inds = max_scores.topk(nms_pre)
+               anchors = anchors[topk_inds, :]
+               bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
-           mlvl_proposals.append(proposals)
+           bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
+                               self.target_stds, img_shape)
+           mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
-       mlvl_proposals = torch.cat(mlvl_proposals)
+       mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
-           mlvl_proposals /= scale_factor
+           mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
-       padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
-       mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
-       det_bboxes, det_labels = multiclass_nms(mlvl_proposals, mlvl_scores,
-                                               cfg.score_thr, cfg.nms,
-                                               cfg.max_per_img)
+       if self.use_sigmoid_cls:
+           padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
+           mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
+       det_bboxes, det_labels = multiclass_nms(
+           mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
        return det_bboxes, det_labels
import numpy as np
import torch.nn as nn
from mmcv.cnn import normal_init
from .anchor_head import AnchorHead
from ..registry import HEADS
from ..utils import bias_init_with_prob
@HEADS.register_module
class RetinaHead(AnchorHead):
def __init__(self,
num_classes,
in_channels,
stacked_convs=4,
octave_base_scale=4,
scales_per_octave=3,
**kwargs):
self.stacked_convs = stacked_convs
self.octave_base_scale = octave_base_scale
self.scales_per_octave = scales_per_octave
octave_scales = np.array(
[2**(i / scales_per_octave) for i in range(scales_per_octave)])
anchor_scales = octave_scales * octave_base_scale
super(RetinaHead, self).__init__(
num_classes,
in_channels,
anchor_scales=anchor_scales,
use_sigmoid_cls=True,
use_focal_loss=True,
**kwargs)
def _init_layers(self):
self.relu = nn.ReLU(inplace=True)
self.cls_convs = nn.ModuleList()
self.reg_convs = nn.ModuleList()
for i in range(self.stacked_convs):
chn = self.in_channels if i == 0 else self.feat_channels
self.cls_convs.append(
nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1))
self.reg_convs.append(
nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1))
self.retina_cls = nn.Conv2d(
self.feat_channels,
self.num_anchors * self.cls_out_channels,
3,
padding=1)
self.retina_reg = nn.Conv2d(
self.feat_channels, self.num_anchors * 4, 3, padding=1)
def init_weights(self):
for m in self.cls_convs:
normal_init(m, std=0.01)
for m in self.reg_convs:
normal_init(m, std=0.01)
bias_cls = bias_init_with_prob(0.01)
normal_init(self.retina_cls, std=0.01, bias=bias_cls)
normal_init(self.retina_reg, std=0.01)
def forward_single(self, x):
cls_feat = x
reg_feat = x
for cls_conv in self.cls_convs:
cls_feat = self.relu(cls_conv(cls_feat))
for reg_conv in self.reg_convs:
reg_feat = self.relu(reg_conv(reg_feat))
cls_score = self.retina_cls(cls_feat)
bbox_pred = self.retina_reg(reg_feat)
return cls_score, bbox_pred
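To make the octave arithmetic above concrete, with the defaults octave_base_scale=4 and scales_per_octave=3:

```python
import numpy as np

octave_base_scale, scales_per_octave = 4, 3
octave_scales = np.array(
    [2**(i / scales_per_octave) for i in range(scales_per_octave)])
print(octave_scales * octave_base_scale)  # [4.0, ~5.04, ~6.35] per level
```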
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import normal_init
from mmdet.core import delta2bbox
from mmdet.ops import nms
from .anchor_head import AnchorHead
from ..registry import HEADS
@HEADS.register_module
class RPNHead(AnchorHead):
def __init__(self, in_channels, **kwargs):
super(RPNHead, self).__init__(2, in_channels, **kwargs)
def _init_layers(self):
self.rpn_conv = nn.Conv2d(
self.in_channels, self.feat_channels, 3, padding=1)
self.rpn_cls = nn.Conv2d(self.feat_channels,
self.num_anchors * self.cls_out_channels, 1)
self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)
def init_weights(self):
normal_init(self.rpn_conv, std=0.01)
normal_init(self.rpn_cls, std=0.01)
normal_init(self.rpn_reg, std=0.01)
def forward_single(self, x):
x = self.rpn_conv(x)
x = F.relu(x, inplace=True)
rpn_cls_score = self.rpn_cls(x)
rpn_bbox_pred = self.rpn_reg(x)
return rpn_cls_score, rpn_bbox_pred
def loss(self, cls_scores, bbox_preds, gt_bboxes, img_metas, cfg):
losses = super(RPNHead, self).loss(cls_scores, bbox_preds, gt_bboxes,
None, img_metas, cfg)
return dict(
loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg'])
def get_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False):
mlvl_proposals = []
for idx in range(len(cls_scores)):
rpn_cls_score = cls_scores[idx]
rpn_bbox_pred = bbox_preds[idx]
assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
anchors = mlvl_anchors[idx]
rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
if self.use_sigmoid_cls:
rpn_cls_score = rpn_cls_score.reshape(-1)
scores = rpn_cls_score.sigmoid()
else:
rpn_cls_score = rpn_cls_score.reshape(-1, 2)
scores = rpn_cls_score.softmax(dim=1)[:, 1]
rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
_, topk_inds = scores.topk(cfg.nms_pre)
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
scores = scores[topk_inds]
proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
self.target_stds, img_shape)
if cfg.min_bbox_size > 0:
w = proposals[:, 2] - proposals[:, 0] + 1
h = proposals[:, 3] - proposals[:, 1] + 1
valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
(h >= cfg.min_bbox_size)).squeeze()
proposals = proposals[valid_inds, :]
scores = scores[valid_inds]
proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
proposals, _ = nms(proposals, cfg.nms_thr)
proposals = proposals[:cfg.nms_post, :]
mlvl_proposals.append(proposals)
proposals = torch.cat(mlvl_proposals, 0)
if cfg.nms_across_levels:
proposals, _ = nms(proposals, cfg.nms_thr)
proposals = proposals[:cfg.max_num, :]
else:
scores = proposals[:, 4]
num = min(cfg.max_num, proposals.shape[0])
_, topk_inds = scores.topk(num)
proposals = proposals[topk_inds, :]
return proposals
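`get_bboxes_single` above reads `nms_pre`, `nms_post`, `nms_thr`, `min_bbox_size`, `nms_across_levels`, and `max_num` from `cfg`; a hedged example of a compatible test-time config, with typical values that are not taken from this commit:

```python
from mmcv import Config

rpn_test_cfg = Config(
    dict(
        nms_across_levels=False,
        nms_pre=2000,     # keep top-scoring anchors per level before NMS
        nms_post=2000,    # keep this many proposals per level after NMS
        max_num=2000,     # final cap across all levels
        nms_thr=0.7,
        min_bbox_size=0))
```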
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init
from mmdet.core import (AnchorGenerator, anchor_target, weighted_smoothl1,
multi_apply)
from .anchor_head import AnchorHead
from ..registry import HEADS
@HEADS.register_module
class SSDHead(AnchorHead):
def __init__(self,
input_size=300,
num_classes=81,
in_channels=(512, 1024, 512, 256, 256, 256),
anchor_strides=(8, 16, 32, 64, 100, 300),
basesize_ratio_range=(0.1, 0.9),
anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
target_means=(.0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0)):
super(AnchorHead, self).__init__()
self.input_size = input_size
self.num_classes = num_classes
self.in_channels = in_channels
self.cls_out_channels = num_classes
num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
reg_convs = []
cls_convs = []
for i in range(len(in_channels)):
reg_convs.append(
nn.Conv2d(
in_channels[i],
num_anchors[i] * 4,
kernel_size=3,
padding=1))
cls_convs.append(
nn.Conv2d(
in_channels[i],
num_anchors[i] * num_classes,
kernel_size=3,
padding=1))
self.reg_convs = nn.ModuleList(reg_convs)
self.cls_convs = nn.ModuleList(cls_convs)
min_ratio, max_ratio = basesize_ratio_range
min_ratio = int(min_ratio * 100)
max_ratio = int(max_ratio * 100)
step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
min_sizes = []
max_sizes = []
for r in range(int(min_ratio), int(max_ratio) + 1, step):
min_sizes.append(int(input_size * r / 100))
max_sizes.append(int(input_size * (r + step) / 100))
if input_size == 300:
if basesize_ratio_range[0] == 0.15: # SSD300 COCO
min_sizes.insert(0, int(input_size * 7 / 100))
max_sizes.insert(0, int(input_size * 15 / 100))
elif basesize_ratio_range[0] == 0.2: # SSD300 VOC
min_sizes.insert(0, int(input_size * 10 / 100))
max_sizes.insert(0, int(input_size * 20 / 100))
elif input_size == 512:
if basesize_ratio_range[0] == 0.1: # SSD512 COCO
min_sizes.insert(0, int(input_size * 4 / 100))
max_sizes.insert(0, int(input_size * 10 / 100))
elif basesize_ratio_range[0] == 0.15: # SSD512 VOC
min_sizes.insert(0, int(input_size * 7 / 100))
max_sizes.insert(0, int(input_size * 15 / 100))
self.anchor_generators = []
self.anchor_strides = anchor_strides
for k in range(len(anchor_strides)):
base_size = min_sizes[k]
stride = anchor_strides[k]
ctr = ((stride - 1) / 2., (stride - 1) / 2.)
scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
ratios = [1.]
for r in anchor_ratios[k]:
ratios += [1 / r, r] # 4 or 6 ratio
anchor_generator = AnchorGenerator(
base_size, scales, ratios, scale_major=False, ctr=ctr)
indices = list(range(len(ratios)))
indices.insert(1, len(indices))
anchor_generator.base_anchors = torch.index_select(
anchor_generator.base_anchors, 0, torch.LongTensor(indices))
self.anchor_generators.append(anchor_generator)
self.target_means = target_means
self.target_stds = target_stds
self.use_sigmoid_cls = False
self.use_focal_loss = False
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform', bias=0)
def forward(self, feats):
cls_scores = []
bbox_preds = []
for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,
self.cls_convs):
cls_scores.append(cls_conv(feat))
bbox_preds.append(reg_conv(feat))
return cls_scores, bbox_preds
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
bbox_targets, bbox_weights, num_total_samples, cfg):
loss_cls_all = F.cross_entropy(
cls_score, labels, reduction='none') * label_weights
pos_inds = (labels > 0).nonzero().view(-1)
neg_inds = (labels == 0).nonzero().view(-1)
num_pos_samples = pos_inds.size(0)
num_neg_samples = cfg.neg_pos_ratio * num_pos_samples
if num_neg_samples > neg_inds.size(0):
num_neg_samples = neg_inds.size(0)
topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)
loss_cls_pos = loss_cls_all[pos_inds].sum()
loss_cls_neg = topk_loss_cls_neg.sum()
loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples
loss_reg = weighted_smoothl1(
bbox_pred,
bbox_targets,
bbox_weights,
beta=cfg.smoothl1_beta,
avg_factor=num_total_samples)
return loss_cls[None], loss_reg
def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
cfg):
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == len(self.anchor_generators)
anchor_list, valid_flag_list = self.get_anchors(
featmap_sizes, img_metas)
cls_reg_targets = anchor_target(
anchor_list,
valid_flag_list,
gt_bboxes,
img_metas,
self.target_means,
self.target_stds,
cfg,
gt_labels_list=gt_labels,
label_channels=1,
sampling=False,
unmap_outputs=False)
if cls_reg_targets is None:
return None
(labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
num_total_pos, num_total_neg) = cls_reg_targets
num_images = len(img_metas)
all_cls_scores = torch.cat([
s.permute(0, 2, 3, 1).reshape(
num_images, -1, self.cls_out_channels) for s in cls_scores
], 1)
all_labels = torch.cat(labels_list, -1).view(num_images, -1)
all_label_weights = torch.cat(label_weights_list, -1).view(
num_images, -1)
all_bbox_preds = torch.cat([
b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)
for b in bbox_preds
], -2)
all_bbox_targets = torch.cat(bbox_targets_list, -2).view(
num_images, -1, 4)
all_bbox_weights = torch.cat(bbox_weights_list, -2).view(
num_images, -1, 4)
losses_cls, losses_reg = multi_apply(
self.loss_single,
all_cls_scores,
all_bbox_preds,
all_labels,
all_label_weights,
all_bbox_targets,
all_bbox_weights,
num_total_samples=num_total_pos,
cfg=cfg)
return dict(loss_cls=losses_cls, loss_reg=losses_reg)
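For intuition, the hard negative mining inside `loss_single` above keeps only the highest-loss negatives at a fixed negative:positive ratio; a toy sketch with arbitrary numbers:

```python
import torch

loss_cls_all = torch.tensor([2.0, 0.1, 0.9, 0.3, 1.5])  # per-anchor CE * weights
labels = torch.tensor([1, 0, 0, 0, 0])                  # 1 positive, 4 negatives
pos_inds = (labels > 0).nonzero().view(-1)
neg_inds = (labels == 0).nonzero().view(-1)
num_neg = min(3 * pos_inds.numel(), neg_inds.numel())   # neg_pos_ratio = 3
topk_neg, _ = loss_cls_all[neg_inds].topk(num_neg)      # hardest negatives only
loss_cls = loss_cls_all[pos_inds].sum() + topk_neg.sum()
```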
from .resnet import ResNet
from .resnext import ResNeXt
+from .ssd_vgg import SSDVGG

-__all__ = ['ResNet', 'ResNeXt']
+__all__ = ['ResNet', 'ResNeXt', 'SSDVGG']
...@@ -6,6 +6,10 @@ import torch.utils.checkpoint as cp ...@@ -6,6 +6,10 @@ import torch.utils.checkpoint as cp
from mmcv.cnn import constant_init, kaiming_init from mmcv.cnn import constant_init, kaiming_init
from mmcv.runner import load_checkpoint from mmcv.runner import load_checkpoint
from mmdet.ops import DeformConv, ModulatedDeformConv
from ..registry import BACKBONES
from ..utils import build_norm_layer
def conv3x3(in_planes, out_planes, stride=1, dilation=1): def conv3x3(in_planes, out_planes, stride=1, dilation=1):
"3x3 convolution with padding" "3x3 convolution with padding"
...@@ -29,27 +33,41 @@ class BasicBlock(nn.Module): ...@@ -29,27 +33,41 @@ class BasicBlock(nn.Module):
dilation=1, dilation=1,
downsample=None, downsample=None,
style='pytorch', style='pytorch',
with_cp=False): with_cp=False,
normalize=dict(type='BN')):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.norm1_name, norm1 = build_norm_layer(normalize, planes, postfix=1)
self.norm2_name, norm2 = build_norm_layer(normalize, planes, postfix=2)
self.conv1 = conv3x3(inplanes, planes, stride, dilation) self.conv1 = conv3x3(inplanes, planes, stride, dilation)
self.bn1 = nn.BatchNorm2d(planes) self.add_module(self.norm1_name, norm1)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes) self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes) self.add_module(self.norm2_name, norm2)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.stride = stride self.stride = stride
self.dilation = dilation self.dilation = dilation
assert not with_cp assert not with_cp
@property
def norm1(self):
return getattr(self, self.norm1_name)
@property
def norm2(self):
return getattr(self, self.norm2_name)
def forward(self, x): def forward(self, x):
identity = x identity = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.norm1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.norm2(out)
if self.downsample is not None: if self.downsample is not None:
identity = self.downsample(x) identity = self.downsample(x)
...@@ -70,46 +88,101 @@ class Bottleneck(nn.Module): ...@@ -70,46 +88,101 @@ class Bottleneck(nn.Module):
dilation=1, dilation=1,
downsample=None, downsample=None,
style='pytorch', style='pytorch',
with_cp=False): with_cp=False,
normalize=dict(type='BN'),
dcn=None):
"""Bottleneck block for ResNet. """Bottleneck block for ResNet.
If style is "pytorch", the stride-two layer is the 3x3 conv layer, If style is "pytorch", the stride-two layer is the 3x3 conv layer,
if it is "caffe", the stride-two layer is the first 1x1 conv layer. if it is "caffe", the stride-two layer is the first 1x1 conv layer.
""" """
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
assert style in ['pytorch', 'caffe'] assert style in ['pytorch', 'caffe']
assert dcn is None or isinstance(dcn, dict)
self.inplanes = inplanes self.inplanes = inplanes
self.planes = planes self.planes = planes
self.normalize = normalize
self.dcn = dcn
self.with_dcn = dcn is not None
if style == 'pytorch': if style == 'pytorch':
self.conv1_stride = 1 self.conv1_stride = 1
self.conv2_stride = stride self.conv2_stride = stride
else: else:
self.conv1_stride = stride self.conv1_stride = stride
self.conv2_stride = 1 self.conv2_stride = 1
self.norm1_name, norm1 = build_norm_layer(normalize, planes, postfix=1)
self.norm2_name, norm2 = build_norm_layer(normalize, planes, postfix=2)
self.norm3_name, norm3 = build_norm_layer(
normalize, planes * self.expansion, postfix=3)
self.conv1 = nn.Conv2d( self.conv1 = nn.Conv2d(
inplanes, inplanes,
planes, planes,
kernel_size=1, kernel_size=1,
stride=self.conv1_stride, stride=self.conv1_stride,
bias=False) bias=False)
self.conv2 = nn.Conv2d( self.add_module(self.norm1_name, norm1)
planes, fallback_on_stride = False
planes, self.with_modulated_dcn = False
kernel_size=3, if self.with_dcn:
stride=self.conv2_stride, fallback_on_stride = dcn.get('fallback_on_stride', False)
padding=dilation, self.with_modulated_dcn = dcn.get('modulated', False)
dilation=dilation, if not self.with_dcn or fallback_on_stride:
bias=False) self.conv2 = nn.Conv2d(
planes,
self.bn1 = nn.BatchNorm2d(planes) planes,
self.bn2 = nn.BatchNorm2d(planes) kernel_size=3,
stride=self.conv2_stride,
padding=dilation,
dilation=dilation,
bias=False)
else:
deformable_groups = dcn.get('deformable_groups', 1)
if not self.with_modulated_dcn:
conv_op = DeformConv
offset_channels = 18
else:
conv_op = ModulatedDeformConv
offset_channels = 27
self.conv2_offset = nn.Conv2d(
planes,
deformable_groups * offset_channels,
kernel_size=3,
stride=self.conv2_stride,
padding=dilation,
dilation=dilation)
self.conv2 = conv_op(
planes,
planes,
kernel_size=3,
stride=self.conv2_stride,
padding=dilation,
dilation=dilation,
deformable_groups=deformable_groups,
bias=False)
self.add_module(self.norm2_name, norm2)
self.conv3 = nn.Conv2d( self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False) planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.add_module(self.norm3_name, norm3)
self.relu = nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.stride = stride self.stride = stride
self.dilation = dilation self.dilation = dilation
self.with_cp = with_cp self.with_cp = with_cp
self.normalize = normalize
@property
def norm1(self):
return getattr(self, self.norm1_name)
@property
def norm2(self):
return getattr(self, self.norm2_name)
@property
def norm3(self):
return getattr(self, self.norm3_name)
def forward(self, x): def forward(self, x):
...@@ -117,15 +190,24 @@ class Bottleneck(nn.Module): ...@@ -117,15 +190,24 @@ class Bottleneck(nn.Module):
identity = x identity = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.norm1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) if not self.with_dcn:
out = self.bn2(out) out = self.conv2(out)
elif self.with_modulated_dcn:
offset_mask = self.conv2_offset(out)
offset = offset_mask[:, :18, :, :]
mask = offset_mask[:, -9:, :, :].sigmoid()
out = self.conv2(out, offset, mask)
else:
offset = self.conv2_offset(out)
out = self.conv2(out, offset)
out = self.norm2(out)
out = self.relu(out) out = self.relu(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.norm3(out)
if self.downsample is not None: if self.downsample is not None:
identity = self.downsample(x) identity = self.downsample(x)
...@@ -151,7 +233,9 @@ def make_res_layer(block, ...@@ -151,7 +233,9 @@ def make_res_layer(block,
stride=1, stride=1,
dilation=1, dilation=1,
style='pytorch', style='pytorch',
with_cp=False): with_cp=False,
normalize=dict(type='BN'),
dcn=None):
downsample = None downsample = None
if stride != 1 or inplanes != planes * block.expansion: if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential( downsample = nn.Sequential(
...@@ -161,7 +245,7 @@ def make_res_layer(block, ...@@ -161,7 +245,7 @@ def make_res_layer(block,
kernel_size=1, kernel_size=1,
stride=stride, stride=stride,
bias=False), bias=False),
nn.BatchNorm2d(planes * block.expansion), build_norm_layer(normalize, planes * block.expansion)[1],
) )
layers = [] layers = []
...@@ -173,15 +257,26 @@ def make_res_layer(block, ...@@ -173,15 +257,26 @@ def make_res_layer(block,
dilation, dilation,
downsample, downsample,
style=style, style=style,
with_cp=with_cp)) with_cp=with_cp,
normalize=normalize,
dcn=dcn))
inplanes = planes * block.expansion inplanes = planes * block.expansion
for i in range(1, blocks): for i in range(1, blocks):
layers.append( layers.append(
block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) block(
inplanes,
planes,
1,
dilation,
style=style,
with_cp=with_cp,
normalize=normalize,
dcn=dcn))
return nn.Sequential(*layers) return nn.Sequential(*layers)
@BACKBONES.register_module
class ResNet(nn.Module): class ResNet(nn.Module):
"""ResNet backbone. """ResNet backbone.
...@@ -196,11 +291,14 @@ class ResNet(nn.Module): ...@@ -196,11 +291,14 @@ class ResNet(nn.Module):
the first 1x1 conv layer. the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters. not freezing any parameters.
bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze normalize (dict): dictionary to construct and config norm layer.
running stats (mean and var). norm_eval (bool): Whether to set norm layers to eval mode, namely,
bn_frozen (bool): Whether to freeze weight and bias of BN layers. freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. memory while slowing down the training speed.
zero_init_residual (bool): whether to use zero init for last norm layer
in resblocks to let them behave as identity.
""" """
arch_settings = { arch_settings = {
...@@ -219,9 +317,12 @@ class ResNet(nn.Module): ...@@ -219,9 +317,12 @@ class ResNet(nn.Module):
out_indices=(0, 1, 2, 3), out_indices=(0, 1, 2, 3),
style='pytorch', style='pytorch',
frozen_stages=-1, frozen_stages=-1,
bn_eval=True, normalize=dict(type='BN', frozen=False),
bn_frozen=False, norm_eval=True,
with_cp=False): dcn=None,
stage_with_dcn=(False, False, False, False),
with_cp=False,
zero_init_residual=True):
super(ResNet, self).__init__() super(ResNet, self).__init__()
if depth not in self.arch_settings: if depth not in self.arch_settings:
raise KeyError('invalid depth {} for resnet'.format(depth)) raise KeyError('invalid depth {} for resnet'.format(depth))
...@@ -230,29 +331,29 @@ class ResNet(nn.Module): ...@@ -230,29 +331,29 @@ class ResNet(nn.Module):
assert num_stages >= 1 and num_stages <= 4 assert num_stages >= 1 and num_stages <= 4
self.strides = strides self.strides = strides
self.dilations = dilations self.dilations = dilations
assert len(strides) == len(dilations) == num_stages assert len(strides) == len(dilations) == len(
stage_with_dcn) == num_stages
self.out_indices = out_indices self.out_indices = out_indices
assert max(out_indices) < num_stages assert max(out_indices) < num_stages
self.style = style self.style = style
self.frozen_stages = frozen_stages self.frozen_stages = frozen_stages
self.bn_eval = bn_eval self.normalize = normalize
self.bn_frozen = bn_frozen
self.with_cp = with_cp self.with_cp = with_cp
self.norm_eval = norm_eval
self.dcn = dcn
self.stage_with_dcn = stage_with_dcn
self.zero_init_residual = zero_init_residual
self.block, stage_blocks = self.arch_settings[depth] self.block, stage_blocks = self.arch_settings[depth]
self.stage_blocks = stage_blocks[:num_stages] self.stage_blocks = stage_blocks[:num_stages]
self.inplanes = 64 self.inplanes = 64
self.conv1 = nn.Conv2d( self._make_stem_layer()
3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.res_layers = [] self.res_layers = []
for i, num_blocks in enumerate(self.stage_blocks): for i, num_blocks in enumerate(self.stage_blocks):
stride = strides[i] stride = strides[i]
dilation = dilations[i] dilation = dilations[i]
dcn = self.dcn if self.stage_with_dcn[i] else None
planes = 64 * 2**i planes = 64 * 2**i
res_layer = make_res_layer( res_layer = make_res_layer(
self.block, self.block,
...@@ -262,15 +363,43 @@ class ResNet(nn.Module): ...@@ -262,15 +363,43 @@ class ResNet(nn.Module):
stride=stride, stride=stride,
dilation=dilation, dilation=dilation,
style=self.style, style=self.style,
with_cp=with_cp) with_cp=with_cp,
normalize=normalize,
dcn=dcn)
self.inplanes = planes * self.block.expansion self.inplanes = planes * self.block.expansion
layer_name = 'layer{}'.format(i + 1) layer_name = 'layer{}'.format(i + 1)
self.add_module(layer_name, res_layer) self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name) self.res_layers.append(layer_name)
self._freeze_stages()
self.feat_dim = self.block.expansion * 64 * 2**( self.feat_dim = self.block.expansion * 64 * 2**(
len(self.stage_blocks) - 1) len(self.stage_blocks) - 1)
@property
def norm1(self):
return getattr(self, self.norm1_name)
def _make_stem_layer(self):
self.conv1 = nn.Conv2d(
3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.norm1_name, norm1 = build_norm_layer(
self.normalize, 64, postfix=1)
self.add_module(self.norm1_name, norm1)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def _freeze_stages(self):
if self.frozen_stages >= 0:
for m in [self.conv1, self.norm1]:
for param in m.parameters():
param.requires_grad = False
for i in range(1, self.frozen_stages + 1):
m = getattr(self, 'layer{}'.format(i))
for param in m.parameters():
param.requires_grad = False
def init_weights(self, pretrained=None): def init_weights(self, pretrained=None):
if isinstance(pretrained, str): if isinstance(pretrained, str):
logger = logging.getLogger() logger = logging.getLogger()
...@@ -279,14 +408,27 @@ class ResNet(nn.Module): ...@@ -279,14 +408,27 @@ class ResNet(nn.Module):
for m in self.modules(): for m in self.modules():
if isinstance(m, nn.Conv2d): if isinstance(m, nn.Conv2d):
kaiming_init(m) kaiming_init(m)
elif isinstance(m, nn.BatchNorm2d): elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
constant_init(m, 1) constant_init(m, 1)
if self.dcn is not None:
for m in self.modules():
if isinstance(m, Bottleneck) and hasattr(
m, 'conv2_offset'):
constant_init(m.conv2_offset, 0)
if self.zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
constant_init(m.norm3, 0)
elif isinstance(m, BasicBlock):
constant_init(m.norm2, 0)
else: else:
raise TypeError('pretrained must be a str or None') raise TypeError('pretrained must be a str or None')
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
x = self.bn1(x) x = self.norm1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outs = [] outs = []
...@@ -302,23 +444,8 @@ class ResNet(nn.Module): ...@@ -302,23 +444,8 @@ class ResNet(nn.Module):
def train(self, mode=True): def train(self, mode=True):
super(ResNet, self).train(mode) super(ResNet, self).train(mode)
if self.bn_eval: if mode and self.norm_eval:
for m in self.modules(): for m in self.modules():
# trick: eval have effect on BatchNorm only
if isinstance(m, nn.BatchNorm2d): if isinstance(m, nn.BatchNorm2d):
m.eval() m.eval()
if self.bn_frozen:
for params in m.parameters():
params.requires_grad = False
if mode and self.frozen_stages >= 0:
for param in self.conv1.parameters():
param.requires_grad = False
for param in self.bn1.parameters():
param.requires_grad = False
self.bn1.eval()
self.bn1.weight.requires_grad = False
self.bn1.bias.requires_grad = False
for i in range(1, self.frozen_stages + 1):
mod = getattr(self, 'layer{}'.format(i))
mod.eval()
for param in mod.parameters():
param.requires_grad = False
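Net effect of this hunk: parameter freezing moves out of train() (it is handled once by _freeze_stages in __init__), and norm_eval only toggles BatchNorm statistics. A sketch of the resulting behaviour, assuming the constructor arguments shown earlier:

    model = ResNet(depth=50, frozen_stages=1, norm_eval=True)
    model.train()
    # BatchNorm layers are put back into eval mode, so running mean/var
    # stay fixed, while stem + layer1 remain frozen from construction.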
...@@ -2,8 +2,11 @@ import math ...@@ -2,8 +2,11 @@ import math
import torch.nn as nn import torch.nn as nn
from .resnet import ResNet from mmdet.ops import DeformConv, ModulatedDeformConv
from .resnet import Bottleneck as _Bottleneck from .resnet import Bottleneck as _Bottleneck
from .resnet import ResNet
from ..registry import BACKBONES
from ..utils import build_norm_layer
class Bottleneck(_Bottleneck): class Bottleneck(_Bottleneck):
...@@ -20,26 +23,65 @@ class Bottleneck(_Bottleneck): ...@@ -20,26 +23,65 @@ class Bottleneck(_Bottleneck):
else: else:
width = math.floor(self.planes * (base_width / 64)) * groups width = math.floor(self.planes * (base_width / 64)) * groups
self.norm1_name, norm1 = build_norm_layer(
self.normalize, width, postfix=1)
self.norm2_name, norm2 = build_norm_layer(
self.normalize, width, postfix=2)
self.norm3_name, norm3 = build_norm_layer(
self.normalize, self.planes * self.expansion, postfix=3)
self.conv1 = nn.Conv2d( self.conv1 = nn.Conv2d(
self.inplanes, self.inplanes,
width, width,
kernel_size=1, kernel_size=1,
stride=self.conv1_stride, stride=self.conv1_stride,
bias=False) bias=False)
self.bn1 = nn.BatchNorm2d(width) self.add_module(self.norm1_name, norm1)
self.conv2 = nn.Conv2d(
    width,
    width,
    kernel_size=3,
    stride=self.conv2_stride,
    padding=self.dilation,
    dilation=self.dilation,
    groups=groups,
    bias=False)
self.bn2 = nn.BatchNorm2d(width)
fallback_on_stride = False
self.with_modulated_dcn = False
if self.with_dcn:
    fallback_on_stride = self.dcn.get('fallback_on_stride', False)
    self.with_modulated_dcn = self.dcn.get('modulated', False)
if not self.with_dcn or fallback_on_stride:
    self.conv2 = nn.Conv2d(
        width,
        width,
        kernel_size=3,
        stride=self.conv2_stride,
        padding=self.dilation,
        dilation=self.dilation,
        groups=groups,
        bias=False)
else:
    groups = self.dcn.get('groups', 1)
    deformable_groups = self.dcn.get('deformable_groups', 1)
    if not self.with_modulated_dcn:
        conv_op = DeformConv
        offset_channels = 18
    else:
        conv_op = ModulatedDeformConv
        offset_channels = 27
    self.conv2_offset = nn.Conv2d(
        width,
        deformable_groups * offset_channels,
        kernel_size=3,
        stride=self.conv2_stride,
        padding=self.dilation,
        dilation=self.dilation)
    self.conv2 = conv_op(
        width,
        width,
        kernel_size=3,
        stride=self.conv2_stride,
        padding=self.dilation,
        dilation=self.dilation,
        groups=groups,
        deformable_groups=deformable_groups,
        bias=False)
self.add_module(self.norm2_name, norm2)
self.conv3 = nn.Conv2d( self.conv3 = nn.Conv2d(
width, self.planes * self.expansion, kernel_size=1, bias=False) width, self.planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.planes * self.expansion) self.add_module(self.norm3_name, norm3)
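For reference, the offset-channel counts above follow directly from the 3x3 kernel:

    # DeformConv predicts an (dx, dy) offset per kernel sampling point;
    # ModulatedDeformConv additionally predicts a scalar modulation mask.
    kernel_points = 3 * 3
    assert 2 * kernel_points == 18  # plain DCN: (dx, dy) per point
    assert 3 * kernel_points == 27  # modulated DCN: (dx, dy, mask) per point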
def make_res_layer(block, def make_res_layer(block,
...@@ -51,7 +93,9 @@ def make_res_layer(block, ...@@ -51,7 +93,9 @@ def make_res_layer(block,
groups=1, groups=1,
base_width=4, base_width=4,
style='pytorch', style='pytorch',
with_cp=False): with_cp=False,
normalize=dict(type='BN'),
dcn=None):
downsample = None downsample = None
if stride != 1 or inplanes != planes * block.expansion: if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential( downsample = nn.Sequential(
...@@ -61,7 +105,7 @@ def make_res_layer(block, ...@@ -61,7 +105,7 @@ def make_res_layer(block,
kernel_size=1, kernel_size=1,
stride=stride, stride=stride,
bias=False), bias=False),
nn.BatchNorm2d(planes * block.expansion), build_norm_layer(normalize, planes * block.expansion)[1],
) )
layers = [] layers = []
...@@ -75,7 +119,9 @@ def make_res_layer(block, ...@@ -75,7 +119,9 @@ def make_res_layer(block,
groups=groups, groups=groups,
base_width=base_width, base_width=base_width,
style=style, style=style,
with_cp=with_cp)) with_cp=with_cp,
normalize=normalize,
dcn=dcn))
inplanes = planes * block.expansion inplanes = planes * block.expansion
for i in range(1, blocks): for i in range(1, blocks):
layers.append( layers.append(
...@@ -87,11 +133,14 @@ def make_res_layer(block, ...@@ -87,11 +133,14 @@ def make_res_layer(block,
groups=groups, groups=groups,
base_width=base_width, base_width=base_width,
style=style, style=style,
with_cp=with_cp)) with_cp=with_cp,
normalize=normalize,
dcn=dcn))
return nn.Sequential(*layers) return nn.Sequential(*layers)
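A hypothetical call for one ResNeXt-50 stage, showing how the new normalize/dcn arguments thread through (the positional parameters inplanes/planes/blocks are elided in the hunk above, so their order here is an assumption):

    layer2 = make_res_layer(
        Bottleneck,
        inplanes=256,
        planes=128,
        blocks=4,
        stride=2,
        groups=32,
        base_width=4,
        style='pytorch',
        normalize=dict(type='BN'),
        dcn=None)  # or e.g. dict(modulated=True, deformable_groups=1)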
@BACKBONES.register_module
class ResNeXt(ResNet): class ResNeXt(ResNet):
"""ResNeXt backbone. """ResNeXt backbone.
...@@ -108,11 +157,14 @@ class ResNeXt(ResNet): ...@@ -108,11 +157,14 @@ class ResNeXt(ResNet):
the first 1x1 conv layer. the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters. not freezing any parameters.
bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
    running stats (mean and var).
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
normalize (dict): Dictionary used to construct and configure the norm layer.
norm_eval (bool): Whether to set norm layers to eval mode, namely, freeze
    running stats (mean and var). Note: this affects BatchNorm and its
    variants only.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. memory while slowing down the training speed.
zero_init_residual (bool): Whether to use zero init for the last norm
    layer in resblocks, letting them behave as identity mappings.
""" """
arch_settings = { arch_settings = {
...@@ -131,6 +183,7 @@ class ResNeXt(ResNet): ...@@ -131,6 +183,7 @@ class ResNeXt(ResNet):
for i, num_blocks in enumerate(self.stage_blocks): for i, num_blocks in enumerate(self.stage_blocks):
stride = self.strides[i] stride = self.strides[i]
dilation = self.dilations[i] dilation = self.dilations[i]
dcn = self.dcn if self.stage_with_dcn[i] else None
planes = 64 * 2**i planes = 64 * 2**i
res_layer = make_res_layer( res_layer = make_res_layer(
self.block, self.block,
...@@ -142,8 +195,12 @@ class ResNeXt(ResNet): ...@@ -142,8 +195,12 @@ class ResNeXt(ResNet):
groups=self.groups, groups=self.groups,
base_width=self.base_width, base_width=self.base_width,
style=self.style, style=self.style,
with_cp=self.with_cp) with_cp=self.with_cp,
normalize=self.normalize,
dcn=dcn)
self.inplanes = planes * self.block.expansion self.inplanes = planes * self.block.expansion
layer_name = 'layer{}'.format(i + 1) layer_name = 'layer{}'.format(i + 1)
self.add_module(layer_name, res_layer) self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name) self.res_layers.append(layer_name)
self._freeze_stages()
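A hypothetical construction enabling DCN on the last three stages (the stage_with_dcn argument is inferred from the dcn = self.dcn if self.stage_with_dcn[i] line above):

    model = ResNeXt(
        depth=50,
        groups=32,
        base_width=4,
        dcn=dict(modulated=False, deformable_groups=1,
                 fallback_on_stride=False),
        stage_with_dcn=(False, True, True, True))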
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init,
normal_init)
from mmcv.runner import load_checkpoint
from ..registry import BACKBONES
@BACKBONES.register_module
class SSDVGG(VGG):
extra_setting = {
300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),
}
def __init__(self,
input_size,
depth,
with_last_pool=False,
ceil_mode=True,
out_indices=(3, 4),
out_feature_indices=(22, 34),
l2_norm_scale=20.):
super(SSDVGG, self).__init__(
depth,
with_last_pool=with_last_pool,
ceil_mode=ceil_mode,
out_indices=out_indices)
assert input_size in (300, 512)
self.input_size = input_size
self.features.add_module(
str(len(self.features)),
nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
self.features.add_module(
str(len(self.features)),
nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
self.features.add_module(
str(len(self.features)), nn.ReLU(inplace=True))
self.features.add_module(
str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))
self.features.add_module(
str(len(self.features)), nn.ReLU(inplace=True))
self.out_feature_indices = out_feature_indices
self.inplanes = 1024
self.extra = self._make_extra_layers(self.extra_setting[input_size])
self.l2_norm = L2Norm(
self.features[out_feature_indices[0] - 1].out_channels,
l2_norm_scale)
def init_weights(self, pretrained=None):
if isinstance(pretrained, str):
logger = logging.getLogger()
load_checkpoint(self, pretrained, strict=False, logger=logger)
elif pretrained is None:
for m in self.features.modules():
if isinstance(m, nn.Conv2d):
kaiming_init(m)
elif isinstance(m, nn.BatchNorm2d):
constant_init(m, 1)
elif isinstance(m, nn.Linear):
normal_init(m, std=0.01)
else:
raise TypeError('pretrained must be a str or None')
for m in self.extra.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')
constant_init(self.l2_norm, self.l2_norm.scale)
def forward(self, x):
outs = []
for i, layer in enumerate(self.features):
x = layer(x)
if i in self.out_feature_indices:
outs.append(x)
for i, layer in enumerate(self.extra):
x = F.relu(layer(x), inplace=True)
if i % 2 == 1:
outs.append(x)
outs[0] = self.l2_norm(outs[0])
if len(outs) == 1:
return outs[0]
else:
return tuple(outs)
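With the defaults above, this forward pass yields six feature maps for SSD300: two from the VGG body (feature indices 22 and 34) and one after every second extra conv (i % 2 == 1). A rough sketch, assuming those defaults:

    import torch

    model = SSDVGG(input_size=300, depth=16)
    model.init_weights()
    feats = model(torch.randn(1, 3, 300, 300))
    # len(feats) == 6 with the default out_feature_indices and extras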
def _make_extra_layers(self, outplanes):
layers = []
kernel_sizes = (1, 3)
num_layers = 0
outplane = None
for i in range(len(outplanes)):
if self.inplanes == 'S':
self.inplanes = outplane
continue
k = kernel_sizes[num_layers % 2]
if outplanes[i] == 'S':
outplane = outplanes[i + 1]
conv = nn.Conv2d(
self.inplanes, outplane, k, stride=2, padding=1)
else:
outplane = outplanes[i]
conv = nn.Conv2d(
self.inplanes, outplane, k, stride=1, padding=0)
layers.append(conv)
self.inplanes = outplanes[i]
num_layers += 1
if self.input_size == 512:
layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))
return nn.Sequential(*layers)
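The 'S' entries in extra_setting are stride markers: the next conv uses stride=2 with padding=1 and takes its out_channels from the entry following 'S'; kernel sizes alternate 1, 3 via kernel_sizes[num_layers % 2]. Traced for input_size=300:

    # (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256) expands to:
    #   Conv2d(1024, 256, 1)   Conv2d(256, 512, 3, stride=2, padding=1)
    #   Conv2d(512, 128, 1)    Conv2d(128, 256, 3, stride=2, padding=1)
    #   Conv2d(256, 128, 1)    Conv2d(128, 256, 3)
    #   Conv2d(256, 128, 1)    Conv2d(128, 256, 3)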
class L2Norm(nn.Module):
def __init__(self, n_dims, scale=20., eps=1e-10):
super(L2Norm, self).__init__()
self.n_dims = n_dims
self.weight = nn.Parameter(torch.Tensor(self.n_dims))
self.eps = eps
self.scale = scale
def forward(self, x):
norm = x.pow(2).sum(1, keepdim=True).sqrt() + self.eps
return self.weight[None, :, None, None].expand_as(x) * x / norm
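L2Norm normalises each spatial position to unit length across channels, then applies a learnable per-channel scale (set to l2_norm_scale=20 by constant_init in init_weights above). A quick check, assuming those defaults:

    import torch
    import torch.nn as nn

    l2 = L2Norm(512)
    nn.init.constant_(l2.weight, l2.scale)  # mirrors constant_init above
    out = l2(torch.randn(2, 512, 38, 38))
    # every channel vector out[n, :, i, j] now has L2 norm ~= 20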
...@@ -4,8 +4,10 @@ import torch.nn.functional as F ...@@ -4,8 +4,10 @@ import torch.nn.functional as F
from mmdet.core import (delta2bbox, multiclass_nms, bbox_target, from mmdet.core import (delta2bbox, multiclass_nms, bbox_target,
weighted_cross_entropy, weighted_smoothl1, accuracy) weighted_cross_entropy, weighted_smoothl1, accuracy)
from ..registry import HEADS
@HEADS.register_module
class BBoxHead(nn.Module): class BBoxHead(nn.Module):
"""Simplest RoI head, with only two fc layers for classification and """Simplest RoI head, with only two fc layers for classification and
regression respectively""" regression respectively"""
...@@ -78,8 +80,14 @@ class BBoxHead(nn.Module): ...@@ -78,8 +80,14 @@ class BBoxHead(nn.Module):
target_stds=self.target_stds) target_stds=self.target_stds)
return cls_reg_targets return cls_reg_targets
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, reduce=True):
def loss(self,
         cls_score,
         bbox_pred,
         labels,
         label_weights,
         bbox_targets,
         bbox_weights,
         reduce=True):
losses = dict() losses = dict()
if cls_score is not None: if cls_score is not None:
losses['loss_cls'] = weighted_cross_entropy( losses['loss_cls'] = weighted_cross_entropy(
......