Commit 76168f9c authored by ThangVu

resolve conflict GN-dev with master

parents 8a086f02 c5d8f002
from .custom import CustomDataset
from .xml_style import XMLDataset
from .coco import CocoDataset
from .voc import VOCDataset
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann, get_dataset
from .concat_dataset import ConcatDataset
from .repeat_dataset import RepeatDataset
from .extra_aug import ExtraAugmentation
__all__ = [
'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',
'ExtraAugmentation'
]
......@@ -6,6 +6,21 @@ from .custom import CustomDataset
class CocoDataset(CustomDataset):
CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
def load_annotations(self, ann_file):
self.coco = COCO(ann_file)
self.cat_ids = self.coco.getCatIds()
......
......@@ -3,16 +3,18 @@ from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
class ConcatDataset(_ConcatDataset):
"""
Same as torch.utils.data.dataset.ConcatDataset, but
"""A wrapper of concatenated dataset.
Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
concat the group flag for image aspect ratio.
Args:
datasets (list[:obj:`Dataset`]): A list of datasets.
"""
def __init__(self, datasets):
"""
flag: Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
"""
super(ConcatDataset, self).__init__(datasets)
self.CLASSES = datasets[0].CLASSES
if hasattr(datasets[0], 'flag'):
flags = []
for i in range(0, len(datasets)):
......
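# A minimal usage sketch of the wrapper above, assuming VOC-style datasets
# that define a per-image `flag` array as CustomDataset does; the argument
# values are illustrative only, not taken from a shipped config:
#   voc07 = VOCDataset(ann_file='trainval07.txt', img_prefix='VOC2007/',
#                      img_scale=(1000, 600))
#   voc12 = VOCDataset(ann_file='trainval12.txt', img_prefix='VOC2012/',
#                      img_scale=(1000, 600))
#   merged = ConcatDataset([voc07, voc12])
#   assert merged.CLASSES == voc07.CLASSES
#   assert len(merged.flag) == len(voc07) + len(voc12)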
......@@ -8,6 +8,7 @@ from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, MaskTransform,
Numpy2Tensor)
from .utils import to_tensor, random_scale
from .extra_aug import ExtraAugmentation
class CustomDataset(Dataset):
......@@ -32,6 +33,8 @@ class CustomDataset(Dataset):
The `ann` field is optional for testing.
"""
CLASSES = None
def __init__(self,
ann_file,
img_prefix,
......@@ -44,7 +47,12 @@ class CustomDataset(Dataset):
with_mask=True,
with_crowd=True,
with_label=True,
extra_aug=None,
resize_keep_ratio=True,
test_mode=False):
# prefix of image paths
self.img_prefix = img_prefix
# load annotations (and proposals)
self.img_infos = self.load_annotations(ann_file)
if proposal_file is not None:
......@@ -58,8 +66,6 @@ class CustomDataset(Dataset):
if self.proposals is not None:
self.proposals = [self.proposals[i] for i in valid_inds]
# (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
self.img_scales = img_scale if isinstance(img_scale,
list) else [img_scale]
......@@ -96,6 +102,15 @@ class CustomDataset(Dataset):
self.mask_transform = MaskTransform()
self.numpy2tensor = Numpy2Tensor()
# extra augmentation, if configured
if extra_aug is not None:
self.extra_aug = ExtraAugmentation(**extra_aug)
else:
self.extra_aug = None
# whether to keep the aspect ratio when resizing images
self.resize_keep_ratio = resize_keep_ratio
def __len__(self):
return len(self.img_infos)
......@@ -174,11 +189,17 @@ class CustomDataset(Dataset):
if len(gt_bboxes) == 0:
return None
# extra augmentation
if self.extra_aug is not None:
img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
gt_labels)
# apply transforms
flip = True if np.random.rand() < self.flip_ratio else False
img_scale = random_scale(self.img_scales) # sample a scale
img, img_shape, pad_shape, scale_factor = self.img_transform(
img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
img = img.copy()
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape, scale_factor,
flip)
......@@ -230,7 +251,7 @@ class CustomDataset(Dataset):
def prepare_single(img, scale, flip, proposal=None):
_img, img_shape, pad_shape, scale_factor = self.img_transform(
img, scale, flip, keep_ratio=self.resize_keep_ratio)
_img = to_tensor(_img)
_img_meta = dict(
ori_shape=(img_info['height'], img_info['width'], 3),
......
import mmcv
import numpy as np
from numpy import random
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
class PhotoMetricDistortion(object):
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
def __call__(self, img, boxes, labels):
# random brightness
if random.randint(2):
delta = random.uniform(-self.brightness_delta,
self.brightness_delta)
img += delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha
# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)
# random saturation
if random.randint(2):
img[..., 1] *= random.uniform(self.saturation_lower,
self.saturation_upper)
# random hue
if random.randint(2):
img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)
# random contrast
if mode == 0:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha
# randomly swap channels
if random.randint(2):
img = img[..., random.permutation(3)]
return img, boxes, labels
class Expand(object):
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
else:
self.mean = mean
self.min_ratio, self.max_ratio = ratio_range
def __call__(self, img, boxes, labels):
if random.randint(2):
return img, boxes, labels
h, w, c = img.shape
ratio = random.uniform(self.min_ratio, self.max_ratio)
expand_img = np.full((int(h * ratio), int(w * ratio), c),
self.mean).astype(img.dtype)
left = int(random.uniform(0, w * ratio - w))
top = int(random.uniform(0, h * ratio - h))
expand_img[top:top + h, left:left + w] = img
img = expand_img
boxes += np.tile((left, top), 2)
return img, boxes, labels
class RandomCrop(object):
def __init__(self,
min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
min_crop_size=0.3):
# mode 1: return the original image unchanged
self.sample_mode = (1, *min_ious, 0)
self.min_crop_size = min_crop_size
def __call__(self, img, boxes, labels):
h, w, c = img.shape
while True:
mode = random.choice(self.sample_mode)
if mode == 1:
return img, boxes, labels
min_iou = mode
for i in range(50):
new_w = random.uniform(self.min_crop_size * w, w)
new_h = random.uniform(self.min_crop_size * h, h)
# h / w in [0.5, 2]
if new_h / new_w < 0.5 or new_h / new_w > 2:
continue
left = random.uniform(w - new_w)
top = random.uniform(h - new_h)
patch = np.array((int(left), int(top), int(left + new_w),
int(top + new_h)))
overlaps = bbox_overlaps(
patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
if overlaps.min() < min_iou:
continue
# the centers of the boxes should be inside the cropped patch
center = (boxes[:, :2] + boxes[:, 2:]) / 2
mask = (center[:, 0] > patch[0]) * (
center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * (
center[:, 1] < patch[3])
if not mask.any():
continue
boxes = boxes[mask]
labels = labels[mask]
# adjust boxes
img = img[patch[1]:patch[3], patch[0]:patch[2]]
boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
boxes -= np.tile(patch[:2], 2)
return img, boxes, labels
class ExtraAugmentation(object):
def __init__(self,
photo_metric_distortion=None,
expand=None,
random_crop=None):
self.transforms = []
if photo_metric_distortion is not None:
self.transforms.append(
PhotoMetricDistortion(**photo_metric_distortion))
if expand is not None:
self.transforms.append(Expand(**expand))
if random_crop is not None:
self.transforms.append(RandomCrop(**random_crop))
def __call__(self, img, boxes, labels):
img = img.astype(np.float32)
for transform in self.transforms:
img, boxes, labels = transform(img, boxes, labels)
return img, boxes, labels
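# A minimal sketch of driving the new hook from a dataset config; the
# keyword values below are illustrative, not taken from a shipped config.
extra_aug = dict(
    photo_metric_distortion=dict(
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    expand=dict(mean=(123.675, 116.28, 103.53), to_rgb=True,
                ratio_range=(1, 4)),
    random_crop=dict(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3))
aug = ExtraAugmentation(**extra_aug)  # as CustomDataset does internally
# img (H, W, 3 ndarray), boxes (N, 4), labels (N,) -> augmented triple:
# img, boxes, labels = aug(img, boxes, labels)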
......@@ -6,12 +6,14 @@ class RepeatDataset(object):
def __init__(self, dataset, times):
self.dataset = dataset
self.times = times
self.CLASSES = dataset.CLASSES
if hasattr(self.dataset, 'flag'):
self.flag = np.tile(self.dataset.flag, times)
self._ori_len = len(self.dataset)
def __getitem__(self, idx):
return self.dataset[idx % self._ori_len]
def __len__(self):
return self.times * self._ori_len
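# The repeat is plain index arithmetic; behavior sketch, assuming an
# underlying dataset of length 4 wrapped with times=3:
#   len(repeated) == 12
#   repeated[5] is dataset[5 % 4], i.e. dataset[1]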
......@@ -25,8 +25,14 @@ class ImageTransform(object):
self.to_rgb = to_rgb
self.size_divisor = size_divisor
def __call__(self, img, scale, flip=False, keep_ratio=True):
if keep_ratio:
img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
else:
img, w_scale, h_scale = mmcv.imresize(
img, scale, return_scale=True)
scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
dtype=np.float32)
img_shape = img.shape
img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
if flip:
......
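# The new keep_ratio switch picks between two mmcv resizing modes: imrescale
# preserves the aspect ratio and returns one scalar factor, while imresize
# stretches to the exact (w, h) size, so the transform packs the two per-axis
# factors into a 4-vector that can scale (x1, y1, x2, y2) boxes. A minimal
# sketch, assuming mmcv and numpy are installed:
import mmcv
import numpy as np

img = np.zeros((600, 800, 3), dtype=np.uint8)
# keep_ratio=True path: one float scale factor
_, scale_factor = mmcv.imrescale(img, (1333, 800), return_scale=True)
# keep_ratio=False path: separate width/height factors
_, w_scale, h_scale = mmcv.imresize(img, (1333, 800), return_scale=True)
bbox_scale = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32)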
from .xml_style import XMLDataset
class VOCDataset(XMLDataset):
CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
'tvmonitor')
def __init__(self, **kwargs):
super(VOCDataset, self).__init__(**kwargs)
if 'VOC2007' in self.img_prefix:
self.year = 2007
elif 'VOC2012' in self.img_prefix:
self.year = 2012
else:
raise ValueError('Cannot infer dataset year from img_prefix')
import os.path as osp
import xml.etree.ElementTree as ET
import mmcv
import numpy as np
from .custom import CustomDataset
class XMLDataset(CustomDataset):
def __init__(self, **kwargs):
super(XMLDataset, self).__init__(**kwargs)
self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
def load_annotations(self, ann_file):
img_infos = []
img_ids = mmcv.list_from_file(ann_file)
for img_id in img_ids:
filename = 'JPEGImages/{}.jpg'.format(img_id)
xml_path = osp.join(self.img_prefix, 'Annotations',
'{}.xml'.format(img_id))
tree = ET.parse(xml_path)
root = tree.getroot()
size = root.find('size')
width = int(size.find('width').text)
height = int(size.find('height').text)
img_infos.append(
dict(id=img_id, filename=filename, width=width, height=height))
return img_infos
def get_ann_info(self, idx):
img_id = self.img_infos[idx]['id']
xml_path = osp.join(self.img_prefix, 'Annotations',
'{}.xml'.format(img_id))
tree = ET.parse(xml_path)
root = tree.getroot()
bboxes = []
labels = []
bboxes_ignore = []
labels_ignore = []
for obj in root.findall('object'):
name = obj.find('name').text
label = self.cat2label[name]
difficult = int(obj.find('difficult').text)
bnd_box = obj.find('bndbox')
bbox = [
int(bnd_box.find('xmin').text),
int(bnd_box.find('ymin').text),
int(bnd_box.find('xmax').text),
int(bnd_box.find('ymax').text)
]
if difficult:
bboxes_ignore.append(bbox)
labels_ignore.append(label)
else:
bboxes.append(bbox)
labels.append(label)
if not bboxes:
bboxes = np.zeros((0, 4))
labels = np.zeros((0, ))
else:
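# PASCAL VOC annotations use 1-based pixel coordinates, hence the "- 1"
# to convert to the 0-based convention used here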
bboxes = np.array(bboxes, ndmin=2) - 1
labels = np.array(labels)
if not bboxes_ignore:
bboxes_ignore = np.zeros((0, 4))
labels_ignore = np.zeros((0, ))
else:
bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
labels_ignore = np.array(labels_ignore)
ann = dict(
bboxes=bboxes.astype(np.float32),
labels=labels.astype(np.int64),
bboxes_ignore=bboxes_ignore.astype(np.float32),
labels_ignore=labels_ignore.astype(np.int64))
return ann
from .resnet import ResNet
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
__all__ = ['ResNet', 'ResNeXt', 'SSDVGG']
......@@ -53,7 +53,7 @@ class BasicBlock(nn.Module):
assert not with_cp
def forward(self, x):
identity = x
out = self.conv1(x)
out = getattr(self, self.norm_names[0])(out)
......@@ -63,9 +63,9 @@ class BasicBlock(nn.Module):
out = getattr(self, self.norm_names[1])(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
......@@ -83,25 +83,31 @@ class Bottleneck(nn.Module):
style='pytorch',
with_cp=False,
normalize=dict(type='BN')):
"""Bottleneck block.
"""Bottleneck block for ResNet.
If style is "pytorch", the stride-two layer is the 3x3 conv layer,
if it is "caffe", the stride-two layer is the first 1x1 conv layer.
"""
super(Bottleneck, self).__init__()
assert style in ['pytorch', 'caffe']
self.inplanes = inplanes
self.planes = planes
if style == 'pytorch':
self.conv1_stride = 1
self.conv2_stride = stride
else:
self.conv1_stride = stride
self.conv2_stride = 1
self.conv1 = nn.Conv2d(
inplanes,
planes,
kernel_size=1,
stride=self.conv1_stride,
bias=False)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=self.conv2_stride,
padding=dilation,
dilation=dilation,
bias=False)
......@@ -127,7 +133,7 @@ class Bottleneck(nn.Module):
def forward(self, x):
def _inner_forward(x):
identity = x
out = self.conv1(x)
out = getattr(self, self.norm_names[0])(out)
......@@ -141,9 +147,9 @@ class Bottleneck(nn.Module):
out = getattr(self, self.norm_names[2])(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
return out
......@@ -243,11 +249,19 @@ class ResNet(nn.Module):
super(ResNet, self).__init__()
if depth not in self.arch_settings:
raise KeyError('invalid depth {} for resnet'.format(depth))
self.depth = depth
self.num_stages = num_stages
assert num_stages >= 1 and num_stages <= 4
self.strides = strides
self.dilations = dilations
assert len(strides) == len(dilations) == num_stages
self.out_indices = out_indices
assert max(out_indices) < num_stages
self.style = style
self.with_cp = with_cp
self.frozen_stages = frozen_stages
assert isinstance(normalize, dict) and 'type' in normalize
assert normalize['type'] in ['BN', 'GN']
......@@ -256,17 +270,15 @@ class ResNet(nn.Module):
else:
assert (set(['type', 'bn_eval', 'bn_frozen'])
== set(normalize))
if normalize['type'] == 'BN':
self.bn_eval = normalize['bn_eval']
self.bn_frozen = normalize['bn_frozen']
self.normalize = normalize
self.block, stage_blocks = self.arch_settings[depth]
self.stage_blocks = stage_blocks[:num_stages]
self.inplanes = 64
self.conv1 = nn.Conv2d(
3, 64, kernel_size=7, stride=2, padding=3, bias=False)
stem_norm = build_norm_layer(normalize, 64)
......@@ -276,12 +288,12 @@ class ResNet(nn.Module):
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.res_layers = []
for i, num_blocks in enumerate(self.stage_blocks):
stride = strides[i]
dilation = dilations[i]
planes = 64 * 2**i
res_layer = make_res_layer(
self.block,
self.inplanes,
planes,
num_blocks,
......@@ -290,12 +302,13 @@ class ResNet(nn.Module):
style=self.style,
with_cp=with_cp,
normalize=normalize)
self.inplanes = planes * self.block.expansion
layer_name = 'layer{}'.format(i + 1)
self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name)
self.feat_dim = self.block.expansion * 64 * 2**(
len(self.stage_blocks) - 1)
def init_weights(self, pretrained=None):
if isinstance(pretrained, str):
......
import math
import torch.nn as nn
from .resnet import ResNet
from .resnet import Bottleneck as _Bottleneck
class Bottleneck(_Bottleneck):
def __init__(self, *args, groups=1, base_width=4, **kwargs):
"""Bottleneck block for ResNeXt.
If style is "pytorch", the stride-two layer is the 3x3 conv layer,
if it is "caffe", the stride-two layer is the first 1x1 conv layer.
"""
super(Bottleneck, self).__init__(*args, **kwargs)
if groups == 1:
width = self.planes
else:
width = math.floor(self.planes * (base_width / 64)) * groups
self.conv1 = nn.Conv2d(
self.inplanes,
width,
kernel_size=1,
stride=self.conv1_stride,
bias=False)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(
width,
width,
kernel_size=3,
stride=self.conv2_stride,
padding=self.dilation,
dilation=self.dilation,
groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(
width, self.planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.planes * self.expansion)
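# A worked instance of the width computation above, using the standard
# ResNeXt-50 32x4d setting (groups=32, base_width=4) at the first stage:
import math
planes, groups, base_width = 64, 32, 4
width = math.floor(planes * (base_width / 64)) * groups
assert width == 128  # the grouped 3x3 conv widens from 64 to 128 channels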
def make_res_layer(block,
inplanes,
planes,
blocks,
stride=1,
dilation=1,
groups=1,
base_width=4,
style='pytorch',
with_cp=False):
downsample = None
if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(
block(
inplanes,
planes,
stride=stride,
dilation=dilation,
downsample=downsample,
groups=groups,
base_width=base_width,
style=style,
with_cp=with_cp))
inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(
block(
inplanes,
planes,
stride=1,
dilation=dilation,
groups=groups,
base_width=base_width,
style=style,
with_cp=with_cp))
return nn.Sequential(*layers)
class ResNeXt(ResNet):
"""ResNeXt backbone.
Args:
depth (int): Depth of resnext, from {50, 101, 152}.
num_stages (int): Resnext stages, normally 4.
groups (int): Number of groups for the grouped 3x3 convolutions.
base_width (int): Base channel width per group.
strides (Sequence[int]): Strides of the first block of each stage.
dilations (Sequence[int]): Dilation of each stage.
out_indices (Sequence[int]): Output from which stages.
style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
layer is the 3x3 conv layer, otherwise the stride-two layer is
the first 1x1 conv layer.
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
not freezing any parameters.
bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
running stats (mean and var).
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed.
"""
arch_settings = {
50: (Bottleneck, (3, 4, 6, 3)),
101: (Bottleneck, (3, 4, 23, 3)),
152: (Bottleneck, (3, 8, 36, 3))
}
def __init__(self, groups=1, base_width=4, **kwargs):
super(ResNeXt, self).__init__(**kwargs)
self.groups = groups
self.base_width = base_width
self.inplanes = 64
self.res_layers = []
for i, num_blocks in enumerate(self.stage_blocks):
stride = self.strides[i]
dilation = self.dilations[i]
planes = 64 * 2**i
res_layer = make_res_layer(
self.block,
self.inplanes,
planes,
num_blocks,
stride=stride,
dilation=dilation,
groups=self.groups,
base_width=self.base_width,
style=self.style,
with_cp=self.with_cp)
self.inplanes = planes * self.block.expansion
layer_name = 'layer{}'.format(i + 1)
self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name)
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init,
normal_init)
from mmcv.runner import load_checkpoint
class SSDVGG(VGG):
extra_setting = {
300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),
}
def __init__(self,
input_size,
depth,
with_last_pool=False,
ceil_mode=True,
out_indices=(3, 4),
out_feature_indices=(22, 34),
l2_norm_scale=20.):
super(SSDVGG, self).__init__(
depth,
with_last_pool=with_last_pool,
ceil_mode=ceil_mode,
out_indices=out_indices)
assert input_size in (300, 512)
self.input_size = input_size
self.features.add_module(
str(len(self.features)),
nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
self.features.add_module(
str(len(self.features)),
nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
self.features.add_module(
str(len(self.features)), nn.ReLU(inplace=True))
self.features.add_module(
str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))
self.features.add_module(
str(len(self.features)), nn.ReLU(inplace=True))
self.out_feature_indices = out_feature_indices
self.inplanes = 1024
self.extra = self._make_extra_layers(self.extra_setting[input_size])
self.l2_norm = L2Norm(
self.features[out_feature_indices[0] - 1].out_channels,
l2_norm_scale)
def init_weights(self, pretrained=None):
if isinstance(pretrained, str):
logger = logging.getLogger()
load_checkpoint(self, pretrained, strict=False, logger=logger)
elif pretrained is None:
for m in self.features.modules():
if isinstance(m, nn.Conv2d):
kaiming_init(m)
elif isinstance(m, nn.BatchNorm2d):
constant_init(m, 1)
elif isinstance(m, nn.Linear):
normal_init(m, std=0.01)
else:
raise TypeError('pretrained must be a str or None')
for m in self.extra.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')
constant_init(self.l2_norm, self.l2_norm.scale)
def forward(self, x):
outs = []
for i, layer in enumerate(self.features):
x = layer(x)
if i in self.out_feature_indices:
outs.append(x)
for i, layer in enumerate(self.extra):
x = F.relu(layer(x), inplace=True)
if i % 2 == 1:
outs.append(x)
outs[0] = self.l2_norm(outs[0])
if len(outs) == 1:
return outs[0]
else:
return tuple(outs)
def _make_extra_layers(self, outplanes):
layers = []
kernel_sizes = (1, 3)
num_layers = 0
outplane = None
for i in range(len(outplanes)):
if self.inplanes == 'S':
self.inplanes = outplane
continue
k = kernel_sizes[num_layers % 2]
if outplanes[i] == 'S':
outplane = outplanes[i + 1]
conv = nn.Conv2d(
self.inplanes, outplane, k, stride=2, padding=1)
else:
outplane = outplanes[i]
conv = nn.Conv2d(
self.inplanes, outplane, k, stride=1, padding=0)
layers.append(conv)
self.inplanes = outplanes[i]
num_layers += 1
if self.input_size == 512:
layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))
return nn.Sequential(*layers)
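# A trace of the loop above for input_size=300 may help: 'S' marks a
# stride-2 layer whose real width is the next entry, and kernel sizes
# alternate 1x1 / 3x3. Starting from inplanes=1024, the emitted convs are:
#   Conv2d(1024, 256, 1, stride=1, padding=0)
#   Conv2d(256, 512, 3, stride=2, padding=1)   # the first 'S'
#   Conv2d(512, 128, 1, stride=1, padding=0)
#   Conv2d(128, 256, 3, stride=2, padding=1)   # the second 'S'
#   Conv2d(256, 128, 1, stride=1, padding=0)
#   Conv2d(128, 256, 3, stride=1, padding=0)
#   Conv2d(256, 128, 1, stride=1, padding=0)
#   Conv2d(128, 256, 3, stride=1, padding=0)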
class L2Norm(nn.Module):
def __init__(self, n_dims, scale=20., eps=1e-10):
super(L2Norm, self).__init__()
self.n_dims = n_dims
self.weight = nn.Parameter(torch.Tensor(self.n_dims))
self.eps = eps
self.scale = scale
def forward(self, x):
norm = x.pow(2).sum(1, keepdim=True).sqrt() + self.eps
return self.weight[None, :, None, None].expand_as(x) * x / norm
......@@ -79,11 +79,11 @@ class BBoxHead(nn.Module):
return cls_reg_targets
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
bbox_weights, reduce=True):
losses = dict()
if cls_score is not None:
losses['loss_cls'] = weighted_cross_entropy(
cls_score, labels, label_weights, reduce=reduce)
losses['acc'] = accuracy(cls_score, labels)
if bbox_pred is not None:
losses['loss_reg'] = weighted_smoothl1(
......
......@@ -99,11 +99,12 @@ class BaseDetector(nn.Module):
if isinstance(dataset, str):
class_names = get_classes(dataset)
elif isinstance(dataset, (list, tuple)) or dataset is None:
class_names = dataset
else:
raise TypeError(
'dataset must be a valid dataset name or a sequence'
' of class names, not {}'.format(type(dataset)))
for img, img_meta in zip(imgs, img_metas):
h, w, _ = img_meta['img_shape']
......
......@@ -4,7 +4,7 @@ import torch.nn as nn
from .base import BaseDetector
from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
from .. import builder
from mmdet.core import bbox2roi, bbox2result, build_assigner, build_sampler
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
......@@ -102,13 +102,22 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
# assign gts and sample proposals
if self.with_bbox or self.with_mask:
bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
bbox_sampler = build_sampler(
self.train_cfg.rcnn.sampler, context=self)
num_imgs = img.size(0)
sampling_results = []
for i in range(num_imgs):
assign_result = bbox_assigner.assign(
proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],
gt_labels[i])
sampling_result = bbox_sampler.sample(
assign_result,
proposal_list[i],
gt_bboxes[i],
gt_labels[i],
feats=[lvl_feat[i][None] for lvl_feat in x])
sampling_results.append(sampling_result)
# bbox head forward and loss
if self.with_bbox:
......
......@@ -97,7 +97,11 @@ class FCNMaskHead(nn.Module):
def loss(self, mask_pred, mask_targets, labels):
loss = dict()
if self.class_agnostic:
loss_mask = mask_cross_entropy(mask_pred, mask_targets,
torch.zeros_like(labels))
else:
loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
loss['loss_mask'] = loss_mask
return loss
......
from .retina_head import RetinaHead
from .ssd_head import SSDHead
__all__ = ['RetinaHead', 'SSDHead']
from __future__ import division
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init
from mmdet.core import (AnchorGenerator, anchor_target, multi_apply,
delta2bbox, weighted_smoothl1, multiclass_nms)
class SSDHead(nn.Module):
def __init__(self,
input_size=300,
in_channels=(512, 1024, 512, 256, 256, 256),
num_classes=81,
anchor_strides=(8, 16, 32, 64, 100, 300),
basesize_ratio_range=(0.1, 0.9),
anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
target_means=(.0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0)):
super(SSDHead, self).__init__()
# construct head
num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]
self.in_channels = in_channels
self.num_classes = num_classes
self.cls_out_channels = num_classes
reg_convs = []
cls_convs = []
for i in range(len(in_channels)):
reg_convs.append(
nn.Conv2d(
in_channels[i],
num_anchors[i] * 4,
kernel_size=3,
padding=1))
cls_convs.append(
nn.Conv2d(
in_channels[i],
num_anchors[i] * num_classes,
kernel_size=3,
padding=1))
self.reg_convs = nn.ModuleList(reg_convs)
self.cls_convs = nn.ModuleList(cls_convs)
min_ratio, max_ratio = basesize_ratio_range
min_ratio = int(min_ratio * 100)
max_ratio = int(max_ratio * 100)
step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
min_sizes = []
max_sizes = []
for r in range(int(min_ratio), int(max_ratio) + 1, step):
min_sizes.append(int(input_size * r / 100))
max_sizes.append(int(input_size * (r + step) / 100))
if input_size == 300:
if basesize_ratio_range[0] == 0.15: # SSD300 COCO
min_sizes.insert(0, int(input_size * 7 / 100))
max_sizes.insert(0, int(input_size * 15 / 100))
elif basesize_ratio_range[0] == 0.2: # SSD300 VOC
min_sizes.insert(0, int(input_size * 10 / 100))
max_sizes.insert(0, int(input_size * 20 / 100))
elif input_size == 512:
if basesize_ratio_range[0] == 0.1: # SSD512 COCO
min_sizes.insert(0, int(input_size * 4 / 100))
max_sizes.insert(0, int(input_size * 10 / 100))
elif basesize_ratio_range[0] == 0.15: # SSD512 VOC
min_sizes.insert(0, int(input_size * 7 / 100))
max_sizes.insert(0, int(input_size * 15 / 100))
self.anchor_generators = []
self.anchor_strides = anchor_strides
for k in range(len(anchor_strides)):
base_size = min_sizes[k]
stride = anchor_strides[k]
ctr = ((stride - 1) / 2., (stride - 1) / 2.)
scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
ratios = [1.]
for r in anchor_ratios[k]:
ratios += [1 / r, r]  # 4 or 6 ratios per location
anchor_generator = AnchorGenerator(
base_size, scales, ratios, scale_major=False, ctr=ctr)
indices = list(range(len(ratios)))
indices.insert(1, len(indices))
anchor_generator.base_anchors = torch.index_select(
anchor_generator.base_anchors, 0, torch.LongTensor(indices))
self.anchor_generators.append(anchor_generator)
self.target_means = target_means
self.target_stds = target_stds
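# Worked numbers for the ratio arithmetic above, SSD300 COCO case
# (input_size=300, basesize_ratio_range=(0.15, 0.9)):
#   min_ratio, max_ratio = 15, 90; step = int(75 / 4) = 18
#   r in (15, 33, 51, 69, 87)
#   min_sizes = [45, 99, 153, 207, 261], max_sizes = [99, 153, 207, 261, 315]
#   the input_size == 300 branch then prepends 21 / 45 for the first level:
#   min_sizes = [21, 45, 99, 153, 207, 261]
#   max_sizes = [45, 99, 153, 207, 261, 315]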
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform', bias=0)
def forward(self, feats):
cls_scores = []
bbox_preds = []
for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,
self.cls_convs):
cls_scores.append(cls_conv(feat))
bbox_preds.append(reg_conv(feat))
return cls_scores, bbox_preds
def get_anchors(self, featmap_sizes, img_metas):
"""Get anchors according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.
Returns:
tuple: anchors of each image, valid flags of each image
"""
num_imgs = len(img_metas)
num_levels = len(featmap_sizes)
# since the feature map sizes of all images are the same, we compute
# the anchors only once
multi_level_anchors = []
for i in range(num_levels):
anchors = self.anchor_generators[i].grid_anchors(
featmap_sizes[i], self.anchor_strides[i])
multi_level_anchors.append(anchors)
anchor_list = [multi_level_anchors for _ in range(num_imgs)]
# for each image, we compute valid flags of multi level anchors
valid_flag_list = []
for img_id, img_meta in enumerate(img_metas):
multi_level_flags = []
for i in range(num_levels):
anchor_stride = self.anchor_strides[i]
feat_h, feat_w = featmap_sizes[i]
h, w, _ = img_meta['pad_shape']
valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
flags = self.anchor_generators[i].valid_flags(
(feat_h, feat_w), (valid_feat_h, valid_feat_w))
multi_level_flags.append(flags)
valid_flag_list.append(multi_level_flags)
return anchor_list, valid_flag_list
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
bbox_targets, bbox_weights, num_pos_samples, cfg):
loss_cls_all = F.cross_entropy(
cls_score, labels, reduction='none') * label_weights
pos_label_inds = (labels > 0).nonzero().view(-1)
neg_label_inds = (labels == 0).nonzero().view(-1)
num_sample_pos = pos_label_inds.size(0)
num_sample_neg = cfg.neg_pos_ratio * num_sample_pos
if num_sample_neg > neg_label_inds.size(0):
num_sample_neg = neg_label_inds.size(0)
topk_loss_cls_neg, topk_loss_cls_neg_inds = \
loss_cls_all[neg_label_inds].topk(num_sample_neg)
loss_cls_pos = loss_cls_all[pos_label_inds].sum()
loss_cls_neg = topk_loss_cls_neg.sum()
loss_cls = (loss_cls_pos + loss_cls_neg) / num_pos_samples
loss_reg = weighted_smoothl1(
bbox_pred,
bbox_targets,
bbox_weights,
beta=cfg.smoothl1_beta,
avg_factor=num_pos_samples)
return loss_cls[None], loss_reg
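# loss_single implements SSD-style online hard negative mining: every
# negative anchor is scored by its unreduced cross-entropy loss, and only
# the neg_pos_ratio * num_pos hardest ones are kept. For example, assuming
# cfg.neg_pos_ratio = 3, an image with 10 positive anchors contributes the
# 10 positive losses plus the 30 largest negative losses, both normalized
# by num_pos_samples.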
def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
cfg):
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == len(self.anchor_generators)
anchor_list, valid_flag_list = self.get_anchors(
featmap_sizes, img_metas)
cls_reg_targets = anchor_target(
anchor_list,
valid_flag_list,
gt_bboxes,
img_metas,
self.target_means,
self.target_stds,
cfg,
gt_labels_list=gt_labels,
cls_out_channels=self.cls_out_channels,
sampling=False,
unmap_outputs=False)
if cls_reg_targets is None:
return None
(labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
num_total_pos, num_total_neg) = cls_reg_targets
num_images = len(img_metas)
all_cls_scores = torch.cat([
s.permute(0, 2, 3, 1).contiguous().view(
num_images, -1, self.cls_out_channels) for s in cls_scores
], 1)
all_labels = torch.cat(labels_list, -1).view(num_images, -1)
all_label_weights = torch.cat(label_weights_list, -1).view(
num_images, -1)
all_bbox_preds = torch.cat([
b.permute(0, 2, 3, 1).contiguous().view(num_images, -1, 4)
for b in bbox_preds
], -2)
all_bbox_targets = torch.cat(bbox_targets_list, -2).view(
num_images, -1, 4)
all_bbox_weights = torch.cat(bbox_weights_list, -2).view(
num_images, -1, 4)
losses_cls, losses_reg = multi_apply(
self.loss_single,
all_cls_scores,
all_bbox_preds,
all_labels,
all_label_weights,
all_bbox_targets,
all_bbox_weights,
num_pos_samples=num_total_pos,
cfg=cfg)
return dict(loss_cls=losses_cls, loss_reg=losses_reg)
def get_det_bboxes(self,
cls_scores,
bbox_preds,
img_metas,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds)
num_levels = len(cls_scores)
mlvl_anchors = [
self.anchor_generators[i].grid_anchors(cls_scores[i].size()[-2:],
self.anchor_strides[i])
for i in range(num_levels)
]
result_list = []
for img_id in range(len(img_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
img_shape = img_metas[img_id]['img_shape']
scale_factor = img_metas[img_id]['scale_factor']
results = self._get_det_bboxes_single(
cls_score_list, bbox_pred_list, mlvl_anchors, img_shape,
scale_factor, cfg, rescale)
result_list.append(results)
return result_list
def _get_det_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_proposals = []
mlvl_scores = []
for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
cls_score = cls_score.permute(1, 2, 0).contiguous().view(
-1, self.cls_out_channels)
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2, 0).contiguous().view(-1, 4)
proposals = delta2bbox(anchors, bbox_pred, self.target_means,
self.target_stds, img_shape)
mlvl_proposals.append(proposals)
mlvl_scores.append(scores)
mlvl_proposals = torch.cat(mlvl_proposals)
if rescale:
mlvl_proposals /= mlvl_proposals.new_tensor(scale_factor)
mlvl_scores = torch.cat(mlvl_scores)
det_bboxes, det_labels = multiclass_nms(mlvl_proposals, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
return det_bboxes, det_labels
......@@ -12,7 +12,7 @@ def readme():
MAJOR = 0
MINOR = 5
PATCH = 5
SUFFIX = ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
......