Commit 108fc9e1 authored by Kai Chen

set up the codebase skeleton (WIP)

parent 6985ef31
import torch
from mmcv.ops import nms
import numpy as np
from ..bbox_ops import bbox_mapping_back
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
"""Merge augmented proposals (multiscale, flip, etc.)
Args:
aug_proposals (list[Tensor]): proposals from different testing
schemes, shape (n, 5). Note that they are not rescaled to the
original image size.
img_metas (list[dict]): image info including "shape_scale" and "flip".
rpn_test_cfg (dict): rpn test config.
Returns:
        Tensor: shape (n, 5), merged proposals (boxes with scores)
            corresponding to the original image scale.
"""
recovered_proposals = []
for proposals, img_info in zip(aug_proposals, img_metas):
shape_scale = img_info['shape_scale'][0]
flip = img_info['flip'][0]
_proposals = proposals.clone()
_proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale,
flip)
recovered_proposals.append(_proposals)
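    # merge all recovered proposals with NMS and keep at most
    # rpn_test_cfg.max_num of the highest-scoring survivors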
aug_proposals = torch.cat(recovered_proposals, dim=0)
    # mmcv.ops.nms takes boxes and scores separately and returns the kept
    # detections as a (k, 5) tensor of [x1, y1, x2, y2, score]
    merged_proposals, _ = nms(aug_proposals[:, :4].contiguous(),
                              aug_proposals[:, 4].contiguous(),
                              rpn_test_cfg.nms_thr)
scores = merged_proposals[:, 4]
_, order = scores.sort(0, descending=True)
num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
order = order[:num]
merged_proposals = merged_proposals[order, :]
return merged_proposals
def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
"""Merge augmented detection bboxes and scores.
Args:
aug_bboxes (list[Tensor]): shape (n, 4*#class)
aug_scores (list[Tensor] or None): shape (n, #class)
        img_metas (list[dict]): image info including "shape_scale" and "flip".
rcnn_test_cfg (dict): rcnn test config.
Returns:
tuple: (bboxes, scores)
"""
recovered_bboxes = []
for bboxes, img_info in zip(aug_bboxes, img_metas):
shape_scale = img_info['shape_scale'][0]
flip = img_info['flip'][0]
bboxes = bbox_mapping_back(bboxes, shape_scale, flip)
recovered_bboxes.append(bboxes)
bboxes = torch.stack(recovered_bboxes).mean(dim=0)
if aug_scores is None:
return bboxes
else:
scores = torch.stack(aug_scores).mean(dim=0)
return bboxes, scores
def merge_aug_scores(aug_scores):
"""Merge augmented bbox scores."""
if isinstance(aug_scores[0], torch.Tensor):
return torch.mean(torch.stack(aug_scores), dim=0)
else:
return np.mean(aug_scores, axis=0)
def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
"""Merge augmented mask prediction.
Args:
aug_masks (list[ndarray]): shape (n, #class, h, w)
        img_metas (list[dict]): image info including "flip".
        rcnn_test_cfg (dict): rcnn test config.
        weights (list, optional): per-prediction weights for averaging.
    Returns:
        ndarray: merged mask prediction.
"""
recovered_masks = [
mask if not img_info['flip'][0] else mask[..., ::-1]
for mask, img_info in zip(aug_masks, img_metas)
]
if weights is None:
merged_masks = np.mean(recovered_masks, axis=0)
else:
merged_masks = np.average(
np.array(recovered_masks), axis=0, weights=np.array(weights))
return merged_masks
from .anchor_target import anchor_target
from .bbox_target import bbox_target
from .mask_target import mask_target
__all__ = ['anchor_target', 'bbox_target', 'mask_target']
def anchor_target():
pass
from .coco import CocoDataset
from .collate import *
from .sampler import *
from .transforms import *
import os.path as osp
import mmcv
import numpy as np
from pycocotools.coco import COCO
from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform,
Numpy2Tensor)
from .utils import show_ann, random_scale
from .utils import DataContainer as DC
def parse_ann_info(ann_info, cat2label, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
cat2label (dict): The mapping from category ids to labels.
with_mask (bool): Whether to parse mask annotations.
Returns:
        dict: annotation info with keys "bboxes", "labels", "bboxes_ignore"
            and, if with_mask is True, "mask_polys" and "poly_lens".
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# each mask consists of one or several polys, each poly is a list of float.
if with_mask:
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
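        # convert COCO's [x, y, w, h] box to [x1, y1, x2, y2] with an
        # inclusive bottom-right corner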
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(cat2label[ann['category_id']])
if with_mask:
# Note polys are not resized
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
class CocoDataset(Dataset):
def __init__(self,
ann_file,
img_prefix,
img_scale,
img_norm_cfg,
size_divisor=None,
proposal_file=None,
num_max_proposals=1000,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True,
test_mode=False,
debug=False):
# path of the data file
self.coco = COCO(ann_file)
# filter images with no annotation during training
if not test_mode:
self.img_ids, self.img_infos = self._filter_imgs()
else:
self.img_ids = self.coco.getImgIds()
self.img_infos = [
self.coco.loadImgs(idx)[0] for idx in self.img_ids
]
assert len(self.img_ids) == len(self.img_infos)
# get the mapping from original category ids to labels
self.cat_ids = self.coco.getCatIds()
self.cat2label = {
cat_id: i + 1
for i, cat_id in enumerate(self.cat_ids)
}
# prefix of images path
self.img_prefix = img_prefix
# (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
self.img_scales = img_scale if isinstance(img_scale,
list) else [img_scale]
assert mmcv.is_list_of(self.img_scales, tuple)
# color channel order and normalize configs
self.img_norm_cfg = img_norm_cfg
# proposals
self.proposals = mmcv.load(
proposal_file) if proposal_file is not None else None
self.num_max_proposals = num_max_proposals
# flip ratio
self.flip_ratio = flip_ratio
assert flip_ratio >= 0 and flip_ratio <= 1
# padding border to ensure the image size can be divided by
# size_divisor (used for FPN)
self.size_divisor = size_divisor
# with crowd or not, False when using RetinaNet
self.with_crowd = with_crowd
# with mask or not
self.with_mask = with_mask
# with label is False for RPN
self.with_label = with_label
# in test mode or not
self.test_mode = test_mode
# debug mode or not
self.debug = debug
# set group flag for the sampler
self._set_group_flag()
# transforms
self.img_transform = ImageTransform(
size_divisor=self.size_divisor, **self.img_norm_cfg)
self.bbox_transform = BboxTransform()
self.mask_transform = PolyMaskTransform()
self.numpy2tensor = Numpy2Tensor()
def __len__(self):
return len(self.img_ids)
def _filter_imgs(self, min_size=32):
"""Filter images too small or without ground truths."""
img_ids = list(set([_['image_id'] for _ in self.coco.anns.values()]))
valid_ids = []
img_infos = []
for i in img_ids:
info = self.coco.loadImgs(i)[0]
if min(info['width'], info['height']) >= min_size:
valid_ids.append(i)
img_infos.append(info)
return valid_ids, img_infos
def _load_ann_info(self, idx):
img_id = self.img_ids[idx]
ann_ids = self.coco.getAnnIds(imgIds=img_id)
ann_info = self.coco.loadAnns(ann_ids)
return ann_info
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
"""
self.flag = np.zeros(len(self.img_ids), dtype=np.uint8)
for i in range(len(self.img_ids)):
img_info = self.img_infos[i]
if img_info['width'] / img_info['height'] > 1:
self.flag[i] = 1
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_img(idx)
while True:
img_info = self.img_infos[idx]
ann_info = self._load_ann_info(idx)
# load image
img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
if self.debug:
show_ann(self.coco, img, ann_info)
# load proposals if necessary
if self.proposals is not None:
proposals = self.proposals[idx][:self.num_max_proposals, :4]
# TODO: Handle empty proposals properly. Currently images with
# no proposals are just ignored, but they can be used for
# training in concept.
if len(proposals) == 0:
idx = self._rand_another(idx)
continue
ann = parse_ann_info(ann_info, self.cat2label, self.with_mask)
gt_bboxes = ann['bboxes']
gt_labels = ann['labels']
gt_bboxes_ignore = ann['bboxes_ignore']
# skip the image if there is no valid gt bbox
if len(gt_bboxes) == 0:
idx = self._rand_another(idx)
continue
# apply transforms
            flip = np.random.rand() < self.flip_ratio
img_scale = random_scale(self.img_scales) # sample a scale
img, img_shape, scale_factor = self.img_transform(
img, img_scale, flip)
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape,
scale_factor, flip)
gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
flip)
gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
scale_factor, flip)
if self.with_mask:
gt_mask_polys, gt_poly_lens, num_polys_per_mask = \
self.mask_transform(
ann['mask_polys'], ann['poly_lens'],
img_info['height'], img_info['width'], flip)
ori_shape = (img_info['height'], img_info['width'])
img_meta = dict(
ori_shape=DC(ori_shape),
img_shape=DC(img_shape),
scale_factor=DC(scale_factor),
flip=DC(flip))
data = dict(
img=DC(img, stack=True),
img_meta=img_meta,
gt_bboxes=DC(gt_bboxes))
if self.proposals is not None:
data['proposals'] = DC(proposals)
if self.with_label:
data['gt_labels'] = DC(gt_labels)
if self.with_crowd:
data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore)
if self.with_mask:
data['gt_mask_polys'] = DC(gt_mask_polys)
data['gt_poly_lens'] = DC(gt_poly_lens)
data['num_polys_per_mask'] = DC(num_polys_per_mask)
return data
    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        img_info = self.img_infos[idx]
        img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name']))
        proposal = (self.proposals[idx][:, :4]
                    if self.proposals is not None else None)
        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, scale_factor = self.img_transform(
                img, scale, flip)
            # keep the (h, w, c, scale) convention expected by the aug-test
            # merging functions
            shape_scale = np.array(img_shape + (scale_factor, ),
                                   dtype=np.float32)
            _img, shape_scale = self.numpy2tensor(_img, shape_scale)
            img_meta = dict(shape_scale=shape_scale, flip=flip)
            if proposal is not None:
                proposal = self.bbox_transform(proposal, img_shape,
                                               scale_factor, flip)
                proposal = self.numpy2tensor(proposal)
            return _img, img_meta, proposal
        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, img_meta, prop = prepare_single(img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(img_meta)
            proposals.append(prop)
            if self.flip_ratio > 0:
                _img, img_meta, prop = prepare_single(img, scale, True,
                                                      proposal)
                imgs.append(_img)
                img_metas.append(img_meta)
                proposals.append(prop)
        if self.proposals is None:
            return imgs, img_metas
        else:
            return imgs, img_metas, proposals
import collections
import torch
import torch.nn.functional as F
from torch.utils.data.dataloader import default_collate
from .utils import DataContainer
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
__all__ = ['collate']
def collate(batch, samples_per_gpu=1):
if not isinstance(batch, collections.Sequence):
        raise TypeError("{} is not supported.".format(type(batch)))
if isinstance(batch[0], DataContainer):
assert len(batch) % samples_per_gpu == 0
stacked = []
if batch[0].stack:
for i in range(0, len(batch), samples_per_gpu):
assert isinstance(batch[i].data, torch.Tensor)
# TODO: handle tensors other than 3d
assert batch[i].dim() == 3
                c, h, w = batch[i].size()
for sample in batch[i:i + samples_per_gpu]:
assert c == sample.size(0)
h = max(h, sample.size(1))
w = max(w, sample.size(2))
padded_samples = [
F.pad(
sample.data,
(0, w - sample.size(2), 0, h - sample.size(1)),
value=sample.padding_value)
for sample in batch[i:i + samples_per_gpu]
]
stacked.append(default_collate(padded_samples))
else:
for i in range(0, len(batch), samples_per_gpu):
stacked.append(
[sample.data for sample in batch[i:i + samples_per_gpu]])
return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
elif isinstance(batch[0], collections.Sequence):
transposed = zip(*batch)
return [collate(samples, samples_per_gpu) for samples in transposed]
elif isinstance(batch[0], collections.Mapping):
return {
key: collate([d[key] for d in batch], samples_per_gpu)
for key in batch[0]
}
else:
return default_collate(batch)
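# A minimal usage sketch (shapes are illustrative, not part of this module):
# two images of different sizes wrapped in DataContainers with stack=True are
# padded to a common (h, w) and stacked into a single (2, c, h, w) tensor.
#
#   imgs = [DataContainer(torch.rand(3, 600, 800), stack=True),
#           DataContainer(torch.rand(3, 640, 768), stack=True)]
#   batch = collate(imgs, samples_per_gpu=2)
#   # batch.data[0] has shape (2, 3, 640, 800)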
from __future__ import division
import math
import torch
import numpy as np
from torch.distributed import get_world_size, get_rank
from torch.utils.data.sampler import Sampler
__all__ = ['GroupSampler', 'DistributedGroupSampler']
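# GroupSampler draws each mini-batch of `samples_per_gpu` indices from a
# single aspect-ratio group (the `flag` set by CocoDataset._set_group_flag),
# so that images batched together need little padding.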
class GroupSampler(Sampler):
def __init__(self, dataset, samples_per_gpu=1):
assert hasattr(dataset, 'flag')
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.flag = dataset.flag.astype(np.int64)
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, size in enumerate(self.group_sizes):
self.num_samples += int(np.ceil(
size / self.samples_per_gpu)) * self.samples_per_gpu
def __iter__(self):
indices = []
for i, size in enumerate(self.group_sizes):
if size == 0:
continue
indice = np.where(self.flag == i)[0]
assert len(indice) == size
np.random.shuffle(indice)
num_extra = int(np.ceil(size / self.samples_per_gpu)
) * self.samples_per_gpu - len(indice)
indice = np.concatenate([indice, indice[:num_extra]])
indices.append(indice)
indices = np.concatenate(indices)
indices = [
indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
for i in np.random.permutation(
range(len(indices) // self.samples_per_gpu))
]
indices = np.concatenate(indices)
indices = torch.from_numpy(indices).long()
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
class DistributedGroupSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self,
dataset,
samples_per_gpu=1,
num_replicas=None,
rank=None):
if num_replicas is None:
num_replicas = get_world_size()
if rank is None:
rank = get_rank()
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
assert hasattr(self.dataset, 'flag')
self.flag = self.dataset.flag
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, j in enumerate(self.group_sizes):
self.num_samples += int(
math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
self.num_replicas)) * self.samples_per_gpu
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = []
for i, size in enumerate(self.group_sizes):
if size > 0:
indice = np.where(self.flag == i)[0]
assert len(indice) == size
indice = indice[list(torch.randperm(int(size),
generator=g))].tolist()
extra = int(
math.ceil(
size * 1.0 / self.samples_per_gpu / self.num_replicas)
) * self.samples_per_gpu * self.num_replicas - len(indice)
indice += indice[:extra]
indices += indice
assert len(indices) == self.total_size
indices = [
indices[j] for i in list(
torch.randperm(
len(indices) // self.samples_per_gpu, generator=g))
for j in range(i * self.samples_per_gpu, (i + 1) *
self.samples_per_gpu)
]
# subsample
offset = self.num_samples * self.rank
indices = indices[offset:offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
import mmcv
import cvbase as cvb  # still used by the legacy ImageCrop/MaskTransform below
import numpy as np
import torch
from mmdet.core import segms
__all__ = [
'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor'
]
class ImageTransform(object):
"""Preprocess an image
1. rescale the image to expected size
2. normalize the image
3. flip the image (if needed)
4. pad the image (if needed)
5. transpose to (c, h, w)
"""
def __init__(self,
mean=(0, 0, 0),
std=(1, 1, 1),
to_rgb=True,
size_divisor=None):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
self.to_rgb = to_rgb
self.size_divisor = size_divisor
def __call__(self, img, scale, flip=False):
img, scale_factor = mmcv.imrescale(img, scale, True)
img_shape = img.shape
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
if flip:
img = mmcv.imflip(img)
if self.size_divisor is not None:
img = mmcv.impad_to_multiple(img, self.size_divisor)
img = img.transpose(2, 0, 1)
return img, img_shape, scale_factor
# img, scale = cvb.resize_keep_ar(img_or_path, max_long_edge,
# max_short_edge, True)
# shape_scale = np.array(img.shape + (scale, ), dtype=np.float32)
# if flip:
# img = img[:, ::-1, :].copy()
# if self.color_order == 'RGB':
# img = cvb.bgr2rgb(img)
# img = img.astype(np.float32)
# img -= self.color_mean
# img /= self.color_std
# if self.size_divisor is None:
# padded_img = img
# else:
# pad_h = int(np.ceil(
# img.shape[0] / self.size_divisor)) * self.size_divisor
# pad_w = int(np.ceil(
# img.shape[1] / self.size_divisor)) * self.size_divisor
# padded_img = cvb.pad_img(img, (pad_h, pad_w), pad_val=0)
# padded_img = padded_img.transpose(2, 0, 1)
# return padded_img, shape_scale
class ImageCrop(object):
"""crop image patches and resize patches into fixed size
1. (read and) flip image (if needed)
2. crop image patches according to given bboxes
3. resize patches into fixed size (default 224x224)
4. normalize the image (if needed)
5. transpose to (c, h, w) (if needed)
"""
def __init__(self,
normalize=True,
transpose=True,
color_order='RGB',
color_mean=(0, 0, 0),
color_std=(1, 1, 1)):
self.normalize = normalize
self.transpose = transpose
assert color_order in ['RGB', 'BGR']
self.color_order = color_order
self.color_mean = np.array(color_mean, dtype=np.float32)
self.color_std = np.array(color_std, dtype=np.float32)
def __call__(self,
img_or_path,
bboxes,
crop_size,
scale_ratio=1.0,
flip=False):
img = cvb.read_img(img_or_path)
if flip:
img = img[:, ::-1, :].copy()
crop_imgs = cvb.crop_img(
img,
bboxes[:, :4],
scale_ratio=scale_ratio,
pad_fill=self.color_mean)
processed_crop_imgs_list = []
for i in range(len(crop_imgs)):
crop_img = crop_imgs[i]
crop_img = cvb.resize(crop_img, crop_size)
crop_img = crop_img.astype(np.float32)
crop_img -= self.color_mean
crop_img /= self.color_std
processed_crop_imgs_list.append(crop_img)
processed_crop_imgs = np.stack(processed_crop_imgs_list, axis=0)
processed_crop_imgs = processed_crop_imgs.transpose(0, 3, 1, 2)
return processed_crop_imgs
class BboxTransform(object):
"""Preprocess gt bboxes
1. rescale bboxes according to image size
2. flip bboxes (if needed)
3. pad the first dimension to `max_num_gts`
"""
def __init__(self, max_num_gts=None):
self.max_num_gts = max_num_gts
def __call__(self, bboxes, img_shape, scale_factor, flip=False):
gt_bboxes = bboxes * scale_factor
        if flip:
            # horizontal flip: mirror box x coordinates about the (scaled)
            # image width
            w = img_shape[1]
            flipped = gt_bboxes.copy()
            flipped[..., 0] = w - gt_bboxes[..., 2] - 1
            flipped[..., 2] = w - gt_bboxes[..., 0] - 1
            gt_bboxes = flipped
if self.max_num_gts is None:
return gt_bboxes
else:
num_gts = gt_bboxes.shape[0]
padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
padded_bboxes[:num_gts, :] = gt_bboxes
return padded_bboxes
class PolyMaskTransform(object):
def __init__(self):
pass
def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False):
"""
Args:
gt_mask_polys(list): a list of masks, each mask is a list of polys,
each poly is a list of numbers
gt_poly_lens(list): a list of int, indicating the size of each poly
"""
if flip:
gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w)
num_polys_per_mask = np.array(
[len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64)
gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64)
gt_mask_polys = [
np.concatenate(mask_polys).astype(np.float32)
for mask_polys in gt_mask_polys
]
gt_mask_polys = np.concatenate(gt_mask_polys)
return gt_mask_polys, gt_poly_lens, num_polys_per_mask
class MaskTransform(object):
"""Preprocess masks
1. resize masks to expected size and stack to a single array
2. flip the masks (if needed)
3. pad the masks (if needed)
"""
def __init__(self, max_num_gts, pad_size=None):
self.max_num_gts = max_num_gts
self.pad_size = pad_size
def __call__(self, masks, img_size, flip=False):
max_long_edge = max(img_size)
max_short_edge = min(img_size)
masks = [
cvb.resize_keep_ar(
mask,
max_long_edge,
max_short_edge,
interpolation=cvb.INTER_NEAREST) for mask in masks
]
masks = np.stack(masks, axis=0)
if flip:
            # horizontal flip: masks are (n, h, w), so flip the last (width) axis
            masks = masks[:, :, ::-1]
if self.pad_size is None:
pad_h = masks.shape[1]
pad_w = masks.shape[2]
else:
pad_size = self.pad_size if self.pad_size > 0 else max_long_edge
pad_h = pad_w = pad_size
padded_masks = np.zeros(
(self.max_num_gts, pad_h, pad_w), dtype=masks.dtype)
padded_masks[:masks.shape[0], :masks.shape[1], :masks.shape[2]] = masks
return padded_masks
class Numpy2Tensor(object):
def __init__(self):
pass
def __call__(self, *args):
if len(args) == 1:
return torch.from_numpy(args[0])
else:
return tuple([torch.from_numpy(array) for array in args])
from .data_container import DataContainer
from .misc import *
import functools
from collections import Sequence
import mmcv
import numpy as np
import torch
def to_tensor(data):
"""Convert objects of various python types to :obj:`torch.Tensor`.
Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
:class:`Sequence`, :class:`int` and :class:`float`.
"""
if isinstance(data, np.ndarray):
return torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
return data
elif isinstance(data, Sequence) and not mmcv.is_str(data):
return torch.tensor(data)
elif isinstance(data, int):
return torch.LongTensor([data])
elif isinstance(data, float):
return torch.FloatTensor([data])
else:
raise TypeError('type {} cannot be converted to tensor.'.format(
type(data)))
def assert_tensor_type(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not isinstance(args[0].data, torch.Tensor):
raise AttributeError('{} has no attribute {} for type {}'.format(
args[0].__class__.__name__, func.__name__, args[0].datatype))
return func(*args, **kwargs)
return wrapper
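# DataContainer wraps the data of one sample together with its collate
# behaviour: stack=True means samples are padded to a common size and stacked
# into one tensor by `collate`, stack=False means they are kept as a list.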
class DataContainer(object):
def __init__(self, data, stack=False, padding_value=0):
if isinstance(data, list):
self._data = data
else:
self._data = to_tensor(data)
self._stack = stack
self._padding_value = padding_value
def __repr__(self):
return '{}({})'.format(self.__class__.__name__, repr(self.data))
@property
def data(self):
return self._data
@property
def datatype(self):
if isinstance(self.data, torch.Tensor):
return self.data.type()
else:
return type(self.data)
@property
def stack(self):
return self._stack
@property
def padding_value(self):
return self._padding_value
@assert_tensor_type
def size(self, *args, **kwargs):
return self.data.size(*args, **kwargs)
@assert_tensor_type
def dim(self):
return self.data.dim()
import mmcv
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask as maskUtils
def random_scale(img_scales, mode='range'):
"""Randomly select a scale from a list of scales or scale ranges.
Args:
img_scales (list[tuple]): Image scale or scale range.
mode (str): "range" or "value".
Returns:
tuple: Sampled image scale.
"""
num_scales = len(img_scales)
if num_scales == 1: # fixed scale is specified
img_scale = img_scales[0]
elif num_scales == 2: # randomly sample a scale
if mode == 'range':
img_scale_long = [max(s) for s in img_scales]
img_scale_short = [min(s) for s in img_scales]
long_edge = np.random.randint(
min(img_scale_long),
max(img_scale_long) + 1)
short_edge = np.random.randint(
min(img_scale_short),
max(img_scale_short) + 1)
img_scale = (long_edge, short_edge)
elif mode == 'value':
img_scale = img_scales[np.random.randint(num_scales)]
else:
if mode != 'value':
raise ValueError(
'Only "value" mode supports more than 2 image scales')
img_scale = img_scales[np.random.randint(num_scales)]
return img_scale
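# For example (values are illustrative only): with
# img_scales = [(1333, 640), (1333, 800)], mode='range' samples the long edge
# from [1333, 1333] and the short edge from [640, 800], while mode='value'
# picks one of the two tuples as-is.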
def show_ann(coco, img, ann_info):
plt.imshow(mmcv.bgr2rgb(img))
plt.axis('off')
coco.showAnns(ann_info)
plt.show()
def draw_bbox_and_segm(img, results, dataset, score_thr=0.5):
bbox_results, segm_results = results
hi_bboxes = []
for cls_bboxes, cls_segms in zip(bbox_results, segm_results):
if len(cls_bboxes) == 0:
hi_bboxes.append(cls_bboxes)
continue
inds = np.where(cls_bboxes[:, -1] > score_thr)[0]
hi_bboxes.append(cls_bboxes[inds, :])
color_mask = np.random.random((1, 3))
for i in inds:
            mask = maskUtils.decode(cls_segms[i]).astype(bool)
img[mask] = img[mask] * 0.5 + color_mask * 0.5
mmcv.draw_bboxes_with_label(np.ascontiguousarray(img), hi_bboxes, dataset)
from .resnet import resnet
import math
import torch.nn as nn
import torch.utils.checkpoint as cp
from torchpack import load_checkpoint
def conv3x3(in_planes, out_planes, stride=1, dilation=1):
"3x3 convolution with padding"
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
dilation=dilation,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
dilation=1,
downsample=None,
style='fb'):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride, dilation)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
self.dilation = dilation
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
dilation=1,
downsample=None,
style='fb',
with_cp=False):
"""Bottleneck block
if style is "fb", the stride-two layer is the 3x3 conv layer,
if style is "msra", the stride-two layer is the first 1x1 conv layer
"""
super(Bottleneck, self).__init__()
assert style in ['fb', 'msra']
if style == 'fb':
conv1_stride = 1
conv2_stride = stride
else:
conv1_stride = stride
conv2_stride = 1
self.conv1 = nn.Conv2d(
inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=conv2_stride,
padding=dilation,
dilation=dilation,
bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
self.dilation = dilation
self.with_cp = with_cp
def forward(self, x):
def _inner_forward(x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
return out
if self.with_cp and x.requires_grad:
out = cp.checkpoint(_inner_forward, x)
else:
out = _inner_forward(x)
out = self.relu(out)
return out
def make_res_layer(block,
inplanes,
planes,
blocks,
stride=1,
dilation=1,
style='fb',
with_cp=False):
downsample = None
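    # a 1x1 conv + BN shortcut is needed whenever the residual branch changes
    # the spatial stride or the number of channels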
if stride != 1 or inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(
block(
inplanes,
planes,
stride,
dilation,
downsample,
style=style,
with_cp=with_cp))
inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(
block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp))
return nn.Sequential(*layers)
class ResHead(nn.Module):
    def __init__(self, block, num_blocks, stride=2, dilation=1, style='fb'):
        super(ResHead, self).__init__()
        self.layer4 = make_res_layer(
block,
1024,
512,
num_blocks,
stride=stride,
dilation=dilation,
style=style)
def forward(self, x):
return self.layer4(x)
class ResNet(nn.Module):
def __init__(self,
block,
layers,
strides=(1, 2, 2, 2),
dilations=(1, 1, 1, 1),
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
style='fb',
sync_bn=False,
with_cp=False):
super(ResNet, self).__init__()
if not len(layers) == len(strides) == len(dilations):
raise ValueError(
'The number of layers, strides and dilations must be equal, '
                'but got {} layers, {} strides and {} dilations'.format(
len(layers), len(strides), len(dilations)))
assert max(out_indices) < len(layers)
self.out_indices = out_indices
self.frozen_stages = frozen_stages
self.style = style
self.sync_bn = sync_bn
self.inplanes = 64
self.conv1 = nn.Conv2d(
3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.res_layers = []
for i, num_blocks in enumerate(layers):
stride = strides[i]
dilation = dilations[i]
layer_name = 'layer{}'.format(i + 1)
planes = 64 * 2**i
res_layer = make_res_layer(
block,
self.inplanes,
planes,
num_blocks,
stride=stride,
dilation=dilation,
style=self.style,
with_cp=with_cp)
self.inplanes = planes * block.expansion
setattr(self, layer_name, res_layer)
self.res_layers.append(layer_name)
self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1)
self.with_cp = with_cp
def init_weights(self, pretrained=None):
if isinstance(pretrained, str):
load_checkpoint(self, pretrained, strict=False)
elif pretrained is None:
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
else:
raise TypeError('pretrained must be a str or None')
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outs = []
for i, layer_name in enumerate(self.res_layers):
res_layer = getattr(self, layer_name)
x = res_layer(x)
if i in self.out_indices:
outs.append(x)
if len(outs) == 1:
return outs[0]
else:
return tuple(outs)
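    # train() keeps all BN layers in eval mode when sync_bn is off (frozen
    # running stats) and freezes the stem plus the first `frozen_stages`
    # residual stages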
def train(self, mode=True):
super(ResNet, self).train(mode)
if not self.sync_bn:
for m in self.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
if mode and self.frozen_stages >= 0:
for param in self.conv1.parameters():
param.requires_grad = False
for param in self.bn1.parameters():
param.requires_grad = False
self.bn1.eval()
self.bn1.weight.requires_grad = False
self.bn1.bias.requires_grad = False
for i in range(1, self.frozen_stages + 1):
mod = getattr(self, 'layer{}'.format(i))
mod.eval()
for param in mod.parameters():
param.requires_grad = False
resnet_cfg = {
18: (BasicBlock, (2, 2, 2, 2)),
34: (BasicBlock, (3, 4, 6, 3)),
50: (Bottleneck, (3, 4, 6, 3)),
101: (Bottleneck, (3, 4, 23, 3)),
152: (Bottleneck, (3, 8, 36, 3))
}
def resnet(depth,
num_stages=4,
strides=(1, 2, 2, 2),
dilations=(1, 1, 1, 1),
out_indices=(2, ),
frozen_stages=-1,
style='fb',
sync_bn=False,
with_cp=False):
"""Constructs a ResNet model.
Args:
depth (int): depth of resnet, from {18, 34, 50, 101, 152}
num_stages (int): num of resnet stages, normally 4
strides (list): strides of the first block of each stage
dilations (list): dilation of each stage
out_indices (list): output from which stages
"""
if depth not in resnet_cfg:
raise KeyError('invalid depth {} for resnet'.format(depth))
block, layers = resnet_cfg[depth]
model = ResNet(block, layers[:num_stages], strides, dilations, out_indices,
frozen_stages, style, sync_bn, with_cp)
return model
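# For example, resnet(50, out_indices=(0, 1, 2, 3)) builds a ResNet-50 whose
# forward() returns the four stage outputs (256, 512, 1024 and 2048 channels),
# e.g. as inputs to an FPN neck.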
from .bbox_head import BBoxHead
__all__ = ['BBoxHead']
import torch.nn as nn
import torch.nn.functional as F
from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms,
weighted_cross_entropy, weighted_smoothl1, accuracy)
class BBoxHead(nn.Module):
"""Simplest RoI head, with only two fc layers for classification and
regression respectively"""
def __init__(self,
exclude_mal_box=True,
with_avg_pool=False,
with_cls=True,
with_reg=True,
roi_feat_size=7,
in_channels=256,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False):
super(BBoxHead, self).__init__()
assert with_cls or with_reg
self.with_avg_pool = with_avg_pool
self.with_cls = with_cls
self.with_reg = with_reg
self.roi_feat_size = roi_feat_size
self.in_channels = in_channels
self.num_classes = num_classes
self.target_means = target_means
self.target_stds = target_stds
self.reg_class_agnostic = reg_class_agnostic
self.exclude_mal_box = exclude_mal_box
in_channels = self.in_channels
if self.with_avg_pool:
self.avg_pool = nn.AvgPool2d(roi_feat_size)
else:
in_channels *= (self.roi_feat_size * self.roi_feat_size)
if self.with_cls:
self.fc_cls = nn.Linear(in_channels, num_classes)
if self.with_reg:
out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
self.fc_reg = nn.Linear(in_channels, out_dim_reg)
self.debug_imgs = None
def init_weights(self):
if self.with_cls:
nn.init.normal_(self.fc_cls.weight, 0, 0.01)
nn.init.constant_(self.fc_cls.bias, 0)
if self.with_reg:
nn.init.normal_(self.fc_reg.weight, 0, 0.001)
nn.init.constant_(self.fc_reg.bias, 0)
def forward(self, x):
if self.with_avg_pool:
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
cls_score = self.fc_cls(x) if self.with_cls else None
bbox_pred = self.fc_reg(x) if self.with_reg else None
return cls_score, bbox_pred
def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes,
pos_gt_labels, rcnn_train_cfg):
reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes
cls_reg_targets = bbox_target(
pos_proposals,
neg_proposals,
pos_gt_bboxes,
pos_gt_labels,
self.target_means,
self.target_stds,
rcnn_train_cfg,
reg_num_classes,
debug_imgs=self.debug_imgs)
return cls_reg_targets
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
bbox_weights):
losses = dict()
if cls_score is not None:
losses['loss_cls'] = weighted_cross_entropy(
cls_score, labels, label_weights)
losses['acc'] = accuracy(cls_score, labels)
if bbox_pred is not None:
losses['loss_reg'] = weighted_smoothl1(
bbox_pred,
bbox_targets,
bbox_weights,
ave_factor=bbox_targets.size(0))
return losses
def get_det_bboxes(self,
rois,
cls_score,
bbox_pred,
img_shape,
rescale=False,
nms_cfg=None):
if isinstance(cls_score, list):
cls_score = sum(cls_score) / float(len(cls_score))
scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
if bbox_pred is not None:
bboxes = bbox_transform_inv(rois[:, 1:], bbox_pred,
self.target_means, self.target_stds,
img_shape)
else:
bboxes = rois[:, 1:]
# TODO: add clip here
if rescale:
bboxes /= img_shape[-1]
if nms_cfg is None:
return bboxes, scores
else:
det_bboxes, det_labels = multiclass_nms(
bboxes, scores, nms_cfg.score_thr, nms_cfg.nms_thr,
nms_cfg.max_per_img)
return det_bboxes, det_labels
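# A minimal shape check (values are illustrative): with the defaults
# (roi_feat_size=7, in_channels=256, num_classes=81, with_avg_pool=False),
# 512 RoI features of shape (512, 256, 7, 7) give cls_score of shape
# (512, 81) and bbox_pred of shape (512, 4 * 81) = (512, 324).
#
#   head = BBoxHead()
#   cls_score, bbox_pred = head(torch.rand(512, 256, 7, 7))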
import mmcv
from torch import nn
from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
mask_heads)
__all__ = [
'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
'build_bbox_head', 'build_mask_head'
]
def _build_module(cfg, parent=None):
    return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict(
        cfg, parent)
def build(cfg, parent=None):
    if isinstance(cfg, list):
        modules = [_build_module(cfg_, parent) for cfg_ in cfg]
        return nn.Sequential(*modules)
    else:
        return _build_module(cfg, parent)
def build_backbone(cfg):
return build(cfg, backbones)
def build_neck(cfg):
return build(cfg, necks)
def build_rpn_head(cfg):
return build(cfg, rpn_heads)
def build_roi_extractor(cfg):
return build(cfg, roi_extractors)
def build_bbox_head(cfg):
return build(cfg, bbox_heads)
def build_mask_head(cfg):
return build(cfg, mask_heads)
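# A minimal usage sketch (the config dict below is hypothetical):
#
#   backbone = build_backbone(
#       dict(type='resnet', depth=50, out_indices=(0, 1, 2, 3)))
#
# mmcv.obj_from_dict pops the 'type' key, looks it up in the given module
# (here `backbones`, where `resnet` is defined) and calls it with the
# remaining keys as keyword arguments.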
from .conv_module import ConvModule
from .norm import build_norm_layer
__all__ = ['ConvModule', 'build_norm_layer']