"docs/git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "2e4babdb0a19fd3bd5a863cba083d36c17506b40"
Commit 0d5233a3, authored by Kai Chen, committed via GitHub

Make data pre-processing pipeline customizable (#935)

* define data pipelines

* update two config files

* minor fix for config files

* allow img_scale to be optional and update config

* add some docstrings

* add extra aug to transform

* bug fix for mask resizing

* fix cropping

* add faster rcnn example

* fix imports

* fix robustness testing

* add img_norm_cfg to img_meta

* fix the inference api with the new data pipeline

* fix proposal loading

* delete args of DefaultFormatBundle

* add more configs

* update configs

* bug fix

* add a brief doc

* update gt_labels in RandomCrop

* fix key error for new apis

* bug fix for masks of crowd bboxes

* add argument data_root

* minor fix

* update new hrnet configs

* update docs

* rename MultiscaleFlipAug to MultiScaleFlipAug

* add __repr__ for all transforms

* move DATA_PIPELINE.md to docs/

* fix image url
Parent commit: 7bb38af4
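With this change, each dataset is configured with a `pipeline` list, and every entry is a config dict resolved through the `PIPELINES` registry defined below. A minimal sketch of a training pipeline in this style, mirroring the Faster R-CNN example this PR adds (the scale and normalization values here are illustrative placeholders, not copied from a shipped config):

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]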
# mmdet/datasets/pipelines/__init__.py
from .compose import Compose
from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
                        Transpose, to_tensor)
from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals
from .test_aug import MultiScaleFlipAug
from .transforms import (Expand, MinIoURandomCrop, Normalize, Pad,
                         PhotoMetricDistortion, RandomCrop, RandomFlip, Resize,
                         SegResizeFlipPadRescale)

__all__ = [
    'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
    'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
    'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad',
    'RandomCrop', 'Normalize', 'SegResizeFlipPadRescale', 'MinIoURandomCrop',
    'Expand', 'PhotoMetricDistortion'
]
# mmdet/datasets/pipelines/compose.py
import collections

from mmdet.utils import build_from_cfg
from ..registry import PIPELINES


@PIPELINES.register_module
class Compose(object):

    def __init__(self, transforms):
        assert isinstance(transforms, collections.abc.Sequence)
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                transform = build_from_cfg(transform, PIPELINES)
                self.transforms.append(transform)
            elif callable(transform):
                self.transforms.append(transform)
            else:
                raise TypeError('transform must be callable or a dict')

    def __call__(self, data):
        for t in self.transforms:
            data = t(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string
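A quick usage sketch, assuming mmdet and its registry are importable: Compose accepts registered transform configs and plain callables interchangeably, and a transform that returns None aborts the whole pipeline for that sample.

# hypothetical example; 'demo.jpg' and the prefix are placeholders
pipeline = Compose([
    dict(type='LoadImageFromFile'),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    lambda results: results,  # any callable also works
])
results = pipeline(dict(
    img_prefix='data', img_info=dict(filename='demo.jpg'),
    bbox_fields=[], mask_fields=[]))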
# mmdet/datasets/pipelines/formating.py
from collections.abc import Sequence

import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC

from ..registry import PIPELINES


def to_tensor(data):
    """Convert objects of various python types to :obj:`torch.Tensor`.

    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
    :class:`Sequence`, :class:`int` and :class:`float`.
    """
    if isinstance(data, torch.Tensor):
        return data
    elif isinstance(data, np.ndarray):
        return torch.from_numpy(data)
    elif isinstance(data, Sequence) and not mmcv.is_str(data):
        return torch.tensor(data)
    elif isinstance(data, int):
        return torch.LongTensor([data])
    elif isinstance(data, float):
        return torch.FloatTensor([data])
    else:
        raise TypeError('type {} cannot be converted to tensor.'.format(
            type(data)))


@PIPELINES.register_module
class ToTensor(object):

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        for key in self.keys:
            results[key] = to_tensor(results[key])
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(keys={})'.format(self.keys)


@PIPELINES.register_module
class ImageToTensor(object):

    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        for key in self.keys:
            # convert (H, W, C) images to (C, H, W) tensors
            results[key] = to_tensor(results[key].transpose(2, 0, 1))
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(keys={})'.format(self.keys)


@PIPELINES.register_module
class Transpose(object):

    def __init__(self, keys, order):
        self.keys = keys
        self.order = order

    def __call__(self, results):
        for key in self.keys:
            results[key] = results[key].transpose(self.order)
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(keys={}, order={})'.format(
            self.keys, self.order)


@PIPELINES.register_module
class ToDataContainer(object):

    def __init__(self,
                 fields=(dict(key='img', stack=True), dict(key='gt_bboxes'),
                         dict(key='gt_labels'))):
        self.fields = fields

    def __call__(self, results):
        for field in self.fields:
            field = field.copy()
            key = field.pop('key')
            results[key] = DC(results[key], **field)
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(fields={})'.format(self.fields)


@PIPELINES.register_module
class DefaultFormatBundle(object):
    """Default formatting bundle.

    It simplifies the pipeline of formatting common fields, including "img",
    "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
    These fields are formatted as follows.

    - img: (1) transpose, (2) to tensor, (3) to DataContainer (stack=True)
    - proposals: (1) to tensor, (2) to DataContainer
    - gt_bboxes: (1) to tensor, (2) to DataContainer
    - gt_bboxes_ignore: (1) to tensor, (2) to DataContainer
    - gt_labels: (1) to tensor, (2) to DataContainer
    - gt_masks: (1) to DataContainer (cpu_only=True)
    - gt_semantic_seg: (1) unsqueeze dim-0, (2) to tensor,
      (3) to DataContainer (stack=True)
    """

    def __call__(self, results):
        if 'img' in results:
            img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
            results['img'] = DC(to_tensor(img), stack=True)
        for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
            if key not in results:
                continue
            results[key] = DC(to_tensor(results[key]))
        if 'gt_masks' in results:
            results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
        if 'gt_semantic_seg' in results:
            results['gt_semantic_seg'] = DC(
                to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)
        return results

    def __repr__(self):
        return self.__class__.__name__


@PIPELINES.register_module
class Collect(object):

    def __init__(self,
                 keys,
                 meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
                            'scale_factor', 'flip', 'img_norm_cfg')):
        self.keys = keys
        self.meta_keys = meta_keys

    def __call__(self, results):
        data = {}
        img_meta = {}
        for key in self.meta_keys:
            img_meta[key] = results[key]
        # meta info is wrapped in a cpu-only DataContainer so that it is
        # never scattered to GPUs
        data['img_meta'] = DC(img_meta, cpu_only=True)
        for key in self.keys:
            data[key] = results[key]
        return data

    def __repr__(self):
        return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(
            self.keys, self.meta_keys)
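To see what these formatting steps produce, here is a small self-contained sketch: dummy arrays, with meta values chosen only so that Collect's default meta_keys are all present.

import numpy as np
results = dict(
    img=np.zeros((4, 4, 3), dtype=np.float32),
    gt_bboxes=np.array([[0., 0., 2., 2.]], dtype=np.float32),
    gt_labels=np.array([1], dtype=np.int64),
    filename='demo.jpg', ori_shape=(4, 4, 3), img_shape=(4, 4, 3),
    pad_shape=(4, 4, 3), scale_factor=1.0, flip=False,
    img_norm_cfg=dict(mean=0, std=1, to_rgb=False))
data = Collect(keys=['img', 'gt_bboxes', 'gt_labels'])(
    DefaultFormatBundle()(results))
# data['img'] is now a stacked DataContainer holding a (3, 4, 4) tensor,
# and data['img_meta'] is a cpu_only DataContainer with the meta dict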
# mmdet/datasets/pipelines/loading.py
import os.path as osp
import warnings

import mmcv
import numpy as np
import pycocotools.mask as maskUtils

from ..registry import PIPELINES


@PIPELINES.register_module
class LoadImageFromFile(object):

    def __init__(self, to_float32=False):
        self.to_float32 = to_float32

    def __call__(self, results):
        filename = osp.join(results['img_prefix'],
                            results['img_info']['filename'])
        img = mmcv.imread(filename)
        if self.to_float32:
            img = img.astype(np.float32)
        results['filename'] = filename
        results['img'] = img
        results['img_shape'] = img.shape
        results['ori_shape'] = img.shape
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(to_float32={})'.format(
            self.to_float32)


@PIPELINES.register_module
class LoadAnnotations(object):

    def __init__(self,
                 with_bbox=True,
                 with_label=True,
                 with_mask=False,
                 with_seg=False,
                 poly2mask=True,
                 skip_img_without_anno=True):
        self.with_bbox = with_bbox
        self.with_label = with_label
        self.with_mask = with_mask
        self.with_seg = with_seg
        self.poly2mask = poly2mask
        self.skip_img_without_anno = skip_img_without_anno

    def _load_bboxes(self, results):
        ann_info = results['ann_info']
        results['gt_bboxes'] = ann_info['bboxes']
        if len(results['gt_bboxes']) == 0 and self.skip_img_without_anno:
            file_path = osp.join(results['img_prefix'],
                                 results['img_info']['filename'])
            warnings.warn(
                'Skip the image "{}" that has no valid gt bbox'.format(
                    file_path))
            return None
        results['gt_bboxes_ignore'] = ann_info.get('bboxes_ignore', None)
        results['bbox_fields'].extend(['gt_bboxes', 'gt_bboxes_ignore'])
        return results

    def _load_labels(self, results):
        results['gt_labels'] = results['ann_info']['labels']
        return results

    def _poly2mask(self, mask_ann, img_h, img_w):
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # already compressed RLE
            rle = mask_ann
        mask = maskUtils.decode(rle)
        return mask

    def _load_masks(self, results):
        h, w = results['img_info']['height'], results['img_info']['width']
        gt_masks = results['ann_info']['masks']
        if self.poly2mask:
            gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks]
        results['gt_masks'] = gt_masks
        results['mask_fields'].append('gt_masks')
        return results

    def _load_semantic_seg(self, results):
        results['gt_semantic_seg'] = mmcv.imread(
            osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
            flag='unchanged').squeeze()
        return results

    def __call__(self, results):
        if self.with_bbox:
            results = self._load_bboxes(results)
            if results is None:
                return None
        if self.with_label:
            results = self._load_labels(results)
        if self.with_mask:
            results = self._load_masks(results)
        if self.with_seg:
            results = self._load_semantic_seg(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += ('(with_bbox={}, with_label={}, with_mask={},'
                     ' with_seg={})').format(self.with_bbox, self.with_label,
                                             self.with_mask, self.with_seg)
        return repr_str


@PIPELINES.register_module
class LoadProposals(object):

    def __init__(self, num_max_proposals=None):
        self.num_max_proposals = num_max_proposals

    def __call__(self, results):
        proposals = results['proposals']
        if proposals.shape[1] not in (4, 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        proposals = proposals[:, :4]
        if self.num_max_proposals is not None:
            proposals = proposals[:self.num_max_proposals]
        if len(proposals) == 0:
            # keep a 2-D (1, 4) placeholder so downstream bbox transforms
            # that index with [:, ...] do not break
            proposals = np.array([[0, 0, 0, 0]], dtype=np.float32)
        results['proposals'] = proposals
        results['bbox_fields'].append('proposals')
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(num_max_proposals={})'.format(
            self.num_max_proposals)
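The loaders assume the dataset seeds `results` with `img_prefix`, `img_info` and `ann_info`, plus empty `bbox_fields`/`mask_fields` lists. A hypothetical seed dict, with a placeholder path and a single dummy box:

import numpy as np
results = dict(
    img_prefix='data/coco/train2017',  # placeholder path
    img_info=dict(filename='demo.jpg', height=480, width=640),
    ann_info=dict(
        bboxes=np.array([[10., 10., 50., 50.]], dtype=np.float32),
        labels=np.array([1], dtype=np.int64)),
    bbox_fields=[], mask_fields=[])
results = LoadAnnotations(with_bbox=True)(LoadImageFromFile()(results))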
# mmdet/datasets/pipelines/test_aug.py
import mmcv

from ..registry import PIPELINES
from .compose import Compose


@PIPELINES.register_module
class MultiScaleFlipAug(object):

    def __init__(self, transforms, img_scale, flip=False):
        self.transforms = Compose(transforms)
        self.img_scale = img_scale if isinstance(img_scale,
                                                 list) else [img_scale]
        assert mmcv.is_list_of(self.img_scale, tuple)
        self.flip = flip

    def __call__(self, results):
        aug_data = []
        flip_aug = [False, True] if self.flip else [False]
        for scale in self.img_scale:
            for flip in flip_aug:
                _results = results.copy()
                _results['scale'] = scale
                _results['flip'] = flip
                data = self.transforms(_results)
                aug_data.append(data)
        # list of dict to dict of list
        aug_data_dict = {key: [] for key in aug_data[0]}
        for data in aug_data:
            for key, val in data.items():
                aug_data_dict[key].append(val)
        return aug_data_dict

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(transforms={}, img_scale={}, flip={})'.format(
            self.transforms, self.img_scale, self.flip)
        return repr_str
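A test-time pipeline wraps the per-scale transforms inside MultiScaleFlipAug; the inner Resize and RandomFlip then pick up the injected 'scale' and 'flip' keys instead of sampling their own. A sketch (scale value illustrative; img_norm_cfg as defined earlier):

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]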
# mmdet/datasets/pipelines/transforms.py
import mmcv
import numpy as np
from imagecorruptions import corrupt
from numpy import random

from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
from ..registry import PIPELINES


@PIPELINES.register_module
class Resize(object):
    """Resize images & bbox & mask.

    This transform resizes the input image to some scale. Bboxes and masks
    are then resized with the same scale factor. If the input dict contains
    the key "scale", then the scale in the input dict is used, otherwise the
    specified scale in the init method is used.

    `img_scale` can either be a tuple (single-scale) or a list of tuples
    (multi-scale). There are 3 multiscale modes:

    - `ratio_range` is not None: randomly sample a ratio from the ratio range
      and multiply it with the image scale.
    - `ratio_range` is None and `multiscale_mode` == "range": randomly sample
      a scale from a range.
    - `ratio_range` is None and `multiscale_mode` == "value": randomly sample
      a scale from multiple scales.

    Args:
        img_scale (tuple or list[tuple]): Image scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True):
        if img_scale is None:
            self.img_scale = None
        else:
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert mmcv.is_list_of(self.img_scale, tuple)

        if ratio_range is not None:
            # mode 1: given a scale and a range of image ratio
            assert len(self.img_scale) == 1
        else:
            # mode 2: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio

    @staticmethod
    def random_select(img_scales):
        assert mmcv.is_list_of(img_scales, tuple)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx

    @staticmethod
    def random_sample(img_scales):
        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long),
            max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short),
            max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None

    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        assert isinstance(img_scale, tuple) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None

    def _random_scale(self, results):
        if self.ratio_range is not None:
            scale, scale_idx = self.random_sample_ratio(
                self.img_scale[0], self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError

        results['scale'] = scale
        results['scale_idx'] = scale_idx

    def _resize_img(self, results):
        if self.keep_ratio:
            img, scale_factor = mmcv.imrescale(
                results['img'], results['scale'], return_scale=True)
        else:
            img, w_scale, h_scale = mmcv.imresize(
                results['img'], results['scale'], return_scale=True)
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
        results['img'] = img
        results['img_shape'] = img.shape
        results['pad_shape'] = img.shape  # in case that there is no padding
        results['scale_factor'] = scale_factor
        results['keep_ratio'] = self.keep_ratio

    def _resize_bboxes(self, results):
        img_shape = results['img_shape']
        for key in results.get('bbox_fields', []):
            bboxes = results[key] * results['scale_factor']
            bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
            bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
            results[key] = bboxes

    def _resize_masks(self, results):
        for key in results.get('mask_fields', []):
            if results[key] is None:
                continue
            if self.keep_ratio:
                masks = [
                    mmcv.imrescale(
                        mask, results['scale_factor'], interpolation='nearest')
                    for mask in results[key]
                ]
            else:
                mask_size = (results['img_shape'][1], results['img_shape'][0])
                masks = [
                    mmcv.imresize(mask, mask_size, interpolation='nearest')
                    for mask in results[key]
                ]
            results[key] = masks

    def __call__(self, results):
        if 'scale' not in results:
            self._random_scale(results)
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += ('(img_scale={}, multiscale_mode={}, ratio_range={}, '
                     'keep_ratio={})').format(self.img_scale,
                                              self.multiscale_mode,
                                              self.ratio_range,
                                              self.keep_ratio)
        return repr_str
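The three multiscale modes from the docstring map onto configs like these (all values illustrative):

# mode 1: fixed base scale jittered by a sampled ratio
resize_ratio = dict(
    type='Resize', img_scale=(1333, 800), ratio_range=(0.8, 1.2))
# mode 2: sample any scale between the two endpoints
resize_range = dict(
    type='Resize', multiscale_mode='range',
    img_scale=[(1333, 640), (1333, 800)])
# mode 3: pick one of the listed scales
resize_value = dict(
    type='Resize', multiscale_mode='value',
    img_scale=[(1333, 672), (1333, 736), (1333, 800)])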
@PIPELINES.register_module
class RandomFlip(object):
    """Flip the image & bbox & mask.

    If the input dict contains the key "flip", then the flag will be used,
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    Args:
        flip_ratio (float, optional): The flipping probability.
    """

    def __init__(self, flip_ratio=None):
        self.flip_ratio = flip_ratio
        if flip_ratio is not None:
            assert flip_ratio >= 0 and flip_ratio <= 1

    def bbox_flip(self, bboxes, img_shape):
        """Flip bboxes horizontally.

        Args:
            bboxes (ndarray): shape (..., 4*k)
            img_shape (tuple): (height, width)
        """
        assert bboxes.shape[-1] % 4 == 0
        w = img_shape[1]
        flipped = bboxes.copy()
        flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
        flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
        return flipped

    def __call__(self, results):
        if 'flip' not in results:
            # guard against flip_ratio=None, which cannot be compared
            # against a random float
            flip = (self.flip_ratio is not None
                    and np.random.rand() < self.flip_ratio)
            results['flip'] = flip
        if results['flip']:
            # flip image
            results['img'] = mmcv.imflip(results['img'])
            # flip bboxes
            for key in results.get('bbox_fields', []):
                results[key] = self.bbox_flip(results[key],
                                              results['img_shape'])
            # flip masks
            for key in results.get('mask_fields', []):
                results[key] = [mask[:, ::-1] for mask in results[key]]
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(flip_ratio={})'.format(
            self.flip_ratio)
@PIPELINES.register_module
class Pad(object):
    """Pad the image & mask.

    There are two padding modes: (1) pad to a fixed size and (2) pad to the
    minimum size that is divisible by some number.

    Args:
        size (tuple, optional): Fixed padding size.
        size_divisor (int, optional): The divisor of padded size.
        pad_val (float, optional): Padding value, 0 by default.
    """

    def __init__(self, size=None, size_divisor=None, pad_val=0):
        self.size = size
        self.size_divisor = size_divisor
        self.pad_val = pad_val
        # only one of size and size_divisor should be valid
        assert size is not None or size_divisor is not None
        assert size is None or size_divisor is None

    def _pad_img(self, results):
        if self.size is not None:
            # pass pad_val in the fixed-size branch as well
            padded_img = mmcv.impad(results['img'], self.size, self.pad_val)
        elif self.size_divisor is not None:
            padded_img = mmcv.impad_to_multiple(
                results['img'], self.size_divisor, pad_val=self.pad_val)
        results['img'] = padded_img
        results['pad_shape'] = padded_img.shape
        results['pad_fixed_size'] = self.size
        results['pad_size_divisor'] = self.size_divisor

    def _pad_masks(self, results):
        pad_shape = results['pad_shape'][:2]
        for key in results.get('mask_fields', []):
            padded_masks = [
                mmcv.impad(mask, pad_shape, pad_val=self.pad_val)
                for mask in results[key]
            ]
            results[key] = np.stack(padded_masks, axis=0)

    def __call__(self, results):
        self._pad_img(results)
        self._pad_masks(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(size={}, size_divisor={}, pad_val={})'.format(
            self.size, self.size_divisor, self.pad_val)
        return repr_str
@PIPELINES.register_module
class Normalize(object):
    """Normalize the image.

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB,
            default is true.
    """

    def __init__(self, mean, std, to_rgb=True):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def __call__(self, results):
        results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std,
                                          self.to_rgb)
        results['img_norm_cfg'] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(mean={}, std={}, to_rgb={})'.format(
            self.mean, self.std, self.to_rgb)
        return repr_str
@PIPELINES.register_module
class RandomCrop(object):
    """Random crop the image & bboxes.

    Args:
        crop_size (tuple): Expected size after cropping, (h, w).
    """

    def __init__(self, crop_size):
        self.crop_size = crop_size

    def __call__(self, results):
        img = results['img']
        margin_h = max(img.shape[0] - self.crop_size[0], 0)
        margin_w = max(img.shape[1] - self.crop_size[1], 0)
        offset_h = np.random.randint(0, margin_h + 1)
        offset_w = np.random.randint(0, margin_w + 1)
        crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0]
        crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1]

        # crop the image
        img = img[crop_y1:crop_y2, crop_x1:crop_x2, :]
        img_shape = img.shape
        results['img'] = img
        results['img_shape'] = img_shape

        # crop bboxes accordingly and clip to the image boundary
        for key in results.get('bbox_fields', []):
            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],
                                   dtype=np.float32)
            bboxes = results[key] - bbox_offset
            bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
            bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
            results[key] = bboxes

        # filter out the gt bboxes that are completely cropped
        if 'gt_bboxes' in results:
            gt_bboxes = results['gt_bboxes']
            valid_inds = (gt_bboxes[:, 2] > gt_bboxes[:, 0]) & (
                gt_bboxes[:, 3] > gt_bboxes[:, 1])
            # if no gt bbox remains after cropping, just skip this image
            if not np.any(valid_inds):
                return None
            results['gt_bboxes'] = gt_bboxes[valid_inds, :]
            if 'gt_labels' in results:
                results['gt_labels'] = results['gt_labels'][valid_inds]

            # filter and crop the masks; iterate over the *indices* of the
            # valid boxes, not the boolean mask itself
            if 'gt_masks' in results:
                valid_gt_masks = []
                for i in np.where(valid_inds)[0]:
                    gt_mask = results['gt_masks'][i][crop_y1:crop_y2,
                                                     crop_x1:crop_x2]
                    valid_gt_masks.append(gt_mask)
                results['gt_masks'] = valid_gt_masks

        return results

    def __repr__(self):
        return self.__class__.__name__ + '(crop_size={})'.format(
            self.crop_size)
@PIPELINES.register_module
class SegResizeFlipPadRescale(object):
    """A sequence of transforms for semantic segmentation maps.

    The same pipeline as the input image is applied to the semantic
    segmentation map, which is finally rescaled by some scale factor. The
    transforms include:

    1. resize
    2. flip
    3. pad
    4. rescale (so that the final size can be different from the image size)

    Args:
        scale_factor (float): The scale factor of the final output.
    """

    def __init__(self, scale_factor=1):
        self.scale_factor = scale_factor

    def __call__(self, results):
        if results['keep_ratio']:
            gt_seg = mmcv.imrescale(
                results['gt_semantic_seg'],
                results['scale'],
                interpolation='nearest')
        else:
            gt_seg = mmcv.imresize(
                results['gt_semantic_seg'],
                results['scale'],
                interpolation='nearest')
        if results['flip']:
            gt_seg = mmcv.imflip(gt_seg)
        # compare against the spatial part of pad_shape only; pad_shape
        # carries the channel dimension as well
        if gt_seg.shape != results['pad_shape'][:2]:
            gt_seg = mmcv.impad(gt_seg, results['pad_shape'][:2])
        if self.scale_factor != 1:
            gt_seg = mmcv.imrescale(
                gt_seg, self.scale_factor, interpolation='nearest')
        results['gt_semantic_seg'] = gt_seg
        return results

    def __repr__(self):
        return self.__class__.__name__ + '(scale_factor={})'.format(
            self.scale_factor)
@PIPELINES.register_module
class PhotoMetricDistortion(object):
    """Apply photometric distortion to an image sequentially; every
    transformation is applied with a probability of 0.5. Random contrast is
    applied either second or second to last.

    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)
    8. randomly swap channels

    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, results):
        # the in-place arithmetic below assumes a float image
        # (use LoadImageFromFile(to_float32=True) upstream)
        img = results['img']
        # random brightness
        if random.randint(2):
            delta = random.uniform(-self.brightness_delta,
                                   self.brightness_delta)
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = random.randint(2)
        if mode == 1:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # convert color from BGR to HSV
        img = mmcv.bgr2hsv(img)

        # random saturation
        if random.randint(2):
            img[..., 1] *= random.uniform(self.saturation_lower,
                                          self.saturation_upper)

        # random hue
        if random.randint(2):
            img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = mmcv.hsv2bgr(img)

        # random contrast
        if mode == 0:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # randomly swap channels
        if random.randint(2):
            img = img[..., random.permutation(3)]

        results['img'] = img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        # format from the stored lower/upper bounds; the ranges themselves
        # are not kept as attributes
        repr_str += ('(brightness_delta={}, contrast_range=({}, {}), '
                     'saturation_range=({}, {}), hue_delta={})').format(
                         self.brightness_delta, self.contrast_lower,
                         self.contrast_upper, self.saturation_lower,
                         self.saturation_upper, self.hue_delta)
        return repr_str
@PIPELINES.register_module
class Expand(object):
    """Randomly expand the image & bboxes.

    Randomly place the original image on a canvas of 'ratio' x original image
    size filled with mean values. The ratio is sampled from ratio_range.

    Args:
        mean (tuple): mean value of the dataset.
        to_rgb (bool): whether to reverse the order of mean to align with
            RGB images.
        ratio_range (tuple): range of the expand ratio.
    """

    def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
        if to_rgb:
            self.mean = mean[::-1]
        else:
            self.mean = mean
        # keep the raw args around so that __repr__ can report them
        self.to_rgb = to_rgb
        self.ratio_range = ratio_range
        self.min_ratio, self.max_ratio = ratio_range

    def __call__(self, results):
        if random.randint(2):
            return results

        img, boxes = [results[k] for k in ('img', 'gt_bboxes')]
        h, w, c = img.shape
        ratio = random.uniform(self.min_ratio, self.max_ratio)
        expand_img = np.full((int(h * ratio), int(w * ratio), c),
                             self.mean).astype(img.dtype)
        left = int(random.uniform(0, w * ratio - w))
        top = int(random.uniform(0, h * ratio - h))
        expand_img[top:top + h, left:left + w] = img
        boxes += np.tile((left, top), 2)

        results['img'] = expand_img
        results['gt_bboxes'] = boxes
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(mean={}, to_rgb={}, ratio_range={})'.format(
            self.mean, self.to_rgb, self.ratio_range)
        return repr_str
@PIPELINES.register_module
class MinIoURandomCrop(object):
    """Random crop the image & bboxes; cropped patches must satisfy a minimum
    IoU requirement with the original bboxes. The IoU threshold is randomly
    selected from min_ious.

    Args:
        min_ious (tuple): minimum IoU thresholds to sample from.
        min_crop_size (float): minimum crop size as a fraction of the image
            (i.e. h, w := a * h, a * w, where a >= min_crop_size).
    """

    def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.min_ious = min_ious
        # mode 1: return the original image untouched
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, results):
        img, boxes, labels = [
            results[k] for k in ('img', 'gt_bboxes', 'gt_labels')
        ]
        h, w, c = img.shape
        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return results

            min_iou = mode
            for i in range(50):
                new_w = random.uniform(self.min_crop_size * w, w)
                new_h = random.uniform(self.min_crop_size * h, h)

                # h / w must be in [0.5, 2]
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue

                left = random.uniform(w - new_w)
                top = random.uniform(h - new_h)

                patch = np.array(
                    (int(left), int(top), int(left + new_w), int(top + new_h)))
                overlaps = bbox_overlaps(
                    patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
                if overlaps.min() < min_iou:
                    continue

                # the centers of the boxes should be inside the cropped image
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                mask = (center[:, 0] > patch[0]) * (
                    center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * (
                        center[:, 1] < patch[3])
                if not mask.any():
                    continue
                boxes = boxes[mask]
                labels = labels[mask]

                # adjust boxes
                img = img[patch[1]:patch[3], patch[0]:patch[2]]
                boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                boxes -= np.tile(patch[:2], 2)

                results['img'] = img
                results['gt_bboxes'] = boxes
                results['gt_labels'] = labels
                return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(min_ious={}, min_crop_size={})'.format(
            self.min_ious, self.min_crop_size)
        return repr_str
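PhotoMetricDistortion, Expand and MinIoURandomCrop together make up the SSD-style "extra aug" mentioned in the commit list. A hypothetical sketch of how they might be chained in a pipeline (values illustrative; img_norm_cfg as defined earlier):

ssd_augs = [
    dict(type='PhotoMetricDistortion',
         brightness_delta=32, contrast_range=(0.5, 1.5),
         saturation_range=(0.5, 1.5), hue_delta=18),
    dict(type='Expand', mean=img_norm_cfg['mean'],
         to_rgb=img_norm_cfg['to_rgb'], ratio_range=(1, 4)),
    dict(type='MinIoURandomCrop',
         min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3),
]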
@PIPELINES.register_module
class Corrupt(object):

    def __init__(self, corruption, severity=1):
        self.corruption = corruption
        self.severity = severity

    def __call__(self, results):
        results['img'] = corrupt(
            results['img'].astype(np.uint8),
            corruption_name=self.corruption,
            severity=self.severity)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(corruption={}, severity={})'.format(
            self.corruption, self.severity)
        return repr_str
# mmdet/datasets/registry.py
from mmdet.utils import Registry

DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
# mmdet/datasets/utils.py
from collections.abc import Sequence

import matplotlib.pyplot as plt
import mmcv
import numpy as np
import torch


def to_tensor(data):
    """Convert objects of various python types to :obj:`torch.Tensor`.

    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
    :class:`Sequence`, :class:`int` and :class:`float`.
    """
    if isinstance(data, torch.Tensor):
        return data
    elif isinstance(data, np.ndarray):
        return torch.from_numpy(data)
    elif isinstance(data, Sequence) and not mmcv.is_str(data):
        return torch.tensor(data)
    elif isinstance(data, int):
        return torch.LongTensor([data])
    elif isinstance(data, float):
        return torch.FloatTensor([data])
    else:
        raise TypeError('type {} cannot be converted to tensor.'.format(
            type(data)))


def random_scale(img_scales, mode='range'):
    """Randomly select a scale from a list of scales or scale ranges.

    Args:
        img_scales (list[tuple]): Image scale or scale range.
        mode (str): "range" or "value".

    Returns:
        tuple: Sampled image scale.
    """
    num_scales = len(img_scales)
    if num_scales == 1:  # fixed scale is specified
        img_scale = img_scales[0]
    elif num_scales == 2:  # randomly sample a scale
        if mode == 'range':
            img_scale_long = [max(s) for s in img_scales]
            img_scale_short = [min(s) for s in img_scales]
            long_edge = np.random.randint(
                min(img_scale_long),
                max(img_scale_long) + 1)
            short_edge = np.random.randint(
                min(img_scale_short),
                max(img_scale_short) + 1)
            img_scale = (long_edge, short_edge)
        elif mode == 'value':
            img_scale = img_scales[np.random.randint(num_scales)]
    else:
        if mode != 'value':
            raise ValueError(
                'Only "value" mode supports more than 2 image scales')
        img_scale = img_scales[np.random.randint(num_scales)]
    return img_scale


def show_ann(coco, img, ann_info):
    plt.imshow(mmcv.bgr2rgb(img))
    plt.axis('off')
    coco.showAnns(ann_info)
    plt.show()
The detector APIs are updated to match: show_result no longer takes an explicit img_norm_cfg argument and instead reads the normalization config that Collect now stores in each image's meta. In BaseDetector:

@@ -87,12 +87,7 @@ class BaseDetector(nn.Module):
         else:
             return self.forward_test(img, img_meta, **kwargs)

-    def show_result(self,
-                    data,
-                    result,
-                    img_norm_cfg,
-                    dataset=None,
-                    score_thr=0.3):
+    def show_result(self, data, result, dataset=None, score_thr=0.3):
         if isinstance(result, tuple):
             bbox_result, segm_result = result
         else:
@@ -100,7 +95,7 @@ class BaseDetector(nn.Module):
         img_tensor = data['img'][0]
         img_metas = data['img_meta'][0].data[0]
-        imgs = tensor2imgs(img_tensor, **img_norm_cfg)
+        imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
         assert len(imgs) == len(img_metas)
         if dataset is None:
@@ -402,7 +402,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
     def aug_test(self, img, img_meta, proposals=None, rescale=False):
         raise NotImplementedError

-    def show_result(self, data, result, img_norm_cfg, **kwargs):
+    def show_result(self, data, result, **kwargs):
         if self.with_mask:
             ms_bbox_result, ms_segm_result = result
             if isinstance(ms_bbox_result, dict):
@@ -411,5 +411,4 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
         else:
             if isinstance(result, dict):
                 result = result['ensemble']
-        super(CascadeRCNN, self).show_result(data, result, img_norm_cfg,
-                                             **kwargs)
+        super(CascadeRCNN, self).show_result(data, result, **kwargs)
@@ -81,7 +81,7 @@ class RPN(BaseDetector, RPNTestMixin):
         # TODO: remove this restriction
         return proposal_list[0].cpu().numpy()

-    def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20):
+    def show_result(self, data, result, dataset=None, top_k=20):
         """Show RPN proposals on the image.

         Although we assume batch size is 1, this method supports arbitrary
@@ -89,7 +89,7 @@ class RPN(BaseDetector, RPNTestMixin):
         """
         img_tensor = data['img'][0]
         img_metas = data['img_meta'][0].data[0]
-        imgs = tensor2imgs(img_tensor, **img_norm_cfg)
+        imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
         assert len(imgs) == len(img_metas)
         for img, img_meta in zip(imgs, img_metas):
             h, w, _ = img_meta['img_shape']
build_from_cfg is also fixed: the resolved class is kept in a separate obj_cls variable so that error messages still report the original type string, and plain classes (not just registered name strings) are now accepted.

@@ -61,14 +61,16 @@ def build_from_cfg(cfg, registry, default_args=None):
     args = cfg.copy()
     obj_type = args.pop('type')
     if mmcv.is_str(obj_type):
-        obj_type = registry.get(obj_type)
-        if obj_type is None:
+        obj_cls = registry.get(obj_type)
+        if obj_cls is None:
             raise KeyError('{} is not in the {} registry'.format(
                 obj_type, registry.name))
-    elif not inspect.isclass(obj_type):
+    elif inspect.isclass(obj_type):
+        obj_cls = obj_type
+    else:
         raise TypeError('type must be a str or valid type, but got {}'.format(
             type(obj_type)))
     if default_args is not None:
         for name, value in default_args.items():
             args.setdefault(name, value)
-    return obj_type(**args)
+    return obj_cls(**args)
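With that fix in place, a config dict can name a registered class by string or pass the class itself; both resolve to obj_cls. A minimal sketch (import paths follow the module layout shown above):

from mmdet.utils import build_from_cfg
from mmdet.datasets.registry import PIPELINES

resize = build_from_cfg(
    dict(type='Resize', img_scale=(1333, 800)), PIPELINES)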
@@ -27,7 +27,7 @@ def single_gpu_test(model, data_loader, show=False):
         results.append(result)

         if show:
-            model.module.show_result(data, result, dataset.img_norm_cfg)
+            model.module.show_result(data, result)

         batch_size = data['img'][0].size(0)
         for _ in range(batch_size):
Robustness testing now injects the new Corrupt transform into the test pipeline instead of setting corruption keys on the dataset config:

 import argparse
+import copy
 import os
 import os.path as osp
 import shutil
@@ -350,13 +351,15 @@ def main():
             continue

         # assign corruption and severity
-        if corruption_severity == 0:
-            # evaluate without corruptions for severity = 0
-            cfg.data.test['corruption'] = None
-            cfg.data.test['corruption_severity'] = 0
-        else:
-            cfg.data.test['corruption'] = corruption
-            cfg.data.test['corruption_severity'] = corruption_severity
+        if corruption_severity > 0:
+            test_data_cfg = copy.deepcopy(cfg.data.test)
+            corruption_trans = dict(
+                type='Corrupt',
+                corruption=corruption,
+                severity=corruption_severity)
+            # TODO: hard coded "1", we assume that the first step is
+            # loading images, which needs to be fixed in the future
+            test_data_cfg['pipeline'].insert(1, corruption_trans)

         # print info
         print('\nTesting {} at severity {}'.format(corruption,