"src/vscode:/vscode.git/clone" did not exist on "ecc1df990a45900bd06ea2e4051db15a6603fdd6"
Commit ff284133 authored by Lawrence, committed by Kai Chen

Remove redundant data transforms (#1522)

* Delete transforms.py

* Delete extra_aug.py

* Update __init__.py
parent 7357e40b
mmdet/datasets/__init__.py

@@ -3,7 +3,6 @@ from .cityscapes import CityscapesDataset
 from .coco import CocoDataset
 from .custom import CustomDataset
 from .dataset_wrappers import ConcatDataset, RepeatDataset
-from .extra_aug import ExtraAugmentation
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .registry import DATASETS
 from .voc import VOCDataset
@@ -13,6 +12,6 @@ from .xml_style import XMLDataset
 __all__ = [
     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
     'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
-    'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation',
-    'WIDERFaceDataset', 'DATASETS', 'build_dataset'
+    'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset',
+    'DATASETS', 'build_dataset'
 ]
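The deleted transforms are redundant because equivalent, dict-configured steps exist in the data pipeline (mmdet.datasets.pipelines). A minimal sketch of the pipeline-style replacement; the type names and values follow the usual SSD-style config and are illustrative, not part of this diff:

# Sketch: pipeline-style equivalents of the deleted augmentations.
# Assumes the pipeline registers PhotoMetricDistortion, Expand and
# MinIoURandomCrop (the post-refactor counterparts of the classes below).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
]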
mmdet/datasets/extra_aug.py (deleted)

import mmcv
import numpy as np
from numpy import random

from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
class PhotoMetricDistortion(object):

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, img, boxes, labels):
        # random brightness
        if random.randint(2):
            delta = random.uniform(-self.brightness_delta,
                                   self.brightness_delta)
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = random.randint(2)
        if mode == 1:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # convert color from BGR to HSV
        img = mmcv.bgr2hsv(img)

        # random saturation
        if random.randint(2):
            img[..., 1] *= random.uniform(self.saturation_lower,
                                          self.saturation_upper)

        # random hue
        if random.randint(2):
            img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = mmcv.hsv2bgr(img)

        # random contrast
        if mode == 0:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # randomly swap channels
        if random.randint(2):
            img = img[..., random.permutation(3)]

        return img, boxes, labels
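For illustration, a minimal sketch of the distortion applied to synthetic data; it expects a float32 BGR image (which ExtraAugmentation below guarantees) and passes boxes and labels through untouched:

import numpy as np

img = np.random.randint(0, 256, (300, 300, 3)).astype(np.float32)  # BGR
boxes = np.array([[10., 20., 100., 120.]], dtype=np.float32)
labels = np.array([1])

distort = PhotoMetricDistortion()
img_aug, boxes_aug, labels_aug = distort(img, boxes, labels)
# only the image is modified; boxes and labels are returned unchanged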
class Expand(object):

    def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
        if to_rgb:
            self.mean = mean[::-1]
        else:
            self.mean = mean
        self.min_ratio, self.max_ratio = ratio_range

    def __call__(self, img, boxes, labels):
        if random.randint(2):
            return img, boxes, labels

        h, w, c = img.shape
        ratio = random.uniform(self.min_ratio, self.max_ratio)
        expand_img = np.full((int(h * ratio), int(w * ratio), c),
                             self.mean).astype(img.dtype)
        left = int(random.uniform(0, w * ratio - w))
        top = int(random.uniform(0, h * ratio - h))
        expand_img[top:top + h, left:left + w] = img
        img = expand_img
        boxes += np.tile((left, top), 2)
        return img, boxes, labels
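A quick worked example of the box arithmetic: pasting the original image at offset (left, top) inside the larger canvas shifts both corners of every box by that offset, which is exactly what adding np.tile((left, top), 2) does:

import numpy as np

boxes = np.array([[10., 20., 50., 60.]])
left, top = 30, 40
boxes + np.tile((left, top), 2)  # -> [[40., 60., 80., 100.]]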
class RandomCrop(object):

    def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        # 1: return ori img
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, boxes, labels):
        h, w, c = img.shape
        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, boxes, labels

            min_iou = mode
            for i in range(50):
                new_w = random.uniform(self.min_crop_size * w, w)
                new_h = random.uniform(self.min_crop_size * h, h)

                # h / w in [0.5, 2]
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue

                # sample the top-left corner of the crop; note that
                # np.random.uniform(x) means uniform(low=x, high=1.0),
                # so both bounds must be given explicitly
                left = random.uniform(0, w - new_w)
                top = random.uniform(0, h - new_h)

                patch = np.array(
                    (int(left), int(top), int(left + new_w), int(top + new_h)))
                overlaps = bbox_overlaps(
                    patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
                if overlaps.min() < min_iou:
                    continue

                # the center of each kept box must lie inside the cropped image
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                mask = (center[:, 0] > patch[0]) * (
                    center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * (
                        center[:, 1] < patch[3])
                if not mask.any():
                    continue
                boxes = boxes[mask]
                labels = labels[mask]

                # adjust boxes: clip to the patch and shift into its coordinates
                img = img[patch[1]:patch[3], patch[0]:patch[2]]
                boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                boxes -= np.tile(patch[:2], 2)

                return img, boxes, labels
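A minimal usage sketch on synthetic data; each call samples a min-IoU mode and retries up to 50 crops until one keeps every remaining box above that IoU and at least one box center inside the patch:

import numpy as np

img = np.zeros((240, 320, 3), dtype=np.float32)
boxes = np.array([[40., 40., 200., 180.]], dtype=np.float32)
labels = np.array([1])

crop = RandomCrop(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)
img_c, boxes_c, labels_c = crop(img, boxes, labels)
# surviving boxes are clipped to the patch and shifted into its coordinates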
class ExtraAugmentation(object):

    def __init__(self,
                 photo_metric_distortion=None,
                 expand=None,
                 random_crop=None):
        self.transforms = []
        if photo_metric_distortion is not None:
            self.transforms.append(
                PhotoMetricDistortion(**photo_metric_distortion))
        if expand is not None:
            self.transforms.append(Expand(**expand))
        if random_crop is not None:
            self.transforms.append(RandomCrop(**random_crop))

    def __call__(self, img, boxes, labels):
        img = img.astype(np.float32)
        for transform in self.transforms:
            img, boxes, labels = transform(img, boxes, labels)
        return img, boxes, labels
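For context, this wrapper was driven by the pre-pipeline config style sketched below; the values shown are the common SSD defaults, not something this diff prescribes:

extra_aug = dict(
    photo_metric_distortion=dict(
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    expand=dict(
        mean=(123.675, 116.28, 103.53), to_rgb=True, ratio_range=(1, 4)),
    random_crop=dict(
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3))

aug = ExtraAugmentation(**extra_aug)
# the image is cast to float32 internally, then each transform runs in order:
# img, boxes, labels = aug(img, boxes, labels)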
mmdet/datasets/transforms.py (deleted)

import mmcv
import numpy as np
import torch

__all__ = [
    'ImageTransform', 'BboxTransform', 'MaskTransform', 'SegMapTransform',
    'Numpy2Tensor'
]
class ImageTransform(object):
    """Preprocess an image.

    1. rescale the image to expected size
    2. normalize the image
    3. flip the image (if needed)
    4. pad the image (if needed)
    5. transpose to (c, h, w)
    """

    def __init__(self,
                 mean=(0, 0, 0),
                 std=(1, 1, 1),
                 to_rgb=True,
                 size_divisor=None):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb
        self.size_divisor = size_divisor

    def __call__(self, img, scale, flip=False, keep_ratio=True):
        if keep_ratio:
            img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
        else:
            img, w_scale, h_scale = mmcv.imresize(
                img, scale, return_scale=True)
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
        img_shape = img.shape
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        if flip:
            img = mmcv.imflip(img)
        if self.size_divisor is not None:
            img = mmcv.impad_to_multiple(img, self.size_divisor)
            pad_shape = img.shape
        else:
            pad_shape = img_shape
        img = img.transpose(2, 0, 1)
        return img, img_shape, pad_shape, scale_factor
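A minimal sketch of the transform in use; the input path and the normalization values are illustrative:

import mmcv

transform = ImageTransform(
    mean=(123.675, 116.28, 103.53),
    std=(58.395, 57.12, 57.375),
    to_rgb=True,
    size_divisor=32)

img = mmcv.imread('demo.jpg')  # hypothetical input image (BGR, uint8)
img_t, img_shape, pad_shape, scale_factor = transform(
    img, scale=(1333, 800), flip=False, keep_ratio=True)
# img_t is float32 in (c, h, w) order, padded so h and w are multiples of 32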
def bbox_flip(bboxes, img_shape, direction='horizontal'):
    """Flip bboxes horizontally or vertically.

    Args:
        bboxes (ndarray): shape (..., 4*k)
        img_shape (tuple): (height, width)
        direction (str): either 'horizontal' or 'vertical'
    """
    assert bboxes.shape[-1] % 4 == 0
    flipped = bboxes.copy()
    if direction == 'horizontal':
        w = img_shape[1]
        flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
        flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
    else:
        h = img_shape[0]
        flipped[..., 1::4] = h - bboxes[..., 3::4] - 1
        flipped[..., 3::4] = h - bboxes[..., 1::4] - 1
    return flipped
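A quick check of the flip arithmetic on a 100-pixel-wide image: each x maps to w - x - 1, and x1/x2 swap roles so the flipped box stays well-formed:

import numpy as np

boxes = np.array([[10., 5., 30., 25.]])
bbox_flip(boxes, img_shape=(50, 100))  # -> [[69., 5., 89., 25.]]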
class BboxTransform(object):
    """Preprocess gt bboxes.

    1. rescale bboxes according to image size
    2. flip bboxes (if needed)
    3. pad the first dimension to `max_num_gts`
    """

    def __init__(self, max_num_gts=None):
        self.max_num_gts = max_num_gts

    def __call__(self, bboxes, img_shape, scale_factor, flip=False):
        gt_bboxes = bboxes * scale_factor
        if flip:
            gt_bboxes = bbox_flip(gt_bboxes, img_shape)
        gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
        gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
        if self.max_num_gts is None:
            return gt_bboxes
        else:
            num_gts = gt_bboxes.shape[0]
            padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
            padded_bboxes[:num_gts, :] = gt_bboxes
            return padded_bboxes
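A sketch of chaining this with ImageTransform, reusing the img_shape and scale_factor returned in the sketch above (the boxes are synthetic):

import numpy as np

bbox_transform = BboxTransform()
gt_bboxes = np.array([[10., 20., 200., 300.]], dtype=np.float32)
gt_bboxes = bbox_transform(gt_bboxes, img_shape, scale_factor, flip=False)
# boxes are scaled to the resized image and clipped to its bounds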
class MaskTransform(object):
    """Preprocess masks.

    1. resize masks to expected size and stack to a single array
    2. flip the masks (if needed)
    3. pad the masks (if needed)
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
        # aspect ratio unchanged
        if isinstance(scale_factor, float):
            masks = [
                mmcv.imrescale(mask, scale_factor, interpolation='nearest')
                for mask in masks
            ]
        # aspect ratio changed
        else:
            w_ratio, h_ratio = scale_factor[:2]
            if masks:
                h, w = masks[0].shape[:2]
                new_h = int(np.round(h * h_ratio))
                new_w = int(np.round(w * w_ratio))
                new_size = (new_w, new_h)
                masks = [
                    mmcv.imresize(mask, new_size, interpolation='nearest')
                    for mask in masks
                ]
        if flip:
            masks = [mask[:, ::-1] for mask in masks]
        padded_masks = [
            mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
        ]
        padded_masks = np.stack(padded_masks, axis=0)
        return padded_masks
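A minimal sketch with one synthetic binary mask, for the keep_ratio case where scale_factor is a plain float:

import numpy as np

mask = np.zeros((240, 320), dtype=np.uint8)
mask[60:120, 80:160] = 1
padded = MaskTransform()([mask], pad_shape=(256, 352, 3), scale_factor=1.0)
# padded.shape == (1, 256, 352)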
class SegMapTransform(object):
    """Preprocess semantic segmentation maps.

    1. rescale the segmentation map to expected size
    2. flip the segmentation map (if needed)
    3. pad the segmentation map (if needed)
    """

    def __init__(self, size_divisor=None):
        self.size_divisor = size_divisor

    def __call__(self, img, scale, flip=False, keep_ratio=True):
        if keep_ratio:
            img = mmcv.imrescale(img, scale, interpolation='nearest')
        else:
            img = mmcv.imresize(img, scale, interpolation='nearest')
        if flip:
            img = mmcv.imflip(img)
        if self.size_divisor is not None:
            img = mmcv.impad_to_multiple(img, self.size_divisor)
        return img
class Numpy2Tensor(object):
    """Convert one or more numpy arrays to torch Tensors."""

    def __init__(self):
        pass

    def __call__(self, *args):
        if len(args) == 1:
            return torch.from_numpy(args[0])
        else:
            return tuple([torch.from_numpy(np.array(array)) for array in args])
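And a small sketch of the conversion helper: one array yields a single Tensor, several arrays yield a tuple:

import numpy as np

n2t = Numpy2Tensor()
single = n2t(np.ones((2, 3), dtype=np.float32))  # -> torch.Tensor
pair = n2t(np.zeros(4), np.arange(3))            # -> tuple of two Tensors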