# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmcls.datasets import PIPELINES as CLS_PIPELINE

from ..builder import PIPELINES


@PIPELINES.register_module()
class Flip:
    """Flip the input data with a probability.

    Reverse the order of elements in the given data with a specific direction.
    The shape of the data is preserved, but the elements are reordered.
    Required keys are the keys in attributes "keys", added or modified keys are
    "flip", "flip_direction" and the keys in attributes "keys".
    It also supports flipping a list of images with the same flip.

    Args:
        keys (list[str]): The images to be flipped.
        flip_ratio (float): The probability to flip the images.
        direction (str): Flip images horizontally or vertically. Options are
            "horizontal" | "vertical". Default: "horizontal".

    Raises:
        ValueError: If ``direction`` is not one of the supported directions.
    """
    _directions = ['horizontal', 'vertical']

    def __init__(self, keys, flip_ratio=0.5, direction='horizontal'):
        if direction not in self._directions:
            raise ValueError(f'Direction {direction} is not supported. '
                             f'Currently support ones are {self._directions}')
        self.keys = keys
        self.flip_ratio = flip_ratio
        self.direction = direction

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        # A single draw decides the flip for every key, so all images (and
        # every image inside a list) receive the same flip.
        flip = np.random.random() < self.flip_ratio

        if flip:
            for key in self.keys:
                if isinstance(results[key], list):
                    for v in results[key]:
                        # The return value is not used: the data is expected
                        # to be flipped in place.
                        mmcv.imflip_(v, self.direction)
                else:
                    mmcv.imflip_(results[key], self.direction)

        results['flip'] = flip
        results['flip_direction'] = self.direction

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(keys={self.keys}, flip_ratio={self.flip_ratio}, '
                     f'direction={self.direction})')
        return repr_str


@PIPELINES.register_module()
class Resize:
    """Resize data to a specific size for training or resize the images to fit
    the network input regulation for testing.

    When used for resizing images to fit network input regulation, the case is
    that a network may have several downsample and then upsample operation,
    then the input height and width should be divisible by the downsample
    factor of the network.
    For example, the network would downsample the input for 5 times with
    stride 2, then the downsample factor is 2^5 = 32 and the height
    and width should be divisible by 32.

    Required keys are the keys in attribute "keys", added or modified keys are
    "keep_ratio", "scale_factor", "interpolation" and the
    keys in attribute "keys".

    All keys in "keys" should have the same shape. "test_trans" is used to
    record the test transformation to align the input's shape.

    Args:
        keys (list[str]): The images to be resized.
        scale (float | Tuple[int]): If scale is Tuple(int), target spatial
            size (h, w). Otherwise, target spatial size is scaled by input
            size. If any of scale is -1, we will rescale short edge.
            Note that when it is used, `size_factor` and `max_size` are
            useless. Default: None
        keep_ratio (bool): If set to True, images will be resized without
            changing the aspect ratio. Otherwise, it will resize images to a
            given size. Default: False.
            Note that it is used together with `scale`.
        size_factor (int): Let the output shape be a multiple of size_factor.
            Default:None.
            Note that when it is used, `scale` should be set to None and
            `keep_ratio` should be set to False.
        max_size (int): The maximum size of the longest side of the output.
            Default:None.
            Note that it is used together with `size_factor`.
        interpolation (str): Algorithm used for interpolation:
            "nearest" | "bilinear" | "bicubic" | "area" | "lanczos".
            Default: "bilinear".
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used.
            Default: None.

    Raises:
        AssertionError: If ``keys`` is empty, or if ``size_factor`` /
            ``max_size`` are combined with incompatible arguments.
        ValueError: If ``scale`` is a non-positive float.
        TypeError: If ``scale`` is not None, a float or a tuple of int.
    """

    def __init__(self,
                 keys,
                 scale=None,
                 keep_ratio=False,
                 size_factor=None,
                 max_size=None,
                 interpolation='bilinear',
                 backend=None):
        assert keys, 'Keys should not be empty.'
        if size_factor:
            # The message is a single concatenated string; a comma between
            # the two parts would turn it into a tuple.
            assert scale is None, ('When size_factor is used, scale should '
                                   f'be None. But received {scale}.')
            assert keep_ratio is False, ('When size_factor is used, '
                                         'keep_ratio should be False.')
        if max_size:
            assert size_factor is not None, (
                'When max_size is used, '
                f'size_factor should also be set. But received {size_factor}.')
        if isinstance(scale, float):
            if scale <= 0:
                raise ValueError(f'Invalid scale {scale}, must be positive.')
        elif mmcv.is_tuple_of(scale, int):
            max_long_edge = max(scale)
            max_short_edge = min(scale)
            if max_short_edge == -1:
                # assign np.inf to long edge for rescaling short edge later.
                scale = (np.inf, max_long_edge)
        elif scale is not None:
            raise TypeError(
                f'Scale must be None, float or tuple of int, but got '
                f'{type(scale)}.')
        self.keys = keys
        self.scale = scale
        self.size_factor = size_factor
        self.max_size = max_size
        self.keep_ratio = keep_ratio
        self.interpolation = interpolation
        self.backend = backend

    def _resize(self, img, scale):
        """Resize given image with corresponding scale.

        Args:
            img (np.array): Image to be resized.
            scale (float | Tuple[int]): Scale used in resize process.

        Returns:
            tuple: Tuple contains resized image and scale factor in resize
                process. The scale factor is whatever ``mmcv.imrescale``
                returns when ``keep_ratio`` is True, otherwise a float32
                array ``(w_scale, h_scale)``.
        """
        if self.keep_ratio:
            img, scale_factor = mmcv.imrescale(
                img,
                scale,
                return_scale=True,
                interpolation=self.interpolation,
                backend=self.backend)
        else:
            img, w_scale, h_scale = mmcv.imresize(
                img,
                scale,
                return_scale=True,
                interpolation=self.interpolation,
                backend=self.backend)
            scale_factor = np.array((w_scale, h_scale), dtype=np.float32)
        return img, scale_factor

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        if self.size_factor:
            h, w = results[self.keys[0]].shape[:2]
            # Round each side down to the nearest multiple of size_factor.
            new_h = h - (h % self.size_factor)
            new_w = w - (w % self.size_factor)
            if self.max_size:
                # Largest multiple of size_factor not exceeding max_size.
                max_side = self.max_size - (self.max_size % self.size_factor)
                new_h = min(max_side, new_h)
                new_w = min(max_side, new_w)
            scale = (new_w, new_h)
        elif isinstance(self.scale, tuple) and (np.inf in self.scale):
            # find inf in self.scale, calculate ``scale`` manually
            h, w = results[self.keys[0]].shape[:2]
            # self.scale[-1] holds the target length of the short edge; the
            # other edge is scaled by the same ratio.
            if h < w:
                scale = (int(self.scale[-1] / h * w), self.scale[-1])
            else:
                scale = (self.scale[-1], int(self.scale[-1] / w * h))
        else:
            # direct use the given ones
            scale = self.scale

        # here we assume all images in self.keys have the same input size
        for key in self.keys:
            results[key], scale_factor = self._resize(results[key], scale)
            # Keep a channel axis for images that come back as 2-D arrays.
            if len(results[key].shape) == 2:
                results[key] = np.expand_dims(results[key], axis=2)

        # Recorded from the last processed key (see the assumption above).
        results['scale_factor'] = scale_factor
        results['keep_ratio'] = self.keep_ratio
        results['interpolation'] = self.interpolation

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (
            f'(keys={self.keys}, scale={self.scale}, '
            f'keep_ratio={self.keep_ratio}, size_factor={self.size_factor}, '
            f'max_size={self.max_size},interpolation={self.interpolation})')
        return repr_str


@PIPELINES.register_module()
class NumpyPad:
    """Numpy Padding.

    In this augmentation, numpy padding is adopted to customize padding
    augmentation. Please carefully read the numpy manual in:
    https://numpy.org/doc/stable/reference/generated/numpy.pad.html

    If you just hope a single dimension to be padded, you must set ``padding``
    like this:

    ::

        padding = ((2, 2), (0, 0), (0, 0))

    In this case, if you adopt an input with three dimension, only the first
    dimension will be padded.

    Args:
        keys (list[str]): The images to be padded.
        padding (int | tuple(int)): Please refer to the args ``pad_width`` in
            ``numpy.pad``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``numpy.pad``.
    """

    def __init__(self, keys, padding, **kwargs):
        self.keys = keys
        self.padding = padding
        self.kwargs = kwargs

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        for k in self.keys:
            results[k] = np.pad(results[k], self.padding, **self.kwargs)

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += (
            f'(keys={self.keys}, padding={self.padding}, kwargs={self.kwargs})'
        )
        return repr_str


@CLS_PIPELINE.register_module()
@PIPELINES.register_module()
class RandomImgNoise:
    """Add random noise with specific distribution and range to the input
    image.

    The sampled noise is linearly rescaled so that its minimum maps to
    ``lower_bound`` and its maximum maps to ``upper_bound`` before it is
    added to the image.

    Args:
        keys (list[str]): The images to be added random noise.
        lower_bound (float, optional): The lower bound of the noise.
            Default to ``0.``.
        upper_bound (float, optional): The upper bound of the noise.
            Default to ``1 / 128.``.
        distribution (str, optional): The probability distribution of the
            noise. Options are 'uniform' | 'normal'. Default to 'uniform'.

    Raises:
        AssertionError: If ``keys`` is empty.
        KeyError: If ``distribution`` is neither 'uniform' nor 'normal'.
    """

    def __init__(self,
                 keys,
                 lower_bound=0,
                 upper_bound=1 / 128.,
                 distribution='uniform'):
        assert keys, 'Keys should not be empty.'
        self.keys = keys
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

        if distribution not in ['uniform', 'normal']:
            raise KeyError('Only support \'uniform\' distribution and '
                           '\'normal\' distribution, receive '
                           f'{distribution}.')
        self.distribution = distribution

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        if self.distribution == 'uniform':
            dist_fn = np.random.rand
        else:  # self.distribution == 'normal'
            dist_fn = np.random.randn

        for key in self.keys:
            img_size = results[key].shape
            noise = dist_fn(*img_size)
            # Min-max normalise the sample, then map it onto
            # [lower_bound, upper_bound].
            scale = noise.max() - noise.min()
            noise = noise - noise.min()
            noise = noise / scale * (self.upper_bound - self.lower_bound)
            noise = noise + self.lower_bound
            # In-place addition: the image array itself is modified and
            # keeps its dtype, so it must be able to hold the float noise.
            results[key] += noise

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(keys={self.keys}, lower_bound={self.lower_bound}, '
                     f'upper_bound={self.upper_bound})')
        return repr_str


@CLS_PIPELINE.register_module()
@PIPELINES.register_module()
class RandomCropLongEdge:
    """Random crop the given image by the long edge.

    A square crop whose side equals the short edge is taken at a random
    offset along the long edge.

    Args:
        keys (list[str]): The images to be cropped.

    Raises:
        AssertionError: If ``keys`` is empty.
    """

    def __init__(self, keys):
        assert keys, 'Keys should not be empty.'
        self.keys = keys

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        for key in self.keys:
            img = results[key]
            img_height, img_width = img.shape[:2]
            crop_size = min(img_height, img_width)
            # ``np.random.randint`` excludes its upper bound, so ``+ 1`` is
            # needed for the last valid offset (edge - crop_size) to be
            # reachable. The short edge is skipped to avoid a useless draw.
            y1 = 0 if img_height == crop_size else \
                np.random.randint(0, img_height - crop_size + 1)
            x1 = 0 if img_width == crop_size else \
                np.random.randint(0, img_width - crop_size + 1)
            # The bottom-right corner is passed as an inclusive coordinate.
            y2, x2 = y1 + crop_size - 1, x1 + crop_size - 1

            img = mmcv.imcrop(img, bboxes=np.array([x1, y1, x2, y2]))
            results[key] = img

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(keys={self.keys})')
        return repr_str


@CLS_PIPELINE.register_module()
@PIPELINES.register_module()
class CenterCropLongEdge:
    """Center crop the given image by the long edge.

    A square crop whose side equals the short edge is taken from the middle
    of the long edge.

    Args:
        keys (list[str]): The images to be cropped.

    Raises:
        AssertionError: If ``keys`` is empty.
    """

    def __init__(self, keys):
        assert keys, 'Keys should not be empty.'
        self.keys = keys

    def __call__(self, results):
        """Call function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        for key in self.keys:
            img = results[key]
            img_height, img_width = img.shape[:2]
            crop_size = min(img_height, img_width)
            # Half of the leftover margin, rounded down. It is 0 along the
            # short edge, so no special case is required.
            y1 = (img_height - crop_size) // 2
            x1 = (img_width - crop_size) // 2
            # The bottom-right corner is passed as an inclusive coordinate.
            y2 = y1 + crop_size - 1
            x2 = x1 + crop_size - 1

            img = mmcv.imcrop(img, bboxes=np.array([x1, y1, x2, y2]))
            results[key] = img

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(keys={self.keys})')
        return repr_str