"""Copyright (c) Meta Platforms, Inc. and affiliates.

This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
# Implements image augmentation pipelines (train/test) built on albumentations.

import albumentations as alb
import cv2
import numpy as np
from albumentations.pytorch import ToTensorV2
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD


def alb_wrapper(transform):
    """Wrap an albumentations transform so it is directly callable on an image.

    Args:
        transform: An albumentations transform (e.g. ``alb.Compose``) that is
            invoked as ``transform(image=...)``.

    Returns:
        A callable ``f(im)`` that converts *im* (PIL image or ndarray) to a
        numpy array, applies the transform, and returns the ``"image"`` entry.
    """

    def f(im):
        return transform(image=np.asarray(im))["image"]

    return f


def _normalize_scale(scale):
    """Coerce *scale* into a ``(low, high)`` pair for ``np.random.randint``.

    Accepts either a single int (fixed kernel size) or a 2-element tuple/list
    ``(min, max)``. For a single int the high bound is ``scale + 1`` so that
    ``np.random.randint(low, high)`` has a non-empty range — ``randint(s, s)``
    raises ``ValueError`` (low must be strictly less than high).

    Raises:
        ValueError: If a tuple/list is given that does not have exactly
            two elements.
    """
    if isinstance(scale, (tuple, list)):
        if len(scale) != 2:
            raise ValueError(f"scale must contain exactly 2 values, got {scale!r}")
        return tuple(scale)
    return (scale, scale + 1)


def _random_elliptic_kernel(scale):
    """Sample an elliptic structuring element with random side lengths.

    Side lengths are drawn independently from ``[scale[0], scale[1])``,
    matching ``np.random.randint`` semantics (upper bound exclusive).
    """
    size = tuple(np.random.randint(scale[0], scale[1], 2))
    return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, size)


class Erosion(alb.ImageOnlyTransform):
    """Apply an erosion operation to an image.

    Erosion is a morphological operation that shrinks the white regions in a
    binary image.

    Args:
        scale (int or tuple/list of int): The scale or range for the size of
            the erosion kernel. If an integer is provided, a square kernel of
            that size will be used. If a tuple or list is provided, it should
            contain two integers representing the minimum and maximum sizes
            for the erosion kernel (maximum exclusive, per
            ``np.random.randint``).
        always_apply (bool, optional): Whether to always apply this
            transformation. Default is False.
        p (float, optional): The probability of applying this transformation.
            Default is 0.5.

    Returns:
        numpy.ndarray: The transformed image.
    """

    def __init__(self, scale, always_apply=False, p=0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.scale = _normalize_scale(scale)

    def apply(self, img, **params):
        return cv2.erode(img, _random_elliptic_kernel(self.scale), iterations=1)


class Dilation(alb.ImageOnlyTransform):
    """Apply a dilation operation to an image.

    Dilation is a morphological operation that expands the white regions in a
    binary image.

    Args:
        scale (int or tuple/list of int): The scale or range for the size of
            the dilation kernel. If an integer is provided, a square kernel of
            that size will be used. If a tuple or list is provided, it should
            contain two integers representing the minimum and maximum sizes
            for the dilation kernel (maximum exclusive, per
            ``np.random.randint``).
        always_apply (bool, optional): Whether to always apply this
            transformation. Default is False.
        p (float, optional): The probability of applying this transformation.
            Default is 0.5.

    Returns:
        numpy.ndarray: The transformed image.
    """

    def __init__(self, scale, always_apply=False, p=0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.scale = _normalize_scale(scale)

    def apply(self, img, **params):
        return cv2.dilate(img, _random_elliptic_kernel(self.scale), iterations=1)


class Bitmap(alb.ImageOnlyTransform):
    """Apply a bitmap-style transformation to an image.

    This transformation replaces all pixel values below a certain threshold
    with a specified value.

    Args:
        value (int, optional): The value to replace pixels below the threshold
            with. Default is 0.
        lower (int, optional): The threshold value below which pixels will be
            replaced. Default is 200.
        always_apply (bool, optional): Whether to always apply this
            transformation. Default is False.
        p (float, optional): The probability of applying this transformation.
            Default is 0.5.

    Returns:
        numpy.ndarray: The transformed image.
    """

    def __init__(self, value=0, lower=200, always_apply=False, p=0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.lower = lower
        self.value = value

    def apply(self, img, **params):
        # Copy first so the caller's array is never mutated in place.
        out = img.copy()
        out[out < self.lower] = self.value
        return out


# Training pipeline: light geometric/photometric noise on a white background,
# then resize to 1024x1024, ImageNet normalization, and tensor conversion.
train_transform = alb_wrapper(
    alb.Compose(
        [
            Bitmap(p=0),
            alb.OneOf([Erosion((2, 3)), Dilation((2, 3))], p=0.02),
            alb.Affine(shear={"x": (0, 3), "y": (-3, 0)}, cval=(255, 255, 255), p=0.03),
            alb.ShiftScaleRotate(
                shift_limit_x=(0, 0.04),
                shift_limit_y=(0, 0.03),
                scale_limit=(-0.15, 0.03),
                rotate_limit=2,
                border_mode=0,
                interpolation=2,
                value=(255, 255, 255),
                p=0.03,
            ),
            alb.GridDistortion(
                distort_limit=0.05,
                border_mode=0,
                interpolation=2,
                value=(255, 255, 255),
                p=0.04,
            ),
            alb.Compose(
                [
                    alb.Affine(
                        translate_px=(0, 5), always_apply=True, cval=(255, 255, 255)
                    ),
                    alb.ElasticTransform(
                        p=1,
                        alpha=50,
                        sigma=120 * 0.1,
                        alpha_affine=120 * 0.01,
                        border_mode=0,
                        value=(255, 255, 255),
                    ),
                ],
                p=0.04,
            ),
            alb.RandomBrightnessContrast(0.1, 0.1, True, p=0.03),
            alb.ImageCompression(95, p=0.07),
            alb.GaussNoise(20, p=0.08),
            alb.GaussianBlur((3, 3), p=0.03),
            alb.Resize(1024, 1024),
            alb.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
            ToTensorV2(),
        ]
    )
)

# Evaluation pipeline: deterministic resize + normalization only.
test_transform = alb_wrapper(
    alb.Compose(
        [
            alb.Resize(1024, 1024),
            alb.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
            ToTensorV2(),
        ]
    )
)