Commit 25154f99 authored by Surgan Jandial, committed by Francisco Massa

Random Perspective transform added (#781)

* final changes

* final

* linter

* test changes

* linter

* lint

* indent

* lint

* minor changes

* parameter added

* ci

* ci fixes

* indent

* indent

* indent

* arg fixed
parent 4785dfac
@@ -150,6 +150,22 @@ class Tester(unittest.TestCase):
            assert (min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon or
                    aspect_ratio_obtained == 1.0)

    def test_randomperspective(self):
        for i in range(10):
            height = random.randint(24, 32) * 2
            width = random.randint(24, 32) * 2
            img = torch.ones(3, height, width)
            to_pil_image = transforms.ToPILImage()
            img = to_pil_image(img)
            perp = transforms.RandomPerspective()
            startpoints, endpoints = perp.get_params(width, height, 0.5)
            tr_img = F.perspective(img, startpoints, endpoints)
            tr_img2 = F.to_tensor(F.perspective(tr_img, endpoints, startpoints))
            tr_img = F.to_tensor(tr_img)
            assert img.size[0] == width and img.size[1] == height
            assert torch.nn.functional.mse_loss(tr_img, F.to_tensor(img)) + 0.3 > \
                torch.nn.functional.mse_loss(tr_img2, F.to_tensor(img))

    def test_resize(self):
        height = random.randint(24, 32) * 2
        width = random.randint(24, 32) * 2
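The test above relies on a round-trip property: warping with (startpoints, endpoints) and then warping back with the roles swapped should land closer to the original image (in MSE) than the single forward warp, allowing a 0.3 slack term for interpolation and border loss. A minimal standalone sketch of that check, assuming this commit's torchvision is installed (the 100x100 size and all-ones image are illustrative, not part of the commit):

import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F

img = transforms.ToPILImage()(torch.ones(3, 100, 100))
startpoints, endpoints = transforms.RandomPerspective.get_params(100, 100, 0.5)
warped = F.perspective(img, startpoints, endpoints)        # forward warp
restored = F.perspective(warped, endpoints, startpoints)   # approximate inverse warp
orig_t = F.to_tensor(img)
fwd_err = torch.nn.functional.mse_loss(F.to_tensor(warped), orig_t)
rt_err = torch.nn.functional.mse_loss(F.to_tensor(restored), orig_t)
assert rt_err < fwd_err + 0.3   # the round trip should not be much worse than a single warp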
@@ -407,6 +407,49 @@ def hflip(img):
    return img.transpose(Image.FLIP_LEFT_RIGHT)
def _get_perspective_coeffs(startpoints, endpoints):
    """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.

    In a perspective transform each pixel (x, y) gets transformed as,
        (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )

    Args:
        startpoints (list of tuples): [top-left, top-right, bottom-right, bottom-left] of the original image.
        endpoints (list of tuples): [top-left, top-right, bottom-right, bottom-left] of the transformed image.

    Returns:
        octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
    """
    matrix = []
    for p1, p2 in zip(endpoints, startpoints):
        matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
        matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])

    A = torch.tensor(matrix, dtype=torch.float)
    B = torch.tensor(startpoints, dtype=torch.float).view(8)
    # Solve the 8x8 linear least-squares system A * coeffs = B for the coefficients.
    res = torch.gels(B, A)[0]
    return res.squeeze_(1).tolist()
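The linear system above is built so that plugging each endpoint into the formula from the docstring yields the matching startpoint, i.e. the output-to-input direction that PIL's Image.transform expects. A small sanity check under that reading (the corner values are made up for illustration, and it assumes a torch version where torch.gels is available, as in this commit):

startpoints = [(0, 0), (99, 0), (99, 99), (0, 99)]
endpoints = [(10, 5), (90, 10), (95, 85), (5, 95)]
a, b, c, d, e, f, g, h = _get_perspective_coeffs(startpoints, endpoints)
for (ex, ey), (sx, sy) in zip(endpoints, startpoints):
    denom = g * ex + h * ey + 1
    mapped = ((a * ex + b * ey + c) / denom, (d * ex + e * ey + f) / denom)
    # each transformed-image corner should map back to the original corner, within roundoff
    assert abs(mapped[0] - sx) < 0.5 and abs(mapped[1] - sy) < 0.5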
def perspective(img, startpoints, endpoints, interpolation=Image.BICUBIC):
    """Perform perspective transform of the given PIL Image.

    Args:
        img (PIL Image): Image to be transformed.
        startpoints (list of tuples): [top-left, top-right, bottom-right, bottom-left] of the original image.
        endpoints (list of tuples): [top-left, top-right, bottom-right, bottom-left] of the transformed image.
        interpolation: Default - Image.BICUBIC.

    Returns:
        PIL Image: Perspectively transformed Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    coeffs = _get_perspective_coeffs(startpoints, endpoints)
    return img.transform(img.size, Image.PERSPECTIVE, coeffs, interpolation)
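A minimal usage sketch for the new functional API (the image size and corner coordinates below are placeholders chosen for illustration):

from PIL import Image
import torchvision.transforms.functional as F

img = Image.new('RGB', (100, 100), color=(255, 255, 255))
startpoints = [(0, 0), (99, 0), (99, 99), (0, 99)]   # corners of the original image
endpoints = [(10, 5), (90, 10), (95, 85), (5, 95)]   # where those corners should land
warped = F.perspective(img, startpoints, endpoints)  # PIL Image, same size as the input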
def vflip(img):
"""Vertically flip the given PIL Image.
@@ -27,7 +27,8 @@ else:
__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad",
           "Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip",
           "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation",
           "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale"]
           "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale",
           "RandomPerspective"]

_pil_interpolation_to_str = {
    Image.NEAREST: 'PIL.Image.NEAREST',
@@ -528,6 +529,70 @@ class RandomVerticalFlip(object):
        return self.__class__.__name__ + '(p={})'.format(self.p)
class RandomPerspective(object):
    """Performs Perspective transformation of the given PIL Image randomly with a given probability.

    Args:
        interpolation: Default - Image.BICUBIC.
        p (float): probability of the image being perspectively transformed. Default value is 0.5.
        distortion_scale (float): controls the degree of distortion; ranges from 0 to 1. Default value is 0.5.
    """

    def __init__(self, distortion_scale=0.5, p=0.5, interpolation=Image.BICUBIC):
        self.p = p
        self.interpolation = interpolation
        self.distortion_scale = distortion_scale

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be perspectively transformed.

        Returns:
            PIL Image: Randomly perspectively transformed image.
        """
        if not F._is_pil_image(img):
            raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

        if random.random() < self.p:
            width, height = img.size
            startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
            return F.perspective(img, startpoints, endpoints, self.interpolation)
        return img

    @staticmethod
    def get_params(width, height, distortion_scale):
        """Get parameters for ``perspective`` for a random perspective transform.

        Args:
            width: width of the image.
            height: height of the image.
            distortion_scale: degree of distortion, between 0 and 1.

        Returns:
            List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
            List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
        """
        half_height = int(height / 2)
        half_width = int(width / 2)
        topleft = (random.randint(0, int(distortion_scale * half_width)),
                   random.randint(0, int(distortion_scale * half_height)))
        topright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1),
                    random.randint(0, int(distortion_scale * half_height)))
        botright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1),
                    random.randint(height - int(distortion_scale * half_height) - 1, height - 1))
        botleft = (random.randint(0, int(distortion_scale * half_width)),
                   random.randint(height - int(distortion_scale * half_height) - 1, height - 1))
        startpoints = [(0, 0), (width - 1, 0), (width - 1, height - 1), (0, height - 1)]
        endpoints = [topleft, topright, botright, botleft]
        return startpoints, endpoints

    def __repr__(self):
        return self.__class__.__name__ + '(p={})'.format(self.p)
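As a rough usage sketch, the new transform composes with the existing pipeline API like any other random transform (with distortion_scale=0.5, get_params above jitters each corner by up to a quarter of the image width and height; the pipeline below is illustrative, not part of the commit):

import torchvision.transforms as transforms

augment = transforms.Compose([
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.ToTensor(),
])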
class RandomResizedCrop(object):
"""Crop the given PIL Image to random size and aspect ratio.