Commit bd62df6e authored by Soumith Chintala

adding unit tests for image transforms

parent 44da562d
@@ -144,14 +144,6 @@ The data is preprocessed [as described here](https://github.com/facebook/fb.resn
Transforms are common image transforms.
They can be chained together using `transforms.Compose`
### `transforms.Compose`
One can compose several transforms together.
@@ -166,3 +158,45 @@ transform = transforms.Compose([
                         std = [ 0.229, 0.224, 0.225 ]),
])
```
## Transforms on PIL.Image
### `Scale(size, interpolation=Image.BILINEAR)`
Rescales the input PIL.Image to the given `size`, where
`size` is the size of the smaller edge.
For example, if height > width, then the image will be
rescaled to (size * height / width, size).
- size: size of the smaller edge
- interpolation: interpolation mode. Default: PIL.Image.BILINEAR
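
For example, a minimal sketch (the 300x200 input is an arbitrary stand-in for any PIL.Image):

```
from PIL import Image
import torchvision.transforms as transforms

img = Image.new('RGB', (300, 200))   # hypothetical 300x200 (width x height) image
scaled = transforms.Scale(100)(img)  # the smaller edge (height) becomes 100
print(scaled.size)                   # (150, 100) -- PIL reports (width, height)
```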
### `CenterCrop(size)`
Crops the given PIL.Image at the center to have a region of
the given size. size can be a tuple (target_height, target_width)
or an integer, in which case the target will be of a square shape (size, size)
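
A short sketch of both argument forms (the sizes are arbitrary):

```
from PIL import Image
import torchvision.transforms as transforms

img = Image.new('RGB', (300, 200))             # hypothetical input
square = transforms.CenterCrop(96)(img)        # integer -> square (96, 96) crop
rect = transforms.CenterCrop((100, 150))(img)  # tuple -> (target_height, target_width)
print(rect.size)                               # (150, 100) -- PIL reports (width, height)
```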
### `RandomCrop(size)`
Crops the given PIL.Image at a random location to have a region of
the given size. size can be a tuple (target_height, target_width)
or an integer, in which case the target will be of a square shape (size, size)
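
A minimal sketch (the input size is arbitrary); the output size is fixed, only the crop position is random:

```
from PIL import Image
import torchvision.transforms as transforms

img = Image.new('RGB', (300, 200))
crop = transforms.RandomCrop(64)(img)  # 64x64 region at a random location
print(crop.size)                       # always (64, 64); only the position varies
```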
### `RandomHorizontalFlip()`
Randomly horizontally flips the given PIL.Image with a probability of 0.5
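
As the implementation later in this diff shows, the coin flip comes from Python's `random` module, so a minimal sketch is reproducible after seeding:

```
import random
from PIL import Image
import torchvision.transforms as transforms

random.seed(0)  # RandomHorizontalFlip draws from Python's `random`
img = Image.new('RGB', (8, 8))
out = transforms.RandomHorizontalFlip()(img)  # flipped with probability 0.5
```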
### `RandomSizedCrop(size, interpolation=Image.BILINEAR)`
Crops the given PIL.Image to a random size of 0.08 to 1.0 of the original size,
with a random aspect ratio of 3/4 to 4/3 of the original aspect ratio.
This is popularly used to train the Inception networks.
- size: size of the smaller edge
- interpolation: interpolation mode. Default: PIL.Image.BILINEAR
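
A minimal sketch, assuming the random crop is rescaled to a square of the given size (the `__call__` body is elided in this diff; 224 is just the conventional Inception input, chosen for illustration):

```
from PIL import Image
import torchvision.transforms as transforms

img = Image.new('RGB', (300, 200))
crop = transforms.RandomSizedCrop(224)(img)
print(crop.size)  # (224, 224): a random region, rescaled to a fixed square
```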
## Transforms on torch.*Tensor
### `Normalize(mean, std)`
Given mean: (R, G, B) and std: (R, G, B), will normalize each channel of the torch.*Tensor, i.e. channel = (channel - mean) / std
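
For example, a sketch with arbitrary mean/std values; every channel of an all-ones tensor maps to a single value:

```
import torch
import torchvision.transforms as transforms

tensor = torch.ones(3, 2, 2)  # 3-channel tensor, every value 1.0
out = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.25, 1.0])(tensor)
# channel = (channel - mean) / std, so the channels become 1.0, 2.0 and 0.5
```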
## Conversion Transforms
- `ToTensor()` - Converts a PIL.Image (RGB) or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
- `ToPILImage()` - Converts a torch.*Tensor of range [0, 1] and shape C x H x W or numpy ndarray of dtype=uint8, range[0, 255] and shape H x W x C to a PIL.Image of range [0, 255]
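
A minimal round-trip sketch through both conversions (a square 8x8 tensor keeps the shape bookkeeping simple):

```
import torch
import torchvision.transforms as transforms

t = torch.rand(3, 8, 8)            # C x H x W, values in [0, 1]
pil = transforms.ToPILImage()(t)   # PIL.Image in range [0, 255]
back = transforms.ToTensor()(pil)  # FloatTensor of shape (3, 8, 8) in [0.0, 1.0]
```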
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import unittest
import random
class Tester(unittest.TestCase):

    def test_crop(self):
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        oheight = random.randint(5, (height - 2) // 2) * 2
        owidth = random.randint(5, (width - 2) // 2) * 2
        img = torch.ones(3, height, width)
        # zero out a centered oheight x owidth region, so a correct center
        # crop of exactly that size sums to zero
        oh1 = (height - oheight) // 2
        ow1 = (width - owidth) // 2
        imgnarrow = img[:, oh1:oh1 + oheight, ow1:ow1 + owidth]
        imgnarrow.fill_(0)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        assert result.sum() == 0, "height: " + str(height) + " width: " \
            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
        # growing the crop pulls in the surrounding ones, so the sum increases
        oheight += 1
        owidth += 1
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        sum1 = result.sum()
        assert sum1 > 1, "height: " + str(height) + " width: " \
            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
        oheight += 1
        owidth += 1
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        sum2 = result.sum()
        assert sum2 > 0, "height: " + str(height) + " width: " \
            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)
        assert sum2 > sum1, "height: " + str(height) + " width: " \
            + str(width) + " oheight: " + str(oheight) + " owidth: " + str(owidth)

    def test_scale(self):
        height = random.randint(24, 32) * 2
        width = random.randint(24, 32) * 2
        osize = random.randint(5, 12) * 2
        img = torch.ones(3, height, width)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Scale(osize),
            transforms.ToTensor(),
        ])(img)
        # print(img.size())
        # print('output size:', osize)
        # print(result.size())
        assert osize in result.size()
        if height < width:
            assert result.size(1) <= result.size(2)
        elif width < height:
            assert result.size(1) >= result.size(2)

    def test_random_crop(self):
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        oheight = random.randint(5, (height - 2) // 2) * 2
        owidth = random.randint(5, (width - 2) // 2) * 2
        img = torch.ones(3, height, width)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        assert result.size(1) == oheight
        assert result.size(2) == owidth


if __name__ == '__main__':
    unittest.main()
from __future__ import division
import torch
import math
import random
from PIL import Image
import numpy as np
import numbers


class Compose(object):
    """ Composes several transforms together.
    For example:
    >>> transforms.Compose([
    >>>     transforms.CenterCrop(10),
    >>>     transforms.ToTensor(),
    >>> ])
    """
    def __init__(self, transforms):
        self.transforms = transforms
@@ -16,6 +24,8 @@ class Compose(object):

class ToTensor(object):
    """ Converts a PIL.Image (RGB) or numpy.ndarray (H x W x C) in the range [0, 255]
    to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] """
    def __call__(self, pic):
        if isinstance(pic, np.ndarray):
            # handle numpy array
@@ -24,24 +34,50 @@ class ToTensor(object):
            # handle PIL Image
            img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
            img = img.view(pic.size[0], pic.size[1], 3)
            # put it from WHC to CHW format
            # yikes, this transpose takes 80% of the loading time/CPU
            img = img.transpose(0, 2).contiguous()
        return img.float().div(255)


class ToPILImage(object):
    """ Converts a torch.*Tensor of range [0, 1] and shape C x H x W
    or numpy ndarray of dtype=uint8, range[0, 255] and shape H x W x C
    to a PIL.Image of range [0, 255]
    """
    def __call__(self, pic):
        if isinstance(pic, np.ndarray):
            # handle numpy array
            img = Image.fromarray(pic)
        else:
            npimg = pic.mul(255).byte().numpy()
            npimg = np.transpose(npimg, (1, 2, 0))
            img = Image.fromarray(npimg)
        return img


class Normalize(object):
    """ Given mean: (R, G, B) and std: (R, G, B),
    will normalize each channel of the torch.*Tensor, i.e.
    channel = (channel - mean) / std
    """
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # TODO: make efficient
        for t, m, s in zip(tensor, self.mean, self.std):
            t.sub_(m).div_(s)
        return tensor


class Scale(object):
    """ Rescales the input PIL.Image to the given 'size'.
    'size' will be the size of the smaller edge.
    For example, if height > width, then image will be
    rescaled to (size * height / width, size)
    size: size of the smaller edge
    interpolation: Default: PIL.Image.BILINEAR
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation
@@ -51,27 +87,44 @@ class Scale(object):
        if (w <= h and w == self.size) or (h <= w and h == self.size):
            return img
        if w < h:
            ow = self.size
            oh = int(self.size * h / w)
            return img.resize((ow, oh), self.interpolation)
        else:
            oh = self.size
            ow = int(self.size * w / h)
            return img.resize((ow, oh), self.interpolation)


class CenterCrop(object):
    """Crops the given PIL.Image at the center to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)
    """
    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    def __call__(self, img):
        w, h = img.size
        th, tw = self.size
        x1 = int(round((w - tw) / 2))
        y1 = int(round((h - th) / 2))
        return img.crop((x1, y1, x1 + tw, y1 + th))


class RandomCrop(object):
    """Crops the given PIL.Image at a random location to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)
    """
    def __init__(self, size, padding=0):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.padding = padding

    def __call__(self, img):
@@ -79,16 +132,18 @@ class RandomCrop(object):
            raise NotImplementedError()
        w, h = img.size
        th, tw = self.size
        if w == tw and h == th:
            return img
        x1 = random.randint(0, w - tw)
        y1 = random.randint(0, h - th)
        return img.crop((x1, y1, x1 + tw, y1 + th))


class RandomHorizontalFlip(object):
    """Randomly horizontally flips the given PIL.Image with a probability of 0.5
    """
    def __call__(self, img):
        if random.random() < 0.5:
            return img.transpose(Image.FLIP_LEFT_RIGHT)
@@ -96,7 +151,12 @@ class RandomHorizontalFlip(object):
class RandomSizedCrop(object):
    """Crops the given PIL.Image to a random size of 0.08 to 1.0 of the original size,
    with a random aspect ratio of 3/4 to 4/3 of the original aspect ratio.
    This is popularly used to train the Inception networks.
    size: size of the smaller edge
    interpolation: Default: PIL.Image.BILINEAR
    """
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation
...