functional_tensor.py 7.98 KB
Newer Older
1
from __future__ import division
2
3
import torch
import torchvision.transforms.functional as F
4
5
from torch import Tensor
from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple
6
7


8
9
10
11
12
13
def _is_tensor_a_torch_image(input):
    return len(input.shape) == 3


def vflip(img):
    # type: (Tensor) -> Tensor
    """Vertically flip the given Image Tensor.

    Args:
        img (Tensor): Image Tensor to be flipped in the form [C, H, W].

    Returns:
        Tensor: Vertically flipped image Tensor.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    # Dimension -2 is the height axis of a [C, H, W] tensor.
    return img.flip(-2)
26
27


28
29
def hflip(img):
    # type: (Tensor) -> Tensor
    """Horizontally flip the given Image Tensor.

    Args:
        img (Tensor): Image Tensor to be flipped in the form [C, H, W].

    Returns:
        Tensor: Horizontally flipped image Tensor.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    # Dimension -1 is the width axis of a [C, H, W] tensor.
    return img.flip(-1)
ekka's avatar
ekka committed
42
43
44


def crop(img, top, left, height, width):
    # type: (Tensor, int, int, int, int) -> Tensor
    """Crop the given Image Tensor.

    Args:
        img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes the top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        Tensor: Cropped image.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    # Slice rows [top, top + height) and columns [left, left + width) of the
    # last two dimensions; leading dimensions are kept untouched.
    return img[..., top:top + height, left:left + width]
62
63


64
def rgb_to_grayscale(img):
    # type: (Tensor) -> Tensor
    """Convert the given RGB Image Tensor to Grayscale.

    For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which
    is L = R * 0.2989 + G * 0.5870 + B * 0.1140

    Args:
        img (Tensor): Image to be converted to Grayscale in the form [C, H, W].

    Returns:
        Tensor: Grayscale image. The channel dimension is indexed away, so a
        [3, H, W] input yields an [H, W] result, cast back to ``img.dtype``.

    Raises:
        TypeError: If the first dimension of ``img`` does not have size 3.
    """
    if img.shape[0] != 3:
        raise TypeError('Input Image does not contain 3 Channels')

    red, green, blue = img[0], img[1], img[2]
    luma = 0.2989 * red + 0.5870 * green + 0.1140 * blue

    # The weighted sum is cast back to the input dtype before returning.
    return luma.to(img.dtype)


83
def adjust_brightness(img, brightness_factor):
    # type: (Tensor, float) -> Tensor
    """Adjust brightness of an RGB image.

    The image is blended with an all-zero (black) image of the same shape
    via ``_blend``, using ``brightness_factor`` as the blend ratio.

    Args:
        img (Tensor): Image to be adjusted.
        brightness_factor (float):  How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        Tensor: Brightness adjusted image.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    black = torch.zeros_like(img)
    return _blend(img, black, brightness_factor)
100
101
102


def adjust_contrast(img, contrast_factor):
    # type: (Tensor, float) -> Tensor
    """Adjust contrast of an RGB image.

    The image is blended, via ``_blend``, with the scalar mean of its
    grayscale version, using ``contrast_factor`` as the blend ratio.

    Args:
        img (Tensor): Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        Tensor: Contrast adjusted image.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    # The grayscale image is cast to float before taking the mean, so the
    # mean is computed in floating point even for integer images.
    gray = rgb_to_grayscale(img).to(torch.float)
    mean = torch.mean(gray)

    return _blend(img, mean, contrast_factor)


def adjust_saturation(img, saturation_factor):
    # type: (Tensor, float) -> Tensor
    """Adjust color saturation of an RGB image.

    The image is blended, via ``_blend``, with its grayscale version, using
    ``saturation_factor`` as the blend ratio.

    Args:
        img (Tensor): Image to be adjusted.
        saturation_factor (float):  How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        Tensor: Saturation adjusted image.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    gray = rgb_to_grayscale(img)
    return _blend(img, gray, saturation_factor)
140
141


142
def center_crop(img, output_size):
    # type: (Tensor, BroadcastingList2[int]) -> Tensor
    """Crop the central region of the given Image Tensor.

    Args:
        img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes
            the top left corner of the image.
        output_size (sequence or int): (height, width) of the crop box.
            NOTE(review): the body unpacks two values, so a bare int
            presumably relies on the ``BroadcastingList2[int]`` annotation
            expanding it under TorchScript -- confirm before passing an int
            in eager mode.

    Returns:
        Tensor: Cropped image.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    # A [C, H, W] tensor reports its size as (channels, height, width).
    _, image_height, image_width = img.size()
    crop_height, crop_width = output_size

    # Offsets of the crop box's top left corner so the box is centred.
    crop_top = int(round((image_height - crop_height) / 2.))
    crop_left = int(round((image_width - crop_width) / 2.))

    return crop(img, crop_top, crop_left, crop_height, crop_width)


def five_crop(img, size):
    # type: (Tensor, BroadcastingList2[int]) -> List[Tensor]
    """Crop the given Image Tensor into four corners and the central crop.

    .. Note::
        This transform returns a List of Tensors and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (Tensor): Image to be cropped in the form [C, H, W].
        size (sequence or int): Desired output size of the crop as (h, w).
            NOTE(review): the body calls ``len(size)`` and unpacks two
            values, so a bare int presumably relies on the
            ``BroadcastingList2[int]`` annotation expanding it under
            TorchScript -- confirm before passing an int in eager mode.

    Returns:
        List: List (tl, tr, bl, br, center)
            Corresponding top left, top right, bottom left, bottom right and center crop.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
        ValueError: If the requested crop is larger than the image in either
            direction.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    # A [C, H, W] tensor reports its size as (channels, height, width).
    _, image_height, image_width = img.size()
    crop_height, crop_width = size
    if crop_width > image_width or crop_height > image_height:
        msg = "Requested crop size {} is bigger than input size {}"
        raise ValueError(msg.format(size, (image_height, image_width)))

    # ``crop`` takes (top, left, height, width); every corner crop has the
    # requested size and differs only in where its top left corner sits.
    bottom = image_height - crop_height
    right = image_width - crop_width

    tl = crop(img, 0, 0, crop_height, crop_width)
    tr = crop(img, 0, right, crop_height, crop_width)
    bl = crop(img, bottom, 0, crop_height, crop_width)
    br = crop(img, bottom, right, crop_height, crop_width)
    center = center_crop(img, (crop_height, crop_width))

    return [tl, tr, bl, br, center]
199
200
201


def ten_crop(img, size, vertical_flip=False):
    # type: (Tensor, BroadcastingList2[int], bool) -> List[Tensor]
    """Crop the given Image Tensor into four corners and the central crop plus the
    flipped version of these (horizontal flipping is used by default).

    .. Note::
        This transform returns a List of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (Tensor): Image to be cropped in the form [C, H, W].
        size (sequence or int): Desired output size of the crop as (h, w).
            NOTE(review): the body calls ``len(size)``, so a bare int
            presumably relies on the ``BroadcastingList2[int]`` annotation
            expanding it under TorchScript -- confirm before passing an int
            in eager mode.
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
            Corresponding top left, top right, bottom left, bottom right and center crop
            and same for the flipped image's tensor.

    Raises:
        TypeError: If ``img`` does not pass ``_is_tensor_a_torch_image``.
    """
    if not _is_tensor_a_torch_image(img):
        raise TypeError('tensor is not a torch image.')

    assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
    first_five = five_crop(img, size)

    # The second set of crops is taken from the flipped image, so e.g.
    # ``tl_flip`` is the top left corner of the flipped image.
    if vertical_flip:
        img = vflip(img)
    else:
        img = hflip(img)

    second_five = five_crop(img, size)

    return first_five + second_five


236
def _blend(img1, img2, ratio):
237
238
    # type: (Tensor, Tensor, float) -> Tensor
    bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255
239
    return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype)