import os
import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torchvision.transforms.functional_tensor as F_t
from torch._utils_internal import get_file_path_2
from numpy.testing import assert_array_almost_equal
import unittest
import math
import random
import numpy as np
from PIL import Image
try:
    import accimage
except ImportError:
    accimage = None

try:
    from scipy import stats
except ImportError:
    stats = None

from common_utils import cycle_over, int_dtypes, float_dtypes


GRACE_HOPPER = get_file_path_2(
    os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg')


class Tester(unittest.TestCase):

    def test_crop(self):
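        # strategy: zero out exactly the region a center crop of size
        # (oheight, owidth) would return from an all-ones image; that crop
        # must then sum to zero, while any larger crop picks up ones from
        # the surrounding border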
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        oheight = random.randint(5, (height - 2) // 2) * 2
        owidth = random.randint(5, (width - 2) // 2) * 2

        img = torch.ones(3, height, width)
        oh1 = (height - oheight) // 2
        ow1 = (width - owidth) // 2
        imgnarrow = img[:, oh1:oh1 + oheight, ow1:ow1 + owidth]
        imgnarrow.fill_(0)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.sum(), 0,
                         "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth))
        oheight += 1
        owidth += 1
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        sum1 = result.sum()
        self.assertGreater(sum1, 1,
                           "height: {} width: {} oheight: {} owidth: {}".format(height, width, oheight, owidth))
        oheight += 1
        owidth += 1
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        sum2 = result.sum()
        self.assertGreater(sum2, 0,
                           "height: {} width: {} oheight: {} owidth: {}".format(height, width, oheight, owidth))
        self.assertGreater(sum2, sum1,
                           "height: {} width: {} oheight: {} owidth: {}".format(height, width, oheight, owidth))

    def test_five_crop(self):
        to_pil_image = transforms.ToPILImage()
        h = random.randint(5, 25)
        w = random.randint(5, 25)
        for single_dim in [True, False]:
            crop_h = random.randint(1, h)
            crop_w = random.randint(1, w)
            if single_dim:
                crop_h = min(crop_h, crop_w)
                crop_w = crop_h
                transform = transforms.FiveCrop(crop_h)
            else:
                transform = transforms.FiveCrop((crop_h, crop_w))

            img = torch.FloatTensor(3, h, w).uniform_()
            results = transform(to_pil_image(img))

            self.assertEqual(len(results), 5)
            for crop in results:
                self.assertEqual(crop.size, (crop_w, crop_h))

            to_pil_image = transforms.ToPILImage()
            tl = to_pil_image(img[:, 0:crop_h, 0:crop_w])
            tr = to_pil_image(img[:, 0:crop_h, w - crop_w:])
            bl = to_pil_image(img[:, h - crop_h:, 0:crop_w])
            br = to_pil_image(img[:, h - crop_h:, w - crop_w:])
            center = transforms.CenterCrop((crop_h, crop_w))(to_pil_image(img))
            expected_output = (tl, tr, bl, br, center)
            self.assertEqual(results, expected_output)

    def test_ten_crop(self):
        to_pil_image = transforms.ToPILImage()
        h = random.randint(5, 25)
        w = random.randint(5, 25)
        for should_vflip in [True, False]:
            for single_dim in [True, False]:
                crop_h = random.randint(1, h)
                crop_w = random.randint(1, w)
                if single_dim:
                    crop_h = min(crop_h, crop_w)
                    crop_w = crop_h
                    transform = transforms.TenCrop(crop_h,
                                                   vertical_flip=should_vflip)
                    five_crop = transforms.FiveCrop(crop_h)
                else:
                    transform = transforms.TenCrop((crop_h, crop_w),
                                                   vertical_flip=should_vflip)
                    five_crop = transforms.FiveCrop((crop_h, crop_w))

                img = to_pil_image(torch.FloatTensor(3, h, w).uniform_())
                results = transform(img)
                expected_output = five_crop(img)

                # Checking if FiveCrop and TenCrop can be printed as string
                transform.__repr__()
                five_crop.__repr__()

                if should_vflip:
                    vflipped_img = img.transpose(Image.FLIP_TOP_BOTTOM)
                    expected_output += five_crop(vflipped_img)
                else:
                    hflipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
                    expected_output += five_crop(hflipped_img)

                self.assertEqual(len(results), 10)
                self.assertEqual(results, expected_output)

    def test_randomresized_params(self):
        height = random.randint(24, 32) * 2
        width = random.randint(24, 32) * 2
        img = torch.ones(3, height, width)
        to_pil_image = transforms.ToPILImage()
        img = to_pil_image(img)
        size = 100
        epsilon = 0.05
        min_scale = 0.25
        for _ in range(10):
            scale_min = max(round(random.random(), 2), min_scale)
            scale_range = (scale_min, scale_min + round(random.random(), 2))
            aspect_min = max(round(random.random(), 2), epsilon)
            aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2))
            randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range)
            i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range)
            aspect_ratio_obtained = w / h
            self.assertTrue((min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained and
                             aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon) or
                            aspect_ratio_obtained == 1.0)
            self.assertIsInstance(i, int)
            self.assertIsInstance(j, int)
            self.assertIsInstance(h, int)
            self.assertIsInstance(w, int)

    def test_randomperspective(self):
        for _ in range(10):
            height = random.randint(24, 32) * 2
            width = random.randint(24, 32) * 2
            img = torch.ones(3, height, width)
            to_pil_image = transforms.ToPILImage()
            img = to_pil_image(img)
            perp = transforms.RandomPerspective()
            startpoints, endpoints = perp.get_params(width, height, 0.5)
            tr_img = F.perspective(img, startpoints, endpoints)
            tr_img2 = F.to_tensor(F.perspective(tr_img, endpoints, startpoints))
            tr_img = F.to_tensor(tr_img)
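            # warping forward and then back with the swapped point lists
            # should roughly recover the original image, so the round-trip
            # error must stay below the one-way error (the 0.3 slack
            # absorbs interpolation artifacts)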
            self.assertEqual(img.size[0], width)
            self.assertEqual(img.size[1], height)
            self.assertGreater(torch.nn.functional.mse_loss(tr_img, F.to_tensor(img)) + 0.3,
                               torch.nn.functional.mse_loss(tr_img2, F.to_tensor(img)))

    def test_randomperspective_fill(self):
        height = 100
        width = 100
        img = torch.ones(3, height, width)
        to_pil_image = transforms.ToPILImage()
        img = to_pil_image(img)

        modes = ("L", "RGB", "F")
        nums_bands = [len(mode) for mode in modes]
        fill = 127

        for mode, num_bands in zip(modes, nums_bands):
            img_conv = img.convert(mode)
            perspective = transforms.RandomPerspective(p=1, fill=fill)
            tr_img = perspective(img_conv)
            pixel = tr_img.getpixel((0, 0))

            if not isinstance(pixel, tuple):
                pixel = (pixel,)
            self.assertTupleEqual(pixel, tuple([fill] * num_bands))

        for mode, num_bands in zip(modes, nums_bands):
            img_conv = img.convert(mode)
            startpoints, endpoints = transforms.RandomPerspective.get_params(width, height, 0.5)
            tr_img = F.perspective(img_conv, startpoints, endpoints, fill=fill)
            pixel = tr_img.getpixel((0, 0))

            if not isinstance(pixel, tuple):
                pixel = (pixel,)
            self.assertTupleEqual(pixel, tuple([fill] * num_bands))

            for wrong_num_bands in set(nums_bands) - {num_bands}:
                with self.assertRaises(ValueError):
                    F.perspective(img_conv, startpoints, endpoints, fill=tuple([fill] * wrong_num_bands))

    def test_resize(self):
        height = random.randint(24, 32) * 2
        width = random.randint(24, 32) * 2
        osize = random.randint(5, 12) * 2

        # TODO: Check output size check for bug-fix, improve this later
        t = transforms.Resize(osize)
        self.assertTrue(isinstance(t.size, int))
        self.assertEqual(t.size, osize)

        img = torch.ones(3, height, width)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(osize),
            transforms.ToTensor(),
        ])(img)
        self.assertIn(osize, result.size())
        if height < width:
            self.assertLessEqual(result.size(1), result.size(2))
        elif width < height:
            self.assertGreaterEqual(result.size(1), result.size(2))

        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([osize, osize]),
            transforms.ToTensor(),
        ])(img)
        self.assertIn(osize, result.size())
        self.assertEqual(result.size(1), osize)
        self.assertEqual(result.size(2), osize)

        oheight = random.randint(5, 12) * 2
        owidth = random.randint(5, 12) * 2
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), oheight)
        self.assertEqual(result.size(2), owidth)

        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([oheight, owidth]),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), oheight)
        self.assertEqual(result.size(2), owidth)

    def test_random_crop(self):
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        oheight = random.randint(5, (height - 2) // 2) * 2
        owidth = random.randint(5, (width - 2) // 2) * 2
        img = torch.ones(3, height, width)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomCrop((oheight, owidth)),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), oheight)
        self.assertEqual(result.size(2), owidth)

        padding = random.randint(1, 20)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomCrop((oheight, owidth), padding=padding),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), oheight)
        self.assertEqual(result.size(2), owidth)

        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomCrop((height, width)),
            transforms.ToTensor()
        ])(img)
        self.assertEqual(result.size(1), height)
        self.assertEqual(result.size(2), width)
        self.assertTrue(np.allclose(img.numpy(), result.numpy()))

        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomCrop((height + 1, width + 1), pad_if_needed=True),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), height + 1)
        self.assertEqual(result.size(2), width + 1)

        t = transforms.RandomCrop(48)
        img = torch.ones(3, 32, 32)
        with self.assertRaisesRegex(ValueError, r"Required crop size .+ is larger then input image size .+"):
            t(img)

    def test_pad(self):
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        img = torch.ones(3, height, width)
        padding = random.randint(1, 20)
        fill = random.randint(1, 50)
        result = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Pad(padding, fill=fill),
            transforms.ToTensor(),
        ])(img)
        self.assertEqual(result.size(1), height + 2 * padding)
        self.assertEqual(result.size(2), width + 2 * padding)
        # check that all elements in the padded region correspond
        # to the pad value
        fill_v = fill / 255
        eps = 1e-5
        self.assertTrue((result[:, :padding, :] - fill_v).abs().max() < eps)
        self.assertTrue((result[:, :, :padding] - fill_v).abs().max() < eps)
        self.assertRaises(ValueError, transforms.Pad(padding, fill=(1, 2)),
                          transforms.ToPILImage()(img))

    def test_pad_with_tuple_of_pad_values(self):
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        img = transforms.ToPILImage()(torch.ones(3, height, width))

        padding = tuple([random.randint(1, 20) for _ in range(2)])
        output = transforms.Pad(padding)(img)
        self.assertEqual(output.size, (width + padding[0] * 2, height + padding[1] * 2))

        padding = tuple([random.randint(1, 20) for _ in range(4)])
        output = transforms.Pad(padding)(img)
        self.assertEqual(output.size[0], width + padding[0] + padding[2])
        self.assertEqual(output.size[1], height + padding[1] + padding[3])

        # Checking if Padding can be printed as string
        transforms.Pad(padding).__repr__()

    def test_pad_with_non_constant_padding_modes(self):
        """Unit tests for edge, reflect, symmetric padding"""
        img = torch.zeros(3, 27, 27).byte()
        img[:, :, 0] = 1  # Constant value added to leftmost edge
        img = transforms.ToPILImage()(img)
        img = F.pad(img, 1, (200, 200, 200))

        # pad 3 to all sides
        edge_padded_img = F.pad(img, 3, padding_mode='edge')
        # First 6 elements of leftmost edge in the middle of the image, values are in order:
        # edge_pad, edge_pad, edge_pad, constant_pad, constant value added to leftmost edge, 0
        edge_middle_slice = np.asarray(edge_padded_img).transpose(2, 0, 1)[0][17][:6]
        self.assertTrue(np.all(edge_middle_slice == np.asarray([200, 200, 200, 200, 1, 0])))
        self.assertEqual(transforms.ToTensor()(edge_padded_img).size(), (3, 35, 35))

        # Pad 3 to left/right, 2 to top/bottom
        reflect_padded_img = F.pad(img, (3, 2), padding_mode='reflect')
        # First 6 elements of leftmost edge in the middle of the image, values are in order:
        # reflect_pad, reflect_pad, reflect_pad, constant_pad, constant value added to leftmost edge, 0
        reflect_middle_slice = np.asarray(reflect_padded_img).transpose(2, 0, 1)[0][17][:6]
        self.assertTrue(np.all(reflect_middle_slice == np.asarray([0, 0, 1, 200, 1, 0])))
        self.assertEqual(transforms.ToTensor()(reflect_padded_img).size(), (3, 33, 35))

        # Pad 3 to left, 2 to top, 2 to right, 1 to bottom
        symmetric_padded_img = F.pad(img, (3, 2, 2, 1), padding_mode='symmetric')
        # First 6 elements of leftmost edge in the middle of the image, values are in order:
        # sym_pad, sym_pad, sym_pad, constant_pad, constant value added to leftmost edge, 0
        symmetric_middle_slice = np.asarray(symmetric_padded_img).transpose(2, 0, 1)[0][17][:6]
        self.assertTrue(np.all(symmetric_middle_slice == np.asarray([0, 1, 200, 200, 1, 0])))
        self.assertEqual(transforms.ToTensor()(symmetric_padded_img).size(), (3, 32, 34))

        # Check negative padding explicitly for symmetric case, since it is not
        # implemented for tensor case to compare to
        # Crop 1 to left, pad 2 to top, pad 3 to right, crop 3 to bottom
        symmetric_padded_img_neg = F.pad(img, (-1, 2, 3, -3), padding_mode='symmetric')
        symmetric_neg_middle_left = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][:3]
        symmetric_neg_middle_right = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][-4:]
        self.assertTrue(np.all(symmetric_neg_middle_left == np.asarray([1, 0, 0])))
        self.assertTrue(np.all(symmetric_neg_middle_right == np.asarray([200, 200, 0, 0])))
        self.assertEqual(transforms.ToTensor()(symmetric_padded_img_neg).size(), (3, 28, 31))

    def test_pad_raises_with_invalid_pad_sequence_len(self):
        with self.assertRaises(ValueError):
            transforms.Pad(())

        with self.assertRaises(ValueError):
            transforms.Pad((1, 2, 3))

        with self.assertRaises(ValueError):
            transforms.Pad((1, 2, 3, 4, 5))

    def test_pad_with_mode_F_images(self):
        pad = 2
        transform = transforms.Pad(pad)

        img = Image.new("F", (10, 10))
        padded_img = transform(img)
        self.assertSequenceEqual(padded_img.size, [edge_size + 2 * pad for edge_size in img.size])

    def test_lambda(self):
        trans = transforms.Lambda(lambda x: x.add(10))
        x = torch.randn(10)
        y = trans(x)
        self.assertTrue(y.equal(torch.add(x, 10)))

        trans = transforms.Lambda(lambda x: x.add_(10))
        x = torch.randn(10)
        y = trans(x)
        self.assertTrue(y.equal(x))

        # Checking if Lambda can be printed as string
        trans.__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_apply(self):
        random_state = random.getstate()
        random.seed(42)
        random_apply_transform = transforms.RandomApply(
            [
                transforms.RandomRotation((-45, 45)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
            ], p=0.75
        )
        img = transforms.ToPILImage()(torch.rand(3, 10, 10))
        num_samples = 250
        num_applies = 0
        for _ in range(num_samples):
            out = random_apply_transform(img)
            if out != img:
                num_applies += 1

        p_value = stats.binom_test(num_applies, num_samples, p=0.75)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Checking if RandomApply can be printed as string
        random_apply_transform.__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_choice(self):
        random_state = random.getstate()
        random.seed(42)
        random_choice_transform = transforms.RandomChoice(
            [
                transforms.Resize(15),
                transforms.Resize(20),
                transforms.CenterCrop(10)
            ]
        )
        img = transforms.ToPILImage()(torch.rand(3, 25, 25))
        num_samples = 250
        num_resize_15 = 0
        num_resize_20 = 0
        num_crop_10 = 0
        for _ in range(num_samples):
            out = random_choice_transform(img)
            if out.size == (15, 15):
                num_resize_15 += 1
            elif out.size == (20, 20):
                num_resize_20 += 1
            elif out.size == (10, 10):
                num_crop_10 += 1

        p_value = stats.binom_test(num_resize_15, num_samples, p=0.33333)
        self.assertGreater(p_value, 0.0001)
        p_value = stats.binom_test(num_resize_20, num_samples, p=0.33333)
        self.assertGreater(p_value, 0.0001)
        p_value = stats.binom_test(num_crop_10, num_samples, p=0.33333)
        self.assertGreater(p_value, 0.0001)

        random.setstate(random_state)
        # Checking if RandomChoice can be printed as string
        random_choice_transform.__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_order(self):
        random_state = random.getstate()
        random.seed(42)
        random_order_transform = transforms.RandomOrder(
            [
                transforms.Resize(20),
                transforms.CenterCrop(10)
            ]
        )
        img = transforms.ToPILImage()(torch.rand(3, 25, 25))
        num_samples = 250
        num_normal_order = 0
        resize_crop_out = transforms.CenterCrop(10)(transforms.Resize(20)(img))
        for _ in range(num_samples):
            out = random_order_transform(img)
            if out == resize_crop_out:
                num_normal_order += 1

        p_value = stats.binom_test(num_normal_order, num_samples, p=0.5)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Checking if RandomOrder can be printed as string
        random_order_transform.__repr__()

    def test_to_tensor(self):
        test_channels = [1, 3, 4]
        height, width = 4, 4
        trans = transforms.ToTensor()

        with self.assertRaises(TypeError):
            trans(np.random.rand(1, height, width).tolist())

        with self.assertRaises(ValueError):
            trans(np.random.rand(height))

        with self.assertRaises(ValueError):
            trans(np.random.rand(1, 1, height, width))

        for channels in test_channels:
            input_data = torch.ByteTensor(channels, height, width).random_(0, 255).float().div_(255)
            img = transforms.ToPILImage()(input_data)
            output = trans(img)
            self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))

            ndarray = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8)
            output = trans(ndarray)
            expected_output = ndarray.transpose((2, 0, 1)) / 255.0
            self.assertTrue(np.allclose(output.numpy(), expected_output))

            ndarray = np.random.rand(height, width, channels).astype(np.float32)
            output = trans(ndarray)
            expected_output = ndarray.transpose((2, 0, 1))
            self.assertTrue(np.allclose(output.numpy(), expected_output))

        # separate test for mode '1' PIL images
        input_data = torch.ByteTensor(1, height, width).bernoulli_()
        img = transforms.ToPILImage()(input_data.mul(255)).convert('1')
        output = trans(img)
        self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))

    def test_max_value(self):
        for dtype in int_dtypes():
            self.assertEqual(F_t._max_value(dtype), torch.iinfo(dtype).max)

        for dtype in float_dtypes():
            self.assertGreater(F_t._max_value(dtype), torch.finfo(dtype).max)
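            # note: only a lower bound is asserted here; the helper is
            # presumably exact for integer dtypes and merely an upper
            # bound for floating point dtypes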

    def test_convert_image_dtype_float_to_float(self):
        for input_dtype, output_dtypes in cycle_over(float_dtypes()):
            input_image = torch.tensor((0.0, 1.0), dtype=input_dtype)
            for output_dtype in output_dtypes:
                with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
                    transform = transforms.ConvertImageDtype(output_dtype)
                    transform_script = torch.jit.script(F.convert_image_dtype)

                    output_image = transform(input_image)
                    output_image_script = transform_script(input_image, output_dtype)

                    script_diff = output_image_script - output_image
                    self.assertLess(script_diff.abs().max(), 1e-6)

                    actual_min, actual_max = output_image.tolist()
                    desired_min, desired_max = 0.0, 1.0

                    self.assertAlmostEqual(actual_min, desired_min)
                    self.assertAlmostEqual(actual_max, desired_max)

    def test_convert_image_dtype_float_to_int(self):
        for input_dtype in float_dtypes():
            input_image = torch.tensor((0.0, 1.0), dtype=input_dtype)
            for output_dtype in int_dtypes():
                with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
                    transform = transforms.ConvertImageDtype(output_dtype)
                    transform_script = torch.jit.script(F.convert_image_dtype)

                    if (input_dtype == torch.float32 and output_dtype in (torch.int32, torch.int64)) or (
                            input_dtype == torch.float64 and output_dtype == torch.int64
                    ):
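                        # float32 (resp. float64) cannot represent every
                        # integer up to the int32/int64 maximum, so the
                        # conversion is unsafe and is expected to raise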
                        with self.assertRaises(RuntimeError):
                            transform(input_image)
                    else:
                        output_image = transform(input_image)
                        output_image_script = transform_script(input_image, output_dtype)

                        script_diff = output_image_script - output_image
                        self.assertLess(script_diff.abs().max(), 1e-6)

                        actual_min, actual_max = output_image.tolist()
                        desired_min, desired_max = 0, torch.iinfo(output_dtype).max

                        self.assertEqual(actual_min, desired_min)
                        self.assertEqual(actual_max, desired_max)

    def test_convert_image_dtype_int_to_float(self):
        for input_dtype in int_dtypes():
            input_image = torch.tensor((0, torch.iinfo(input_dtype).max), dtype=input_dtype)
            for output_dtype in float_dtypes():
                with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
                    transform = transforms.ConvertImageDtype(output_dtype)
                    transform_script = torch.jit.script(F.convert_image_dtype)

                    output_image = transform(input_image)
                    output_image_script = transform_script(input_image, output_dtype)

                    script_diff = output_image_script - output_image
                    self.assertLess(script_diff.abs().max(), 1e-6)

                    actual_min, actual_max = output_image.tolist()
                    desired_min, desired_max = 0.0, 1.0

                    self.assertAlmostEqual(actual_min, desired_min)
                    self.assertGreaterEqual(actual_min, desired_min)
                    self.assertAlmostEqual(actual_max, desired_max)
                    self.assertLessEqual(actual_max, desired_max)

    def test_convert_image_dtype_int_to_int(self):
        for input_dtype, output_dtypes in cycle_over(int_dtypes()):
            input_max = torch.iinfo(input_dtype).max
            input_image = torch.tensor((0, input_max), dtype=input_dtype)
            for output_dtype in output_dtypes:
                output_max = torch.iinfo(output_dtype).max

                with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
                    transform = transforms.ConvertImageDtype(output_dtype)
629
630
                    transform_script = torch.jit.script(F.convert_image_dtype)

631
                    output_image = transform(input_image)
632
633
634
635
636
637
                    output_image_script = transform_script(input_image, output_dtype)

                    script_diff = output_image_script.float() - output_image.float()
                    self.assertLess(
                        script_diff.abs().max(), 1e-6, msg="{} vs {}".format(output_image_script, output_image)
                    )
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670

                    actual_min, actual_max = output_image.tolist()
                    desired_min, desired_max = 0, output_max

                    # see https://github.com/pytorch/vision/pull/2078#issuecomment-641036236 for details
                    if input_max >= output_max:
                        error_term = 0
                    else:
                        error_term = 1 - (torch.iinfo(output_dtype).max + 1) // (torch.iinfo(input_dtype).max + 1)
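                    # worked example: upscaling uint8 -> int16 multiplies by
                    # (32767 + 1) // (255 + 1) = 128, so the input maximum 255
                    # maps to 255 * 128 = 32640 = 32767 - 127, i.e. error_term = -127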

                    self.assertEqual(actual_min, desired_min)
                    self.assertEqual(actual_max, desired_max + error_term)

    def test_convert_image_dtype_int_to_int_consistency(self):
        for input_dtype, output_dtypes in cycle_over(int_dtypes()):
            input_max = torch.iinfo(input_dtype).max
            input_image = torch.tensor((0, input_max), dtype=input_dtype)
            for output_dtype in output_dtypes:
                output_max = torch.iinfo(output_dtype).max
                if output_max <= input_max:
                    continue

                with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
                    transform = transforms.ConvertImageDtype(output_dtype)
                    inverse_transform = transforms.ConvertImageDtype(input_dtype)
                    output_image = inverse_transform(transform(input_image))

                    actual_min, actual_max = output_image.tolist()
                    desired_min, desired_max = 0, input_max

                    self.assertEqual(actual_min, desired_min)
                    self.assertEqual(actual_max, desired_max)

    @unittest.skipIf(accimage is None, 'accimage not available')
    def test_accimage_to_tensor(self):
        trans = transforms.ToTensor()

        expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
        output = trans(accimage.Image(GRACE_HOPPER))

        self.assertEqual(expected_output.size(), output.size())
        self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))

    def test_pil_to_tensor(self):
        test_channels = [1, 3, 4]
        height, width = 4, 4
        trans = transforms.PILToTensor()

        with self.assertRaises(TypeError):
            trans(np.random.rand(1, height, width).tolist())

        with self.assertRaises(TypeError):
            trans(np.random.rand(1, height, width))

        for channels in test_channels:
            input_data = torch.ByteTensor(channels, height, width).random_(0, 255)
            img = transforms.ToPILImage()(input_data)
            output = trans(img)
            self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))

            input_data = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8)
            img = transforms.ToPILImage()(input_data)
            output = trans(img)
            expected_output = input_data.transpose((2, 0, 1))
            self.assertTrue(np.allclose(output.numpy(), expected_output))

            input_data = torch.as_tensor(np.random.rand(channels, height, width).astype(np.float32))
            img = transforms.ToPILImage()(input_data)  # CHW -> HWC and (* 255).byte()
            output = trans(img)  # HWC -> CHW
            expected_output = (input_data * 255).byte()
            self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))

        # separate test for mode '1' PIL images
        input_data = torch.ByteTensor(1, height, width).bernoulli_()
        img = transforms.ToPILImage()(input_data.mul(255)).convert('1')
        output = trans(img)
        self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))

    @unittest.skipIf(accimage is None, 'accimage not available')
    def test_accimage_pil_to_tensor(self):
        trans = transforms.PILToTensor()

        expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
        output = trans(accimage.Image(GRACE_HOPPER))

        self.assertEqual(expected_output.size(), output.size())
        self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))

    @unittest.skipIf(accimage is None, 'accimage not available')
    def test_accimage_resize(self):
        trans = transforms.Compose([
            transforms.Resize(256, interpolation=Image.BILINEAR),
            transforms.ToTensor(),
        ])

        # Checking if Compose, Resize and ToTensor can be printed as string
        trans.__repr__()

        expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
        output = trans(accimage.Image(GRACE_HOPPER))

        self.assertEqual(expected_output.size(), output.size())
        self.assertLess(np.abs((expected_output - output).mean()), 1e-3)
        self.assertLess((expected_output - output).var(), 1e-5)
        # note the high absolute tolerance
        self.assertTrue(np.allclose(output.numpy(), expected_output.numpy(), atol=5e-2))

    @unittest.skipIf(accimage is None, 'accimage not available')
    def test_accimage_crop(self):
        trans = transforms.Compose([
            transforms.CenterCrop(256),
            transforms.ToTensor(),
        ])

        # Checking if Compose, CenterCrop and ToTensor can be printed as string
        trans.__repr__()

        expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
        output = trans(accimage.Image(GRACE_HOPPER))

        self.assertEqual(expected_output.size(), output.size())
        self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))

    def test_1_channel_tensor_to_pil_image(self):
        to_tensor = transforms.ToTensor()

        img_data_float = torch.Tensor(1, 4, 4).uniform_()
        img_data_byte = torch.ByteTensor(1, 4, 4).random_(0, 255)
        img_data_short = torch.ShortTensor(1, 4, 4).random_()
        img_data_int = torch.IntTensor(1, 4, 4).random_()

        inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
        expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(),
                            img_data_byte.float().div(255.0).numpy(),
                            img_data_short.numpy(),
                            img_data_int.numpy()]
        expected_modes = ['L', 'L', 'I;16', 'I']

        for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes):
            for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
                img = transform(img_data)
                self.assertEqual(img.mode, mode)
                self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy()))
        # 'F' mode for torch.FloatTensor
        img_F_mode = transforms.ToPILImage(mode='F')(img_data_float)
        self.assertEqual(img_F_mode.mode, 'F')
        self.assertTrue(np.allclose(np.array(Image.fromarray(img_data_float.squeeze(0).numpy(), mode='F')),
                                    np.array(img_F_mode)))

    def test_1_channel_ndarray_to_pil_image(self):
        img_data_float = torch.Tensor(4, 4, 1).uniform_().numpy()
        img_data_byte = torch.ByteTensor(4, 4, 1).random_(0, 255).numpy()
        img_data_short = torch.ShortTensor(4, 4, 1).random_().numpy()
        img_data_int = torch.IntTensor(4, 4, 1).random_().numpy()

        inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
        expected_modes = ['F', 'L', 'I;16', 'I']
        for img_data, mode in zip(inputs, expected_modes):
            for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
                img = transform(img_data)
                self.assertEqual(img.mode, mode)
                self.assertTrue(np.allclose(img_data[:, :, 0], img))

    def test_2_channel_ndarray_to_pil_image(self):
        def verify_img_data(img_data, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'LA')  # default should assume LA
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)
            split = img.split()
            for i in range(2):
                self.assertTrue(np.allclose(img_data[:, :, i], split[i]))

        img_data = torch.ByteTensor(4, 4, 2).random_(0, 255).numpy()
        for mode in [None, 'LA']:
            verify_img_data(img_data, mode)

        transforms.ToPILImage().__repr__()

        # should raise if we try a mode for 4 or 1 or 3 channel images
        for invalid_mode in ['RGBA', 'P', 'RGB']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

    def test_2_channel_tensor_to_pil_image(self):
        def verify_img_data(img_data, expected_output, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'LA')  # default should assume LA
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)
            split = img.split()
            for i in range(2):
                self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))

        img_data = torch.Tensor(2, 4, 4).uniform_()
        expected_output = img_data.mul(255).int().float().div(255)
        for mode in [None, 'LA']:
            verify_img_data(img_data, expected_output, mode=mode)

        # should raise if we try a mode for 4 or 1 or 3 channel images
        for invalid_mode in ['RGBA', 'P', 'RGB']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

846
847
848
849
    def test_3_channel_tensor_to_pil_image(self):
        def verify_img_data(img_data, expected_output, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'RGB')  # default should assume RGB
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)
            split = img.split()
            for i in range(3):
                self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))

        img_data = torch.Tensor(3, 4, 4).uniform_()
        expected_output = img_data.mul(255).int().float().div(255)
        for mode in [None, 'RGB', 'HSV', 'YCbCr']:
            verify_img_data(img_data, expected_output, mode=mode)

        # should raise if we try a mode for 4 or 1 or 2 channel images
        for invalid_mode in ['RGBA', 'P', 'LA']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

        with self.assertRaises(ValueError):
            transforms.ToPILImage()(torch.Tensor(1, 3, 4, 4).uniform_())

    def test_3_channel_ndarray_to_pil_image(self):
        def verify_img_data(img_data, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'RGB')  # default should assume RGB
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)
            split = img.split()
            for i in range(3):
                self.assertTrue(np.allclose(img_data[:, :, i], split[i]))

        img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
        for mode in [None, 'RGB', 'HSV', 'YCbCr']:
            verify_img_data(img_data, mode)

        # Checking if ToPILImage can be printed as string
        transforms.ToPILImage().__repr__()

        # should raise if we try a mode for 4 or 1 or 2 channel images
        for invalid_mode in ['RGBA', 'P', 'LA']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

    def test_4_channel_tensor_to_pil_image(self):
        def verify_img_data(img_data, expected_output, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'RGBA')  # default should assume RGBA
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)

            split = img.split()
            for i in range(4):
                self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))

        img_data = torch.Tensor(4, 4, 4).uniform_()
        expected_output = img_data.mul(255).int().float().div(255)
        for mode in [None, 'RGBA', 'CMYK', 'RGBX']:
            verify_img_data(img_data, expected_output, mode)

        # should raise if we try a mode for 3 or 1 or 2 channel images
        for invalid_mode in ['RGB', 'P', 'LA']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

    def test_4_channel_ndarray_to_pil_image(self):
        def verify_img_data(img_data, mode):
            if mode is None:
                img = transforms.ToPILImage()(img_data)
                self.assertEqual(img.mode, 'RGBA')  # default should assume RGBA
            else:
                img = transforms.ToPILImage(mode=mode)(img_data)
                self.assertEqual(img.mode, mode)
            split = img.split()
            for i in range(4):
                self.assertTrue(np.allclose(img_data[:, :, i], split[i]))

        img_data = torch.ByteTensor(4, 4, 4).random_(0, 255).numpy()
        for mode in [None, 'RGBA', 'CMYK', 'RGBX']:
            verify_img_data(img_data, mode)

        # should raise if we try a mode for 3 or 1 or 2 channel images
        for invalid_mode in ['RGB', 'P', 'LA']:
            with self.assertRaises(ValueError):
                transforms.ToPILImage(mode=invalid_mode)(img_data)

    def test_2d_tensor_to_pil_image(self):
        to_tensor = transforms.ToTensor()

        img_data_float = torch.Tensor(4, 4).uniform_()
        img_data_byte = torch.ByteTensor(4, 4).random_(0, 255)
        img_data_short = torch.ShortTensor(4, 4).random_()
        img_data_int = torch.IntTensor(4, 4).random_()

        inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
        expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(),
                            img_data_byte.float().div(255.0).numpy(),
                            img_data_short.numpy(),
                            img_data_int.numpy()]
        expected_modes = ['L', 'L', 'I;16', 'I']

        for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes):
            for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
                img = transform(img_data)
                self.assertEqual(img.mode, mode)
                self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy()))

    def test_2d_ndarray_to_pil_image(self):
        img_data_float = torch.Tensor(4, 4).uniform_().numpy()
        img_data_byte = torch.ByteTensor(4, 4).random_(0, 255).numpy()
        img_data_short = torch.ShortTensor(4, 4).random_().numpy()
        img_data_int = torch.IntTensor(4, 4).random_().numpy()

        inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
        expected_modes = ['F', 'L', 'I;16', 'I']
        for img_data, mode in zip(inputs, expected_modes):
            for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
                img = transform(img_data)
                self.assertEqual(img.mode, mode)
                self.assertTrue(np.allclose(img_data, img))

    def test_tensor_bad_types_to_pil_image(self):
        with self.assertRaises(ValueError):
            transforms.ToPILImage()(torch.ones(1, 3, 4, 4))

    def test_ndarray_bad_types_to_pil_image(self):
        trans = transforms.ToPILImage()
        for bad_dtype in (np.int64, np.uint16, np.uint32, np.float64):
            with self.assertRaises(TypeError):
                trans(np.ones([4, 4, 1], bad_dtype))

        with self.assertRaises(ValueError):
            transforms.ToPILImage()(np.ones([1, 4, 4, 3]))

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_vertical_flip(self):
        random_state = random.getstate()
        random.seed(42)
        img = transforms.ToPILImage()(torch.rand(3, 10, 10))
        vimg = img.transpose(Image.FLIP_TOP_BOTTOM)

        num_samples = 250
        num_vertical = 0
        for _ in range(num_samples):
            out = transforms.RandomVerticalFlip()(img)
            if out == vimg:
                num_vertical += 1

        p_value = stats.binom_test(num_vertical, num_samples, p=0.5)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        num_samples = 250
        num_vertical = 0
        for _ in range(num_samples):
            out = transforms.RandomVerticalFlip(p=0.7)(img)
            if out == vimg:
                num_vertical += 1

        p_value = stats.binom_test(num_vertical, num_samples, p=0.7)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Checking if RandomVerticalFlip can be printed as string
        transforms.RandomVerticalFlip().__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_horizontal_flip(self):
        random_state = random.getstate()
        random.seed(42)
        img = transforms.ToPILImage()(torch.rand(3, 10, 10))
        himg = img.transpose(Image.FLIP_LEFT_RIGHT)

        num_samples = 250
        num_horizontal = 0
        for _ in range(num_samples):
            out = transforms.RandomHorizontalFlip()(img)
            if out == himg:
                num_horizontal += 1

        p_value = stats.binom_test(num_horizontal, num_samples, p=0.5)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        num_samples = 250
        num_horizontal = 0
        for _ in range(num_samples):
            out = transforms.RandomHorizontalFlip(p=0.7)(img)
            if out == himg:
                num_horizontal += 1

        p_value = stats.binom_test(num_horizontal, num_samples, p=0.7)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Checking if RandomHorizontalFlip can be printed as string
        transforms.RandomHorizontalFlip().__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats is not available')
    def test_normalize(self):
        def samples_from_standard_normal(tensor):
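            # one-sample Kolmogorov-Smirnov test against N(0, 1); a large
            # p-value means we cannot reject that the normalized pixels
            # are draws from a standard normal distribution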
            p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue
            return p_value > 0.0001

        random_state = random.getstate()
        random.seed(42)
        for channels in [1, 3]:
            img = torch.rand(channels, 10, 10)
            mean = [img[c].mean() for c in range(channels)]
            std = [img[c].std() for c in range(channels)]
            normalized = transforms.Normalize(mean, std)(img)
            self.assertTrue(samples_from_standard_normal(normalized))
        random.setstate(random_state)

        # Checking if Normalize can be printed as string
        transforms.Normalize(mean, std).__repr__()

        # Checking the optional in-place behaviour
        tensor = torch.rand((1, 16, 16))
        tensor_inplace = transforms.Normalize((0.5,), (0.5,), inplace=True)(tensor)
        self.assertTrue(torch.equal(tensor, tensor_inplace))

    def test_normalize_different_dtype(self):
        for dtype1 in [torch.float32, torch.float64]:
            img = torch.rand(3, 10, 10, dtype=dtype1)
            for dtype2 in [torch.int64, torch.float32, torch.float64]:
                mean = torch.tensor([1, 2, 3], dtype=dtype2)
                std = torch.tensor([1, 2, 1], dtype=dtype2)
                # checks that it doesn't crash
                transforms.functional.normalize(img, mean, std)

    def test_normalize_3d_tensor(self):
        torch.manual_seed(28)
        n_channels = 3
        img_size = 10
        mean = torch.rand(n_channels)
        std = torch.rand(n_channels)
        img = torch.rand(n_channels, img_size, img_size)
        target = F.normalize(img, mean, std).numpy()

        mean_unsqueezed = mean.view(-1, 1, 1)
        std_unsqueezed = std.view(-1, 1, 1)
        result1 = F.normalize(img, mean_unsqueezed, std_unsqueezed)
        result2 = F.normalize(img,
                              mean_unsqueezed.repeat(1, img_size, img_size),
                              std_unsqueezed.repeat(1, img_size, img_size))
        assert_array_almost_equal(target, result1.numpy())
        assert_array_almost_equal(target, result2.numpy())

    def test_adjust_brightness(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
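        # adjust_brightness scales every channel by the given factor and
        # clips to [0, 255]: e.g. factor 0.5 sends 135 to 67 and factor 2
        # sends 135 to min(270, 255) = 255 (cf. y_ans below)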

        # test 0
        y_pil = F.adjust_brightness(x_pil, 1)
        y_np = np.array(y_pil)
        self.assertTrue(np.allclose(y_np, x_np))

        # test 1
        y_pil = F.adjust_brightness(x_pil, 0.5)
        y_np = np.array(y_pil)
        y_ans = [0, 2, 6, 27, 67, 113, 18, 4, 117, 45, 127, 0]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

        # test 2
        y_pil = F.adjust_brightness(x_pil, 2)
        y_np = np.array(y_pil)
        y_ans = [0, 10, 26, 108, 255, 255, 74, 16, 255, 180, 255, 2]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

    def test_adjust_contrast(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
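        # adjust_contrast interpolates between the image and its mean
        # grayscale level (86 for this image): out = clip(mean + factor *
        # (in - mean)), so factor 0.5 sends 0 to 43 (cf. y_ans below)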

        # test 0
        y_pil = F.adjust_contrast(x_pil, 1)
        y_np = np.array(y_pil)
        self.assertTrue(np.allclose(y_np, x_np))

        # test 1
        y_pil = F.adjust_contrast(x_pil, 0.5)
        y_np = np.array(y_pil)
        y_ans = [43, 45, 49, 70, 110, 156, 61, 47, 160, 88, 170, 43]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

        # test 2
        y_pil = F.adjust_contrast(x_pil, 2)
        y_np = np.array(y_pil)
        y_ans = [0, 0, 0, 22, 184, 255, 0, 0, 255, 94, 255, 0]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

    @unittest.skipIf(Image.__version__ >= '7', "Temporarily disabled")
    def test_adjust_saturation(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
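        # adjust_saturation blends each pixel with its grayscale value g:
        # out = clip(g + factor * (in - g)), e.g. factor 0.5 turns
        # (0, 5, 13) with g = 4 into (2, 4, 8) (cf. y_ans below)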

        # test 0
        y_pil = F.adjust_saturation(x_pil, 1)
        y_np = np.array(y_pil)
        self.assertTrue(np.allclose(y_np, x_np))

        # test 1
1171
        y_pil = F.adjust_saturation(x_pil, 0.5)
1172
1173
1174
        y_np = np.array(y_pil)
        y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
1175
        self.assertTrue(np.allclose(y_np, y_ans))
1176
1177

        # test 2
1178
        y_pil = F.adjust_saturation(x_pil, 2)
1179
1180
1181
        y_np = np.array(y_pil)
        y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
1182
        self.assertTrue(np.allclose(y_np, y_ans))
1183
1184
1185
1186
1187
1188
1189
1190

    def test_adjust_hue(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')

        # hue_factor outside [-0.5, 0.5] must raise; each call needs its own
        # context manager, otherwise only the first one is actually checked
        with self.assertRaises(ValueError):
            F.adjust_hue(x_pil, -0.7)
        with self.assertRaises(ValueError):
            F.adjust_hue(x_pil, 1)

        # test 0: almost the same as x_data, but not exact,
        # probably because of hsv <-> rgb floating point ops
        y_pil = F.adjust_hue(x_pil, 0)
        y_np = np.array(y_pil)
        y_ans = [0, 5, 13, 54, 139, 226, 35, 8, 234, 91, 255, 1]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

        # test 1: shift hue by a quarter of the hue circle
        y_pil = F.adjust_hue(x_pil, 0.25)
        y_np = np.array(y_pil)
        y_ans = [13, 0, 12, 224, 54, 226, 234, 8, 99, 1, 222, 255]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

        # test 2: shift hue by a quarter of the hue circle, opposite direction
        y_pil = F.adjust_hue(x_pil, -0.25)
        y_np = np.array(y_pil)
        y_ans = [0, 13, 2, 54, 226, 58, 8, 234, 152, 255, 43, 1]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

    def test_adjust_gamma(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')

        # test 0: gamma = 1 keeps the image unchanged
        y_pil = F.adjust_gamma(x_pil, 1)
        y_np = np.array(y_pil)
        self.assertTrue(np.allclose(y_np, x_np))

        # test 1: gamma = 0.5 brightens mid-tones
        y_pil = F.adjust_gamma(x_pil, 0.5)
        y_np = np.array(y_pil)
        y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

        # test 2: gamma = 2 darkens mid-tones
        y_pil = F.adjust_gamma(x_pil, 2)
        y_np = np.array(y_pil)
        y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        self.assertTrue(np.allclose(y_np, y_ans))

    def test_adjusts_L_mode(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_rgb = Image.fromarray(x_np, mode='RGB')

        x_l = x_rgb.convert('L')
        self.assertEqual(F.adjust_brightness(x_l, 2).mode, 'L')
        self.assertEqual(F.adjust_saturation(x_l, 2).mode, 'L')
        self.assertEqual(F.adjust_contrast(x_l, 2).mode, 'L')
        self.assertEqual(F.adjust_hue(x_l, 0.4).mode, 'L')
        self.assertEqual(F.adjust_gamma(x_l, 0.5).mode, 'L')

    def test_color_jitter(self):
        color_jitter = transforms.ColorJitter(2, 2, 2, 0.1)

        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')

        for _ in range(10):
            y_pil = color_jitter(x_pil)
            self.assertEqual(y_pil.mode, x_pil.mode)

            y_pil_2 = color_jitter(x_pil_2)
            self.assertEqual(y_pil_2.mode, x_pil_2.mode)

        # Checking if ColorJitter can be printed as string
        color_jitter.__repr__()

    def test_linear_transformation(self):
        num_samples = 1000
        x = torch.randn(num_samples, 3, 10, 10)
        flat_x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
        # compute principal components
        sigma = torch.mm(flat_x.t(), flat_x) / flat_x.size(0)
        u, s, _ = np.linalg.svd(sigma.numpy())
        zca_epsilon = 1e-10  # avoid division by 0
        d = torch.Tensor(np.diag(1. / np.sqrt(s + zca_epsilon)))
        u = torch.Tensor(u)
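        # ZCA whitening matrix: W = U diag(1 / sqrt(s + eps)) U^T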
        principal_components = torch.mm(torch.mm(u, d), u.t())
        mean_vector = (torch.sum(flat_x, dim=0) / flat_x.size(0))
        # initialize whitening matrix
        whitening = transforms.LinearTransformation(principal_components, mean_vector)
        # estimate covariance and mean using the weak law of large numbers
        num_features = flat_x.size(1)
        cov = 0.0
        mean = 0.0
        for i in x:
            xwhite = whitening(i)
            xwhite = xwhite.view(1, -1).numpy()
            cov += np.dot(xwhite, xwhite.T) / num_features
            mean += np.sum(xwhite) / num_features
        # if rtol for std = 1e-3 then rtol for cov = 2e-3 as std**2 = cov
        self.assertTrue(np.allclose(cov / num_samples, np.identity(1), rtol=2e-3),
                        "cov not close to 1")
        self.assertTrue(np.allclose(mean / num_samples, 0, rtol=1e-3),
                        "mean not close to 0")

        # Checking if LinearTransformation can be printed as string
        whitening.__repr__()

    def test_rotate(self):
        x = np.zeros((100, 100, 3), dtype=np.uint8)
        x[40, 40] = [255, 255, 255]
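        # a single white pixel at (40, 40); the checks below track where the
        # rotation maps it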

        with self.assertRaisesRegex(TypeError, r"img should be PIL Image"):
            F.rotate(x, 10)

        img = F.to_pil_image(x)

        result = F.rotate(img, 45)
        self.assertEqual(result.size, (100, 100))
        r, c, ch = np.where(result)
        self.assertTrue(all(x in r for x in [49, 50]))
        self.assertTrue(all(x in c for x in [36]))
        self.assertTrue(all(x in ch for x in [0, 1, 2]))

        result = F.rotate(img, 45, expand=True)
        self.assertEqual(result.size, (142, 142))
        r, c, ch = np.where(result)
        self.assertTrue(all(x in r for x in [70, 71]))
        self.assertTrue(all(x in c for x in [57]))
        self.assertTrue(all(x in ch for x in [0, 1, 2]))

        result = F.rotate(img, 45, center=(40, 40))
        self.assertEqual(result.size, (100, 100))
        r, c, ch = np.where(result)
        self.assertTrue(all(x in r for x in [40]))
        self.assertTrue(all(x in c for x in [40]))
        self.assertTrue(all(x in ch for x in [0, 1, 2]))

        result_a = F.rotate(img, 90)
        result_b = F.rotate(img, -270)

        self.assertTrue(np.all(np.array(result_a) == np.array(result_b)))

    def test_rotate_fill(self):
        img = F.to_pil_image(np.ones((100, 100, 3), dtype=np.uint8) * 255, "RGB")

        modes = ("L", "RGB", "F")
        nums_bands = [len(mode) for mode in modes]
        fill = 127

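        # the corner (0, 0) is rotated out of the original content, so it must
        # contain the constant fill value in every band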
        for mode, num_bands in zip(modes, nums_bands):
            img_conv = img.convert(mode)
            img_rot = F.rotate(img_conv, 45.0, fill=fill)
            pixel = img_rot.getpixel((0, 0))

            if not isinstance(pixel, tuple):
                pixel = (pixel,)
            self.assertTupleEqual(pixel, tuple([fill] * num_bands))

            for wrong_num_bands in set(nums_bands) - {num_bands}:
                with self.assertRaises(ValueError):
                    F.rotate(img_conv, 45.0, fill=tuple([fill] * wrong_num_bands))

    def test_affine(self):
        input_img = np.zeros((40, 40, 3), dtype=np.uint8)
        cnt = [20, 20]
        for pt in [(16, 16), (20, 16), (20, 20)]:
            for i in range(-5, 5):
                for j in range(-5, 5):
                    input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]

        with self.assertRaises(TypeError, msg="Argument translate should be a sequence"):
            F.affine(input_img, 10, translate=0, scale=1, shear=1)

        pil_img = F.to_pil_image(input_img)

        def _to_3x3_inv(inv_result_matrix):
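            # PIL expects the *inverse* affine map as 6 coefficients (a 2x3
            # matrix); embed it into a 3x3 matrix and invert to recover the
            # forward transformation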
            result_matrix = np.zeros((3, 3))
            result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
            result_matrix[2, 2] = 1
            return np.linalg.inv(result_matrix)

        def _test_transformation(a, t, s, sh):
            a_rad = math.radians(a)
            sh_rad = [math.radians(sh_) for sh_ in sh]
            cx, cy = cnt
            tx, ty = t
            sx, sy = sh_rad
            rot = a_rad

            # 1) Check transformation matrix:
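            # Expected matrix: M = T @ C @ RS @ SHy @ SHx @ Cinv, i.e. shear,
            # then rotation and scale about the center (conjugation by C),
            # followed by the translation T.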
            C = np.array([[1, 0, cx],
                          [0, 1, cy],
                          [0, 0, 1]])
            T = np.array([[1, 0, tx],
                          [0, 1, ty],
                          [0, 0, 1]])
            Cinv = np.linalg.inv(C)

            RS = np.array(
                [[s * math.cos(rot), -s * math.sin(rot), 0],
                 [s * math.sin(rot), s * math.cos(rot), 0],
                 [0, 0, 1]])

            SHx = np.array([[1, -math.tan(sx), 0],
                            [0, 1, 0],
                            [0, 0, 1]])

            SHy = np.array([[1, 0, 0],
                            [-math.tan(sy), 1, 0],
                            [0, 0, 1]])

            RSS = np.matmul(RS, np.matmul(SHy, SHx))

            true_matrix = np.matmul(T, np.matmul(C, np.matmul(RSS, Cinv)))

            result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
                                                                     translate=t, scale=s, shear=sh))
            self.assertLess(np.sum(np.abs(true_matrix - result_matrix)), 1e-10)
            # 2) Perform inverse mapping:
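            # build the expected output by pulling every destination pixel
            # from the source through the inverse matrix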
            true_result = np.zeros((40, 40, 3), dtype=np.uint8)
            inv_true_matrix = np.linalg.inv(true_matrix)
            for y in range(true_result.shape[0]):
                for x in range(true_result.shape[1]):
                    # Same as for PIL:
                    # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/
                    # src/libImaging/Geometry.c#L1060
                    input_pt = np.array([x + 0.5, y + 0.5, 1.0])
                    res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(int)
                    _x, _y = res[:2]
                    if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                        true_result[y, x, :] = input_img[_y, _x, :]

            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
            self.assertEqual(result.size, pil_img.size)
            # Compute number of different pixels:
            np_result = np.array(result)
            n_diff_pixels = np.sum(np_result != true_result) / 3
            # Accept up to 2 differing pixels (rounding at region boundaries)
            self.assertLess(n_diff_pixels, 3,
                            "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +
                            "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0])))

        # Test rotation
        a = 45
        _test_transformation(a=a, t=(0, 0), s=1.0, sh=(0.0, 0.0))

        # Test translation
        t = [10, 15]
        _test_transformation(a=0.0, t=t, s=1.0, sh=(0.0, 0.0))

        # Test scale
        s = 1.2
        _test_transformation(a=0.0, t=(0.0, 0.0), s=s, sh=(0.0, 0.0))

        # Test shear
        sh = [45.0, 25.0]
        _test_transformation(a=0.0, t=(0.0, 0.0), s=1.0, sh=sh)

        # Test rotation, scale, translation, shear
        for a in range(-90, 90, 25):
            for t1 in range(-10, 10, 5):
                for s in [0.75, 0.98, 1.0, 1.2, 1.4]:
                    for sh in range(-15, 15, 5):
                        _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh))

    def test_random_rotation(self):

        # each invalid argument needs its own context manager, otherwise only
        # the first statement inside the block is actually checked
        with self.assertRaises(ValueError):
            transforms.RandomRotation(-0.7)
        with self.assertRaises(ValueError):
            transforms.RandomRotation([-0.7])
        with self.assertRaises(ValueError):
            transforms.RandomRotation([-0.7, 0, 0.7])

        t = transforms.RandomRotation(10)
        angle = t.get_params(t.degrees)
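        # a scalar degrees argument is expanded to the range (-degrees, degrees)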
        self.assertTrue(-10 < angle < 10)

        t = transforms.RandomRotation((-10, 10))
        angle = t.get_params(t.degrees)
        self.assertTrue(-10 < angle < 10)

        # Checking if RandomRotation can be printed as string
        t.__repr__()

    def test_random_affine(self):

        # each invalid argument needs its own context manager, otherwise only
        # the first statement inside the block is actually checked
        with self.assertRaises(ValueError):
            transforms.RandomAffine(-0.7)
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-0.7])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-0.7, 0, 0.7])

        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=2.0)
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[-1.0, 1.0])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[-1.0, 0.0, 1.0])

        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.0])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5])

        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7)
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10])
        with self.assertRaises(ValueError):
            transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10, 0, 10])

        x = np.zeros((100, 100, 3), dtype=np.uint8)
        img = F.to_pil_image(x)

        t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10, 20, 40])
        for _ in range(100):
            angle, translations, scale, shear = t.get_params(t.degrees, t.translate, t.scale, t.shear,
                                                             img_size=img.size)
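            # translations are returned in pixels: sampled fraction * image size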
            self.assertTrue(-10 < angle < 10)
            self.assertTrue(-img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5,
                            "{} vs {}".format(translations[0], img.size[0] * 0.5))
            self.assertTrue(-img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5,
                            "{} vs {}".format(translations[1], img.size[1] * 0.5))
            self.assertTrue(0.7 < scale < 1.3)
            self.assertTrue(-10 < shear[0] < 10)
            self.assertTrue(20 <= shear[1] <= 40)

        # Checking if RandomAffine can be printed as string
        t.__repr__()

        t = transforms.RandomAffine(10, resample=Image.BILINEAR)
        self.assertIn("Image.BILINEAR", t.__repr__())

    def test_to_grayscale(self):
        """Unit tests for grayscale transform"""

        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        # Test Set: Grayscale an image with desired number of output channels
        # Case 1: RGB -> 1 channel grayscale
        trans1 = transforms.Grayscale(num_output_channels=1)
        gray_pil_1 = trans1(x_pil)
        gray_np_1 = np.array(gray_pil_1)
        self.assertEqual(gray_pil_1.mode, 'L', 'mode should be L')
        self.assertEqual(gray_np_1.shape, tuple(x_shape[0:2]), 'should be 1 channel')
        np.testing.assert_equal(gray_np, gray_np_1)

        # Case 2: RGB -> 3 channel grayscale
        trans2 = transforms.Grayscale(num_output_channels=3)
        gray_pil_2 = trans2(x_pil)
        gray_np_2 = np.array(gray_pil_2)
        self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
        self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
        np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1])
        np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2])
        np.testing.assert_equal(gray_np, gray_np_2[:, :, 0])

        # Case 3: 1 channel grayscale -> 1 channel grayscale
        trans3 = transforms.Grayscale(num_output_channels=1)
        gray_pil_3 = trans3(x_pil_2)
        gray_np_3 = np.array(gray_pil_3)
        self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
        self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
        np.testing.assert_equal(gray_np, gray_np_3)

        # Case 4: 1 channel grayscale -> 3 channel grayscale
        trans4 = transforms.Grayscale(num_output_channels=3)
        gray_pil_4 = trans4(x_pil_2)
        gray_np_4 = np.array(gray_pil_4)
        self.assertEqual(gray_pil_4.mode, 'RGB', 'mode should be RGB')
        self.assertEqual(gray_np_4.shape, tuple(x_shape), 'should be 3 channel')
        np.testing.assert_equal(gray_np_4[:, :, 0], gray_np_4[:, :, 1])
        np.testing.assert_equal(gray_np_4[:, :, 1], gray_np_4[:, :, 2])
        np.testing.assert_equal(gray_np, gray_np_4[:, :, 0])

        # Checking if Grayscale can be printed as string
        trans4.__repr__()

    @unittest.skipIf(stats is None, 'scipy.stats not available')
    def test_random_grayscale(self):
        """Unit tests for random grayscale transform"""

        # Test Set 1: RGB -> 3 channel grayscale
        random_state = random.getstate()
        random.seed(42)
        x_shape = [2, 2, 3]
        x_np = np.random.randint(0, 256, x_shape, np.uint8)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        num_samples = 250
        num_gray = 0
        for _ in range(num_samples):
            gray_pil_2 = transforms.RandomGrayscale(p=0.5)(x_pil)
            gray_np_2 = np.array(gray_pil_2)
            if np.array_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) and \
                    np.array_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) and \
                    np.array_equal(gray_np, gray_np_2[:, :, 0]):
                num_gray = num_gray + 1

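        # two-sided binomial test: with p=0.5 the observed number of
        # grayscaled samples should not be statistically surprising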
        p_value = stats.binom_test(num_gray, num_samples, p=0.5)
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Test Set 2: grayscale -> 1 channel grayscale
        random_state = random.getstate()
        random.seed(42)
        x_shape = [2, 2, 3]
        x_np = np.random.randint(0, 256, x_shape, np.uint8)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        num_samples = 250
        num_gray = 0
        for _ in range(num_samples):
            gray_pil_3 = transforms.RandomGrayscale(p=0.5)(x_pil_2)
            gray_np_3 = np.array(gray_pil_3)
            if np.array_equal(gray_np, gray_np_3):
                num_gray = num_gray + 1

        p_value = stats.binom_test(num_gray, num_samples, p=1.0)  # Note: grayscale is always unchanged
        random.setstate(random_state)
        self.assertGreater(p_value, 0.0001)

        # Test set 3: Explicit tests
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')
        x_pil_2 = x_pil.convert('L')
        gray_np = np.array(x_pil_2)

        # Case 3a: RGB -> 3 channel grayscale (grayscaled)
        trans2 = transforms.RandomGrayscale(p=1.0)
        gray_pil_2 = trans2(x_pil)
        gray_np_2 = np.array(gray_pil_2)
        self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
        self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
        np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1])
        np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2])
        np.testing.assert_equal(gray_np, gray_np_2[:, :, 0])

        # Case 3b: RGB -> 3 channel grayscale (unchanged)
        trans2 = transforms.RandomGrayscale(p=0.0)
        gray_pil_2 = trans2(x_pil)
        gray_np_2 = np.array(gray_pil_2)
        self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
        self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
        np.testing.assert_equal(x_np, gray_np_2)

        # Case 3c: 1 channel grayscale -> 1 channel grayscale (grayscaled)
        trans3 = transforms.RandomGrayscale(p=1.0)
        gray_pil_3 = trans3(x_pil_2)
        gray_np_3 = np.array(gray_pil_3)
        self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
        self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
        np.testing.assert_equal(gray_np, gray_np_3)

        # Case 3d: 1 channel grayscale -> 1 channel grayscale (unchanged)
        trans3 = transforms.RandomGrayscale(p=0.0)
        gray_pil_3 = trans3(x_pil_2)
        gray_np_3 = np.array(gray_pil_3)
        self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
        self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
        np.testing.assert_equal(gray_np, gray_np_3)

        # Checking if RandomGrayscale can be printed as string
        trans3.__repr__()

    def test_gaussian_blur_asserts(self):
        np_img = np.ones((100, 100, 3), dtype=np.uint8) * 255
        img = F.to_pil_image(np_img, "RGB")

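        # both the functional and the transform class must reject malformed
        # kernel_size and sigma arguments, albeit with different messages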
        with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"):
            F.gaussian_blur(img, [3])

        with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"):
            F.gaussian_blur(img, [3, 3, 3])
        with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"):
            transforms.GaussianBlur([3, 3, 3])

        with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"):
            F.gaussian_blur(img, [4, 4])
        with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"):
            transforms.GaussianBlur([4, 4])

        with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"):
            F.gaussian_blur(img, [-3, -3])
        with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"):
            transforms.GaussianBlur([-3, -3])

        with self.assertRaisesRegex(ValueError, r"If sigma is a sequence, its length should be 2"):
            F.gaussian_blur(img, 3, [1, 1, 1])
        with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"):
            transforms.GaussianBlur(3, [1, 1, 1])

        with self.assertRaisesRegex(ValueError, r"sigma should have positive values"):
            F.gaussian_blur(img, 3, -1.0)
        with self.assertRaisesRegex(ValueError, r"If sigma is a single number, it must be positive"):
            transforms.GaussianBlur(3, -1.0)

        with self.assertRaisesRegex(TypeError, r"kernel_size should be int or a sequence of integers"):
            F.gaussian_blur(img, "kernel_size_string")
        with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"):
            transforms.GaussianBlur("kernel_size_string")

        with self.assertRaisesRegex(TypeError, r"sigma should be either float or sequence of floats"):
            F.gaussian_blur(img, 3, "sigma_string")
        with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"):
            transforms.GaussianBlur(3, "sigma_string")


if __name__ == '__main__':
    unittest.main()