Commit 0fc002df authored by huchen

init the dlexamples new

parent 0e04b692
import sys
import os
import unittest
from unittest import mock
import numpy as np
import PIL
from PIL import Image
from torch._utils_internal import get_file_path_2
import torchvision
from common_utils import get_tmp_dir
from fakedata_generation import mnist_root, cifar_root, imagenet_root, \
cityscapes_root, svhn_root, voc_root, ucf101_root, places365_root
import xml.etree.ElementTree as ET
from urllib.request import Request, urlopen
import itertools
try:
import scipy
HAS_SCIPY = True
except ImportError:
HAS_SCIPY = False
try:
import av
HAS_PYAV = True
except ImportError:
HAS_PYAV = False
class Tester(unittest.TestCase):
def generic_classification_dataset_test(self, dataset, num_images=1):
self.assertEqual(len(dataset), num_images)
img, target = dataset[0]
self.assertTrue(isinstance(img, PIL.Image.Image))
self.assertTrue(isinstance(target, int))
def generic_segmentation_dataset_test(self, dataset, num_images=1):
self.assertEqual(len(dataset), num_images)
img, target = dataset[0]
self.assertTrue(isinstance(img, PIL.Image.Image))
self.assertTrue(isinstance(target, PIL.Image.Image))
def test_imagefolder(self):
# TODO: create the fake data on-the-fly
FAKEDATA_DIR = get_file_path_2(
os.path.dirname(os.path.abspath(__file__)), 'assets', 'fakedata')
with get_tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root:
classes = sorted(['a', 'b'])
class_a_image_files = [
os.path.join(root, 'a', file) for file in ('a1.png', 'a2.png', 'a3.png')
]
class_b_image_files = [
os.path.join(root, 'b', file) for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')
]
dataset = torchvision.datasets.ImageFolder(root, loader=lambda x: x)
# test if all classes are present
self.assertEqual(classes, sorted(dataset.classes))
# test if combination of classes and class_to_index functions correctly
for cls in classes:
self.assertEqual(cls, dataset.classes[dataset.class_to_idx[cls]])
# test if all images were detected correctly
class_a_idx = dataset.class_to_idx['a']
class_b_idx = dataset.class_to_idx['b']
imgs_a = [(img_file, class_a_idx) for img_file in class_a_image_files]
imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files]
imgs = sorted(imgs_a + imgs_b)
self.assertEqual(imgs, dataset.imgs)
# test that the dataset outputs all images correctly
outputs = sorted([dataset[i] for i in range(len(dataset))])
self.assertEqual(imgs, outputs)
# redo all tests with specified valid image files
dataset = torchvision.datasets.ImageFolder(
root, loader=lambda x: x, is_valid_file=lambda x: '3' in x)
self.assertEqual(classes, sorted(dataset.classes))
class_a_idx = dataset.class_to_idx['a']
class_b_idx = dataset.class_to_idx['b']
imgs_a = [(img_file, class_a_idx) for img_file in class_a_image_files
if '3' in img_file]
imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files
if '3' in img_file]
imgs = sorted(imgs_a + imgs_b)
self.assertEqual(imgs, dataset.imgs)
outputs = sorted([dataset[i] for i in range(len(dataset))])
self.assertEqual(imgs, outputs)
def test_imagefolder_empty(self):
with get_tmp_dir() as root:
with self.assertRaises(RuntimeError):
torchvision.datasets.ImageFolder(root, loader=lambda x: x)
with self.assertRaises(RuntimeError):
torchvision.datasets.ImageFolder(
root, loader=lambda x: x, is_valid_file=lambda x: False
)
@mock.patch('torchvision.datasets.mnist.download_and_extract_archive')
def test_mnist(self, mock_download_extract):
num_examples = 30
with mnist_root(num_examples, "MNIST") as root:
dataset = torchvision.datasets.MNIST(root, download=True)
self.generic_classification_dataset_test(dataset, num_images=num_examples)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
@mock.patch('torchvision.datasets.mnist.download_and_extract_archive')
def test_kmnist(self, mock_download_extract):
num_examples = 30
with mnist_root(num_examples, "KMNIST") as root:
dataset = torchvision.datasets.KMNIST(root, download=True)
self.generic_classification_dataset_test(dataset, num_images=num_examples)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
@mock.patch('torchvision.datasets.mnist.download_and_extract_archive')
def test_fashionmnist(self, mock_download_extract):
num_examples = 30
with mnist_root(num_examples, "FashionMNIST") as root:
dataset = torchvision.datasets.FashionMNIST(root, download=True)
self.generic_classification_dataset_test(dataset, num_images=num_examples)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
@mock.patch('torchvision.datasets.imagenet._verify_archive')
@unittest.skipIf(not HAS_SCIPY, "scipy unavailable")
def test_imagenet(self, mock_verify):
with imagenet_root() as root:
dataset = torchvision.datasets.ImageNet(root, split='train')
self.generic_classification_dataset_test(dataset)
dataset = torchvision.datasets.ImageNet(root, split='val')
self.generic_classification_dataset_test(dataset)
@mock.patch('torchvision.datasets.cifar.check_integrity')
@mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity')
def test_cifar10(self, mock_ext_check, mock_int_check):
mock_ext_check.return_value = True
mock_int_check.return_value = True
with cifar_root('CIFAR10') as root:
dataset = torchvision.datasets.CIFAR10(root, train=True, download=True)
self.generic_classification_dataset_test(dataset, num_images=5)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
dataset = torchvision.datasets.CIFAR10(root, train=False, download=True)
self.generic_classification_dataset_test(dataset)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
@mock.patch('torchvision.datasets.cifar.check_integrity')
@mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity')
def test_cifar100(self, mock_ext_check, mock_int_check):
mock_ext_check.return_value = True
mock_int_check.return_value = True
with cifar_root('CIFAR100') as root:
dataset = torchvision.datasets.CIFAR100(root, train=True, download=True)
self.generic_classification_dataset_test(dataset)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
dataset = torchvision.datasets.CIFAR100(root, train=False, download=True)
self.generic_classification_dataset_test(dataset)
img, target = dataset[0]
self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target)
@unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows')
def test_cityscapes(self):
with cityscapes_root() as root:
for mode in ['coarse', 'fine']:
if mode == 'coarse':
splits = ['train', 'train_extra', 'val']
else:
splits = ['train', 'val', 'test']
for split in splits:
for target_type in ['semantic', 'instance']:
dataset = torchvision.datasets.Cityscapes(
root, split=split, target_type=target_type, mode=mode)
self.generic_segmentation_dataset_test(dataset, num_images=2)
color_dataset = torchvision.datasets.Cityscapes(
root, split=split, target_type='color', mode=mode)
color_img, color_target = color_dataset[0]
self.assertTrue(isinstance(color_img, PIL.Image.Image))
self.assertTrue(np.array(color_target).shape[2] == 4)
polygon_dataset = torchvision.datasets.Cityscapes(
root, split=split, target_type='polygon', mode=mode)
polygon_img, polygon_target = polygon_dataset[0]
self.assertTrue(isinstance(polygon_img, PIL.Image.Image))
self.assertTrue(isinstance(polygon_target, dict))
self.assertTrue(isinstance(polygon_target['imgHeight'], int))
self.assertTrue(isinstance(polygon_target['objects'], list))
# Test multiple target types
targets_combo = ['semantic', 'polygon', 'color']
multiple_types_dataset = torchvision.datasets.Cityscapes(
root, split=split, target_type=targets_combo, mode=mode)
output = multiple_types_dataset[0]
self.assertTrue(isinstance(output, tuple))
self.assertTrue(len(output) == 2)
self.assertTrue(isinstance(output[0], PIL.Image.Image))
self.assertTrue(isinstance(output[1], tuple))
self.assertTrue(len(output[1]) == 3)
self.assertTrue(isinstance(output[1][0], PIL.Image.Image)) # semantic
self.assertTrue(isinstance(output[1][1], dict)) # polygon
self.assertTrue(isinstance(output[1][2], PIL.Image.Image)) # color
@mock.patch('torchvision.datasets.SVHN._check_integrity')
@unittest.skipIf(not HAS_SCIPY, "scipy unavailable")
def test_svhn(self, mock_check):
mock_check.return_value = True
with svhn_root() as root:
dataset = torchvision.datasets.SVHN(root, split="train")
self.generic_classification_dataset_test(dataset, num_images=2)
dataset = torchvision.datasets.SVHN(root, split="test")
self.generic_classification_dataset_test(dataset, num_images=2)
dataset = torchvision.datasets.SVHN(root, split="extra")
self.generic_classification_dataset_test(dataset, num_images=2)
@mock.patch('torchvision.datasets.voc.download_extract')
def test_voc_parse_xml(self, mock_download_extract):
with voc_root() as root:
dataset = torchvision.datasets.VOCDetection(root)
single_object_xml = """<annotation>
<object>
<name>cat</name>
</object>
</annotation>"""
multiple_object_xml = """<annotation>
<object>
<name>cat</name>
</object>
<object>
<name>dog</name>
</object>
</annotation>"""
single_object_parsed = dataset.parse_voc_xml(ET.fromstring(single_object_xml))
multiple_object_parsed = dataset.parse_voc_xml(ET.fromstring(multiple_object_xml))
self.assertEqual(single_object_parsed, {'annotation': {'object': [{'name': 'cat'}]}})
self.assertEqual(multiple_object_parsed,
{'annotation': {
'object': [{
'name': 'cat'
}, {
'name': 'dog'
}]
}})
@unittest.skipIf(not HAS_PYAV, "PyAV unavailable")
def test_ucf101(self):
with ucf101_root() as (root, ann_root):
for split in {True, False}:
for fold in range(1, 4):
for length in {10, 15, 20}:
dataset = torchvision.datasets.UCF101(
root, ann_root, length, fold=fold, train=split)
self.assertGreater(len(dataset), 0)
video, audio, label = dataset[0]
self.assertEqual(video.size(), (length, 320, 240, 3))
self.assertEqual(audio.numel(), 0)
self.assertEqual(label, 0)
video, audio, label = dataset[len(dataset) - 1]
self.assertEqual(video.size(), (length, 320, 240, 3))
self.assertEqual(audio.numel(), 0)
self.assertEqual(label, 1)
def test_places365(self):
for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
with places365_root(split=split, small=small) as places365:
root, data = places365
dataset = torchvision.datasets.Places365(root, split=split, small=small, download=True)
self.generic_classification_dataset_test(dataset, num_images=len(data["imgs"]))
def test_places365_transforms(self):
expected_image = "image"
expected_target = "target"
def transform(image):
return expected_image
def target_transform(target):
return expected_target
with places365_root() as places365:
root, data = places365
dataset = torchvision.datasets.Places365(
root, transform=transform, target_transform=target_transform, download=True
)
actual_image, actual_target = dataset[0]
self.assertEqual(actual_image, expected_image)
self.assertEqual(actual_target, expected_target)
def test_places365_devkit_download(self):
for split in ("train-standard", "train-challenge", "val"):
with self.subTest(split=split):
with places365_root(split=split) as places365:
root, data = places365
dataset = torchvision.datasets.Places365(root, split=split, download=True)
with self.subTest("classes"):
self.assertSequenceEqual(dataset.classes, data["classes"])
with self.subTest("class_to_idx"):
self.assertDictEqual(dataset.class_to_idx, data["class_to_idx"])
with self.subTest("imgs"):
self.assertSequenceEqual(dataset.imgs, data["imgs"])
def test_places365_devkit_no_download(self):
for split in ("train-standard", "train-challenge", "val"):
with self.subTest(split=split):
with places365_root(split=split, extract_images=False) as places365:
root, data = places365
with self.assertRaises(RuntimeError):
torchvision.datasets.Places365(root, split=split, download=False)
def test_places365_images_download(self):
for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
with self.subTest(split=split, small=small):
with places365_root(split=split, small=small) as places365:
root, data = places365
dataset = torchvision.datasets.Places365(root, split=split, small=small, download=True)
assert all(os.path.exists(item[0]) for item in dataset.imgs)
def test_places365_images_download_preexisting(self):
split = "train-standard"
small = False
images_dir = "data_large_standard"
with places365_root(split=split, small=small) as places365:
root, data = places365
os.mkdir(os.path.join(root, images_dir))
with self.assertRaises(RuntimeError):
torchvision.datasets.Places365(root, split=split, small=small, download=True)
def test_places365_repr_smoke(self):
with places365_root(extract_images=False) as places365:
root, data = places365
dataset = torchvision.datasets.Places365(root, download=True)
self.assertIsInstance(repr(dataset), str)
if __name__ == '__main__':
unittest.main()
import contextlib
import itertools
import time
import unittest.mock
from datetime import datetime
from os import path
from urllib.error import HTTPError
from urllib.parse import urlparse
from urllib.request import urlopen, Request
import pytest
from torchvision import datasets
from torchvision.datasets.utils import download_url, check_integrity
from common_utils import get_tmp_dir
from fakedata_generation import places365_root
def limit_requests_per_time(min_secs_between_requests=2.0):
last_requests = {}
def outer_wrapper(fn):
def inner_wrapper(request, *args, **kwargs):
url = request.full_url if isinstance(request, Request) else request
netloc = urlparse(url).netloc
last_request = last_requests.get(netloc)
if last_request is not None:
elapsed_secs = (datetime.now() - last_request).total_seconds()
delta = min_secs_between_requests - elapsed_secs
if delta > 0:
time.sleep(delta)
response = fn(request, *args, **kwargs)
last_requests[netloc] = datetime.now()
return response
return inner_wrapper
return outer_wrapper
urlopen = limit_requests_per_time()(urlopen)
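# A minimal usage sketch of the rate limiter above (hypothetical helper, not
# used by the tests): any callable taking a URL or Request as its first
# argument can be throttled per host the same way urlopen is rebound above.
# Note that the rebound urlopen is itself already throttled, so this example
# is doubly conservative.
@limit_requests_per_time(min_secs_between_requests=1.0)
def fetch_status(url):
    with urlopen(Request(url, method="HEAD")) as response:
        return response.status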
@contextlib.contextmanager
def log_download_attempts(
urls_and_md5s=None,
patch=True,
download_url_target="torchvision.datasets.utils.download_url",
patch_auxiliaries=None,
):
if urls_and_md5s is None:
urls_and_md5s = set()
if patch_auxiliaries is None:
patch_auxiliaries = patch
with contextlib.ExitStack() as stack:
download_url_mock = stack.enter_context(
unittest.mock.patch(download_url_target, wraps=None if patch else download_url)
)
if patch_auxiliaries:
# download_and_extract_archive
stack.enter_context(unittest.mock.patch("torchvision.datasets.utils.extract_archive"))
try:
yield urls_and_md5s
finally:
for args, kwargs in download_url_mock.call_args_list:
url = args[0]
md5 = args[-1] if len(args) == 4 else kwargs.get("md5")
urls_and_md5s.add((url, md5))
def retry(fn, times=1, wait=5.0):
msgs = []
for _ in range(times + 1):
try:
return fn()
except AssertionError as error:
msgs.append(str(error))
time.sleep(wait)
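# the for-else below fires only when every attempt raised, i.e. fn() never returned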
else:
raise AssertionError(
"\n".join(
(
f"Assertion failed {times + 1} times with {wait:.1f} seconds intermediate wait time.\n",
*(f"{idx}: {error}" for idx, error in enumerate(msgs, 1)),
)
)
)
@contextlib.contextmanager
def assert_server_response_ok():
try:
yield
except HTTPError as error:
raise AssertionError(f"The server returned {error.code}: {error.reason}.") from error
def assert_url_is_accessible(url):
request = Request(url, headers=dict(method="HEAD"))
with assert_server_response_ok():
urlopen(request)
def assert_file_downloads_correctly(url, md5):
with get_tmp_dir() as root:
file = path.join(root, path.basename(url))
with assert_server_response_ok():
with urlopen(url) as response, open(file, "wb") as fh:
fh.write(response.read())
assert check_integrity(file, md5=md5), "The MD5 checksums do not match"
class DownloadConfig:
def __init__(self, url, md5=None, id=None):
self.url = url
self.md5 = md5
self.id = id or url
def __repr__(self):
return self.id
def make_download_configs(urls_and_md5s, name=None):
return [
DownloadConfig(url, md5=md5, id=f"{name}, {url}" if name is not None else None) for url, md5 in urls_and_md5s
]
def collect_download_configs(dataset_loader, name, **kwargs):
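# Exceptions raised by the dataset constructor are deliberately suppressed:
# we only need the URLs it attempted to download, which log_download_attempts
# records in its finally block even when construction fails.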
with contextlib.suppress(Exception), log_download_attempts(**kwargs) as urls_and_md5s:
dataset_loader()
return make_download_configs(urls_and_md5s, name)
def places365():
with log_download_attempts(patch=False) as urls_and_md5s:
for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
with places365_root(split=split, small=small) as places365:
root, data = places365
datasets.Places365(root, split=split, small=small, download=True)
return make_download_configs(urls_and_md5s, "Places365")
def caltech101():
return collect_download_configs(lambda: datasets.Caltech101(".", download=True), "Caltech101")
def caltech256():
return collect_download_configs(lambda: datasets.Caltech256(".", download=True), "Caltech256")
def cifar10():
return collect_download_configs(lambda: datasets.CIFAR10(".", download=True), "CIFAR10")
def cifar100():
return collect_download_configs(lambda: datasets.CIFAR100(".", download=True), "CIFAR100")
def voc():
download_configs = []
for year in ("2007", "2007-test", "2008", "2009", "2010", "2011", "2012"):
with contextlib.suppress(Exception), log_download_attempts(
download_url_target="torchvision.datasets.voc.download_url"
) as urls_and_md5s:
datasets.VOCSegmentation(".", year=year, download=True)
download_configs.extend(make_download_configs(urls_and_md5s, f"VOC, {year}"))
return download_configs
def make_parametrize_kwargs(download_configs):
argvalues = []
ids = []
for config in download_configs:
argvalues.append((config.url, config.md5))
ids.append(config.id)
return dict(argnames=("url", "md5"), argvalues=argvalues, ids=ids)
@pytest.mark.parametrize(
**make_parametrize_kwargs(
itertools.chain(
places365(),
caltech101(),
caltech256(),
cifar10(),
cifar100(),
voc(),
)
)
)
def test_url_is_accessible(url, md5):
retry(lambda: assert_url_is_accessible(url))
@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain()))
def test_file_downloads_correctly(url, md5):
retry(lambda: assert_file_downloads_correctly(url, md5))
import contextlib
import sys
import os
import torch
import unittest
from torchvision import io
from torchvision.datasets.samplers import (
DistributedSampler,
RandomClipSampler,
UniformClipSampler,
)
from torchvision.datasets.video_utils import VideoClips, unfold
from torchvision import get_video_backend
from common_utils import get_tmp_dir
@contextlib.contextmanager
def get_list_of_videos(num_videos=5, sizes=None, fps=None):
with get_tmp_dir() as tmp_dir:
names = []
for i in range(num_videos):
if sizes is None:
size = 5 * (i + 1)
else:
size = sizes[i]
if fps is None:
f = 5
else:
f = fps[i]
data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8)
name = os.path.join(tmp_dir, "{}.mp4".format(i))
names.append(name)
io.write_video(name, data, fps=f)
yield names
@unittest.skipIf(not io.video._av_available(), "this test requires av")
class Tester(unittest.TestCase):
def test_random_clip_sampler(self):
with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list:
video_clips = VideoClips(video_list, 5, 5)
sampler = RandomClipSampler(video_clips, 3)
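# 25 frames per video with clip length 5 and step 5 gives 5 clips per video;
# the sampler draws 3 of them from each of the 3 videos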
self.assertEqual(len(sampler), 3 * 3)
indices = torch.tensor(list(iter(sampler)))
videos = indices // 5
v_idxs, count = torch.unique(videos, return_counts=True)
self.assertTrue(v_idxs.equal(torch.tensor([0, 1, 2])))
self.assertTrue(count.equal(torch.tensor([3, 3, 3])))
def test_random_clip_sampler_unequal(self):
with get_list_of_videos(num_videos=3, sizes=[10, 25, 25]) as video_list:
video_clips = VideoClips(video_list, 5, 5)
sampler = RandomClipSampler(video_clips, 3)
self.assertEqual(len(sampler), 2 + 3 + 3)
indices = list(iter(sampler))
self.assertIn(0, indices)
self.assertIn(1, indices)
# remove elements of the first video, to simplify testing
indices.remove(0)
indices.remove(1)
indices = torch.tensor(indices) - 2
videos = indices // 5
v_idxs, count = torch.unique(videos, return_counts=True)
self.assertTrue(v_idxs.equal(torch.tensor([0, 1])))
self.assertTrue(count.equal(torch.tensor([3, 3])))
def test_uniform_clip_sampler(self):
with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list:
video_clips = VideoClips(video_list, 5, 5)
sampler = UniformClipSampler(video_clips, 3)
self.assertEqual(len(sampler), 3 * 3)
indices = torch.tensor(list(iter(sampler)))
videos = indices // 5
v_idxs, count = torch.unique(videos, return_counts=True)
self.assertTrue(v_idxs.equal(torch.tensor([0, 1, 2])))
self.assertTrue(count.equal(torch.tensor([3, 3, 3])))
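# the videos own clip indices [0..4], [5..9] and [10..14]; the uniform
# sampler picks the evenly spaced clips 0, 2, 4 within each block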
self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 5, 7, 9, 10, 12, 14])))
def test_uniform_clip_sampler_insufficient_clips(self):
with get_list_of_videos(num_videos=3, sizes=[10, 25, 25]) as video_list:
video_clips = VideoClips(video_list, 5, 5)
sampler = UniformClipSampler(video_clips, 3)
self.assertEqual(len(sampler), 3 * 3)
indices = torch.tensor(list(iter(sampler)))
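# the first video (10 frames) yields only 2 clips, so clip index 0 is
# repeated when uniformly resampling it to 3 positions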
self.assertTrue(indices.equal(torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11])))
def test_distributed_sampler_and_uniform_clip_sampler(self):
with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list:
video_clips = VideoClips(video_list, 5, 5)
clip_sampler = UniformClipSampler(video_clips, 3)
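# 9 clips total, grouped per video (group_size=3); the distributed sampler
# deals whole groups to each rank, so rank 0 gets videos 0 and 2 while rank 1
# gets video 1 plus a wrap-around copy of video 0 to equalize the lengths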
distributed_sampler_rank0 = DistributedSampler(
clip_sampler,
num_replicas=2,
rank=0,
group_size=3,
)
indices = torch.tensor(list(iter(distributed_sampler_rank0)))
self.assertEqual(len(distributed_sampler_rank0), 6)
self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 10, 12, 14])))
distributed_sampler_rank1 = DistributedSampler(
clip_sampler,
num_replicas=2,
rank=1,
group_size=3,
)
indices = torch.tensor(list(iter(distributed_sampler_rank1)))
self.assertEqual(len(distributed_sampler_rank1), 6)
self.assertTrue(indices.equal(torch.tensor([5, 7, 9, 0, 2, 4])))
if __name__ == '__main__':
unittest.main()
import os
import shutil
import contextlib
import tempfile
import unittest
from torchvision.datasets import ImageFolder
FAKEDATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'assets', 'fakedata')
@contextlib.contextmanager
def tmp_dir(src=None, **kwargs):
tmp_dir = tempfile.mkdtemp(**kwargs)
if src is not None:
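# shutil.copytree requires that the destination does not exist yet, so drop
# the empty directory created by mkdtemp before copying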
os.rmdir(tmp_dir)
shutil.copytree(src, tmp_dir)
try:
yield tmp_dir
finally:
shutil.rmtree(tmp_dir)
def mock_transform(return_value, arg_list):
def mock(arg):
arg_list.append(arg)
return return_value
return mock
class Tester(unittest.TestCase):
def test_transform(self):
with tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root:
class_a_image_files = [os.path.join(root, 'a', file)
for file in ('a1.png', 'a2.png', 'a3.png')]
class_b_image_files = [os.path.join(root, 'b', file)
for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')]
return_value = os.path.join(root, 'a', 'a1.png')
args = []
transform = mock_transform(return_value, args)
dataset = ImageFolder(root, loader=lambda x: x, transform=transform)
outputs = [dataset[i][0] for i in range(len(dataset))]
self.assertEqual([return_value] * len(outputs), outputs)
imgs = sorted(class_a_image_files + class_b_image_files)
self.assertEqual(imgs, sorted(args))
def test_target_transform(self):
with tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root:
class_a_image_files = [os.path.join(root, 'a', file)
for file in ('a1.png', 'a2.png', 'a3.png')]
class_b_image_files = [os.path.join(root, 'b', file)
for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')]
return_value = os.path.join(root, 'a', 'a1.png')
args = []
target_transform = mock_transform(return_value, args)
dataset = ImageFolder(root, loader=lambda x: x,
target_transform=target_transform)
outputs = [dataset[i][1] for i in range(len(dataset))]
self.assertEqual([return_value] * len(outputs), outputs)
class_a_idx = dataset.class_to_idx['a']
class_b_idx = dataset.class_to_idx['b']
targets = sorted([class_a_idx] * len(class_a_image_files) +
[class_b_idx] * len(class_b_image_files))
self.assertEqual(targets, sorted(args))
if __name__ == '__main__':
unittest.main()
import os
import sys
import tempfile
import torchvision.datasets.utils as utils
import unittest
import zipfile
import tarfile
import gzip
import warnings
from torch._utils_internal import get_file_path_2
from urllib.error import URLError
from common_utils import get_tmp_dir
TEST_FILE = get_file_path_2(
os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg')
class Tester(unittest.TestCase):
def test_check_md5(self):
fpath = TEST_FILE
correct_md5 = '9c0bb82894bb3af7f7675ef2b3b6dcdc'
false_md5 = ''
self.assertTrue(utils.check_md5(fpath, correct_md5))
self.assertFalse(utils.check_md5(fpath, false_md5))
def test_check_integrity(self):
existing_fpath = TEST_FILE
nonexisting_fpath = ''
correct_md5 = '9c0bb82894bb3af7f7675ef2b3b6dcdc'
false_md5 = ''
self.assertTrue(utils.check_integrity(existing_fpath, correct_md5))
self.assertFalse(utils.check_integrity(existing_fpath, false_md5))
self.assertTrue(utils.check_integrity(existing_fpath))
self.assertFalse(utils.check_integrity(nonexisting_fpath))
def test_download_url(self):
with get_tmp_dir() as temp_dir:
url = "http://github.com/pytorch/vision/archive/master.zip"
try:
utils.download_url(url, temp_dir)
self.assertFalse(len(os.listdir(temp_dir)) == 0)
except URLError:
msg = "could not download test file '{}'".format(url)
warnings.warn(msg, RuntimeWarning)
raise unittest.SkipTest(msg)
def test_download_url_retry_http(self):
with get_tmp_dir() as temp_dir:
url = "https://github.com/pytorch/vision/archive/master.zip"
try:
utils.download_url(url, temp_dir)
self.assertFalse(len(os.listdir(temp_dir)) == 0)
except URLError:
msg = "could not download test file '{}'".format(url)
warnings.warn(msg, RuntimeWarning)
raise unittest.SkipTest(msg)
def test_download_url_dont_exist(self):
with get_tmp_dir() as temp_dir:
url = "http://github.com/pytorch/vision/archive/this_doesnt_exist.zip"
with self.assertRaises(URLError):
utils.download_url(url, temp_dir)
@unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows')
def test_extract_zip(self):
with get_tmp_dir() as temp_dir:
with tempfile.NamedTemporaryFile(suffix='.zip') as f:
with zipfile.ZipFile(f, 'w') as zf:
zf.writestr('file.tst', 'this is the content')
utils.extract_archive(f.name, temp_dir)
self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst')))
with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf:
data = nf.read()
self.assertEqual(data, 'this is the content')
@unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows')
def test_extract_tar(self):
for ext, mode in zip(['.tar', '.tar.gz', '.tgz'], ['w', 'w:gz', 'w:gz']):
with get_tmp_dir() as temp_dir:
with tempfile.NamedTemporaryFile() as bf:
bf.write("this is the content".encode())
bf.seek(0)
with tempfile.NamedTemporaryFile(suffix=ext) as f:
with tarfile.open(f.name, mode=mode) as zf:
zf.add(bf.name, arcname='file.tst')
utils.extract_archive(f.name, temp_dir)
self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst')))
with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf:
data = nf.read()
self.assertEqual(data, 'this is the content')
@unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows')
def test_extract_tar_xz(self):
for ext, mode in zip(['.tar.xz'], ['w:xz']):
with get_tmp_dir() as temp_dir:
with tempfile.NamedTemporaryFile() as bf:
bf.write("this is the content".encode())
bf.seek(0)
with tempfile.NamedTemporaryFile(suffix=ext) as f:
with tarfile.open(f.name, mode=mode) as zf:
zf.add(bf.name, arcname='file.tst')
utils.extract_archive(f.name, temp_dir)
self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst')))
with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf:
data = nf.read()
self.assertEqual(data, 'this is the content')
@unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows')
def test_extract_gzip(self):
with get_tmp_dir() as temp_dir:
with tempfile.NamedTemporaryFile(suffix='.gz') as f:
with gzip.GzipFile(f.name, 'wb') as zf:
zf.write('this is the content'.encode())
utils.extract_archive(f.name, temp_dir)
f_name = os.path.join(temp_dir, os.path.splitext(os.path.basename(f.name))[0])
self.assertTrue(os.path.exists(f_name))
with open(os.path.join(f_name), 'r') as nf:
data = nf.read()
self.assertEqual(data, 'this is the content')
def test_verify_str_arg(self):
self.assertEqual("a", utils.verify_str_arg("a", "arg", ("a",)))
self.assertRaises(ValueError, utils.verify_str_arg, 0, ("a",), "arg")
self.assertRaises(ValueError, utils.verify_str_arg, "b", ("a",), "arg")
if __name__ == '__main__':
unittest.main()
import contextlib
import os
import torch
import unittest
from torchvision import io
from torchvision.datasets.video_utils import VideoClips, unfold
from common_utils import get_tmp_dir
@contextlib.contextmanager
def get_list_of_videos(num_videos=5, sizes=None, fps=None):
with get_tmp_dir() as tmp_dir:
names = []
for i in range(num_videos):
if sizes is None:
size = 5 * (i + 1)
else:
size = sizes[i]
if fps is None:
f = 5
else:
f = fps[i]
data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8)
name = os.path.join(tmp_dir, "{}.mp4".format(i))
names.append(name)
io.write_video(name, data, fps=f)
yield names
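# Reference sketch of the unfold semantics checked in test_unfold below (an
# assumption inferred from the expected tensors, not torchvision's actual
# implementation): slide a window of `size` elements over a 1D tensor,
# advancing `step` elements per row and taking every `dilation`-th element
# inside each window.
def _unfold_reference(tensor, size, step, dilation=1):
    span = dilation * (size - 1) + 1  # number of elements one window covers
    starts = range(0, len(tensor) - span + 1, step)
    return torch.stack([tensor[s:s + span:dilation] for s in starts])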
class Tester(unittest.TestCase):
def test_unfold(self):
a = torch.arange(7)
r = unfold(a, 3, 3, 1)
expected = torch.tensor([
[0, 1, 2],
[3, 4, 5],
])
self.assertTrue(r.equal(expected))
r = unfold(a, 3, 2, 1)
expected = torch.tensor([
[0, 1, 2],
[2, 3, 4],
[4, 5, 6]
])
self.assertTrue(r.equal(expected))
r = unfold(a, 3, 2, 2)
expected = torch.tensor([
[0, 2, 4],
[2, 4, 6],
])
self.assertTrue(r.equal(expected))
@unittest.skipIf(not io.video._av_available(), "this test requires av")
def test_video_clips(self):
with get_list_of_videos(num_videos=3) as video_list:
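# with the default sizes, the three videos have 5, 10 and 15 frames, so clip
# length 5 with step 5 yields 1 + 2 + 3 clips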
video_clips = VideoClips(video_list, 5, 5, num_workers=2)
self.assertEqual(video_clips.num_clips(), 1 + 2 + 3)
for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]):
video_idx, clip_idx = video_clips.get_clip_location(i)
self.assertEqual(video_idx, v_idx)
self.assertEqual(clip_idx, c_idx)
video_clips = VideoClips(video_list, 6, 6)
self.assertEqual(video_clips.num_clips(), 0 + 1 + 2)
for i, (v_idx, c_idx) in enumerate([(1, 0), (2, 0), (2, 1)]):
video_idx, clip_idx = video_clips.get_clip_location(i)
self.assertEqual(video_idx, v_idx)
self.assertEqual(clip_idx, c_idx)
video_clips = VideoClips(video_list, 6, 1)
self.assertEqual(video_clips.num_clips(), 0 + (10 - 6 + 1) + (15 - 6 + 1))
for i, v_idx, c_idx in [(0, 1, 0), (4, 1, 4), (5, 2, 0), (6, 2, 1)]:
video_idx, clip_idx = video_clips.get_clip_location(i)
self.assertEqual(video_idx, v_idx)
self.assertEqual(clip_idx, c_idx)
@unittest.skipIf(not io.video._av_available(), "this test requires av")
def test_video_clips_custom_fps(self):
with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list:
num_frames = 4
for fps in [1, 3, 4, 10]:
video_clips = VideoClips(video_list, num_frames, num_frames, fps, num_workers=2)
for i in range(video_clips.num_clips()):
video, audio, info, video_idx = video_clips.get_clip(i)
self.assertEqual(video.shape[0], num_frames)
self.assertEqual(info["video_fps"], fps)
# TODO add tests checking that the content is right
def test_compute_clips_for_video(self):
video_pts = torch.arange(30)
# case 1: single clip
num_frames = 13
orig_fps = 30
duration = float(len(video_pts)) / orig_fps
new_fps = 13
clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames,
orig_fps, new_fps)
resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
self.assertEqual(len(clips), 1)
self.assertTrue(clips.equal(idxs))
self.assertTrue(idxs[0].equal(resampled_idxs))
# case 2: all frames appear only once
num_frames = 4
orig_fps = 30
duration = float(len(video_pts)) / orig_fps
new_fps = 12
clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames,
orig_fps, new_fps)
resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
self.assertEqual(len(clips), 3)
self.assertTrue(clips.equal(idxs))
self.assertTrue(idxs.flatten().equal(resampled_idxs))
if __name__ == '__main__':
unittest.main()
import unittest
from torchvision import set_video_backend
import test_datasets_video_utils
# Disabling the video backend switching temporarily
# set_video_backend('video_reader')
if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromModule(test_datasets_video_utils)
unittest.TextTestRunner(verbosity=1).run(suite)
import os
import unittest
import colorsys
import math
import numpy as np
from PIL.Image import NEAREST, BILINEAR, BICUBIC
import torch
import torchvision.transforms.functional_tensor as F_t
import torchvision.transforms.functional_pil as F_pil
import torchvision.transforms.functional as F
from common_utils import TransformsTester
class Tester(TransformsTester):
def setUp(self):
self.device = "cpu"
def _test_fn_on_batch(self, batch_tensors, fn, **fn_kwargs):
transformed_batch = fn(batch_tensors, **fn_kwargs)
for i in range(len(batch_tensors)):
img_tensor = batch_tensors[i, ...]
transformed_img = fn(img_tensor, **fn_kwargs)
self.assertTrue(transformed_img.equal(transformed_batch[i, ...]))
scripted_fn = torch.jit.script(fn)
# scriptable function test
s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs)
self.assertTrue(transformed_batch.allclose(s_transformed_batch))
def test_vflip(self):
script_vflip = torch.jit.script(F.vflip)
img_tensor, pil_img = self._create_data(16, 18, device=self.device)
vflipped_img = F.vflip(img_tensor)
vflipped_pil_img = F.vflip(pil_img)
self.compareTensorToPIL(vflipped_img, vflipped_pil_img)
# scriptable function test
vflipped_img_script = script_vflip(img_tensor)
self.assertTrue(vflipped_img.equal(vflipped_img_script))
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
self._test_fn_on_batch(batch_tensors, F.vflip)
def test_hflip(self):
script_hflip = torch.jit.script(F.hflip)
img_tensor, pil_img = self._create_data(16, 18, device=self.device)
hflipped_img = F.hflip(img_tensor)
hflipped_pil_img = F.hflip(pil_img)
self.compareTensorToPIL(hflipped_img, hflipped_pil_img)
# scriptable function test
hflipped_img_script = script_hflip(img_tensor)
self.assertTrue(hflipped_img.equal(hflipped_img_script))
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
self._test_fn_on_batch(batch_tensors, F.hflip)
def test_crop(self):
script_crop = torch.jit.script(F.crop)
img_tensor, pil_img = self._create_data(16, 18, device=self.device)
test_configs = [
(1, 2, 4, 5), # crop inside top-left corner
(2, 12, 3, 4), # crop inside top-right corner
(8, 3, 5, 6), # crop inside bottom-left corner
(8, 11, 4, 3), # crop inside bottom-right corner
]
for top, left, height, width in test_configs:
pil_img_cropped = F.crop(pil_img, top, left, height, width)
img_tensor_cropped = F.crop(img_tensor, top, left, height, width)
self.compareTensorToPIL(img_tensor_cropped, pil_img_cropped)
img_tensor_cropped = script_crop(img_tensor, top, left, height, width)
self.compareTensorToPIL(img_tensor_cropped, pil_img_cropped)
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
self._test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width)
def test_hsv2rgb(self):
scripted_fn = torch.jit.script(F_t._hsv2rgb)
shape = (3, 100, 150)
for _ in range(10):
hsv_img = torch.rand(*shape, dtype=torch.float, device=self.device)
rgb_img = F_t._hsv2rgb(hsv_img)
ft_img = rgb_img.permute(1, 2, 0).flatten(0, 1)
h, s, v = hsv_img.unbind(0)
h = h.flatten().cpu().numpy()
s = s.flatten().cpu().numpy()
v = v.flatten().cpu().numpy()
rgb = []
for h1, s1, v1 in zip(h, s, v):
rgb.append(colorsys.hsv_to_rgb(h1, s1, v1))
colorsys_img = torch.tensor(rgb, dtype=torch.float32, device=self.device)
max_diff = (ft_img - colorsys_img).abs().max()
self.assertLess(max_diff, 1e-5)
s_rgb_img = scripted_fn(hsv_img)
self.assertTrue(rgb_img.allclose(s_rgb_img))
batch_tensors = self._create_data_batch(120, 100, num_samples=4, device=self.device).float()
self._test_fn_on_batch(batch_tensors, F_t._hsv2rgb)
def test_rgb2hsv(self):
scripted_fn = torch.jit.script(F_t._rgb2hsv)
shape = (3, 150, 100)
for _ in range(10):
rgb_img = torch.rand(*shape, dtype=torch.float, device=self.device)
hsv_img = F_t._rgb2hsv(rgb_img)
ft_hsv_img = hsv_img.permute(1, 2, 0).flatten(0, 1)
r, g, b = rgb_img.unbind(dim=-3)
r = r.flatten().cpu().numpy()
g = g.flatten().cpu().numpy()
b = b.flatten().cpu().numpy()
hsv = []
for r1, g1, b1 in zip(r, g, b):
hsv.append(colorsys.rgb_to_hsv(r1, g1, b1))
colorsys_img = torch.tensor(hsv, dtype=torch.float32, device=self.device)
ft_hsv_img_h, ft_hsv_img_sv = torch.split(ft_hsv_img, [1, 2], dim=1)
colorsys_img_h, colorsys_img_sv = torch.split(colorsys_img, [1, 2], dim=1)
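# hue is cyclic in [0, 1], so compare it through sin(2*pi*h) to avoid
# spurious differences at the 0/1 wrap-around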
max_diff_h = ((colorsys_img_h * 2 * math.pi).sin() - (ft_hsv_img_h * 2 * math.pi).sin()).abs().max()
max_diff_sv = (colorsys_img_sv - ft_hsv_img_sv).abs().max()
max_diff = max(max_diff_h, max_diff_sv)
self.assertLess(max_diff, 1e-5)
s_hsv_img = scripted_fn(rgb_img)
self.assertTrue(hsv_img.allclose(s_hsv_img))
batch_tensors = self._create_data_batch(120, 100, num_samples=4, device=self.device).float()
self._test_fn_on_batch(batch_tensors, F_t._rgb2hsv)
def test_rgb_to_grayscale(self):
script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale)
img_tensor, pil_img = self._create_data(32, 34, device=self.device)
for num_output_channels in (3, 1):
gray_pil_image = F.rgb_to_grayscale(pil_img, num_output_channels=num_output_channels)
gray_tensor = F.rgb_to_grayscale(img_tensor, num_output_channels=num_output_channels)
self.approxEqualTensorToPIL(gray_tensor.float(), gray_pil_image, tol=1.0 + 1e-10, agg_method="max")
s_gray_tensor = script_rgb_to_grayscale(img_tensor, num_output_channels=num_output_channels)
self.assertTrue(s_gray_tensor.equal(gray_tensor))
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
self._test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels)
def test_center_crop(self):
script_center_crop = torch.jit.script(F.center_crop)
img_tensor, pil_img = self._create_data(32, 34, device=self.device)
cropped_pil_image = F.center_crop(pil_img, [10, 11])
cropped_tensor = F.center_crop(img_tensor, [10, 11])
self.compareTensorToPIL(cropped_tensor, cropped_pil_image)
cropped_tensor = script_center_crop(img_tensor, [10, 11])
self.compareTensorToPIL(cropped_tensor, cropped_pil_image)
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
self._test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11])
def test_five_crop(self):
script_five_crop = torch.jit.script(F.five_crop)
img_tensor, pil_img = self._create_data(32, 34, device=self.device)
cropped_pil_images = F.five_crop(pil_img, [10, 11])
cropped_tensors = F.five_crop(img_tensor, [10, 11])
for i in range(5):
self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i])
cropped_tensors = script_five_crop(img_tensor, [10, 11])
for i in range(5):
self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i])
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
tuple_transformed_batches = F.five_crop(batch_tensors, [10, 11])
for i in range(len(batch_tensors)):
img_tensor = batch_tensors[i, ...]
tuple_transformed_imgs = F.five_crop(img_tensor, [10, 11])
self.assertEqual(len(tuple_transformed_imgs), len(tuple_transformed_batches))
for j in range(len(tuple_transformed_imgs)):
true_transformed_img = tuple_transformed_imgs[j]
transformed_img = tuple_transformed_batches[j][i, ...]
self.assertTrue(true_transformed_img.equal(transformed_img))
# scriptable function test
s_tuple_transformed_batches = script_five_crop(batch_tensors, [10, 11])
for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches):
self.assertTrue(transformed_batch.equal(s_transformed_batch))
def test_ten_crop(self):
script_ten_crop = torch.jit.script(F.ten_crop)
img_tensor, pil_img = self._create_data(32, 34, device=self.device)
cropped_pil_images = F.ten_crop(pil_img, [10, 11])
cropped_tensors = F.ten_crop(img_tensor, [10, 11])
for i in range(10):
self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i])
cropped_tensors = script_ten_crop(img_tensor, [10, 11])
for i in range(10):
self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i])
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
tuple_transformed_batches = F.ten_crop(batch_tensors, [10, 11])
for i in range(len(batch_tensors)):
img_tensor = batch_tensors[i, ...]
tuple_transformed_imgs = F.ten_crop(img_tensor, [10, 11])
self.assertEqual(len(tuple_transformed_imgs), len(tuple_transformed_batches))
for j in range(len(tuple_transformed_imgs)):
true_transformed_img = tuple_transformed_imgs[j]
transformed_img = tuple_transformed_batches[j][i, ...]
self.assertTrue(true_transformed_img.equal(transformed_img))
# scriptable function test
s_tuple_transformed_batches = script_ten_crop(batch_tensors, [10, 11])
for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches):
self.assertTrue(transformed_batch.equal(s_transformed_batch))
def test_pad(self):
script_fn = torch.jit.script(F.pad)
tensor, pil_img = self._create_data(7, 8, device=self.device)
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
# skip float16 on CPU case
continue
if dt is not None:
# Trivially cast the uint8 data to float to exercise all dtype cases
tensor = tensor.to(dt)
batch_tensors = batch_tensors.to(dt)
for pad in [2, [3, ], [0, 3], (3, 3), [4, 2, 4, 3]]:
configs = [
{"padding_mode": "constant", "fill": 0},
{"padding_mode": "constant", "fill": 10},
{"padding_mode": "constant", "fill": 20},
{"padding_mode": "edge"},
{"padding_mode": "reflect"},
{"padding_mode": "symmetric"},
]
for kwargs in configs:
pad_tensor = F_t.pad(tensor, pad, **kwargs)
pad_pil_img = F_pil.pad(pil_img, pad, **kwargs)
pad_tensor_8b = pad_tensor
# we need to cast to uint8 to compare with PIL image
if pad_tensor_8b.dtype != torch.uint8:
pad_tensor_8b = pad_tensor_8b.to(torch.uint8)
self.compareTensorToPIL(pad_tensor_8b, pad_pil_img, msg="{}, {}".format(pad, kwargs))
if isinstance(pad, int):
script_pad = [pad, ]
else:
script_pad = pad
pad_tensor_script = script_fn(tensor, script_pad, **kwargs)
self.assertTrue(pad_tensor.equal(pad_tensor_script), msg="{}, {}".format(pad, kwargs))
self._test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **kwargs)
def _test_adjust_fn(self, fn, fn_pil, fn_t, configs, tol=2.0 + 1e-10, agg_method="max"):
script_fn = torch.jit.script(fn)
torch.manual_seed(15)
tensor, pil_img = self._create_data(26, 34, device=self.device)
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
for dt in [None, torch.float32, torch.float64]:
if dt is not None:
tensor = F.convert_image_dtype(tensor, dt)
batch_tensors = F.convert_image_dtype(batch_tensors, dt)
for config in configs:
adjusted_tensor = fn_t(tensor, **config)
adjusted_pil = fn_pil(pil_img, **config)
scripted_result = script_fn(tensor, **config)
msg = "{}, {}".format(dt, config)
self.assertEqual(adjusted_tensor.dtype, scripted_result.dtype, msg=msg)
self.assertEqual(adjusted_tensor.size()[1:], adjusted_pil.size[::-1], msg=msg)
rgb_tensor = adjusted_tensor
if adjusted_tensor.dtype != torch.uint8:
rgb_tensor = F.convert_image_dtype(adjusted_tensor, torch.uint8)
# Check that the max difference does not exceed 2 in the [0, 255] range
# Exact matching is not possible due to the incompatibility between convert_image_dtype and PIL results
self.approxEqualTensorToPIL(rgb_tensor.float(), adjusted_pil, tol=tol, msg=msg, agg_method=agg_method)
atol = 1e-6
if adjusted_tensor.dtype == torch.uint8 and "cuda" in torch.device(self.device).type:
atol = 1.0
self.assertTrue(adjusted_tensor.allclose(scripted_result, atol=atol), msg=msg)
self._test_fn_on_batch(batch_tensors, fn, **config)
def test_adjust_brightness(self):
self._test_adjust_fn(
F.adjust_brightness,
F_pil.adjust_brightness,
F_t.adjust_brightness,
[{"brightness_factor": f} for f in [0.1, 0.5, 1.0, 1.34, 2.5]]
)
def test_adjust_contrast(self):
self._test_adjust_fn(
F.adjust_contrast,
F_pil.adjust_contrast,
F_t.adjust_contrast,
[{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]
)
def test_adjust_saturation(self):
self._test_adjust_fn(
F.adjust_saturation,
F_pil.adjust_saturation,
F_t.adjust_saturation,
[{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]]
)
def test_adjust_hue(self):
self._test_adjust_fn(
F.adjust_hue,
F_pil.adjust_hue,
F_t.adjust_hue,
[{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]],
tol=0.1,
agg_method="mean"
)
def test_adjust_gamma(self):
self._test_adjust_fn(
F.adjust_gamma,
F_pil.adjust_gamma,
F_t.adjust_gamma,
[{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])]
)
def test_resize(self):
script_fn = torch.jit.script(F_t.resize)
tensor, pil_img = self._create_data(26, 36, device=self.device)
batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
# skip float16 on CPU case
continue
if dt is not None:
# Trivially cast the uint8 data to float to exercise all dtype cases
tensor = tensor.to(dt)
batch_tensors = batch_tensors.to(dt)
for size in [32, 26, [32, ], [32, 32], (32, 32), [26, 35]]:
for interpolation in [BILINEAR, BICUBIC, NEAREST]:
resized_tensor = F_t.resize(tensor, size=size, interpolation=interpolation)
resized_pil_img = F_pil.resize(pil_img, size=size, interpolation=interpolation)
self.assertEqual(
resized_tensor.size()[1:], resized_pil_img.size[::-1], msg="{}, {}".format(size, interpolation)
)
if interpolation != NEAREST:
# We cannot check values if mode is NEAREST, as the results differ
# E.g. resized_tensor = [[a, a, b, c, d, d, e, ...]]
# E.g. resized_pil_img = [[a, b, c, c, d, e, f, ...]]
resized_tensor_f = resized_tensor
# we need to cast to uint8 to compare with PIL image
if resized_tensor_f.dtype == torch.uint8:
resized_tensor_f = resized_tensor_f.to(torch.float)
# Pay attention to high tolerance for MAE
self.approxEqualTensorToPIL(
resized_tensor_f, resized_pil_img, tol=8.0, msg="{}, {}".format(size, interpolation)
)
if isinstance(size, int):
script_size = [size, ]
else:
script_size = size
resize_result = script_fn(tensor, size=script_size, interpolation=interpolation)
self.assertTrue(resized_tensor.equal(resize_result), msg="{}, {}".format(size, interpolation))
self._test_fn_on_batch(
batch_tensors, F.resize, size=script_size, interpolation=interpolation
)
def test_resized_crop(self):
# test values of F.resized_crop in several cases:
# 1) resize to the same size, crop to the same size => should be identity
tensor, _ = self._create_data(26, 36, device=self.device)
for i in [0, 2, 3]:
out_tensor = F.resized_crop(tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=i)
self.assertTrue(tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5]))
# 2) resize by half and crop a TL corner
tensor, _ = self._create_data(26, 36, device=self.device)
out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=0)
expected_out_tensor = tensor[:, :20:2, :30:2]
self.assertTrue(
expected_out_tensor.equal(out_tensor),
msg="{} vs {}".format(expected_out_tensor[0, :10, :10], out_tensor[0, :10, :10])
)
batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
self._test_fn_on_batch(
batch_tensors, F.resized_crop, top=1, left=2, height=20, width=30, size=[10, 15], interpolation=0
)
def _test_affine_identity_map(self, tensor, scripted_affine):
# 1) identity map
out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)
out_tensor = scripted_affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)
def _test_affine_square_rotations(self, tensor, pil_img, scripted_affine):
# 2) Test rotation
test_configs = [
(90, torch.rot90(tensor, k=1, dims=(-1, -2))),
(45, None),
(30, None),
(-30, None),
(-45, None),
(-90, torch.rot90(tensor, k=-1, dims=(-1, -2))),
(180, torch.rot90(tensor, k=2, dims=(-1, -2))),
]
for a, true_tensor in test_configs:
out_pil_img = F.affine(
pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0
)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))).to(self.device)
for fn in [F.affine, scripted_affine]:
out_tensor = fn(
tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0
)
if true_tensor is not None:
self.assertTrue(
true_tensor.equal(out_tensor),
msg="{}\n{} vs \n{}".format(a, out_tensor[0, :5, :5], true_tensor[0, :5, :5])
)
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 6% of different pixels
self.assertLess(
ratio_diff_pixels,
0.06,
msg="{}\n{} vs \n{}".format(
ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
def _test_affine_rect_rotations(self, tensor, pil_img, scripted_affine):
test_configs = [
90, 45, 15, -30, -60, -120
]
for a in test_configs:
out_pil_img = F.affine(
pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0
)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.affine, scripted_affine]:
out_tensor = fn(
tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0
).cpu()
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 3% of different pixels
self.assertLess(
ratio_diff_pixels,
0.03,
msg="{}: {}\n{} vs \n{}".format(
a, ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
def _test_affine_translations(self, tensor, pil_img, scripted_affine):
# 3) Test translation
test_configs = [
[10, 12], (-12, -13)
]
for t in test_configs:
out_pil_img = F.affine(pil_img, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
self.compareTensorToPIL(out_tensor, out_pil_img)
def _test_affine_all_ops(self, tensor, pil_img, scripted_affine):
# 4) Test rotation + translation + scale + shear
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [-5, 4], 1.2, [0.0, 0.0]),
(33, (-4, -8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(-25, [0, 0], 1.2, [0.0, 15.0]),
(-45, [-10, 0], 0.7, [2.0, 5.0]),
(-45, [-10, -10], 1.2, [4.0, 5.0]),
(-90, [0, 0], 1.0, [0.0, 0.0]),
]
for r in [0, ]:
for a, t, s, sh in test_configs:
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r).cpu()
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% (cpu), 6% (cuda) of different pixels
tol = 0.06 if self.device == "cuda" else 0.05
self.assertLess(
ratio_diff_pixels,
tol,
msg="{}: {}\n{} vs \n{}".format(
(r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
def test_affine(self):
# Tests on square and rectangular images
scripted_affine = torch.jit.script(F.affine)
data = [self._create_data(26, 26, device=self.device), self._create_data(32, 26, device=self.device)]
for tensor, pil_img in data:
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
# skip float16 on CPU case
continue
if dt is not None:
tensor = tensor.to(dtype=dt)
self._test_affine_identity_map(tensor, scripted_affine)
if pil_img.size[0] == pil_img.size[1]:
self._test_affine_square_rotations(tensor, pil_img, scripted_affine)
else:
self._test_affine_rect_rotations(tensor, pil_img, scripted_affine)
self._test_affine_translations(tensor, pil_img, scripted_affine)
self._test_affine_all_ops(tensor, pil_img, scripted_affine)
batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
if dt is not None:
batch_tensors = batch_tensors.to(dtype=dt)
self._test_fn_on_batch(
batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0]
)
def _test_rotate_all_options(self, tensor, pil_img, scripted_rotate, centers):
img_size = pil_img.size
dt = tensor.dtype
for r in [0, ]:
for a in range(-180, 180, 17):
for e in [True, False]:
for c in centers:
out_pil_img = F.rotate(pil_img, angle=a, resample=r, expand=e, center=c)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.rotate, scripted_rotate]:
out_tensor = fn(tensor, angle=a, resample=r, expand=e, center=c).cpu()
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
self.assertEqual(
out_tensor.shape,
out_pil_tensor.shape,
msg="{}: {} vs {}".format(
(img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape
)
)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 3% of different pixels
self.assertLess(
ratio_diff_pixels,
0.03,
msg="{}: {}\n{} vs \n{}".format(
(img_size, r, dt, a, e, c),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)
def test_rotate(self):
# Tests on square image
scripted_rotate = torch.jit.script(F.rotate)
data = [self._create_data(26, 26, device=self.device), self._create_data(32, 26, device=self.device)]
for tensor, pil_img in data:
img_size = pil_img.size
centers = [
None,
(int(img_size[0] * 0.3), int(img_size[0] * 0.4)),
[int(img_size[0] * 0.5), int(img_size[0] * 0.6)]
]
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
# skip float16 on CPU case
continue
if dt is not None:
tensor = tensor.to(dtype=dt)
self._test_rotate_all_options(tensor, pil_img, scripted_rotate, centers)
batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
if dt is not None:
batch_tensors = batch_tensors.to(dtype=dt)
center = (20, 22)
self._test_fn_on_batch(
batch_tensors, F.rotate, angle=32, resample=0, expand=True, center=center
)
def _test_perspective(self, tensor, pil_img, scripted_transform, test_configs):
dt = tensor.dtype
for r in [0, ]:
for spoints, epoints in test_configs:
out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
for fn in [F.perspective, scripted_transform]:
out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r).cpu()
if out_tensor.dtype != torch.uint8:
out_tensor = out_tensor.to(torch.uint8)
num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance : less than 5% of different pixels
self.assertLess(
ratio_diff_pixels,
0.05,
msg="{}: {}\n{} vs \n{}".format(
(r, dt, spoints, epoints),
ratio_diff_pixels,
out_tensor[0, :7, :7],
out_pil_tensor[0, :7, :7]
)
)
def test_perspective(self):
from torchvision.transforms import RandomPerspective
data = [self._create_data(26, 34, device=self.device), self._create_data(26, 26, device=self.device)]
scripted_transform = torch.jit.script(F.perspective)
for tensor, pil_img in data:
test_configs = [
[[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
[[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
[[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
]
n = 10
test_configs += [
RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n) for i in range(n)
]
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
                    # skip the float16-on-CPU case
continue
if dt is not None:
tensor = tensor.to(dtype=dt)
self._test_perspective(tensor, pil_img, scripted_transform, test_configs)
batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
if dt is not None:
batch_tensors = batch_tensors.to(dtype=dt)
for spoints, epoints in test_configs:
self._test_fn_on_batch(
batch_tensors, F.perspective, startpoints=spoints, endpoints=epoints, interpolation=0
)
def test_gaussian_blur(self):
small_image_tensor = torch.from_numpy(
np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))
).permute(2, 0, 1).to(self.device)
large_image_tensor = torch.from_numpy(
np.arange(26 * 28, dtype="uint8").reshape((1, 26, 28))
).to(self.device)
scripted_transform = torch.jit.script(F.gaussian_blur)
# true_cv2_results = {
# # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))
# # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8)
# "3_3_0.8": ...
# # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.5)
# "3_3_0.5": ...
# # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.8)
# "3_5_0.8": ...
# # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.5)
# "3_5_0.5": ...
# # np_img2 = np.arange(26 * 28, dtype="uint8").reshape((26, 28))
# # cv2.GaussianBlur(np_img2, ksize=(23, 23), sigmaX=1.7)
# "23_23_1.7": ...
# }
p = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'gaussian_blur_opencv_results.pt')
true_cv2_results = torch.load(p)
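        # keys in the reference file follow "{h}_{w}_{c}__{ksize_x}_{ksize_y}_{sigma}",
        # matching the gt_key built below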
for tensor in [small_image_tensor, large_image_tensor]:
for dt in [None, torch.float32, torch.float64, torch.float16]:
if dt == torch.float16 and torch.device(self.device).type == "cpu":
                    # skip the float16-on-CPU case
continue
if dt is not None:
tensor = tensor.to(dtype=dt)
for ksize in [(3, 3), [3, 5], (23, 23)]:
for sigma in [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)]:
_ksize = (ksize, ksize) if isinstance(ksize, int) else ksize
_sigma = sigma[0] if sigma is not None else None
shape = tensor.shape
gt_key = "{}_{}_{}__{}_{}_{}".format(
shape[-2], shape[-1], shape[-3],
_ksize[0], _ksize[1], _sigma
)
if gt_key not in true_cv2_results:
continue
true_out = torch.tensor(
true_cv2_results[gt_key]
).reshape(shape[-2], shape[-1], shape[-3]).permute(2, 0, 1).to(tensor)
for fn in [F.gaussian_blur, scripted_transform]:
out = fn(tensor, kernel_size=ksize, sigma=sigma)
self.assertEqual(true_out.shape, out.shape, msg="{}, {}".format(ksize, sigma))
self.assertLessEqual(
torch.max(true_out.float() - out.float()),
1.0,
msg="{}, {}".format(ksize, sigma)
)
@unittest.skipIf(not torch.cuda.is_available(), reason="Skip if no CUDA device")
class CUDATester(Tester):
def setUp(self):
self.device = "cuda"
if __name__ == '__main__':
unittest.main()
import torch.hub as hub
import tempfile
import shutil
import os
import sys
import unittest
def sum_of_model_parameters(model):
s = 0
for p in model.parameters():
s += p.sum()
return s
SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.9931640625
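# The constant above is a cheap checksum of the pretrained resnet18 weights:
# summing every parameter of a freshly loaded model should reproduce it. A sketch,
# assuming the upstream weights are unchanged:
#     model = hub.load('pytorch/vision', 'resnet18', pretrained=True)
#     assert abs(sum_of_model_parameters(model).item()
#                - SUM_OF_PRETRAINED_RESNET18_PARAMS) < 1e-2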
@unittest.skipIf('torchvision' in sys.modules,
'TestHub must start without torchvision imported')
class TestHub(unittest.TestCase):
    # This check must run ONCE, before any test starts.
    # - If torchvision were imported before the tests start, hub.load could pick up
    #   artifacts such as _C.so from the installed wheel that do not exist in the
    #   downloaded zip.
    # - After the first test runs, torchvision is already in sys.modules because all
    #   hub tests run in the same Python process.
def test_load_from_github(self):
hub_model = hub.load(
'pytorch/vision',
'resnet18',
pretrained=True,
progress=False)
self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(),
SUM_OF_PRETRAINED_RESNET18_PARAMS,
places=2)
def test_set_dir(self):
temp_dir = tempfile.gettempdir()
hub.set_dir(temp_dir)
hub_model = hub.load(
'pytorch/vision',
'resnet18',
pretrained=True,
progress=False)
self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(),
SUM_OF_PRETRAINED_RESNET18_PARAMS,
places=2)
self.assertTrue(os.path.exists(temp_dir + '/pytorch_vision_master'))
shutil.rmtree(temp_dir + '/pytorch_vision_master')
def test_list_entrypoints(self):
entry_lists = hub.list('pytorch/vision', force_reload=True)
self.assertIn('resnet18', entry_lists)
if __name__ == "__main__":
unittest.main()
import os
import io
import glob
import unittest
import sys
import torch
import torchvision
from PIL import Image
from torchvision.io.image import (
decode_png, decode_jpeg, encode_jpeg, write_jpeg, decode_image, read_file,
encode_png, write_png, write_file)
import numpy as np
from common_utils import get_tmp_dir
IMAGE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
IMAGE_DIR = os.path.join(IMAGE_ROOT, "fakedata", "imagefolder")
DAMAGED_JPEG = os.path.join(IMAGE_ROOT, 'damaged_jpeg')
def get_images(directory, img_ext):
assert os.path.isdir(directory)
for root, _, files in os.walk(directory):
if os.path.basename(root) in {'damaged_jpeg', 'jpeg_write'}:
continue
for fl in files:
_, ext = os.path.splitext(fl)
if ext == img_ext:
yield os.path.join(root, fl)
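# get_images lazily yields matching image paths, e.g. get_images(IMAGE_ROOT, ".jpg")
# walks the assets tree while skipping the damaged_jpeg and jpeg_write reference dirs.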
class ImageTester(unittest.TestCase):
def test_decode_jpeg(self):
for img_path in get_images(IMAGE_ROOT, ".jpg"):
img_pil = torch.load(img_path.replace('jpg', 'pth'))
img_pil = img_pil.permute(2, 0, 1)
data = read_file(img_path)
img_ljpeg = decode_jpeg(data)
self.assertTrue(img_ljpeg.equal(img_pil))
with self.assertRaisesRegex(RuntimeError, "Expected a non empty 1-dimensional tensor"):
decode_jpeg(torch.empty((100, 1), dtype=torch.uint8))
with self.assertRaisesRegex(RuntimeError, "Expected a torch.uint8 tensor"):
decode_jpeg(torch.empty((100, ), dtype=torch.float16))
with self.assertRaises(RuntimeError):
decode_jpeg(torch.empty((100), dtype=torch.uint8))
def test_damaged_images(self):
# Test image with bad Huffman encoding (should not raise)
bad_huff = read_file(os.path.join(DAMAGED_JPEG, 'bad_huffman.jpg'))
try:
_ = decode_jpeg(bad_huff)
        except RuntimeError:
            self.fail("decode_jpeg should not raise on an image with bad Huffman encoding")
# Truncated images should raise an exception
truncated_images = glob.glob(
os.path.join(DAMAGED_JPEG, 'corrupt*.jpg'))
for image_path in truncated_images:
data = read_file(image_path)
with self.assertRaises(RuntimeError):
decode_jpeg(data)
def test_encode_jpeg(self):
for img_path in get_images(IMAGE_ROOT, ".jpg"):
dirname = os.path.dirname(img_path)
filename, _ = os.path.splitext(os.path.basename(img_path))
write_folder = os.path.join(dirname, 'jpeg_write')
expected_file = os.path.join(
write_folder, '{0}_pil.jpg'.format(filename))
img = decode_jpeg(read_file(img_path))
with open(expected_file, 'rb') as f:
pil_bytes = f.read()
pil_bytes = torch.as_tensor(list(pil_bytes), dtype=torch.uint8)
for src_img in [img, img.contiguous()]:
# PIL sets jpeg quality to 75 by default
jpeg_bytes = encode_jpeg(src_img, quality=75)
self.assertTrue(jpeg_bytes.equal(pil_bytes))
with self.assertRaisesRegex(
RuntimeError, "Input tensor dtype should be uint8"):
encode_jpeg(torch.empty((3, 100, 100), dtype=torch.float32))
with self.assertRaisesRegex(
ValueError, "Image quality should be a positive number "
"between 1 and 100"):
encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=-1)
with self.assertRaisesRegex(
ValueError, "Image quality should be a positive number "
"between 1 and 100"):
encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=101)
with self.assertRaisesRegex(
RuntimeError, "The number of channels should be 1 or 3, got: 5"):
encode_jpeg(torch.empty((5, 100, 100), dtype=torch.uint8))
with self.assertRaisesRegex(
RuntimeError, "Input data should be a 3-dimensional tensor"):
encode_jpeg(torch.empty((1, 3, 100, 100), dtype=torch.uint8))
with self.assertRaisesRegex(
RuntimeError, "Input data should be a 3-dimensional tensor"):
encode_jpeg(torch.empty((100, 100), dtype=torch.uint8))
def test_write_jpeg(self):
for img_path in get_images(IMAGE_ROOT, ".jpg"):
data = read_file(img_path)
img = decode_jpeg(data)
basedir = os.path.dirname(img_path)
filename, _ = os.path.splitext(os.path.basename(img_path))
torch_jpeg = os.path.join(
basedir, '{0}_torch.jpg'.format(filename))
pil_jpeg = os.path.join(
basedir, 'jpeg_write', '{0}_pil.jpg'.format(filename))
write_jpeg(img, torch_jpeg, quality=75)
with open(torch_jpeg, 'rb') as f:
torch_bytes = f.read()
with open(pil_jpeg, 'rb') as f:
pil_bytes = f.read()
os.remove(torch_jpeg)
self.assertEqual(torch_bytes, pil_bytes)
def test_decode_png(self):
for img_path in get_images(IMAGE_DIR, ".png"):
img_pil = torch.from_numpy(np.array(Image.open(img_path)))
img_pil = img_pil.permute(2, 0, 1)
data = read_file(img_path)
img_lpng = decode_png(data)
self.assertTrue(img_lpng.equal(img_pil))
with self.assertRaises(RuntimeError):
decode_png(torch.empty((), dtype=torch.uint8))
with self.assertRaises(RuntimeError):
decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8))
def test_encode_png(self):
for img_path in get_images(IMAGE_DIR, '.png'):
pil_image = Image.open(img_path)
img_pil = torch.from_numpy(np.array(pil_image))
img_pil = img_pil.permute(2, 0, 1)
png_buf = encode_png(img_pil, compression_level=6)
rec_img = Image.open(io.BytesIO(bytes(png_buf.tolist())))
rec_img = torch.from_numpy(np.array(rec_img))
rec_img = rec_img.permute(2, 0, 1)
self.assertTrue(img_pil.equal(rec_img))
with self.assertRaisesRegex(
RuntimeError, "Input tensor dtype should be uint8"):
encode_png(torch.empty((3, 100, 100), dtype=torch.float32))
with self.assertRaisesRegex(
RuntimeError, "Compression level should be between 0 and 9"):
encode_png(torch.empty((3, 100, 100), dtype=torch.uint8),
compression_level=-1)
with self.assertRaisesRegex(
RuntimeError, "Compression level should be between 0 and 9"):
encode_png(torch.empty((3, 100, 100), dtype=torch.uint8),
compression_level=10)
with self.assertRaisesRegex(
RuntimeError, "The number of channels should be 1 or 3, got: 5"):
encode_png(torch.empty((5, 100, 100), dtype=torch.uint8))
def test_write_png(self):
for img_path in get_images(IMAGE_DIR, '.png'):
pil_image = Image.open(img_path)
img_pil = torch.from_numpy(np.array(pil_image))
img_pil = img_pil.permute(2, 0, 1)
basedir = os.path.dirname(img_path)
filename, _ = os.path.splitext(os.path.basename(img_path))
torch_png = os.path.join(basedir, '{0}_torch.png'.format(filename))
write_png(img_pil, torch_png, compression_level=6)
saved_image = torch.from_numpy(np.array(Image.open(torch_png)))
os.remove(torch_png)
saved_image = saved_image.permute(2, 0, 1)
self.assertTrue(img_pil.equal(saved_image))
def test_decode_image(self):
for img_path in get_images(IMAGE_ROOT, ".jpg"):
img_pil = torch.load(img_path.replace('jpg', 'pth'))
img_pil = img_pil.permute(2, 0, 1)
img_ljpeg = decode_image(read_file(img_path))
self.assertTrue(img_ljpeg.equal(img_pil))
for img_path in get_images(IMAGE_DIR, ".png"):
img_pil = torch.from_numpy(np.array(Image.open(img_path)))
img_pil = img_pil.permute(2, 0, 1)
img_lpng = decode_image(read_file(img_path))
self.assertTrue(img_lpng.equal(img_pil))
def test_read_file(self):
with get_tmp_dir() as d:
fname, content = 'test1.bin', b'TorchVision\211\n'
fpath = os.path.join(d, fname)
with open(fpath, 'wb') as f:
f.write(content)
data = read_file(fpath)
expected = torch.tensor(list(content), dtype=torch.uint8)
self.assertTrue(data.equal(expected))
os.unlink(fpath)
with self.assertRaisesRegex(
RuntimeError, "No such file or directory: 'tst'"):
read_file('tst')
def test_write_file(self):
with get_tmp_dir() as d:
fname, content = 'test1.bin', b'TorchVision\211\n'
fpath = os.path.join(d, fname)
content_tensor = torch.tensor(list(content), dtype=torch.uint8)
write_file(fpath, content_tensor)
with open(fpath, 'rb') as f:
saved_content = f.read()
self.assertEqual(content, saved_content)
os.unlink(fpath)
if __name__ == '__main__':
unittest.main()
import os
import contextlib
import tempfile
import torch
import torchvision.datasets.utils as utils
import torchvision.io as io
from torchvision import get_video_backend
import unittest
import warnings
from urllib.error import URLError
from common_utils import get_tmp_dir
try:
import av
# Do a version test too
io.video._check_av_available()
except ImportError:
av = None
def _create_video_frames(num_frames, height, width):
y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width))
data = []
for i in range(num_frames):
xc = float(i) / num_frames
yc = 1 - float(i) / (2 * num_frames)
d = torch.exp(-((x - xc) ** 2 + (y - yc) ** 2) / 2) * 255
data.append(d.unsqueeze(2).repeat(1, 1, 3).byte())
return torch.stack(data, 0)
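# _create_video_frames synthesizes a (num_frames, height, width, 3) uint8 clip of a
# Gaussian blob whose center drifts from frame to frame, so every frame is distinct
# and easy to compare after an encode/decode round trip.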
@contextlib.contextmanager
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
if lossless:
if video_codec is not None:
raise ValueError("video_codec can't be specified together with lossless")
if options is not None:
raise ValueError("options can't be specified together with lossless")
video_codec = 'libx264rgb'
options = {'crf': '0'}
if video_codec is None:
if get_video_backend() == "pyav":
video_codec = 'libx264'
else:
# when video_codec is not set, we assume it is libx264rgb which accepts
# RGB pixel formats as input instead of YUV
video_codec = 'libx264rgb'
if options is None:
options = {}
data = _create_video_frames(num_frames, height, width)
with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
f.close()
io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
yield f.name, data
os.unlink(f.name)
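# Typical use of temp_video (a sketch): with lossless=True it selects libx264rgb
# and crf=0, so decoded frames can be compared bit-for-bit against the source:
#     with temp_video(10, 300, 300, fps=5, lossless=True) as (f_name, data):
#         lv, _, info = io.read_video(f_name)
#         assert data.equal(lv)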
@unittest.skipIf(get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT,
"video_reader backend not available")
@unittest.skipIf(av is None, "PyAV unavailable")
class TestIO(unittest.TestCase):
# compression adds artifacts, thus we add a tolerance of
# 6 in 0-255 range
TOLERANCE = 6
def test_write_read_video(self):
with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
lv, _, info = io.read_video(f_name)
self.assertTrue(data.equal(lv))
self.assertEqual(info["video_fps"], 5)
@unittest.skipIf(not io._HAS_VIDEO_OPT, "video_reader backend is not chosen")
def test_probe_video_from_file(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
video_info = io._probe_video_from_file(f_name)
self.assertAlmostEqual(video_info.video_duration, 2, delta=0.1)
self.assertAlmostEqual(video_info.video_fps, 5, delta=0.1)
@unittest.skipIf(not io._HAS_VIDEO_OPT, "video_reader backend is not chosen")
def test_probe_video_from_memory(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
with open(f_name, "rb") as fp:
filebuffer = fp.read()
video_info = io._probe_video_from_memory(filebuffer)
self.assertAlmostEqual(video_info.video_duration, 2, delta=0.1)
self.assertAlmostEqual(video_info.video_fps, 5, delta=0.1)
def test_read_timestamps(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
pts, _ = io.read_video_timestamps(f_name)
# note: not all formats/codecs provide accurate information for computing the
# timestamps. For the format that we use here, this information is available,
# so we use it as a baseline
container = av.open(f_name)
stream = container.streams[0]
pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
expected_pts = [i * pts_step for i in range(num_frames)]
self.assertEqual(pts, expected_pts)
container.close()
def test_read_partial_video(self):
with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
pts, _ = io.read_video_timestamps(f_name)
for start in range(5):
for offset in range(1, 4):
lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
s_data = data[start:(start + offset)]
self.assertEqual(len(lv), offset)
self.assertTrue(s_data.equal(lv))
if get_video_backend() == "pyav":
# for "video_reader" backend, we don't decode the closest early frame
# when the given start pts is not matching any frame pts
lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
self.assertEqual(len(lv), 4)
self.assertTrue(data[4:8].equal(lv))
def test_read_partial_video_bframes(self):
# do not use lossless encoding, to test the presence of B-frames
options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'}
with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
pts, _ = io.read_video_timestamps(f_name)
for start in range(0, 80, 20):
for offset in range(1, 4):
lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
s_data = data[start:(start + offset)]
self.assertEqual(len(lv), offset)
self.assertTrue((s_data.float() - lv.float()).abs().max() < self.TOLERANCE)
lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
# TODO fix this
if get_video_backend() == 'pyav':
self.assertEqual(len(lv), 4)
self.assertTrue((data[4:8].float() - lv.float()).abs().max() < self.TOLERANCE)
else:
self.assertEqual(len(lv), 3)
self.assertTrue((data[5:8].float() - lv.float()).abs().max() < self.TOLERANCE)
def test_read_packed_b_frames_divx_file(self):
with get_tmp_dir() as temp_dir:
name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
f_name = os.path.join(temp_dir, name)
url = "https://download.pytorch.org/vision_tests/io/" + name
try:
utils.download_url(url, temp_dir)
pts, fps = io.read_video_timestamps(f_name)
self.assertEqual(pts, sorted(pts))
self.assertEqual(fps, 30)
except URLError:
msg = "could not download test file '{}'".format(url)
warnings.warn(msg, RuntimeWarning)
raise unittest.SkipTest(msg)
def test_read_timestamps_from_packet(self):
with temp_video(10, 300, 300, 5, video_codec='mpeg4') as (f_name, data):
pts, _ = io.read_video_timestamps(f_name)
# note: not all formats/codecs provide accurate information for computing the
# timestamps. For the format that we use here, this information is available,
# so we use it as a baseline
container = av.open(f_name)
stream = container.streams[0]
# make sure we went through the optimized codepath
self.assertIn(b'Lavc', stream.codec_context.extradata)
pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
expected_pts = [i * pts_step for i in range(num_frames)]
self.assertEqual(pts, expected_pts)
container.close()
def test_read_video_pts_unit_sec(self):
with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
lv, _, info = io.read_video(f_name, pts_unit='sec')
self.assertTrue(data.equal(lv))
self.assertEqual(info["video_fps"], 5)
self.assertEqual(info, {"video_fps": 5})
def test_read_timestamps_pts_unit_sec(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')
container = av.open(f_name)
stream = container.streams[0]
pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)]
self.assertEqual(pts, expected_pts)
container.close()
def test_read_partial_video_pts_unit_sec(self):
with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')
for start in range(5):
for offset in range(1, 4):
lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit='sec')
s_data = data[start:(start + offset)]
self.assertEqual(len(lv), offset)
self.assertTrue(s_data.equal(lv))
container = av.open(f_name)
stream = container.streams[0]
lv, _, _ = io.read_video(f_name,
int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7],
pts_unit='sec')
if get_video_backend() == "pyav":
# for "video_reader" backend, we don't decode the closest early frame
# when the given start pts is not matching any frame pts
self.assertEqual(len(lv), 4)
self.assertTrue(data[4:8].equal(lv))
container.close()
def test_read_video_corrupted_file(self):
with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
f.write(b'This is not an mpg4 file')
video, audio, info = io.read_video(f.name)
self.assertIsInstance(video, torch.Tensor)
self.assertIsInstance(audio, torch.Tensor)
self.assertEqual(video.numel(), 0)
self.assertEqual(audio.numel(), 0)
self.assertEqual(info, {})
def test_read_video_timestamps_corrupted_file(self):
with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
f.write(b'This is not an mpg4 file')
video_pts, video_fps = io.read_video_timestamps(f.name)
self.assertEqual(video_pts, [])
self.assertIs(video_fps, None)
@unittest.skip("Temporarily disabled due to new pyav")
def test_read_video_partially_corrupted_file(self):
with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data):
with open(f_name, 'r+b') as f:
size = os.path.getsize(f_name)
bytes_to_overwrite = size // 10
# seek to the middle of the file
f.seek(5 * bytes_to_overwrite)
# corrupt 10% of the file from the middle
f.write(b'\xff' * bytes_to_overwrite)
# this exercises the container.decode assertion check
video, audio, info = io.read_video(f.name, pts_unit='sec')
            # check that fewer frames than the original 5 are decoded
            # TODO fix this
if get_video_backend() == 'pyav':
self.assertEqual(len(video), 3)
else:
self.assertEqual(len(video), 4)
# but the valid decoded content is still correct
self.assertTrue(video[:3].equal(data[:3]))
# and the last few frames are wrong
self.assertFalse(video.equal(data))
# TODO add tests for audio
if __name__ == '__main__':
unittest.main()
import unittest
from torchvision import set_video_backend
import test_io
# Disabling the video backend switching temporarily
# set_video_backend('video_reader')
if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromModule(test_io)
unittest.TextTestRunner(verbosity=1).run(suite)
#include <torch/script.h>
#include <torch/torch.h>
#include <iostream>
#include "../torchvision/csrc/models/models.h"
using namespace vision::models;
template <typename Model>
torch::Tensor forward_model(const std::string& input_path, torch::Tensor x) {
Model network;
torch::load(network, input_path);
network->eval();
return network->forward(x);
}
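// forward_model loads serialized weights from input_path into a fresh Model,
// switches it to eval mode and runs a single forward pass. GoogLeNet and
// InceptionV3 below bypass this template because their forward() returns a
// struct whose .output field holds the logits.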
torch::Tensor forward_alexnet(const std::string& input_path, torch::Tensor x) {
return forward_model<AlexNet>(input_path, x);
}
torch::Tensor forward_vgg11(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG11>(input_path, x);
}
torch::Tensor forward_vgg13(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG13>(input_path, x);
}
torch::Tensor forward_vgg16(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG16>(input_path, x);
}
torch::Tensor forward_vgg19(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG19>(input_path, x);
}
torch::Tensor forward_vgg11bn(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG11BN>(input_path, x);
}
torch::Tensor forward_vgg13bn(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG13BN>(input_path, x);
}
torch::Tensor forward_vgg16bn(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG16BN>(input_path, x);
}
torch::Tensor forward_vgg19bn(const std::string& input_path, torch::Tensor x) {
return forward_model<VGG19BN>(input_path, x);
}
torch::Tensor forward_resnet18(const std::string& input_path, torch::Tensor x) {
return forward_model<ResNet18>(input_path, x);
}
torch::Tensor forward_resnet34(const std::string& input_path, torch::Tensor x) {
return forward_model<ResNet34>(input_path, x);
}
torch::Tensor forward_resnet50(const std::string& input_path, torch::Tensor x) {
return forward_model<ResNet50>(input_path, x);
}
torch::Tensor forward_resnet101(
const std::string& input_path,
torch::Tensor x) {
return forward_model<ResNet101>(input_path, x);
}
torch::Tensor forward_resnet152(
const std::string& input_path,
torch::Tensor x) {
return forward_model<ResNet152>(input_path, x);
}
torch::Tensor forward_resnext50_32x4d(
const std::string& input_path,
torch::Tensor x) {
return forward_model<ResNext50_32x4d>(input_path, x);
}
torch::Tensor forward_resnext101_32x8d(
const std::string& input_path,
torch::Tensor x) {
return forward_model<ResNext101_32x8d>(input_path, x);
}
torch::Tensor forward_wide_resnet50_2(
const std::string& input_path,
torch::Tensor x) {
return forward_model<WideResNet50_2>(input_path, x);
}
torch::Tensor forward_wide_resnet101_2(
const std::string& input_path,
torch::Tensor x) {
return forward_model<WideResNet101_2>(input_path, x);
}
torch::Tensor forward_squeezenet1_0(
const std::string& input_path,
torch::Tensor x) {
return forward_model<SqueezeNet1_0>(input_path, x);
}
torch::Tensor forward_squeezenet1_1(
const std::string& input_path,
torch::Tensor x) {
return forward_model<SqueezeNet1_1>(input_path, x);
}
torch::Tensor forward_densenet121(
const std::string& input_path,
torch::Tensor x) {
return forward_model<DenseNet121>(input_path, x);
}
torch::Tensor forward_densenet169(
const std::string& input_path,
torch::Tensor x) {
return forward_model<DenseNet169>(input_path, x);
}
torch::Tensor forward_densenet201(
const std::string& input_path,
torch::Tensor x) {
return forward_model<DenseNet201>(input_path, x);
}
torch::Tensor forward_densenet161(
const std::string& input_path,
torch::Tensor x) {
return forward_model<DenseNet161>(input_path, x);
}
torch::Tensor forward_mobilenetv2(
const std::string& input_path,
torch::Tensor x) {
return forward_model<MobileNetV2>(input_path, x);
}
torch::Tensor forward_googlenet(
const std::string& input_path,
torch::Tensor x) {
GoogLeNet network;
torch::load(network, input_path);
network->eval();
return network->forward(x).output;
}
torch::Tensor forward_inceptionv3(
const std::string& input_path,
torch::Tensor x) {
InceptionV3 network;
torch::load(network, input_path);
network->eval();
return network->forward(x).output;
}
torch::Tensor forward_mnasnet0_5(const std::string& input_path, torch::Tensor x) {
return forward_model<MNASNet0_5>(input_path, x);
}
torch::Tensor forward_mnasnet0_75(const std::string& input_path, torch::Tensor x) {
return forward_model<MNASNet0_75>(input_path, x);
}
torch::Tensor forward_mnasnet1_0(const std::string& input_path, torch::Tensor x) {
return forward_model<MNASNet1_0>(input_path, x);
}
torch::Tensor forward_mnasnet1_3(const std::string& input_path, torch::Tensor x) {
return forward_model<MNASNet1_3>(input_path, x);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward_alexnet", &forward_alexnet, "forward_alexnet");
m.def("forward_vgg11", &forward_vgg11, "forward_vgg11");
m.def("forward_vgg13", &forward_vgg13, "forward_vgg13");
m.def("forward_vgg16", &forward_vgg16, "forward_vgg16");
m.def("forward_vgg19", &forward_vgg19, "forward_vgg19");
m.def("forward_vgg11bn", &forward_vgg11bn, "forward_vgg11bn");
m.def("forward_vgg13bn", &forward_vgg13bn, "forward_vgg13bn");
m.def("forward_vgg16bn", &forward_vgg16bn, "forward_vgg16bn");
m.def("forward_vgg19bn", &forward_vgg19bn, "forward_vgg19bn");
m.def("forward_resnet18", &forward_resnet18, "forward_resnet18");
m.def("forward_resnet34", &forward_resnet34, "forward_resnet34");
m.def("forward_resnet50", &forward_resnet50, "forward_resnet50");
m.def("forward_resnet101", &forward_resnet101, "forward_resnet101");
m.def("forward_resnet152", &forward_resnet152, "forward_resnet152");
m.def(
"forward_resnext50_32x4d",
&forward_resnext50_32x4d,
"forward_resnext50_32x4d");
m.def(
"forward_resnext101_32x8d",
&forward_resnext101_32x8d,
"forward_resnext101_32x8d");
m.def(
"forward_wide_resnet50_2",
&forward_wide_resnet50_2,
"forward_wide_resnet50_2");
m.def(
"forward_wide_resnet101_2",
&forward_wide_resnet101_2,
"forward_wide_resnet101_2");
m.def(
"forward_squeezenet1_0", &forward_squeezenet1_0, "forward_squeezenet1_0");
m.def(
"forward_squeezenet1_1", &forward_squeezenet1_1, "forward_squeezenet1_1");
m.def("forward_densenet121", &forward_densenet121, "forward_densenet121");
m.def("forward_densenet169", &forward_densenet169, "forward_densenet169");
m.def("forward_densenet201", &forward_densenet201, "forward_densenet201");
m.def("forward_densenet161", &forward_densenet161, "forward_densenet161");
m.def("forward_mobilenetv2", &forward_mobilenetv2, "forward_mobilenetv2");
m.def("forward_googlenet", &forward_googlenet, "forward_googlenet");
m.def("forward_inceptionv3", &forward_inceptionv3, "forward_inceptionv3");
m.def("forward_mnasnet0_5", &forward_mnasnet0_5, "forward_mnasnet0_5");
m.def("forward_mnasnet0_75", &forward_mnasnet0_75, "forward_mnasnet0_75");
m.def("forward_mnasnet1_0", &forward_mnasnet1_0, "forward_mnasnet1_0");
m.def("forward_mnasnet1_3", &forward_mnasnet1_3, "forward_mnasnet1_3");
}
from common_utils import TestCase, map_nested_tensor_object, freeze_rng_state
from collections import OrderedDict
from itertools import product
import torch
import torch.nn as nn
import numpy as np
from torchvision import models
import unittest
import traceback
import random
def set_rng_seed(seed):
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
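# set_rng_seed seeds torch, random and numpy together so that model weights and
# generated inputs are reproducible across runs of a test.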
def get_available_classification_models():
# TODO add a registration mechanism to torchvision.models
return [k for k, v in models.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
def get_available_segmentation_models():
# TODO add a registration mechanism to torchvision.models
return [k for k, v in models.segmentation.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
def get_available_detection_models():
# TODO add a registration mechanism to torchvision.models
return [k for k, v in models.detection.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
def get_available_video_models():
# TODO add a registration mechanism to torchvision.models
return [k for k, v in models.video.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
# Models that are in torch hub, plus r3d_18 and several detection models. We tried
# testing all models, but that made the test too slow.
# If 'unwrapper' is provided, it is called on the scripted model's outputs before
# they are compared to the eager model's outputs. This is useful when the outputs
# differ between TorchScript and eager mode.
script_test_models = {
'deeplabv3_resnet50': {},
'deeplabv3_resnet101': {},
'mobilenet_v2': {},
'resnext50_32x4d': {},
'fcn_resnet50': {},
'fcn_resnet101': {},
'googlenet': {
'unwrapper': lambda x: x.logits
},
'densenet121': {},
'resnet18': {},
'alexnet': {},
'shufflenet_v2_x1_0': {},
'squeezenet1_0': {},
'vgg11': {},
'inception_v3': {
'unwrapper': lambda x: x.logits
},
'r3d_18': {},
"fasterrcnn_resnet50_fpn": {
'unwrapper': lambda x: x[1]
},
"maskrcnn_resnet50_fpn": {
'unwrapper': lambda x: x[1]
},
"keypointrcnn_resnet50_fpn": {
'unwrapper': lambda x: x[1]
},
"retinanet_resnet50_fpn": {
'unwrapper': lambda x: x[1]
}
}
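# For example, a scripted googlenet returns a GoogLeNetOutputs structure while the
# eager model in eval mode returns a plain tensor, hence its unwrapper extracts
# x.logits; scripted detection models return a (losses, detections) pair, hence
# their unwrapper keeps x[1].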
# The following models exhibit flaky numerics under autocast in _test_*_model harnesses.
# This may be caused by the harness environment (e.g. num classes, input initialization
# via torch.rand), and does not prove autocast is unsuitable when training with real data
# (autocast has been used successfully with real data for some of these models).
# TODO: investigate why autocast numerics are flaky in the harnesses.
#
# For the following models, the _test_*_model harnesses skip the numerical checks
# on outputs when autocast is enabled. They still run an autocasted forward pass,
# which ensures autocast coverage is sufficient to catch dtype errors in each model.
autocast_flaky_numerics = (
"fasterrcnn_resnet50_fpn",
"inception_v3",
"keypointrcnn_resnet50_fpn",
"maskrcnn_resnet50_fpn",
"resnet101",
"resnet152",
"wide_resnet101_2",
"retinanet_resnet50_fpn",
)
class ModelTester(TestCase):
def checkModule(self, model, name, args):
if name not in script_test_models:
return
unwrapper = script_test_models[name].get('unwrapper', None)
return super(ModelTester, self).checkModule(model, args, unwrapper=unwrapper, skip=False)
def _test_classification_model(self, name, input_shape, dev):
set_rng_seed(0)
        # passing num_classes other than the default 1000 makes the test stricter
model = models.__dict__[name](num_classes=50)
model.eval().to(device=dev)
# RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
x = torch.rand(input_shape).to(device=dev)
out = model(x)
self.assertExpected(out.cpu(), prec=0.1, strip_suffix="_" + dev)
self.assertEqual(out.shape[-1], 50)
self.checkModule(model, name, (x,))
if dev == "cuda":
with torch.cuda.amp.autocast():
out = model(x)
# See autocast_flaky_numerics comment at top of file.
if name not in autocast_flaky_numerics:
self.assertExpected(out.cpu(), prec=0.1, strip_suffix="_" + dev)
self.assertEqual(out.shape[-1], 50)
def _test_segmentation_model(self, name, dev):
        # passing num_classes other than the default 1000 makes the test stricter
model = models.segmentation.__dict__[name](num_classes=50, pretrained_backbone=False)
model.eval().to(device=dev)
input_shape = (1, 3, 300, 300)
# RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
x = torch.rand(input_shape).to(device=dev)
out = model(x)
self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300))
self.checkModule(model, name, (x,))
if dev == "cuda":
with torch.cuda.amp.autocast():
out = model(x)
self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300))
def _test_detection_model(self, name, dev):
set_rng_seed(0)
kwargs = {}
if "retinanet" in name:
kwargs["score_thresh"] = 0.013
model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False, **kwargs)
model.eval().to(device=dev)
input_shape = (3, 300, 300)
# RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
x = torch.rand(input_shape).to(device=dev)
model_input = [x]
out = model(model_input)
self.assertIs(model_input[0], x)
def check_out(out):
self.assertEqual(len(out), 1)
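            # subsample_tensor keeps ~20 evenly spaced elements of the flattened
            # tensor, so the stored expected outputs stay small and stable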
def subsample_tensor(tensor):
num_elems = tensor.numel()
num_samples = 20
if num_elems <= num_samples:
return tensor
flat_tensor = tensor.flatten()
ith_index = num_elems // num_samples
return flat_tensor[ith_index - 1::ith_index]
def compute_mean_std(tensor):
# can't compute mean of integral tensor
tensor = tensor.to(torch.double)
mean = torch.mean(tensor)
std = torch.std(tensor)
return {"mean": mean, "std": std}
if name == "maskrcnn_resnet50_fpn":
                # maskrcnn_resnet50_fpn is numerically unstable across platforms, so
                # for now compare the results via their mean and std
test_value = map_nested_tensor_object(out, tensor_map_fn=compute_mean_std)
# mean values are small, use large prec
self.assertExpected(test_value, prec=.01, strip_suffix="_" + dev)
else:
self.assertExpected(map_nested_tensor_object(out, tensor_map_fn=subsample_tensor),
prec=0.01,
strip_suffix="_" + dev)
check_out(out)
scripted_model = torch.jit.script(model)
scripted_model.eval()
scripted_out = scripted_model(model_input)[1]
self.assertEqual(scripted_out[0]["boxes"], out[0]["boxes"])
self.assertEqual(scripted_out[0]["scores"], out[0]["scores"])
        # labels are currently float in the scripted model: needs investigation
        # (the values themselves match)
self.assertEqual(scripted_out[0]["labels"].to(dtype=torch.long), out[0]["labels"])
self.assertTrue("boxes" in out[0])
self.assertTrue("scores" in out[0])
self.assertTrue("labels" in out[0])
# don't check script because we are compiling it here:
# TODO: refactor tests
# self.check_script(model, name)
self.checkModule(model, name, ([x],))
if dev == "cuda":
with torch.cuda.amp.autocast():
out = model(model_input)
# See autocast_flaky_numerics comment at top of file.
if name not in autocast_flaky_numerics:
check_out(out)
def _test_detection_model_validation(self, name):
set_rng_seed(0)
model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False)
input_shape = (3, 300, 300)
x = [torch.rand(input_shape)]
# validate that targets are present in training
self.assertRaises(ValueError, model, x)
# validate type
targets = [{'boxes': 0.}]
self.assertRaises(ValueError, model, x, targets=targets)
# validate boxes shape
for boxes in (torch.rand((4,)), torch.rand((1, 5))):
targets = [{'boxes': boxes}]
self.assertRaises(ValueError, model, x, targets=targets)
# validate that no degenerate boxes are present
boxes = torch.tensor([[1, 3, 1, 4], [2, 4, 3, 4]])
targets = [{'boxes': boxes}]
self.assertRaises(ValueError, model, x, targets=targets)
def _test_video_model(self, name, dev):
        # the default input shape is bs x num_channels x clip_len x h x w
input_shape = (1, 3, 4, 112, 112)
        # test both BasicBlock and Bottleneck
model = models.video.__dict__[name](num_classes=50)
model.eval().to(device=dev)
# RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
x = torch.rand(input_shape).to(device=dev)
out = model(x)
self.checkModule(model, name, (x,))
self.assertEqual(out.shape[-1], 50)
if dev == "cuda":
with torch.cuda.amp.autocast():
out = model(x)
self.assertEqual(out.shape[-1], 50)
def _make_sliced_model(self, model, stop_layer):
layers = OrderedDict()
for name, layer in model.named_children():
layers[name] = layer
if name == stop_layer:
break
new_model = torch.nn.Sequential(layers)
return new_model
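    # _make_sliced_model truncates a model after stop_layer; with stop_layer="layer4"
    # a resnet is reduced to its convolutional trunk, so the test below can check the
    # spatial size of the feature map for each dilation setting.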
def test_memory_efficient_densenet(self):
input_shape = (1, 3, 300, 300)
x = torch.rand(input_shape)
for name in ['densenet121', 'densenet169', 'densenet201', 'densenet161']:
model1 = models.__dict__[name](num_classes=50, memory_efficient=True)
params = model1.state_dict()
num_params = sum([x.numel() for x in model1.parameters()])
model1.eval()
out1 = model1(x)
out1.sum().backward()
num_grad = sum([x.grad.numel() for x in model1.parameters() if x.grad is not None])
model2 = models.__dict__[name](num_classes=50, memory_efficient=False)
model2.load_state_dict(params)
model2.eval()
out2 = model2(x)
max_diff = (out1 - out2).abs().max()
self.assertTrue(num_params == num_grad)
self.assertTrue(max_diff < 1e-5)
def test_resnet_dilation(self):
# TODO improve tests to also check that each layer has the right dimensionality
for i in product([False, True], [False, True], [False, True]):
model = models.__dict__["resnet50"](replace_stride_with_dilation=i)
model = self._make_sliced_model(model, stop_layer="layer4")
model.eval()
x = torch.rand(1, 3, 224, 224)
out = model(x)
f = 2 ** sum(i)
self.assertEqual(out.shape, (1, 2048, 7 * f, 7 * f))
def test_mobilenetv2_residual_setting(self):
model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]])
model.eval()
x = torch.rand(1, 3, 224, 224)
out = model(x)
self.assertEqual(out.shape[-1], 1000)
def test_mobilenetv2_norm_layer(self):
model = models.__dict__["mobilenet_v2"]()
self.assertTrue(any(isinstance(x, nn.BatchNorm2d) for x in model.modules()))
def get_gn(num_channels):
return nn.GroupNorm(32, num_channels)
model = models.__dict__["mobilenet_v2"](norm_layer=get_gn)
self.assertFalse(any(isinstance(x, nn.BatchNorm2d) for x in model.modules()))
self.assertTrue(any(isinstance(x, nn.GroupNorm) for x in model.modules()))
def test_fasterrcnn_double(self):
model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
model.double()
model.eval()
input_shape = (3, 300, 300)
x = torch.rand(input_shape, dtype=torch.float64)
model_input = [x]
out = model(model_input)
self.assertIs(model_input[0], x)
self.assertEqual(len(out), 1)
self.assertTrue("boxes" in out[0])
self.assertTrue("scores" in out[0])
self.assertTrue("labels" in out[0])
def test_googlenet_eval(self):
m = torch.jit.script(models.googlenet(pretrained=True).eval())
self.checkModule(m, "googlenet", torch.rand(1, 3, 224, 224))
@unittest.skipIf(not torch.cuda.is_available(), 'needs GPU')
def test_fasterrcnn_switch_devices(self):
def checkOut(out):
self.assertEqual(len(out), 1)
self.assertTrue("boxes" in out[0])
self.assertTrue("scores" in out[0])
self.assertTrue("labels" in out[0])
model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
model.cuda()
model.eval()
input_shape = (3, 300, 300)
x = torch.rand(input_shape, device='cuda')
model_input = [x]
out = model(model_input)
self.assertIs(model_input[0], x)
checkOut(out)
with torch.cuda.amp.autocast():
out = model(model_input)
checkOut(out)
# now switch to cpu and make sure it works
model.cpu()
x = x.cpu()
out_cpu = model([x])
checkOut(out_cpu)
def test_generalizedrcnn_transform_repr(self):
min_size, max_size = 224, 299
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
t = models.detection.transform.GeneralizedRCNNTransform(min_size=min_size,
max_size=max_size,
image_mean=image_mean,
image_std=image_std)
# Check integrity of object __repr__ attribute
expected_string = 'GeneralizedRCNNTransform('
_indent = '\n '
expected_string += '{0}Normalize(mean={1}, std={2})'.format(_indent, image_mean, image_std)
expected_string += '{0}Resize(min_size=({1},), max_size={2}, '.format(_indent, min_size, max_size)
expected_string += "mode='bilinear')\n)"
self.assertEqual(t.__repr__(), expected_string)
_devs = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
for model_name in get_available_classification_models():
for dev in _devs:
# for-loop bodies don't define scopes, so we have to save the variables
# we want to close over in some way
def do_test(self, model_name=model_name, dev=dev):
input_shape = (1, 3, 224, 224)
if model_name in ['inception_v3']:
input_shape = (1, 3, 299, 299)
self._test_classification_model(model_name, input_shape, dev)
setattr(ModelTester, "test_" + model_name + "_" + dev, do_test)
for model_name in get_available_segmentation_models():
for dev in _devs:
# for-loop bodies don't define scopes, so we have to save the variables
# we want to close over in some way
def do_test(self, model_name=model_name, dev=dev):
self._test_segmentation_model(model_name, dev)
setattr(ModelTester, "test_" + model_name + "_" + dev, do_test)
for model_name in get_available_detection_models():
for dev in _devs:
# for-loop bodies don't define scopes, so we have to save the variables
# we want to close over in some way
def do_test(self, model_name=model_name, dev=dev):
self._test_detection_model(model_name, dev)
setattr(ModelTester, "test_" + model_name + "_" + dev, do_test)
def do_validation_test(self, model_name=model_name):
self._test_detection_model_validation(model_name)
setattr(ModelTester, "test_" + model_name + "_validation", do_validation_test)
for model_name in get_available_video_models():
for dev in _devs:
def do_test(self, model_name=model_name, dev=dev):
self._test_video_model(model_name, dev)
setattr(ModelTester, "test_" + model_name + "_" + dev, do_test)
if __name__ == '__main__':
unittest.main()
import torch
import torchvision.models
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead
import unittest
class Tester(unittest.TestCase):
def _make_empty_sample(self, add_masks=False, add_keypoints=False):
images = [torch.rand((3, 100, 100), dtype=torch.float32)]
boxes = torch.zeros((0, 4), dtype=torch.float32)
negative_target = {"boxes": boxes,
"labels": torch.zeros(0, dtype=torch.int64),
"image_id": 4,
"area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
"iscrowd": torch.zeros((0,), dtype=torch.int64)}
if add_masks:
negative_target["masks"] = torch.zeros(0, 100, 100, dtype=torch.uint8)
if add_keypoints:
negative_target["keypoints"] = torch.zeros(17, 0, 3, dtype=torch.float32)
targets = [negative_target]
return images, targets
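    # _make_empty_sample builds a "negative" sample: a valid image with zero boxes
    # (and optionally empty masks / keypoints), used to check that detection losses
    # degrade gracefully to zero instead of crashing.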
def test_targets_to_anchors(self):
_, targets = self._make_empty_sample()
anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(
anchor_sizes, aspect_ratios
)
rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0])
head = RegionProposalNetwork(
rpn_anchor_generator, rpn_head,
0.5, 0.3,
256, 0.5,
2000, 2000, 0.7)
labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)
self.assertEqual(labels[0].sum(), 0)
self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
self.assertEqual(labels[0].dtype, torch.float32)
self.assertEqual(matched_gt_boxes[0].sum(), 0)
self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
def test_assign_targets_to_proposals(self):
proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)]
gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)]
gt_labels = [torch.tensor([[0]], dtype=torch.int64)]
box_roi_pool = MultiScaleRoIAlign(
featmap_names=['0', '1', '2', '3'],
output_size=7,
sampling_ratio=2)
resolution = box_roi_pool.output_size[0]
representation_size = 1024
box_head = TwoMLPHead(
4 * resolution ** 2,
representation_size)
representation_size = 1024
box_predictor = FastRCNNPredictor(
representation_size,
2)
roi_heads = RoIHeads(
# Box
box_roi_pool, box_head, box_predictor,
0.5, 0.5,
512, 0.25,
None,
0.05, 0.5, 100)
matched_idxs, labels = roi_heads.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
self.assertEqual(matched_idxs[0].sum(), 0)
self.assertEqual(matched_idxs[0].shape, torch.Size([proposals[0].shape[0]]))
self.assertEqual(matched_idxs[0].dtype, torch.int64)
self.assertEqual(labels[0].sum(), 0)
self.assertEqual(labels[0].shape, torch.Size([proposals[0].shape[0]]))
self.assertEqual(labels[0].dtype, torch.int64)
def test_forward_negative_sample_frcnn(self):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
num_classes=2, min_size=100, max_size=100)
images, targets = self._make_empty_sample()
loss_dict = model(images, targets)
self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.))
self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.))
def test_forward_negative_sample_mrcnn(self):
model = torchvision.models.detection.maskrcnn_resnet50_fpn(
num_classes=2, min_size=100, max_size=100)
images, targets = self._make_empty_sample(add_masks=True)
loss_dict = model(images, targets)
self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.))
self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.))
self.assertEqual(loss_dict["loss_mask"], torch.tensor(0.))
def test_forward_negative_sample_krcnn(self):
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
num_classes=2, min_size=100, max_size=100)
images, targets = self._make_empty_sample(add_keypoints=True)
loss_dict = model(images, targets)
self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.))
self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.))
self.assertEqual(loss_dict["loss_keypoint"], torch.tensor(0.))
if __name__ == '__main__':
unittest.main()
import copy
import torch
from torchvision.models.detection import _utils
from torchvision.models.detection.transform import GeneralizedRCNNTransform
import unittest
from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn, keypointrcnn_resnet50_fpn
class Tester(unittest.TestCase):
def test_balanced_positive_negative_sampler(self):
sampler = _utils.BalancedPositiveNegativeSampler(4, 0.25)
        # the first 6 entries are negatives, the next 3 positives, and the last
        # two (-1) are ignored
matched_idxs = [torch.tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, -1, -1])]
pos, neg = sampler(matched_idxs)
# we know the number of elements that should be sampled for the positive (1)
# and the negative (3), and their location. Let's make sure that they are
# there
self.assertEqual(pos[0].sum(), 1)
self.assertEqual(pos[0][6:9].sum(), 1)
self.assertEqual(neg[0].sum(), 3)
self.assertEqual(neg[0][0:6].sum(), 3)
def test_fasterrcnn_resnet50_fpn_frozen_layers(self):
        # we know how many initial layers and parameters of the network should
        # be frozen for each trainable_backbone_layers parameter value,
        # i.e. all 53 params are frozen if trainable_backbone_layers=0
        # and the first 24 params are frozen if trainable_backbone_layers=2
expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
for train_layers, exp_froz_params in expected_frozen_params.items():
model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False,
num_classes=91, pretrained_backbone=False,
trainable_backbone_layers=train_layers)
# boolean list that is true if the param at that index is frozen
is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
# check that expected initial number of layers are frozen
self.assertTrue(all(is_frozen[:exp_froz_params]))
def test_maskrcnn_resnet50_fpn_frozen_layers(self):
        # we know how many initial layers and parameters of the maskrcnn should
        # be frozen for each trainable_backbone_layers parameter value,
        # i.e. all 53 params are frozen if trainable_backbone_layers=0
        # and the first 24 params are frozen if trainable_backbone_layers=2
expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
for train_layers, exp_froz_params in expected_frozen_params.items():
model = maskrcnn_resnet50_fpn(pretrained=True, progress=False,
num_classes=91, pretrained_backbone=False,
trainable_backbone_layers=train_layers)
# boolean list that is true if the parameter at that index is frozen
is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
# check that expected initial number of layers in maskrcnn are frozen
self.assertTrue(all(is_frozen[:exp_froz_params]))
def test_keypointrcnn_resnet50_fpn_frozen_layers(self):
        # we know how many initial layers and parameters of the keypointrcnn should
        # be frozen for each trainable_backbone_layers parameter value,
        # i.e. all 53 params are frozen if trainable_backbone_layers=0
        # and the first 24 params are frozen if trainable_backbone_layers=2
expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
for train_layers, exp_froz_params in expected_frozen_params.items():
model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False,
num_classes=2, pretrained_backbone=False,
trainable_backbone_layers=train_layers)
# boolean list that is true if the parameter at that index is frozen
is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()]
# check that expected initial number of layers in keypointrcnn are frozen
self.assertTrue(all(is_frozen[:exp_froz_params]))
def test_transform_copy_targets(self):
transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3))
image = [torch.rand(3, 200, 300), torch.rand(3, 200, 200)]
targets = [{'boxes': torch.rand(3, 4)}, {'boxes': torch.rand(2, 4)}]
targets_copy = copy.deepcopy(targets)
out = transform(image, targets) # noqa: F841
self.assertTrue(torch.equal(targets[0]['boxes'], targets_copy[0]['boxes']))
self.assertTrue(torch.equal(targets[1]['boxes'], targets_copy[1]['boxes']))
if __name__ == '__main__':
unittest.main()
import io
import torch
from torchvision import ops
from torchvision import models
from torchvision.models.detection.image_list import ImageList
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead
from torchvision.models.detection.mask_rcnn import MaskRCNNHeads, MaskRCNNPredictor
from collections import OrderedDict
# onnxruntime requires python 3.5 or above
try:
import onnxruntime
except ImportError:
onnxruntime = None
import unittest
from torchvision.ops._register_onnx_ops import _onnx_opset_version
@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
class ONNXExporterTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
torch.manual_seed(123)
def run_model(self, model, inputs_list, tolerate_small_mismatch=False, do_constant_folding=True, dynamic_axes=None,
output_names=None, input_names=None):
model.eval()
onnx_io = io.BytesIO()
# export to onnx with the first input
torch.onnx.export(model, inputs_list[0], onnx_io,
do_constant_folding=do_constant_folding, opset_version=_onnx_opset_version,
dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names)
# validate the exported model with onnx runtime
for test_inputs in inputs_list:
with torch.no_grad():
if isinstance(test_inputs, torch.Tensor) or \
isinstance(test_inputs, list):
test_inputs = (test_inputs,)
                test_outputs = model(*test_inputs)
                if isinstance(test_outputs, torch.Tensor):
                    test_outputs = (test_outputs,)
            self.ort_validate(onnx_io, test_inputs, test_outputs, tolerate_small_mismatch)
def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False):
inputs, _ = torch.jit._flatten(inputs)
outputs, _ = torch.jit._flatten(outputs)
def to_numpy(tensor):
if tensor.requires_grad:
return tensor.detach().cpu().numpy()
else:
return tensor.cpu().numpy()
inputs = list(map(to_numpy, inputs))
outputs = list(map(to_numpy, outputs))
ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
# compute onnxruntime output prediction
ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
ort_outs = ort_session.run(None, ort_inputs)
for i in range(0, len(outputs)):
try:
torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
except AssertionError as error:
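                # onnxruntime mismatch reports include the percentage of differing
                # elements; "(0.00%)" indicates a rounding-level mismatch, which is
                # accepted when tolerate_small_mismatch is set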
if tolerate_small_mismatch:
self.assertIn("(0.00%)", str(error), str(error))
else:
raise
@unittest.skip("Disable test until Split w/ zero sizes is implemented in ORT")
def test_new_empty_tensor(self):
class Module(torch.nn.Module):
def __init__(self):
super(Module, self).__init__()
self.conv2 = ops.misc.ConvTranspose2d(16, 33, (3, 5))
def forward(self, input2):
return self.conv2(input2)
input = torch.rand(0, 16, 10, 10)
test_input = torch.rand(0, 16, 20, 20)
self.run_model(Module(), [(input, ), (test_input,)], do_constant_folding=False)
def test_nms(self):
boxes = torch.rand(5, 4)
boxes[:, 2:] += torch.rand(5, 2)
scores = torch.randn(5)
class Module(torch.nn.Module):
def forward(self, boxes, scores):
return ops.nms(boxes, scores, 0.5)
self.run_model(Module(), [(boxes, scores)])
def test_clip_boxes_to_image(self):
boxes = torch.randn(5, 4) * 500
boxes[:, 2:] += boxes[:, :2]
size = torch.randn(200, 300)
size_2 = torch.randn(300, 400)
class Module(torch.nn.Module):
def forward(self, boxes, size):
return ops.boxes.clip_boxes_to_image(boxes, size.shape)
self.run_model(Module(), [(boxes, size), (boxes, size_2)],
input_names=["boxes", "size"],
dynamic_axes={"size": [0, 1]})
def test_roi_align(self):
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
model = ops.RoIAlign((5, 5), 1, 2)
self.run_model(model, [(x, single_roi)])
def test_roi_align_aligned(self):
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 1.5, 1.5, 3, 3]], dtype=torch.float32)
model = ops.RoIAlign((5, 5), 1, 2, aligned=True)
self.run_model(model, [(x, single_roi)])
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
model = ops.RoIAlign((5, 5), 0.5, 3, aligned=True)
self.run_model(model, [(x, single_roi)])
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
model = ops.RoIAlign((5, 5), 1.8, 2, aligned=True)
self.run_model(model, [(x, single_roi)])
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32)
model = ops.RoIAlign((2, 2), 2.5, 0, aligned=True)
self.run_model(model, [(x, single_roi)])
@unittest.skip # Issue in exporting ROIAlign with aligned = True for malformed boxes
def test_roi_align_malformed_boxes(self):
x = torch.randn(1, 1, 10, 10, dtype=torch.float32)
single_roi = torch.tensor([[0, 2, 0.3, 1.5, 1.5]], dtype=torch.float32)
model = ops.RoIAlign((5, 5), 1, 1, aligned=True)
self.run_model(model, [(x, single_roi)])
def test_roi_pool(self):
x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
pool_h = 5
pool_w = 5
model = ops.RoIPool((pool_h, pool_w), 2)
self.run_model(model, [(x, rois)])
def test_resize_images(self):
class TransformModule(torch.nn.Module):
def __init__(self_module):
super(TransformModule, self_module).__init__()
self_module.transform = self._init_test_generalized_rcnn_transform()
def forward(self_module, images):
return self_module.transform.resize(images, None)[0]
input = torch.rand(3, 10, 20)
input_test = torch.rand(3, 100, 150)
self.run_model(TransformModule(), [(input,), (input_test,)],
input_names=["input1"], dynamic_axes={"input1": [0, 1, 2, 3]})
def test_transform_images(self):
class TransformModule(torch.nn.Module):
def __init__(self_module):
super(TransformModule, self_module).__init__()
self_module.transform = self._init_test_generalized_rcnn_transform()
def forward(self_module, images):
return self_module.transform(images)[0].tensors
input = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
input_test = torch.rand(3, 100, 200), torch.rand(3, 200, 200)
self.run_model(TransformModule(), [(input,), (input_test,)])
def _init_test_generalized_rcnn_transform(self):
min_size = 100
max_size = 200
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
return transform
def _init_test_rpn(self):
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
out_channels = 256
rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
rpn_fg_iou_thresh = 0.7
rpn_bg_iou_thresh = 0.3
rpn_batch_size_per_image = 256
rpn_positive_fraction = 0.5
rpn_pre_nms_top_n = dict(training=2000, testing=1000)
rpn_post_nms_top_n = dict(training=2000, testing=1000)
rpn_nms_thresh = 0.7
rpn = RegionProposalNetwork(
rpn_anchor_generator, rpn_head,
rpn_fg_iou_thresh, rpn_bg_iou_thresh,
rpn_batch_size_per_image, rpn_positive_fraction,
rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
return rpn
def _init_test_roi_heads_faster_rcnn(self):
out_channels = 256
num_classes = 91
box_fg_iou_thresh = 0.5
box_bg_iou_thresh = 0.5
box_batch_size_per_image = 512
box_positive_fraction = 0.25
bbox_reg_weights = None
box_score_thresh = 0.05
box_nms_thresh = 0.5
box_detections_per_img = 100
box_roi_pool = ops.MultiScaleRoIAlign(
featmap_names=['0', '1', '2', '3'],
output_size=7,
sampling_ratio=2)
resolution = box_roi_pool.output_size[0]
representation_size = 1024
box_head = TwoMLPHead(
out_channels * resolution ** 2,
representation_size)
representation_size = 1024
box_predictor = FastRCNNPredictor(
representation_size,
num_classes)
roi_heads = RoIHeads(
box_roi_pool, box_head, box_predictor,
box_fg_iou_thresh, box_bg_iou_thresh,
box_batch_size_per_image, box_positive_fraction,
bbox_reg_weights,
box_score_thresh, box_nms_thresh, box_detections_per_img)
return roi_heads
def get_features(self, images):
s0, s1 = images.shape[-2:]
features = [
('0', torch.rand(2, 256, s0 // 4, s1 // 4)),
('1', torch.rand(2, 256, s0 // 8, s1 // 8)),
('2', torch.rand(2, 256, s0 // 16, s1 // 16)),
('3', torch.rand(2, 256, s0 // 32, s1 // 32)),
('4', torch.rand(2, 256, s0 // 64, s1 // 64)),
]
features = OrderedDict(features)
return features
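# The fake feature maps above mimic an FPN pyramid: levels '0'..'4' are
# downscaled by strides 4, 8, 16, 32 and 64 relative to the input image.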
def test_rpn(self):
class RPNModule(torch.nn.Module):
def __init__(self_module):
super(RPNModule, self_module).__init__()
self_module.rpn = self._init_test_rpn()
def forward(self_module, images, features):
images = ImageList(images, [i.shape[-2:] for i in images])
return self_module.rpn(images, features)
images = torch.rand(2, 3, 150, 150)
features = self.get_features(images)
images2 = torch.rand(2, 3, 80, 80)
test_features = self.get_features(images2)
model = RPNModule()
model.eval()
model(images, features)
self.run_model(model, [(images, features), (images2, test_features)], tolerate_small_mismatch=True,
input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3],
"input3": [0, 1, 2, 3], "input4": [0, 1, 2, 3],
"input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]})
def test_multi_scale_roi_align(self):
class TransformModule(torch.nn.Module):
def __init__(self):
super(TransformModule, self).__init__()
self.model = ops.MultiScaleRoIAlign(['feat1', 'feat2'], 3, 2)
self.image_sizes = [(512, 512)]
def forward(self, input, boxes):
return self.model(input, boxes, self.image_sizes)
i = OrderedDict()
i['feat1'] = torch.rand(1, 5, 64, 64)
i['feat2'] = torch.rand(1, 5, 16, 16)
boxes = torch.rand(6, 4) * 256
boxes[:, 2:] += boxes[:, :2]
i1 = OrderedDict()
i1['feat1'] = torch.rand(1, 5, 64, 64)
i1['feat2'] = torch.rand(1, 5, 16, 16)
boxes1 = torch.rand(6, 4) * 256
boxes1[:, 2:] += boxes1[:, :2]
self.run_model(TransformModule(), [(i, [boxes],), (i1, [boxes1],)])
def test_roi_heads(self):
class RoiHeadsModule(torch.nn.Module):
def __init__(self_module):
super(RoiHeadsModule, self_module).__init__()
self_module.transform = self._init_test_generalized_rcnn_transform()
self_module.rpn = self._init_test_rpn()
self_module.roi_heads = self._init_test_roi_heads_faster_rcnn()
def forward(self_module, images, features):
original_image_sizes = [img.shape[-2:] for img in images]
images = ImageList(images, [i.shape[-2:] for i in images])
proposals, _ = self_module.rpn(images, features)
detections, _ = self_module.roi_heads(features, proposals, images.image_sizes)
detections = self_module.transform.postprocess(detections,
images.image_sizes,
original_image_sizes)
return detections
images = torch.rand(2, 3, 100, 100)
features = self.get_features(images)
images2 = torch.rand(2, 3, 150, 150)
test_features = self.get_features(images2)
model = RoiHeadsModule()
model.eval()
model(images, features)
self.run_model(model, [(images, features), (images2, test_features)], tolerate_small_mismatch=True,
input_names=["input1", "input2", "input3", "input4", "input5", "input6"],
dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3], "input3": [0, 1, 2, 3],
"input4": [0, 1, 2, 3], "input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]})
def get_image_from_url(self, url, size=None):
import requests
from PIL import Image
from io import BytesIO
from torchvision import transforms
data = requests.get(url)
image = Image.open(BytesIO(data.content)).convert("RGB")
if size is None:
size = (300, 200)
image = image.resize(size, Image.BILINEAR)
to_tensor = transforms.ToTensor()
return to_tensor(image)
def get_test_images(self):
image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg"
image = self.get_image_from_url(url=image_url, size=(100, 320))
image_url2 = "https://pytorch.org/tutorials/_static/img/tv_tutorial/tv_image05.png"
image2 = self.get_image_from_url(url=image_url2, size=(250, 380))
images = [image]
test_images = [image2]
return images, test_images
def test_faster_rcnn(self):
images, test_images = self.get_test_images()
dummy_image = [torch.ones(3, 100, 100) * 0.3]
model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
model.eval()
model(images)
# Test exported model on images of different size, or dummy input
self.run_model(model, [(images,), (test_images,), (dummy_image,)], input_names=["images_tensors"],
output_names=["outputs"],
dynamic_axes={"images_tensors": [0, 1, 2, 3], "outputs": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
# Test exported model for an image with no detections on other images
self.run_model(model, [(dummy_image,), (images,)], input_names=["images_tensors"],
output_names=["outputs"],
dynamic_axes={"images_tensors": [0, 1, 2, 3], "outputs": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
# Verify that paste_masks_in_image behaves the same in tracing.
# This test also compares both paste_masks_in_image and _onnx_paste_masks_in_image
# (since jit_trace will call _onnx_paste_masks_in_image).
def test_paste_mask_in_image(self):
# disable profiling
torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)
masks = torch.rand(10, 1, 26, 26)
boxes = torch.rand(10, 4)
boxes[:, 2:] += torch.rand(10, 2)
boxes *= 50
o_im_s = (100, 100)
from torchvision.models.detection.roi_heads import paste_masks_in_image
out = paste_masks_in_image(masks, boxes, o_im_s)
jit_trace = torch.jit.trace(paste_masks_in_image,
(masks, boxes,
[torch.tensor(o_im_s[0]),
torch.tensor(o_im_s[1])]))
out_trace = jit_trace(masks, boxes, [torch.tensor(o_im_s[0]), torch.tensor(o_im_s[1])])
assert torch.all(out.eq(out_trace))
masks2 = torch.rand(20, 1, 26, 26)
boxes2 = torch.rand(20, 4)
boxes2[:, 2:] += torch.rand(20, 2)
boxes2 *= 100
o_im_s2 = (200, 200)
out2 = paste_masks_in_image(masks2, boxes2, o_im_s2)
out_trace2 = jit_trace(masks2, boxes2, [torch.tensor(o_im_s2[0]), torch.tensor(o_im_s2[1])])
assert torch.all(out2.eq(out_trace2))
def test_mask_rcnn(self):
images, test_images = self.get_test_images()
dummy_image = [torch.ones(3, 100, 100) * 0.3]
model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
model.eval()
model(images)
# Test exported model on images of different size, or dummy input
self.run_model(model, [(images,), (test_images,), (dummy_image,)],
input_names=["images_tensors"],
output_names=["boxes", "labels", "scores", "masks"],
dynamic_axes={"images_tensors": [0, 1, 2, 3], "boxes": [0, 1], "labels": [0],
"scores": [0], "masks": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
# TODO: enable this test once dynamic model export is fixed
# Test exported model for an image with no detections on other images
self.run_model(model, [(dummy_image,), (images,)],
input_names=["images_tensors"],
output_names=["boxes", "labels", "scores", "masks"],
dynamic_axes={"images_tensors": [0, 1, 2, 3], "boxes": [0, 1], "labels": [0],
"scores": [0], "masks": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
# Verify that heatmaps_to_keypoints behaves the same in tracing.
# This test also compares both heatmaps_to_keypoints and _onnx_heatmaps_to_keypoints
# (since jit_trace will call _onnx_heatmaps_to_keypoints).
# @unittest.skip("Disable test until Resize bug fixed in ORT")
def test_heatmaps_to_keypoints(self):
# disable profiling
torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)
maps = torch.rand(10, 1, 26, 26)
rois = torch.rand(10, 4)
from torchvision.models.detection.roi_heads import heatmaps_to_keypoints
out = heatmaps_to_keypoints(maps, rois)
jit_trace = torch.jit.trace(heatmaps_to_keypoints, (maps, rois))
out_trace = jit_trace(maps, rois)
assert torch.all(out[0].eq(out_trace[0]))
assert torch.all(out[1].eq(out_trace[1]))
maps2 = torch.rand(20, 2, 21, 21)
rois2 = torch.rand(20, 4)
out2 = heatmaps_to_keypoints(maps2, rois2)
out_trace2 = jit_trace(maps2, rois2)
assert torch.all(out2[0].eq(out_trace2[0]))
assert torch.all(out2[1].eq(out_trace2[1]))
def test_keypoint_rcnn(self):
images, test_images = self.get_test_images()
dummy_images = [torch.ones(3, 100, 100) * 0.3]
model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300)
model.eval()
model(images)
self.run_model(model, [(images,), (test_images,), (dummy_images,)],
input_names=["images_tensors"],
output_names=["outputs1", "outputs2", "outputs3", "outputs4"],
dynamic_axes={"images_tensors": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
self.run_model(model, [(dummy_images,), (test_images,)],
input_names=["images_tensors"],
output_names=["outputs1", "outputs2", "outputs3", "outputs4"],
dynamic_axes={"images_tensors": [0, 1, 2, 3]},
tolerate_small_mismatch=True)
if __name__ == '__main__':
unittest.main()
import math
import unittest
import numpy as np
import torch
from torch import Tensor
from torch.autograd import gradcheck
from torch.jit.annotations import Tuple
from torch.nn.modules.utils import _pair
from torchvision import ops
class OpTester(object):
@classmethod
def setUpClass(cls):
cls.dtype = torch.float64
def test_forward_cpu_contiguous(self):
self._test_forward(device=torch.device('cpu'), contiguous=True)
def test_forward_cpu_non_contiguous(self):
self._test_forward(device=torch.device('cpu'), contiguous=False)
def test_backward_cpu_contiguous(self):
self._test_backward(device=torch.device('cpu'), contiguous=True)
def test_backward_cpu_non_contiguous(self):
self._test_backward(device=torch.device('cpu'), contiguous=False)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_forward_cuda_contiguous(self):
self._test_forward(device=torch.device('cuda'), contiguous=True)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_forward_cuda_non_contiguous(self):
self._test_forward(device=torch.device('cuda'), contiguous=False)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_backward_cuda_contiguous(self):
self._test_backward(device=torch.device('cuda'), contiguous=True)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_backward_cuda_non_contiguous(self):
self._test_backward(device=torch.device('cuda'), contiguous=False)
def _test_forward(self, device, contiguous):
pass
def _test_backward(self, device, contiguous):
pass
class RoIOpTester(OpTester):
def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None):
x_dtype = self.dtype if x_dtype is None else x_dtype
rois_dtype = self.dtype if rois_dtype is None else rois_dtype
pool_size = 5
# n_channels % (pool_size ** 2) == 0 required for PS operations.
n_channels = 2 * (pool_size ** 2)
x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device)
if not contiguous:
x = x.permute(0, 1, 3, 2)
rois = torch.tensor([[0, 0, 0, 9, 9], # format: (batch_idx, x1, y1, x2, y2)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=rois_dtype, device=device)
pool_h, pool_w = pool_size, pool_size
y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1)
# the following should be true whether we're running an autocast test or not.
self.assertTrue(y.dtype == x.dtype)
gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1,
sampling_ratio=-1, device=device, dtype=self.dtype)
tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
self.assertTrue(torch.allclose(gt_y.to(y.dtype), y, rtol=tol, atol=tol))
def _test_backward(self, device, contiguous):
pool_size = 2
x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
if not contiguous:
x = x.permute(0, 1, 3, 2)
rois = torch.tensor([[0, 0, 0, 4, 4], # format: (batch_idx, x1, y1, x2, y2)
[0, 0, 2, 3, 4],
[0, 2, 2, 4, 4]],
dtype=self.dtype, device=device)
def func(z):
return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)
script_func = self.get_script_fn(rois, pool_size)
self.assertTrue(gradcheck(func, (x,)))
self.assertTrue(gradcheck(script_func, (x,)))
def test_boxes_shape(self):
self._test_boxes_shape()
def _helper_boxes_shape(self, func):
# test boxes as Tensor[N, 5]
with self.assertRaises(AssertionError):
a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype)
func(a, boxes, output_size=(2, 2))
# test boxes as List[Tensor[N, 4]]
with self.assertRaises(AssertionError):
a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8)
boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype)
ops.roi_pool(a, [boxes], output_size=(2, 2))
def fn(*args, **kwargs):
pass
def get_script_fn(*args, **kwargs):
pass
def expected_fn(*args, **kwargs):
pass
class RoIPoolTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_fn(input, rois, pool_size):
# type: (Tensor, Tensor, int) -> Tensor
return ops.roi_pool(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
device=None, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
n_channels = x.size(1)
y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)
def get_slice(k, block):
return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))
for roi_idx, roi in enumerate(rois):
batch_idx = int(roi[0])
j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]
roi_h, roi_w = roi_x.shape[-2:]
bin_h = roi_h / pool_h
bin_w = roi_w / pool_w
for i in range(0, pool_h):
for j in range(0, pool_w):
bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
if bin_x.numel() > 0:
y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
return y
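# Hand check of the binning above (illustrative, not executed by the suite):
# a 5x5 RoI pooled to 2x2 gives bin size 2.5; get_slice(0, 2.5) -> slice(0, 3)
# and get_slice(1, 2.5) -> slice(2, 5), so neighbouring bins overlap by one
# row/column and each output cell is the max over its bin.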
def _test_boxes_shape(self):
self._helper_boxes_shape(ops.roi_pool)
class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_fn(input, rois, pool_size):
# type: (Tensor, Tensor, int) -> Tensor
return ops.ps_roi_pool(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
device=None, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
n_input_channels = x.size(1)
self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
n_output_channels = int(n_input_channels / (pool_h * pool_w))
y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)
def get_slice(k, block):
return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))
for roi_idx, roi in enumerate(rois):
batch_idx = int(roi[0])
j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]
roi_height = max(i_end - i_begin, 1)
roi_width = max(j_end - j_begin, 1)
bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)
for i in range(0, pool_h):
for j in range(0, pool_w):
bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
if bin_x.numel() > 0:
area = bin_x.size(-2) * bin_x.size(-1)
for c_out in range(0, n_output_channels):
c_in = c_out * (pool_h * pool_w) + pool_w * i + j
t = torch.sum(bin_x[c_in, :, :])
y[roi_idx, c_out, i, j] = t / area
return y
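# Channel mapping illustration (assumed sizes, not part of the test): with a
# 2x2 pool and 8 input channels there are 8 / 4 = 2 output channels, and
# output channel 0 at bin (i=1, j=0) reads input channel
# c_in = 0 * 4 + 2 * 1 + 0 = 2.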
def _test_boxes_shape(self):
self._helper_boxes_shape(ops.ps_roi_pool)
def bilinear_interpolate(data, y, x, snap_border=False):
height, width = data.shape
if snap_border:
if -1 < y <= 0:
y = 0
elif height - 1 <= y < height:
y = height - 1
if -1 < x <= 0:
x = 0
elif width - 1 <= x < width:
x = width - 1
y_low = int(math.floor(y))
x_low = int(math.floor(x))
y_high = y_low + 1
x_high = x_low + 1
wy_h = y - y_low
wx_h = x - x_low
wy_l = 1 - wy_h
wx_l = 1 - wx_h
val = 0
for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
if 0 <= yp < height and 0 <= xp < width:
val += wx * wy * data[yp, xp]
return val
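# Worked example for bilinear_interpolate (a hand check, not run by the
# suite): for data = [[0, 1], [2, 3]] and (y, x) = (0.5, 0.5), all four
# neighbours get weight 0.25, so the result is 0.25 * (0 + 1 + 2 + 3) = 1.5.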
class RoIAlignTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs):
return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio, aligned=aligned)(x, rois)
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_fn(input, rois, pool_size):
# type: (Tensor, Tensor, int) -> Tensor
return ops.roi_align(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False,
device=None, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
n_channels = in_data.size(1)
out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)
offset = 0.5 if aligned else 0.
for r, roi in enumerate(rois):
batch_idx = int(roi[0])
j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:])
roi_h = i_end - i_begin
roi_w = j_end - j_begin
bin_h = roi_h / pool_h
bin_w = roi_w / pool_w
for i in range(0, pool_h):
start_h = i_begin + i * bin_h
grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
for j in range(0, pool_w):
start_w = j_begin + j * bin_w
grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
for channel in range(0, n_channels):
val = 0
for iy in range(0, grid_h):
y = start_h + (iy + 0.5) * bin_h / grid_h
for ix in range(0, grid_w):
x = start_w + (ix + 0.5) * bin_w / grid_w
val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True)
val /= grid_h * grid_w
out_data[r, channel, i, j] = val
return out_data
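# Sampling-grid illustration for the loop above (assumed values): with
# sampling_ratio=-1 and bin_h = 2.5, grid_h = ceil(2.5) = 3, so three y
# samples are taken per bin at start_h + (iy + 0.5) * 2.5 / 3 for
# iy in {0, 1, 2}, and the interpolated values are averaged.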
def _test_boxes_shape(self):
self._helper_boxes_shape(ops.roi_align)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_autocast(self):
for x_dtype in (torch.float, torch.half):
for rois_dtype in (torch.float, torch.half):
with torch.cuda.amp.autocast():
self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype)
class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio)(x, rois)
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_fn(input, rois, pool_size):
# type: (Tensor, Tensor, int) -> Tensor
return ops.ps_roi_align(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1,
sampling_ratio=-1, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
n_input_channels = in_data.size(1)
self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
n_output_channels = int(n_input_channels / (pool_h * pool_w))
out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)
for r, roi in enumerate(rois):
batch_idx = int(roi[0])
j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])
roi_h = i_end - i_begin
roi_w = j_end - j_begin
bin_h = roi_h / pool_h
bin_w = roi_w / pool_w
for i in range(0, pool_h):
start_h = i_begin + i * bin_h
grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
for j in range(0, pool_w):
start_w = j_begin + j * bin_w
grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
for c_out in range(0, n_output_channels):
c_in = c_out * (pool_h * pool_w) + pool_w * i + j
val = 0
for iy in range(0, grid_h):
y = start_h + (iy + 0.5) * bin_h / grid_h
for ix in range(0, grid_w):
x = start_w + (ix + 0.5) * bin_w / grid_w
val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True)
val /= grid_h * grid_w
out_data[r, c_out, i, j] = val
return out_data
def _test_boxes_shape(self):
self._helper_boxes_shape(ops.ps_roi_align)
class MultiScaleRoIAlignTester(unittest.TestCase):
def test_msroialign_repr(self):
fmap_names = ['0']
output_size = (7, 7)
sampling_ratio = 2
# Pass mock feature map names
t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio)
# Check integrity of object __repr__ attribute
expected_string = (f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, "
f"sampling_ratio={sampling_ratio})")
self.assertEqual(t.__repr__(), expected_string)
class NMSTester(unittest.TestCase):
def reference_nms(self, boxes, scores, iou_threshold):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
Returns:
picked: a list of indexes of the kept boxes
"""
picked = []
_, indexes = scores.sort(descending=True)
while len(indexes) > 0:
current = indexes[0]
picked.append(current.item())
if len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[1:]
rest_boxes = boxes[indexes, :]
iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
indexes = indexes[iou <= iou_threshold]
return torch.as_tensor(picked)
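# Tiny illustrative run (assumed inputs, not part of the test): for two
# identical boxes with scores [0.9, 0.8] and iou_threshold=0.5, index 0 is
# kept first and index 1 is suppressed (IoU = 1.0 > 0.5), so the reference
# returns tensor([0]).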
def _create_tensors_with_iou(self, N, iou_thresh):
# force last box to have a pre-defined iou with the first box
# let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
# then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
# we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
# Adjust the threshold upward a bit with the intent of creating
# at least one box that exceeds (barely) the threshold and so
# should be suppressed.
boxes = torch.rand(N, 4) * 100
boxes[:, 2:] += boxes[:, :2]
boxes[-1, :] = boxes[0, :]
x0, y0, x1, y1 = boxes[-1].tolist()
iou_thresh += 1e-5
boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
scores = torch.rand(N)
return boxes, scores
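# Sanity check of the construction above (assumed numbers): with
# b0 = [0, 0, 10, 10] and iou_thresh = 0.5 the formula gives d of roughly 10,
# the last box becomes roughly [0, 0, 20, 10], and IoU(b0, b1) lands just
# above 0.5 because of the 1e-5 bump, so NMS should suppress it.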
def test_nms(self):
err_msg = 'NMS incompatible between CPU and reference implementation for IoU={}'
for iou in [0.2, 0.5, 0.8]:
boxes, scores = self._create_tensors_with_iou(1000, iou)
keep_ref = self.reference_nms(boxes, scores, iou)
keep = ops.nms(boxes, scores, iou)
self.assertTrue(torch.allclose(keep, keep_ref), err_msg.format(iou))
self.assertRaises(RuntimeError, ops.nms, torch.rand(4), torch.rand(3), 0.5)
self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 5), torch.rand(3), 0.5)
self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 4), torch.rand(3, 2), 0.5)
self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 4), torch.rand(4), 0.5)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_nms_cuda(self):
err_msg = 'NMS incompatible between CPU and CUDA for IoU={}'
for iou in [0.2, 0.5, 0.8]:
boxes, scores = self._create_tensors_with_iou(1000, iou)
r_cpu = ops.nms(boxes, scores, iou)
r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)
is_eq = torch.allclose(r_cpu, r_cuda.cpu())
if not is_eq:
# if the indices are not the same, ensure that it's because the scores
# are duplicates
is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()])
self.assertTrue(is_eq, err_msg.format(iou))
class NewEmptyTensorTester(unittest.TestCase):
def test_new_empty_tensor(self):
input = torch.tensor([2., 2.], requires_grad=True)
new_shape = [3, 3]
out = torch.ops.torchvision._new_empty_tensor_op(input, new_shape)
assert out.size() == torch.Size([3, 3])
assert out.dtype == input.dtype
class DeformConvTester(OpTester, unittest.TestCase):
def expected_fn(self, x, weight, offset, bias, stride=1, padding=0, dilation=1):
stride_h, stride_w = _pair(stride)
pad_h, pad_w = _pair(padding)
dil_h, dil_w = _pair(dilation)
weight_h, weight_w = weight.shape[-2:]
n_batches, n_in_channels, in_h, in_w = x.shape
n_out_channels = weight.shape[0]
out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1
n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w)
in_c_per_offset_grp = n_in_channels // n_offset_grps
n_weight_grps = n_in_channels // weight.shape[1]
in_c_per_weight_grp = weight.shape[1]
out_c_per_weight_grp = n_out_channels // n_weight_grps
out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype)
for b in range(n_batches):
for c_out in range(n_out_channels):
for i in range(out_h):
for j in range(out_w):
for di in range(weight_h):
for dj in range(weight_w):
for c in range(in_c_per_weight_grp):
weight_grp = c_out // out_c_per_weight_grp
c_in = weight_grp * in_c_per_weight_grp + c
offset_grp = c_in // in_c_per_offset_grp
offset_idx = 2 * (offset_grp * (weight_h * weight_w) + di * weight_w + dj)
pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j]
pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j]
out[b, c_out, i, j] += (weight[c_out, c, di, dj] *
bilinear_interpolate(x[b, c_in, :, :], pi, pj))
out += bias.view(1, n_out_channels, 1, 1)
return out
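# Reduction check for the formula above (illustrative): with stride=1,
# padding=0, dilation=1 and an all-zero offset tensor, pi = i + di and
# pj = j + dj, so the nested loops compute an ordinary grouped convolution.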
def get_fn_args(self, device, contiguous, batch_sz):
n_in_channels = 6
n_out_channels = 2
n_weight_grps = 2
n_offset_grps = 3
stride = (2, 1)
pad = (1, 0)
dilation = (2, 1)
stride_h, stride_w = stride
pad_h, pad_w = pad
dil_h, dil_w = dilation
weight_h, weight_w = (3, 2)
in_h, in_w = (5, 4)
out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1
out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1
x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=self.dtype, requires_grad=True)
offset = torch.randn(batch_sz, n_offset_grps * 2 * weight_h * weight_w, out_h, out_w,
device=device, dtype=self.dtype, requires_grad=True)
weight = torch.randn(n_out_channels, n_in_channels // n_weight_grps, weight_h, weight_w,
device=device, dtype=self.dtype, requires_grad=True)
bias = torch.randn(n_out_channels, device=device, dtype=self.dtype, requires_grad=True)
if not contiguous:
x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
return x, weight, offset, bias, stride, pad, dilation
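# The permute/contiguous/permute round trips above preserve each tensor's
# logical shape but leave its storage in a non-contiguous layout, which is
# exactly what the *_non_contiguous test variants exercise.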
def _test_forward(self, device, contiguous):
for batch_sz in [0, 33]:
self._test_forward_with_batchsize(device, contiguous, batch_sz)
def _test_forward_with_batchsize(self, device, contiguous, batch_sz):
x, _, offset, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz)
in_channels = 6
out_channels = 2
kernel_size = (3, 2)
groups = 2
layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
dilation=dilation, groups=groups).to(device=x.device, dtype=x.dtype)
res = layer(x, offset)
weight = layer.weight.data
bias = layer.bias.data
expected = self.expected_fn(x, weight, offset, bias, stride=stride, padding=padding, dilation=dilation)
self.assertTrue(torch.allclose(res, expected), '\nres:\n{}\nexpected:\n{}'.format(res, expected))
# test for wrong sizes
with self.assertRaises(RuntimeError):
wrong_offset = torch.rand_like(offset[:, :2])
res = layer(x, wrong_offset)
def _test_backward(self, device, contiguous):
for batch_sz in [0, 33]:
self._test_backward_with_batchsize(device, contiguous, batch_sz)
def _test_backward_with_batchsize(self, device, contiguous, batch_sz):
x, weight, offset, bias, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz)
def func(x_, offset_, weight_, bias_):
return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride, padding=padding, dilation=dilation)
gradcheck(func, (x, offset, weight, bias), nondet_tol=1e-5)
@torch.jit.script
def script_func(x_, offset_, weight_, bias_, stride_, pad_, dilation_):
# type: (Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int]) -> Tensor
return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_, padding=pad_, dilation=dilation_)
gradcheck(lambda z, off, wei, bi: script_func(z, off, wei, bi, stride, padding, dilation),
(x, offset, weight, bias), nondet_tol=1e-5)
# Test from https://github.com/pytorch/vision/issues/2598
# Run on CUDA only
if "cuda" in device.type:
# compare grads computed on CUDA with grads computed on CPU
true_cpu_grads = None
init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
img = torch.randn(8, 9, 1000, 110)
offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
if not contiguous:
img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
else:
weight = init_weight
for d in ["cpu", "cuda"]:
out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1)
out.mean().backward()
if true_cpu_grads is None:
true_cpu_grads = init_weight.grad
self.assertTrue(true_cpu_grads is not None)
else:
self.assertTrue(init_weight.grad is not None)
res_grads = init_weight.grad.to("cpu")
self.assertTrue(true_cpu_grads.allclose(res_grads))
class FrozenBNTester(unittest.TestCase):
def test_frozenbatchnorm2d_repr(self):
num_features = 32
eps = 1e-5
t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps)
# Check integrity of object __repr__ attribute
expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})"
self.assertEqual(t.__repr__(), expected_string)
def test_frozenbatchnorm2d_eps(self):
sample_size = (4, 32, 28, 28)
x = torch.rand(sample_size)
state_dict = dict(weight=torch.rand(sample_size[1]),
bias=torch.rand(sample_size[1]),
running_mean=torch.rand(sample_size[1]),
running_var=torch.rand(sample_size[1]),
num_batches_tracked=torch.tensor(100))
# Check that default eps is zero for backward-compatibility
fbn = ops.misc.FrozenBatchNorm2d(sample_size[1])
fbn.load_state_dict(state_dict, strict=False)
bn = torch.nn.BatchNorm2d(sample_size[1], eps=0).eval()
bn.load_state_dict(state_dict)
# Difference is expected to fall in an acceptable range
self.assertTrue(torch.allclose(fbn(x), bn(x), atol=1e-6))
# Check computation for eps > 0
fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5)
fbn.load_state_dict(state_dict, strict=False)
bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval()
bn.load_state_dict(state_dict)
self.assertTrue(torch.allclose(fbn(x), bn(x), atol=1e-6))
def test_frozenbatchnorm2d_n_arg(self):
"""Ensure a warning is thrown when passing `n` kwarg
(remove this when support of `n` is dropped)"""
self.assertWarns(DeprecationWarning, ops.misc.FrozenBatchNorm2d, 32, eps=1e-5, n=32)
class BoxConversionTester(unittest.TestCase):
@staticmethod
def _get_box_sequences():
# Define the `boxes` argument formats supported by region pooling operations
box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float)
box_list = [torch.tensor([[0, 0, 100, 100]], dtype=torch.float),
torch.tensor([[0, 0, 100, 100]], dtype=torch.float)]
box_tuple = tuple(box_list)
return box_tensor, box_list, box_tuple
def test_check_roi_boxes_shape(self):
# Ensure common sequences of tensors are supported
for box_sequence in self._get_box_sequences():
self.assertIsNone(ops._utils.check_roi_boxes_shape(box_sequence))
def test_convert_boxes_to_roi_format(self):
# Ensure common sequences of tensors yield the same result
ref_tensor = None
for box_sequence in self._get_box_sequences():
if ref_tensor is None:
ref_tensor = box_sequence
else:
self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)))
class BoxTester(unittest.TestCase):
def test_bbox_same(self):
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
box_same = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy")
self.assertEqual(box_same.size(), torch.Size([4, 4]))
self.assertEqual(box_same.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_same, exp_xyxy)).item()
box_same = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh")
self.assertEqual(box_same.size(), torch.Size([4, 4]))
self.assertEqual(box_same.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_same, exp_xyxy)).item()
box_same = ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh")
self.assertEqual(box_same.size(), torch.Size([4, 4]))
self.assertEqual(box_same.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_same, exp_xyxy)).item()
def test_bbox_xyxy_xywh(self):
# Simple test: convert boxes to xywh and back; make sure they stay the same.
# box_tensor is in (x1, y1, x2, y2) format.
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
self.assertEqual(box_xywh.size(), torch.Size([4, 4]))
self.assertEqual(box_xywh.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_xywh, exp_xywh)).item()
# Reverse conversion
box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
self.assertEqual(box_xyxy.size(), torch.Size([4, 4]))
self.assertEqual(box_xyxy.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
def test_bbox_xyxy_cxcywh(self):
# Simple test: convert boxes to cxcywh and back; make sure they stay the same.
# box_tensor is in (x1, y1, x2, y2) format.
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
[20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)
box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
self.assertEqual(box_cxcywh.size(), torch.Size([4, 4]))
self.assertEqual(box_cxcywh.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()
# Reverse conversion
box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
self.assertEqual(box_xyxy.size(), torch.Size([4, 4]))
self.assertEqual(box_xyxy.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
def test_bbox_xywh_cxcywh(self):
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
# Expected cxcywh values computed from the xywh boxes above
exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
[20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)
box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
self.assertEqual(box_cxcywh.size(), torch.Size([4, 4]))
self.assertEqual(box_cxcywh.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()
# Reverse conversion
box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
self.assertEqual(box_xywh.size(), torch.Size([4, 4]))
self.assertEqual(box_xywh.dtype, box_tensor.dtype)
assert torch.all(torch.eq(box_xywh, box_tensor)).item()
def test_bbox_invalid(self):
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
invalid_infmts = ["xwyh", "cxwyh"]
invalid_outfmts = ["xwcx", "xhwcy"]
for inv_infmt in invalid_infmts:
for inv_outfmt in invalid_outfmts:
self.assertRaises(ValueError, ops.box_convert, box_tensor, inv_infmt, inv_outfmt)
def test_bbox_convert_jit(self):
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
[10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
scripted_fn = torch.jit.script(ops.box_convert)
TOLERANCE = 1e-3
box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)
box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)
class BoxAreaTester(unittest.TestCase):
def test_box_area(self):
# A bounding box of area 10000 and a degenerate case
box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
expected = torch.tensor([10000, 0])
calc_area = ops.box_area(box_tensor)
assert calc_area.size() == torch.Size([2])
assert calc_area.dtype == box_tensor.dtype
assert torch.all(torch.eq(calc_area, expected)).item()
class BoxIouTester(unittest.TestCase):
def test_iou(self):
# Boxes to test IoU
boxes1 = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
boxes2 = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
# Expected IoU matrix for these boxes
expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]])
out = ops.box_iou(boxes1, boxes2)
# Check if all elements of tensor are as expected.
assert out.size() == torch.Size([3, 3])
tolerance = 1e-4
assert ((out - expected).abs().max() < tolerance).item()
class GenBoxIouTester(unittest.TestCase):
def test_gen_iou(self):
# Test Generalized IoU
boxes1 = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
boxes2 = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=torch.float)
# Expected gIoU matrix for these boxes
expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611],
[-0.7778, -0.8611, 1.0]])
out = ops.generalized_box_iou(boxes1, boxes2)
# Check if all elements of tensor are as expected.
assert out.size() == torch.Size([3, 3])
tolerance = 1e-4
assert ((out - expected).abs().max() < tolerance).item()
if __name__ == '__main__':
unittest.main()
import torchvision
from common_utils import TestCase, map_nested_tensor_object
from collections import OrderedDict
from itertools import product
import torch
import numpy as np
from torchvision import models
import unittest
import traceback
import random
def set_rng_seed(seed):
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
def get_available_quantizable_models():
# TODO add a registration mechanism to torchvision.models
return [k for k, v in models.quantization.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"]
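# The filter above keeps public lowercase callables (the model factory
# functions) and drops classes and private helpers, whose names start with
# an uppercase letter or an underscore.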
# list of models that are not scriptable
scriptable_quantizable_models_blacklist = []
@unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines and
'qnnpack' in torch.backends.quantized.supported_engines,
"This PyTorch build was not compiled with fbgemm and qnnpack support")
class ModelTester(TestCase):
def check_quantized_model(self, model, input_shape):
x = torch.rand(input_shape)
model(x)
return
def check_script(self, model, name):
if name in scriptable_quantizable_models_blacklist:
return
scriptable = True
msg = ""
try:
torch.jit.script(model)
except Exception as e:
tb = traceback.format_exc()
scriptable = False
msg = str(e) + str(tb)
self.assertTrue(scriptable, msg)
def _test_classification_model(self, name, input_shape):
# First check if quantize=True provides models that can run with input data
model = torchvision.models.quantization.__dict__[name](pretrained=False, quantize=True)
self.check_quantized_model(model, input_shape)
for eval_mode in [True, False]:
model = torchvision.models.quantization.__dict__[name](pretrained=False, quantize=False)
if eval_mode:
model.eval()
model.qconfig = torch.quantization.default_qconfig
else:
model.train()
model.qconfig = torch.quantization.default_qat_qconfig
model.fuse_model()
if eval_mode:
torch.quantization.prepare(model, inplace=True)
else:
torch.quantization.prepare_qat(model, inplace=True)
model.eval()
torch.quantization.convert(model, inplace=True)
self.check_script(model, name)
for model_name in get_available_quantizable_models():
# for-loop bodies don't define scopes, so we have to save the variables
# we want to close over in some way
def do_test(self, model_name=model_name):
input_shape = (1, 3, 224, 224)
if model_name in ['inception_v3']:
input_shape = (1, 3, 299, 299)
self._test_classification_model(model_name, input_shape)
# inception_v3 was causing timeouts on CircleCI
# See https://github.com/pytorch/vision/issues/1857
if model_name not in ['inception_v3']:
setattr(ModelTester, "test_" + model_name, do_test)
if __name__ == '__main__':
unittest.main()
import os
import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torchvision.transforms.functional_tensor as F_t
from torch._utils_internal import get_file_path_2
from numpy.testing import assert_array_almost_equal
import unittest
import math
import random
import numpy as np
from PIL import Image
try:
import accimage
except ImportError:
accimage = None
try:
from scipy import stats
except ImportError:
stats = None
from common_utils import cycle_over, int_dtypes, float_dtypes
GRACE_HOPPER = get_file_path_2(
os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg')
class Tester(unittest.TestCase):
def test_crop(self):
height = random.randint(10, 32) * 2
width = random.randint(10, 32) * 2
oheight = random.randint(5, (height - 2) / 2) * 2
owidth = random.randint(5, (width - 2) / 2) * 2
img = torch.ones(3, height, width)
oh1 = (height - oheight) // 2
ow1 = (width - owidth) // 2
imgnarrow = img[:, oh1:oh1 + oheight, ow1:ow1 + owidth]
imgnarrow.fill_(0)
result = transforms.Compose([
transforms.ToPILImage(),
transforms.CenterCrop((oheight, owidth)),
transforms.ToTensor(),
])(img)
self.assertEqual(result.sum(), 0,
"height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth))
oheight += 1
owidth += 1
result = transforms.Compose([
transforms.ToPILImage(),
transforms.CenterCrop((oheight, owidth)),
transforms.ToTensor(),
])(img)
sum1 = result.sum()
self.assertGreater(sum1, 1,
"height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth))
oheight += 1
owidth += 1
result = transforms.Compose([
transforms.ToPILImage(),
transforms.CenterCrop((oheight, owidth)),
transforms.ToTensor(),
])(img)
sum2 = result.sum()
self.assertGreater(sum2, 0,
"height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth))
self.assertGreater(sum2, sum1,
"height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth))
def test_five_crop(self):
to_pil_image = transforms.ToPILImage()
h = random.randint(5, 25)
w = random.randint(5, 25)
for single_dim in [True, False]:
crop_h = random.randint(1, h)
crop_w = random.randint(1, w)
if single_dim:
crop_h = min(crop_h, crop_w)
crop_w = crop_h
transform = transforms.FiveCrop(crop_h)
else:
transform = transforms.FiveCrop((crop_h, crop_w))
img = torch.FloatTensor(3, h, w).uniform_()
results = transform(to_pil_image(img))
self.assertEqual(len(results), 5)
for crop in results:
self.assertEqual(crop.size, (crop_w, crop_h))
to_pil_image = transforms.ToPILImage()
tl = to_pil_image(img[:, 0:crop_h, 0:crop_w])
tr = to_pil_image(img[:, 0:crop_h, w - crop_w:])
bl = to_pil_image(img[:, h - crop_h:, 0:crop_w])
br = to_pil_image(img[:, h - crop_h:, w - crop_w:])
center = transforms.CenterCrop((crop_h, crop_w))(to_pil_image(img))
expected_output = (tl, tr, bl, br, center)
self.assertEqual(results, expected_output)
def test_ten_crop(self):
to_pil_image = transforms.ToPILImage()
h = random.randint(5, 25)
w = random.randint(5, 25)
for should_vflip in [True, False]:
for single_dim in [True, False]:
crop_h = random.randint(1, h)
crop_w = random.randint(1, w)
if single_dim:
crop_h = min(crop_h, crop_w)
crop_w = crop_h
transform = transforms.TenCrop(crop_h,
vertical_flip=should_vflip)
five_crop = transforms.FiveCrop(crop_h)
else:
transform = transforms.TenCrop((crop_h, crop_w),
vertical_flip=should_vflip)
five_crop = transforms.FiveCrop((crop_h, crop_w))
img = to_pil_image(torch.FloatTensor(3, h, w).uniform_())
results = transform(img)
expected_output = five_crop(img)
# Checking if FiveCrop and TenCrop can be printed as string
transform.__repr__()
five_crop.__repr__()
if should_vflip:
vflipped_img = img.transpose(Image.FLIP_TOP_BOTTOM)
expected_output += five_crop(vflipped_img)
else:
hflipped_img = img.transpose(Image.FLIP_LEFT_RIGHT)
expected_output += five_crop(hflipped_img)
self.assertEqual(len(results), 10)
self.assertEqual(results, expected_output)
def test_randomresized_params(self):
height = random.randint(24, 32) * 2
width = random.randint(24, 32) * 2
img = torch.ones(3, height, width)
to_pil_image = transforms.ToPILImage()
img = to_pil_image(img)
size = 100
epsilon = 0.05
min_scale = 0.25
for _ in range(10):
scale_min = max(round(random.random(), 2), min_scale)
scale_range = (scale_min, scale_min + round(random.random(), 2))
aspect_min = max(round(random.random(), 2), epsilon)
aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2))
randresizecrop = transforms.RandomResizedCrop(size, scale_range, aspect_ratio_range)
i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range)
aspect_ratio_obtained = w / h
self.assertTrue((min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained and
aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon) or
aspect_ratio_obtained == 1.0)
self.assertIsInstance(i, int)
self.assertIsInstance(j, int)
self.assertIsInstance(h, int)
self.assertIsInstance(w, int)
def test_randomperspective(self):
for _ in range(10):
height = random.randint(24, 32) * 2
width = random.randint(24, 32) * 2
img = torch.ones(3, height, width)
to_pil_image = transforms.ToPILImage()
img = to_pil_image(img)
perp = transforms.RandomPerspective()
startpoints, endpoints = perp.get_params(width, height, 0.5)
tr_img = F.perspective(img, startpoints, endpoints)
tr_img2 = F.to_tensor(F.perspective(tr_img, endpoints, startpoints))
tr_img = F.to_tensor(tr_img)
self.assertEqual(img.size[0], width)
self.assertEqual(img.size[1], height)
self.assertGreater(torch.nn.functional.mse_loss(tr_img, F.to_tensor(img)) + 0.3,
torch.nn.functional.mse_loss(tr_img2, F.to_tensor(img)))
def test_randomperspective_fill(self):
height = 100
width = 100
img = torch.ones(3, height, width)
to_pil_image = transforms.ToPILImage()
img = to_pil_image(img)
modes = ("L", "RGB", "F")
nums_bands = [len(mode) for mode in modes]
fill = 127
for mode, num_bands in zip(modes, nums_bands):
img_conv = img.convert(mode)
perspective = transforms.RandomPerspective(p=1, fill=fill)
tr_img = perspective(img_conv)
pixel = tr_img.getpixel((0, 0))
if not isinstance(pixel, tuple):
pixel = (pixel,)
self.assertTupleEqual(pixel, tuple([fill] * num_bands))
for mode, num_bands in zip(modes, nums_bands):
img_conv = img.convert(mode)
startpoints, endpoints = transforms.RandomPerspective.get_params(width, height, 0.5)
tr_img = F.perspective(img_conv, startpoints, endpoints, fill=fill)
pixel = tr_img.getpixel((0, 0))
if not isinstance(pixel, tuple):
pixel = (pixel,)
self.assertTupleEqual(pixel, tuple([fill] * num_bands))
for wrong_num_bands in set(nums_bands) - {num_bands}:
with self.assertRaises(ValueError):
F.perspective(img_conv, startpoints, endpoints, fill=tuple([fill] * wrong_num_bands))
def test_resize(self):
input_sizes = [
# height, width
# square image
(28, 28),
(27, 27),
# rectangular image: h < w
(28, 34),
(29, 35),
# rectangular image: h > w
(34, 28),
(35, 29),
]
test_output_sizes_1 = [
# single integer
22, 27, 28, 36,
# single integer in tuple/list
[22, ], (27, ),
]
test_output_sizes_2 = [
# two integers
[22, 22], [22, 28], [22, 36],
[27, 22], [36, 22], [28, 28],
[28, 37], [37, 27], [37, 37]
]
for height, width in input_sizes:
img = Image.new("RGB", size=(width, height), color=127)
for osize in test_output_sizes_1:
t = transforms.Resize(osize)
result = t(img)
msg = "{}, {} - {}".format(height, width, osize)
osize = osize[0] if isinstance(osize, (list, tuple)) else osize
# If size is an int, smaller edge of the image will be matched to this number.
# i.e., if height > width, then the image will be rescaled to (size * height / width, size).
if height < width:
expected_size = (int(osize * width / height), osize) # (w, h)
self.assertEqual(result.size, expected_size, msg=msg)
elif width < height:
expected_size = (osize, int(osize * height / width)) # (w, h)
self.assertEqual(result.size, expected_size, msg=msg)
else:
expected_size = (osize, osize) # (w, h)
self.assertEqual(result.size, expected_size, msg=msg)
for height, width in input_sizes:
img = Image.new("RGB", size=(width, height), color=127)
for osize in test_output_sizes_2:
oheight, owidth = osize
t = transforms.Resize(osize)
result = t(img)
self.assertEqual((owidth, oheight), result.size)
def test_random_crop(self):
height = random.randint(10, 32) * 2
width = random.randint(10, 32) * 2
oheight = random.randint(5, (height - 2) / 2) * 2
owidth = random.randint(5, (width - 2) / 2) * 2
img = torch.ones(3, height, width)
result = transforms.Compose([
transforms.ToPILImage(),
transforms.RandomCrop((oheight, owidth)),
transforms.ToTensor(),
])(img)
self.assertEqual(result.size(1), oheight)
self.assertEqual(result.size(2), owidth)
padding = random.randint(1, 20)
result = transforms.Compose([
transforms.ToPILImage(),
transforms.RandomCrop((oheight, owidth), padding=padding),
transforms.ToTensor(),
])(img)
self.assertEqual(result.size(1), oheight)
self.assertEqual(result.size(2), owidth)
result = transforms.Compose([
transforms.ToPILImage(),
transforms.RandomCrop((height, width)),
transforms.ToTensor()
])(img)
self.assertEqual(result.size(1), height)
self.assertEqual(result.size(2), width)
self.assertTrue(np.allclose(img.numpy(), result.numpy()))
result = transforms.Compose([
transforms.ToPILImage(),
transforms.RandomCrop((height + 1, width + 1), pad_if_needed=True),
transforms.ToTensor(),
])(img)
self.assertEqual(result.size(1), height + 1)
self.assertEqual(result.size(2), width + 1)
t = transforms.RandomCrop(48)
img = torch.ones(3, 32, 32)
with self.assertRaisesRegex(ValueError, r"Required crop size .+ is larger then input image size .+"):
t(img)
def test_pad(self):
height = random.randint(10, 32) * 2
width = random.randint(10, 32) * 2
img = torch.ones(3, height, width)
padding = random.randint(1, 20)
fill = random.randint(1, 50)
result = transforms.Compose([
transforms.ToPILImage(),
transforms.Pad(padding, fill=fill),
transforms.ToTensor(),
])(img)
self.assertEqual(result.size(1), height + 2 * padding)
self.assertEqual(result.size(2), width + 2 * padding)
# check that all elements in the padded region correspond
# to the pad value
fill_v = fill / 255
eps = 1e-5
self.assertTrue((result[:, :padding, :] - fill_v).abs().max() < eps)
self.assertTrue((result[:, :, :padding] - fill_v).abs().max() < eps)
self.assertRaises(ValueError, transforms.Pad(padding, fill=(1, 2)),
transforms.ToPILImage()(img))
def test_pad_with_tuple_of_pad_values(self):
height = random.randint(10, 32) * 2
width = random.randint(10, 32) * 2
img = transforms.ToPILImage()(torch.ones(3, height, width))
padding = tuple([random.randint(1, 20) for _ in range(2)])
output = transforms.Pad(padding)(img)
self.assertEqual(output.size, (width + padding[0] * 2, height + padding[1] * 2))
padding = tuple([random.randint(1, 20) for _ in range(4)])
output = transforms.Pad(padding)(img)
self.assertEqual(output.size[0], width + padding[0] + padding[2])
self.assertEqual(output.size[1], height + padding[1] + padding[3])
# Checking if Padding can be printed as string
transforms.Pad(padding).__repr__()
def test_pad_with_non_constant_padding_modes(self):
"""Unit tests for edge, reflect, symmetric padding"""
img = torch.zeros(3, 27, 27).byte()
img[:, :, 0] = 1 # Constant value added to leftmost edge
img = transforms.ToPILImage()(img)
img = F.pad(img, 1, (200, 200, 200))
# Pad 3 to all sides
edge_padded_img = F.pad(img, 3, padding_mode='edge')
# First 6 elements of leftmost edge in the middle of the image, values are in order:
# edge_pad, edge_pad, edge_pad, constant_pad, constant value added to leftmost edge, 0
edge_middle_slice = np.asarray(edge_padded_img).transpose(2, 0, 1)[0][17][:6]
self.assertTrue(np.all(edge_middle_slice == np.asarray([200, 200, 200, 200, 1, 0])))
self.assertEqual(transforms.ToTensor()(edge_padded_img).size(), (3, 35, 35))
# Pad 3 to left/right, 2 to top/bottom
reflect_padded_img = F.pad(img, (3, 2), padding_mode='reflect')
# First 6 elements of leftmost edge in the middle of the image, values are in order:
# reflect_pad, reflect_pad, reflect_pad, constant_pad, constant value added to leftmost edge, 0
reflect_middle_slice = np.asarray(reflect_padded_img).transpose(2, 0, 1)[0][17][:6]
self.assertTrue(np.all(reflect_middle_slice == np.asarray([0, 0, 1, 200, 1, 0])))
self.assertEqual(transforms.ToTensor()(reflect_padded_img).size(), (3, 33, 35))
# Pad 3 to left, 2 to top, 2 to right, 1 to bottom
symmetric_padded_img = F.pad(img, (3, 2, 2, 1), padding_mode='symmetric')
# First 6 elements of leftmost edge in the middle of the image, values are in order:
# sym_pad, sym_pad, sym_pad, constant_pad, constant value added to leftmost edge, 0
symmetric_middle_slice = np.asarray(symmetric_padded_img).transpose(2, 0, 1)[0][17][:6]
self.assertTrue(np.all(symmetric_middle_slice == np.asarray([0, 1, 200, 200, 1, 0])))
self.assertEqual(transforms.ToTensor()(symmetric_padded_img).size(), (3, 32, 34))
# Check negative padding explicitly for symmetric case, since it is not
# implemented for tensor case to compare to
# Crop 1 to left, pad 2 to top, pad 3 to right, crop 3 to bottom
symmetric_padded_img_neg = F.pad(img, (-1, 2, 3, -3), padding_mode='symmetric')
symmetric_neg_middle_left = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][:3]
symmetric_neg_middle_right = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][-4:]
self.assertTrue(np.all(symmetric_neg_middle_left == np.asarray([1, 0, 0])))
self.assertTrue(np.all(symmetric_neg_middle_right == np.asarray([200, 200, 0, 0])))
self.assertEqual(transforms.ToTensor()(symmetric_padded_img_neg).size(), (3, 28, 31))
def test_pad_raises_with_invalid_pad_sequence_len(self):
with self.assertRaises(ValueError):
transforms.Pad(())
with self.assertRaises(ValueError):
transforms.Pad((1, 2, 3))
with self.assertRaises(ValueError):
transforms.Pad((1, 2, 3, 4, 5))
def test_pad_with_mode_F_images(self):
pad = 2
transform = transforms.Pad(pad)
img = Image.new("F", (10, 10))
padded_img = transform(img)
self.assertSequenceEqual(padded_img.size, [edge_size + 2 * pad for edge_size in img.size])
def test_lambda(self):
trans = transforms.Lambda(lambda x: x.add(10))
x = torch.randn(10)
y = trans(x)
self.assertTrue(y.equal(torch.add(x, 10)))
trans = transforms.Lambda(lambda x: x.add_(10))
x = torch.randn(10)
y = trans(x)
self.assertTrue(y.equal(x))
# Checking if Lambda can be printed as string
trans.__repr__()
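# The two cases above exercise the out-of-place/in-place distinction:
# x.add(10) allocates a new tensor, while x.add_(10) mutates x and returns
# x itself, so the Lambda output aliases its input. A quick sketch:
#     >>> t = torch.zeros(1)
#     >>> t.add_(10) is t
#     True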
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_apply(self):
random_state = random.getstate()
random.seed(42)
random_apply_transform = transforms.RandomApply(
[
transforms.RandomRotation((-45, 45)),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
], p=0.75
)
img = transforms.ToPILImage()(torch.rand(3, 10, 10))
num_samples = 250
num_applies = 0
for _ in range(num_samples):
out = random_apply_transform(img)
if out != img:
num_applies += 1
p_value = stats.binom_test(num_applies, num_samples, p=0.75)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Checking if RandomApply can be printed as string
random_apply_transform.__repr__()
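# The assertion above is statistical rather than exact: num_applies is a
# Binomial(num_samples, 0.75) draw, and stats.binom_test only drops below
# the 1e-4 cutoff when the observed apply rate is far from 0.75 (roughly,
# ~100/250 would fail while anything near 187/250 passes; illustrative
# numbers, not produced by this test).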
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_choice(self):
random_state = random.getstate()
random.seed(42)
random_choice_transform = transforms.RandomChoice(
[
transforms.Resize(15),
transforms.Resize(20),
transforms.CenterCrop(10)
]
)
img = transforms.ToPILImage()(torch.rand(3, 25, 25))
num_samples = 250
num_resize_15 = 0
num_resize_20 = 0
num_crop_10 = 0
for _ in range(num_samples):
out = random_choice_transform(img)
if out.size == (15, 15):
num_resize_15 += 1
elif out.size == (20, 20):
num_resize_20 += 1
elif out.size == (10, 10):
num_crop_10 += 1
p_value = stats.binom_test(num_resize_15, num_samples, p=0.33333)
self.assertGreater(p_value, 0.0001)
p_value = stats.binom_test(num_resize_20, num_samples, p=0.33333)
self.assertGreater(p_value, 0.0001)
p_value = stats.binom_test(num_crop_10, num_samples, p=0.33333)
self.assertGreater(p_value, 0.0001)
random.setstate(random_state)
# Checking if RandomChoice can be printed as string
random_choice_transform.__repr__()
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_order(self):
random_state = random.getstate()
random.seed(42)
random_order_transform = transforms.RandomOrder(
[
transforms.Resize(20),
transforms.CenterCrop(10)
]
)
img = transforms.ToPILImage()(torch.rand(3, 25, 25))
num_samples = 250
num_normal_order = 0
resize_crop_out = transforms.CenterCrop(10)(transforms.Resize(20)(img))
for _ in range(num_samples):
out = random_order_transform(img)
if out == resize_crop_out:
num_normal_order += 1
p_value = stats.binom_test(num_normal_order, num_samples, p=0.5)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Checking if RandomOrder can be printed as string
random_order_transform.__repr__()
def test_to_tensor(self):
test_channels = [1, 3, 4]
height, width = 4, 4
trans = transforms.ToTensor()
with self.assertRaises(TypeError):
trans(np.random.rand(1, height, width).tolist())
with self.assertRaises(ValueError):
trans(np.random.rand(height))
trans(np.random.rand(1, 1, height, width))
for channels in test_channels:
input_data = torch.ByteTensor(channels, height, width).random_(0, 255).float().div_(255)
img = transforms.ToPILImage()(input_data)
output = trans(img)
self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))
ndarray = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8)
output = trans(ndarray)
expected_output = ndarray.transpose((2, 0, 1)) / 255.0
self.assertTrue(np.allclose(output.numpy(), expected_output))
ndarray = np.random.rand(height, width, channels).astype(np.float32)
output = trans(ndarray)
expected_output = ndarray.transpose((2, 0, 1))
self.assertTrue(np.allclose(output.numpy(), expected_output))
# separate test for mode '1' PIL images
input_data = torch.ByteTensor(1, height, width).bernoulli_()
img = transforms.ToPILImage()(input_data.mul(255)).convert('1')
output = trans(img)
self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))
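# Sketch of the ToTensor contract exercised above: an HxWxC uint8 ndarray
# (or PIL image) becomes a CxHxW float tensor scaled into [0, 1]:
#     >>> import numpy as np
#     >>> import torchvision.transforms as transforms
#     >>> arr = np.full((4, 4, 3), 255, dtype=np.uint8)   # HWC, uint8
#     >>> t = transforms.ToTensor()(arr)                  # CHW, float32
#     >>> tuple(t.shape), float(t.max())
#     ((3, 4, 4), 1.0)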
def test_max_value(self):
for dtype in int_dtypes():
self.assertEqual(F_t._max_value(dtype), torch.iinfo(dtype).max)
for dtype in float_dtypes():
self.assertGreater(F_t._max_value(dtype), torch.finfo(dtype).max)
def test_convert_image_dtype_float_to_float(self):
for input_dtype, output_dtypes in cycle_over(float_dtypes()):
input_image = torch.tensor((0.0, 1.0), dtype=input_dtype)
for output_dtype in output_dtypes:
with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
transform = transforms.ConvertImageDtype(output_dtype)
transform_script = torch.jit.script(F.convert_image_dtype)
output_image = transform(input_image)
output_image_script = transform_script(input_image, output_dtype)
script_diff = output_image_script - output_image
self.assertLess(script_diff.abs().max(), 1e-6)
actual_min, actual_max = output_image.tolist()
desired_min, desired_max = 0.0, 1.0
self.assertAlmostEqual(actual_min, desired_min)
self.assertAlmostEqual(actual_max, desired_max)
def test_convert_image_dtype_float_to_int(self):
for input_dtype in float_dtypes():
input_image = torch.tensor((0.0, 1.0), dtype=input_dtype)
for output_dtype in int_dtypes():
with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
transform = transforms.ConvertImageDtype(output_dtype)
transform_script = torch.jit.script(F.convert_image_dtype)
if (input_dtype == torch.float32 and output_dtype in (torch.int32, torch.int64)) or (
input_dtype == torch.float64 and output_dtype == torch.int64
):
with self.assertRaises(RuntimeError):
transform(input_image)
else:
output_image = transform(input_image)
output_image_script = transform_script(input_image, output_dtype)
script_diff = output_image_script - output_image
self.assertLess(script_diff.abs().max(), 1e-6)
actual_min, actual_max = output_image.tolist()
desired_min, desired_max = 0, torch.iinfo(output_dtype).max
self.assertEqual(actual_min, desired_min)
self.assertEqual(actual_max, desired_max)
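# The RuntimeError branch above exists because those conversions cannot be
# exact: float32 has a 24-bit significand, so integers above 2**24 are not
# all representable and scaling by iinfo(int32).max = 2**31 - 1 (or by
# iinfo(int64).max) would silently lose precision; likewise float64's 53-bit
# significand cannot represent iinfo(int64).max = 2**63 - 1.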
def test_convert_image_dtype_int_to_float(self):
for input_dtype in int_dtypes():
input_image = torch.tensor((0, torch.iinfo(input_dtype).max), dtype=input_dtype)
for output_dtype in float_dtypes():
with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
transform = transforms.ConvertImageDtype(output_dtype)
transform_script = torch.jit.script(F.convert_image_dtype)
output_image = transform(input_image)
output_image_script = transform_script(input_image, output_dtype)
script_diff = output_image_script - output_image
self.assertLess(script_diff.abs().max(), 1e-6)
actual_min, actual_max = output_image.tolist()
desired_min, desired_max = 0.0, 1.0
self.assertAlmostEqual(actual_min, desired_min)
self.assertGreaterEqual(actual_min, desired_min)
self.assertAlmostEqual(actual_max, desired_max)
self.assertLessEqual(actual_max, desired_max)
def test_convert_image_dtype_int_to_int(self):
for input_dtype, output_dtypes in cycle_over(int_dtypes()):
input_max = torch.iinfo(input_dtype).max
input_image = torch.tensor((0, input_max), dtype=input_dtype)
for output_dtype in output_dtypes:
output_max = torch.iinfo(output_dtype).max
with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
transform = transforms.ConvertImageDtype(output_dtype)
transform_script = torch.jit.script(F.convert_image_dtype)
output_image = transform(input_image)
output_image_script = transform_script(input_image, output_dtype)
script_diff = output_image_script.float() - output_image.float()
self.assertLess(
script_diff.abs().max(), 1e-6, msg="{} vs {}".format(output_image_script, output_image)
)
actual_min, actual_max = output_image.tolist()
desired_min, desired_max = 0, output_max
# see https://github.com/pytorch/vision/pull/2078#issuecomment-641036236 for details
if input_max >= output_max:
error_term = 0
else:
error_term = 1 - (torch.iinfo(output_dtype).max + 1) // (torch.iinfo(input_dtype).max + 1)
self.assertEqual(actual_min, desired_min)
self.assertEqual(actual_max, desired_max + error_term)
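# Worked instance of the error_term above (uint8 -> int16, hand-computed):
# the upscale factor is (2**15) // (2**8) = 128, so the input maximum 255
# maps to 255 * 128 = 32640 rather than 32767; error_term = 1 - 128 = -127
# and desired_max + error_term = 32767 - 127 = 32640.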
def test_convert_image_dtype_int_to_int_consistency(self):
for input_dtype, output_dtypes in cycle_over(int_dtypes()):
input_max = torch.iinfo(input_dtype).max
input_image = torch.tensor((0, input_max), dtype=input_dtype)
for output_dtype in output_dtypes:
output_max = torch.iinfo(output_dtype).max
if output_max <= input_max:
continue
with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype):
transform = transforms.ConvertImageDtype(output_dtype)
inverse_transform = transforms.ConvertImageDtype(input_dtype)
output_image = inverse_transform(transform(input_image))
actual_min, actual_max = output_image.tolist()
desired_min, desired_max = 0, input_max
self.assertEqual(actual_min, desired_min)
self.assertEqual(actual_max, desired_max)
@unittest.skipIf(accimage is None, 'accimage not available')
def test_accimage_to_tensor(self):
trans = transforms.ToTensor()
expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
output = trans(accimage.Image(GRACE_HOPPER))
self.assertEqual(expected_output.size(), output.size())
self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))
def test_pil_to_tensor(self):
test_channels = [1, 3, 4]
height, width = 4, 4
trans = transforms.PILToTensor()
with self.assertRaises(TypeError):
trans(np.random.rand(1, height, width).tolist())
trans(np.random.rand(1, height, width))
for channels in test_channels:
input_data = torch.ByteTensor(channels, height, width).random_(0, 255)
img = transforms.ToPILImage()(input_data)
output = trans(img)
self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))
input_data = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8)
img = transforms.ToPILImage()(input_data)
output = trans(img)
expected_output = input_data.transpose((2, 0, 1))
self.assertTrue(np.allclose(output.numpy(), expected_output))
input_data = torch.as_tensor(np.random.rand(channels, height, width).astype(np.float32))
img = transforms.ToPILImage()(input_data) # CHW -> HWC and (* 255).byte()
output = trans(img) # HWC -> CHW
expected_output = (input_data * 255).byte()
self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))
# separate test for mode '1' PIL images
input_data = torch.ByteTensor(1, height, width).bernoulli_()
img = transforms.ToPILImage()(input_data.mul(255)).convert('1')
output = trans(img)
self.assertTrue(np.allclose(input_data.numpy(), output.numpy()))
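# Contract exercised above, in contrast to ToTensor: PILToTensor only
# rearranges HWC -> CHW and preserves the stored values and dtype, with no
# division by 255. A quick sketch:
#     >>> arr = np.full((4, 4, 3), 255, dtype=np.uint8)
#     >>> img = transforms.ToPILImage()(arr)
#     >>> transforms.PILToTensor()(img).dtype
#     torch.uint8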
@unittest.skipIf(accimage is None, 'accimage not available')
def test_accimage_pil_to_tensor(self):
trans = transforms.PILToTensor()
expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
output = trans(accimage.Image(GRACE_HOPPER))
self.assertEqual(expected_output.size(), output.size())
self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))
@unittest.skipIf(accimage is None, 'accimage not available')
def test_accimage_resize(self):
trans = transforms.Compose([
transforms.Resize(256, interpolation=Image.LINEAR),
transforms.ToTensor(),
])
# Checking if Compose, Resize and ToTensor can be printed as string
trans.__repr__()
expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
output = trans(accimage.Image(GRACE_HOPPER))
self.assertEqual(expected_output.size(), output.size())
self.assertLess(np.abs((expected_output - output).mean()), 1e-3)
self.assertLess((expected_output - output).var(), 1e-5)
# note the high absolute tolerance
self.assertTrue(np.allclose(output.numpy(), expected_output.numpy(), atol=5e-2))
@unittest.skipIf(accimage is None, 'accimage not available')
def test_accimage_crop(self):
trans = transforms.Compose([
transforms.CenterCrop(256),
transforms.ToTensor(),
])
# Checking if Compose, CenterCrop and ToTensor can be printed as string
trans.__repr__()
expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB'))
output = trans(accimage.Image(GRACE_HOPPER))
self.assertEqual(expected_output.size(), output.size())
self.assertTrue(np.allclose(output.numpy(), expected_output.numpy()))
def test_1_channel_tensor_to_pil_image(self):
to_tensor = transforms.ToTensor()
img_data_float = torch.Tensor(1, 4, 4).uniform_()
img_data_byte = torch.ByteTensor(1, 4, 4).random_(0, 255)
img_data_short = torch.ShortTensor(1, 4, 4).random_()
img_data_int = torch.IntTensor(1, 4, 4).random_()
inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(),
img_data_byte.float().div(255.0).numpy(),
img_data_short.numpy(),
img_data_int.numpy()]
expected_modes = ['L', 'L', 'I;16', 'I']
for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes):
for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
img = transform(img_data)
self.assertEqual(img.mode, mode)
self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy()))
# 'F' mode for torch.FloatTensor
img_F_mode = transforms.ToPILImage(mode='F')(img_data_float)
self.assertEqual(img_F_mode.mode, 'F')
self.assertTrue(np.allclose(np.array(Image.fromarray(img_data_float.squeeze(0).numpy(), mode='F')),
np.array(img_F_mode)))
def test_1_channel_ndarray_to_pil_image(self):
img_data_float = torch.Tensor(4, 4, 1).uniform_().numpy()
img_data_byte = torch.ByteTensor(4, 4, 1).random_(0, 255).numpy()
img_data_short = torch.ShortTensor(4, 4, 1).random_().numpy()
img_data_int = torch.IntTensor(4, 4, 1).random_().numpy()
inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
expected_modes = ['F', 'L', 'I;16', 'I']
for img_data, mode in zip(inputs, expected_modes):
for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
img = transform(img_data)
self.assertEqual(img.mode, mode)
self.assertTrue(np.allclose(img_data[:, :, 0], img))
def test_2_channel_ndarray_to_pil_image(self):
def verify_img_data(img_data, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'LA') # default should assume LA
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(2):
self.assertTrue(np.allclose(img_data[:, :, i], split[i]))
img_data = torch.ByteTensor(4, 4, 2).random_(0, 255).numpy()
for mode in [None, 'LA']:
verify_img_data(img_data, mode)
transforms.ToPILImage().__repr__()
with self.assertRaises(ValueError):
# should raise if we try a mode for 4 or 1 or 3 channel images
transforms.ToPILImage(mode='RGBA')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='RGB')(img_data)
def test_2_channel_tensor_to_pil_image(self):
def verify_img_data(img_data, expected_output, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'LA') # default should assume LA
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(2):
self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))
img_data = torch.Tensor(2, 4, 4).uniform_()
expected_output = img_data.mul(255).int().float().div(255)
for mode in [None, 'LA']:
verify_img_data(img_data, expected_output, mode=mode)
with self.assertRaises(ValueError):
# should raise if we try a mode for 4 or 1 or 3 channel images
transforms.ToPILImage(mode='RGBA')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='RGB')(img_data)
def test_3_channel_tensor_to_pil_image(self):
def verify_img_data(img_data, expected_output, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'RGB') # default should assume RGB
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(3):
self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))
img_data = torch.Tensor(3, 4, 4).uniform_()
expected_output = img_data.mul(255).int().float().div(255)
for mode in [None, 'RGB', 'HSV', 'YCbCr']:
verify_img_data(img_data, expected_output, mode=mode)
with self.assertRaises(ValueError):
# should raise if we try a mode for 4 or 1 or 2 channel images
transforms.ToPILImage(mode='RGBA')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='LA')(img_data)
with self.assertRaises(ValueError):
transforms.ToPILImage()(torch.Tensor(1, 3, 4, 4).uniform_())
def test_3_channel_ndarray_to_pil_image(self):
def verify_img_data(img_data, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'RGB') # default should assume RGB
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(3):
self.assertTrue(np.allclose(img_data[:, :, i], split[i]))
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
for mode in [None, 'RGB', 'HSV', 'YCbCr']:
verify_img_data(img_data, mode)
# Checking if ToPILImage can be printed as string
transforms.ToPILImage().__repr__()
with self.assertRaises(ValueError):
# should raise if we try a mode for 4 or 1 or 2 channel images
transforms.ToPILImage(mode='RGBA')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='LA')(img_data)
def test_4_channel_tensor_to_pil_image(self):
def verify_img_data(img_data, expected_output, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'RGBA') # default should assume RGBA
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(4):
self.assertTrue(np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy()))
img_data = torch.Tensor(4, 4, 4).uniform_()
expected_output = img_data.mul(255).int().float().div(255)
for mode in [None, 'RGBA', 'CMYK', 'RGBX']:
verify_img_data(img_data, expected_output, mode)
with self.assertRaises(ValueError):
# should raise if we try a mode for 3 or 1 or 2 channel images
transforms.ToPILImage(mode='RGB')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='LA')(img_data)
def test_4_channel_ndarray_to_pil_image(self):
def verify_img_data(img_data, mode):
if mode is None:
img = transforms.ToPILImage()(img_data)
self.assertEqual(img.mode, 'RGBA') # default should assume RGBA
else:
img = transforms.ToPILImage(mode=mode)(img_data)
self.assertEqual(img.mode, mode)
split = img.split()
for i in range(4):
self.assertTrue(np.allclose(img_data[:, :, i], split[i]))
img_data = torch.ByteTensor(4, 4, 4).random_(0, 255).numpy()
for mode in [None, 'RGBA', 'CMYK', 'RGBX']:
verify_img_data(img_data, mode)
with self.assertRaises(ValueError):
# should raise if we try a mode for 3 or 1 or 2 channel images
transforms.ToPILImage(mode='RGB')(img_data)
transforms.ToPILImage(mode='P')(img_data)
transforms.ToPILImage(mode='LA')(img_data)
def test_2d_tensor_to_pil_image(self):
to_tensor = transforms.ToTensor()
img_data_float = torch.Tensor(4, 4).uniform_()
img_data_byte = torch.ByteTensor(4, 4).random_(0, 255)
img_data_short = torch.ShortTensor(4, 4).random_()
img_data_int = torch.IntTensor(4, 4).random_()
inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
expected_outputs = [img_data_float.mul(255).int().float().div(255).numpy(),
img_data_byte.float().div(255.0).numpy(),
img_data_short.numpy(),
img_data_int.numpy()]
expected_modes = ['L', 'L', 'I;16', 'I']
for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes):
for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
img = transform(img_data)
self.assertEqual(img.mode, mode)
self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy()))
def test_2d_ndarray_to_pil_image(self):
img_data_float = torch.Tensor(4, 4).uniform_().numpy()
img_data_byte = torch.ByteTensor(4, 4).random_(0, 255).numpy()
img_data_short = torch.ShortTensor(4, 4).random_().numpy()
img_data_int = torch.IntTensor(4, 4).random_().numpy()
inputs = [img_data_float, img_data_byte, img_data_short, img_data_int]
expected_modes = ['F', 'L', 'I;16', 'I']
for img_data, mode in zip(inputs, expected_modes):
for transform in [transforms.ToPILImage(), transforms.ToPILImage(mode=mode)]:
img = transform(img_data)
self.assertEqual(img.mode, mode)
self.assertTrue(np.allclose(img_data, img))
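# Summary of the mode inference exercised by the ToPILImage tests above for
# single-channel inputs (dtype -> PIL mode):
#     float32 ndarray -> 'F'    uint8 -> 'L'    int16 -> 'I;16'    int32 -> 'I'
# Float *tensors* are scaled by 255 and converted to mode 'L' unless
# mode='F' is requested explicitly, which is why the tensor and ndarray
# expectations differ for the float case.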
def test_tensor_bad_types_to_pil_image(self):
with self.assertRaises(ValueError):
transforms.ToPILImage()(torch.ones(1, 3, 4, 4))
def test_ndarray_bad_types_to_pil_image(self):
trans = transforms.ToPILImage()
with self.assertRaises(TypeError):
trans(np.ones([4, 4, 1], np.int64))
trans(np.ones([4, 4, 1], np.uint16))
trans(np.ones([4, 4, 1], np.uint32))
trans(np.ones([4, 4, 1], np.float64))
with self.assertRaises(ValueError):
transforms.ToPILImage()(np.ones([1, 4, 4, 3]))
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_vertical_flip(self):
random_state = random.getstate()
random.seed(42)
img = transforms.ToPILImage()(torch.rand(3, 10, 10))
vimg = img.transpose(Image.FLIP_TOP_BOTTOM)
num_samples = 250
num_vertical = 0
for _ in range(num_samples):
out = transforms.RandomVerticalFlip()(img)
if out == vimg:
num_vertical += 1
p_value = stats.binom_test(num_vertical, num_samples, p=0.5)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
num_samples = 250
num_vertical = 0
for _ in range(num_samples):
out = transforms.RandomVerticalFlip(p=0.7)(img)
if out == vimg:
num_vertical += 1
p_value = stats.binom_test(num_vertical, num_samples, p=0.7)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Checking if RandomVerticalFlip can be printed as string
transforms.RandomVerticalFlip().__repr__()
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_horizontal_flip(self):
random_state = random.getstate()
random.seed(42)
img = transforms.ToPILImage()(torch.rand(3, 10, 10))
himg = img.transpose(Image.FLIP_LEFT_RIGHT)
num_samples = 250
num_horizontal = 0
for _ in range(num_samples):
out = transforms.RandomHorizontalFlip()(img)
if out == himg:
num_horizontal += 1
p_value = stats.binom_test(num_horizontal, num_samples, p=0.5)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
num_samples = 250
num_horizontal = 0
for _ in range(num_samples):
out = transforms.RandomHorizontalFlip(p=0.7)(img)
if out == himg:
num_horizontal += 1
p_value = stats.binom_test(num_horizontal, num_samples, p=0.7)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Checking if RandomHorizontalFlip can be printed as string
transforms.RandomHorizontalFlip().__repr__()
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_normalize(self):
def samples_from_standard_normal(tensor):
p_value = stats.kstest(list(tensor.view(-1)), 'norm', args=(0, 1)).pvalue
return p_value > 0.0001
random_state = random.getstate()
random.seed(42)
for channels in [1, 3]:
img = torch.rand(channels, 10, 10)
mean = [img[c].mean() for c in range(channels)]
std = [img[c].std() for c in range(channels)]
normalized = transforms.Normalize(mean, std)(img)
self.assertTrue(samples_from_standard_normal(normalized))
random.setstate(random_state)
# Checking if Normalize can be printed as string
transforms.Normalize(mean, std).__repr__()
# Checking the optional in-place behaviour
tensor = torch.rand((1, 16, 16))
tensor_inplace = transforms.Normalize((0.5,), (0.5,), inplace=True)(tensor)
self.assertTrue(torch.equal(tensor, tensor_inplace))
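# Normalize computes (input - mean) / std per channel; a minimal sketch with
# hand-picked values:
#     >>> import torch
#     >>> import torchvision.transforms as transforms
#     >>> t = torch.ones(1, 2, 2)
#     >>> transforms.Normalize((0.5,), (0.5,))(t).unique()  # (1 - 0.5) / 0.5
#     tensor([1.])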
def test_normalize_different_dtype(self):
for dtype1 in [torch.float32, torch.float64]:
img = torch.rand(3, 10, 10, dtype=dtype1)
for dtype2 in [torch.int64, torch.float32, torch.float64]:
mean = torch.tensor([1, 2, 3], dtype=dtype2)
std = torch.tensor([1, 2, 1], dtype=dtype2)
# checks that it doesn't crash
transforms.functional.normalize(img, mean, std)
def test_normalize_3d_tensor(self):
torch.manual_seed(28)
n_channels = 3
img_size = 10
mean = torch.rand(n_channels)
std = torch.rand(n_channels)
img = torch.rand(n_channels, img_size, img_size)
target = F.normalize(img, mean, std).numpy()
mean_unsqueezed = mean.view(-1, 1, 1)
std_unsqueezed = std.view(-1, 1, 1)
result1 = F.normalize(img, mean_unsqueezed, std_unsqueezed)
result2 = F.normalize(img,
mean_unsqueezed.repeat(1, img_size, img_size),
std_unsqueezed.repeat(1, img_size, img_size))
assert_array_almost_equal(target, result1.numpy())
assert_array_almost_equal(target, result2.numpy())
def test_adjust_brightness(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
# test 0
y_pil = F.adjust_brightness(x_pil, 1)
y_np = np.array(y_pil)
self.assertTrue(np.allclose(y_np, x_np))
# test 1
y_pil = F.adjust_brightness(x_pil, 0.5)
y_np = np.array(y_pil)
y_ans = [0, 2, 6, 27, 67, 113, 18, 4, 117, 45, 127, 0]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 2
y_pil = F.adjust_brightness(x_pil, 2)
y_np = np.array(y_pil)
y_ans = [0, 10, 26, 108, 255, 255, 74, 16, 255, 180, 255, 2]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
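# The expected arrays above follow PIL's enhancement rule: each channel value
# is multiplied by the factor, truncated toward zero and clipped to [0, 255].
# E.g. with factor 0.5, 5 -> 2 and 13 -> 6 (truncation, not rounding); with
# factor 2, 135 -> 255 (clipped from 270).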
def test_adjust_contrast(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
# test 0
y_pil = F.adjust_contrast(x_pil, 1)
y_np = np.array(y_pil)
self.assertTrue(np.allclose(y_np, x_np))
# test 1
y_pil = F.adjust_contrast(x_pil, 0.5)
y_np = np.array(y_pil)
y_ans = [43, 45, 49, 70, 110, 156, 61, 47, 160, 88, 170, 43]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 2
y_pil = F.adjust_contrast(x_pil, 2)
y_np = np.array(y_pil)
y_ans = [0, 0, 0, 22, 184, 255, 0, 0, 255, 94, 255, 0]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
@unittest.skipIf(Image.__version__ >= '7', "Temporarily disabled")
def test_adjust_saturation(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
# test 0
y_pil = F.adjust_saturation(x_pil, 1)
y_np = np.array(y_pil)
self.assertTrue(np.allclose(y_np, x_np))
# test 1
y_pil = F.adjust_saturation(x_pil, 0.5)
y_np = np.array(y_pil)
y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 2
y_pil = F.adjust_saturation(x_pil, 2)
y_np = np.array(y_pil)
y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
def test_adjust_hue(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
with self.assertRaises(ValueError):
F.adjust_hue(x_pil, -0.7)
F.adjust_hue(x_pil, 1)
# test 0: output is almost the same as x_data, but not exact,
# likely due to floating point error in the HSV <-> RGB round trip
y_pil = F.adjust_hue(x_pil, 0)
y_np = np.array(y_pil)
y_ans = [0, 5, 13, 54, 139, 226, 35, 8, 234, 91, 255, 1]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 1
y_pil = F.adjust_hue(x_pil, 0.25)
y_np = np.array(y_pil)
y_ans = [13, 0, 12, 224, 54, 226, 234, 8, 99, 1, 222, 255]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 2
y_pil = F.adjust_hue(x_pil, -0.25)
y_np = np.array(y_pil)
y_ans = [0, 13, 2, 54, 226, 58, 8, 234, 152, 255, 43, 1]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
def test_adjust_gamma(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
# test 0
y_pil = F.adjust_gamma(x_pil, 1)
y_np = np.array(y_pil)
self.assertTrue(np.allclose(y_np, x_np))
# test 1
y_pil = F.adjust_gamma(x_pil, 0.5)
y_np = np.array(y_pil)
y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
# test 2
y_pil = F.adjust_gamma(x_pil, 2)
y_np = np.array(y_pil)
y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0]
y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
self.assertTrue(np.allclose(y_np, y_ans))
def test_adjusts_L_mode(self):
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_rgb = Image.fromarray(x_np, mode='RGB')
x_l = x_rgb.convert('L')
self.assertEqual(F.adjust_brightness(x_l, 2).mode, 'L')
self.assertEqual(F.adjust_saturation(x_l, 2).mode, 'L')
self.assertEqual(F.adjust_contrast(x_l, 2).mode, 'L')
self.assertEqual(F.adjust_hue(x_l, 0.4).mode, 'L')
self.assertEqual(F.adjust_gamma(x_l, 0.5).mode, 'L')
def test_color_jitter(self):
color_jitter = transforms.ColorJitter(2, 2, 2, 0.1)
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
x_pil_2 = x_pil.convert('L')
for i in range(10):
y_pil = color_jitter(x_pil)
self.assertEqual(y_pil.mode, x_pil.mode)
y_pil_2 = color_jitter(x_pil_2)
self.assertEqual(y_pil_2.mode, x_pil_2.mode)
# Checking if ColorJitter can be printed as string
color_jitter.__repr__()
def test_linear_transformation(self):
num_samples = 1000
x = torch.randn(num_samples, 3, 10, 10)
flat_x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
# compute principal components
sigma = torch.mm(flat_x.t(), flat_x) / flat_x.size(0)
u, s, _ = np.linalg.svd(sigma.numpy())
zca_epsilon = 1e-10 # avoid division by 0
d = torch.Tensor(np.diag(1. / np.sqrt(s + zca_epsilon)))
u = torch.Tensor(u)
principal_components = torch.mm(torch.mm(u, d), u.t())
mean_vector = (torch.sum(flat_x, dim=0) / flat_x.size(0))
# initialize whitening matrix
whitening = transforms.LinearTransformation(principal_components, mean_vector)
# estimate covariance and mean using the weak law of large numbers
num_features = flat_x.size(1)
cov = 0.0
mean = 0.0
for i in x:
xwhite = whitening(i)
xwhite = xwhite.view(1, -1).numpy()
cov += np.dot(xwhite, xwhite.T) / num_features
mean += np.sum(xwhite) / num_features
# if rtol for std is 1e-3, then rtol for cov is 2e-3, since cov = std**2
self.assertTrue(np.allclose(cov / num_samples, np.identity(1), rtol=2e-3),
"cov not close to 1")
self.assertTrue(np.allclose(mean / num_samples, 0, rtol=1e-3),
"mean not close to 0")
# Checking if LinearTransformation can be printed as string
whitening.__repr__()
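# The whitening matrix built above is the standard ZCA transform
# W = U diag(1 / sqrt(s + eps)) U^T, computed from the SVD of the sample
# covariance; applying it (after mean subtraction) should leave each
# flattened sample approximately zero-mean with identity covariance, which
# the loop estimates empirically.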
def test_rotate(self):
x = np.zeros((100, 100, 3), dtype=np.uint8)
x[40, 40] = [255, 255, 255]
with self.assertRaisesRegex(TypeError, r"img should be PIL Image"):
F.rotate(x, 10)
img = F.to_pil_image(x)
result = F.rotate(img, 45)
self.assertEqual(result.size, (100, 100))
r, c, ch = np.where(result)
self.assertTrue(all(x in r for x in [49, 50]))
self.assertTrue(all(x in c for x in [36]))
self.assertTrue(all(x in ch for x in [0, 1, 2]))
result = F.rotate(img, 45, expand=True)
self.assertEqual(result.size, (142, 142))
r, c, ch = np.where(result)
self.assertTrue(all(x in r for x in [70, 71]))
self.assertTrue(all(x in c for x in [57]))
self.assertTrue(all(x in ch for x in [0, 1, 2]))
result = F.rotate(img, 45, center=(40, 40))
self.assertEqual(result.size, (100, 100))
r, c, ch = np.where(result)
self.assertTrue(all(x in r for x in [40]))
self.assertTrue(all(x in c for x in [40]))
self.assertTrue(all(x in ch for x in [0, 1, 2]))
result_a = F.rotate(img, 90)
result_b = F.rotate(img, -270)
self.assertTrue(np.all(np.array(result_a) == np.array(result_b)))
def test_rotate_fill(self):
img = F.to_pil_image(np.ones((100, 100, 3), dtype=np.uint8) * 255, "RGB")
modes = ("L", "RGB", "F")
nums_bands = [len(mode) for mode in modes]
fill = 127
for mode, num_bands in zip(modes, nums_bands):
img_conv = img.convert(mode)
img_rot = F.rotate(img_conv, 45.0, fill=fill)
pixel = img_rot.getpixel((0, 0))
if not isinstance(pixel, tuple):
pixel = (pixel,)
self.assertTupleEqual(pixel, tuple([fill] * num_bands))
for wrong_num_bands in set(nums_bands) - {num_bands}:
with self.assertRaises(ValueError):
F.rotate(img_conv, 45.0, fill=tuple([fill] * wrong_num_bands))
def test_affine(self):
input_img = np.zeros((40, 40, 3), dtype=np.uint8)
cnt = [20, 20]
for pt in [(16, 16), (20, 16), (20, 20)]:
for i in range(-5, 5):
for j in range(-5, 5):
input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
with self.assertRaises(TypeError, msg="Argument translate should be a sequence"):
F.affine(input_img, 10, translate=0, scale=1, shear=1)
pil_img = F.to_pil_image(input_img)
def _to_3x3_inv(inv_result_matrix):
result_matrix = np.zeros((3, 3))
result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
result_matrix[2, 2] = 1
return np.linalg.inv(result_matrix)
def _test_transformation(a, t, s, sh):
a_rad = math.radians(a)
s_rad = [math.radians(sh_) for sh_ in sh]
cx, cy = cnt
tx, ty = t
sx, sy = s_rad
rot = a_rad
# 1) Check transformation matrix:
C = np.array([[1, 0, cx],
[0, 1, cy],
[0, 0, 1]])
T = np.array([[1, 0, tx],
[0, 1, ty],
[0, 0, 1]])
Cinv = np.linalg.inv(C)
RS = np.array(
[[s * math.cos(rot), -s * math.sin(rot), 0],
[s * math.sin(rot), s * math.cos(rot), 0],
[0, 0, 1]])
SHx = np.array([[1, -math.tan(sx), 0],
[0, 1, 0],
[0, 0, 1]])
SHy = np.array([[1, 0, 0],
[-math.tan(sy), 1, 0],
[0, 0, 1]])
RSS = np.matmul(RS, np.matmul(SHy, SHx))
true_matrix = np.matmul(T, np.matmul(C, np.matmul(RSS, Cinv)))
result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
translate=t, scale=s, shear=sh))
self.assertLess(np.sum(np.abs(true_matrix - result_matrix)), 1e-10)
# 2) Perform inverse mapping:
true_result = np.zeros((40, 40, 3), dtype=np.uint8)
inv_true_matrix = np.linalg.inv(true_matrix)
for y in range(true_result.shape[0]):
for x in range(true_result.shape[1]):
# Same as for PIL:
# https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/
# src/libImaging/Geometry.c#L1060
input_pt = np.array([x + 0.5, y + 0.5, 1.0])
res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(int)
_x, _y = res[:2]
if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
true_result[y, x, :] = input_img[_y, _x, :]
result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
self.assertEqual(result.size, pil_img.size)
# Compute number of different pixels:
np_result = np.array(result)
n_diff_pixels = np.sum(np_result != true_result) / 3
# Accept 3 wrong pixels
self.assertLess(n_diff_pixels, 3,
"a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +
"n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0])))
# Test rotation
a = 45
_test_transformation(a=a, t=(0, 0), s=1.0, sh=(0.0, 0.0))
# Test translation
t = [10, 15]
_test_transformation(a=0.0, t=t, s=1.0, sh=(0.0, 0.0))
# Test scale
s = 1.2
_test_transformation(a=0.0, t=(0.0, 0.0), s=s, sh=(0.0, 0.0))
# Test shear
sh = [45.0, 25.0]
_test_transformation(a=0.0, t=(0.0, 0.0), s=1.0, sh=sh)
# Test rotation, scale, translation, shear
for a in range(-90, 90, 25):
for t1 in range(-10, 10, 5):
for s in [0.75, 0.98, 1.0, 1.2, 1.4]:
for sh in range(-15, 15, 5):
_test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh))
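# The decomposition verified above composes the forward affine map as
# M = T @ C @ RS(angle, scale) @ SHy @ SHx @ C^-1, i.e. shear, then
# rotation/scale about the center cnt, then translation. PIL consumes the
# inverse mapping, which is why _get_inverse_affine_matrix's result is
# inverted back (via _to_3x3_inv) before comparing against M.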
def test_random_rotation(self):
with self.assertRaises(ValueError):
transforms.RandomRotation(-0.7)
transforms.RandomRotation([-0.7])
transforms.RandomRotation([-0.7, 0, 0.7])
t = transforms.RandomRotation(10)
angle = t.get_params(t.degrees)
self.assertTrue(angle > -10 and angle < 10)
t = transforms.RandomRotation((-10, 10))
angle = t.get_params(t.degrees)
self.assertTrue(angle > -10 and angle < 10)
# Checking if RandomRotation can be printed as string
t.__repr__()
def test_random_affine(self):
with self.assertRaises(ValueError):
transforms.RandomAffine(-0.7)
transforms.RandomAffine([-0.7])
transforms.RandomAffine([-0.7, 0, 0.7])
transforms.RandomAffine([-90, 90], translate=2.0)
transforms.RandomAffine([-90, 90], translate=[-1.0, 1.0])
transforms.RandomAffine([-90, 90], translate=[-1.0, 0.0, 1.0])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.0])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7)
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10])
transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10, 0, 10])
x = np.zeros((100, 100, 3), dtype=np.uint8)
img = F.to_pil_image(x)
t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10, 20, 40])
for _ in range(100):
angle, translations, scale, shear = t.get_params(t.degrees, t.translate, t.scale, t.shear,
img_size=img.size)
self.assertTrue(-10 < angle < 10)
self.assertTrue(-img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5,
"{} vs {}".format(translations[0], img.size[0] * 0.5))
self.assertTrue(-img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5,
"{} vs {}".format(translations[1], img.size[1] * 0.5))
self.assertTrue(0.7 < scale < 1.3)
self.assertTrue(-10 < shear[0] < 10)
self.assertTrue(-20 < shear[1] < 40)
# Checking if RandomAffine can be printed as string
t.__repr__()
t = transforms.RandomAffine(10, resample=Image.BILINEAR)
self.assertIn("Image.BILINEAR", t.__repr__())
def test_to_grayscale(self):
"""Unit tests for grayscale transform"""
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
x_pil_2 = x_pil.convert('L')
gray_np = np.array(x_pil_2)
# Test Set: Grayscale an image with desired number of output channels
# Case 1: RGB -> 1 channel grayscale
trans1 = transforms.Grayscale(num_output_channels=1)
gray_pil_1 = trans1(x_pil)
gray_np_1 = np.array(gray_pil_1)
self.assertEqual(gray_pil_1.mode, 'L', 'mode should be L')
self.assertEqual(gray_np_1.shape, tuple(x_shape[0:2]), 'should be 1 channel')
np.testing.assert_equal(gray_np, gray_np_1)
# Case 2: RGB -> 3 channel grayscale
trans2 = transforms.Grayscale(num_output_channels=3)
gray_pil_2 = trans2(x_pil)
gray_np_2 = np.array(gray_pil_2)
self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1])
np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2])
np.testing.assert_equal(gray_np, gray_np_2[:, :, 0])
# Case 3: 1 channel grayscale -> 1 channel grayscale
trans3 = transforms.Grayscale(num_output_channels=1)
gray_pil_3 = trans3(x_pil_2)
gray_np_3 = np.array(gray_pil_3)
self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
np.testing.assert_equal(gray_np, gray_np_3)
# Case 4: 1 channel grayscale -> 3 channel grayscale
trans4 = transforms.Grayscale(num_output_channels=3)
gray_pil_4 = trans4(x_pil_2)
gray_np_4 = np.array(gray_pil_4)
self.assertEqual(gray_pil_4.mode, 'RGB', 'mode should be RGB')
self.assertEqual(gray_np_4.shape, tuple(x_shape), 'should be 3 channel')
np.testing.assert_equal(gray_np_4[:, :, 0], gray_np_4[:, :, 1])
np.testing.assert_equal(gray_np_4[:, :, 1], gray_np_4[:, :, 2])
np.testing.assert_equal(gray_np, gray_np_4[:, :, 0])
# Checking if Grayscale can be printed as string
trans4.__repr__()
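# PIL's 'L' conversion used as the reference above is the ITU-R 601 luma
# transform L = 0.299 R + 0.587 G + 0.114 B; Grayscale(num_output_channels=3)
# replicates that single luma plane across all three output channels, which
# is what the per-channel equality checks assert.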
@unittest.skipIf(stats is None, 'scipy.stats not available')
def test_random_grayscale(self):
"""Unit tests for random grayscale transform"""
# Test Set 1: RGB -> 3 channel grayscale
random_state = random.getstate()
random.seed(42)
x_shape = [2, 2, 3]
x_np = np.random.randint(0, 256, x_shape, np.uint8)
x_pil = Image.fromarray(x_np, mode='RGB')
x_pil_2 = x_pil.convert('L')
gray_np = np.array(x_pil_2)
num_samples = 250
num_gray = 0
for _ in range(num_samples):
gray_pil_2 = transforms.RandomGrayscale(p=0.5)(x_pil)
gray_np_2 = np.array(gray_pil_2)
if np.array_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) and \
np.array_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) and \
np.array_equal(gray_np, gray_np_2[:, :, 0]):
num_gray = num_gray + 1
p_value = stats.binom_test(num_gray, num_samples, p=0.5)
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Test Set 2: grayscale -> 1 channel grayscale
random_state = random.getstate()
random.seed(42)
x_shape = [2, 2, 3]
x_np = np.random.randint(0, 256, x_shape, np.uint8)
x_pil = Image.fromarray(x_np, mode='RGB')
x_pil_2 = x_pil.convert('L')
gray_np = np.array(x_pil_2)
num_samples = 250
num_gray = 0
for _ in range(num_samples):
gray_pil_3 = transforms.RandomGrayscale(p=0.5)(x_pil_2)
gray_np_3 = np.array(gray_pil_3)
if np.array_equal(gray_np, gray_np_3):
num_gray = num_gray + 1
p_value = stats.binom_test(num_gray, num_samples, p=1.0) # Note: grayscale is always unchanged
random.setstate(random_state)
self.assertGreater(p_value, 0.0001)
# Test set 3: Explicit tests
x_shape = [2, 2, 3]
x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
x_pil = Image.fromarray(x_np, mode='RGB')
x_pil_2 = x_pil.convert('L')
gray_np = np.array(x_pil_2)
# Case 3a: RGB -> 3 channel grayscale (grayscaled)
trans2 = transforms.RandomGrayscale(p=1.0)
gray_pil_2 = trans2(x_pil)
gray_np_2 = np.array(gray_pil_2)
self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1])
np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2])
np.testing.assert_equal(gray_np, gray_np_2[:, :, 0])
# Case 3b: RGB -> 3 channel grayscale (unchanged)
trans2 = transforms.RandomGrayscale(p=0.0)
gray_pil_2 = trans2(x_pil)
gray_np_2 = np.array(gray_pil_2)
self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB')
self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel')
np.testing.assert_equal(x_np, gray_np_2)
# Case 3c: 1 channel grayscale -> 1 channel grayscale (grayscaled)
trans3 = transforms.RandomGrayscale(p=1.0)
gray_pil_3 = trans3(x_pil_2)
gray_np_3 = np.array(gray_pil_3)
self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
np.testing.assert_equal(gray_np, gray_np_3)
# Case 3d: 1 channel grayscale -> 1 channel grayscale (unchanged)
trans3 = transforms.RandomGrayscale(p=0.0)
gray_pil_3 = trans3(x_pil_2)
gray_np_3 = np.array(gray_pil_3)
self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L')
self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel')
np.testing.assert_equal(gray_np, gray_np_3)
# Checking if RandomGrayscale can be printed as string
trans3.__repr__()
def test_gaussian_blur_asserts(self):
np_img = np.ones((100, 100, 3), dtype=np.uint8) * 255
img = F.to_pil_image(np_img, "RGB")
with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"):
F.gaussian_blur(img, [3])
with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"):
F.gaussian_blur(img, [3, 3, 3])
with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"):
transforms.GaussianBlur([3, 3, 3])
with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"):
F.gaussian_blur(img, [4, 4])
with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"):
transforms.GaussianBlur([4, 4])
with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"):
F.gaussian_blur(img, [-3, -3])
with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"):
transforms.GaussianBlur([-3, -3])
with self.assertRaisesRegex(ValueError, r"If sigma is a sequence, its length should be 2"):
F.gaussian_blur(img, 3, [1, 1, 1])
with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"):
transforms.GaussianBlur(3, [1, 1, 1])
with self.assertRaisesRegex(ValueError, r"sigma should have positive values"):
F.gaussian_blur(img, 3, -1.0)
with self.assertRaisesRegex(ValueError, r"If sigma is a single number, it must be positive"):
transforms.GaussianBlur(3, -1.0)
with self.assertRaisesRegex(TypeError, r"kernel_size should be int or a sequence of integers"):
F.gaussian_blur(img, "kernel_size_string")
with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"):
transforms.GaussianBlur("kernel_size_string")
with self.assertRaisesRegex(TypeError, r"sigma should be either float or sequence of floats"):
F.gaussian_blur(img, 3, "sigma_string")
with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"):
transforms.GaussianBlur(3, "sigma_string")
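# For contrast with the failure cases above, a valid configuration (sketch)
# pairs odd, positive kernel sizes with positive sigma values:
#     >>> blurred = F.gaussian_blur(img, [3, 5], 1.0)
#     >>> t = transforms.GaussianBlur(3, sigma=(0.1, 2.0))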
if __name__ == '__main__':
unittest.main()