Adapted functional tensor tests on CPU/CUDA (#2569)

* Adapted almost all functional tensor tests on CPU/CUDA
  - fixed bug with transforms using generated grid
  - remains *_crop, blocked by #2568
  - TODO: test_adjustments
* Apply suggestions from code review
  Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
* Fixed issues according to review
* Split tests into two: cpu and cuda
* Updated test_adjustments to run on CPU and CUDA

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>

Adapted functional tensor tests on CPU/CUDA (#2569)
* Adapted almost all functional tensor tests on CPU/CUDA
  - fixed bug with transforms using generated grid
  - remains *_crop, blocked by #2568
  - TODO: test_adjustments
* Apply suggestions from code review
  Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
* Fixed issues according to review
* Split tests into two: cpu and cuda
* Updated test_adjustments to run on CPU and CUDA

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
39702993 · vfdev · GitHub · b16914be · 39702993 · 39702993
Unverified Commit 39702993 authored Aug 20, 2020 by vfdev Committed by GitHub Aug 20, 2020
3 changed files
--- a/test/test_functional_tensor.py
+++ b/test/test_functional_tensor.py
--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -223,10 +223,10 @@ def to_pil_image(pic, mode=None):
            pic = np.expand_dims(pic, 2)

    npimg = pic
-    if isinstance(pic, torch.FloatTensor) and mode != 'F':
-        pic = pic.mul(255).byte()
    if isinstance(pic, torch.Tensor):
-        npimg = np.transpose(pic.numpy(), (1, 2, 0))
+        if pic.is_floating_point() and mode != 'F':
+            pic = pic.mul(255).byte()
+        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +

--- a/torchvision/transforms/functional_tensor.py
+++ b/torchvision/transforms/functional_tensor.py
@@ -3,7 +3,7 @@ from typing import Optional, Dict, Tuple

 import torch
 from torch import Tensor
-from torch.nn.functional import affine_grid, grid_sample
+from torch.nn.functional import grid_sample
 from torch.jit.annotations import List, BroadcastingList2


@@ -714,12 +714,13 @@ def _gen_affine_grid(
    # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate

    d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3)
+    base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
    base_grid[..., 0].copy_(torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow))
    base_grid[..., 1].copy_(torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh).unsqueeze_(-1))
    base_grid[..., 2].fill_(1)

-    output_grid = base_grid.view(1, oh * ow, 3).bmm(theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h]))
+    rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
+    output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
    return output_grid.view(1, oh, ow, 2)


@@ -746,14 +747,15 @@ def affine(

    _assert_grid_transform_inputs(img, matrix, resample, fillcolor, _interpolation_modes)

-    theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3)
+    theta = torch.tensor(matrix, dtype=torch.float, device=img.device).reshape(1, 2, 3)
    shape = img.shape
+    # grid will be generated on the same device as theta and img
    grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2])
    mode = _interpolation_modes[resample]
    return _apply_grid_transform(img, grid, mode)


-def _compute_output_size(theta: Tensor, w: int, h: int) -> Tuple[int, int]:
+def _compute_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:

    # Inspired of PIL implementation:
    # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054
@@ -765,6 +767,7 @@ def _compute_output_size(theta: Tensor, w: int, h: int) -> Tuple[int, int]:
        [0.5 * w, 0.5 * h, 1.0],
        [0.5 * w, -0.5 * h, 1.0],
    ])
+    theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3)
    new_pts = pts.view(1, 4, 3).bmm(theta.transpose(1, 2)).view(4, 2)
    min_vals, _ = new_pts.min(dim=0)
    max_vals, _ = new_pts.max(dim=0)
@@ -807,16 +810,17 @@ def rotate(
    }

    _assert_grid_transform_inputs(img, matrix, resample, fill, _interpolation_modes)
-    theta = torch.tensor(matrix).reshape(1, 2, 3)
    w, h = img.shape[-1], img.shape[-2]
-    ow, oh = _compute_output_size(theta, w, h) if expand else (w, h)
+    ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h)
+    theta = torch.tensor(matrix, dtype=torch.float, device=img.device).reshape(1, 2, 3)
+    # grid will be generated on the same device as theta and img
    grid = _gen_affine_grid(theta, w=w, h=h, ow=ow, oh=oh)
    mode = _interpolation_modes[resample]

    return _apply_grid_transform(img, grid, mode)


-def _perspective_grid(coeffs: List[float], ow: int, oh: int):
+def _perspective_grid(coeffs: List[float], ow: int, oh: int, device: torch.device):
    # https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/
    # src/libImaging/Geometry.c#L394

@@ -828,19 +832,20 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int):
    theta1 = torch.tensor([[
        [coeffs[0], coeffs[1], coeffs[2]],
        [coeffs[3], coeffs[4], coeffs[5]]
-    ]])
+    ]], dtype=torch.float, device=device)
    theta2 = torch.tensor([[
        [coeffs[6], coeffs[7], 1.0],
        [coeffs[6], coeffs[7], 1.0]
-    ]])
+    ]], dtype=torch.float, device=device)

    d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3)
+    base_grid = torch.empty(1, oh, ow, 3, dtype=torch.float, device=device)
    base_grid[..., 0].copy_(torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow))
    base_grid[..., 1].copy_(torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh).unsqueeze_(-1))
    base_grid[..., 2].fill_(1)

-    output_grid1 = base_grid.view(1, oh * ow, 3).bmm(theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh]))
+    rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=torch.float, device=device)
+    output_grid1 = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta1)
    output_grid2 = base_grid.view(1, oh * ow, 3).bmm(theta2.transpose(1, 2))

    output_grid = output_grid1 / output_grid2 - 1.0
@@ -880,7 +885,7 @@ def perspective(
    )

    ow, oh = img.shape[-1], img.shape[-2]
-    grid = _perspective_grid(perspective_coeffs, ow=ow, oh=oh)
+    grid = _perspective_grid(perspective_coeffs, ow=ow, oh=oh, device=img.device)
    mode = _interpolation_modes[interpolation]

    return _apply_grid_transform(img, grid, mode)