refactor to_pil_image and align array with tensor inputs (#8097)

Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

refactor to_pil_image and align array with tensor inputs (#8097)
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
15c166ac · Philip Meier · GitHub · a0fcd083 · 15c166ac · 15c166ac
Unverified commit 15c166ac, authored Nov 08, 2023 by Philip Meier; committed by GitHub on Nov 08, 2023
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 20 additions and 33 deletions

test/test_transforms.py test/test_transforms.py +6 -4

torchvision/transforms/functional.py torchvision/transforms/functional.py +14 -29

No files found.
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -661,7 +661,7 @@ class TestToPil:
    @pytest.mark.parametrize(
        "img_data, expected_mode",
        [
-            (torch.Tensor(4, 4, 1).uniform_().numpy(), "F"),
+            (torch.Tensor(4, 4, 1).uniform_().numpy(), "L"),
            (torch.ByteTensor(4, 4, 1).random_(0, 255).numpy(), "L"),
            (torch.ShortTensor(4, 4, 1).random_().numpy(), "I;16"),
            (torch.IntTensor(4, 4, 1).random_().numpy(), "I"),
@@ -671,6 +671,8 @@ class TestToPil:
        transform = transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage()
        img = transform(img_data)
        assert img.mode == expected_mode
+        if np.issubdtype(img_data.dtype, np.floating):
+            img_data = (img_data * 255).astype(np.uint8)
        # note: we explicitly convert img's dtype because pytorch doesn't support uint16
        # and otherwise assert_close wouldn't be able to construct a tensor from the uint16 array
        torch.testing.assert_close(img_data[:, :, 0], np.asarray(img).astype(img_data.dtype))
@@ -741,7 +743,7 @@ class TestToPil:
    @pytest.mark.parametrize(
        "img_data, expected_mode",
        [
-            (torch.Tensor(4, 4).uniform_().numpy(), "F"),
+            (torch.Tensor(4, 4).uniform_().numpy(), "L"),
            (torch.ByteTensor(4, 4).random_(0, 255).numpy(), "L"),
            (torch.ShortTensor(4, 4).random_().numpy(), "I;16"),
            (torch.IntTensor(4, 4).random_().numpy(), "I"),
@@ -751,6 +753,8 @@ class TestToPil:
        transform = transforms.ToPILImage(mode=expected_mode) if with_mode else transforms.ToPILImage()
        img = transform(img_data)
        assert img.mode == expected_mode
+        if np.issubdtype(img_data.dtype, np.floating):
+            img_data = (img_data * 255).astype(np.uint8)
        np.testing.assert_allclose(img_data, img)
    @pytest.mark.parametrize("expected_mode", [None, "RGB", "HSV", "YCbCr"])
@@ -874,8 +878,6 @@ class TestToPil:
            trans(np.ones([4, 4, 1], np.uint16))
        with pytest.raises(TypeError, match=reg_msg):
            trans(np.ones([4, 4, 1], np.uint32))
-        with pytest.raises(TypeError, match=reg_msg):
-            trans(np.ones([4, 4, 1], np.float64))
        with pytest.raises(ValueError, match=r"pic should be 2/3 dimensional. Got \d+ dimensions."):
            transforms.ToPILImage()(np.ones([1, 4, 4, 3]))

--- a/torchvision/transforms/functional.py
+++ b/torchvision/transforms/functional.py
@@ -258,41 +258,26 @@ def to_pil_image(pic, mode=None):
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(to_pil_image)
-    if not (isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
+    if isinstance(pic, torch.Tensor):
+        if pic.ndim == 3:
+            pic = pic.permute((1, 2, 0))
+        pic = pic.numpy(force=True)
+    elif not isinstance(pic, np.ndarray):
        raise TypeError(f"pic should be Tensor or ndarray. Got {type(pic)}.")
-    elif isinstance(pic, torch.Tensor):
+    if pic.ndim == 2:
-        if pic.ndimension() not in {2, 3}:
+        # if 2D image, add channel dimension (HWC)
-            raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndimension()} dimensions.")
+        pic = np.expand_dims(pic, 2)
+    if pic.ndim != 3:
-        elif pic.ndimension() == 2:
+        raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
-            # if 2D image, add channel dimension (CHW)
-            pic = pic.unsqueeze(0)
-        # check number of channels
-        if pic.shape[-3] > 4:
-            raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-3]} channels.")
-    elif isinstance(pic, np.ndarray):
-        if pic.ndim not in {2, 3}:
-            raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
-        elif pic.ndim == 2:
-            # if 2D image, add channel dimension (HWC)
-            pic = np.expand_dims(pic, 2)
-        # check number of channels
+    if pic.shape[-1] > 4:
-        if pic.shape[-1] > 4:
+        raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.")
-            raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.")
    npimg = pic
-    if isinstance(pic, torch.Tensor):
-        if pic.is_floating_point() and mode != "F":
-            pic = pic.mul(255).byte()
-        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
-    if not isinstance(npimg, np.ndarray):
+    if np.issubdtype(npimg.dtype, np.floating) and mode != "F":
-        raise TypeError("Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}")
+        npimg = (npimg * 255).astype(np.uint8)
    if npimg.shape[2] == 1:
        expected_mode = None