Unverified Commit 77e41870 authored by Nicolas Hug's avatar Nicolas Hug Committed by GitHub
Browse files

Speed up equalize transform: use bincount instead of histc (#3493)

* use bincount instead of histc

* only use bincount when on CPU

* Added equality test for CPU vs cuda

* Fix flake8 and tests

* tuple instead of int for size
parent 414427dd
......@@ -977,6 +977,18 @@ class CUDATester(Tester):
def setUp(self):
    # Run every test in this class on the GPU; the base Tester
    # presumably defaults to "cpu" — TODO confirm against base class.
    self.device = "cuda"
def test_scale_channel(self):
    """Make sure that _scale_channel gives the same results on CPU and GPU as
    histc or bincount are used depending on the device.
    """
    # TODO: when # https://github.com/pytorch/pytorch/issues/53194 is fixed,
    # only use bincount and remove that test.
    shape = (1_000,)
    channel = torch.randint(0, 256, size=shape).to('cpu')
    result_cpu = F_t._scale_channel(channel)
    result_cuda = F_t._scale_channel(channel.to('cuda'))
    self.assertTrue(result_cpu.equal(result_cuda.to('cpu')))
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
......@@ -902,7 +902,14 @@ def autocontrast(img: Tensor) -> Tensor:
def _scale_channel(img_chan):
hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
# TODO: we should expect bincount to always be faster than histc, but this
# isn't always the case. Once
# https://github.com/pytorch/pytorch/issues/53194 is fixed, remove the if
# block and only use bincount.
if img_chan.is_cuda:
hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
else:
hist = torch.bincount(img_chan.view(-1), minlength=256)
nonzero_hist = hist[hist != 0]
step = nonzero_hist[:-1].sum() // 255
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment