Commit af225a8a authored by pedrofreire's avatar pedrofreire Committed by Francisco Massa
Browse files

Simplify and organize test_ops. (#1551)

* Simplify and organize test_ops.

We perform the following:

- Simplify the functions slow_roi_pooling, slow_ps_roi_pooling, slow_ps_roi_align and bilinear_interpolate (including finding and removing a semi-bug in slow_ps_roi_pooling, which used bin_w instead of bin_h);
- Wrote a slow_roi_align function, that was missing;
- Create a base class testing all combinations of forward/backward, cpu/cuda, contiguous/non-contiguous;
- Organize all testing inside the base class with _test_forward and _test_backward (which can be easily overridden if a particular op needs something different); an Op class then only needs to implement fn, get_script_fn, and expected_fn.

A few points:
- We are using the same inputs for all tests, and not trying all possible inputs in the domain of a given operation. One improvement would be to test more diverse inputs, and to personalize the inputs for some ops (e.g. different inputs for pooling ops and align ops).
- Running all tests is quite slow (~1 min only for CPU tests), so that can possibly be improved.

* Reduce input size used in gradcheck.

gradcheck can be quite costly, and it was causing OOM errors and making
the tests slow. By reducing the size of the input, the test speed is
down to 3 seconds for the CPU tests.

Other points:
- We remove an unused namedtuple;
- We inherit from object for better Python 2 compatibility;
- We remove a hardcoded pool_size from the TorchScript functions, and
add it as a parameter instead.

* Replace Tensor by torch.Tensor in type annotations.

This should fix lint errors.
parent 4897402a
from __future__ import division
import numpy as np import numpy as np
import torch import torch
from torch.autograd import gradcheck from torch.autograd import gradcheck
...@@ -8,1168 +9,305 @@ from itertools import product ...@@ -8,1168 +9,305 @@ from itertools import product
import unittest import unittest
class RoIOpTester(object):
    """Mixin driving forward/backward tests for an RoI op on CPU/CUDA,
    contiguous/non-contiguous inputs.

    Subclasses (which must also inherit ``unittest.TestCase``) implement:
      - ``fn``: run the actual torchvision op;
      - ``get_script_fn``: return a TorchScript-compiled version of the op;
      - ``expected_fn``: a slow, readable reference implementation.
    """

    @classmethod
    def setUpClass(cls):
        # float64 keeps gradcheck's finite differences accurate.
        cls.dtype = torch.float64

    def test_forward_cpu_contiguous(self):
        self._test_forward(device=torch.device('cpu'), contiguous=True)

    def test_forward_cpu_non_contiguous(self):
        self._test_forward(device=torch.device('cpu'), contiguous=False)

    def test_backward_cpu_contiguous(self):
        self._test_backward(device=torch.device('cpu'), contiguous=True)

    def test_backward_cpu_non_contiguous(self):
        self._test_backward(device=torch.device('cpu'), contiguous=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_forward_cuda_contiguous(self):
        self._test_forward(device=torch.device('cuda'), contiguous=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_forward_cuda_non_contiguous(self):
        self._test_forward(device=torch.device('cuda'), contiguous=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_backward_cuda_contiguous(self):
        self._test_backward(device=torch.device('cuda'), contiguous=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_backward_cuda_non_contiguous(self):
        self._test_backward(device=torch.device('cuda'), contiguous=False)

    def _test_forward(self, device, contiguous):
        """Compare the op's output against ``expected_fn`` on a fixed input."""
        pool_size = 5
        # n_channels % (pool_size ** 2) == 0 is required for the PS operations.
        n_channels = 2 * (pool_size ** 2)
        x = torch.rand(2, n_channels, 10, 10, dtype=self.dtype, device=device)
        if not contiguous:
            # Swapping H and W via permute yields a non-contiguous view.
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor([[0, 0, 0, 9, 9],  # format is (xyxy)
                             [0, 0, 5, 4, 9],
                             [0, 5, 5, 9, 9],
                             [1, 0, 0, 9, 9]],
                            dtype=self.dtype, device=device)

        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1)
        gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1,
                                sampling_ratio=-1, device=device, dtype=self.dtype)

        self.assertTrue(torch.allclose(gt_y, y))

    def _test_backward(self, device, contiguous):
        """Run gradcheck on both the eager op and its scripted version.

        Inputs are kept small: gradcheck is O(numel) forward passes, and a
        larger input previously caused OOMs and very slow tests.
        """
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor([[0, 0, 0, 4, 4],  # format is (xyxy)
                             [0, 0, 2, 3, 4],
                             [0, 2, 2, 4, 4]],
                            dtype=self.dtype, device=device)

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        self.assertTrue(gradcheck(func, (x,)))
        self.assertTrue(gradcheck(script_func, (x,)))

    # --- hooks for subclasses ------------------------------------------------
    def fn(*args, **kwargs):
        pass

    def get_script_fn(*args, **kwargs):
        pass

    def expected_fn(*args, **kwargs):
        pass
def test_roi_align_gradcheck_cpu(self):
dtype = torch.float64
device = torch.device('cpu')
m = ops.RoIAlign((5, 5), 0.5, 1).to(dtype=dtype, device=device)
x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
rois = self.rois.to(device=device, dtype=dtype)
class RoIPoolTester(RoIOpTester, unittest.TestCase):
    """RoIPool tests: max-pooling over integer-rounded RoI bins."""

    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        # sampling_ratio is accepted for interface uniformity; RoIPool ignores it.
        return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        """Return a single-tensor callable over a scripted roi_pool (for gradcheck)."""
        @torch.jit.script
        def script_fn(input, rois, pool_size):
            # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
            return ops.roi_pool(input, rois, pool_size, 1.0)[0]
        return lambda x: script_fn(x, rois, pool_size)

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        """Slow reference RoIPool: per-RoI max over floor/ceil-bounded bins."""
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            # k-th of pool_{h,w} bins over a span of `block`-sized steps.
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            # roi layout is (batch_idx, x1, y1, x2, y2); j is width, i is height.
            j_begin, i_begin, j_end, i_end = (int(round(c.item() * spatial_scale)) for c in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        # Empty bins (possible for degenerate RoIs) stay zero.
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y
return m(input, rois)
class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
    """PSRoIPool tests: position-sensitive average pooling over RoI bins."""

    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        # Pass spatial_scale through instead of hard-coding 1, matching
        # RoIPoolTester.fn (the tests currently always use scale 1, so this
        # is behavior-compatible but no longer silently ignores the argument).
        return ops.PSRoIPool((pool_h, pool_w), spatial_scale)(x, rois)

    def get_script_fn(self, rois, pool_size):
        """Return a single-tensor callable over a scripted ps_roi_pool (for gradcheck)."""
        @torch.jit.script
        def script_fn(input, rois, pool_size):
            # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
            return ops.ps_roi_pool(input, rois, pool_size, 1.0)[0]
        return lambda x: script_fn(x, rois, pool_size)

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        """Slow reference PSRoIPool: each output channel averages one dedicated
        input channel inside its (i, j) bin."""
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        # Position-sensitive pooling maps pool_h * pool_w input channels to one output channel.
        self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(c.item() * spatial_scale)) for c in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            # Guard against zero-sized RoIs when computing bin extents.
            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
                        for c_out in range(0, n_output_channels):
                            # Input channel dedicated to output channel c_out at bin (i, j).
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y
def bilinear_interpolate(data, height, width, y, x):
    """Bilinearly interpolate a value at (y, x) from a flattened 2D grid.

    Args:
        data: flat (row-major) sequence of length height * width.
        height, width: grid dimensions.
        y, x: fractional sample coordinates.

    Returns:
        Interpolated value, or 0. when (y, x) falls outside [-1, height] x
        [-1, width] (matching the CUDA/C++ roi_align kernels).
    """
    if y < -1.0 or y > height or x < -1.0 or x > width:
        return 0.

    # Clamp into the valid index range; samples past the last cell center
    # collapse onto the border pixel.
    y = min(max(0, y), height - 1)
    x = min(max(0, x), width - 1)

    y_low = int(y)
    y_high = min(y_low + 1, height - 1)
    x_low = int(x)
    x_high = min(x_low + 1, width - 1)

    # Interpolation weights: "h" weight is the fractional part toward the
    # high neighbor, "l" weight toward the low neighbor.
    wy_h = y - y_low
    wy_l = 1 - wy_h

    wx_h = x - x_low
    wx_l = 1 - wx_h

    val = 0
    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
            val += wx * wy * data[yp * width + xp]
    return val
class RoIAlignTester(RoIOpTester, unittest.TestCase):
    """RoIAlign tests: average of bilinearly sampled points per RoI bin."""

    def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
        return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
                            sampling_ratio=sampling_ratio)(x, rois)

    def get_script_fn(self, rois, pool_size):
        """Return a single-tensor callable over a scripted roi_align (for gradcheck)."""
        @torch.jit.script
        def script_fn(input, rois, pool_size):
            # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
            return ops.roi_align(input, rois, pool_size, 1.0)[0]
        return lambda x: script_fn(x, rois, pool_size)

    def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        """Slow reference RoIAlign: average bilinear samples over a grid in each bin.

        sampling_ratio > 0 fixes the grid size; otherwise it adapts to ceil(bin size).
        """
        if device is None:
            device = torch.device("cpu")
        n_channels = in_data.size(1)
        out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        for r, roi in enumerate(rois):
            batch_idx = int(roi[0])
            # (x1, y1, x2, y2) scaled into feature-map coordinates (kept fractional).
            j_begin, i_begin, j_end, i_end = (c.item() * spatial_scale for c in roi[1:])

            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for channel in range(0, n_channels):
                # Flatten once per channel: the plane is invariant across bins
                # and sampling points, so don't rebuild it in the inner loops.
                plane = in_data[batch_idx, channel, :, :].flatten()
                for i in range(0, pool_h):
                    start_h = i_begin + i * bin_h
                    grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                    for j in range(0, pool_w):
                        start_w = j_begin + j * bin_w
                        grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))

                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(
                                    plane,
                                    in_data.size(-2),
                                    in_data.size(-1),
                                    y, x
                                )
                        val /= grid_h * grid_w

                        out_data[r, channel, i, j] = val
        return out_data
def test_ps_roi_align_basic_cpu(self):
device = torch.device('cpu')
pool_size = 3
x = torch.rand(1, 2 * (pool_size ** 2), 7, 7, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 5, 5]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=-1,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
def test_ps_roi_align_cpu(self):
device = torch.device('cpu')
pool_size = 5
x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w,
device, spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
def test_ps_roi_align_gradient_cpu(self):
    """Backward PSRoIAlign on CPU: forward + sum().backward() on an all-ones
    input, comparing x.grad against a precomputed expected gradient."""
    device = torch.device('cpu')
    pool_size = 3
    # sampling_ratio=-1 selects the adaptive grid (ceil of the bin size).
    layer = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
                           sampling_ratio=-1).to(dtype=self.dtype, device=device)
    x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 4, 4],
        [0, 0, 3, 5, 5],
        [0, 1, 0, 2, 4]],
        dtype=self.dtype, device=device)
    y = layer(x, rois)
    s = y.sum()
    s.backward()
    # Expected gradient, precomputed for the exact inputs above.
    gt_grad = torch.tensor([[[[8.125e-01, 6.875e-01, 0.0, 0.0, 0.0, ],
                              [2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
                              [1.0416666667e-01, 6.25e-02, 0.0, 0.0, 0.0, ],
                              [5.2083333333e-01, 3.125e-01, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[8.3266726847e-17, 1.125e00, 3.750e-01, 0.0, 0.0, ],
                              [2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
                              [0.0, 3.4722222222e-02, 9.7222222222e-02, 3.4722222222e-02, 0.0, ],
                              [0.0, 1.7361111111e-01, 4.8611111111e-01, 1.7361111111e-01, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[0.0, 5.000e-01, 4.375e-01, 5.000e-01, 6.25e-02, ],
                              [0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
                              [0.0, 0.0, 0.0, 6.25e-02, 1.0416666667e-01, ],
                              [0.0, 0.0, 0.0, 3.125e-01, 5.2083333333e-01, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
                              [5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
                              [3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ],
                              [3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
                              [5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
                              [0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ],
                              [0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
                              [0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
                              [0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ],
                              [0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
                              [7.2222222222e-01, 6.1111111111e-01, 0.0, 0.0, 0.0, ],
                              [7.1527777778e-01, 4.5138888889e-01, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
                              [7.4014868308e-17, 1.000e00, 3.3333333333e-01, 0.0, 0.0, ],
                              [9.2518585385e-18, 3.3333333333e-01, 6.25e-01, 2.0833333333e-01, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
                              [0.0, 4.4444444444e-01, 3.8888888889e-01, 4.4444444444e-01, 5.5555555556e-02, ],
                              [0.0, 5.5555555556e-02, 4.8611111111e-02, 4.3055555556e-01, 6.3194444444e-01, ]]]],
                           device=device, dtype=self.dtype)
    self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIAlign on CPU')
def test_ps_roi_align_gradcheck_cpu(self):
    """gradcheck PSRoIAlign on CPU, in both module form (contiguous and
    transposed input) and TorchScript functional form."""
    device = torch.device('cpu')
    pool_size = 5
    x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 9, 9],
        [0, 0, 5, 5, 9],
        [0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
    m = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
                       sampling_ratio=2).to(dtype=self.dtype, device=device)

    def run_layer(inp):
        return m(inp, rois)

    for candidate in (x, x.permute(0, 1, 3, 2)):
        self.assertTrue(gradcheck(run_layer, (candidate,)),
                        'gradcheck failed for PSRoIAlign on CPU')

    @torch.jit.script
    def script_func(input, rois):
        return ops.ps_roi_align(input, rois, 5, 2.0, 1)[0]

    self.assertTrue(gradcheck(lambda inp: script_func(inp, rois), (x,)),
                    'gradcheck failed for scripted ps_roi_align on CPU')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_basic_cuda(self):
    """Forward PSRoIAlign on CUDA, single ROI, against the slow (CPU-math)
    reference, on contiguous and non-contiguous (transposed) input."""
    device = torch.device('cuda')
    pool_size = 3
    x = torch.rand(1, 2 * (pool_size ** 2), 7, 7, dtype=self.dtype, device=device)
    rois = torch.tensor([[0, 0, 0, 5, 5]],  # format is (xyxy)
                        dtype=self.dtype, device=device)
    pool_h, pool_w = (pool_size, pool_size)
    ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
    y = ps_roi_align(x, rois)
    gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
                                  spatial_scale=1, sampling_ratio=2,
                                  dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')

    # Non-contiguous input. Use the layer's sampling_ratio=2 for the
    # reference (was -1, which only matched because ceil(bin) == 2 here).
    y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
    gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device,
                                  spatial_scale=1, sampling_ratio=2,
                                  dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_cuda(self):
    """Forward PSRoIAlign on CUDA, multiple ROIs over a two-image batch,
    against the slow reference on contiguous and transposed input."""
    # The skipIf decorator already guarantees CUDA is available; the former
    # `if torch.cuda.is_available() else cpu` fallback was dead code and
    # inconsistent with the sibling CUDA tests.
    device = torch.device('cuda')
    pool_size = 5
    x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
    rois = torch.tensor([[0, 0, 0, 9, 9],  # format is (xyxy)
                         [0, 0, 5, 4, 9],
                         [0, 5, 5, 9, 9],
                         [1, 0, 0, 9, 9]],
                        dtype=self.dtype, device=device)
    pool_h, pool_w = (pool_size, pool_size)
    ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
    y = ps_roi_align(x, rois)
    gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
                                  spatial_scale=1, sampling_ratio=2,
                                  dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
    y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
    gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w,
                                  device, spatial_scale=1, sampling_ratio=2,
                                  dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_gradient_cuda(self):
    """Backward PSRoIAlign on CUDA: forward + sum().backward() on an
    all-ones input, comparing x.grad against a precomputed expected
    gradient (same values as the CPU twin of this test)."""
    device = torch.device('cuda')
    pool_size = 3
    # sampling_ratio=-1 selects the adaptive grid (ceil of the bin size).
    layer = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
                           sampling_ratio=-1).to(dtype=self.dtype, device=device)
    x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 4, 4],
        [0, 0, 3, 5, 5],
        [0, 1, 0, 2, 4]],
        dtype=self.dtype, device=device)
    y = layer(x, rois)
    s = y.sum()
    s.backward()
    # Expected gradient, precomputed for the exact inputs above.
    gt_grad = torch.tensor([[[[8.125e-01, 6.875e-01, 0.0, 0.0, 0.0, ],
                              [2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
                              [1.0416666667e-01, 6.25e-02, 0.0, 0.0, 0.0, ],
                              [5.2083333333e-01, 3.125e-01, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[8.3266726847e-17, 1.125e00, 3.750e-01, 0.0, 0.0, ],
                              [2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
                              [0.0, 3.4722222222e-02, 9.7222222222e-02, 3.4722222222e-02, 0.0, ],
                              [0.0, 1.7361111111e-01, 4.8611111111e-01, 1.7361111111e-01, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[0.0, 5.000e-01, 4.375e-01, 5.000e-01, 6.25e-02, ],
                              [0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
                              [0.0, 0.0, 0.0, 6.25e-02, 1.0416666667e-01, ],
                              [0.0, 0.0, 0.0, 3.125e-01, 5.2083333333e-01, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
                              [5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
                              [3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ],
                              [3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
                              [5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
                              [0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ],
                              [0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
                              [0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
                              [0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ],
                              [0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
                              [7.2222222222e-01, 6.1111111111e-01, 0.0, 0.0, 0.0, ],
                              [7.1527777778e-01, 4.5138888889e-01, 0.0, 0.0, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
                              [7.4014868308e-17, 1.000e00, 3.3333333333e-01, 0.0, 0.0, ],
                              [9.2518585385e-18, 3.3333333333e-01, 6.25e-01, 2.0833333333e-01, 0.0, ]],
                             [[0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 0.0, 0.0, 0.0, 0.0, ],
                              [0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
                              [0.0, 4.4444444444e-01, 3.8888888889e-01, 4.4444444444e-01, 5.5555555556e-02, ],
                              [0.0, 5.5555555556e-02, 4.8611111111e-02, 4.3055555556e-01, 6.3194444444e-01, ]]]],
                           device=device, dtype=self.dtype)
    self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIAlign')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_gradcheck_cuda(self):
    """gradcheck PSRoIAlign on CUDA (module and scripted functional forms).

    NOTE(review): the diff rendering fused this method with fragments of
    the replacement PSRoIAlignTester class; this body reconstructs the
    old-side method, mirroring its CPU twin.
    """
    device = torch.device('cuda')
    pool_size = 5
    x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 9, 9],
        [0, 0, 5, 5, 9],
        [0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
    m = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
                       sampling_ratio=2).to(dtype=self.dtype, device=device)

    def func(input):
        return m(input, rois)

    self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for PSRoIAlign CUDA')
    self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for PSRoIAlign CUDA')

    @torch.jit.script
    def script_func(input, rois):
        return ops.ps_roi_align(input, rois, 5, 2.0, 1)[0]

    self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
                    'gradcheck failed for scripted ps_roi_align on CUDA')
class PSRoIPoolTester(unittest.TestCase):
    # Tests for position-sensitive ROI pooling (ops.PSRoIPool / ops.ps_roi_pool).

    @classmethod
    def setUpClass(cls):
        # float64 so gradcheck's finite-difference comparisons are accurate.
        cls.dtype = torch.float64
def slow_ps_roi_pooling(self, x, rois, pool_h, pool_w, device, spatial_scale=1,
                        dtype=torch.float64):
    """Slow reference implementation of position-sensitive ROI pooling.

    Each output cell (c_out, j, i) averages a distinct input channel
    c_in = c_out * pool_h * pool_w + j * pool_w + i over its bin.

    :param x: input features of shape (N, C, H, W); C must be divisible
        by pool_h * pool_w.
    :param rois: tensor of shape (R, 5) with rows (batch_idx, x1, y1, x2, y2).
    :param device: device for the output tensor (None -> CPU).
    :returns: tensor of shape (R, C // (pool_h * pool_w), pool_h, pool_w).
    """
    # Reconstructed from the diff-garbled original; fixes the semi-bug
    # where end_h was computed from bin_w instead of bin_h.
    if device is None:
        device = torch.device("cpu")
    num_input_channels = x.size(1)
    self.assertEqual(num_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
    num_output_channels = int(num_input_channels / (pool_h * pool_w))
    y = torch.zeros(rois.size(0), num_output_channels, pool_h, pool_w, dtype=dtype, device=device)
    rois = torch.round(rois * spatial_scale).int()
    for n in range(0, x.size(0)):
        for r, roi in enumerate(rois):
            if roi[0] != n:  # ROI belongs to a different batch element
                continue
            c_in = 0
            for c_out in range(0, num_output_channels):
                roi_height = max(roi[4].item() - roi[2].item(), 1)
                roi_width = max(roi[3].item() - roi[1].item(), 1)
                bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

                for j in range(0, pool_h):
                    start_h = int(np.floor(j * bin_h)) + roi[2].item()
                    # was bin_w here -- the vertical bin end must use bin_h
                    end_h = int(np.ceil((j + 1) * bin_h)) + roi[2].item()

                    # range-check: clamp to the feature map
                    start_h = min(max(start_h, 0), x.size(2))
                    end_h = min(max(end_h, 0), x.size(2))

                    for i in range(0, pool_w):
                        start_w = int(np.floor(i * bin_w)) + roi[1].item()
                        end_w = int(np.ceil((i + 1) * bin_w)) + roi[1].item()

                        # range-check: clamp to the feature map
                        start_w = min(max(start_w, 0), x.size(3))
                        end_w = min(max(end_w, 0), x.size(3))

                        is_empty = (end_h <= start_h) or (end_w <= start_w)
                        area = (end_h - start_h) * (end_w - start_w)

                        if not is_empty:
                            t = torch.sum(x[n, c_in, slice(start_h, end_h), slice(start_w, end_w)])
                            y[r, c_out, j, i] = t / area
                        c_in += 1  # next output pixel uses the next input channel
    return y
def test_ps_roi_pool_basic_cpu(self):
    """Forward PSRoIPool on CPU, single ROI, against the slow reference,
    for contiguous and transposed (non-contiguous) input."""
    device = torch.device('cpu')
    pool_size = 3
    pool_h = pool_w = pool_size
    feats = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device)
    boxes = torch.tensor([[0, 0, 0, 4, 4]],  # format is (xyxy)
                         dtype=self.dtype, device=device)
    layer = ops.PSRoIPool((pool_h, pool_w), 1)
    for inp in (feats, feats.permute(0, 1, 3, 2)):
        out = layer(inp, boxes)
        ref = self.slow_ps_roi_pooling(inp, boxes, pool_h, pool_w, device, dtype=self.dtype)
        self.assertTrue(torch.allclose(ref, out), 'PSRoIPool layer incorrect on CPU')
def test_ps_roi_pool_cpu(self):
    """Forward PSRoIPool on CPU, several ROIs over a two-image batch,
    against the slow reference on contiguous and transposed input."""
    device = torch.device('cpu')
    pool_size = 5
    pool_h = pool_w = pool_size
    feats = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
    boxes = torch.tensor([[0, 0, 0, 9, 9],  # format is (xyxy)
                          [0, 0, 5, 4, 9],
                          [0, 5, 5, 9, 9],
                          [1, 0, 0, 9, 9]],
                         dtype=self.dtype, device=device)
    layer = ops.PSRoIPool((pool_h, pool_w), 1)
    for inp in (feats, feats.permute(0, 1, 3, 2)):
        out = layer(inp, boxes)
        ref = self.slow_ps_roi_pooling(inp, boxes, pool_h, pool_w, device, dtype=self.dtype)
        self.assertTrue(torch.allclose(ref, out), 'PSRoIPool layer incorrect on CPU')
def test_ps_roi_pool_gradient_cpu(self):
    """Backward PSRoIPool on CPU: forward + sum().backward() on an all-ones
    input, comparing x.grad against a precomputed expected gradient."""
    device = torch.device('cpu')
    pool_size = 3
    layer = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
    x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 4, 4],
        [0, 0, 3, 5, 5],
        [0, 1, 0, 2, 4]],
        dtype=self.dtype, device=device)
    y = layer(x, rois)
    s = y.sum()
    s.backward()
    # Expected gradient, precomputed for the exact inputs above.
    gt_grad = torch.tensor([[[[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.2500, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000],
                              [0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.2500, 0.2500],
                              [0.0000, 0.0000, 0.0000, 0.2500, 0.2500]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.5000, 0.5000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.5000, 0.5000]]]],
                           device=device, dtype=self.dtype)
    self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIPool on CPU')
def test_ps_roi_pool_gradcheck_cpu(self):
    """gradcheck PSRoIPool on CPU, in both module form (contiguous and
    transposed input) and TorchScript functional form."""
    device = torch.device('cpu')
    pool_size = 5
    x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 9, 9],
        [0, 0, 5, 5, 9],
        [0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
    m = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)

    def run_layer(inp):
        return m(inp, rois)

    for candidate in (x, x.permute(0, 1, 3, 2)):
        self.assertTrue(gradcheck(run_layer, (candidate,)),
                        'gradcheck failed for PSRoIPool on CPU')

    @torch.jit.script
    def script_func(input, rois):
        return ops.ps_roi_pool(input, rois, 5, 1.0)[0]

    self.assertTrue(gradcheck(lambda inp: script_func(inp, rois), (x,)),
                    'gradcheck failed for scripted ps_roi_pool on CPU')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_basic_cuda(self):
    """Forward PSRoIPool on CUDA, single ROI, against the slow reference,
    for contiguous and transposed (non-contiguous) input."""
    device = torch.device('cuda')
    pool_size = 3
    pool_h = pool_w = pool_size
    feats = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device)
    boxes = torch.tensor([[0, 0, 0, 4, 4]],  # format is (xyxy)
                         dtype=self.dtype, device=device)
    layer = ops.PSRoIPool((pool_h, pool_w), 1)
    for inp in (feats, feats.permute(0, 1, 3, 2)):
        out = layer(inp, boxes)
        ref = self.slow_ps_roi_pooling(inp, boxes, pool_h, pool_w, device, dtype=self.dtype)
        self.assertTrue(torch.allclose(ref.cuda(), out), 'PSRoIPool layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_cuda(self):
    """Forward PSRoIPool on CUDA, several ROIs over a two-image batch,
    against the slow reference on contiguous and transposed input."""
    # The skipIf decorator already guarantees CUDA is available; the former
    # `if torch.cuda.is_available() else cpu` fallback was dead code and
    # inconsistent with the sibling CUDA tests.
    device = torch.device('cuda')
    pool_size = 5
    x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
    rois = torch.tensor([[0, 0, 0, 9, 9],  # format is (xyxy)
                         [0, 0, 5, 4, 9],
                         [0, 5, 5, 9, 9],
                         [1, 0, 0, 9, 9]],
                        dtype=self.dtype, device=device)
    pool_h, pool_w = (pool_size, pool_size)
    ps_roi_pool = ops.PSRoIPool((pool_h, pool_w), 1)
    y = ps_roi_pool(x, rois)
    gt_y = self.slow_ps_roi_pooling(x, rois, pool_h, pool_w, device, dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
    y = ps_roi_pool(x.permute(0, 1, 3, 2), rois)
    gt_y = self.slow_ps_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device, dtype=self.dtype)
    self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_gradient_cuda(self):
    """Backward PSRoIPool on CUDA: forward + sum().backward() on an
    all-ones input, comparing x.grad against a precomputed expected
    gradient (same values as the CPU twin of this test)."""
    device = torch.device('cuda')
    pool_size = 3
    layer = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
    x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 4, 4],
        [0, 0, 3, 5, 5],
        [0, 1, 0, 2, 4]],
        dtype=self.dtype, device=device)
    y = layer(x, rois)
    s = y.sum()
    s.backward()
    # Expected gradient, precomputed for the exact inputs above.
    gt_grad = torch.tensor([[[[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.2500, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000],
                              [0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.2500, 0.2500],
                              [0.0000, 0.0000, 0.0000, 0.2500, 0.2500]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
                              [0.5000, 0.5000, 0.0000, 0.0000, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
                              [0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000]],
                             [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
                              [0.0000, 0.0000, 0.0000, 0.5000, 0.5000]]]],
                           device=device, dtype=self.dtype)
    self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIPool')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_gradcheck_cuda(self):
    """gradcheck PSRoIPool on CUDA, in both module form (contiguous and
    transposed input) and TorchScript functional form."""
    device = torch.device('cuda')
    pool_size = 5
    x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
    rois = torch.tensor([
        [0, 0, 0, 9, 9],
        [0, 0, 5, 5, 9],
        [0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
    m = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)

    def run_layer(inp):
        return m(inp, rois)

    for candidate in (x, x.permute(0, 1, 3, 2)):
        self.assertTrue(gradcheck(run_layer, (candidate,)),
                        'gradcheck failed for PSRoIPool CUDA')

    @torch.jit.script
    def script_func(input, rois):
        return ops.ps_roi_pool(input, rois, 5, 1.0)[0]

    self.assertTrue(gradcheck(lambda inp: script_func(inp, rois), (x,)),
                    'gradcheck failed for scripted ps_roi_pool on CUDA')
class NMSTester(unittest.TestCase): class NMSTester(unittest.TestCase):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment