Commit af225a8a authored by pedrofreire, committed by Francisco Massa

Simplify and organize test_ops. (#1551)

* Simplify and organize test_ops.

We perform the following:

- Simplify the functions slow_roi_pooling, slow_ps_roi_pooling, slow_ps_roi_align and bilinear_interpolate (including finding and removing a semi-bug in slow_ps_roi_pooling, which used bin_w instead of bin_h);
- Write a slow_roi_align function, which was missing;
- Create a base class testing all combinations of forward/backward, cpu/cuda, contiguous/non-contiguous;
- Organize all testing inside the base class with _test_forward and _test_backward (which can easily be overridden if a particular op needs something different); an Op class then only needs to implement fn, get_script_fn, and expected_fn, as in the sketch below.
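
Roughly, the new layout looks like this (a simplified sketch, not the verbatim code; RoIPoolTester is shown, and the other testers follow the same shape):

    class RoIOpTester(object):
        def test_forward_cpu_contiguous(self):
            self._test_forward(device=torch.device('cpu'), contiguous=True)

        # ... the remaining forward/backward x cpu/cuda x contiguous/non-contiguous
        # combinations, plus _test_forward (compares fn against expected_fn) and
        # _test_backward (runs gradcheck on fn and on get_script_fn).

    class RoIPoolTester(RoIOpTester, unittest.TestCase):
        def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
            return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)

        def get_script_fn(self, rois, pool_size):
            @torch.jit.script
            def script_fn(input, rois, pool_size):
                # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
                return ops.roi_pool(input, rois, pool_size, 1.0)[0]
            return lambda x: script_fn(x, rois, pool_size)

        def expected_fn(self, x, rois, pool_h, pool_w, **kwargs):
            ...  # slow, readable reference implementation to compare against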

A few points:
- We use the same inputs for all tests, rather than exercising the full input domain of each operation. One improvement would be to test more diverse inputs, and to tailor the inputs to particular ops (e.g. different inputs for pooling ops and align ops).
- Running all tests is still quite slow (~1 min for the CPU tests alone), so there is room to improve that.

* Reduce input size used in gradcheck.

gradcheck can be quite costly, and it was causing OOM errors and making
the tests slow. By reducing the size of the input, the test time is
down to 3 seconds for the CPU tests; a sketch of the resulting backward test follows.
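
Roughly, with the reduced sizes (RoIPool shown; func and m are illustrative names, and gradcheck needs float64 inputs with requires_grad=True):

    pool_size = 2
    x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=torch.float64, requires_grad=True)
    rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float64)  # format is (xyxy)
    m = ops.RoIPool((pool_size, pool_size), 1)

    def func(z):
        return m(z, rois)

    assert gradcheck(func, (x,))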

Other points:
- We remove an unused namedtuple;
- We inherit from object for better Python 2 compatibility;
- We remove a hardcoded pool_size from the TorchScript functions, and
add it as a parameter instead.

* Replace Tensor with torch.Tensor in type annotations.

This should fix lint errors.
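
For example, the scripted helpers are now annotated as:

    @torch.jit.script
    def script_fn(input, rois, pool_size):
        # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
        return ops.roi_pool(input, rois, pool_size, 1.0)[0]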
parent 4897402a
from __future__ import division
import numpy as np
import torch
from torch.autograd import gradcheck
@@ -8,1168 +9,305 @@ from itertools import product
import unittest
class RoIPoolTester(unittest.TestCase):
class RoIOpTester(object):
@classmethod
def setUpClass(cls):
cls.dtype = torch.float64
def slow_roi_pooling(self, x, rois, pool_h, pool_w, spatial_scale=1,
device=None, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
c = x.size(1)
y = torch.zeros(rois.size(0), c, pool_h, pool_w, dtype=dtype, device=device)
rois = torch.round(rois * spatial_scale)
for n in range(0, y.size(0)):
for r, roi in enumerate(rois):
if roi[0] == n:
start_h, end_h = int(roi[2].item()), int(roi[4].item()) + 1
start_w, end_w = int(roi[1].item()), int(roi[3].item()) + 1
roi_x = x[roi[0].long(), :, start_h:end_h, start_w:end_w]
bin_h, bin_w = roi_x.size(-2) / float(pool_h), roi_x.size(-1) / float(pool_w)
for j in range(0, pool_h):
cj = slice(int(np.floor(j * bin_h)), int(np.ceil((j + 1) * bin_h)))
for i in range(0, pool_w):
ci = slice(int(np.floor(i * bin_w)), int(np.ceil((i + 1) * bin_w)))
t = roi_x[:, cj, ci].reshape(c, -1)
if t.numel() > 0:
y[r, :, j, i] = torch.max(t, 1)[0]
return y
def test_roi_pool_basic_cpu(self):
device = torch.device('cpu')
x = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 4, 4]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (5, 5)
roi_pool = ops.RoIPool((pool_h, pool_w), 1)
y = roi_pool(x, rois)
gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w, device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU')
# non-contiguous
y = roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU')
def test_roi_pool_cpu(self):
device = torch.device('cpu')
x = torch.rand(2, 1, 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (5, 5)
roi_pool = ops.RoIPool((pool_h, pool_w), 1)
y = roi_pool(x, rois)
gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w, device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU for batch > 1')
# non-contiguous
y = roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU for batch > 1')
def test_roi_pool_cpu_empty_rois(self):
device = torch.device('cpu')
x = torch.tensor(
[[[[0.1767, 1.2851, 4.2325, 4.8645, 7.1496]],
[[2.5916, 4.3361, 3.8143, 6.1329, 2.0230]],
[[1.4492, 3.3384, 4.0816, 6.3116, 5.1068]]]],
dtype=self.dtype, device=device)
rois = torch.tensor(
[[0., 1., 0., 4., 0.],
[0., 2., 0., 3., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 2., 0., 2., 0.]],
dtype=self.dtype, device=device)
pool_h, pool_w = (1, 2)
roi_pool = ops.RoIPool((pool_h, pool_w), 1)
y = roi_pool(x, rois)
        gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU empty rois')

        # non-contiguous
        y = roi_pool(x.permute(0, 1, 3, 2), rois)
        gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y, y), 'RoIPool layer incorrect on CPU for empty rois non-contiguous')

    def test_forward_cpu_contiguous(self):
        self._test_forward(device=torch.device('cpu'), contiguous=True)

    def test_forward_cpu_non_contiguous(self):
        self._test_forward(device=torch.device('cpu'), contiguous=False)

    def test_backward_cpu_contiguous(self):
        self._test_backward(device=torch.device('cpu'), contiguous=True)
def test_roi_pool_gradient_cpu(self):
device = torch.device('cpu')
x = torch.ones(1, 1, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 4, 9],
[0, 0, 0, 4, 4]],
dtype=self.dtype, device=device)
layer = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=device)
y = layer(x, rois)
s = y.sum()
s.backward()
gt_grad = torch.tensor([[[[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
[1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
[1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
[1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
[1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
[1., 1., 1., 1., 1., 0., 0., 0., 0., 0.]]]],
device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for roi_pool')
def test_roi_pool_align_non_cont_grad_cpu(self):
devices = ['cpu']
if torch.cuda.is_available():
devices.append('cuda')
for d in devices:
device = torch.device(d)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
grad_cont = torch.rand(3, 1, 5, 5, dtype=self.dtype, device=device)
grad = grad_cont.permute(2, 1, 3, 0).contiguous().permute(3, 1, 0, 2)
for op in ['RoIPool', 'RoIAlign']:
x = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
kwargs = {}
if op == 'RoIAlign':
kwargs['sampling_ratio'] = 1
m = getattr(ops, op)((5, 5), 1, **kwargs)
y = m(x, rois)
y.backward(grad_cont)
g1 = x.grad.detach().clone()
del x.grad
y = m(x, rois)
y.backward(grad)
g2 = x.grad.detach().clone()
del x.grad
self.assertTrue(torch.allclose(g1, g2), 'gradient incorrect for {}'.format(op))
def test_roi_pool_gradcheck_cpu(self):
device = torch.device('cpu')
x = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
m = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=device)
def func(input):
return m(input, rois)
self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for roi_pool CPU')
self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for roi_pool CPU')
@torch.jit.script
def script_func(input, rois):
return ops.roi_pool(input, rois, 5, 1.0)[0]
self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_pool')
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roi_pool_basic_cuda(self):
        device = torch.device('cuda')
        x = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=device)
        rois = torch.tensor([[0, 0, 0, 4, 4]],  # format is (xyxy)
                            dtype=self.dtype, device=device)
        pool_h, pool_w = (5, 5)
        roi_pool = ops.RoIPool((pool_h, pool_w), 1)
        y = roi_pool(x, rois)
        gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y.cuda(), y), 'RoIPool layer incorrect')
        y = roi_pool(x.permute(0, 1, 3, 2), rois)
        gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y.cuda(), y), 'RoIPool layer incorrect')

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roi_pool_cuda(self):
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        x = torch.rand(2, 1, 10, 10, dtype=self.dtype, device=device)

    def test_backward_cpu_non_contiguous(self):
        self._test_backward(device=torch.device('cpu'), contiguous=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_forward_cuda_contiguous(self):
        self._test_forward(device=torch.device('cuda'), contiguous=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_forward_cuda_non_contiguous(self):
        self._test_forward(device=torch.device('cuda'), contiguous=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_backward_cuda_contiguous(self):
        self._test_backward(device=torch.device('cuda'), contiguous=True)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_backward_cuda_non_contiguous(self):
        self._test_backward(device=torch.device('cuda'), contiguous=False)
def _test_forward(self, device, contiguous):
pool_size = 5
        # n_channels % (pool_size ** 2) == 0 required for PS operations.
n_channels = 2 * (pool_size ** 2)
x = torch.rand(2, n_channels, 10, 10, dtype=self.dtype, device=device)
if not contiguous:
x = x.permute(0, 1, 3, 2)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
        pool_h, pool_w = pool_size, pool_size
        y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1)
        gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1,
                                sampling_ratio=-1, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y, y))

    def _test_backward(self, device, contiguous):
        pool_size = 2
        x = torch.rand(1, 2 * (pool_size ** 2), 5, 5, dtype=self.dtype, device=device, requires_grad=True)
        if not contiguous:
            x = x.permute(0, 1, 3, 2)
        rois = torch.tensor([[0, 0, 0, 4, 4],  # format is (xyxy)
                             [0, 0, 2, 3, 4],
                             [0, 2, 2, 4, 4]],
                            dtype=self.dtype, device=device)

        def func(z):
            return self.fn(z, rois, pool_size, pool_size, spatial_scale=1, sampling_ratio=1)

        script_func = self.get_script_fn(rois, pool_size)

        self.assertTrue(gradcheck(func, (x,)))
        self.assertTrue(gradcheck(script_func, (x,)))
        return

    def fn(*args, **kwargs):
        pass

    def get_script_fn(*args, **kwargs):
        pass

    def expected_fn(*args, **kwargs):
        pass

        pool_h, pool_w = (5, 5)
        roi_pool = ops.RoIPool((pool_h, pool_w), 1)
        y = roi_pool(x, rois)
        gt_y = self.slow_roi_pooling(x, rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y.cuda(), y), 'RoIPool layer incorrect')
        y = roi_pool(x.permute(0, 1, 3, 2), rois)
        gt_y = self.slow_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(gt_y.cuda(), y), 'RoIPool layer incorrect')

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roi_pool_gradient_cuda(self):
        device = torch.device('cuda')
        layer = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=device)
        x = torch.ones(1, 1, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
        rois = torch.tensor([
            [0, 0, 0, 9, 9],
            [0, 0, 5, 4, 9],
            [0, 0, 0, 4, 4]],
            dtype=self.dtype, device=device)
        y = layer(x, rois)
        s = y.sum()
        s.backward()
        gt_grad = torch.tensor([[[[2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
                                  [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
                                  [2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
                                  [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
                                  [2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
                                  [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
                                  [2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
                                  [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
                                  [2., 1., 2., 1., 2., 0., 1., 0., 1., 0.],
                                  [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.]]]],
                               device=device, dtype=self.dtype)
        self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for roi_pool')

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
    def test_roi_pool_gradcheck_cuda(self):
        device = torch.device('cuda')
        x = torch.rand(1, 1, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
        rois = torch.tensor([
            [0, 0, 0, 9, 9],
            [0, 0, 5, 5, 9],
            [0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
        m = ops.RoIPool((5, 5), 1).to(dtype=self.dtype, device=device)

        def func(input):
            return m(input, rois)

        self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for roi_pool CUDA')
        self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for roi_pool CUDA')

        @torch.jit.script
        def script_func(input, rois):
            return ops.roi_pool(input, rois, 5, 1.0)[0]

        self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
                        'gradcheck failed for scripted roi_pool on CUDA')
class RoIAlignTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
torch.manual_seed(123)
cls.dtype = torch.float32
cls.x = torch.rand(1, 1, 10, 10, dtype=cls.dtype)
cls.single_roi = torch.tensor([[0, 0, 0, 4, 4]], # format is (xyxy)
dtype=cls.dtype)
cls.rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9]],
dtype=cls.dtype)
cls.gt_y_single = torch.tensor(
[[[[0.41617328, 0.5040753, 0.25266218, 0.4296828, 0.29928464],
[0.5210769, 0.57222337, 0.2524979, 0.32063985, 0.32635176],
[0.73108256, 0.6114335, 0.62033176, 0.8188273, 0.5562218],
[0.83115816, 0.70803946, 0.7084047, 0.74928707, 0.7769296],
[0.54266506, 0.45964524, 0.5780159, 0.80522037, 0.7321807]]]], dtype=cls.dtype)
cls.gt_y_multiple = torch.tensor(
[[[[0.49311584, 0.35972416, 0.40843594, 0.3638034, 0.49751836],
[0.70881474, 0.75481665, 0.5826779, 0.34767765, 0.46865487],
[0.4740328, 0.69306874, 0.3617804, 0.47145438, 0.66130304],
[0.6861706, 0.17634538, 0.47194335, 0.42473823, 0.37930614],
[0.62666404, 0.49973848, 0.37911576, 0.5842756, 0.7176864]]],
[[[0.67499936, 0.6607055, 0.42656037, 0.46134934, 0.42144877],
[0.7471722, 0.7235433, 0.14512213, 0.13031253, 0.289369],
[0.8443615, 0.6659734, 0.23614208, 0.14719573, 0.4268827],
[0.69429564, 0.5621515, 0.5019923, 0.40678093, 0.34556213],
[0.51315194, 0.7177093, 0.6494485, 0.6775592, 0.43865064]]],
[[[0.24465509, 0.36108392, 0.64635646, 0.4051828, 0.33956185],
[0.49006107, 0.42982674, 0.34184104, 0.15493104, 0.49633422],
[0.54400194, 0.5265246, 0.22381854, 0.3929715, 0.6757667],
[0.32961223, 0.38482672, 0.68877804, 0.71822757, 0.711909],
[0.561259, 0.71047884, 0.84651315, 0.8541089, 0.644432]]]], dtype=cls.dtype)
cls.x_grad = torch.tensor(
[[[[0.075625, 0.15125, 0.15124999, 0.15125002, 0.15812504,
0.15812503, 0.15124999, 0.15124999, 0.15125006, 0.0756249],
[0.15125, 0.30250007, 0.3025, 0.30250007, 0.31625012,
0.31625003, 0.3025, 0.3025, 0.30250013, 0.1512498],
[0.15124999, 0.3025, 0.30249995, 0.3025, 0.31625006,
0.31625, 0.30249995, 0.30249995, 0.30250007, 0.15124978],
[0.15125002, 0.30250007, 0.3025, 0.30250007, 0.31625012,
0.3162501, 0.3025, 0.3025, 0.30250013, 0.15124981],
[0.15812504, 0.31625012, 0.31625006, 0.31625012, 0.33062524,
0.3306251, 0.31625006, 0.31625006, 0.3162502, 0.15812483],
[0.5181251, 1.0962502, 1.0362502, 1.0962503, 0.69062525, 0.6906252,
1.0962502, 1.0362502, 1.0962503, 0.5181248],
[0.93125, 1.9925, 1.8624997, 1.9925, 1.0962502, 1.0962502,
1.9925, 1.8624998, 1.9925, 0.9312496],
[0.8712501, 1.8625, 1.7425002, 1.8625001, 1.0362502, 1.0362502,
1.8625, 1.7425001, 1.8625002, 0.8712497],
[0.93125004, 1.9925, 1.8625002, 1.9925, 1.0962503, 1.0962503,
1.9925001, 1.8625001, 1.9925001, 0.93124974],
[0.43562484, 0.9312497, 0.8712497, 0.9312497, 0.5181249, 0.5181248,
0.9312496, 0.8712497, 0.93124974, 0.43562466]]]], dtype=cls.dtype)
def test_roi_align_basic_cpu(self):
device = torch.device('cpu')
x = self.x.to(device)
single_roi = self.single_roi.to(device)
gt_y_single = self.gt_y_single.to(device)
pool_h, pool_w = (5, 5)
roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)
y = roi_align(x, single_roi)
self.assertTrue(torch.allclose(gt_y_single, y), 'RoIAlign layer incorrect for single ROI on CPU')
y = roi_align(x.transpose(2, 3).contiguous().transpose(2, 3), single_roi)
self.assertTrue(torch.allclose(gt_y_single, y), 'RoIAlign layer incorrect for single ROI on CPU')
def test_roi_align_cpu(self):
device = torch.device('cpu')
x = self.x.to(device)
rois = self.rois.to(device)
gt_y_multiple = self.gt_y_multiple.to(device)
pool_h, pool_w = (5, 5)
roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)
y = roi_align(x, rois)
self.assertTrue(torch.allclose(gt_y_multiple, y), 'RoIAlign layer incorrect for multiple ROIs on CPU')
y = roi_align(x.transpose(2, 3).contiguous().transpose(2, 3), rois)
self.assertTrue(torch.allclose(gt_y_multiple, y), 'RoIAlign layer incorrect for multiple ROIs on CPU')
class RoIPoolTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois)
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_basic_cuda(self):
device = torch.device('cuda')
x = self.x.to(device)
single_roi = self.single_roi.to(device)
gt_y_single = self.gt_y_single.to(device)
pool_h, pool_w = (5, 5)
roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)
y = roi_align(x, single_roi)
self.assertTrue(torch.allclose(gt_y_single, y), 'RoIAlign layer incorrect for single ROI on CUDA')
y = roi_align(x.transpose(2, 3).contiguous().transpose(2, 3), single_roi)
self.assertTrue(torch.allclose(gt_y_single, y), 'RoIAlign layer incorrect for single ROI on CUDA')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_cuda(self):
device = torch.device('cuda')
x = self.x.to(device)
rois = self.rois.to(device)
gt_y_multiple = self.gt_y_multiple.to(device)
pool_h, pool_w = (5, 5)
roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)
y = roi_align(x, rois)
self.assertTrue(torch.allclose(gt_y_multiple, y), 'RoIAlign layer incorrect for multiple ROIs on CUDA')
        y = roi_align(x.transpose(2, 3).contiguous().transpose(2, 3), rois)
        self.assertTrue(torch.allclose(gt_y_multiple, y), 'RoIAlign layer incorrect for multiple ROIs on CUDA')

    def get_script_fn(self, rois, pool_size):
        @torch.jit.script
        def script_fn(input, rois, pool_size):
            # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
            return ops.roi_pool(input, rois, pool_size, 1.0)[0]
        return lambda x: script_fn(x, rois, pool_size)
    def test_roi_align_gradient_cpu(self):
        """
        Compute gradients for RoIAlign with multiple bounding boxes on CPU
        """
        device = torch.device('cpu')
        pool_h, pool_w = (5, 5)
        roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)

        x = self.x.to(device).clone()
        rois = self.rois.to(device)
        gt_grad = self.x_grad.to(device)

        x.requires_grad = True
        y = roi_align(x, rois)
        s = y.sum()
        s.backward()

        self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for RoIAlign CPU')

    def test_roi_align_gradcheck_cpu(self):
        dtype = torch.float64
        device = torch.device('cpu')
        m = ops.RoIAlign((5, 5), 0.5, 1).to(dtype=dtype, device=device)
        x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
        rois = self.rois.to(device=device, dtype=dtype)

        def func(input):
            return m(input, rois)

        self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for RoIAlign CPU')
        self.assertTrue(gradcheck(func, (x.transpose(2, 3),)), 'gradcheck failed for RoIAlign CPU')

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")

        n_channels = x.size(1)
        y = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            roi_h, roi_w = roi_x.shape[-2:]
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
        return y
class PSRoIPoolTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois)
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_func(input, rois):
return ops.roi_align(input, rois, 5, 0.5, 1)[0]
def script_fn(input, rois, pool_size):
# type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
return ops.ps_roi_pool(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)), 'gradcheck failed for scripted roi_align')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_gradient_cuda(self):
"""
Compute gradients for RoIAlign with multiple bounding boxes on the GPU
"""
device = torch.device('cuda')
pool_h, pool_w = (5, 5)
roi_align = ops.RoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(device=device)
x = self.x.to(device).clone()
rois = self.rois.to(device)
gt_grad = self.x_grad.to(device)
x.requires_grad = True
y = roi_align(x, rois)
s = y.sum()
s.backward()
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for RoIAlign CUDA')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_roi_align_gradcheck_cuda(self):
dtype = torch.float64
device = torch.device('cuda')
m = ops.RoIAlign((5, 5), 0.5, 1).to(dtype=dtype, device=device)
x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
rois = self.rois.to(device=device, dtype=dtype)
        def func(input):
            return m(input, rois)

        self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for RoIAlign CUDA')
        self.assertTrue(gradcheck(func, (x.transpose(2, 3),)), 'gradcheck failed for RoIAlign CUDA')

        @torch.jit.script
        def script_func(input, rois):
            return ops.roi_align(input, rois, 5, 0.5, 1)[0]

        self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
                        'gradcheck failed for scripted roi_align on CUDA')

    def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
                    device=None, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = x.size(1)
        self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

        def get_slice(k, block):
            return slice(int(np.floor(k * block)), int(np.ceil((k + 1) * block)))

        for roi_idx, roi in enumerate(rois):
            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (int(round(x.item() * spatial_scale)) for x in roi[1:])
            roi_x = x[batch_idx, :, i_begin:i_end + 1, j_begin:j_end + 1]

            roi_height = max(i_end - i_begin, 1)
            roi_width = max(j_end - j_begin, 1)
            bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)

            for i in range(0, pool_h):
                for j in range(0, pool_w):
                    bin_x = roi_x[:, get_slice(i, bin_h), get_slice(j, bin_w)]
                    if bin_x.numel() > 0:
                        area = bin_x.size(-2) * bin_x.size(-1)
                        for c_out in range(0, n_output_channels):
                            c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                            t = torch.sum(bin_x[c_in, :, :])
                            y[roi_idx, c_out, i, j] = t / area
        return y
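# Illustrative note on the position-sensitive mapping above: each output
# channel gathers one input channel per bin position, via
# c_in = c_out * (pool_h * pool_w) + pool_w * i + j.
# E.g. with pool_h = pool_w = 2: (c_out=0, i=1, j=0) -> c_in = 0 * 4 + 2 * 1 + 0 = 2.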
def bilinear_interpolate(data, height, width, y, x):
    if y < -1.0 or y > height or x < -1.0 or x > width:
        return 0.

    y = min(max(0, y), height - 1)
    x = min(max(0, x), width - 1)

    y_low = int(y)
    y_high = min(y_low + 1, height - 1)

    x_low = int(x)
    x_high = min(x_low + 1, width - 1)

    wy_h = y - y_low
    wy_l = 1 - wy_h

    wx_h = x - x_low
    wx_l = 1 - wx_h

    val = 0
    for wx, x in zip((wx_l, wx_h), (x_low, x_high)):
        for wy, y in zip((wy_l, wy_h), (y_low, y_high)):
            val += wx * wy * data[y * width + x]
    return val
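# Quick illustrative check of the helper above: for a flattened 2x2 image
# data = [0., 1., 2., 3.], the centre point weights all four neighbours
# equally, so bilinear_interpolate(data, 2, 2, 0.5, 0.5) == 1.5.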
class RoIAlignTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio)(x, rois)
class PSRoIAlignTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.dtype = torch.float64
def get_script_fn(self, rois, pool_size):
@torch.jit.script
def script_fn(input, rois, pool_size):
# type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
return ops.roi_align(input, rois, pool_size, 1.0)[0]
return lambda x: script_fn(x, rois, pool_size)
def slow_ps_roi_align(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1,
sampling_ratio=-1, dtype=torch.float64):
def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1,
device=None, dtype=torch.float64):
if device is None:
device = torch.device("cpu")
num_input_channels = in_data.size(1)
self.assertEqual(num_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
num_output_channels = int(num_input_channels / (pool_h * pool_w))
out_data = torch.zeros(rois.size(0), num_output_channels, pool_h, pool_w, dtype=dtype, device=device)
n_channels = in_data.size(1)
out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device)
for n in range(0, in_data.size(0)):
for r, roi in enumerate(rois):
if roi[0] != n:
continue
roi[1:] = (roi[1:] * spatial_scale) - 0.5
c_in = 0
roi_height = float(roi[4].item() - roi[2].item())
roi_width = float(roi[3].item() - roi[1].item())
bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)
for c_out in range(0, num_output_channels):
for j in range(0, pool_h):
start_h = float(j) * bin_h + roi[2].item()
for i in range(0, pool_w):
start_w = float(i) * bin_w + roi[1].item()
roi_bin_grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(roi_height / pool_h))
roi_bin_grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(roi_width / pool_w))
val = 0.
for iy in range(0, roi_bin_grid_h):
y = start_h + (iy + 0.5) * bin_h / float(roi_bin_grid_h)
for ix in range(0, roi_bin_grid_w):
x = start_w + (ix + 0.5) * bin_w / float(roi_bin_grid_w)
batch_idx = int(roi[0])
j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale for x in roi[1:])
roi_h = i_end - i_begin
roi_w = j_end - j_begin
bin_h = roi_h / pool_h
bin_w = roi_w / pool_w
for i in range(0, pool_h):
start_h = i_begin + i * bin_h
grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
for j in range(0, pool_w):
start_w = j_begin + j * bin_w
grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
for channel in range(0, n_channels):
val = 0
for iy in range(0, grid_h):
y = start_h + (iy + 0.5) * bin_h / grid_h
for ix in range(0, grid_w):
x = start_w + (ix + 0.5) * bin_w / grid_w
val += bilinear_interpolate(
in_data[n, c_in, :, :].flatten(),
in_data[batch_idx, channel, :, :].flatten(),
in_data.size(-2),
in_data.size(-1),
y, x
)
count = roi_bin_grid_h * roi_bin_grid_w
out_data[r, c_out, j, i] = val / count
c_in += 1
return out_data
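# Illustrative note on the sampling grid above: with sampling_ratio = -1 the
# grid adapts to the bin size, e.g. bin_h = 1.8 gives grid_h = ceil(1.8) = 2,
# placing samples at offsets (0.5) * 1.8 / 2 = 0.45 and (1.5) * 1.8 / 2 = 1.35
# from the bin start.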
def test_ps_roi_align_basic_cpu(self):
device = torch.device('cpu')
pool_size = 3
x = torch.rand(1, 2 * (pool_size ** 2), 7, 7, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 5, 5]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=-1,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
def test_ps_roi_align_cpu(self):
device = torch.device('cpu')
pool_size = 5
x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w,
device, spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIAlign layer incorrect on CPU')
def test_ps_roi_align_gradient_cpu(self):
device = torch.device('cpu')
pool_size = 3
layer = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
sampling_ratio=-1).to(dtype=self.dtype, device=device)
x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 4, 4],
[0, 0, 3, 5, 5],
[0, 1, 0, 2, 4]],
dtype=self.dtype, device=device)
y = layer(x, rois)
s = y.sum()
s.backward()
gt_grad = torch.tensor([[[[8.125e-01, 6.875e-01, 0.0, 0.0, 0.0, ],
[2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
[1.0416666667e-01, 6.25e-02, 0.0, 0.0, 0.0, ],
[5.2083333333e-01, 3.125e-01, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[8.3266726847e-17, 1.125e00, 3.750e-01, 0.0, 0.0, ],
[2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
[0.0, 3.4722222222e-02, 9.7222222222e-02, 3.4722222222e-02, 0.0, ],
[0.0, 1.7361111111e-01, 4.8611111111e-01, 1.7361111111e-01, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[0.0, 5.000e-01, 4.375e-01, 5.000e-01, 6.25e-02, ],
[0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
[0.0, 0.0, 0.0, 6.25e-02, 1.0416666667e-01, ],
[0.0, 0.0, 0.0, 3.125e-01, 5.2083333333e-01, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
[5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
[3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ],
[3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
[5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
[0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ],
[0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
[0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
[0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ],
[0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
[7.2222222222e-01, 6.1111111111e-01, 0.0, 0.0, 0.0, ],
[7.1527777778e-01, 4.5138888889e-01, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
[7.4014868308e-17, 1.000e00, 3.3333333333e-01, 0.0, 0.0, ],
[9.2518585385e-18, 3.3333333333e-01, 6.25e-01, 2.0833333333e-01, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
[0.0, 4.4444444444e-01, 3.8888888889e-01, 4.4444444444e-01, 5.5555555556e-02, ],
[0.0, 5.5555555556e-02, 4.8611111111e-02, 4.3055555556e-01, 6.3194444444e-01, ]]]],
device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIAlign on CPU')
def test_ps_roi_align_gradcheck_cpu(self):
device = torch.device('cpu')
pool_size = 5
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
m = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
sampling_ratio=2).to(dtype=self.dtype, device=device)
def func(input):
return m(input, rois)
self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for PSRoIAlign on CPU')
self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for PSRoIAlign on CPU')
@torch.jit.script
def script_func(input, rois):
return ops.ps_roi_align(input, rois, 5, 2.0, 1)[0]
self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
'gradcheck failed for scripted ps_roi_align on CPU')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_basic_cuda(self):
device = torch.device('cuda')
pool_size = 3
x = torch.rand(1, 2 * (pool_size ** 2), 7, 7, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 5, 5]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=-1,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_cuda(self):
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
pool_size = 5
x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_align = ops.PSRoIAlign((pool_h, pool_w), spatial_scale=1, sampling_ratio=2)
y = ps_roi_align(x, rois)
gt_y = self.slow_ps_roi_align(x, rois, pool_h, pool_w, device,
spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
y = ps_roi_align(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_align(x.permute(0, 1, 3, 2), rois, pool_h, pool_w,
device, spatial_scale=1, sampling_ratio=2,
dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIAlign layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_gradient_cuda(self):
device = torch.device('cuda')
pool_size = 3
layer = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
sampling_ratio=-1).to(dtype=self.dtype, device=device)
x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 4, 4],
[0, 0, 3, 5, 5],
[0, 1, 0, 2, 4]],
dtype=self.dtype, device=device)
val /= grid_h * grid_w
y = layer(x, rois)
s = y.sum()
s.backward()
gt_grad = torch.tensor([[[[8.125e-01, 6.875e-01, 0.0, 0.0, 0.0, ],
[2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
[1.0416666667e-01, 6.25e-02, 0.0, 0.0, 0.0, ],
[5.2083333333e-01, 3.125e-01, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[8.3266726847e-17, 1.125e00, 3.750e-01, 0.0, 0.0, ],
[2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
[0.0, 3.4722222222e-02, 9.7222222222e-02, 3.4722222222e-02, 0.0, ],
[0.0, 1.7361111111e-01, 4.8611111111e-01, 1.7361111111e-01, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[0.0, 5.000e-01, 4.375e-01, 5.000e-01, 6.25e-02, ],
[0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
[0.0, 0.0, 0.0, 6.25e-02, 1.0416666667e-01, ],
[0.0, 0.0, 0.0, 3.125e-01, 5.2083333333e-01, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
[5.4166666667e-01, 4.5833333333e-01, 0.0, 0.0, 0.0, ],
[3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ],
[3.125e-01, 1.875e-01, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
[5.5511151231e-17, 7.500e-01, 2.500e-01, 0.0, 0.0, ],
[0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ],
[0.0, 1.0416666667e-01, 2.9166666667e-01, 1.0416666667e-01, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
[0.0, 3.3333333333e-01, 2.9166666667e-01, 3.3333333333e-01, 4.1666666667e-02, ],
[0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ],
[0.0, 0.0, 0.0, 1.875e-01, 3.125e-01, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[2.7083333333e-01, 2.2916666667e-01, 0.0, 0.0, 0.0, ],
[7.2222222222e-01, 6.1111111111e-01, 0.0, 0.0, 0.0, ],
[7.1527777778e-01, 4.5138888889e-01, 0.0, 0.0, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[2.7755575616e-17, 3.750e-01, 1.250e-01, 0.0, 0.0, ],
[7.4014868308e-17, 1.000e00, 3.3333333333e-01, 0.0, 0.0, ],
[9.2518585385e-18, 3.3333333333e-01, 6.25e-01, 2.0833333333e-01, 0.0, ]],
[[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 0.0, 0.0, 0.0, 0.0, ],
[0.0, 1.6666666667e-01, 1.4583333333e-01, 1.6666666667e-01, 2.0833333333e-02, ],
[0.0, 4.4444444444e-01, 3.8888888889e-01, 4.4444444444e-01, 5.5555555556e-02, ],
[0.0, 5.5555555556e-02, 4.8611111111e-02, 4.3055555556e-01, 6.3194444444e-01, ]]]],
device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIAlign')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_align_gradcheck_cuda(self):
device = torch.device('cuda')
pool_size = 5
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
m = ops.PSRoIAlign((pool_size, pool_size), spatial_scale=1,
sampling_ratio=2).to(dtype=self.dtype, device=device)
out_data[r, channel, i, j] = val
return out_data
def func(input):
return m(input, rois)
self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for PSRoIAlign CUDA')
self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for PSRoIAlign CUDA')
class PSRoIAlignTester(RoIOpTester, unittest.TestCase):
def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs):
return ops.PSRoIAlign((pool_h, pool_w), spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio)(x, rois)
    def get_script_fn(self, rois, pool_size):
        @torch.jit.script
        def script_fn(input, rois, pool_size):
            # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor
            return ops.ps_roi_align(input, rois, pool_size, 1.0)[0]
        return lambda x: script_fn(x, rois, pool_size)

        @torch.jit.script
        def script_func(input, rois):
            return ops.ps_roi_align(input, rois, 5, 2.0, 1)[0]

        self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
                        'gradcheck failed for scripted ps_roi_align on CUDA')
class PSRoIPoolTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.dtype = torch.float64
    def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1,
                    sampling_ratio=-1, dtype=torch.float64):
        if device is None:
            device = torch.device("cpu")
        n_input_channels = in_data.size(1)
        self.assertEqual(n_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
        n_output_channels = int(n_input_channels / (pool_h * pool_w))
        out_data = torch.zeros(rois.size(0), n_output_channels, pool_h, pool_w, dtype=dtype, device=device)

    def slow_ps_roi_pooling(self, x, rois, pool_h, pool_w, device, spatial_scale=1,
                            dtype=torch.float64):
        num_input_channels = x.size(1)
        self.assertEqual(num_input_channels % (pool_h * pool_w), 0, "input channels must be divisible by ph * pw")
        num_output_channels = int(num_input_channels / (pool_h * pool_w))
        y = torch.zeros(rois.size(0), num_output_channels, pool_h, pool_w, dtype=dtype, device=device)
rois = torch.round(rois * spatial_scale).int()
for n in range(0, x.size(0)):
for r, roi in enumerate(rois):
if roi[0] != n:
continue
c_in = 0
for c_out in range(0, num_output_channels):
roi_height = max(roi[4].item() - roi[2].item(), 1)
roi_width = max(roi[3].item() - roi[1].item(), 1)
bin_h, bin_w = roi_height / float(pool_h), roi_width / float(pool_w)
for j in range(0, pool_h):
start_h = int(np.floor(j * bin_h)) + roi[2].item()
end_h = int(np.ceil((j + 1) * bin_w)) + roi[2].item()
# range-check
start_h = min(max(start_h, 0), x.size(2))
end_h = min(max(end_h, 0), x.size(2))
for i in range(0, pool_w):
start_w = int(np.floor(i * bin_w)) + roi[1].item()
end_w = int(np.ceil((i + 1) * bin_w)) + roi[1].item()
# range-check
start_w = min(max(start_w, 0), x.size(3))
end_w = min(max(end_w, 0), x.size(3))
is_empty = (end_h <= start_h) or (end_w <= start_w)
area = (end_h - start_h) * (end_w - start_w)
if not is_empty:
t = torch.sum(x[n, c_in, slice(start_h, end_h), slice(start_w, end_w)])
y[r, c_out, j, i] = t / area
c_in += 1
return y
def test_ps_roi_pool_basic_cpu(self):
device = torch.device('cpu')
pool_size = 3
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 4, 4]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_pool = ops.PSRoIPool((pool_h, pool_w), 1)
y = ps_roi_pool(x, rois)
gt_y = self.slow_ps_roi_pooling(x, rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIPool layer incorrect on CPU')
y = ps_roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIPool layer incorrect on CPU')
def test_ps_roi_pool_cpu(self):
device = torch.device('cpu')
pool_size = 5
x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_pool = ops.PSRoIPool((pool_h, pool_w), 1)
y = ps_roi_pool(x, rois)
gt_y = self.slow_ps_roi_pooling(x, rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIPool layer incorrect on CPU')
y = ps_roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y, y), 'PSRoIPool layer incorrect on CPU')
def test_ps_roi_pool_gradient_cpu(self):
device = torch.device('cpu')
pool_size = 3
layer = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 4, 4],
[0, 0, 3, 5, 5],
[0, 1, 0, 2, 4]],
dtype=self.dtype, device=device)
y = layer(x, rois)
s = y.sum()
s.backward()
gt_grad = torch.tensor([[[[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.2500, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000],
[0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.2500, 0.2500],
[0.0000, 0.0000, 0.0000, 0.2500, 0.2500]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.5000, 0.5000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.5000, 0.5000]]]],
device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIPool on CPU')
def test_ps_roi_pool_gradcheck_cpu(self):
device = torch.device('cpu')
pool_size = 5
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
m = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
def func(input):
return m(input, rois)
self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for PSRoIPool on CPU')
self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for PSRoIPool on CPU')
@torch.jit.script
def script_func(input, rois):
return ops.ps_roi_pool(input, rois, 5, 1.0)[0]
self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
'gradcheck failed for scripted ps_roi_pool on CPU')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_basic_cuda(self):
device = torch.device('cuda')
pool_size = 3
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 4, 4]], # format is (xyxy)
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_pool = ops.PSRoIPool((pool_h, pool_w), 1)
y = ps_roi_pool(x, rois)
gt_y = self.slow_ps_roi_pooling(x, rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
y = ps_roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_cuda(self):
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
pool_size = 5
x = torch.rand(2, 2 * (pool_size ** 2), 10, 10, dtype=self.dtype, device=device)
rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy)
[0, 0, 5, 4, 9],
[0, 5, 5, 9, 9],
[1, 0, 0, 9, 9]],
dtype=self.dtype, device=device)
pool_h, pool_w = (pool_size, pool_size)
ps_roi_pool = ops.PSRoIPool((pool_h, pool_w), 1)
y = ps_roi_pool(x, rois)
gt_y = self.slow_ps_roi_pooling(x, rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
y = ps_roi_pool(x.permute(0, 1, 3, 2), rois)
gt_y = self.slow_ps_roi_pooling(x.permute(0, 1, 3, 2), rois, pool_h, pool_w, device, dtype=self.dtype)
self.assertTrue(torch.allclose(gt_y.cuda(), y), 'PSRoIPool layer incorrect')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_gradient_cuda(self):
device = torch.device('cuda')
pool_size = 3
layer = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
x = torch.ones(1, pool_size ** 2, 5, 5, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 4, 4],
[0, 0, 3, 5, 5],
[0, 1, 0, 2, 4]],
dtype=self.dtype, device=device)
y = layer(x, rois)
s = y.sum()
s.backward()
gt_grad = torch.tensor([[[[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.2500, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000],
[0.0000, 1. / 6, 1. / 6, 1. / 6, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.2500, 0.2500],
[0.0000, 0.0000, 0.0000, 0.2500, 0.2500]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.2500, 0.7500, 0.0000, 0.0000, 0.0000],
[0.5000, 0.5000, 0.0000, 0.0000, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 0.7500, 0.2500, 0.0000, 0.0000],
[0.0000, 1. / 3, 1. / 3, 1. / 3, 0.0000]],
[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.5000, 0.2500, 0.2500, 0.0000],
[0.0000, 0.0000, 0.0000, 0.5000, 0.5000]]]],
device=device, dtype=self.dtype)
self.assertTrue(torch.allclose(x.grad, gt_grad), 'gradient incorrect for PSRoIPool')
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
def test_ps_roi_pool_gradcheck_cuda(self):
device = torch.device('cuda')
pool_size = 5
x = torch.rand(1, pool_size ** 2, 10, 10, dtype=self.dtype, device=device, requires_grad=True)
rois = torch.tensor([
[0, 0, 0, 9, 9],
[0, 0, 5, 5, 9],
[0, 5, 5, 9, 9]], dtype=self.dtype, device=device)
m = ops.PSRoIPool((pool_size, pool_size), 1).to(dtype=self.dtype, device=device)
def func(input):
return m(input, rois)
self.assertTrue(gradcheck(func, (x,)), 'gradcheck failed for PSRoIPool CUDA')
self.assertTrue(gradcheck(func, (x.permute(0, 1, 3, 2),)), 'gradcheck failed for PSRoIPool CUDA')
        @torch.jit.script
        def script_func(input, rois):
            return ops.ps_roi_pool(input, rois, 5, 1.0)[0]

        self.assertTrue(gradcheck(lambda x: script_func(x, rois), (x,)),
                        'gradcheck failed for scripted ps_roi_pool on CUDA')

            batch_idx = int(roi[0])
            j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - 0.5 for x in roi[1:])
            roi_h = i_end - i_begin
            roi_w = j_end - j_begin
            bin_h = roi_h / pool_h
            bin_w = roi_w / pool_w
            for i in range(0, pool_h):
                start_h = i_begin + i * bin_h
                grid_h = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_h))
                for j in range(0, pool_w):
                    start_w = j_begin + j * bin_w
                    grid_w = sampling_ratio if sampling_ratio > 0 else int(np.ceil(bin_w))
                    for c_out in range(0, n_output_channels):
                        c_in = c_out * (pool_h * pool_w) + pool_w * i + j
                        val = 0
                        for iy in range(0, grid_h):
                            y = start_h + (iy + 0.5) * bin_h / grid_h
                            for ix in range(0, grid_w):
                                x = start_w + (ix + 0.5) * bin_w / grid_w
                                val += bilinear_interpolate(
                                    in_data[batch_idx, c_in, :, :].flatten(),
                                    in_data.size(-2),
                                    in_data.size(-1),
                                    y, x
                                )
                        val /= grid_h * grid_w
                        out_data[r, c_out, i, j] = val
        return out_data
class NMSTester(unittest.TestCase):
......