Unverified Commit 8c32666b authored by vfdev's avatar vfdev Committed by GitHub
Browse files

Fixes crash in deformable convolutions (2598) (#2604)

* [WIP] Fixes #2598
- Adjusted num_kernels and batch_size so that they stay within kMaxGridNum * CUDA_NUM_THREADS
- Tests still need to be added

* Redefined kMaxGridNum as the maximum grid size of the current CUDA device
- Added a test that runs the code from the issue and compares the gradients computed on CPU and on CUDA

* Changed kMaxGridNum from a static constant to a value evaluated dynamically at call time
parent 739061be
......@@ -553,6 +553,35 @@ class DeformConvTester(OpTester, unittest.TestCase):
gradcheck(lambda z, off, wei, bi: script_func(z, off, wei, bi, stride, padding, dilation),
(x, offset, weight, bias), nondet_tol=1e-5)
# Test from https://github.com/pytorch/vision/issues/2598
# Run on CUDA only
if "cuda" in device.type:
# compare grads computed on CUDA with grads computed on CPU
true_cpu_grads = None
init_weight = torch.randn(9, 9, 3, 3, requires_grad=True)
img = torch.randn(8, 9, 1000, 110)
offset = torch.rand(8, 2 * 3 * 3, 1000, 110)
if not contiguous:
img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2)
offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1)
weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0)
else:
weight = init_weight
for d in ["cpu", "cuda"]:
out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1)
out.mean().backward()
if true_cpu_grads is None:
true_cpu_grads = init_weight.grad
self.assertTrue(true_cpu_grads is not None)
else:
self.assertTrue(init_weight.grad is not None)
res_grads = init_weight.grad.to("cpu")
self.assertTrue(true_cpu_grads.allclose(res_grads))
class FrozenBNTester(unittest.TestCase):
def test_frozenbatchnorm2d_repr(self):
......
......@@ -80,12 +80,11 @@
using namespace at;
const int CUDA_NUM_THREADS = 1024;
const int kMaxGridNum = 65535;
const unsigned int CUDA_NUM_THREADS = 1024;
const int kMaxParallelImgs = 32;
inline int GET_BLOCKS(const int N) {
// Returns the number of blocks to launch for N work items when each block
// runs CUDA_NUM_THREADS threads: ceil(N / CUDA_NUM_THREADS), capped at the
// maxGridSize[0] value reported for the current CUDA device. The device limit
// is read on every call rather than fixed at compile time.
inline unsigned int GET_BLOCKS(const unsigned int N) {
  // Upper bound taken from the properties of the currently selected device.
  const unsigned int device_grid_limit =
      at::cuda::getCurrentDeviceProperties()->maxGridSize[0];
  // Round up so that a partially filled last block is still counted.
  const unsigned int blocks_needed =
      (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
  return std::min(device_grid_limit, blocks_needed);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment