cuda bugfixes

cc6f8862 · rusty1s · ba26dfb1
Commit cc6f8862 authored Jan 15, 2018 by rusty1s
5 changed files
--- a/test/forward.json
+++ b/test/forward.json
@@ -26,10 +26,10 @@
  {
    "name": "sub",
    "index": [[0, 0], [1, 1], [1, 1], [0, 0]],
-    "input": [[5, 2], [2, 5], [4, 3], [1, 3]],
+    "input": [[5, 2], [2, 2], [4, 2], [1, 3]],
    "dim": 0,
    "fill_value": 9,
-    "expected": [[3, 4], [3, 1]]
+    "expected": [[3, 4], [3, 5]]
  },
  {
    "name": "mul",

--- a/test/test_backward.py
+++ b/test/test_backward.py
@@ -35,7 +35,7 @@ def test_backward_cpu(tensor, i):
 @pytest.mark.parametrize('tensor,i', product(tensors, range(len(data))))
 def test_backward_gpu(tensor, i):  # pragma: no cover
    name = data[i]['name']
-    index = V(torch.LongTensor(data[i]['index']).cuda())
+    index = V(torch.cuda.LongTensor(data[i]['index']))
    input = V(Tensor(tensor, data[i]['input']).cuda(), requires_grad=True)
    dim = data[i]['dim']
    fill_value = data[i]['fill_value']

--- a/test/test_forward.py
+++ b/test/test_forward.py
@@ -44,7 +44,7 @@ def test_forward_cpu(tensor, i):
 @pytest.mark.parametrize('tensor,i', product(tensors, range(len(data))))
 def test_forward_gpu(tensor, i):  # pragma: no cover
    name = data[i]['name']
-    index = torch.LongTensor(data[i]['index']).cuda()
+    index = torch.cuda.LongTensor(data[i]['index'])
    input = Tensor(tensor, data[i]['input']).cuda()
    dim = data[i]['dim']
    fill_value = data[i]['fill_value']
@@ -57,7 +57,6 @@ def test_forward_gpu(tensor, i):  # pragma: no cover
    if 'expected_arg' in data[i]:
        expected_arg = torch.LongTensor(data[i]['expected_arg'])
        assert result[1].cpu().tolist() == expected_arg.tolist()
    func = getattr(torch_scatter, 'scatter_{}'.format(name))
    result = func(index, input, dim, fill_value=fill_value)
    if 'expected_arg' not in data[i]:

--- a/torch_scatter/functions/sub.py
+++ b/torch_scatter/functions/sub.py
@@ -51,7 +51,7 @@ def scatter_sub_(output, index, input, dim=0):
       -2 -4 -4  0  0  0
       [torch.FloatTensor of size 2x6]
    """
-    return output.scatter_add_(dim, index, -1 * input)
+    return output.scatter_add_(dim, index, -input)
 def scatter_sub(index, input, dim=0, size=None, fill_value=0):

--- a/torch_scatter/kernel/kernel.cu
+++ b/torch_scatter/kernel/kernel.cu
@@ -64,7 +64,9 @@ __global__ void argKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, Te
  KERNEL_LOOP(i, n) {
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0; int argOffset = 0;
    IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset, arg, &argOffset);
-    if (input.data[inputOffset] == output.data[outputOffset]) arg.data[argOffset] = inputOffset % input.size[dim];
+    if (input.data[inputOffset] == output.data[outputOffset]) {
+      arg.data[argOffset] = (inputOffset / input.stride[dim]) % input.size[dim];
+    }
  }
 }
@@ -73,7 +75,7 @@ __global__ void indexBackwardKernel(TensorInfo<Real> output, TensorInfo<int64_t>
  KERNEL_LOOP(i, n) {
    int outputOffset = 0; int indexOffset = 0; int gradOffset = 0; int argOffset = 0;
    IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, output, &outputOffset, grad, &gradOffset, arg, &argOffset);
-    if (arg.data[argOffset] == outputOffset % output.size[dim]) output.data[outputOffset] = grad.data[gradOffset];
+    if (arg.data[argOffset] == (outputOffset / output.stride[dim]) % output.size[dim]) output.data[outputOffset] = grad.data[gradOffset];
  }
 }