Commit 6c6be201 authored by rusty1s
Browse files

final tests

parent f7d3df7b
...@@ -5,4 +5,4 @@ description-file = README.md ...@@ -5,4 +5,4 @@ description-file = README.md
test=pytest test=pytest
[tool:pytest] [tool:pytest]
addopts = --capture=no addopts = --capture=no --cov
# import pytest from itertools import product
# import torch
# from torch.autograd import Variable, gradcheck import pytest
# from torch_spline_conv import spline_conv import torch
# from torch_spline_conv.functions.spline_weighting import SplineWeighting from torch.autograd import Variable, gradcheck
# from torch_spline_conv.functions.ffi import implemented_degrees from torch_spline_conv import spline_conv
from torch_spline_conv.utils.ffi import implemented_degrees
# from .utils import tensors, Tensor
from .tensor import tensors
# @pytest.mark.parametrize('tensor', tensors)
# def test_spline_conv_cpu(tensor): tests = [{
# x = Tensor(tensor, [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]]) 'src': [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]],
# edge_index = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]]) 'edge_index': [[0, 0, 0, 0], [1, 2, 3, 4]],
# pseudo = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]] 'pseudo': [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]],
# pseudo = Tensor(tensor, pseudo) 'weight': [
# weight = torch.arange(0.5, 0.5 * 25, step=0.5, out=x.new()).view(12, 2, 1) [[0.5], [1]],
# kernel_size = torch.LongTensor([3, 4]) [[1.5], [2]],
# is_open_spline = torch.ByteTensor([1, 0]) [[2.5], [3]],
# root_weight = torch.arange(12.5, 13.5, step=0.5, out=x.new()).view(2, 1) [[3.5], [4]],
# bias = Tensor(tensor, [1]) [[4.5], [5]],
[[5.5], [6]],
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, [[6.5], [7]],
# is_open_spline, 1, root_weight, bias) [[7.5], [8]],
[[8.5], [9]],
# edgewise_output = [ [[9.5], [10]],
# 1 * 0.25 * (0.5 + 1.5 + 4.5 + 5.5) + 2 * 0.25 * (1 + 2 + 5 + 6), [[10.5], [11]],
# 3 * 0.25 * (1.5 + 2.5 + 5.5 + 6.5) + 4 * 0.25 * (2 + 3 + 6 + 7), [[11.5], [12]],
# 5 * 0.25 * (6.5 + 7.5 + 10.5 + 11.5) + 6 * 0.25 * (7 + 8 + 11 + 12), ],
# 7 * 0.25 * (7.5 + 4.5 + 11.5 + 8.5) + 8 * 0.25 * (8 + 5 + 12 + 9), 'kernel_size': [3, 4],
# ] 'is_open_spline': [1, 0],
'root_weight': [[12.5], [13]],
# expected_output = [ 'bias': [1],
# [1 + 12.5 * 9 + 13 * 10 + sum(edgewise_output) / 4], 'output': [
# [1 + 12.5 * 1 + 13 * 2], [1 + 12.5 * 9 + 13 * 10 + (8.5 + 40.5 + 107.5 + 101.5) / 4],
# [1 + 12.5 * 3 + 13 * 4], [1 + 12.5 * 1 + 13 * 2],
# [1 + 12.5 * 5 + 13 * 6], [1 + 12.5 * 3 + 13 * 4],
# [1 + 12.5 * 7 + 13 * 8], [1 + 12.5 * 5 + 13 * 6],
# ] [1 + 12.5 * 7 + 13 * 8],
]
# assert output.tolist() == expected_output }]
# x, weight, pseudo = Variable(x), Variable(weight), Variable(pseudo)
# root_weight, bias = Variable(root_weight), Variable(bias) @pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
def test_spline_conv_forward_cpu(tensor, i):
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, data = tests[i]
# is_open_spline, 1, root_weight, bias)
src = getattr(torch, tensor)(data['src'])
# assert output.data.tolist() == expected_output edge_index = torch.LongTensor(data['edge_index'])
pseudo = getattr(torch, tensor)(data['pseudo'])
# def test_spline_weighting_backward_cpu(): weight = getattr(torch, tensor)(data['weight'])
# for degree in implemented_degrees.keys(): kernel_size = torch.LongTensor(data['kernel_size'])
# kernel_size = torch.LongTensor([5, 5, 5]) is_open_spline = torch.ByteTensor(data['is_open_spline'])
# is_open_spline = torch.ByteTensor([1, 0, 1]) root_weight = getattr(torch, tensor)(data['root_weight'])
# op = SplineWeighting(kernel_size, is_open_spline, degree) bias = getattr(torch, tensor)(data['bias'])
# x = torch.DoubleTensor(16, 2).uniform_(-1, 1) output = spline_conv(src, edge_index, pseudo, weight, kernel_size,
# x = Variable(x, requires_grad=True) is_open_spline, 1, root_weight, bias)
# pseudo = torch.DoubleTensor(16, 3).uniform_(0, 1) assert output.tolist() == data['output']
# pseudo = Variable(pseudo, requires_grad=True)
# weight = torch.DoubleTensor(25, 2, 4).uniform_(-1, 1)
# weight = Variable(weight, requires_grad=True) @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
@pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
# assert gradcheck(op, (x, pseudo, weight), eps=1e-6, atol=1e-4) is True def test_spline_conv_forward_gpu(tensor, i):
data = tests[i]
# @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
# @pytest.mark.parametrize('tensor', tensors) src = getattr(torch.cuda, tensor)(data['src'])
# def test_spline_conv_gpu(tensor): # pragma: no cover edge_index = torch.cuda.LongTensor(data['edge_index'])
# x = Tensor(tensor, [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]]) pseudo = getattr(torch.cuda, tensor)(data['pseudo'])
# edge_index = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]]) weight = getattr(torch.cuda, tensor)(data['weight'])
# pseudo = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]] kernel_size = torch.cuda.LongTensor(data['kernel_size'])
# pseudo = Tensor(tensor, pseudo) is_open_spline = torch.cuda.ByteTensor(data['is_open_spline'])
# weight = torch.arange(0.5, 0.5 * 25, step=0.5, out=x.new()).view(12, 2, 1) root_weight = getattr(torch.cuda, tensor)(data['root_weight'])
# kernel_size = torch.LongTensor([3, 4]) bias = getattr(torch.cuda, tensor)(data['bias'])
# is_open_spline = torch.ByteTensor([1, 0])
# root_weight = torch.arange(12.5, 13.5, step=0.5, out=x.new()).view(2, 1) output = spline_conv(src, edge_index, pseudo, weight, kernel_size,
# bias = Tensor(tensor, [1]) is_open_spline, 1, root_weight, bias)
assert output.cpu().tolist() == data['output']
# expected_output = spline_conv(x, edge_index, pseudo, weight, kernel_size,
# is_open_spline, 1, root_weight, bias)
@pytest.mark.parametrize('degree', implemented_degrees.keys())
# x, edge_index, pseudo = x.cuda(), edge_index.cuda(), pseudo.cuda() def test_spline_basis_backward_cpu(degree):
# weight, kernel_size = weight.cuda(), kernel_size.cuda() src = torch.DoubleTensor(3, 2).uniform_(-1, 1)
# is_open_spline, root_weight = is_open_spline.cuda(), root_weight.cuda() edge_index = torch.LongTensor([[0, 1, 1, 2], [1, 0, 2, 1]])
# bias = bias.cuda() pseudo = torch.DoubleTensor(4, 3).uniform_(0, 1)
weight = torch.DoubleTensor(125, 2, 4).uniform_(-1, 1)
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, kernel_size = torch.LongTensor([5, 5, 5])
# is_open_spline, 1, root_weight, bias) is_open_spline = torch.ByteTensor([1, 0, 1])
# assert output.cpu().tolist() == expected_output.tolist() root_weight = torch.DoubleTensor(2, 4).uniform_(-1, 1)
bias = torch.DoubleTensor(4).uniform_(-1, 1)
# x, weight, pseudo = Variable(x), Variable(weight), Variable(pseudo)
# root_weight, bias = Variable(root_weight), Variable(bias) src = Variable(src, requires_grad=True)
pseudo = Variable(pseudo, requires_grad=True)
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, weight = Variable(weight, requires_grad=True)
# is_open_spline, 1, root_weight, bias) root_weight = Variable(root_weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
# assert output.data.cpu().tolist() == expected_output.tolist()
def op(src, pseudo, weight, root_weight, bias):
# @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA') return spline_conv(src, edge_index, pseudo, weight, kernel_size,
# def test_spline_weighting_backward_gpu(): # pragma: no cover is_open_spline, degree, root_weight, bias)
# for degree in implemented_degrees.keys():
# kernel_size = torch.cuda.LongTensor([5, 5, 5]) data = (src, pseudo, weight, root_weight, bias)
# is_open_spline = torch.cuda.ByteTensor([1, 0, 1]) assert gradcheck(op, data, eps=1e-6, atol=1e-4) is True
# op = SplineWeighting(kernel_size, is_open_spline, degree)
# x = torch.cuda.DoubleTensor(16, 2).uniform_(-1, 1) @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
# x = Variable(x, requires_grad=True) @pytest.mark.parametrize('degree', [2])
# pseudo = torch.cuda.DoubleTensor(16, 3).uniform_(0, 1) def test_spline_basis_backward_gpu(degree):
# pseudo = Variable(pseudo, requires_grad=False) # TODO src = torch.cuda.DoubleTensor(3, 2).uniform_(-1, 1)
# weight = torch.cuda.DoubleTensor(25, 2, 4).uniform_(-1, 1) edge_index = torch.cuda.LongTensor([[0, 1, 1, 2], [1, 0, 2, 1]])
# weight = Variable(weight, requires_grad=True) pseudo = torch.cuda.DoubleTensor(4, 3).uniform_(0, 1)
weight = torch.cuda.DoubleTensor(125, 2, 4).uniform_(-1, 1)
# assert gradcheck(op, (x, pseudo, weight), eps=1e-6, atol=1e-4) is True kernel_size = torch.cuda.LongTensor([5, 5, 5])
is_open_spline = torch.cuda.ByteTensor([1, 0, 1])
root_weight = torch.cuda.DoubleTensor(2, 4).uniform_(-1, 1)
bias = torch.cuda.DoubleTensor(4).uniform_(-1, 1)
src = Variable(src, requires_grad=False)
pseudo = Variable(pseudo, requires_grad=True)
weight = Variable(weight, requires_grad=False)
root_weight = Variable(root_weight, requires_grad=False)
bias = Variable(bias, requires_grad=False)
def op(src, pseudo, weight, root_weight, bias):
return spline_conv(src, edge_index, pseudo, weight, kernel_size,
is_open_spline, degree, root_weight, bias)
data = (src, pseudo, weight, root_weight, bias)
assert gradcheck(op, data, eps=1e-6, atol=1e-4) is True
...@@ -58,9 +58,10 @@ def test_spline_basis_backward_cpu(): ...@@ -58,9 +58,10 @@ def test_spline_basis_backward_cpu():
src = Variable(src, requires_grad=True) src = Variable(src, requires_grad=True)
weight = Variable(weight, requires_grad=True) weight = Variable(weight, requires_grad=True)
basis = Variable(basis, requires_grad=True) basis = Variable(basis, requires_grad=True)
weight_index = Variable(weight_index, requires_grad=False)
op = SplineWeighting(weight_index) data = (src, weight, basis, weight_index)
assert gradcheck(op, (src, weight, basis), eps=1e-6, atol=1e-4) is True assert gradcheck(SplineWeighting(), data, eps=1e-6, atol=1e-4) is True
@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA') @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
...@@ -73,8 +74,9 @@ def test_spline_basis_backward_gpu(): ...@@ -73,8 +74,9 @@ def test_spline_basis_backward_gpu():
basis, weight_index = spline_basis(1, pseudo, kernel_size, is_open_spline) basis, weight_index = spline_basis(1, pseudo, kernel_size, is_open_spline)
src = Variable(src, requires_grad=True) src = Variable(src, requires_grad=True)
weight = Variable(weight, requires_grad=False) weight = Variable(weight, requires_grad=True)
basis = Variable(basis, requires_grad=False) basis = Variable(basis, requires_grad=True)
weight_index = Variable(weight_index, requires_grad=False)
op = SplineWeighting(weight_index) data = (src, weight, basis, weight_index)
assert gradcheck(op, (src, weight, basis), eps=1e-6, atol=1e-4) is True assert gradcheck(SplineWeighting(), data, eps=1e-6, atol=1e-4) is True
import torch import torch
from torch.autograd import Variable
from .basis import spline_basis from .basis import spline_basis
from .weighting import spline_weighting from .weighting import spline_weighting
...@@ -23,24 +24,24 @@ def spline_conv(src, ...@@ -23,24 +24,24 @@ def spline_conv(src,
B-Spline tensor product basis for a single input feature map :math:`l`. B-Spline tensor product basis for a single input feature map :math:`l`.
Args: Args:
src (Tensor): Input node features of shape (number_of_nodes x src (Tensor or Variable): Input node features of shape
in_channels) (number_of_nodes x in_channels)
edge_idex (LongTensor): Graph edges, given by source and target edge_idex (LongTensor): Graph edges, given by source and target
indices, of shape (2 x number_of_edges) indices, of shape (2 x number_of_edges)
pseudo (Tensor): Edge attributes, ie. pseudo coordinates, of shape pseudo (Tensor or Variable): Edge attributes, ie. pseudo coordinates,
(number_of_edges x number_of_edge_attributes) of shape (number_of_edges x number_of_edge_attributes)
weight (Tensor): Trainable weight parameters of shape (kernel_size x weight (Tensor or Variable): Trainable weight parameters of shape
in_channels x out_channels) (kernel_size x in_channels x out_channels)
kernel_size (LongTensor): Number of trainable weight parameters in each kernel_size (LongTensor): Number of trainable weight parameters in each
edge dimension edge dimension
is_open_spline (ByteTensor): Whether to use open or closed B-spline is_open_spline (ByteTensor): Whether to use open or closed B-spline
bases for each dimension bases for each dimension
degree (int): B-spline basis degree (default: :obj:`1`) degree (int): B-spline basis degree (default: :obj:`1`)
root_weight (Tensor): Additional shared trainable parameters for each root_weight (Tensor or Variable): Additional shared trainable
feature of the root node of shape (in_channels x out_channels) parameters for each feature of the root node of shape
(in_channels x out_channels) (default: :obj:`None`)
bias (Tensor or Variable): Optional bias of shape (out_channels)
(default: :obj:`None`) (default: :obj:`None`)
bias (Tensor): Optional bias of shape (out_channels) (default:
:obj:`None`)
""" """
src = src.unsqueeze(-1) if src.dim() == 1 else src src = src.unsqueeze(-1) if src.dim() == 1 else src
...@@ -57,10 +58,12 @@ def spline_conv(src, ...@@ -57,10 +58,12 @@ def spline_conv(src,
# Perform the real convolution => Convert e x m_out to n x m_out features. # Perform the real convolution => Convert e x m_out to n x m_out features.
zero = new(src, n, m_out).fill_(0) zero = new(src, n, m_out).fill_(0)
row_expand = row.unsqueeze(-1).expand(e, m_out) row_expand = row.unsqueeze(-1).expand(e, m_out)
row_expand = row_expand if torch.is_tensor(src) else Variable(row_expand)
output = zero.scatter_add_(0, row_expand, output) output = zero.scatter_add_(0, row_expand, output)
# Normalize output by node degree. # Normalize output by node degree.
degree = node_degree(row, n, out=new(src)) index = row if torch.is_tensor(src) else Variable(row)
degree = node_degree(index, n, out=new(src))
output /= degree.unsqueeze(-1).clamp_(min=1) output /= degree.unsqueeze(-1).clamp_(min=1)
# Weight root node separately (if wished). # Weight root node separately (if wished).
......
import torch
def node_degree(edge_index, num_nodes, out=None):
    """Return the out-degree of every node.

    Args:
        edge_index (LongTensor): Edges of shape (2 x number_of_edges); only
            row 0 (the source indices) is counted.
        num_nodes (int): Total number of nodes.
        out (Tensor, optional): Destination tensor for the counts.

    Returns:
        Tensor of shape (num_nodes,) holding one count per node.
    """
    degree = torch.zeros(num_nodes, out=out)
    ones = torch.ones(edge_index.size(1), out=degree.new())
    degree.scatter_add_(0, edge_index[0], ones)
    return degree
from .._ext import ffi as ext
# Supported B-spline degrees mapped to the name fragment used by the FFI
# bindings.
implemented_degrees = {1: 'linear', 2: 'quadratic', 3: 'cubic'}


def get_degree_str(degree):
    """Translate a numeric B-spline degree into its implementation name.

    Raises:
        AssertionError: If no native implementation exists for `degree`.
    """
    name = implemented_degrees.get(degree)
    assert name is not None, (
        'No implementation found for specified B-spline degree')
    return name
def get_func(name, tensor):
    """Resolve the compiled FFI routine for *name*, dispatched on the
    tensor's scalar type (Float, Double, ...) and device (CPU vs. CUDA)."""
    typename = type(tensor).__name__.replace('Tensor', '')
    device_prefix = 'cuda_' if tensor.is_cuda else ''
    return getattr(ext, 'spline_{}_{}{}'.format(name, device_prefix, typename))
def spline_basis_forward(degree, pseudo, kernel_size, is_open_spline, K):
    """Evaluate B-spline basis products and flattened weight indices.

    Allocates both outputs of shape
    (number_of_edges x (degree + 1)^num_dims) and fills them in-place via
    the native kernel selected by `degree` and the type of `pseudo`.
    """
    num_edges = pseudo.size(0)
    s = (degree + 1)**kernel_size.size(0)
    basis = pseudo.new(num_edges, s)
    weight_index = kernel_size.new(num_edges, s)
    name = '{}_basis_forward'.format(get_degree_str(degree))
    get_func(name, pseudo)(basis, weight_index, pseudo, kernel_size,
                           is_open_spline, K)
    return basis, weight_index
def spline_basis_backward(degree, grad_basis, pseudo, kernel_size,
                          is_open_spline):  # pragma: no cover
    """Backpropagate basis gradients onto the pseudo coordinates."""
    grad_pseudo = pseudo.new(pseudo.size())
    name = '{}_basis_backward'.format(get_degree_str(degree))
    get_func(name, pseudo)(grad_pseudo, grad_basis, pseudo, kernel_size,
                           is_open_spline)
    return grad_pseudo
def spline_weighting_forward(x, weight, basis, weight_index):
    """Weight input features by their B-spline bases (native forward op)."""
    out = x.new(x.size(0), weight.size(2))
    get_func('weighting_forward', x)(out, x, weight, basis, weight_index)
    return out
def spline_weighting_backward_input(grad_output, weight, basis,
                                    weight_index):  # pragma: no cover
    """Gradient of the weighting op w.r.t. its input features."""
    grad_input = grad_output.new(grad_output.size(0), weight.size(1))
    # The native kernel reads the weights in (K x M_out x M_in) layout;
    # transpose once here so GPU accesses are coalesced.
    weight_t = weight.transpose(1, 2).contiguous()
    func = get_func('weighting_backward_input', grad_output)
    func(grad_input, grad_output, weight_t, basis, weight_index)
    return grad_input
def spline_weighting_backward_basis(grad_output, x, weight,
                                    weight_index):  # pragma: no cover
    """Gradient of the weighting op w.r.t. the basis products."""
    grad_basis = x.new(weight_index.size()).fill_(0)
    get_func('weighting_backward_basis', x)(grad_basis, grad_output, x,
                                            weight, weight_index)
    return grad_basis
def spline_weighting_backward_weight(grad_output, x, basis, weight_index,
                                     K):  # pragma: no cover
    """Gradient of the weighting op w.r.t. the trainable weights."""
    grad_weight = x.new(K, x.size(1), grad_output.size(1)).fill_(0)
    get_func('weighting_backward_weight', x)(grad_weight, grad_output, x,
                                             basis, weight_index)
    return grad_weight
import torch
from torch.autograd import Variable as Var
from .degree import node_degree
from .spline_weighting import spline_weighting
def spline_conv(x,
                edge_index,
                pseudo,
                weight,
                kernel_size,
                is_open_spline,
                degree=1,
                root_weight=None,
                bias=None):
    """Spline-based graph convolution.

    Works on plain tensors and on autograd ``Variable``s alike; every
    intermediate index/buffer tensor is wrapped in ``Var`` on the Variable
    path so the two code paths stay interchangeable.

    Args:
        x (Tensor or Variable): Node features (number_of_nodes x in_channels).
        edge_index (LongTensor): Edges of shape (2 x number_of_edges); row 0
            indexes the nodes that receive the aggregated output, row 1 the
            nodes whose features are gathered.
        pseudo (Tensor or Variable): Edge pseudo-coordinates
            (number_of_edges x number_of_edge_attributes).
        weight (Tensor or Variable): Trainable weights
            (kernel_size x in_channels x out_channels).
        kernel_size (LongTensor): Kernel size per edge-attribute dimension.
        is_open_spline (ByteTensor): Open (1) vs. closed (0) spline per dim.
        degree (int, optional): B-spline basis degree. (default: 1)
        root_weight (Tensor or Variable, optional): Additional weights for
            the root node itself (in_channels x out_channels).
        bias (Tensor or Variable, optional): Bias of shape (out_channels).
    """
    n, e, m_out = x.size(0), edge_index.size(1), weight.size(2)
    # Promote 1-D feature/attribute vectors to 2-D column matrices.
    x = x.unsqueeze(-1) if x.dim() == 1 else x
    pseudo = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo

    # Convolve over each node: weight the gathered neighbor features by
    # their B-spline basis products.
    output = spline_weighting(x[edge_index[1]], pseudo, weight, kernel_size,
                              is_open_spline, degree)

    # Perform the real convolution => Convert e x m_out to n x m_out
    # features by scatter-summing edge outputs onto their row-0 nodes.
    row = edge_index[0].unsqueeze(-1).expand(e, m_out)
    row = row if torch.is_tensor(x) else Var(row)
    zero = x.new(n, m_out) if torch.is_tensor(x) else Var(x.data.new(n, m_out))
    output = zero.fill_(0).scatter_add_(0, row, output)

    # Compute node degree. NOTE: this rebinding deliberately shadows the
    # B-spline `degree` argument, which is no longer needed at this point.
    degree = x.new() if torch.is_tensor(x) else x.data.new()
    degree = node_degree(edge_index, n, out=degree)

    # Normalize output by node degree (clamped so isolated nodes divide by
    # 1 instead of 0).
    degree = degree.unsqueeze(-1).clamp_(min=1)
    output /= degree if torch.is_tensor(x) else Var(degree)

    # Weight root node separately (if wished).
    if root_weight is not None:
        output += torch.mm(x, root_weight)

    # Add bias (if wished).
    if bias is not None:
        output += bias

    return output
import torch
from torch.autograd import Function
from .ffi import (
spline_basis_forward,
spline_basis_backward,
spline_weighting_forward,
spline_weighting_backward_input,
spline_weighting_backward_basis,
spline_weighting_backward_weight,
)
class SplineWeighting(Function):
    """Autograd function computing B-spline-weighted features.

    Uses the legacy stateful ``Function`` API: the spline configuration is
    stored on the instance, so a fresh instance is created per call (see
    ``spline_weighting`` below).
    """

    def __init__(self, kernel_size, is_open_spline, degree):
        super(SplineWeighting, self).__init__()
        self.kernel_size = kernel_size
        self.is_open_spline = is_open_spline
        self.degree = degree

    def forward(self, x, pseudo, weight):
        # K: number of kernel positions (weight.size(0)).
        K = weight.size(0)
        basis, weight_index = spline_basis_forward(
            self.degree, pseudo, self.kernel_size, self.is_open_spline, K)
        output = spline_weighting_forward(x, weight, basis, weight_index)
        # Stash what backward() needs: differentiable inputs through
        # save_for_backward, the computed basis/index tensors directly on
        # the instance.
        self.save_for_backward(x, pseudo, weight)
        self.basis, self.weight_index = basis, weight_index
        return output

    def backward(self, grad_output):  # pragma: no cover
        x, pseudo, weight = self.saved_tensors
        basis, weight_index = self.basis, self.weight_index
        grad_input, grad_pseudo, grad_weight = None, None, None

        # Only compute the gradients autograd actually requested.
        if self.needs_input_grad[0]:
            grad_input = spline_weighting_backward_input(
                grad_output, weight, basis, weight_index)

        if self.needs_input_grad[1]:
            # Chain rule: the pseudo gradient flows through the basis values.
            grad_basis = spline_weighting_backward_basis(
                grad_output, x, weight, weight_index)
            grad_pseudo = spline_basis_backward(self.degree, grad_basis,
                                                pseudo, self.kernel_size,
                                                self.is_open_spline)

        if self.needs_input_grad[2]:
            K = weight.size(0)
            grad_weight = spline_weighting_backward_weight(
                grad_output, x, basis, weight_index, K)

        return grad_input, grad_pseudo, grad_weight
def spline_weighting(x, pseudo, weight, kernel_size, is_open_spline, degree):
    """Apply B-spline weighting, dispatching on tensor vs. Variable input."""
    if not torch.is_tensor(x):
        # Autograd path: route through the Function so gradients can flow.
        return SplineWeighting(kernel_size, is_open_spline, degree)(
            x, pseudo, weight)
    basis, weight_index = spline_basis_forward(
        degree, pseudo, kernel_size, is_open_spline, weight.size(0))
    return spline_weighting_forward(x, weight, basis, weight_index)
// Double-precision atomicAdd fallback: guarded for compute capability < 6.0
// or CUDA < 8.0 (where no native double atomicAdd exists). Emulated with a
// compare-and-swap loop on the 64-bit integer view of the address.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
static inline __device__ void atomicAdd(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull;
  unsigned long long int assumed;
  do {
    assumed = old;
    // Retry until no other thread modified *address between read and CAS.
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
}
#elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000)
// Host-compilation stub so the symbol exists outside device passes.
static inline __device__ void atomicAdd(double *address, double val) {}
#endif
// Shared body of the three *_basis_forward entry points: checks that the
// CUDA tensors live on the same device, unpacks them into TensorInfo views
// and raw pointers, then launches NAME over every element of `basis`,
// dispatching on the number of edge-attribute dimensions (pseudo.size[1]).
// (Comments stay outside the macro: a // before a line-continuation
// backslash would swallow it.)
#define SPLINE_BASIS_FORWARD(NAME, basis, weight_index, pseudo, kernel_size, is_open_spline, K) { \
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 3, pseudo, kernel_size, is_open_spline)); \
\
  const int n = THCTensor_(nElement)(state, basis); \
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis); \
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index); \
  TensorInfo<real> pseudoInfo = thc_(getTensorInfo)(state, pseudo); \
  int64_t *kernelSizeData = THCudaLongTensor_data(state, kernel_size); \
  uint8_t *isOpenSplineData = THCudaByteTensor_data(state, is_open_spline); \
\
  KERNEL_D_RUN(NAME, pseudoInfo.size[1], n, basisInfo, weightIndexInfo, pseudoInfo, kernelSizeData, isOpenSplineData, K) \
}
// Computes one (basis value, weight index) pair for flat output element i.
// k enumerates this element's (M + 1)^D local knot combination; for each
// dimension d, the scaled pseudo-coordinate splits into an integer knot
// position (folded into the mixed-radix flat index wi, radix kernelSize[d])
// and a fractional part `value`, on which CODE evaluates the degree-M 1-D
// basis polynomial (selected by k_mod); `b` accumulates the product over
// all dimensions.
#define COMPUTE_SPLINE_BASIS_FORWARD(M, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K, CODE) { \
  int64_t k = i % basis.size[1]; \
  int64_t pseudoOffset = ((i / basis.size[1]) % pseudo.size[0]) * pseudo.stride[0]; \
  int64_t d, k_mod, wi = 0, offset = K; Real b = 1, value; \
  for (d = 0; d < D; d++) { \
    offset /= kernelSize[d]; \
    k_mod = k % (M + 1); \
    k /= M + 1; \
    value = pseudo.data[pseudoOffset + d * pseudo.stride[1]] * (kernelSize[d] - M * isOpenSpline[d]); \
    wi += (((int64_t) value + k_mod) % kernelSize[d]) * offset; \
    value -= floor(value); \
    CODE \
    b *= value; \
  } \
  basis.data[i] = b; \
  weightIndex.data[i] = wi; \
}
// Degree-specific 1-D B-spline polynomials, combined across a compile-time
// number of dimensions D by COMPUTE_SPLINE_BASIS_FORWARD.
template<typename Real, int D>
struct SplineBasisForward {
  // Degree 1: linear hat function; branchless form selected by k_mod.
  static __device__ void linear(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(1, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      value = 1 - value - k_mod + 2 * value * k_mod;
    )
  }

  // Degree 2: the three quadratic B-spline pieces, selected by k_mod.
  static __device__ void quadratic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(2, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) value = 0.5 * value * value - value + 0.5;
      else if (k_mod == 1) value = -value * value + value + 0.5;
      else value = 0.5 * value * value;
    )
  }

  // Degree 3: the four cubic B-spline pieces, selected by k_mod.
  static __device__ void cubic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(3, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) { value = (1 - value); value = value * value * value / 6.0; }
      else if (k_mod == 1) value = (3 * value * value * value - 6 * value * value + 4) / 6;
      else if (k_mod == 2) value = (-3 * value * value * value + 3 * value * value + 3 * value + 1) / 6;
      else value = value * value * value / 6;
    )
  }
};
// Fallback specialization for dimensionalities without a compiled template
// (D == -1): the dimension count is read at runtime from pseudo.size[1]
// instead of the template parameter.
template<typename Real>
struct SplineBasisForward<Real, -1> {
  static __device__ void linear(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(1, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      value = 1 - value - k_mod + 2 * value * k_mod;
    )
  }

  static __device__ void quadratic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(2, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) value = 0.5 * value * value - value + 0.5;
      else if (k_mod == 1) value = -value * value + value + 0.5;
      else value = 0.5 * value * value;
    )
  }

  static __device__ void cubic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(3, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) { value = (1 - value); value = value * value * value / 6.0; }
      else if (k_mod == 1) value = (3 * value * value * value - 6 * value * value + 4) / 6;
      else if (k_mod == 2) value = (-3 * value * value * value + 3 * value * value + 3 * value + 1) / 6;
      else value = value * value * value / 6;
    )
  }
};
// Grid-stride kernel: evaluates the linear basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void linearBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::linear(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Grid-stride kernel: evaluates the quadratic basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void quadraticBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::quadratic(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Grid-stride kernel: evaluates the cubic basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void cubicBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::cubic(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Maximum tensor rank representable by TensorInfo.
const int MAX_DIMS = 25;

// Threads per block used by every kernel launch.
const int NUM_THREADS = 1024;

// Number of blocks so that grid * NUM_THREADS covers n elements
// (ceiling division).
inline int GET_BLOCKS(const int n) {
  return (n + NUM_THREADS - 1) / NUM_THREADS;
}
// Kernel-friendly snapshot of a THC tensor: raw data pointer plus rank,
// sizes and strides copied into fixed-size arrays so the whole struct can
// be passed to a kernel by value.
template<typename T>
struct TensorInfo {
  TensorInfo(T *t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t; dims = d;
    for (int i = 0; i < dims; i++) {
      size[i] = sz[i];
      stride[i] = st[i];
    }
  }

  T *data;
  int dims;
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
};
// Grid-stride loop: each thread starts at its global index and advances by
// the total number of launched threads until N elements are covered.
// Fix: the increment must use the macro parameter I, not a hard-coded
// lowercase `i` — the old form only compiled because every call site
// happened to pass `i` as the loop variable.
#define KERNEL_LOOP(I, N) \
  for (int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
// Launches NAME<real> on the current THC stream with enough blocks to
// cover N elements, forwarding __VA_ARGS__ plus the element count, then
// surfaces any launch error.
#define KERNEL_RUN(NAME, N, ...) { \
  int grid = GET_BLOCKS(N); \
  cudaStream_t stream = THCState_getCurrentStream(state); \
  NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
  THCudaCheck(cudaGetLastError()); \
}
// As KERNEL_RUN, but additionally selects a template instantiation for the
// runtime dimensionality D (1-4 specialized; anything else falls back to
// the generic D == -1 variant).
#define KERNEL_D_RUN(NAME, D, N, ...) { \
  int grid = GET_BLOCKS(N); \
  cudaStream_t stream = THCState_getCurrentStream(state); \
  switch (D) { \
    case 1: NAME<real, 1><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 2: NAME<real, 2><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 3: NAME<real, 3><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 4: NAME<real, 4><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    default: NAME<real, -1><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
  } \
  THCudaCheck(cudaGetLastError()); \
}
// THC "generic file" trick: the first inclusion only sets THC_GENERIC_FILE;
// the type-generation header then re-includes this file once per scalar
// type, landing in the #else branch with `real`/`THCTensor_` bound.
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else

// Build a by-value TensorInfo snapshot (pointer, rank, sizes, strides) of
// `tensor` for passing into kernels.
TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  int dims = THCTensor_(nDimension)(state, tensor);
  int size[MAX_DIMS]; int stride[MAX_DIMS];
  for (int i = 0; i < dims; i++) {
    size[i] = THCTensor_(size)(state, tensor, i);
    stride[i] = THCTensor_(stride)(state, tensor, i);
  }
  return TensorInfo<real>(data, dims, size, stride);
}

#endif
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/kernel.cu"
#else
// FFI entry point: degree-1 (linear) basis forward for this scalar type.
void spline_(linear_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(linearBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point: degree-2 (quadratic) basis forward for this scalar type.
void spline_(quadratic_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(quadraticBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point: degree-3 (cubic) basis forward for this scalar type.
void spline_(cubic_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(cubicBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point for the weighting forward pass: verifies device
// placement, wraps the tensors as TensorInfo views and launches one thread
// per output element.
void spline_(weighting_forward)(THCState *state, THCTensor *output, THCTensor *input, THCTensor *weight, THCTensor *basis, THCudaLongTensor *weight_index) {
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, input, weight, basis, weight_index));
  TensorInfo<real> outputInfo = thc_(getTensorInfo)(state, output);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<real> weightInfo = thc_(getTensorInfo)(state, weight);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingForwardKernel, THCTensor_(nElement)(state, output), outputInfo, inputInfo, weightInfo, basisInfo, weightIndexInfo)
}
// FFI entry point: gradient w.r.t. the input features. `weight` arrives
// pre-transposed to (K x M_out x M_in) by the host-side wrapper.
// NOTE(review): unlike weighting_forward there is no THCAssertSameGPU
// check here — confirm whether that was intentional.
void spline_(weighting_backward_input)(THCState *state, THCTensor *grad_input, THCTensor *grad_output, THCTensor *weight, THCTensor *basis, THCudaLongTensor *weight_index) {
  TensorInfo<real> gradInputInfo = thc_(getTensorInfo)(state, grad_input);
  TensorInfo<real> gradOutputInfo = thc_(getTensorInfo)(state, grad_output);
  TensorInfo<real> weightInfo = thc_(getTensorInfo)(state, weight);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingBackwardInputKernel, THCTensor_(nElement)(state, grad_input), gradInputInfo, gradOutputInfo, weightInfo, basisInfo, weightIndexInfo)
}
// FFI entry point: gradient w.r.t. the weights. Launched with one thread
// per grad_output element; the kernel accumulates into grad_weight with
// atomicAdd since several edges can address the same weight slab.
void spline_(weighting_backward_weight)(THCState *state, THCTensor *grad_weight, THCTensor *grad_output, THCTensor *input, THCTensor *basis, THCudaLongTensor *weight_index) {
  TensorInfo<real> gradWeightInfo = thc_(getTensorInfo)(state, grad_weight);
  TensorInfo<real> gradOutputInfo = thc_(getTensorInfo)(state, grad_output);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingBackwardWeightKernel, THCTensor_(nElement)(state, grad_output), gradWeightInfo, gradOutputInfo, inputInfo, basisInfo, weightIndexInfo)
}
#endif
#include <THC.h>

#include "kernel.h"
#include "common.cuh"
#include "THCBasisForward.cuh"
#include "THCAtomics.cuh"

// Name-mangling helpers: expand to per-scalar-type symbols such as
// spline_<name>_kernel_Float and thc_<name>_Float.
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)

// Instantiate the generic helpers once per scalar type.
#include "generic/common.cu"
#include "THCGenerateAllTypes.h"
// Forward weighting: for flat index i = (edge, m_out),
//   output[e, m_out] = sum_s sum_m_in
//       basis[e, s] * weight[wIdx(e, s), m_in, m_out] * input[e, m_in]
template<typename Real>
__global__ void weightingForwardKernel(TensorInfo<Real> output, TensorInfo<Real> input, TensorInfo<Real> weight, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
  KERNEL_LOOP(i, n) {
    // Which edge and which output channel this thread computes.
    int64_t edgeOffset = i / output.size[1], inputOffset = edgeOffset * input.stride[0];
    int64_t s, S = basis.size[1], m_in, M_in = input.size[1], m_out = i % output.size[1], M_out = output.size[1], weightOffset;
    Real value = 0; Real b;
    for (s = 0; s < S; s++) {
      b = basis.data[edgeOffset * S + s];
      // Start of the (M_in x M_out) weight slab picked by the basis index,
      // already offset to column m_out.
      weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
      for (m_in = 0; m_in < M_in; m_in++) {
        value += weight.data[weightOffset + m_in * M_out] * input.data[inputOffset + m_in * input.stride[1]] * b;
      }
    }
    output.data[i] = value;
  }
}
template<typename Real>
__global__ void weightingBackwardInputKernel(TensorInfo<Real> gradInput, TensorInfo<Real> gradOutput, TensorInfo<Real> weight, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
  // Computes dL/dx[e, m_in] = sum_s basis[e, s] *
  //                           sum_{m_out} weight[idx[e, s], m_in, m_out] * dL/dy[e, m_out]
  // with one thread per grad_input element (n == total number of elements).
  KERNEL_LOOP(i, n) {
    const int64_t M_in = gradInput.size[1];
    const int64_t M_out = gradOutput.size[1];
    const int64_t S = basis.size[1];
    const int64_t e = i / M_in;                       // edge index
    const int64_t m_in = i % M_in;                    // input feature channel
    const int64_t gradOutOffset = e * gradOutput.stride[0];
    Real acc = 0;
    for (int64_t s = 0; s < S; s++) {
      const Real b = basis.data[e * S + s];
      // Flat offset of weight[weightIndex[e, s], m_in, 0].
      const int64_t wOffset = weightIndex.data[e * S + s] * M_in * M_out + m_in;
      for (int64_t m_out = 0; m_out < M_out; m_out++) {
        acc += weight.data[wOffset + M_in * m_out] * gradOutput.data[gradOutOffset + m_out] * b;
      }
    }
    // NOTE(review): writing gradInput.data[i] directly assumes a contiguous
    // grad_input tensor — confirm callers guarantee this.
    gradInput.data[i] = acc;
  }
}
// Weight-gradient kernel: one thread per grad_output element (edge, output
// channel). Each thread scatters b * dL/dy[e, m_out] * x[e, m_in] into
// gradWeight[weightIndex[e, s], m_in, m_out] for every basis product s and
// input channel m_in.
template<typename Real>
__global__ void weightingBackwardWeightKernel(TensorInfo<Real> gradWeight, TensorInfo<Real> gradOutput, TensorInfo<Real> input, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
KERNEL_LOOP(i, n) {
// Decompose the linear thread index into (edge, output channel).
int64_t edgeOffset = i / gradOutput.size[1], inputOffset = edgeOffset * input.stride[0];
int64_t s, S = basis.size[1];
int64_t m_in, M_in = input.size[1];
int64_t m_out = i % gradOutput.size[1], M_out = gradOutput.size[1];
int64_t weightOffset;
Real b;
// NOTE(review): indexing gradOutput as edgeOffset * M_out + m_out (instead of
// using its strides) assumes a contiguous grad_output tensor — confirm.
Real value = gradOutput.data[edgeOffset * M_out + m_out];
for (s = 0; s < S; s++) {
b = basis.data[edgeOffset * S + s];
// Flat offset of gradWeight[weightIndex[e, s], 0, m_out].
weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
for (m_in = 0; m_in < M_in; m_in++) {
// Many edges may map to the same kernel-weight entry, so the accumulation
// into grad_weight must be atomic.
atomicAdd(&gradWeight.data[weightOffset + m_in * M_out], b * value * input.data[inputOffset + m_in * input.stride[1]]);
}
}
}
}
#include "generic/kernel.cu"
#include "THCGenerateFloatType.h"
#include "generic/kernel.cu"
#include "THCGenerateDoubleType.h"
#ifdef __cplusplus
extern "C" {
#endif
// C-linkage prototypes for the CUDA kernel launchers (Float/Double variants
// generated via the TH_CONCAT_4 macros above).
// B-spline basis evaluation: fills `basis` values and the corresponding
// `weight_index` entries for each pseudo-coordinate, for spline degrees 1-3.
void spline_linear_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_linear_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
// Spline weighting: forward pass and gradients w.r.t. input and weight.
void spline_weighting_forward_kernel_Float (THCState *state, THCudaTensor *output, THCudaTensor *input, THCudaTensor *weight, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_forward_kernel_Double(THCState *state, THCudaDoubleTensor *output, THCudaDoubleTensor *input, THCudaDoubleTensor *weight, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_input_kernel_Float (THCState *state, THCudaTensor *grad_input, THCudaTensor *grad_output, THCudaTensor *weight, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_input_kernel_Double(THCState *state, THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_output, THCudaDoubleTensor *weight, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_weight_kernel_Float (THCState *state, THCudaTensor *grad_weight, THCudaTensor *grad_output, THCudaTensor *input, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_weight_kernel_Double(THCState *state, THCudaDoubleTensor *grad_weight, THCudaDoubleTensor *grad_output, THCudaDoubleTensor *input, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
#ifdef __cplusplus
}
#endif
// TH_TENSOR_DIM_APPLY4: iterate four tensors in lockstep, executing CODE once
// per position of all dimensions except DIMENSION (odometer-style traversal
// driven by TENSOR1's shape). Inside CODE, each tensor T exposes:
//   T##_data   - pointer to the start of the current slice along DIMENSION,
//   T##_stride - T's stride along DIMENSION,
//   T##_size   - T's extent along DIMENSION.
// This mirrors TH's built-in TH_TENSOR_DIM_APPLY3, extended to four tensors.
// NOTE(review): only TENSOR1's nDimension/size drive the counters while all
// four data pointers are advanced with their own strides — this presumes the
// tensors agree in shape on every non-DIMENSION axis; confirm at call sites.
#define TH_TENSOR_DIM_APPLY4(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, TYPE4, TENSOR4, DIMENSION, CODE) { \
TYPE1 *TENSOR1##_data = NULL; \
int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
TYPE2 *TENSOR2##_data = NULL; \
int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
TYPE3 *TENSOR3##_data = NULL; \
int64_t TENSOR3##_stride = 0, TENSOR3##_size = 0; \
TYPE4 *TENSOR4##_data = NULL; \
int64_t TENSOR4##_stride = 0, TENSOR4##_size = 0; \
\
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
int TH_TENSOR_DIM_APPLY_i; \
\
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(TENSOR1->nDimension)); \
\
for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) { \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
\
TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \
TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \
TENSOR1##_size = TENSOR1->size[DIMENSION]; \
\
TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \
TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \
TENSOR2##_size = TENSOR2->size[DIMENSION]; \
\
TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \
TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \
TENSOR3##_size = TENSOR3->size[DIMENSION]; \
\
TENSOR4##_data = (TENSOR4)->storage->data+(TENSOR4)->storageOffset; \
TENSOR4##_stride = (TENSOR4)->stride[DIMENSION]; \
TENSOR4##_size = TENSOR4->size[DIMENSION]; \
\
while (!TH_TENSOR_DIM_APPLY_hasFinished) { \
CODE \
\
if (TENSOR1->nDimension == 1) break; \
\
for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) { \
if (TH_TENSOR_DIM_APPLY_i == DIMENSION) { \
if (TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) { \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
continue; \
} \
\
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR4##_data += TENSOR4->stride[TH_TENSOR_DIM_APPLY_i]; \
\
if (TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) { \
if (TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) { \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
else { \
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR4##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR4->stride[TH_TENSOR_DIM_APPLY_i]; \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
} \
else break; \
} \
} \
THFree(TH_TENSOR_DIM_APPLY_counter); \
}
#include <TH/TH.h>
#include "THTensorDimApply.h"
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _, Real)
// SPLINE_BASIS_FORWARD: CPU forward pass of the B-spline basis of degree M.
// For every pseudo-coordinate row it computes, for each of the S basis
// products, the basis value `b` (product over the D pseudo-dimensions) and
// the flattened kernel-weight index `wi`. CODE is the degree-specific
// evaluation applied to `value` (the fractional coordinate) given `k_mod`
// (the per-dimension spline-segment selector).
// Per dimension: `value` is the pseudo-coordinate scaled to the kernel grid
// (closed splines keep the full kernel_size range, open splines subtract M),
// its integer part picks the weight cell and its fractional part feeds CODE.
#define SPLINE_BASIS_FORWARD(M, basis, weight_index, pseudo, kernel_size, is_open_spline, K, CODE) { \
int64_t *kernel_size_data = kernel_size->storage->data + kernel_size->storageOffset; \
uint8_t *is_open_spline_data = is_open_spline->storage->data + is_open_spline->storageOffset; \
int64_t S = THLongTensor_size(weight_index, 1); \
int64_t D = THTensor_(size)(pseudo, 1); \
int64_t s, d, k, k_mod, wi, offset; real b, value; \
\
TH_TENSOR_DIM_APPLY3(real, basis, int64_t, weight_index, real, pseudo, 1, TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM, \
for (s = 0; s < S; s++) { \
b = 1; wi = 0; k = s; offset = K; \
for (d = 0; d < D; d++) { \
offset /= kernel_size_data[d]; \
k_mod = k % (M + 1); \
k /= M + 1; \
value = *(pseudo_data + d * pseudo_stride) * (kernel_size_data[d] - M * is_open_spline_data[d]); \
wi += (((int64_t) value + k_mod) % kernel_size_data[d]) * offset; \
value -= floor(value); \
CODE \
b *= value; \
} \
basis_data[s * basis_stride] = b; \
weight_index_data[s * weight_index_stride] = wi; \
}) \
}
// SPLINE_BASIS_BACKWARD: CPU gradient of the degree-M B-spline basis w.r.t.
// the pseudo-coordinates. For each dimension d, the gradient is the sum over
// all S basis products of grad_basis[s] times the derivative of d's factor
// (GRAD_CODE applied to the fractional coordinate `value`) times the plain
// basis factors of every other dimension d_it (EVAL_CODE), finally rescaled
// by the coordinate-to-grid factor (kernel_size - M * is_open_spline).
// `quotient`/`k_mod` recover the per-dimension segment selector from the
// flattened product index s, matching the encoding in SPLINE_BASIS_FORWARD.
#define SPLINE_BASIS_BACKWARD(M, grad_pseudo, grad_basis, pseudo, kernel_size, is_open_spline, EVAL_CODE, GRAD_CODE) { \
int64_t *kernel_size_data = kernel_size->storage->data + kernel_size->storageOffset; \
uint8_t *is_open_spline_data = is_open_spline->storage->data + is_open_spline->storageOffset; \
int64_t D = THTensor_(size)(pseudo, 1); \
int64_t S = THTensor_(size)(grad_basis, 1); \
int64_t d, s, d_it, quotient, k_mod; real g_out, g, value;\
\
TH_TENSOR_DIM_APPLY3(real, grad_pseudo, real, grad_basis, real, pseudo, 1, TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM, \
for (d = 0; d < D; d++) { \
g_out = 0; \
quotient = pow(M + 1, d); \
for (s = 0; s < S; s++) { \
k_mod = (s / quotient) % (M + 1); \
value = *(pseudo_data + d * pseudo_stride) * (kernel_size_data[d] - M * is_open_spline_data[d]); \
value -= floor(value); \
GRAD_CODE \
g = value; \
\
for (d_it = 0; d_it < d; d_it++) { \
k_mod = (s / (int64_t) pow(M + 1, d_it)) % (M + 1); \
value = *(pseudo_data + d_it * pseudo_stride) * (kernel_size_data[d_it] - M * is_open_spline_data[d_it]); \
value -= floor(value); \
EVAL_CODE \
g *= value; \
} \
for (d_it = d + 1; d_it < D; d_it++) { \
k_mod = (s / (int64_t) pow(M + 1, d_it)) % (M + 1); \
value = *(pseudo_data + d_it * pseudo_stride) * (kernel_size_data[d_it] - M * is_open_spline_data[d_it]); \
value -= floor(value); \
EVAL_CODE \
g *= value; \
} \
g_out += g * *(grad_basis_data + s * grad_basis_stride); \
} \
grad_pseudo_data[d * grad_pseudo_stride] = g_out * (kernel_size_data[d] - M * is_open_spline_data[d]); \
} \
) \
}
// SPLINE_WEIGHTING: thin CPU wrapper that binds the channel/basis extents
// (M_in, M_out, S) plus scratch loop variables, then walks the three real
// tensors and the weight_index tensor in lockstep along dimension 1 via
// TH_TENSOR_DIM_APPLY4, executing the caller-supplied CODE per slice.
#define SPLINE_WEIGHTING(TENSOR1, TENSOR2, TENSOR3, weight_index, M_IN, M_OUT, M_S, CODE) { \
int64_t M_in = M_IN; int64_t M_out = M_OUT; int64_t S = M_S; \
int64_t m_in, m_out, s, w_idx; real value; \
TH_TENSOR_DIM_APPLY4(real, TENSOR1, real, TENSOR2, real, TENSOR3, int64_t, weight_index, 1, CODE) \
}
#include "generic/cpu.c"
#include "THGenerateFloatType.h"
#include "generic/cpu.c"
#include "THGenerateDoubleType.h"
// C prototypes for the CPU implementations (Float/Double variants generated
// from generic/cpu.c via the TH type-generation headers).
// B-spline basis evaluation, forward and backward, for spline degrees 1-3.
void spline_linear_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_linear_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_linear_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_linear_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_quadratic_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_quadratic_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_cubic_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_cubic_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
// Spline weighting: forward pass and gradients w.r.t. input, basis and weight.
// (Note the CPU side also exposes a backward_basis variant that the CUDA
// prototype list above does not.)
void spline_weighting_forward_Float ( THFloatTensor *output, THFloatTensor *input, THFloatTensor *weight, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_forward_Double(THDoubleTensor *output, THDoubleTensor *input, THDoubleTensor *weight, THDoubleTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_input_Float ( THFloatTensor *grad_input, THFloatTensor *grad_output, THFloatTensor *weight, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_input_Double(THDoubleTensor *grad_input, THDoubleTensor *grad_output, THDoubleTensor *weight, THDoubleTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_basis_Float ( THFloatTensor *grad_basis, THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *weight, THLongTensor *weight_index);
void spline_weighting_backward_basis_Double(THDoubleTensor *grad_basis, THDoubleTensor *grad_output, THDoubleTensor *input, THDoubleTensor *weight, THLongTensor *weight_index);
void spline_weighting_backward_weight_Float ( THFloatTensor *grad_weight, THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_weight_Double(THDoubleTensor *grad_weight, THDoubleTensor *grad_output, THDoubleTensor *input, THDoubleTensor *basis, THLongTensor *weight_index);
#include <THC/THC.h>
#include "kernel.h"
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _cuda_, Real)
#define spline_kernel_(NAME) TH_CONCAT_4(spline_, NAME, _kernel_, Real)
extern THCState *state;
#include "generic/cuda.c"
#include "THCGenerateFloatType.h"
#include "generic/cuda.c"
#include "THCGenerateDoubleType.h"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment