Commit 6c6be201 authored by rusty1s
Browse files

final tests

parent f7d3df7b
...@@ -5,4 +5,4 @@ description-file = README.md ...@@ -5,4 +5,4 @@ description-file = README.md
test=pytest test=pytest
[tool:pytest] [tool:pytest]
addopts = --capture=no addopts = --capture=no --cov
# import pytest from itertools import product
# import torch
# from torch.autograd import Variable, gradcheck import pytest
# from torch_spline_conv import spline_conv import torch
# from torch_spline_conv.functions.spline_weighting import SplineWeighting from torch.autograd import Variable, gradcheck
# from torch_spline_conv.functions.ffi import implemented_degrees from torch_spline_conv import spline_conv
from torch_spline_conv.utils.ffi import implemented_degrees
# from .utils import tensors, Tensor
from .tensor import tensors
# @pytest.mark.parametrize('tensor', tensors)
# def test_spline_conv_cpu(tensor): tests = [{
# x = Tensor(tensor, [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]]) 'src': [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]],
# edge_index = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]]) 'edge_index': [[0, 0, 0, 0], [1, 2, 3, 4]],
# pseudo = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]] 'pseudo': [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]],
# pseudo = Tensor(tensor, pseudo) 'weight': [
# weight = torch.arange(0.5, 0.5 * 25, step=0.5, out=x.new()).view(12, 2, 1) [[0.5], [1]],
# kernel_size = torch.LongTensor([3, 4]) [[1.5], [2]],
# is_open_spline = torch.ByteTensor([1, 0]) [[2.5], [3]],
# root_weight = torch.arange(12.5, 13.5, step=0.5, out=x.new()).view(2, 1) [[3.5], [4]],
# bias = Tensor(tensor, [1]) [[4.5], [5]],
[[5.5], [6]],
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, [[6.5], [7]],
# is_open_spline, 1, root_weight, bias) [[7.5], [8]],
[[8.5], [9]],
# edgewise_output = [ [[9.5], [10]],
# 1 * 0.25 * (0.5 + 1.5 + 4.5 + 5.5) + 2 * 0.25 * (1 + 2 + 5 + 6), [[10.5], [11]],
# 3 * 0.25 * (1.5 + 2.5 + 5.5 + 6.5) + 4 * 0.25 * (2 + 3 + 6 + 7), [[11.5], [12]],
# 5 * 0.25 * (6.5 + 7.5 + 10.5 + 11.5) + 6 * 0.25 * (7 + 8 + 11 + 12), ],
# 7 * 0.25 * (7.5 + 4.5 + 11.5 + 8.5) + 8 * 0.25 * (8 + 5 + 12 + 9), 'kernel_size': [3, 4],
# ] 'is_open_spline': [1, 0],
'root_weight': [[12.5], [13]],
# expected_output = [ 'bias': [1],
# [1 + 12.5 * 9 + 13 * 10 + sum(edgewise_output) / 4], 'output': [
# [1 + 12.5 * 1 + 13 * 2], [1 + 12.5 * 9 + 13 * 10 + (8.5 + 40.5 + 107.5 + 101.5) / 4],
# [1 + 12.5 * 3 + 13 * 4], [1 + 12.5 * 1 + 13 * 2],
# [1 + 12.5 * 5 + 13 * 6], [1 + 12.5 * 3 + 13 * 4],
# [1 + 12.5 * 7 + 13 * 8], [1 + 12.5 * 5 + 13 * 6],
# ] [1 + 12.5 * 7 + 13 * 8],
]
# assert output.tolist() == expected_output }]
# x, weight, pseudo = Variable(x), Variable(weight), Variable(pseudo)
# root_weight, bias = Variable(root_weight), Variable(bias) @pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
def test_spline_conv_forward_cpu(tensor, i):
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, data = tests[i]
# is_open_spline, 1, root_weight, bias)
src = getattr(torch, tensor)(data['src'])
# assert output.data.tolist() == expected_output edge_index = torch.LongTensor(data['edge_index'])
pseudo = getattr(torch, tensor)(data['pseudo'])
# def test_spline_weighting_backward_cpu(): weight = getattr(torch, tensor)(data['weight'])
# for degree in implemented_degrees.keys(): kernel_size = torch.LongTensor(data['kernel_size'])
# kernel_size = torch.LongTensor([5, 5, 5]) is_open_spline = torch.ByteTensor(data['is_open_spline'])
# is_open_spline = torch.ByteTensor([1, 0, 1]) root_weight = getattr(torch, tensor)(data['root_weight'])
# op = SplineWeighting(kernel_size, is_open_spline, degree) bias = getattr(torch, tensor)(data['bias'])
# x = torch.DoubleTensor(16, 2).uniform_(-1, 1) output = spline_conv(src, edge_index, pseudo, weight, kernel_size,
# x = Variable(x, requires_grad=True) is_open_spline, 1, root_weight, bias)
# pseudo = torch.DoubleTensor(16, 3).uniform_(0, 1) assert output.tolist() == data['output']
# pseudo = Variable(pseudo, requires_grad=True)
# weight = torch.DoubleTensor(25, 2, 4).uniform_(-1, 1)
# weight = Variable(weight, requires_grad=True) @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
@pytest.mark.parametrize('tensor,i', product(tensors, range(len(tests))))
# assert gradcheck(op, (x, pseudo, weight), eps=1e-6, atol=1e-4) is True def test_spline_conv_forward_gpu(tensor, i):
data = tests[i]
# @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
# @pytest.mark.parametrize('tensor', tensors) src = getattr(torch.cuda, tensor)(data['src'])
# def test_spline_conv_gpu(tensor): # pragma: no cover edge_index = torch.cuda.LongTensor(data['edge_index'])
# x = Tensor(tensor, [[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]]) pseudo = getattr(torch.cuda, tensor)(data['pseudo'])
# edge_index = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]]) weight = getattr(torch.cuda, tensor)(data['weight'])
# pseudo = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]] kernel_size = torch.cuda.LongTensor(data['kernel_size'])
# pseudo = Tensor(tensor, pseudo) is_open_spline = torch.cuda.ByteTensor(data['is_open_spline'])
# weight = torch.arange(0.5, 0.5 * 25, step=0.5, out=x.new()).view(12, 2, 1) root_weight = getattr(torch.cuda, tensor)(data['root_weight'])
# kernel_size = torch.LongTensor([3, 4]) bias = getattr(torch.cuda, tensor)(data['bias'])
# is_open_spline = torch.ByteTensor([1, 0])
# root_weight = torch.arange(12.5, 13.5, step=0.5, out=x.new()).view(2, 1) output = spline_conv(src, edge_index, pseudo, weight, kernel_size,
# bias = Tensor(tensor, [1]) is_open_spline, 1, root_weight, bias)
assert output.cpu().tolist() == data['output']
# expected_output = spline_conv(x, edge_index, pseudo, weight, kernel_size,
# is_open_spline, 1, root_weight, bias)
@pytest.mark.parametrize('degree', implemented_degrees.keys())
# x, edge_index, pseudo = x.cuda(), edge_index.cuda(), pseudo.cuda() def test_spline_basis_backward_cpu(degree):
# weight, kernel_size = weight.cuda(), kernel_size.cuda() src = torch.DoubleTensor(3, 2).uniform_(-1, 1)
# is_open_spline, root_weight = is_open_spline.cuda(), root_weight.cuda() edge_index = torch.LongTensor([[0, 1, 1, 2], [1, 0, 2, 1]])
# bias = bias.cuda() pseudo = torch.DoubleTensor(4, 3).uniform_(0, 1)
weight = torch.DoubleTensor(125, 2, 4).uniform_(-1, 1)
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, kernel_size = torch.LongTensor([5, 5, 5])
# is_open_spline, 1, root_weight, bias) is_open_spline = torch.ByteTensor([1, 0, 1])
# assert output.cpu().tolist() == expected_output.tolist() root_weight = torch.DoubleTensor(2, 4).uniform_(-1, 1)
bias = torch.DoubleTensor(4).uniform_(-1, 1)
# x, weight, pseudo = Variable(x), Variable(weight), Variable(pseudo)
# root_weight, bias = Variable(root_weight), Variable(bias) src = Variable(src, requires_grad=True)
pseudo = Variable(pseudo, requires_grad=True)
# output = spline_conv(x, edge_index, pseudo, weight, kernel_size, weight = Variable(weight, requires_grad=True)
# is_open_spline, 1, root_weight, bias) root_weight = Variable(root_weight, requires_grad=True)
bias = Variable(bias, requires_grad=True)
# assert output.data.cpu().tolist() == expected_output.tolist()
def op(src, pseudo, weight, root_weight, bias):
# @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA') return spline_conv(src, edge_index, pseudo, weight, kernel_size,
# def test_spline_weighting_backward_gpu(): # pragma: no cover is_open_spline, degree, root_weight, bias)
# for degree in implemented_degrees.keys():
# kernel_size = torch.cuda.LongTensor([5, 5, 5]) data = (src, pseudo, weight, root_weight, bias)
# is_open_spline = torch.cuda.ByteTensor([1, 0, 1]) assert gradcheck(op, data, eps=1e-6, atol=1e-4) is True
# op = SplineWeighting(kernel_size, is_open_spline, degree)
# x = torch.cuda.DoubleTensor(16, 2).uniform_(-1, 1) @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
# x = Variable(x, requires_grad=True) @pytest.mark.parametrize('degree', [2])
# pseudo = torch.cuda.DoubleTensor(16, 3).uniform_(0, 1) def test_spline_basis_backward_gpu(degree):
# pseudo = Variable(pseudo, requires_grad=False) # TODO src = torch.cuda.DoubleTensor(3, 2).uniform_(-1, 1)
# weight = torch.cuda.DoubleTensor(25, 2, 4).uniform_(-1, 1) edge_index = torch.cuda.LongTensor([[0, 1, 1, 2], [1, 0, 2, 1]])
# weight = Variable(weight, requires_grad=True) pseudo = torch.cuda.DoubleTensor(4, 3).uniform_(0, 1)
weight = torch.cuda.DoubleTensor(125, 2, 4).uniform_(-1, 1)
# assert gradcheck(op, (x, pseudo, weight), eps=1e-6, atol=1e-4) is True kernel_size = torch.cuda.LongTensor([5, 5, 5])
is_open_spline = torch.cuda.ByteTensor([1, 0, 1])
root_weight = torch.cuda.DoubleTensor(2, 4).uniform_(-1, 1)
bias = torch.cuda.DoubleTensor(4).uniform_(-1, 1)
src = Variable(src, requires_grad=False)
pseudo = Variable(pseudo, requires_grad=True)
weight = Variable(weight, requires_grad=False)
root_weight = Variable(root_weight, requires_grad=False)
bias = Variable(bias, requires_grad=False)
def op(src, pseudo, weight, root_weight, bias):
return spline_conv(src, edge_index, pseudo, weight, kernel_size,
is_open_spline, degree, root_weight, bias)
data = (src, pseudo, weight, root_weight, bias)
assert gradcheck(op, data, eps=1e-6, atol=1e-4) is True
...@@ -58,9 +58,10 @@ def test_spline_basis_backward_cpu(): ...@@ -58,9 +58,10 @@ def test_spline_basis_backward_cpu():
src = Variable(src, requires_grad=True) src = Variable(src, requires_grad=True)
weight = Variable(weight, requires_grad=True) weight = Variable(weight, requires_grad=True)
basis = Variable(basis, requires_grad=True) basis = Variable(basis, requires_grad=True)
weight_index = Variable(weight_index, requires_grad=False)
op = SplineWeighting(weight_index) data = (src, weight, basis, weight_index)
assert gradcheck(op, (src, weight, basis), eps=1e-6, atol=1e-4) is True assert gradcheck(SplineWeighting(), data, eps=1e-6, atol=1e-4) is True
@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA') @pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
...@@ -73,8 +74,9 @@ def test_spline_basis_backward_gpu(): ...@@ -73,8 +74,9 @@ def test_spline_basis_backward_gpu():
basis, weight_index = spline_basis(1, pseudo, kernel_size, is_open_spline) basis, weight_index = spline_basis(1, pseudo, kernel_size, is_open_spline)
src = Variable(src, requires_grad=True) src = Variable(src, requires_grad=True)
weight = Variable(weight, requires_grad=False) weight = Variable(weight, requires_grad=True)
basis = Variable(basis, requires_grad=False) basis = Variable(basis, requires_grad=True)
weight_index = Variable(weight_index, requires_grad=False)
op = SplineWeighting(weight_index) data = (src, weight, basis, weight_index)
assert gradcheck(op, (src, weight, basis), eps=1e-6, atol=1e-4) is True assert gradcheck(SplineWeighting(), data, eps=1e-6, atol=1e-4) is True
import torch import torch
from torch.autograd import Variable
from .basis import spline_basis from .basis import spline_basis
from .weighting import spline_weighting from .weighting import spline_weighting
...@@ -23,24 +24,24 @@ def spline_conv(src, ...@@ -23,24 +24,24 @@ def spline_conv(src,
B-Spline tensor product basis for a single input feature map :math:`l`. B-Spline tensor product basis for a single input feature map :math:`l`.
Args: Args:
src (Tensor): Input node features of shape (number_of_nodes x src (Tensor or Variable): Input node features of shape
in_channels) (number_of_nodes x in_channels)
edge_idex (LongTensor): Graph edges, given by source and target edge_idex (LongTensor): Graph edges, given by source and target
indices, of shape (2 x number_of_edges) indices, of shape (2 x number_of_edges)
pseudo (Tensor): Edge attributes, ie. pseudo coordinates, of shape pseudo (Tensor or Variable): Edge attributes, ie. pseudo coordinates,
(number_of_edges x number_of_edge_attributes) of shape (number_of_edges x number_of_edge_attributes)
weight (Tensor): Trainable weight parameters of shape (kernel_size x weight (Tensor or Variable): Trainable weight parameters of shape
in_channels x out_channels) (kernel_size x in_channels x out_channels)
kernel_size (LongTensor): Number of trainable weight parameters in each kernel_size (LongTensor): Number of trainable weight parameters in each
edge dimension edge dimension
is_open_spline (ByteTensor): Whether to use open or closed B-spline is_open_spline (ByteTensor): Whether to use open or closed B-spline
bases for each dimension bases for each dimension
degree (int): B-spline basis degree (default: :obj:`1`) degree (int): B-spline basis degree (default: :obj:`1`)
root_weight (Tensor): Additional shared trainable parameters for each root_weight (Tensor or Variable): Additional shared trainable
feature of the root node of shape (in_channels x out_channels) parameters for each feature of the root node of shape
(in_channels x out_channels) (default: :obj:`None`)
bias (Tensor or Variable): Optional bias of shape (out_channels)
(default: :obj:`None`) (default: :obj:`None`)
bias (Tensor): Optional bias of shape (out_channels) (default:
:obj:`None`)
""" """
src = src.unsqueeze(-1) if src.dim() == 1 else src src = src.unsqueeze(-1) if src.dim() == 1 else src
...@@ -57,10 +58,12 @@ def spline_conv(src, ...@@ -57,10 +58,12 @@ def spline_conv(src,
# Perform the real convolution => Convert e x m_out to n x m_out features. # Perform the real convolution => Convert e x m_out to n x m_out features.
zero = new(src, n, m_out).fill_(0) zero = new(src, n, m_out).fill_(0)
row_expand = row.unsqueeze(-1).expand(e, m_out) row_expand = row.unsqueeze(-1).expand(e, m_out)
row_expand = row_expand if torch.is_tensor(src) else Variable(row_expand)
output = zero.scatter_add_(0, row_expand, output) output = zero.scatter_add_(0, row_expand, output)
# Normalize output by node degree. # Normalize output by node degree.
degree = node_degree(row, n, out=new(src)) index = row if torch.is_tensor(src) else Variable(row)
degree = node_degree(index, n, out=new(src))
output /= degree.unsqueeze(-1).clamp_(min=1) output /= degree.unsqueeze(-1).clamp_(min=1)
# Weight root node separately (if wished). # Weight root node separately (if wished).
......
import torch
def node_degree(edge_index, num_nodes, out=None):
    """Return the out-degree of every node.

    Args:
        edge_index (LongTensor): Edges of shape (2 x number_of_edges); only
            row 0 (the source indices) is counted.
        num_nodes (int): Total number of nodes.
        out (Tensor, optional): Destination tensor for the counts.

    Returns:
        Tensor of shape (num_nodes,) holding one count per node.
    """
    degree = torch.zeros(num_nodes, out=out)
    ones = torch.ones(edge_index.size(1), out=degree.new())
    degree.scatter_add_(0, edge_index[0], ones)
    return degree
from .._ext import ffi as ext
# Supported B-spline degrees mapped to the name fragment used by the FFI
# bindings.
implemented_degrees = {1: 'linear', 2: 'quadratic', 3: 'cubic'}


def get_degree_str(degree):
    """Translate a numeric B-spline degree into its implementation name.

    Raises:
        AssertionError: If no native implementation exists for `degree`.
    """
    name = implemented_degrees.get(degree)
    assert name is not None, (
        'No implementation found for specified B-spline degree')
    return name
def get_func(name, tensor):
    """Resolve the compiled FFI routine for *name*, dispatched on the
    tensor's scalar type (Float, Double, ...) and device (CPU vs. CUDA)."""
    typename = type(tensor).__name__.replace('Tensor', '')
    device_prefix = 'cuda_' if tensor.is_cuda else ''
    return getattr(ext, 'spline_{}_{}{}'.format(name, device_prefix, typename))
def spline_basis_forward(degree, pseudo, kernel_size, is_open_spline, K):
    """Evaluate B-spline basis products and flattened weight indices.

    Allocates both outputs of shape
    (number_of_edges x (degree + 1)^num_dims) and fills them in-place via
    the native kernel selected by `degree` and the type of `pseudo`.
    """
    num_edges = pseudo.size(0)
    s = (degree + 1)**kernel_size.size(0)
    basis = pseudo.new(num_edges, s)
    weight_index = kernel_size.new(num_edges, s)
    name = '{}_basis_forward'.format(get_degree_str(degree))
    get_func(name, pseudo)(basis, weight_index, pseudo, kernel_size,
                           is_open_spline, K)
    return basis, weight_index
def spline_basis_backward(degree, grad_basis, pseudo, kernel_size,
                          is_open_spline):  # pragma: no cover
    """Backpropagate basis gradients onto the pseudo coordinates."""
    grad_pseudo = pseudo.new(pseudo.size())
    name = '{}_basis_backward'.format(get_degree_str(degree))
    get_func(name, pseudo)(grad_pseudo, grad_basis, pseudo, kernel_size,
                           is_open_spline)
    return grad_pseudo
def spline_weighting_forward(x, weight, basis, weight_index):
    """Weight input features by their B-spline bases (native forward op)."""
    out = x.new(x.size(0), weight.size(2))
    get_func('weighting_forward', x)(out, x, weight, basis, weight_index)
    return out
def spline_weighting_backward_input(grad_output, weight, basis,
                                    weight_index):  # pragma: no cover
    """Gradient of the weighting op w.r.t. its input features."""
    grad_input = grad_output.new(grad_output.size(0), weight.size(1))
    # The native kernel reads the weights in (K x M_out x M_in) layout;
    # transpose once here so GPU accesses are coalesced.
    weight_t = weight.transpose(1, 2).contiguous()
    func = get_func('weighting_backward_input', grad_output)
    func(grad_input, grad_output, weight_t, basis, weight_index)
    return grad_input
def spline_weighting_backward_basis(grad_output, x, weight,
                                    weight_index):  # pragma: no cover
    """Gradient of the weighting op w.r.t. the basis products."""
    grad_basis = x.new(weight_index.size()).fill_(0)
    get_func('weighting_backward_basis', x)(grad_basis, grad_output, x,
                                            weight, weight_index)
    return grad_basis
def spline_weighting_backward_weight(grad_output, x, basis, weight_index,
                                     K):  # pragma: no cover
    """Gradient of the weighting op w.r.t. the trainable weights."""
    grad_weight = x.new(K, x.size(1), grad_output.size(1)).fill_(0)
    get_func('weighting_backward_weight', x)(grad_weight, grad_output, x,
                                             basis, weight_index)
    return grad_weight
import torch
from torch.autograd import Variable as Var
from .degree import node_degree
from .spline_weighting import spline_weighting
def spline_conv(x,
                edge_index,
                pseudo,
                weight,
                kernel_size,
                is_open_spline,
                degree=1,
                root_weight=None,
                bias=None):
    """Spline-based graph convolution.

    Works on plain tensors and on autograd ``Variable``s alike; every
    intermediate index/buffer tensor is wrapped in ``Var`` on the Variable
    path so the two code paths stay interchangeable.

    Args:
        x (Tensor or Variable): Node features (number_of_nodes x in_channels).
        edge_index (LongTensor): Edges of shape (2 x number_of_edges); row 0
            indexes the nodes that receive the aggregated output, row 1 the
            nodes whose features are gathered.
        pseudo (Tensor or Variable): Edge pseudo-coordinates
            (number_of_edges x number_of_edge_attributes).
        weight (Tensor or Variable): Trainable weights
            (kernel_size x in_channels x out_channels).
        kernel_size (LongTensor): Kernel size per edge-attribute dimension.
        is_open_spline (ByteTensor): Open (1) vs. closed (0) spline per dim.
        degree (int, optional): B-spline basis degree. (default: 1)
        root_weight (Tensor or Variable, optional): Additional weights for
            the root node itself (in_channels x out_channels).
        bias (Tensor or Variable, optional): Bias of shape (out_channels).
    """
    n, e, m_out = x.size(0), edge_index.size(1), weight.size(2)
    # Promote 1-D feature/attribute vectors to 2-D column matrices.
    x = x.unsqueeze(-1) if x.dim() == 1 else x
    pseudo = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo

    # Convolve over each node: weight the gathered neighbor features by
    # their B-spline basis products.
    output = spline_weighting(x[edge_index[1]], pseudo, weight, kernel_size,
                              is_open_spline, degree)

    # Perform the real convolution => Convert e x m_out to n x m_out
    # features by scatter-summing edge outputs onto their row-0 nodes.
    row = edge_index[0].unsqueeze(-1).expand(e, m_out)
    row = row if torch.is_tensor(x) else Var(row)
    zero = x.new(n, m_out) if torch.is_tensor(x) else Var(x.data.new(n, m_out))
    output = zero.fill_(0).scatter_add_(0, row, output)

    # Compute node degree. NOTE: this rebinding deliberately shadows the
    # B-spline `degree` argument, which is no longer needed at this point.
    degree = x.new() if torch.is_tensor(x) else x.data.new()
    degree = node_degree(edge_index, n, out=degree)

    # Normalize output by node degree (clamped so isolated nodes divide by
    # 1 instead of 0).
    degree = degree.unsqueeze(-1).clamp_(min=1)
    output /= degree if torch.is_tensor(x) else Var(degree)

    # Weight root node separately (if wished).
    if root_weight is not None:
        output += torch.mm(x, root_weight)

    # Add bias (if wished).
    if bias is not None:
        output += bias

    return output
import torch
from torch.autograd import Function
from .ffi import (
spline_basis_forward,
spline_basis_backward,
spline_weighting_forward,
spline_weighting_backward_input,
spline_weighting_backward_basis,
spline_weighting_backward_weight,
)
class SplineWeighting(Function):
    """Autograd function computing B-spline-weighted features.

    Uses the legacy stateful ``Function`` API: the spline configuration is
    stored on the instance, so a fresh instance is created per call (see
    ``spline_weighting`` below).
    """

    def __init__(self, kernel_size, is_open_spline, degree):
        super(SplineWeighting, self).__init__()
        self.kernel_size = kernel_size
        self.is_open_spline = is_open_spline
        self.degree = degree

    def forward(self, x, pseudo, weight):
        # K: number of kernel positions (weight.size(0)).
        K = weight.size(0)
        basis, weight_index = spline_basis_forward(
            self.degree, pseudo, self.kernel_size, self.is_open_spline, K)
        output = spline_weighting_forward(x, weight, basis, weight_index)
        # Stash what backward() needs: differentiable inputs through
        # save_for_backward, the computed basis/index tensors directly on
        # the instance.
        self.save_for_backward(x, pseudo, weight)
        self.basis, self.weight_index = basis, weight_index
        return output

    def backward(self, grad_output):  # pragma: no cover
        x, pseudo, weight = self.saved_tensors
        basis, weight_index = self.basis, self.weight_index
        grad_input, grad_pseudo, grad_weight = None, None, None

        # Only compute the gradients autograd actually requested.
        if self.needs_input_grad[0]:
            grad_input = spline_weighting_backward_input(
                grad_output, weight, basis, weight_index)

        if self.needs_input_grad[1]:
            # Chain rule: the pseudo gradient flows through the basis values.
            grad_basis = spline_weighting_backward_basis(
                grad_output, x, weight, weight_index)
            grad_pseudo = spline_basis_backward(self.degree, grad_basis,
                                                pseudo, self.kernel_size,
                                                self.is_open_spline)

        if self.needs_input_grad[2]:
            K = weight.size(0)
            grad_weight = spline_weighting_backward_weight(
                grad_output, x, basis, weight_index, K)

        return grad_input, grad_pseudo, grad_weight
def spline_weighting(x, pseudo, weight, kernel_size, is_open_spline, degree):
    """Apply B-spline weighting, dispatching on tensor vs. Variable input."""
    if not torch.is_tensor(x):
        # Autograd path: route through the Function so gradients can flow.
        return SplineWeighting(kernel_size, is_open_spline, degree)(
            x, pseudo, weight)
    basis, weight_index = spline_basis_forward(
        degree, pseudo, kernel_size, is_open_spline, weight.size(0))
    return spline_weighting_forward(x, weight, basis, weight_index)
// Double-precision atomicAdd fallback: guarded for compute capability < 6.0
// or CUDA < 8.0 (where no native double atomicAdd exists). Emulated with a
// compare-and-swap loop on the 64-bit integer view of the address.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
static inline __device__ void atomicAdd(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull;
  unsigned long long int assumed;
  do {
    assumed = old;
    // Retry until no other thread modified *address between read and CAS.
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
}
#elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000)
// Host-compilation stub so the symbol exists outside device passes.
static inline __device__ void atomicAdd(double *address, double val) {}
#endif
// Shared body of the three *_basis_forward entry points: checks that the
// CUDA tensors live on the same device, unpacks them into TensorInfo views
// and raw pointers, then launches NAME over every element of `basis`,
// dispatching on the number of edge-attribute dimensions (pseudo.size[1]).
// (Comments stay outside the macro: a // before a line-continuation
// backslash would swallow it.)
#define SPLINE_BASIS_FORWARD(NAME, basis, weight_index, pseudo, kernel_size, is_open_spline, K) { \
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 3, pseudo, kernel_size, is_open_spline)); \
\
  const int n = THCTensor_(nElement)(state, basis); \
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis); \
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index); \
  TensorInfo<real> pseudoInfo = thc_(getTensorInfo)(state, pseudo); \
  int64_t *kernelSizeData = THCudaLongTensor_data(state, kernel_size); \
  uint8_t *isOpenSplineData = THCudaByteTensor_data(state, is_open_spline); \
\
  KERNEL_D_RUN(NAME, pseudoInfo.size[1], n, basisInfo, weightIndexInfo, pseudoInfo, kernelSizeData, isOpenSplineData, K) \
}
// Computes one (basis value, weight index) pair for flat output element i.
// k enumerates this element's (M + 1)^D local knot combination; for each
// dimension d, the scaled pseudo-coordinate splits into an integer knot
// position (folded into the mixed-radix flat index wi, radix kernelSize[d])
// and a fractional part `value`, on which CODE evaluates the degree-M 1-D
// basis polynomial (selected by k_mod); `b` accumulates the product over
// all dimensions.
#define COMPUTE_SPLINE_BASIS_FORWARD(M, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K, CODE) { \
  int64_t k = i % basis.size[1]; \
  int64_t pseudoOffset = ((i / basis.size[1]) % pseudo.size[0]) * pseudo.stride[0]; \
  int64_t d, k_mod, wi = 0, offset = K; Real b = 1, value; \
  for (d = 0; d < D; d++) { \
    offset /= kernelSize[d]; \
    k_mod = k % (M + 1); \
    k /= M + 1; \
    value = pseudo.data[pseudoOffset + d * pseudo.stride[1]] * (kernelSize[d] - M * isOpenSpline[d]); \
    wi += (((int64_t) value + k_mod) % kernelSize[d]) * offset; \
    value -= floor(value); \
    CODE \
    b *= value; \
  } \
  basis.data[i] = b; \
  weightIndex.data[i] = wi; \
}
// Degree-specific 1-D B-spline polynomials, combined across a compile-time
// number of dimensions D by COMPUTE_SPLINE_BASIS_FORWARD.
template<typename Real, int D>
struct SplineBasisForward {
  // Degree 1: linear hat function; branchless form selected by k_mod.
  static __device__ void linear(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(1, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      value = 1 - value - k_mod + 2 * value * k_mod;
    )
  }

  // Degree 2: the three quadratic B-spline pieces, selected by k_mod.
  static __device__ void quadratic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(2, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) value = 0.5 * value * value - value + 0.5;
      else if (k_mod == 1) value = -value * value + value + 0.5;
      else value = 0.5 * value * value;
    )
  }

  // Degree 3: the four cubic B-spline pieces, selected by k_mod.
  static __device__ void cubic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(3, D, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) { value = (1 - value); value = value * value * value / 6.0; }
      else if (k_mod == 1) value = (3 * value * value * value - 6 * value * value + 4) / 6;
      else if (k_mod == 2) value = (-3 * value * value * value + 3 * value * value + 3 * value + 1) / 6;
      else value = value * value * value / 6;
    )
  }
};
// Fallback specialization for dimensionalities without a compiled template
// (D == -1): the dimension count is read at runtime from pseudo.size[1]
// instead of the template parameter.
template<typename Real>
struct SplineBasisForward<Real, -1> {
  static __device__ void linear(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(1, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      value = 1 - value - k_mod + 2 * value * k_mod;
    )
  }

  static __device__ void quadratic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(2, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) value = 0.5 * value * value - value + 0.5;
      else if (k_mod == 1) value = -value * value + value + 0.5;
      else value = 0.5 * value * value;
    )
  }

  static __device__ void cubic(int i, const TensorInfo<Real>& basis, const TensorInfo<int64_t>& weightIndex, const TensorInfo<Real>& pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K) {
    COMPUTE_SPLINE_BASIS_FORWARD(3, pseudo.size[1], basis, weightIndex, pseudo, kernelSize, isOpenSpline, K,
      if (k_mod == 0) { value = (1 - value); value = value * value * value / 6.0; }
      else if (k_mod == 1) value = (3 * value * value * value - 6 * value * value + 4) / 6;
      else if (k_mod == 2) value = (-3 * value * value * value + 3 * value * value + 3 * value + 1) / 6;
      else value = value * value * value / 6;
    )
  }
};
// Grid-stride kernel: evaluates the linear basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void linearBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::linear(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Grid-stride kernel: evaluates the quadratic basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void quadraticBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::quadratic(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Grid-stride kernel: evaluates the cubic basis for each of the n
// (edge, local-combination) output elements.
template<typename Real, int D>
__global__ void cubicBasisForwardKernel(TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, TensorInfo<Real> pseudo, int64_t *kernelSize, uint8_t *isOpenSpline, int K, int n) {
  KERNEL_LOOP(i, n) {
    SplineBasisForward<Real, D>::cubic(i, basis, weightIndex, pseudo, kernelSize, isOpenSpline, K);
  }
}
// Maximum tensor rank representable by TensorInfo.
const int MAX_DIMS = 25;

// Threads per block used by every kernel launch.
const int NUM_THREADS = 1024;

// Number of blocks so that grid * NUM_THREADS covers n elements
// (ceiling division).
inline int GET_BLOCKS(const int n) {
  return (n + NUM_THREADS - 1) / NUM_THREADS;
}
// Kernel-friendly snapshot of a THC tensor: raw data pointer plus rank,
// sizes and strides copied into fixed-size arrays so the whole struct can
// be passed to a kernel by value.
template<typename T>
struct TensorInfo {
  TensorInfo(T *t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t; dims = d;
    for (int i = 0; i < dims; i++) {
      size[i] = sz[i];
      stride[i] = st[i];
    }
  }

  T *data;
  int dims;
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
};
// Grid-stride loop: each thread starts at its global index and advances by
// the total number of launched threads until N elements are covered.
// Fix: the increment must use the macro parameter I, not a hard-coded
// lowercase `i` — the old form only compiled because every call site
// happened to pass `i` as the loop variable.
#define KERNEL_LOOP(I, N) \
  for (int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
// Launches NAME<real> on the current THC stream with enough blocks to
// cover N elements, forwarding __VA_ARGS__ plus the element count, then
// surfaces any launch error.
#define KERNEL_RUN(NAME, N, ...) { \
  int grid = GET_BLOCKS(N); \
  cudaStream_t stream = THCState_getCurrentStream(state); \
  NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); \
  THCudaCheck(cudaGetLastError()); \
}
// As KERNEL_RUN, but additionally selects a template instantiation for the
// runtime dimensionality D (1-4 specialized; anything else falls back to
// the generic D == -1 variant).
#define KERNEL_D_RUN(NAME, D, N, ...) { \
  int grid = GET_BLOCKS(N); \
  cudaStream_t stream = THCState_getCurrentStream(state); \
  switch (D) { \
    case 1: NAME<real, 1><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 2: NAME<real, 2><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 3: NAME<real, 3><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    case 4: NAME<real, 4><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
    default: NAME<real, -1><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N); break; \
  } \
  THCudaCheck(cudaGetLastError()); \
}
// THC "generic file" trick: the first inclusion only sets THC_GENERIC_FILE;
// the type-generation header then re-includes this file once per scalar
// type, landing in the #else branch with `real`/`THCTensor_` bound.
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else

// Build a by-value TensorInfo snapshot (pointer, rank, sizes, strides) of
// `tensor` for passing into kernels.
TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  int dims = THCTensor_(nDimension)(state, tensor);
  int size[MAX_DIMS]; int stride[MAX_DIMS];
  for (int i = 0; i < dims; i++) {
    size[i] = THCTensor_(size)(state, tensor, i);
    stride[i] = THCTensor_(stride)(state, tensor, i);
  }
  return TensorInfo<real>(data, dims, size, stride);
}

#endif
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/kernel.cu"
#else
// FFI entry point: degree-1 (linear) basis forward for this scalar type.
void spline_(linear_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(linearBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point: degree-2 (quadratic) basis forward for this scalar type.
void spline_(quadratic_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(quadraticBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point: degree-3 (cubic) basis forward for this scalar type.
void spline_(cubic_basis_forward)(THCState *state, THCTensor *basis, THCudaLongTensor *weight_index, THCTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K) {
  SPLINE_BASIS_FORWARD(cubicBasisForwardKernel, basis, weight_index, pseudo, kernel_size, is_open_spline, K)
}
// FFI entry point for the weighting forward pass: verifies device
// placement, wraps the tensors as TensorInfo views and launches one thread
// per output element.
void spline_(weighting_forward)(THCState *state, THCTensor *output, THCTensor *input, THCTensor *weight, THCTensor *basis, THCudaLongTensor *weight_index) {
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, input, weight, basis, weight_index));
  TensorInfo<real> outputInfo = thc_(getTensorInfo)(state, output);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<real> weightInfo = thc_(getTensorInfo)(state, weight);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingForwardKernel, THCTensor_(nElement)(state, output), outputInfo, inputInfo, weightInfo, basisInfo, weightIndexInfo)
}
// FFI entry point: gradient w.r.t. the input features. `weight` arrives
// pre-transposed to (K x M_out x M_in) by the host-side wrapper.
// NOTE(review): unlike weighting_forward there is no THCAssertSameGPU
// check here — confirm whether that was intentional.
void spline_(weighting_backward_input)(THCState *state, THCTensor *grad_input, THCTensor *grad_output, THCTensor *weight, THCTensor *basis, THCudaLongTensor *weight_index) {
  TensorInfo<real> gradInputInfo = thc_(getTensorInfo)(state, grad_input);
  TensorInfo<real> gradOutputInfo = thc_(getTensorInfo)(state, grad_output);
  TensorInfo<real> weightInfo = thc_(getTensorInfo)(state, weight);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingBackwardInputKernel, THCTensor_(nElement)(state, grad_input), gradInputInfo, gradOutputInfo, weightInfo, basisInfo, weightIndexInfo)
}
// FFI entry point: gradient w.r.t. the weights. Launched with one thread
// per grad_output element; the kernel accumulates into grad_weight with
// atomicAdd since several edges can address the same weight slab.
void spline_(weighting_backward_weight)(THCState *state, THCTensor *grad_weight, THCTensor *grad_output, THCTensor *input, THCTensor *basis, THCudaLongTensor *weight_index) {
  TensorInfo<real> gradWeightInfo = thc_(getTensorInfo)(state, grad_weight);
  TensorInfo<real> gradOutputInfo = thc_(getTensorInfo)(state, grad_output);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<real> basisInfo = thc_(getTensorInfo)(state, basis);
  TensorInfo<int64_t> weightIndexInfo = thc_getTensorInfo_Long(state, weight_index);
  KERNEL_RUN(weightingBackwardWeightKernel, THCTensor_(nElement)(state, grad_output), gradWeightInfo, gradOutputInfo, inputInfo, basisInfo, weightIndexInfo)
}
#endif
#include <THC.h>

#include "kernel.h"
#include "common.cuh"
#include "THCBasisForward.cuh"
#include "THCAtomics.cuh"

// Name-mangling helpers: expand to per-scalar-type symbols such as
// spline_<name>_kernel_Float and thc_<name>_Float.
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)

// Instantiate the generic helpers once per scalar type.
#include "generic/common.cu"
#include "THCGenerateAllTypes.h"
// Forward weighting: for flat index i = (edge, m_out),
//   output[e, m_out] = sum_s sum_m_in
//       basis[e, s] * weight[wIdx(e, s), m_in, m_out] * input[e, m_in]
template<typename Real>
__global__ void weightingForwardKernel(TensorInfo<Real> output, TensorInfo<Real> input, TensorInfo<Real> weight, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
  KERNEL_LOOP(i, n) {
    // Which edge and which output channel this thread computes.
    int64_t edgeOffset = i / output.size[1], inputOffset = edgeOffset * input.stride[0];
    int64_t s, S = basis.size[1], m_in, M_in = input.size[1], m_out = i % output.size[1], M_out = output.size[1], weightOffset;
    Real value = 0; Real b;
    for (s = 0; s < S; s++) {
      b = basis.data[edgeOffset * S + s];
      // Start of the (M_in x M_out) weight slab picked by the basis index,
      // already offset to column m_out.
      weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
      for (m_in = 0; m_in < M_in; m_in++) {
        value += weight.data[weightOffset + m_in * M_out] * input.data[inputOffset + m_in * input.stride[1]] * b;
      }
    }
    output.data[i] = value;
  }
}
template<typename Real>
__global__ void weightingBackwardInputKernel(TensorInfo<Real> gradInput, TensorInfo<Real> gradOutput, TensorInfo<Real> weight, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
  // Computes dL/dx[e, m_in] = sum_s basis[e, s] *
  //                           sum_{m_out} weight[idx[e, s], m_in, m_out] * dL/dy[e, m_out]
  // with one thread per grad_input element (n == total number of elements).
  KERNEL_LOOP(i, n) {
    const int64_t M_in = gradInput.size[1];
    const int64_t M_out = gradOutput.size[1];
    const int64_t S = basis.size[1];
    const int64_t e = i / M_in;                       // edge index
    const int64_t m_in = i % M_in;                    // input feature channel
    const int64_t gradOutOffset = e * gradOutput.stride[0];
    Real acc = 0;
    for (int64_t s = 0; s < S; s++) {
      const Real b = basis.data[e * S + s];
      // Flat offset of weight[weightIndex[e, s], m_in, 0].
      const int64_t wOffset = weightIndex.data[e * S + s] * M_in * M_out + m_in;
      for (int64_t m_out = 0; m_out < M_out; m_out++) {
        acc += weight.data[wOffset + M_in * m_out] * gradOutput.data[gradOutOffset + m_out] * b;
      }
    }
    // NOTE(review): writing gradInput.data[i] directly assumes a contiguous
    // grad_input tensor — confirm callers guarantee this.
    gradInput.data[i] = acc;
  }
}
// Weight-gradient kernel: one thread per grad_output element (edge, output
// channel). Each thread scatters b * dL/dy[e, m_out] * x[e, m_in] into
// gradWeight[weightIndex[e, s], m_in, m_out] for every basis product s and
// input channel m_in.
template<typename Real>
__global__ void weightingBackwardWeightKernel(TensorInfo<Real> gradWeight, TensorInfo<Real> gradOutput, TensorInfo<Real> input, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
KERNEL_LOOP(i, n) {
// Decompose the linear thread index into (edge, output channel).
int64_t edgeOffset = i / gradOutput.size[1], inputOffset = edgeOffset * input.stride[0];
int64_t s, S = basis.size[1];
int64_t m_in, M_in = input.size[1];
int64_t m_out = i % gradOutput.size[1], M_out = gradOutput.size[1];
int64_t weightOffset;
Real b;
// NOTE(review): indexing gradOutput as edgeOffset * M_out + m_out (instead of
// using its strides) assumes a contiguous grad_output tensor — confirm.
Real value = gradOutput.data[edgeOffset * M_out + m_out];
for (s = 0; s < S; s++) {
b = basis.data[edgeOffset * S + s];
// Flat offset of gradWeight[weightIndex[e, s], 0, m_out].
weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
for (m_in = 0; m_in < M_in; m_in++) {
// Many edges may map to the same kernel-weight entry, so the accumulation
// into grad_weight must be atomic.
atomicAdd(&gradWeight.data[weightOffset + m_in * M_out], b * value * input.data[inputOffset + m_in * input.stride[1]]);
}
}
}
}
#include "generic/kernel.cu"
#include "THCGenerateFloatType.h"
#include "generic/kernel.cu"
#include "THCGenerateDoubleType.h"
#ifdef __cplusplus
extern "C" {
#endif
// C-linkage prototypes for the CUDA kernel launchers (Float/Double variants
// generated via the TH_CONCAT_4 macros above).
// B-spline basis evaluation: fills `basis` values and the corresponding
// `weight_index` entries for each pseudo-coordinate, for spline degrees 1-3.
void spline_linear_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_linear_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_kernel_Float (THCState *state, THCudaTensor *basis, THCudaLongTensor *weight_index, THCudaTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_kernel_Double(THCState *state, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index, THCudaDoubleTensor *pseudo, THCudaLongTensor *kernel_size, THCudaByteTensor *is_open_spline, int K);
// Spline weighting: forward pass and gradients w.r.t. input and weight.
void spline_weighting_forward_kernel_Float (THCState *state, THCudaTensor *output, THCudaTensor *input, THCudaTensor *weight, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_forward_kernel_Double(THCState *state, THCudaDoubleTensor *output, THCudaDoubleTensor *input, THCudaDoubleTensor *weight, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_input_kernel_Float (THCState *state, THCudaTensor *grad_input, THCudaTensor *grad_output, THCudaTensor *weight, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_input_kernel_Double(THCState *state, THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_output, THCudaDoubleTensor *weight, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_weight_kernel_Float (THCState *state, THCudaTensor *grad_weight, THCudaTensor *grad_output, THCudaTensor *input, THCudaTensor *basis, THCudaLongTensor *weight_index);
void spline_weighting_backward_weight_kernel_Double(THCState *state, THCudaDoubleTensor *grad_weight, THCudaDoubleTensor *grad_output, THCudaDoubleTensor *input, THCudaDoubleTensor *basis, THCudaLongTensor *weight_index);
#ifdef __cplusplus
}
#endif
// TH_TENSOR_DIM_APPLY4: iterate four tensors in lockstep, executing CODE once
// per position of all dimensions except DIMENSION (odometer-style traversal
// driven by TENSOR1's shape). Inside CODE, each tensor T exposes:
//   T##_data   - pointer to the start of the current slice along DIMENSION,
//   T##_stride - T's stride along DIMENSION,
//   T##_size   - T's extent along DIMENSION.
// This mirrors TH's built-in TH_TENSOR_DIM_APPLY3, extended to four tensors.
// NOTE(review): only TENSOR1's nDimension/size drive the counters while all
// four data pointers are advanced with their own strides — this presumes the
// tensors agree in shape on every non-DIMENSION axis; confirm at call sites.
#define TH_TENSOR_DIM_APPLY4(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, TYPE4, TENSOR4, DIMENSION, CODE) { \
TYPE1 *TENSOR1##_data = NULL; \
int64_t TENSOR1##_stride = 0, TENSOR1##_size = 0; \
TYPE2 *TENSOR2##_data = NULL; \
int64_t TENSOR2##_stride = 0, TENSOR2##_size = 0; \
TYPE3 *TENSOR3##_data = NULL; \
int64_t TENSOR3##_stride = 0, TENSOR3##_size = 0; \
TYPE4 *TENSOR4##_data = NULL; \
int64_t TENSOR4##_stride = 0, TENSOR4##_size = 0; \
\
int64_t *TH_TENSOR_DIM_APPLY_counter = NULL; \
int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
int TH_TENSOR_DIM_APPLY_i; \
\
TH_TENSOR_DIM_APPLY_counter = (int64_t*)THAlloc(sizeof(int64_t)*(TENSOR1->nDimension)); \
\
for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) { \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
\
TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \
TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \
TENSOR1##_size = TENSOR1->size[DIMENSION]; \
\
TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \
TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \
TENSOR2##_size = TENSOR2->size[DIMENSION]; \
\
TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \
TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \
TENSOR3##_size = TENSOR3->size[DIMENSION]; \
\
TENSOR4##_data = (TENSOR4)->storage->data+(TENSOR4)->storageOffset; \
TENSOR4##_stride = (TENSOR4)->stride[DIMENSION]; \
TENSOR4##_size = TENSOR4->size[DIMENSION]; \
\
while (!TH_TENSOR_DIM_APPLY_hasFinished) { \
CODE \
\
if (TENSOR1->nDimension == 1) break; \
\
for (TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) { \
if (TH_TENSOR_DIM_APPLY_i == DIMENSION) { \
if (TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) { \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
continue; \
} \
\
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR4##_data += TENSOR4->stride[TH_TENSOR_DIM_APPLY_i]; \
\
if (TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) { \
if (TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) { \
TH_TENSOR_DIM_APPLY_hasFinished = 1; \
break; \
} \
else { \
TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
TENSOR4##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR4->stride[TH_TENSOR_DIM_APPLY_i]; \
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
} \
} \
else break; \
} \
} \
THFree(TH_TENSOR_DIM_APPLY_counter); \
}
#include <TH/TH.h>
#include "THTensorDimApply.h"
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _, Real)
// SPLINE_BASIS_FORWARD: CPU forward pass of the B-spline basis of degree M.
// For every pseudo-coordinate row it computes, for each of the S basis
// products, the basis value `b` (product over the D pseudo-dimensions) and
// the flattened kernel-weight index `wi`. CODE is the degree-specific
// evaluation applied to `value` (the fractional coordinate) given `k_mod`
// (the per-dimension spline-segment selector).
// Per dimension: `value` is the pseudo-coordinate scaled to the kernel grid
// (closed splines keep the full kernel_size range, open splines subtract M),
// its integer part picks the weight cell and its fractional part feeds CODE.
#define SPLINE_BASIS_FORWARD(M, basis, weight_index, pseudo, kernel_size, is_open_spline, K, CODE) { \
int64_t *kernel_size_data = kernel_size->storage->data + kernel_size->storageOffset; \
uint8_t *is_open_spline_data = is_open_spline->storage->data + is_open_spline->storageOffset; \
int64_t S = THLongTensor_size(weight_index, 1); \
int64_t D = THTensor_(size)(pseudo, 1); \
int64_t s, d, k, k_mod, wi, offset; real b, value; \
\
TH_TENSOR_DIM_APPLY3(real, basis, int64_t, weight_index, real, pseudo, 1, TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM, \
for (s = 0; s < S; s++) { \
b = 1; wi = 0; k = s; offset = K; \
for (d = 0; d < D; d++) { \
offset /= kernel_size_data[d]; \
k_mod = k % (M + 1); \
k /= M + 1; \
value = *(pseudo_data + d * pseudo_stride) * (kernel_size_data[d] - M * is_open_spline_data[d]); \
wi += (((int64_t) value + k_mod) % kernel_size_data[d]) * offset; \
value -= floor(value); \
CODE \
b *= value; \
} \
basis_data[s * basis_stride] = b; \
weight_index_data[s * weight_index_stride] = wi; \
}) \
}
// SPLINE_BASIS_BACKWARD: CPU gradient of the degree-M B-spline basis w.r.t.
// the pseudo-coordinates. For each dimension d, the gradient is the sum over
// all S basis products of grad_basis[s] times the derivative of d's factor
// (GRAD_CODE applied to the fractional coordinate `value`) times the plain
// basis factors of every other dimension d_it (EVAL_CODE), finally rescaled
// by the coordinate-to-grid factor (kernel_size - M * is_open_spline).
// `quotient`/`k_mod` recover the per-dimension segment selector from the
// flattened product index s, matching the encoding in SPLINE_BASIS_FORWARD.
#define SPLINE_BASIS_BACKWARD(M, grad_pseudo, grad_basis, pseudo, kernel_size, is_open_spline, EVAL_CODE, GRAD_CODE) { \
int64_t *kernel_size_data = kernel_size->storage->data + kernel_size->storageOffset; \
uint8_t *is_open_spline_data = is_open_spline->storage->data + is_open_spline->storageOffset; \
int64_t D = THTensor_(size)(pseudo, 1); \
int64_t S = THTensor_(size)(grad_basis, 1); \
int64_t d, s, d_it, quotient, k_mod; real g_out, g, value;\
\
TH_TENSOR_DIM_APPLY3(real, grad_pseudo, real, grad_basis, real, pseudo, 1, TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM, \
for (d = 0; d < D; d++) { \
g_out = 0; \
quotient = pow(M + 1, d); \
for (s = 0; s < S; s++) { \
k_mod = (s / quotient) % (M + 1); \
value = *(pseudo_data + d * pseudo_stride) * (kernel_size_data[d] - M * is_open_spline_data[d]); \
value -= floor(value); \
GRAD_CODE \
g = value; \
\
for (d_it = 0; d_it < d; d_it++) { \
k_mod = (s / (int64_t) pow(M + 1, d_it)) % (M + 1); \
value = *(pseudo_data + d_it * pseudo_stride) * (kernel_size_data[d_it] - M * is_open_spline_data[d_it]); \
value -= floor(value); \
EVAL_CODE \
g *= value; \
} \
for (d_it = d + 1; d_it < D; d_it++) { \
k_mod = (s / (int64_t) pow(M + 1, d_it)) % (M + 1); \
value = *(pseudo_data + d_it * pseudo_stride) * (kernel_size_data[d_it] - M * is_open_spline_data[d_it]); \
value -= floor(value); \
EVAL_CODE \
g *= value; \
} \
g_out += g * *(grad_basis_data + s * grad_basis_stride); \
} \
grad_pseudo_data[d * grad_pseudo_stride] = g_out * (kernel_size_data[d] - M * is_open_spline_data[d]); \
} \
) \
}
// SPLINE_WEIGHTING: thin CPU wrapper that binds the channel/basis extents
// (M_in, M_out, S) plus scratch loop variables, then walks the three real
// tensors and the weight_index tensor in lockstep along dimension 1 via
// TH_TENSOR_DIM_APPLY4, executing the caller-supplied CODE per slice.
#define SPLINE_WEIGHTING(TENSOR1, TENSOR2, TENSOR3, weight_index, M_IN, M_OUT, M_S, CODE) { \
int64_t M_in = M_IN; int64_t M_out = M_OUT; int64_t S = M_S; \
int64_t m_in, m_out, s, w_idx; real value; \
TH_TENSOR_DIM_APPLY4(real, TENSOR1, real, TENSOR2, real, TENSOR3, int64_t, weight_index, 1, CODE) \
}
#include "generic/cpu.c"
#include "THGenerateFloatType.h"
#include "generic/cpu.c"
#include "THGenerateDoubleType.h"
// C prototypes for the CPU implementations (Float/Double variants generated
// from generic/cpu.c via the TH type-generation headers).
// B-spline basis evaluation, forward and backward, for spline degrees 1-3.
void spline_linear_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_linear_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_quadratic_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_Float ( THFloatTensor *basis, THLongTensor *weight_index, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_cubic_basis_forward_Double(THDoubleTensor *basis, THLongTensor *weight_index, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline, int K);
void spline_linear_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_linear_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_quadratic_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_quadratic_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_cubic_basis_backward_Float ( THFloatTensor *grad_pseudo, THFloatTensor *grad_basis, THFloatTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
void spline_cubic_basis_backward_Double(THDoubleTensor *grad_pseudo, THDoubleTensor *grad_basis, THDoubleTensor *pseudo, THLongTensor *kernel_size, THByteTensor *is_open_spline);
// Spline weighting: forward pass and gradients w.r.t. input, basis and weight.
// (Note the CPU side also exposes a backward_basis variant that the CUDA
// prototype list above does not.)
void spline_weighting_forward_Float ( THFloatTensor *output, THFloatTensor *input, THFloatTensor *weight, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_forward_Double(THDoubleTensor *output, THDoubleTensor *input, THDoubleTensor *weight, THDoubleTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_input_Float ( THFloatTensor *grad_input, THFloatTensor *grad_output, THFloatTensor *weight, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_input_Double(THDoubleTensor *grad_input, THDoubleTensor *grad_output, THDoubleTensor *weight, THDoubleTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_basis_Float ( THFloatTensor *grad_basis, THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *weight, THLongTensor *weight_index);
void spline_weighting_backward_basis_Double(THDoubleTensor *grad_basis, THDoubleTensor *grad_output, THDoubleTensor *input, THDoubleTensor *weight, THLongTensor *weight_index);
void spline_weighting_backward_weight_Float ( THFloatTensor *grad_weight, THFloatTensor *grad_output, THFloatTensor *input, THFloatTensor *basis, THLongTensor *weight_index);
void spline_weighting_backward_weight_Double(THDoubleTensor *grad_weight, THDoubleTensor *grad_output, THDoubleTensor *input, THDoubleTensor *basis, THLongTensor *weight_index);
#include <THC/THC.h>
#include "kernel.h"
#define spline_(NAME) TH_CONCAT_4(spline_, NAME, _cuda_, Real)
#define spline_kernel_(NAME) TH_CONCAT_4(spline_, NAME, _kernel_, Real)
extern THCState *state;
#include "generic/cuda.c"
#include "THCGenerateFloatType.h"
#include "generic/cuda.c"
#include "THCGenerateDoubleType.h"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment