Commit 764cd87d authored by Jan Eric Lenssen's avatar Jan Eric Lenssen
Browse files

SplineConv forward pass is now approximately 10 times faster

parent 4b92ba74
...@@ -3,13 +3,105 @@ import torch ...@@ -3,13 +3,105 @@ import torch
from ....utils.cuda import (cuda_num_threads, Stream, Dtype, load_kernel, from ....utils.cuda import (cuda_num_threads, Stream, Dtype, load_kernel,
kernel_loop, get_blocks) kernel_loop, get_blocks)
_spline_kernel = kernel_loop + ''' _spline_kernel_linear = kernel_loop + '''
extern "C" extern "C"
__global__ void spline_kernel( __global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index, const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) { const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, ${num_threads}) { CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int bot;
int top;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 2;
k_idx >>= 1;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - is_open_spline[d_idx]);
frac = value - floor(value);
a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
bot = int(floor(value));
top = (bot + 1) % kernel_size[d_idx];
bot %= kernel_size[d_idx];
i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
# CUDA kernel template for the quadratic (degree-2) B-spline basis.
# Placeholders (${...}) are substituted at compile time by `load_kernel`;
# `num_threads` is passed as a launch argument so one compiled kernel can be
# reused for any number of edges (compile-once is the speedup of this commit).
# Each thread handles one (edge, basis-combination) pair:
#   amount[idx] <- product over dims of one of the three quadratic basis
#                  values 0.5*(1-t)^2, -t^2+t+0.5, 0.5*t^2
#   index[idx]  <- flattened index of the kernel weight that value scales.
_spline_kernel_quadratic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 3;
k_idx /= 3;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
frac = value - floor(value);
if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
else a *= 0.5 * frac * frac;
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
i += pos * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
_spline_kernel_cubic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads}) {
const int e_idx = idx / ${k_max}; const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max}; int k_idx = idx % ${k_max};
...@@ -52,35 +144,47 @@ const long* kernel_size, const long* is_open_spline) { ...@@ -52,35 +144,47 @@ const long* kernel_size, const long* is_open_spline) {
} }
''' '''
def get_basis_kernel(k_max, K, dim, degree):
    """Compile and return the CUDA B-spline basis kernel for ``degree``.

    The kernel source is templated once with the compile-time constants
    ``k_max``/``dim``/``K``; the thread count is supplied at launch time,
    so the compiled kernel can be cached and reused across forward calls.

    Args:
        k_max: Number of basis combinations per edge ((degree+1)**dim).
        K: Total number of kernel weights (product of the kernel sizes).
        dim: Dimensionality of the pseudo-coordinates.
        degree: B-spline degree; must be 1, 2 or 3.

    Returns:
        The compiled ``spline_kernel`` callable.

    Raises:
        ValueError: If ``degree`` is not 1, 2 or 3.  (The previous code
            silently fell back to the linear kernel, computing a wrong
            basis for unsupported degrees.)
    """
    sources = {
        1: _spline_kernel_linear,
        2: _spline_kernel_quadratic,
        3: _spline_kernel_cubic,
    }
    try:
        source = sources[degree]
    except KeyError:
        raise ValueError('Unsupported B-spline degree: {}'.format(degree))
    # A throwaway CUDA tensor pins compilation to the current device.
    cuda_tensor = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(cuda_tensor):
        return load_kernel(
            'spline_kernel',
            source,
            Dtype='float',
            k_max=k_max,
            dim=dim,
            K=K)
def compute_spline_basis(input, kernel_size, is_open_spline, K, basis_kernel):
assert input.is_cuda and kernel_size.is_cuda and is_open_spline.is_cuda assert input.is_cuda and kernel_size.is_cuda and is_open_spline.is_cuda
input = input.unsqueeze(1) if len(input.size()) < 2 else input input = input.unsqueeze(1) if len(input.size()) < 2 else input
num_edges, dim = input.size() num_edges, dim = input.size()
k_max = 4**dim k_max = 2**dim
amount = input.new(num_edges, k_max) amount = input.new(num_edges, k_max)
index = input.new(num_edges, k_max).long() index = input.new(num_edges, k_max).long()
num_threads = amount.numel() num_threads = amount.numel()
with torch.cuda.device_of(input): with torch.cuda.device_of(input):
f = load_kernel( basis_kernel(block=(cuda_num_threads, 1, 1),
'spline_kernel',
_spline_kernel,
Dtype=Dtype(input),
num_threads=num_threads,
k_max=k_max,
dim=dim,
K=K)
f(block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1), grid=(get_blocks(num_threads), 1, 1),
args=[ args=[
input.data_ptr(), input.data_ptr(),
amount.data_ptr(), amount.data_ptr(),
index.data_ptr(), index.data_ptr(),
kernel_size.data_ptr(), kernel_size.data_ptr(),
is_open_spline.data_ptr() is_open_spline.data_ptr(),
num_threads
], ],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
......
...@@ -4,11 +4,10 @@ if torch.cuda.is_available(): ...@@ -4,11 +4,10 @@ if torch.cuda.is_available():
from .edgewise_spline_weighting_gpu import EdgewiseSplineWeightingGPU from .edgewise_spline_weighting_gpu import EdgewiseSplineWeightingGPU
def edgewise_spline_weighting(input, weight, amount, index): def edgewise_spline_weighting(input, weight, amount, index, k_fw, k_bw):
if input.is_cuda: if input.is_cuda:
K, M_in, M_out = weight.size() K, M_in, M_out = weight.size()
k_max = amount.size(1) return EdgewiseSplineWeightingGPU(amount, index, K, M_in, M_out
return EdgewiseSplineWeightingGPU(amount, index, K, M_in, M_out, ,k_fw,k_bw)(input, weight)
k_max)(input, weight)
else: else:
raise NotImplementedError raise NotImplementedError
...@@ -95,9 +95,34 @@ const long* index, int num_threads) { ...@@ -95,9 +95,34 @@ const long* index, int num_threads) {
} }
''' '''
def get_forward_kernel(M_in,M_out,k_max):
cuda_tensor = torch.FloatTensor([1]).cuda()
with torch.cuda.device_of(cuda_tensor):
f_fw = load_kernel(
'edgewise_spline_weighting_forward_kernel',
_edgewise_spline_weighting_forward_kernel,
Dtype='float',
M_in=M_in,
M_out=M_out,
k_max=k_max)
return f_fw
def get_backward_kernel(M_in,M_out,k_max, K):
cuda_tensor = torch.FloatTensor([1]).cuda()
with torch.cuda.device_of(cuda_tensor):
f_bw = load_kernel(
'edgewise_spline_weighting_backward_kernel',
_edgewise_spline_weighting_backward_kernel,
Dtype='float',
M_in=M_in,
M_out=M_out,
k_max=k_max,
K=K)
return f_bw
class EdgewiseSplineWeightingGPU(Function): class EdgewiseSplineWeightingGPU(Function):
def __init__(self, amount, index, K, M_in, M_out, k_max): def __init__(self, amount, index, K, M_in, M_out, k_fw, k_bw):
super(EdgewiseSplineWeightingGPU, self).__init__() super(EdgewiseSplineWeightingGPU, self).__init__()
assert amount.is_cuda and index.is_cuda assert amount.is_cuda and index.is_cuda
self.amount = amount self.amount = amount
...@@ -105,23 +130,9 @@ class EdgewiseSplineWeightingGPU(Function): ...@@ -105,23 +130,9 @@ class EdgewiseSplineWeightingGPU(Function):
self.M_in = M_in self.M_in = M_in
self.M_out = M_out self.M_out = M_out
self.K = K self.K = K
with torch.cuda.device_of(amount): self.f_fw = k_fw
self.f_fw = load_kernel( self.f_bw = k_bw
'edgewise_spline_weighting_forward_kernel',
_edgewise_spline_weighting_forward_kernel,
Dtype=Dtype(amount),
M_in=M_in,
M_out=M_out,
k_max=k_max)
self.f_bw = load_kernel(
'edgewise_spline_weighting_backward_kernel',
_edgewise_spline_weighting_backward_kernel,
Dtype=Dtype(amount),
M_in=M_in,
M_out=M_out,
k_max=k_max,
K=K)
def forward(self, input, weight): def forward(self, input, weight):
assert input.is_cuda and weight.is_cuda assert input.is_cuda and weight.is_cuda
......
import torch import torch
if torch.cuda.is_available(): if torch.cuda.is_available():
from .spline_linear_gpu import spline_linear_gpu from .compute_spline_basis import compute_spline_basis
from .spline_quadratic_gpu import spline_quadratic_gpu from .spline_quadratic_gpu import spline_quadratic_gpu
from .spline_cubic_gpu import spline_cubic_gpu from .spline_cubic_gpu import spline_cubic_gpu
def spline(input, kernel_size, is_open_spline, K, degree): def spline(input, kernel_size, is_open_spline, K, degree, basis_kernel):
if input.is_cuda: if input.is_cuda:
if degree == 1: return compute_spline_basis(input, kernel_size, is_open_spline, K, basis_kernel)
return spline_linear_gpu(input, kernel_size, is_open_spline, K)
if degree == 2:
return spline_quadratic_gpu(input, kernel_size, is_open_spline, K)
if degree == 3:
return spline_cubic_gpu(input, kernel_size, is_open_spline, K)
else:
raise NotImplementedError()
else: else:
raise NotImplementedError() raise NotImplementedError()
...@@ -13,8 +13,11 @@ def spline_conv( ...@@ -13,8 +13,11 @@ def spline_conv(
kernel_size, kernel_size,
is_open_spline, is_open_spline,
K, K,
forward_kernel,
backward_kernel,
basis_kernel,
degree=1, degree=1,
bias=None): bias=None,):
if input.dim() == 1: if input.dim() == 1:
input = input.unsqueeze(1) input = input.unsqueeze(1)
...@@ -25,8 +28,8 @@ def spline_conv( ...@@ -25,8 +28,8 @@ def spline_conv(
# Get features for every end vertex with shape [|E| x M_in]. # Get features for every end vertex with shape [|E| x M_in].
output = input[col] output = input[col]
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out]. # Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
amount, index = spline(values, kernel_size, is_open_spline, K, degree) amount, index = spline(values, kernel_size, is_open_spline, K, degree, basis_kernel)
output = edgewise_spline_weighting(output, weight[:-1], amount, index) output = edgewise_spline_weighting(output, weight[:-1], amount, index, forward_kernel, backward_kernel)
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to # Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# [n x M_out] feature matrix. # [n x M_out] feature matrix.
......
...@@ -4,7 +4,8 @@ import torch ...@@ -4,7 +4,8 @@ import torch
from numpy.testing import assert_equal, assert_almost_equal from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available(): if torch.cuda.is_available():
from .spline_cubic_gpu import spline_cubic_gpu from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineQuadraticGPUTest(unittest.TestCase): class SplineQuadraticGPUTest(unittest.TestCase):
...@@ -13,8 +14,11 @@ class SplineQuadraticGPUTest(unittest.TestCase): ...@@ -13,8 +14,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([7]) kernel_size = torch.cuda.LongTensor([7])
is_open_spline = torch.cuda.LongTensor([1]) is_open_spline = torch.cuda.LongTensor([1])
k_max = 4
a1, i1 = spline_cubic_gpu(input, kernel_size, is_open_spline, 7) K = 7
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 3)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 7, basis_kernel)
a2 = [ a2 = [
[0.1667, 0.6667, 0.1667, 0], [0.1667, 0.6667, 0.1667, 0],
...@@ -36,8 +40,11 @@ class SplineQuadraticGPUTest(unittest.TestCase): ...@@ -36,8 +40,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4]) kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0]) is_open_spline = torch.cuda.LongTensor([0])
k_max = 4
a1, i1 = spline_cubic_gpu(input, kernel_size, is_open_spline, 4) K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 3)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4, basis_kernel)
a2 = [ a2 = [
[0.1667, 0.6667, 0.1667, 0], [0.1667, 0.6667, 0.1667, 0],
......
import torch
from ....utils.cuda import (cuda_num_threads, Stream, Dtype, load_kernel,
kernel_loop, get_blocks)
# CUDA kernel template for the linear (degree-1) B-spline basis.
# All ${...} placeholders — including ${num_threads} — are baked in at
# compile time by `load_kernel`, so the kernel is recompiled per call.
# Each thread handles one (edge, basis-combination) pair: it accumulates
# the product of the 1-D linear basis values ((1-t) or t per dimension)
# into amount[idx], and the flattened kernel-weight index into index[idx].
_spline_kernel = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
CUDA_KERNEL_LOOP(idx, ${num_threads}) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int bot;
int top;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 2;
k_idx >>= 1;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - is_open_spline[d_idx]);
frac = value - floor(value);
a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
bot = int(floor(value));
top = (bot + 1) % kernel_size[d_idx];
bot %= kernel_size[d_idx];
i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
def spline_linear_gpu(input, kernel_size, is_open_spline, K):
    """Evaluate the linear B-spline basis for every edge on the GPU.

    Returns a pair ``(amount, index)`` of ``[num_edges x 2**dim]`` tensors
    holding the basis weights and the flattened kernel-weight indices.
    """
    assert input.is_cuda and kernel_size.is_cuda and is_open_spline.is_cuda
    # Promote a flat tensor of pseudo-coordinates to [num_edges x 1].
    if input.dim() < 2:
        input = input.unsqueeze(1)
    num_edges, dim = input.size()
    k_max = 2 ** dim  # two linear basis functions per dimension
    amount = input.new(num_edges, k_max)
    index = input.new(num_edges, k_max).long()
    num_threads = amount.numel()
    with torch.cuda.device_of(input):
        # Template and compile the kernel for this problem size.
        kernel_fn = load_kernel(
            'spline_kernel',
            _spline_kernel,
            Dtype=Dtype(input),
            num_threads=num_threads,
            k_max=k_max,
            dim=dim,
            K=K)
        launch_args = [
            input.data_ptr(),
            amount.data_ptr(),
            index.data_ptr(),
            kernel_size.data_ptr(),
            is_open_spline.data_ptr()
        ]
        kernel_fn(block=(cuda_num_threads, 1, 1),
                  grid=(get_blocks(num_threads), 1, 1),
                  args=launch_args,
                  stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
    return amount, index
...@@ -4,7 +4,8 @@ import torch ...@@ -4,7 +4,8 @@ import torch
from numpy.testing import assert_equal, assert_almost_equal from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available(): if torch.cuda.is_available():
from .spline_linear_gpu import spline_linear_gpu from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineLinearGPUTest(unittest.TestCase): class SplineLinearGPUTest(unittest.TestCase):
...@@ -13,8 +14,11 @@ class SplineLinearGPUTest(unittest.TestCase): ...@@ -13,8 +14,11 @@ class SplineLinearGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([5]) kernel_size = torch.cuda.LongTensor([5])
is_open_spline = torch.cuda.LongTensor([1]) is_open_spline = torch.cuda.LongTensor([1])
k_max = 2
a1, i1 = spline_linear_gpu(input, kernel_size, is_open_spline, 5) K = 5
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 1)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 5, basis_kernel)
a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]] a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [4, 3], [4, 3], [0, 4]] i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [4, 3], [4, 3], [0, 4]]
...@@ -27,8 +31,11 @@ class SplineLinearGPUTest(unittest.TestCase): ...@@ -27,8 +31,11 @@ class SplineLinearGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4]) kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0]) is_open_spline = torch.cuda.LongTensor([0])
k_max = 2
a1, i1 = spline_linear_gpu(input, kernel_size, is_open_spline, 4) K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 1)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4, basis_kernel)
a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]] a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [0, 3], [0, 3], [1, 0]] i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [0, 3], [0, 3], [1, 0]]
......
import torch
from ....utils.cuda import (cuda_num_threads, Stream, Dtype, load_kernel,
kernel_loop, get_blocks)
# CUDA kernel template for the quadratic (degree-2) B-spline basis.
# All ${...} placeholders — including ${num_threads} — are baked in at
# compile time by `load_kernel`, so the kernel is recompiled per call.
# Each thread handles one (edge, basis-combination) pair: it accumulates
# the product over dimensions of one of the three quadratic basis values
# (0.5*(1-t)^2, -t^2+t+0.5, 0.5*t^2) into amount[idx], and the flattened
# kernel-weight index into index[idx].
_spline_kernel = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
CUDA_KERNEL_LOOP(idx, ${num_threads}) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 3;
k_idx /= 3;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
frac = value - floor(value);
if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
else a *= 0.5 * frac * frac;
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
i += pos * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
def spline_quadratic_gpu(input, kernel_size, is_open_spline, K):
    """Evaluate the quadratic B-spline basis for every edge on the GPU.

    Returns a pair ``(amount, index)`` of ``[num_edges x 3**dim]`` tensors
    holding the basis weights and the flattened kernel-weight indices.
    """
    assert input.is_cuda and kernel_size.is_cuda and is_open_spline.is_cuda
    # Promote a flat tensor of pseudo-coordinates to [num_edges x 1].
    if input.dim() < 2:
        input = input.unsqueeze(1)
    num_edges, dim = input.size()
    k_max = 3 ** dim  # three quadratic basis functions per dimension
    amount = input.new(num_edges, k_max)
    index = input.new(num_edges, k_max).long()
    num_threads = amount.numel()
    with torch.cuda.device_of(input):
        # Template and compile the kernel for this problem size.
        kernel_fn = load_kernel(
            'spline_kernel',
            _spline_kernel,
            Dtype=Dtype(input),
            num_threads=num_threads,
            k_max=k_max,
            dim=dim,
            K=K)
        launch_args = [
            input.data_ptr(),
            amount.data_ptr(),
            index.data_ptr(),
            kernel_size.data_ptr(),
            is_open_spline.data_ptr()
        ]
        kernel_fn(block=(cuda_num_threads, 1, 1),
                  grid=(get_blocks(num_threads), 1, 1),
                  args=launch_args,
                  stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
    return amount, index
...@@ -4,7 +4,8 @@ import torch ...@@ -4,7 +4,8 @@ import torch
from numpy.testing import assert_equal, assert_almost_equal from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available(): if torch.cuda.is_available():
from .spline_quadratic_gpu import spline_quadratic_gpu from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineQuadraticGPUTest(unittest.TestCase): class SplineQuadraticGPUTest(unittest.TestCase):
...@@ -13,8 +14,12 @@ class SplineQuadraticGPUTest(unittest.TestCase): ...@@ -13,8 +14,12 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([6]) kernel_size = torch.cuda.LongTensor([6])
is_open_spline = torch.cuda.LongTensor([1]) is_open_spline = torch.cuda.LongTensor([1])
k_max = 3
K = 6
dim=1
basis_kernel = get_basis_kernel(k_max,K,dim,2)
a1, i1 = spline_quadratic_gpu(input, kernel_size, is_open_spline, 6) a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 6, basis_kernel)
a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0], a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
[0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]] [0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
...@@ -29,8 +34,11 @@ class SplineQuadraticGPUTest(unittest.TestCase): ...@@ -29,8 +34,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]) input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4]) kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0]) is_open_spline = torch.cuda.LongTensor([0])
k_max = 3
a1, i1 = spline_quadratic_gpu(input, kernel_size, is_open_spline, 4) K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 2)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4, basis_kernel)
a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0], a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
[0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]] [0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment