"encoding/kernel/generic/syncbn_kernel.h" did not exist on "aa9af7fd12da3fe58d17fb758851eb8022af290f"
Commit 952a4cad authored by rusty1s

removed old version

parent b189dba7
'''
import unittest
import torch
from torch.autograd import Variable, gradcheck
from numpy.testing import assert_equal

from .spline import spline

if torch.cuda.is_available():
    from .edgewise_spline_weighting_gpu import EdgewiseSplineWeightingGPU


class EdgewiseSplineWeightingGPUTest(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_forward(self):
        input = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
        input = torch.cuda.FloatTensor(input)
        kernel_size = torch.cuda.LongTensor([3, 4])
        is_open_spline = torch.cuda.LongTensor([1, 0])
        amount, index = spline(input, kernel_size, is_open_spline, 12, 1)

        input = torch.cuda.FloatTensor([[1, 2], [3, 4], [5, 6], [7, 8]])
        weight = torch.arange(0.5, 0.5 * 25, step=0.5).view(12, 2, 1).cuda()
        input, weight = Variable(input), Variable(weight)

        op = EdgewiseSplineWeightingGPU(amount, index)
        out = op(input, weight)

        expected_out = [
            [0.25 * (1 * (0.5 + 1.5 + 4.5 + 5.5) + 2 * (1 + 2 + 5 + 6))],
            [0.25 * (3 * (1.5 + 2.5 + 5.5 + 6.5) + 4 * (2 + 3 + 6 + 7))],
            [0.25 * (5 * (6.5 + 7.5 + 10.5 + 11.5) + 6 * (7 + 8 + 11 + 12))],
            [0.25 * (7 * (4.5 + 7.5 + 8.5 + 11.5) + 8 * (5 + 8 + 9 + 12))],
        ]

        assert_equal(out.cpu().data.numpy(), expected_out)

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_backward(self):
        input = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
        input = torch.cuda.DoubleTensor(input)
        kernel_size = torch.cuda.LongTensor([3, 4])
        is_open_spline = torch.cuda.LongTensor([1, 0])
        amount, index = spline(input, kernel_size, is_open_spline, 12, 1)

        input = torch.randn(4, 2).double().cuda()
        weight = torch.randn(12, 2, 1).double().cuda()
        input = Variable(input, requires_grad=True)
        weight = Variable(weight, requires_grad=True)

        op = EdgewiseSplineWeightingGPU(amount, index)
        test = gradcheck(op, (input, weight), eps=1e-6, atol=1e-4)
        self.assertTrue(test)
'''
import torch
from torch.autograd import Variable

from .spline_conv_gpu import SplineConvGPU


def spline_conv(
        adj,  # PyTorch tensor (!bp_to_adj) or PyTorch Variable (bp_to_adj)
        input,  # PyTorch Variable
        weight,  # PyTorch Variable
        kernel_size,  # Remaining arguments: tensors or plain Python values
        is_open_spline,
        K,
        weighting_kernel,
        weighting_backward_kernel,
        basis_kernel,
        basis_backward_kernel=None,
        degree=1,
        bias=None):

    if input.dim() == 1:
        input = input.unsqueeze(1)

    values = adj['values']
    row, col = adj['indices']

    # Get features for every end vertex with shape [|E| x M_in].
    output = input[col]

    # Backpropagate to the adjacency values iff they are wrapped as Variables.
    bp_to_adj = not torch.is_tensor(values)

    # Convert the [|E| x M_in] feature matrix to an [|E| x M_out] one.
    if output.is_cuda:
        if bp_to_adj:
            output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
                                   basis_kernel, basis_backward_kernel,
                                   weighting_kernel, weighting_backward_kernel,
                                   bp_to_adj)(output, weight[:-1], values)
        else:
            output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
                                   basis_kernel, basis_backward_kernel,
                                   weighting_kernel, weighting_backward_kernel,
                                   bp_to_adj, values)(output, weight[:-1])
    else:
        # A CPU implementation is not available.
        raise NotImplementedError()

    # Convolution via `scatter_add`. Converts the [|E| x M_out] feature matrix
    # to an [n x M_out] feature matrix.
    zero = output.data.new(adj['size'][1], output.size(1)).fill_(0.0)
    zero = Variable(zero) if not torch.is_tensor(output) else zero
    r = row.view(-1, 1).expand(row.size(0), output.size(1))
    output = zero.scatter_add_(0, Variable(r), output)

    # Weight root node features by multiplying them with the root weight.
    output += torch.mm(input, weight[-1])

    # Normalize output by node degree.
    ones = output.data.new(values.size(0)).fill_(1)
    zero = output.data.new(output.size(0)).fill_(0)
    degree = zero.scatter_add_(0, row, ones)
    degree = torch.clamp(degree, min=1)
    output = output / Variable(degree.view(-1, 1))

    if bias is not None:
        output += bias

    return output
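
# Minimal usage sketch (illustrative only, not exercised by the tests): how
# `spline_conv` might be driven with the kernel getters from
# `.spline_conv_gpu` and the adjacency-dict layout used in the tests
# (`indices`, `values`, `size`). Hyper-parameters mirror the backward test
# below: K = 9 kernel weight matrices, 2 input features, 1 output feature.
# The graph, features, and weights here are made-up example data.
def _spline_conv_example():
    from .spline_conv_gpu import (get_basis_kernel, get_basis_backward_kernel,
                                  get_weighting_forward_kernel,
                                  get_weighting_backward_kernel)

    M_in, M_out, dim, degree, K = 2, 1, 2, 1, 9
    k_max = (degree + 1)**dim

    # Four edges from node 0 to nodes 1..4, with pseudo-coordinates in [0, 1].
    edges = torch.cuda.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]])
    pseudo = Variable(torch.cuda.FloatTensor(4, dim).uniform_(0, 1))
    adj = {'indices': edges, 'values': pseudo, 'size': torch.Size([5, 5, dim])}

    x = Variable(torch.cuda.FloatTensor(5, M_in).uniform_(-1, 1))
    # K weight matrices for the spline kernel plus one root weight matrix.
    w = Variable(torch.cuda.FloatTensor(K + 1, M_in, M_out).uniform_(-1, 1))

    fw_k = get_weighting_forward_kernel(M_in, M_out, k_max)
    bw_k = get_weighting_backward_kernel(M_in, M_out, k_max, K, bp_to_adj=True)
    basis_fw_k = get_basis_kernel(k_max, K, dim, degree)
    basis_bw_k = get_basis_backward_kernel(k_max, K, dim, degree)

    kernel_size = torch.cuda.LongTensor([3, 3])
    is_open_spline = torch.cuda.LongTensor([1, 0])

    out = spline_conv(adj, x, w, kernel_size, is_open_spline, K, fw_k, bw_k,
                      basis_fw_k, basis_bw_k, degree=degree)
    print(out.size())  # [5 x M_out]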
from __future__ import division

import unittest
import torch
from torch.autograd import Variable, gradcheck

from .spline_conv_gpu import (get_basis_kernel, get_basis_backward_kernel,
                              get_weighting_forward_kernel,
                              get_weighting_backward_kernel, SplineConvGPU)


class SplineConvTest(unittest.TestCase):
    '''
    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_forward_gpu(self):
        edges = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]])
        values = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
        values = torch.FloatTensor(values).double()
        adj = {'indices': edges.cuda(), 'values': Variable(values.cuda()),
               'size': torch.Size([5, 5, 2])}
        kernel_size = torch.cuda.LongTensor([3, 4])
        is_open_spline = torch.cuda.LongTensor([1, 0])

        input = torch.DoubleTensor([[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]])
        weight = torch.arange(0.5, 0.5 * 27, step=0.5).view(13, 2, 1).double()
        input, weight = input.cuda(), weight.cuda()
        input, weight = Variable(input), Variable(weight)

        row, col = adj['indices']
        output = input[col]

        K = 12
        in_features = 2
        out_features = 1
        degree = 1
        dim = 2
        k_max = (degree + 1)**dim
        fw_k = get_weighting_forward_kernel(in_features, out_features, k_max)
        bw_k = get_weighting_backward_kernel(in_features, out_features, k_max,
                                             K, True)
        basis_fw_k = get_basis_kernel(k_max, K, dim, degree)
        basis_bw_k = get_basis_backward_kernel(k_max, K, dim, degree)

        # output = spline_conv(
        #     adj, input, weight, kernel_size, is_open_spline, K, fw_k, bw_k,
        #     basis_fw_k, basis_bw_k, bp_to_adj=True)

        values = adj['values']
        output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
                               basis_fw_k, basis_bw_k, fw_k, bw_k,
                               bp_to_adj=True)(output, weight, values)

        zero = output.data.new(adj['size'][1], output.size(1)).fill_(0.0)
        zero = Variable(zero) if not torch.is_tensor(output) else zero
        r = row.view(-1, 1).expand(row.size(0), output.size(1))
        output = zero.scatter_add_(0, Variable(r), output)

        # Weight root node features by multiplying them with the root weight.
        output += torch.mm(input, weight[-1])

        # Normalize output by node degree.
        ones = values.data.new(values.size(0)).fill_(1)
        zero = values.data.new(output.size(0)).fill_(0)
        degree = zero.scatter_add_(0, row, ones)
        degree = torch.clamp(degree, min=1)
        output = output / Variable(degree.view(-1, 1))

        expected_output = [
            [(12.5 * 9 + 13 * 10 + 266) / 4],
            [12.5 * 1 + 13 * 2],
            [12.5 * 3 + 13 * 4],
            [12.5 * 5 + 13 * 6],
            [12.5 * 7 + 13 * 8],
        ]

        assert_almost_equal(output.cpu().data.numpy(), expected_output, 1)

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_backward(self):
        kernel_size = torch.cuda.LongTensor([3, 4])
        is_open_spline = torch.cuda.LongTensor([1, 1])

        input = torch.randn(4, 2).double().cuda()
        weight = torch.randn(12, 2, 1).double().cuda()
        values = torch.FloatTensor(4, 2).uniform_(0, 1).double().cuda()
        print(values)
        input = Variable(input, requires_grad=True)
        weight = Variable(weight, requires_grad=True)
        values = Variable(values, requires_grad=True)

        K = 12
        in_features = 2
        out_features = 1
        degree = 1
        dim = 2
        k_max = (degree + 1)**dim
        fw_k = get_weighting_forward_kernel(in_features, out_features, k_max)
        bw_k = get_weighting_backward_kernel(in_features, out_features, k_max,
                                             K, bp_to_adj=True)
        basis_fw_k = get_basis_kernel(k_max, K, dim, degree)
        basis_bw_k = get_basis_backward_kernel(k_max, K, dim, degree)

        op = SplineConvGPU(kernel_size, is_open_spline, K, degree,
                           basis_fw_k, basis_bw_k, fw_k, bw_k, bp_to_adj=True)
        print(op(input, weight, values))
        # test = gradcheck(op, (input, weight, values), eps=1e-6, atol=1e-4)
        # self.assertTrue(test)
    '''

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_backward(self):
        input = torch.randn(4, 2).double().cuda()
        weight = torch.randn(9, 2, 1).double().cuda()
        values = torch.FloatTensor(4, 2).uniform_(0, 1).double().cuda()
        print(values)
        input = Variable(input, requires_grad=True)
        weight = Variable(weight, requires_grad=True)
        values = Variable(values, requires_grad=True)

        K = 9
        in_features = 2
        out_features = 1
        degree = 1
        dim = 2
        k_max = (degree + 1)**dim
        kernel_size = torch.cuda.LongTensor([3, 3])
        is_open_spline = torch.cuda.LongTensor([1, 0])

        fw_k = get_weighting_forward_kernel(
            in_features, out_features, k_max, dtype='double')
        bw_k = get_weighting_backward_kernel(
            in_features, out_features, k_max, K, True, dtype='double')
        basis_fw_k = get_basis_kernel(k_max, K, dim, degree, dtype='double')
        basis_bw_k = get_basis_backward_kernel(
            k_max, K, dim, degree, dtype='double')

        op = SplineConvGPU(
            kernel_size,
            is_open_spline,
            K,
            degree,
            basis_fw_k,
            basis_bw_k,
            fw_k,
            bw_k,
            bp_to_adj=True)

        test = gradcheck(op, (input, weight, values), eps=1e-6, atol=1e-4)
        self.assertTrue(test)
import torch
from torch.autograd import Function

from ....utils.cuda import (cuda_num_threads, Stream, load_kernel, kernel_loop,
                            get_blocks)

_edgewise_spline_weighting_forward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_forward_kernel(
    const ${Dtype}* input, const ${Dtype}* weight, ${Dtype}* output,
    const ${Dtype}* amount, const long* index, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};

    ${Dtype} result = 0.0;
    ${Dtype} w;
    ${Dtype} f;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;

    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];

      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;

        w = weight[w_idx];
        f = input[e_idx * ${M_in} + m_in_idx];

        result += b * w * f;
      }
    }

    output[idx] = result;
  }
}
'''
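
# Illustrative, unused reference implementation: a minimal plain-PyTorch
# sketch of what the forward weighting kernel above computes. For every edge
# e it accumulates, over the k_max non-zero basis products,
# amount[e, k] * (input[e] @ weight[index[e, k]]). Assumes `input` is
# [|E| x M_in], `weight` is [K x M_in x M_out] and `amount`/`index` are
# [|E| x k_max]; the function name is made up for exposition.
def _edgewise_spline_weighting_reference(input, weight, amount, index):
    num_edges, k_max = amount.size()
    output = 0
    for k in range(k_max):
        w = weight[index[:, k]]                          # [|E| x M_in x M_out]
        f = torch.bmm(input.unsqueeze(1), w).squeeze(1)  # [|E| x M_out]
        output = output + amount[:, k].unsqueeze(1) * f
    return output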
_edgewise_spline_weighting_backward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
    const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
    const ${Dtype}* input, const ${Dtype}* weight, const ${Dtype}* amount,
    const long* index, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};

    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;

    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];

      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;

        w = weight[w_idx];

        // Calculate the input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: the `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better and would avoid the `atomicAdd`.

        // Calculate the weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * g;
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not very efficient either, but unavoidable here.
      }
    }
  }
}
'''
_edgewise_spline_weighting_backward_kernel_bp2adj = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
    const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
    ${Dtype}* grad_amount, const ${Dtype}* input, const ${Dtype}* weight,
    const ${Dtype}* amount, const long* index, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};

    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;

    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      ${Dtype} adj_g = 0.0;

      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;

        w = weight[w_idx];

        // Calculate the input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: the `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better and would avoid the `atomicAdd`.

        // Calculate the weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * g;
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not very efficient either, but unavoidable here.

        // Calculate the gradient of the B-spline basis tensor product.
        adj_g += g * f * w;
      }

      atomicAdd(&(grad_amount[e_idx * ${k_max} + k_idx]), adj_g);
    }
  }
}
'''
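
# Illustrative, unused reference implementation: the gradients accumulated by
# the backward kernels above, written as a minimal plain-PyTorch sketch. With
# out[e] = sum_k b_ek * input[e] @ W_{c_ek}, the chain rule gives
#   grad_input[e] += b_ek * W_{c_ek} @ grad_out[e]
#   grad_W_{c_ek} += b_ek * outer(input[e], grad_out[e])
#   grad_b_ek      = grad_out[e] . (input[e] @ W_{c_ek})
# (grad_b is only produced by the bp2adj variant). The function name is made
# up for exposition.
def _edgewise_spline_weighting_backward_reference(grad_out, input, weight,
                                                  amount, index):
    num_edges, k_max = amount.size()
    grad_input = torch.zeros_like(input)
    grad_weight = torch.zeros_like(weight)
    grad_amount = torch.zeros_like(amount)
    for e in range(num_edges):
        for k in range(k_max):
            b, c = amount[e, k], int(index[e, k])
            w = weight[c]  # [M_in x M_out]
            grad_input[e] += b * torch.mv(w, grad_out[e])
            grad_weight[c] += b * torch.ger(input[e], grad_out[e])
            grad_amount[e, k] = torch.dot(grad_out[e],
                                          torch.mv(w.t(), input[e]))
    return grad_input, grad_weight, grad_amount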
def get_weighting_forward_kernel(M_in, M_out, k_max, dtype='float'):
    cuda_tensor = torch.FloatTensor([1]).cuda()
    kernel = _edgewise_spline_weighting_forward_kernel

    with torch.cuda.device_of(cuda_tensor):
        f_fw = load_kernel(
            'edgewise_spline_weighting_forward_kernel',
            kernel,
            Dtype=dtype,
            M_in=M_in,
            M_out=M_out,
            k_max=k_max)

    return f_fw


def get_weighting_backward_kernel(M_in,
                                  M_out,
                                  k_max,
                                  K,
                                  bp_to_adj=False,
                                  dtype='float'):
    cuda_tensor = torch.FloatTensor([1]).cuda()

    if bp_to_adj:
        kernel = _edgewise_spline_weighting_backward_kernel_bp2adj
    else:
        kernel = _edgewise_spline_weighting_backward_kernel

    with torch.cuda.device_of(cuda_tensor):
        f_bw = load_kernel(
            'edgewise_spline_weighting_backward_kernel',
            kernel,
            Dtype=dtype,
            M_in=M_in,
            M_out=M_out,
            k_max=k_max,
            K=K)

    return f_bw
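
# Note on the template parameters handed to `load_kernel`, as used in the
# tests: `k_max` is the number of non-zero B-spline products per edge,
# (degree + 1)**dim, and `K` is the total number of kernel weight matrices,
# i.e. the product of the entries of `kernel_size`. For example, for a
# two-dimensional linear spline with kernel_size = [3, 4]:
#
#     dim, degree = 2, 1
#     k_max = (degree + 1)**dim  # 4
#     K = 3 * 4                  # 12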
_spline_kernel_linear = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
    const ${Dtype}* input, ${Dtype}* amount, long* index,
    const long* kernel_size, const long* is_open_spline, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};

    int K = ${K};
    int k_idx_mod;
    int bot;
    int top;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;

    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 2;
      k_idx >>= 1;

      value = input[e_idx * ${dim} + d_idx];
      value *= kernel_size[d_idx] - is_open_spline[d_idx];
      frac = value - floor(value);

      a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;

      bot = int(floor(value));
      top = (bot + 1) % kernel_size[d_idx];
      bot %= kernel_size[d_idx];

      i += ((1 - k_idx_mod) * bot + k_idx_mod * top) * K;
    }

    amount[idx] = a;
    index[idx] = i;
  }
}
'''
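
# Illustrative, unused reference implementation: a minimal pure-Python sketch
# of the linear basis kernel above, computing `amount` and `index` for a
# single pseudo-coordinate vector `u` (one edge). `kernel_size` and
# `is_open_spline` are assumed to be Python lists of length `dim`; the
# function name is made up for exposition.
import math


def _linear_basis_reference(u, kernel_size, is_open_spline):
    dim = len(kernel_size)
    K = 1
    for ks in kernel_size:
        K *= ks
    amount, index = [], []
    for k_idx in range(2**dim):
        K_rem, a, i, bits = K, 1.0, 0, k_idx
        for d in range(dim):
            K_rem //= kernel_size[d]
            bit = bits % 2  # Which of the two neighboring knots to pick.
            bits >>= 1
            value = u[d] * (kernel_size[d] - is_open_spline[d])
            frac = value - math.floor(value)
            a *= (1 - bit) * (1 - frac) + bit * frac
            bot = int(math.floor(value)) % kernel_size[d]
            top = (int(math.floor(value)) + 1) % kernel_size[d]
            i += ((1 - bit) * bot + bit * top) * K_rem
        amount.append(a)
        index.append(i)
    return amount, index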
_spline_kernel_quadratic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
    const ${Dtype}* input, ${Dtype}* amount, long* index,
    const long* kernel_size, const long* is_open_spline, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};

    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;

    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 3;
      k_idx /= 3;

      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
      frac = value - floor(value);

      if (k_idx_mod == 0) a *= 0.5 * (1 - frac) * (1 - frac);
      else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
      else a *= 0.5 * frac * frac;

      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];

      i += pos * K;
    }

    amount[idx] = a;
    index[idx] = i;
  }
}
'''
_spline_kernel_cubic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
    const ${Dtype}* input, ${Dtype}* amount, long* index,
    const long* kernel_size, const long* is_open_spline, int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};

    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;

    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 4;
      k_idx /= 4;

      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (3 * is_open_spline[d_idx]));
      frac = value - floor(value);

      if (k_idx_mod == 0) a *= (1 - frac) * (1 - frac) * (1 - frac) / 6.0;
      else if (k_idx_mod == 1)
        a *= (3 * frac * frac * frac - 6 * frac * frac + 4) / 6.0;
      else if (k_idx_mod == 2)
        a *= (-3 * frac * frac * frac + 3 * frac * frac + 3 * frac + 1) / 6.0;
      else a *= frac * frac * frac / 6.0;

      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];

      i += pos * K;
    }

    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# This is the efficient backward version: it reuses `amount`, but the division
# can hit zero and may be numerically unstable. There is no fix for this yet,
# so the less efficient version 2 below is used instead.
_spline_kernel_linear_backward = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
    const ${Dtype}* input, const ${Dtype}* grad_amount, ${Dtype}* amount,
    ${Dtype}* grad_adj, const long* kernel_size, const long* is_open_spline,
    int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${dim};
    int d_idx = idx % ${dim};

    int k_idx_mod;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} grad_out = 0.0;
    int quotient = (int)pow(2.0, (double)d_idx);

    value = input[e_idx * ${dim} + d_idx];
    value *= kernel_size[d_idx] - is_open_spline[d_idx];
    frac = value - floor(value);

    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k_idx_mod = (k_idx / quotient) % 2;
      int a_idx = e_idx * ${k_max} + k_idx;
      ${Dtype} residual = -(1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
      grad_out += grad_amount[a_idx] * amount[a_idx] / residual;
    }

    grad_adj[idx] = grad_out * (kernel_size[d_idx] - is_open_spline[d_idx]);
  }
}

/*
    ${Dtype} a = -(1 - k_idx_mod) + k_idx_mod;
    for (int d_it = 0; d_it < ${dim}; d_it++) {
      if (d_it != d_idx) {
        value = input[e_idx * ${dim} + d_it];
        value *= kernel_size[d_it] - is_open_spline[d_it];
        frac = value - floor(value);
        a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
      }
    }
    grad_out += a * grad_amount[a_idx];
*/
'''
# This is the less efficient backward version: it recomputes the per-dimension
# factors instead of reusing `amount`.
_spline_kernel_linear_backward2 = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
    const ${Dtype}* input, const ${Dtype}* grad_amount, ${Dtype}* amount,
    ${Dtype}* grad_adj, const long* kernel_size, const long* is_open_spline,
    int num_threads) {

  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${dim};
    int d_idx = idx % ${dim};

    int k_idx_mod;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} grad_out = 0.0;
    int quotient = (int)pow(2.0, (double)d_idx);

    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k_idx_mod = (k_idx / quotient) % 2;
      int a_idx = e_idx * ${k_max} + k_idx;
      ${Dtype} a = -(1 - k_idx_mod) + k_idx_mod;

      for (int d_it = 0; d_it < ${dim}; d_it++) {
        if (d_it != d_idx) {
          int quotient = (int)pow(2.0, (double)d_it);
          k_idx_mod = (k_idx / quotient) % 2;
          value = input[e_idx * ${dim} + d_it];
          value *= kernel_size[d_it] - is_open_spline[d_it];
          frac = value - floor(value);
          a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
        }
      }

      grad_out += a * grad_amount[a_idx];
    }

    grad_adj[idx] = grad_out * (kernel_size[d_idx] - is_open_spline[d_idx]);
  }
}
'''
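
# Illustrative, unused reference implementation: a minimal pure-Python sketch
# of the derivative accumulated by the backward kernel above. For the bit of
# `k_idx` belonging to dimension d, the factor
# (1 - bit) * (1 - frac) + bit * frac is replaced by its derivative
# -(1 - bit) + bit, all other dimensions keep their basis factor, and the sum
# is scaled by the chain-rule factor (kernel_size[d] - is_open_spline[d]).
# `u` and `grad_amount` describe a single edge; the name is made up.
def _linear_basis_backward_reference(u, grad_amount, kernel_size,
                                     is_open_spline):
    dim = len(kernel_size)
    grad_u = [0.0] * dim
    for d in range(dim):
        for k_idx in range(2**dim):
            bit_d = (k_idx >> d) % 2
            a = -(1 - bit_d) + bit_d
            for d_it in range(dim):
                if d_it == d:
                    continue
                bit = (k_idx >> d_it) % 2
                value = u[d_it] * (kernel_size[d_it] - is_open_spline[d_it])
                frac = value - math.floor(value)
                a *= (1 - bit) * (1 - frac) + bit * frac
            grad_u[d] += a * grad_amount[k_idx]
        grad_u[d] *= kernel_size[d] - is_open_spline[d]
    return grad_u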
def get_basis_kernel(k_max, K, dim, degree, dtype='float'):
    if degree == 3:
        _spline_kernel = _spline_kernel_cubic
    elif degree == 2:
        _spline_kernel = _spline_kernel_quadratic
    else:
        _spline_kernel = _spline_kernel_linear

    cuda_tensor = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(cuda_tensor):
        f = load_kernel(
            'spline_kernel',
            _spline_kernel,
            Dtype=dtype,
            k_max=k_max,
            dim=dim,
            K=K)
    return f


def get_basis_backward_kernel(k_max, K, dim, degree, dtype='float'):
    if degree == 3:
        _spline_kernel = _spline_kernel_linear_backward2
    elif degree == 2:
        _spline_kernel = _spline_kernel_linear_backward2
    else:
        _spline_kernel = _spline_kernel_linear_backward2

    cuda_tensor = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(cuda_tensor):
        f = load_kernel(
            'spline_kernel',
            _spline_kernel,
            Dtype=dtype,
            k_max=k_max,
            dim=dim,
            K=K)
    return f
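
# Note: independent of `degree`, the backward basis kernel currently always
# falls back to `_spline_kernel_linear_backward2`, the slower variant that
# recomputes the per-dimension factors instead of dividing the cached `amount`
# (see the stability note above). Backpropagation to the pseudo-coordinates is
# therefore only meaningful for linear splines, which `SplineConvGPU.backward`
# below also enforces.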
class SplineConvGPU(Function):
    def __init__(self,
                 kernel_size,
                 is_open_spline,
                 K,
                 degree,
                 basis_kernel,
                 basis_backward_kernel,
                 weighting_kernel,
                 weighting_backward_kernel,
                 bp_to_adj=False,
                 adj_values=None):

        super(SplineConvGPU, self).__init__()
        self.degree = degree
        self.f_weighting_fw = weighting_kernel
        self.f_weighting_bw = weighting_backward_kernel
        self.kernel_size = kernel_size
        self.is_open_spline = is_open_spline
        self.f_basis_fw = basis_kernel
        self.f_basis_bw = basis_backward_kernel
        self.bp_to_adj = bp_to_adj
        self.adj_values = adj_values

    def forward(self, input, weight, adj_values=None):
        assert input.is_cuda and weight.is_cuda

        self.K, self.M_in, self.M_out = weight.size()

        # If bp_to_adj is False, the pseudo-coordinates were passed at
        # construction time.
        if adj_values is None:
            adj_values = self.adj_values

        # Compute the B-spline basis tensor products.
        if adj_values.dim() < 2:
            adj_values = adj_values.unsqueeze(1)

        if self.bp_to_adj:
            self.save_for_backward(input, weight, adj_values)
            # adj_values = torch.clamp(adj_values, min=0.0, max=1.0)
        else:
            self.save_for_backward(input, weight)

        num_edges, dim = adj_values.size()
        k_max = (self.degree + 1)**dim
        amount = adj_values.new(num_edges, k_max)
        index = adj_values.new(num_edges, k_max).long()
        num_threads = amount.numel()

        with torch.cuda.device_of(input):
            self.f_basis_fw(
                block=(cuda_num_threads, 1, 1),
                grid=(get_blocks(num_threads), 1, 1),
                args=[
                    adj_values.data_ptr(),
                    amount.data_ptr(),
                    index.data_ptr(),
                    self.kernel_size.data_ptr(),
                    self.is_open_spline.data_ptr(), num_threads
                ],
                stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

        # Weight the features.
        output = input.new(input.size(0), self.M_out)
        num_threads = output.numel()

        with torch.cuda.device_of(input):
            self.f_weighting_fw(
                block=(cuda_num_threads, 1, 1),
                grid=(get_blocks(num_threads), 1, 1),
                args=[
                    input.data_ptr(),
                    weight.data_ptr(),
                    output.data_ptr(),
                    amount.data_ptr(),
                    index.data_ptr(), num_threads
                ],
                stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

        self.amount = amount
        self.index = index

        return output

    def backward(self, grad_output):
        grad_input = grad_output.new(grad_output.size(0), self.M_in).fill_(0)
        grad_weight = grad_output.new(self.K, self.M_in, self.M_out).fill_(0)
        num_threads = grad_output.numel()

        if self.bp_to_adj:
            if self.degree == 2 or self.degree == 3:
                print('Backward to u for degree > 1 is not implemented!')
                raise NotImplementedError

            input, weight, adj_values = self.saved_tensors
            # adj_values = torch.clamp(adj_values, min=0.0, max=1.0)
            amount = self.amount
            index = self.index
            grad_amount = grad_output.new(amount.size(0),
                                          amount.size(1)).fill_(0)

            with torch.cuda.device_of(grad_output):
                self.f_weighting_bw(
                    block=(cuda_num_threads, 1, 1),
                    grid=(get_blocks(num_threads), 1, 1),
                    args=[
                        grad_output.data_ptr(),
                        grad_input.data_ptr(),
                        grad_weight.data_ptr(),
                        grad_amount.data_ptr(),
                        input.data_ptr(),
                        weight.data_ptr(),
                        amount.data_ptr(),
                        index.data_ptr(), num_threads
                    ],
                    stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

            grad_adj = grad_amount.new(
                grad_amount.size(0), self.kernel_size.size(0)).fill_(0)
            num_threads = grad_adj.numel()

            with torch.cuda.device_of(grad_amount):
                self.f_basis_bw(
                    block=(cuda_num_threads, 1, 1),
                    grid=(get_blocks(num_threads), 1, 1),
                    args=[
                        adj_values.data_ptr(),
                        grad_amount.data_ptr(),
                        amount.data_ptr(),
                        grad_adj.data_ptr(),
                        self.kernel_size.data_ptr(),
                        self.is_open_spline.data_ptr(), num_threads
                    ],
                    stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

            return grad_input, grad_weight, grad_adj

        else:
            input, weight = self.saved_tensors
            amount = self.amount
            index = self.index

            # The kernel compiled without `bp_to_adj` takes no `grad_amount`
            # argument.
            with torch.cuda.device_of(grad_output):
                self.f_weighting_bw(
                    block=(cuda_num_threads, 1, 1),
                    grid=(get_blocks(num_threads), 1, 1),
                    args=[
                        grad_output.data_ptr(),
                        grad_input.data_ptr(),
                        grad_weight.data_ptr(),
                        input.data_ptr(),
                        weight.data_ptr(),
                        amount.data_ptr(),
                        index.data_ptr(), num_threads
                    ],
                    stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

            return grad_input, grad_weight, None
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal

if torch.cuda.is_available():
    from .compute_spline_basis import compute_spline_basis
    from .compute_spline_basis import get_basis_kernel


class SplineCubicGPUTest(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_open_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([7])
        is_open_spline = torch.cuda.LongTensor([1])
        k_max = 4
        K = 7
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 3)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 7,
                                      basis_kernel)

        a2 = [
            [0.1667, 0.6667, 0.1667, 0],
            [0.0853, 0.6307, 0.2827, 0.0013],
            [0.1667, 0.6667, 0.1667, 0],
            [0.1667, 0.6667, 0.1667, 0],
            [0.1667, 0.6667, 0.1667, 0],
            [0.0013, 0.2827, 0.6307, 0.0853],
            [0.1667, 0.6667, 0.1667, 0],
        ]
        i2 = [[0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5],
              [3, 4, 5, 6], [3, 4, 5, 6], [4, 5, 6, 0]]

        assert_almost_equal(a1.cpu().numpy(), a2, 4)
        assert_equal(i1.cpu().numpy(), i2)

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_closed_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([4])
        is_open_spline = torch.cuda.LongTensor([0])
        k_max = 4
        K = 4
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 3)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
                                      basis_kernel)

        a2 = [
            [0.1667, 0.6667, 0.1667, 0],
            [0.0853, 0.6307, 0.2827, 0.0013],
            [0.1667, 0.6667, 0.1667, 0],
            [0.1667, 0.6667, 0.1667, 0],
            [0.1667, 0.6667, 0.1667, 0],
            [0.0013, 0.2827, 0.6307, 0.0853],
            [0.1667, 0.6667, 0.1667, 0],
        ]
        i2 = [[0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 3, 0], [2, 3, 0, 1],
              [3, 0, 1, 2], [3, 0, 1, 2], [0, 1, 2, 3]]

        assert_almost_equal(a1.cpu().numpy(), a2, 4)
        assert_equal(i1.cpu().numpy(), i2)
'''
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal

if torch.cuda.is_available():
    from .compute_spline_basis import compute_spline_basis
    from .compute_spline_basis import get_basis_kernel


class SplineLinearGPUTest(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_open_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([5])
        is_open_spline = torch.cuda.LongTensor([1])
        k_max = 2
        K = 5
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 1)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 5,
                                      basis_kernel)

        a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
        i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [4, 3], [4, 3], [0, 4]]

        assert_almost_equal(a1.cpu().numpy(), a2, 2)
        assert_equal(i1.cpu().numpy(), i2)

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_closed_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([4])
        is_open_spline = torch.cuda.LongTensor([0])
        k_max = 2
        K = 4
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 1)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
                                      basis_kernel)

        a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
        i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [0, 3], [0, 3], [1, 0]]

        assert_almost_equal(a1.cpu().numpy(), a2, 2)
        assert_equal(i1.cpu().numpy(), i2)
'''
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal

if torch.cuda.is_available():
    from .compute_spline_basis import compute_spline_basis
    from .compute_spline_basis import get_basis_kernel


class SplineQuadraticGPUTest(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_open_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([6])
        is_open_spline = torch.cuda.LongTensor([1])
        k_max = 3
        K = 6
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 2)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 6,
                                      basis_kernel)

        a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
              [0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
        i2 = [[0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [3, 4, 5],
              [4, 5, 0]]

        assert_almost_equal(a1.cpu().numpy(), a2, 2)
        assert_equal(i1.cpu().numpy(), i2)

    @unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
    def test_closed_spline(self):
        input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
        kernel_size = torch.cuda.LongTensor([4])
        is_open_spline = torch.cuda.LongTensor([0])
        k_max = 3
        K = 4
        dim = 1
        basis_kernel = get_basis_kernel(k_max, K, dim, 2)
        a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
                                      basis_kernel)

        a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
              [0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
        i2 = [[0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 0], [3, 0, 1], [3, 0, 1],
              [0, 1, 2]]

        assert_almost_equal(a1.cpu().numpy(), a2, 2)
        assert_equal(i1.cpu().numpy(), i2)
'''