Commit 952a4cad authored by rusty1s's avatar rusty1s
Browse files

removed old version

parent b189dba7
'''
import unittest
import torch
from torch.autograd import Variable, gradcheck
from numpy.testing import assert_equal
from .spline import spline
if torch.cuda.is_available():
from .edgewise_spline_weighting_gpu import EdgewiseSplineWeightingGPU
class EdgewiseSplineWeightingGPUTest(unittest.TestCase):
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_forward(self):
input = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
input = torch.cuda.FloatTensor(input)
kernel_size = torch.cuda.LongTensor([3, 4])
is_open_spline = torch.cuda.LongTensor([1, 0])
amount, index = spline(input, kernel_size, is_open_spline, 12, 1)
input = torch.cuda.FloatTensor([[1, 2], [3, 4], [5, 6], [7, 8]])
weight = torch.arange(0.5, 0.5 * 25, step=0.5).view(12, 2, 1).cuda()
input, weight = Variable(input), Variable(weight)
op = EdgewiseSplineWeightingGPU(amount, index)
out = op(input, weight)
expected_out = [
[0.25 * (1 * (0.5 + 1.5 + 4.5 + 5.5) + 2 * (1 + 2 + 5 + 6))],
[0.25 * (3 * (1.5 + 2.5 + 5.5 + 6.5) + 4 * (2 + 3 + 6 + 7))],
[0.25 * (5 * (6.5 + 7.5 + 10.5 + 11.5) + 6 * (7 + 8 + 11 + 12))],
[0.25 * (7 * (4.5 + 7.5 + 8.5 + 11.5) + 8 * (5 + 8 + 9 + 12))],
]
assert_equal(out.cpu().data.numpy(), expected_out)
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_backward(self):
input = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
input = torch.cuda.DoubleTensor(input)
kernel_size = torch.cuda.LongTensor([3, 4])
is_open_spline = torch.cuda.LongTensor([1, 0])
amount, index = spline(input, kernel_size, is_open_spline, 12, 1)
input = torch.randn(4, 2).double().cuda()
weight = torch.randn(12, 2, 1).double().cuda()
input = Variable(input, requires_grad=True)
weight = Variable(weight, requires_grad=True)
op = EdgewiseSplineWeightingGPU(amount, index)
test = gradcheck(op, (input, weight), eps=1e-6, atol=1e-4)
self.assertTrue(test)
'''
import torch
from torch.autograd import Variable
from .spline_conv_gpu import SplineConvGPU
def spline_conv(
adj, # Pytorch Tensor (!bp_to_adj) or Pytorch Variable (bp_to_adj)
input, # Pytorch Variable
weight, # Pytorch Variable
kernel_size, # Rest tensors or python variables
is_open_spline,
K,
weighting_kernel,
weighting_backward_kernel,
basis_kernel,
basis_backward_kernel=None,
degree=1,
bias=None):
if input.dim() == 1:
input = input.unsqueeze(1)
values = adj['values']
row, col = adj['indices']
# Get features for every end vertex with shape [|E| x M_in].
output = input[col]
bp_to_adj = False if torch.is_tensor(values) else True
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
if output.is_cuda:
if bp_to_adj:
output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
basis_kernel, basis_backward_kernel,
weighting_kernel, weighting_backward_kernel,
bp_to_adj)(output, weight[:-1], values)
else:
output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
basis_kernel, basis_backward_kernel,
weighting_kernel, weighting_backward_kernel,
bp_to_adj, values)(output, weight[:-1])
else:
# CPU Implementation not available
raise NotImplementedError()
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# [n x M_out] feature matrix.
zero = output.data.new(adj['size'][1], output.size(1)).fill_(0.0)
zero = Variable(zero) if not torch.is_tensor(output) else zero
r = row.view(-1, 1).expand(row.size(0), output.size(1))
output = zero.scatter_add_(0, Variable(r), output)
# Weighten root node features by multiplying with root weight.
output += torch.mm(input, weight[-1])
# Normalize output by degree.
ones = output.data.new(values.size(0)).fill_(1)
zero = output.data.new(output.size(0)).fill_(0)
degree = zero.scatter_add_(0, row, ones)
degree = torch.clamp(degree, min=1)
output = output / Variable(degree.view(-1, 1))
if bias is not None:
output += bias
return output
from __future__ import division
import unittest
import torch
from torch.autograd import Variable, gradcheck
from .spline_conv_gpu import (get_basis_kernel, get_basis_backward_kernel,
get_weighting_forward_kernel,
get_weighting_backward_kernel, SplineConvGPU)
class SplineConvTest(unittest.TestCase):
'''
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_forward_gpu(self):
edges = torch.LongTensor([[0, 0, 0, 0], [1, 2, 3, 4]])
values = [[0.25, 0.125], [0.25, 0.375], [0.75, 0.625], [0.75, 0.875]]
values = torch.FloatTensor(values).double()
adj = {'indices': edges.cuda(), 'values': Variable(values.cuda()),
'size': torch.Size([5, 5, 2])}
kernel_size = torch.cuda.LongTensor([3, 4])
is_open_spline = torch.cuda.LongTensor([1, 0])
input = torch.DoubleTensor([[9, 10], [1, 2], [3, 4], [5, 6], [7, 8]])
weight = torch.arange(0.5, 0.5 * 27, step=0.5).view(13, 2, 1).double()
input, weight = input.cuda(), weight.cuda()
input, weight = Variable(input), Variable(weight)
row, col = adj['indices']
output = input[col]
K = 12
in_features = 2
out_features = 1
degree = 1
dim = 2
k_max = (degree+1)**dim
fw_k = get_weighting_forward_kernel(in_features, out_features, k_max)
bw_k = get_weighting_backward_kernel(in_features, out_features, k_max,
K, True)
basis_fw_k = get_basis_kernel(k_max, K, dim, degree)
basis_bw_k = get_basis_backward_kernel(k_max, K, dim, degree)
#output = spline_conv(
# adj, input, weight, kernel_size, is_open_spline, K, fw_k, bw_k,
# basis_fw_k, basis_bw_k,bp_to_adj=True)
values = adj['values']
output = SplineConvGPU(kernel_size, is_open_spline, K, degree,
basis_fw_k, basis_bw_k, fw_k, bw_k, bp_to_adj=True)\
(output, weight, values)
zero = output.data.new(adj['size'][1], output.size(1)).fill_(0.0)
zero = Variable(zero) if not torch.is_tensor(output) else zero
r = row.view(-1, 1).expand(row.size(0), output.size(1))
output = zero.scatter_add_(0, Variable(r), output)
# Weighten root node features by multiplying with root weight.
output += torch.mm(input, weight[-1])
# Normalize output by degree.
ones = values.data.new(values.size(0)).fill_(1)
zero = values.data.new(output.size(0)).fill_(0)
degree = zero.scatter_add_(0, row, ones)
degree = torch.clamp(degree, min=1)
output = output / Variable(degree.view(-1, 1))
expected_output = [
[(12.5 * 9 + 13 * 10 + 266) / 4],
[12.5 * 1 + 13 * 2],
[12.5 * 3 + 13 * 4],
[12.5 * 5 + 13 * 6],
[12.5 * 7 + 13 * 8],
]
assert_almost_equal(output.cpu().data.numpy(), expected_output, 1)
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_backward(self):
kernel_size = torch.cuda.LongTensor([3, 4])
is_open_spline = torch.cuda.LongTensor([1, 1])
input = torch.randn(4, 2).double().cuda()
weight = torch.randn(12, 2, 1).double().cuda()
values = torch.FloatTensor(4, 2).uniform_(0, 1).double().cuda()
print(values)
input = Variable(input, requires_grad=True)
weight = Variable(weight, requires_grad=True)
values = Variable(values, requires_grad=True)
K = 12
in_features = 2
out_features = 1
degree = 1
dim = 2
k_max = (degree + 1) ** dim
fw_k = get_weighting_forward_kernel(in_features, out_features, k_max)
bw_k = get_weighting_backward_kernel(in_features, out_features, k_max,
K, bp_to_adj=True)
basis_fw_k = get_basis_kernel(k_max, K, dim, degree)
basis_bw_k = get_basis_backward_kernel(k_max, K, dim, degree)
op = SplineConvGPU(kernel_size, is_open_spline, K, degree,
basis_fw_k, basis_bw_k, fw_k, bw_k, bp_to_adj=True)
print(op(input, weight, values))
#test = gradcheck(op, (input, weight, values), eps=1e-6, atol=1e-4)
#self.assertTrue(test)
'''
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_backward(self):
input = torch.randn(4, 2).double().cuda()
weight = torch.randn(9, 2, 1).double().cuda()
values = torch.FloatTensor(4, 2).uniform_(0, 1).double().cuda()
print(values)
input = Variable(input, requires_grad=True)
weight = Variable(weight, requires_grad=True)
values = Variable(values, requires_grad=True)
K = 9
in_features = 2
out_features = 1
degree = 1
dim = 2
k_max = (degree + 1)**dim
kernel_size = torch.cuda.LongTensor([3, 3])
is_open_spline = torch.cuda.LongTensor([1, 0])
fw_k = get_weighting_forward_kernel(
in_features, out_features, k_max, dtype='double')
bw_k = get_weighting_backward_kernel(
in_features, out_features, k_max, K, True, dtype='double')
basis_fw_k = get_basis_kernel(k_max, K, dim, degree, dtype='double')
basis_bw_k = get_basis_backward_kernel(
k_max, K, dim, degree, dtype='double')
op = SplineConvGPU(
kernel_size,
is_open_spline,
K,
degree,
basis_fw_k,
basis_bw_k,
fw_k,
bw_k,
bp_to_adj=True)
test = gradcheck(op, (input, weight, values), eps=1e-6, atol=1e-4)
self.assertTrue(test)
import torch
from torch.autograd import Function
from ....utils.cuda import (cuda_num_threads, Stream, load_kernel, kernel_loop,
get_blocks)
_edgewise_spline_weighting_forward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_forward_kernel(
const ${Dtype}* input, const ${Dtype}* weight, ${Dtype}* output,
const ${Dtype}* amount, const long* index, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${M_out};
const int m_out_idx = idx % ${M_out};
${Dtype} result = 0.0;
${Dtype} w;
${Dtype} f;
int k;
${Dtype} b;
long c;
long w_idx;
for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
k = e_idx * ${k_max} + k_idx;
b = amount[k];
c = index[k];
for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
w_idx = c * ${M_out} * ${M_in} +
m_in_idx * ${M_out} +
m_out_idx;
w = weight[w_idx];
f = input[e_idx * ${M_in} + m_in_idx];
result += b * w * f;
}
}
output[idx] = result;
}
}
'''
_edgewise_spline_weighting_backward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
const ${Dtype}* input, const ${Dtype}* weight, const ${Dtype}* amount,
const long* index, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${M_out};
const int m_out_idx = idx % ${M_out};
${Dtype} w;
${Dtype} g;
${Dtype} f;
${Dtype} w_grad;
int k;
${Dtype} b;
long c;
long w_idx;
for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
k = e_idx * ${k_max} + k_idx;
b = amount[k];
c = index[k];
for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
w_idx = c * ${M_out} * ${M_in} +
m_in_idx * ${M_out} +
m_out_idx;
w = weight[w_idx];
// Calculate input gradient.
g = grad_output[e_idx * ${M_out} + m_out_idx];
atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
// This is inefficient: `reduce_sum` shouldn't be done like this.
// Looping over `M_out` would be better to avoid the `atomicAdd`.
// Calculate weight gradient.
f = input[e_idx * ${M_in} + m_in_idx];
w_grad = f * b * g;
atomicAdd(&(grad_weight[w_idx]), w_grad);
// Not so efficient either, but not avoidable.
}
}
}
}
'''
_edgewise_spline_weighting_backward_kernel_bp2adj = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
${Dtype}* grad_amount, const ${Dtype}* input, const ${Dtype}* weight,
const ${Dtype}* amount, const long* index, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${M_out};
const int m_out_idx = idx % ${M_out};
${Dtype} w;
${Dtype} g;
${Dtype} f;
${Dtype} w_grad;
int k;
${Dtype} b;
long c;
long w_idx;
for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
k = e_idx * ${k_max} + k_idx;
b = amount[k];
c = index[k];
${Dtype} adj_g = 0.0;
for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
w_idx = c * ${M_out} * ${M_in} +
m_in_idx * ${M_out} +
m_out_idx;
w = weight[w_idx];
// Calculate input gradient.
g = grad_output[e_idx * ${M_out} + m_out_idx];
atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
// This is inefficient: `reduce_sum` shouldn't be done like this.
// Looping over `M_out` would be better to avoid the `atomicAdd`.
// Calculate weight gradient.
f = input[e_idx * ${M_in} + m_in_idx];
w_grad = f * b * g;
atomicAdd(&(grad_weight[w_idx]), w_grad);
// Not so efficient either, but not avoidable.
// Calculate B-spline basis tensor product gradient
adj_g += g * f * w;
}
atomicAdd(&(grad_amount[e_idx*${k_max} + k_idx]), adj_g);
}
}
}
'''
def get_weighting_forward_kernel(M_in, M_out, k_max, dtype='float'):
cuda_tensor = torch.FloatTensor([1]).cuda()
kernel = _edgewise_spline_weighting_forward_kernel
with torch.cuda.device_of(cuda_tensor):
f_fw = load_kernel(
'edgewise_spline_weighting_forward_kernel',
kernel,
Dtype=dtype,
M_in=M_in,
M_out=M_out,
k_max=k_max)
return f_fw
def get_weighting_backward_kernel(M_in,
M_out,
k_max,
K,
bp_to_adj=False,
dtype='float'):
cuda_tensor = torch.FloatTensor([1]).cuda()
if bp_to_adj:
kernel = _edgewise_spline_weighting_backward_kernel_bp2adj
else:
kernel = _edgewise_spline_weighting_backward_kernel
with torch.cuda.device_of(cuda_tensor):
f_bw = load_kernel(
'edgewise_spline_weighting_backward_kernel',
kernel,
Dtype=dtype,
M_in=M_in,
M_out=M_out,
k_max=k_max,
K=K)
return f_bw
_spline_kernel_linear = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int bot;
int top;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K/=kernel_size[d_idx];
k_idx_mod = k_idx % 2;
k_idx >>= 1;
value = input[e_idx * ${dim} + d_idx];
value *= kernel_size[d_idx] - is_open_spline[d_idx];
frac = value - floor(value);
a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
bot = int(floor(value));
top = (bot + 1) % kernel_size[d_idx];
bot %= kernel_size[d_idx];
i += ((1 - k_idx_mod) * bot + k_idx_mod * top) * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
_spline_kernel_quadratic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 3;
k_idx /= 3;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
frac = value - floor(value);
if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
else a *= 0.5 * frac * frac;
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
i += pos * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
_spline_kernel_cubic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 4;
k_idx /= 4;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (3 * is_open_spline[d_idx]));
frac = value - floor(value);
if (k_idx_mod == 0) a *= (1 - frac) * (1 - frac) * (1 - frac) / 6.0;
else if (k_idx_mod == 1)
a *= (3 * frac * frac * frac - 6 * frac * frac + 4) / 6.0;
else if (k_idx_mod == 2)
a *= (-3 * frac * frac * frac + 3 * frac * frac + 3 * frac + 1) / 6.0;
else a *= frac * frac * frac / 6.0;
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
i += pos * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
# This is the efficient version which uses amount but may divide by 0 and
# may be numerically unstable.
# No solution for this yet, use the less efficient version 2.
_spline_kernel_linear_backward = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, const ${Dtype}* grad_amount, ${Dtype}* amount,
${Dtype}* grad_adj, const long* kernel_size, const long* is_open_spline,
int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${dim};
int d_idx = idx % ${dim};
int k_idx_mod;
${Dtype} value;
${Dtype} frac;
${Dtype} grad_out = 0.0;
int quotient = (int)pow(2.0,(double)d_idx);
value = input[e_idx * ${dim} + d_idx];
value *= kernel_size[d_idx] - is_open_spline[d_idx];
frac = value - floor(value);
for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
k_idx_mod = (k_idx/quotient) % 2;
int a_idx = e_idx*${k_max} + k_idx;
${Dtype} residual = - (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
grad_out += grad_amount[a_idx]*amount[a_idx]/residual;
}
grad_adj[idx] = grad_out*(kernel_size[d_idx] - is_open_spline[d_idx]);
}
}
/*
${Dtype} a = -(1 - k_idx_mod) + k_idx_mod;
for (int d_it = 0; d_it < ${dim}; d_it++) {
if(d_it!=d_idx)
{
value = input[e_idx * ${dim} + d_it];
value *= kernel_size[d_it] - is_open_spline[d_it];
frac = value - floor(value);
a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
}
}
grad_out += a*grad_amount[a_idx];
*/
'''
# This is the inefficient version with gradient computation without amount.
_spline_kernel_linear_backward2 = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, const ${Dtype}* grad_amount, ${Dtype}* amount,
${Dtype}* grad_adj, const long* kernel_size, const long* is_open_spline,
int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${dim};
int d_idx = idx % ${dim};
int k_idx_mod;
${Dtype} value;
${Dtype} frac;
${Dtype} grad_out = 0.0;
int quotient = (int)pow(2.0,(double)d_idx);
for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
k_idx_mod = (k_idx/quotient) % 2;
int a_idx = e_idx*${k_max} + k_idx;
${Dtype} a = -(1 - k_idx_mod) + k_idx_mod;
for (int d_it = 0; d_it < ${dim}; d_it++) {
if(d_it!=d_idx)
{
int quotient = (int)pow(2.0,(double)d_it);
k_idx_mod = (k_idx/quotient) % 2;
value = input[e_idx * ${dim} + d_it];
value *= kernel_size[d_it] - is_open_spline[d_it];
frac = value - floor(value);
a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
}
}
grad_out += a*grad_amount[a_idx];
}
grad_adj[idx] = grad_out*(kernel_size[d_idx] - is_open_spline[d_idx]);
}
}
'''
def get_basis_kernel(k_max, K, dim, degree, dtype='float'):
if degree == 3:
_spline_kernel = _spline_kernel_cubic
elif degree == 2:
_spline_kernel = _spline_kernel_quadratic
else:
_spline_kernel = _spline_kernel_linear
cuda_tensor = torch.FloatTensor([1]).cuda()
with torch.cuda.device_of(cuda_tensor):
f = load_kernel(
'spline_kernel',
_spline_kernel,
Dtype=dtype,
k_max=k_max,
dim=dim,
K=K)
return f
def get_basis_backward_kernel(k_max, K, dim, degree, dtype='float'):
if degree == 3:
_spline_kernel = _spline_kernel_linear_backward2
elif degree == 2:
_spline_kernel = _spline_kernel_linear_backward2
else:
_spline_kernel = _spline_kernel_linear_backward2
cuda_tensor = torch.FloatTensor([1]).cuda()
with torch.cuda.device_of(cuda_tensor):
f = load_kernel(
'spline_kernel',
_spline_kernel,
Dtype=dtype,
k_max=k_max,
dim=dim,
K=K)
return f
class SplineConvGPU(Function):
def __init__(self,
kernel_size,
is_open_spline,
K,
degree,
basis_kernel,
basis_backward_kernel,
weighting_kernel,
weighting_backward_kernel,
bp_to_adj=False,
adj_values=None):
super(SplineConvGPU, self).__init__()
self.degree = degree
self.f_weighting_fw = weighting_kernel
self.f_weighting_bw = weighting_backward_kernel
self.kernel_size = kernel_size
self.is_open_spline = is_open_spline
self.f_basis_fw = basis_kernel
self.f_basis_bw = basis_backward_kernel
self.bp_to_adj = bp_to_adj
self.adj_values = adj_values
def forward(self, input, weight, adj_values=None):
assert input.is_cuda and weight.is_cuda
self.K, self.M_in, self.M_out = weight.size()
# If bp_to_u is false
if adj_values is None:
adj_values = self.adj_values
# Compute B-spline basis tensor products
adj_values = adj_values.unsqueeze(1) if len(adj_values.size()) < 2 \
else adj_values
if self.bp_to_adj:
self.save_for_backward(input, weight, adj_values)
# adj_values = torch.clamp(adj_values,min=0.0,max=1.0)
else:
self.save_for_backward(input, weight)
num_edges, dim = adj_values.size()
k_max = (self.degree + 1)**dim
amount = adj_values.new(num_edges, k_max)
index = adj_values.new(num_edges, k_max).long()
num_threads = amount.numel()
with torch.cuda.device_of(input):
self.f_basis_fw(
block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1),
args=[
adj_values.data_ptr(),
amount.data_ptr(),
index.data_ptr(),
self.kernel_size.data_ptr(),
self.is_open_spline.data_ptr(), num_threads
],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
# Weight features
output = input.new(input.size(0), self.M_out)
num_threads = output.numel()
with torch.cuda.device_of(input):
self.f_weighting_fw(
block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1),
args=[
input.data_ptr(),
weight.data_ptr(),
output.data_ptr(),
amount.data_ptr(),
index.data_ptr(), num_threads
],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
self.amount = amount
self.index = index
return output
def backward(self, grad_output):
grad_input = grad_output.new(grad_output.size(0), self.M_in).fill_(0)
grad_weight = grad_output.new(self.K, self.M_in, self.M_out).fill_(0)
num_threads = grad_output.numel()
if self.bp_to_adj:
if self.degree == 2 or self.degree == 3:
print('Backward to u for degree>1 not implemented!')
raise NotImplementedError
input, weight, adj_values = self.saved_tensors
# adj_values = torch.clamp(adj_values,min=0.0,max=1.0)
amount = self.amount
index = self.index
grad_amount = grad_output.new(amount.size(0),
amount.size(1)).fill_(0)
with torch.cuda.device_of(grad_output):
self.f_weighting_bw(
block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1),
args=[
grad_output.data_ptr(),
grad_input.data_ptr(),
grad_weight.data_ptr(),
grad_amount.data_ptr(),
input.data_ptr(),
weight.data_ptr(),
amount.data_ptr(),
index.data_ptr(), num_threads
],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
grad_adj = grad_amount.new(
grad_amount.size(0), self.kernel_size.size(0)).fill_(0)
num_threads = grad_adj.numel()
with torch.cuda.device_of(grad_amount):
self.f_basis_bw(
block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1),
args=[
adj_values.data_ptr(),
grad_amount.data_ptr(),
amount.data_ptr(),
grad_adj.data_ptr(),
self.kernel_size.data_ptr(),
self.is_open_spline.data_ptr(), num_threads
],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
return grad_input, grad_weight, grad_adj
else:
input, weight = self.saved_tensors
amount = self.amount
index = self.index
grad_amount = grad_output.new(amount.size(0),
amount.size(1)).fill_(0)
with torch.cuda.device_of(grad_output):
self.f_weighting_bw(
block=(cuda_num_threads, 1, 1),
grid=(get_blocks(num_threads), 1, 1),
args=[
grad_output.data_ptr(),
grad_input.data_ptr(),
grad_weight.data_ptr(),
grad_amount.data_ptr(),
input.data_ptr(),
weight.data_ptr(),
amount.data_ptr(),
index.data_ptr(), num_threads
],
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
return grad_input, grad_weight, None
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available():
from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineQuadraticGPUTest(unittest.TestCase):
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_open_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([7])
is_open_spline = torch.cuda.LongTensor([1])
k_max = 4
K = 7
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 3)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 7,
basis_kernel)
a2 = [
[0.1667, 0.6667, 0.1667, 0],
[0.0853, 0.6307, 0.2827, 0.0013],
[0.1667, 0.6667, 0.1667, 0],
[0.1667, 0.6667, 0.1667, 0],
[0.1667, 0.6667, 0.1667, 0],
[0.0013, 0.2827, 0.6307, 0.0853],
[0.1667, 0.6667, 0.1667, 0],
]
i2 = [[0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5],
[3, 4, 5, 6], [3, 4, 5, 6], [4, 5, 6, 0]]
assert_almost_equal(a1.cpu().numpy(), a2, 4)
assert_equal(i1.cpu().numpy(), i2)
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_closed_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0])
k_max = 4
K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 3)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
basis_kernel)
a2 = [
[0.1667, 0.6667, 0.1667, 0],
[0.0853, 0.6307, 0.2827, 0.0013],
[0.1667, 0.6667, 0.1667, 0],
[0.1667, 0.6667, 0.1667, 0],
[0.1667, 0.6667, 0.1667, 0],
[0.0013, 0.2827, 0.6307, 0.0853],
[0.1667, 0.6667, 0.1667, 0],
]
i2 = [[0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 3, 0], [2, 3, 0, 1],
[3, 0, 1, 2], [3, 0, 1, 2], [0, 1, 2, 3]]
assert_almost_equal(a1.cpu().numpy(), a2, 4)
assert_equal(i1.cpu().numpy(), i2)
'''
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available():
from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineLinearGPUTest(unittest.TestCase):
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_open_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([5])
is_open_spline = torch.cuda.LongTensor([1])
k_max = 2
K = 5
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 1)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 5,
basis_kernel)
a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [4, 3], [4, 3], [0, 4]]
assert_almost_equal(a1.cpu().numpy(), a2, 2)
assert_equal(i1.cpu().numpy(), i2)
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_closed_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0])
k_max = 2
K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 1)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
basis_kernel)
a2 = [[0, 1], [0.2, 0.8], [0, 1], [0, 1], [0, 1], [0.8, 0.2], [0, 1]]
i2 = [[1, 0], [1, 0], [2, 1], [3, 2], [0, 3], [0, 3], [1, 0]]
assert_almost_equal(a1.cpu().numpy(), a2, 2)
assert_equal(i1.cpu().numpy(), i2)
'''
'''
import unittest
import torch
from numpy.testing import assert_equal, assert_almost_equal
if torch.cuda.is_available():
from .compute_spline_basis import compute_spline_basis
from .compute_spline_basis import get_basis_kernel
class SplineQuadraticGPUTest(unittest.TestCase):
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_open_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([6])
is_open_spline = torch.cuda.LongTensor([1])
k_max = 3
K = 6
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 2)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 6,
basis_kernel)
a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
[0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
i2 = [[0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [3, 4, 5],
[4, 5, 0]]
assert_almost_equal(a1.cpu().numpy(), a2, 2)
assert_equal(i1.cpu().numpy(), i2)
@unittest.skipIf(not torch.cuda.is_available(), 'no GPU')
def test_closed_spline(self):
input = torch.cuda.FloatTensor([0, 0.05, 0.25, 0.5, 0.75, 0.95, 1])
kernel_size = torch.cuda.LongTensor([4])
is_open_spline = torch.cuda.LongTensor([0])
k_max = 3
K = 4
dim = 1
basis_kernel = get_basis_kernel(k_max, K, dim, 2)
a1, i1 = compute_spline_basis(input, kernel_size, is_open_spline, 4,
basis_kernel)
a2 = [[0.5, 0.5, 0], [0.32, 0.66, 0.02], [0.5, 0.5, 0], [0.5, 0.5, 0],
[0.5, 0.5, 0], [0.02, 0.66, 0.32], [0.5, 0.5, 0]]
i2 = [[0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 0], [3, 0, 1], [3, 0, 1],
[0, 1, 2]]
assert_almost_equal(a1.cpu().numpy(), a2, 2)
assert_equal(i1.cpu().numpy(), i2)
'''
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment