Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
torch-spline-conv
Commits
e2e17501
Commit
e2e17501
authored
Mar 05, 2018
by
Jan Eric Lenssen
Browse files
first version bp_to_adj, not working atm
parent
8383b476
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
560 additions
and
774 deletions
+560
-774
compute_spline_basis.py
compute_spline_basis.py
+0
-192
edgewise_spline_weighting.py
edgewise_spline_weighting.py
+0
-13
edgewise_spline_weighting_bp2adj_gpu.py
edgewise_spline_weighting_bp2adj_gpu.py
+0
-309
edgewise_spline_weighting_gpu.py
edgewise_spline_weighting_gpu.py
+0
-184
spline.py
spline.py
+0
-12
spline_conv.py
spline_conv.py
+18
-17
spline_conv_bp2adj.py
spline_conv_bp2adj.py
+0
-47
spline_conv_gpu.py
spline_conv_gpu.py
+542
-0
No files found.
compute_spline_basis.py
deleted
100644 → 0
View file @
8383b476
import
torch
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
kernel_loop
,
get_blocks
)
# CUDA template for the linear (degree-1) B-spline basis. For each edge it
# evaluates 2**dim basis products: `amount[idx]` is the product of the 1-D
# basis weights and `index[idx]` the flattened index of the kernel weight
# this product multiplies. Template placeholders (${...}) are substituted
# by `load_kernel` (k_max, dim, K, Dtype).
_spline_kernel_linear = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int bot;
    int top;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 2;
      k_idx >>= 1;
      value = input[e_idx * ${dim} + d_idx];
      value *= kernel_size[d_idx] - is_open_spline[d_idx];
      frac = value - floor(value);
      a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
      bot = int(floor(value));
      top = (bot + 1) % kernel_size[d_idx];
      bot %= kernel_size[d_idx];
      i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# CUDA template for the quadratic (degree-2) B-spline basis: 3**dim basis
# products per edge, selected via base-3 digits of k_idx. NOTE(review): the
# launching code sizes buffers with k_max = 2 ** dim, which only matches the
# linear kernel — confirm this variant is actually reachable.
_spline_kernel_quadratic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 3;
      k_idx /= 3;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
      frac = value - floor(value);
      if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
      else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
      else a *= 0.5 * frac * frac;
      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];
      i += pos * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# CUDA template for the cubic (degree-3) B-spline basis: 4**dim basis
# products per edge, selected via base-4 digits of k_idx.
# BUG FIX: the kernel loop read `CUDA_KERNEL_LOOP(idx, num_threads})` —
# the stray `}` made the generated CUDA source unparseable (nvcc syntax
# error), so this kernel could never be compiled.
_spline_kernel_cubic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 4;
      k_idx /= 4;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (3 * is_open_spline[d_idx]));
      frac = value - floor(value);
      if (k_idx_mod == 0) a *= (1 - frac) * (1 - frac) * (1 - frac) / 6.0;
      else if (k_idx_mod == 1)
        a *= (3 * frac * frac * frac - 6 * frac * frac + 4) / 6.0;
      else if (k_idx_mod == 2)
        a *= (-3 * frac * frac * frac + 3 * frac * frac + 3 * frac + 1) / 6.0;
      else a *= frac * frac * frac / 6.0;
      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];
      i += pos * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
def get_basis_kernel(k_max, K, dim, degree):
    """Compile and return the CUDA B-spline basis kernel for `degree`.

    Degrees 2 and 3 select the quadratic/cubic templates; every other
    value falls back to the linear template.
    """
    if degree == 3:
        source = _spline_kernel_cubic
    elif degree == 2:
        source = _spline_kernel_quadratic
    else:
        source = _spline_kernel_linear

    # A throwaway CUDA tensor only serves to select the current device
    # context for compilation.
    probe = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(probe):
        compiled = load_kernel('spline_kernel', source, Dtype='float',
                               k_max=k_max, dim=dim, K=K)
    return compiled
def compute_spline_basis(input, kernel_size, is_open_spline, K, basis_kernel):
    """Evaluate B-spline basis products for every edge on the GPU.

    Launches the precompiled `basis_kernel` over `input` (edgewise
    pseudo-coordinates) and returns the pair `(amount, index)`:
    basis weights and flattened kernel-weight indices, each of shape
    [num_edges x k_max].
    """
    # Raw data pointers are passed to CUDA below, so everything must
    # already live on the GPU.
    assert input.is_cuda and kernel_size.is_cuda and is_open_spline.is_cuda

    # Promote 1-D pseudo-coordinates to a [num_edges x 1] matrix.
    input = input.unsqueeze(1) if len(input.size()) < 2 else input
    num_edges, dim = input.size()

    # NOTE(review): 2 ** dim products per edge matches only the *linear*
    # basis kernel; quadratic/cubic templates enumerate 3**dim / 4**dim
    # products — confirm callers never pass those kernels here.
    k_max = 2 ** dim

    # Output buffers, one entry per (edge, basis-product) pair.
    amount = input.new(num_edges, k_max)
    index = input.new(num_edges, k_max).long()
    num_threads = amount.numel()

    with torch.cuda.device_of(input):
        basis_kernel(block=(cuda_num_threads, 1, 1),
                     grid=(get_blocks(num_threads), 1, 1),
                     args=[input.data_ptr(), amount.data_ptr(),
                           index.data_ptr(), kernel_size.data_ptr(),
                           is_open_spline.data_ptr(), num_threads],
                     stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
    return amount, index
edgewise_spline_weighting.py
deleted
100644 → 0
View file @
8383b476
import
torch
if
torch
.
cuda
.
is_available
():
from
.edgewise_spline_weighting_gpu
import
EdgewiseSplineWeightingGPU
def edgewise_spline_weighting(input, weight, amount, index, k_fw, k_bw):
    """Apply edgewise spline weighting via the GPU autograd Function.

    `k_fw` / `k_bw` are the precompiled forward/backward CUDA kernels;
    only a CUDA implementation exists.
    """
    if not input.is_cuda:
        raise NotImplementedError

    K, M_in, M_out = weight.size()
    op = EdgewiseSplineWeightingGPU(amount, index, K, M_in, M_out, k_fw, k_bw)
    return op(input, weight)
edgewise_spline_weighting_bp2adj_gpu.py
deleted
100644 → 0
View file @
8383b476
import
torch
from
torch.autograd
import
Function
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
kernel_loop
,
get_blocks
)
# Forward kernel: one thread per (edge, output-channel); accumulates
# sum_k sum_m_in amount * weight * feature into output[idx].
_edgewise_spline_weighting_forward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_forward_kernel(
const ${Dtype}* input, const ${Dtype}* weight, ${Dtype}* output,
const ${Dtype}* amount, const long* index) {
  CUDA_KERNEL_LOOP(idx, ${num_threads}) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} result = 0.0;
    ${Dtype} w;
    ${Dtype} f;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        f = input[e_idx * ${M_in} + m_in_idx];
        result += b * w * f;
      }
    }
    output[idx] = result;
  }
}
'''

# Backward kernel: additionally accumulates grad_b, the gradient w.r.t.
# the B-spline basis products (for backprop to the adjacency values).
_edgewise_spline_weighting_backward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
${Dtype}* grad_b, const ${Dtype}* input, const ${Dtype}* weight,
const ${Dtype}* amount, const long* index) {
  CUDA_KERNEL_LOOP(idx, ${num_threads}) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    ${Dtype} b_grad;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        // Calculate input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better to avoid the `atomicAdd`.
        // Calculate weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not so efficient either, but not avoidable.
        //Calculate gradient of B
        b_grad = f * w * g;
        atomicAdd(&(grad_b[k]), b_grad);
      }
    }
  }
}
'''

# Linear B-spline basis evaluation used by the forward pass.
_spline_kernel = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
  CUDA_KERNEL_LOOP(idx, ${num_threads}) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int bot;
    int top;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 2;
      k_idx >>= 1;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - is_open_spline[d_idx]);
      frac = value - floor(value);
      a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
      bot = int(floor(value));
      top = (bot + 1) % kernel_size[d_idx];
      bot %= kernel_size[d_idx];
      i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''

# NOTE(review): this "backwards" variant is byte-for-byte identical to
# `_spline_kernel` above and is never referenced in this file — it looks
# like an unfinished placeholder for a real basis-backward kernel.
_spline_kernel_backwards = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
  CUDA_KERNEL_LOOP(idx, ${num_threads}) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int bot;
    int top;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 2;
      k_idx >>= 1;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - is_open_spline[d_idx]);
      frac = value - floor(value);
      a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
      bot = int(floor(value));
      top = (bot + 1) % kernel_size[d_idx];
      bot %= kernel_size[d_idx];
      i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
class EdgewiseSplineWeightingGPU(Function):
    """Edgewise spline weighting that also computes the gradient w.r.t. the
    B-spline basis products (`grad_b`) for backpropagation to the adjacency.

    NOTE(review): the gradients are returned as (input, weight, basis) while
    `forward` receives (input, values, weight) — the order and the variable
    being differentiated (basis products vs. raw values) do not line up;
    confirm before relying on the adjacency gradient.
    """

    def __init__(self, kernel_size, is_open_spline, K):
        super(EdgewiseSplineWeightingGPU, self).__init__()
        assert kernel_size.is_cuda and is_open_spline.is_cuda
        self.kernel_size = kernel_size
        self.is_open_spline = is_open_spline
        self.K = K

    def forward(self, input, values, weight):
        """Compute basis products from `values`, then weight `input`."""
        assert input.is_cuda and weight.is_cuda
        self.save_for_backward(input, weight)

        # Promote 1-D pseudo-coordinates to [num_edges x 1].
        values = values.unsqueeze(1) if len(values.size()) < 2 else values
        num_edges, dim = values.size()
        k_max = 2 ** dim
        amount = values.new(num_edges, k_max)
        index = values.new(num_edges, k_max).long()
        num_threads = amount.numel()
        with torch.cuda.device_of(input):
            f = load_kernel('spline_kernel', _spline_kernel,
                            Dtype=Dtype(input), num_threads=num_threads,
                            k_max=k_max, dim=dim, K=self.K)
            f(block=(cuda_num_threads, 1, 1),
              grid=(get_blocks(num_threads), 1, 1),
              args=[values.data_ptr(), amount.data_ptr(), index.data_ptr(),
                    self.kernel_size.data_ptr(),
                    self.is_open_spline.data_ptr()],
              stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        self.amount = amount
        self.index = index

        # Weight the edge features: [|E| x M_in] -> [|E| x M_out].
        _, M_in, M_out = weight.size()
        k_max = self.amount.size(1)
        output = input.new(input.size(0), M_out)
        num_threads = output.numel()
        with torch.cuda.device_of(input):
            f = load_kernel('edgewise_spline_weighting_forward_kernel',
                            _edgewise_spline_weighting_forward_kernel,
                            Dtype=Dtype(input), num_threads=num_threads,
                            M_in=M_in, M_out=M_out, k_max=k_max)
            f(block=(cuda_num_threads, 1, 1),
              grid=(get_blocks(num_threads), 1, 1),
              args=[input.data_ptr(), weight.data_ptr(), output.data_ptr(),
                    self.amount.data_ptr(), self.index.data_ptr()],
              stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        return output

    def backward(self, grad_output):
        """Accumulate input, weight, and basis-product gradients."""
        input, weight = self.saved_tensors
        K, M_in, M_out = weight.size()
        E, k_max = self.amount.size()
        grad_input = grad_output.new(input.size(0), M_in).fill_(0)
        grad_weight = grad_output.new(K, M_in, M_out).fill_(0)
        grad_b = grad_output.new(E, k_max).fill_(0)
        num_threads = grad_output.numel()
        with torch.cuda.device_of(grad_output):
            f = load_kernel('edgewise_spline_weighting_backward_kernel',
                            _edgewise_spline_weighting_backward_kernel,
                            Dtype=Dtype(input), num_threads=num_threads,
                            M_in=M_in, M_out=M_out, k_max=k_max, K=K)
            f(block=(cuda_num_threads, 1, 1),
              grid=(get_blocks(num_threads), 1, 1),
              args=[grad_output.data_ptr(), grad_input.data_ptr(),
                    grad_weight.data_ptr(), grad_b.data_ptr(),
                    input.data_ptr(), weight.data_ptr(),
                    self.amount.data_ptr(), self.index.data_ptr()],
              stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        # BUG FIX: the original re-zeroed `grad_b` here, discarding the
        # gradient the kernel just accumulated, and then returned the
        # undefined name `grad_u` (a guaranteed NameError). Return the
        # computed basis gradient instead.
        return grad_input, grad_weight, grad_b
edgewise_spline_weighting_gpu.py
deleted
100644 → 0
View file @
8383b476
import
torch
from
torch.autograd
import
Function
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
load_kernel
,
kernel_loop
,
get_blocks
)
# Forward kernel: one thread per (edge, output-channel); accumulates
# sum_k sum_m_in amount * weight * feature. `num_threads` is a runtime
# argument here (unlike the bp2adj file, which templates it).
_edgewise_spline_weighting_forward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_forward_kernel(
const ${Dtype}* input, const ${Dtype}* weight, ${Dtype}* output,
const ${Dtype}* amount, const long* index, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} result = 0.0;
    ${Dtype} w;
    ${Dtype} f;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        f = input[e_idx * ${M_in} + m_in_idx];
        result += b * w * f;
      }
    }
    output[idx] = result;
  }
}
'''

# Backward kernel: scatters input and weight gradients with atomicAdd
# (no basis gradient in this variant).
_edgewise_spline_weighting_backward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
const ${Dtype}* input, const ${Dtype}* weight, const ${Dtype}* amount,
const long* index, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        // Calculate input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better to avoid the `atomicAdd`.
        // Calculate weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * g;
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not so efficient either, but not avoidable.
      }
    }
  }
}
'''
def get_forward_kernel(M_in, M_out, k_max):
    """Compile the edgewise-weighting forward CUDA kernel for the given
    channel counts and basis size."""
    # Dummy tensor only selects the current CUDA device for compilation.
    probe = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(probe):
        compiled = load_kernel('edgewise_spline_weighting_forward_kernel',
                               _edgewise_spline_weighting_forward_kernel,
                               Dtype='float', M_in=M_in, M_out=M_out,
                               k_max=k_max)
    return compiled
def get_backward_kernel(M_in, M_out, k_max, K):
    """Compile the edgewise-weighting backward CUDA kernel for the given
    channel counts, basis size, and kernel-weight count `K`."""
    # Dummy tensor only selects the current CUDA device for compilation.
    probe = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(probe):
        compiled = load_kernel('edgewise_spline_weighting_backward_kernel',
                               _edgewise_spline_weighting_backward_kernel,
                               Dtype='float', M_in=M_in, M_out=M_out,
                               k_max=k_max, K=K)
    return compiled
class EdgewiseSplineWeightingGPU(Function):
    """autograd Function that weights edgewise features with precomputed
    B-spline basis products, using the precompiled CUDA kernels `k_fw`
    (forward) and `k_bw` (backward).

    `amount`/`index` are the [|E| x k_max] basis weights and flattened
    kernel-weight indices produced by the basis kernel.
    """

    def __init__(self, amount, index, K, M_in, M_out, k_fw, k_bw):
        super(EdgewiseSplineWeightingGPU, self).__init__()
        # Raw pointers are handed to CUDA kernels, so both must be on GPU.
        assert amount.is_cuda and index.is_cuda
        self.amount = amount
        self.index = index
        self.M_in = M_in
        self.M_out = M_out
        self.K = K
        self.f_fw = k_fw
        self.f_bw = k_bw

    def forward(self, input, weight):
        """Map [|E| x M_in] edge features to [|E| x M_out] via `weight`."""
        assert input.is_cuda and weight.is_cuda
        self.save_for_backward(input, weight)
        output = input.new(input.size(0), self.M_out)
        # One CUDA thread per (edge, output-channel) entry.
        num_threads = output.numel()
        with torch.cuda.device_of(input):
            self.f_fw(block=(cuda_num_threads, 1, 1),
                      grid=(get_blocks(num_threads), 1, 1),
                      args=[input.data_ptr(), weight.data_ptr(),
                            output.data_ptr(), self.amount.data_ptr(),
                            self.index.data_ptr(), num_threads],
                      stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        return output

    def backward(self, grad_output):
        """Return (grad_input, grad_weight); the kernel scatters both
        with atomicAdd into zero-initialized buffers."""
        input, weight = self.saved_tensors
        grad_input = grad_output.new(input.size(0), self.M_in).fill_(0)
        grad_weight = grad_output.new(self.K, self.M_in,
                                      self.M_out).fill_(0)
        num_threads = grad_output.numel()
        with torch.cuda.device_of(grad_output):
            self.f_bw(block=(cuda_num_threads, 1, 1),
                      grid=(get_blocks(num_threads), 1, 1),
                      args=[grad_output.data_ptr(), grad_input.data_ptr(),
                            grad_weight.data_ptr(), input.data_ptr(),
                            weight.data_ptr(), self.amount.data_ptr(),
                            self.index.data_ptr(), num_threads],
                      stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
        return grad_input, grad_weight
spline.py
deleted
100644 → 0
View file @
8383b476
import
torch
if
torch
.
cuda
.
is_available
():
from
.compute_spline_basis
import
compute_spline_basis
def spline(input, kernel_size, is_open_spline, K, degree, basis_kernel):
    """Dispatch B-spline basis computation (GPU only).

    `degree` is accepted for interface compatibility but is not consulted
    here — the precompiled `basis_kernel` already encodes the degree.
    """
    if not input.is_cuda:
        raise NotImplementedError()
    return compute_spline_basis(input, kernel_size, is_open_spline, K,
                                basis_kernel)
spline_conv.py
View file @
e2e17501
...
@@ -2,23 +2,22 @@ import torch
...
@@ -2,23 +2,22 @@ import torch
from
torch.autograd
import
Variable
from
torch.autograd
import
Variable
import
time
import
time
from
.spline
import
spline
from
.spline_conv_gpu
import
SplineConvGPU
from
.edgewise_spline_weighting
import
edgewise_spline_weighting
def
spline_conv
(
def
spline_conv
(
adj
,
#
Tensor
adj
,
#
Pytorch Tensor (!bp_to_adj) or Pytorch Variable (bp_to_adj)
input
,
# Variable
input
,
#
Pytorch
Variable
weight
,
# Variable
weight
,
#
Pytorch
Variable
kernel_size
,
kernel_size
,
# Rest tensors or python variables
is_open_spline
,
is_open_spline
,
K
,
K
,
forward
_kernel
,
weighting
_kernel
,
backward_kernel
,
weighting_
backward_kernel
,
basis_kernel
,
basis_kernel
,
basis_backward_kernel
=
None
,
degree
=
1
,
degree
=
1
,
bias
=
None
):
bias
=
None
,
bp_to_adj
=
False
):
if
input
.
dim
()
==
1
:
if
input
.
dim
()
==
1
:
input
=
input
.
unsqueeze
(
1
)
input
=
input
.
unsqueeze
(
1
)
...
@@ -29,12 +28,14 @@ def spline_conv(
...
@@ -29,12 +28,14 @@ def spline_conv(
# Get features for every end vertex with shape [|E| x M_in].
# Get features for every end vertex with shape [|E| x M_in].
output
=
input
[
col
]
output
=
input
[
col
]
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
if
output
.
is_cuda
:
amount
,
index
=
spline
(
values
,
kernel_size
,
is_open_spline
,
K
,
degree
,
output
=
SplineConvGPU
(
kernel_size
,
is_open_spline
,
K
,
degree
,
basis_kernel
)
basis_kernel
,
basis_backward_kernel
,
weighting_kernel
,
weighting_backward_kernel
,
output
=
edgewise_spline_weighting
(
output
,
weight
[:
-
1
],
amount
,
index
,
bp_to_adj
)(
output
,
weight
[:
-
1
],
values
)
forward_kernel
,
backward_kernel
)
else
:
# CPU Implementation not available
raise
NotImplementedError
()
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# [n x M_out] feature matrix.
# [n x M_out] feature matrix.
...
...
spline_conv_bp2adj.py
deleted
100644 → 0
View file @
8383b476
import
torch
from
torch.autograd
import
Variable
from
.spline
import
spline
from
.edgewise_spline_weighting_bp2adj_gpu
import
EdgewiseSplineWeightingGPU
def spline_conv_bp2adj(adj,  # Tensor (sparse): adjacency with pseudo-coords
                       input,  # Variable: [n x M_in] node features
                       weight,  # Variable: [K+1 x M_in x M_out]; last slice
                                # is the root-node weight
                       kernel_size,
                       is_open_spline,
                       K,
                       degree=1,
                       bias=None):
    """Spline convolution with backpropagation to the adjacency values."""
    values = adj._values()
    row, col = adj._indices()

    # Get features for every end vertex with shape [|E| x M_in].
    output = input[col]

    # Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
    # weight[:-1] excludes the root weight, which is applied separately below.
    output = EdgewiseSplineWeightingGPU(kernel_size, is_open_spline,
                                        K)(output, weight[:-1], values)

    # Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix
    # to [n x M_out] feature matrix.
    zero = output.data.new(adj.size(1), output.size(1)).fill_(0.0)
    # Wrap in Variable only when `output` is itself a Variable.
    zero = Variable(zero) if not torch.is_tensor(output) else zero
    r = row.view(-1, 1).expand(row.size(0), output.size(1))
    output = zero.scatter_add_(0, Variable(r), output)

    # Weighten root node features by multiplying with root weight.
    output += torch.mm(input, weight[-1])

    # Normalize output by degree (shadows the `degree` parameter from here
    # on — the spline degree is no longer needed).
    ones = values.new(values.size(0)).fill_(1)
    zero = values.new(output.size(0)).fill_(0)
    degree = zero.scatter_add_(0, row, ones)
    degree = torch.clamp(degree, min=1)  # avoid division by zero
    output = output / Variable(degree.view(-1, 1))

    if bias is not None:
        output += bias

    return output
spline_conv_gpu.py
0 → 100644
View file @
e2e17501
import
torch
from
torch.autograd
import
Function
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
load_kernel
,
kernel_loop
,
get_blocks
)
# Forward kernel: one thread per (edge, output-channel); accumulates
# sum_k sum_m_in amount * weight * feature into output[idx].
_edgewise_spline_weighting_forward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_forward_kernel(
const ${Dtype}* input, const ${Dtype}* weight, ${Dtype}* output,
const ${Dtype}* amount, const long* index, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} result = 0.0;
    ${Dtype} w;
    ${Dtype} f;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        f = input[e_idx * ${M_in} + m_in_idx];
        result += b * w * f;
      }
    }
    output[idx] = result;
  }
}
'''

# Standard backward kernel (no adjacency gradient): scatters input and
# weight gradients with atomicAdd.
_edgewise_spline_weighting_backward_kernel = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
const ${Dtype}* input, const ${Dtype}* weight, const ${Dtype}* amount,
const long* index, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        // Calculate input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better to avoid the `atomicAdd`.
        // Calculate weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * g;
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not so efficient either, but not avoidable.
      }
    }
  }
}
'''
# Backward kernel for bp_to_adj mode: additionally accumulates the gradient
# w.r.t. the basis products into `grad_amount`.
# BUG FIXES vs. the original source:
#   * `adj_g` was used (`adj_g += ...`) without ever being declared or
#     initialized — a guaranteed nvcc compile error; it is now declared and
#     reset to 0 for every k_idx before the inner accumulation.
#   * `grad_amount[e_idx,k_idx]` used the C comma operator, which evaluates
#     to `grad_amount[k_idx]` — every edge would have scattered into the
#     first row. The flat index `e_idx * k_max + k_idx` (already computed
#     as `k`) is used instead.
_edgewise_spline_weighting_backward_kernel_bp2adj = kernel_loop + '''
extern "C"
__global__ void edgewise_spline_weighting_backward_kernel(
const ${Dtype}* grad_output, ${Dtype}* grad_input, ${Dtype}* grad_weight,
${Dtype}* grad_amount, const ${Dtype}* input, const ${Dtype}* weight,
const ${Dtype}* amount, const long* index, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${M_out};
    const int m_out_idx = idx % ${M_out};
    ${Dtype} w;
    ${Dtype} g;
    ${Dtype} f;
    ${Dtype} w_grad;
    ${Dtype} adj_g;
    int k;
    ${Dtype} b;
    long c;
    long w_idx;
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k = e_idx * ${k_max} + k_idx;
      b = amount[k];
      c = index[k];
      adj_g = 0.0;
      for (int m_in_idx = 0; m_in_idx < ${M_in}; m_in_idx++) {
        w_idx = c * ${M_out} * ${M_in} +
                m_in_idx * ${M_out} +
                m_out_idx;
        w = weight[w_idx];
        // Calculate input gradient.
        g = grad_output[e_idx * ${M_out} + m_out_idx];
        atomicAdd(&(grad_input[e_idx * ${M_in} + m_in_idx]), b * w * g);
        // This is inefficient: `reduce_sum` shouldn't be done like this.
        // Looping over `M_out` would be better to avoid the `atomicAdd`.
        // Calculate weight gradient.
        f = input[e_idx * ${M_in} + m_in_idx];
        w_grad = f * b * g;
        atomicAdd(&(grad_weight[w_idx]), w_grad);
        // Not so efficient either, but not avoidable.
        // Calculate B-spline basis tensor product gradient.
        adj_g += g * f * w;
      }
      atomicAdd(&(grad_amount[k]), adj_g);
    }
  }
}
'''
def get_weighting_forward_kernel(M_in, M_out, k_max, bt_to_adj=False):
    """Compile the edgewise-weighting forward CUDA kernel.

    `bt_to_adj` is kept for interface symmetry with the backward getter,
    but the forward pass is currently identical for both modes: the
    original `if`/`else` selected the very same kernel source in both
    branches, so the dead branch has been removed.
    """
    kernel = _edgewise_spline_weighting_forward_kernel
    # Dummy tensor only selects the current CUDA device for compilation.
    cuda_tensor = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(cuda_tensor):
        f_fw = load_kernel('edgewise_spline_weighting_forward_kernel',
                           kernel, Dtype='float', M_in=M_in, M_out=M_out,
                           k_max=k_max)
    return f_fw
def get_weighting_backward_kernel(M_in, M_out, k_max, K, bt_to_adj=False):
    """Compile the edgewise-weighting backward CUDA kernel.

    Selects the bp2adj variant (which also emits a basis-product
    gradient) when `bt_to_adj` is set.
    """
    source = (_edgewise_spline_weighting_backward_kernel_bp2adj
              if bt_to_adj
              else _edgewise_spline_weighting_backward_kernel)
    # Dummy tensor only selects the current CUDA device for compilation.
    probe = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(probe):
        compiled = load_kernel('edgewise_spline_weighting_backward_kernel',
                               source, Dtype='float', M_in=M_in,
                               M_out=M_out, k_max=k_max, K=K)
    return compiled
# Linear basis kernel (new variant): unlike the older file, K accumulates
# upward (starting from 1, multiplied after use), input values of exactly
# 1.0 are nudged down to stay inside the last knot interval, and the
# frac/top pairing is flipped relative to the old convention.
_spline_kernel_linear = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = 1.0;
    int k_idx_mod;
    int bot;
    int top;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      k_idx_mod = k_idx % 2;
      k_idx >>= 1;
      value = input[e_idx * ${dim} + d_idx];
      if(value==1.0)
        value -= 0.000001;
      value *= kernel_size[d_idx] - is_open_spline[d_idx];
      frac = value - floor(value);
      a *= (1 - k_idx_mod) * (1 - frac) + k_idx_mod * frac;
      bot = int(floor(value));
      top = (bot + 1) % kernel_size[d_idx];
      bot %= kernel_size[d_idx];
      i += ((1 - k_idx_mod) * bot + k_idx_mod * top) * K;
      K *= kernel_size[d_idx];
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# Quadratic (degree-2) basis kernel: 3**dim products per edge selected via
# base-3 digits of k_idx; K counts down from the templated total ${K}.
_spline_kernel_quadratic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 3;
      k_idx /= 3;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
      frac = value - floor(value);
      if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
      else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
      else a *= 0.5 * frac * frac;
      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];
      i += pos * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# Cubic (degree-3) basis kernel: 4**dim products per edge selected via
# base-4 digits of k_idx.
# BUG FIX: the kernel loop read `CUDA_KERNEL_LOOP(idx, num_threads})` —
# the stray `}` made the generated CUDA source unparseable (nvcc syntax
# error), so this kernel could never be compiled.
_spline_kernel_cubic = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${k_max};
    int k_idx = idx % ${k_max};
    int K = ${K};
    int k_idx_mod;
    int pos;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} a = 1.0;
    long i = 0;
    for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
      K /= kernel_size[d_idx];
      k_idx_mod = k_idx % 4;
      k_idx /= 4;
      value = input[e_idx * ${dim} + d_idx] *
              (kernel_size[d_idx] - (3 * is_open_spline[d_idx]));
      frac = value - floor(value);
      if (k_idx_mod == 0) a *= (1 - frac) * (1 - frac) * (1 - frac) / 6.0;
      else if (k_idx_mod == 1)
        a *= (3 * frac * frac * frac - 6 * frac * frac + 4) / 6.0;
      else if (k_idx_mod == 2)
        a *= (-3 * frac * frac * frac + 3 * frac * frac + 3 * frac + 1) / 6.0;
      else a *= frac * frac * frac / 6.0;
      pos = int(floor(value)) + k_idx_mod;
      pos %= kernel_size[d_idx];
      i += pos * K;
    }
    amount[idx] = a;
    index[idx] = i;
  }
}
'''
# Backward kernel for the linear basis: one thread per (edge, dim) entry;
# recovers d(amount)/d(value) by dividing out this dimension's 1-D factor
# (`residual`) from the stored product.
# BUG FIXES vs. the original source (which could not compile):
#   * missing semicolons after the `quotient` and `a_idx` declarations;
#   * `residual` was assigned without ever being declared;
#   * unused locals (K, bot, top, i, value-index accumulator) removed.
# NOTE(review): `residual` is 0 whenever frac is exactly 0 or 1 for the
# matching k_idx_mod, making the division produce inf/NaN — confirm inputs
# are kept strictly inside knot intervals (cf. the 1.0 nudge in the
# forward kernel) or guard the division.
_spline_kernel_linear_backward = kernel_loop + '''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, const ${Dtype}* grad_amount, ${Dtype}* amount,
${Dtype}* grad_adj, const long* kernel_size, const long* is_open_spline,
int num_threads) {
  CUDA_KERNEL_LOOP(idx, num_threads) {
    const int e_idx = idx / ${dim};
    int d_idx = idx % ${dim};
    int k_idx_mod;
    ${Dtype} value;
    ${Dtype} frac;
    ${Dtype} residual;
    ${Dtype} grad_out = 0.0;
    int quotient = (int)pow(2.0, (float)d_idx);
    for (int k_idx = 0; k_idx < ${k_max}; k_idx++) {
      k_idx_mod = (k_idx / quotient) % 2;
      value = input[e_idx * ${dim} + d_idx];
      value *= kernel_size[d_idx] - is_open_spline[d_idx];
      frac = value - floor(value);
      residual = (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
      int a_idx = e_idx * ${k_max} + k_idx;
      grad_out += grad_amount[a_idx] * amount[a_idx] / residual;
    }
    grad_adj[e_idx * ${dim} + d_idx] = grad_out;
  }
}
'''
def get_basis_kernel(k_max, K, dim, degree, bt_to_adj=False):
    """Compile the forward B-spline basis kernel for `degree`.

    Degrees 2 and 3 map to the quadratic/cubic templates; anything else
    falls back to linear. `bt_to_adj` is accepted for interface symmetry
    with the backward getter but does not affect the forward kernel choice.
    """
    if degree == 3:
        source = _spline_kernel_cubic
    elif degree == 2:
        source = _spline_kernel_quadratic
    else:
        source = _spline_kernel_linear

    # Dummy tensor only selects the current CUDA device for compilation.
    probe = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(probe):
        compiled = load_kernel('spline_kernel', source, Dtype='float',
                               k_max=k_max, dim=dim, K=K)
    return compiled
def get_basis_backward_kernel(k_max, K, dim, degree):
    """Compile the backward basis kernel (adjacency-gradient path)."""
    # NOTE(review): for degree 2/3 this selects the *forward* quadratic/
    # cubic kernel sources, whose parameter lists (input, amount, index,
    # kernel_size, is_open_spline) do not match the backward launch
    # arguments (input, grad_amount, amount, grad_adj, ...). Only the
    # degree-1 path has a real backward kernel.
    if degree == 3:
        _spline_kernel = _spline_kernel_cubic
    elif degree == 2:
        _spline_kernel = _spline_kernel_quadratic
    else:
        _spline_kernel = _spline_kernel_linear_backward
    # Dummy tensor only selects the current CUDA device for compilation.
    cuda_tensor = torch.FloatTensor([1]).cuda()
    with torch.cuda.device_of(cuda_tensor):
        f = load_kernel('spline_kernel', _spline_kernel, Dtype='float',
                        k_max=k_max, dim=dim, K=K)
    return f
class SplineConvGPU(Function):
    """Fused spline-convolution autograd Function.

    Forward: computes B-spline basis products from `adj_values` via
    `basis_kernel`, then weights edge features via `weighting_kernel`.
    Backward: always produces input/weight gradients; when `bp_to_adj`
    is set it additionally backpropagates through the basis products to
    the adjacency values (`grad_adj`).
    """

    def __init__(self, kernel_size, is_open_spline, K, degree, basis_kernel,
                 basis_backward_kernel, weighting_kernel,
                 weighting_backward_kernel, bp_to_adj=False):
        super(SplineConvGPU, self).__init__()
        self.degree = degree
        # Precompiled CUDA kernels for the two stages of each pass.
        self.f_weighting_fw = weighting_kernel
        self.f_weighting_bw = weighting_backward_kernel
        self.kernel_size = kernel_size
        self.is_open_spline = is_open_spline
        self.f_basis_fw = basis_kernel
        self.f_basis_bw = basis_backward_kernel
        self.bp_to_adj = bp_to_adj

    def forward(self, input, weight, adj_values):
        assert input.is_cuda and weight.is_cuda
        self.K, self.M_in, self.M_out = weight.size()

        # Compute B-spline basis tensor products
        adj_values = adj_values.unsqueeze(1) if len(adj_values.size()) < 2 \
            else adj_values
        num_edges, dim = adj_values.size()
        # NOTE(review): 2 ** dim products matches only degree-1 kernels.
        k_max = 2 ** dim
        amount = adj_values.new(num_edges, k_max)
        index = adj_values.new(num_edges, k_max).long()
        num_threads = amount.numel()
        with torch.cuda.device_of(input):
            self.f_basis_fw(block=(cuda_num_threads, 1, 1),
                            grid=(get_blocks(num_threads), 1, 1),
                            args=[adj_values.data_ptr(), amount.data_ptr(),
                                  index.data_ptr(),
                                  self.kernel_size.data_ptr(),
                                  self.is_open_spline.data_ptr(),
                                  num_threads],
                            stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

        # Weight features: [|E| x M_in] -> [|E| x M_out].
        output = input.new(input.size(0), self.M_out)
        num_threads = output.numel()
        with torch.cuda.device_of(input):
            self.f_weighting_fw(block=(cuda_num_threads, 1, 1),
                                grid=(get_blocks(num_threads), 1, 1),
                                args=[input.data_ptr(), weight.data_ptr(),
                                      output.data_ptr(), amount.data_ptr(),
                                      index.data_ptr(), num_threads],
                                stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

        # The bp_to_adj path also needs the raw coordinates for the basis
        # backward kernel. NOTE(review): `amount`/`index` are intermediates
        # computed inside forward — older torch versions restrict
        # save_for_backward to inputs/outputs; confirm this runs.
        if self.bp_to_adj:
            self.save_for_backward(input, weight, adj_values, amount, index)
        else:
            self.save_for_backward(input, weight, amount, index)
        return output

    def backward(self, grad_output):
        grad_input = grad_output.new(grad_output.size(0),
                                     self.M_in).fill_(0)
        grad_weight = grad_output.new(self.K, self.M_in,
                                      self.M_out).fill_(0)
        num_threads = grad_output.numel()
        if self.bp_to_adj:
            input, weight, adj_values, amount, index = self.saved_tensors
            # Gradient w.r.t. the basis products, filled by the weighting
            # backward kernel.
            grad_amount = grad_output.new(amount.size(0),
                                          amount.size(1)).fill_(0)
            with torch.cuda.device_of(grad_output):
                self.f_weighting_bw(block=(cuda_num_threads, 1, 1),
                                    grid=(get_blocks(num_threads), 1, 1),
                                    args=[grad_output.data_ptr(),
                                          grad_input.data_ptr(),
                                          grad_weight.data_ptr(),
                                          grad_amount.data_ptr(),
                                          input.data_ptr(),
                                          weight.data_ptr(),
                                          amount.data_ptr(),
                                          index.data_ptr(), num_threads],
                                    stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
            # Second stage: propagate grad_amount back to the adjacency
            # values (one thread per (edge, dim) entry).
            num_threads = grad_amount.numel()
            grad_adj = grad_amount.new(grad_amount.size(0),
                                       self.kernel_size.size(0)).fill_(0)
            with torch.cuda.device_of(grad_amount):
                self.f_basis_bw(block=(cuda_num_threads, 1, 1),
                                grid=(get_blocks(num_threads), 1, 1),
                                args=[adj_values.data_ptr(),
                                      grad_amount.data_ptr(),
                                      amount.data_ptr(),
                                      grad_adj.data_ptr(),
                                      self.kernel_size.data_ptr(),
                                      self.is_open_spline.data_ptr(),
                                      num_threads],
                                stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
            return grad_input, grad_weight, grad_adj
        else:
            input, weight, amount, index = self.saved_tensors
            with torch.cuda.device_of(grad_output):
                self.f_weighting_bw(block=(cuda_num_threads, 1, 1),
                                    grid=(get_blocks(num_threads), 1, 1),
                                    args=[grad_output.data_ptr(),
                                          grad_input.data_ptr(),
                                          grad_weight.data_ptr(),
                                          input.data_ptr(),
                                          weight.data_ptr(),
                                          amount.data_ptr(),
                                          index.data_ptr(), num_threads],
                                    stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
            # NOTE(review): forward takes three inputs but this branch
            # returns only two gradients — confirm autograd accepts this
            # for the non-bp_to_adj path.
            return grad_input, grad_weight
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment