Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
torch-spline-conv
Commits
764cd87d
Commit
764cd87d
authored
Feb 13, 2018
by
Jan Eric Lenssen
Browse files
splineconv forward now approx. 10 times faster
parent
4b92ba74
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
268 additions
and
217 deletions
+268
-217
compute_spline_basis.py
compute_spline_basis.py
+191
-0
edgewise_spline_weighting.py
edgewise_spline_weighting.py
+3
-4
edgewise_spline_weighting_gpu.py
edgewise_spline_weighting_gpu.py
+29
-18
spline.py
spline.py
+3
-10
spline_conv.py
spline_conv.py
+6
-3
spline_cubic_gpu_test.py
spline_cubic_gpu_test.py
+12
-5
spline_linear_gpu.py
spline_linear_gpu.py
+0
-84
spline_linear_gpu_test.py
spline_linear_gpu_test.py
+12
-5
spline_quadratic_gpu.py
spline_quadratic_gpu.py
+0
-84
spline_quadratic_gpu_test.py
spline_quadratic_gpu_test.py
+12
-4
No files found.
spline_cubic_gpu
.py
→
compute_spline_basis
.py
View file @
764cd87d
...
@@ -3,13 +3,105 @@ import torch
...
@@ -3,13 +3,105 @@ import torch
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
kernel_loop
,
get_blocks
)
kernel_loop
,
get_blocks
)
_spline_kernel
=
kernel_loop
+
'''
_spline_kernel
_linear
=
kernel_loop
+
'''
extern "C"
extern "C"
__global__ void spline_kernel(
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
const long* kernel_size, const long* is_open_spline
, int num_threads
) {
CUDA_KERNEL_LOOP(idx, ${num_threads}) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int K = ${K};
int k_idx_mod;
int bot;
int top;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
k_idx_mod = k_idx % 2;
k_idx >>= 1;
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - is_open_spline[d_idx]);
frac = value - floor(value);
a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
bot = int(floor(value));
top = (bot + 1) % kernel_size[d_idx];
bot %= kernel_size[d_idx];
i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
}
amount[idx] = a;
index[idx] = i;
}
}
'''
_spline_kernel_quadratic
=
kernel_loop
+
'''
// Quadratic spline basis kernel (template text: the ${...} placeholders are
// filled in before compilation -- presumably by load_kernel; confirm there).
//
// For every flat position idx < num_threads it writes one basis weight to
// amount[idx] and one flattened kernel-weight index to index[idx].
//   input          : read at e_idx * ${dim} + d_idx, i.e. ${dim} values per edge
//   amount, index  : outputs, addressed directly by idx
//   kernel_size    : per-dimension kernel size, read at d_idx
//   is_open_spline : per-dimension flag, read at d_idx (assumed 0 or 1 -- TODO confirm)
//   num_threads    : loop bound handed to CUDA_KERNEL_LOOP
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
// CUDA_KERNEL_LOOP is defined in the kernel_loop prefix prepended to this
// template; its exact iteration scheme is not visible here.
CUDA_KERNEL_LOOP(idx, num_threads) {
// Split the flat index into an edge number and a per-edge basis slot.
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
// K is divided by each dimension's kernel size below, so it serves as the
// running stride of the flattened index (presumably it starts as the product
// of all kernel sizes -- verify against the caller).
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
// Peel off the next base-3 digit of k_idx: it selects which of the three
// per-dimension weights/offsets this slot uses in dimension d_idx.
k_idx_mod = k_idx % 3;
k_idx /= 3;
// Scale the input coordinate by (kernel_size - 2 * is_open_spline).
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
// Fractional part of the scaled coordinate.
frac = value - floor(value);
// The three weights below sum to 1 for any frac.
if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
else a *= 0.5 * frac * frac;
// Integer cell plus the digit offset, wrapped around the kernel size.
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
// Accumulate this dimension's contribution to the flattened index.
i += pos * K;
}
// a is the product of the per-dimension weights; i is the flattened index.
amount[idx] = a;
index[idx] = i;
}
}
'''
_spline_kernel_cubic
=
kernel_loop
+
'''
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline, int num_threads) {
CUDA_KERNEL_LOOP(idx, num_threads) {
const int e_idx = idx / ${k_max};
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
int k_idx = idx % ${k_max};
...
@@ -52,35 +144,47 @@ const long* kernel_size, const long* is_open_spline) {
...
@@ -52,35 +144,47 @@ const long* kernel_size, const long* is_open_spline) {
}
}
'''
'''
def
get_basis_kernel
(
k_max
,
K
,
dim
,
degree
):
if
degree
==
3
:
_spline_kernel
=
_spline_kernel_cubic
elif
degree
==
2
:
_spline_kernel
=
_spline_kernel_quadratic
else
:
_spline_kernel
=
_spline_kernel_linear
def
spline_cubic_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
):
cuda_tensor
=
torch
.
FloatTensor
([
1
]).
cuda
()
with
torch
.
cuda
.
device_of
(
cuda_tensor
):
f
=
load_kernel
(
'spline_kernel'
,
_spline_kernel
,
Dtype
=
'float'
,
k_max
=
k_max
,
dim
=
dim
,
K
=
K
)
return
f
def
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
K
,
basis_kernel
):
assert
input
.
is_cuda
and
kernel_size
.
is_cuda
and
is_open_spline
.
is_cuda
assert
input
.
is_cuda
and
kernel_size
.
is_cuda
and
is_open_spline
.
is_cuda
input
=
input
.
unsqueeze
(
1
)
if
len
(
input
.
size
())
<
2
else
input
input
=
input
.
unsqueeze
(
1
)
if
len
(
input
.
size
())
<
2
else
input
num_edges
,
dim
=
input
.
size
()
num_edges
,
dim
=
input
.
size
()
k_max
=
4
**
dim
k_max
=
2
**
dim
amount
=
input
.
new
(
num_edges
,
k_max
)
amount
=
input
.
new
(
num_edges
,
k_max
)
index
=
input
.
new
(
num_edges
,
k_max
).
long
()
index
=
input
.
new
(
num_edges
,
k_max
).
long
()
num_threads
=
amount
.
numel
()
num_threads
=
amount
.
numel
()
with
torch
.
cuda
.
device_of
(
input
):
with
torch
.
cuda
.
device_of
(
input
):
f
=
load_kernel
(
basis_kernel
(
block
=
(
cuda_num_threads
,
1
,
1
),
'spline_kernel'
,
_spline_kernel
,
Dtype
=
Dtype
(
input
),
num_threads
=
num_threads
,
k_max
=
k_max
,
dim
=
dim
,
K
=
K
)
f
(
block
=
(
cuda_num_threads
,
1
,
1
),
grid
=
(
get_blocks
(
num_threads
),
1
,
1
),
grid
=
(
get_blocks
(
num_threads
),
1
,
1
),
args
=
[
args
=
[
input
.
data_ptr
(),
input
.
data_ptr
(),
amount
.
data_ptr
(),
amount
.
data_ptr
(),
index
.
data_ptr
(),
index
.
data_ptr
(),
kernel_size
.
data_ptr
(),
kernel_size
.
data_ptr
(),
is_open_spline
.
data_ptr
()
is_open_spline
.
data_ptr
(),
num_threads
],
],
stream
=
Stream
(
ptr
=
torch
.
cuda
.
current_stream
().
cuda_stream
))
stream
=
Stream
(
ptr
=
torch
.
cuda
.
current_stream
().
cuda_stream
))
...
...
edgewise_spline_weighting.py
View file @
764cd87d
...
@@ -4,11 +4,10 @@ if torch.cuda.is_available():
...
@@ -4,11 +4,10 @@ if torch.cuda.is_available():
from
.edgewise_spline_weighting_gpu
import
EdgewiseSplineWeightingGPU
from
.edgewise_spline_weighting_gpu
import
EdgewiseSplineWeightingGPU
def
edgewise_spline_weighting
(
input
,
weight
,
amount
,
index
):
def
edgewise_spline_weighting
(
input
,
weight
,
amount
,
index
,
k_fw
,
k_bw
):
if
input
.
is_cuda
:
if
input
.
is_cuda
:
K
,
M_in
,
M_out
=
weight
.
size
()
K
,
M_in
,
M_out
=
weight
.
size
()
k_max
=
amount
.
size
(
1
)
return
EdgewiseSplineWeightingGPU
(
amount
,
index
,
K
,
M_in
,
M_out
return
EdgewiseSplineWeightingGPU
(
amount
,
index
,
K
,
M_in
,
M_out
,
,
k_fw
,
k_bw
)(
input
,
weight
)
k_max
)(
input
,
weight
)
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
edgewise_spline_weighting_gpu.py
View file @
764cd87d
...
@@ -95,9 +95,34 @@ const long* index, int num_threads) {
...
@@ -95,9 +95,34 @@ const long* index, int num_threads) {
}
}
'''
'''
def
get_forward_kernel
(
M_in
,
M_out
,
k_max
):
cuda_tensor
=
torch
.
FloatTensor
([
1
]).
cuda
()
with
torch
.
cuda
.
device_of
(
cuda_tensor
):
f_fw
=
load_kernel
(
'edgewise_spline_weighting_forward_kernel'
,
_edgewise_spline_weighting_forward_kernel
,
Dtype
=
'float'
,
M_in
=
M_in
,
M_out
=
M_out
,
k_max
=
k_max
)
return
f_fw
def
get_backward_kernel
(
M_in
,
M_out
,
k_max
,
K
):
cuda_tensor
=
torch
.
FloatTensor
([
1
]).
cuda
()
with
torch
.
cuda
.
device_of
(
cuda_tensor
):
f_bw
=
load_kernel
(
'edgewise_spline_weighting_backward_kernel'
,
_edgewise_spline_weighting_backward_kernel
,
Dtype
=
'float'
,
M_in
=
M_in
,
M_out
=
M_out
,
k_max
=
k_max
,
K
=
K
)
return
f_bw
class
EdgewiseSplineWeightingGPU
(
Function
):
class
EdgewiseSplineWeightingGPU
(
Function
):
def
__init__
(
self
,
amount
,
index
,
K
,
M_in
,
M_out
,
k_
max
):
def
__init__
(
self
,
amount
,
index
,
K
,
M_in
,
M_out
,
k_
fw
,
k_bw
):
super
(
EdgewiseSplineWeightingGPU
,
self
).
__init__
()
super
(
EdgewiseSplineWeightingGPU
,
self
).
__init__
()
assert
amount
.
is_cuda
and
index
.
is_cuda
assert
amount
.
is_cuda
and
index
.
is_cuda
self
.
amount
=
amount
self
.
amount
=
amount
...
@@ -105,23 +130,9 @@ class EdgewiseSplineWeightingGPU(Function):
...
@@ -105,23 +130,9 @@ class EdgewiseSplineWeightingGPU(Function):
self
.
M_in
=
M_in
self
.
M_in
=
M_in
self
.
M_out
=
M_out
self
.
M_out
=
M_out
self
.
K
=
K
self
.
K
=
K
with
torch
.
cuda
.
device_of
(
amount
):
self
.
f_fw
=
k_fw
self
.
f_fw
=
load_kernel
(
self
.
f_bw
=
k_bw
'edgewise_spline_weighting_forward_kernel'
,
_edgewise_spline_weighting_forward_kernel
,
Dtype
=
Dtype
(
amount
),
M_in
=
M_in
,
M_out
=
M_out
,
k_max
=
k_max
)
self
.
f_bw
=
load_kernel
(
'edgewise_spline_weighting_backward_kernel'
,
_edgewise_spline_weighting_backward_kernel
,
Dtype
=
Dtype
(
amount
),
M_in
=
M_in
,
M_out
=
M_out
,
k_max
=
k_max
,
K
=
K
)
def
forward
(
self
,
input
,
weight
):
def
forward
(
self
,
input
,
weight
):
assert
input
.
is_cuda
and
weight
.
is_cuda
assert
input
.
is_cuda
and
weight
.
is_cuda
...
...
spline.py
View file @
764cd87d
import
torch
import
torch
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
from
.
spline_linear_gpu
import
spline_linear_gpu
from
.
compute_spline_basis
import
compute_spline_basis
from
.spline_quadratic_gpu
import
spline_quadratic_gpu
from
.spline_quadratic_gpu
import
spline_quadratic_gpu
from
.spline_cubic_gpu
import
spline_cubic_gpu
from
.spline_cubic_gpu
import
spline_cubic_gpu
def
spline
(
input
,
kernel_size
,
is_open_spline
,
K
,
degree
):
def
spline
(
input
,
kernel_size
,
is_open_spline
,
K
,
degree
,
basis_kernel
):
if
input
.
is_cuda
:
if
input
.
is_cuda
:
if
degree
==
1
:
return
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
K
,
basis_kernel
)
return
spline_linear_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
)
if
degree
==
2
:
return
spline_quadratic_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
)
if
degree
==
3
:
return
spline_cubic_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
)
else
:
raise
NotImplementedError
()
else
:
else
:
raise
NotImplementedError
()
raise
NotImplementedError
()
spline_conv.py
View file @
764cd87d
...
@@ -13,8 +13,11 @@ def spline_conv(
...
@@ -13,8 +13,11 @@ def spline_conv(
kernel_size
,
kernel_size
,
is_open_spline
,
is_open_spline
,
K
,
K
,
forward_kernel
,
backward_kernel
,
basis_kernel
,
degree
=
1
,
degree
=
1
,
bias
=
None
):
bias
=
None
,
):
if
input
.
dim
()
==
1
:
if
input
.
dim
()
==
1
:
input
=
input
.
unsqueeze
(
1
)
input
=
input
.
unsqueeze
(
1
)
...
@@ -25,8 +28,8 @@ def spline_conv(
...
@@ -25,8 +28,8 @@ def spline_conv(
# Get features for every end vertex with shape [|E| x M_in].
# Get features for every end vertex with shape [|E| x M_in].
output
=
input
[
col
]
output
=
input
[
col
]
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
# Convert to [|E| x M_in] feature matrix and calculate [|E| x M_out].
amount
,
index
=
spline
(
values
,
kernel_size
,
is_open_spline
,
K
,
degree
)
amount
,
index
=
spline
(
values
,
kernel_size
,
is_open_spline
,
K
,
degree
,
basis_kernel
)
output
=
edgewise_spline_weighting
(
output
,
weight
[:
-
1
],
amount
,
index
)
output
=
edgewise_spline_weighting
(
output
,
weight
[:
-
1
],
amount
,
index
,
forward_kernel
,
backward_kernel
)
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# Convolution via `scatter_add`. Converts [|E| x M_out] feature matrix to
# [n x M_out] feature matrix.
# [n x M_out] feature matrix.
...
...
spline_cubic_gpu_test.py
View file @
764cd87d
...
@@ -4,7 +4,8 @@ import torch
...
@@ -4,7 +4,8 @@ import torch
from
numpy.testing
import
assert_equal
,
assert_almost_equal
from
numpy.testing
import
assert_equal
,
assert_almost_equal
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
from
.spline_cubic_gpu
import
spline_cubic_gpu
from
.compute_spline_basis
import
compute_spline_basis
from
.compute_spline_basis
import
get_basis_kernel
class
SplineQuadraticGPUTest
(
unittest
.
TestCase
):
class
SplineQuadraticGPUTest
(
unittest
.
TestCase
):
...
@@ -13,8 +14,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
...
@@ -13,8 +14,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
7
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
7
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
k_max
=
4
a1
,
i1
=
spline_cubic_gpu
(
input
,
kernel_size
,
is_open_spline
,
7
)
K
=
7
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
3
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
7
,
basis_kernel
)
a2
=
[
a2
=
[
[
0.1667
,
0.6667
,
0.1667
,
0
],
[
0.1667
,
0.6667
,
0.1667
,
0
],
...
@@ -36,8 +40,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
...
@@ -36,8 +40,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
k_max
=
4
a1
,
i1
=
spline_cubic_gpu
(
input
,
kernel_size
,
is_open_spline
,
4
)
K
=
4
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
3
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
4
,
basis_kernel
)
a2
=
[
a2
=
[
[
0.1667
,
0.6667
,
0.1667
,
0
],
[
0.1667
,
0.6667
,
0.1667
,
0
],
...
...
spline_linear_gpu.py
deleted
100644 → 0
View file @
4b92ba74
import
torch
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
kernel_loop
,
get_blocks
)
_spline_kernel
=
kernel_loop
+
'''
// Linear spline basis kernel (template text: the ${...} placeholders,
// including the loop bound ${num_threads}, are substituted before compilation).
//
// For every flat position idx it writes one basis weight to amount[idx] and
// one flattened kernel-weight index to index[idx].
//   input          : read at e_idx * ${dim} + d_idx, i.e. ${dim} values per edge
//   amount, index  : outputs, addressed directly by idx
//   kernel_size    : per-dimension kernel size, read at d_idx
//   is_open_spline : per-dimension flag, read at d_idx (assumed 0 or 1 -- TODO confirm)
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
// CUDA_KERNEL_LOOP comes from the kernel_loop prefix prepended to this
// template; its exact iteration scheme is not visible here.
CUDA_KERNEL_LOOP(idx, ${num_threads}) {
// Split the flat index into an edge number and a per-edge basis slot.
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
// K is divided by each dimension's kernel size below, so it serves as the
// running stride of the flattened index (presumably it starts as the product
// of all kernel sizes -- verify against the caller).
int K = ${K};
int k_idx_mod;
int bot;
int top;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
// Peel off the next bit of k_idx: it selects the lower or upper neighbour
// in dimension d_idx.
k_idx_mod = k_idx % 2;
k_idx >>= 1;
// Scale the input coordinate by (kernel_size - is_open_spline).
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - is_open_spline[d_idx]);
// Fractional part of the scaled coordinate.
frac = value - floor(value);
// Bit 0 -> weight frac, bit 1 -> weight (1 - frac).
a *= (1 - k_idx_mod) * frac + k_idx_mod * (1 - frac);
// Lower cell and its successor, both wrapped around the kernel size.
bot = int(floor(value));
top = (bot + 1) % kernel_size[d_idx];
bot %= kernel_size[d_idx];
// Bit 0 -> top, bit 1 -> bot; scaled by the current stride.
i += (k_idx_mod * bot + (1 - k_idx_mod) * top) * K;
}
// a is the product of the per-dimension weights; i is the flattened index.
amount[idx] = a;
index[idx] = i;
}
}
'''
def
spline_linear_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
):
assert
input
.
is_cuda
and
kernel_size
.
is_cuda
and
is_open_spline
.
is_cuda
input
=
input
.
unsqueeze
(
1
)
if
len
(
input
.
size
())
<
2
else
input
num_edges
,
dim
=
input
.
size
()
k_max
=
2
**
dim
amount
=
input
.
new
(
num_edges
,
k_max
)
index
=
input
.
new
(
num_edges
,
k_max
).
long
()
num_threads
=
amount
.
numel
()
with
torch
.
cuda
.
device_of
(
input
):
f
=
load_kernel
(
'spline_kernel'
,
_spline_kernel
,
Dtype
=
Dtype
(
input
),
num_threads
=
num_threads
,
k_max
=
k_max
,
dim
=
dim
,
K
=
K
)
f
(
block
=
(
cuda_num_threads
,
1
,
1
),
grid
=
(
get_blocks
(
num_threads
),
1
,
1
),
args
=
[
input
.
data_ptr
(),
amount
.
data_ptr
(),
index
.
data_ptr
(),
kernel_size
.
data_ptr
(),
is_open_spline
.
data_ptr
()
],
stream
=
Stream
(
ptr
=
torch
.
cuda
.
current_stream
().
cuda_stream
))
return
amount
,
index
spline_linear_gpu_test.py
View file @
764cd87d
...
@@ -4,7 +4,8 @@ import torch
...
@@ -4,7 +4,8 @@ import torch
from
numpy.testing
import
assert_equal
,
assert_almost_equal
from
numpy.testing
import
assert_equal
,
assert_almost_equal
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
from
.spline_linear_gpu
import
spline_linear_gpu
from
.compute_spline_basis
import
compute_spline_basis
from
.compute_spline_basis
import
get_basis_kernel
class
SplineLinearGPUTest
(
unittest
.
TestCase
):
class
SplineLinearGPUTest
(
unittest
.
TestCase
):
...
@@ -13,8 +14,11 @@ class SplineLinearGPUTest(unittest.TestCase):
...
@@ -13,8 +14,11 @@ class SplineLinearGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
5
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
5
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
k_max
=
2
a1
,
i1
=
spline_linear_gpu
(
input
,
kernel_size
,
is_open_spline
,
5
)
K
=
5
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
1
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
5
,
basis_kernel
)
a2
=
[[
0
,
1
],
[
0.2
,
0.8
],
[
0
,
1
],
[
0
,
1
],
[
0
,
1
],
[
0.8
,
0.2
],
[
0
,
1
]]
a2
=
[[
0
,
1
],
[
0.2
,
0.8
],
[
0
,
1
],
[
0
,
1
],
[
0
,
1
],
[
0.8
,
0.2
],
[
0
,
1
]]
i2
=
[[
1
,
0
],
[
1
,
0
],
[
2
,
1
],
[
3
,
2
],
[
4
,
3
],
[
4
,
3
],
[
0
,
4
]]
i2
=
[[
1
,
0
],
[
1
,
0
],
[
2
,
1
],
[
3
,
2
],
[
4
,
3
],
[
4
,
3
],
[
0
,
4
]]
...
@@ -27,8 +31,11 @@ class SplineLinearGPUTest(unittest.TestCase):
...
@@ -27,8 +31,11 @@ class SplineLinearGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
k_max
=
2
a1
,
i1
=
spline_linear_gpu
(
input
,
kernel_size
,
is_open_spline
,
4
)
K
=
4
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
1
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
4
,
basis_kernel
)
a2
=
[[
0
,
1
],
[
0.2
,
0.8
],
[
0
,
1
],
[
0
,
1
],
[
0
,
1
],
[
0.8
,
0.2
],
[
0
,
1
]]
a2
=
[[
0
,
1
],
[
0.2
,
0.8
],
[
0
,
1
],
[
0
,
1
],
[
0
,
1
],
[
0.8
,
0.2
],
[
0
,
1
]]
i2
=
[[
1
,
0
],
[
1
,
0
],
[
2
,
1
],
[
3
,
2
],
[
0
,
3
],
[
0
,
3
],
[
1
,
0
]]
i2
=
[[
1
,
0
],
[
1
,
0
],
[
2
,
1
],
[
3
,
2
],
[
0
,
3
],
[
0
,
3
],
[
1
,
0
]]
...
...
spline_quadratic_gpu.py
deleted
100644 → 0
View file @
4b92ba74
import
torch
from
....utils.cuda
import
(
cuda_num_threads
,
Stream
,
Dtype
,
load_kernel
,
kernel_loop
,
get_blocks
)
_spline_kernel
=
kernel_loop
+
'''
// Quadratic spline basis kernel (template text: the ${...} placeholders,
// including the loop bound ${num_threads}, are substituted before compilation).
//
// For every flat position idx it writes one basis weight to amount[idx] and
// one flattened kernel-weight index to index[idx].
//   input          : read at e_idx * ${dim} + d_idx, i.e. ${dim} values per edge
//   amount, index  : outputs, addressed directly by idx
//   kernel_size    : per-dimension kernel size, read at d_idx
//   is_open_spline : per-dimension flag, read at d_idx (assumed 0 or 1 -- TODO confirm)
extern "C"
__global__ void spline_kernel(
const ${Dtype}* input, ${Dtype}* amount, long* index,
const long* kernel_size, const long* is_open_spline) {
// CUDA_KERNEL_LOOP comes from the kernel_loop prefix prepended to this
// template; its exact iteration scheme is not visible here.
CUDA_KERNEL_LOOP(idx, ${num_threads}) {
// Split the flat index into an edge number and a per-edge basis slot.
const int e_idx = idx / ${k_max};
int k_idx = idx % ${k_max};
// K is divided by each dimension's kernel size below, so it serves as the
// running stride of the flattened index (presumably it starts as the product
// of all kernel sizes -- verify against the caller).
int K = ${K};
int k_idx_mod;
int pos;
${Dtype} value;
${Dtype} frac;
${Dtype} a = 1.0;
long i = 0;
for (int d_idx = 0; d_idx < ${dim}; d_idx++) {
K /= kernel_size[d_idx];
// Peel off the next base-3 digit of k_idx: it selects which of the three
// per-dimension weights/offsets this slot uses in dimension d_idx.
k_idx_mod = k_idx % 3;
k_idx /= 3;
// Scale the input coordinate by (kernel_size - 2 * is_open_spline).
value = input[e_idx * ${dim} + d_idx] *
(kernel_size[d_idx] - (2 * is_open_spline[d_idx]));
// Fractional part of the scaled coordinate.
frac = value - floor(value);
// The three weights below sum to 1 for any frac.
if (k_idx_mod == 0) a *= 0.5 * (1- frac) * (1-frac);
else if (k_idx_mod == 1) a *= -frac * frac + frac + 0.5;
else a *= 0.5 * frac * frac;
// Integer cell plus the digit offset, wrapped around the kernel size.
pos = int(floor(value)) + k_idx_mod;
pos %= kernel_size[d_idx];
// Accumulate this dimension's contribution to the flattened index.
i += pos * K;
}
// a is the product of the per-dimension weights; i is the flattened index.
amount[idx] = a;
index[idx] = i;
}
}
'''
def
spline_quadratic_gpu
(
input
,
kernel_size
,
is_open_spline
,
K
):
assert
input
.
is_cuda
and
kernel_size
.
is_cuda
and
is_open_spline
.
is_cuda
input
=
input
.
unsqueeze
(
1
)
if
len
(
input
.
size
())
<
2
else
input
num_edges
,
dim
=
input
.
size
()
k_max
=
3
**
dim
amount
=
input
.
new
(
num_edges
,
k_max
)
index
=
input
.
new
(
num_edges
,
k_max
).
long
()
num_threads
=
amount
.
numel
()
with
torch
.
cuda
.
device_of
(
input
):
f
=
load_kernel
(
'spline_kernel'
,
_spline_kernel
,
Dtype
=
Dtype
(
input
),
num_threads
=
num_threads
,
k_max
=
k_max
,
dim
=
dim
,
K
=
K
)
f
(
block
=
(
cuda_num_threads
,
1
,
1
),
grid
=
(
get_blocks
(
num_threads
),
1
,
1
),
args
=
[
input
.
data_ptr
(),
amount
.
data_ptr
(),
index
.
data_ptr
(),
kernel_size
.
data_ptr
(),
is_open_spline
.
data_ptr
()
],
stream
=
Stream
(
ptr
=
torch
.
cuda
.
current_stream
().
cuda_stream
))
return
amount
,
index
spline_quadratic_gpu_test.py
View file @
764cd87d
...
@@ -4,7 +4,8 @@ import torch
...
@@ -4,7 +4,8 @@ import torch
from
numpy.testing
import
assert_equal
,
assert_almost_equal
from
numpy.testing
import
assert_equal
,
assert_almost_equal
if
torch
.
cuda
.
is_available
():
if
torch
.
cuda
.
is_available
():
from
.spline_quadratic_gpu
import
spline_quadratic_gpu
from
.compute_spline_basis
import
compute_spline_basis
from
.compute_spline_basis
import
get_basis_kernel
class
SplineQuadraticGPUTest
(
unittest
.
TestCase
):
class
SplineQuadraticGPUTest
(
unittest
.
TestCase
):
...
@@ -13,8 +14,12 @@ class SplineQuadraticGPUTest(unittest.TestCase):
...
@@ -13,8 +14,12 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
6
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
6
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
1
])
k_max
=
3
K
=
6
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
2
)
a1
,
i1
=
spline_quadratic_gpu
(
input
,
kernel_size
,
is_open_spline
,
6
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
6
,
basis_kernel
)
a2
=
[[
0.5
,
0.5
,
0
],
[
0.32
,
0.66
,
0.02
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
a2
=
[[
0.5
,
0.5
,
0
],
[
0.32
,
0.66
,
0.02
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
[
0.02
,
0.66
,
0.32
],
[
0.5
,
0.5
,
0
]]
[
0.5
,
0.5
,
0
],
[
0.02
,
0.66
,
0.32
],
[
0.5
,
0.5
,
0
]]
...
@@ -29,8 +34,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
...
@@ -29,8 +34,11 @@ class SplineQuadraticGPUTest(unittest.TestCase):
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
input
=
torch
.
cuda
.
FloatTensor
([
0
,
0.05
,
0.25
,
0.5
,
0.75
,
0.95
,
1
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
kernel_size
=
torch
.
cuda
.
LongTensor
([
4
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
is_open_spline
=
torch
.
cuda
.
LongTensor
([
0
])
k_max
=
3
a1
,
i1
=
spline_quadratic_gpu
(
input
,
kernel_size
,
is_open_spline
,
4
)
K
=
4
dim
=
1
basis_kernel
=
get_basis_kernel
(
k_max
,
K
,
dim
,
2
)
a1
,
i1
=
compute_spline_basis
(
input
,
kernel_size
,
is_open_spline
,
4
,
basis_kernel
)
a2
=
[[
0.5
,
0.5
,
0
],
[
0.32
,
0.66
,
0.02
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
a2
=
[[
0.5
,
0.5
,
0
],
[
0.32
,
0.66
,
0.02
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
[
0.5
,
0.5
,
0
],
[
0.02
,
0.66
,
0.32
],
[
0.5
,
0.5
,
0
]]
[
0.5
,
0.5
,
0
],
[
0.02
,
0.66
,
0.32
],
[
0.5
,
0.5
,
0
]]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment