OpenDAS / torch-cluster / Commits

Commit fb4632ba, authored Feb 06, 2018 by rusty1s
Parent: 6aad98c2

    added cuda kernel

Showing 8 changed files with 150 additions and 20 deletions (+150 / -20)
test/test_grid.py                        +27  -15
torch_cluster/functions/grid.py           +1   -2
torch_cluster/kernel/THCIndex.cuh        +23   -0
torch_cluster/kernel/common.cuh          +25   -0
torch_cluster/kernel/generic/common.cu   +16   -0
torch_cluster/kernel/generic/kernel.cu   +22   -1
torch_cluster/kernel/kernel.cu           +35   -1
torch_cluster/src/generic/cpu.c           +1   -1
test/test_grid.py  (+27 -15)

@@ -10,14 +10,16 @@ def test_grid_cluster_cpu(tensor):

    position = Tensor(tensor, [0, 9, 2, 8, 3])
    size = torch.LongTensor([5])
    expected = torch.LongTensor([0, 1, 0, 1, 0])
    output = grid_cluster(position, size)
    assert output.tolist() == expected.tolist()

    position = Tensor(tensor, [[0, 0], [9, 9], [2, 8], [2, 2], [8, 3]])
    size = torch.LongTensor([5, 5])
    expected = torch.LongTensor([0, 3, 1, 0, 2])
    output = grid_cluster(position, size)
    assert output.tolist() == expected.tolist()

    position = Tensor(tensor, [[0, 9, 2, 2, 8], [0, 9, 8, 2, 3]]).t()
    output = grid_cluster(position, size)
    assert output.tolist() == expected.tolist()

@@ -36,21 +38,31 @@ def test_grid_cluster_cpu(tensor):

@pytest.mark.skipif(not torch.cuda.is_available(), reason='no CUDA')
@pytest.mark.parametrize('tensor', tensors)
def test_grid_cluster_gpu(tensor):  # pragma: no cover
    position = Tensor(tensor, [0, 9, 2, 8, 3]).cuda()
    print(position)
    size = torch.cuda.LongTensor([5])
    expected = torch.cuda.LongTensor([0, 1, 0, 1, 0])
    output = grid_cluster(position, size)
    assert output.cpu().tolist() == expected.tolist()

    position = Tensor(tensor, [[0, 0], [9, 9], [2, 8], [2, 2], [8, 3]])
    position = position.cuda()
    size = torch.cuda.LongTensor([5, 5])
    expected = torch.cuda.LongTensor([0, 3, 1, 0, 2])
    output = grid_cluster(position, size)
    assert output.cpu().tolist() == expected.tolist()

    position = Tensor(tensor, [[0, 9, 2, 2, 8], [0, 9, 8, 2, 3]])
    position = position.cuda().t()
    output = grid_cluster(position, size)
    assert output.cpu().tolist() == expected.tolist()

    output = grid_cluster(position.expand(2, 5, 2), size)
    assert output.tolist() == expected.expand(2, 5).tolist()

    expected = torch.LongTensor([0, 1, 3, 2, 4])
    batch = torch.cuda.LongTensor([0, 0, 1, 1, 1])
    output = grid_cluster(position, size, batch)
    assert output.cpu().tolist() == expected.tolist()

    output = grid_cluster(position.expand(2, 5, 2), size, batch.expand(2, 5))
    assert output.cpu().tolist() == expected.expand(2, 5).tolist()
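For orientation, the batched expectation [0, 1, 3, 2, 4] above can be reproduced in plain Python: treat the batch index as the most significant "grid dimension" on top of the per-point cell ids [0, 3, 1, 0, 2] from the unbatched 2-D case, then relabel the resulting ids consecutively (the consecutive step in torch_cluster/functions/grid.py). This is an inference from the test values, not code from this commit; the helper names below are illustrative.

import math

def relabel_consecutive(ids):
    # Map each distinct id to its rank among the sorted distinct ids (0..k-1).
    rank = {v: i for i, v in enumerate(sorted(set(ids)))}
    return [rank[v] for v in ids]

cells = [0, 3, 1, 0, 2]          # per-point cell ids for size=[5, 5] (see the 2-D case above)
batch = [0, 0, 1, 1, 1]
C = 4                            # 2 x 2 grid cells per batch entry
raw = [b * C + c for b, c in zip(batch, cells)]   # batch as the most significant digit
print(relabel_consecutive(raw))  # -> [0, 1, 3, 2, 4], matching the batched GPU test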
torch_cluster/functions/grid.py  (+1 -2)

@@ -43,7 +43,6 @@ def grid_cluster(position, size, batch=None):
     func = get_func('grid', position)
     func(C, cluster, position, size, c_max)
     cluster = cluster.squeeze(dim=-1)
-    if not cluster.is_cuda:
-        cluster = consecutive(cluster)
+    cluster = consecutive(cluster)
     return cluster
torch_cluster/kernel/THCIndex.cuh  (new file, +23)

template <typename a, int Dims>
struct IndexToOffset {
  static __device__ void compute(int i, const TensorInfo<a>& t1, int* t1Offset) {
    int curDimIndex;
    for (int d = Dims - 2; d >= 0; d--) {
      curDimIndex = i % t1.size[d];
      *t1Offset += curDimIndex * t1.stride[d];
      i /= t1.size[d];
    }
  }
};

template <typename a>
struct IndexToOffset<a, -1> {
  static __device__ void compute(int i, const TensorInfo<a>& t1, int* t1Offset) {
    int curDimIndex;
    for (int d = t1.dims - 2; d >= 0; d--) {
      curDimIndex = i % t1.size[d];
      *t1Offset += curDimIndex * t1.stride[d];
      i /= t1.size[d];
    }
  }
};
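IndexToOffset converts a linear element index into a memory offset using the size/stride arrays carried in TensorInfo. The loop stops at the second-to-last dimension; gridKernel (in kernel.cu below) then reads the coordinates of a point at positionOffset + d, which appears to assume the last dimension is contiguous. A rough Python equivalent of the runtime (Dims == -1) specialization, with names chosen here for illustration:

def index_to_offset(i, size, stride):
    # Walk the dimensions from second-to-last down to 0, peeling off one
    # coordinate per dimension and accumulating coordinate * stride.
    offset = 0
    for d in range(len(size) - 2, -1, -1):
        cur = i % size[d]
        offset += cur * stride[d]
        i //= size[d]
    return offset

# A 5 x 2 row-major tensor has strides (2, 1); point index 3 starts at flat
# offset 6, and its two coordinates sit at offset + 0 and offset + 1.
print(index_to_offset(3, size=(5, 2), stride=(2, 1)))  # -> 6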
torch_cluster/kernel/common.cuh  (new file, +25)

const int MAX_DIMS = 25;
const int NUM_THREADS = 1024;

inline int GET_BLOCKS(const int n) {
  return (n + NUM_THREADS - 1) / NUM_THREADS;
}

template <typename T>
struct TensorInfo {
  TensorInfo(T* t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t;
    dims = d;
    for (int i = 0; i < dims; i++) {
      size[i] = sz[i];
      stride[i] = st[i];
    }
  }

  T* data;
  int dims;
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
};

#define KERNEL_LOOP(I, N) \
  for (int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x)
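GET_BLOCKS is a ceiling division (enough 1024-thread blocks to cover n elements) and KERNEL_LOOP is the standard grid-stride loop, which stays correct even if fewer blocks than GET_BLOCKS(n) are launched. A small Python sketch of the same arithmetic; the function names are illustrative only:

NUM_THREADS = 1024

def get_blocks(n):
    # Ceiling division: smallest number of NUM_THREADS-sized blocks covering n.
    return (n + NUM_THREADS - 1) // NUM_THREADS

def kernel_loop_indices(n, num_blocks, block_dim=NUM_THREADS):
    # Grid-stride loop: every (block, thread) pair starts at its global id and
    # strides by the total number of launched threads until n is covered.
    total = num_blocks * block_dim
    for start in range(total):
        for i in range(start, n, total):
            yield i

assert get_blocks(1) == 1 and get_blocks(1024) == 1 and get_blocks(1025) == 2
assert sorted(kernel_loop_indices(5000, get_blocks(5000))) == list(range(5000))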
torch_cluster/kernel/generic/common.cu  (new file, +16)

#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else

TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  int dims = THCTensor_(nDimension)(state, tensor);

  int size[MAX_DIMS];
  int stride[MAX_DIMS];
  for (int i = 0; i < dims; i++) {
    size[i] = THCTensor_(size)(state, tensor, i);
    stride[i] = THCTensor_(stride)(state, tensor, i);
  }

  return TensorInfo<real>(data, dims, size, stride);
}

#endif
torch_cluster/kernel/generic/kernel.cu  (+22 -1)

@@ -3,7 +3,28 @@

#else

void cluster_(grid)(THCState *state, int C, THCudaLongTensor *output, THCTensor *position,
                    THCTensor *size, THCudaLongTensor *count) {
  printf("drin");
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, position, size));
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 2, output, count));
  THArgCheck(THCudaLongTensor_nDimension(state, output) <= MAX_DIMS, 1,
             "Tensor too large or too many dimensions");

  int64_t *outputData = THCudaLongTensor_data(state, output);
  TensorInfo<real> positionInfo = thc_(getTensorInfo)(state, position);
  real *sizeData = THCTensor_(data)(state, size);
  int64_t *countData = THCudaLongTensor_data(state, count);

  const int N = THCudaLongTensor_nElement(state, output);
  int grid = GET_BLOCKS(N);
  cudaStream_t stream = THCState_getCurrentStream(state);

  switch (positionInfo.dims) {
    case 1: gridKernel<real, 1><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 2: gridKernel<real, 2><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 3: gridKernel<real, 3><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    case 4: gridKernel<real, 4><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
    default: gridKernel<real, -1><<<grid, NUM_THREADS, 0, stream>>>(outputData, positionInfo, sizeData, countData, C, N); break;
  }

  THCudaCheck(cudaGetLastError());
}

#endif
torch_cluster/kernel/kernel.cu  (+35 -1)

@@ -2,7 +2,41 @@

#include "kernel.h"
#include "common.cuh"
#include "THCIndex.cuh"

#define cluster_(NAME) TH_CONCAT_4(cluster_, NAME, _kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)

#include "generic/common.cu"
#include "THCGenerateAllTypes.h"

template <typename Real, int Dims>
__global__ void gridKernel(int64_t *output, TensorInfo<Real> position, Real *size,
                           int64_t *count, const int C, const int N) {
  KERNEL_LOOP(i, N) {
    int positionOffset = 0;
    IndexToOffset<Real, Dims>::compute(i, position, &positionOffset);

    int tmp = C;
    int64_t c = 0;
    for (int d = 0; d < position.size[position.dims - 1]; d++) {
      tmp = tmp / count[d];
      c += tmp * (int64_t) (position.data[positionOffset + d] / size[d]);
    }

    output[i] = c;
  }
}

#include "generic/kernel.cu"
#include "THCGenerateFloatType.h"

#include "generic/kernel.cu"
#include "THCGenerateDoubleType.h"

#include "generic/kernel.cu"
#include "THCGenerateByteType.h"

#include "generic/kernel.cu"
#include "THCGenerateCharType.h"

#include "generic/kernel.cu"
#include "THCGenerateShortType.h"

#include "generic/kernel.cu"
#include "THCGenerateIntType.h"

#include "generic/kernel.cu"
#include "THCGenerateLongType.h"
torch_cluster/src/generic/cpu.c  (+1 -1)

@@ -12,7 +12,7 @@ void cluster_(grid)(int C, THLongTensor *output, THTensor *position, THTensor *s

     tmp = C; c = 0;
     for (i = 0; i < d; i++) {
       tmp = tmp / *(count_data + i);
-      c += tmp * (int64_t) floor(*(position_data + i * position_stride) / *(size_data + i));
+      c += tmp * (int64_t) (*(position_data + i * position_stride) / *(size_data + i));
     }
     output_data[0] = c;
   )
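The CPU change above replaces floor() with a plain integer cast. The two agree for non-negative coordinates, as used in the tests, but differ for negative ones: a C cast truncates toward zero while floor rounds toward negative infinity. A quick Python check of that distinction:

import math

for x in (7.0, -7.0):
    cell_floor = int(math.floor(x / 5.0))  # floor(): -7 / 5 -> -2
    cell_trunc = int(x / 5.0)              # cast-style truncation: -7 / 5 -> -1
    print(x, cell_floor, cell_trunc)
# 7.0: both give 1; -7.0: floor gives -2, truncation gives -1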