Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
ceef30b4
Unverified
Commit
ceef30b4
authored
Dec 25, 2023
by
Muhammed Fatih BALIN
Committed by
GitHub
Dec 25, 2023
Browse files
[GraphBolt][CUDA] Adds an exclusive prefix sum function for Neighbor Sampling. (#6798)
parent
869bfb67
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
45 additions
and
0 deletions
+45
-0
graphbolt/include/graphbolt/cuda_ops.h
graphbolt/include/graphbolt/cuda_ops.h
+9
-0
graphbolt/src/cuda/common.h
graphbolt/src/cuda/common.h
+2
-0
graphbolt/src/cuda/cumsum.cu
graphbolt/src/cuda/cumsum.cu
+34
-0
No files found.
graphbolt/include/graphbolt/cuda_ops.h
View file @
ceef30b4
...
...
@@ -12,6 +12,15 @@ namespace ops {
std
::
pair
<
torch
::
Tensor
,
torch
::
Tensor
>
Sort
(
torch
::
Tensor
input
,
int
num_bits
);
/**
 * @brief Computes the exclusive prefix sum of the given input.
 *
 * @param input The input tensor. The CUDA implementation dispatches over
 * integral scalar types only and scans input.size(0) elements.
 *
 * @return A tensor allocated like the input (torch::empty_like) holding the
 * prefix sum result such that r[i] = \sum_{j=0}^{i-1} input[j]; the sum is
 * empty for i = 0, so r[0] = 0.
 */
torch
::
Tensor
ExclusiveCumSum
(
torch
::
Tensor
input
);
std
::
tuple
<
torch
::
Tensor
,
torch
::
Tensor
>
IndexSelectCSCImpl
(
torch
::
Tensor
indptr
,
torch
::
Tensor
indices
,
torch
::
Tensor
nodes
);
...
...
graphbolt/src/cuda/common.h
View file @
ceef30b4
...
...
@@ -67,6 +67,8 @@ struct CUDAWorkspaceAllocator {
inline
auto
GetAllocator
()
{
return
CUDAWorkspaceAllocator
{};
}
inline
auto
GetCurrentStream
()
{
return
c10
::
cuda
::
getCurrentCUDAStream
();
}
template
<
typename
T
>
inline
bool
is_zero
(
T
size
)
{
return
size
==
0
;
...
...
graphbolt/src/cuda/cumsum.cu
0 → 100644
View file @
ceef30b4
/**
* Copyright (c) 2023 by Contributors
* Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
* @file cuda/cumsum.cu
* @brief Cumsum operators implementation on CUDA.
*/
#include <cub/cub.cuh>
#include "./common.h"
namespace
graphbolt
{
namespace
ops
{
/**
 * @brief Computes the exclusive prefix sum of the given input on the current
 * CUDA stream using cub::DeviceScan::ExclusiveSum.
 *
 * @param input The input tensor; only integral scalar types are dispatched.
 * The scan covers input.size(0) elements. A non-contiguous input is copied
 * into contiguous memory first because CUB reads and writes raw pointers
 * linearly.
 *
 * @return A tensor allocated like the (contiguous) input such that
 * r[i] = \sum_{j=0}^{i-1} input[j].
 */
torch::Tensor ExclusiveCumSum(torch::Tensor input) {
  // CUB has no notion of strides: handing it the data pointer of a strided
  // view would scan the wrong elements. contiguous() returns the same tensor
  // when the layout is already dense, so the common case costs nothing.
  input = input.contiguous();
  auto allocator = cuda::GetAllocator();
  auto stream = cuda::GetCurrentStream();
  // Allocated after the contiguous() call so that the output is dense too.
  auto result = torch::empty_like(input);
  const auto num_items = input.size(0);
  AT_DISPATCH_INTEGRAL_TYPES(
      input.scalar_type(), "ExclusiveCumSum", ([&] {
        auto in_ptr = input.data_ptr<scalar_t>();
        auto out_ptr = result.data_ptr<scalar_t>();
        size_t tmp_storage_size = 0;
        // First pass: a null workspace makes CUB only report how many bytes
        // of temporary storage the scan needs.
        cub::DeviceScan::ExclusiveSum(
            nullptr, tmp_storage_size, in_ptr, out_ptr, num_items, stream);
        auto tmp_storage = allocator.AllocateStorage<char>(tmp_storage_size);
        // Second pass: run the actual scan with the allocated workspace.
        cub::DeviceScan::ExclusiveSum(
            tmp_storage.get(), tmp_storage_size, in_ptr, out_ptr, num_items,
            stream);
      }));
  return result;
}
}
// namespace ops
}
// namespace graphbolt
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment