Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
7eb4de4b
"src/vscode:/vscode.git/clone" did not exist on "c14057c8dbc32847bac9082bcc0ae00c9a19357d"
Unverified
Commit
7eb4de4b
authored
Apr 07, 2024
by
Muhammed Fatih BALIN
Committed by
GitHub
Apr 07, 2024
Browse files
[GraphBolt][CUDA] Sample neighbors synchronization optimization. (#7264)
parent
912164a8
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
3 deletions
+6
-3
graphbolt/src/cuda/neighbor_sampler.cu
graphbolt/src/cuda/neighbor_sampler.cu
+6
-3
No files found.
graphbolt/src/cuda/neighbor_sampler.cu
View file @
7eb4de4b
...
@@ -270,9 +270,6 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -270,9 +270,6 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
if
(
seeds
.
has_value
()
&&
!
probs_or_mask
.
has_value
()
&&
fanouts
.
size
()
<=
1
)
{
if
(
seeds
.
has_value
()
&&
!
probs_or_mask
.
has_value
()
&&
fanouts
.
size
()
<=
1
)
{
sub_indptr
=
ExclusiveCumSum
(
in_degree
);
sub_indptr
=
ExclusiveCumSum
(
in_degree
);
}
}
auto
coo_rows
=
ExpandIndptrImpl
(
sub_indptr
,
indices
.
scalar_type
(),
torch
::
nullopt
,
num_edges
);
num_edges
=
coo_rows
.
size
(
0
);
const
continuous_seed
random_seed
=
[
&
]
{
const
continuous_seed
random_seed
=
[
&
]
{
if
(
random_seed_tensor
.
has_value
())
{
if
(
random_seed_tensor
.
has_value
())
{
return
continuous_seed
(
random_seed_tensor
.
value
(),
seed2_contribution
);
return
continuous_seed
(
random_seed_tensor
.
value
(),
seed2_contribution
);
...
@@ -317,6 +314,12 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -317,6 +314,12 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
auto
num_sampled_edges
=
auto
num_sampled_edges
=
cuda
::
CopyScalar
{
output_indptr
.
data_ptr
<
indptr_t
>
()
+
num_rows
};
cuda
::
CopyScalar
{
output_indptr
.
data_ptr
<
indptr_t
>
()
+
num_rows
};
// This operation is placed after the num_sampled_edges copy is started to
// hide the latency of the copy synchronization that happens later.
auto
coo_rows
=
ExpandIndptrImpl
(
sub_indptr
,
indices
.
scalar_type
(),
torch
::
nullopt
,
num_edges
);
num_edges
=
coo_rows
.
size
(
0
);
// Find the smallest integer type to store the edge id offsets. We synchronize
// Find the smallest integer type to store the edge id offsets. We synchronize
// on the CUDAEvent so that the access is safe.
// on the CUDAEvent so that the access is safe.
auto
compute_num_bits
=
[
&
]
{
auto
compute_num_bits
=
[
&
]
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment