Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
a3d20dce
Unverified
Commit
a3d20dce
authored
Apr 18, 2024
by
Muhammed Fatih BALIN
Committed by
GitHub
Apr 18, 2024
Browse files
[GraphBolt][CUDA] Make `_convert_to_sampled_subgraph` lighter. (#7312)
parent
6f9c20c3
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
72 additions
and
15 deletions
+72
-15
graphbolt/include/graphbolt/cuda_sampling_ops.h
graphbolt/include/graphbolt/cuda_sampling_ops.h
+1
-0
graphbolt/src/cuda/neighbor_sampler.cu
graphbolt/src/cuda/neighbor_sampler.cu
+58
-4
graphbolt/src/fused_csc_sampling_graph.cc
graphbolt/src/fused_csc_sampling_graph.cc
+3
-2
graphbolt/src/utils.h
graphbolt/src/utils.h
+7
-4
python/dgl/graphbolt/impl/fused_csc_sampling_graph.py
python/dgl/graphbolt/impl/fused_csc_sampling_graph.py
+3
-5
No files found.
graphbolt/include/graphbolt/cuda_sampling_ops.h
View file @
a3d20dce
...
@@ -68,6 +68,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -68,6 +68,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
bool
return_eids
,
bool
return_eids
,
torch
::
optional
<
torch
::
Tensor
>
type_per_edge
=
torch
::
nullopt
,
torch
::
optional
<
torch
::
Tensor
>
type_per_edge
=
torch
::
nullopt
,
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
=
torch
::
nullopt
,
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
=
torch
::
nullopt
,
torch
::
optional
<
torch
::
Tensor
>
node_type_offset
=
torch
::
nullopt
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
node_type_to_id
=
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
node_type_to_id
=
torch
::
nullopt
,
torch
::
nullopt
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
edge_type_to_id
=
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
edge_type_to_id
=
...
...
graphbolt/src/cuda/neighbor_sampler.cu
View file @
a3d20dce
...
@@ -9,6 +9,7 @@
...
@@ -9,6 +9,7 @@
#include <graphbolt/continuous_seed.h>
#include <graphbolt/continuous_seed.h>
#include <graphbolt/cuda_ops.h>
#include <graphbolt/cuda_ops.h>
#include <graphbolt/cuda_sampling_ops.h>
#include <graphbolt/cuda_sampling_ops.h>
#include <thrust/copy.h>
#include <thrust/gather.h>
#include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/transform_iterator.h>
...
@@ -189,6 +190,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -189,6 +190,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
const
std
::
vector
<
int64_t
>&
fanouts
,
bool
replace
,
bool
layer
,
const
std
::
vector
<
int64_t
>&
fanouts
,
bool
replace
,
bool
layer
,
bool
return_eids
,
torch
::
optional
<
torch
::
Tensor
>
type_per_edge
,
bool
return_eids
,
torch
::
optional
<
torch
::
Tensor
>
type_per_edge
,
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
,
torch
::
optional
<
torch
::
Tensor
>
probs_or_mask
,
torch
::
optional
<
torch
::
Tensor
>
node_type_offset
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
node_type_to_id
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
node_type_to_id
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
edge_type_to_id
,
torch
::
optional
<
torch
::
Dict
<
std
::
string
,
int64_t
>>
edge_type_to_id
,
torch
::
optional
<
torch
::
Tensor
>
random_seed_tensor
,
torch
::
optional
<
torch
::
Tensor
>
random_seed_tensor
,
...
@@ -531,12 +533,33 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -531,12 +533,33 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
// Here, we check what are the dst node types for the given seeds so that
// Here, we check what are the dst node types for the given seeds so that
// we can compute the output indptr space later.
// we can compute the output indptr space later.
std
::
vector
<
int64_t
>
etype_id_to_dst_ntype_id
(
num_etypes
);
std
::
vector
<
int64_t
>
etype_id_to_dst_ntype_id
(
num_etypes
);
// Here, we check what are the src node types for the given seeds so that
// we can subtract source node offset from indices later.
auto
etype_id_to_src_ntype_id
=
torch
::
empty
(
2
*
num_etypes
,
c10
::
TensorOptions
().
dtype
(
torch
::
kLong
).
pinned_memory
(
true
));
auto
etype_id_to_src_ntype_id_ptr
=
etype_id_to_src_ntype_id
.
data_ptr
<
int64_t
>
();
for
(
auto
&
etype_and_id
:
edge_type_to_id
.
value
())
{
for
(
auto
&
etype_and_id
:
edge_type_to_id
.
value
())
{
auto
etype
=
etype_and_id
.
key
();
auto
etype
=
etype_and_id
.
key
();
auto
id
=
etype_and_id
.
value
();
auto
id
=
etype_and_id
.
value
();
auto
dst_type
=
utils
::
parse_dst_ntype_from_etype
(
etype
);
auto
[
src_type
,
dst_type
]
=
utils
::
parse_
src_
dst_ntype_from_etype
(
etype
);
etype_id_to_dst_ntype_id
[
id
]
=
node_type_to_id
->
at
(
dst_type
);
etype_id_to_dst_ntype_id
[
id
]
=
node_type_to_id
->
at
(
dst_type
);
etype_id_to_src_ntype_id_ptr
[
2
*
id
]
=
etype_id_to_src_ntype_id_ptr
[
2
*
id
+
1
]
=
node_type_to_id
->
at
(
src_type
);
}
}
auto
indices_offsets_device
=
torch
::
empty
(
etype_id_to_src_ntype_id
.
size
(
0
),
output_indices
.
options
().
dtype
(
torch
::
kLong
));
AT_DISPATCH_INDEX_TYPES
(
node_type_offset
->
scalar_type
(),
"SampleNeighborsNodeTypeOffset"
,
([
&
]
{
THRUST_CALL
(
gather
,
etype_id_to_src_ntype_id_ptr
,
etype_id_to_src_ntype_id_ptr
+
etype_id_to_src_ntype_id
.
size
(
0
),
node_type_offset
->
data_ptr
<
index_t
>
(),
indices_offsets_device
.
data_ptr
<
int64_t
>
());
}));
// For each edge type, we compute the start and end offsets to index into
// For each edge type, we compute the start and end offsets to index into
// indptr to form the final output_indptr.
// indptr to form the final output_indptr.
auto
indptr_offsets
=
torch
::
empty
(
auto
indptr_offsets
=
torch
::
empty
(
...
@@ -571,29 +594,60 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
...
@@ -571,29 +594,60 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
num_etypes
*
2
,
c10
::
TensorOptions
()
num_etypes
*
2
,
c10
::
TensorOptions
()
.
dtype
(
output_indptr
.
scalar_type
())
.
dtype
(
output_indptr
.
scalar_type
())
.
pinned_memory
(
true
));
.
pinned_memory
(
true
));
auto
edge_offsets_device
=
torch
::
empty
(
num_etypes
*
2
,
output_indptr
.
options
());
at
::
cuda
::
CUDAEvent
edge_offsets_event
;
at
::
cuda
::
CUDAEvent
edge_offsets_event
;
AT_DISPATCH_INDEX_TYPES
(
AT_DISPATCH_INDEX_TYPES
(
indptr
.
scalar_type
(),
"SampleNeighborsEdgeOffsets"
,
([
&
]
{
indptr
.
scalar_type
(),
"SampleNeighborsEdgeOffsets"
,
([
&
]
{
auto
edge_offsets_pinned_device_pair
=
thrust
::
make_transform_output_iterator
(
thrust
::
make_zip_iterator
(
edge_offsets
->
data_ptr
<
index_t
>
(),
edge_offsets_device
.
data_ptr
<
index_t
>
()),
[
=
]
__device__
(
index_t
x
)
{
return
thrust
::
make_tuple
(
x
,
x
);
});
THRUST_CALL
(
THRUST_CALL
(
gather
,
indptr_offsets_ptr
,
gather
,
indptr_offsets_ptr
,
indptr_offsets_ptr
+
indptr_offsets
.
size
(
0
),
indptr_offsets_ptr
+
indptr_offsets
.
size
(
0
),
output_indptr
.
data_ptr
<
index_t
>
(),
output_indptr
.
data_ptr
<
index_t
>
(),
edge_offsets
->
data_ptr
<
index_t
>
()
);
edge_offsets
_pinned_device_pair
);
}));
}));
edge_offsets_event
.
record
();
edge_offsets_event
.
record
();
auto
indices_offset_subtract
=
ExpandIndptrImpl
(
edge_offsets_device
,
indices
.
scalar_type
(),
indices_offsets_device
,
output_indices
.
size
(
0
));
// The output_indices is permuted here.
// The output_indices is permuted here.
std
::
tie
(
output_indptr
,
output_indices
)
=
IndexSelectCSCImpl
(
std
::
tie
(
output_indptr
,
output_indices
)
=
IndexSelectCSCImpl
(
output_in_degree
,
sliced_output_indptr
,
output_indices
,
permutation
,
output_in_degree
,
sliced_output_indptr
,
output_indices
,
permutation
,
num_rows
-
1
,
output_indices
.
size
(
0
));
num_rows
-
1
,
output_indices
.
size
(
0
));
output_indices
-=
indices_offset_subtract
;
auto
output_indptr_offsets
=
torch
::
empty
(
num_etypes
*
2
,
c10
::
TensorOptions
().
dtype
(
torch
::
kLong
).
pinned_memory
(
true
));
auto
output_indptr_offsets_ptr
=
output_indptr_offsets
.
data_ptr
<
int64_t
>
();
std
::
vector
<
torch
::
Tensor
>
indptr_list
;
std
::
vector
<
torch
::
Tensor
>
indptr_list
;
for
(
int
i
=
0
;
i
<
num_etypes
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_etypes
;
i
++
)
{
indptr_list
.
push_back
(
output_indptr
.
slice
(
indptr_list
.
push_back
(
output_indptr
.
slice
(
0
,
indptr_offsets_ptr
[
2
*
i
],
0
,
indptr_offsets_ptr
[
2
*
i
],
indptr_offsets_ptr
[
2
*
i
+
1
]
+
1
));
indptr_offsets_ptr
[
2
*
i
+
1
]
+
(
i
==
num_etypes
-
1
)));
output_indptr_offsets_ptr
[
2
*
i
]
=
i
==
0
?
0
:
output_indptr_offsets_ptr
[
2
*
i
-
1
];
output_indptr_offsets_ptr
[
2
*
i
+
1
]
=
output_indptr_offsets_ptr
[
2
*
i
]
+
indptr_list
.
back
().
size
(
0
);
}
}
auto
output_indptr_offsets_device
=
torch
::
empty
(
output_indptr_offsets
.
size
(
0
),
output_indptr
.
options
().
dtype
(
torch
::
kLong
));
THRUST_CALL
(
copy_n
,
output_indptr_offsets_ptr
,
output_indptr_offsets
.
size
(
0
),
output_indptr_offsets_device
.
data_ptr
<
int64_t
>
());
// We form the final output indptr by concatenating pieces for different
// We form the final output indptr by concatenating pieces for different
// edge types.
// edge types.
output_indptr
=
torch
::
cat
(
indptr_list
);
output_indptr
=
torch
::
cat
(
indptr_list
);
auto
indptr_offset_subtract
=
ExpandIndptrImpl
(
output_indptr_offsets_device
,
indptr
.
scalar_type
(),
edge_offsets_device
,
output_indptr
.
size
(
0
));
output_indptr
-=
indptr_offset_subtract
;
edge_offsets_event
.
synchronize
();
edge_offsets_event
.
synchronize
();
// We read the edge_offsets here, they are in pairs but we don't need it to
// We read the edge_offsets here, they are in pairs but we don't need it to
// be in pairs. So we remove the duplicate information from it and turn it
// be in pairs. So we remove the duplicate information from it and turn it
...
...
graphbolt/src/fused_csc_sampling_graph.cc
View file @
a3d20dce
...
@@ -646,8 +646,9 @@ c10::intrusive_ptr<FusedSampledSubgraph> FusedCSCSamplingGraph::SampleNeighbors(
...
@@ -646,8 +646,9 @@ c10::intrusive_ptr<FusedSampledSubgraph> FusedCSCSamplingGraph::SampleNeighbors(
c10
::
DeviceType
::
CUDA
,
"SampleNeighbors"
,
{
c10
::
DeviceType
::
CUDA
,
"SampleNeighbors"
,
{
return
ops
::
SampleNeighbors
(
return
ops
::
SampleNeighbors
(
indptr_
,
indices_
,
seeds
,
seed_offsets
,
fanouts
,
replace
,
layer
,
indptr_
,
indices_
,
seeds
,
seed_offsets
,
fanouts
,
replace
,
layer
,
return_eids
,
type_per_edge_
,
probs_or_mask
,
node_type_to_id_
,
return_eids
,
type_per_edge_
,
probs_or_mask
,
node_type_offset_
,
edge_type_to_id_
,
random_seed
,
seed2_contribution
);
node_type_to_id_
,
edge_type_to_id_
,
random_seed
,
seed2_contribution
);
});
});
}
}
TORCH_CHECK
(
seeds
.
has_value
(),
"Nodes can not be None on the CPU."
);
TORCH_CHECK
(
seeds
.
has_value
(),
"Nodes can not be None on the CPU."
);
...
...
graphbolt/src/utils.h
View file @
a3d20dce
...
@@ -27,14 +27,17 @@ inline bool is_accessible_from_gpu(torch::Tensor tensor) {
...
@@ -27,14 +27,17 @@ inline bool is_accessible_from_gpu(torch::Tensor tensor) {
}
}
/**
/**
* @brief Parses the destination node type from a given edge type
triple
* @brief Parses the
source and
destination node type from a given edge type
* seperated with ":".
*
triple
seperated with ":".
*/
*/
inline
std
::
string
parse_dst_ntype_from_etype
(
std
::
string
etype
)
{
inline
std
::
pair
<
std
::
string
,
std
::
string
>
parse_src_dst_ntype_from_etype
(
std
::
string
etype
)
{
auto
first_seperator_it
=
std
::
find
(
etype
.
begin
(),
etype
.
end
(),
':'
);
auto
first_seperator_it
=
std
::
find
(
etype
.
begin
(),
etype
.
end
(),
':'
);
auto
second_seperator_pos
=
auto
second_seperator_pos
=
std
::
find
(
first_seperator_it
+
1
,
etype
.
end
(),
':'
)
-
etype
.
begin
();
std
::
find
(
first_seperator_it
+
1
,
etype
.
end
(),
':'
)
-
etype
.
begin
();
return
etype
.
substr
(
second_seperator_pos
+
1
);
return
{
etype
.
substr
(
0
,
first_seperator_it
-
etype
.
begin
()),
etype
.
substr
(
second_seperator_pos
+
1
)};
}
}
/**
/**
...
...
python/dgl/graphbolt/impl/fused_csc_sampling_graph.py
View file @
a3d20dce
...
@@ -576,14 +576,14 @@ class FusedCSCSamplingGraph(SamplingGraph):
...
@@ -576,14 +576,14 @@ class FusedCSCSamplingGraph(SamplingGraph):
edge_offsets
[
-
1
]
edge_offsets
[
-
1
]
+
seed_offsets
[
ntype_id
+
1
]
+
seed_offsets
[
ntype_id
+
1
]
-
seed_offsets
[
ntype_id
]
-
seed_offsets
[
ntype_id
]
+
1
)
)
for
etype
,
etype_id
in
self
.
edge_type_to_id
.
items
():
for
etype
,
etype_id
in
self
.
edge_type_to_id
.
items
():
src_ntype
,
_
,
dst_ntype
=
etype_str_to_tuple
(
etype
)
src_ntype
,
_
,
dst_ntype
=
etype_str_to_tuple
(
etype
)
ntype_id
=
self
.
node_type_to_id
[
dst_ntype
]
ntype_id
=
self
.
node_type_to_id
[
dst_ntype
]
sub_indptr
_
=
indptr
[
sub_indptr
[
etype
]
=
indptr
[
edge_offsets
[
etype_id
]
:
edge_offsets
[
etype_id
+
1
]
+
1
edge_offsets
[
etype_id
]
:
edge_offsets
[
etype_id
+
1
]
]
]
sub_indptr
[
etype
]
=
sub_indptr_
-
sub_indptr_
[
0
]
sub_indices
[
etype
]
=
indices
[
sub_indices
[
etype
]
=
indices
[
etype_offsets
[
etype_id
]
:
etype_offsets
[
etype_id
+
1
]
etype_offsets
[
etype_id
]
:
etype_offsets
[
etype_id
+
1
]
]
]
...
@@ -593,8 +593,6 @@ class FusedCSCSamplingGraph(SamplingGraph):
...
@@ -593,8 +593,6 @@ class FusedCSCSamplingGraph(SamplingGraph):
etype_id
+
1
etype_id
+
1
]
]
]
]
src_ntype_id
=
self
.
node_type_to_id
[
src_ntype
]
sub_indices
[
etype
]
-=
offset
[
src_ntype_id
]
if
has_original_eids
:
if
has_original_eids
:
original_edge_ids
=
original_hetero_edge_ids
original_edge_ids
=
original_hetero_edge_ids
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment