"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "5c53ca5ed89b30cd7b04cbf01665388e2af9f249"
Unverified Commit a3d20dce authored by Muhammed Fatih BALIN's avatar Muhammed Fatih BALIN Committed by GitHub
Browse files

[GraphBolt][CUDA] Make `_convert_to_sampled_subgraph` lighter. (#7312)

parent 6f9c20c3
...@@ -68,6 +68,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors( ...@@ -68,6 +68,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
bool return_eids, bool return_eids,
torch::optional<torch::Tensor> type_per_edge = torch::nullopt, torch::optional<torch::Tensor> type_per_edge = torch::nullopt,
torch::optional<torch::Tensor> probs_or_mask = torch::nullopt, torch::optional<torch::Tensor> probs_or_mask = torch::nullopt,
torch::optional<torch::Tensor> node_type_offset = torch::nullopt,
torch::optional<torch::Dict<std::string, int64_t>> node_type_to_id = torch::optional<torch::Dict<std::string, int64_t>> node_type_to_id =
torch::nullopt, torch::nullopt,
torch::optional<torch::Dict<std::string, int64_t>> edge_type_to_id = torch::optional<torch::Dict<std::string, int64_t>> edge_type_to_id =
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <graphbolt/continuous_seed.h> #include <graphbolt/continuous_seed.h>
#include <graphbolt/cuda_ops.h> #include <graphbolt/cuda_ops.h>
#include <graphbolt/cuda_sampling_ops.h> #include <graphbolt/cuda_sampling_ops.h>
#include <thrust/copy.h>
#include <thrust/gather.h> #include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h> #include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h> #include <thrust/iterator/transform_iterator.h>
...@@ -189,6 +190,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors( ...@@ -189,6 +190,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
const std::vector<int64_t>& fanouts, bool replace, bool layer, const std::vector<int64_t>& fanouts, bool replace, bool layer,
bool return_eids, torch::optional<torch::Tensor> type_per_edge, bool return_eids, torch::optional<torch::Tensor> type_per_edge,
torch::optional<torch::Tensor> probs_or_mask, torch::optional<torch::Tensor> probs_or_mask,
torch::optional<torch::Tensor> node_type_offset,
torch::optional<torch::Dict<std::string, int64_t>> node_type_to_id, torch::optional<torch::Dict<std::string, int64_t>> node_type_to_id,
torch::optional<torch::Dict<std::string, int64_t>> edge_type_to_id, torch::optional<torch::Dict<std::string, int64_t>> edge_type_to_id,
torch::optional<torch::Tensor> random_seed_tensor, torch::optional<torch::Tensor> random_seed_tensor,
...@@ -531,12 +533,33 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors( ...@@ -531,12 +533,33 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
// Here, we check what are the dst node types for the given seeds so that // Here, we check what are the dst node types for the given seeds so that
// we can compute the output indptr space later. // we can compute the output indptr space later.
std::vector<int64_t> etype_id_to_dst_ntype_id(num_etypes); std::vector<int64_t> etype_id_to_dst_ntype_id(num_etypes);
// Here, we check what are the src node types for the given seeds so that
// we can subtract source node offset from indices later.
auto etype_id_to_src_ntype_id = torch::empty(
2 * num_etypes,
c10::TensorOptions().dtype(torch::kLong).pinned_memory(true));
auto etype_id_to_src_ntype_id_ptr =
etype_id_to_src_ntype_id.data_ptr<int64_t>();
for (auto& etype_and_id : edge_type_to_id.value()) { for (auto& etype_and_id : edge_type_to_id.value()) {
auto etype = etype_and_id.key(); auto etype = etype_and_id.key();
auto id = etype_and_id.value(); auto id = etype_and_id.value();
auto dst_type = utils::parse_dst_ntype_from_etype(etype); auto [src_type, dst_type] = utils::parse_src_dst_ntype_from_etype(etype);
etype_id_to_dst_ntype_id[id] = node_type_to_id->at(dst_type); etype_id_to_dst_ntype_id[id] = node_type_to_id->at(dst_type);
etype_id_to_src_ntype_id_ptr[2 * id] =
etype_id_to_src_ntype_id_ptr[2 * id + 1] =
node_type_to_id->at(src_type);
} }
auto indices_offsets_device = torch::empty(
etype_id_to_src_ntype_id.size(0),
output_indices.options().dtype(torch::kLong));
AT_DISPATCH_INDEX_TYPES(
node_type_offset->scalar_type(), "SampleNeighborsNodeTypeOffset", ([&] {
THRUST_CALL(
gather, etype_id_to_src_ntype_id_ptr,
etype_id_to_src_ntype_id_ptr + etype_id_to_src_ntype_id.size(0),
node_type_offset->data_ptr<index_t>(),
indices_offsets_device.data_ptr<int64_t>());
}));
// For each edge type, we compute the start and end offsets to index into // For each edge type, we compute the start and end offsets to index into
// indptr to form the final output_indptr. // indptr to form the final output_indptr.
auto indptr_offsets = torch::empty( auto indptr_offsets = torch::empty(
...@@ -571,29 +594,60 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors( ...@@ -571,29 +594,60 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
num_etypes * 2, c10::TensorOptions() num_etypes * 2, c10::TensorOptions()
.dtype(output_indptr.scalar_type()) .dtype(output_indptr.scalar_type())
.pinned_memory(true)); .pinned_memory(true));
auto edge_offsets_device =
torch::empty(num_etypes * 2, output_indptr.options());
at::cuda::CUDAEvent edge_offsets_event; at::cuda::CUDAEvent edge_offsets_event;
AT_DISPATCH_INDEX_TYPES( AT_DISPATCH_INDEX_TYPES(
indptr.scalar_type(), "SampleNeighborsEdgeOffsets", ([&] { indptr.scalar_type(), "SampleNeighborsEdgeOffsets", ([&] {
auto edge_offsets_pinned_device_pair =
thrust::make_transform_output_iterator(
thrust::make_zip_iterator(
edge_offsets->data_ptr<index_t>(),
edge_offsets_device.data_ptr<index_t>()),
[=] __device__(index_t x) {
return thrust::make_tuple(x, x);
});
THRUST_CALL( THRUST_CALL(
gather, indptr_offsets_ptr, gather, indptr_offsets_ptr,
indptr_offsets_ptr + indptr_offsets.size(0), indptr_offsets_ptr + indptr_offsets.size(0),
output_indptr.data_ptr<index_t>(), output_indptr.data_ptr<index_t>(),
edge_offsets->data_ptr<index_t>()); edge_offsets_pinned_device_pair);
})); }));
edge_offsets_event.record(); edge_offsets_event.record();
auto indices_offset_subtract = ExpandIndptrImpl(
edge_offsets_device, indices.scalar_type(), indices_offsets_device,
output_indices.size(0));
// The output_indices is permuted here. // The output_indices is permuted here.
std::tie(output_indptr, output_indices) = IndexSelectCSCImpl( std::tie(output_indptr, output_indices) = IndexSelectCSCImpl(
output_in_degree, sliced_output_indptr, output_indices, permutation, output_in_degree, sliced_output_indptr, output_indices, permutation,
num_rows - 1, output_indices.size(0)); num_rows - 1, output_indices.size(0));
output_indices -= indices_offset_subtract;
auto output_indptr_offsets = torch::empty(
num_etypes * 2,
c10::TensorOptions().dtype(torch::kLong).pinned_memory(true));
auto output_indptr_offsets_ptr = output_indptr_offsets.data_ptr<int64_t>();
std::vector<torch::Tensor> indptr_list; std::vector<torch::Tensor> indptr_list;
for (int i = 0; i < num_etypes; i++) { for (int i = 0; i < num_etypes; i++) {
indptr_list.push_back(output_indptr.slice( indptr_list.push_back(output_indptr.slice(
0, indptr_offsets_ptr[2 * i], 0, indptr_offsets_ptr[2 * i], indptr_offsets_ptr[2 * i + 1] + 1));
indptr_offsets_ptr[2 * i + 1] + (i == num_etypes - 1))); output_indptr_offsets_ptr[2 * i] =
i == 0 ? 0 : output_indptr_offsets_ptr[2 * i - 1];
output_indptr_offsets_ptr[2 * i + 1] =
output_indptr_offsets_ptr[2 * i] + indptr_list.back().size(0);
} }
auto output_indptr_offsets_device = torch::empty(
output_indptr_offsets.size(0),
output_indptr.options().dtype(torch::kLong));
THRUST_CALL(
copy_n, output_indptr_offsets_ptr, output_indptr_offsets.size(0),
output_indptr_offsets_device.data_ptr<int64_t>());
// We form the final output indptr by concatenating pieces for different // We form the final output indptr by concatenating pieces for different
// edge types. // edge types.
output_indptr = torch::cat(indptr_list); output_indptr = torch::cat(indptr_list);
auto indptr_offset_subtract = ExpandIndptrImpl(
output_indptr_offsets_device, indptr.scalar_type(), edge_offsets_device,
output_indptr.size(0));
output_indptr -= indptr_offset_subtract;
edge_offsets_event.synchronize(); edge_offsets_event.synchronize();
// We read the edge_offsets here, they are in pairs but we don't need it to // We read the edge_offsets here, they are in pairs but we don't need it to
// be in pairs. So we remove the duplicate information from it and turn it // be in pairs. So we remove the duplicate information from it and turn it
......
...@@ -646,8 +646,9 @@ c10::intrusive_ptr<FusedSampledSubgraph> FusedCSCSamplingGraph::SampleNeighbors( ...@@ -646,8 +646,9 @@ c10::intrusive_ptr<FusedSampledSubgraph> FusedCSCSamplingGraph::SampleNeighbors(
c10::DeviceType::CUDA, "SampleNeighbors", { c10::DeviceType::CUDA, "SampleNeighbors", {
return ops::SampleNeighbors( return ops::SampleNeighbors(
indptr_, indices_, seeds, seed_offsets, fanouts, replace, layer, indptr_, indices_, seeds, seed_offsets, fanouts, replace, layer,
return_eids, type_per_edge_, probs_or_mask, node_type_to_id_, return_eids, type_per_edge_, probs_or_mask, node_type_offset_,
edge_type_to_id_, random_seed, seed2_contribution); node_type_to_id_, edge_type_to_id_, random_seed,
seed2_contribution);
}); });
} }
TORCH_CHECK(seeds.has_value(), "Nodes can not be None on the CPU."); TORCH_CHECK(seeds.has_value(), "Nodes can not be None on the CPU.");
......
...@@ -27,14 +27,17 @@ inline bool is_accessible_from_gpu(torch::Tensor tensor) { ...@@ -27,14 +27,17 @@ inline bool is_accessible_from_gpu(torch::Tensor tensor) {
} }
/** /**
* @brief Parses the destination node type from a given edge type triple * @brief Parses the source and destination node type from a given edge type
* seperated with ":". * triple seperated with ":".
*/ */
inline std::string parse_dst_ntype_from_etype(std::string etype) { inline std::pair<std::string, std::string> parse_src_dst_ntype_from_etype(
std::string etype) {
auto first_seperator_it = std::find(etype.begin(), etype.end(), ':'); auto first_seperator_it = std::find(etype.begin(), etype.end(), ':');
auto second_seperator_pos = auto second_seperator_pos =
std::find(first_seperator_it + 1, etype.end(), ':') - etype.begin(); std::find(first_seperator_it + 1, etype.end(), ':') - etype.begin();
return etype.substr(second_seperator_pos + 1); return {
etype.substr(0, first_seperator_it - etype.begin()),
etype.substr(second_seperator_pos + 1)};
} }
/** /**
......
...@@ -576,14 +576,14 @@ class FusedCSCSamplingGraph(SamplingGraph): ...@@ -576,14 +576,14 @@ class FusedCSCSamplingGraph(SamplingGraph):
edge_offsets[-1] edge_offsets[-1]
+ seed_offsets[ntype_id + 1] + seed_offsets[ntype_id + 1]
- seed_offsets[ntype_id] - seed_offsets[ntype_id]
+ 1
) )
for etype, etype_id in self.edge_type_to_id.items(): for etype, etype_id in self.edge_type_to_id.items():
src_ntype, _, dst_ntype = etype_str_to_tuple(etype) src_ntype, _, dst_ntype = etype_str_to_tuple(etype)
ntype_id = self.node_type_to_id[dst_ntype] ntype_id = self.node_type_to_id[dst_ntype]
sub_indptr_ = indptr[ sub_indptr[etype] = indptr[
edge_offsets[etype_id] : edge_offsets[etype_id + 1] + 1 edge_offsets[etype_id] : edge_offsets[etype_id + 1]
] ]
sub_indptr[etype] = sub_indptr_ - sub_indptr_[0]
sub_indices[etype] = indices[ sub_indices[etype] = indices[
etype_offsets[etype_id] : etype_offsets[etype_id + 1] etype_offsets[etype_id] : etype_offsets[etype_id + 1]
] ]
...@@ -593,8 +593,6 @@ class FusedCSCSamplingGraph(SamplingGraph): ...@@ -593,8 +593,6 @@ class FusedCSCSamplingGraph(SamplingGraph):
etype_id + 1 etype_id + 1
] ]
] ]
src_ntype_id = self.node_type_to_id[src_ntype]
sub_indices[etype] -= offset[src_ntype_id]
if has_original_eids: if has_original_eids:
original_edge_ids = original_hetero_edge_ids original_edge_ids = original_hetero_edge_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment