/**
 *  Copyright (c) 2023 by Contributors
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * @file cuda/sampling_utils.cu
 * @brief Sampling utility function implementations on CUDA.
 */
#include <graphbolt/cuda_ops.h>
#include <thrust/iterator/counting_iterator.h>

#include <cub/cub.cuh>

#include "./common.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

// Given rows and indptr, computes:
// inrow_indptr[i] = indptr[rows[i]];
// in_degree[i] = indptr[rows[i] + 1] - indptr[rows[i]];
template <typename indptr_t, typename nodes_t>
struct SliceFunc {
  const nodes_t* rows;
  const indptr_t* indptr;
  indptr_t* in_degree;
  indptr_t* inrow_indptr;
  __host__ __device__ auto operator()(int64_t tIdx) {
    const auto out_row = rows[tIdx];
    const auto indptr_val = indptr[out_row];
    const auto degree = indptr[out_row + 1] - indptr_val;
    in_degree[tIdx] = degree;
    inrow_indptr[tIdx] = indptr_val;
  }
};

// Returns (indptr[nodes + 1] - indptr[nodes], indptr[nodes]).
std::tuple<torch::Tensor, torch::Tensor> SliceCSCIndptr(
    torch::Tensor indptr, torch::optional<torch::Tensor> nodes_optional) {
  if (nodes_optional.has_value()) {
    auto nodes = nodes_optional.value();
    const int64_t num_nodes = nodes.size(0);
    // Read indptr only once in case it is pinned and access is slow.
    auto sliced_indptr =
        torch::empty(num_nodes, nodes.options().dtype(indptr.scalar_type()));
    // Compute in-degrees. One extra element is allocated so that an
    // ExclusiveCumSum over the result can later serve directly as an indptr.
    auto in_degree = torch::empty(
        num_nodes + 1, nodes.options().dtype(indptr.scalar_type()));
    thrust::counting_iterator<int64_t> iota(0);
    AT_DISPATCH_INTEGRAL_TYPES(
        indptr.scalar_type(), "IndexSelectCSCIndptr", ([&] {
          using indptr_t = scalar_t;
          AT_DISPATCH_INDEX_TYPES(
              nodes.scalar_type(), "IndexSelectCSCNodes", ([&] {
                using nodes_t = index_t;
                THRUST_CALL(
                    for_each, iota, iota + num_nodes,
                    SliceFunc<indptr_t, nodes_t>{
                        nodes.data_ptr<nodes_t>(), indptr.data_ptr<indptr_t>(),
                        in_degree.data_ptr<indptr_t>(),
                        sliced_indptr.data_ptr<indptr_t>()});
              }));
        }));
    return {in_degree, sliced_indptr};
  } else {
    // When nodes is not provided, all vertices are sliced.
    const int64_t num_nodes = indptr.size(0) - 1;
    auto sliced_indptr = indptr.slice(0, 0, num_nodes);
    auto in_degree = torch::empty(
        num_nodes + 2, indptr.options().dtype(indptr.scalar_type()));
    AT_DISPATCH_INTEGRAL_TYPES(
        indptr.scalar_type(), "IndexSelectCSCIndptr", ([&] {
          using indptr_t = scalar_t;
          CUB_CALL(
              DeviceAdjacentDifference::SubtractLeftCopy,
              indptr.data_ptr<indptr_t>(), in_degree.data_ptr<indptr_t>(),
              num_nodes + 1, cub::Difference{});
        }));
    // Discard the first element of the SubtractLeftCopy result, which holds
    // indptr[0] rather than a degree. Equivalent to in_degree[1:] in Python.
    in_degree = in_degree.slice(0, 1);
    return {in_degree, sliced_indptr};
  }
}

// For a heterogeneous graph whose edge types are sorted within each vertex
// neighborhood, finds the boundary of each edge type via binary search so
// that each (vertex, etype) pair gets its own indptr entry.
template <typename indptr_t, typename etype_t>
struct EdgeTypeSearch {
  const indptr_t* sub_indptr;
  const indptr_t* sliced_indptr;
  const etype_t* etypes;
  int64_t num_fanouts;
  int64_t num_rows;
  indptr_t* new_sub_indptr;
  indptr_t* new_sliced_indptr;
  __host__ __device__ auto operator()(int64_t i) {
    const auto homo_i = i / num_fanouts;
    const auto indptr_i = sub_indptr[homo_i];
    const auto degree = sub_indptr[homo_i + 1] - indptr_i;
    const etype_t etype = i % num_fanouts;
    auto offset = cuda::LowerBound(etypes + indptr_i, degree, etype);
    new_sub_indptr[i] = indptr_i + offset;
    new_sliced_indptr[i] = sliced_indptr[homo_i] + offset;
    if (i == num_rows - 1) new_sub_indptr[num_rows] = indptr_i + degree;
  }
};

std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> SliceCSCIndptrHetero(
    torch::Tensor sub_indptr, torch::Tensor etypes,
    torch::Tensor sliced_indptr, int64_t num_fanouts) {
  auto num_rows = (sub_indptr.size(0) - 1) * num_fanouts;
  auto new_sub_indptr = torch::empty(num_rows + 1, sub_indptr.options());
  auto new_indegree = torch::empty(num_rows + 2, sub_indptr.options());
  auto new_sliced_indptr = torch::empty(num_rows, sliced_indptr.options());
  thrust::counting_iterator<int64_t> iota(0);
  AT_DISPATCH_INTEGRAL_TYPES(
      sub_indptr.scalar_type(), "SliceCSCIndptrHeteroIndptr", ([&] {
        using indptr_t = scalar_t;
        AT_DISPATCH_INTEGRAL_TYPES(
            etypes.scalar_type(), "SliceCSCIndptrHeteroTypePerEdge", ([&] {
              using etype_t = scalar_t;
              THRUST_CALL(
                  for_each, iota, iota + num_rows,
                  EdgeTypeSearch<indptr_t, etype_t>{
                      sub_indptr.data_ptr<indptr_t>(),
                      sliced_indptr.data_ptr<indptr_t>(),
                      etypes.data_ptr<etype_t>(), num_fanouts, num_rows,
                      new_sub_indptr.data_ptr<indptr_t>(),
                      new_sliced_indptr.data_ptr<indptr_t>()});
            }));
        CUB_CALL(
            DeviceAdjacentDifference::SubtractLeftCopy,
            new_sub_indptr.data_ptr<indptr_t>(),
            new_indegree.data_ptr<indptr_t>(), num_rows + 1,
            cub::Difference{});
      }));
  // Discard the first element of the SubtractLeftCopy result and ensure that
  // the new_indegree tensor has size num_rows + 1 so that its ExclusiveCumSum
  // is directly equivalent to new_sub_indptr.
  // Equivalent to new_indegree = new_indegree[1:] in Python.
  new_indegree = new_indegree.slice(0, 1);
  return {new_sub_indptr, new_indegree, new_sliced_indptr};
}

}  // namespace ops
}  // namespace graphbolt