Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
28286ab3
Commit
28286ab3
authored
Oct 17, 2024
by
sangwzh
Browse files
Fix pinned-memory pointer handling bug in random walk GPU sampling
parent
35d7422e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
20 deletions
+61
-20
src/graph/sampling/randomwalks/randomwalk_gpu.hip
src/graph/sampling/randomwalks/randomwalk_gpu.hip
+61
-20
No files found.
src/graph/sampling/randomwalks/randomwalk_gpu.hip
View file @
28286ab3
...
@@ -37,6 +37,22 @@ struct GraphKernelData {
...
@@ -37,6 +37,22 @@ struct GraphKernelData {
const IdType *in_cols;
const IdType *in_cols;
const IdType *data;
const IdType *data;
};
};
// Resolve the device-visible pointer for a typed NDArray.
//
// For an array in ordinary device memory this is simply `array.Ptr<IdType>()`.
// For a pinned (page-locked host) array, the host pointer must be translated
// via hipHostGetDevicePointer() before it can be dereferenced in a kernel.
//
// NOTE(review): the `__`-prefixed name is a reserved identifier in C++
// ([lex.name]); consider renaming (e.g. GetDevicePointer) in a follow-up.
//
// @param array  source array; may live in device memory or pinned host memory
// @return       pointer usable from device code
template<typename IdType>
inline IdType* __GetDevicePointer(runtime::NDArray array) {
  IdType* ptr = array.Ptr<IdType>();
  if (array.IsPinned()) {
    // hipHostGetDevicePointer takes void**; passing &ptr (an IdType**)
    // directly does not convert, so go through a void* temporary.
    void* dev_ptr = ptr;
    CUDA_CALL(hipHostGetDevicePointer(&dev_ptr, ptr, 0));
    ptr = static_cast<IdType*>(dev_ptr);
  }
  return ptr;
}
// Untyped variant: resolve the device-visible address of an NDArray's data.
//
// Arrays already resident on the device are returned as-is; pinned host
// arrays are translated through hipHostGetDevicePointer() so the returned
// address is dereferenceable from device code.
//
// @param array  source array (device memory or pinned host memory)
// @return       raw device-usable pointer to the array's data
inline void* __GetDevicePointer(runtime::NDArray array) {
  void* data_ptr = array->data;
  if (!array.IsPinned()) {
    return data_ptr;
  }
  CUDA_CALL(hipHostGetDevicePointer(&data_ptr, data_ptr, 0));
  return data_ptr;
}
template <typename IdType, typename FloatType, int BLOCK_SIZE, int TILE_SIZE>
template <typename IdType, typename FloatType, int BLOCK_SIZE, int TILE_SIZE>
__global__ void _RandomWalkKernel(
__global__ void _RandomWalkKernel(
...
@@ -178,14 +194,17 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
...
@@ -178,14 +194,17 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
FloatArray restart_prob) {
FloatArray restart_prob) {
const int64_t max_num_steps = metapath->shape[0];
const int64_t max_num_steps = metapath->shape[0];
const IdType *metapath_data = static_cast<IdType *>(metapath->data);
// const IdType *metapath_data = static_cast<IdType *>(metapath->data);
const IdType *metapath_data = static_cast<const IdType *>(__GetDevicePointer(metapath));
const int64_t begin_ntype =
const int64_t begin_ntype =
hg->meta_graph()->FindEdge(metapath_data[0]).first;
hg->meta_graph()->FindEdge(metapath_data[0]).first;
const int64_t max_nodes = hg->NumVertices(begin_ntype);
const int64_t max_nodes = hg->NumVertices(begin_ntype);
int64_t num_etypes = hg->NumEdgeTypes();
int64_t num_etypes = hg->NumEdgeTypes();
auto ctx = seeds->ctx;
auto ctx = seeds->ctx;
const IdType *seed_data = static_cast<const IdType *>(seeds->data);
// const IdType *seed_data = static_cast<const IdType *>(seeds->data);
const IdType *seed_data = static_cast<const IdType *>(__GetDevicePointer(seeds));
// const IdType *seed_data = static_cast<const IdType *>(__GetDevicePointer(seeds));
CHECK(seeds->ndim == 1) << "seeds shape is not one dimension.";
CHECK(seeds->ndim == 1) << "seeds shape is not one dimension.";
const int64_t num_seeds = seeds->shape[0];
const int64_t num_seeds = seeds->shape[0];
int64_t trace_length = max_num_steps + 1;
int64_t trace_length = max_num_steps + 1;
...
@@ -197,10 +216,15 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
...
@@ -197,10 +216,15 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
std::vector<GraphKernelData<IdType>> h_graphs(num_etypes);
std::vector<GraphKernelData<IdType>> h_graphs(num_etypes);
for (int64_t etype = 0; etype < num_etypes; ++etype) {
for (int64_t etype = 0; etype < num_etypes; ++etype) {
const CSRMatrix &csr = hg->GetCSRMatrix(etype);
const CSRMatrix &csr = hg->GetCSRMatrix(etype);
h_graphs[etype].in_ptr = static_cast<const IdType *>(csr.indptr->data);
// h_graphs[etype].in_ptr = static_cast<const IdType *>(csr.indptr->data);
h_graphs[etype].in_cols = static_cast<const IdType *>(csr.indices->data);
// h_graphs[etype].in_cols = static_cast<const IdType *>(csr.indices->data);
// h_graphs[etype].data =
// (CSRHasData(csr) ? static_cast<const IdType *>(csr.data->data)
// : nullptr);
h_graphs[etype].in_ptr = static_cast<const IdType *>(__GetDevicePointer(csr.indptr));
h_graphs[etype].in_cols = static_cast<const IdType *>(__GetDevicePointer(csr.indices));
h_graphs[etype].data =
h_graphs[etype].data =
(CSRHasData(csr) ? static_cast<const IdType *>(csr.data
->data
)
(CSRHasData(csr) ? static_cast<const IdType *>(
__GetDevicePointer(
csr.data
)
)
: nullptr);
: nullptr);
}
}
// use cuda stream from local thread
// use cuda stream from local thread
...
@@ -227,7 +251,8 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
...
@@ -227,7 +251,8 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
CHECK(restart_prob->ctx.device_type == kDGLCUDA||restart_prob->ctx.device_type == kDGLROCM)
CHECK(restart_prob->ctx.device_type == kDGLCUDA||restart_prob->ctx.device_type == kDGLROCM)
<< "restart prob should be in GPU.";
<< "restart prob should be in GPU.";
CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1.";
CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1.";
const FloatType *restart_prob_data = restart_prob.Ptr<FloatType>();
// const FloatType *restart_prob_data = restart_prob.Ptr<FloatType>();
const FloatType *restart_prob_data = static_cast<const FloatType *>(__GetDevicePointer(restart_prob));
const int64_t restart_prob_size = restart_prob->shape[0];
const int64_t restart_prob_size = restart_prob->shape[0];
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
(_RandomWalkKernel<IdType, FloatType, BLOCK_SIZE, TILE_SIZE>), grid,
(_RandomWalkKernel<IdType, FloatType, BLOCK_SIZE, TILE_SIZE>), grid,
...
@@ -249,21 +274,25 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -249,21 +274,25 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const std::vector<FloatArray> &prob, FloatArray restart_prob) {
const std::vector<FloatArray> &prob, FloatArray restart_prob) {
const int64_t max_num_steps = metapath->shape[0];
const int64_t max_num_steps = metapath->shape[0];
const IdType *metapath_data = static_cast<IdType *>(metapath->data);
// const IdType *metapath_data = static_cast<IdType *>(metapath->data);
const IdType *metapath_data = static_cast<IdType *>(__GetDevicePointer(metapath));
const int64_t begin_ntype =
const int64_t begin_ntype =
hg->meta_graph()->FindEdge(metapath_data[0]).first;
hg->meta_graph()->FindEdge(metapath_data[0]).first;
const int64_t max_nodes = hg->NumVertices(begin_ntype);
const int64_t max_nodes = hg->NumVertices(begin_ntype);
int64_t num_etypes = hg->NumEdgeTypes();
int64_t num_etypes = hg->NumEdgeTypes();
auto ctx = seeds->ctx;
auto ctx = seeds->ctx;
const IdType *seed_data = static_cast<const IdType *>(seeds->data);
// const IdType *seed_data = static_cast<const IdType *>(seeds->data);
const IdType *seed_data = static_cast<const IdType *>(__GetDevicePointer(seeds));
CHECK(seeds->ndim == 1) << "seeds shape is not one dimension.";
CHECK(seeds->ndim == 1) << "seeds shape is not one dimension.";
const int64_t num_seeds = seeds->shape[0];
const int64_t num_seeds = seeds->shape[0];
int64_t trace_length = max_num_steps + 1;
int64_t trace_length = max_num_steps + 1;
IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, ctx);
IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, ctx);
IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, ctx);
IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, ctx);
IdType *traces_data = traces.Ptr<IdType>();
IdType *traces_data = traces.Ptr<IdType>();
IdType *eids_data = eids.Ptr<IdType>();
// IdType *traces_data = static_cast<IdType *>(__GetDevicePointer(traces));
// IdType *eids_data = eids.Ptr<IdType>();
IdType *eids_data = static_cast<IdType *>(__GetDevicePointer(eids));
hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
auto device = DeviceAPI::Get(ctx);
auto device = DeviceAPI::Get(ctx);
...
@@ -278,10 +307,15 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -278,10 +307,15 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
std::vector<GraphKernelData<IdType>> h_graphs(num_etypes);
std::vector<GraphKernelData<IdType>> h_graphs(num_etypes);
for (int64_t etype = 0; etype < num_etypes; ++etype) {
for (int64_t etype = 0; etype < num_etypes; ++etype) {
const CSRMatrix &csr = hg->GetCSRMatrix(etype);
const CSRMatrix &csr = hg->GetCSRMatrix(etype);
h_graphs[etype].in_ptr = static_cast<const IdType *>(csr.indptr->data);
// h_graphs[etype].in_ptr = static_cast<const IdType *>(csr.indptr->data);
h_graphs[etype].in_cols = static_cast<const IdType *>(csr.indices->data);
// h_graphs[etype].in_cols = static_cast<const IdType *>(csr.indices->data);
// h_graphs[etype].data =
// (CSRHasData(csr) ? static_cast<const IdType *>(csr.data->data)
// : nullptr);
h_graphs[etype].in_ptr = static_cast<const IdType *>(__GetDevicePointer(csr.indptr));
h_graphs[etype].in_cols = static_cast<const IdType *>(__GetDevicePointer(csr.indices));
h_graphs[etype].data =
h_graphs[etype].data =
(CSRHasData(csr) ? static_cast<const IdType *>(csr.data
->data
)
(CSRHasData(csr) ? static_cast<const IdType *>(
__GetDevicePointer(
csr.data
)
)
: nullptr);
: nullptr);
int64_t num_segments = csr.indptr->shape[0] - 1;
int64_t num_segments = csr.indptr->shape[0] - 1;
...
@@ -291,13 +325,16 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -291,13 +325,16 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
prob_sums[etype] = nullptr;
prob_sums[etype] = nullptr;
continue;
continue;
}
}
probs[etype] = prob[etype].Ptr<FloatType>();
// probs[etype] = prob[etype].Ptr<FloatType>();
probs[etype] = static_cast<FloatType *>(__GetDevicePointer(prob[etype]));
prob_sums_arr.push_back(
prob_sums_arr.push_back(
FloatArray::Empty({num_segments}, prob[etype]->dtype, ctx));
FloatArray::Empty({num_segments}, prob[etype]->dtype, ctx));
prob_sums[etype] = prob_sums_arr[etype].Ptr<FloatType>();
// prob_sums[etype] = prob_sums_arr[etype].Ptr<FloatType>();
prob_sums[etype] = static_cast<FloatType *>(__GetDevicePointer(prob_sums_arr[etype]));
// calculate the sum of the neighbor weights
// calculate the sum of the neighbor weights
const IdType *d_offsets = static_cast<const IdType *>(csr.indptr->data);
// const IdType *d_offsets = static_cast<const IdType *>(csr.indptr->data);
const IdType *d_offsets = static_cast<const IdType *>(__GetDevicePointer(csr.indptr));
size_t temp_storage_size = 0;
size_t temp_storage_size = 0;
CUDA_CALL(hipcub::DeviceSegmentedReduce::Sum(
CUDA_CALL(hipcub::DeviceSegmentedReduce::Sum(
nullptr, temp_storage_size, probs[etype], prob_sums[etype],
nullptr, temp_storage_size, probs[etype], prob_sums[etype],
...
@@ -330,7 +367,8 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -330,7 +367,8 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
DGLContext{kDGLCPU, 0}, ctx, prob[0]->dtype);
DGLContext{kDGLCPU, 0}, ctx, prob[0]->dtype);
// copy metapath to GPU
// copy metapath to GPU
auto d_metapath = metapath.CopyTo(ctx);
auto d_metapath = metapath.CopyTo(ctx);
const IdType *d_metapath_data = static_cast<IdType *>(d_metapath->data);
// const IdType *d_metapath_data = static_cast<IdType *>(d_metapath->data);
const IdType *d_metapath_data = static_cast<IdType *>(__GetDevicePointer(d_metapath));
constexpr int BLOCK_SIZE = 256;
constexpr int BLOCK_SIZE = 256;
constexpr int TILE_SIZE = BLOCK_SIZE * 4;
constexpr int TILE_SIZE = BLOCK_SIZE * 4;
...
@@ -340,8 +378,9 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -340,8 +378,9 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
CHECK(restart_prob->ctx.device_type == kDGLCUDA ||restart_prob->ctx.device_type == kDGLROCM)
CHECK(restart_prob->ctx.device_type == kDGLCUDA ||restart_prob->ctx.device_type == kDGLROCM)
<< "restart prob should be in GPU.";
<< "restart prob should be in GPU.";
CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1.";
CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1.";
const FloatType *restart_prob_data = restart_prob.Ptr<FloatType>();
// const FloatType *restart_prob_data = restart_prob.Ptr<FloatType>();
const int64_t restart_prob_size = restart_prob->shape[0];
const FloatType *restart_prob_data = static_cast<const FloatType *>(__GetDevicePointer(restart_prob));
const int64_t restart_prob_size = restart_prob->shape[0];
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
(_RandomWalkBiasedKernel<IdType, FloatType, BLOCK_SIZE, TILE_SIZE>), grid,
(_RandomWalkBiasedKernel<IdType, FloatType, BLOCK_SIZE, TILE_SIZE>), grid,
block, 0, stream, random_seed, seed_data, num_seeds, d_metapath_data,
block, 0, stream, random_seed, seed_data, num_seeds, d_metapath_data,
...
@@ -446,8 +485,10 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
...
@@ -446,8 +485,10 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
const IdArray src, const IdArray dst, const int64_t num_samples_per_node,
const IdArray src, const IdArray dst, const int64_t num_samples_per_node,
const int64_t k) {
const int64_t k) {
CHECK(src->ctx.device_type == kDGLCUDA || src->ctx.device_type == kDGLROCM) << "IdArray needs be on GPU!";
CHECK(src->ctx.device_type == kDGLCUDA || src->ctx.device_type == kDGLROCM) << "IdArray needs be on GPU!";
const IdxType *src_data = src.Ptr<IdxType>();
// const IdxType *src_data = src.Ptr<IdxType>();
const IdxType *dst_data = dst.Ptr<IdxType>();
const IdxType *src_data = static_cast<IdxType*>(__GetDevicePointer(src));
// const IdxType *dst_data = dst.Ptr<IdxType>();
const IdxType *dst_data = static_cast<IdxType*>(__GetDevicePointer(dst));
const int64_t num_dst_nodes = (dst->shape[0] / num_samples_per_node);
const int64_t num_dst_nodes = (dst->shape[0] / num_samples_per_node);
auto ctx = src->ctx;
auto ctx = src->ctx;
// use cuda stream from local thread
// use cuda stream from local thread
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment