Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
6ac701f8
Commit
6ac701f8
authored
Sep 13, 2024
by
sangwzh
Browse files
update src and graphbolt code
parent
1547bd93
Changes
116
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
89 additions
and
56 deletions
+89
-56
graphbolt/src/unique_and_compact.cc
graphbolt/src/unique_and_compact.cc
+4
-3
src/array/arith.h
src/array/arith.h
+4
-3
src/array/array.cc
src/array/array.cc
+5
-4
src/array/array_arith.cc
src/array/array_arith.cc
+3
-2
src/array/cpu/array_cumsum.cc
src/array/cpu/array_cumsum.cc
+1
-0
src/array/cpu/array_sort.cc
src/array/cpu/array_sort.cc
+1
-1
src/array/cpu/gather_mm.cc
src/array/cpu/gather_mm.cc
+2
-1
src/array/cpu/labor_sampling.cc
src/array/cpu/labor_sampling.cc
+2
-1
src/array/cpu/rowwise_sampling.cc
src/array/cpu/rowwise_sampling.cc
+2
-1
src/array/cpu/rowwise_topk.cc
src/array/cpu/rowwise_topk.cc
+2
-1
src/array/cpu/sddmm.cc
src/array/cpu/sddmm.cc
+2
-1
src/array/cpu/segment_reduce.cc
src/array/cpu/segment_reduce.cc
+3
-2
src/array/cpu/spmm.cc
src/array/cpu/spmm.cc
+2
-1
src/array/cpu/traversal.cc
src/array/cpu/traversal.cc
+2
-1
src/array/cuda/array_cumsum.hip
src/array/cuda/array_cumsum.hip
+9
-6
src/array/cuda/array_index_select.cuh
src/array/cuda/array_index_select.cuh
+2
-0
src/array/cuda/array_index_select.hip
src/array/cuda/array_index_select.hip
+9
-6
src/array/cuda/array_nonzero.hip
src/array/cuda/array_nonzero.hip
+10
-6
src/array/cuda/array_op_impl.hip
src/array/cuda/array_op_impl.hip
+16
-12
src/array/cuda/array_scatter.hip
src/array/cuda/array_scatter.hip
+8
-4
No files found.
graphbolt/src/unique_and_compact.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2023 by Contributors
* Copyright (c) 2023 by Contributors
*
*
...
@@ -10,9 +11,9 @@
...
@@ -10,9 +11,9 @@
#include <unordered_map>
#include <unordered_map>
#include "
./
concurrent_id_hash_map.h"
#include "concurrent_id_hash_map.h"
#include "
./
macro.h"
#include "macro.h"
#include "
./
utils.h"
#include "utils.h"
namespace
graphbolt
{
namespace
graphbolt
{
namespace
sampling
{
namespace
sampling
{
...
...
src/array/arith.h
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2019 by Contributors
* Copyright (c) 2019 by Contributors
* @file array/arith.h
* @file array/arith.h
...
@@ -6,13 +7,13 @@
...
@@ -6,13 +7,13 @@
#ifndef DGL_ARRAY_ARITH_H_
#ifndef DGL_ARRAY_ARITH_H_
#define DGL_ARRAY_ARITH_H_
#define DGL_ARRAY_ARITH_H_
#ifdef __
CUDA
CC__
#ifdef __
HIP
CC__
#define DGLDEVICE __device__
#define DGLDEVICE __device__
__host__
#define DGLINLINE __forceinline__
#define DGLINLINE __forceinline__
#else
#else
#define DGLDEVICE
#define DGLDEVICE
#define DGLINLINE inline
#define DGLINLINE inline
#endif // __
CUDA
CC__
#endif // __
HIP
CC__
namespace
dgl
{
namespace
dgl
{
namespace
aten
{
namespace
aten
{
...
...
src/array/array.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2019-2022 by Contributors
* Copyright (c) 2019-2022 by Contributors
* @file array/array.cc
* @file array/array.cc
...
@@ -14,9 +15,9 @@
...
@@ -14,9 +15,9 @@
#include <sstream>
#include <sstream>
#include "../c_api_common.h"
#include "../c_api_common.h"
#include "
./
arith.h"
#include "arith.h"
#include "
./
array_op.h"
#include "array_op.h"
#include "
./
kernel_decl.h"
#include "kernel_decl.h"
using
namespace
dgl
::
runtime
;
using
namespace
dgl
::
runtime
;
...
@@ -585,7 +586,7 @@ COOMatrix CSRRowWiseSampling(
...
@@ -585,7 +586,7 @@ COOMatrix CSRRowWiseSampling(
// prob_or_mask is pinned and rows on GPU is valid
// prob_or_mask is pinned and rows on GPU is valid
CHECK_VALID_CONTEXT
(
prob_or_mask
,
rows
);
CHECK_VALID_CONTEXT
(
prob_or_mask
,
rows
);
ATEN_CSR_SWITCH_CUDA_UVA
(
mat
,
rows
,
XPU
,
IdType
,
"CSRRowWiseSampling"
,
{
ATEN_CSR_SWITCH_CUDA_UVA
(
mat
,
rows
,
XPU
,
IdType
,
"CSRRowWiseSampling"
,
{
CHECK
(
!
(
prob_or_mask
->
dtype
.
bits
==
8
&&
XPU
==
kDGLCUDA
))
CHECK
(
!
(
prob_or_mask
->
dtype
.
bits
==
8
&&
(
XPU
==
kDGLCUDA
||
XPU
==
kDGLROCM
)
))
<<
"GPU sampling with masks is currently not supported yet."
;
<<
"GPU sampling with masks is currently not supported yet."
;
ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH
(
ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH
(
prob_or_mask
->
dtype
,
FloatType
,
"probability or mask"
,
{
prob_or_mask
->
dtype
,
FloatType
,
"probability or mask"
,
{
...
...
src/array/array_arith.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2019 by Contributors
* Copyright (c) 2019 by Contributors
* @file array/array_aritch.cc
* @file array/array_aritch.cc
...
@@ -8,8 +9,8 @@
...
@@ -8,8 +9,8 @@
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/ndarray.h>
#include "../c_api_common.h"
#include "../c_api_common.h"
#include "
./
arith.h"
#include "arith.h"
#include "
./
array_op.h"
#include "array_op.h"
using
namespace
dgl
::
runtime
;
using
namespace
dgl
::
runtime
;
...
...
src/array/cpu/array_cumsum.cc
View file @
6ac701f8
...
@@ -29,6 +29,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
...
@@ -29,6 +29,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
IdType
*
out_d
=
ret
.
Ptr
<
IdType
>
();
IdType
*
out_d
=
ret
.
Ptr
<
IdType
>
();
out_d
[
0
]
=
in_d
[
0
];
out_d
[
0
]
=
in_d
[
0
];
for
(
int64_t
i
=
1
;
i
<
len
;
++
i
)
out_d
[
i
]
=
out_d
[
i
-
1
]
+
in_d
[
i
];
for
(
int64_t
i
=
1
;
i
<
len
;
++
i
)
out_d
[
i
]
=
out_d
[
i
-
1
]
+
in_d
[
i
];
std
::
cout
<<
"limm cpu ret : "
<<
ret
<<
std
::
endl
;
return
ret
;
return
ret
;
}
}
}
}
...
...
src/array/cpu/array_sort.cc
View file @
6ac701f8
...
@@ -48,7 +48,7 @@ void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
...
@@ -48,7 +48,7 @@ void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
}
}
template
<
typename
V1
,
typename
V2
>
template
<
typename
V1
,
typename
V2
>
struct
PairIterator
__host__
struct
PairIterator
:
public
std
::
iterator
<
:
public
std
::
iterator
<
std
::
random_access_iterator_tag
,
std
::
pair
<
V1
,
V2
>
,
std
::
ptrdiff_t
,
std
::
random_access_iterator_tag
,
std
::
pair
<
V1
,
V2
>
,
std
::
ptrdiff_t
,
std
::
pair
<
V1
*
,
V2
*>
,
PairRef
<
V1
,
V2
>>
{
std
::
pair
<
V1
*
,
V2
*>
,
PairRef
<
V1
,
V2
>>
{
...
...
src/array/cpu/gather_mm.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file kernel/cpu/gaher_mm.cc
* @file kernel/cpu/gaher_mm.cc
* @brief GatherMM C APIs and definitions.
* @brief GatherMM C APIs and definitions.
*/
*/
#include "
./
gather_mm.h"
#include "gather_mm.h"
#include <dgl/array.h>
#include <dgl/array.h>
...
...
src/array/cpu/labor_sampling.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/*!
/*!
* Copyright (c) 2022, NVIDIA Corporation
* Copyright (c) 2022, NVIDIA Corporation
* Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
* Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -18,7 +19,7 @@
...
@@ -18,7 +19,7 @@
* \file array/cuda/labor_sampling.cc
* \file array/cuda/labor_sampling.cc
* \brief labor sampling
* \brief labor sampling
*/
*/
#include "
./
labor_pick.h"
#include "labor_pick.h"
namespace
dgl
{
namespace
dgl
{
namespace
aten
{
namespace
aten
{
...
...
src/array/cpu/rowwise_sampling.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file array/cpu/rowwise_sampling.cc
* @file array/cpu/rowwise_sampling.cc
...
@@ -7,7 +8,7 @@
...
@@ -7,7 +8,7 @@
#include <numeric>
#include <numeric>
#include "
./
rowwise_pick.h"
#include "rowwise_pick.h"
namespace
dgl
{
namespace
dgl
{
namespace
aten
{
namespace
aten
{
...
...
src/array/cpu/rowwise_topk.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file array/cpu/rowwise_topk.cc
* @file array/cpu/rowwise_topk.cc
...
@@ -6,7 +7,7 @@
...
@@ -6,7 +7,7 @@
#include <algorithm>
#include <algorithm>
#include <numeric>
#include <numeric>
#include "
./
rowwise_pick.h"
#include "rowwise_pick.h"
namespace
dgl
{
namespace
dgl
{
namespace
aten
{
namespace
aten
{
...
...
src/array/cpu/sddmm.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file aten/cpu/sddmm.cc
* @file aten/cpu/sddmm.cc
* @brief SDDMM C APIs and definitions.
* @brief SDDMM C APIs and definitions.
*/
*/
#include "
./
sddmm.h"
#include "sddmm.h"
#include <dgl/array.h>
#include <dgl/array.h>
...
...
src/array/cpu/segment_reduce.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file kernel/cpu/segment_reduce.cc
* @file kernel/cpu/segment_reduce.cc
* @brief Segment reduce C APIs and definitions.
* @brief Segment reduce C APIs and definitions.
*/
*/
#include "
./
segment_reduce.h"
#include "segment_reduce.h"
#include <dgl/array.h>
#include <dgl/array.h>
#include <string>
#include <string>
#include "
./
spmm_binary_ops.h"
#include "spmm_binary_ops.h"
namespace
dgl
{
namespace
dgl
{
namespace
aten
{
namespace
aten
{
...
...
src/array/cpu/spmm.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file kernel/cpu/spmm.cc
* @file kernel/cpu/spmm.cc
* @brief SPMM C APIs and definitions.
* @brief SPMM C APIs and definitions.
*/
*/
#include "
./
spmm.h"
#include "spmm.h"
#include <dgl/array.h>
#include <dgl/array.h>
...
...
src/array/cpu/traversal.cc
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file array/cpu/traversal.cc
* @file array/cpu/traversal.cc
* @brief Graph traversal implementation
* @brief Graph traversal implementation
*/
*/
#include "
./
traversal.h"
#include "traversal.h"
#include <dgl/graph_traversal.h>
#include <dgl/graph_traversal.h>
...
...
src/array/cuda/array_cumsum.
cu
→
src/array/cuda/array_cumsum.
hip
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file array/cpu/array_cumsum.cu
* @file array/cpu/array_cumsum.cu
* @brief Array cumsum GPU implementation
* @brief Array cumsum GPU implementation
*/
*/
#include <dgl/array.h>
#include <dgl/array.h>
#include "../../../include/dgl/array.h"
#include <cub/cub.
cuh
>
#include <
hip
cub/
hip
cub.
hpp
>
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "
./
utils.h"
#include "utils.h"
namespace dgl {
namespace dgl {
using runtime::NDArray;
using runtime::NDArray;
...
@@ -23,7 +26,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
...
@@ -23,7 +26,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
: aten::Full(0, 1, array->dtype.bits, array->ctx);
: aten::Full(0, 1, array->dtype.bits, array->ctx);
auto device = runtime::DeviceAPI::Get(array->ctx);
auto device = runtime::DeviceAPI::Get(array->ctx);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
const IdType* in_d = array.Ptr<IdType>();
const IdType* in_d = array.Ptr<IdType>();
IdArray ret;
IdArray ret;
IdType* out_d = nullptr;
IdType* out_d = nullptr;
...
@@ -36,16 +39,16 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
...
@@ -36,16 +39,16 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
}
}
// Allocate workspace
// Allocate workspace
size_t workspace_size = 0;
size_t workspace_size = 0;
CUDA_CALL
(
cub
::
DeviceScan
::
InclusiveSum
(
CUDA_CALL(
hip
cub::DeviceScan::InclusiveSum(
nullptr, workspace_size, in_d, out_d, len, stream));
nullptr, workspace_size, in_d, out_d, len, stream));
void* workspace = device->AllocWorkspace(array->ctx, workspace_size);
void* workspace = device->AllocWorkspace(array->ctx, workspace_size);
// Compute cumsum
// Compute cumsum
CUDA_CALL
(
cub
::
DeviceScan
::
InclusiveSum
(
CUDA_CALL(
hip
cub::DeviceScan::InclusiveSum(
workspace, workspace_size, in_d, out_d, len, stream));
workspace, workspace_size, in_d, out_d, len, stream));
device->FreeWorkspace(array->ctx, workspace);
device->FreeWorkspace(array->ctx, workspace);
std::cout << "cuda ret : " << ret << std::endl;
return ret;
return ret;
}
}
...
...
src/array/cuda/array_index_select.cuh
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2021-2022 by Contributors
* Copyright (c) 2021-2022 by Contributors
* @file array/cuda/array_index_select.cuh
* @file array/cuda/array_index_select.cuh
...
...
src/array/cuda/array_index_select.
cu
→
src/array/cuda/array_index_select.
hip
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2019 by Contributors
* Copyright (c) 2019 by Contributors
* @file array/cpu/array_index_select.cu
* @file array/cpu/array_index_select.cu
* @brief Array index select GPU implementation
* @brief Array index select GPU implementation
*/
*/
#include <dgl/array.h>
#include <dgl/array.h>
#include "../../../include/dgl/array.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "
./
array_index_select.cuh"
#include "array_index_select.cuh"
#include "
./
utils.h"
#include "utils.h"
namespace dgl {
namespace dgl {
using runtime::NDArray;
using runtime::NDArray;
...
@@ -33,7 +36,7 @@ NDArray IndexSelect(NDArray array, IdArray index) {
...
@@ -33,7 +36,7 @@ NDArray IndexSelect(NDArray array, IdArray index) {
const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
const IdType* idx_data = static_cast<IdType*>(index->data);
const IdType* idx_data = static_cast<IdType*>(index->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
if (num_feat == 1) {
if (num_feat == 1) {
const int nt = cuda::FindNumThreads(len);
const int nt = cuda::FindNumThreads(len);
const int nb = (len + nt - 1) / nt;
const int nb = (len + nt - 1) / nt;
...
@@ -61,9 +64,9 @@ template NDArray IndexSelect<kDGLCUDA, int64_t, int64_t>(NDArray, IdArray);
...
@@ -61,9 +64,9 @@ template NDArray IndexSelect<kDGLCUDA, int64_t, int64_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int64_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, __half, int64_t>(NDArray, IdArray);
#if BF16_ENABLED
#if BF16_ENABLED
template
NDArray
IndexSelect
<
kDGLCUDA
,
__
nv
_bfloat16
,
int32_t
>(
template NDArray IndexSelect<kDGLCUDA, __
hip
_bfloat16, int32_t>(
NDArray, IdArray);
NDArray, IdArray);
template
NDArray
IndexSelect
<
kDGLCUDA
,
__
nv
_bfloat16
,
int64_t
>(
template NDArray IndexSelect<kDGLCUDA, __
hip
_bfloat16, int64_t>(
NDArray, IdArray);
NDArray, IdArray);
#endif // BF16_ENABLED
#endif // BF16_ENABLED
template NDArray IndexSelect<kDGLCUDA, float, int32_t>(NDArray, IdArray);
template NDArray IndexSelect<kDGLCUDA, float, int32_t>(NDArray, IdArray);
...
@@ -87,7 +90,7 @@ template uint32_t IndexSelect<kDGLCUDA, uint32_t>(NDArray array, int64_t index);
...
@@ -87,7 +90,7 @@ template uint32_t IndexSelect<kDGLCUDA, uint32_t>(NDArray array, int64_t index);
template uint64_t IndexSelect<kDGLCUDA, uint64_t>(NDArray array, int64_t index);
template uint64_t IndexSelect<kDGLCUDA, uint64_t>(NDArray array, int64_t index);
template __half IndexSelect<kDGLCUDA, __half>(NDArray array, int64_t index);
template __half IndexSelect<kDGLCUDA, __half>(NDArray array, int64_t index);
#if BF16_ENABLED
#if BF16_ENABLED
template
__
nv
_bfloat16
IndexSelect
<
kDGLCUDA
,
__
nv
_bfloat16
>(
template __
hip
_bfloat16 IndexSelect<kDGLCUDA, __
hip
_bfloat16>(
NDArray array, int64_t index);
NDArray array, int64_t index);
#endif // BF16_ENABLED
#endif // BF16_ENABLED
template float IndexSelect<kDGLCUDA, float>(NDArray array, int64_t index);
template float IndexSelect<kDGLCUDA, float>(NDArray array, int64_t index);
...
...
src/array/cuda/array_nonzero.
cu
→
src/array/cuda/array_nonzero.
hip
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020 by Contributors
* @file array/cpu/array_nonzero.cc
* @file array/cpu/array_nonzero.cc
...
@@ -5,11 +7,13 @@
...
@@ -5,11 +7,13 @@
*/
*/
#include <dgl/array.h>
#include <dgl/array.h>
#include "../../../include/dgl/array.h"
#include <cub/cub.cuh>
#include <hipcub/hipcub.hpp>
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "
./
utils.h"
#include "utils.h"
namespace dgl {
namespace dgl {
using runtime::NDArray;
using runtime::NDArray;
...
@@ -33,24 +37,24 @@ IdArray NonZero(IdArray array) {
...
@@ -33,24 +37,24 @@ IdArray NonZero(IdArray array) {
const int64_t len = array->shape[0];
const int64_t len = array->shape[0];
IdArray ret = NewIdArray(len, ctx, 64);
IdArray ret = NewIdArray(len, ctx, 64);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
const IdType* const in_data = static_cast<const IdType*>(array->data);
const IdType* const in_data = static_cast<const IdType*>(array->data);
int64_t* const out_data = static_cast<int64_t*>(ret->data);
int64_t* const out_data = static_cast<int64_t*>(ret->data);
IsNonZeroIndex<IdType> comp(in_data);
IsNonZeroIndex<IdType> comp(in_data);
cub
::
CountingInputIterator
<
int64_t
>
counter
(
0
);
hip
cub::CountingInputIterator<int64_t> counter(0);
// room for cub to output on GPU
// room for cub to output on GPU
int64_t* d_num_nonzeros =
int64_t* d_num_nonzeros =
static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
size_t temp_size = 0;
size_t temp_size = 0;
CUDA_CALL
(
cub
::
DeviceSelect
::
If
(
CUDA_CALL(
hip
cub::DeviceSelect::If(
nullptr, temp_size, counter, out_data, d_num_nonzeros, len, comp,
nullptr, temp_size, counter, out_data, d_num_nonzeros, len, comp,
stream));
stream));
void* temp = device->AllocWorkspace(ctx, temp_size);
void* temp = device->AllocWorkspace(ctx, temp_size);
CUDA_CALL
(
cub
::
DeviceSelect
::
If
(
CUDA_CALL(
hip
cub::DeviceSelect::If(
temp, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream));
temp, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream));
device->FreeWorkspace(ctx, temp);
device->FreeWorkspace(ctx, temp);
...
...
src/array/cuda/array_op_impl.
cu
→
src/array/cuda/array_op_impl.
hip
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2020-2021 by Contributors
* Copyright (c) 2020-2021 by Contributors
* @file array/cuda/array_op_impl.cu
* @file array/cuda/array_op_impl.cu
* @brief Array operator GPU implementation
* @brief Array operator GPU implementation
*/
*/
#include <dgl/array.h>
#include <dgl/array.h>
#include "../../../include/dgl/array.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_hashtable.cuh"
#include "../../runtime/cuda/cuda_hashtable.cuh"
#include "../arith.h"
#include "../arith.h"
#include "
./
utils.h"
#include "utils.h"
namespace dgl {
namespace dgl {
using runtime::NDArray;
using runtime::NDArray;
...
@@ -36,7 +40,7 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
...
@@ -36,7 +40,7 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(len);
int nt = cuda::FindNumThreads(len);
int nb = (len + nt - 1) / nt;
int nb = (len + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -107,7 +111,7 @@ IdArray BinaryElewise(IdArray lhs, IdType rhs) {
...
@@ -107,7 +111,7 @@ IdArray BinaryElewise(IdArray lhs, IdType rhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(len);
int nt = cuda::FindNumThreads(len);
int nb = (len + nt - 1) / nt;
int nb = (len + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -178,7 +182,7 @@ IdArray BinaryElewise(IdType lhs, IdArray rhs) {
...
@@ -178,7 +182,7 @@ IdArray BinaryElewise(IdType lhs, IdArray rhs) {
IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(len);
int nt = cuda::FindNumThreads(len);
int nb = (len + nt - 1) / nt;
int nb = (len + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -249,7 +253,7 @@ IdArray UnaryElewise(IdArray lhs) {
...
@@ -249,7 +253,7 @@ IdArray UnaryElewise(IdArray lhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(len);
int nt = cuda::FindNumThreads(len);
int nb = (len + nt - 1) / nt;
int nb = (len + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -277,7 +281,7 @@ template <DGLDeviceType XPU, typename DType>
...
@@ -277,7 +281,7 @@ template <DGLDeviceType XPU, typename DType>
NDArray Full(DType val, int64_t length, DGLContext ctx) {
NDArray Full(DType val, int64_t length, DGLContext ctx) {
NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits<DType>::dtype, ctx);
NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits<DType>::dtype, ctx);
DType* ret_data = static_cast<DType*>(ret->data);
DType* ret_data = static_cast<DType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(length);
int nt = cuda::FindNumThreads(length);
int nb = (length + nt - 1) / nt;
int nb = (length + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -292,8 +296,8 @@ template IdArray Full<kDGLCUDA, int64_t>(
...
@@ -292,8 +296,8 @@ template IdArray Full<kDGLCUDA, int64_t>(
template IdArray Full<kDGLCUDA, __half>(
template IdArray Full<kDGLCUDA, __half>(
__half val, int64_t length, DGLContext ctx);
__half val, int64_t length, DGLContext ctx);
#if BF16_ENABLED
#if BF16_ENABLED
template
IdArray
Full
<
kDGLCUDA
,
__
nv
_bfloat16
>(
template IdArray Full<kDGLCUDA, __
hip
_bfloat16>(
__
nv
_bfloat16
val
,
int64_t
length
,
DGLContext
ctx
);
__
hip
_bfloat16 val, int64_t length, DGLContext ctx);
#endif // BF16_ENABLED
#endif // BF16_ENABLED
template IdArray Full<kDGLCUDA, float>(
template IdArray Full<kDGLCUDA, float>(
float val, int64_t length, DGLContext ctx);
float val, int64_t length, DGLContext ctx);
...
@@ -319,7 +323,7 @@ IdArray Range(IdType low, IdType high, DGLContext ctx) {
...
@@ -319,7 +323,7 @@ IdArray Range(IdType low, IdType high, DGLContext ctx) {
IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
if (length == 0) return ret;
if (length == 0) return ret;
IdType* ret_data = static_cast<IdType*>(ret->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(length);
int nt = cuda::FindNumThreads(length);
int nb = (length + nt - 1) / nt;
int nb = (length + nt - 1) / nt;
CUDA_KERNEL_CALL(
CUDA_KERNEL_CALL(
...
@@ -355,7 +359,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
...
@@ -355,7 +359,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
const auto& ctx = arrays[0]->ctx;
const auto& ctx = arrays[0]->ctx;
auto device = runtime::DeviceAPI::Get(ctx);
auto device = runtime::DeviceAPI::Get(ctx);
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
// build node maps and get the induced nodes
// build node maps and get the induced nodes
OrderedHashTable<IdType> node_map(total_length, ctx, stream);
OrderedHashTable<IdType> node_map(total_length, ctx, stream);
...
@@ -364,7 +368,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
...
@@ -364,7 +368,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
IdArray induced_nodes = NewIdArray(total_length, ctx, sizeof(IdType) * 8);
IdArray induced_nodes = NewIdArray(total_length, ctx, sizeof(IdType) * 8);
CUDA_CALL
(
cuda
MemsetAsync
(
CUDA_CALL(
hip
MemsetAsync(
num_induced_device, 0, sizeof(*num_induced_device), stream));
num_induced_device, 0, sizeof(*num_induced_device), stream));
node_map.FillWithDuplicates(
node_map.FillWithDuplicates(
...
@@ -416,7 +420,7 @@ IdArray AsNumBits(IdArray arr, uint8_t bits) {
...
@@ -416,7 +420,7 @@ IdArray AsNumBits(IdArray arr, uint8_t bits) {
const std::vector<int64_t> shape(arr->shape, arr->shape + arr->ndim);
const std::vector<int64_t> shape(arr->shape, arr->shape + arr->ndim);
IdArray ret = IdArray::Empty(shape, DGLDataType{kDGLInt, bits, 1}, arr->ctx);
IdArray ret = IdArray::Empty(shape, DGLDataType{kDGLInt, bits, 1}, arr->ctx);
const int64_t length = ret.NumElements();
const int64_t length = ret.NumElements();
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
int nt = cuda::FindNumThreads(length);
int nt = cuda::FindNumThreads(length);
int nb = (length + nt - 1) / nt;
int nb = (length + nt - 1) / nt;
if (bits == 32) {
if (bits == 32) {
...
...
src/array/cuda/array_scatter.
cu
→
src/array/cuda/array_scatter.
hip
View file @
6ac701f8
// !!! This is a file automatically generated by hipify!!!
#include "hip/hip_runtime.h"
/**
/**
* Copyright (c) 2019 by Contributors
* Copyright (c) 2019 by Contributors
* @file array/cuda/array_scatter.cu
* @file array/cuda/array_scatter.cu
* @brief Array scatter GPU implementation
* @brief Array scatter GPU implementation
*/
*/
#include <dgl/array.h>
#include <dgl/array.h>
#include "../../../include/dgl/array.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "
./
utils.h"
#include "utils.h"
namespace dgl {
namespace dgl {
using runtime::NDArray;
using runtime::NDArray;
...
@@ -31,7 +35,7 @@ void Scatter_(IdArray index, NDArray value, NDArray out) {
...
@@ -31,7 +35,7 @@ void Scatter_(IdArray index, NDArray value, NDArray out) {
const DType* val = value.Ptr<DType>();
const DType* val = value.Ptr<DType>();
DType* outd = out.Ptr<DType>();
DType* outd = out.Ptr<DType>();
cuda
Stream_t
stream
=
runtime
::
getCurrent
CUDA
Stream
();
hip
Stream_t stream = runtime::getCurrent
HIP
Stream
MasqueradingAsCUDA
();
const int nt = cuda::FindNumThreads(len);
const int nt = cuda::FindNumThreads(len);
const int nb = (len + nt - 1) / nt;
const int nb = (len + nt - 1) / nt;
CUDA_KERNEL_CALL(_ScatterKernel, nb, nt, 0, stream, idx, val, len, outd);
CUDA_KERNEL_CALL(_ScatterKernel, nb, nt, 0, stream, idx, val, len, outd);
...
@@ -41,7 +45,7 @@ template void Scatter_<kDGLCUDA, int32_t, int32_t>(IdArray, NDArray, NDArray);
...
@@ -41,7 +45,7 @@ template void Scatter_<kDGLCUDA, int32_t, int32_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, int64_t, int32_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, int64_t, int32_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, __half, int32_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, __half, int32_t>(IdArray, NDArray, NDArray);
#if BF16_ENABLED
#if BF16_ENABLED
template
void
Scatter_
<
kDGLCUDA
,
__
nv
_bfloat16
,
int32_t
>(
template void Scatter_<kDGLCUDA, __
hip
_bfloat16, int32_t>(
IdArray, NDArray, NDArray);
IdArray, NDArray, NDArray);
#endif // BF16_ENABLED
#endif // BF16_ENABLED
template void Scatter_<kDGLCUDA, float, int32_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, float, int32_t>(IdArray, NDArray, NDArray);
...
@@ -50,7 +54,7 @@ template void Scatter_<kDGLCUDA, int32_t, int64_t>(IdArray, NDArray, NDArray);
...
@@ -50,7 +54,7 @@ template void Scatter_<kDGLCUDA, int32_t, int64_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, int64_t, int64_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, int64_t, int64_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, __half, int64_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, __half, int64_t>(IdArray, NDArray, NDArray);
#if BF16_ENABLED
#if BF16_ENABLED
template
void
Scatter_
<
kDGLCUDA
,
__
nv
_bfloat16
,
int64_t
>(
template void Scatter_<kDGLCUDA, __
hip
_bfloat16, int64_t>(
IdArray, NDArray, NDArray);
IdArray, NDArray, NDArray);
#endif // BF16_ENABLED
#endif // BF16_ENABLED
template void Scatter_<kDGLCUDA, float, int64_t>(IdArray, NDArray, NDArray);
template void Scatter_<kDGLCUDA, float, int64_t>(IdArray, NDArray, NDArray);
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment