OpenDAS / dgl · Commits

Commit 6ac701f8
authored Sep 13, 2024 by sangwzh

update src and graphbolt code

parent 1547bd93

Changes: 116 files in total; this page shows 20 changed files with 89 additions and 56 deletions (+89 -56).
graphbolt/src/unique_and_compact.cc     +4   -3
src/array/arith.h                       +4   -3
src/array/array.cc                      +5   -4
src/array/array_arith.cc                +3   -2
src/array/cpu/array_cumsum.cc           +1   -0
src/array/cpu/array_sort.cc             +1   -1
src/array/cpu/gather_mm.cc              +2   -1
src/array/cpu/labor_sampling.cc         +2   -1
src/array/cpu/rowwise_sampling.cc       +2   -1
src/array/cpu/rowwise_topk.cc           +2   -1
src/array/cpu/sddmm.cc                  +2   -1
src/array/cpu/segment_reduce.cc         +3   -2
src/array/cpu/spmm.cc                   +2   -1
src/array/cpu/traversal.cc              +2   -1
src/array/cuda/array_cumsum.hip         +9   -6
src/array/cuda/array_index_select.cuh   +2   -0
src/array/cuda/array_index_select.hip   +9   -6
src/array/cuda/array_nonzero.hip        +10  -6
src/array/cuda/array_op_impl.hip        +16  -12
src/array/cuda/array_scatter.hip        +8   -4
graphbolt/src/unique_and_compact.cc  (+4 -3)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  *
...
@@ -10,9 +11,9 @@
 #include <unordered_map>
-#include "./concurrent_id_hash_map.h"
-#include "./macro.h"
-#include "./utils.h"
+#include "concurrent_id_hash_map.h"
+#include "macro.h"
+#include "utils.h"
 namespace graphbolt {
 namespace sampling {
...
src/array/arith.h  (+4 -3)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/arith.h
...
@@ -6,13 +7,13 @@
 #ifndef DGL_ARRAY_ARITH_H_
 #define DGL_ARRAY_ARITH_H_
-#ifdef __CUDACC__
-#define DGLDEVICE __device__
+#ifdef __HIPCC__
+#define DGLDEVICE __device__ __host__
 #define DGLINLINE __forceinline__
 #else
 #define DGLDEVICE
 #define DGLINLINE inline
-#endif  // __CUDACC__
+#endif  // __HIPCC__
 namespace dgl {
 namespace aten {
...
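The switch from __CUDACC__ to __HIPCC__ also widens DGLDEVICE to __device__ __host__, so anything tagged with it is compiled for both host and device under hipcc. A minimal sketch of how such a macro pair is consumed (the Add helper below is illustrative, not from the repository):

// Sketch only: mirrors the macro definitions from the hunk above.
#ifdef __HIPCC__
#define DGLDEVICE __device__ __host__
#define DGLINLINE __forceinline__
#else
#define DGLDEVICE
#define DGLINLINE inline
#endif

// With __HIPCC__ defined, hipcc emits this for both host and device;
// a plain C++ compiler sees an ordinary inline function.
DGLDEVICE DGLINLINE int Add(int a, int b) { return a + b; }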
src/array/array.cc  (+5 -4)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019-2022 by Contributors
  * @file array/array.cc
...
@@ -14,9 +15,9 @@
 #include <sstream>
 #include "../c_api_common.h"
-#include "./arith.h"
-#include "./array_op.h"
-#include "./kernel_decl.h"
+#include "arith.h"
+#include "array_op.h"
+#include "kernel_decl.h"
 using namespace dgl::runtime;
...
@@ -585,7 +586,7 @@ COOMatrix CSRRowWiseSampling(
     // prob_or_mask is pinned and rows on GPU is valid
     CHECK_VALID_CONTEXT(prob_or_mask, rows);
     ATEN_CSR_SWITCH_CUDA_UVA(mat, rows, XPU, IdType, "CSRRowWiseSampling", {
-      CHECK(!(prob_or_mask->dtype.bits == 8 && XPU == kDGLCUDA))
+      CHECK(!(prob_or_mask->dtype.bits == 8 && (XPU == kDGLCUDA || XPU == kDGLROCM)))
           << "GPU sampling with masks is currently not supported yet.";
       ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH(
           prob_or_mask->dtype, FloatType, "probability or mask", {
...
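The functional change in this file is the widened guard: 8-bit mask-based sampling is now rejected on ROCm devices as well as CUDA devices. A condensed sketch of the new predicate with the surrounding macros stripped (identifiers as in the hunk; this is a fragment, not standalone code):

// Sketch of the widened guard: both GPU backends are treated the same.
const bool on_gpu = (XPU == kDGLCUDA || XPU == kDGLROCM);
CHECK(!(prob_or_mask->dtype.bits == 8 && on_gpu))
    << "GPU sampling with masks is currently not supported yet.";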
src/array/array_arith.cc  (+3 -2)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/array_aritch.cc
...
@@ -8,8 +9,8 @@
 #include <dgl/runtime/ndarray.h>
 #include "../c_api_common.h"
-#include "./arith.h"
-#include "./array_op.h"
+#include "arith.h"
+#include "array_op.h"
 using namespace dgl::runtime;
...
src/array/cpu/array_cumsum.cc  (+1 -0)

@@ -29,6 +29,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
   IdType* out_d = ret.Ptr<IdType>();
   out_d[0] = in_d[0];
   for (int64_t i = 1; i < len; ++i) out_d[i] = out_d[i - 1] + in_d[i];
+  std::cout << "limm cpu ret : " << ret << std::endl;
   return ret;
 }
 }
...
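The single added line is a debug print of the result; the loop above it is a plain sequential inclusive scan. For reference, a behaviorally equivalent sketch using the standard library:

#include <cstdint>
#include <numeric>

// Inclusive prefix sum: out_d[i] = in_d[0] + ... + in_d[i].
// Equivalent to the hand-written loop in the hunk above.
void CumSumSketch(const int64_t* in_d, int64_t* out_d, int64_t len) {
  std::partial_sum(in_d, in_d + len, out_d);
}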
src/array/cpu/array_sort.cc  (+1 -1)

@@ -48,7 +48,7 @@ void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
 }
 template <typename V1, typename V2>
-struct PairIterator
+__host__ struct PairIterator
     : public std::iterator<
           std::random_access_iterator_tag, std::pair<V1, V2>, std::ptrdiff_t,
           std::pair<V1*, V2*>, PairRef<V1, V2>> {
...
src/array/cpu/gather_mm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/gaher_mm.cc
  * @brief GatherMM C APIs and definitions.
  */
-#include "./gather_mm.h"
+#include "gather_mm.h"
 #include <dgl/array.h>
...
src/array/cpu/labor_sampling.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /*!
  * Copyright (c) 2022, NVIDIA Corporation
  * Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
...
@@ -18,7 +19,7 @@
  * \file array/cuda/labor_sampling.cc
  * \brief labor sampling
  */
-#include "./labor_pick.h"
+#include "labor_pick.h"
 namespace dgl {
 namespace aten {
...
src/array/cpu/rowwise_sampling.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/rowwise_sampling.cc
...
@@ -7,7 +8,7 @@
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {
...
src/array/cpu/rowwise_topk.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/rowwise_topk.cc
...
@@ -6,7 +7,7 @@
 #include <algorithm>
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {
...
src/array/cpu/sddmm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file aten/cpu/sddmm.cc
  * @brief SDDMM C APIs and definitions.
  */
-#include "./sddmm.h"
+#include "sddmm.h"
 #include <dgl/array.h>
...
src/array/cpu/segment_reduce.cc  (+3 -2)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/segment_reduce.cc
  * @brief Segment reduce C APIs and definitions.
  */
-#include "./segment_reduce.h"
+#include "segment_reduce.h"
 #include <dgl/array.h>
 #include <string>
-#include "./spmm_binary_ops.h"
+#include "spmm_binary_ops.h"
 namespace dgl {
 namespace aten {
...
src/array/cpu/spmm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/spmm.cc
  * @brief SPMM C APIs and definitions.
  */
-#include "./spmm.h"
+#include "spmm.h"
 #include <dgl/array.h>
...
src/array/cpu/traversal.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/traversal.cc
  * @brief Graph traversal implementation
  */
-#include "./traversal.h"
+#include "traversal.h"
 #include <dgl/graph_traversal.h>
...
src/array/cuda/array_cumsum.cu → src/array/cuda/array_cumsum.hip  (+9 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/array_cumsum.cu
  * @brief Array cumsum GPU implementation
  */
-#include <dgl/array.h>
+#include "../../../include/dgl/array.h"
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
...
@@ -23,7 +26,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
                    : aten::Full(0, 1, array->dtype.bits, array->ctx);
   auto device = runtime::DeviceAPI::Get(array->ctx);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const IdType* in_d = array.Ptr<IdType>();
   IdArray ret;
   IdType* out_d = nullptr;
...
@@ -36,16 +39,16 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
   }
   // Allocate workspace
   size_t workspace_size = 0;
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
       nullptr, workspace_size, in_d, out_d, len, stream));
   void* workspace = device->AllocWorkspace(array->ctx, workspace_size);
   // Compute cumsum
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
       workspace, workspace_size, in_d, out_d, len, stream));
   device->FreeWorkspace(array->ctx, workspace);
   std::cout << "cuda ret : " << ret << std::endl;
   return ret;
 }
...
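The hipified calls keep CUB's two-phase workspace protocol, and the CUDA_CALL wrapper is retained even though the callee is now hipCUB: the first InclusiveSum call with a null workspace pointer only reports the required temporary-storage size, and the second performs the scan. A self-contained sketch of that pattern (assumes a ROCm toolchain with hipCUB; DGL's workspace helpers are replaced by raw HIP calls and error handling is elided):

#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>

void InclusiveSumSketch(const int64_t* d_in, int64_t* d_out, int len,
                        hipStream_t stream) {
  size_t temp_bytes = 0;
  // Pass 1: null workspace, only the required size is written.
  hipcub::DeviceScan::InclusiveSum(nullptr, temp_bytes, d_in, d_out, len,
                                   stream);
  void* d_temp = nullptr;
  hipMalloc(&d_temp, temp_bytes);
  // Pass 2: same arguments with a real workspace performs the scan.
  hipcub::DeviceScan::InclusiveSum(d_temp, temp_bytes, d_in, d_out, len,
                                   stream);
  hipFree(d_temp);
}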
src/array/cuda/array_index_select.cuh  (+2 -0)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2021-2022 by Contributors
  * @file array/cuda/array_index_select.cuh
...
src/array/cuda/array_index_select.cu → src/array/cuda/array_index_select.hip  (+9 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/cpu/array_index_select.cu
  * @brief Array index select GPU implementation
  */
-#include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
-#include "./array_index_select.cuh"
-#include "./utils.h"
+#include "array_index_select.cuh"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
...
@@ -33,7 +36,7 @@ NDArray IndexSelect(NDArray array, IdArray index) {
   const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
   const IdType* idx_data = static_cast<IdType*>(index->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   if (num_feat == 1) {
     const int nt = cuda::FindNumThreads(len);
     const int nb = (len + nt - 1) / nt;
...
@@ -61,9 +64,9 @@ template NDArray IndexSelect<kDGLCUDA, int64_t, int64_t>(NDArray, IdArray);
 template NDArray IndexSelect<kDGLCUDA, __half, int32_t>(NDArray, IdArray);
 template NDArray IndexSelect<kDGLCUDA, __half, int64_t>(NDArray, IdArray);
 #if BF16_ENABLED
-template NDArray IndexSelect<kDGLCUDA, __nv_bfloat16, int32_t>(
+template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int32_t>(
     NDArray, IdArray);
-template NDArray IndexSelect<kDGLCUDA, __nv_bfloat16, int64_t>(
+template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int64_t>(
     NDArray, IdArray);
 #endif  // BF16_ENABLED
 template NDArray IndexSelect<kDGLCUDA, float, int32_t>(NDArray, IdArray);
...
@@ -87,7 +90,7 @@ template uint32_t IndexSelect<kDGLCUDA, uint32_t>(NDArray array, int64_t index);
 template uint64_t IndexSelect<kDGLCUDA, uint64_t>(NDArray array, int64_t index);
 template __half IndexSelect<kDGLCUDA, __half>(NDArray array, int64_t index);
 #if BF16_ENABLED
-template __nv_bfloat16 IndexSelect<kDGLCUDA, __nv_bfloat16>(
+template __hip_bfloat16 IndexSelect<kDGLCUDA, __hip_bfloat16>(
     NDArray array, int64_t index);
 #endif  // BF16_ENABLED
 template float IndexSelect<kDGLCUDA, float>(NDArray array, int64_t index);
...
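Besides the stream getter, hipify swaps the bfloat16 type: __nv_bfloat16 from CUDA's cuda_bf16.h becomes __hip_bfloat16 from HIP's hip/hip_bf16.h, while the explicit instantiations keep the kDGLCUDA device tag. A minimal sketch of device code using the HIP type (an illustrative helper, not from the commit; assumes ROCm's hip_bf16.h conversion intrinsics, and round-trips through float because bf16 arithmetic support varies by architecture):

#include <hip/hip_runtime.h>
#include <hip/hip_bf16.h>

// Halve a bfloat16 value on the device via float conversion intrinsics.
__device__ __hip_bfloat16 HalveBf16(__hip_bfloat16 x) {
  return __float2bfloat16(__bfloat162float(x) * 0.5f);
}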
src/array/cuda/array_nonzero.cu → src/array/cuda/array_nonzero.hip  (+10 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/array_nonzero.cc
...
@@ -5,11 +7,13 @@
  */
-#include <dgl/array.h>
+#include "../../../include/dgl/array.h"
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
...
@@ -33,24 +37,24 @@ IdArray NonZero(IdArray array) {
   const int64_t len = array->shape[0];
   IdArray ret = NewIdArray(len, ctx, 64);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const IdType* const in_data = static_cast<const IdType*>(array->data);
   int64_t* const out_data = static_cast<int64_t*>(ret->data);
   IsNonZeroIndex<IdType> comp(in_data);
-  cub::CountingInputIterator<int64_t> counter(0);
+  hipcub::CountingInputIterator<int64_t> counter(0);
   // room for cub to output on GPU
   int64_t* d_num_nonzeros =
       static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
   size_t temp_size = 0;
-  CUDA_CALL(cub::DeviceSelect::If(
+  CUDA_CALL(hipcub::DeviceSelect::If(
       nullptr, temp_size, counter, out_data, d_num_nonzeros, len, comp,
       stream));
   void* temp = device->AllocWorkspace(ctx, temp_size);
-  CUDA_CALL(cub::DeviceSelect::If(
+  CUDA_CALL(hipcub::DeviceSelect::If(
       temp, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream));
   device->FreeWorkspace(ctx, temp);
...
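This NonZero implementation is stream compaction: a counting iterator enumerates candidate indices 0..len-1, and DeviceSelect::If keeps the indices whose input element is nonzero, writing the kept count to device memory. A self-contained sketch of the same pattern (assumes hipCUB; DGL's workspace allocator and CUDA_CALL replaced by raw HIP calls, error handling elided):

#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>

// Predicate: keep index i when the input element at i is nonzero.
struct IsNonZeroSketch {
  const int* data;
  __host__ __device__ bool operator()(int64_t i) const { return data[i] != 0; }
};

void NonZeroSketch(const int* d_in, int64_t* d_out_idx,
                   int64_t* d_num_selected, int len, hipStream_t stream) {
  hipcub::CountingInputIterator<int64_t> counter(0);  // yields 0, 1, 2, ...
  IsNonZeroSketch pred{d_in};
  size_t temp_bytes = 0;
  // Pass 1: query workspace size; pass 2: compact the indices.
  hipcub::DeviceSelect::If(nullptr, temp_bytes, counter, d_out_idx,
                           d_num_selected, len, pred, stream);
  void* d_temp = nullptr;
  hipMalloc(&d_temp, temp_bytes);
  hipcub::DeviceSelect::If(d_temp, temp_bytes, counter, d_out_idx,
                           d_num_selected, len, pred, stream);
  hipFree(d_temp);
}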
src/array/cuda/array_op_impl.cu → src/array/cuda/array_op_impl.hip  (+16 -12)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020-2021 by Contributors
  * @file array/cuda/array_op_impl.cu
  * @brief Array operator GPU implementation
  */
-#include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
 #include "../../runtime/cuda/cuda_hashtable.cuh"
 #include "../arith.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
...
@@ -36,7 +40,7 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   const IdType* rhs_data = static_cast<IdType*>(rhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -107,7 +111,7 @@ IdArray BinaryElewise(IdArray lhs, IdType rhs) {
   IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -178,7 +182,7 @@ IdArray BinaryElewise(IdType lhs, IdArray rhs) {
   IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
   const IdType* rhs_data = static_cast<IdType*>(rhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -249,7 +253,7 @@ IdArray UnaryElewise(IdArray lhs) {
   IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -277,7 +281,7 @@ template <DGLDeviceType XPU, typename DType>
 NDArray Full(DType val, int64_t length, DGLContext ctx) {
   NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits<DType>::dtype, ctx);
   DType* ret_data = static_cast<DType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -292,8 +296,8 @@ template IdArray Full<kDGLCUDA, int64_t>(
 template IdArray Full<kDGLCUDA, __half>(
     __half val, int64_t length, DGLContext ctx);
 #if BF16_ENABLED
-template IdArray Full<kDGLCUDA, __nv_bfloat16>(
-    __nv_bfloat16 val, int64_t length, DGLContext ctx);
+template IdArray Full<kDGLCUDA, __hip_bfloat16>(
+    __hip_bfloat16 val, int64_t length, DGLContext ctx);
 #endif  // BF16_ENABLED
 template IdArray Full<kDGLCUDA, float>(
     float val, int64_t length, DGLContext ctx);
...
@@ -319,7 +323,7 @@ IdArray Range(IdType low, IdType high, DGLContext ctx) {
   IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
   if (length == 0) return ret;
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   CUDA_KERNEL_CALL(
...
@@ -355,7 +359,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
   const auto& ctx = arrays[0]->ctx;
   auto device = runtime::DeviceAPI::Get(ctx);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   // build node maps and get the induced nodes
   OrderedHashTable<IdType> node_map(total_length, ctx, stream);
...
@@ -364,7 +368,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
       static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
   IdArray induced_nodes = NewIdArray(total_length, ctx, sizeof(IdType) * 8);
-  CUDA_CALL(cudaMemsetAsync(
+  CUDA_CALL(hipMemsetAsync(
       num_induced_device, 0, sizeof(*num_induced_device), stream));
   node_map.FillWithDuplicates(
...
@@ -416,7 +420,7 @@ IdArray AsNumBits(IdArray arr, uint8_t bits) {
   const std::vector<int64_t> shape(arr->shape, arr->shape + arr->ndim);
   IdArray ret = IdArray::Empty(shape, DGLDataType{kDGLInt, bits, 1}, arr->ctx);
   const int64_t length = ret.NumElements();
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   if (bits == 32) {
...
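Every rewritten call site keeps the same launch arithmetic: FindNumThreads picks a per-block thread count nt, and nb = (len + nt - 1) / nt is a ceiling division yielding the block count. A standalone sketch of that pattern in plain HIP (kernel and names are illustrative; DGL's CUDA_KERNEL_CALL macro wraps the launch with error checking):

#include <hip/hip_runtime.h>

// Illustrative elementwise kernel: ret[i] = lhs[i] + rhs.
__global__ void AddScalarKernel(const int64_t* lhs, int64_t rhs,
                                int64_t* ret, int64_t len) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < len) ret[i] = lhs[i] + rhs;
}

void LaunchAddScalar(const int64_t* d_lhs, int64_t rhs, int64_t* d_ret,
                     int64_t len, hipStream_t stream) {
  const int nt = 256;                                    // threads per block
  const int nb = static_cast<int>((len + nt - 1) / nt);  // ceil(len / nt)
  hipLaunchKernelGGL(AddScalarKernel, dim3(nb), dim3(nt), 0, stream,
                     d_lhs, rhs, d_ret, len);
}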
src/array/cuda/array_scatter.cu → src/array/cuda/array_scatter.hip  (+8 -4)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/cuda/array_scatter.cu
  * @brief Array scatter GPU implementation
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
...
@@ -31,7 +35,7 @@ void Scatter_(IdArray index, NDArray value, NDArray out) {
   const DType* val = value.Ptr<DType>();
   DType* outd = out.Ptr<DType>();
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const int nt = cuda::FindNumThreads(len);
   const int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(_ScatterKernel, nb, nt, 0, stream, idx, val, len, outd);
...
@@ -41,7 +45,7 @@ template void Scatter_<kDGLCUDA, int32_t, int32_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, int64_t, int32_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, __half, int32_t>(IdArray, NDArray, NDArray);
 #if BF16_ENABLED
-template void Scatter_<kDGLCUDA, __nv_bfloat16, int32_t>(
+template void Scatter_<kDGLCUDA, __hip_bfloat16, int32_t>(
     IdArray, NDArray, NDArray);
 #endif  // BF16_ENABLED
 template void Scatter_<kDGLCUDA, float, int32_t>(IdArray, NDArray, NDArray);
...
@@ -50,7 +54,7 @@ template void Scatter_<kDGLCUDA, int32_t, int64_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, int64_t, int64_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, __half, int64_t>(IdArray, NDArray, NDArray);
 #if BF16_ENABLED
-template void Scatter_<kDGLCUDA, __nv_bfloat16, int64_t>(
+template void Scatter_<kDGLCUDA, __hip_bfloat16, int64_t>(
     IdArray, NDArray, NDArray);
 #endif  // BF16_ENABLED
 template void Scatter_<kDGLCUDA, float, int64_t>(IdArray, NDArray, NDArray);
...
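The launch site names _ScatterKernel but its body is outside this diff. A minimal sketch of what a scatter kernel of this shape computes (illustrative, not the repository's implementation): each thread i writes value[i] to out[index[i]].

#include <hip/hip_runtime.h>

// Illustrative scatter kernel: out[index[i]] = value[i] for i < len.
// Indices are assumed unique; duplicates would race.
template <typename DType, typename IdType>
__global__ void ScatterSketchKernel(const IdType* index, const DType* value,
                                    int64_t len, DType* out) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < len) out[index[i]] = value[i];
}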